├── .github ├── .gitignore └── workflows │ ├── check-release.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── vignettes └── .gitignore ├── revdep ├── problems.md ├── .gitignore ├── email.yml ├── cran.md ├── failures.md └── README.md ├── README_cache └── gfm │ ├── ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdb │ ├── ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdb │ ├── ex_penguins_94ce9c85c6a85209aee72043527a7434.rdb │ ├── ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdb │ ├── ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdb │ ├── ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdb │ ├── ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdb │ ├── ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdb │ ├── ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdb │ ├── unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdb │ ├── unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdb │ ├── ex0_08952e0ed30fe2644cd3463413708f0d.rdb │ ├── ex0_08952e0ed30fe2644cd3463413708f0d.rdx │ ├── ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdx │ ├── ex0_08952e0ed30fe2644cd3463413708f0d.RData │ ├── ex1_2b50644f33f9242e4b3a2f6a7181e16f.RData │ ├── hdrcde_24608eda67b7b094496dd13a3d335ced.rdb │ ├── hdrcde_24608eda67b7b094496dd13a3d335ced.rdx │ ├── get-hdr_56a211107c10d29abbf2427dc2311e41.rdb │ ├── get-hdr_56a211107c10d29abbf2427dc2311e41.rdx │ ├── hdrcde_24608eda67b7b094496dd13a3d335ced.RData │ ├── ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdb │ ├── ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdx │ ├── ex_penguins_94ce9c85c6a85209aee72043527a7434.rdx │ ├── get-hdr-1d_9207c40522aca39a887fed29e70de154.rdb │ ├── get-hdr-1d_9207c40522aca39a887fed29e70de154.rdx │ ├── get-hdr_56a211107c10d29abbf2427dc2311e41.RData │ ├── ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdx │ ├── ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdb │ ├── ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdx │ ├── ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdb │ ├── ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdx │ ├── ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.RData │ ├── ex_penguins_94ce9c85c6a85209aee72043527a7434.RData │ ├── get-hdr-1d_9207c40522aca39a887fed29e70de154.RData │ ├── ex-expand-lims_b653f06305dd4a631453adfca8caca66.RData │ ├── ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdb │ ├── ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdx │ ├── ex_after_stat_44cf34e134db261c5d53879c855e5cb7.RData │ ├── ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.RData │ ├── ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.RData │ ├── geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdb │ ├── geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdx │ ├── unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdx │ ├── unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdx │ ├── __packages │ ├── ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdb │ ├── ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdx │ ├── ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdx │ ├── ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdx │ ├── ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdx │ ├── ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdx │ ├── ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdx │ ├── ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdx │ ├── geom_hdr_points_db8f7c026af53acd936d8266c73964ed.RData │ ├── unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.RData │ ├── unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.RData │ ├── ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.RData │ ├── ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.RData │ ├── ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.RData │ ├── ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.RData │ ├── ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.RData │ ├── ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.RData │ └── ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.RData ├── LICENSE ├── man ├── figures │ ├── logo.png │ ├── README-ex0-1.png │ ├── README-ex1-1.png │ ├── README-hdrcde-1.png │ ├── README-hdrcde-2.png │ ├── README-ex_methods-1.png │ ├── README-hdrcde-2-1.png │ ├── README-ex_hdr_fun_1-1.png │ ├── README-ex_hdr_fun_2-1.png │ ├── README-ex_penguins-1.png │ ├── README-ex-expand-lims-1.png │ ├── README-ex_after_stat-1.png │ ├── README-ex_after_stat-2.png │ ├── README-geom_hdr_points-1.png │ ├── README-unnamed-chunk-3-1.png │ ├── README-ex_geom_hdr_rug_1-1.png │ ├── README-ex_geom_hdr_rug_2-1.png │ ├── README-ex_geom_hdr_rug_3-1.png │ ├── README-ex_geom_hdr_rug_4-1.png │ ├── README-ex_penguins_facet-1.png │ ├── README-ex_penguins_lines-1.png │ └── README-ex_geom_hdr_points-1.png ├── ggdensity.Rd ├── method_norm_1d.Rd ├── method_mvnorm.Rd ├── method_freqpoly.Rd ├── method_freqpoly_1d.Rd ├── method_histogram_1d.Rd ├── method_kde.Rd ├── method_histogram.Rd ├── method_kde_1d.Rd ├── get_hdr_1d.Rd ├── geom_hdr_points.Rd ├── get_hdr.Rd ├── geom_hdr_points_fun.Rd ├── geom_hdr_fun.Rd ├── geom_hdr_rug_fun.Rd ├── geom_hdr_rug.Rd └── geom_hdr.Rd ├── _pkgdown.yml ├── pkgdown └── favicon │ ├── favicon.ico │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── apple-touch-icon.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ └── apple-touch-icon-180x180.png ├── CRAN-SUBMISSION ├── cran-comments.md ├── tests ├── testthat │ ├── fixtures │ │ └── df_norm.rds │ ├── test-res_to_df_1d.R │ ├── test-fix_probs.R │ ├── test-res_to_df.R │ ├── test-get_hdr_1d.R │ ├── test-visual-tests.R │ ├── test-layer-wrappers.R │ └── test-get_hdr.R └── testthat.R ├── R ├── attach.R ├── ggdensity-package.R ├── helpers.R ├── hdr_lines.R ├── hdr_lines_fun.R ├── helpers-ggplot2.R ├── hdr_points.R ├── hdr_points_fun.R ├── hdr_fun.R ├── hdr_rug_fun.R ├── method_1d.R ├── hdr.R ├── get_hdr_1d.R ├── get_hdr.R ├── hdr_rug.R └── method.R ├── .gitignore ├── .Rbuildignore ├── ggdensity.Rproj ├── LICENSE.md ├── DESCRIPTION ├── NAMESPACE └── NEWS.md /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | *Wow, no problems at all. :)* -------------------------------------------------------------------------------- /README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2021 2 | COPYRIGHT HOLDER: ggdensity authors 3 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdb: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/logo.png -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://jamesotto852.github.io/ggdensity/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 1.0.0 2 | Date: 2023-02-09 22:57:39 UTC 3 | SHA: 54e4677246f7f7d4e50b02d4a5d61b993900c46f 4 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 notes 4 | 5 | * This is a new release. 6 | -------------------------------------------------------------------------------- /man/figures/README-ex0-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex0-1.png -------------------------------------------------------------------------------- /man/figures/README-ex1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex1-1.png -------------------------------------------------------------------------------- /man/figures/README-hdrcde-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-hdrcde-1.png -------------------------------------------------------------------------------- /man/figures/README-hdrcde-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-hdrcde-2.png -------------------------------------------------------------------------------- /man/figures/README-ex_methods-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_methods-1.png -------------------------------------------------------------------------------- /man/figures/README-hdrcde-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-hdrcde-2-1.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | cloud.noindex 6 | data.sqlite 7 | *.html 8 | -------------------------------------------------------------------------------- /tests/testthat/fixtures/df_norm.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/tests/testthat/fixtures/df_norm.rds -------------------------------------------------------------------------------- /man/figures/README-ex_hdr_fun_1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_hdr_fun_1-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_hdr_fun_2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_hdr_fun_2-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_penguins-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_penguins-1.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /man/figures/README-ex-expand-lims-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex-expand-lims-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_after_stat-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_after_stat-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_after_stat-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_after_stat-2.png -------------------------------------------------------------------------------- /man/figures/README-geom_hdr_points-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-geom_hdr_points-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: ??? 2 | rel_release_date: ??? 3 | my_news_url: ??? 4 | release_version: ??? 5 | release_details: ??? 6 | -------------------------------------------------------------------------------- /man/figures/README-ex_geom_hdr_rug_1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_1-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_geom_hdr_rug_2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_2-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_geom_hdr_rug_3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_3-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_geom_hdr_rug_4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_rug_4-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_penguins_facet-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_penguins_facet-1.png -------------------------------------------------------------------------------- /man/figures/README-ex_penguins_lines-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_penguins_lines-1.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /man/figures/README-ex_geom_hdr_points-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/man/figures/README-ex_geom_hdr_points-1.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdb -------------------------------------------------------------------------------- /README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex0_08952e0ed30fe2644cd3463413708f0d.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex1_2b50644f33f9242e4b3a2f6a7181e16f.RData -------------------------------------------------------------------------------- /README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdb -------------------------------------------------------------------------------- /README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.rdx -------------------------------------------------------------------------------- /README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdb -------------------------------------------------------------------------------- /README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.rdx -------------------------------------------------------------------------------- /README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/hdrcde_24608eda67b7b094496dd13a3d335ced.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdb -------------------------------------------------------------------------------- /README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.rdx -------------------------------------------------------------------------------- /README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdb -------------------------------------------------------------------------------- /README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.rdx -------------------------------------------------------------------------------- /README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr_56a211107c10d29abbf2427dc2311e41.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdb -------------------------------------------------------------------------------- /README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdb -------------------------------------------------------------------------------- /README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_methods_5cdc47a930db4912fb0c1e999fdb87b6.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_94ce9c85c6a85209aee72043527a7434.RData -------------------------------------------------------------------------------- /README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/get-hdr-1d_9207c40522aca39a887fed29e70de154.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdb -------------------------------------------------------------------------------- /README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex-expand-lims_b653f06305dd4a631453adfca8caca66.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_after_stat_44cf34e134db261c5d53879c855e5cb7.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_1_5473b5f5faa0b7a2127a88e22ad8c80d.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_hdr_fun_2_14673d5a49ce5b7dd18d93502bd4d281.RData -------------------------------------------------------------------------------- /README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdb -------------------------------------------------------------------------------- /README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.rdx -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.rdx -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.rdx -------------------------------------------------------------------------------- /R/attach.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(...) { 2 | if(!interactive() || stats::runif(1) > 0.1) return() 3 | packageStartupMessage(' Please cite ggdensity! See citation("ggdensity") for details.') 4 | } 5 | -------------------------------------------------------------------------------- /README_cache/gfm/__packages: -------------------------------------------------------------------------------- 1 | ggplot2 2 | ggdensity 3 | palmerpenguins 4 | purrr 5 | patchwork 6 | tidyverse 7 | tibble 8 | tidyr 9 | readr 10 | dplyr 11 | stringr 12 | forcats 13 | hdrcde 14 | -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdb -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.rdx -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.rdx -------------------------------------------------------------------------------- /README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/geom_hdr_points_db8f7c026af53acd936d8266c73964ed.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-2_c4bd204dfd1419586cd1fa2f59f90193.RData -------------------------------------------------------------------------------- /README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/unnamed-chunk-3_5cd8ebff29d7c6ebd1d9b8e2174f490b.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_points_047805f081167ca75d87bbbd5a0a2349.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_1_db8bf282d6472d89791c1ffdc1f51256.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_2_c2e0d21eebd484842bbafecec73a79d3.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_3_3ead8bcdbc146ff881386453273ef60c.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_geom_hdr_rug_4_b9aa73d0abd62a863a4a1d04eeae98c7.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_facet_4966579da1173ac3322b3a81b44f50e6.RData -------------------------------------------------------------------------------- /README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jamesotto852/ggdensity/HEAD/README_cache/gfm/ex_penguins_lines_bbb213d7cc08ea19944be57b8e523f50.RData -------------------------------------------------------------------------------- /revdep/cran.md: -------------------------------------------------------------------------------- 1 | ## revdepcheck results 2 | 3 | We checked 1 reverse dependencies (0 from CRAN + 1 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package. 4 | 5 | * We saw 0 new problems 6 | * We failed to check 0 packages 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .Rdata 4 | .httr-oauth 5 | .DS_Store 6 | docs 7 | inst/doc 8 | /doc/ 9 | /Meta/ 10 | 11 | revdep/checks 12 | revdep/library 13 | revdep/checks.noindex 14 | revdep/library.noindex 15 | revdep/data.sqlite 16 | revdep/cloud.noindex 17 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^ggdensity\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^README\.md$ 6 | ^README_cache$ 7 | ^man/figures$ 8 | ^_pkgdown\.yml$ 9 | ^docs$ 10 | ^pkgdown$ 11 | ^\.github$ 12 | ^CRAN-SUBMISSION$ 13 | ^doc$ 14 | ^Meta$ 15 | ^revdep$ 16 | ^cran-comments\.md$ 17 | -------------------------------------------------------------------------------- /revdep/failures.md: -------------------------------------------------------------------------------- 1 | # ggblanket 2 | 3 |
4 | 5 | * Version: 6 | * GitHub: https://github.com/jamesotto852/ggdensity 7 | * Source code: NA 8 | * Number of recursive dependencies: 0 9 | 10 |
11 | 12 | ## Error before installation 13 | 14 | ### Devel 15 | 16 | ``` 17 | 18 | 19 | 20 | 21 | 22 | 23 | ``` 24 | ### CRAN 25 | 26 | ``` 27 | 28 | 29 | 30 | 31 | 32 | 33 | ``` 34 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(ggdensity) 11 | 12 | test_check("ggdensity") 13 | -------------------------------------------------------------------------------- /ggdensity.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/ggdensity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ggdensity-package.R 3 | \docType{package} 4 | \name{ggdensity} 5 | \alias{ggdensity} 6 | \alias{package-ggdensity} 7 | \title{ggdensity: Stats and Geoms for Density Estimation with ggplot2} 8 | \description{ 9 | A package that allows more flexible computations for visualization of density 10 | estimates with ggplot2. 11 | } 12 | \seealso{ 13 | Useful links: 14 | \itemize{ 15 | \item \url{https://jamesotto852.github.io/ggdensity/} 16 | \item \url{https://github.com/jamesotto852/ggdensity/} 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /R/ggdensity-package.R: -------------------------------------------------------------------------------- 1 | #' ggdensity: Stats and Geoms for Density Estimation with ggplot2 2 | #' 3 | #' A package that allows more flexible computations for visualization of density 4 | #' estimates with ggplot2. 5 | #' 6 | #' @seealso 7 | #' 8 | #' Useful links: 9 | #' * \url{https://jamesotto852.github.io/ggdensity/} 10 | #' * \url{https://github.com/jamesotto852/ggdensity/} 11 | #' 12 | #' @import ggplot2 13 | #' @importFrom MASS bandwidth.nrd kde2d 14 | #' @importFrom stats uniroot cov pchisq setNames sd cor dnorm 15 | #' @docType package 16 | #' @name ggdensity 17 | #' @aliases ggdensity package-ggdensity 18 | NULL 19 | -------------------------------------------------------------------------------- /tests/testthat/test-res_to_df_1d.R: -------------------------------------------------------------------------------- 1 | test_that("res_to_df_1d returns correct structure for each value of output", { 2 | 3 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 4 | probs <- c(.99, .95, .80, .50) 5 | 6 | res <- get_hdr_1d(data$x, method_kde_1d(), probs) 7 | 8 | # Checking output == "rug" 9 | df_rug <- res_to_df_1d(res, probs, group = 1, output = "rug") 10 | expect_type(df_rug, "list") 11 | expect_equal(colnames(df_rug), c("x", "fhat", "fhat_discretized", "probs")) 12 | expect(is.ordered(df_rug$probs), "probs is an ordered object") 13 | expect_equal(levels(df_rug$probs), scales::percent_format(accuracy = 1)(probs)) 14 | 15 | }) 16 | -------------------------------------------------------------------------------- /tests/testthat/test-fix_probs.R: -------------------------------------------------------------------------------- 1 | test_that("fix_probs() works as intended", { 2 | 3 | # Check defaults 4 | expect_equal(fix_probs(c(.99, .95, .80, .50)), c(.99, .95, .80, .50)) 5 | 6 | # Reorders probabilities correctly 7 | expect_equal(fix_probs(c(.80, .50, .99, .95)), c(.99, .95, .80, .50)) 8 | 9 | # Works with vectors of length 1 10 | expect_equal(fix_probs(.50), .5) 11 | 12 | # Issues error if any probabilites are outside (0, 1) 13 | expect_error(fix_probs(c(1.1, .80, .5)), regexp = "must be between") 14 | expect_error(fix_probs(c(.80, .5, -1)), regexp = "must be between") 15 | expect_error(fix_probs(c(1)), regexp = "must be between") 16 | expect_error(fix_probs(c(0)), regexp = "must be between") 17 | 18 | }) 19 | -------------------------------------------------------------------------------- /.github/workflows/check-release.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | R_KEEP_PKG_SOURCE: yes 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::rcmdcheck 27 | needs: check 28 | 29 | - uses: r-lib/actions/check-r-package@v2 30 | -------------------------------------------------------------------------------- /man/method_norm_1d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method_1d.R 3 | \name{method_norm_1d} 4 | \alias{method_norm_1d} 5 | \title{Univariate parametric normal HDR estimator} 6 | \usage{ 7 | method_norm_1d() 8 | } 9 | \description{ 10 | Function used to specify univariate normal density estimator 11 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}). 12 | } 13 | \details{ 14 | For more details on the use and implementation of the \verb{method_*_1d()} functions, 15 | see \code{vignette("method", "ggdensity")}. 16 | } 17 | \examples{ 18 | # Normal estimators are useful when an assumption of normality is appropriate 19 | df <- data.frame(x = rnorm(1e3)) 20 | 21 | ggplot(df, aes(x)) + 22 | geom_hdr_rug(method = method_norm_1d()) + 23 | geom_density() 24 | 25 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 26 | res <- get_hdr_1d(df$x, method = method_norm_1d()) 27 | str(res) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /man/method_mvnorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method.R 3 | \name{method_mvnorm} 4 | \alias{method_mvnorm} 5 | \title{Bivariate parametric normal HDR estimator} 6 | \usage{ 7 | method_mvnorm() 8 | } 9 | \description{ 10 | Function used to specify bivariate normal density estimator 11 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}). 12 | } 13 | \details{ 14 | For more details on the use and implementation of the \verb{method_*()} functions, 15 | see \code{vignette("method", "ggdensity")}. 16 | } 17 | \examples{ 18 | # Normal estimator is useful when an assumption of normality is appropriate 19 | set.seed(1) 20 | df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) 21 | 22 | ggplot(df, aes(x, y)) + 23 | geom_hdr(method = method_mvnorm(), xlim = c(-4, 4), ylim = c(-4, 4)) + 24 | geom_point(size = 1) 25 | 26 | # Can also be used with `get_hdr()` for numerical summary of HDRs 27 | res <- get_hdr(df, method = method_mvnorm()) 28 | str(res) 29 | 30 | } 31 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2021 ggdensity authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /R/helpers.R: -------------------------------------------------------------------------------- 1 | # this script contains several unexported helper functions 2 | 3 | # normalization/scaling functions 4 | normalize <- function(v) v / sum(v) 5 | standardize <- function(v, min = min(v), max = max(v)) (v - min) / (max - min) 6 | rescale <- function(v) v / max(v) 7 | 8 | # discrete approximation to P[f^(X,Y) >= c] 9 | prob_above_c <- function(df, c) { 10 | if (length(c) > 1) return(vapply(c, prob_above_c, df = df, numeric(1))) 11 | with(df, sum(fhat_discretized[fhat >= c])) 12 | } 13 | 14 | # numerical approximation for finding hdr 15 | # if method = "histogram", don't want to use uniroot, runs into issue if n is small 16 | find_cutoff <- function(df, conf, uniroot = TRUE) { 17 | 18 | if (length(conf) > 1) return(vapply(conf, function(x) find_cutoff(df, x, uniroot), numeric(1))) 19 | 20 | # the following is set to FALSE to override when other code calls 21 | # the function with uniroot = TRUE, remove once we're confident the 22 | # code works in all circumstances 23 | if (FALSE) { 24 | 25 | uniroot(function(c) prob_above_c(df, c) - conf, lower = 0, upper = 1)$root 26 | 27 | } else { 28 | 29 | # sort df rows by fhat 30 | df <- df[order(df$fhat, decreasing = TRUE),] 31 | 32 | # compute cumsum of probs 33 | df$cumprob <- cumsum(df$fhat_discretized) 34 | 35 | # determine cutoff 36 | max(df[df$cumprob >= conf,]$fhat) 37 | 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /tests/testthat/test-res_to_df.R: -------------------------------------------------------------------------------- 1 | test_that("res_to_df returns correct structure for each value of output", { 2 | 3 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 4 | probs <- c(.99, .95, .80, .50) 5 | 6 | res <- get_hdr(data, method_kde(), probs) 7 | 8 | # Checking output == "bands" 9 | df_bands <- res_to_df(res, probs, group = 1, output = "bands") 10 | expect_type(df_bands, "list") 11 | expect_equal(colnames(df_bands), c("x", "y", "piece", "group", "subgroup", ".size", "probs")) 12 | expect(is.ordered(df_bands$probs), "probs is an ordered object") 13 | expect_equal(levels(df_bands$probs), scales::percent_format(accuracy = 1)(probs)) 14 | 15 | # Checking output == "lines" 16 | df_lines <- res_to_df(res, probs, group = 1, output = "lines") 17 | expect_type(df_lines, "list") 18 | expect_equal(colnames(df_lines), c("x", "y", "piece", "group", ".size", "probs")) 19 | expect(is.ordered(df_lines$probs), "probs is an ordered object") 20 | expect_equal(levels(df_lines$probs), scales::percent_format(accuracy = 1)(probs)) 21 | 22 | # Checking output == "points" 23 | df_points <- res_to_df(res, probs, group = 1, output = "points") 24 | expect_type(df_points, "list") 25 | expect_equal(colnames(df_points), c("x", "y", "probs")) 26 | expect(is.ordered(df_points$probs), "probs is an ordered object") 27 | expect_equal(levels(df_points$probs), scales::percent_format(accuracy = 1)(c(1, probs))) 28 | 29 | }) 30 | -------------------------------------------------------------------------------- /man/method_freqpoly.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method.R 3 | \name{method_freqpoly} 4 | \alias{method_freqpoly} 5 | \title{Bivariate frequency polygon HDR estimator} 6 | \usage{ 7 | method_freqpoly(bins = NULL) 8 | } 9 | \arguments{ 10 | \item{bins}{Number of bins along each axis. 11 | Either a vector of length 2 or a scalar value which is recycled for both dimensions. 12 | Defaults to normal reference rule (Scott, pg 87).} 13 | } 14 | \description{ 15 | Function used to specify bivariate frequency polygon density estimator 16 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}). 17 | } 18 | \details{ 19 | For more details on the use and implementation of the \verb{method_*()} functions, 20 | see \code{vignette("method", "ggdensity")}. 21 | } 22 | \examples{ 23 | set.seed(1) 24 | df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) 25 | 26 | ggplot(df, aes(x, y)) + 27 | geom_hdr(method = method_freqpoly()) + 28 | geom_point(size = 1) 29 | 30 | # The resolution of the frequency polygon estimator can be set via `bins` 31 | ggplot(df, aes(x, y)) + 32 | geom_hdr(method = method_freqpoly(bins = c(8, 25))) + 33 | geom_point(size = 1) 34 | 35 | # Can also be used with `get_hdr()` for numerical summary of HDRs 36 | res <- get_hdr(df, method = method_freqpoly()) 37 | str(res) 38 | 39 | } 40 | \references{ 41 | Scott, David W. Multivariate Density Estimation (2e), Wiley. 42 | } 43 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ggdensity 2 | Title: Interpretable Bivariate Density Visualization with 'ggplot2' 3 | Version: 1.0.0.900 4 | Authors@R: 5 | c(person(given = "James", 6 | family = "Otto", 7 | role = c("aut", "cre"), 8 | email = "jamesotto852@gmail.com", 9 | comment = c(ORCID = "0000-0002-0665-2515")), 10 | person(given = "David", 11 | family = "Kahle", 12 | role = c("aut"), 13 | email = "david@kahle.io", 14 | comment = c(ORCID = "0000-0002-9999-1558"))) 15 | Description: The 'ggplot2' package provides simple functions for visualizing contours 16 | of 2-d kernel density estimates. 'ggdensity' implements several additional density estimators 17 | as well as more interpretable visualizations based on highest density regions instead of 18 | the traditional height of the estimated density surface. 19 | License: MIT + file LICENSE 20 | Encoding: UTF-8 21 | Roxygen: list(markdown = TRUE) 22 | RoxygenNote: 7.2.3 23 | Depends: 24 | ggplot2 25 | Imports: 26 | isoband, 27 | vctrs, 28 | tibble, 29 | MASS, 30 | stats, 31 | scales 32 | URL: https://jamesotto852.github.io/ggdensity/, https://github.com/jamesotto852/ggdensity/ 33 | BugReports: https://github.com/jamesotto852/ggdensity/issues/ 34 | Suggests: 35 | vdiffr, 36 | testthat (>= 3.0.0), 37 | knitr, 38 | rmarkdown 39 | Config/testthat/edition: 3 40 | VignetteBuilder: knitr 41 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(GeomHdr) 4 | export(GeomHdrFun) 5 | export(GeomHdrLines) 6 | export(GeomHdrLinesFun) 7 | export(GeomHdrRug) 8 | export(GeomHdrRugFun) 9 | export(StatHdr) 10 | export(StatHdrFun) 11 | export(StatHdrLines) 12 | export(StatHdrLinesFun) 13 | export(StatHdrPoints) 14 | export(StatHdrPointsFun) 15 | export(StatHdrRug) 16 | export(StatHdrRugFun) 17 | export(geom_hdr) 18 | export(geom_hdr_fun) 19 | export(geom_hdr_lines) 20 | export(geom_hdr_lines_fun) 21 | export(geom_hdr_points) 22 | export(geom_hdr_points_fun) 23 | export(geom_hdr_rug) 24 | export(geom_hdr_rug_fun) 25 | export(get_hdr) 26 | export(get_hdr_1d) 27 | export(method_freqpoly) 28 | export(method_freqpoly_1d) 29 | export(method_histogram) 30 | export(method_histogram_1d) 31 | export(method_kde) 32 | export(method_kde_1d) 33 | export(method_mvnorm) 34 | export(method_norm_1d) 35 | export(stat_hdr) 36 | export(stat_hdr_fun) 37 | export(stat_hdr_lines) 38 | export(stat_hdr_lines_fun) 39 | export(stat_hdr_points) 40 | export(stat_hdr_points_fun) 41 | export(stat_hdr_rug) 42 | export(stat_hdr_rug_fun) 43 | import(ggplot2) 44 | importFrom(MASS,bandwidth.nrd) 45 | importFrom(MASS,kde2d) 46 | importFrom(scales,percent) 47 | importFrom(scales,percent_format) 48 | importFrom(stats,cor) 49 | importFrom(stats,cov) 50 | importFrom(stats,dnorm) 51 | importFrom(stats,pchisq) 52 | importFrom(stats,sd) 53 | importFrom(stats,setNames) 54 | importFrom(stats,uniroot) 55 | -------------------------------------------------------------------------------- /man/method_freqpoly_1d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method_1d.R 3 | \name{method_freqpoly_1d} 4 | \alias{method_freqpoly_1d} 5 | \title{Univariate frequency polygon HDR estimator} 6 | \usage{ 7 | method_freqpoly_1d(bins = NULL) 8 | } 9 | \arguments{ 10 | \item{bins}{Number of bins. Defaults to normal reference rule (Scott, pg 59).} 11 | } 12 | \description{ 13 | Function used to specify univariate frequency polygon density estimator 14 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}). 15 | } 16 | \details{ 17 | For more details on the use and implementation of the \verb{method_*_1d()} functions, 18 | see \code{vignette("method", "ggdensity")}. 19 | } 20 | \examples{ 21 | df <- data.frame(x = rnorm(1e3)) 22 | 23 | # Strip chart to visualize 1-d data 24 | p <- ggplot(df, aes(x)) + 25 | geom_jitter(aes(y = 0), width = 0, height = 2) + 26 | scale_y_continuous(name = NULL, breaks = NULL) + 27 | coord_cartesian(ylim = c(-3, 3)) 28 | 29 | p 30 | 31 | p + geom_hdr_rug(method = method_freqpoly_1d()) 32 | 33 | # The resolution of the frequency polygon estimator can be set via `bins` 34 | p + geom_hdr_rug(method = method_freqpoly_1d(bins = 100)) 35 | 36 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 37 | res <- get_hdr_1d(df$x, method = method_freqpoly_1d()) 38 | str(res) 39 | 40 | } 41 | \references{ 42 | Scott, David W. Multivariate Density Estimation (2e), Wiley. 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::pkgdown, local::. 34 | needs: website 35 | 36 | - name: Build site 37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 38 | shell: Rscript {0} 39 | 40 | - name: Deploy to GitHub pages 🚀 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@v4.4.1 43 | with: 44 | clean: false 45 | branch: gh-pages 46 | folder: docs 47 | -------------------------------------------------------------------------------- /man/method_histogram_1d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method_1d.R 3 | \name{method_histogram_1d} 4 | \alias{method_histogram_1d} 5 | \title{Univariate histogram HDR estimator} 6 | \usage{ 7 | method_histogram_1d(bins = NULL) 8 | } 9 | \arguments{ 10 | \item{bins}{Number of bins. Defaults to normal reference rule (Scott, pg 59).} 11 | } 12 | \description{ 13 | Function used to specify univariate histogram density estimator 14 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}). 15 | } 16 | \details{ 17 | For more details on the use and implementation of the \verb{method_*_1d()} functions, 18 | see \code{vignette("method", "ggdensity")}. 19 | } 20 | \examples{ 21 | # Histogram estimators can be useful when data has boundary constraints 22 | df <- data.frame(x = rexp(1e3)) 23 | 24 | # Strip chart to visualize 1-d data 25 | p <- ggplot(df, aes(x)) + 26 | geom_jitter(aes(y = 0), width = 0, height = 2) + 27 | scale_y_continuous(name = NULL, breaks = NULL) + 28 | coord_cartesian(ylim = c(-3, 3)) 29 | 30 | p 31 | 32 | p + geom_hdr_rug(method = method_histogram_1d()) 33 | 34 | # The resolution of the histogram estimator can be set via `bins` 35 | p + geom_hdr_rug(method = method_histogram_1d(bins = 5)) 36 | 37 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 38 | res <- get_hdr_1d(df$x, method = method_histogram_1d()) 39 | str(res) 40 | 41 | } 42 | \references{ 43 | Scott, David W. Multivariate Density Estimation (2e), Wiley. 44 | } 45 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # ggdensity (development version) 2 | 3 | * Fixed ordering of probabilities in the plot legend to be independent of order specified in `probs` argument (Reported by @z3tt #32) 4 | 5 | # ggdensity 1.0.0 6 | 7 | ## Features 8 | 9 | * Added `get_hdr()` and `get_hdr_1d()` functions, 10 | exporting implementation of HDR computations (Suggested by @eliocamp #28) 11 | 12 | * Reworked `method` argument, allowing for either character or function call specification. 13 | Implemented related `method_*()` and `method_*_1d()` functions (e.g. `method_kde()` and `method_kde_1d()`). 14 | See `?get_hdr` or `vignette("method", "ggdensity")` for details (Suggested by @eliocamp #29) 15 | 16 | * Added unit tests (Suggested by @eliocamp, #30) 17 | 18 | ## Breaking Changes 19 | 20 | * Removed arguments governing density estimators from `stat_hdr()` and other layer functions--these 21 | are now specified with `method_*()` and `method_*_1d()` functions 22 | 23 | ## Fixes 24 | 25 | * [Added support](https://www.tidyverse.org/blog/2022/08/ggplot2-3-4-0-size-to-linewidth/) for the new `linewidth` aesthetic (Reported by @eliocamp, #23) 26 | 27 | # ggdensity 0.1.1 28 | 29 | ## Fixes 30 | 31 | * Removed **ggplot2** build-time dependencies (Reported by @thomasp85, #21) 32 | 33 | * Fixed bug in `stat_hdr_lines_fun()` which drew lines between components of disconnected HDRs (Reported by @afranks86, #20) 34 | 35 | 36 | # ggdensity 0.1.0 37 | 38 | ## Features 39 | 40 | * Added `geom`/`stat_hdr_rug()` for visualizing marginal HDRs via "rug plot" 41 | style graphics along plot axes (#14) 42 | 43 | * Added `geom`/`stat_hdr_points()` and `geom`/`stat_hdr_points_fun()` for 44 | visualizing HDR membership of points via a colored scatterplot (#15) 45 | 46 | ## Fixes 47 | 48 | * Changed name of computed variable in all stat functions from `level` to `probs` 49 | -------------------------------------------------------------------------------- /man/method_kde.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method.R 3 | \name{method_kde} 4 | \alias{method_kde} 5 | \title{Bivariate kernel density HDR estimator} 6 | \usage{ 7 | method_kde(h = NULL, adjust = c(1, 1)) 8 | } 9 | \arguments{ 10 | \item{h}{Bandwidth (vector of length two). If \code{NULL}, estimated 11 | using \code{\link[MASS:bandwidth.nrd]{MASS::bandwidth.nrd()}}.} 12 | 13 | \item{adjust}{A multiplicative bandwidth adjustment to be used if 'h' is 14 | 'NULL'. This makes it possible to adjust the bandwidth while still 15 | using the a bandwidth estimator. For example, \code{adjust = 1/2} means 16 | use half of the default bandwidth.} 17 | } 18 | \description{ 19 | Function used to specify bivariate kernel density estimator 20 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}). 21 | } 22 | \details{ 23 | For more details on the use and implementation of the \verb{method_*()} functions, 24 | see \code{vignette("method", "ggdensity")}. 25 | } 26 | \examples{ 27 | set.seed(1) 28 | df <- data.frame(x = rnorm(1e3, sd = 3), y = rnorm(1e3, sd = 3)) 29 | 30 | ggplot(df, aes(x, y)) + 31 | geom_hdr(method = method_kde()) + 32 | geom_point(size = 1) 33 | 34 | # The defaults of `method_kde()` are the same as the estimator for `ggplot2::geom_density_2d()` 35 | ggplot(df, aes(x, y)) + 36 | geom_density_2d_filled() + 37 | geom_hdr_lines(method = method_kde(), probs = seq(.1, .9, by = .1)) + 38 | theme(legend.position = "none") 39 | 40 | # The bandwidth of the estimator can be set directly with `h` or scaled with `adjust` 41 | ggplot(df, aes(x, y)) + 42 | geom_hdr(method = method_kde(h = 1)) + 43 | geom_point(size = 1) 44 | 45 | ggplot(df, aes(x, y)) + 46 | geom_hdr(method = method_kde(adjust = 1/2)) + 47 | geom_point(size = 1) 48 | 49 | # Can also be used with `get_hdr()` for numerical summary of HDRs 50 | res <- get_hdr(df, method = method_kde()) 51 | str(res) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /R/hdr_lines.R: -------------------------------------------------------------------------------- 1 | #' @rdname geom_hdr 2 | #' @usage NULL 3 | #' @export 4 | stat_hdr_lines <- function(mapping = NULL, data = NULL, 5 | geom = "hdr_lines", position = "identity", 6 | ..., 7 | method = "kde", 8 | probs = c(.99, .95, .8, .5), 9 | n = 100, 10 | xlim = NULL, 11 | ylim = NULL, 12 | na.rm = FALSE, 13 | show.legend = NA, 14 | inherit.aes = TRUE) { 15 | layer( 16 | data = data, 17 | mapping = mapping, 18 | stat = StatHdrLines, 19 | geom = geom, 20 | position = position, 21 | show.legend = show.legend, 22 | inherit.aes = inherit.aes, 23 | params = list( 24 | method = method, 25 | probs = probs, 26 | n = n, 27 | xlim = xlim, 28 | ylim = ylim, 29 | na.rm = na.rm, 30 | ... 31 | ) 32 | ) 33 | } 34 | 35 | 36 | #' @rdname geom_hdr 37 | #' @format NULL 38 | #' @usage NULL 39 | #' @importFrom scales percent_format 40 | #' @export 41 | StatHdrLines <- ggproto("StatHdrLines", StatHdr, 42 | output = "lines" 43 | ) 44 | 45 | 46 | #' @rdname geom_hdr 47 | #' @usage NULL 48 | #' @export 49 | geom_hdr_lines <- function(mapping = NULL, data = NULL, 50 | stat = "hdr_lines", position = "identity", 51 | ..., 52 | na.rm = FALSE, 53 | show.legend = NA, 54 | inherit.aes = TRUE) { 55 | layer( 56 | data = data, 57 | mapping = mapping, 58 | stat = stat, 59 | geom = GeomHdrLines, 60 | position = position, 61 | show.legend = show.legend, 62 | inherit.aes = inherit.aes, 63 | params = list( 64 | na.rm = na.rm, 65 | ... 66 | ) 67 | ) 68 | } 69 | 70 | 71 | #' @rdname geom_hdr 72 | #' @format NULL 73 | #' @usage NULL 74 | #' @export 75 | GeomHdrLines <- ggproto("GeomHdrLines", GeomPath, 76 | default_aes = aes( 77 | colour = "#000000", 78 | linewidth = 1, 79 | linetype = 1, 80 | alpha = NA 81 | )) 82 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Platform 2 | 3 | |field |value | 4 | |:--------|:--------------------------------------------------------------------| 5 | |version |R version 4.2.1 (2022-06-23) | 6 | |os |Ubuntu 20.04.3 LTS | 7 | |system |x86_64, linux-gnu | 8 | |ui |RStudio | 9 | |language |(EN) | 10 | |collate |C.UTF-8 | 11 | |ctype |C.UTF-8 | 12 | |tz |Etc/UTC | 13 | |date |2023-02-09 | 14 | |rstudio |2022.07.0+548 Spotted Wakerobin (server) | 15 | |pandoc |2.18 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown) | 16 | 17 | # Dependencies 18 | 19 | |package |old |new |Δ | 20 | |:------------|:-----|:-----|:--| 21 | |ggdensity |0.1.1 |1.0.0 |* | 22 | |cli |NA |3.6.0 |* | 23 | |colorspace |NA |2.1-0 |* | 24 | |fansi |NA |1.0.4 |* | 25 | |farver |NA |2.1.1 |* | 26 | |ggplot2 |NA |3.4.0 |* | 27 | |glue |NA |1.6.2 |* | 28 | |gtable |NA |0.3.1 |* | 29 | |isoband |NA |0.2.7 |* | 30 | |lifecycle |NA |1.0.3 |* | 31 | |magrittr |NA |2.0.3 |* | 32 | |pillar |NA |1.8.1 |* | 33 | |RColorBrewer |NA |1.1-3 |* | 34 | |rlang |NA |1.0.6 |* | 35 | |scales |NA |1.2.1 |* | 36 | |tibble |NA |3.1.8 |* | 37 | |utf8 |NA |1.2.3 |* | 38 | |vctrs |NA |0.5.2 |* | 39 | |viridisLite |NA |0.4.1 |* | 40 | |withr |NA |2.5.0 |* | 41 | 42 | # Revdeps 43 | 44 | ## Failed to check (1) 45 | 46 | |package |version |error |warning |note | 47 | |:---------|:-------|:-----|:-------|:----| 48 | |ggblanket |? | | | | 49 | 50 | -------------------------------------------------------------------------------- /R/hdr_lines_fun.R: -------------------------------------------------------------------------------- 1 | #' @rdname geom_hdr_fun 2 | #' @usage NULL 3 | #' @export 4 | stat_hdr_lines_fun <- function(mapping = NULL, data = NULL, 5 | geom = "hdr_lines_fun", position = "identity", 6 | ..., 7 | fun, args = list(), 8 | probs = c(.99, .95, .8, .5), 9 | xlim = NULL, ylim = NULL, n = 100, 10 | na.rm = FALSE, 11 | show.legend = NA, 12 | inherit.aes = TRUE) { 13 | 14 | if (is.null(data)) data <- ensure_nonempty_data 15 | 16 | layer( 17 | data = data, 18 | mapping = mapping, 19 | stat = StatHdrLinesFun, 20 | geom = geom, 21 | position = position, 22 | show.legend = show.legend, 23 | inherit.aes = inherit.aes, 24 | params = list( 25 | fun = fun, 26 | args = args, 27 | probs = probs, 28 | xlim = xlim, 29 | ylim = ylim, 30 | n = n, 31 | na.rm = na.rm, 32 | ... 33 | ) 34 | ) 35 | } 36 | 37 | 38 | 39 | #' @rdname geom_hdr_fun 40 | #' @format NULL 41 | #' @usage NULL 42 | #' @importFrom scales percent 43 | #' @export 44 | StatHdrLinesFun <- ggproto("StatHdrLinesFun", StatHdrFun, 45 | output = "lines" 46 | ) 47 | 48 | 49 | #' @rdname geom_hdr_fun 50 | #' @usage NULL 51 | #' @export 52 | geom_hdr_lines_fun <- function(mapping = NULL, data = NULL, 53 | stat = "hdr_lines_fun", position = "identity", 54 | ..., 55 | na.rm = FALSE, 56 | show.legend = NA, 57 | inherit.aes = TRUE) { 58 | 59 | if (is.null(data)) data <- ensure_nonempty_data 60 | 61 | layer( 62 | data = data, 63 | mapping = mapping, 64 | stat = stat, 65 | geom = GeomHdrLinesFun, 66 | position = position, 67 | show.legend = show.legend, 68 | inherit.aes = inherit.aes, 69 | params = list( 70 | na.rm = na.rm, 71 | ... 72 | ) 73 | ) 74 | } 75 | 76 | 77 | 78 | #' @rdname geom_hdr_fun 79 | #' @format NULL 80 | #' @usage NULL 81 | #' @export 82 | GeomHdrLinesFun <- ggproto("GeomHdrlinesfun", GeomHdrLines) 83 | -------------------------------------------------------------------------------- /man/method_histogram.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method.R 3 | \name{method_histogram} 4 | \alias{method_histogram} 5 | \title{Bivariate histogram HDR estimator} 6 | \usage{ 7 | method_histogram(bins = NULL, smooth = FALSE, nudgex = "none", nudgey = "none") 8 | } 9 | \arguments{ 10 | \item{bins}{Number of bins along each axis. 11 | Either a vector of length 2 or a scalar value which is recycled for both dimensions. 12 | Defaults to normal reference rule (Scott, pg 87).} 13 | 14 | \item{smooth}{If \code{TRUE}, HDRs are smoothed by the marching squares algorithm.} 15 | 16 | \item{nudgex, nudgey}{Horizontal and vertical rules for choosing witness points when \code{smooth == TRUE}. 17 | Accepts character vector: \code{"left"}, \code{"none"}, \code{"right"} (\code{nudgex}) or \code{"down"}, \code{"none"}, \code{"up"} (\code{nudgey}).} 18 | } 19 | \description{ 20 | Function used to specify bivariate histogram density estimator 21 | for \code{get_hdr()} and layer functions (e.g. \code{geom_hdr()}). 22 | } 23 | \details{ 24 | For more details on the use and implementation of the \verb{method_*()} functions, 25 | see \code{vignette("method", "ggdensity")}. 26 | } 27 | \examples{ 28 | \dontrun{ 29 | 30 | # Histogram estimators can be useful when data has boundary constraints 31 | set.seed(1) 32 | df <- data.frame(x = rexp(1e3), y = rexp(1e3)) 33 | 34 | ggplot(df, aes(x, y)) + 35 | geom_hdr(method = method_histogram()) + 36 | geom_point(size = 1) 37 | 38 | # The resolution of the histogram estimator can be set via `bins` 39 | ggplot(df, aes(x, y)) + 40 | geom_hdr(method = method_histogram(bins = c(8, 25))) + 41 | geom_point(size = 1) 42 | 43 | # By setting `smooth = TRUE`, we can graphically smooth the "blocky" HDRs 44 | ggplot(df, aes(x, y)) + 45 | geom_hdr(method = method_histogram(smooth = TRUE)) + 46 | geom_point(size = 1) 47 | 48 | # However, we need to set `nudgex` and `nudgey` to align the HDRs correctly 49 | ggplot(df, aes(x, y)) + 50 | geom_hdr(method = method_histogram(smooth = TRUE, nudgex = "left", nudgey = "down")) + 51 | geom_point(size = 1) 52 | 53 | # Can also be used with `get_hdr()` for numerical summary of HDRs 54 | res <- get_hdr(df, method = method_histogram()) 55 | str(res) 56 | } 57 | 58 | } 59 | \references{ 60 | Scott, David W. Multivariate Density Estimation (2e), Wiley. 61 | } 62 | -------------------------------------------------------------------------------- /R/helpers-ggplot2.R: -------------------------------------------------------------------------------- 1 | # unexported functions from ggplot2 2 | 3 | `%||%` <- function(x, y) { 4 | if (is.null(x)) y else x 5 | } 6 | 7 | tibble0 <- function(...) { 8 | tibble::tibble(..., .name_repair = "minimal") 9 | } 10 | 11 | unique0 <- function(x, ...) { 12 | if (is.null(x)) x else vctrs::vec_unique(x, ...) 13 | } 14 | 15 | isoband_z_matrix <- function(data) { 16 | x_pos <- as.integer(factor(data$x, levels = sort(unique0(data$x)))) 17 | y_pos <- as.integer(factor(data$y, levels = sort(unique0(data$y)))) 18 | nrow <- max(y_pos) 19 | ncol <- max(x_pos) 20 | raster <- matrix(NA_real_, nrow = nrow, ncol = ncol) 21 | raster[cbind(y_pos, x_pos)] <- data$z 22 | raster 23 | } 24 | 25 | xyz_to_isobands <- function(data, breaks) { 26 | isoband::isobands(x = sort(unique0(data$x)), y = sort(unique0(data$y)), 27 | z = isoband_z_matrix(data), levels_low = breaks[-length(breaks)], 28 | levels_high = breaks[-1]) 29 | } 30 | 31 | xyz_to_isolines <- function(data, breaks) { 32 | isoband::isolines(x = sort(unique0(data$x)), y = sort(unique0(data$y)), 33 | z = isoband_z_matrix(data), levels = breaks) 34 | } 35 | 36 | iso_to_polygon <- function(iso, group = 1) { 37 | lengths <- vapply(iso, function(x) length(x$x), integer(1)) 38 | if (all(lengths == 0)) { 39 | warning("Zero contours were generated") 40 | return(tibble0()) 41 | } 42 | levels <- names(iso) 43 | xs <- unlist(lapply(iso, "[[", "x"), use.names = FALSE) 44 | ys <- unlist(lapply(iso, "[[", "y"), use.names = FALSE) 45 | ids <- unlist(lapply(iso, "[[", "id"), use.names = FALSE) 46 | item_id <- rep(seq_along(iso), lengths) 47 | groups <- paste(group, sprintf("%03d", item_id), sep = "-") 48 | groups <- factor(groups) 49 | tibble0(level = rep(levels, lengths), x = xs, y = ys, 50 | piece = as.integer(groups), group = groups, subgroup = ids, 51 | .size = length(xs)) 52 | } 53 | 54 | iso_to_path <- function(iso, group = 1) { 55 | lengths <- vapply(iso, function(x) length(x$x), integer(1)) 56 | if (all(lengths == 0)) { 57 | warning("Zero contours were generated") 58 | return(tibble0()) 59 | } 60 | levels <- names(iso) 61 | xs <- unlist(lapply(iso, "[[", "x"), use.names = FALSE) 62 | ys <- unlist(lapply(iso, "[[", "y"), use.names = FALSE) 63 | ids <- unlist(lapply(iso, "[[", "id"), use.names = FALSE) 64 | item_id <- rep(seq_along(iso), lengths) 65 | groups <- paste(group, sprintf("%03d", item_id), sprintf("%03d", 66 | ids), sep = "-") 67 | groups <- factor(groups) 68 | tibble0(level = rep(levels, lengths), x = xs, y = ys, 69 | piece = as.integer(groups), group = groups, .size = length(xs)) 70 | } 71 | 72 | empty <- function(df) { 73 | is.null(df) || nrow(df) == 0 || ncol(df) == 0 || inherits(df, "waiver") 74 | } 75 | 76 | ensure_nonempty_data <- function(data) { 77 | if (empty(data)) { 78 | tibble0(group = 1, .size = 1) 79 | } 80 | else { 81 | data 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /tests/testthat/test-get_hdr_1d.R: -------------------------------------------------------------------------------- 1 | test_that("structure of get_hdr_1d() return value is as expected", { 2 | 3 | x <- 1:10 4 | 5 | res <- get_hdr_1d(x) 6 | 7 | # Checking the top level of res 8 | expect_type(res, "list") 9 | expect_equal(length(res), 3) 10 | expect_equal(names(res), c("df_est", "breaks", "data")) 11 | 12 | # Checking res$df_est: 13 | expect_type(res$df_est, "list") 14 | expect_equal(ncol(res$df_est), 4) 15 | expect_equal(colnames(res$df_est), c("x", "fhat", "fhat_discretized", "hdr")) 16 | 17 | # Checking res$data 18 | expect_type(res$data, "list") 19 | expect_equal(ncol(res$data), 2) 20 | expect_equal(nrow(res$data), 10) 21 | expect_equal(colnames(res$data), c("x", "hdr_membership")) 22 | 23 | # Checking res$breaks 24 | expect_type(res$breaks, "double") 25 | expect_equal(length(res$breaks), 5) 26 | expect_equal(names(res$breaks), c("99%", "95%", "80%", "50%", NA)) 27 | 28 | }) 29 | 30 | test_that("`method` can be provided as a character vector or function", { 31 | 32 | x <- 1:10 33 | 34 | expect_equal(get_hdr_1d(x, "kde"), get_hdr_1d(x, method_kde_1d())) 35 | expect_equal(get_hdr_1d(x, "norm"), get_hdr_1d(x, method_norm_1d())) 36 | expect_equal(get_hdr_1d(x, "freqpoly"), get_hdr_1d(x, method_freqpoly_1d())) 37 | expect_equal(get_hdr_1d(x, "histogram"), get_hdr_1d(x, method_histogram_1d())) 38 | 39 | }) 40 | 41 | test_that("get_hdr() errors informatively if bad `method` argument", { 42 | 43 | x <- 1:10 44 | 45 | expect_error(get_hdr_1d(x, method = "not-a-method"), regexp = "Invalid method specified") 46 | expect_error(get_hdr_1d(x, method = method_kde_1d), regexp = "did you forget") 47 | expect_error(get_hdr_1d(x, method = method_kde()), regexp = "1d") 48 | 49 | }) 50 | 51 | test_that("get_hdr_1d() fails if `method != 'fun' and `x` isn't provided", { 52 | 53 | expect_error(get_hdr_1d(method = method_kde_1d()), regexp = ".x. must be provided") 54 | 55 | }) 56 | 57 | test_that("fun argument of get_hdr_1d() requires range", { 58 | 59 | expect_error(get_hdr_1d(method = "fun", fun = dexp), regexp = ".range. must be provided") 60 | 61 | }) 62 | 63 | 64 | test_that("fun argument of get_hdr_1d() works", { 65 | 66 | res <- get_hdr_1d(method = "fun", fun = dexp, range = c(0, 10)) 67 | 68 | # Structure of res is as expected 69 | expect_type(res, "list") 70 | expect_equal(length(res), 3) 71 | expect_equal(names(res), c("df_est", "breaks", "data")) 72 | 73 | expect_null(res$data) 74 | 75 | # fhat_discretized should be normalized to sum to 1 76 | expect_equal(sum(res$df_est$fhat_discretized), 1) 77 | 78 | expect_equal(range(res$df_est$x), c(0, 10)) 79 | 80 | # default grid is 512: 81 | expect_equal(nrow(res$df_est), 512) 82 | 83 | # Checksums: 84 | expect_equal(round(sum(res$df_est$fhat)), 52) 85 | expect_equal(as.numeric(round(res$breaks, 4)), c(0.0101, 0.0501, 0.201, 0.5041, Inf)) 86 | expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1)) 87 | expect_equal(as.numeric(table(res$df_est$hdr)), c(36, 47, 71, 82, 276)) 88 | }) 89 | 90 | -------------------------------------------------------------------------------- /man/method_kde_1d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method_1d.R 3 | \name{method_kde_1d} 4 | \alias{method_kde_1d} 5 | \title{Univariate kernel density HDR estimator} 6 | \usage{ 7 | method_kde_1d( 8 | bw = "nrd0", 9 | adjust = 1, 10 | kernel = "gaussian", 11 | weights = NULL, 12 | window = kernel 13 | ) 14 | } 15 | \arguments{ 16 | \item{bw}{the smoothing bandwidth to be used. The kernels are scaled 17 | such that this is the standard deviation of the smoothing kernel. 18 | (Note this differs from the reference books cited below, and from S-PLUS.) 19 | 20 | \code{bw} can also be a character string giving a rule to choose the 21 | bandwidth. See \code{\link[stats]{bw.nrd}}. \cr The default, 22 | \code{"nrd0"}, has remained the default for historical and 23 | compatibility reasons, rather than as a general recommendation, 24 | where e.g., \code{"SJ"} would rather fit, see also Venables and 25 | Ripley (2002). 26 | 27 | The specified (or computed) value of \code{bw} is multiplied by 28 | \code{adjust}. 29 | } 30 | 31 | \item{adjust}{the bandwidth used is actually \code{adjust*bw}. 32 | This makes it easy to specify values like \sQuote{half the default} 33 | bandwidth.} 34 | 35 | \item{kernel, window}{a character string giving the smoothing kernel 36 | to be used. This must partially match one of \code{"gaussian"}, 37 | \code{"rectangular"}, \code{"triangular"}, \code{"epanechnikov"}, 38 | \code{"biweight"}, \code{"cosine"} or \code{"optcosine"}, with default 39 | \code{"gaussian"}, and may be abbreviated to a unique prefix (single 40 | letter). 41 | 42 | \code{"cosine"} is smoother than \code{"optcosine"}, which is the 43 | usual \sQuote{cosine} kernel in the literature and almost MSE-efficient. 44 | However, \code{"cosine"} is the version used by S. 45 | } 46 | 47 | \item{weights}{numeric vector of non-negative observation weights, 48 | hence of same length as \code{x}. The default \code{NULL} is 49 | equivalent to \code{weights = rep(1/nx, nx)} where \code{nx} is the 50 | length of (the finite entries of) \code{x[]}. If \code{na.rm = TRUE} 51 | and there are \code{NA}'s in \code{x}, they \emph{and} the 52 | corresponding weights are removed before computations. In that case, 53 | when the original weights have summed to one, they are re-scaled to 54 | keep doing so.} 55 | } 56 | \description{ 57 | Function used to specify univariate kernel density estimator 58 | for \code{get_hdr_1d()} and layer functions (e.g. \code{geom_hdr_rug()}). 59 | } 60 | \details{ 61 | For more details on the use and implementation of the \verb{method_*_1d()} functions, 62 | see \code{vignette("method", "ggdensity")}. 63 | } 64 | \examples{ 65 | df <- data.frame(x = rnorm(1e3, sd = 3)) 66 | 67 | ggplot(df, aes(x)) + 68 | geom_hdr_rug(method = method_kde_1d()) + 69 | geom_density() 70 | 71 | # Details of the KDE can be adjusted with arguments to `method_kde_1d()` 72 | ggplot(df, aes(x)) + 73 | geom_hdr_rug(method = method_kde_1d(adjust = 1/5)) + 74 | geom_density(adjust = 1/5) 75 | 76 | ggplot(df, aes(x)) + 77 | geom_hdr_rug(method = method_kde_1d(kernel = "triangular")) + 78 | geom_density(kernel = "triangular") 79 | 80 | # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 81 | res <- get_hdr_1d(df$x, method = method_kde_1d()) 82 | str(res) 83 | 84 | } 85 | -------------------------------------------------------------------------------- /tests/testthat/test-visual-tests.R: -------------------------------------------------------------------------------- 1 | ## Checking basic plots with vdiffr::expect_doppelganger() 2 | 3 | test_that("Basic 2d HDRs render consistently", { 4 | 5 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 6 | 7 | # geom/stat_hdr 8 | geom_hdr_ggplot <- ggplot(data, aes(x, y)) + geom_hdr() 9 | stat_hdr_ggplot <- ggplot(data, aes(x, y)) + stat_hdr() 10 | vdiffr::expect_doppelganger("geom-hdr-ggplot", geom_hdr_ggplot) 11 | vdiffr::expect_doppelganger("stat-hdr-ggplot", stat_hdr_ggplot) 12 | 13 | # geom/stat_hdr_lines 14 | geom_hdr_lines_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_lines() 15 | stat_hdr_lines_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_lines() 16 | vdiffr::expect_doppelganger("geom-hdr_lines-ggplot", geom_hdr_lines_ggplot) 17 | vdiffr::expect_doppelganger("stat-hdr_lines-ggplot", stat_hdr_lines_ggplot) 18 | 19 | # geom/stat_hdr_points 20 | geom_hdr_points_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_points() 21 | stat_hdr_points_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_points() 22 | vdiffr::expect_doppelganger("geom-hdr-points-ggplot", geom_hdr_points_ggplot) 23 | vdiffr::expect_doppelganger("stat-hdr-points-ggplot", stat_hdr_points_ggplot) 24 | 25 | # geom/stat_hdr_points_fun 26 | geom_hdr_points_fun_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_points_fun(fun = function(x, y) dnorm(x) * dnorm(y)) 27 | stat_hdr_points_fun_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_points_fun(fun = function(x, y) dnorm(x) * dnorm(y)) 28 | vdiffr::expect_doppelganger("geom-hdr-points-ggplot", geom_hdr_points_ggplot) 29 | vdiffr::expect_doppelganger("stat-hdr-points-ggplot", stat_hdr_points_ggplot) 30 | 31 | # geom/stat_hdr_fun 32 | geom_hdr_fun_ggplot <- ggplot() + 33 | geom_hdr_fun(fun = function(x, y) dnorm(x) * dnorm(y), xlim = c(-5, 5), ylim = c(-5, 5)) 34 | stat_hdr_fun_ggplot <- ggplot() + 35 | stat_hdr_fun(fun = function(x, y) dnorm(x) * dnorm(y), xlim = c(-5, 5), ylim = c(-5, 5)) 36 | vdiffr::expect_doppelganger("geom-hdr-fun-ggplot", geom_hdr_fun_ggplot) 37 | vdiffr::expect_doppelganger("stat-hdr-fun-ggplot", stat_hdr_fun_ggplot) 38 | 39 | }) 40 | 41 | test_that("Basic 1d HDRs render consistently", { 42 | 43 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 44 | 45 | # geom/stat_hdr_rug 46 | geom_hdr_rug_ggplot <- ggplot(data, aes(x, y)) + geom_hdr_rug() 47 | stat_hdr_rug_ggplot <- ggplot(data, aes(x, y)) + stat_hdr_rug() 48 | vdiffr::expect_doppelganger("geom-hdr-rug-ggplot", geom_hdr_rug_ggplot) 49 | vdiffr::expect_doppelganger("stat-hdr-rug-ggplot", stat_hdr_rug_ggplot) 50 | 51 | # geom/stat_hdr_rug_fun 52 | geom_hdr_rug_fun_ggplot <- ggplot() + 53 | geom_hdr_rug_fun(fun_x = dnorm, fun_y = dexp, xlim = c(-5, 5), ylim = c(0, 10)) 54 | stat_hdr_rug_fun_ggplot <- ggplot() + 55 | stat_hdr_rug_fun(fun_x = dnorm, fun_y = dexp, xlim = c(-5, 5), ylim = c(0, 10)) 56 | 57 | vdiffr::expect_doppelganger("geom-hdr-rug-fun-ggplot", geom_hdr_rug_fun_ggplot) 58 | vdiffr::expect_doppelganger("stat-hdr-rug-fun-ggplot", stat_hdr_rug_fun_ggplot) 59 | 60 | }) 61 | 62 | test_that("Specified order of probabilities doesn't impact legend ordering", { 63 | 64 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 65 | 66 | geom_hdr_prob_order_ggplot <- ggplot(data, aes(x, y)) + 67 | geom_hdr(probs = c(.25, .5, .75, .95)) 68 | 69 | geom_hdr_rug_prob_order_ggplot <- ggplot(data, aes(x, y)) + 70 | geom_hdr_rug(probs = c(.25, .5, .75, .95)) 71 | 72 | vdiffr::expect_doppelganger("geom_hdr_prob_order_ggplot", geom_hdr_prob_order_ggplot) 73 | vdiffr::expect_doppelganger("geom_hdr_rug_prob_order_ggplot", geom_hdr_rug_prob_order_ggplot) 74 | }) 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /tests/testthat/test-layer-wrappers.R: -------------------------------------------------------------------------------- 1 | test_that("wrapper functions for `layer()` are passing arguments on as expected", { 2 | 3 | df <- readRDS(test_path("fixtures", "df_norm.rds")) 4 | 5 | check_layer <- function(layer_fun, Geom, Stat, mapping = aes(x, y), data = df, ...) { 6 | 7 | hdr_layer <- layer_fun(data = data, mapping = mapping, ...) 8 | 9 | expect_type(hdr_layer, "environment") 10 | expect_identical(hdr_layer$geom, Geom) 11 | expect_identical(hdr_layer$stat, Stat) 12 | expect_identical(hdr_layer$mapping, mapping) 13 | 14 | } 15 | 16 | # 2-d layer functions ----------------------------------------------------- 17 | 18 | # geom/stat_hdr() 19 | check_layer(geom_hdr, GeomHdr, StatHdr) 20 | check_layer(stat_hdr, GeomHdr, StatHdr) 21 | 22 | # geom/stat_hdr_lines() 23 | check_layer(geom_hdr_lines, GeomHdrLines, StatHdrLines) 24 | check_layer(stat_hdr_lines, GeomHdrLines, StatHdrLines) 25 | 26 | # geom/stat_hdr_points() 27 | check_layer(geom_hdr_points, GeomPoint, StatHdrPoints) 28 | check_layer(stat_hdr_points, GeomPoint, StatHdrPoints) 29 | 30 | # geom/stat_hdr_lines_fun() 31 | # -- stat_hdr_points_fun needs to have a `fun` arg provided 32 | check_layer(geom_hdr_points_fun, GeomPoint, StatHdrPointsFun) 33 | check_layer(stat_hdr_points_fun, GeomPoint, StatHdrPointsFun, fun = function(x, y) dnorm(x) * dnorm(y)) 34 | 35 | # geom/stat_hdr_fun() 36 | # (stat_hdr_fun needs to have a `fun` arg provided) 37 | check_layer(geom_hdr_fun, GeomHdrFun, StatHdrFun) 38 | check_layer(stat_hdr_fun, GeomHdrFun, StatHdrFun, fun = function(x, y) dnorm(x) * dnorm(y)) 39 | 40 | # -- checking that data doesn't need to be provided 41 | check_layer(geom_hdr_fun, GeomHdrFun, StatHdrFun, data = NULL, mapping = NULL) 42 | check_layer(stat_hdr_fun, GeomHdrFun, StatHdrFun, data = NULL, mapping = NULL, fun = function(x, y) dnorm(x) * dnorm(y)) 43 | 44 | # geom/stat_hdr_lines_fun() 45 | # -- stat_hdr_lines_fun needs to have a `fun` arg provided 46 | check_layer(geom_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun) 47 | check_layer(stat_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun, fun = function(x, y) dnorm(x) * dnorm(y)) 48 | 49 | # -- checking that data doesn't need to be provided 50 | check_layer(geom_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun, data = NULL, mapping = NULL) 51 | check_layer(stat_hdr_lines_fun, GeomHdrLinesFun, StatHdrLinesFun, data = NULL, mapping = NULL, fun = function(x, y) dnorm(x) * dnorm(y)) 52 | 53 | # 1-d layer functions ----------------------------------------------------- 54 | 55 | # geom/stat_hdr_rug() 56 | check_layer(geom_hdr_rug, GeomHdrRug, StatHdrRug) 57 | check_layer(stat_hdr_rug, GeomHdrRug, StatHdrRug) 58 | 59 | # -- checking that single x/y aesthetics are allowed: 60 | check_layer(geom_hdr_rug, GeomHdrRug, StatHdrRug, mapping = aes(x)) 61 | check_layer(stat_hdr_rug, GeomHdrRug, StatHdrRug, mapping = aes(x)) 62 | check_layer(geom_hdr_rug, GeomHdrRug, StatHdrRug, mapping = aes(y)) 63 | check_layer(stat_hdr_rug, GeomHdrRug, StatHdrRug, mapping = aes(y)) 64 | 65 | # geom/stat_hdr_rug_fun() 66 | check_layer(geom_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun) 67 | check_layer(stat_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun) 68 | 69 | # -- checking that single x/y aesthetics are allowed: 70 | check_layer(geom_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping = aes(x)) 71 | check_layer(stat_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping = aes(x)) 72 | check_layer(geom_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping = aes(y)) 73 | check_layer(stat_hdr_rug_fun, GeomHdrRugFun, StatHdrRugFun, mapping = aes(y)) 74 | 75 | }) 76 | 77 | 78 | -------------------------------------------------------------------------------- /R/hdr_points.R: -------------------------------------------------------------------------------- 1 | #' Scatterplot colored by highest density regions of a 2D density estimate 2 | #' 3 | #' Perform 2D density estimation, compute the resulting highest density regions (HDRs), 4 | #' and plot the provided data as a scatterplot with points colored according to 5 | #' their corresponding HDR. 6 | #' 7 | #' @section Aesthetics: geom_hdr_points understands the following aesthetics (required 8 | #' aesthetics are in bold): 9 | #' 10 | #' - **x** 11 | #' - **y** 12 | #' - alpha 13 | #' - color 14 | #' - fill 15 | #' - group 16 | #' - linetype 17 | #' - size 18 | #' - subgroup 19 | #' 20 | #' @section Computed variables: 21 | #' 22 | #' \describe{ \item{probs}{The probability associated with the highest density region, specified 23 | #' by `probs`.} } 24 | #' 25 | #' @inheritParams ggplot2::stat_identity 26 | #' @inheritParams ggplot2::stat_density2d 27 | #' @inheritParams geom_hdr 28 | #' 29 | #' @name geom_hdr_points 30 | #' @rdname geom_hdr_points 31 | #' 32 | #' @import ggplot2 33 | #' 34 | #' @examples 35 | #' set.seed(1) 36 | #' df <- data.frame(x = rnorm(500), y = rnorm(500)) 37 | #' p <- ggplot(df, aes(x, y)) + 38 | #' coord_equal() 39 | #' 40 | #' p + geom_hdr_points() 41 | #' 42 | #' # Setting aes(fill = after_stat(probs)), color = "black", and 43 | #' # shape = 21 helps alleviate overplotting: 44 | #' p + geom_hdr_points(aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2) 45 | #' 46 | #' # Also works well with geom_hdr_lines() 47 | #' p + 48 | #' geom_hdr_lines( 49 | #' aes(color = after_stat(probs)), alpha = 1, 50 | #' xlim = c(-5, 5), ylim = c(-5, 5) 51 | #' ) + 52 | #' geom_hdr_points( 53 | #' aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2, 54 | #' xlim = c(-5, 5), ylim = c(-5, 5) 55 | #' ) 56 | #' 57 | NULL 58 | 59 | 60 | 61 | #' @export 62 | #' @rdname geom_hdr_points 63 | stat_hdr_points <- function(mapping = NULL, data = NULL, 64 | geom = "point", position = "identity", 65 | ..., 66 | method = "kde", 67 | probs = c(.99, .95, .8, .5), 68 | n = 100, 69 | xlim = NULL, 70 | ylim = NULL, 71 | na.rm = FALSE, 72 | show.legend = NA, 73 | inherit.aes = TRUE) { 74 | layer( 75 | data = data, 76 | mapping = mapping, 77 | stat = StatHdrPoints, 78 | geom = geom, 79 | position = position, 80 | show.legend = show.legend, 81 | inherit.aes = inherit.aes, 82 | params = list( 83 | method = method, 84 | probs = probs, 85 | n = n, 86 | xlim = xlim, 87 | ylim = ylim, 88 | na.rm = na.rm, 89 | ... 90 | ) 91 | ) 92 | } 93 | 94 | 95 | #' @export 96 | #' @rdname geom_hdr_points 97 | #' @format NULL 98 | #' @usage NULL 99 | StatHdrPoints <- ggproto("StatHdrPoints", StatHdr, 100 | default_aes = aes(order = after_stat(probs), color = after_stat(probs)), 101 | output = "points" 102 | ) 103 | 104 | 105 | #' @export 106 | #' @rdname geom_hdr_points 107 | geom_hdr_points <- function(mapping = NULL, data = NULL, 108 | stat = "hdr_points", position = "identity", 109 | ..., 110 | na.rm = FALSE, 111 | show.legend = NA, 112 | inherit.aes = TRUE) { 113 | 114 | layer( 115 | data = data, 116 | mapping = mapping, 117 | stat = stat, 118 | geom = GeomPoint, 119 | position = position, 120 | show.legend = show.legend, 121 | inherit.aes = inherit.aes, 122 | params = list( 123 | na.rm = na.rm, 124 | ... 125 | ) 126 | ) 127 | } 128 | 129 | -------------------------------------------------------------------------------- /R/hdr_points_fun.R: -------------------------------------------------------------------------------- 1 | #' Scatterplot colored by highest density regions of a bivariate pdf 2 | #' 3 | #' Compute the highest density regions (HDRs) of a bivariate pdf and plot the provided 4 | #' data as a scatterplot with points colored according to their corresponding HDR. 5 | #' 6 | #' @section Aesthetics: geom_hdr_points_fun understands the following aesthetics 7 | #' (required aesthetics are in bold): 8 | #' 9 | #' - **x** 10 | #' - **y** 11 | #' - alpha 12 | #' - color 13 | #' - fill 14 | #' - group 15 | #' - linetype 16 | #' - size 17 | #' - subgroup 18 | #' 19 | #' @section Computed variables: 20 | #' 21 | #' \describe{ \item{probs}{The probability associated with the highest density region, specified 22 | #' by `probs`.} } 23 | #' 24 | #' @inheritParams ggplot2::stat_identity 25 | #' @inheritParams ggplot2::stat_density2d 26 | #' @inheritParams geom_hdr_fun 27 | #' 28 | #' @name geom_hdr_points_fun 29 | #' @rdname geom_hdr_points_fun 30 | #' 31 | #' @import ggplot2 32 | #' 33 | #' @examples 34 | #' # Can plot points colored according to known pdf: 35 | #' set.seed(1) 36 | #' df <- data.frame(x = rexp(1000), y = rexp(1000)) 37 | #' f <- function(x, y) dexp(x) * dexp(y) 38 | #' 39 | #' ggplot(df, aes(x, y)) + 40 | #' geom_hdr_points_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) 41 | #' 42 | #' 43 | #' # Also allows for hdrs of a custom parametric model 44 | #' 45 | #' # generate example data 46 | #' n <- 1000 47 | #' th_true <- c(3, 8) 48 | #' 49 | #' rdata <- function(n, th) { 50 | #' gen_single_obs <- function(th) { 51 | #' rchisq(2, df = th) # can be anything 52 | #' } 53 | #' df <- replicate(n, gen_single_obs(th)) 54 | #' setNames(as.data.frame(t(df)), c("x", "y")) 55 | #' } 56 | #' data <- rdata(n, th_true) 57 | #' 58 | #' # estimate unknown parameters via maximum likelihood 59 | #' likelihood <- function(th) { 60 | #' th <- abs(th) # hack to enforce parameter space boundary 61 | #' log_f <- function(v) { 62 | #' x <- v[1]; y <- v[2] 63 | #' dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) 64 | #' } 65 | #' sum(apply(data, 1, log_f)) 66 | #' } 67 | #' (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) 68 | #' 69 | #' # plot f for the give model 70 | #' f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) 71 | #' 72 | #' ggplot(data, aes(x, y)) + 73 | #' geom_hdr_points_fun(fun = f, args = list(th = th_hat)) 74 | #' 75 | #' ggplot(data, aes(x, y)) + 76 | #' geom_hdr_points_fun(aes(fill = after_stat(probs)), shape = 21, color = "black", 77 | #' fun = f, args = list(th = th_hat), na.rm = TRUE) + 78 | #' geom_hdr_lines_fun(aes(color = after_stat(probs)), alpha = 1, fun = f, args = list(th = th_hat)) + 79 | #' lims(x = c(0, 15), y = c(0, 25)) 80 | #' 81 | NULL 82 | 83 | 84 | #' @export 85 | #' @rdname geom_hdr_points_fun 86 | stat_hdr_points_fun <- function(mapping = NULL, data = NULL, 87 | geom = "point", position = "identity", 88 | ..., 89 | fun, args = list(), 90 | probs = c(.99, .95, .8, .5), 91 | xlim = NULL, ylim = NULL, n = 100, 92 | na.rm = FALSE, 93 | show.legend = NA, 94 | inherit.aes = TRUE) { 95 | 96 | layer( 97 | data = data, 98 | mapping = mapping, 99 | stat = StatHdrPointsFun, 100 | geom = geom, 101 | position = position, 102 | show.legend = show.legend, 103 | inherit.aes = inherit.aes, 104 | params = list( 105 | fun = fun, 106 | args = args, 107 | probs = probs, 108 | xlim = xlim, 109 | ylim = ylim, 110 | n = n, 111 | na.rm = na.rm, 112 | ... 113 | ) 114 | ) 115 | } 116 | 117 | 118 | #' @export 119 | #' @format NULL 120 | #' @usage NULL 121 | #' @rdname geom_hdr_points_fun 122 | StatHdrPointsFun <- ggproto("StatHdrPointsFun", StatHdrFun, 123 | default_aes = aes(order = after_stat(probs), color = after_stat(probs)), 124 | output = "points" 125 | ) 126 | 127 | #' @export 128 | #' @rdname geom_hdr_points_fun 129 | geom_hdr_points_fun <- function(mapping = NULL, data = NULL, 130 | stat = "hdr_points_fun", position = "identity", 131 | ..., 132 | na.rm = FALSE, 133 | show.legend = NA, 134 | inherit.aes = TRUE) { 135 | 136 | if (is.null(data)) data <- ensure_nonempty_data 137 | 138 | layer( 139 | data = data, 140 | mapping = mapping, 141 | stat = stat, 142 | geom = GeomPoint, 143 | position = position, 144 | show.legend = show.legend, 145 | inherit.aes = inherit.aes, 146 | params = list( 147 | na.rm = na.rm, 148 | ... 149 | ) 150 | ) 151 | } 152 | 153 | -------------------------------------------------------------------------------- /man/get_hdr_1d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_hdr_1d.R 3 | \name{get_hdr_1d} 4 | \alias{get_hdr_1d} 5 | \title{Computing the highest density regions of a 1D density} 6 | \usage{ 7 | get_hdr_1d( 8 | x = NULL, 9 | method = "kde", 10 | probs = c(0.99, 0.95, 0.8, 0.5), 11 | n = 512, 12 | range = NULL, 13 | hdr_membership = TRUE, 14 | fun, 15 | args = list() 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{A vector of data} 20 | 21 | \item{method}{Either a character (\code{"kde"}, \code{"norm"}, \code{"histogram"}, \code{"freqpoly"}, or \code{"fun"}) or \verb{method_*_1d()} function. 22 | See the "The \code{method} argument" section below for details.} 23 | 24 | \item{probs}{Probabilities to compute HDRs for.} 25 | 26 | \item{n}{Resolution of grid representing estimated density and HDRs.} 27 | 28 | \item{range}{Range of grid representing estimated density and HDRs.} 29 | 30 | \item{hdr_membership}{Should HDR membership of data points (\code{x}) be computed?} 31 | 32 | \item{fun}{Optional, a probability density function, must be vectorized in its first argument. 33 | See the "The \code{fun} argument" section below for details.} 34 | 35 | \item{args}{Optional, a list of arguments to be provided to \code{fun}.} 36 | } 37 | \value{ 38 | \code{get_hdr_1d} returns a list with elements \code{df_est} (\code{data.frame}), \code{breaks} (named \code{numeric}), and \code{data} (\code{data.frame}). 39 | \itemize{ 40 | \item \code{df_est}: the estimated HDRs and density evaluated on the grid defined by \code{range} and \code{n}. 41 | The column of estimated HDRs (\code{df_est$hdr}) is a numeric vector with values from \code{probs}. 42 | The columns \code{df_est$fhat} and \code{df_est$fhat_discretized} correspond to the estimated density 43 | on the original scale and rescaled to sum to 1, respectively. 44 | \item \code{breaks}: the heights of the estimated density (\code{df_est$fhat}) corresponding to the HDRs specified by \code{probs}. 45 | Will always have additional element \code{Inf} representing the cutoff for the 100\% HDR. 46 | \item \code{data}: the original data provided in the \code{data} argument. 47 | If \code{hdr_membership} is set to \code{TRUE}, this includes a column (\code{data$hdr_membership}) 48 | with the HDR corresponding to each data point. 49 | } 50 | } 51 | \description{ 52 | \code{get_hdr_1d} is used to estimate a 1-dimensional density and compute corresponding HDRs. 53 | The estimated density and HDRs are represented in a discrete form as a grid, defined by arguments \code{range} and \code{n}. 54 | \code{get_hdr_1d} is used internally by layer functions \code{stat_hdr_rug()} and \code{stat_hdr_rug_fun()}. 55 | } 56 | \section{The \code{method} argument}{ 57 | 58 | The density estimator used to estimate the HDRs is specified with the \code{method} argument. 59 | The simplest way to specify an estimator is to provide a character value to \code{method}, 60 | for example \code{method = "kde"} specifies a kernel density estimator. 61 | However, this specification is limited to the default behavior of the estimator. 62 | 63 | Instead, it is possible to provide a function call, for example: \code{method = method_kde_1d()}. 64 | This is slightly different from the function calls provided in \code{get_hdr()}, note the \verb{_1d} suffix. 65 | In many cases, these functions accept parameters governing the density estimation procedure. 66 | Here, \code{method_kde_1d()} accepts several parameters related to the choice of kernel. 67 | For details, see \code{?method_kde_1d}. 68 | Every method of univariate density estimation implemented has such corresponding \verb{method_*_1d()} function, 69 | each with an associated help page. 70 | 71 | Note: \code{geom_hdr_rug()} and other layer functions also have \code{method} arguments which behave in the same way. 72 | For more details on the use and implementation of the \verb{method_*_1d()} functions, 73 | see \code{vignette("method", "ggdensity")}. 74 | } 75 | 76 | \section{The \code{fun} argument}{ 77 | 78 | If \code{method} is set to \code{"fun"}, \code{get_hdr_1d()} expects a univariate probability 79 | density function to be specified with the \code{fun} argument. 80 | It is required that \code{fun} be a function of at least one argument (\code{x}). 81 | Beyond this first argument, \code{fun} can have arbitrarily many arguments; 82 | these can be set in \code{get_hdr_1d()} as a named list via the \code{args} parameter. 83 | 84 | Note: \code{get_hdr_1d()} requires that \code{fun} be vectorized in \code{x}. 85 | For an example of an appropriate choice of \code{fun}, see the final example below. 86 | } 87 | 88 | \examples{ 89 | x <- rnorm(1e3) 90 | 91 | # Two ways to specify `method` 92 | get_hdr_1d(x, method = "kde") 93 | get_hdr_1d(x, method = method_kde_1d()) 94 | 95 | \dontrun{ 96 | 97 | # If parenthesis are omitted, `get_hdr_1d()` errors 98 | get_hdr_1d(df, method = method_kde_1d) 99 | 100 | # If the `_1d` suffix is omitted, `get_hdr_1d()` errors 101 | get_hdr_1d(x, method = method_kde()) 102 | } 103 | 104 | # Adjust estimator parameters with arguments to `method_kde_1d()` 105 | get_hdr_1d(x, method = method_kde_1d(kernel = "triangular")) 106 | 107 | # Estimate different HDRs with `probs` 108 | get_hdr_1d(x, method = method_kde_1d(), probs = c(.975, .6, .2)) 109 | 110 | # Compute "population" HDRs of specified univariate pdf with `method = "fun"` 111 | f <- function(x, sd = 1) dnorm(x, sd = sd) 112 | get_hdr_1d(method = "fun", fun = f, range = c(-5, 5)) 113 | get_hdr_1d(method = "fun", fun = f, range = c(-5, 5), args = list(sd = .5)) 114 | 115 | 116 | } 117 | -------------------------------------------------------------------------------- /man/geom_hdr_points.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hdr_points.R 3 | \docType{data} 4 | \name{geom_hdr_points} 5 | \alias{geom_hdr_points} 6 | \alias{stat_hdr_points} 7 | \alias{StatHdrPoints} 8 | \title{Scatterplot colored by highest density regions of a 2D density estimate} 9 | \usage{ 10 | stat_hdr_points( 11 | mapping = NULL, 12 | data = NULL, 13 | geom = "point", 14 | position = "identity", 15 | ..., 16 | method = "kde", 17 | probs = c(0.99, 0.95, 0.8, 0.5), 18 | n = 100, 19 | xlim = NULL, 20 | ylim = NULL, 21 | na.rm = FALSE, 22 | show.legend = NA, 23 | inherit.aes = TRUE 24 | ) 25 | 26 | geom_hdr_points( 27 | mapping = NULL, 28 | data = NULL, 29 | stat = "hdr_points", 30 | position = "identity", 31 | ..., 32 | na.rm = FALSE, 33 | show.legend = NA, 34 | inherit.aes = TRUE 35 | ) 36 | } 37 | \arguments{ 38 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 39 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 40 | at the top level of the plot. You must supply \code{mapping} if there is no plot 41 | mapping.} 42 | 43 | \item{data}{The data to be displayed in this layer. There are three 44 | options: 45 | 46 | If \code{NULL}, the default, the data is inherited from the plot 47 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 48 | 49 | A \code{data.frame}, or other object, will override the plot 50 | data. All objects will be fortified to produce a data frame. See 51 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 52 | 53 | A \code{function} will be called with a single argument, 54 | the plot data. The return value must be a \code{data.frame}, and 55 | will be used as the layer data. A \code{function} can be created 56 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 57 | 58 | \item{geom}{The geometric object to use to display the data, either as a 59 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the 60 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})} 61 | 62 | \item{position}{Position adjustment, either as a string naming the adjustment 63 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a 64 | position adjustment function. Use the latter if you need to change the 65 | settings of the adjustment.} 66 | 67 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are 68 | often aesthetics, used to set an aesthetic to a fixed value, like 69 | \code{colour = "red"} or \code{size = 3}. They may also be parameters 70 | to the paired geom/stat.} 71 | 72 | \item{method}{Density estimator to use, accepts character vector: 73 | \code{"kde"},\code{"histogram"}, \code{"freqpoly"}, or \code{"mvnorm"}. 74 | Alternatively accepts functions which return closures corresponding to density estimates, 75 | see \code{?get_hdr} or \code{vignette("method", "ggdensity")}.} 76 | 77 | \item{probs}{Probabilities to compute highest density regions for.} 78 | 79 | \item{n}{Number of grid points in each direction.} 80 | 81 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to 82 | range of data.} 83 | 84 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with 85 | a warning. If \code{TRUE}, missing values are silently removed.} 86 | 87 | \item{show.legend}{logical. Should this layer be included in the legends? 88 | \code{NA}, the default, includes if any aesthetics are mapped. 89 | \code{FALSE} never includes, and \code{TRUE} always includes. 90 | It can also be a named logical vector to finely select the aesthetics to 91 | display.} 92 | 93 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 94 | rather than combining with them. This is most useful for helper functions 95 | that define both data and aesthetics and shouldn't inherit behaviour from 96 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 97 | 98 | \item{stat}{The statistical transformation to use on the data for this 99 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the 100 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than 101 | \code{"stat_count"})} 102 | } 103 | \description{ 104 | Perform 2D density estimation, compute the resulting highest density regions (HDRs), 105 | and plot the provided data as a scatterplot with points colored according to 106 | their corresponding HDR. 107 | } 108 | \section{Aesthetics}{ 109 | geom_hdr_points understands the following aesthetics (required 110 | aesthetics are in bold): 111 | \itemize{ 112 | \item \strong{x} 113 | \item \strong{y} 114 | \item alpha 115 | \item color 116 | \item fill 117 | \item group 118 | \item linetype 119 | \item size 120 | \item subgroup 121 | } 122 | } 123 | 124 | \section{Computed variables}{ 125 | 126 | 127 | \describe{ \item{probs}{The probability associated with the highest density region, specified 128 | by \code{probs}.} } 129 | } 130 | 131 | \examples{ 132 | set.seed(1) 133 | df <- data.frame(x = rnorm(500), y = rnorm(500)) 134 | p <- ggplot(df, aes(x, y)) + 135 | coord_equal() 136 | 137 | p + geom_hdr_points() 138 | 139 | # Setting aes(fill = after_stat(probs)), color = "black", and 140 | # shape = 21 helps alleviate overplotting: 141 | p + geom_hdr_points(aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2) 142 | 143 | # Also works well with geom_hdr_lines() 144 | p + 145 | geom_hdr_lines( 146 | aes(color = after_stat(probs)), alpha = 1, 147 | xlim = c(-5, 5), ylim = c(-5, 5) 148 | ) + 149 | geom_hdr_points( 150 | aes(fill = after_stat(probs)), color = "black", shape = 21, size = 2, 151 | xlim = c(-5, 5), ylim = c(-5, 5) 152 | ) 153 | 154 | } 155 | \keyword{datasets} 156 | -------------------------------------------------------------------------------- /man/get_hdr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_hdr.R 3 | \name{get_hdr} 4 | \alias{get_hdr} 5 | \title{Computing the highest density regions of a 2D density} 6 | \usage{ 7 | get_hdr( 8 | data = NULL, 9 | method = "kde", 10 | probs = c(0.99, 0.95, 0.8, 0.5), 11 | n = 100, 12 | rangex = NULL, 13 | rangey = NULL, 14 | hdr_membership = TRUE, 15 | fun, 16 | args = list() 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{A data frame with columns \code{x} and \code{y}.} 21 | 22 | \item{method}{Either a character (\code{"kde"}, \code{"mvnorm"}, \code{"histogram"}, 23 | \code{"freqpoly"}, or \code{"fun"}) or \verb{method_*()} function. See the "The \code{method} 24 | argument" section below for details.} 25 | 26 | \item{probs}{Probabilities to compute HDRs for.} 27 | 28 | \item{n}{Resolution of grid representing estimated density and HDRs.} 29 | 30 | \item{rangex, rangey}{Range of grid representing estimated density and HDRs, 31 | along the x- and y-axes.} 32 | 33 | \item{hdr_membership}{Should HDR membership of data points (\code{data}) be 34 | computed? Defaults to \code{TRUE}, although it is computationally expensive for 35 | large data sets.} 36 | 37 | \item{fun}{Optional, a joint probability density function, must be vectorized 38 | in its first two arguments. See the "The \code{fun} argument" section below for 39 | details.} 40 | 41 | \item{args}{Optional, a list of arguments to be provided to \code{fun}.} 42 | } 43 | \value{ 44 | \code{get_hdr} returns a list with elements \code{df_est} (\code{data.frame}), \code{breaks} 45 | (named \code{numeric}), and \code{data} (\code{data.frame}). 46 | \itemize{ 47 | \item \code{df_est}: the estimated HDRs and density evaluated on the grid defined by \code{rangex}, \code{rangey}, and \code{n}. 48 | The column of estimated HDRs (\code{df_est$hdr}) is a numeric vector with values 49 | from \code{probs}. The columns \code{df_est$fhat} and \code{df_est$fhat_discretized} 50 | correspond to the estimated density on the original scale and rescaled to sum 51 | to 1, respectively. 52 | \item \code{breaks}: the heights of the estimated density (\code{df_est$fhat}) corresponding to the HDRs specified by \code{probs}. 53 | Will always have additional element \code{Inf} representing the cutoff for the 54 | 100\% HDR. 55 | \item \code{data}: the original data provided in the \code{data} argument. 56 | If \code{hdr_membership} is set to \code{TRUE}, this includes a column 57 | (\code{data$hdr_membership}) with the HDR corresponding to each data point. 58 | } 59 | } 60 | \description{ 61 | \code{get_hdr} is used to estimate a 2-dimensional density and compute 62 | corresponding HDRs. The estimated density and HDRs are represented in a 63 | discrete form as a grid, defined by arguments \code{rangex}, \code{rangey}, and \code{n}. 64 | \code{get_hdr} is used internally by layer functions \code{stat_hdr()}, 65 | \code{stat_hdr_points()}, \code{stat_hdr_fun()}, etc. 66 | } 67 | \section{The \code{method} argument}{ 68 | The density estimator used to estimate the 69 | HDRs is specified with the \code{method} argument. The simplest way to specify 70 | an estimator is to provide a character value to \code{method}, for example 71 | \code{method = "kde"} specifies a kernel density estimator. However, this 72 | specification is limited to the default behavior of the estimator. 73 | 74 | Instead, it is possible to provide a function call, for example: \code{method = method_kde()}. In many cases, these functions accept parameters governing 75 | the density estimation procedure. Here, \code{method_kde()} accepts parameters 76 | \code{h} and \code{adjust}, both related to the kernel's bandwidth. For details, see 77 | \code{?method_kde}. Every method of bivariate density estimation implemented has 78 | such corresponding \verb{method_*()} function, each with an associated help 79 | page. 80 | 81 | Note: \code{geom_hdr()} and other layer functions also have \code{method} arguments 82 | which behave in the same way. For more details on the use and 83 | implementation of the \verb{method_*()} functions, see \code{vignette("method", "ggdensity")}. 84 | } 85 | 86 | \section{The \code{fun} argument}{ 87 | If \code{method} is set to \code{"fun"}, \code{get_hdr()} 88 | expects a bivariate probability density function to be specified with the 89 | \code{fun} argument. It is required that \code{fun} be a function of at least two 90 | arguments (\code{x} and \code{y}). Beyond these first two arguments, \code{fun} can have 91 | arbitrarily many arguments; these can be set in \code{get_hdr()} as a named list 92 | via the \code{args} parameter. 93 | 94 | Note: \code{get_hdr()} requires that \code{fun} be vectorized in \code{x} and \code{y}. For an 95 | example of an appropriate choice of \code{fun}, see the final example below. 96 | } 97 | 98 | \examples{ 99 | df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) 100 | 101 | # Two ways to specify `method` 102 | get_hdr(df, method = "kde") 103 | get_hdr(df, method = method_kde()) 104 | 105 | \dontrun{ 106 | 107 | # If parenthesis are omitted, `get_hdr()` errors 108 | get_hdr(df, method = method_kde) 109 | } 110 | 111 | # Estimate different HDRs with `probs` 112 | get_hdr(df, method = method_kde(), probs = c(.975, .6, .2)) 113 | 114 | # Adjust estimator parameters with arguments to `method_kde()` 115 | get_hdr(df, method = method_kde(h = 1)) 116 | 117 | # Parametric normal estimator of density 118 | get_hdr(df, method = "mvnorm") 119 | get_hdr(df, method = method_mvnorm()) 120 | 121 | # Compute "population" HDRs of specified bivariate pdf with `method = "fun"` 122 | f <- function(x, y, sd_x = 1, sd_y = 1) dnorm(x, sd = sd_x) * dnorm(y, sd = sd_y) 123 | 124 | get_hdr( 125 | method = "fun", fun = f, 126 | rangex = c(-5, 5), rangey = c(-5, 5) 127 | ) 128 | 129 | get_hdr( 130 | method = "fun", fun = f, 131 | rangex = c(-5, 5), rangey = c(-5, 5), 132 | args = list(sd_x = .5, sd_y = .5) # specify additional arguments w/ `args` 133 | ) 134 | 135 | } 136 | -------------------------------------------------------------------------------- /R/hdr_fun.R: -------------------------------------------------------------------------------- 1 | #' Highest density regions of a bivariate pdf 2 | #' 3 | #' Compute and plot the highest density regions (HDRs) of a bivariate pdf. 4 | #' `geom_hdr_fun()` draws filled regions, and `geom_hdr_lines_fun()` draws lines outlining the regions. 5 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default. 6 | #' 7 | #' @section Aesthetics: `geom_hdr_fun()` and `geom_hdr_lines_fun()` understand the following aesthetics (required 8 | #' aesthetics are in bold): 9 | #' 10 | #' - x 11 | #' - y 12 | #' - alpha 13 | #' - color 14 | #' - fill (only `geom_hdr_fun`) 15 | #' - group 16 | #' - linetype 17 | #' - linewidth 18 | #' - subgroup 19 | #' 20 | #' @section Computed variables: 21 | #' 22 | #' \describe{ \item{probs}{The probability associated with the highest density region, specified 23 | #' by `probs`.} } 24 | #' 25 | #' @inheritParams ggplot2::geom_path 26 | #' @inheritParams ggplot2::stat_identity 27 | #' @inheritParams ggplot2::stat_density2d 28 | #' @param fun A function, the joint probability density function, must be 29 | #' vectorized in its first two arguments; see examples. 30 | #' @param args Named list of additional arguments passed on to `fun`. 31 | #' @param probs Probabilities to compute highest density regions for. 32 | #' @param n Resolution of grid `fun` is evaluated on. 33 | #' @param xlim,ylim Range to compute and draw regions. If `NULL`, defaults to 34 | #' range of data if present. 35 | #' @name geom_hdr_fun 36 | #' @rdname geom_hdr_fun 37 | #' 38 | #' @import ggplot2 39 | #' 40 | #' @examples 41 | #' # HDRs of the bivariate exponential 42 | #' f <- function(x, y) dexp(x) * dexp(y) 43 | #' ggplot() + geom_hdr_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) 44 | #' 45 | #' 46 | #' # HDRs of a custom parametric model 47 | #' 48 | #' # generate example data 49 | #' n <- 1000 50 | #' th_true <- c(3, 8) 51 | #' 52 | #' rdata <- function(n, th) { 53 | #' gen_single_obs <- function(th) { 54 | #' rchisq(2, df = th) # can be anything 55 | #' } 56 | #' df <- replicate(n, gen_single_obs(th)) 57 | #' setNames(as.data.frame(t(df)), c("x", "y")) 58 | #' } 59 | #' data <- rdata(n, th_true) 60 | #' 61 | #' # estimate unknown parameters via maximum likelihood 62 | #' likelihood <- function(th) { 63 | #' th <- abs(th) # hack to enforce parameter space boundary 64 | #' log_f <- function(v) { 65 | #' x <- v[1]; y <- v[2] 66 | #' dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) 67 | #' } 68 | #' sum(apply(data, 1, log_f)) 69 | #' } 70 | #' (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) 71 | #' 72 | #' # plot f for the give model 73 | #' f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) 74 | #' 75 | #' ggplot(data, aes(x, y)) + 76 | #' geom_hdr_fun(fun = f, args = list(th = th_hat)) + 77 | #' geom_point(size = .25, color = "red") + 78 | #' xlim(0, 30) + ylim(c(0, 30)) 79 | #' 80 | #' ggplot(data, aes(x, y)) + 81 | #' geom_hdr_lines_fun(fun = f, args = list(th = th_hat)) + 82 | #' geom_point(size = .25, color = "red") + 83 | #' xlim(0, 30) + ylim(c(0, 30)) 84 | #' 85 | #' 86 | NULL 87 | 88 | 89 | 90 | 91 | 92 | 93 | #' @rdname geom_hdr_fun 94 | #' @export 95 | stat_hdr_fun <- function(mapping = NULL, data = NULL, 96 | geom = "hdr_fun", position = "identity", 97 | ..., 98 | fun, args = list(), 99 | probs = c(.99, .95, .8, .5), 100 | xlim = NULL, ylim = NULL, n = 100, 101 | na.rm = FALSE, 102 | show.legend = NA, 103 | inherit.aes = TRUE) { 104 | 105 | if (is.null(data)) data <- ensure_nonempty_data 106 | 107 | layer( 108 | data = data, 109 | mapping = mapping, 110 | stat = StatHdrFun, 111 | geom = geom, 112 | position = position, 113 | show.legend = show.legend, 114 | inherit.aes = inherit.aes, 115 | params = list( 116 | fun = fun, 117 | args = args, 118 | probs = probs, 119 | xlim = xlim, 120 | ylim = ylim, 121 | n = n, 122 | na.rm = na.rm, 123 | ... 124 | ) 125 | ) 126 | } 127 | 128 | 129 | 130 | #' @rdname geom_hdr_fun 131 | #' @format NULL 132 | #' @usage NULL 133 | #' @importFrom scales percent 134 | #' @export 135 | StatHdrFun <- ggproto("StatHdrFun", Stat, 136 | 137 | default_aes = aes(order = after_stat(probs), alpha = after_stat(probs)), 138 | 139 | output = "bands", 140 | 141 | # very similar to StatHdr$compute_group(), 142 | # only difference are the parameters fun + args (vs. method + parameters) 143 | # -- this prevents factoring into one compute_group() method, 144 | # compute_group()'s arguments are different. 145 | compute_group = function(self, data, scales, na.rm = FALSE, 146 | fun, args = list(), probs = c(.99, .95, .8, .5), 147 | n = 100, xlim = NULL, ylim = NULL) { 148 | 149 | if ((is.null(xlim) & is.null(scales$x)) | (is.null(ylim) & is.null(scales$y))) { 150 | stop("If no data is provided to StatHdrFun, xlim and ylim must be specified") 151 | } 152 | 153 | rangex <- xlim %||% scales$x$dimension() 154 | rangey <- ylim %||% scales$y$dimension() 155 | 156 | # Only calculate HDR membership if we need to 157 | need_membership <- (self$output == "points") 158 | 159 | res <- get_hdr(data, method = "fun", probs, n, rangex, rangey, hdr_membership = need_membership, fun = fun, args = args) 160 | 161 | res_to_df(res, probs, data$group[1], self$output) 162 | 163 | } 164 | ) 165 | 166 | 167 | #' @rdname geom_hdr_fun 168 | #' @export 169 | geom_hdr_fun <- function(mapping = NULL, data = NULL, 170 | stat = "hdr_fun", position = "identity", 171 | ..., 172 | na.rm = FALSE, 173 | show.legend = NA, 174 | inherit.aes = TRUE) { 175 | 176 | if (is.null(data)) data <- ensure_nonempty_data 177 | 178 | layer( 179 | data = data, 180 | mapping = mapping, 181 | stat = stat, 182 | geom = GeomHdrFun, 183 | position = position, 184 | show.legend = show.legend, 185 | inherit.aes = inherit.aes, 186 | params = list( 187 | na.rm = na.rm, 188 | ... 189 | ) 190 | ) 191 | } 192 | 193 | 194 | 195 | #' @rdname geom_hdr_fun 196 | #' @format NULL 197 | #' @usage NULL 198 | #' @export 199 | GeomHdrFun <- ggproto("GeomHdrFun", GeomHdr) 200 | -------------------------------------------------------------------------------- /man/geom_hdr_points_fun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hdr_points_fun.R 3 | \docType{data} 4 | \name{geom_hdr_points_fun} 5 | \alias{geom_hdr_points_fun} 6 | \alias{stat_hdr_points_fun} 7 | \alias{StatHdrPointsFun} 8 | \title{Scatterplot colored by highest density regions of a bivariate pdf} 9 | \usage{ 10 | stat_hdr_points_fun( 11 | mapping = NULL, 12 | data = NULL, 13 | geom = "point", 14 | position = "identity", 15 | ..., 16 | fun, 17 | args = list(), 18 | probs = c(0.99, 0.95, 0.8, 0.5), 19 | xlim = NULL, 20 | ylim = NULL, 21 | n = 100, 22 | na.rm = FALSE, 23 | show.legend = NA, 24 | inherit.aes = TRUE 25 | ) 26 | 27 | geom_hdr_points_fun( 28 | mapping = NULL, 29 | data = NULL, 30 | stat = "hdr_points_fun", 31 | position = "identity", 32 | ..., 33 | na.rm = FALSE, 34 | show.legend = NA, 35 | inherit.aes = TRUE 36 | ) 37 | } 38 | \arguments{ 39 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 40 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 41 | at the top level of the plot. You must supply \code{mapping} if there is no plot 42 | mapping.} 43 | 44 | \item{data}{The data to be displayed in this layer. There are three 45 | options: 46 | 47 | If \code{NULL}, the default, the data is inherited from the plot 48 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 49 | 50 | A \code{data.frame}, or other object, will override the plot 51 | data. All objects will be fortified to produce a data frame. See 52 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 53 | 54 | A \code{function} will be called with a single argument, 55 | the plot data. The return value must be a \code{data.frame}, and 56 | will be used as the layer data. A \code{function} can be created 57 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 58 | 59 | \item{geom}{The geometric object to use to display the data, either as a 60 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the 61 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})} 62 | 63 | \item{position}{Position adjustment, either as a string naming the adjustment 64 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a 65 | position adjustment function. Use the latter if you need to change the 66 | settings of the adjustment.} 67 | 68 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are 69 | often aesthetics, used to set an aesthetic to a fixed value, like 70 | \code{colour = "red"} or \code{size = 3}. They may also be parameters 71 | to the paired geom/stat.} 72 | 73 | \item{fun}{A function, the joint probability density function, must be 74 | vectorized in its first two arguments; see examples.} 75 | 76 | \item{args}{Named list of additional arguments passed on to \code{fun}.} 77 | 78 | \item{probs}{Probabilities to compute highest density regions for.} 79 | 80 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to 81 | range of data if present.} 82 | 83 | \item{n}{Number of grid points in each direction.} 84 | 85 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with 86 | a warning. If \code{TRUE}, missing values are silently removed.} 87 | 88 | \item{show.legend}{logical. Should this layer be included in the legends? 89 | \code{NA}, the default, includes if any aesthetics are mapped. 90 | \code{FALSE} never includes, and \code{TRUE} always includes. 91 | It can also be a named logical vector to finely select the aesthetics to 92 | display.} 93 | 94 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 95 | rather than combining with them. This is most useful for helper functions 96 | that define both data and aesthetics and shouldn't inherit behaviour from 97 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 98 | 99 | \item{stat}{The statistical transformation to use on the data for this 100 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the 101 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than 102 | \code{"stat_count"})} 103 | } 104 | \description{ 105 | Compute the highest density regions (HDRs) of a bivariate pdf and plot the provided 106 | data as a scatterplot with points colored according to their corresponding HDR. 107 | } 108 | \section{Aesthetics}{ 109 | geom_hdr_points_fun understands the following aesthetics 110 | (required aesthetics are in bold): 111 | \itemize{ 112 | \item \strong{x} 113 | \item \strong{y} 114 | \item alpha 115 | \item color 116 | \item fill 117 | \item group 118 | \item linetype 119 | \item size 120 | \item subgroup 121 | } 122 | } 123 | 124 | \section{Computed variables}{ 125 | 126 | 127 | \describe{ \item{probs}{The probability associated with the highest density region, specified 128 | by \code{probs}.} } 129 | } 130 | 131 | \examples{ 132 | # Can plot points colored according to known pdf: 133 | set.seed(1) 134 | df <- data.frame(x = rexp(1000), y = rexp(1000)) 135 | f <- function(x, y) dexp(x) * dexp(y) 136 | 137 | ggplot(df, aes(x, y)) + 138 | geom_hdr_points_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) 139 | 140 | 141 | # Also allows for hdrs of a custom parametric model 142 | 143 | # generate example data 144 | n <- 1000 145 | th_true <- c(3, 8) 146 | 147 | rdata <- function(n, th) { 148 | gen_single_obs <- function(th) { 149 | rchisq(2, df = th) # can be anything 150 | } 151 | df <- replicate(n, gen_single_obs(th)) 152 | setNames(as.data.frame(t(df)), c("x", "y")) 153 | } 154 | data <- rdata(n, th_true) 155 | 156 | # estimate unknown parameters via maximum likelihood 157 | likelihood <- function(th) { 158 | th <- abs(th) # hack to enforce parameter space boundary 159 | log_f <- function(v) { 160 | x <- v[1]; y <- v[2] 161 | dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) 162 | } 163 | sum(apply(data, 1, log_f)) 164 | } 165 | (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) 166 | 167 | # plot f for the give model 168 | f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) 169 | 170 | ggplot(data, aes(x, y)) + 171 | geom_hdr_points_fun(fun = f, args = list(th = th_hat)) 172 | 173 | ggplot(data, aes(x, y)) + 174 | geom_hdr_points_fun(aes(fill = after_stat(probs)), shape = 21, color = "black", 175 | fun = f, args = list(th = th_hat), na.rm = TRUE) + 176 | geom_hdr_lines_fun(aes(color = after_stat(probs)), alpha = 1, fun = f, args = list(th = th_hat)) + 177 | lims(x = c(0, 15), y = c(0, 25)) 178 | 179 | } 180 | \keyword{datasets} 181 | -------------------------------------------------------------------------------- /man/geom_hdr_fun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hdr_fun.R, R/hdr_lines_fun.R 3 | \docType{data} 4 | \name{geom_hdr_fun} 5 | \alias{geom_hdr_fun} 6 | \alias{stat_hdr_fun} 7 | \alias{StatHdrFun} 8 | \alias{GeomHdrFun} 9 | \alias{stat_hdr_lines_fun} 10 | \alias{StatHdrLinesFun} 11 | \alias{geom_hdr_lines_fun} 12 | \alias{GeomHdrLinesFun} 13 | \title{Highest density regions of a bivariate pdf} 14 | \usage{ 15 | stat_hdr_fun( 16 | mapping = NULL, 17 | data = NULL, 18 | geom = "hdr_fun", 19 | position = "identity", 20 | ..., 21 | fun, 22 | args = list(), 23 | probs = c(0.99, 0.95, 0.8, 0.5), 24 | xlim = NULL, 25 | ylim = NULL, 26 | n = 100, 27 | na.rm = FALSE, 28 | show.legend = NA, 29 | inherit.aes = TRUE 30 | ) 31 | 32 | geom_hdr_fun( 33 | mapping = NULL, 34 | data = NULL, 35 | stat = "hdr_fun", 36 | position = "identity", 37 | ..., 38 | na.rm = FALSE, 39 | show.legend = NA, 40 | inherit.aes = TRUE 41 | ) 42 | } 43 | \arguments{ 44 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 45 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 46 | at the top level of the plot. You must supply \code{mapping} if there is no plot 47 | mapping.} 48 | 49 | \item{data}{The data to be displayed in this layer. There are three 50 | options: 51 | 52 | If \code{NULL}, the default, the data is inherited from the plot 53 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 54 | 55 | A \code{data.frame}, or other object, will override the plot 56 | data. All objects will be fortified to produce a data frame. See 57 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 58 | 59 | A \code{function} will be called with a single argument, 60 | the plot data. The return value must be a \code{data.frame}, and 61 | will be used as the layer data. A \code{function} can be created 62 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 63 | 64 | \item{geom}{The geometric object to use to display the data, either as a 65 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the 66 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})} 67 | 68 | \item{position}{Position adjustment, either as a string naming the adjustment 69 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a 70 | position adjustment function. Use the latter if you need to change the 71 | settings of the adjustment.} 72 | 73 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are 74 | often aesthetics, used to set an aesthetic to a fixed value, like 75 | \code{colour = "red"} or \code{size = 3}. They may also be parameters 76 | to the paired geom/stat.} 77 | 78 | \item{fun}{A function, the joint probability density function, must be 79 | vectorized in its first two arguments; see examples.} 80 | 81 | \item{args}{Named list of additional arguments passed on to \code{fun}.} 82 | 83 | \item{probs}{Probabilities to compute highest density regions for.} 84 | 85 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to 86 | range of data if present.} 87 | 88 | \item{n}{Resolution of grid \code{fun} is evaluated on.} 89 | 90 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with 91 | a warning. If \code{TRUE}, missing values are silently removed.} 92 | 93 | \item{show.legend}{logical. Should this layer be included in the legends? 94 | \code{NA}, the default, includes if any aesthetics are mapped. 95 | \code{FALSE} never includes, and \code{TRUE} always includes. 96 | It can also be a named logical vector to finely select the aesthetics to 97 | display.} 98 | 99 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 100 | rather than combining with them. This is most useful for helper functions 101 | that define both data and aesthetics and shouldn't inherit behaviour from 102 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 103 | 104 | \item{stat}{The statistical transformation to use on the data for this 105 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the 106 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than 107 | \code{"stat_count"})} 108 | } 109 | \description{ 110 | Compute and plot the highest density regions (HDRs) of a bivariate pdf. 111 | \code{geom_hdr_fun()} draws filled regions, and \code{geom_hdr_lines_fun()} draws lines outlining the regions. 112 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default. 113 | } 114 | \section{Aesthetics}{ 115 | \code{geom_hdr_fun()} and \code{geom_hdr_lines_fun()} understand the following aesthetics (required 116 | aesthetics are in bold): 117 | \itemize{ 118 | \item x 119 | \item y 120 | \item alpha 121 | \item color 122 | \item fill (only \code{geom_hdr_fun}) 123 | \item group 124 | \item linetype 125 | \item linewidth 126 | \item subgroup 127 | } 128 | } 129 | 130 | \section{Computed variables}{ 131 | 132 | 133 | \describe{ \item{probs}{The probability associated with the highest density region, specified 134 | by \code{probs}.} } 135 | } 136 | 137 | \examples{ 138 | # HDRs of the bivariate exponential 139 | f <- function(x, y) dexp(x) * dexp(y) 140 | ggplot() + geom_hdr_fun(fun = f, xlim = c(0, 10), ylim = c(0, 10)) 141 | 142 | 143 | # HDRs of a custom parametric model 144 | 145 | # generate example data 146 | n <- 1000 147 | th_true <- c(3, 8) 148 | 149 | rdata <- function(n, th) { 150 | gen_single_obs <- function(th) { 151 | rchisq(2, df = th) # can be anything 152 | } 153 | df <- replicate(n, gen_single_obs(th)) 154 | setNames(as.data.frame(t(df)), c("x", "y")) 155 | } 156 | data <- rdata(n, th_true) 157 | 158 | # estimate unknown parameters via maximum likelihood 159 | likelihood <- function(th) { 160 | th <- abs(th) # hack to enforce parameter space boundary 161 | log_f <- function(v) { 162 | x <- v[1]; y <- v[2] 163 | dchisq(x, df = th[1], log = TRUE) + dchisq(y, df = th[2], log = TRUE) 164 | } 165 | sum(apply(data, 1, log_f)) 166 | } 167 | (th_hat <- optim(c(1, 1), likelihood, control = list(fnscale = -1))$par) 168 | 169 | # plot f for the give model 170 | f <- function(x, y, th) dchisq(x, df = th[1]) * dchisq(y, df = th[2]) 171 | 172 | ggplot(data, aes(x, y)) + 173 | geom_hdr_fun(fun = f, args = list(th = th_hat)) + 174 | geom_point(size = .25, color = "red") + 175 | xlim(0, 30) + ylim(c(0, 30)) 176 | 177 | ggplot(data, aes(x, y)) + 178 | geom_hdr_lines_fun(fun = f, args = list(th = th_hat)) + 179 | geom_point(size = .25, color = "red") + 180 | xlim(0, 30) + ylim(c(0, 30)) 181 | 182 | 183 | } 184 | \keyword{datasets} 185 | -------------------------------------------------------------------------------- /man/geom_hdr_rug_fun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hdr_rug_fun.R 3 | \docType{data} 4 | \name{geom_hdr_rug_fun} 5 | \alias{geom_hdr_rug_fun} 6 | \alias{stat_hdr_rug_fun} 7 | \alias{StatHdrRugFun} 8 | \alias{GeomHdrRugFun} 9 | \title{Rug plots of highest density region estimates of univariate pdfs} 10 | \usage{ 11 | stat_hdr_rug_fun( 12 | mapping = NULL, 13 | data = NULL, 14 | geom = "hdr_rug_fun", 15 | position = "identity", 16 | ..., 17 | fun_x = NULL, 18 | fun_y = NULL, 19 | args_x = list(), 20 | args_y = list(), 21 | probs = c(0.99, 0.95, 0.8, 0.5), 22 | xlim = NULL, 23 | ylim = NULL, 24 | n = 512, 25 | na.rm = FALSE, 26 | show.legend = NA, 27 | inherit.aes = TRUE 28 | ) 29 | 30 | geom_hdr_rug_fun( 31 | mapping = NULL, 32 | data = NULL, 33 | stat = "hdr_rug_fun", 34 | position = "identity", 35 | ..., 36 | outside = FALSE, 37 | sides = "bl", 38 | length = unit(0.03, "npc"), 39 | na.rm = FALSE, 40 | show.legend = TRUE, 41 | inherit.aes = TRUE 42 | ) 43 | } 44 | \arguments{ 45 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 46 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 47 | at the top level of the plot. You must supply \code{mapping} if there is no plot 48 | mapping.} 49 | 50 | \item{data}{The data to be displayed in this layer. There are three 51 | options: 52 | 53 | If \code{NULL}, the default, the data is inherited from the plot 54 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 55 | 56 | A \code{data.frame}, or other object, will override the plot 57 | data. All objects will be fortified to produce a data frame. See 58 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 59 | 60 | A \code{function} will be called with a single argument, 61 | the plot data. The return value must be a \code{data.frame}, and 62 | will be used as the layer data. A \code{function} can be created 63 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 64 | 65 | \item{geom}{The geometric object to use to display the data, either as a 66 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the 67 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})} 68 | 69 | \item{position}{Position adjustment, either as a string naming the adjustment 70 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a 71 | position adjustment function. Use the latter if you need to change the 72 | settings of the adjustment.} 73 | 74 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are 75 | often aesthetics, used to set an aesthetic to a fixed value, like 76 | \code{colour = "red"} or \code{size = 3}. They may also be parameters 77 | to the paired geom/stat.} 78 | 79 | \item{fun_x, fun_y}{Functions, the univariate probability density function for the x- and/or y-axis. 80 | First argument must be vectorized.} 81 | 82 | \item{args_x, args_y}{Named list of additional arguments passed on to \code{fun_x} and/or \code{fun_y}.} 83 | 84 | \item{probs}{Probabilities to compute highest density regions for.} 85 | 86 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to 87 | range of data.} 88 | 89 | \item{n}{Resolution of grid defined by \code{xlim} and \code{ylim}. 90 | Ignored if \code{method = "histogram"} or \code{method = "freqpoly"}.} 91 | 92 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with 93 | a warning. If \code{TRUE}, missing values are silently removed.} 94 | 95 | \item{show.legend}{logical. Should this layer be included in the legends? 96 | \code{NA}, the default, includes if any aesthetics are mapped. 97 | \code{FALSE} never includes, and \code{TRUE} always includes. 98 | It can also be a named logical vector to finely select the aesthetics to 99 | display.} 100 | 101 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 102 | rather than combining with them. This is most useful for helper functions 103 | that define both data and aesthetics and shouldn't inherit behaviour from 104 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 105 | 106 | \item{stat}{The statistical transformation to use on the data for this 107 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the 108 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than 109 | \code{"stat_count"})} 110 | 111 | \item{outside}{logical that controls whether to move the rug tassels outside of the plot area. Default is off (FALSE). You will also need to use \code{coord_cartesian(clip = "off")}. When set to TRUE, also consider changing the sides argument to "tr". See examples.} 112 | 113 | \item{sides}{A string that controls which sides of the plot the rugs appear on. 114 | It can be set to a string containing any of \code{"trbl"}, for top, right, 115 | bottom, and left.} 116 | 117 | \item{length}{A \code{\link[grid:unit]{grid::unit()}} object that sets the length of the rug lines. Use scale expansion to avoid overplotting of data.} 118 | } 119 | \description{ 120 | Compute and plot the highest density regions (HDRs) of specified univariate pdf(s). 121 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default. 122 | } 123 | \section{Aesthetics}{ 124 | \code{geom_hdr_rug_fun()} understands the following aesthetics (required 125 | aesthetics are in bold): 126 | \itemize{ 127 | \item x 128 | \item y 129 | \item alpha 130 | \item fill 131 | \item group 132 | \item subgroup 133 | } 134 | } 135 | 136 | \section{Computed variables}{ 137 | 138 | 139 | \describe{ \item{probs}{The probability of the highest density region, specified 140 | by \code{probs}, corresponding to each point.} } 141 | } 142 | 143 | \examples{ 144 | # Plotting data with exponential marginals 145 | df <- data.frame(x = rexp(1e3), y = rexp(1e3)) 146 | 147 | ggplot(df, aes(x, y)) + 148 | geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp) + 149 | geom_point(size = .5) + 150 | coord_fixed() 151 | 152 | # without data/aesthetic mappings 153 | ggplot() + 154 | geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp, xlim = c(0, 7), ylim = c(0, 7)) + 155 | coord_fixed() 156 | 157 | 158 | # Plotting univariate normal data, estimating mean and sd 159 | df <- data.frame(x = rnorm(1e4, mean = 1, sd = 3)) 160 | 161 | # estimating parameters 162 | mu_hat <- mean(df$x) 163 | sd_hat <- sd(df$x) 164 | 165 | ggplot(df, aes(x)) + 166 | geom_hdr_rug_fun(fun_x = dnorm, args_x = list(mean = mu_hat, sd = sd_hat)) + 167 | geom_density() 168 | 169 | # Equivalent to `method_norm_1d()` with `geom_hdr_rug()` 170 | ggplot(df, aes(x)) + 171 | geom_hdr_rug(method = method_norm_1d()) + 172 | geom_density() 173 | } 174 | \keyword{datasets} 175 | -------------------------------------------------------------------------------- /R/hdr_rug_fun.R: -------------------------------------------------------------------------------- 1 | #' Rug plots of highest density region estimates of univariate pdfs 2 | #' 3 | #' Compute and plot the highest density regions (HDRs) of specified univariate pdf(s). 4 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default. 5 | #' 6 | #' @section Aesthetics: `geom_hdr_rug_fun()` understands the following aesthetics (required 7 | #' aesthetics are in bold): 8 | #' 9 | #' - x 10 | #' - y 11 | #' - alpha 12 | #' - fill 13 | #' - group 14 | #' - subgroup 15 | #' 16 | #' @section Computed variables: 17 | #' 18 | #' \describe{ \item{probs}{The probability of the highest density region, specified 19 | #' by `probs`, corresponding to each point.} } 20 | #' 21 | #' @inheritParams ggplot2::geom_rug 22 | #' @inheritParams stat_hdr_rug 23 | #' @param fun_x,fun_y Functions, the univariate probability density function for the x- and/or y-axis. 24 | #' First argument must be vectorized. 25 | #' @param args_x,args_y Named list of additional arguments passed on to `fun_x` and/or `fun_y`. 26 | #' @name geom_hdr_rug_fun 27 | #' @rdname geom_hdr_rug_fun 28 | #' 29 | #' @examples 30 | #' # Plotting data with exponential marginals 31 | #' df <- data.frame(x = rexp(1e3), y = rexp(1e3)) 32 | #' 33 | #' ggplot(df, aes(x, y)) + 34 | #' geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp) + 35 | #' geom_point(size = .5) + 36 | #' coord_fixed() 37 | #' 38 | #' # without data/aesthetic mappings 39 | #' ggplot() + 40 | #' geom_hdr_rug_fun(fun_x = dexp, fun_y = dexp, xlim = c(0, 7), ylim = c(0, 7)) + 41 | #' coord_fixed() 42 | #' 43 | #' 44 | #' # Plotting univariate normal data, estimating mean and sd 45 | #' df <- data.frame(x = rnorm(1e4, mean = 1, sd = 3)) 46 | #' 47 | #' # estimating parameters 48 | #' mu_hat <- mean(df$x) 49 | #' sd_hat <- sd(df$x) 50 | #' 51 | #' ggplot(df, aes(x)) + 52 | #' geom_hdr_rug_fun(fun_x = dnorm, args_x = list(mean = mu_hat, sd = sd_hat)) + 53 | #' geom_density() 54 | #' 55 | #' # Equivalent to `method_norm_1d()` with `geom_hdr_rug()` 56 | #' ggplot(df, aes(x)) + 57 | #' geom_hdr_rug(method = method_norm_1d()) + 58 | #' geom_density() 59 | NULL 60 | 61 | 62 | #' @rdname geom_hdr_rug_fun 63 | #' @export 64 | stat_hdr_rug_fun <- function(mapping = NULL, data = NULL, 65 | geom = "hdr_rug_fun", position = "identity", 66 | ..., 67 | fun_x = NULL, fun_y = NULL, 68 | args_x = list(), args_y = list(), 69 | probs = c(.99, .95, .8, .5), 70 | xlim = NULL, ylim = NULL, n = 512, 71 | na.rm = FALSE, 72 | show.legend = NA, 73 | inherit.aes = TRUE) { 74 | 75 | if (is.null(data)) data <- ensure_nonempty_data 76 | 77 | layer( 78 | data = data, 79 | mapping = mapping, 80 | stat = StatHdrRugFun, 81 | geom = geom, 82 | position = position, 83 | show.legend = show.legend, 84 | inherit.aes = inherit.aes, 85 | params = list( 86 | fun_x = fun_x, 87 | fun_y = fun_y, 88 | args_x = args_x, 89 | args_y = args_y, 90 | probs = probs, 91 | xlim = xlim, 92 | ylim = ylim, 93 | n = n, 94 | na.rm = na.rm, 95 | ... 96 | ) 97 | ) 98 | } 99 | 100 | 101 | 102 | #' @rdname geom_hdr_rug_fun 103 | #' @format NULL 104 | #' @usage NULL 105 | #' @export 106 | StatHdrRugFun <- ggproto("StatHdrRugFun", Stat, 107 | 108 | default_aes = aes(order = after_stat(probs), alpha = after_stat(probs)), 109 | # if fun_x or fun_y are unspecified data might be dropped 110 | dropped_aes = c("x", "y"), 111 | 112 | # very similar to StatHdrRug$compute_group(), 113 | # only difference are the parameters fun + args (vs. method + parameters) 114 | # -- this prevents factoring into one compute_group() method, 115 | # compute_group()'s arguments are different. 116 | compute_group = function(self, data, scales, na.rm = FALSE, 117 | fun_x = NULL, fun_y = NULL, args_x = list(), args_y = list(), 118 | probs = c(.99, .95, .8, .5), 119 | n = 512, xlim = NULL, ylim = NULL) { 120 | 121 | 122 | # Recycle for both x, y 123 | if (length(n) == 1) n <- rep(n, 2) 124 | 125 | # Estimate marginal densities 126 | 127 | # Initialize dfs for x and y axes, 128 | # in case only x or y are supplied: 129 | df_x <- data.frame() 130 | df_y <- data.frame() 131 | 132 | 133 | if (!is.null(fun_x)) { 134 | 135 | if (is.null(xlim) & is.null(scales$x)) { 136 | stop("`xlim` must be specified if `x` aesthetic not provided to `StatHdrRugFun`") 137 | } 138 | 139 | rangex <- xlim %||% scales$x$dimension() 140 | 141 | res_x <- get_hdr_1d(data$x, method = "fun", probs, n[1], rangex, hdr_membership = FALSE, fun = fun_x, args = args_x) 142 | 143 | df_x <- res_to_df_1d(res_x, probs, data$group[1], output = "rug") 144 | 145 | # Needs correct name for ggplot2 internals 146 | df_x$axis <- "x" 147 | df_x$y <- NA 148 | 149 | } 150 | 151 | 152 | if (!is.null(fun_y)) { 153 | 154 | if (is.null(ylim) & is.null(scales$y)) { 155 | stop("`ylim` must be specified if `y` aesthetic not provided to `StatHdrRugFun`") 156 | } 157 | 158 | rangey <- ylim %||% scales$y$dimension() 159 | 160 | res_y <- get_hdr_1d(data$y, method = "fun", probs, n[1], rangey, hdr_membership = FALSE, fun = fun_y, args = args_y) 161 | 162 | df_y <- res_to_df_1d(res_y, probs, data$group[1], output = "rug") 163 | 164 | # Needs correct name for ggplot2 internals 165 | df_y$axis <- "y" 166 | df_y$y <- df_y$x 167 | df_y$x <- NA 168 | 169 | } 170 | 171 | df <- rbind(df_x, df_y) 172 | 173 | # Need to remove extra col if only plotting x or y rug 174 | if (is.null(fun_x)) df$x <- NULL 175 | if (is.null(fun_y)) df$y <- NULL 176 | 177 | df 178 | 179 | 180 | } 181 | ) 182 | 183 | 184 | 185 | #' @rdname geom_hdr_rug_fun 186 | #' @export 187 | geom_hdr_rug_fun <- function(mapping = NULL, data = NULL, 188 | stat = "hdr_rug_fun", position = "identity", 189 | ..., 190 | outside = FALSE, 191 | sides = "bl", 192 | length = unit(0.03, "npc"), 193 | na.rm = FALSE, 194 | show.legend = TRUE, 195 | inherit.aes = TRUE) { 196 | 197 | if (is.null(data)) data <- ensure_nonempty_data 198 | 199 | layer( 200 | data = data, 201 | mapping = mapping, 202 | stat = stat, 203 | geom = GeomHdrRugFun, 204 | position = position, 205 | show.legend = show.legend, 206 | inherit.aes = inherit.aes, 207 | params = list( 208 | outside = outside, 209 | sides = sides, 210 | length = length, 211 | na.rm = na.rm, 212 | ... 213 | ) 214 | ) 215 | } 216 | 217 | 218 | #' @rdname geom_hdr_rug_fun 219 | #' @format NULL 220 | #' @usage NULL 221 | #' @export 222 | GeomHdrRugFun <- ggproto("GeomHdrRugFun", GeomHdrRug) 223 | 224 | 225 | 226 | 227 | 228 | -------------------------------------------------------------------------------- /man/geom_hdr_rug.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hdr_rug.R 3 | \docType{data} 4 | \name{geom_hdr_rug} 5 | \alias{geom_hdr_rug} 6 | \alias{stat_hdr_rug} 7 | \alias{StatHdrRug} 8 | \alias{GeomHdrRug} 9 | \title{Rug plots of marginal highest density region estimates} 10 | \usage{ 11 | stat_hdr_rug( 12 | mapping = NULL, 13 | data = NULL, 14 | geom = "hdr_rug", 15 | position = "identity", 16 | ..., 17 | method = "kde", 18 | method_y = "kde", 19 | probs = c(0.99, 0.95, 0.8, 0.5), 20 | xlim = NULL, 21 | ylim = NULL, 22 | n = 512, 23 | na.rm = FALSE, 24 | show.legend = TRUE, 25 | inherit.aes = TRUE 26 | ) 27 | 28 | geom_hdr_rug( 29 | mapping = NULL, 30 | data = NULL, 31 | stat = "hdr_rug", 32 | position = "identity", 33 | ..., 34 | outside = FALSE, 35 | sides = "bl", 36 | length = unit(0.03, "npc"), 37 | na.rm = FALSE, 38 | show.legend = TRUE, 39 | inherit.aes = TRUE 40 | ) 41 | } 42 | \arguments{ 43 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 44 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 45 | at the top level of the plot. You must supply \code{mapping} if there is no plot 46 | mapping.} 47 | 48 | \item{data}{The data to be displayed in this layer. There are three 49 | options: 50 | 51 | If \code{NULL}, the default, the data is inherited from the plot 52 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 53 | 54 | A \code{data.frame}, or other object, will override the plot 55 | data. All objects will be fortified to produce a data frame. See 56 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 57 | 58 | A \code{function} will be called with a single argument, 59 | the plot data. The return value must be a \code{data.frame}, and 60 | will be used as the layer data. A \code{function} can be created 61 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 62 | 63 | \item{geom}{The geometric object to use to display the data, either as a 64 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the 65 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})} 66 | 67 | \item{position}{Position adjustment, either as a string naming the adjustment 68 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a 69 | position adjustment function. Use the latter if you need to change the 70 | settings of the adjustment.} 71 | 72 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are 73 | often aesthetics, used to set an aesthetic to a fixed value, like 74 | \code{colour = "red"} or \code{size = 3}. They may also be parameters 75 | to the paired geom/stat.} 76 | 77 | \item{method, method_y}{Density estimator(s) to use. 78 | By default \code{method} is used for both x- and y-axis. 79 | If specified, \code{method_y} will be used for y-axis. 80 | Accepts character vector: \code{"kde"},\code{"histogram"}, \code{"freqpoly"}, or \code{"norm"}. 81 | Alternatively accepts functions which return closures corresponding to density estimates, 82 | see \code{?get_hdr_1d} or \code{vignette("method", "ggdensity")}.} 83 | 84 | \item{probs}{Probabilities to compute highest density regions for.} 85 | 86 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to 87 | range of data.} 88 | 89 | \item{n}{Resolution of grid defined by \code{xlim} and \code{ylim}. 90 | Ignored if \code{method = "histogram"} or \code{method = "freqpoly"}.} 91 | 92 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with 93 | a warning. If \code{TRUE}, missing values are silently removed.} 94 | 95 | \item{show.legend}{logical. Should this layer be included in the legends? 96 | \code{NA}, the default, includes if any aesthetics are mapped. 97 | \code{FALSE} never includes, and \code{TRUE} always includes. 98 | It can also be a named logical vector to finely select the aesthetics to 99 | display.} 100 | 101 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 102 | rather than combining with them. This is most useful for helper functions 103 | that define both data and aesthetics and shouldn't inherit behaviour from 104 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 105 | 106 | \item{stat}{The statistical transformation to use on the data for this 107 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the 108 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than 109 | \code{"stat_count"})} 110 | 111 | \item{outside}{logical that controls whether to move the rug tassels outside of the plot area. Default is off (FALSE). You will also need to use \code{coord_cartesian(clip = "off")}. When set to TRUE, also consider changing the sides argument to "tr". See examples.} 112 | 113 | \item{sides}{A string that controls which sides of the plot the rugs appear on. 114 | It can be set to a string containing any of \code{"trbl"}, for top, right, 115 | bottom, and left.} 116 | 117 | \item{length}{A \code{\link[grid:unit]{grid::unit()}} object that sets the length of the rug lines. Use scale expansion to avoid overplotting of data.} 118 | } 119 | \description{ 120 | Perform 1D density estimation, compute and plot the resulting highest density 121 | regions in a way similar to \code{\link[ggplot2:geom_rug]{ggplot2::geom_rug()}}. 122 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default. 123 | } 124 | \section{Aesthetics}{ 125 | geom_hdr_rug understands the following aesthetics (required 126 | aesthetics are in bold): 127 | \itemize{ 128 | \item x 129 | \item y 130 | \item alpha 131 | \item fill 132 | \item group 133 | \item subgroup 134 | } 135 | } 136 | 137 | \section{Computed variables}{ 138 | 139 | 140 | \describe{ \item{probs}{The probability of the highest density region, specified 141 | by \code{probs}, corresponding to each point.} } 142 | } 143 | 144 | \examples{ 145 | set.seed(1) 146 | df <- data.frame(x = rnorm(100), y = rnorm(100)) 147 | 148 | # Plot marginal HDRs for bivariate data 149 | ggplot(df, aes(x, y)) + 150 | geom_point() + 151 | geom_hdr_rug() + 152 | coord_fixed() 153 | 154 | ggplot(df, aes(x, y)) + 155 | geom_hdr() + 156 | geom_hdr_rug() + 157 | coord_fixed() 158 | 159 | # Plot HDR for univariate data 160 | ggplot(df, aes(x)) + 161 | geom_density() + 162 | geom_hdr_rug() 163 | 164 | ggplot(df, aes(y = y)) + 165 | geom_density() + 166 | geom_hdr_rug() 167 | 168 | # Specify location of marginal HDRs as in ggplot2::geom_rug() 169 | ggplot(df, aes(x, y)) + 170 | geom_hdr() + 171 | geom_hdr_rug(sides = "tr", outside = TRUE) + 172 | coord_fixed(clip = "off") 173 | 174 | # Can use same methods of density estimation as geom_hdr(). 175 | # For data with constrained support, we suggest setting method = "histogram": 176 | ggplot(df, aes(x^2)) + 177 | geom_histogram(bins = 30, boundary = 0) + 178 | geom_hdr_rug(method = "histogram") 179 | 180 | ggplot(df, aes(x^2, y^2)) + 181 | geom_hdr(method = "histogram") + 182 | geom_hdr_rug(method = "histogram") + 183 | coord_fixed() 184 | 185 | } 186 | \keyword{datasets} 187 | -------------------------------------------------------------------------------- /R/method_1d.R: -------------------------------------------------------------------------------- 1 | # methods that return est pdf as closure --------------------------------- 2 | 3 | #' Univariate parametric normal HDR estimator 4 | #' 5 | #' Function used to specify univariate normal density estimator 6 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`). 7 | #' 8 | #' For more details on the use and implementation of the `method_*_1d()` functions, 9 | #' see `vignette("method", "ggdensity")`. 10 | #' 11 | #' @examples 12 | #' # Normal estimators are useful when an assumption of normality is appropriate 13 | #' df <- data.frame(x = rnorm(1e3)) 14 | #' 15 | #' ggplot(df, aes(x)) + 16 | #' geom_hdr_rug(method = method_norm_1d()) + 17 | #' geom_density() 18 | #' 19 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 20 | #' res <- get_hdr_1d(df$x, method = method_norm_1d()) 21 | #' str(res) 22 | #' 23 | #' @export 24 | method_norm_1d <- function() { 25 | 26 | function(x) { 27 | 28 | mu_hat <- mean(x) 29 | sigma_hat <- sd(x) 30 | 31 | function(x) dnorm(x, mu_hat, sigma_hat) 32 | 33 | } 34 | } 35 | 36 | # methods that return closures that compute a grid ------------------------ 37 | 38 | #' Univariate kernel density HDR estimator 39 | #' 40 | #' Function used to specify univariate kernel density estimator 41 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`). 42 | #' 43 | #' For more details on the use and implementation of the `method_*_1d()` functions, 44 | #' see `vignette("method", "ggdensity")`. 45 | #' 46 | #' @inheritParams stats::density 47 | #' 48 | #' @examples 49 | #' df <- data.frame(x = rnorm(1e3, sd = 3)) 50 | #' 51 | #' ggplot(df, aes(x)) + 52 | #' geom_hdr_rug(method = method_kde_1d()) + 53 | #' geom_density() 54 | #' 55 | #' # Details of the KDE can be adjusted with arguments to `method_kde_1d()` 56 | #' ggplot(df, aes(x)) + 57 | #' geom_hdr_rug(method = method_kde_1d(adjust = 1/5)) + 58 | #' geom_density(adjust = 1/5) 59 | #' 60 | #' ggplot(df, aes(x)) + 61 | #' geom_hdr_rug(method = method_kde_1d(kernel = "triangular")) + 62 | #' geom_density(kernel = "triangular") 63 | #' 64 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 65 | #' res <- get_hdr_1d(df$x, method = method_kde_1d()) 66 | #' str(res) 67 | #' 68 | #' @export 69 | method_kde_1d <- function(bw = "nrd0", adjust = 1, kernel = "gaussian", weights = NULL, window = kernel) { 70 | 71 | function(x, n, range) { 72 | 73 | nx <- length(x) 74 | 75 | if (is.null(weights)) { 76 | weights <- rep(1 / nx, nx) 77 | } else { 78 | weights <- normalize(weights) 79 | } 80 | 81 | dens <- stats::density( 82 | x, 83 | bw = bw, 84 | adjust = adjust, 85 | kernel = kernel, 86 | weights = weights, 87 | window = window, 88 | n = n, 89 | from = range[1], 90 | to = range[2] 91 | ) 92 | 93 | data.frame( 94 | x = dens$x, 95 | fhat = dens$y 96 | ) 97 | 98 | } 99 | } 100 | 101 | #' Univariate histogram HDR estimator 102 | #' 103 | #' Function used to specify univariate histogram density estimator 104 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`). 105 | #' 106 | #' For more details on the use and implementation of the `method_*_1d()` functions, 107 | #' see `vignette("method", "ggdensity")`. 108 | #' 109 | #' @param bins Number of bins. Defaults to normal reference rule (Scott, pg 59). 110 | #' 111 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley. 112 | #' 113 | #' @examples 114 | #' # Histogram estimators can be useful when data has boundary constraints 115 | #' df <- data.frame(x = rexp(1e3)) 116 | #' 117 | #' # Strip chart to visualize 1-d data 118 | #' p <- ggplot(df, aes(x)) + 119 | #' geom_jitter(aes(y = 0), width = 0, height = 2) + 120 | #' scale_y_continuous(name = NULL, breaks = NULL) + 121 | #' coord_cartesian(ylim = c(-3, 3)) 122 | #' 123 | #' p 124 | #' 125 | #' p + geom_hdr_rug(method = method_histogram_1d()) 126 | #' 127 | #' # The resolution of the histogram estimator can be set via `bins` 128 | #' p + geom_hdr_rug(method = method_histogram_1d(bins = 5)) 129 | #' 130 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 131 | #' res <- get_hdr_1d(df$x, method = method_histogram_1d()) 132 | #' str(res) 133 | #' 134 | #' @export 135 | method_histogram_1d <- function(bins = NULL) { 136 | 137 | function(x, n, range) { 138 | 139 | nx <- length(x) 140 | 141 | # Default to normal reference rule (Scott p. 59) 142 | if (is.null(bins)) { 143 | hx <- 3.504 * stats::sd(x) * nx^(-1/3) 144 | bins <- round((range[2] - range[1]) / hx) 145 | } 146 | 147 | sx <- seq(range[1], range[2], length.out = bins + 1) 148 | de_x <- sx[2] - sx[1] 149 | midpts <- sx[-(bins+1)] + de_x/2 150 | n <- as.numeric(table(cut(x, sx))) 151 | 152 | data.frame( 153 | x = midpts, 154 | fhat = normalize(n) 155 | ) 156 | 157 | } 158 | } 159 | 160 | #' Univariate frequency polygon HDR estimator 161 | #' 162 | #' Function used to specify univariate frequency polygon density estimator 163 | #' for `get_hdr_1d()` and layer functions (e.g. `geom_hdr_rug()`). 164 | #' 165 | #' For more details on the use and implementation of the `method_*_1d()` functions, 166 | #' see `vignette("method", "ggdensity")`. 167 | #' 168 | #' @inheritParams method_histogram_1d 169 | #' 170 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley. 171 | #' 172 | #' @examples 173 | #' df <- data.frame(x = rnorm(1e3)) 174 | #' 175 | #' # Strip chart to visualize 1-d data 176 | #' p <- ggplot(df, aes(x)) + 177 | #' geom_jitter(aes(y = 0), width = 0, height = 2) + 178 | #' scale_y_continuous(name = NULL, breaks = NULL) + 179 | #' coord_cartesian(ylim = c(-3, 3)) 180 | #' 181 | #' p 182 | #' 183 | #' p + geom_hdr_rug(method = method_freqpoly_1d()) 184 | #' 185 | #' # The resolution of the frequency polygon estimator can be set via `bins` 186 | #' p + geom_hdr_rug(method = method_freqpoly_1d(bins = 100)) 187 | #' 188 | #' # Can also be used with `get_hdr_1d()` for numerical summary of HDRs 189 | #' res <- get_hdr_1d(df$x, method = method_freqpoly_1d()) 190 | #' str(res) 191 | #' 192 | #' @export 193 | method_freqpoly_1d <- function(bins = NULL) { 194 | 195 | function(x, n, range) { 196 | 197 | # Start with output from method_histogram 198 | df <- method_histogram_1d(bins)(x, n, range) 199 | 200 | hx <- df$x[2] - df$x[1] 201 | 202 | # need to pad df from hist_marginal() w/ bins that have est prob of 0 203 | # so that we can interpolate 204 | df <- rbind( 205 | 206 | # add initial bin w/ est prob of 0 207 | data.frame( 208 | x = min(df$x) - hx, 209 | fhat = 0 210 | ), 211 | 212 | # include original histogram estimator 213 | df, 214 | 215 | # add final bin w/ est prob of 0 216 | data.frame( 217 | x = max(df$x) + hx, 218 | fhat = 0 219 | ) 220 | 221 | ) 222 | 223 | sx <- seq(range[1], range[2], length.out = n) 224 | 225 | interpolate_fhat <- function(x) { 226 | lower_x <- df$x[max(which(df$x < x))] 227 | upper_x <- df$x[min(which(df$x >= x))] 228 | 229 | lower_fhat <- df$fhat[max(which(df$x < x))] 230 | upper_fhat <- df$fhat[min(which(df$x >= x))] 231 | 232 | lower_fhat + (x - lower_x) * (upper_fhat - lower_fhat) / (upper_x - lower_x) 233 | } 234 | 235 | dens <- vapply(sx, interpolate_fhat, numeric(1)) 236 | 237 | data.frame( 238 | x = sx, 239 | fhat = dens 240 | ) 241 | 242 | } 243 | } 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /man/geom_hdr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hdr.R, R/hdr_lines.R 3 | \docType{data} 4 | \name{geom_hdr} 5 | \alias{geom_hdr} 6 | \alias{stat_hdr} 7 | \alias{StatHdr} 8 | \alias{GeomHdr} 9 | \alias{stat_hdr_lines} 10 | \alias{StatHdrLines} 11 | \alias{geom_hdr_lines} 12 | \alias{GeomHdrLines} 13 | \title{Highest density regions of a 2D density estimate} 14 | \usage{ 15 | stat_hdr( 16 | mapping = NULL, 17 | data = NULL, 18 | geom = "hdr", 19 | position = "identity", 20 | ..., 21 | method = "kde", 22 | probs = c(0.99, 0.95, 0.8, 0.5), 23 | n = 100, 24 | xlim = NULL, 25 | ylim = NULL, 26 | na.rm = FALSE, 27 | show.legend = NA, 28 | inherit.aes = TRUE 29 | ) 30 | 31 | geom_hdr( 32 | mapping = NULL, 33 | data = NULL, 34 | stat = "hdr", 35 | position = "identity", 36 | ..., 37 | na.rm = FALSE, 38 | show.legend = NA, 39 | inherit.aes = TRUE 40 | ) 41 | } 42 | \arguments{ 43 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 44 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 45 | at the top level of the plot. You must supply \code{mapping} if there is no plot 46 | mapping.} 47 | 48 | \item{data}{The data to be displayed in this layer. There are three 49 | options: 50 | 51 | If \code{NULL}, the default, the data is inherited from the plot 52 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 53 | 54 | A \code{data.frame}, or other object, will override the plot 55 | data. All objects will be fortified to produce a data frame. See 56 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 57 | 58 | A \code{function} will be called with a single argument, 59 | the plot data. The return value must be a \code{data.frame}, and 60 | will be used as the layer data. A \code{function} can be created 61 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 62 | 63 | \item{geom}{The geometric object to use to display the data, either as a 64 | \code{ggproto} \code{Geom} subclass or as a string naming the geom stripped of the 65 | \code{geom_} prefix (e.g. \code{"point"} rather than \code{"geom_point"})} 66 | 67 | \item{position}{Position adjustment, either as a string naming the adjustment 68 | (e.g. \code{"jitter"} to use \code{position_jitter}), or the result of a call to a 69 | position adjustment function. Use the latter if you need to change the 70 | settings of the adjustment.} 71 | 72 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}. These are 73 | often aesthetics, used to set an aesthetic to a fixed value, like 74 | \code{colour = "red"} or \code{size = 3}. They may also be parameters 75 | to the paired geom/stat.} 76 | 77 | \item{method}{Density estimator to use, accepts character vector: 78 | \code{"kde"},\code{"histogram"}, \code{"freqpoly"}, or \code{"mvnorm"}. 79 | Alternatively accepts functions which return closures corresponding to density estimates, 80 | see \code{?get_hdr} or \code{vignette("method", "ggdensity")}.} 81 | 82 | \item{probs}{Probabilities to compute highest density regions for.} 83 | 84 | \item{n}{Resolution of grid defined by \code{xlim} and \code{ylim}. 85 | Ignored if \code{method = "histogram"} or \code{method = "freqpoly"}.} 86 | 87 | \item{xlim, ylim}{Range to compute and draw regions. If \code{NULL}, defaults to 88 | range of data.} 89 | 90 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with 91 | a warning. If \code{TRUE}, missing values are silently removed.} 92 | 93 | \item{show.legend}{logical. Should this layer be included in the legends? 94 | \code{NA}, the default, includes if any aesthetics are mapped. 95 | \code{FALSE} never includes, and \code{TRUE} always includes. 96 | It can also be a named logical vector to finely select the aesthetics to 97 | display.} 98 | 99 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 100 | rather than combining with them. This is most useful for helper functions 101 | that define both data and aesthetics and shouldn't inherit behaviour from 102 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 103 | 104 | \item{stat}{The statistical transformation to use on the data for this 105 | layer, either as a \code{ggproto} \code{Geom} subclass or as a string naming the 106 | stat stripped of the \code{stat_} prefix (e.g. \code{"count"} rather than 107 | \code{"stat_count"})} 108 | } 109 | \description{ 110 | Perform 2D density estimation, compute and plot the resulting highest density regions. 111 | \code{geom_hdr()} draws filled regions and \code{geom_hdr_lines()} draws lines outlining the regions. 112 | Note, the plotted objects have probabilities mapped to the \code{alpha} aesthetic by default. 113 | } 114 | \section{Aesthetics}{ 115 | \code{geom_hdr()} and \code{geom_hdr_lines()} understand the following aesthetics (required 116 | aesthetics are in bold): 117 | \itemize{ 118 | \item \strong{x} 119 | \item \strong{y} 120 | \item alpha 121 | \item color 122 | \item fill (only \code{geom_hdr}) 123 | \item group 124 | \item linetype 125 | \item linewidth 126 | \item subgroup 127 | } 128 | } 129 | 130 | \section{Computed variables}{ 131 | 132 | 133 | \describe{ \item{probs}{The probability associated with the highest density region, specified 134 | by \code{probs} argument.} } 135 | } 136 | 137 | \examples{ 138 | # Basic simulated data with bivariate normal data and various methods 139 | df <- data.frame(x = rnorm(1000), y = rnorm(1000)) 140 | p <- ggplot(df, aes(x, y)) + coord_equal() 141 | 142 | p + geom_hdr() 143 | p + geom_hdr(method = "mvnorm") 144 | p + geom_hdr(method = "freqpoly") 145 | # p + geom_hdr(method = "histogram") 146 | 147 | # Adding point layers on top to visually assess region estimates 148 | pts <- geom_point(size = .2, color = "red") 149 | 150 | p + geom_hdr() + pts 151 | p + geom_hdr(method = "mvnorm") + pts 152 | p + geom_hdr(method = "freqpoly") + pts 153 | # p + geom_hdr(method = "histogram") + pts 154 | 155 | # Highest density region boundary lines 156 | p + geom_hdr_lines() 157 | p + geom_hdr_lines(method = "mvnorm") 158 | p + geom_hdr_lines(method = "freqpoly") 159 | # p + geom_hdr_lines(method = "histogram") 160 | 161 | \dontrun{ 162 | 163 | # 2+ groups - mapping other aesthetics in the geom 164 | rdata <- function(n, n_groups = 3, radius = 3) { 165 | list_of_dfs <- lapply(0:(n_groups-1), function(k) { 166 | mu <- c(cos(2*k*pi/n_groups), sin(2*k*pi/n_groups)) 167 | m <- MASS::mvrnorm(n, radius*mu, diag(2)) 168 | structure(data.frame(m, as.character(k)), names = c("x", "y", "c")) 169 | }) 170 | do.call("rbind", list_of_dfs) 171 | } 172 | 173 | dfc <- rdata(1000, n_groups = 5) 174 | pf <- ggplot(dfc, aes(x, y, fill = c)) + coord_equal() 175 | 176 | pf + geom_hdr() 177 | pf + geom_hdr(method = "mvnorm") 178 | pf + geom_hdr(method = "mvnorm", probs = .90, alpha = .5) 179 | pf + geom_hdr(method = "histogram") 180 | pf + geom_hdr(method = "freqpoly") 181 | 182 | pc <- ggplot(dfc, aes(x, y, color = c)) + 183 | coord_equal() + 184 | theme_minimal() + 185 | theme(panel.grid.minor = element_blank()) 186 | 187 | pc + geom_hdr_lines() 188 | pc + geom_hdr_lines(method = "mvnorm") 189 | 190 | 191 | # Data with boundaries 192 | ggplot(df, aes(x^2)) + geom_histogram(bins = 30) 193 | ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0) 194 | ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram") 195 | 196 | } 197 | 198 | } 199 | \references{ 200 | Scott, David W. Multivariate Density Estimation (2e), Wiley. 201 | } 202 | \keyword{datasets} 203 | -------------------------------------------------------------------------------- /R/hdr.R: -------------------------------------------------------------------------------- 1 | #' Highest density regions of a 2D density estimate 2 | #' 3 | #' Perform 2D density estimation, compute and plot the resulting highest density regions. 4 | #' `geom_hdr()` draws filled regions and `geom_hdr_lines()` draws lines outlining the regions. 5 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default. 6 | #' 7 | #' @section Aesthetics: `geom_hdr()` and `geom_hdr_lines()` understand the following aesthetics (required 8 | #' aesthetics are in bold): 9 | #' 10 | #' - **x** 11 | #' - **y** 12 | #' - alpha 13 | #' - color 14 | #' - fill (only `geom_hdr`) 15 | #' - group 16 | #' - linetype 17 | #' - linewidth 18 | #' - subgroup 19 | #' 20 | #' @section Computed variables: 21 | #' 22 | #' \describe{ \item{probs}{The probability associated with the highest density region, specified 23 | #' by `probs` argument.} } 24 | #' 25 | #' @inheritParams ggplot2::geom_path 26 | #' @inheritParams ggplot2::stat_identity 27 | #' @inheritParams ggplot2::stat_density2d 28 | #' @param method Density estimator to use, accepts character vector: 29 | #' `"kde"`,`"histogram"`, `"freqpoly"`, or `"mvnorm"`. 30 | #' Alternatively accepts functions which return closures corresponding to density estimates, 31 | #' see `?get_hdr` or `vignette("method", "ggdensity")`. 32 | #' @param probs Probabilities to compute highest density regions for. 33 | #' @param xlim,ylim Range to compute and draw regions. If `NULL`, defaults to 34 | #' range of data. 35 | #' @param n Resolution of grid defined by `xlim` and `ylim`. 36 | #' Ignored if `method = "histogram"` or `method = "freqpoly"`. 37 | #' @name geom_hdr 38 | #' @rdname geom_hdr 39 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley. 40 | #' 41 | #' @import ggplot2 42 | #' 43 | #' @examples 44 | #' # Basic simulated data with bivariate normal data and various methods 45 | #' df <- data.frame(x = rnorm(1000), y = rnorm(1000)) 46 | #' p <- ggplot(df, aes(x, y)) + coord_equal() 47 | #' 48 | #' p + geom_hdr() 49 | #' p + geom_hdr(method = "mvnorm") 50 | #' p + geom_hdr(method = "freqpoly") 51 | #' # p + geom_hdr(method = "histogram") 52 | #' 53 | #' # Adding point layers on top to visually assess region estimates 54 | #' pts <- geom_point(size = .2, color = "red") 55 | #' 56 | #' p + geom_hdr() + pts 57 | #' p + geom_hdr(method = "mvnorm") + pts 58 | #' p + geom_hdr(method = "freqpoly") + pts 59 | #' # p + geom_hdr(method = "histogram") + pts 60 | #' 61 | #' # Highest density region boundary lines 62 | #' p + geom_hdr_lines() 63 | #' p + geom_hdr_lines(method = "mvnorm") 64 | #' p + geom_hdr_lines(method = "freqpoly") 65 | #' # p + geom_hdr_lines(method = "histogram") 66 | #' 67 | #' \dontrun{ 68 | #' 69 | #' # 2+ groups - mapping other aesthetics in the geom 70 | #' rdata <- function(n, n_groups = 3, radius = 3) { 71 | #' list_of_dfs <- lapply(0:(n_groups-1), function(k) { 72 | #' mu <- c(cos(2*k*pi/n_groups), sin(2*k*pi/n_groups)) 73 | #' m <- MASS::mvrnorm(n, radius*mu, diag(2)) 74 | #' structure(data.frame(m, as.character(k)), names = c("x", "y", "c")) 75 | #' }) 76 | #' do.call("rbind", list_of_dfs) 77 | #' } 78 | #' 79 | #' dfc <- rdata(1000, n_groups = 5) 80 | #' pf <- ggplot(dfc, aes(x, y, fill = c)) + coord_equal() 81 | #' 82 | #' pf + geom_hdr() 83 | #' pf + geom_hdr(method = "mvnorm") 84 | #' pf + geom_hdr(method = "mvnorm", probs = .90, alpha = .5) 85 | #' pf + geom_hdr(method = "histogram") 86 | #' pf + geom_hdr(method = "freqpoly") 87 | #' 88 | #' pc <- ggplot(dfc, aes(x, y, color = c)) + 89 | #' coord_equal() + 90 | #' theme_minimal() + 91 | #' theme(panel.grid.minor = element_blank()) 92 | #' 93 | #' pc + geom_hdr_lines() 94 | #' pc + geom_hdr_lines(method = "mvnorm") 95 | #' 96 | #' 97 | #' # Data with boundaries 98 | #' ggplot(df, aes(x^2)) + geom_histogram(bins = 30) 99 | #' ggplot(df, aes(x^2)) + geom_histogram(bins = 30, boundary = 0) 100 | #' ggplot(df, aes(x^2, y^2)) + geom_hdr(method = "histogram") 101 | #' 102 | #' } 103 | #' 104 | NULL 105 | 106 | 107 | #' @rdname geom_hdr 108 | #' @export 109 | stat_hdr <- function(mapping = NULL, data = NULL, 110 | geom = "hdr", position = "identity", 111 | ..., 112 | method = "kde", 113 | probs = c(.99, .95, .8, .5), 114 | n = 100, 115 | xlim = NULL, 116 | ylim = NULL, 117 | na.rm = FALSE, 118 | show.legend = NA, 119 | inherit.aes = TRUE) { 120 | layer( 121 | data = data, 122 | mapping = mapping, 123 | stat = StatHdr, 124 | geom = geom, 125 | position = position, 126 | show.legend = show.legend, 127 | inherit.aes = inherit.aes, 128 | params = list( 129 | method = method, 130 | probs = probs, 131 | n = n, 132 | xlim = xlim, 133 | ylim = ylim, 134 | na.rm = na.rm, 135 | ... 136 | ) 137 | ) 138 | } 139 | 140 | 141 | 142 | 143 | #' @rdname geom_hdr 144 | #' @format NULL 145 | #' @usage NULL 146 | #' @importFrom scales percent 147 | #' @export 148 | StatHdr <- ggproto("StatHdr", Stat, 149 | 150 | required_aes = c("x", "y"), 151 | default_aes = aes(order = after_stat(probs), alpha = after_stat(probs)), 152 | 153 | output = "bands", 154 | 155 | compute_group = function(self, data, scales, na.rm = FALSE, 156 | method = "kde", probs = c(.99, .95, .8, .5), 157 | n = 100, xlim = NULL, ylim = NULL) { 158 | 159 | rangex <- xlim %||% scales$x$dimension() 160 | rangey <- ylim %||% scales$y$dimension() 161 | 162 | # Only calculate HDR membership if we need to 163 | need_membership <- (self$output == "points") 164 | 165 | res <- get_hdr(data, method, probs, n, rangex, rangey, hdr_membership = need_membership) 166 | 167 | res_to_df(res, probs, data$group[1], self$output) 168 | 169 | } 170 | ) 171 | 172 | # internal helper function to convert output of `get_hdr[_1d]()` into 173 | # what `GeomHdr*$draw_group()` methods need 174 | res_to_df <- function(res, probs, group, output) { 175 | 176 | probs <- fix_probs(probs) 177 | 178 | # Need z for xyz_to_isobands/lines() 179 | res$df_est$z <- res$df_est$fhat 180 | 181 | if (output == "bands") { 182 | 183 | isobands <- xyz_to_isobands(res$df_est, res$breaks) 184 | names(isobands) <- scales::percent_format(accuracy = 1)(probs) 185 | df <- iso_to_polygon(isobands, group) 186 | df$probs <- ordered(df$level, levels = names(isobands)) 187 | df$level <- NULL 188 | 189 | } else if (output == "lines") { 190 | 191 | isolines <- xyz_to_isolines(res$df_est, res$breaks) 192 | names(isolines) <- scales::percent_format(accuracy = 1)(probs) 193 | df <- iso_to_path(isolines, group) 194 | df$probs <- ordered(df$level, levels = names(isolines)) 195 | df$level <- NULL 196 | 197 | } else if (output == "points") { 198 | 199 | df <- res$data 200 | df$hdr_membership <- scales::percent_format(accuracy = 1)(df$hdr_membership) 201 | df$probs <- ordered(df$hdr_membership, levels = scales::percent_format(accuracy = 1)(c(1, probs))) 202 | df$hdr_membership <- NULL 203 | 204 | } 205 | 206 | df 207 | 208 | } 209 | 210 | 211 | 212 | #' @rdname geom_hdr 213 | #' @export 214 | geom_hdr <- function(mapping = NULL, data = NULL, 215 | stat = "hdr", position = "identity", 216 | ..., 217 | na.rm = FALSE, 218 | show.legend = NA, 219 | inherit.aes = TRUE) { 220 | layer( 221 | data = data, 222 | mapping = mapping, 223 | stat = stat, 224 | geom = GeomHdr, 225 | position = position, 226 | show.legend = show.legend, 227 | inherit.aes = inherit.aes, 228 | params = list( 229 | na.rm = na.rm, 230 | ... 231 | ) 232 | ) 233 | } 234 | 235 | 236 | 237 | #' @rdname geom_hdr 238 | #' @format NULL 239 | #' @usage NULL 240 | #' @export 241 | GeomHdr <- ggproto("GeomHdr", GeomPolygon) 242 | -------------------------------------------------------------------------------- /tests/testthat/test-get_hdr.R: -------------------------------------------------------------------------------- 1 | test_that("structure of get_hdr() return value is as expected", { 2 | 3 | data <- data.frame( 4 | x = 1:10, 5 | y = rep(1:5, each = 2) 6 | ) 7 | 8 | res <- get_hdr(data) 9 | 10 | # Checking the top level of res 11 | expect_type(res, "list") 12 | expect_equal(length(res), 3) 13 | expect_equal(names(res), c("df_est", "breaks", "data")) 14 | 15 | # Checking res$df_est: 16 | expect_type(res$df_est, "list") 17 | expect_equal(ncol(res$df_est), 5) 18 | expect_equal(colnames(res$df_est), c("x", "y", "fhat", "fhat_discretized", "hdr")) 19 | 20 | # Checking res$data 21 | expect_type(res$data, "list") 22 | expect_equal(ncol(res$data), 3) 23 | expect_equal(nrow(res$data), 10) 24 | expect_equal(colnames(res$data), c("x", "y", "hdr_membership")) 25 | 26 | # Checking res$breaks 27 | expect_type(res$breaks, "double") 28 | expect_equal(length(res$breaks), 5) 29 | expect_equal(names(res$breaks), c("99%", "95%", "80%", "50%", NA)) 30 | 31 | 32 | # Now with non-default args ----------------------------------------- 33 | res <- get_hdr(data, probs = c(.989, .878, .67, .43, .21), hdr_membership = FALSE) 34 | 35 | # Checking res$data 36 | expect_equal(ncol(res$data), 2) 37 | 38 | # Checking res$breaks 39 | expect_type(res$breaks, "double") 40 | expect_equal(length(res$breaks), 6) 41 | expect_equal(names(res$breaks), c("99%", "88%", "67%", "43%", "21%", NA)) 42 | 43 | }) 44 | 45 | test_that("`method` can be provided as a character vector or function", { 46 | 47 | data <- data.frame( 48 | x = 1:10, 49 | y = rep(1:5, each = 2) 50 | ) 51 | 52 | expect_equal(get_hdr(data, "kde"), get_hdr(data, method_kde())) 53 | expect_equal(get_hdr(data, "mvnorm"), get_hdr(data, method_mvnorm())) 54 | expect_equal(get_hdr(data, "freqpoly"), get_hdr(data, method_freqpoly())) 55 | expect_equal(get_hdr(data, "histogram"), get_hdr(data, method_histogram())) 56 | 57 | }) 58 | 59 | test_that("get_hdr() errors informatively if bad `method` argument", { 60 | 61 | data <- data.frame( 62 | x = 1:10, 63 | y = rep(1:5, each = 2) 64 | ) 65 | 66 | expect_error(get_hdr(data, method = "not-a-method"), regexp = "Invalid method specified") 67 | expect_error(get_hdr(data, method = method_kde), regexp = "did you forget") 68 | 69 | }) 70 | 71 | 72 | # # The data used for tests: 73 | # 74 | # set.seed(1) 75 | # df <- data.frame( 76 | # x = rnorm(5e3), 77 | # y = rnorm(5e3) 78 | # ) 79 | # 80 | # write_rds(df, here::here("tests/testthat/fixtures/df_norm.rds")) 81 | 82 | test_that("get_hdr(method = method_kde()) calculations are consistent", { 83 | 84 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 85 | 86 | res <- get_hdr(data, method_kde()) 87 | 88 | # fhat_discretized should be normalized to sum to 1 89 | expect_equal(sum(res$df_est$fhat_discretized), 1) 90 | 91 | # By default, estimate is evaluated on the same range as original data 92 | expect_equal(range(res$df_est$x), range(data$x)) 93 | expect_equal(range(res$df_est$y), range(data$y)) 94 | 95 | # default grid is 100 x 100: 96 | expect_equal(nrow(res$df_est), 100 * 100) 97 | 98 | # Checksums: 99 | expect_equal(round(sum(res$df_est$fhat)), 185) 100 | expect_equal(as.numeric(round(res$breaks, 4)), c(0.0017, 0.0083, 0.0303, 0.0731, Inf)) 101 | expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1)) 102 | expect_equal(as.numeric(table(res$df_est$hdr)), c(858, 1149, 1597, 1771, 4625)) 103 | 104 | 105 | # Checking non-default args ------------------------ 106 | 107 | res <- get_hdr(data, method_kde(adjust = .4), probs = c(.97, .85, .4, .1), n = c(100, 200), rangex = c(-3, 2), rangey = c(-1, 3)) 108 | # fhat_discretized should be normalized to sum to 1 109 | expect_equal(sum(res$df_est$fhat_discretized), 1) 110 | 111 | # Was the custom range used 112 | expect_equal(range(res$df_est$x), c(-3, 2)) 113 | expect_equal(range(res$df_est$y), c(-1, 3)) 114 | 115 | # default grid is 100 x 100: 116 | expect_equal(nrow(res$df_est), 100 * 200) 117 | 118 | # Checksums: 119 | expect_equal(round(sum(res$df_est$fhat)), 808) 120 | expect_equal(as.numeric(round(res$breaks, 4)), c(0.0105, 0.0352, 0.1036, 0.1522, Inf)) 121 | expect_equal(sort(unique(res$df_est$hdr)), c(.1, .4, .85, .97, 1)) 122 | expect_equal(as.numeric(table(res$df_est$hdr)), c(495, 1923, 5584, 4524, 7474)) 123 | 124 | }) 125 | 126 | # TODO: above, for other methods 127 | 128 | 129 | 130 | 131 | test_that("get_hdr() works with custom function factory supplied to `method`", { 132 | 133 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 134 | 135 | method_mvnorm_ind <- function() { 136 | 137 | function(data) { 138 | 139 | mean_x <- mean(data$x); s_x <- sd(data$x) 140 | mean_y <- mean(data$y); s_y <- sd(data$y) 141 | 142 | function(x, y) dnorm(x, mean = mean_x, sd = s_x) * dnorm(y, mean = mean_y, sd = s_y) 143 | 144 | } 145 | 146 | } 147 | 148 | res <- get_hdr(data, method = method_mvnorm_ind()) 149 | 150 | # fhat_discretized should be normalized to sum to 1 151 | expect_equal(sum(res$df_est$fhat_discretized), 1) 152 | 153 | # By default, estimate is evaluated on the same range as original data 154 | expect_equal(range(res$df_est$x), range(data$x)) 155 | expect_equal(range(res$df_est$y), range(data$y)) 156 | 157 | # default grid is 100 x 100: 158 | expect_equal(nrow(res$df_est), 100 * 100) 159 | 160 | # Checksums: 161 | expect_equal(round(sum(res$df_est$fhat)), 185) 162 | expect_equal(as.numeric(round(res$breaks, 4)), c(0.0017, 0.0078, 0.031, 0.078, Inf)) 163 | expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1)) 164 | expect_equal(as.numeric(table(res$df_est$hdr)), c(826, 1090, 1642, 1863, 4579)) 165 | 166 | }) 167 | 168 | 169 | 170 | test_that("get_hdr() works with custom function factory supplied to `method`", { 171 | 172 | data <- readRDS(test_path("fixtures", "df_norm.rds")) 173 | 174 | method_fixed_grid <- function() { 175 | 176 | function(data, n, rangex, rangey) { 177 | 178 | df_grid <- expand.grid( 179 | x = seq(rangex[1], rangex[2], length.out = n), 180 | y = seq(rangey[1], rangey[2], length.out = n) 181 | ) 182 | 183 | df_grid$fhat <- dnorm(df_grid$x) * dnorm(df_grid$y) 184 | 185 | df_grid 186 | 187 | } 188 | 189 | } 190 | 191 | res <- get_hdr(data, method = method_fixed_grid()) 192 | 193 | # fhat_discretized should be normalized to sum to 1 194 | expect_equal(sum(res$df_est$fhat_discretized), 1) 195 | 196 | # By default, estimate is evaluated on the same range as original data 197 | expect_equal(range(res$df_est$x), range(data$x)) 198 | expect_equal(range(res$df_est$y), range(data$y)) 199 | 200 | # default grid is 100 x 100: 201 | expect_equal(nrow(res$df_est), 100 * 100) 202 | 203 | # Checksums: 204 | expect_equal(round(sum(res$df_est$fhat)), 185) 205 | expect_equal(as.numeric(round(res$breaks, 4)), c(0.0017, 0.008, 0.0321, 0.0796, Inf)) 206 | expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1)) 207 | expect_equal(as.numeric(table(res$df_est$hdr)), c(806, 1065, 1603, 1824, 4702)) 208 | 209 | }) 210 | 211 | 212 | test_that("get_hdr() fails if `method != 'fun' and `data` isn't provided", { 213 | 214 | expect_error(get_hdr(method = method_kde()), regexp = ".data. must be provided") 215 | 216 | }) 217 | 218 | test_that("fun argument of get_hdr() requires rangex/y", { 219 | 220 | expect_error(get_hdr(method = "fun", fun = function(x, y) dexp(x) * dexp(y)), regexp = ".rangey. must be provided") 221 | 222 | }) 223 | 224 | 225 | test_that("fun argument of get_hdr() works", { 226 | 227 | res <- get_hdr(method = "fun", fun = function(x, y) dexp(x) * dexp(y), rangex = c(0, 10), rangey = c(0, 10)) 228 | 229 | # Structure of res is as expected 230 | expect_type(res, "list") 231 | expect_equal(length(res), 3) 232 | expect_equal(names(res), c("df_est", "breaks", "data")) 233 | 234 | expect_null(res$data) 235 | 236 | # fhat_discretized should be normalized to sum to 1 237 | expect_equal(sum(res$df_est$fhat_discretized), 1) 238 | 239 | expect_equal(range(res$df_est$x), c(0, 10)) 240 | expect_equal(range(res$df_est$y), c(0, 10)) 241 | 242 | # default grid is 100 x 100: 243 | expect_equal(nrow(res$df_est), 100 * 100) 244 | 245 | # Checksums: 246 | expect_equal(round(sum(res$df_est$fhat)), 108) 247 | expect_equal(as.numeric(round(res$breaks, 4)), c(0.0014, 0.0096, 0.0534, 0.1987, Inf)) 248 | expect_equal(sort(unique(res$df_est$hdr)), c(.5, .8, .95, .99, 1)) 249 | expect_equal(as.numeric(table(res$df_est$hdr)), c(145, 306, 669, 1045, 7835)) 250 | }) 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | -------------------------------------------------------------------------------- /R/get_hdr_1d.R: -------------------------------------------------------------------------------- 1 | #' Computing the highest density regions of a 1D density 2 | #' 3 | #' `get_hdr_1d` is used to estimate a 1-dimensional density and compute corresponding HDRs. 4 | #' The estimated density and HDRs are represented in a discrete form as a grid, defined by arguments `range` and `n`. 5 | #' `get_hdr_1d` is used internally by layer functions `stat_hdr_rug()` and `stat_hdr_rug_fun()`. 6 | #' 7 | #' @inheritParams get_hdr 8 | #' @param method Either a character (`"kde"`, `"norm"`, `"histogram"`, `"freqpoly"`, or `"fun"`) or `method_*_1d()` function. 9 | #' See the "The `method` argument" section below for details. 10 | #' @param x A vector of data 11 | #' @param hdr_membership Should HDR membership of data points (`x`) be computed? 12 | #' @param range Range of grid representing estimated density and HDRs. 13 | #' @param n Resolution of grid representing estimated density and HDRs. 14 | #' @param fun Optional, a probability density function, must be vectorized in its first argument. 15 | #' See the "The `fun` argument" section below for details. 16 | #' 17 | #' @section The `method` argument: 18 | #' The density estimator used to estimate the HDRs is specified with the `method` argument. 19 | #' The simplest way to specify an estimator is to provide a character value to `method`, 20 | #' for example `method = "kde"` specifies a kernel density estimator. 21 | #' However, this specification is limited to the default behavior of the estimator. 22 | #' 23 | #' Instead, it is possible to provide a function call, for example: `method = method_kde_1d()`. 24 | #' This is slightly different from the function calls provided in `get_hdr()`, note the `_1d` suffix. 25 | #' In many cases, these functions accept parameters governing the density estimation procedure. 26 | #' Here, `method_kde_1d()` accepts several parameters related to the choice of kernel. 27 | #' For details, see `?method_kde_1d`. 28 | #' Every method of univariate density estimation implemented has such corresponding `method_*_1d()` function, 29 | #' each with an associated help page. 30 | #' 31 | #' Note: `geom_hdr_rug()` and other layer functions also have `method` arguments which behave in the same way. 32 | #' For more details on the use and implementation of the `method_*_1d()` functions, 33 | #' see `vignette("method", "ggdensity")`. 34 | #' 35 | #' @section The `fun` argument: 36 | #' If `method` is set to `"fun"`, `get_hdr_1d()` expects a univariate probability 37 | #' density function to be specified with the `fun` argument. 38 | #' It is required that `fun` be a function of at least one argument (`x`). 39 | #' Beyond this first argument, `fun` can have arbitrarily many arguments; 40 | #' these can be set in `get_hdr_1d()` as a named list via the `args` parameter. 41 | #' 42 | #' Note: `get_hdr_1d()` requires that `fun` be vectorized in `x`. 43 | #' For an example of an appropriate choice of `fun`, see the final example below. 44 | #' 45 | #' @returns 46 | #' 47 | #' `get_hdr_1d` returns a list with elements `df_est` (`data.frame`), `breaks` (named `numeric`), and `data` (`data.frame`). 48 | #' 49 | #' * `df_est`: the estimated HDRs and density evaluated on the grid defined by `range` and `n`. 50 | #' The column of estimated HDRs (`df_est$hdr`) is a numeric vector with values from `probs`. 51 | #' The columns `df_est$fhat` and `df_est$fhat_discretized` correspond to the estimated density 52 | #' on the original scale and rescaled to sum to 1, respectively. 53 | #' 54 | #' * `breaks`: the heights of the estimated density (`df_est$fhat`) corresponding to the HDRs specified by `probs`. 55 | #' Will always have additional element `Inf` representing the cutoff for the 100% HDR. 56 | #' 57 | #' * `data`: the original data provided in the `data` argument. 58 | #' If `hdr_membership` is set to `TRUE`, this includes a column (`data$hdr_membership`) 59 | #' with the HDR corresponding to each data point. 60 | #' 61 | #' @examples 62 | #' x <- rnorm(1e3) 63 | #' 64 | #' # Two ways to specify `method` 65 | #' get_hdr_1d(x, method = "kde") 66 | #' get_hdr_1d(x, method = method_kde_1d()) 67 | #' 68 | #' \dontrun{ 69 | #' 70 | #' # If parenthesis are omitted, `get_hdr_1d()` errors 71 | #' get_hdr_1d(df, method = method_kde_1d) 72 | #' 73 | #' # If the `_1d` suffix is omitted, `get_hdr_1d()` errors 74 | #' get_hdr_1d(x, method = method_kde()) 75 | #' } 76 | #' 77 | #' # Adjust estimator parameters with arguments to `method_kde_1d()` 78 | #' get_hdr_1d(x, method = method_kde_1d(kernel = "triangular")) 79 | #' 80 | #' # Estimate different HDRs with `probs` 81 | #' get_hdr_1d(x, method = method_kde_1d(), probs = c(.975, .6, .2)) 82 | #' 83 | #' # Compute "population" HDRs of specified univariate pdf with `method = "fun"` 84 | #' f <- function(x, sd = 1) dnorm(x, sd = sd) 85 | #' get_hdr_1d(method = "fun", fun = f, range = c(-5, 5)) 86 | #' get_hdr_1d(method = "fun", fun = f, range = c(-5, 5), args = list(sd = .5)) 87 | #' 88 | #' 89 | #' @export 90 | get_hdr_1d <- function(x = NULL, method = "kde", probs = c(.99, .95, .8, .5), n = 512, range = NULL, hdr_membership = TRUE, fun, args = list()) { 91 | 92 | # Deal with missing data argument 93 | if (is.null(x)) { 94 | if (!is.character(method) | (is.character(method) && method != "fun")) { 95 | stop('`x` must be provided unless `method = "fun"`') 96 | } else { 97 | if (is.null(range)) { 98 | stop('If `x` is unspecified, `range` must be provided when `method = "fun"`') 99 | } 100 | } 101 | } 102 | 103 | range <- range %||% range(x) 104 | 105 | probs <- fix_probs(probs) 106 | 107 | # Create df_est (estimated density evaluated on a grid) depending on specified method: 108 | if (is.character(method) && method == "fun") { 109 | 110 | df_est <- f_est_1d(method = NULL, x = x, n, range = range, fun = fun, args = args) 111 | 112 | } else { 113 | 114 | if (is.character(method)) { 115 | 116 | if (!method %in% c("kde", "norm", "histogram", "freqpoly")) stop("Invalid method specified") 117 | 118 | # If method is provided as a character, re-assign correct function output: 119 | method <- switch(method, 120 | "kde" = method_kde_1d(), 121 | "histogram" = method_histogram_1d(), 122 | "freqpoly" = method_freqpoly_1d(), 123 | "norm" = method_norm_1d() 124 | ) 125 | 126 | } 127 | 128 | # parse args of method to determine strategy of `method` 129 | method_formals <- names(formals(method)) 130 | 131 | # If `data` is the only argument to `method`, we know `method` 132 | # is a function factory, returning a closure of pdf in terms of x, y: 133 | if (length(method_formals) == 1 && method_formals %in% c("x", "y")) { 134 | 135 | df_est <- f_est_1d(method, x, n, range) 136 | 137 | # Otherwise `method` computes a grid for us, shortcutting 138 | # representing pdf in terms of x, y: 139 | } else if (length(method_formals) == 3 && method_formals[1] %in% c("x", "y") & all(method_formals[2:3] == c("n", "range"))) { 140 | 141 | df_est <- method(x, n, range) 142 | 143 | } else if ("data" %in% method_formals) { 144 | 145 | stop("Invalid `method` argument -- did you forget the `_1d()`?") 146 | 147 | } else { 148 | 149 | stop("Invalid `method` argument -- did you forget the `()`?") 150 | 151 | } 152 | 153 | } 154 | 155 | 156 | # Manipulate df_est to get information about HDRs: 157 | 158 | # force estimate to integrate to 1 159 | df_est$fhat_discretized <- normalize(df_est$fhat) 160 | 161 | # temporarily rescale df$fhat for stability 162 | fhat_max <- max(df_est$fhat) 163 | df_est$fhat <- df_est$fhat / fhat_max 164 | 165 | # find cutoffs (in terms of rescaled fhat) 166 | breaks <- c(find_cutoff(df_est, probs), Inf) 167 | 168 | # find HDRs for points in the grid 169 | df_est$hdr <- vapply(df_est$fhat, get_hdr_val, numeric(1), breaks, probs) 170 | 171 | # find hdr membership of points from data 172 | if (!is.null(x) & hdr_membership) { 173 | 174 | data <- data.frame(x = x) 175 | 176 | if (hdr_membership) { 177 | 178 | hdr_membership <- vapply(x, get_hdr_membership_1d, numeric(1), df_est, breaks, probs) 179 | 180 | # create data frame w/ input data (x) + HDR membership 181 | data$hdr_membership <- hdr_membership 182 | 183 | } 184 | 185 | } else { 186 | 187 | data <- NULL 188 | 189 | } 190 | 191 | # transforming df_est$fhat and breaks back to original scale: 192 | df_est$fhat <- df_est$fhat * fhat_max 193 | breaks <- breaks * fhat_max 194 | 195 | # Give breaks nicely formatted names, corresponding to HDRs: 196 | names(breaks) <- scales::percent_format(accuracy = 1)(probs) 197 | 198 | # bundle everything together 199 | list( 200 | df_est = df_est, 201 | breaks = breaks, 202 | data = data 203 | ) 204 | 205 | } 206 | 207 | get_hdr_membership_1d <- function(x, df_est, breaks, probs) { 208 | df_est$dist <- (x - df_est$x)^2 209 | fhat <- df_est[which.min(df_est$dist), "fhat"] 210 | 211 | get_hdr_val(fhat, breaks, probs) 212 | } 213 | 214 | # method is a function of data vector x 215 | # fun is a function of vector x -- the grid 216 | # Might need to be more careful w/ axis transformations here 217 | f_est_1d <- function(method, x, n, range, fun = NULL, args = list()) { 218 | 219 | # If fun isn't specified, method returns a closure 220 | # representing closed form of density estimate 221 | fun <- fun %||% method(x) 222 | 223 | # grid to evaluate fun 224 | df <- data.frame(x = seq(range[1], range[2], length.out = n)) 225 | 226 | # evaluate method on the grid, f required to be vectorized in x, y: 227 | # (args is only non-empty if fun was specified) 228 | df$fhat <- do.call(fun, c(quote(df$x), args)) 229 | 230 | df 231 | 232 | } 233 | 234 | 235 | -------------------------------------------------------------------------------- /R/get_hdr.R: -------------------------------------------------------------------------------- 1 | #' Computing the highest density regions of a 2D density 2 | #' 3 | #' `get_hdr` is used to estimate a 2-dimensional density and compute 4 | #' corresponding HDRs. The estimated density and HDRs are represented in a 5 | #' discrete form as a grid, defined by arguments `rangex`, `rangey`, and `n`. 6 | #' `get_hdr` is used internally by layer functions `stat_hdr()`, 7 | #' `stat_hdr_points()`, `stat_hdr_fun()`, etc. 8 | #' 9 | #' @param method Either a character (`"kde"`, `"mvnorm"`, `"histogram"`, 10 | #' `"freqpoly"`, or `"fun"`) or `method_*()` function. See the "The `method` 11 | #' argument" section below for details. 12 | #' @param data A data frame with columns `x` and `y`. 13 | #' @param probs Probabilities to compute HDRs for. 14 | #' @param rangex,rangey Range of grid representing estimated density and HDRs, 15 | #' along the x- and y-axes. 16 | #' @param n Resolution of grid representing estimated density and HDRs. 17 | #' @param hdr_membership Should HDR membership of data points (`data`) be 18 | #' computed? Defaults to `TRUE`, although it is computationally expensive for 19 | #' large data sets. 20 | #' @param fun Optional, a joint probability density function, must be vectorized 21 | #' in its first two arguments. See the "The `fun` argument" section below for 22 | #' details. 23 | #' @param args Optional, a list of arguments to be provided to `fun`. 24 | #' 25 | #' @section The `method` argument: The density estimator used to estimate the 26 | #' HDRs is specified with the `method` argument. The simplest way to specify 27 | #' an estimator is to provide a character value to `method`, for example 28 | #' `method = "kde"` specifies a kernel density estimator. However, this 29 | #' specification is limited to the default behavior of the estimator. 30 | #' 31 | #' Instead, it is possible to provide a function call, for example: `method = 32 | #' method_kde()`. In many cases, these functions accept parameters governing 33 | #' the density estimation procedure. Here, `method_kde()` accepts parameters 34 | #' `h` and `adjust`, both related to the kernel's bandwidth. For details, see 35 | #' `?method_kde`. Every method of bivariate density estimation implemented has 36 | #' such corresponding `method_*()` function, each with an associated help 37 | #' page. 38 | #' 39 | #' Note: `geom_hdr()` and other layer functions also have `method` arguments 40 | #' which behave in the same way. For more details on the use and 41 | #' implementation of the `method_*()` functions, see `vignette("method", 42 | #' "ggdensity")`. 43 | #' 44 | #' @section The `fun` argument: If `method` is set to `"fun"`, `get_hdr()` 45 | #' expects a bivariate probability density function to be specified with the 46 | #' `fun` argument. It is required that `fun` be a function of at least two 47 | #' arguments (`x` and `y`). Beyond these first two arguments, `fun` can have 48 | #' arbitrarily many arguments; these can be set in `get_hdr()` as a named list 49 | #' via the `args` parameter. 50 | #' 51 | #' Note: `get_hdr()` requires that `fun` be vectorized in `x` and `y`. For an 52 | #' example of an appropriate choice of `fun`, see the final example below. 53 | #' 54 | #' @returns 55 | #' 56 | #' `get_hdr` returns a list with elements `df_est` (`data.frame`), `breaks` 57 | #' (named `numeric`), and `data` (`data.frame`). 58 | #' 59 | #' * `df_est`: the estimated HDRs and density evaluated on the grid defined by `rangex`, `rangey`, and `n`. 60 | #' The column of estimated HDRs (`df_est$hdr`) is a numeric vector with values 61 | #' from `probs`. The columns `df_est$fhat` and `df_est$fhat_discretized` 62 | #' correspond to the estimated density on the original scale and rescaled to sum 63 | #' to 1, respectively. 64 | #' 65 | #' * `breaks`: the heights of the estimated density (`df_est$fhat`) corresponding to the HDRs specified by `probs`. 66 | #' Will always have additional element `Inf` representing the cutoff for the 67 | #' 100% HDR. 68 | #' 69 | #' * `data`: the original data provided in the `data` argument. 70 | #' If `hdr_membership` is set to `TRUE`, this includes a column 71 | #' (`data$hdr_membership`) with the HDR corresponding to each data point. 72 | #' 73 | #' @examples 74 | #' df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) 75 | #' 76 | #' # Two ways to specify `method` 77 | #' get_hdr(df, method = "kde") 78 | #' get_hdr(df, method = method_kde()) 79 | #' 80 | #' \dontrun{ 81 | #' 82 | #' # If parenthesis are omitted, `get_hdr()` errors 83 | #' get_hdr(df, method = method_kde) 84 | #' } 85 | #' 86 | #' # Estimate different HDRs with `probs` 87 | #' get_hdr(df, method = method_kde(), probs = c(.975, .6, .2)) 88 | #' 89 | #' # Adjust estimator parameters with arguments to `method_kde()` 90 | #' get_hdr(df, method = method_kde(h = 1)) 91 | #' 92 | #' # Parametric normal estimator of density 93 | #' get_hdr(df, method = "mvnorm") 94 | #' get_hdr(df, method = method_mvnorm()) 95 | #' 96 | #' # Compute "population" HDRs of specified bivariate pdf with `method = "fun"` 97 | #' f <- function(x, y, sd_x = 1, sd_y = 1) dnorm(x, sd = sd_x) * dnorm(y, sd = sd_y) 98 | #' 99 | #' get_hdr( 100 | #' method = "fun", fun = f, 101 | #' rangex = c(-5, 5), rangey = c(-5, 5) 102 | #' ) 103 | #' 104 | #' get_hdr( 105 | #' method = "fun", fun = f, 106 | #' rangex = c(-5, 5), rangey = c(-5, 5), 107 | #' args = list(sd_x = .5, sd_y = .5) # specify additional arguments w/ `args` 108 | #' ) 109 | #' 110 | #' @export 111 | get_hdr <- function(data = NULL, method = "kde", probs = c(.99, .95, .8, .5), n = 100, rangex = NULL, rangey = NULL, hdr_membership = TRUE, fun, args = list()) { 112 | 113 | # Deal with missing data argument 114 | if (is.null(data)) { 115 | if (!is.character(method) | (is.character(method) && method != "fun")) { 116 | stop('`data` must be provided unless `method = "fun"`') 117 | } else { 118 | if (is.null(rangex) | is.null(rangey)) { 119 | stop('If `data` is unspecified, `rangex` and `rangey` must be provided when `method = "fun"`') 120 | } 121 | } 122 | } 123 | 124 | rangex <- rangex %||% range(data$x) 125 | rangey <- rangey %||% range(data$y) 126 | 127 | probs <- fix_probs(probs) 128 | 129 | # Create df_est (estimated density evaluated on a grid) depending on specified method: 130 | if (is.character(method) && method == "fun") { 131 | 132 | df_est <- f_est(method = NULL, n = n, rangex = rangex, rangey = rangey, fun = fun, args = args) 133 | 134 | } else { 135 | 136 | if (is.character(method)) { 137 | 138 | if (!method %in% c("kde", "mvnorm", "histogram", "freqpoly")) stop("Invalid method specified") 139 | 140 | # If method is provided as a character, re-assign correct function output: 141 | method <- switch(method, 142 | "kde" = method_kde(), 143 | "histogram" = method_histogram(), 144 | "freqpoly" = method_freqpoly(), 145 | "mvnorm" = method_mvnorm() 146 | ) 147 | 148 | } 149 | 150 | # parse args of method to determine strategy of `method` 151 | method_formals <- names(formals(method)) 152 | 153 | # If `data` is the only argument to `method`, we know `method` 154 | # is a function factory, returning a closure of pdf in terms of x, y: 155 | if (length(method_formals) == 1 && method_formals == "data") { 156 | 157 | df_est <- f_est(method, data, n, rangex, rangey) 158 | 159 | # Otherwise `method` computes a grid for us, shortcutting 160 | # representing pdf in terms of x, y: 161 | } else if (length(method_formals) == 4 && all(method_formals == c("data", "n", "rangex", "rangey"))) { 162 | 163 | df_est <- method(data, n, rangex, rangey) 164 | 165 | } else { 166 | 167 | stop("Invalid `method` argument -- did you forget the `()`?") 168 | 169 | } 170 | 171 | } 172 | 173 | 174 | # remove unneeded attributes 175 | attr(df_est, "out.attrs") <- NULL 176 | 177 | # Manipulate df_est to get information about HDRs: 178 | 179 | # force estimate to integrate to 1 180 | df_est$fhat_discretized <- normalize(df_est$fhat) 181 | 182 | # temporarily rescale df$fhat for stability 183 | fhat_max <- max(df_est$fhat) 184 | df_est$fhat <- df_est$fhat / fhat_max 185 | 186 | # find cutoffs (in terms of rescaled fhat) 187 | breaks <- c(find_cutoff(df_est, probs), Inf) 188 | 189 | # find HDRs for points in the grid 190 | df_est$hdr <- vapply(df_est$fhat, get_hdr_val, numeric(1), breaks, probs) 191 | 192 | # find hdr membership of points from data 193 | if (!is.null(data) & hdr_membership) { 194 | data$hdr_membership <- mapply(get_hdr_membership, data$x, data$y, MoreArgs = list(df_est, breaks, probs), SIMPLIFY = TRUE) 195 | } 196 | 197 | # transforming df_est$fhat and breaks back to original scale: 198 | df_est$fhat <- df_est$fhat * fhat_max 199 | breaks <- breaks * fhat_max 200 | 201 | # Give breaks nicely formatted names, corresponding to HDRs: 202 | names(breaks) <- scales::percent_format(accuracy = 1)(probs) 203 | 204 | # bundle everything together 205 | list( 206 | df_est = df_est, 207 | breaks = breaks, 208 | data = data 209 | ) 210 | 211 | } 212 | 213 | fix_probs <- function(probs) { 214 | stopifnot("Probabilities must be between 0 and 1, exclusive" = all(probs > 0) & all(probs < 1)) 215 | 216 | sort(probs, decreasing = TRUE) 217 | } 218 | 219 | get_hdr_val <- function(fhat, breaks, probs) { 220 | hdrs <- which(fhat >= breaks) 221 | if (length(hdrs) == 0) return(1) 222 | probs[max(hdrs)] 223 | } 224 | 225 | get_hdr_membership <- function(x, y, df_est, breaks, probs) { 226 | df_est$dist <- (x - df_est$x)^2 + (y - df_est$y)^2 227 | fhat <- df_est[which.min(df_est$dist), "fhat"] 228 | 229 | get_hdr_val(fhat, breaks, probs) 230 | } 231 | 232 | 233 | # method is a function of data 234 | # fun is a function of vectors x, y 235 | f_est <- function(method, data, n, rangex, rangey, fun = NULL, args = list()) { 236 | 237 | # If `fun` isn't specified, method returns a closure 238 | # representing closed form of density estimate 239 | fun <- fun %||% method(data) 240 | 241 | # grid to evaluate fun 242 | df <- expand.grid( 243 | "x" = seq(rangex[1], rangex[2], length.out = n), 244 | "y" = seq(rangey[1], rangey[2], length.out = n) 245 | ) 246 | 247 | # evaluate method on the grid, f required to be vectorized in x, y: 248 | # (args is only non-empty if fun was specified) 249 | df$fhat <- do.call(fun, c(quote(df$x), quote(df$y), args)) 250 | 251 | df 252 | 253 | } 254 | 255 | 256 | -------------------------------------------------------------------------------- /R/hdr_rug.R: -------------------------------------------------------------------------------- 1 | #' Rug plots of marginal highest density region estimates 2 | #' 3 | #' Perform 1D density estimation, compute and plot the resulting highest density 4 | #' regions in a way similar to [ggplot2::geom_rug()]. 5 | #' Note, the plotted objects have probabilities mapped to the `alpha` aesthetic by default. 6 | #' 7 | #' @section Aesthetics: geom_hdr_rug understands the following aesthetics (required 8 | #' aesthetics are in bold): 9 | #' 10 | #' - x 11 | #' - y 12 | #' - alpha 13 | #' - fill 14 | #' - group 15 | #' - subgroup 16 | #' 17 | #' @section Computed variables: 18 | #' 19 | #' \describe{ \item{probs}{The probability of the highest density region, specified 20 | #' by `probs`, corresponding to each point.} } 21 | #' 22 | #' @inheritParams ggplot2::geom_rug 23 | #' @inheritParams stat_hdr 24 | #' @param method,method_y Density estimator(s) to use. 25 | #' By default `method` is used for both x- and y-axis. 26 | #' If specified, `method_y` will be used for y-axis. 27 | #' Accepts character vector: `"kde"`,`"histogram"`, `"freqpoly"`, or `"norm"`. 28 | #' Alternatively accepts functions which return closures corresponding to density estimates, 29 | #' see `?get_hdr_1d` or `vignette("method", "ggdensity")`. 30 | #' @name geom_hdr_rug 31 | #' @rdname geom_hdr_rug 32 | #' 33 | #' @import ggplot2 34 | #' 35 | #' @examples 36 | #' set.seed(1) 37 | #' df <- data.frame(x = rnorm(100), y = rnorm(100)) 38 | #' 39 | #' # Plot marginal HDRs for bivariate data 40 | #' ggplot(df, aes(x, y)) + 41 | #' geom_point() + 42 | #' geom_hdr_rug() + 43 | #' coord_fixed() 44 | #' 45 | #' ggplot(df, aes(x, y)) + 46 | #' geom_hdr() + 47 | #' geom_hdr_rug() + 48 | #' coord_fixed() 49 | #' 50 | #' # Plot HDR for univariate data 51 | #' ggplot(df, aes(x)) + 52 | #' geom_density() + 53 | #' geom_hdr_rug() 54 | #' 55 | #' ggplot(df, aes(y = y)) + 56 | #' geom_density() + 57 | #' geom_hdr_rug() 58 | #' 59 | #' # Specify location of marginal HDRs as in ggplot2::geom_rug() 60 | #' ggplot(df, aes(x, y)) + 61 | #' geom_hdr() + 62 | #' geom_hdr_rug(sides = "tr", outside = TRUE) + 63 | #' coord_fixed(clip = "off") 64 | #' 65 | #' # Can use same methods of density estimation as geom_hdr(). 66 | #' # For data with constrained support, we suggest setting method = "histogram": 67 | #' ggplot(df, aes(x^2)) + 68 | #' geom_histogram(bins = 30, boundary = 0) + 69 | #' geom_hdr_rug(method = "histogram") 70 | #' 71 | #' ggplot(df, aes(x^2, y^2)) + 72 | #' geom_hdr(method = "histogram") + 73 | #' geom_hdr_rug(method = "histogram") + 74 | #' coord_fixed() 75 | #' 76 | NULL 77 | 78 | 79 | 80 | 81 | 82 | 83 | #' @rdname geom_hdr_rug 84 | #' @export 85 | stat_hdr_rug <- function(mapping = NULL, data = NULL, 86 | geom = "hdr_rug", position = "identity", 87 | ..., 88 | method = "kde", 89 | method_y = "kde", 90 | probs = c(.99, .95, .8, .5), 91 | xlim = NULL, 92 | ylim = NULL, 93 | n = 512, 94 | na.rm = FALSE, 95 | show.legend = TRUE, 96 | inherit.aes = TRUE) { 97 | layer( 98 | data = data, 99 | mapping = mapping, 100 | stat = StatHdrRug, 101 | geom = geom, 102 | position = position, 103 | show.legend = show.legend, 104 | inherit.aes = inherit.aes, 105 | params = list( 106 | method = method, 107 | method_y = method_y, 108 | probs = probs, 109 | xlim = xlim, 110 | ylim = ylim, 111 | n = n, 112 | na.rm = na.rm, 113 | ... 114 | ) 115 | ) 116 | } 117 | 118 | 119 | 120 | #' @rdname geom_hdr_rug 121 | #' @format NULL 122 | #' @usage NULL 123 | #' @export 124 | StatHdrRug <- ggproto("StatHdrRug", Stat, 125 | 126 | required_aes = c("x|y"), 127 | default_aes = aes(alpha = after_stat(probs)), 128 | 129 | compute_group = function(data, scales, na.rm = FALSE, 130 | method = "kde", method_y = NULL, 131 | probs = c(.99, .95, .8, .5), 132 | xlim = NULL, ylim = NULL, n = 512) { 133 | 134 | # Recycle for both x, y 135 | if (length(n) == 1) n <- rep(n, 2) 136 | 137 | # If no alternative method_y, use method 138 | if (is.null(method_y)) method_y <- method 139 | 140 | 141 | # Estimate marginal densities 142 | 143 | # Initialize dfs for x and y axes, 144 | # in case only x or y are supplied: 145 | df_x <- data.frame() 146 | df_y <- data.frame() 147 | 148 | if (!is.null(data$x)) { 149 | 150 | rangex <- xlim %||% scales$x$dimension() 151 | 152 | res_x <- get_hdr_1d(data$x, method, probs, n[1], rangex, hdr_membership = FALSE) 153 | 154 | df_x <- res_to_df_1d(res_x, probs, data$group[1], output = "rug") 155 | 156 | # Needs correct name for ggplot2 internals 157 | df_x$axis <- "x" 158 | df_x$y <- NA 159 | 160 | } 161 | 162 | 163 | if (!is.null(data$y)) { 164 | 165 | rangey <- ylim %||% scales$y$dimension() 166 | 167 | res_y <- get_hdr_1d(data$y, method_y, probs, n[2], rangey, hdr_membership = FALSE) 168 | 169 | df_y <- res_to_df_1d(res_y, probs, data$group[1], output = "rug") 170 | 171 | # Needs correct name for ggplot2 internals 172 | df_y$axis <- "y" 173 | df_y$y <- df_y$x 174 | df_y$x <- NA 175 | 176 | } 177 | 178 | df <- rbind(df_x, df_y) 179 | 180 | # Need to remove extra col if only plotting x or y rug 181 | if (is.null(data$x)) df$x <- NULL 182 | if (is.null(data$y)) df$y <- NULL 183 | 184 | df 185 | 186 | } 187 | ) 188 | 189 | 190 | res_to_df_1d <- function(res, probs, group, output) { 191 | 192 | probs <- fix_probs(probs) 193 | 194 | if (output == "rug") { 195 | 196 | probs_formatted <- scales::percent_format(accuracy = 1)(probs) 197 | 198 | df <- res$df_est 199 | 200 | # alpha will be mapped to df$probs 201 | df$probs <- scales::percent_format(accuracy = 1)(df$hdr) 202 | df$probs <- ordered(df$probs, levels = probs_formatted) 203 | df$hdr <- NULL 204 | 205 | # Discard 100% HDR if it's not in probs: 206 | df <- df[!is.na(df$probs),] 207 | 208 | } 209 | 210 | df 211 | 212 | } 213 | 214 | 215 | 216 | #' @rdname geom_hdr_rug 217 | #' @export 218 | geom_hdr_rug <- function(mapping = NULL, data = NULL, 219 | stat = "hdr_rug", position = "identity", 220 | ..., 221 | outside = FALSE, 222 | sides = "bl", 223 | length = unit(0.03, "npc"), 224 | na.rm = FALSE, 225 | show.legend = TRUE, 226 | inherit.aes = TRUE) { 227 | layer( 228 | data = data, 229 | mapping = mapping, 230 | stat = stat, 231 | geom = GeomHdrRug, 232 | position = position, 233 | show.legend = show.legend, 234 | inherit.aes = inherit.aes, 235 | params = list( 236 | outside = outside, 237 | sides = sides, 238 | length = length, 239 | na.rm = na.rm, 240 | ... 241 | ) 242 | ) 243 | } 244 | 245 | 246 | #' @rdname geom_hdr_rug 247 | #' @format NULL 248 | #' @usage NULL 249 | #' @export 250 | GeomHdrRug <- ggproto("GeomHdrRug", Geom, 251 | optional_aes = c("x", "y"), 252 | 253 | draw_panel = function(data, panel_params, coord, sides = "bl", 254 | outside = FALSE, length = unit(0.03, "npc")) { 255 | 256 | if (!inherits(length, "unit")) { 257 | abort("'length' must be a 'unit' object.") 258 | } 259 | rugs <- list() 260 | 261 | # For coord_flip, coord$tranform does not flip the sides where to 262 | # draw the rugs. We have to flip them. 263 | if (inherits(coord, 'CoordFlip')) { 264 | sides <- chartr('tblr', 'rlbt', sides) 265 | } 266 | 267 | # move the rug to outside the main plot space 268 | if (outside) length <- -length 269 | 270 | # Set up data frames for x and y: 271 | data_x <- data[data$axis == "x",] 272 | data_y <- data[data$axis == "y",] 273 | 274 | 275 | if (nrow(data_x) > 0) { 276 | 277 | data_x <- coord$transform(data_x, panel_params) 278 | data_x$width <- resolution(data_x$x, FALSE) 279 | 280 | gp_x <- grid::gpar( 281 | col = alpha(data_x$fill, data_x$alpha), 282 | fill = alpha(data_x$fill, data_x$alpha), 283 | lwd = 0 284 | ) 285 | 286 | # set up x axis rug rasters 287 | if (grepl("b", sides)) { 288 | rugs$x_b <- grid::rectGrob( 289 | x = unit(data_x$x, "native"), 290 | y = unit(0, "npc"), 291 | width = data_x$width, 292 | height = length, 293 | just = "bottom", 294 | gp = gp_x 295 | ) 296 | } 297 | 298 | if (grepl("t", sides)) { 299 | rugs$x_t <- grid::rectGrob( 300 | x = unit(data_x$x, "native"), 301 | y = unit(1, "npc"), 302 | width = data_x$width, 303 | height = length, 304 | just = "top", 305 | gp = gp_x 306 | ) 307 | } 308 | } 309 | 310 | if (nrow(data_y) > 0) { 311 | 312 | data_y <- coord$transform(data_y, panel_params) 313 | data_y$height <- resolution(data_y$y, FALSE) 314 | 315 | gp_y <- grid::gpar( 316 | col = alpha(data_y$fill, data_y$alpha), 317 | fill = alpha(data_y$fill, data_y$alpha), 318 | lwd = 0 319 | ) 320 | 321 | 322 | # set up y axis rug rasters 323 | if (grepl("l", sides)) { 324 | rugs$y_l <- grid::rectGrob( 325 | x = unit(0, "npc"), 326 | y = unit(data_y$y, "native"), 327 | width = length, 328 | height = data_y$height, 329 | just = "left", 330 | gp = gp_y 331 | ) 332 | } 333 | 334 | if (grepl("r", sides)) { 335 | rugs$y_r <- grid::rectGrob( 336 | x = unit(1, "npc"), 337 | y = unit(data_y$y, "native"), 338 | width = length, 339 | height = data_y$height, 340 | just = "right", 341 | gp = gp_y 342 | ) 343 | } 344 | 345 | } 346 | 347 | grid::gTree(children = do.call(grid::gList, rugs)) 348 | 349 | }, 350 | 351 | default_aes = aes(fill = "grey20", alpha = NA), 352 | 353 | draw_key = draw_key_rect 354 | ) 355 | 356 | 357 | 358 | 359 | 360 | -------------------------------------------------------------------------------- /R/method.R: -------------------------------------------------------------------------------- 1 | # methods that return est pdf as closure --------------------------------- 2 | 3 | #' Bivariate parametric normal HDR estimator 4 | #' 5 | #' Function used to specify bivariate normal density estimator 6 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`). 7 | #' 8 | #' For more details on the use and implementation of the `method_*()` functions, 9 | #' see `vignette("method", "ggdensity")`. 10 | #' 11 | #' @examples 12 | #' # Normal estimator is useful when an assumption of normality is appropriate 13 | #' set.seed(1) 14 | #' df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) 15 | #' 16 | #' ggplot(df, aes(x, y)) + 17 | #' geom_hdr(method = method_mvnorm(), xlim = c(-4, 4), ylim = c(-4, 4)) + 18 | #' geom_point(size = 1) 19 | #' 20 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs 21 | #' res <- get_hdr(df, method = method_mvnorm()) 22 | #' str(res) 23 | #' 24 | #' @export 25 | method_mvnorm <- function() { 26 | 27 | function(data) { 28 | 29 | data_matrix <- with(data, cbind(x, y)) 30 | mu_hat <- colMeans(data_matrix) 31 | R <- chol(cov(data_matrix)) # R'R = crossprod(R) = S 32 | 33 | function(x, y) { 34 | X <- cbind(x, y) 35 | tmp <- backsolve(R, t(X) - mu_hat, transpose = TRUE) 36 | logretval <- -sum(log(diag(R))) - log(2 * pi) - 0.5 * colSums(tmp^2) 37 | exp( logretval ) 38 | } 39 | 40 | } 41 | 42 | } 43 | 44 | # methods that return closures that compute a grid ------------------------ 45 | 46 | #' Bivariate kernel density HDR estimator 47 | #' 48 | #' Function used to specify bivariate kernel density estimator 49 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`). 50 | #' 51 | #' For more details on the use and implementation of the `method_*()` functions, 52 | #' see `vignette("method", "ggdensity")`. 53 | #' 54 | #' @inheritParams ggplot2::stat_density2d 55 | #' 56 | #' @examples 57 | #' set.seed(1) 58 | #' df <- data.frame(x = rnorm(1e3, sd = 3), y = rnorm(1e3, sd = 3)) 59 | #' 60 | #' ggplot(df, aes(x, y)) + 61 | #' geom_hdr(method = method_kde()) + 62 | #' geom_point(size = 1) 63 | #' 64 | #' # The defaults of `method_kde()` are the same as the estimator for `ggplot2::geom_density_2d()` 65 | #' ggplot(df, aes(x, y)) + 66 | #' geom_density_2d_filled() + 67 | #' geom_hdr_lines(method = method_kde(), probs = seq(.1, .9, by = .1)) + 68 | #' theme(legend.position = "none") 69 | #' 70 | #' # The bandwidth of the estimator can be set directly with `h` or scaled with `adjust` 71 | #' ggplot(df, aes(x, y)) + 72 | #' geom_hdr(method = method_kde(h = 1)) + 73 | #' geom_point(size = 1) 74 | #' 75 | #' ggplot(df, aes(x, y)) + 76 | #' geom_hdr(method = method_kde(adjust = 1/2)) + 77 | #' geom_point(size = 1) 78 | #' 79 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs 80 | #' res <- get_hdr(df, method = method_kde()) 81 | #' str(res) 82 | #' 83 | #' @export 84 | method_kde <- function(h = NULL, adjust = c(1, 1)) { 85 | 86 | function(data, n, rangex, rangey) { 87 | 88 | if (is.null(h)) { 89 | h <- c(MASS::bandwidth.nrd(data$x), MASS::bandwidth.nrd(data$y)) 90 | } 91 | 92 | h <- h * adjust 93 | 94 | kdeout <- MASS::kde2d( 95 | x = data$x, y = data$y, n = n, h = h, 96 | lims = c(rangex, rangey) 97 | ) 98 | 99 | df <- with(kdeout, expand.grid("x" = x, "y" = y)) 100 | df$fhat <- as.vector(kdeout$z) 101 | 102 | df 103 | 104 | } 105 | } 106 | 107 | #' Bivariate histogram HDR estimator 108 | #' 109 | #' Function used to specify bivariate histogram density estimator 110 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`). 111 | #' 112 | #' For more details on the use and implementation of the `method_*()` functions, 113 | #' see `vignette("method", "ggdensity")`. 114 | #' 115 | #' @param bins Number of bins along each axis. 116 | #' Either a vector of length 2 or a scalar value which is recycled for both dimensions. 117 | #' Defaults to normal reference rule (Scott, pg 87). 118 | #' @param smooth If `TRUE`, HDRs are smoothed by the marching squares algorithm. 119 | #' @param nudgex,nudgey Horizontal and vertical rules for choosing witness points when `smooth == TRUE`. 120 | #' Accepts character vector: `"left"`, `"none"`, `"right"` (`nudgex`) or `"down"`, `"none"`, `"up"` (`nudgey`). 121 | #' 122 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley. 123 | #' 124 | #' @examples 125 | #' \dontrun{ 126 | #' 127 | #' # Histogram estimators can be useful when data has boundary constraints 128 | #' set.seed(1) 129 | #' df <- data.frame(x = rexp(1e3), y = rexp(1e3)) 130 | #' 131 | #' ggplot(df, aes(x, y)) + 132 | #' geom_hdr(method = method_histogram()) + 133 | #' geom_point(size = 1) 134 | #' 135 | #' # The resolution of the histogram estimator can be set via `bins` 136 | #' ggplot(df, aes(x, y)) + 137 | #' geom_hdr(method = method_histogram(bins = c(8, 25))) + 138 | #' geom_point(size = 1) 139 | #' 140 | #' # By setting `smooth = TRUE`, we can graphically smooth the "blocky" HDRs 141 | #' ggplot(df, aes(x, y)) + 142 | #' geom_hdr(method = method_histogram(smooth = TRUE)) + 143 | #' geom_point(size = 1) 144 | #' 145 | #' # However, we need to set `nudgex` and `nudgey` to align the HDRs correctly 146 | #' ggplot(df, aes(x, y)) + 147 | #' geom_hdr(method = method_histogram(smooth = TRUE, nudgex = "left", nudgey = "down")) + 148 | #' geom_point(size = 1) 149 | #' 150 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs 151 | #' res <- get_hdr(df, method = method_histogram()) 152 | #' str(res) 153 | #' } 154 | #' 155 | #' @export 156 | method_histogram <- function(bins = NULL, smooth = FALSE, nudgex = "none", nudgey = "none") { 157 | 158 | # n is an argument, but it is not used 159 | function(data, n, rangex, rangey) { 160 | 161 | if (is.null(bins)) { 162 | bins <- numeric(2) 163 | 164 | # define histogram mesh according to Scott p. 87 165 | rho <- cor(data$x, data$y) 166 | hx <- 3.504 * sd(data$x) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4) 167 | hy <- 3.504 * sd(data$y) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4) 168 | bins[1] <- round((rangex[2] - rangex[1]) / hx) 169 | bins[2] <- round((rangey[2] - rangey[1]) / hy) 170 | 171 | } else if (length(bins == 1)) { 172 | bins <- rep(bins, 2) 173 | } 174 | 175 | xvals <- data$x 176 | yvals <- data$y 177 | 178 | xbtwn <- (rangex[1] <= xvals & xvals <= rangex[2]) 179 | if (!all(xbtwn)) { 180 | xvals <- xvals[xbtwn] 181 | yvals <- yvals[xbtwn] 182 | } 183 | 184 | ybtwn <- (rangey[1] <= yvals & yvals <= rangey[2]) 185 | if (!all(ybtwn)) { 186 | xvals <- xvals[ybtwn] 187 | yvals <- yvals[ybtwn] 188 | } 189 | 190 | sx <- seq(rangex[1], rangex[2], length.out = bins[1] + 1) 191 | sy <- seq(rangey[1], rangey[2], length.out = bins[2] + 1) 192 | de_x <- sx[2] - sx[1] 193 | de_y <- sy[2] - sy[1] 194 | box_area <- de_x * de_y 195 | 196 | xbin_mdpts <- sx[-(bins[1]+1)] + de_x/2 197 | ybin_mdpts <- sy[-(bins[2]+1)] + de_y/2 198 | 199 | xleft <- sx[-(bins[1]+1)] 200 | xright <- sx[-1] 201 | 202 | ybottom <- sy[-(bins[2]+1)] 203 | ytop <- sy[-1] 204 | 205 | 206 | df_cuts <- data.frame("xbin" = cut(xvals, sx), "ybin" = cut(yvals, sy)) 207 | 208 | df <- with(df_cuts, expand.grid("xbin" = levels(xbin), "ybin" = levels(ybin))) 209 | df$n <- with(df_cuts, as.vector(table(xbin, ybin))) 210 | 211 | df$xbin_midpt <- xbin_mdpts[as.integer(df$xbin)] 212 | df$ybin_midpt <- ybin_mdpts[as.integer(df$ybin)] 213 | 214 | df$xmin <- df$xbin_midpt - de_x/2 215 | df$xmax <- df$xbin_midpt + de_x/2 216 | df$de_x <- de_x 217 | 218 | df$ymin <- df$ybin_midpt - de_y/2 219 | df$ymax <- df$ybin_midpt + de_y/2 220 | df$de_y <- de_y 221 | 222 | df$fhat <- with(df, n / (sum(n) * box_area)) 223 | 224 | 225 | if (smooth) { 226 | 227 | if(nudgex == "left") df$x <- df$xmin 228 | if(nudgex == "none") df$x <- df$xbin_midpt 229 | if(nudgex == "right") df$x <- df$xmax 230 | 231 | if(nudgey == "down") df$y <- df$ymin 232 | if(nudgey == "none") df$y <- df$ybin_midpt 233 | if(nudgey == "up") df$y <- df$ymax 234 | 235 | } else { 236 | 237 | # No nudging if not smoothing 238 | df$x <- df$xbin_midpt 239 | df$y <- df$ybin_midpt 240 | 241 | # Evaluate histogram on a grid 242 | # For xyz_to_iso* funs, need tightly packed values for good isobands/lines 243 | # k*k points per histogram footprint 244 | # Higher values of k -> better visuals, more computationally expensive 245 | 246 | # Currently determining k heuristically - not based on any theoretical results 247 | # The necessary value of k seems to be O((bins[1]*bins[2])^(-1/3)) 248 | # found constant which yields k = 50 for bins[1]*bins[2] = 10^2 249 | k <- if (bins[1] * bins[2] > 10^2) max(floor(225/((bins[1] * bins[2])^(1/3))), 5) else 50 250 | 251 | bbins <- bins * k 252 | 253 | ssx <- seq(rangex[1], rangex[2], length.out = bbins[1]) 254 | ssy <- seq(rangey[1], rangey[2], length.out = bbins[2]) 255 | 256 | ddf <- expand.grid(x = ssx, y = ssy) 257 | 258 | # Need fhat repeated in very particular way for grid: 259 | # e.g. 260 | # k = 2 261 | # df$fhat = 1, 2, 262 | # 3, 4 263 | # ddf$fhat = 1, 1, 2, 2, 264 | # 1, 1, 2, 2, 265 | # 3, 3, 4, 4, 266 | # 3, 3, 4, 4 267 | 268 | # m <- matrix(df$fhat, nrow = bins[2], byrow = TRUE) 269 | # ddf$fhat <- as.vector(kronecker(m, matrix(1, k, k))) 270 | 271 | fhat <- split(df$fhat, factor(rep(1:bins[2], each = bins[1]))) # split into rows 272 | fhat <- lapply(fhat, function(x) rep(x, each = k)) # repeat within rows (horizontal) 273 | fhat <- lapply(fhat, function(x) rep(x, times = k)) # repeat rows (vertical) 274 | fhat <- unlist(fhat) # concatenate 275 | ddf$fhat <- fhat 276 | 277 | df <- ddf 278 | } 279 | 280 | df[c("x", "y", "fhat")] 281 | 282 | } 283 | } 284 | 285 | #' Bivariate frequency polygon HDR estimator 286 | #' 287 | #' Function used to specify bivariate frequency polygon density estimator 288 | #' for `get_hdr()` and layer functions (e.g. `geom_hdr()`). 289 | #' 290 | #' For more details on the use and implementation of the `method_*()` functions, 291 | #' see `vignette("method", "ggdensity")`. 292 | #' 293 | #' @inheritParams method_histogram 294 | #' 295 | #' @references Scott, David W. Multivariate Density Estimation (2e), Wiley. 296 | #' 297 | #' @examples 298 | #' set.seed(1) 299 | #' df <- data.frame(x = rnorm(1e3), y = rnorm(1e3)) 300 | #' 301 | #' ggplot(df, aes(x, y)) + 302 | #' geom_hdr(method = method_freqpoly()) + 303 | #' geom_point(size = 1) 304 | #' 305 | #' # The resolution of the frequency polygon estimator can be set via `bins` 306 | #' ggplot(df, aes(x, y)) + 307 | #' geom_hdr(method = method_freqpoly(bins = c(8, 25))) + 308 | #' geom_point(size = 1) 309 | #' 310 | #' # Can also be used with `get_hdr()` for numerical summary of HDRs 311 | #' res <- get_hdr(df, method = method_freqpoly()) 312 | #' str(res) 313 | #' 314 | #' @export 315 | method_freqpoly <- function(bins = NULL) { 316 | 317 | # n is an argument, but it is not used 318 | function(data, n, rangex, rangey) { 319 | 320 | if (is.null(bins)) { 321 | bins <- numeric(2) 322 | 323 | # define histogram mesh according to Scott p. 87 324 | # TODO: fill in with rules for frequency polygons 325 | rho <- cor(data$x, data$y) 326 | hx <- 3.504 * sd(data$x) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4) 327 | hy <- 3.504 * sd(data$y) * (1 - rho^2)^(3/8) * nrow(data)^(-1/4) 328 | bins[1] <- round((rangex[2] - rangex[1]) / hx) 329 | bins[2] <- round((rangey[2] - rangey[1]) / hy) 330 | 331 | } else { 332 | if (length(bins == 1)) bins <- rep(bins, 2) 333 | } 334 | 335 | xvals <- data$x 336 | yvals <- data$y 337 | 338 | xbtwn <- (rangex[1] <= xvals & xvals <= rangex[2]) 339 | if (!all(xbtwn)) { 340 | xvals <- xvals[xbtwn] 341 | yvals <- yvals[xbtwn] 342 | } 343 | 344 | ybtwn <- (rangey[1] <= yvals & yvals <= rangey[2]) 345 | if (!all(ybtwn)) { 346 | xvals <- xvals[ybtwn] 347 | yvals <- yvals[ybtwn] 348 | } 349 | 350 | 351 | de_x <- (rangex[2] - rangex[1]) / bins[1] 352 | de_y <- (rangey[2] - rangey[1]) / bins[2] 353 | rangex[1] <- rangex[1] - de_x 354 | rangex[2] <- rangex[2] + de_x 355 | rangey[1] <- rangey[1] - de_y 356 | rangey[2] <- rangey[2] + de_y 357 | bins <- bins + 2 358 | sx <- seq(rangex[1], rangex[2], length.out = bins[1] + 1) 359 | sy <- seq(rangey[1], rangey[2], length.out = bins[2] + 1) 360 | 361 | 362 | box_area <- de_x * de_y 363 | 364 | xbin_mdpts <- sx[-(bins[1]+1)] + de_x/2 365 | ybin_mdpts <- sy[-(bins[2]+1)] + de_y/2 366 | 367 | xleft <- sx[-(bins[1]+1)] 368 | xright <- sx[-1] 369 | 370 | ybottom <- sy[-(bins[2]+1)] 371 | ytop <- sy[-1] 372 | 373 | 374 | df_cuts <- data.frame("xbin" = cut(xvals, sx), "ybin" = cut(yvals, sy)) 375 | 376 | df <- with(df_cuts, expand.grid("xbin" = levels(xbin), "ybin" = levels(ybin))) 377 | df$n <- with(df_cuts, as.vector(table(xbin, ybin))) 378 | 379 | df$xbin_midpt <- xbin_mdpts[as.integer(df$xbin)] 380 | df$ybin_midpt <- ybin_mdpts[as.integer(df$ybin)] 381 | 382 | df$xmin <- df$xbin_midpt - de_x/2 383 | df$xmax <- df$xbin_midpt + de_x/2 384 | df$de_x <- de_x 385 | 386 | df$ymin <- df$ybin_midpt - de_y/2 387 | df$ymax <- df$ybin_midpt + de_y/2 388 | df$de_y <- de_y 389 | 390 | df$fhat <- with(df, n / (sum(n) * box_area)) 391 | df$fhat_discretized <- normalize(df$fhat) 392 | 393 | grid <- expand.grid( 394 | x = sx[2:bins[1]], 395 | y = sy[2:bins[2]] 396 | ) 397 | 398 | x_midpts <- unique(df$xbin_midpt) 399 | y_midpts <- unique(df$ybin_midpt) 400 | 401 | find_A <- function(coords) { 402 | x <- coords[[1]] 403 | y <- coords[[2]] 404 | 405 | row <- data.frame( 406 | x1 = max(x_midpts[x_midpts - x < 0]), 407 | x2 = min(x_midpts[x_midpts - x >= 0]), 408 | y1 = max(y_midpts[y_midpts - y < 0]), 409 | y2 = min(y_midpts[y_midpts - y >= 0]) 410 | ) 411 | 412 | row$fQ11 <- df[df$xbin_midpt == row$x1 & df$ybin_midpt == row$y1, "fhat"] 413 | row$fQ21 <- df[df$xbin_midpt == row$x2 & df$ybin_midpt == row$y1, "fhat"] 414 | row$fQ12 <- df[df$xbin_midpt == row$x1 & df$ybin_midpt == row$y2, "fhat"] 415 | row$fQ22 <- df[df$xbin_midpt == row$x2 & df$ybin_midpt == row$y2, "fhat"] 416 | 417 | xy_mat <- with(row, matrix(c( 418 | x2 * y2, -x2 * y1, -x1 * y2, x1 * y1, 419 | -y2, y1, y2, -y1, 420 | -x2, x2, x1, -x1, 421 | 1, -1, -1, 1 422 | ), nrow = 4, byrow = TRUE)) 423 | 424 | A <- with(row, 425 | 1 / ((x2 - x1) * (y2 - y1)) * xy_mat %*% c(fQ11, fQ12, fQ21, fQ22) 426 | ) 427 | 428 | row$a00 <- A[1] 429 | row$a10 <- A[2] 430 | row$a01 <- A[3] 431 | row$a11 <- A[4] 432 | 433 | row 434 | } 435 | 436 | 437 | A_list <- apply(grid, 1, find_A, simplify = FALSE) 438 | df_A <- do.call(rbind, A_list) 439 | 440 | coeffs_to_surface <- function(row, k) { 441 | sx <- seq(row[["x1"]], row[["x2"]], length.out = k)[-k] 442 | sy <- seq(row[["y1"]], row[["y2"]], length.out = k)[-k] 443 | 444 | fit <- function(x, y) row[["a00"]] + row[["a10"]] * x + row[["a01"]] * y + row[["a11"]] * x * y 445 | 446 | df <- expand.grid(x = sx, y = sy) 447 | df$fhat <- fit(df$x, df$y) 448 | 449 | df 450 | } 451 | 452 | 453 | # Currently determining k heuristically - not based on any theoretical results 454 | # The necessary value of k seems to be O((bins[1]*bins[2])^(-1/4)) 455 | k <- if (bins[1] * bins[2] > 10^2) max(floor(30/((bins[1] * bins[2])^(1/4))), 3) else 10 456 | 457 | surface_list <- apply(df_A, 1, coeffs_to_surface, k, simplify = FALSE) 458 | df <- do.call(rbind, surface_list) 459 | 460 | df[c("x","y","fhat")] 461 | 462 | } 463 | } 464 | 465 | --------------------------------------------------------------------------------