├── .Rbuildignore
├── .github
│   ├── .gitignore
│   └── workflows
│       ├── R-CMD-check.yaml
│       └── test-coverage.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── bracl.R
│   ├── brglm2-defunct.R
│   ├── brglm2-package.R
│   ├── brglmControl.R
│   ├── brglmFit.R
│   ├── brmultinom.R
│   ├── brnb.R
│   ├── data.R
│   ├── expo.R
│   ├── mis-link.R
│   ├── ordinal_superiority.R
│   ├── utils.R
│   ├── warnings.R
│   └── zzz_conventions.R
├── README.Rmd
├── README.md
├── data
│   ├── aids.rda
│   ├── alligators.rda
│   ├── coalition.rda
│   ├── endometrial.rda
│   ├── enzymes.rda
│   ├── hepatitis.rda
│   ├── lizards.rda
│   └── stemcell.rda
├── inst
│   ├── CITATION
│   ├── WORDLIST
│   ├── brpr
│   │   ├── brpr.R
│   │   ├── examples.R
│   │   └── timings.R
│   ├── glmbias_code+results
│   │   ├── alligator_simulation_results.rda
│   │   ├── birth_weight_simulation_results.rda
│   │   ├── clotting_simulation_results.rda
│   │   ├── glmbias_gamma_simulation.R
│   │   ├── glmbias_logistic_simulation.R
│   │   ├── glmbias_multinomial_simulation.R
│   │   └── glmbias_simulation_functions.R
│   ├── mbrpr
│   │   └── mbrpr.R
│   └── tinytest
│       ├── brpr.R
│       ├── mbrpr.R
│       ├── test-binomial.R
│       ├── test-bracl.R
│       ├── test-brglmControl.R
│       ├── test-brnp.R
│       ├── test-checkinfinite.R
│       ├── test-correction.R
│       ├── test-dispersion.R
│       ├── test-expo.R
│       ├── test-gamma.R
│       ├── test-jeffreys.R
│       ├── test-median-binomial.R
│       ├── test-median-dispersion.R
│       ├── test-median-poisson.R
│       ├── test-mis.R
│       ├── test-multinom-binom.R
│       ├── test-multinom.R
│       ├── test-poisson.R
│       ├── test-print.R
│       ├── test-singular.R
│       ├── test-start.R
│       └── test-transformation.R
├── man
│   ├── aids.Rd
│   ├── alligators.Rd
│   ├── bracl.Rd
│   ├── brglm2-defunct.Rd
│   ├── brglm2.Rd
│   ├── brglmControl.Rd
│   ├── brglmFit.Rd
│   ├── brmultinom.Rd
│   ├── brnb.Rd
│   ├── coalition.Rd
│   ├── coef.brglmFit.Rd
│   ├── coef.brglmFit_expo.Rd
│   ├── coef.brnb.Rd
│   ├── confint.brglmFit.Rd
│   ├── confint.brmultinom.Rd
│   ├── confint.brnb.Rd
│   ├── endometrial.Rd
│   ├── enzymes.Rd
│   ├── expo.brglmFit.Rd
│   ├── figures
│   │   └── hex_brglm2.svg
│   ├── hepatitis.Rd
│   ├── lizards.Rd
│   ├── mis.Rd
│   ├── ordinal_superiority.bracl.Rd
│   ├── predict.bracl.Rd
│   ├── predict.brmultinom.Rd
│   ├── residuals.brmultinom.Rd
│   ├── simulate.brmultinom.Rd
│   ├── simulate.brnb.Rd
│   ├── stemcell.Rd
│   ├── summary.brglmFit.Rd
│   ├── summary.brnb.Rd
│   ├── vcov.brglmFit.Rd
│   └── vcov.brnb.Rd
├── src
│   ├── expectations.c
│   ├── hats.c
│   └── init.c
├── tests
│   └── tinytest.R
└── vignettes
    ├── adjacent.Rmd
    ├── brglm2.bib
    ├── expo.Rmd
    ├── iteration.Rmd
    ├── multinomial.Rmd
    └── negativeBinomial.Rmd
/.Rbuildignore: -------------------------------------------------------------------------------- 1 | inst/scratch 2 | inst/brglm0 3 | inst/brpr 4 | inst/demos 5 | inst/mbrpr 6 | inst/other 7 | inst/useR!2016 8 | inst/glmbias_code\+results 9 | inst/dev_resids.R 10 | inst/TODO.org 11 | src/hats.c 12 | tests/ 13 | vignettes/iteration\[exported\].html 14 | vignettes/iteration\[exported\].pdf 15 | vignettes/iteration.html 16 | vignettes/iteration.pdf 17 | 18 | ^\.github$ 19 | ^\.gitignore 20 | ^\._* 21 | ^.*\.Rproj$ 22 | ^\.Rproj\.user$ 23 | ^\.travis\.yml$ 24 | 25 | anova.R 26 | ^CRAN-RELEASE$ 27 | ^README\.Rmd$ 28 | README_cache 29 | ^codecov\.yml$ 30 | ^CRAN-RELEASE$ 31 | ^code_of_conduct\.md$ 32 | ^CRAN-SUBMISSION$ 33 | ^doc$ 34 | ^Meta$ 35 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: 
-------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, develop] 6 | pull_request: 7 | branches: [main, develop] 8 | 9 | name: R-CMD-check.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 53 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | 3 | ### macOS ### 4 | # General 5 | .DS_Store 6 | .AppleDouble 7 | .LSOverride 8 | 9 | # Icon must end with two \r 10 | Icon 11 | 12 | # Thumbnails 13 | ._* 14 | 15 | # Files that might appear in the root of a volume 16 | .DocumentRevisions-V100 17 | .fseventsd 18 | .Spotlight-V100 19 | .TemporaryItems 20 | .Trashes 21 | .VolumeIcon.icns 22 | .com.apple.timemachine.donotpresent 23 | 24 | # Directories potentially created on remote AFP share 25 | .AppleDB 26 | .AppleDesktop 27 | Network Trash Folder 28 | Temporary Items 29 | .apdisk 30 | 31 | ### R ### 32 | # History files 33 | .Rhistory 34 | .Rapp.history 35 | 36 | # Session Data files 37 | .RData 38 | 39 | # Example code in package build process 40 | *-Ex.R 41 | 42 | # Output files from R CMD build 43 | /*.tar.gz 44 | 45 | # Output files from R CMD check 46 | /*.Rcheck/ 47 | 48 | # RStudio files 49 | .Rproj.user/ 50 | 51 | # produced vignettes 52 | vignettes/*.html 53 | vignettes/*.pdf 54 | 55 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 56 | .httr-oauth 57 | 58 | # knitr and R markdown default cache directories 59 | /*_cache/ 60 | /cache/ 61 | 62 | # Temporary files created by R markdown 63 | *.utf8.md 64 | *.knit.md 65 | 66 | ### R.Bookdown Stack ### 67 | # R package: bookdown caching files 68 | /*_files/ 69 | 70 | ### Emacs ### 71 | # -*- mode: gitignore; -*- 72 | *~ 73 | \#*\# 74 | /.emacs.desktop 75 | /.emacs.desktop.lock 76 | *.elc 77 | auto-save-list 78 | tramp 79 | .\#* 80 | 81 | # Org-mode 82 | .org-id-locations 83 | *_archive 84 | 85 | # flymake-mode 86 | *_flymake.* 87 | 88 | # eshell files 89 | /eshell/history 90 | /eshell/lastdir 91 | 92 | # elpa packages 93 | /elpa/ 94 | 95 | # reftex files 96 | *.rel 97 | 98 | # AUCTeX auto folder 99 | /auto/ 100 | 101 | # cask packages 102 | .cask/ 103 | dist/ 104 | 105 | # Flycheck 106 | flycheck_*.el 107 | 108 | # server auth directory 109 | /server/ 110 | 111 | # projectiles files 112 | .projectile 113 | 114 | # directory configuration 115 | .dir-locals.el 116 | 117 | # network security 118 | /network-security.data 119 | /doc/ 120 | /Meta/ 121 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [INSERT CONTACT METHOD]. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. 
Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series of 87 | actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or permanent 94 | ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within the 114 | community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.1, available at 120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 127 | [https://www.contributor-covenant.org/translations][translations]. 
128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | 135 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: brglm2 2 | Title: Bias Reduction in Generalized Linear Models 3 | Version: 0.9.2 4 | Authors@R: c(person(given = "Ioannis", family = "Kosmidis", role = c("aut", "cre"), email = "ioannis.kosmidis@warwick.ac.uk", comment = c(ORCID = "0000-0003-1556-0302")), 5 | person(given = c("Euloge", "Clovis"), family = c("Kenne Pagui"), role = "aut", email = "kenne@stat.unipd.it"), 6 | person(given = "Kjell", family = "Konis", role = "ctb", email = "kjell.konis@me.com"), 7 | person(given = "Nicola", family = "Sartori", role = "ctb", email = "sartori@stat.unipd.it")) 8 | Description: Estimation and inference from generalized linear models based on various methods for bias reduction and maximum penalized likelihood with powers of the Jeffreys prior as penalty. The 'brglmFit' fitting method can achieve reduction of estimation bias by solving either the mean bias-reducing adjusted score equations in Firth (1993) <doi:10.2307/2336755> and Kosmidis and Firth (2009) <doi:10.1093/biomet/asp055>, or the median bias-reduction adjusted score equations in Kenne Pagui et al. (2017) <doi:10.1093/biomet/asx046>, or through the direct subtraction of an estimate of the bias of the maximum likelihood estimator from the maximum likelihood estimates as in Cordeiro and McCullagh (1991) <doi:10.1111/j.2517-6161.1991.tb01852.x>. See Kosmidis et al. (2020) <doi:10.1007/s11222-019-09860-6> for more details. Estimation in all cases takes place via a quasi Fisher scoring algorithm, and S3 methods for the construction of confidence intervals for the reduced-bias estimates are provided. In the special case of generalized linear models for binomial and multinomial responses (both ordinal and nominal), the adjusted score approaches to mean and median bias reduction have been found to return estimates with improved frequentist properties, that are also always finite, even in cases where the maximum likelihood estimates are infinite (e.g. complete and quasi-complete separation; see Kosmidis and Firth, 2020 <doi:10.1093/biomet/asaa052>, for a proof for mean bias reduction in logistic regression).
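As a quick illustration of the fitting method and S3 methods described in the Description above, here is a minimal usage sketch; it is not part of the DESCRIPTION file itself, and it only uses the lizards data shipped with the package and the same 'method' and 'type' arguments that appear in the package's own examples further down.

## Fit a binomial GLM by maximum likelihood, then refit it with the mean
## bias-reducing adjusted score equations via the 'brglmFit' method.
library("brglm2")
lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time
lizards_ML <- glm(lizards_f, family = binomial(logit), data = lizards)
lizards_BR <- update(lizards_ML, method = "brglmFit", type = "AS_mean")
## Reduced-bias estimates and Wald-type confidence intervals from the
## provided S3 methods
coef(lizards_BR)
confint(lizards_BR)

Other 'type' values that appear in the sources below, such as "AS_median", "correction", and "MPL_Jeffreys", can be swapped into the same call.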
9 | URL: https://github.com/ikosmidis/brglm2 10 | BugReports: https://github.com/ikosmidis/brglm2/issues 11 | Depends: R (>= 3.3.0) 12 | Imports: MASS, stats, Matrix, graphics, nnet, enrichwith, numDeriv 13 | License: GPL-3 14 | Encoding: UTF-8 15 | LazyData: true 16 | RoxygenNote: 7.2.3 17 | Roxygen: list(markdown = TRUE) 18 | Suggests: detectseparation, knitr, rmarkdown, covr, tinytest, VGAM, brglm, mbrglm 19 | VignetteBuilder: knitr 20 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(coef,bracl) 4 | S3method(coef,brglmFit) 5 | S3method(coef,brglmFit_expo) 6 | S3method(coef,brmultinom) 7 | S3method(coef,brnb) 8 | S3method(confint,brglmFit) 9 | S3method(confint,brmultinom) 10 | S3method(confint,brnb) 11 | S3method(expo,brglmFit) 12 | S3method(expo,glm) 13 | S3method(fitted,bracl) 14 | S3method(fitted,brmultinom) 15 | S3method(logLik,brmultinom) 16 | S3method(model.matrix,brmultinom) 17 | S3method(ordinal_superiority,bracl) 18 | S3method(predict,bracl) 19 | S3method(predict,brmultinom) 20 | S3method(print,brglmFit_expo) 21 | S3method(print,brmultinom) 22 | S3method(print,brnb) 23 | S3method(print,summary.bracl) 24 | S3method(print,summary.brglmFit) 25 | S3method(print,summary.brmultinom) 26 | S3method(print,summary.brnb) 27 | S3method(residuals,bracl) 28 | S3method(residuals,brmultinom) 29 | S3method(simulate,brmultinom) 30 | S3method(simulate,brnb) 31 | S3method(summary,bracl) 32 | S3method(summary,brglmFit) 33 | S3method(summary,brmultinom) 34 | S3method(summary,brnb) 35 | S3method(vcov,bracl) 36 | S3method(vcov,brglmFit) 37 | S3method(vcov,brmultinom) 38 | S3method(vcov,brnb) 39 | export(bracl) 40 | export(brglmControl) 41 | export(brglmFit) 42 | export(brglm_control) 43 | export(brglm_fit) 44 | export(brmultinom) 45 | export(brnb) 46 | export(check_infinite_estimates) 47 | export(detect_separation) 48 | export(expo) 49 | export(mis) 50 | export(ordinal_superiority) 51 | import(MASS) 52 | import(enrichwith) 53 | import(stats) 54 | import(Matrix) 55 | importFrom(graphics,plot) 56 | importFrom(nnet,class.ind) 57 | importFrom(numDeriv,grad) 58 | useDynLib(brglm2) 59 | -------------------------------------------------------------------------------- /R/brglm2-defunct.R: -------------------------------------------------------------------------------- 1 | #' Defunct Functions in package \pkg{brglm2} 2 | #' 3 | #' The functions or variables listed here are no longer part of 4 | #' \pkg{brglm2}. 5 | #' 6 | #' 7 | #' @param ... arguments to be passed to functions and methods. 8 | #' 9 | #' 10 | #' @details 11 | #' 12 | #' * [detect_separation()]: This function is defunct from \pkg{brglm2} 13 | #' since version 0.8.0. A new version of [detect_separation()] is now 14 | #' maintained in the 15 | #' [\pkg{detectseparation}](https://cran.r-project.org/package=detectseparation) 16 | #' R package. 17 | #' 18 | #' * [check_infinite_estimates()] is defunct from \pkg{brglm2} since 19 | #' version 0.8.0. An new version of [check_infinite_estimates()] is 20 | #' now maintained in the 21 | #' [\pkg{detectseparation}](https://cran.r-project.org/package=detectseparation) 22 | #' R package. 23 | #' 24 | #' @name brglm2-defunct 25 | NULL 26 | 27 | #' @rdname brglm2-defunct 28 | #' @export 29 | check_infinite_estimates <- function(...) 
{ 30 | function_moved_to_new_package(gsub("\\(|\\)", "", deparse(match.call()[1])), 31 | "0.8.0", 32 | "brglm2", 33 | "detectseparation") 34 | } 35 | 36 | #' @rdname brglm2-defunct 37 | #' @export 38 | detect_separation <- function(...) { 39 | function_moved_to_new_package(gsub("\\(|\\)", "", deparse(match.call()[1])), 40 | "0.8.0", 41 | "brglm2", 42 | "detectseparation") 43 | } 44 | -------------------------------------------------------------------------------- /R/brglm2-package.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2020- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | #' 17 | #' brglm2: Bias Reduction in Generalized Linear Models 18 | #' 19 | #' Estimation and inference from generalized linear models using 20 | #' implicit and explicit bias reduction methods (Kosmidis, 2014), and 21 | #' other penalized maximum likelihood methods. Currently supported 22 | #' methods include the mean bias-reducing adjusted scores approach in 23 | #' Firth (1993) and Kosmidis & Firth (2009), the median bias-reduction 24 | #' adjusted scores approach in Kenne Pagui et al. (2017), the 25 | #' correction of the asymptotic bias in Cordeiro & McCullagh (1991), 26 | #' the mixed bias-reduction adjusted scores approach in Kosmidis et al 27 | #' (2020), maximum penalized likelihood with powers of the Jeffreys 28 | #' prior as penalty, and maximum likelihood. 29 | #' 30 | #' 31 | #' In the special case of generalized linear models for binomial, 32 | #' Poisson and multinomial responses (both nominal and ordinal), mean 33 | #' and median bias reduction and maximum penalized likelihood return 34 | #' estimates with improved frequentist properties, that are also 35 | #' always finite, even in cases where the maximum likelihood estimates 36 | #' are infinite (e.g. complete and quasi-complete separation in 37 | #' multinomial regression). Estimation in all cases takes place via a 38 | #' modified Fisher scoring algorithm, and S3 methods for the 39 | #' construction of confidence intervals for the reduced-bias estimates 40 | #' are provided. 41 | #' 42 | #' The core model fitters are implemented by the functions 43 | #' [brglm_fit()] (univariate generalized linear models), 44 | #' [brmultinom()] (baseline category logit models for nominal 45 | #' multinomial responses), [bracl()] (adjacent category logit models 46 | #' for ordinal multinomial responses), and [brnb()] for negative 47 | #' binomial regression. 48 | #' 49 | #' @details 50 | #' 51 | #' 52 | #' The similarly named **brglm** R package can only handle generalized 53 | #' linear models with binomial responses. Special care has been taken 54 | #' when developing **brglm2** in order not to have conflicts when the 55 | #' user loads **brglm2** and **brglm** simultaneously. 
The development 56 | #' and maintenance of the two packages will continue in parallel, 57 | #' until **brglm2** incorporates all **brglm** functionality and 58 | #' provides an appropriate wrapper to the [brglm::brglm()] function. 59 | #' 60 | #' @author Ioannis Kosmidis `[aut, cre]` \email{ioannis.kosmidis@warwick.ac.uk} 61 | #' 62 | #' @seealso 63 | #' 64 | #' [brglm_fit()], [brmultinom()], [bracl()] 65 | #' 66 | #' @references 67 | #' 68 | #' Kosmidis I, Firth D (2021). Jeffreys-prior penalty, finiteness 69 | #' and shrinkage in binomial-response generalized linear 70 | #' models. *Biometrika*, **108**, 71-82. \doi{10.1093/biomet/asaa052}. 71 | #' 72 | #' Cordeiro G M, McCullagh P (1991). Bias correction in generalized 73 | #' linear models. *Journal of the Royal Statistical Society. Series B 74 | #' (Methodological)*, **53**, 629-643. \doi{10.1111/j.2517-6161.1991.tb01852.x}. 75 | #' 76 | #' Firth D (1993). Bias reduction of maximum likelihood estimates, 77 | #' Biometrika, **80**, 27-38. \doi{10.2307/2336755}. 78 | #' 79 | #' Kenne Pagui E C, Salvan A, Sartori N (2017). Median bias 80 | #' reduction of maximum likelihood estimates. *Biometrika*, **104**, 81 | #' 923–938. \doi{10.1093/biomet/asx046}. 82 | #' 83 | #' Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 84 | #' reduction in generalized linear models. *Statistics and Computing*, 85 | #' **30**, 43-59. \doi{10.1007/s11222-019-09860-6}. 86 | #' 87 | #' Kosmidis I, Firth D (2009). Bias reduction in exponential family 88 | #' nonlinear models. *Biometrika*, **96**, 793-804. \doi{10.1093/biomet/asp055}. 89 | #' 90 | #' Kosmidis I, Firth D (2010). A generic algorithm for reducing 91 | #' bias in parametric estimation. *Electronic Journal of Statistics*, 92 | #' **4**, 1097-1112. \doi{10.1214/10-EJS579}. 93 | #' 94 | #' Kosmidis I (2014). Bias in parametric estimation: reduction and 95 | #' useful side-effects. *WIRE Computational Statistics*, **6**, 96 | #' 185-196. \doi{10.1002/wics.1296}. 97 | #' 98 | #' @docType package 99 | #' @aliases brglm2-package 100 | #' @name brglm2 101 | #' @import stats 102 | #' @import enrichwith 103 | #' @import Matrix 104 | #' @import MASS 105 | #' @importFrom graphics plot 106 | #' @importFrom nnet class.ind 107 | #' @importFrom numDeriv grad 108 | #' @useDynLib brglm2 109 | #' 110 | NULL 111 | 112 | ## NAMESPACE should have import(stats), import(Matrix) 113 | 114 | 115 | ## Suggestion by Kurt Hornik to avoid a warning related to the binding 116 | ## of n which is evaluated by family$initialize 117 | if (getRversion() >= "2.15.1") globalVariables(c("n", "lambda")) 118 | 119 | #' @export 120 | ordinal_superiority <- function(object, formula, data, 121 | measure = c("gamma", "Delta"), 122 | level = 0.95, 123 | bc = FALSE) { 124 | UseMethod("ordinal_superiority") 125 | } 126 | 127 | #' @export 128 | expo <- function(object, type = c("ML", "correction", "AS_median", "Lylesetal2012"), level = 0.95) { 129 | UseMethod("expo") 130 | } 131 | -------------------------------------------------------------------------------- /R/mis-link.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | #' A [`"link-glm"`][make.link] object for misclassified responses in binomial regression models 17 | #' 18 | #' [mis()] is a [`"link-glm"`][make.link] object that specifies the link function in Neuhaus (1999, expression (8)) for handling misclassified responses in binomial regression models using maximum likelihood. A prior specification of the sensitivity and specificity is required. 19 | #' 20 | #' @param link the baseline link to be used. 21 | #' @param sensitivity the probability of observing a success given that a success actually took place given any covariate values. 22 | #' @param specificity the probability of observing a failure given that a failure actually took place given any covariate values. 23 | #' 24 | #' @details 25 | #' 26 | #' `sensitivity + specificity` should be greater than or equal to 1, 27 | #' otherwise it is implied that the procedure producing the responses 28 | #' performs worse than chance in terms of misclassification. 29 | #' 30 | #' @references 31 | #' 32 | #' Neuhaus J M (1999). Bias and efficiency loss due to misclassified 33 | #' responses in binary regression. Biometrika, **86**, 843-855. 34 | #' \url{https://www.jstor.org/stable/2673589}. 35 | #' 36 | #' @seealso [glm()], [brglm_fit()] 37 | #' 38 | #' @examples 39 | #' 40 | #' ## Define a few links with some misclassification 41 | #' logit_mis <- mis(link = "logit", sensitivity = 0.9, specificity = 0.9) 42 | #' 43 | #' lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time 44 | #' 45 | #' lizardsML <- glm(lizards_f, family = binomial(logit), data = lizards) 46 | #' 47 | #' lizardsML_mis <- update(lizardsML, family = binomial(logit_mis), 48 | #' start = coef(lizardsML)) 49 | #' 50 | #' ## A notable change in the coefficients is noted here compared to when 51 | #' ## specificity and sensitivity are 1 52 | #' coef(lizardsML) 53 | #' coef(lizardsML_mis) 54 | #' 55 | #' ## Bias reduction is also possible 56 | #' update(lizardsML_mis, method = "brglmFit", type = "AS_mean", 57 | #' start = coef(lizardsML)) 58 | #' 59 | #' update(lizardsML_mis, method = "brglmFit", type = "AS_median", 60 | #' start = coef(lizardsML)) 61 | #' 62 | #' @export 63 | mis <- function(link = "logit", sensitivity = 1, specificity = 1) { 64 | link <- make.link(link) 65 | linkfun <- function(mu) { 66 | link$linkfun((mu -1 + specificity) / (sensitivity + specificity - 1)) 67 | } 68 | linkinv <- function(eta) { 69 | (sensitivity + specificity - 1) * link$linkinv(eta) + 1 - specificity 70 | } 71 | mu.eta <- function(eta) { 72 | (sensitivity + specificity - 1) * link$mu.eta(eta) 73 | } 74 | valideta <- function(eta) { 75 | TRUE 76 | } 77 | structure(list(linkfun = linkfun, 78 | linkinv = linkinv, 79 | mu.eta = mu.eta, 80 | valideta = valideta, 81 | name = "miss"), 82 | class = "link-glm") 83 | } 84 | 85 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License 
as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | unless_null <- function(x, if_null) { 17 | if (is.null(x)) { 18 | if_null 19 | } else { 20 | x 21 | } 22 | } 23 | 24 | 25 | 26 | get_type_description <- function(type, parenthesized = TRUE) { 27 | pp <- function(txt) { 28 | ifelse(parenthesized, paste0("(", txt, ")"), txt) 29 | } 30 | switch(type, 31 | "ML" = pp("maximum likelihood"), 32 | "correction" = pp("bias correction"), 33 | "AS_mean" = pp("mean bias-reducing adjusted score equations"), 34 | "AS_median" = pp("median bias-reducing adjusted score equations"), 35 | "AS_mixed" = pp("mixed bias-reducing adjusted score equations"), 36 | "MPL_Jeffreys" = pp("maximum penalized likelihood with Jeffreys'-prior penalty"), 37 | "Lylesetal2012" = pp("Lyles et al., 2012; doi: 10.1016/j.jspi.2012.05.005"), 38 | "correction*" = pp("explicit mean bias correction with a multiplicative adjustment"), 39 | "correction+" = pp("explicit mean bias correction with an additive adjustment") 40 | ) 41 | } 42 | -------------------------------------------------------------------------------- /R/warnings.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2020- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | 17 | function_moves_to_new_package <- function(function_name, 18 | removal_version, 19 | current_pkg, 20 | new_pkg, 21 | extra_message = NULL) { 22 | function_name <- paste0("'", function_name, "'") 23 | current_pkg <- paste0("'", current_pkg, "'") 24 | new_pkg <- paste0("'", new_pkg, "'") 25 | msg <- paste(function_name, 26 | "will be removed from", 27 | current_pkg, 28 | "at version", 29 | paste0(removal_version, "."), 30 | "A new version of", 31 | function_name, 32 | "is now maintained in the", 33 | new_pkg, 34 | "package.", 35 | extra_message) 36 | .Deprecated(msg = msg, package = current_pkg) 37 | } 38 | 39 | function_moved_to_new_package <- function(function_name, 40 | removal_version, 41 | current_pkg, 42 | new_pkg, 43 | extra_message = NULL) { 44 | function_name <- paste0("'", function_name, "'") 45 | current_pkg <- paste0("'", current_pkg, "'") 46 | new_pkg <- paste0("'", new_pkg, "'") 47 | msg <- paste(function_name, 48 | "has been removed from", 49 | current_pkg, 50 | "at version", 51 | paste0(removal_version, "."), 52 | "A new version of", 53 | function_name, 54 | "is now maintained in the", 55 | new_pkg, 56 | "package.", 57 | extra_message) 58 | .Defunct(msg = msg, package = current_pkg) 59 | } 60 | -------------------------------------------------------------------------------- /R/zzz_conventions.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | ## Some convenience names 17 | 18 | #' @rdname brglmControl 19 | #' @export 20 | brglm_control <- brglmControl 21 | 22 | #' @rdname brglmFit 23 | #' @export 24 | brglm_fit <- brglmFit 25 | 26 | #### Method conventions 27 | 28 | #' @rdname residuals.brmultinom 29 | #' @method residuals bracl 30 | #' @export 31 | residuals.bracl <- residuals.brmultinom 32 | 33 | -------------------------------------------------------------------------------- /data/aids.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/aids.rda -------------------------------------------------------------------------------- /data/alligators.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/alligators.rda -------------------------------------------------------------------------------- /data/coalition.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/coalition.rda -------------------------------------------------------------------------------- /data/endometrial.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/endometrial.rda -------------------------------------------------------------------------------- /data/enzymes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/enzymes.rda -------------------------------------------------------------------------------- /data/hepatitis.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/hepatitis.rda -------------------------------------------------------------------------------- /data/lizards.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/lizards.rda -------------------------------------------------------------------------------- /data/stemcell.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/stemcell.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | year <- sub("-.*", "", meta$Date) 2 | note <- sprintf("R package version %s", meta$Version) 3 | 4 | citHeader("To cite package 'brglm2' in publications use at least one of the following, as appropriate. 
The finiteness and shrinkage properties of the reduced-bias estimator that 'brglm2' computes by default for logistic regression are established in Kosmidis & Firth (2021).") 5 | 6 | c(bibentry(bibtype = "Manual", 7 | title = "{brglm2}: Bias Reduction in Generalized Linear Models", 8 | author = c(person(given = "Ioannis", family = "Kosmidis")), 9 | year = year, 10 | note = note, 11 | url = "https://CRAN.R-project.org/package=brglm2"), 12 | bibentry(bibtype = "article", 13 | title = "Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models", 14 | author = c(person(given = "Ioannis", family = "Kosmidis"), 15 | person(given = "David", family = "Firth")), 16 | year = 2021, 17 | journal = "Biometrika", 18 | volume = "108", 19 | pages = "71-82", 20 | url = "https://doi.org/10.1093/biomet/asaa052"), 21 | bibentry(bibtype = "article", 22 | title = "Mean and median bias reduction in generalized linear models", 23 | author = c(person(given = "Ioannis", family = "Kosmidis"), 24 | person(given = c("Euloge", "Clovis"), family = c("Kenne Pagui")), 25 | person(given = "Nicola", family = "Sartori")), 26 | year = 2020, 27 | journal = "Statistics and Computing", 28 | volume = "30", 29 | pages = "43-59", 30 | url = "https://doi.org/10.1007/s11222-019-09860-6")) 31 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Agresti 2 | Ames 3 | Andreson 4 | Biometrics 5 | Biometrika 6 | CMD 7 | Caterina 8 | Codecov 9 | Cordeiro 10 | DOI 11 | Guo 12 | Heinze 13 | ICPSR 14 | Jeffreys 15 | Jeffreys’ 16 | Jefreys 17 | Kateri 18 | Kenne 19 | Laver 20 | Licence 21 | Lyles 22 | McCullagh 23 | Nelder 24 | Neuhaus 25 | Pagui 26 | Palmgren 27 | Raphson 28 | Salvan 29 | Schemper 30 | TWD 31 | al 32 | arteria 33 | arxiv 34 | asaa 35 | asx 36 | behaviour 37 | biomet 38 | bracl 39 | brglm 40 | brglmFit 41 | brmultinom 42 | brnb 43 | bugreports 44 | ciep 45 | codebase 46 | coercible 47 | cran 48 | detectseparation 49 | df 50 | diag 51 | doi 52 | enrichwith 53 | equivariance 54 | et 55 | finiteness 56 | foodchoice 57 | frac 58 | fract 59 | github 60 | grahami 61 | http 62 | https 63 | infty 64 | iw 65 | jj 66 | ldots 67 | leftarrow 68 | lpSolveAPI 69 | misclassified 70 | mutagenicity 71 | nd 72 | neovasculation 73 | neovasculization 74 | numst 75 | opalinus 76 | overdispered 77 | pre 78 | priori 79 | pulsality 80 | quinoline 81 | resid 82 | revertant 83 | roxygen 84 | th 85 | tinytest 86 | traceback 87 | useR 88 | uterina 89 | vdots 90 | -------------------------------------------------------------------------------- /inst/brpr/brpr.R: -------------------------------------------------------------------------------- 1 | ## Bias-reduced Poisson and multinomial log-linear models 2 | ## using maximum penalized likelihood as in Firth (1993) Biometrika. 3 | ## 4 | ## Author: David Firth, d.firth@warwick.ac.uk, 2 Sep 2010 5 | ## 6 | ## NOT A POLISHED PIECE OF PUBLIC-USE SOFTWARE! Provided "as is", 7 | ## licensed under GPL2. NO WARRANTY OF FITNESS FOR ANY PURPOSE! 8 | ## 9 | ## This code still has lots of rough edges, some of which are 10 | ## indicated by the embedded comments. 11 | ## 12 | ## The intention is to include a more polished implementation as part 13 | ## of the CRAN package "brglm", in due course.
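## Illustrative usage sketch (not part of the original brpr.R sources): the
## call below is adapted from inst/brpr/examples.R further down and shows the
## intended calling pattern, with 'fixed.totals' indexing the groups of
## observations whose response totals are fixed, so that the Poisson fit
## corresponds to a reduced-bias multinomial logit fit:
##
##   library("MASS")  # provides the housing data used in examples.R
##   housbrpr <- brpr(Freq ~ Infl * Type * Cont + Sat * (Infl + Type + Cont),
##                    data = housing, fixed.totals = Infl:Type:Cont)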
14 | 15 | ## Arguments are as for a Poisson log-linear glm, 16 | ## with one new argument: 17 | ## -- "fixed.totals", a factor indexing groups of observations 18 | ## whose response totals are fixed (for example, this might be 19 | ## the rows of a table). 20 | ## If fixed.totals is NULL (the default), pure Poisson sampling (with 21 | ## no totals fixed) is assumed. 22 | 23 | brpr <- function(formula, 24 | data, 25 | subset, 26 | na.action, 27 | offset = NULL, 28 | control = glm.control(...), 29 | contrasts = NULL, 30 | fixed.totals = NULL, 31 | weights = NULL, 32 | ...) { 33 | ## The "weights" and "offset" arguments here can only be NULL -- anything 34 | ## else is an error (for now, at least) 35 | if (!is.null(weights)) stop("The weights argument here can only be NULL") 36 | if (!is.null(offset)) stop("The offset argument here can only be NULL") 37 | ## work needed here, to allow the use of offset as an argument? 38 | 39 | ## Initial setup as in glm (except that `prior weights' are not used here, 40 | ## and an offset (if any) must be specified through the formula rather 41 | ## than (as is also allowed by glm) as a separate argument: 42 | call <- match.call() 43 | theFormula <- formula 44 | if (missing(data)) 45 | data <- environment(formula) 46 | if (is.null(call$epsilon)) control$epsilon <- 1e-8 47 | ## Is 1e-8 too stringent? Work needed here! 48 | fixed.totals <- eval(substitute(fixed.totals), envir = data) 49 | mf <- match.call(expand.dots = FALSE) 50 | m <- match(c("formula", "data", "subset", "na.action", 51 | "offset", "fixed.totals"), names(mf), 0) 52 | mf <- mf[c(1, m)] 53 | mf$drop.unused.levels <- TRUE 54 | mf[[1]] <- as.name("model.frame") 55 | mf <- eval(mf, parent.frame()) 56 | mt <- attr(mf, "terms") 57 | Y <- model.response(mf, "numeric") 58 | 59 | ## Correct the fixed.totals variable for subsetting, NA treatment, etc. 60 | if (!is.null(fixed.totals)) { 61 | fixed.totals <- mf$"(fixed.totals)" 62 | if (!is.factor(fixed.totals)) stop("fixed.totals must be a factor") 63 | rowTotals <- as.vector(tapply(Y, fixed.totals, sum))[fixed.totals] 64 | } 65 | 66 | ## Do an initial fit of the model, with constant-adjusted counts 67 | adj <- 0.5 68 | X <- model.matrix(formula, data = mf) 69 | offset <- model.offset(mf) 70 | nFixed <- if (is.null(fixed.totals)) 0 else nlevels(fixed.totals) 71 | mf$y.adj <- Y + adj * (ncol(X) - nFixed)/nrow(X) 72 | formula <- update(formula, y.adj ~ .) 
73 | op <- options(warn = -1) 74 | fit <- glm.fit(X, mf$y.adj, family = poisson(), offset = offset, 75 | control = glm.control(maxit = 1)) 76 | options(op) 77 | ## Update the model iteratively, refining the adjustment at each iteration 78 | criterion <- 1 79 | iter <- 1 80 | coefs <- coef(fit) 81 | Xwork <- X[ , !is.na(coefs), drop = FALSE] 82 | muAdj <- fitted(fit) 83 | coefs <- na.omit(coefs) 84 | while (criterion > control$epsilon && iter < control$maxit) { 85 | iter <- iter + 1 86 | if (!is.null(fixed.totals)){ 87 | muTotals <- as.vector(tapply(muAdj, fixed.totals, 88 | sum))[fixed.totals] 89 | mu <- muAdj * rowTotals / muTotals 90 | } else { ## case fixed.totals is NULL 91 | mu <- muAdj 92 | } 93 | W.X <- sqrt(mu) * Xwork 94 | XWXinv <- chol2inv(chol(crossprod(W.X))) 95 | coef.se <- sqrt(diag(XWXinv)) 96 | h <- diag(Xwork %*% XWXinv %*% t(mu * Xwork)) 97 | mf$y.adj <- Y + h * adj 98 | z <- log(muAdj) + (mf$y.adj - muAdj)/muAdj 99 | lsfit <- lm.wfit(Xwork, z, muAdj, offset = offset) 100 | criterion <- max((abs(lsfit$coefficients - coefs))/coef.se) 101 | if (control$trace) cat("Iteration ", iter, 102 | ": largest (scaled) coefficient change is ", 103 | criterion, "\n", sep = "") 104 | coefs <- lsfit$coefficients 105 | muAdj <- exp(lsfit$fitted.values) 106 | } 107 | 108 | ## The object `lsfit' uses *adjusted* counts, fitted values, etc. -- 109 | ## so we need to correct various things before returning (so that, for example, 110 | ## deviance and standard errors are correct). 111 | fit$coefficients[!is.na(fit$coefficients)] <- coefs 112 | fit$y <- Y 113 | fit$fitted.values <- fit$weights <- mu 114 | fit$linear.predictors <- log(mu) 115 | dev.resids <- poisson()$dev.resids 116 | wt <- fit$prior.weights 117 | fit$deviance <- sum(dev.resids(Y, mu, wt)) 118 | fit$null.deviance <- sum(dev.resids(Y, mean(Y), wt)) 119 | fit$aic <- NA # sum((poisson()$aic)(Y, 1, mu, wt, 0)) ?? work needed! 120 | fit$residuals <- Y/mu - 1 121 | 122 | ## Next bit is straight from glm -- not sure it applies here (work needed!) 123 | if (any(offset) && attr(mt, "intercept") > 0) { 124 | fit$null.deviance <- glm.fit(x = X[, "(Intercept)", drop = FALSE], 125 | y = Y, weights = rep(1, nrow(mf)), 126 | offset = offset, family = poisson(), 127 | intercept = TRUE)$deviance 128 | } 129 | 130 | fit$iter <- iter 131 | fit$model <- mf 132 | fit$na.action <- attr(mf, "na.action") 133 | fit$x <- X 134 | fit$qr <- qr(sqrt(mu) * X) 135 | fit$R <- qr.R(fit$qr, complete = TRUE) 136 | ## The "effects" component of the fit object is almost certainly not 137 | ## correct as is -- but where does it actually get used? (work needed!) 138 | ## rownames(fit$R) <- colnames(fit$R) ## FIX THIS! 139 | fit <- c(fit, list(call = call, formula = theFormula, terms = mt, 140 | data = data, offset = offset, method = "brpr", 141 | contrasts = attr(X, "contrasts"), 142 | xlevels = .getXlevels(mt, mf))) 143 | class(fit) <- c("glm", "lm") 144 | return(fit) 145 | } 146 | -------------------------------------------------------------------------------- /inst/brpr/examples.R: -------------------------------------------------------------------------------- 1 | ## Some examples of the use of "brpr" to fit a multinomial logit 2 | ## model by penalized (reduced-bias) maximum likelihood. 3 | ## 4 | ## Examples prepared by I. 
Kosmidis, University College London 5 | 6 | source(system.file("inst", "brpr/brpr.R", package = "brglm2")) 7 | 8 | library(pmlr) 9 | library(MASS) # for data(housing) 10 | library(brglm) # for the penalizedDeviance profileModel objective 11 | library(gnm) # for pickCoef and expandCategorical used in coefinds and expandMult 12 | 13 | ## coefinds takes as input the glm object that brpr results and 14 | ## returns the location of the coefficients for the fitted multinomial 15 | ## model. 16 | coefinds <- function(obj) { 17 | require(gnm) 18 | cats <- as.character(obj$call$formula[3]) 19 | cats <- strsplit(gsub(" ", "", cats), "\\*\\(")[[1]][1] 20 | cats <- rev(strsplit(cats, "\\+")[[1]])[1] 21 | cc <- pickCoef(obj, cats) 22 | cc 23 | } 24 | 25 | ## expandMult will epxand the multinomial data set so that all 26 | ## available categories are represented in each covariate class. If 27 | ## there are non-available categories in the original data set they 28 | ## are included in the resulted data frame and are assigned frequency 29 | ## zero. 30 | expandMult <- function(data, catvar, countsvar) { 31 | require(gnm) 32 | temp <- expandCategorical(data = data, catvar = catvar, 33 | sep = ".", countvar = "CcCounts", 34 | idvar = "id", as.ordered = FALSE, 35 | group = TRUE) 36 | ## Get the correct category counts 37 | temp[[countsvar]] <- temp[[countsvar]] * temp$CcCounts 38 | temp$CcCounts <- temp$id <- NULL 39 | temp 40 | } 41 | 42 | 43 | ##################################################################### 44 | ## Analysis of the Coppenhagen housing data set in the MASS library 45 | ##################################################################### 46 | data(housing) 47 | contrasts(housing$Sat) <- contr.treatment(3, base = 1) 48 | 49 | housbrpr <- brpr(Freq ~ Infl * Type * Cont + 50 | Sat * (Infl + Type + Cont), data = housing, 51 | fixed.totals = Infl:Type:Cont) 52 | houspmlr <- pmlr(Sat ~ Infl + Type + Cont, weights = Freq, 53 | data = housing, method = "wald") 54 | 55 | ## The estimates are the same at least up to 5 decimals 56 | coef(housbrpr)[coefinds(housbrpr)] 57 | houspmlr$coefficients 58 | 59 | 60 | ##################################################################### 61 | ## Analysis of the hepatitis data set in Bull et. al. (2007) 62 | ##################################################################### 63 | data(hepatitis) 64 | ## Construct a variable with the multinomial categories according to 65 | ## the HCV and nonABC columns 66 | hepat <- hepatitis 67 | hepat$type <- with(hepat, factor(1 - HCV*nonABC + HCV + 2 * nonABC)) 68 | hepat$type <- factor(hepat$type, labels = c("noDisease", "C", "nonABC")) 69 | contrasts(hepat$type) <- contr.treatment(3, base = 1) 70 | ## compare the result with the one that pmlr gives 71 | hepatnew <- expandMult(data = hepat, catvar = "type", countsvar = "counts") 72 | heppmlr <- pmlr(type ~ group + time + group:time, 73 | data = hepatnew, weights = counts, method = "wald", 74 | penalized = TRUE) 75 | hepbrpr <- brpr(counts ~ group*time + type*(group*time), 76 | fixed.totals = group:time, data = hepatnew, 77 | epsilon = 1e-14) ## very strict epsilon 78 | 79 | ## The estimates are the same at least up to 5 decimals 80 | coef(hepbrpr)[coefinds(hepbrpr)] 81 | heppmlr$coefficients 82 | 83 | ## Speed comparison (it takes a while to run the next 2 lines!) 84 | ## Shows that brpr with epsilon 1e-14 is roughly 5 times faster 85 | ## than pmlr for this data set. 
86 | system.time(for (i in 1:100) { 87 | pmlr(type ~ group + time + group:time, 88 | data = hepatnew, weights = counts, method = "wald", 89 | penalized = TRUE) }) 90 | system.time(for (i in 1:100) { 91 | brpr(counts ~ group*time + type*(group*time), 92 | fixed.totals = group:time, data = hepatnew, 93 | epsilon = 1e-14) }) 94 | 95 | ## Profile confidence intervals based on the penalized likelihood 96 | hepbrprCIs <- confintModel(hepbrpr, objective = "penalizedDeviance", 97 | quantile = qchisq(0.95, 1), 98 | X = model.matrix(hepbrpr), 99 | which = coefinds(hepbrpr), 100 | method = "zoom", 101 | endpoint.tolerance = 1e-04) 102 | heppmlr <- pmlr(type ~ group + time + group:time, 103 | data = hepatnew, weights = counts, 104 | method = "likelihood", penalized = TRUE) 105 | nn <- dimnames(heppmlr$coefficients) 106 | coefnames <- c(paste(nn[[2]], nn[[3]][1], sep = ":"), 107 | paste(nn[[2]], nn[[3]][2], sep = ":")) 108 | heppmlrCIs <- cbind(heppmlr$CI.lower, heppmlr$CI.upper) 109 | rownames(heppmlrCIs) <- coefnames 110 | 111 | # The confidence intervals from pmlr and profileModel seem to agree 112 | # only with slight differences. But as far as those differences are 113 | # concerned, I trust confintModel's result; those confidence intervals 114 | # are obtained through binary search. 115 | heppmlrCIs 116 | hepbrprCIs 117 | 118 | 119 | ##################################################################### 120 | ## Analysis of the enzymes data set in ?pmlr 121 | ##################################################################### 122 | data(enzymes) 123 | ## Exclude patients in Group 4 (post-necrotic cirrhosis) 124 | enzymes <- enzymes[enzymes$Group != 4,] 125 | ## Center and scale covariates 126 | AST <- scale(log(enzymes$AST)) 127 | ALT <- scale(log(enzymes$ALT)) 128 | GLDH <- scale(log(enzymes$GLDH)) 129 | OCT <- scale(log(enzymes$OCT)) 130 | enzymes <- data.frame(Patient = enzymes$Patient, 131 | Group = enzymes$Group, AST, ALT, GLDH, OCT) 132 | ## Remove 10 observations to create separation 133 | enzymes <- enzymes[-c(9, 18, 33, 58, 61, 77, 94, 97, 99, 100),] 134 | ## Multinomial: acute viral hepatitis and aggressive chronic hepatits 135 | ## vs. persistent chronic hepatitis 136 | ## Assign Group 2 (persistent chronic hepatitis) as baseline category 137 | enzymes$Group <- factor(enzymes$Group, levels=c("2","1","3")) 138 | ## Re-express data set in an appropriate form for brpr 139 | enzymes$counts <- rep(1, nrow(enzymes)) 140 | enzymes$ind <- factor(1:131) 141 | enz <- expandMult(enzymes, "Group", "counts") 142 | enzpmlr <- pmlr(Group ~ AST + GLDH, weights = counts, 143 | data = enz, method = "wald") 144 | enzbrpr <- brpr(counts ~ -1 + ind + Group * (AST + GLDH), 145 | fixed.totals = ind, data = enz, 146 | epsilon = 1e-14) 147 | ## brpr appears slower than pmlr in this case, most probably because 148 | ## of the large number of nuisances. 149 | 150 | ## The estimates are the same at least up to 5 decimals 151 | coef(enzbrpr)[coefinds(enzbrpr)] 152 | enzpmlr$coefficients 153 | -------------------------------------------------------------------------------- /inst/brpr/timings.R: -------------------------------------------------------------------------------- 1 | ## Some examples of the use of "brpr" to fit a multinomial logit 2 | ## model by penalized (reduced-bias) maximum likelihood. 3 | ## 4 | ## Examples prepared by I. 
Kosmidis, University College London 5 | 6 | source(system.file("inst", "brpr/brpr.R", package = "brglm2")) 7 | 8 | library("pmlr") 9 | library("MASS") # for data(housing) 10 | library("gnm") # for pickCoef and expandCategorical used in coefinds and expandMult 11 | 12 | ## coefinds takes as input the glm object that brpr results and 13 | ## returns the location of the coefficients for the fitted multinomial 14 | ## model. 15 | coefinds <- function(obj) { 16 | require(gnm) 17 | cats <- as.character(obj$call$formula[3]) 18 | cats <- strsplit(gsub(" ", "", cats), "\\*\\(")[[1]][1] 19 | cats <- rev(strsplit(cats, "\\+")[[1]])[1] 20 | cc <- pickCoef(obj, cats) 21 | cc 22 | } 23 | 24 | ## expandMult will epxand the multinomial data set so that all 25 | ## available categories are represented in each covariate class. If 26 | ## there are non-available categories in the original data set they 27 | ## are included in the resulted data frame and are assigned frequency 28 | ## zero. 29 | expandMult <- function(data, catvar, countsvar) { 30 | require(gnm) 31 | temp <- expandCategorical(data = data, catvar = catvar, 32 | sep = ".", countvar = "CcCounts", 33 | idvar = "id", as.ordered = FALSE, 34 | group = TRUE) 35 | ## Get the correct category counts 36 | temp[[countsvar]] <- temp[[countsvar]] * temp$CcCounts 37 | temp$CcCounts <- temp$id <- NULL 38 | temp 39 | } 40 | 41 | timing <- function(..., R = 10) { 42 | system.time(replicate(R, ...)) 43 | } 44 | 45 | ##################################################################### 46 | ## Analysis of the Coppenhagen housing data set in the MASS library 47 | ##################################################################### 48 | data("housing", package = "MASS") 49 | contrasts(housing$Sat) <- contr.treatment(3, base = 1) 50 | 51 | timing(housbrpr <- brpr(Freq ~ Infl * Type * Cont + 52 | Sat * (Infl + Type + Cont), data = housing, 53 | fixed.totals = Infl:Type:Cont, epsilon = 1e-06), R = 50) 54 | timing(houspmlr <- pmlr(Sat ~ Infl + Type + Cont, weights = Freq, 55 | data = housing, method = "wald"), R = 50) 56 | timing(housbrmultinom <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 57 | data = housing, epsilon = 1e-06), R = 50) 58 | 59 | ##################################################################### 60 | ## Analysis of the hepatitis data set in Bull et. al. 
(2007)
61 | #####################################################################
62 | data("hepatitis", package = "pmlr")
63 | ## Construct a variable with the multinomial categories according to
64 | ## the HCV and nonABC columns
65 | hepat <- hepatitis
66 | hepat$type <- with(hepat, factor(1 - HCV*nonABC + HCV + 2 * nonABC))
67 | hepat$type <- factor(hepat$type, labels = c("noDisease", "C", "nonABC"))
68 | contrasts(hepat$type) <- contr.treatment(3, base = 1)
69 | ## compare the result with the one that pmlr gives
70 | hepatnew <- expandMult(data = hepat, catvar = "type", countsvar = "counts")
71 | 
72 | timing(heppmlr <- pmlr(type ~ group + time + group:time,
73 |                        data = hepatnew, weights = counts, method = "wald",
74 |                        penalized = TRUE), R = 50)
75 | timing(hepbrmultinom <- brmultinom(type ~ group + time + group:time,
76 |                                    data = hepatnew, weights = counts,
77 |                                    epsilon = 1e-06), R = 50)
78 | timing(hepbrpr <- brpr(counts ~ group*time + type*(group*time),
79 |                        fixed.totals = group:time, data = hepatnew,
80 |                        epsilon = 1e-06), R = 50)
81 | 
82 | #####################################################################
83 | ## Analysis of the enzymes data set in ?pmlr
84 | #####################################################################
85 | data("enzymes", package = "pmlr")
86 | ## Exclude patients in Group 4 (post-necrotic cirrhosis)
87 | enzymes <- enzymes[enzymes$Group != 4,]
88 | ## Center and scale covariates
89 | AST <- scale(log(enzymes$AST))
90 | ALT <- scale(log(enzymes$ALT))
91 | GLDH <- scale(log(enzymes$GLDH))
92 | OCT <- scale(log(enzymes$OCT))
93 | enzymes <- data.frame(Patient = enzymes$Patient,
94 |                       Group = enzymes$Group, AST, ALT, GLDH, OCT)
95 | ## Remove 10 observations to create separation
96 | enzymes <- enzymes[-c(9, 18, 33, 58, 61, 77, 94, 97, 99, 100),]
97 | ## Multinomial: acute viral hepatitis and aggressive chronic hepatitis
98 | ## vs.
persistent chronic hepatitis 99 | ## Assign Group 2 (persistent chronic hepatitis) as baseline category 100 | enzymes$Group <- factor(enzymes$Group, levels=c("2","1","3")) 101 | ## Re-express data set in an appropriate form for brpr 102 | enzymes$counts <- rep(1, nrow(enzymes)) 103 | enzymes$ind <- factor(1:131) 104 | enz <- expandMult(enzymes, "Group", "counts") 105 | timing(enzpmlr <- pmlr(Group ~ AST + GLDH, weights = counts, 106 | data = enz, method = "wald"), R = 10) 107 | timing(enzbrpr <- brpr(counts ~ -1 + ind + Group * (AST + GLDH), 108 | fixed.totals = ind, data = enz, 109 | epsilon = 1e-14), R = 10) 110 | timing(enzbrmultinom <- brmultinom(Group ~ AST + GLDH, weights = counts, 111 | data = enz, 112 | epsilon = 1e-14), R = 10) 113 | ## max(abs(matrix(enzbrpr$fitted.values, ncol = 3, byrow = TRUE) - enzbrmultinom$fitted.values)) 114 | 115 | 116 | timing <- function(..., R = 10) { 117 | system.time(replicate(R, ...)) 118 | } 119 | -------------------------------------------------------------------------------- /inst/glmbias_code+results/alligator_simulation_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/inst/glmbias_code+results/alligator_simulation_results.rda -------------------------------------------------------------------------------- /inst/glmbias_code+results/birth_weight_simulation_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/inst/glmbias_code+results/birth_weight_simulation_results.rda -------------------------------------------------------------------------------- /inst/glmbias_code+results/clotting_simulation_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/inst/glmbias_code+results/clotting_simulation_results.rda -------------------------------------------------------------------------------- /inst/glmbias_code+results/glmbias_gamma_simulation.R: -------------------------------------------------------------------------------- 1 | ## simulation study: clotting dataset 2 | 3 | library("brglm2") 4 | source("glmbias_simulation_functions.R") 5 | 6 | clotting <- data.frame( 7 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 8 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 9 | lot = factor(c(rep(1, 9), rep(2, 9)))) 10 | 11 | clot_ML <- glm(conc ~ lot*log(u), data = clotting, family = Gamma(link="log"), 12 | method = "brglmFit", type = "ML", maxit=1000, epsilon = 1e-8) 13 | 14 | truepar <- c(coef(clot_ML),clot_ML$dispersion) 15 | clotting_simulation_results <- sim_clot_log(10000, clotting, truepar, 123) 16 | clotting_simulation_results <- summarySim(clotting_simulation_results) 17 | 18 | bias <- clotting_simulation_results$BIAS 19 | sd <- clotting_simulation_results$SD 20 | rib <- clotting_simulation_results$RIB 21 | pu <- clotting_simulation_results$PU 22 | mae <- clotting_simulation_results$MAE 23 | cov <- clotting_simulation_results$COV_b 24 | mleg <- cbind(bias[1, ], sd[1, ], rib[1, ], pu[1, ], mae[1, ], cov[1, ]) 25 | mle <- cbind(bias[2, ], sd[2, ], rib[2, ], pu[2, ], mae[2, ], cov[2, ]) 26 | meanBR <- cbind(bias[4, ], sd[4, ], rib[4, ], pu[4, ], mae[4, ], cov[4, ]) 27 | medianBR <- cbind(bias[5, ], sd[5, ], rib[5, ], pu[5, 
], mae[5, ], cov[5, ]) 28 | meanmixed <- cbind(bias[6, ], sd[6, ], rib[6, ], pu[6, ], mae[6, ], cov[6, ]) 29 | 30 | save(mle, meanBR, medianBR, meanmixed, file = "clotting_simulation_results.rda") 31 | -------------------------------------------------------------------------------- /inst/glmbias_code+results/glmbias_logistic_simulation.R: -------------------------------------------------------------------------------- 1 | ## simulation study: infant birth weights 2 | 3 | library("MASS") 4 | library("brglm2") 5 | 6 | ## Prepare the birth weight data set 7 | bwt <- with(birthwt, { 8 | age <- age 9 | racewhite <- ifelse(race==1,1,0) 10 | smoke <- smoke 11 | ptl <- ifelse(ptl>0,1,0) 12 | ptd <- factor(ptl > 0) 13 | ht <- ht 14 | loglwt <- log(lwt) 15 | data.frame(normwt = 1-low, age, racewhite, smoke, ptl,ht,loglwt,ftv) 16 | }) 17 | bwt <- subset(bwt, subset = (ftv==0), select= -c(ftv)) 18 | 19 | ## true models 20 | bwt_ml <- glm(normwt ~ ., family = binomial, data = bwt) 21 | beta <- coef(bwt_ml) 22 | 23 | ## random seed 24 | set.seed(435) 25 | 26 | ## simulated response variable 27 | Nsim <- 10000 28 | simdata <- simulate(bwt_ml, nsim = Nsim) 29 | 30 | ml <- br <- bc <- mbr <- ml_se <- br_se <- bc_se <- mbr_se <- separation <- 31 | matrix(NA, nrow = Nsim, ncol = length(beta)) 32 | 33 | for (i in 1:Nsim) { 34 | current_data <- within(bwt, { normwt <- simdata[[i]] }) 35 | if (i%%100 == 0) print(i) 36 | ml_fit <- update(bwt_ml, data = current_data) 37 | br_fit <- update(bwt_ml, method = "brglmFit", type = "AS_mean", data = current_data) 38 | bc_fit <- update(bwt_ml, method = "brglmFit", type = "correction", data = current_data) 39 | mbr_fit <- update(bwt_ml, method = "brglmFit", type = "AS_median", data = current_data) 40 | sep_fit <- update(ml_fit, method = "detect_separation") 41 | 42 | sum_ml <- summary(ml_fit) 43 | sum_brmean <- summary(br_fit) 44 | sum_bcorr <- summary(bc_fit) 45 | sum_brmedian <- summary(mbr_fit) 46 | 47 | ml[i, ] <- sum_ml$coef[, 1] 48 | ml_se[i, ] <- sum_ml$coef[, 2] 49 | br[i, ] <- sum_brmean$coef[, 1] 50 | br_se[i, ] <- sum_brmean$coef[, 2] 51 | bc[i, ] <- sum_bcorr$coef[, 1] 52 | bc_se[i, ] <- sum_bcorr$coef[, 2] 53 | mbr[i, ] <- sum_brmedian$coef[, 1] 54 | mbr_se[i, ] <- sum_brmedian$coef[, 2] 55 | separation[i, ] <- sep_fit$betas 56 | } 57 | 58 | ml.inc <- apply(separation, 1, function(b) all(b == 0)) 59 | 60 | ## bias in beta parameterization 61 | bias.beta <- data.frame(ml = colMeans(ml[ml.inc, ]), 62 | br = colMeans(br), 63 | mbr = colMeans(mbr)) - beta 64 | ## bias in psi parameterization 65 | bias.psi <- data.frame(ml = colMeans(exp(ml[ml.inc, ])), 66 | br = colMeans(exp(br)), 67 | mbr = colMeans(exp(mbr))) - exp(beta) 68 | ## Coverage of 95% Wald confidence intervals 69 | ml.ci.l <- ml[ml.inc, ] - qnorm(0.975) * ml_se[ml.inc, ] 70 | ml.ci.u <- ml[ml.inc, ] + qnorm(0.975) * ml_se[ml.inc, ] 71 | br.ci.l <- br - qnorm(0.975) * br_se 72 | br.ci.u <- br + qnorm(0.975) * br_se 73 | mbr.ci.l <- mbr - qnorm(0.975) * mbr_se 74 | mbr.ci.u <- mbr + qnorm(0.975) * mbr_se 75 | coverage <- data.frame(ml = rowMeans(t(ml.ci.l) < beta & t(ml.ci.u) > beta), 76 | br = rowMeans(t(br.ci.l) < beta & t(br.ci.u) > beta), 77 | mbr = rowMeans(t(mbr.ci.l) < beta & t(mbr.ci.u) > beta)) 78 | ## probability of underestimation 79 | PU <- data.frame(ml = rowMeans(t(ml[ml.inc, ]) < beta), 80 | br = rowMeans(t(br) < beta), 81 | mbr = rowMeans(t(mbr) < beta)) 82 | 83 | save(bias.beta, bias.psi, PU, coverage, file = "birth_weight_simulation_results.rda") 84 | 
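85 | ## A minimal sketch of how the saved summaries could be inspected afterwards;
86 | ## it assumes only that "birth_weight_simulation_results.rda" has been produced
87 | ## by the save() call above, and uses the object names saved there.
88 | ## load("birth_weight_simulation_results.rda")
89 | ## round(cbind(bias.beta, coverage, PU), 3)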
-------------------------------------------------------------------------------- /inst/glmbias_code+results/glmbias_multinomial_simulation.R: -------------------------------------------------------------------------------- 1 | ###################### 2 | ## Simulation study ## 3 | ###################### 4 | 5 | library("plyr") 6 | library("doMC") 7 | library("brglm2") 8 | library("nnet") 9 | 10 | ## Use 18 cores for the simulation 11 | registerDoMC(18) 12 | 13 | all_ml <- brmultinom(foodchoice ~ size + lake , weights = freq, 14 | data = alligators, 15 | ref = 1, 16 | type = "ML") 17 | true_coefs <- coef(all_ml) 18 | 19 | agresti_contrasts <- list(lake = contr.treatment(levels(alligators$lake), 20 | base = 4), 21 | size = contr.treatment(levels(alligators$size), 22 | base = 2)) 23 | 24 | ## Contrasts matrix 25 | mat <- cbind(c(1, 1, 1, 0, 0), 26 | c(0, -1, 0, 0, 0), 27 | c(0, 0, -1, 1, 0), 28 | c(0, 0, -1, 0, 1), 29 | c(0, 0, -1, 0, 0)) 30 | 31 | ## coef(all_ml_agresti) %*% mat - true_coefs 32 | 33 | 34 | ## Model specific and not at all efficient! 35 | simulate_alligators <- function(fitted, total_factor = 1) { 36 | nams <- apply(alligators[c("lake", "gender", "size")], 1, 37 | paste0, collapse = "|") 38 | totals <- round(total_factor * tapply(alligators$freq, nams, sum)) 39 | ## this is ordered 40 | oo <- order(nams) 41 | al <- alligators[oo, ] 42 | fitted <- fitted[oo, ] 43 | inds <- which(!duplicated(al[, c("lake", "gender", "size")])) 44 | covariate_settings <- al[inds, c("lake", "gender", "size")] 45 | freq <- sapply(seq.int(totals), function(j) rmultinom(1, totals[j], 46 | fitted[inds[j], ])) 47 | out <- data.frame(foodchoice = colnames(fitted), 48 | covariate_settings[rep(rownames(covariate_settings), 49 | each = nrow(freq)),] , 50 | freq = c(freq)) 51 | out$foodchoice <- factor(out$foodchoice, 52 | levels = levels(alligators$foodchoice)) 53 | out 54 | } 55 | 56 | nsimu <- 10000 57 | factors <- c(0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 3, 4, 5) 58 | 59 | res <- as.list(numeric(length(factors))) 60 | 61 | for (k in seq_along(factors)) { 62 | set.seed(123) 63 | seeds <- sample(seq.int(nsimu*1000), nsimu, replace = FALSE) 64 | datasets <- llply(seeds, function(seed) { 65 | set.seed(seed) 66 | dat <- simulate_alligators(fitted(all_ml), 67 | total_factor = factors[k]) 68 | }, .parallel = TRUE) 69 | res[[k]] <- llply(seq_along(datasets), function(j) { 70 | current_data <- datasets[[j]] 71 | ml_fit <- multinom(foodchoice ~ size + lake , weights = freq, 72 | data = current_data, trace = FALSE) 73 | ml_c <- coef(ml_fit) 74 | ml_se <- summary(ml_fit)$standard.errors 75 | br_mean_fit <- brmultinom(foodchoice ~ size + lake , 76 | weights = freq, 77 | data = current_data, ref = "Fish", 78 | trace = FALSE, 79 | type = "AS_mean") 80 | br_mean_c <- coef(br_mean_fit) 81 | br_mean_se <- summary(br_mean_fit)$standard.errors 82 | br_median_fit <- brmultinom(foodchoice ~ size + lake , 83 | weights = freq, 84 | data = current_data, ref = "Fish", 85 | trace = FALSE, 86 | type = "AS_median") 87 | br_median_c <- coef(br_median_fit) 88 | br_median_se <- summary(br_median_fit)$standard.errors 89 | br_median_gamma_fit <- brmultinom(foodchoice ~ size + lake , 90 | weights = freq, 91 | data = current_data, 92 | ref = "Fish", 93 | contrasts = agresti_contrasts, 94 | trace = FALSE, 95 | type = "AS_median") 96 | br_median_gamma_c <- coef(br_median_gamma_fit) %*% mat 97 | colnames(br_median_gamma_c) <- colnames(ml_c) 98 | if (j %% 10 == 0) cat(factors[k], "|", j, "\n") 99 | list(ml = ml_c, 100 | br_mean = br_mean_c, 101 | 
br_median = br_median_c, 102 | br_median_gamma = br_median_gamma_c, 103 | ml_se = ml_se, br_mean_se = br_mean_se, 104 | br_median_se = br_median_se) 105 | }, .parallel = TRUE) 106 | } 107 | 108 | ml_coefs <- lapply(res, function(x) lapply(x, "[[", "ml")) 109 | ml_ses <- lapply(res, function(x) lapply(x, "[[", "ml_se")) 110 | ml_infinite <- lapply(ml_ses, function(x) 111 | sapply(x, function(y) any(is.na(y)) | any(y > 100, 112 | na.rm = TRUE))) 113 | ml_prob_separation <- sapply(ml_infinite, mean) 114 | ml_bias <- lapply(ml_coefs, function(x) 100*(Reduce("+", x)/nsimu - 115 | true_coefs)/true_coefs) 116 | ml_pu <- lapply(ml_coefs, function(x) 100*Reduce("+", lapply(x, "<", 117 | true_coefs))/nsimu) 118 | ## Mean bias reduction 119 | br_mean_coefs <- lapply(res, function(x) lapply(x, "[[", "br_mean")) 120 | br_mean_bias <- lapply(br_mean_coefs, function(x) 100*(Reduce("+", x)/ 121 | nsimu - true_coefs)/true_coefs) 122 | br_mean_pu <- lapply(br_mean_coefs, function(x) 100*Reduce("+", 123 | lapply(x, "<", true_coefs))/nsimu) 124 | ## Median bias reduction 125 | br_median_coefs <- lapply(res, function(x) 126 | lapply(x, "[[", "br_median")) 127 | br_median_bias <- lapply(br_median_coefs, 128 | function(x) 100*(Reduce("+", x)/ 129 | nsimu - true_coefs)/true_coefs) 130 | br_median_pu <- lapply(br_median_coefs, function(x) 100*Reduce("+", 131 | lapply(x, "<", true_coefs))/nsimu) 132 | ## Median bias reduction 133 | br_median_gamma_coefs <- lapply(res, function(x) 134 | lapply(x, "[[", "br_median_gamma")) 135 | br_median_gamma_bias <- lapply(br_median_gamma_coefs, 136 | function(x) 100*(Reduce("+", x)/nsimu - 137 | true_coefs)/true_coefs) 138 | br_median_gamma_pu <- lapply(br_median_gamma_coefs, 139 | function(x) 100*Reduce("+", 140 | lapply(x, "<", true_coefs))/nsimu) 141 | ## Prepare for plotting 142 | ml_bias_matrix <- do.call("rbind", ml_bias) 143 | categories <- rownames(ml_bias_matrix) 144 | ml_results <- stack(as.data.frame(ml_bias_matrix)) 145 | ml_results$category <- categories 146 | ml_results$factor <- rep(factors, each = 4) 147 | ml_results$method <- "ml" 148 | ml_results$pu <- stack(as.data.frame(do.call("rbind", ml_pu)))$values 149 | br_mean_bias_matrix <- do.call("rbind", br_mean_bias) 150 | categories <- rownames(br_mean_bias_matrix) 151 | br_mean_results <- stack(as.data.frame(br_mean_bias_matrix)) 152 | br_mean_results$category <- categories 153 | br_mean_results$factor <- rep(factors, each = 4) 154 | br_mean_results$method <- "mean BR" 155 | br_mean_results$pu <- stack(as.data.frame(do.call("rbind", br_mean_pu)))$values 156 | br_median_bias_matrix <- do.call("rbind", br_median_bias) 157 | categories <- rownames(br_median_bias_matrix) 158 | br_median_results <- stack(as.data.frame(br_median_bias_matrix)) 159 | br_median_results$category <- categories 160 | br_median_results$factor <- rep(factors, each = 4) 161 | br_median_results$method <- "median BR" 162 | br_median_results$pu <- stack(as.data.frame(do.call("rbind", br_median_pu)))$values 163 | br_median_gamma_bias_matrix <- do.call("rbind", br_median_gamma_bias) 164 | categories <- rownames(br_median_gamma_bias_matrix) 165 | br_median_gamma_results <- stack(as.data.frame(br_median_gamma_bias_matrix)) 166 | br_median_gamma_results$category <- categories 167 | br_median_gamma_results$factor <- rep(factors, each = 4) 168 | br_median_gamma_results$method <- "median BR gamma" 169 | br_median_gamma_results$pu <- stack(as.data.frame(do.call("rbind", br_median_gamma_pu)))$values 170 | 171 | 172 | save(br_mean_results, br_median_results, 
br_median_gamma_results, file = "alligator_simulation_results.rda") 173 | -------------------------------------------------------------------------------- /inst/mbrpr/mbrpr.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | ### Median bias reduction in poisson regression ############ 3 | ############################################################# 4 | mod1 <- function(X,mu,InfoInv) { 5 | X<-as.matrix(X) 6 | n <- nrow(X) 7 | p <- ncol(X) 8 | nu_r_s_t <- array(0,c(p,p,p)) 9 | k1 <- k2 <- k3 <- rep(NA,p) 10 | for(r in 1:p) { 11 | nu_r_s_t[r,,] = t(X)%*%(X[,r]*mu*X) 12 | } 13 | k2 <- 1/diag(InfoInv) 14 | barA <- InfoInv*k2 15 | for (r in 1:p) { 16 | sum_s1 <- rep(0,p) 17 | sum_s3 <- rep(0,p) 18 | nu.tu <- InfoInv-outer(InfoInv[,r]*k2[r],InfoInv[,r]) 19 | for (s in 1:p){ 20 | sum_s1[s] <- sum(diag(nu.tu%*%(nu_r_s_t[s,,]))) 21 | sum_s3[s] <- sum((barA[r,]%*%nu_r_s_t[s,,])*barA[r,]) 22 | } 23 | k1[r] <- -0.5*sum(sum_s1*barA[r,]) 24 | k3[r] <- sum(sum_s3*barA[r,]) 25 | } 26 | return(-k1/k2+k3/(6*k2^2)) 27 | } 28 | 29 | 30 | mbrpr <- function(par,y,X,eps=1e-06,maxit=500) { 31 | step <- .Machine$integer.max 32 | nIter <- 0 33 | test <- TRUE 34 | while ( test & (nIter < maxit)) { 35 | nIter <- nIter + 1 36 | eta = drop(X%*%par) 37 | mu <- exp(eta) 38 | info <- t(X)%*%(W<-diag(mu))%*%X 39 | score <- t(X)%*%(y-mu) 40 | InfoInv <- try(chol2inv(chol(info)),TRUE) 41 | if(failedInv <- inherits(InfoInv, "try-error")) { 42 | warning("failed to invert the information matrix: iteration stopped prematurely") 43 | break 44 | } 45 | mod <- mod1(X, mu, InfoInv) 46 | modscore <- score + info%*%mod 47 | y.adj <- X%*%(par+mod)+ solve(diag(mu))%*%(y-mu) 48 | par <- InfoInv%*%t(X)%*%(W%*%y.adj) 49 | test <- sqrt(crossprod(drop(modscore))) > eps 50 | } 51 | converged <- nIter < maxit 52 | list(coefficients=drop(par),converged=converged,nIter=nIter,InfoInv=InfoInv) 53 | } 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /inst/tinytest/brpr.R: -------------------------------------------------------------------------------- 1 | ## Bias-reduced Poisson and multinomial log-linear models 2 | ## using maximum penalized likelihood as in Firth (1993) Biometrika. 3 | ## 4 | ## Author: David Firth, d.firth@warwick.ac.uk, 2 Sep 2010 5 | ## 6 | ## NOT A POLISHED PIECE OF PUBLIC-USE SOFTWARE! Provided "as is", 7 | ## licensed under GPL2. NO WARRANTY OF FITNESS FOR ANY PURPOSE! 8 | ## 9 | ## This code still has lots of rough edges, some of which are 10 | ## indicated by the embedded comments. 11 | ## 12 | ## The intention is to include a more polished implementation as part 13 | ## of the CRAN package "brglm", in due course. 14 | 15 | ## Arguments are as for a Poisson log-linear glm, 16 | ## with one new argument: 17 | ## -- "fixed.totals", a factor indexing groups of observations 18 | ## whose response totals are fixed (for example, this might be 19 | ## the rows of a table). 20 | ## If fixed.totals is NULL (the default), pure Poisson sampling (with 21 | ## no totals fixed) is assumed. 22 | 23 | brpr <- function(formula, 24 | data, 25 | subset, 26 | na.action, 27 | offset = NULL, 28 | control = glm.control(...), 29 | contrasts = NULL, 30 | fixed.totals = NULL, 31 | weights = NULL, 32 | ...) 
{ 33 | ## The "weights" and "offset" arguments here can only be NULL -- anything 34 | ## else is an error (for now, at least) 35 | if (!is.null(weights)) stop("The weights argument here can only be NULL") 36 | if (!is.null(offset)) stop("The offset argument here can only be NULL") 37 | ## work needed here, to allow the use of offset as an argument? 38 | 39 | ## Initial setup as in glm (except that `prior weights' are not used here, 40 | ## and an offset (if any) must be specified through the formula rather 41 | ## than (as is also allowed by glm) as a separate argument: 42 | call <- match.call() 43 | theFormula <- formula 44 | if (missing(data)) 45 | data <- environment(formula) 46 | if (is.null(call$epsilon)) control$epsilon <- 1e-8 47 | ## Is 1e-8 too stringent? Work needed here! 48 | fixed.totals <- eval(substitute(fixed.totals), envir = data) 49 | mf <- match.call(expand.dots = FALSE) 50 | m <- match(c("formula", "data", "subset", "na.action", 51 | "offset", "fixed.totals"), names(mf), 0) 52 | mf <- mf[c(1, m)] 53 | mf$drop.unused.levels <- TRUE 54 | mf[[1]] <- as.name("model.frame") 55 | mf <- eval(mf, parent.frame()) 56 | mt <- attr(mf, "terms") 57 | Y <- model.response(mf, "numeric") 58 | 59 | ## Correct the fixed.totals variable for subsetting, NA treatment, etc. 60 | if (!is.null(fixed.totals)) { 61 | fixed.totals <- mf$"(fixed.totals)" 62 | if (!is.factor(fixed.totals)) stop("fixed.totals must be a factor") 63 | rowTotals <- as.vector(tapply(Y, fixed.totals, sum))[fixed.totals] 64 | } 65 | 66 | ## Do an initial fit of the model, with constant-adjusted counts 67 | adj <- 0.5 68 | X <- model.matrix(formula, data = mf) 69 | offset <- model.offset(mf) 70 | nFixed <- if (is.null(fixed.totals)) 0 else nlevels(fixed.totals) 71 | mf$y.adj <- Y + adj * (ncol(X) - nFixed)/nrow(X) 72 | formula <- update(formula, y.adj ~ .) 73 | op <- options(warn = -1) 74 | fit <- glm.fit(X, mf$y.adj, family = poisson(), offset = offset, 75 | control = glm.control(maxit = 1)) 76 | options(op) 77 | ## Update the model iteratively, refining the adjustment at each iteration 78 | criterion <- 1 79 | iter <- 1 80 | coefs <- coef(fit) 81 | Xwork <- X[ , !is.na(coefs), drop = FALSE] 82 | muAdj <- fitted(fit) 83 | coefs <- na.omit(coefs) 84 | while (criterion > control$epsilon && iter < control$maxit) { 85 | iter <- iter + 1 86 | if (!is.null(fixed.totals)){ 87 | muTotals <- as.vector(tapply(muAdj, fixed.totals, 88 | sum))[fixed.totals] 89 | mu <- muAdj * rowTotals / muTotals 90 | } else { ## case fixed.totals is NULL 91 | mu <- muAdj 92 | } 93 | W.X <- sqrt(mu) * Xwork 94 | XWXinv <- chol2inv(chol(crossprod(W.X))) 95 | coef.se <- sqrt(diag(XWXinv)) 96 | h <- diag(Xwork %*% XWXinv %*% t(mu * Xwork)) 97 | mf$y.adj <- Y + h * adj 98 | z <- log(muAdj) + (mf$y.adj - muAdj)/muAdj 99 | lsfit <- lm.wfit(Xwork, z, muAdj, offset = offset) 100 | criterion <- max((abs(lsfit$coefficients - coefs))/coef.se) 101 | if (control$trace) cat("Iteration ", iter, 102 | ": largest (scaled) coefficient change is ", 103 | criterion, "\n", sep = "") 104 | coefs <- lsfit$coefficients 105 | muAdj <- exp(lsfit$fitted.values) 106 | } 107 | 108 | ## The object `lsfit' uses *adjusted* counts, fitted values, etc. -- 109 | ## so we need to correct various things before returning (so that, for example, 110 | ## deviance and standard errors are correct). 
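## Specifically, the code below (i) restores the converged coefficients into
## the initial glm fit, (ii) replaces the adjusted response, fitted values and
## working weights with the original counts Y and the final mu (already
## rescaled to match the fixed totals when fixed.totals is supplied), and
## (iii) recomputes the deviance, null deviance and working residuals from Y
## and mu rather than from the adjusted counts.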
111 | fit$coefficients[!is.na(fit$coefficients)] <- coefs 112 | fit$y <- Y 113 | fit$fitted.values <- fit$weights <- mu 114 | fit$linear.predictors <- log(mu) 115 | dev.resids <- poisson()$dev.resids 116 | wt <- fit$prior.weights 117 | fit$deviance <- sum(dev.resids(Y, mu, wt)) 118 | fit$null.deviance <- sum(dev.resids(Y, mean(Y), wt)) 119 | fit$aic <- NA # sum((poisson()$aic)(Y, 1, mu, wt, 0)) ?? work needed! 120 | fit$residuals <- Y/mu - 1 121 | 122 | ## Next bit is straight from glm -- not sure it applies here (work needed!) 123 | if (any(offset) && attr(mt, "intercept") > 0) { 124 | fit$null.deviance <- glm.fit(x = X[, "(Intercept)", drop = FALSE], 125 | y = Y, weights = rep(1, nrow(mf)), 126 | offset = offset, family = poisson(), 127 | intercept = TRUE)$deviance 128 | } 129 | 130 | fit$iter <- iter 131 | fit$model <- mf 132 | fit$na.action <- attr(mf, "na.action") 133 | fit$x <- X 134 | fit$qr <- qr(sqrt(mu) * X) 135 | fit$R <- qr.R(fit$qr, complete = TRUE) 136 | ## The "effects" component of the fit object is almost certainly not 137 | ## correct as is -- but where does it actually get used? (work needed!) 138 | ## rownames(fit$R) <- colnames(fit$R) ## FIX THIS! 139 | fit <- c(fit, list(call = call, formula = theFormula, terms = mt, 140 | data = data, offset = offset, method = "brpr", 141 | contrasts = attr(X, "contrasts"), 142 | xlevels = .getXlevels(mt, mf))) 143 | class(fit) <- c("glm", "lm") 144 | return(fit) 145 | } 146 | -------------------------------------------------------------------------------- /inst/tinytest/mbrpr.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | ### Median bias reduction in poisson regression ############ 3 | ############################################################# 4 | mod1 <- function(X,mu,InfoInv) { 5 | X<-as.matrix(X) 6 | n <- nrow(X) 7 | p <- ncol(X) 8 | nu_r_s_t <- array(0,c(p,p,p)) 9 | k1 <- k2 <- k3 <- rep(NA,p) 10 | for(r in 1:p) { 11 | nu_r_s_t[r,,] = t(X)%*%(X[,r]*mu*X) 12 | } 13 | k2 <- 1/diag(InfoInv) 14 | barA <- InfoInv*k2 15 | for (r in 1:p) { 16 | sum_s1 <- rep(0,p) 17 | sum_s3 <- rep(0,p) 18 | nu.tu <- InfoInv-outer(InfoInv[,r]*k2[r],InfoInv[,r]) 19 | for (s in 1:p){ 20 | sum_s1[s] <- sum(diag(nu.tu%*%(nu_r_s_t[s,,]))) 21 | sum_s3[s] <- sum((barA[r,]%*%nu_r_s_t[s,,])*barA[r,]) 22 | } 23 | k1[r] <- -0.5*sum(sum_s1*barA[r,]) 24 | k3[r] <- sum(sum_s3*barA[r,]) 25 | } 26 | return(-k1/k2+k3/(6*k2^2)) 27 | } 28 | 29 | 30 | mbrpr <- function(par,y,X,eps=1e-06,maxit=500) { 31 | step <- .Machine$integer.max 32 | nIter <- 0 33 | test <- TRUE 34 | while ( test & (nIter < maxit)) { 35 | nIter <- nIter + 1 36 | eta = drop(X%*%par) 37 | mu <- exp(eta) 38 | info <- t(X)%*%(W<-diag(mu))%*%X 39 | score <- t(X)%*%(y-mu) 40 | InfoInv <- try(chol2inv(chol(info)),TRUE) 41 | if(failedInv <- inherits(InfoInv, "try-error")) { 42 | warning("failed to invert the information matrix: iteration stopped prematurely") 43 | break 44 | } 45 | mod <- mod1(X, mu, InfoInv) 46 | modscore <- score + info%*%mod 47 | y.adj <- X%*%(par+mod)+ solve(diag(mu))%*%(y-mu) 48 | par <- InfoInv%*%t(X)%*%(W%*%y.adj) 49 | test <- sqrt(crossprod(drop(modscore))) > eps 50 | } 51 | converged <- nIter < maxit 52 | list(coefficients=drop(par),converged=converged,nIter=nIter,InfoInv=InfoInv) 53 | } 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /inst/tinytest/test-binomial.R: 
-------------------------------------------------------------------------------- 1 | library("brglm") 2 | data("lizards", package = "brglm2") 3 | links <- lapply(c("logit", "probit", "cloglog", "cauchit"), make.link) 4 | 5 | tol <- 1e-08 6 | for (l in seq_along(links)) { 7 | expect_warning( 8 | lizardsBRlegacy <- brglm(cbind(grahami, opalinus) ~ height + diameter + 9 | light + time, family = binomial(links[[l]]), 10 | data = lizards, method = "brglm.fit", 11 | br.epsilon = 1e-10, br.maxit = 1000) 12 | ) 13 | 14 | expect_warning( 15 | lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + 16 | light + time, family = binomial(links[[l]]), data=lizards, 17 | method = "brglmFit", epsilon = 1e-10, maxit = 1000) 18 | ) 19 | ## glm with brglm.fit method and brglm_0 return the same coefficients for the lizards when link is links[[l]]$name) 20 | expect_equal(coef(lizardsBR), coef(lizardsBRlegacy), tolerance = tol) 21 | } 22 | 23 | 24 | ## Performance comparisons BR versus ML 25 | ## link1 <- "cauchit" 26 | ## system.time(replicate(100, {lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, method = "brglmFit", epsilon = 1e-10, maxit = 1000, type = "ML")})) 27 | 28 | ## system.time(replicate(100, {lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, method = "glm.fit", epsilon = 1e-10, maxit = 1000)})) 29 | 30 | ## link1 <- "cauchit" 31 | ## system.time(replicate(100, {lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, method = "brglmFit", epsilon = 1e-10, maxit = 1000)})) 32 | 33 | ## system.time(replicate(100, {lizardsBR <- brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, br.epsilon = 1e-10, br.maxit = 1000)})) 34 | -------------------------------------------------------------------------------- /inst/tinytest/test-bracl.R: -------------------------------------------------------------------------------- 1 | library("VGAM") 2 | 3 | ### tests for bracl and its agreement with other methods 4 | 5 | ## Stem cell research data from Agresti (2017, Analysis of ordinal data, Table 4.1) 6 | freq <- c(34, 41, 58, 21, 30, 64, 7 | 67, 83, 63, 52, 52, 50, 8 | 30, 23, 15, 24, 18, 16, 9 | 25, 14, 12, 15, 11, 11) 10 | fund_research <- factor(rep(c("definitely", "probably", "probably not", "definitely not"), each = 6), 11 | levels = c("definitely", "probably", "probably not", "definitely not"), 12 | ordered = TRUE) 13 | gender <- factor(rep(rep(c("female", "male"), each = 3), 4), levels = c("male", "female")) 14 | ## gender <- rep(rep(c(1, 0), each = 3), 4) 15 | religion <- factor(rep(c("fundamentalist", "moderate", "liberal"), 8), 16 | levels = c("fundamentalist", "moderate", "liberal"), 17 | ordered = TRUE) 18 | stemcell <- data.frame(frequency = freq, research = fund_research, gender = gender, religion = religion) 19 | 20 | ff <- matrix(freq, nrow = 4, byrow = TRUE) 21 | re <- c(1, 2, 3, 1, 2, 3) 22 | ge <- c(1, 1, 1, 0, 0, 0) 23 | 24 | ## With proportional odds 25 | suppressWarnings(fit_vgam_p <- vglm(cbind(ff[1, ], ff[2, ], ff[3, ], ff[4, ]) ~ re + ge, family = acat(reverse = TRUE, parallel = TRUE))) 26 | 27 | 28 | expect_warning( 29 | fit_bracl_p <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = stemcell, type = "ML", parallel = TRUE) 30 | ) 31 | 32 | 33 | fit_vgam <- vglm(cbind(ff[1, ], ff[2, ], ff[3, ], ff[4, ]) ~ re + 
ge, family = acat(reverse = TRUE, parallel = FALSE))
34 | ## Without proportional odds
35 | expect_warning(
36 |     fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = stemcell, type = "ML")
37 | )
38 | 
39 | tol <- 1e-06
40 | ## VGAM::vglm and bracl return the same coefficients
41 | expect_equal(unname(coef(fit_vgam)), unname(coef(fit_bracl)), tolerance = tol)
42 | expect_equal(unname(coef(fit_vgam_p)), unname(coef(fit_bracl_p)), tolerance = tol)
43 | 
44 | ## Difference in deviance is the same with VGAM::vglm and bracl
45 | expect_equal(logLik(fit_vgam) - logLik(fit_vgam_p),
46 |              unclass(logLik(fit_bracl) - logLik(fit_bracl_p)), tolerance = tol,
47 |              check.attributes = FALSE)
48 | 
49 | 
50 | ## logLik returns the correct df for bracl
51 | expect_identical(attr(logLik(fit_bracl), "df"), as.integer(9))
52 | expect_identical(attr(logLik(fit_bracl_p), "df"), as.integer(5))
53 | 
54 | ## bracl returns the correct fitted values
55 | expect_equal(fitted(fit_bracl)[1:6, ], fit_vgam@fitted.values, tolerance = tol,
56 |              check.attributes = FALSE)
57 | expect_equal(fitted(fit_bracl_p)[1:6, ], fit_vgam_p@fitted.values, tolerance = tol,
58 |              check.attributes = FALSE)
59 | 
60 | shu <- function(dat) dat[sample(seq.int(nrow(dat)), nrow(dat)), ]
61 | 
62 | ## bracl results are invariant to shuffling of the data
63 | for (j in 1:10) {
64 |     expect_warning(
65 |         fit_bracl_p_r <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = shu(stemcell), type = "ML", parallel = TRUE)
66 |     )
67 | 
68 |     expect_warning(
69 |         fit_bracl_r <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = shu(stemcell), type = "ML", parallel = FALSE)
70 |     )
71 | 
72 |     expect_equal(coef(fit_bracl), coef(fit_bracl_r), tolerance = tol)
73 |     expect_equal(coef(fit_bracl_p), coef(fit_bracl_p_r), tolerance = tol)
74 | }
75 | 
76 | tol <- 1e-02
77 | ## vcov method for bracl returns the correct vcov matrix
78 | expect_equal(vcov(fit_vgam_p), vcov(fit_bracl_p), tolerance = tol, check.attributes = FALSE)
79 | expect_equal(vcov(fit_vgam), vcov(fit_bracl), tolerance = tol, check.attributes = FALSE)
80 | 
81 | s1 <- summary(fit_bracl)
82 | s1p <- summary(fit_bracl_p)
83 | s2 <- summary(fit_vgam)
84 | s2p <- summary(fit_vgam_p)
85 | 
86 | tol <- 1e-06
87 | ## summary method for bracl returns the correct coef mat
88 | expect_equal(coef(s1), s2@coef3, tolerance = tol, check.attributes = FALSE)
89 | expect_equal(coef(s1p), s2p@coef3, tolerance = tol, check.attributes = FALSE)
90 | 
91 | newdata <- expand.grid(gender = c("male", "female"), religion = c("moderate", "fundamentalist"))
92 | ## predict.bracl works as expected
93 | pp <- predict(fit_bracl_p, newdata = stemcell, type = "probs")
94 | p <- predict(fit_bracl, newdata = stemcell, type = "probs")
95 | expect_equal(predict(fit_vgam_p, type = "response"),
96 |              pp[19:24, ],
97 |              tolerance = 1e-06, check.attributes = FALSE)
98 | expect_equal(predict(fit_vgam, type = "response"),
99 |              p[19:24, ],
100 |              tolerance = 1e-06, check.attributes = FALSE)
101 | 
102 | ## no intercept returns warning
103 | expect_warning(
104 |     fit_bracl_p_r <- bracl(research ~ -1 + as.numeric(religion) + gender, weights = frequency, data = shu(stemcell), type = "ML", parallel = FALSE)
105 | )
106 | 
107 | ## prediction with NAs works
108 | newd <- newdata
109 | newd[3, 2] <- NA
110 | expect_true(is.na(predict(fit_bracl_p, newd, "class")[3]))
111 | expect_true(all(is.na(predict(fit_bracl_p, newd, "probs")[3, ])))
112 | 
113 | 
114 | 
115 | ## simulate from bracl
objects returns a data frame with expected characteristics 116 | simu_df <- simulate(fit_bracl_p) 117 | nam_mf <- names(model.frame(fit_bracl_p)) 118 | nam_simu <- names(simu_df) 119 | expect_identical(nrow(simu_df), 120 | nrow(stemcell) * nlevels(stemcell$research)) 121 | expect_identical(levels(simu_df$research), 122 | levels(stemcell$research)) 123 | expect_identical(is.ordered(simu_df$research), 124 | is.ordered(stemcell$research)) 125 | expect_identical(nam_mf[!(nam_mf %in% nam_simu)], 126 | "(weights)") 127 | expect_identical(nam_simu[!(nam_simu %in% nam_mf)], 128 | as.character(fit_bracl_p$call$weights)) 129 | expect_identical(nam_simu[(nam_simu %in% nam_mf)], 130 | nam_mf[(nam_mf %in% nam_simu)]) 131 | 132 | ## simulate from bracl objects returns a data frame with expected characteristics 133 | simu_df <- simulate(fit_bracl) 134 | nam_mf <- names(model.frame(fit_bracl)) 135 | nam_simu <- names(simu_df) 136 | expect_identical(nrow(simu_df), 137 | nrow(stemcell) * nlevels(stemcell$research)) 138 | expect_identical(levels(simu_df$research), 139 | levels(stemcell$research)) 140 | expect_identical(is.ordered(simu_df$research), 141 | is.ordered(stemcell$research)) 142 | expect_identical(nam_mf[!(nam_mf %in% nam_simu)], 143 | "(weights)") 144 | expect_identical(nam_simu[!(nam_simu %in% nam_mf)], 145 | as.character(fit_bracl$call$weights)) 146 | expect_identical(nam_simu[(nam_simu %in% nam_mf)], 147 | nam_mf[(nam_mf %in% nam_simu)]) 148 | -------------------------------------------------------------------------------- /inst/tinytest/test-brglmControl.R: -------------------------------------------------------------------------------- 1 | b_control <- brglmControl() 2 | 3 | ## the object brglmControl() returns is as expected 4 | expect_identical(b_control$epsilon, 1e-06) 5 | expect_identical(b_control$maxit, 100) 6 | expect_false(b_control$trace) 7 | expect_null(b_control$response_adjustment) 8 | expect_equal(b_control$Trans, expression(dispersion)) 9 | expect_equal(b_control$inverseTrans, expression(transformed_dispersion)) 10 | expect_equal(b_control$transformation, "identity") 11 | expect_equal(b_control$slowit, 1) 12 | expect_equal(b_control$max_step_factor, 12) 13 | 14 | b_control <- brglmControl(epsilon = 1e-02, trace = TRUE, response_adjustment = c(0.3, 0.2)) 15 | ## the object brglmControl returns with defaults is as expected 16 | expect_identical(b_control$epsilon, 1e-02) 17 | expect_identical(b_control$maxit, 100) 18 | expect_true(b_control$trace) 19 | expect_identical(b_control$response_adjustment, c(0.3, 0.2)) 20 | expect_equal(b_control$Trans, expression(dispersion)) 21 | expect_equal(b_control$inverseTrans, expression(transformed_dispersion)) 22 | expect_equal(b_control$transformation, "identity") 23 | expect_equal(b_control$slowit, 1) 24 | expect_equal(b_control$max_step_factor, 12) 25 | 26 | data("coalition", package = "brglm2") 27 | mod <- glm(duration ~ fract + numst2, family = Gamma, data = coalition, method = "brglmFit") 28 | expect_warning(mod <- update(mod, maxit = 7, transformation = "sqrt"), "brglmFit: algorithm did not converge") 29 | expect_equal(mod$iter, 7) 30 | expect_equal(mod$transformation, "sqrt") 31 | 32 | b_control <- brglmControl(epsilon = 1e-02, ABCDEFG123 = 1, response_adjustment = c(0.3, 0.2), trace = TRUE, ) 33 | ## the object brglmControl returns with defaults is as expected 34 | expect_identical(b_control$epsilon, 1e-02) 35 | expect_identical(b_control$maxit, 100) 36 | expect_true(b_control$trace) 37 | expect_identical(b_control$response_adjustment, 
c(0.3, 0.2)) 38 | expect_equal(b_control$Trans, expression(dispersion)) 39 | expect_equal(b_control$inverseTrans, expression(transformed_dispersion)) 40 | expect_equal(b_control$transformation, "identity") 41 | expect_equal(b_control$slowit, 1) 42 | expect_equal(b_control$max_step_factor, 12) 43 | 44 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 45 | clotting <- data.frame( 46 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 47 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 48 | lot = factor(c(rep(1, 9), rep(2, 9)))) 49 | 50 | clot_formula <- conc ~ lot*log(u) + I(2*log(u)) 51 | 52 | ## check_aliasing option works as expected 53 | expect_error(glm(clot_formula, data = clotting, family = Gamma(), method = brglm_fit, check_aliasing = FALSE), 54 | pattern = "NA/NaN/Inf in foreign") 55 | mod <- glm(clot_formula, data = clotting, family = Gamma(), method = brglm_fit, check_aliasing = TRUE) 56 | expect_true(is.na(coef(mod)["I(2 * log(u))"])) 57 | ## check defaults 58 | mod <- glm(clot_formula, data = clotting, family = Gamma(), method = brglm_fit) 59 | expect_true(is.na(coef(mod)["I(2 * log(u))"])) 60 | 61 | ## data("sex2", package = "logistf") 62 | ## ## brglmControl arguments can be passed directly from the brglmFit call 63 | ## expect_warning(glm(case ~ dia, data = sex2, family = binomial(), method = brglm_fit)) 64 | ## ## Set response_adjustment to 2 just to avoid non-integer successes warning 65 | ## m1 <- glm(case ~ dia, data = sex2, family = binomial(), method = brglm_fit, response_adjustment = 2) 66 | ## expect_equal(m1$iter, 7, tolerance = .Machine$double.eps/2) 67 | -------------------------------------------------------------------------------- /inst/tinytest/test-brnp.R: -------------------------------------------------------------------------------- 1 | library("MASS") 2 | 3 | salmonella <- data.frame(freq = c(15, 16, 16, 27, 33, 20, 4 | 21, 18, 26, 41, 38, 27, 5 | 29, 21, 33, 60, 41, 42), 6 | dose = rep(c(0, 10, 33, 100, 333, 1000), 3), 7 | observation = rep(1:3, each = 6)) 8 | 9 | 10 | salmonella_fm <- freq ~ dose + log(dose+10) 11 | 12 | fitML_glmnb <- glm.nb(salmonella_fm, data = salmonella) 13 | fitML <- brnb(salmonella_fm, data = salmonella, link = "log", transformation = "identity", type = "ML") 14 | fitBR_mean <- update(fitML, type = "AS_mean") 15 | fitBR_median <- update(fitML, type = "AS_median") 16 | fitBR_mixed <- update(fitML, type = "AS_mixed") 17 | fitBC_mean <- update(fitML, type = "correction") 18 | 19 | ## brnp returns the same estimates as MASS::glm.nb 20 | expect_equal(coef(fitML), coef(fitML_glmnb), tol = 1e-06) 21 | 22 | ## brnp returns mean BR and median BR estimates as in Kenne Pagui et al (2021+) 23 | expect_equal(coef(fitBR_mean, model = "full"), 24 | c(2.216, -0.001, 0.309, 0.065), 25 | check.attributes = FALSE, tol = 1e-03) 26 | expect_equal(coef(fitBR_median, model = "full"), 27 | c(2.211, -0.001, 0.309, 0.069), 28 | check.attributes = FALSE, tol = 1e-03) 29 | expect_equal(coef(fitBC_mean, model = "full"), 30 | c(2.210, -0.001, 0.311, 0.063), 31 | check.attributes = FALSE, tol = 1e-03) 32 | 33 | ## fit using weights 34 | 35 | duptimes <- c(2,1,3,5,rep(1,14)) 36 | idx <- rep(1:nrow(salmonella), duptimes) 37 | dupsalmonella <- salmonella[idx,] 38 | 39 | fitMLe <- update(fitML, data = dupsalmonella) 40 | fitMLw <- update(fitML, weights = duptimes, data = salmonella) 41 | 42 | fitBR_mediane <- update(fitML, data = dupsalmonella, type = "AS_median") 43 | fitBR_medianw <- update(fitML, weights = duptimes, 
data = salmonella, type = "AS_median") 44 | 45 | fitBR_meane <- update(fitML, data = dupsalmonella, type = "AS_mean") 46 | fitBR_meanw <- update(fitML, weights = duptimes, data = salmonella, type = "AS_mean") 47 | 48 | fitBR_mixede <- update(fitML, data = dupsalmonella, type = "AS_mixed") 49 | fitBR_mixedw <- update(fitML, weights = duptimes, data = salmonella, type = "AS_mixed") 50 | 51 | fitBCe <- update(fitML, data = dupsalmonella, type = "AS_mixed") 52 | fitBCw <- update(fitML, weights = duptimes, data = salmonella, type = "AS_mixed") 53 | 54 | fitJe <- update(fitML, data = dupsalmonella, type = "MPL_Jeffreys", 55 | transformation = "inverse") 56 | fitJw <- update(fitML, weights = duptimes, data = salmonella, type = "MPL_Jeffreys", 57 | transformation = "inverse") 58 | 59 | ## prior weights work as expected 60 | ## all numerical results are the same 61 | expect_equal(coef(fitMLe, "full"), coef(fitMLw, "full"), tolerance = 1e-10) 62 | expect_equal(coef(fitBR_mediane, "full"), coef(fitBR_medianw, "full"), tolerance = 1e-10) 63 | expect_equal(coef(fitBR_mixede, "full"), coef(fitBR_mixedw, "full"), tolerance = 1e-10) 64 | expect_equal(coef(fitBR_meane, "full"), coef(fitBR_meanw, "full"), tolerance = 1e-10) 65 | expect_equal(coef(fitJe, "full"), coef(fitJw, "full"), tolerance = 1e-10) 66 | 67 | ## Dispersion transformations 68 | ## dispersion transformations work as expected for ML/mixed BR/median BR 69 | for (f0 in list(fitML, fitBR_median, fitBR_mixed)) { 70 | fsqrt <- update(f0, transformation = "sqrt") 71 | flog <- update(f0, transformation = "log") 72 | expect_equal(coef(fsqrt, model = "dispersion"), 73 | sqrt(coef(f0, model = "dispersion")), 74 | tol = 1e-05, check.attributes = FALSE) 75 | expect_equal(coef(flog, model = "dispersion"), 76 | log(coef(f0, model = "dispersion")), 77 | tol = 1e-05, check.attributes = FALSE) 78 | } 79 | 80 | 81 | ## dispersion transformations work as expected for mean BR 82 | f0 <- fitBR_mean 83 | fsqrt <- update(f0, transformation = "sqrt") 84 | flog <- update(f0, transformation = "log") 85 | expect_false(isTRUE(all.equal(coef(fsqrt, model = "dispersion"), 86 | sqrt(1 / coef(f0, model = "dispersion")), 87 | tol = 1e-04, check.attributes = FALSE))) 88 | expect_false(isTRUE(all.equal(coef(flog, model = "dispersion"), 89 | -log(coef(f0, model = "dispersion")), 90 | tol = 1e-04, check.attributes = FALSE))) 91 | 92 | ## error is produced for not implemented transformations 93 | expect_error(update(fitBR_mean, transformation = "asd")) 94 | 95 | -------------------------------------------------------------------------------- /inst/tinytest/test-checkinfinite.R: -------------------------------------------------------------------------------- 1 | ## endometrial data from Heinze \& Schemper (2002) (see ?endometrial) 2 | data("endometrial", package = "brglm2") 3 | expect_warning({ 4 | endometrial_ml <- glm(HG ~ I(-NV) + PI + EH, data = endometrial, 5 | family = binomial("cloglog")) 6 | }) 7 | 8 | expect_error(cie <- check_infinite_estimates(endometrial_ml)) 9 | -------------------------------------------------------------------------------- /inst/tinytest/test-correction.R: -------------------------------------------------------------------------------- 1 | library("enrichwith") 2 | 3 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 
300-2) 4 | clotting <- data.frame( 5 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 6 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 7 | lot = factor(c(rep(1, 9), rep(2, 9)))) 8 | 9 | mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma) 10 | emod <- enrich(mod, with = "all") 11 | 12 | coefs <- coef(emod, model = "mean") 13 | disp <- coef(emod, model = "dispersion") 14 | coefs_bc <- c(coefs, disp) - get_bias_function(emod)(coefs, disp) 15 | attributes(coefs_bc) <- NULL 16 | 17 | mod_bc <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, method = "brglmFit", type = "correction") 18 | 19 | ## bias corrected estimates computed using enrichwith are the same as those when having type = 'correction' in brglmFit 20 | expect_equal(unname(coefs_bc), unname(coef(mod_bc, model = "full")), check.attributes = FALSE, tolerance = 1e-06) 21 | 22 | -------------------------------------------------------------------------------- /inst/tinytest/test-dispersion.R: -------------------------------------------------------------------------------- 1 | data("anorexia", package = "MASS") 2 | 3 | anorexML <- glm(Postwt ~ Prewt + Treat+ offset(Prewt), 4 | family = gaussian, data = anorexia) 5 | anorexBR <- update(anorexML, method = "brglmFit", type = "AS_mean") 6 | 7 | tol <- sqrt(.Machine$double.eps) 8 | ## dispersion_ML is the usual biased estimate for the residual variance 9 | expect_equal(anorexBR$dispersion_ML, sum((anorexia$Postwt - fitted(anorexML))^2)/nrow(anorexia), 10 | tolerance = 1e-06) 11 | 12 | ## dispersion is the usual bias-corrected estimate for the residual variance 13 | expect_equal(anorexBR$dispersion, sum((anorexia$Postwt - fitted(anorexML))^2)/(nrow(anorexia) - length(coef(anorexML))), tolerance = 1e-06) 14 | 15 | 16 | ## context("dispersion parameter estimation") 17 | 18 | ## set.seed(123) 19 | ## N <- 20 20 | ## x <- matrix(rnorm(2*N), N, 2) 21 | ## y <- 0.3 + drop(x %*% c(-3, 1)) + rnorm(N, 0.4) 22 | 23 | ## fitML <- glm(y ~ x, family = gaussian()) 24 | ## fitBR <- update(fitML, method = "brglmFit") 25 | 26 | ## tol <- sqrt(.Machine$double.eps) 27 | ## test_that("dispersionML is the usual biased estimate for the residual variance", 28 | ## expect_equal(fitBR$dispersionML, sum((fitML$y - fitted(fitML))^2)/N)) 29 | 30 | ## test_that("dispersion is the usual bias-corrected estimate for the residual variance", 31 | ## expect_equal(fitBR$dispersion, sum((fitML$y - fitted(fitML))^2)/(N - 3))) 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /inst/tinytest/test-expo.R: -------------------------------------------------------------------------------- 1 | ## source(system.file("inst", "brglm0/brglm0.R", package = "brglm2")) 2 | data("lizards", package = "brglm2") 3 | 4 | lfit <- glm(cbind(grahami, opalinus) ~ height + diameter + 5 | light + time, family = binomial(), data=lizards, 6 | method = "brglmFit", type = "ML", epsilon = 1e-10, maxit = 1000) 7 | 8 | info <- get_information_function(lfit) 9 | 10 | start_methods <- c("AS_mixed", "AS_mean", "AS_median", "correction", "MPL_Jeffreys", "ML") 11 | keep <- c("coef", "se", "ci") 12 | 13 | ## ML 14 | or_est_ml <- exp(coef(lfit)) 15 | or_ses_ml <- sqrt(diag(diag(or_est_ml) %*% vcov(lfit) %*% diag(or_est_ml))) 16 | ml <- expo(update(lfit, type = "ML"), type = "ML") 17 | for (met in start_methods) { 18 | obj <- expo(update(lfit, type = met), type = "ML") 19 | expect_identical(ml[keep], obj[keep]) 20 | } 21 | expect_equal(coef(ml), 
or_est_ml) 22 | expect_equal(ml$se, or_ses_ml, check.attributes = FALSE) 23 | expect_equal(ml$ci, exp(confint(lfit))) 24 | 25 | ## Lyles et al (2012) 26 | lfit_mix <- update(lfit, type = "AS_mixed") 27 | or_est_bc1 <- exp(coef(lfit_mix)) * exp(-diag(vcov(lfit_mix))/2) 28 | or_ses_bc1 <- sqrt(diag(diag(or_est_bc1) %*% solve(info(log(or_est_bc1))) %*% diag(or_est_bc1))) 29 | for (met in start_methods) { 30 | bc1 <- expo(update(lfit_mix, type = met), type = "Lylesetal2012") 31 | expect_equal(coef(bc1), or_est_bc1) 32 | expect_equal(bc1$se, or_ses_bc1, check.attributes = FALSE) 33 | } 34 | 35 | ## correction* 36 | or_est_bc2 <- exp(coef(lfit_mix)) / (1 + diag(vcov(lfit_mix))/2) 37 | or_ses_bc2 <- sqrt(diag(diag(or_est_bc2) %*% solve(info(log(or_est_bc2))) %*% diag(or_est_bc2))) 38 | for (met in start_methods) { 39 | bc2 <- expo(update(lfit_mix, type = met), type = "correction*") 40 | expect_equal(coef(bc2), or_est_bc2) 41 | expect_equal(bc2$se, or_ses_bc2, check.attributes = FALSE) 42 | } 43 | 44 | ## correction+ 45 | or_est_bc3 <- exp(coef(lfit_mix)) * (1 - diag(vcov(lfit_mix))/2) 46 | or_ses_bc3 <- sqrt(diag(diag(or_est_bc3) %*% solve(info(log(or_est_bc3))) %*% diag(or_est_bc3))) 47 | for (met in start_methods) { 48 | bc3 <- expo(update(lfit, type = met), type = "correction+") 49 | expect_equal(coef(bc3), or_est_bc3) 50 | expect_equal(bc3$se, or_ses_bc3, check.attributes = FALSE) 51 | } 52 | 53 | ## AS median 54 | lfit_med <- update(lfit, type = "AS_median") 55 | or_est_med <- exp(coef(lfit_med)) 56 | or_ses_med <- sqrt(diag(diag(or_est_med) %*% solve(info(log(or_est_med))) %*% diag(or_est_med))) 57 | for (met in start_methods) { 58 | med <- expo(update(lfit_med, type = met), type = "AS_median") 59 | expect_equal(coef(med), or_est_med) 60 | expect_equal(med$se, or_ses_med, check.attributes = FALSE) 61 | } 62 | 63 | ## starting from a glm object 64 | expo_methods <- c("ML", "correction*", "correction+", "Lylesetal2012", "AS_median") 65 | lfit_glm <- glm(cbind(grahami, opalinus) ~ height + diameter + 66 | light + time, family = binomial(), data=lizards) 67 | for (met in expo_methods) { 68 | out_brglmFit <- expo(lfit_med, type = met)[keep] 69 | out_glm <- expo(lfit_glm, type = met)[keep] 70 | expect_equal(out_brglmFit, out_glm, tolerance = 1e-06) 71 | } 72 | 73 | expect_stdout(print(expo(lfit_glm)), "Odds ratios") 74 | 75 | ## ## Interpretation 76 | ## set.seed(123) 77 | ## dat <- data.frame(y = rexp(10), x = rnorm(10)) 78 | ## mod <- glm(y ~ x, family = Gamma("log"), data = dat) 79 | ## expo_mod <- expo(mod, type = "ML") 80 | ## expect_stdout(print(expo_mod), "Multiplicative effects to the mean") 81 | 82 | ## set.seed(111) 83 | ## dat <- data.frame(y = exp(rnorm(10)), x = rnorm(10)) 84 | ## mod <- glm(y ~ x, family = inverse.gaussian("log"), data = dat) 85 | ## expo_mod <- expo(mod, type = "ML") 86 | ## expect_stdout(print(expo_mod), "Multiplicative effects to the mean") 87 | 88 | ## set.seed(111) 89 | ## dat <- data.frame(x = rexp(10), y = rpois(10, 10)) 90 | ## mod <- glm(y ~ x, family = poisson("log"), data = dat) 91 | ## expo_mod <- expo(mod, type = "ML") 92 | ## expect_stdout(print(expo_mod), "Multiplicative effects to the mean") 93 | 94 | ## set.seed(111) 95 | ## dat <- data.frame(x = rexp(100, 0.5)) 96 | ## dat$y <- rbinom(100, 1, exp(-1 -0.2 * dat$x)) 97 | ## mod <- glm(y ~ x, family = binomial("log"), data = dat) 98 | ## expo_mod <- expo(mod, type = "ML") 99 | ## expect_stdout(print(expo_mod), "Relative risks") 100 | 101 | 102 | ## library(parallel) 103 | ## nsimu <- 1000 104 | ## 
set.seed(123) 105 | ## Y <- simulate(lfit, nsimu) 106 | ## true_psi <- exp(coef(lfit)) 107 | ## X <- model.matrix(lfit) 108 | 109 | ## methods <- c("ML", "correction+", "correction*", "Lylesetal2012", "AS_median") 110 | ## results <- as.list(numeric(length(methods))) 111 | ## names(results) <- methods 112 | ## for (met in methods) { 113 | ## results[[met]] <- mclapply(1:nsimu, function(k) { 114 | ## temp_data <- lizards 115 | ## temp_data[c("grahami", "opalinus")] <- Y[[k]] 116 | ## mod <- update(lfit, data = temp_data) 117 | ## expo(mod, met) 118 | ## }, mc.cores = 8) 119 | ## } 120 | 121 | ## get_bias <- function(res, truth) rowMeans(sapply(res, function(x) x$coef - truth)) 122 | ## get_pu <- function(res, truth) rowMeans(sapply(res, function(x) x$coef < truth)) 123 | ## get_mse <- function(res, truth) rowMeans(sapply(res, function(x) (x$coef - truth)^2)) 124 | ## get_avgv <- function(res, truth) rowMeans(sapply(res, function(x) x$se^2)) 125 | ## get_pu <- function(res, truth) rowMeans(sapply(res, function(x) x$coef < truth)) 126 | ## get_coverage <- function(res, truth) { 127 | ## rowMeans(sapply(res, function(x) { 128 | ## (x$ci[, 1] < truth) & (x$ci[, 2] > truth) 129 | ## })) 130 | ## } 131 | 132 | ## round(sapply(results, get_bias, truth = true_psi), 2) 133 | ## round(sapply(results, get_mse, truth = true_psi), 2) 134 | ## round(sapply(results, get_pu, truth = true_psi), 2) 135 | ## round(sapply(results, get_coverage, truth = true_psi), 2) 136 | ## vars <- sapply(results, get_mse, truth = true_psi) - sapply(results, get_bias, truth = true_psi)^2 137 | ## avgvars <- sapply(results, get_avgv, truth = true_psi) 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /inst/tinytest/test-gamma.R: -------------------------------------------------------------------------------- 1 | library("MASS") 2 | 3 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 4 | clotting <- data.frame( 5 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 6 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 7 | lot = factor(c(rep(1, 9), rep(2, 9)))) 8 | mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma) 9 | 10 | tol <- sqrt(.Machine$double.eps) 11 | ## ML estimate of gamma dispersion from brglmFit and from MASS::gamma.dispersion are the same 12 | expect_equal(update(mod, method = "brglmFit", epsilon = 1e-10)$dispersion_ML, MASS::gamma.dispersion(mod), tolerance = tol) 13 | 14 | ## ML estimate of gamma shape from brglmFit and from MASS::gamma.dispersion are the same 15 | expect_equal(1/update(mod, method = "brglmFit", epsilon = 1e-10, transformation = "inverse")$dispersion_ML, gamma.shape(mod)$alpha, tolerance = tol) 16 | -------------------------------------------------------------------------------- /inst/tinytest/test-jeffreys.R: -------------------------------------------------------------------------------- 1 | library("numDeriv") 2 | library("brglm") 3 | 4 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 
300-2) 5 | clotting <- data.frame( 6 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 7 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 8 | lot = factor(c(rep(1, 9), rep(2, 9)))) 9 | mod <- enrich(glm(conc ~ lot*log(u), data = clotting, family = Gamma())) 10 | 11 | ifun <- get_information_function(mod) 12 | 13 | penloglik <- function(pars, X, y) { 14 | p <- ncol(X) 15 | beta <- pars[1:p] 16 | dispersion <- pars[p + 1] 17 | eta <- X %*% beta 18 | mu <- 1/eta 19 | sum(dgamma(y, shape = 1/dispersion, scale = mu * dispersion, log = TRUE)) + 0.5 * log(det(ifun(beta, dispersion))) 20 | } 21 | 22 | modjef <- update(mod, method = "brglmFit", type = "MPL_Jeffreys", epsilon = 1e-15) 23 | 24 | ## the numerical gradient of the penalized log-likelihood is almost zero when evaluated at the estimates from type = 'MPL_Jeffreys 25 | expect_equal(grad(penloglik, 26 | x = coef(modjef, model = "full"), 27 | X = model.matrix(mod), 28 | y = model.response(mod$model)), rep(0, 5), tolerance = 1.5 * 1e-05) 29 | 30 | ## the numerical gradient of the penalized log-likelihood matches that from type = 'MPL_Jeffreys' 31 | expect_warning(g1 <- update(mod, method = "brglmFit", type = "MPL_Jeffreys", maxit = 0, start = coef(mod, model = "full"))$grad) 32 | g2 <- grad(penloglik, 33 | x = coef(mod, model = "full"), 34 | X = model.matrix(mod), 35 | y = model.response(mod$model)) 36 | expect_equal(unname(g1), g2, tolerance = 1e-05) 37 | 38 | ## source(system.file("inst", "brglm0/brglm0.R", package = "brglm2")) 39 | data("lizards", package = "brglm2") 40 | 41 | links <- lapply(c("logit", "probit", "cloglog", "cauchit"), make.link) 42 | 43 | 44 | tol <- 1e-08 45 | for (l in seq_along(links)) { 46 | expect_warning( 47 | lizardsBRlegacy <- brglm(cbind(grahami, opalinus) ~ height + diameter + 48 | light + time, family = binomial(links[[l]]), data=lizards, 49 | method = "brglm.fit", br.epsilon = 1e-10, br.maxit = 1000, pl = TRUE) 50 | ) 51 | 52 | expect_warning( 53 | lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + 54 | light + time, family = binomial(links[[l]]), data=lizards, 55 | method = "brglmFit", epsilon = 1e-10, maxit = 1000, type = "MPL_Jeffreys") 56 | ) 57 | ## glm with brglm.fit method and brglm_0 return the same coefficients for the lizards when link is links[[l]]$name) 58 | expect_equal(coef(lizardsBR), coef(lizardsBRlegacy), tolerance = tol) 59 | } 60 | -------------------------------------------------------------------------------- /inst/tinytest/test-median-binomial.R: -------------------------------------------------------------------------------- 1 | library("mbrglm") 2 | 3 | data("lizards", package = "brglm2") 4 | data("endometrial",package = "brglm2") 5 | 6 | links <- lapply(c("logit", "probit", "cloglog", "cauchit"), make.link) 7 | 8 | tol <- 1e-10 9 | mbrglmControl <- mbrglm.control(mbr.epsilon = 1e-10, mbr.maxit = 1000) 10 | 11 | for (l in seq_along(links)) { 12 | ## Lizards 13 | lizardsFormula <- cbind(grahami, opalinus) ~ height + diameter + light + time 14 | lizardsMBRlegacy <- mbrglm(lizardsFormula, family = binomial(links[[l]]), data = lizards, 15 | method = "mbrglm.fit", 16 | control.mbrglm = mbrglmControl) 17 | expect_warning(lizardsMBR <- glm(lizardsFormula, family = binomial(links[[l]]), data = lizards, 18 | method = "brglmFit", type="AS_median", epsilon = 1e-10, maxit = 1000)) 19 | 20 | ## Endometrial 21 | endoFormula <- HG ~ NV + PI + EH 22 | expect_warning({ 23 | endoMBRlegacy <- mbrglm(endoFormula, family = binomial(links[[l]]), data = 
endometrial, 24 | method = "mbrglm.fit", 25 | control.mbrglm = mbrglmControl) 26 | endoMBR <- glm(endoFormula, family = binomial(links[[l]]), data = endometrial, 27 | method = "brglmFit", type = "AS_median", epsilon = 1e-10, maxit = 1000) 28 | }) 29 | 30 | c1 <- coef(summary(endoMBRlegacy)) 31 | c2 <- coef(summary(endoMBR)) 32 | 33 | ## glm with brglmFit method and mbrglm return the same coefficients when link is" links[[l]]$name 34 | expect_equal(coef(lizardsMBR), coef(lizardsMBRlegacy), tolerance = tol) 35 | expect_equal(c1, c2, tolerance = tol) 36 | } 37 | 38 | -------------------------------------------------------------------------------- /inst/tinytest/test-median-dispersion.R: -------------------------------------------------------------------------------- 1 | data("anorexia", package = "MASS") 2 | 3 | anorexML <- glm(Postwt ~ Prewt + Treat+ offset(Prewt), 4 | family = gaussian, data = anorexia) 5 | anorexBR <- update(anorexML, method = "brglmFit") 6 | anorexMBR <- update(anorexML, method = "brglmFit", control = list(type="AS_median")) 7 | 8 | tol <- sqrt(.Machine$double.eps) 9 | 10 | ## dispersion is RSS over residual degrees of freedom minus 2/3" 11 | expect_equal(anorexMBR$dispersion, sum((anorexia$Postwt - fitted(anorexML))^2)/(nrow(anorexia) - length(coef(anorexML)) - 2/3)) 12 | 13 | -------------------------------------------------------------------------------- /inst/tinytest/test-median-poisson.R: -------------------------------------------------------------------------------- 1 | ## mbrpr is supplied under the permission of M. Zambelli, and is part 2 | ## of his unpublished undergraduate thesis submitted in 2016 at the 3 | ## Department of Statisitcal Science, University of Padova 4 | 5 | ## source(system.file("mbrpr", "mbrpr.R", package = "brglm2")) 6 | source("mbrpr.R") 7 | 8 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 9 | counts <- c(18, 17, 15, 20, 10, 20, 25, 13, 12) 10 | outcome <- gl(3, 1,9) 11 | treatment <- gl(3, 3) 12 | dobson <- data.frame(counts, outcome, treatment) 13 | 14 | tol <- 1e-06 15 | 16 | ## MBR estimates and std. 
errors from brglmFit and from mbrpr are the same for poisson 17 | expect_warning( 18 | br1 <- summary(fit<-glm(counts ~ outcome + treatment, family = poisson(), method = "brglmFit", 19 | type="AS_median")) 20 | ) 21 | 22 | X <- model.matrix(fit) 23 | y <- fit$y 24 | br2 <- mbrpr(par = rep(0, ncol(X)), y = y, X = X, eps = 1e-10, maxit = 500) 25 | c1 <- coef(br1)[, c(1,2)] 26 | dimnames(c1) <- NULL 27 | c2 <- cbind(br2$coefficients,sqrt(diag(br2$InfoInv))) 28 | expect_equal(c1,c2, tolerance = tol) 29 | 30 | -------------------------------------------------------------------------------- /inst/tinytest/test-mis.R: -------------------------------------------------------------------------------- 1 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 2 | 3 | logit_mis <- mis(link = "logit", sensitivity = 1, specificity = 1) 4 | probit_mis <- mis(link = "probit", sensitivity = 1, specificity = 1) 5 | 6 | expect_identical(class(logit_mis), "link-glm") 7 | expect_identical(class(probit_mis), "link-glm") 8 | 9 | lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time 10 | lizards_logit <- glm(lizards_f, family = binomial(logit), data = lizards) 11 | lizards_probit <- glm(lizards_f, family = binomial(probit), data = lizards) 12 | 13 | lizards_logit_mis <- update(lizards_logit, family = binomial(logit_mis), 14 | start = coef(lizards_logit)) 15 | lizards_probit_mis <- update(lizards_probit, family = binomial(probit_mis), 16 | start = coef(lizards_probit)) 17 | 18 | ## mis link with sensitivity and specificity 1 are the same as original links 19 | expect_equal(coef(lizards_logit), coef(lizards_logit_mis), tolerance = 1e-06) 20 | expect_equal(coef(lizards_probit), coef(lizards_probit_mis), tolerance = 1e-06) 21 | 22 | -------------------------------------------------------------------------------- /inst/tinytest/test-multinom-binom.R: -------------------------------------------------------------------------------- 1 | ## Tolerance for comparisons 2 | tolerance <- 1e-05 3 | 4 | data("lizards") 5 | 6 | lizards_grahami <- lizards[, c("grahami", "height", "diameter", "light", "time")] 7 | lizards_grahami <- lizards_grahami[rep(seq.int(nrow(lizards_grahami)), lizards_grahami$grahami), ] 8 | lizards_grahami$species <- "grahami" 9 | lizards_grahami$grahami <- NULL 10 | lizards_opalinus <- lizards[, c("opalinus", "height", "diameter", "light", "time")] 11 | lizards_opalinus <- lizards_opalinus[rep(seq.int(nrow(lizards_opalinus)), lizards_opalinus$opalinus), ] 12 | lizards_opalinus$species <- "opalinus" 13 | lizards_opalinus$opalinus <- NULL 14 | lizards1 <- rbind(lizards_grahami, lizards_opalinus) 15 | lizards1$species <- factor(lizards1$species, levels = c("opalinus", "grahami")) 16 | 17 | expect_warning({ 18 | model1 <- glm(formula = species ~ height + diameter + light + time, family = binomial(logit), 19 | data = lizards1, 20 | method = "brglmFit") 21 | model2 <- brmultinom(species ~ height + diameter + light + time, data = lizards1) 22 | }) 23 | 24 | tolerance <- 1e-06 25 | ## coefficients from the binomial fit match those from the multinomial model with the poisson trick 26 | expect_equal(coef(model1), coef(model2)[1,], tol = tolerance) 27 | 28 | -------------------------------------------------------------------------------- /inst/tinytest/test-poisson.R: -------------------------------------------------------------------------------- 1 | ## source(system.file("brpr", "brpr.R", package = "brglm2")) 2 | source("brpr.R") 3 | 4 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 5 
| counts <- c(18,17,15,20,10,20,25,13,12) 6 | outcome <- gl(3,1,9) 7 | treatment <- gl(3,3) 8 | dobson <- data.frame(counts, outcome, treatment) 9 | 10 | tol <- 1e-06 11 | ## BR estimates and std. errors from brglmFit and from brpr are the same for poisson 12 | expect_warning({ 13 | br1 <- summary(glm(counts ~ outcome + treatment, family = poisson(), method = "brglmFit")) 14 | br2 <- summary(brpr(counts ~ outcome + treatment, data = dobson)) 15 | }) 16 | c1 <- coef(br1) 17 | c2 <- coef(br2) 18 | c2 <- c2[rownames(c1), ] 19 | expect_equal(c1,c2, tolerance = tol) 20 | 21 | 22 | ## ## mbest::firthglm.fit crashes for this example 23 | ## ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 24 | ## clotting <- data.frame( 25 | ## u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 26 | ## conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 27 | ## lot = factor(c(rep(1, 9), rep(2, 9)))) 28 | ## mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma()) 29 | 30 | ## tol <- sqrt(.Machine$double.eps) 31 | ## test_that("BR estimates from brglmFit and from mbest::firthglm.fit are the same for gamma", { 32 | ## br1 <- update(mod, method = "firthglm.fit") 33 | ## br2 <- update(mod, method = "brglmFit") 34 | ## c1 <- coef(br1) 35 | ## c2 <- coef(br2) 36 | ## c2 <- c2[names(c1)] 37 | ## expect_equal(c1,c2, tolerance = tol) 38 | ## }) 39 | 40 | 41 | -------------------------------------------------------------------------------- /inst/tinytest/test-print.R: -------------------------------------------------------------------------------- 1 | ## source(system.file("inst", "brglm0/brglm0.R", package = "brglm2")) 2 | data("lizards", package = "brglm2") 3 | 4 | 5 | types <- list("ML" = "(maximum likelihood)", 6 | "correction" = "(bias correction)", 7 | "AS_mean" = "(mean bias-reducing adjusted score equations)", 8 | "AS_median" = "(median bias-reducing adjusted score equations)", 9 | "AS_mixed" = "(mixed bias-reducing adjusted score equations)", 10 | "MPL_Jeffreys" = "(maximum penalized likelihood with Jeffreys'-prior penalty)") 11 | 12 | 13 | for (type in names(types)) { 14 | liz <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(), 15 | data = lizards, 16 | method = "brglmFit", type = type) 17 | expect_stdout(print(liz), "Coefficients") 18 | expect_stdout(print(liz), "Degrees of Freedom") 19 | expect_stdout(print(liz), "Null Deviance") 20 | expect_equal(liz$type, type) 21 | summ <- summary(liz) 22 | expect_true(all(class(summ) %in% c("summary.brglmFit", "summary.glm"))) 23 | expect_stdout(print(summ), "Type of estimator:") 24 | expect_stdout(print(summ), type) 25 | expect_stdout(print(summ), types[[type]]) 26 | } 27 | 28 | 29 | ## brnb 30 | salmonella <- data.frame(freq = c(15, 16, 16, 27, 33, 20, 31 | 21, 18, 26, 41, 38, 27, 32 | 29, 21, 33, 60, 41, 42), 33 | dose = rep(c(0, 10, 33, 100, 333, 1000), 3), 34 | observation = rep(1:3, each = 6)) 35 | salmonella_fm <- freq ~ dose + log(dose + 10) 36 | fit_brnb <- brnb(salmonella_fm, data = salmonella, 37 | link = "log", transformation = "inverse", type = "ML") 38 | summ <- summary(fit_brnb) 39 | expect_stdout(print(summ), "Type of estimator:") 40 | expect_stdout(print(summ), "ML") 41 | expect_stdout(print(summ), "(maximum likelihood)") 42 | 43 | ## brmultinom 44 | data("housing", package = "MASS") 45 | fit_brmultinom <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 46 | data = housing, type = "ML", ref = 1) 47 | summ <- summary(fit_brmultinom) 48 | expect_stdout(print(summ), "Type 
of estimator:") 49 | expect_stdout(print(summ), "ML") 50 | expect_stdout(print(summ), "(maximum likelihood)") 51 | 52 | 53 | ## bracl 54 | data("stemcell", package = "brglm2") 55 | fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 56 | data = stemcell, type = "ML") 57 | summ <- summary(fit_bracl) 58 | expect_stdout(print(summ), "Type of estimator:") 59 | expect_stdout(print(summ), "ML") 60 | expect_stdout(print(summ), "(maximum likelihood)") 61 | -------------------------------------------------------------------------------- /inst/tinytest/test-singular.R: -------------------------------------------------------------------------------- 1 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 2 | clotting <- data.frame( 3 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 4 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 5 | lot = factor(c(rep(1, 9), rep(2, 9)))) 6 | mod <- glm(conc ~ lot*log(u) + I(2*log(u)), data = clotting, family = Gamma) 7 | 8 | X <- model.matrix(mod) 9 | Y <- mod$y 10 | ## brglmFit returns an error if singular.ok = TRUE 11 | expect_error(brglm_fit(X, Y, family = Gamma(), singular.ok = FALSE), 12 | pattern = "singular fit encountered") 13 | expect_true(is.na(coef(brglm_fit(X, Y, family = Gamma(), singular.ok = TRUE))["I(2 * log(u))"])) 14 | -------------------------------------------------------------------------------- /inst/tinytest/test-start.R: -------------------------------------------------------------------------------- 1 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 2 | clotting <- data.frame( 3 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 4 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 5 | lot = factor(c(rep(1, 9), rep(2, 9)))) 6 | mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, epsilon = 1e-10, maxit = 1000) 7 | 8 | mod1 <- update(mod, method = "brglmFit", start = c(coef(mod)*0.9, 1), epsilon = 1e-10, maxit = 1000) 9 | mod2 <- update(mod, method = "brglmFit", start = c(coef(mod)*0.9), epsilon = 1e-10, maxit = 1000) 10 | 11 | tol <- 1e-03 12 | ## start argument is passed correctly in brglmFit" 13 | expect_equal(coef(mod), coef(mod1), tolerance = tol) 14 | expect_equal(coef(mod), coef(mod2), tolerance = tol) 15 | -------------------------------------------------------------------------------- /inst/tinytest/test-transformation.R: -------------------------------------------------------------------------------- 1 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 
300-2) 2 | clotting <- data.frame( 3 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 4 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 5 | lot = factor(c(rep(1, 9), rep(2, 9)))) 6 | mod_identity <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 7 | method = "brglm_fit", type = "ML", 8 | transformation = "identity") 9 | mod_log <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 10 | method = "brglm_fit", type = "ML", 11 | transformation = "log") 12 | mod_sqrt <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 13 | method = "brglm_fit", type = "ML", 14 | transformation = "sqrt") 15 | mod_inverse <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 16 | method = "brglm_fit", type = "ML", 17 | transformation = "inverse") 18 | 19 | c_identity <- coef(mod_identity, model = "full") 20 | c_log <- coef(mod_log, model = "full") 21 | c_log[5] <- exp(c_log[5]) 22 | c_sqrt <- coef(mod_sqrt, model = "full") 23 | c_sqrt[5] <- c_sqrt[5]^2 24 | c_inverse <- coef(mod_inverse, model = "full") 25 | c_inverse[5] <- 1/c_inverse[5] 26 | 27 | tol <- 1e-08 28 | ## ML estimate of gamma dispersion from brglmFit is invariant to trasnformation 29 | expect_equal(c_identity, c_log, tolerance = tol, check.attributes = FALSE) 30 | expect_equal(c_identity, c_sqrt, tolerance = tol, check.attributes = FALSE) 31 | expect_equal(c_identity, c_inverse, tolerance = tol, check.attributes = FALSE) 32 | -------------------------------------------------------------------------------- /man/aids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{aids} 5 | \alias{aids} 6 | \title{The effects of AZT in slowing the development of AIDS symptoms} 7 | \format{ 8 | A data frame with 4 rows and 4 variables: 9 | \itemize{ 10 | \item \code{symptomatic}: counts of veterans showing AIDS symptoms during the 3-year study 11 | \item \code{asymptomatic}: counts of veterans not showing AIDS symptoms during the 3-year study 12 | \item \code{race}: the race of the veterans with levels \code{"White"} and \code{"Black"} 13 | \item \code{AZT_use}: whether the veterans received AZT immediately (\code{"Yes"}) 14 | or waited until their T cells showed severe immune weakness (\code{"No"}) 15 | } 16 | } 17 | \source{ 18 | The data set is analyzed in Agresti (2002, Subsection 5.4.2). Its 19 | original source is New York Times, Feb. 15, 1991. 20 | } 21 | \usage{ 22 | aids 23 | } 24 | \description{ 25 | The data is from a 3-year study on the effects of AZT in slowing the 26 | development of AIDS symptoms. 338 veterans whose immune systems 27 | were beginning to falter after infection with the AIDS virus were 28 | randomly assigned either to receive AZT immediately or to wait 29 | until their T cells showed severe immune weakness. 30 | } 31 | \references{ 32 | Agresti A (2002). \emph{Categorical Data Analysis}. Wiley Series in 33 | Probability and Statistics. Wiley. 
34 | } 35 | \seealso{ 36 | \code{\link[=brmultinom]{brmultinom()}} 37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /man/alligators.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{alligators} 5 | \alias{alligators} 6 | \title{Alligator food choice data} 7 | \format{ 8 | A data frame with 80 rows and 5 variables: 9 | \itemize{ 10 | \item \code{foodchoice}: primary food type, in volume, found in an alligator’s stomach, with levels \code{fish}, \code{invertebrate},\code{reptile}, \code{bird}, \code{other} 11 | \item \code{lake}: lake of capture with levels \code{Hancock}, \code{Oklawaha}, \code{Trafford}, \code{George}. 12 | \item \code{gender}: gender of the alligator with levels \code{Male} and \code{Female} 13 | \item \code{size}: size of the alligator with levels \verb{<=2.3} meters long and \verb{>2.3} meters long 14 | \item \code{freq}: number of alligators for each foodchoice, lake, gender and size combination 15 | } 16 | } 17 | \source{ 18 | The alligators data set is analyzed in Agresti (2002, Subsection 7.1.2). 19 | } 20 | \usage{ 21 | alligators 22 | } 23 | \description{ 24 | Alligator food choice data 25 | } 26 | \references{ 27 | Agresti A (2002). \emph{Categorical Data Analysis}. Wiley Series in 28 | Probability and Statistics. Wiley. 29 | } 30 | \seealso{ 31 | \code{\link[=brmultinom]{brmultinom()}} 32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/bracl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bracl.R 3 | \name{bracl} 4 | \alias{bracl} 5 | \title{Bias reduction for adjacent category logit models for ordinal 6 | responses using the Poisson trick.} 7 | \usage{ 8 | bracl( 9 | formula, 10 | data, 11 | weights, 12 | subset, 13 | na.action, 14 | parallel = FALSE, 15 | contrasts = NULL, 16 | model = TRUE, 17 | x = TRUE, 18 | control = list(...), 19 | ... 20 | ) 21 | } 22 | \arguments{ 23 | \item{formula}{ 24 | a formula expression as for regression models, of the form 25 | \code{response ~ predictors}. The response should be a factor 26 | (preferably an ordered factor), which will be interpreted as an 27 | ordinal response, with levels ordered as in the factor. 28 | The model must have an intercept: attempts to remove one will 29 | lead to a warning and be ignored. An offset may be used. See the 30 | documentation of \code{\link{formula}} for other details. 31 | } 32 | 33 | \item{data}{ 34 | an optional data frame, list or environment in which to interpret 35 | the variables occurring in \code{formula}. 36 | } 37 | 38 | \item{weights}{ 39 | optional case weights in fitting. Default to 1. 40 | } 41 | 42 | \item{subset}{ 43 | expression saying which subset of the rows of the data should be used 44 | in the fit. All observations are included by default. 45 | } 46 | 47 | \item{na.action}{ 48 | a function to filter missing data. 49 | } 50 | 51 | \item{parallel}{if \code{FALSE} (default), then a non-proportional odds 52 | adjacent category model is fit, assuming different effects per 53 | category; if \code{TRUE} then a proportional odds adjacent category 54 | model is fit. 
See Details.} 55 | 56 | \item{contrasts}{ 57 | a list of contrasts to be used for some or all of 58 | the factors appearing as variables in the model formula. 59 | } 60 | 61 | \item{model}{ 62 | logical for whether the model matrix should be returned. 63 | } 64 | 65 | \item{x}{should the model matrix be included with in the result 66 | (default is \code{TRUE}).} 67 | 68 | \item{control}{a list of parameters for controlling the fitting 69 | process. See \code{\link[=brglmControl]{brglmControl()}} for details.} 70 | 71 | \item{...}{arguments to be used to form the default \code{control} 72 | argument if it is not supplied directly.} 73 | } 74 | \description{ 75 | \code{\link[=bracl]{bracl()}} is a wrapper of \code{\link[=brglmFit]{brglmFit()}} that fits adjacent category 76 | logit models with or without proportional odds using implicit and 77 | explicit bias reduction methods. See Kosmidis & Firth (2011) for 78 | details. 79 | } 80 | \details{ 81 | The \code{\link[=bracl]{bracl()}} function fits adjacent category models, which assume 82 | multinomial observations with probabilities with proportional odds 83 | of the form 84 | 85 | \deqn{\log\frac{\pi_{ij}}{\pi_{ij + 1}} = \alpha_j + \beta^T x_i}{log(pi[i, j]/pi[i, j+1]) = alpha[j] + sum(beta * x[i, ])} 86 | 87 | or with non-proportional odds of the form 88 | 89 | \deqn{\log\frac{\pi_{ij}}{\pi_{ij + 1}} = \alpha_j + \beta_j^T x_i}{log(pi[i, j]/pi[i, j+1]) = alpha[j] + sum(beta[j, ] * x[i, ])} 90 | 91 | where \eqn{x_i}{x[i, ]} is a vector of covariates and \eqn{\pi_{ij}}{pi[i, j]} is the 92 | probability that category \eqn{j} is observed at the covariate setting \eqn{i}. 93 | } 94 | \examples{ 95 | 96 | data("stemcell", package = "brglm2") 97 | 98 | # Adjacent category logit (non-proportional odds) 99 | fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 100 | data = stemcell, type = "ML") 101 | # Adjacent category logit (proportional odds) 102 | fit_bracl_p <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 103 | data = stemcell, type = "ML", parallel = TRUE) 104 | 105 | 106 | } 107 | \references{ 108 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 109 | reduction in generalized linear models. \emph{Statistics and Computing}, 110 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 111 | 112 | Agresti, A (2010). \emph{Analysis of Ordinal Categorical Data} (2nd 113 | edition). Wiley Series in Probability and Statistics. Wiley. 114 | 115 | Albert A, Anderson J A (1984). On the Existence of Maximum 116 | Likelihood Estimates in Logistic Regression Models. \emph{Biometrika}, 117 | \strong{71}, 1-10. \doi{10.2307/2336390}. 118 | 119 | Kosmidis I, Firth D (2011). Multinomial logit bias reduction 120 | via the Poisson log-linear model. \emph{Biometrika}, \strong{98}, 121 | 755-759. \doi{10.1093/biomet/asr026}. 122 | 123 | Palmgren J (1981). The Fisher Information Matrix for Log Linear 124 | Models Arguing Conditionally on Observed Explanatory 125 | Variables. \emph{Biometrika}, \strong{68}, 126 | 563-566. \doi{10.1093/biomet/68.2.563}. 
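The adjacent category logits in the Details section above can be checked directly against fitted probabilities. A minimal sketch follows, reusing the stemcell example from this help page and assuming that predict() with type = "probs" returns the fitted category probabilities, as it does for nnet::multinom():

library("brglm2")
data("stemcell", package = "brglm2")
fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency,
                   data = stemcell, type = "ML")
## Fitted category probabilities pi[i, j], one row per row of stemcell
probs <- predict(fit_bracl, type = "probs")
## Implied adjacent category log-odds log(pi[i, j] / pi[i, j + 1])
adjacent_logits <- log(probs[, -ncol(probs)] / probs[, -1])
head(adjacent_logits)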
127 | } 128 | \seealso{ 129 | \code{\link[nnet:multinom]{nnet::multinom()}}, \code{\link[=brmultinom]{brmultinom()}} 130 | } 131 | \author{ 132 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 133 | } 134 | -------------------------------------------------------------------------------- /man/brglm2-defunct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglm2-defunct.R 3 | \name{brglm2-defunct} 4 | \alias{brglm2-defunct} 5 | \alias{check_infinite_estimates} 6 | \alias{detect_separation} 7 | \title{Defunct Functions in package \pkg{brglm2}} 8 | \usage{ 9 | check_infinite_estimates(...) 10 | 11 | detect_separation(...) 12 | } 13 | \arguments{ 14 | \item{...}{arguments to be passed to functions and methods.} 15 | } 16 | \description{ 17 | The functions or variables listed here are no longer part of 18 | \pkg{brglm2}. 19 | } 20 | \details{ 21 | \itemize{ 22 | \item \code{\link[=detect_separation]{detect_separation()}}: This function is defunct from \pkg{brglm2} 23 | since version 0.8.0. A new version of \code{\link[=detect_separation]{detect_separation()}} is now 24 | maintained in the 25 | \href{https://cran.r-project.org/package=detectseparation}{\pkg{detectseparation}} 26 | R package. 27 | \item \code{\link[=check_infinite_estimates]{check_infinite_estimates()}} is defunct from \pkg{brglm2} since 28 | version 0.8.0. An new version of \code{\link[=check_infinite_estimates]{check_infinite_estimates()}} is 29 | now maintained in the 30 | \href{https://cran.r-project.org/package=detectseparation}{\pkg{detectseparation}} 31 | R package. 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/brglm2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglm2-package.R 3 | \docType{package} 4 | \name{brglm2} 5 | \alias{brglm2} 6 | \alias{brglm2-package} 7 | \title{brglm2: Bias Reduction in Generalized Linear Models} 8 | \description{ 9 | Estimation and inference from generalized linear models using 10 | implicit and explicit bias reduction methods (Kosmidis, 2014), and 11 | other penalized maximum likelihood methods. Currently supported 12 | methods include the mean bias-reducing adjusted scores approach in 13 | Firth (1993) and Kosmidis & Firth (2009), the median bias-reduction 14 | adjusted scores approach in Kenne Pagui et al. (2017), the 15 | correction of the asymptotic bias in Cordeiro & McCullagh (1991), 16 | the mixed bias-reduction adjusted scores approach in Kosmidis et al 17 | (2020), maximum penalized likelihood with powers of the Jeffreys 18 | prior as penalty, and maximum likelihood. 19 | } 20 | \details{ 21 | In the special case of generalized linear models for binomial, 22 | Poisson and multinomial responses (both nominal and ordinal), mean 23 | and median bias reduction and maximum penalized likelihood return 24 | estimates with improved frequentist properties, that are also 25 | always finite, even in cases where the maximum likelihood estimates 26 | are infinite (e.g. complete and quasi-complete separation in 27 | multinomial regression). Estimation in all cases takes place via a 28 | modified Fisher scoring algorithm, and S3 methods for the 29 | construction of confidence intervals for the reduced-bias estimates 30 | are provided. 
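A minimal sketch of the point above, using the endometrial data shipped with the package (object names are illustrative):

library("brglm2")
data("endometrial", package = "brglm2")
## Maximum likelihood: quasi-complete separation in NV leads to an effectively
## infinite estimate, reported as a very large coefficient and standard error
modML <- glm(HG ~ NV + PI + EH, family = binomial("logit"), data = endometrial)
## Mean bias reduction via the brglmFit method returns finite estimates
modBR <- update(modML, method = "brglmFit", type = "AS_mean")
summary(modBR)
## Wald-type confidence intervals through the confint method for "brglmFit" objects
confint(modBR)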
31 | 32 | The core model fitters are implemented by the functions 33 | \code{\link[=brglm_fit]{brglm_fit()}} (univariate generalized linear models), 34 | \code{\link[=brmultinom]{brmultinom()}} (baseline category logit models for nominal 35 | multinomial responses), \code{\link[=bracl]{bracl()}} (adjacent category logit models 36 | for ordinal multinomial responses), and \code{\link[=brnb]{brnb()}} for negative 37 | binomial regression. 38 | 39 | The similarly named \strong{brglm} R package can only handle generalized 40 | linear models with binomial responses. Special care has been taken 41 | when developing \strong{brglm2} in order not to have conflicts when the 42 | user loads \strong{brglm2} and \strong{brglm} simultaneously. The development 43 | and maintenance of the two packages will continue in parallel, 44 | until \strong{brglm2} incorporates all \strong{brglm} functionality and 45 | provides an appropriate wrapper to the \code{\link[brglm:brglm]{brglm::brglm()}} function. 46 | } 47 | \references{ 48 | Kosmidis I, Firth D (2021). Jeffreys-prior penalty, finiteness 49 | and shrinkage in binomial-response generalized linear 50 | models. \emph{Biometrika}, \strong{108}, 71-82. \doi{10.1093/biomet/asaa052}. 51 | 52 | Cordeiro G M, McCullagh P (1991). Bias correction in generalized 53 | linear models. \emph{Journal of the Royal Statistical Society. Series B 54 | (Methodological)}, \strong{53}, 629-643. \doi{10.1111/j.2517-6161.1991.tb01852.x}. 55 | 56 | Firth D (1993). Bias reduction of maximum likelihood estimates, 57 | Biometrika, \strong{80}, 27-38. \doi{10.2307/2336755}. 58 | 59 | Kenne Pagui E C, Salvan A, Sartori N (2017). Median bias 60 | reduction of maximum likelihood estimates. \emph{Biometrika}, \strong{104}, 61 | 923–938. \doi{10.1093/biomet/asx046}. 62 | 63 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 64 | reduction in generalized linear models. \emph{Statistics and Computing}, 65 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 66 | 67 | Kosmidis I, Firth D (2009). Bias reduction in exponential family 68 | nonlinear models. \emph{Biometrika}, \strong{96}, 793-804. \doi{10.1093/biomet/asp055}. 69 | 70 | Kosmidis I, Firth D (2010). A generic algorithm for reducing 71 | bias in parametric estimation. \emph{Electronic Journal of Statistics}, 72 | \strong{4}, 1097-1112. \doi{10.1214/10-EJS579}. 73 | 74 | Kosmidis I (2014). Bias in parametric estimation: reduction and 75 | useful side-effects. \emph{WIRE Computational Statistics}, \strong{6}, 76 | 185-196. \doi{10.1002/wics.1296}. 77 | } 78 | \seealso{ 79 | \code{\link[=brglm_fit]{brglm_fit()}}, \code{\link[=brmultinom]{brmultinom()}}, \code{\link[=bracl]{bracl()}} 80 | } 81 | \author{ 82 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 83 | } 84 | -------------------------------------------------------------------------------- /man/brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{brmultinom} 4 | \alias{brmultinom} 5 | \title{Bias reduction for multinomial response models using the 6 | Poisson trick.} 7 | \usage{ 8 | brmultinom( 9 | formula, 10 | data, 11 | weights, 12 | subset, 13 | na.action, 14 | contrasts = NULL, 15 | ref = 1, 16 | model = TRUE, 17 | x = TRUE, 18 | control = list(...), 19 | ... 
20 | ) 21 | } 22 | \arguments{ 23 | \item{formula}{ 24 | a formula expression as for regression models, of the form 25 | \code{response ~ predictors}. The response should be a factor or a 26 | matrix with K columns, which will be interpreted as counts for each of 27 | K classes. 28 | A log-linear model is fitted, with coefficients zero for the first 29 | class. An offset can be included: it should be a numeric matrix with K columns 30 | if the response is either a matrix with K columns or a factor with K >= 2 31 | classes, or a numeric vector for a response factor with 2 levels. 32 | See the documentation of \code{\link{formula}()} for other details. 33 | } 34 | 35 | \item{data}{ 36 | an optional data frame in which to interpret the variables occurring 37 | in \code{formula}. 38 | } 39 | 40 | \item{weights}{ 41 | optional case weights in fitting. 42 | } 43 | 44 | \item{subset}{ 45 | expression saying which subset of the rows of the data should be used 46 | in the fit. All observations are included by default. 47 | } 48 | 49 | \item{na.action}{ 50 | a function to filter missing data. 51 | } 52 | 53 | \item{contrasts}{ 54 | a list of contrasts to be used for some or all of 55 | the factors appearing as variables in the model formula. 56 | } 57 | 58 | \item{ref}{the reference category to use for multinomial 59 | regression. Either an integer, in which case 60 | \code{levels(response)[ref]} is used as a baseline, or a character 61 | string. Default is 1.} 62 | 63 | \item{model}{ 64 | logical. If true, the model frame is saved as component \code{model} 65 | of the returned object. 66 | } 67 | 68 | \item{x}{should the model matrix be included in the result 69 | (default is \code{TRUE}).} 70 | 71 | \item{control}{a list of parameters for controlling the fitting 72 | process. See \code{\link[=brglmControl]{brglmControl()}} for details.} 73 | 74 | \item{...}{arguments to be used to form the default \code{control} 75 | argument if it is not supplied directly.} 76 | } 77 | \description{ 78 | \code{\link[=brmultinom]{brmultinom()}} is a wrapper of \code{\link[=brglmFit]{brglmFit()}} that fits multinomial 79 | regression models using implicit and explicit bias reduction 80 | methods. See Kosmidis & Firth (2011) for details. 81 | } 82 | \details{ 83 | The models \code{\link[=brmultinom]{brmultinom()}} handles are also known as 84 | baseline-category logit models (see, Agresti, 2002, Section 7.1), 85 | because they model the log-odds of every category against a 86 | baseline category. The user can control which baseline (or 87 | reference) category is used via the \code{ref} argument. By default 88 | \code{\link[=brmultinom]{brmultinom()}} uses the first category as reference. 89 | 90 | The maximum likelihood estimates for the parameters of 91 | baseline-category logit models have infinite components with 92 | positive probability, which can result in problems in their 93 | estimation and the use of inferential procedures (e.g. Wald 94 | tests). Albert and Anderson (1984) have categorized the possible 95 | data patterns for such models into the exclusive and exhaustive 96 | categories of complete separation, quasi-complete separation and 97 | overlap, and showed that infinite maximum likelihood estimates 98 | result when complete or quasi-complete separation occurs. 
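A minimal sketch of the separation issue described above, using an artificial three-category response in which category "c" never occurs when x = 0, so some maximum likelihood estimates diverge while mean bias reduction stays finite (the data and object names are illustrative):

library("brglm2")
toy <- data.frame(y = factor(c("a", "b", "c", "a", "b", "c", "a", "b", "a", "b")),
                  x = c(0, 0, 1, 0, 1, 1, 1, 0, 0, 1))
## Maximum likelihood: expect convergence warnings and very large estimates
## in the equation for category "c" (quasi-complete separation)
toyML <- brmultinom(y ~ x, data = toy, type = "ML")
## Mean bias reduction: estimates are finite
toyBR <- brmultinom(y ~ x, data = toy, type = "AS_mean")
coef(toyML)
coef(toyBR)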
99 | 100 | The adjusted score approaches to bias reduction that \code{\link[=brmultinom]{brmultinom()}} 101 | implements for \code{type = "AS_mean"} and \code{type = "AS_median"} are 102 | alternatives to maximum likelihood that result in estimates with 103 | smaller asymptotic mean and median bias, respectively, that are 104 | also \emph{always} finite, even in cases of complete or quasi-complete 105 | separation. 106 | 107 | \code{\link[=brmultinom]{brmultinom()}} is a wrapper of \code{\link[=brglmFit]{brglmFit()}} that fits multinomial 108 | logit regression models through the 'Poisson trick' (see, for 109 | example, Palmgren, 1981; Kosmidis & Firth, 2011). 110 | 111 | The implementation relies on the construction of an extended model 112 | matrix for the log-linear model and constraints on the sums of the 113 | Poisson means. Specifically, a log-linear model is fitted on a 114 | \href{https://en.wikipedia.org/wiki/Kronecker_product}{Kronecker product} of the 115 | original model matrix \code{X} implied by the formula, augmented by 116 | \code{nrow(X)} dummy variables. 117 | 118 | The extended model matrix is sparse, and the 119 | \href{https://cran.r-project.org/package=Matrix}{\pkg{Matrix}} package is 120 | used for its effective storage. 121 | 122 | While \code{\link[=brmultinom]{brmultinom()}} can be used for analyses using multinomial 123 | regression models, the current implementation is more of a proof of 124 | concept and is not expected to scale well with either of \code{nrow(X)}, 125 | \code{ncol(X)} or the number of levels in the categorical response. 126 | } 127 | \examples{ 128 | ## The housing data analysis from ?MASS::housing 129 | 130 | data("housing", package = "MASS") 131 | 132 | # Maximum likelihood using nnet::multinom 133 | houseML1nnet <- nnet::multinom(Sat ~ Infl + Type + Cont, weights = Freq, 134 | data = housing) 135 | # Maximum likelihood using brmultinom with baseline category 'Low' 136 | houseML1 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 137 | data = housing, type = "ML", ref = 1) 138 | # The estimates are numerically the same as houseML0 139 | all.equal(coef(houseML1nnet), coef(houseML1), tolerance = 1e-04) 140 | 141 | # Maximum likelihood using brmultinom with 'High' as baseline 142 | houseML3 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 143 | data = housing, type = "ML", ref = 3) 144 | # The fitted values are the same as houseML1 145 | all.equal(fitted(houseML3), fitted(houseML1), tolerance = 1e-10) 146 | 147 | # Bias reduction 148 | houseBR3 <- update(houseML3, type = "AS_mean") 149 | # Bias correction 150 | houseBC3 <- update(houseML3, type = "correction") 151 | 152 | ## Reproducing Bull et al. (2002, Table 3) 153 | data("hepatitis", package = "brglm2") 154 | 155 | # Construct a variable with the multinomial categories according to 156 | # the HCV and nonABC columns 157 | hepat <- hepatitis 158 | hepat$type <- with(hepat, factor(1 - HCV * nonABC + HCV + 2 * nonABC)) 159 | hepat$type <- factor(hepat$type, labels = c("noDisease", "C", "nonABC")) 160 | contrasts(hepat$type) <- contr.treatment(3, base = 1) 161 | 162 | # Maximum likelihood estimation fails to converge because some estimates are infinite 163 | hepML <- brmultinom(type ~ group * time, data = hepat, weights = counts, type = "ML", slowit = 0.1) 164 | 165 | # Mean bias reduction returns finite estimates 166 | hep_meanBR <- brmultinom(type ~ group * time, data = hepat, weights = counts, type = "AS_mean") 167 | # The estimates in Bull et al. 
(2002, Table 3, DOI: 10.1016/S0167-9473(01)00048-2) 168 | coef(hep_meanBR) 169 | 170 | # Median bias reduction also returns finite estimates, which are a bit larger in absolute value 171 | hep_medianBR <- brmultinom(type ~ group * time, data = hepat, weights = counts, type = "AS_median") 172 | coef(hep_medianBR) 173 | 174 | } 175 | \references{ 176 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 177 | reduction in generalized linear models. \emph{Statistics and Computing}, 178 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 179 | 180 | Agresti A (2002). \emph{Categorical data analysis} (2nd edition). Wiley 181 | Series in Probability and Statistics. Wiley. 182 | 183 | Albert A, Anderson J A (1984). On the Existence of Maximum 184 | Likelihood Estimates in Logistic Regression Models. \emph{Biometrika}, 185 | \strong{71} 1--10. \doi{10.2307/2336390}. 186 | 187 | Kosmidis I, Firth D (2011). Multinomial logit bias reduction 188 | via the Poisson log-linear model. \emph{Biometrika}, \strong{98}, 755-759. 189 | \doi{10.1093/biomet/asr026}. 190 | 191 | Palmgren, J (1981). The Fisher Information Matrix for Log Linear 192 | Models Arguing Conditionally on Observed Explanatory 193 | Variables. \emph{Biometrika}, \strong{68}, 563-566. 194 | \doi{10.1093/biomet/68.2.563}. 195 | } 196 | \seealso{ 197 | \code{\link[nnet:multinom]{nnet::multinom()}}, \code{\link[=bracl]{bracl()}} for adjacent category logit models for ordinal responses 198 | } 199 | \author{ 200 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 201 | } 202 | -------------------------------------------------------------------------------- /man/coalition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{coalition} 5 | \alias{coalition} 6 | \title{Coalition data} 7 | \format{ 8 | A data frame with 314 rows and the 7 variables "duration", 9 | "ciep12", "invest", "fract", "polar", "numst2", and "crisis". For 10 | variable descriptions, please refer to King et al (1990). 11 | } 12 | \usage{ 13 | coalition 14 | } 15 | \description{ 16 | This data set contains survival data on government coalitions in 17 | parliamentary democracies (Belgium, Canada, Denmark, Finland, 18 | France, Iceland, Ireland, Israel, Italy, Netherlands, Norway, 19 | Portugal, Spain, Sweden, and the United Kingdom) for the period 20 | 1945-1987. For parsimony, country indicator variables are omitted 21 | in the sample data. 22 | } 23 | \note{ 24 | Data is as it is provided by the 25 | \href{https://cran.r-project.org/package=Zelig}{\pkg{Zeilig}} R package. 26 | } 27 | \references{ 28 | King G, Alt J E, Burns N E, Laver M. (1990). A Unified 29 | Model of Cabinet Dissolution in Parliamentary 30 | Democracies. \emph{American Journal of Political Science}, \strong{34}, 31 | 846-870. \doi{10.2307/2111401}. 32 | 33 | King G, Alt J E, Burns N E, Laver M. ICPSR 34 | Publication Related Archive, 1115. 
35 | } 36 | \seealso{ 37 | \code{\link[=brglm_fit]{brglm_fit()}} 38 | } 39 | \keyword{datasets} 40 | -------------------------------------------------------------------------------- /man/coef.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{coef.brglmFit} 4 | \alias{coef.brglmFit} 5 | \title{Extract model coefficients from \code{\link[=brglmFit]{"brglmFit"}} objects} 6 | \usage{ 7 | \method{coef}{brglmFit}(object, model = c("mean", "full", "dispersion"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object for which the extraction of model coefficients is 11 | meaningful.} 12 | 13 | \item{model}{one of \code{"mean"} (default), \code{"dispersion"}, `"full", 14 | to return the estimates of the parameters in the linear 15 | prediction only, the estimate of the dispersion parameter only, 16 | or both, respectively.} 17 | 18 | \item{...}{other arguments.} 19 | } 20 | \description{ 21 | Extract model coefficients from \code{\link[=brglmFit]{"brglmFit"}} objects 22 | } 23 | \details{ 24 | See \code{\link[=coef]{coef()}} for more details. 25 | } 26 | \seealso{ 27 | \code{\link[=coef]{coef()}} 28 | } 29 | -------------------------------------------------------------------------------- /man/coef.brglmFit_expo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expo.R 3 | \name{coef.brglmFit_expo} 4 | \alias{coef.brglmFit_expo} 5 | \title{Extract estimates from \code{\link[=brglmFit_expo]{"brglmFit_expo"}} objects} 6 | \usage{ 7 | \method{coef}{brglmFit_expo}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object for which the extraction of model coefficients is 11 | meaningful.} 12 | 13 | \item{...}{other arguments.} 14 | } 15 | \description{ 16 | Extract estimates from \code{\link[=brglmFit_expo]{"brglmFit_expo"}} objects 17 | } 18 | -------------------------------------------------------------------------------- /man/coef.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{coef.brnb} 4 | \alias{coef.brnb} 5 | \title{Extract model coefficients from \code{\link[=brnb]{"brnb"}} objects} 6 | \usage{ 7 | \method{coef}{brnb}(object, model = c("mean", "full", "dispersion"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object for which the extraction of model coefficients is 11 | meaningful.} 12 | 13 | \item{model}{one of \code{"mean"} (default), \code{"full"}, \code{"dispersion"}, 14 | to return the estimates of the parameters in the linear 15 | prediction only, or both, the estimate of the dispersion 16 | parameter only, respectively.} 17 | 18 | \item{...}{other arguments.} 19 | } 20 | \description{ 21 | Extract model coefficients from \code{\link[=brnb]{"brnb"}} objects 22 | } 23 | \details{ 24 | See \code{\link[=coef]{coef()}} for more details. 
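A minimal sketch of the \code{model} argument, with a salmonella data frame constructed inline for illustration (mirroring the package tests):

library("brglm2")
salmonella <- data.frame(freq = c(15, 16, 16, 27, 33, 20,
                                  21, 18, 26, 41, 38, 27,
                                  29, 21, 33, 60, 41, 42),
                         dose = rep(c(0, 10, 33, 100, 333, 1000), 3))
fit_brnb <- brnb(freq ~ dose + log(dose + 10), data = salmonella,
                 link = "log", transformation = "inverse", type = "ML")
coef(fit_brnb, model = "mean")        # regression parameters only (default)
coef(fit_brnb, model = "dispersion")  # dispersion parameter only
coef(fit_brnb, model = "full")        # regression and dispersion parameters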
25 | } 26 | -------------------------------------------------------------------------------- /man/confint.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{confint.brglmFit} 4 | \alias{confint.brglmFit} 5 | \title{Method for computing confidence intervals for one or more 6 | regression parameters in a \code{\link[=brglmFit]{"brglmFit"}} object} 7 | \usage{ 8 | \method{confint}{brglmFit}(object, parm, level = 0.95, ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted model object.} 12 | 13 | \item{parm}{a specification of which parameters are to be given 14 | confidence intervals, either a vector of numbers or a vector of 15 | names. If missing, all parameters are considered.} 16 | 17 | \item{level}{the confidence level required.} 18 | 19 | \item{...}{additional argument(s) for methods.} 20 | } 21 | \description{ 22 | Method for computing confidence intervals for one or more 23 | regression parameters in a \code{\link[=brglmFit]{"brglmFit"}} object 24 | } 25 | -------------------------------------------------------------------------------- /man/confint.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{confint.brmultinom} 4 | \alias{confint.brmultinom} 5 | \title{Method for computing confidence intervals for one or more 6 | regression parameters in a \code{\link[=brmultinom]{"brmultinom"}} object} 7 | \usage{ 8 | \method{confint}{brmultinom}(object, parm, level = 0.95, ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted model object.} 12 | 13 | \item{parm}{a specification of which parameters are to be given 14 | confidence intervals, either a vector of numbers or a vector of 15 | names. If missing, all parameters are considered.} 16 | 17 | \item{level}{the confidence level required.} 18 | 19 | \item{...}{additional argument(s) for methods.} 20 | } 21 | \description{ 22 | Method for computing confidence intervals for one or more 23 | regression parameters in a \code{\link[=brmultinom]{"brmultinom"}} object 24 | } 25 | -------------------------------------------------------------------------------- /man/confint.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{confint.brnb} 4 | \alias{confint.brnb} 5 | \title{Method for computing Wald confidence intervals for one or more 6 | regression parameters in a \code{\link[=brnb]{"brnb"}} object} 7 | \usage{ 8 | \method{confint}{brnb}(object, parm, level = 0.95, ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted model object.} 12 | 13 | \item{parm}{a specification of which parameters are to be given 14 | confidence intervals, either a vector of numbers or a vector of 15 | names. 
If missing, all parameters are considered.} 16 | 17 | \item{level}{the confidence level required.} 18 | 19 | \item{...}{additional argument(s) for methods.} 20 | } 21 | \description{ 22 | Method for computing Wald confidence intervals for one or more 23 | regression parameters in a \code{\link[=brnb]{"brnb"}} object 24 | } 25 | -------------------------------------------------------------------------------- /man/endometrial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{endometrial} 5 | \alias{endometrial} 6 | \title{Histology grade and risk factors for 79 cases of endometrial cancer} 7 | \format{ 8 | A data frame with 79 rows and 4 variables: 9 | \itemize{ 10 | \item \code{NV}: neovasculization with coding 0 for absent and 1 for present 11 | \item \code{PI}: pulsality index of arteria uterina 12 | \item \code{EH}: endometrium height 13 | \item \code{HG} histology grade with coding 0 for low grade and 1 for high grade 14 | } 15 | } 16 | \source{ 17 | The packaged data set was downloaded in \code{.dat} format from 18 | \url{https://users.stat.ufl.edu/~aa/glm/data/}. The latter link 19 | provides the data sets used in Agresti (2015). 20 | 21 | \if{html}{\out{
}}\preformatted{The endometrial data set was first analyzed in Heinze and 22 | Schemper (2002), and was originally provided by Dr 23 | E. Asseryanis from the Medical University of Vienna. 24 | }\if{html}{\out{
}} 25 | } 26 | \usage{ 27 | endometrial 28 | } 29 | \description{ 30 | Histology grade and risk factors for 79 cases of endometrial cancer 31 | } 32 | \references{ 33 | Agresti A (2015). \emph{Foundations of Linear and Generalized Linear 34 | Models}. Wiley Series in Probability and Statistics. Wiley. 35 | 36 | Heinze G, Schemper M (2002). A Solution to the Problem of 37 | Separation in Logistic Regression. \emph{Statistics in Medicine}, 38 | \strong{21}, 2409–2419. \doi{10.1002/sim.1047}. 39 | 40 | Kosmidis I, Firth D (2021). Jeffreys-prior penalty, finiteness 41 | and shrinkage in binomial-response generalized linear 42 | models. \emph{Biometrika}, \strong{108}, 71-82. \doi{10.1093/biomet/asaa052}. 43 | } 44 | \seealso{ 45 | \code{\link[=brglm_fit]{brglm_fit()}} 46 | } 47 | \keyword{datasets} 48 | -------------------------------------------------------------------------------- /man/enzymes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{enzymes} 5 | \alias{enzymes} 6 | \title{Liver Enzyme Data} 7 | \format{ 8 | A data frame with 218 rows and the following 6 columns: 9 | \itemize{ 10 | \item \code{Patient}: Patient ID 11 | \item \code{Group}: Four diagnostic groups were considered: acute viral 12 | hepatitis (1), persistent chronic hepatitis (2), aggressive chronic 13 | hepatitis (3) and post-necrotic cirrhosis (4). 14 | \item \code{AST}: Aspartate aminotransferase (in U/L) 15 | \item \code{ALT}: Alanine aminotransferase (in U/L) 16 | \item \code{GLDH}: Glutamate dehydrogenase (in U/L) 17 | \item \code{OCT}: Ornithine carbamyltransferase (in U/L) 18 | } 19 | } 20 | \source{ 21 | Data from Albert and Harris (1984, Chapter 5, Appendix I), and is 22 | also provided by the 23 | \href{https://cran.r-project.org/package=pmlr}{\pkg{pmlr}} R package. 24 | } 25 | \usage{ 26 | enzymes 27 | } 28 | \description{ 29 | Liver enzyme data collected from 218 patients with liver disease 30 | (Plomteux, 1980). The laboratory profile consists of enzymatic 31 | activity measured for four liver enzymes: aspartate 32 | aminotransferase (\code{AST}), alanine aminotransferase (\code{ALT}), 33 | glutamate dehydrogenase (\code{GLDH}) and ornithine carbamyltransferase 34 | (\code{OCT}). 35 | } 36 | \references{ 37 | Albert A, Harris E K (1984). \emph{Multivariate Interpretation of 38 | Clinical Laboratory Data}. Dekker: New York. 39 | 40 | Plomteux G (1980). Multivariate analysis of an enzyme profile for 41 | the differential diagnosis of viral hepatitis. \emph{Clinical 42 | Chemistry}, \strong{26}, 1897-1899. 
43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /man/expo.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expo.R 3 | \name{expo.brglmFit} 4 | \alias{expo.brglmFit} 5 | \alias{brglmFit_expo} 6 | \alias{expo} 7 | \alias{expo.glm} 8 | \title{Estimate the exponential of parameters of generalized linear models 9 | using various methods} 10 | \usage{ 11 | \method{expo}{brglmFit}( 12 | object, 13 | type = c("correction*", "correction+", "Lylesetal2012", "AS_median", "ML"), 14 | level = 0.95 15 | ) 16 | 17 | \method{expo}{glm}( 18 | object, 19 | type = c("correction*", "correction+", "Lylesetal2012", "AS_median", "ML"), 20 | level = 0.95 21 | ) 22 | } 23 | \arguments{ 24 | \item{object}{an object of class \code{\link[=brglmFit]{"brglmFit"}} or 25 | \code{\link[=glm]{"glm"}}.} 26 | 27 | \item{type}{the type of correction to be used. The available 28 | options are \code{"correction*"} (explicit mean bias correction with 29 | a multiplicative adjustment), \code{"correction+"} (explicit mean 30 | bias correction with an additive adjustment), \code{"Lylesetal2012"} 31 | (explicit median bias correction using the multiplicative 32 | adjustment in Lyles et al., 2012), \code{"AS_median"} (median bias 33 | reduction), and \code{"ML"} (maximum likelihood). See Details.} 34 | 35 | \item{level}{the confidence level required. Default is \code{0.95}.} 36 | } 37 | \value{ 38 | a list inheriting from class \code{\link[=brglmFit_expo]{"brglmFit_expo"}} with 39 | components \code{coef} (the estimates of the exponentiated 40 | regression parameters), \code{se} (the corresponding estimated 41 | standard errors for the exponentiated parameters), \code{ci} 42 | (confidence intervals of level \code{level} for the exponentiated 43 | parameters), and \code{type} for the \code{type} of correction that has 44 | been requested. 45 | } 46 | \description{ 47 | The \code{\link[=expo]{expo()}} method uses the supplied \code{\link[=brglmFit]{"brglmFit"}} or 48 | \code{\link[=glm]{"glm"}} object to estimate the exponential of parameters of 49 | generalized linear models with maximum likelihood or various mean 50 | and median bias reduction methods. \code{\link[=expo]{expo()}} is useful for computing 51 | (corrected) estimates of the multiplicative impact of a unit 52 | increase in a covariate on the mean of a Poisson log-linear model 53 | (\code{family = poisson("log")} in \code{\link[=glm]{glm()}}) while adjusting for other 54 | covariates, the odds ratio associated with a unit increase in a 55 | covariate in a logistic regression model (\code{family = binomial("logit")} in \code{\link[=glm]{glm()}}) while adjusting for other 56 | covariates, the relative risk associated with a unit increase in a 57 | covariate in a relative risk regression model (\code{family = binomial("log")} in \code{\link[=glm]{glm()}}) while adjusting for other covariates, 58 | among others. 59 | } 60 | \details{ 61 | The supported methods through the \code{type} argument are: 62 | \itemize{ 63 | \item \code{"ML"}: the estimates of the exponentiated parameters are 64 | \eqn{\exp(\hat\theta_j)}, where \eqn{\hat\theta_j} is the maximum 65 | likelihood estimate of the \eqn{j}th regression parameter. 
66 | \item \code{"correction*"}: the estimates of the exponentiated parameters 67 | are \eqn{\exp(\hat\theta_j) / (1 + \hat{v}_j / 2)}, where 68 | \eqn{\hat\theta_j} is the estimate of the \eqn{j}th regression 69 | parameter using \code{type = "AS_mixed"} in \code{\link[=brglmFit]{brglmFit()}}. 70 | \item \code{"correction+"}: the estimates of the exponentiated parameters 71 | are \eqn{\exp(\hat\theta_j) (1 - \hat{v}_j / 2)}, where 72 | \eqn{\hat\theta_j} is the estimate of the \eqn{j}th regression 73 | parameter using \code{type = "AS_mixed"} in \code{\link[=brglmFit]{brglmFit()}}. 74 | \item \code{"Lylesetal2012"}: the estimates of the exponentiated parameters 75 | are \eqn{\exp(\hat\theta_j) exp(- \hat{v}_j / 2)}, where 76 | \eqn{\hat\theta_j} is the estimate of the \eqn{j}th regression 77 | parameter using \code{type = "AS_mixed"} in \code{\link[=brglmFit]{brglmFit()}}. This estimator 78 | has been proposed in Lyles et al. (2012). 79 | \item \code{"AS_median"}: the estimates of the exponentiated parameters are 80 | \eqn{\exp(\hat\theta_j)}, where \eqn{\hat\theta_j} is the estimate 81 | of the \eqn{j}th regression parameter using \code{type = "AS_median"} in 82 | \code{\link[=brglmFit]{brglmFit()}}. 83 | } 84 | 85 | \code{"correction*"} and \code{"correction+"} are based on multiplicative and 86 | additive adjustments, respectively, of the exponential of a 87 | reduced-bias estimator (like the ones coming from \code{\link[=brglmFit]{brglmFit()}} with 88 | \code{type = "AS_mixed"}, \code{type = "AS_mean"}, and \code{type = "correction"}). The form of those adjustments results from the 89 | expression of the first-term in the mean bias expansion of the 90 | exponential of a reduced-bias estimator. See, for example, Di 91 | Caterina & Kosmidis (2019, expression 12) for the general form of 92 | the first-term of the mean bias of a smooth transformation of a 93 | reduced-bias estimator. 94 | 95 | The estimators from \code{"correction+"}, \code{"correction*"}, 96 | \code{"Lylesetal2012"} have asymptotic mean bias of order smaller than 97 | than of the maximum likelihood estimator. The estimators from 98 | \code{"AS_median"} are asymptotically closed to being median unbiased 99 | than the maximum likelihood estimator is. 100 | 101 | Estimated standard errors are computed using the delta method, 102 | where both the Jacobin and the information matrix are evaluated at 103 | the logarithm of the estimates of the exponentiated parameters. 104 | 105 | Confidence intervals results by taking the exponential of the 106 | limits of standard Wald-type intervals computed at the logarithm of 107 | the estimates of the exponentiated parameters. 
108 | } 109 | \examples{ 110 | 111 | ## The lizards example from ?brglm::brglm 112 | lizardsML <- glm(cbind(grahami, opalinus) ~ height + diameter + 113 | light + time, family = binomial(logit), data = lizards, 114 | method = "glm.fit") 115 | # Get estimates, standard errors, and confidence intervals of odds 116 | # ratios with various methods 117 | expo(lizardsML, type = "ML") 118 | expo(lizardsML, type = "correction*") 119 | expo(lizardsML, type = "Lylesetal2012") 120 | expo(lizardsML, type = "correction+") 121 | expo(lizardsML, type = "AS_median") 122 | 123 | ## Example from ?glm 124 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 125 | counts <- c(18,17,15,20,10,20,25,13,12) 126 | outcome <- gl(3,1,9) 127 | treatment <- gl(3,3) 128 | glm.D93 <- glm(counts ~ outcome + treatment, family = poisson()) 129 | expo(glm.D93, type = "correction*") 130 | 131 | } 132 | \references{ 133 | Di Caterina C, Kosmidis I (2019). Location-Adjusted Wald Statistics for Scalar 134 | Parameters. \emph{Computational Statistics & Data Analysis}, \strong{138}, 135 | 126-142. \doi{10.1016/j.csda.2019.04.004}. 136 | 137 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 138 | reduction in generalized linear models. \emph{Statistics and Computing}, 139 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 140 | 141 | Cordeiro G M, McCullagh P (1991). Bias correction in generalized 142 | linear models. \emph{Journal of the Royal Statistical Society. Series B 143 | (Methodological)}, \strong{53}, 629-643. \doi{10.1111/j.2517-6161.1991.tb01852.x}. 144 | 145 | Lyles R H, Guo Y, Greenland S (2012). Reducing bias and mean 146 | squared error associated with regression-based odds ratio 147 | estimators. \emph{Journal of Statistical Planning and Inference}, 148 | \strong{142} 3235–3241. \doi{10.1016/j.jspi.2012.05.005}. 149 | } 150 | \seealso{ 151 | \code{\link[=brglm_fit]{brglm_fit()}} and and \code{\link[=brglm_control]{brglm_control()}} 152 | } 153 | \author{ 154 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 155 | } 156 | -------------------------------------------------------------------------------- /man/hepatitis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{hepatitis} 5 | \alias{hepatitis} 6 | \title{Post-transfusion hepatitis: impact of non-A, non-B hepatitis 7 | surrogate tests} 8 | \format{ 9 | A data frame with 28 rows and the following 6 columns: 10 | \itemize{ 11 | \item \code{city}: Subjects were recruited from 3 Canadian Red Cross Society 12 | Blood Centres and 13 university-affiliated hospitals in 3 cities: 13 | Toronto, Hamilton and Winnipeg. 14 | \item \code{group}: Eligible subjects were assigned to one of two allogenic 15 | blood recipient groups. One group received products that had only 16 | routine Canadian transfusion-transmissible disease marker screening 17 | (no-withhold). The other group received only products that were 18 | not positive for NANB surrogate markers (withhold). 19 | \item \code{time}: Hepatitis C (HCV) screening was introduced in Canada in 20 | May, 1990. Subjects were recruited into the study before (pre) and 21 | after (post) the introduction of anti-HCV testing. 22 | \item \code{HCV}: Post-transfusion HCV hepatitis present (1) or absent (0). 
23 | \item \code{nonABC}: Post-transfusion non-A, non-B, non-C hepatitis present (1) or absent (0) 24 | \item \code{counts}: Number of subjects 25 | } 26 | } 27 | \source{ 28 | Data is from Blajchman et al. (1995), also analyzed in Bull et 29 | al. (2002), and is also provided by the 30 | \href{https://cran.r-project.org/package=pmlr}{\pkg{pmlr}} R package. 31 | } 32 | \usage{ 33 | hepatitis 34 | } 35 | \description{ 36 | Data from a randomized double-blind trial to assess whether 37 | withholding donor blood positive for the non-A, non-B (\code{"NANB"}) 38 | surrogate markers would reduce the frequency of post-transfusion 39 | hepatitis. The dataset contains \code{4588} subjects enrolled from 1988 40 | to 1992 into two study groups that received allogenic blood from 41 | which units positive for NANB surrogate markers were withheld (n = 42 | \code{2311}) or not withheld (n = \code{2277}). Subjects were followed up 43 | for 6 months and assessed for the presence of post-transfusion 44 | hepatitis. 45 | } 46 | \references{ 47 | Bull S B, Mak C, Greenwood C M T (2002). A modified score function 48 | estimator for multinomial logistic regression in small 49 | samples. \emph{Computational Statistics & Data Analysis}, \strong{39}, 50 | 57-74. \doi{10.1016/S0167-9473(01)00048-2} 51 | 52 | Blajchman M A, Bull S B and Feinman S V (1995). Post-transfusion 53 | hepatitis: impact of non-A, non-B hepatitis surrogate tests. \emph{The 54 | Lancet}, \strong{345}, 21--25. \doi{10.1016/S0140-6736(95)91153-7} 55 | } 56 | \keyword{datasets} 57 | -------------------------------------------------------------------------------- /man/lizards.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{lizards} 5 | \alias{lizards} 6 | \title{Habitat preferences of lizards} 7 | \format{ 8 | A data frame with 23 rows and 6 columns: 9 | \itemize{ 10 | \item \code{grahami}. count of grahami lizards 11 | \item \code{opalinus}. count of opalinus lizards 12 | \item \code{height}. a factor with levels \verb{<5ft}, \verb{>=5ft} 13 | \item \code{diameter}. a factor with levels \verb{<=2in}, \verb{>2in} 14 | \item \code{light}. a factor with levels \code{sunny}, \code{shady} 15 | \item \code{time}. a factor with levels \code{early}, \code{midday}, \code{late} 16 | } 17 | 18 | The variables \code{grahami} and \code{opalinus} are counts of two lizard 19 | species at two different perch heights, two different perch 20 | diameters, in sun and in shade, at three times of day. 21 | } 22 | \source{ 23 | McCullagh P, Nelder J A (1989) \emph{Generalized Linear 24 | Models} (2nd Edition). London: Chapman and Hall. 25 | 26 | Originally from 27 | 28 | \if{html}{\out{
<div class="sourceCode">}}\preformatted{Schoener T W (1970) Nonsynchronous spatial overlap of lizards 29 | in patchy habitats. _Ecology_ *51*, 408-418. 30 | }\if{html}{\out{</div>
}} 31 | } 32 | \usage{ 33 | lizards 34 | } 35 | \description{ 36 | Habitat preferences of lizards 37 | } 38 | \seealso{ 39 | \code{\link[=brglm_fit]{brglm_fit()}} 40 | } 41 | \keyword{datasets} 42 | -------------------------------------------------------------------------------- /man/mis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mis-link.R 3 | \name{mis} 4 | \alias{mis} 5 | \title{A \code{\link[=make.link]{"link-glm"}} object for misclassified responses in binomial regression models} 6 | \usage{ 7 | mis(link = "logit", sensitivity = 1, specificity = 1) 8 | } 9 | \arguments{ 10 | \item{link}{the baseline link to be used.} 11 | 12 | \item{sensitivity}{the probability of observing a success given that a success actually took place given any covariate values.} 13 | 14 | \item{specificity}{the probability of observing a failure given that a failure actually took place given any covariate values.} 15 | } 16 | \description{ 17 | \code{\link[=mis]{mis()}} is a \code{\link[=make.link]{"link-glm"}} object that specifies the link function in Neuhaus (1999, expression (8)) for handling misclassified responses in binomial regression models using maximum likelihood. A prior specification of the sensitivity and specificity is required. 18 | } 19 | \details{ 20 | \code{sensitivity + specificity} should be greater or equal to 1, 21 | otherwise it is implied that the procedure producing the responses 22 | performs worse than chance in terms of misclassification. 23 | } 24 | \examples{ 25 | 26 | ## Define a few links with some misclassification 27 | logit_mis <- mis(link = "logit", sensitivity = 0.9, specificity = 0.9) 28 | 29 | lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time 30 | 31 | lizardsML <- glm(lizards_f, family = binomial(logit), data = lizards) 32 | 33 | lizardsML_mis <- update(lizardsML, family = binomial(logit_mis), 34 | start = coef(lizardsML)) 35 | 36 | ## A notable change is coefficients is noted here compared to when 37 | ## specificity and sensitity are 1 38 | coef(lizardsML) 39 | coef(lizardsML_mis) 40 | 41 | ## Bias reduction is also possible 42 | update(lizardsML_mis, method = "brglmFit", type = "AS_mean", 43 | start = coef(lizardsML)) 44 | 45 | update(lizardsML_mis, method = "brglmFit", type = "AS_median", 46 | start = coef(lizardsML)) 47 | 48 | } 49 | \references{ 50 | Neuhaus J M (1999). Bias and efficiency loss due to misclassified 51 | responses in binary regression. Biometrika, \strong{86}, 843-855. 52 | \url{https://www.jstor.org/stable/2673589}. 53 | } 54 | \seealso{ 55 | \code{\link[=glm]{glm()}}, \code{\link[=brglm_fit]{brglm_fit()}} 56 | } 57 | -------------------------------------------------------------------------------- /man/ordinal_superiority.bracl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinal_superiority.R 3 | \name{ordinal_superiority.bracl} 4 | \alias{ordinal_superiority.bracl} 5 | \alias{ordinal_superiority} 6 | \title{Ordinal superiority scores of Agresti and Kateri (2017)} 7 | \usage{ 8 | \method{ordinal_superiority}{bracl}( 9 | object, 10 | formula, 11 | data, 12 | measure = c("gamma", "Delta"), 13 | level = 0.95, 14 | bc = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{a fitted object from an ordinal regression 19 | model. 
Currently only models from class \code{\link[=bracl]{"bracl"}} are supported.} 20 | 21 | \item{formula}{a RHS formula indicating the group variable to use.} 22 | 23 | \item{data}{an optional data frame in which to look for variables 24 | with which to compute ordinal superiority measures. If 25 | omitted, an attempt is made to use the data that produced 26 | \code{object}.} 27 | 28 | \item{measure}{either \code{"gamma"} (default) or \code{"Delta"}, specifying 29 | the ordinal superiority measure to be returned.} 30 | 31 | \item{level}{the confidence level required when computing 32 | confidence intervals for the ordinal superiority measures.} 33 | 34 | \item{bc}{logical. If \code{FALSE} (default) then the ordinal 35 | superiority measures are computed using the estimates in 36 | \code{object}. If \code{TRUE} then the ordinal superiority measure 37 | estimates are corrected for mean bias.} 38 | } 39 | \description{ 40 | \code{\link[=ordinal_superiority]{ordinal_superiority()}} is a method for the estimation and 41 | inference about model-based ordinal superiority scores introduced 42 | in Agresti and Kateri (2017, Section 5) from fitted objects. The 43 | mean bias of the estimates of the ordinal superiority scores can be 44 | corrected. 45 | } 46 | \examples{ 47 | 48 | data("stemcell", package = "brglm2") 49 | 50 | # Adjacent category logit (proportional odds) 51 | stem <- within(stemcell, {nreligion = as.numeric(religion)}) 52 | fit_bracl_p <- bracl(research ~ nreligion + gender, weights = frequency, 53 | data = stem, type = "ML", parallel = TRUE) 54 | 55 | # Estimates and 95\% confidence intervals for the probabilities that the response 56 | # category for gender "female" is higher than the response category for gender "male", 57 | # while adjusting for religion. 58 | ordinal_superiority(fit_bracl_p, ~ gender) 59 | 60 | \dontrun{ 61 | # And their (very-similar in value here) bias corrected versions 62 | # with 99\% CIs 63 | ordinal_superiority(fit_bracl_p, ~ gender, bc = TRUE, level = 0.99) 64 | # Note that the object is refitted with type = "AS_mean" 65 | 66 | } 67 | 68 | } 69 | \references{ 70 | Agresti, A., Kateri, M. (2017). Ordinal probability effect measures 71 | for group comparisons in multinomial cumulative link models. 72 | \emph{Biometrics}, \strong{73} 214-219. \doi{10.1111/biom.12565}. 73 | } 74 | -------------------------------------------------------------------------------- /man/predict.bracl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bracl.R 3 | \name{predict.bracl} 4 | \alias{predict.bracl} 5 | \title{Predict method for \link{bracl} fits} 6 | \usage{ 7 | \method{predict}{bracl}(object, newdata, type = c("class", "probs"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted object of class inheriting from \code{\link[=bracl]{"bracl"}}.} 11 | 12 | \item{newdata}{optionally, a data frame in which to look for 13 | variables with which to predict. If omitted, the fitted linear 14 | predictors are used.} 15 | 16 | \item{type}{the type of prediction required. 
The default is 17 | \code{"class"}, which produces predictions of the response category 18 | at the covariate values supplied in \code{"newdata"}, selecting the 19 | category with the largest probability; the alternative 20 | \code{"probs"} returns all category probabilities at the covariate 21 | values supplied in \code{newdata}.} 22 | 23 | \item{...}{further arguments passed to or from other methods.} 24 | } 25 | \value{ 26 | If \code{type = "class"} a vector with the predicted response 27 | categories; if \code{type = "probs"} a matrix of probabilities for all 28 | response categories at \code{newdata}. 29 | } 30 | \description{ 31 | Obtain class and probability predictions from a fitted adjacent 32 | category logits model. 33 | } 34 | \details{ 35 | If \code{newdata} is omitted the predictions are based on the data 36 | used for the fit. 37 | } 38 | \examples{ 39 | 40 | data("stemcell", package = "brglm2") 41 | 42 | # Adjacent category logit (non-proportional odds) 43 | fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 44 | data = stemcell, type = "ML") 45 | # Adjacent category logit (proportional odds) 46 | fit_bracl_p <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 47 | data = stemcell, type = "ML", parallel = TRUE) 48 | 49 | # New data 50 | newdata <- expand.grid(gender = c("male", "female"), 51 | religion = c("liberal", "moderate", "fundamentalist")) 52 | 53 | # Predictions 54 | sapply(c("class", "probs"), function(what) predict(fit_bracl, newdata, what)) 55 | sapply(c("class", "probs"), function(what) predict(fit_bracl_p, newdata, what)) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /man/predict.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{predict.brmultinom} 4 | \alias{predict.brmultinom} 5 | \title{Predict method for \link{brmultinom} fits} 6 | \usage{ 7 | \method{predict}{brmultinom}(object, newdata, type = c("class", "probs"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted object of class inheriting from 11 | \code{\link[=brmultinom]{"brmultinom"}}.} 12 | 13 | \item{newdata}{optionally, a data frame in which to look for 14 | variables with which to predict. If omitted, the fitted linear 15 | predictors are used.} 16 | 17 | \item{type}{the type of prediction required. The default is 18 | \code{"class"}, which produces predictions of the response category 19 | at the covariate values supplied in \code{"newdata"}, selecting the 20 | category with the largest probability; the alternative 21 | \code{"probs"} returns all category probabilities at the covariate 22 | values supplied in \code{newdata}.} 23 | 24 | \item{...}{further arguments passed to or from other methods.} 25 | } 26 | \value{ 27 | If \code{type = "class"} a vector with the predicted response 28 | categories; if \code{type = "probs"} a matrix of probabilities for all 29 | response categories at \code{newdata}. 30 | } 31 | \description{ 32 | Obtain class and probability predictions from a fitted baseline 33 | category logits model. 34 | } 35 | \details{ 36 | If \code{newdata} is omitted the predictions are based on the data used 37 | for the fit. 
38 | } 39 | \examples{ 40 | 41 | data("housing", package = "MASS") 42 | 43 | # Maximum likelihood using brmultinom with baseline category 'Low' 44 | houseML1 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 45 | data = housing, type = "ML", ref = 1) 46 | 47 | # New data 48 | newdata <- expand.grid(Infl = c("Low", "Medium"), 49 | Type = c("Tower", "Atrium", "Terrace"), 50 | Cont = c("Low", NA, "High")) 51 | 52 | ## Predictions 53 | sapply(c("class", "probs"), function(what) predict(houseML1, newdata, what)) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/residuals.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R, R/zzz_conventions.R 3 | \name{residuals.brmultinom} 4 | \alias{residuals.brmultinom} 5 | \alias{residuals.bracl} 6 | \title{Residuals for multinomial logistic regression and adjacent category logit models} 7 | \usage{ 8 | \method{residuals}{brmultinom}(object, type = c("pearson", "response", "deviance", "working"), ...) 9 | 10 | \method{residuals}{bracl}(object, type = c("pearson", "response", "deviance", "working"), ...) 11 | } 12 | \arguments{ 13 | \item{object}{the object coming out of \code{\link[=bracl]{bracl()}} and 14 | \code{\link[=brmultinom]{brmultinom()}}.} 15 | 16 | \item{type}{the type of residuals which should be returned. The 17 | options are: \code{"pearson"} (default), \code{"response"}, \code{"deviance"}, 18 | \code{"working"}. See Details.} 19 | 20 | \item{...}{Currently not used.} 21 | } 22 | \description{ 23 | Residuals for multinomial logistic regression and adjacent category logit models 24 | } 25 | \details{ 26 | The residuals computed are the residuals from the equivalent 27 | Poisson log-linear model fit, organized in a form that matches the 28 | output of \code{fitted(object, type = "probs")}. As a result, the output 29 | is residuals defined in terms of the object and expected 30 | multinomial counts. 31 | } 32 | \seealso{ 33 | brmultinom bracl 34 | } 35 | -------------------------------------------------------------------------------- /man/simulate.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{simulate.brmultinom} 4 | \alias{simulate.brmultinom} 5 | \title{Method for simulating a data set from \code{\link[=brmultinom]{"brmultinom"}} and \code{\link[=bracl]{"bracl"}} 6 | objects} 7 | \usage{ 8 | \method{simulate}{brmultinom}(object, ...) 9 | } 10 | \arguments{ 11 | \item{object}{an object of class \code{\link[=brmultinom]{"brmultinom"}} or \code{\link[=bracl]{"bracl"}}.} 12 | 13 | \item{...}{currently not used.} 14 | } 15 | \value{ 16 | A \code{\link[=data.frame]{"data.frame"}} with \code{object$ncat} times the rows that 17 | \code{model.frame(object)} have and the same variables. If \code{weights} has 18 | been specified in the call that generated \code{object}, then the 19 | simulate frequencies will populate the weights variable. Otherwise, 20 | the resulting \link{data.frame} will have a \code{".weights"} variable with 21 | the simulated multinomial counts. 
22 | } 23 | \description{ 24 | Method for simulating a data set from \code{\link[=brmultinom]{"brmultinom"}} and \code{\link[=bracl]{"bracl"}} 25 | objects 26 | } 27 | \examples{ 28 | 29 | ## Multinomial logistic regression 30 | data("housing", package = "MASS") 31 | houseML1 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 32 | data = housing, type = "ML", ref = 1) 33 | simulate(houseML1) 34 | 35 | ## Adjacent-category logits 36 | data("stemcell", package = "brglm2") 37 | stemML1 <- bracl(research ~ religion + gender, weights = frequency, 38 | data = stemcell, type = "ML") 39 | 40 | simulate(stemML1) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/simulate.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{simulate.brnb} 4 | \alias{simulate.brnb} 5 | \title{Simulate Responses} 6 | \usage{ 7 | \method{simulate}{brnb}(object, nsim = 1, seed = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object representing a fitted model.} 11 | 12 | \item{nsim}{number of response vectors to simulate. Defaults to 1.} 13 | 14 | \item{seed}{an object specifying if and how the random number 15 | generator should be initialized; see \code{\link[=set.seed]{set.seed()}} for details.} 16 | 17 | \item{...}{extra arguments to be passed to methods. Not currently 18 | used.} 19 | } 20 | \description{ 21 | Simulate one or more responses from the distribution corresponding 22 | to a fitted model \code{\link[=brnb]{"brnb"}} object. 23 | } 24 | \examples{ 25 | # Example in Saha, K., & Paul, S. (2005). Bias-corrected maximum 26 | # likelihood estimator of the negative binomial dispersion 27 | # parameter. Biometrics, 61, 179--185. 28 | # 29 | # Frequency distribution of red mites on apple leaves. 
30 | nomites <- 0:8 31 | noleaves <- c(70, 38, 17, 10, 9, 3, 2, 1, 0) 32 | fit_glmnb <- MASS::glm.nb(nomites~1,link="identity",weights = noleaves) 33 | fit_brnb <- brnb(nomites ~ 1, link = "identity", transformation = "inverse", 34 | type = "ML",weights = noleaves) 35 | ## Let us simulate 10 response vectors 36 | sim_glmnb <- simulate(fit_glmnb, nsim = 10, seed = 123) 37 | sim_brnb <- simulate(fit_brnb, nsim = 10, seed = 123) 38 | # The results from glm.nb and brnb with type = "ML" are 39 | # exactly the same 40 | all.equal(sim_glmnb, sim_brnb, check.attributes = FALSE) 41 | } 42 | -------------------------------------------------------------------------------- /man/stemcell.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{stemcell} 5 | \alias{stemcell} 6 | \title{Opinion on Stem Cell Research and Religious Fundamentalism} 7 | \format{ 8 | A data frame with 24 rows and 4 variables: 9 | \itemize{ 10 | \item \code{research}: opinion about funding stem cell research with levels \code{definitely}, \code{probably}, \verb{probably not}, \verb{definitely not} 11 | \item \code{gender}: the gender of the respondent with levels \code{female} and \code{male} 12 | \item \code{religion}: the fundamentalism/liberalism of one’s religious beliefs with levels \code{fundamentalist}, \code{moderate}, 13 | \code{liberal} 14 | \code{frequency}: the number of times a respondent fell in each of the combinations of levels for \code{research}, \code{religion} and \code{gender} 15 | } 16 | } 17 | \source{ 18 | The \code{stemcell} data set is analyzed in Agresti (2010, Subsection 4.1.5). 19 | } 20 | \usage{ 21 | stemcell 22 | } 23 | \description{ 24 | A data set from the 2006 General Social Survey that shows the 25 | relationship in the United States between opinion about funding 26 | stem cell research and the fundamentalism/liberalism of one’s 27 | religious beliefs, stratified by gender. 28 | } 29 | \references{ 30 | Agresti A (2010). \emph{Analysis of Ordinal Categorical Data} (2nd edition). Wiley Series in 31 | Probability and Statistics. Wiley. 32 | } 33 | \seealso{ 34 | \code{\link[=bracl]{bracl()}} 35 | } 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/summary.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{summary.brglmFit} 4 | \alias{summary.brglmFit} 5 | \alias{print.summary.brglmFit} 6 | \title{\code{\link[=summary]{summary()}} method for \link{brglmFit} objects} 7 | \usage{ 8 | \method{summary}{brglmFit}( 9 | object, 10 | dispersion = NULL, 11 | correlation = FALSE, 12 | symbolic.cor = FALSE, 13 | ... 14 | ) 15 | 16 | \method{print}{summary.brglmFit}( 17 | x, 18 | digits = max(3L, getOption("digits") - 3L), 19 | symbolic.cor = x$symbolic.cor, 20 | signif.stars = getOption("show.signif.stars"), 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{object}{an object of class \code{"glm"}, usually, a result of a 26 | call to \code{\link[stats]{glm}}.} 27 | 28 | \item{dispersion}{the dispersion parameter for the family used. 
29 | Either a single numerical value or \code{NULL} (the default), when 30 | it is inferred from \code{object} (see \sQuote{Details}).} 31 | 32 | \item{correlation}{logical; if \code{TRUE}, the correlation matrix of 33 | the estimated parameters is returned and printed.} 34 | 35 | \item{symbolic.cor}{logical. If \code{TRUE}, print the correlations in 36 | a symbolic form (see \code{\link[stats]{symnum}}) rather than as numbers.} 37 | 38 | \item{...}{further arguments passed to or from other methods.} 39 | 40 | \item{x}{an object of class \code{"summary.glm"}, usually, a result of a 41 | call to \code{summary.glm}.} 42 | 43 | \item{digits}{the number of significant digits to use when printing.} 44 | 45 | \item{signif.stars}{logical. If \code{TRUE}, \sQuote{significance stars} 46 | are printed for each coefficient.} 47 | } 48 | \description{ 49 | \code{\link[=summary]{summary()}} method for \link{brglmFit} objects 50 | } 51 | \details{ 52 | The interface of the summary method for \code{\link[=brglmFit]{"brglmFit"}} 53 | objects is identical to that of \code{\link[=glm]{"glm"}} objects. The summary 54 | method for \code{\link[=brglmFit]{"brglmFit"}} objects computes the p-values of the 55 | individual Wald statistics based on the standard normal 56 | distribution, unless the family is Gaussian, in which case a t 57 | distribution with appropriate degrees of freedom is used. 58 | } 59 | \examples{ 60 | ## For examples see `examples(brglmFit)` 61 | 62 | } 63 | \seealso{ 64 | \code{\link[=summary.glm]{summary.glm()}} and \code{\link[=glm]{glm()}} 65 | } 66 | -------------------------------------------------------------------------------- /man/summary.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{summary.brnb} 4 | \alias{summary.brnb} 5 | \alias{print.summary.brnb} 6 | \title{\code{\link[=summary]{summary()}} method for \code{\link[=brnb]{"brnb"}} objects} 7 | \usage{ 8 | \method{summary}{brnb}(object, ...) 9 | 10 | \method{print}{summary.brnb}(x, digits = max(3, getOption("digits") - 3), ...) 11 | } 12 | \arguments{ 13 | \item{object}{an object of class \code{\link[=brnb]{"brnb"}}, typically, a 14 | result of a call to \code{\link[=brnb]{brnb()}}.} 15 | 16 | \item{...}{further arguments passed to or from other methods.} 17 | 18 | \item{x}{an object of class \code{\link[=summary.brnb]{"summary.brnb"}}, 19 | usually, a result of a call to \link{summary.brnb}.} 20 | 21 | \item{digits}{the number of significant digits to use when printing.} 22 | } 23 | \description{ 24 | \code{\link[=summary]{summary()}} method for \code{\link[=brnb]{"brnb"}} objects 25 | } 26 | \details{ 27 | The interface of the summary method for \code{\link[=brnb]{"brnb"}} 28 | objects is similar to that of \code{\link[=brglmFit]{"brglmFit"}} objects 29 | with additional information. 30 | 31 | \if{html}{\out{
<div class="sourceCode">}}\preformatted{p-values of the individual Wald statistics are based on the 32 | standard normal distribution. 33 | }\if{html}{\out{</div>
}} 34 | } 35 | \examples{ 36 | # For examples see examples(brnb) 37 | 38 | } 39 | \seealso{ 40 | \code{\link[=summary.brglmFit]{summary.brglmFit()}} and \code{\link[=glm]{glm()}} 41 | } 42 | -------------------------------------------------------------------------------- /man/vcov.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{vcov.brglmFit} 4 | \alias{vcov.brglmFit} 5 | \title{Return the variance-covariance matrix for the regression parameters 6 | in a \code{\link[=brglmFit]{brglmFit()}} object} 7 | \usage{ 8 | \method{vcov}{brglmFit}(object, model = c("mean", "full", "dispersion"), complete = TRUE, ...) 9 | } 10 | \arguments{ 11 | \item{object}{ 12 | a fitted model object, typically. Sometimes also a 13 | \code{\link{summary}()} object of such a fitted model. 14 | } 15 | 16 | \item{model}{character specifying for which component of the model coefficients should be extracted.} 17 | 18 | \item{complete}{for the \code{aov}, \code{lm}, \code{glm}, \code{mlm}, and where 19 | applicable \code{summary.lm} etc methods: logical indicating if the 20 | full variance-covariance matrix should be returned also in case of 21 | an over-determined system where some coefficients are undefined and 22 | \code{\link[stats]{coef}(.)} contains \code{NA}s correspondingly. When 23 | \code{complete = TRUE}, \code{vcov()} is compatible with 24 | \code{coef()} also in this singular case.} 25 | 26 | \item{...}{ 27 | additional arguments for method functions. For the 28 | \code{\link[stats]{glm}} method this can be used to pass a 29 | \code{dispersion} parameter.} 30 | } 31 | \description{ 32 | Return the variance-covariance matrix for the regression parameters 33 | in a \code{\link[=brglmFit]{brglmFit()}} object 34 | } 35 | \details{ 36 | The options for \code{model} are \code{"mean"} for mean regression parameters 37 | only (default), \code{"dispersion"} for the dispersion parameter (or the 38 | transformed dispersion; see \code{\link[=brglm_control]{brglm_control()}}), and \code{"full"} for 39 | both the mean regression and the (transformed) dispersion 40 | parameters. 41 | } 42 | -------------------------------------------------------------------------------- /man/vcov.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{vcov.brnb} 4 | \alias{vcov.brnb} 5 | \title{Extract model variance-covariance matrix from \code{\link[=brnb]{"brnb"}} objects} 6 | \usage{ 7 | \method{vcov}{brnb}(object, model = c("mean", "full", "dispersion"), complete = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{\link[=brnb]{"brnb"}}, typically, a result of a call to \code{\link[=brnb]{brnb()}}.} 11 | 12 | \item{model}{character specifying for which component of the model variance-covariance matrix should be extracted.} 13 | 14 | \item{complete}{for the \code{aov}, \code{lm}, \code{glm}, \code{mlm}, and where 15 | applicable \code{summary.lm} etc methods: logical indicating if the 16 | full variance-covariance matrix should be returned also in case of 17 | an over-determined system where some coefficients are undefined and 18 | \code{\link[stats]{coef}(.)} contains \code{NA}s correspondingly. 
When 19 | \code{complete = TRUE}, \code{vcov()} is compatible with 20 | \code{coef()} also in this singular case.} 21 | 22 | \item{...}{ 23 | additional arguments for method functions. For the 24 | \code{\link[stats]{glm}} method this can be used to pass a 25 | \code{dispersion} parameter.} 26 | } 27 | \description{ 28 | Extract model variance-covariance matrix from \code{\link[=brnb]{"brnb"}} objects 29 | } 30 | \details{ 31 | The options for \code{model} are \code{"mean"} for mean regression only 32 | (default), \code{"dispersion"} for the dispersion parameter (in a 33 | chosen transformation; see \code{\link[=brglmControl]{brglmControl()}}, and \code{"full"} for both 34 | the mean regression and the (transformed) dispersion parameters. 35 | See \code{\link[=vcov]{vcov()}} for more details. 36 | } 37 | \seealso{ 38 | \code{\link[=vcov]{vcov()}} 39 | } 40 | -------------------------------------------------------------------------------- /src/expectations.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void expectedValues(double *mu, 6 | double *k, 7 | int *ymax, 8 | int *n, 9 | double *Es2, 10 | double *Es2y, 11 | double *Es1s2, 12 | double *Es3) { 13 | 14 | 15 | int r, s; 16 | double temp1, temp2, temp3, temp4, dens; 17 | double tempa, tempb, tempc; 18 | for(r = 0; r < *n; r++) { 19 | temp1 = 0.0; 20 | temp2 = 0.0; 21 | temp3 = 0.0; 22 | temp4 = 0.0; 23 | tempa = 0.0; 24 | tempb = 0.0; 25 | tempc = 0.0; 26 | for(s = 0; s <= *ymax; s++) { 27 | if (s > 1 ) { 28 | tempa += (s-1) / (*k * (s-1) + 1); 29 | tempb += R_pow_di((s-1), 2) / R_pow_di((*k * (s-1) + 1), 2); 30 | tempc += R_pow_di((s-1), 3) / R_pow_di((*k * (s-1) + 1), 3); 31 | } 32 | dens = dnbinom_mu(s, 1/k[0], mu[r], 0); 33 | temp1 += dens*tempa*tempb; 34 | temp2 += dens*tempb; 35 | temp3 += dens*tempc; 36 | temp4 += dens*tempb*s; 37 | } 38 | Es2[r] = temp2; 39 | Es2y[r] = temp4; 40 | Es3[r] = temp3; 41 | Es1s2[r] = temp1; 42 | } 43 | } 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/hats.c: -------------------------------------------------------------------------------- 1 | void hatsc (int* n, int* p, double* x, double* invfisherinf, double* w, double* hat) 2 | { 3 | int i, j, o, pos1, pos2, pos3; 4 | double summi ; /*summ[*n],*/ 5 | for (o=0;o<*n;o++) { 6 | /*summ[o] = 0 ;*/ 7 | summi = 0; 8 | pos1 = o * *p ; 9 | for (i=0;i<*p;i++) { 10 | pos2 = i * *p; 11 | pos3 = pos1 + i; 12 | for (j=0;j<*p;j++) { 13 | /*summ[o] += x[o * *p + i]*x[o * *p + j]*invfisherinf[j + *p *i]; */ 14 | /*summi += x[o * *p + i]*x[o * *p + j]*invfisherinf[j + *p *i];*/ 15 | summi += x[pos3]*x[pos1 + j]*invfisherinf[j + pos2]; 16 | } 17 | } 18 | /*hat[o] = summ[o] * w[o];*/ 19 | hat[o] = summi * w[o]; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include // for NULL 2 | #include 3 | 4 | /* .C calls */ 5 | extern void expectedValues(void *, void *, void *, void *, void *, void *, void *, void *); 6 | 7 | static const R_CMethodDef CEntries[] = { 8 | {"expectedValues", (DL_FUNC) &expectedValues, 8}, 9 | {NULL, NULL, 0} 10 | }; 11 | 12 | void R_init_brglm2(DllInfo *dll) 13 | { 14 | R_registerRoutines(dll, CEntries, NULL, NULL, NULL); 15 | R_useDynamicSymbols(dll, FALSE); 16 | } 17 | -------------------------------------------------------------------------------- 
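The registration in init.c makes the C routine `expectedValues` callable from R through the `.C()` interface. The following is a rough, hypothetical sketch of such a call (the argument values are made up and this is not necessarily how the package invokes the routine internally); the arguments follow the C signature above, with the last four vectors acting as output buffers:

```r
## Illustrative only: mock inputs matching the signature of expectedValues()
library(brglm2)                 # loads the shared object that registers the routine
n <- 5L
mu <- rep(2.5, n)               # negative binomial means, one per observation
k <- 0.4                        # dispersion parameter (scalar)
ymax <- 100L                    # upper limit of the sum over response values
out <- .C("expectedValues",
          mu = as.double(mu), k = as.double(k),
          ymax = as.integer(ymax), n = as.integer(n),
          Es2 = double(n), Es2y = double(n), Es1s2 = double(n), Es3 = double(n),
          PACKAGE = "brglm2")
str(out[c("Es2", "Es2y", "Es1s2", "Es3")])  # the computed expectations
```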
/tests/tinytest.R: -------------------------------------------------------------------------------- 1 | 2 | if ( requireNamespace("tinytest", quietly=TRUE) ){ 3 | tinytest::test_package("brglm2") 4 | } 5 | 6 | -------------------------------------------------------------------------------- /vignettes/adjacent.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Adjacent category logit models using **brglm2**" 3 | author: "[Ioannis Kosmidis](https://www.ikosmidis.com)" 4 | date: "05 February 2019" 5 | output: rmarkdown::html_vignette 6 | bibliography: brglm2.bib 7 | nocite: | 8 | @kosmidis:2019 9 | vignette: > 10 | %\VignetteIndexEntry{Adjacent category logit models using **brglm2**} 11 | %\VignetteEngine{knitr::rmarkdown} 12 | %\VignetteEncoding{UTF-8} 13 | --- 14 | 15 | ```{r setup, include = FALSE} 16 | knitr::opts_chunk$set( 17 | collapse = TRUE, 18 | comment = "#>", 19 | fig.width = 6, 20 | fig.height = 6 21 | ) 22 | ``` 23 | 24 | # **bracl** 25 | The [**brglm2**](https://github.com/ikosmidis/brglm2) R package provides `bracl()` which is a wrapper of `brglmFit()` for fitting adjacent category models for ordinal responses using either maximum likelihood or maximum penalized likelihood or any of the various bias reduction methods described in `brglmFit()`. There is a formal equivalence between adjacent category logit models for ordinal responses and multinomial logistic regression models (see, e.g. the [Multinomial logistic regression using brglm2](https://cran.r-project.org/package=brglm2/vignettes/multinomial.html) vignette and the `brmultinom()` function). `bracl()` utilizes that equivalence and fits the corresponding Poisson log-linear model, by appropriately re-scaling the Poisson means to match the multinomial totals (a.k.a. the "Poisson trick"). The mathematical details and algorithm on using the Poisson trick for mean-bias reduction are given in @kosmidis:11. 26 | 27 | 28 | 29 | # Citation 30 | If you found this vignette or **brglm2**, in general, useful, please consider citing **brglm2** and the associated paper. You can find information on how to do this by typing `citation("brglm2")`. 31 | 32 | # Opinion on stem cell research and religious fundamentalism 33 | 34 | The `stemcell` data set ships with **brglm2**. @agresti:15[, Section 4.1] provides a detailed description of the variables recorded in this data set (see also `?stemcell`). 35 | ```{r, echo = TRUE} 36 | library("brglm2") 37 | data("stemcell", package = "brglm2") 38 | stem <- within(stemcell, religion <- as.numeric(religion)) 39 | ``` 40 | ## Maximum likelihood estimation 41 | The following chunk of code fits an adjacent category logit model with proportional odds and reproduces @agresti:10[, Table 4.2]. Note that the intercept parameters are different because @agresti:10[, Table 4.2] uses different contrasts for the intercept parameters. 42 | ```{r, echo = TRUE} 43 | stem_formula <- research ~ religion + gender 44 | stemcells_ml <- bracl(stem_formula, weights = frequency, data = stem, 45 | parallel = TRUE, type = "ML") 46 | summary(stemcells_ml) 47 | ``` 48 | `stemcells_ml` is an object inheriting from 49 | ```{r, echo = TRUE} 50 | class(stemcells_ml) 51 | ``` 52 | **brglm2** implements `print`, `coef`, `fitted`, `predict`, `summary`, `vcov` and `logLik` methods for objects of class `"bracl"`. 53 | 54 | 55 | We can check if a model with non-proportional odds fits the data equally well by fitting it and carrying out a likelihood ratio test. 
56 | ```{r, echo = TRUE} 57 | stemcells_ml_full <- bracl(stem_formula, weights = frequency, data = stemcell, 58 | parallel = FALSE, type = "ML") 59 | summary(stemcells_ml_full) 60 | ``` 61 | The value of the log likelihood ratio statistic here is 62 | ```{r, echo = TRUE} 63 | (lrt <- deviance(stemcells_ml) - deviance(stemcells_ml_full)) 64 | ``` 65 | and has an asymptotic chi-squared distribution with 66 | ```{r, echo = TRUE} 67 | (df1 <- df.residual(stemcells_ml) - df.residual(stemcells_ml_full)) 68 | ``` 69 | The p-value from testing the hypothesis that `stemcells_ml_full` is an as good fit as `stemcells_ml` is 70 | ```{r, echo = TRUE} 71 | pchisq(lrt, df1, lower.tail = FALSE) 72 | ``` 73 | hence, the simpler model is found to be as adequate as the full model is. 74 | 75 | # Mean and median bias reduction 76 | We can use `bracl()` to fit the adjacent category model using estimators with smaller mean or median bias. For mean bias reduction we do 77 | ```{r, echo = TRUE} 78 | summary(update(stemcells_ml, type = "AS_mean")) 79 | ``` 80 | and for median 81 | ```{r, echo = TRUE} 82 | summary(update(stemcells_ml, type = "AS_median")) 83 | ``` 84 | The estimates from mean and median bias reduction are similar to the maximum likelihood ones, indicating that estimation bias is not a major issue here. 85 | 86 | # Prediction 87 | We can predict the category probabilities using the `predict()` method 88 | ```{r, echo = TRUE} 89 | predict(stemcells_ml, type = "probs") 90 | ``` 91 | 92 | # Relevant resources 93 | `?brglmFit` and `?brglm_control` provide descriptions of the various bias reduction methods supported in **brglm2**. The [`iteration`](https://cran.r-project.org/package=brglm2/brglm2.pdf) vignette describes the iteration and gives the mathematical details for the bias-reducing adjustments to the score functions for generalized linear models. 94 | 95 | 96 | # Citation 97 | If you found this vignette or **brglm2**, in general, useful, please consider citing **brglm2** and the associated paper. You can find information on how to do this by typing `citation("brglm2")`. 98 | 99 | 100 | # References 101 | -------------------------------------------------------------------------------- /vignettes/brglm2.bib: -------------------------------------------------------------------------------- 1 | @Article{kosmidis:2019, 2 | title = {Mean and median bias reduction in generalized linear models}, 3 | author = {Ioannis Kosmidis and Euloge Clovis {Kenne Pagui} and Nicola Sartori}, 4 | year = 2020, 5 | journal = {Statistics and Computing}, 6 | volume = 30, 7 | pages = {43--59}, 8 | url = {https://doi.org/10.1007/s11222-019-09860-6}, 9 | } 10 | 11 | @Book{agresti:02, 12 | Title = {Categorical Data Analysis}, 13 | Author = {Agresti, A.}, 14 | Publisher = {Wiley}, 15 | Year = 2002 16 | } 17 | 18 | @Book{agresti:15, 19 | title = {Foundations of Linear and Generalized Linear Models}, 20 | publisher = {Wiley}, 21 | year = 2015, 22 | author = {Agresti, A.}, 23 | series = {Wiley Series in Probability and Statistics} 24 | } 25 | 26 | @Book{agresti:10, 27 | title = {Analysis of Ordinal Categorical Data}, 28 | publisher = {Wiley}, 29 | edition = {2nd}, 30 | year = 2010, 31 | author = {Agresti, A.}, 32 | series = {Wiley Series in Probability and Statistics} 33 | } 34 | 35 | @Article{albert:84, 36 | title = {On the existence of maximum likelihood estimates in 37 | logistic regression models}, 38 | author = {Albert, A. and Anderson, J. 
A.}, 39 | journal = {Biometrika}, 40 | year = 1984, 41 | number = 1, 42 | pages = {1--10}, 43 | volume = 71 44 | } 45 | 46 | @Article{cordeiro:91, 47 | Title = {Bias correction in generalized linear models}, 48 | Author = {Cordeiro, G.~M. and McCullagh, P.}, 49 | Journal = {Journal of the Royal Statistical Society, Series B: 50 | Methodological}, 51 | Year = 1991, 52 | Number = 3, 53 | Pages = {629--643}, 54 | Volume = 53 55 | } 56 | 57 | @Article{cox:68, 58 | Title = {A General Definition of Residuals (with Discussion)}, 59 | Author = {Cox, D. R. and Snell, E. J.}, 60 | Journal = {Journal of the Royal Statistical Society, Series B: 61 | Methodological}, 62 | Year = 1968, 63 | Pages = {248--275}, 64 | Volume = 30 65 | } 66 | 67 | @Article{firth:93, 68 | Title = {Bias Reduction of maximum likelihood estimates}, 69 | Author = {Firth, D.}, 70 | Journal = Biometrika, 71 | Year = 1993, 72 | Number = 1, 73 | Pages = {27--38}, 74 | Volume = 80 75 | } 76 | 77 | @Article{heinze:02, 78 | Title = {A solution to the problem of separation in logistic 79 | regression}, 80 | Author = {Heinze, G. and Schemper, M.}, 81 | Journal = {Statistics in Medicine}, 82 | Year = 2002, 83 | Pages = {2409--2419}, 84 | Volume = 21 85 | } 86 | 87 | @Article{kenne:17, 88 | title = {Median bias reduction of maximum likelihood 89 | estimates}, 90 | author = {Kenne Pagui, E. C. and Salvan, A. and Sartori, N.}, 91 | journal = {Biometrika}, 92 | volume = 104, 93 | issue = 4, 94 | pages = {923-938}, 95 | year = 2017, 96 | Url = {https://doi.org/10.1093/biomet/asx046} 97 | } 98 | 99 | @PhdThesis{konis:07, 100 | author = {Kjell Konis}, 101 | title = {Linear programming algorithms for detecting 102 | separated data in binary logistic regression models}, 103 | school = {University of Oxford}, 104 | year = 2007, 105 | type = {DPhil}, 106 | url = 107 | {https://ora.ox.ac.uk/objects/uuid:8f9ee0d0-d78e-4101-9ab4-f9cbceed2a2a} 108 | } 109 | 110 | @PhdThesis{kosmidis:07, 111 | Title = {Bias reduction in exponential family nonlinear 112 | models}, 113 | Author = {Kosmidis, I.}, 114 | School = {Department of Statistics, University of Warwick}, 115 | Year = 2007, 116 | Url = 117 | {http://www.ucl.ac.uk/~ucakiko/files/ikosmidis_thesis.pdf} 118 | } 119 | 120 | @Article{kosmidis:09, 121 | Title = {Bias reduction in exponential family nonlinear 122 | models}, 123 | Author = {Kosmidis, I. and Firth, D.}, 124 | Journal = {Biometrika}, 125 | Year = 2009, 126 | Number = 4, 127 | Pages = {793--804}, 128 | Volume = 96, 129 | Doi = {10.1093/biomet/asp055}, 130 | Eprint = 131 | {http://biomet.oxfordjournals.org/cgi/reprint/96/4/793.pdf}, 132 | Url = 133 | {http://biomet.oxfordjournals.org/cgi/content/abstract/96/4/793} 134 | } 135 | 136 | @Article{kosmidis:10, 137 | Title = {A generic algorithm for reducing bias in parametric 138 | estimation}, 139 | Author = {Kosmidis, I. and Firth, D.}, 140 | Journal = {Electronic Journal of Statistics}, 141 | Year = 2010, 142 | Pages = {1097--1112}, 143 | Volume = 4, 144 | Doi = {10.1214/10-EJS579}, 145 | Url = {https://dx.doi.org/10.1214/10-EJS579} 146 | } 147 | 148 | @Article{kosmidis:11, 149 | Title = {Multinomial logit bias reduction via the Poisson 150 | log-linear model}, 151 | Author = {Kosmidis, I. 
and Firth, D.}, 152 | Journal = {Biometrika}, 153 | Year = 2011, 154 | Number = 3, 155 | Pages = {755-759}, 156 | Volume = 98, 157 | Owner = {yiannis}, 158 | Timestamp = {2010.05.16} 159 | } 160 | 161 | @Article{kosmidis:14, 162 | Title = {Bias in parametric estimation: reduction and useful 163 | side-effects}, 164 | Author = {Kosmidis, I.}, 165 | Journal = {Wiley Interdisciplinary Reviews: Computational 166 | Statistics}, 167 | Year = 2014, 168 | Number = 3, 169 | Pages = {185--196}, 170 | Volume = 6, 171 | Doi = {10.1002/wics.1296}, 172 | ISSN = {1939-0068}, 173 | Keywords = {jackknife/bootstrap, indirect inference, penalized 174 | likelihood, infinite estimates, separation in models 175 | with categorical responses}, 176 | Publisher = {John Wiley \& Sons, Inc.}, 177 | Url = {https://doi.org/10.1002/wics.1296} 178 | } 179 | 180 | @Article{kosmidis:14a, 181 | Title = {Improved estimation in cumulative link models}, 182 | Author = {Kosmidis, I.}, 183 | Journal = {Journal of the Royal Statistical Society, Series B: 184 | Methodological}, 185 | Year = 2014, 186 | Number = 1, 187 | Pages = {169--196}, 188 | Volume = 76, 189 | Doi = {10.1111/rssb.12025}, 190 | ISSN = {1467-9868}, 191 | Keywords = {Adjusted counts, Adjusted score equations, Ordinal 192 | response models, Reduction of bias, Shrinkage}, 193 | Url = {https://doi.org/10.1111/rssb.12025} 194 | } 195 | 196 | @article{lesaffre:89, 197 | URL = {https://www.jstor.org/stable/2345845}, 198 | author = {E. Lesaffre and A. Albert}, 199 | journal = {Journal of the Royal Statistical Society. Series B 200 | (Methodological)}, 201 | number = 1, 202 | pages = {109-116}, 203 | publisher = {[Royal Statistical Society, Wiley]}, 204 | title = {Partial Separation in Logistic Discrimination}, 205 | volume = 51, 206 | year = 1989 207 | } 208 | 209 | @book{mccullagh:89, 210 | author = "McCullagh, P. and Nelder, J. A.", 211 | year = 1989, 212 | edition = {2nd}, 213 | title = "Generalized Linear Models", 214 | address = "London", 215 | publisher = "Chapman and Hall" 216 | } 217 | 218 | @article{kenne:20, 219 | Author = {{Kenne Pagui}, E. C. and Salvan, A. and Sartori, N.}, 220 | Journal = {eprint arXiv:2011.02784}, 221 | Title = {Accurate inference in negative binomial regression}, 222 | Url = {https://arxiv.org/abs/2011.02784}, 223 | Year = 2020 224 | } 225 | 226 | @article{magolin:89, 227 | author = {Barry H. Margolin and Byung Soo Kim and Kenneth J. 
Risko}, 228 | title = {The Ames Salmonella/Microsome Mutagenicity Assay: 229 | Issues of Inference and Validation}, 230 | journal = {Journal of the American Statistical Association}, 231 | volume = 84, 232 | number = 407, 233 | pages = {651-661}, 234 | year = 1989, 235 | doi = {10.1080/01621459.1989.10478817} 236 | } 237 | 238 | @article{kosmidis+firth:21, 239 | author = {Kosmidis, Ioannis and Firth, David}, 240 | title = "{Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models}", 241 | journal = {Biometrika}, 242 | volume = {108}, 243 | number = {1}, 244 | pages = {71-82}, 245 | year = {2020}, 246 | url = {https://doi.org/10.1093/biomet/asaa052} 247 | } 248 | -------------------------------------------------------------------------------- /vignettes/expo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Estimating the exponential of regression parameters using **brglm2**" 3 | author: "[Ioannis Kosmidis](https://www.ikosmidis.com)" 4 | date: "03 February 2023" 5 | output: rmarkdown::html_vignette 6 | bibliography: brglm2.bib 7 | vignette: > 8 | %\VignetteIndexEntry{Estimating the exponential of regression parameters using **brglm2**} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | comment = "#>", 17 | fig.width = 6, 18 | fig.height = 6 19 | ) 20 | ``` 21 | 22 | # The `expo()` method 23 | 24 | The [**brglm2**](https://github.com/ikosmidis/brglm2) R package provides the `expo()` method for estimating exponentiated parameters of generalized linear models using various methods. 25 | 26 | The `expo()` method uses a supplied `"brglmFit"` or `"glm"` object to estimate exponentiated parameters of generalized linear models with maximum likelihood or various mean and median bias reduction methods. `expo()` is useful for computing (corrected) estimates of the multiplicative impact of a unit increase on a covariate on the mean of a Poisson log-linear model (`family = poisson("log")` in `glm()`) while adjusting for other covariates, the odds ratio associated with a unit increase on a covariate in a logistic regression model (`family = binomial("logit")` in `glm()`) while adjusting for other covariates, the relative risk associated with a unit increase on a covariate in a relative risk regression model (`family = binomial("log")` in `glm()`) while adjusting for other covariates, among others. 27 | 28 | The vignette demonstrates the use of `expo()` and the associated methods by reproducing part of the analyses in @agresti:02[, Section 5.4.2] on the effects of AZT in slowing the development of AIDS symptoms. 29 | 30 | # AIDS and AZT use 31 | 32 | The data analyzed in @agresti:02[, Section 5.4.2] is from a 3-year 33 | study on the effects of AZT in slowing the development of AIDS 34 | symptoms. 338 veterans whose immune systems were beginning to falter 35 | after infection with the AIDS virus were randomly assigned either to 36 | receive AZT immediately or to wait until their T cells showed severe 37 | immune weakness. See `?aids` for more details. 38 | 39 | The `aids` data set cross-classifies the veterans' race (`race`), whether they received AZT immediately (`AZT`), and whether they developed AIDS symptoms during the 3-year study (`symptomatic` and `asymptomatic`). 
40 | ```{r, echo = TRUE} 41 | library("brglm2") 42 | data("aids", package = "brglm2") 43 | aids 44 | ``` 45 | 46 | We now use a logistic regression model to model the probability of developing symptoms in terms of `AZT` and `race`, and reproduce part of the compute output in @agresti:02[, Table 5.6]. 47 | ```{r, echo = TRUE} 48 | aids_mod <- glm(cbind(symptomatic, asymptomatic) ~ AZT + race, 49 | family = binomial(), data = aids) 50 | summary(aids_mod) 51 | ``` 52 | 53 | The Wald test for the hypothesis of conditional independence of AZT treatment and development of AIDS symptoms, controlling for race, returns a p-value of `r round(coef(summary(aids_mod))["AZTYes", "Pr(>|z|)"], 3)`, showing evidence of association. 54 | 55 | The predicted probabilities for each combination of levels 56 | 57 | 58 | The maximum likelihood estimates of the odds ratio between immediate AZT use and development of AIDS symptoms can be inferred from `aids_mod` through the `expo()` method, which also estimates standard errors using the delta method, and returns approximate 95% confidence intervals (see `?expo` for details). 59 | ```{r, echo = TRUE} 60 | expo(aids_mod, type = "ML") 61 | ``` 62 | 63 | As noted in @agresti:02[, Section 5.4.2], for each race, the estimated 64 | odds of symptoms are half as high for those who took AZT immediately, 65 | with value $0.49$ and a nominally 95\% Wald confidence interval 66 | $(0.28, 0.84)$. 67 | 68 | The `expo()` method can be used to estimate the odds ratios using three methods that return estimates of the odds ratios with asymptotically smaller mean bias than the maximum likelihood estimator 69 | ```{r, echo = TRUE} 70 | expo(aids_mod, type = "correction*") 71 | expo(aids_mod, type = "Lylesetal2012") 72 | expo(aids_mod, type = "correction+") 73 | ``` 74 | and one method that returns estimates of the odds ratios with asymptotically smaller median bias than the maximum likelihood estimator 75 | ```{r, echo = TRUE} 76 | expo(aids_mod, type = "AS_median") 77 | ``` 78 | 79 | The estimated odds ratios and associated inferences from the methods that correct for mean and median bias are similar to those from maximum likelihood. 80 | 81 | # Infinite odds ratio estimates 82 | 83 | When `expo()` is called with `type = correction*`, `type = correction+`, `type = Lylesetal2012`, and `type = AS_median`, then the estimates of the odds ratios can be shown to be always finite and greater than zero. The reason is that the corresponding odds-ratio estimators depend on regression parameter estimates that are finite even if the maximum likelihood estimates are infinite. See, @kosmidis:2019 and @kosmidis+firth:21 for details. 84 | 85 | As an example, consider the estimated odds ratios from a logistic regression model fitted on the `endometrial` data set using maximum likelihood. 86 | ```{r, echo = TRUE} 87 | data("endometrial", package = "brglm2") 88 | endometrialML <- glm(HG ~ NV + PI + EH, data = endometrial, family = binomial()) 89 | endometrialML 90 | ``` 91 | 92 | The estimate of the coefficient for `NV` is in reality infinite as it can be verified using the [**detectseparation**](https://cran.r-project.org/package=detectseparation) R package 93 | ```{r, echo = TRUE} 94 | library("detectseparation") 95 | update(endometrialML, method = detect_separation) 96 | ``` 97 | and a naive estimate of the associated odds ratio while controlling for `PI` and `EH` is `r exp(coef(endometrialML)["NV"])`, which is in reality infinite. 
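To illustrate what drives the finiteness of the corrected estimators, here is a minimal sketch (not the package's internal code) of the `"correction*"` recipe described in `?expo`, computed by hand for `NV` from a mean-bias-reduced fit; it assumes, for illustration, that the quantity $\hat{v}_j$ in that recipe is the corresponding diagonal element of `vcov()`.
```{r, echo = TRUE}
## Hand-rolled sketch of the "correction*" recipe (illustrative assumptions above)
endometrial_br <- update(endometrialML, method = "brglmFit", type = "AS_mixed")
theta_NV <- coef(endometrial_br)["NV"]          # finite reduced-bias estimate
v_NV <- diag(vcov(endometrial_br))["NV"]        # assumed estimate of its variance
exp(theta_NV) / (1 + v_NV / 2)                  # finite odds-ratio estimate
```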
98 | 99 | In contrast, `expo()` returns finite reduced-mean-bias estimates of the odds ratios 100 | ```{r, echo = TRUE} 101 | expo(endometrialML, type = "correction*") 102 | expo(endometrialML, type = "correction+") 103 | expo(endometrialML, type = "Lylesetal2012") 104 | ``` 105 | 106 | # `brglmFit` objects 107 | 108 | The `expo()` method also works seamlessly with `brglmFit` objects, returning the same results as above. For example, 109 | ```{r, echo = TRUE} 110 | aids_mod_br <- update(aids_mod, method = "brglmFit") 111 | expo(aids_mod_br, type = "correction*") 112 | ``` 113 | 114 | 115 | # References 116 | -------------------------------------------------------------------------------- /vignettes/multinomial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Multinomial logistic regression using **brglm2**" 3 | author: "[Ioannis Kosmidis](https://www.ikosmidis.com)" 4 | date: "01 July 2017" 5 | output: rmarkdown::html_vignette 6 | bibliography: brglm2.bib 7 | vignette: > 8 | %\VignetteIndexEntry{Multinomial logistic regression using **brglm2**} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | comment = "#>", 17 | fig.width = 6, 18 | fig.height = 6 19 | ) 20 | ``` 21 | 22 | 23 | # **brmultinom** 24 | The [**brglm2**](https://github.com/ikosmidis/brglm2) R package provides `brmultinom()` which is a wrapper of `brglmFit` for fitting multinomial logistic regression models (a.k.a. baseline category logit models) using either maximum likelihood or maximum penalized likelihood or any of the various bias reduction methods described in `brglmFit()`. `brmultinom()` uses the equivalent Poisson log-linear model, by appropriately re-scaling the Poisson means to match the multinomial totals (a.k.a. the "Poisson trick"). The mathematical details and algorithm on using the Poisson trick for mean-bias reduction are given in @kosmidis:11. 25 | 26 | This vignette illustrates the use of `brmultinom()` and of the associated methods, using the alligator food choice example in @agresti:02[, Section 7.1]. 27 | 28 | # Alligator data 29 | 30 | The alligator data set ships with **brglm2**. @agresti:02[, Section 7.1] provides a detailed description of the variables recorded in the data set. 31 | ```{r, echo = TRUE} 32 | library("brglm2") 33 | data("alligators", package = "brglm2") 34 | ``` 35 | 36 | ## Maximum likelihood estimation 37 | 38 | The following chunk of code reproduces @agresti:02[, Table 7.4]. Note that in order to get the estimates and standard errors reported in the latter table, we have to explicitly specify the contrasts that @agresti:02 uses. 
39 | ```{r, echo = TRUE} 40 | agresti_contrasts <- list(lake = contr.treatment(levels(alligators$lake), base = 4), 41 | size = contr.treatment(levels(alligators$size), base = 2)) 42 | all_ml <- brmultinom(foodchoice ~ size + lake , weights = freq, 43 | data = alligators, 44 | contrasts = agresti_contrasts, 45 | ref = 1, 46 | type = "ML") 47 | all_ml_summary <- summary(all_ml) 48 | ## Estimated regression parameters 49 | round(all_ml_summary$coefficients, 2) 50 | ## Estimated standard errors 51 | round(all_ml_summary$standard.errors, 2) 52 | ``` 53 | 54 | ## Mean and median bias reduction 55 | 56 | Fitting the model using mean-bias reducing adjusted score equations gives 57 | ```{r, echo = TRUE} 58 | all_mean <- update(all_ml, type = "AS_mean") 59 | summary(all_mean) 60 | ``` 61 | The corresponding fit using median-bias reducing adjusted score equations is 62 | ```{r, echo = TRUE} 63 | all_median <- update(all_ml, type = "AS_median") 64 | summary(all_median) 65 | ``` 66 | 67 | The estimates and the estimated standard errors from bias reduction 68 | are close to those for maximum likelihood. As a result, it is unlikely 69 | that either mean or median bias is of any real consequence for this 70 | particular model and data combination. 71 | 72 | # Infinite estimates and multinomial logistic regression 73 | 74 | Let's scale the frequencies in `alligators` by 3 in order to get a sparser data set. The differences between maximum likelihood and mean and median bias reduction should be more apparent on the resulting data set. Here we have to "slow-down" the Fisher scoring iteration (by scaling the step-size), because otherwise the Fisher information matrix quickly gets numerically rank-deficient. The reason is data separation [@albert:84]. 75 | ```{r, echo = TRUE, error = TRUE} 76 | all_ml_sparse <- update(all_ml, weights = round(freq/3), slowit = 0.1) 77 | summary(all_ml_sparse) 78 | ``` 79 | Specifically, judging from the estimated standard errors, the estimates for `(Intercept)`, `lakeHancock`, `lakeOklawaha` and `lakeTrafford` for `Reptile` and `lakeHancock` for `Bird` seem to be infinite. 80 | 81 | To quickly check if that's indeed the case we can use the `check_infinite_estimates()` method of the [**detectseparation**](https://cran.r-project.org/package=detectseparation) R package. 82 | ```{r, echo = TRUE} 83 | library("detectseparation") 84 | se_ratios <- check_infinite_estimates(all_ml_sparse) 85 | plot(se_ratios) 86 | ``` 87 | 88 | Some of the estimated standard errors diverge as the number of Fisher scoring iterations increases, which is evidence of complete or quasi-complete separation [@lesaffre:89]. 89 | 90 | In contrast, both mean and median bias reduction result in finite 91 | estimates 92 | ```{r, echo = TRUE} 93 | all_mean_sparse <- update(all_ml_sparse, type = "AS_mean") 94 | summary(all_mean_sparse) 95 | 96 | all_median_sparse <- update(all_ml_sparse, type = "AS_median") 97 | summary(all_median_sparse) 98 | ``` 99 | 100 | 101 | 102 | # Relevant resources 103 | `?brglmFit` and `?brglm_control` contain quick descriptions of the various bias reduction methods supported in **brglm2**. The [`iteration`](https://cran.r-project.org/package=brglm2/brglm2.pdf) vignette describes the iteration and gives the mathematical details for the bias-reducing adjustments to the score functions for generalized linear models. 104 | 105 | # Citation 106 | If you found this vignette or **brglm2**, in general, useful, please consider citing **brglm2** and the associated paper. 
You can find information on how to do this by typing `citation("brglm2")`. 107 | 108 | # References 109 | --------------------------------------------------------------------------------