├── .Rbuildignore
├── .github
│   ├── .gitignore
│   └── workflows
│       ├── R-CMD-check.yaml
│       └── test-coverage.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── bracl.R
│   ├── brglm2-defunct.R
│   ├── brglm2-package.R
│   ├── brglmControl.R
│   ├── brglmFit.R
│   ├── brmultinom.R
│   ├── brnb.R
│   ├── data.R
│   ├── expo.R
│   ├── mis-link.R
│   ├── ordinal_superiority.R
│   ├── utils.R
│   ├── warnings.R
│   └── zzz_conventions.R
├── README.Rmd
├── README.md
├── data
│   ├── aids.rda
│   ├── alligators.rda
│   ├── coalition.rda
│   ├── endometrial.rda
│   ├── enzymes.rda
│   ├── hepatitis.rda
│   ├── lizards.rda
│   └── stemcell.rda
├── inst
│   ├── CITATION
│   ├── WORDLIST
│   ├── brpr
│   │   ├── brpr.R
│   │   ├── examples.R
│   │   └── timings.R
│   ├── glmbias_code+results
│   │   ├── alligator_simulation_results.rda
│   │   ├── birth_weight_simulation_results.rda
│   │   ├── clotting_simulation_results.rda
│   │   ├── glmbias_gamma_simulation.R
│   │   ├── glmbias_logistic_simulation.R
│   │   ├── glmbias_multinomial_simulation.R
│   │   └── glmbias_simulation_functions.R
│   ├── mbrpr
│   │   └── mbrpr.R
│   └── tinytest
│       ├── brpr.R
│       ├── mbrpr.R
│       ├── test-binomial.R
│       ├── test-bracl.R
│       ├── test-brglmControl.R
│       ├── test-brnp.R
│       ├── test-checkinfinite.R
│       ├── test-correction.R
│       ├── test-dispersion.R
│       ├── test-expo.R
│       ├── test-gamma.R
│       ├── test-jeffreys.R
│       ├── test-median-binomial.R
│       ├── test-median-dispersion.R
│       ├── test-median-poisson.R
│       ├── test-mis.R
│       ├── test-multinom-binom.R
│       ├── test-multinom.R
│       ├── test-poisson.R
│       ├── test-print.R
│       ├── test-singular.R
│       ├── test-start.R
│       └── test-transformation.R
├── man
│   ├── aids.Rd
│   ├── alligators.Rd
│   ├── bracl.Rd
│   ├── brglm2-defunct.Rd
│   ├── brglm2.Rd
│   ├── brglmControl.Rd
│   ├── brglmFit.Rd
│   ├── brmultinom.Rd
│   ├── brnb.Rd
│   ├── coalition.Rd
│   ├── coef.brglmFit.Rd
│   ├── coef.brglmFit_expo.Rd
│   ├── coef.brnb.Rd
│   ├── confint.brglmFit.Rd
│   ├── confint.brmultinom.Rd
│   ├── confint.brnb.Rd
│   ├── endometrial.Rd
│   ├── enzymes.Rd
│   ├── expo.brglmFit.Rd
│   ├── figures
│   │   └── hex_brglm2.svg
│   ├── hepatitis.Rd
│   ├── lizards.Rd
│   ├── mis.Rd
│   ├── ordinal_superiority.bracl.Rd
│   ├── predict.bracl.Rd
│   ├── predict.brmultinom.Rd
│   ├── residuals.brmultinom.Rd
│   ├── simulate.brmultinom.Rd
│   ├── simulate.brnb.Rd
│   ├── stemcell.Rd
│   ├── summary.brglmFit.Rd
│   ├── summary.brnb.Rd
│   ├── vcov.brglmFit.Rd
│   └── vcov.brnb.Rd
├── src
│   ├── expectations.c
│   ├── hats.c
│   └── init.c
├── tests
│   └── tinytest.R
└── vignettes
    ├── adjacent.Rmd
    ├── brglm2.bib
    ├── expo.Rmd
    ├── iteration.Rmd
    ├── multinomial.Rmd
    └── negativeBinomial.Rmd
/.Rbuildignore: -------------------------------------------------------------------------------- 1 | inst/scratch 2 | inst/brglm0 3 | inst/brpr 4 | inst/demos 5 | inst/mbrpr 6 | inst/other 7 | inst/useR!2016 8 | inst/glmbias_code\+results 9 | inst/dev_resids.R 10 | inst/TODO.org 11 | src/hats.c 12 | tests/ 13 | vignettes/iteration\[exported\].html 14 | vignettes/iteration\[exported\].pdf 15 | vignettes/iteration.html 16 | vignettes/iteration.pdf 17 | 18 | ^\.github$ 19 | ^\.gitignore 20 | ^\._* 21 | ^.*\.Rproj$ 22 | ^\.Rproj\.user$ 23 | ^\.travis\.yml$ 24 | 25 | anova.R 26 | ^CRAN-RELEASE$ 27 | ^README\.Rmd$ 28 | README_cache 29 | ^codecov\.yml$ 30 | ^CRAN-RELEASE$ 31 | ^code_of_conduct\.md$ 32 | ^CRAN-SUBMISSION$ 33 | ^doc$ 34 | ^Meta$ 35 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: 
-------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, develop] 6 | pull_request: 7 | branches: [main, develop] 8 | 9 | name: R-CMD-check.yaml 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 53 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | 3 | ### macOS ### 4 | # General 5 | .DS_Store 6 | .AppleDouble 7 | .LSOverride 8 | 9 | # Icon must end with two \r 10 | Icon 11 | 12 | # Thumbnails 13 | ._* 14 | 15 | # Files that might appear in the root of a volume 16 | .DocumentRevisions-V100 17 | .fseventsd 18 | .Spotlight-V100 19 | .TemporaryItems 20 | .Trashes 21 | .VolumeIcon.icns 22 | .com.apple.timemachine.donotpresent 23 | 24 | # Directories potentially created on remote AFP share 25 | .AppleDB 26 | .AppleDesktop 27 | Network Trash Folder 28 | Temporary Items 29 | .apdisk 30 | 31 | ### R ### 32 | # History files 33 | .Rhistory 34 | .Rapp.history 35 | 36 | # Session Data files 37 | .RData 38 | 39 | # Example code in package build process 40 | *-Ex.R 41 | 42 | # Output files from R CMD build 43 | /*.tar.gz 44 | 45 | # Output files from R CMD check 46 | /*.Rcheck/ 47 | 48 | # RStudio files 49 | .Rproj.user/ 50 | 51 | # produced vignettes 52 | vignettes/*.html 53 | vignettes/*.pdf 54 | 55 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 56 | .httr-oauth 57 | 58 | # knitr and R markdown default cache directories 59 | /*_cache/ 60 | /cache/ 61 | 62 | # Temporary files created by R markdown 63 | *.utf8.md 64 | *.knit.md 65 | 66 | ### R.Bookdown Stack ### 67 | # R package: bookdown caching files 68 | /*_files/ 69 | 70 | ### Emacs ### 71 | # -*- mode: gitignore; -*- 72 | *~ 73 | \#*\# 74 | /.emacs.desktop 75 | /.emacs.desktop.lock 76 | *.elc 77 | auto-save-list 78 | tramp 79 | .\#* 80 | 81 | # Org-mode 82 | .org-id-locations 83 | *_archive 84 | 85 | # flymake-mode 86 | *_flymake.* 87 | 88 | # eshell files 89 | /eshell/history 90 | /eshell/lastdir 91 | 92 | # elpa packages 93 | /elpa/ 94 | 95 | # reftex files 96 | *.rel 97 | 98 | # AUCTeX auto folder 99 | /auto/ 100 | 101 | # cask packages 102 | .cask/ 103 | dist/ 104 | 105 | # Flycheck 106 | flycheck_*.el 107 | 108 | # server auth directory 109 | /server/ 110 | 111 | # projectiles files 112 | .projectile 113 | 114 | # directory configuration 115 | .dir-locals.el 116 | 117 | # network security 118 | /network-security.data 119 | /doc/ 120 | /Meta/ 121 | 
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | * Demonstrating empathy and kindness toward other people 22 | * Being respectful of differing opinions, viewpoints, and experiences 23 | * Giving and gracefully accepting constructive feedback 24 | * Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | * Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | * The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | * Trolling, insulting or derogatory comments, and personal or political attacks 34 | * Public or private harassment 35 | * Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | * Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | ## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official e-mail address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement at 64 | [INSERT CONTACT METHOD]. 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. 
Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series of 87 | actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or permanent 94 | ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within the 114 | community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 119 | version 2.1, available at 120 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 121 | 122 | Community Impact Guidelines were inspired by 123 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 124 | 125 | For answers to common questions about this code of conduct, see the FAQ at 126 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 127 | [https://www.contributor-covenant.org/translations][translations]. 
128 | 129 | [homepage]: https://www.contributor-covenant.org 130 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 131 | [Mozilla CoC]: https://github.com/mozilla/diversity 132 | [FAQ]: https://www.contributor-covenant.org/faq 133 | [translations]: https://www.contributor-covenant.org/translations 134 | 135 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: brglm2 2 | Title: Bias Reduction in Generalized Linear Models 3 | Version: 0.9.2 4 | Authors@R: c(person(given = "Ioannis", family = "Kosmidis", role = c("aut", "cre"), email = "ioannis.kosmidis@warwick.ac.uk", comment = c(ORCID = "0000-0003-1556-0302")), 5 | person(given = c("Euloge", "Clovis"), family = c("Kenne Pagui"), role = "aut", email = "kenne@stat.unipd.it"), 6 | person(given = "Kjell", family = "Konis", role = "ctb", email = "kjell.konis@me.com"), 7 | person(given = "Nicola", family = "Sartori", role = "ctb", email = "sartori@stat.unipd.it")) 8 | Description: Estimation and inference from generalized linear models based on various methods for bias reduction and maximum penalized likelihood with powers of the Jeffreys prior as penalty. The 'brglmFit' fitting method can achieve reduction of estimation bias by solving either the mean bias-reducing adjusted score equations in Firth (1993) <doi:10.2307/2336755> and Kosmidis and Firth (2009) <doi:10.1093/biomet/asp055>, or the median bias-reduction adjusted score equations in Kenne Pagui et al. (2017) <doi:10.1093/biomet/asx046>, or through the direct subtraction of an estimate of the bias of the maximum likelihood estimator from the maximum likelihood estimates as in Cordeiro and McCullagh (1991) <doi:10.1111/j.2517-6161.1991.tb01852.x>. See Kosmidis et al. (2020) <doi:10.1007/s11222-019-09860-6> for more details. Estimation in all cases takes place via a quasi Fisher scoring algorithm, and S3 methods for the construction of confidence intervals for the reduced-bias estimates are provided. In the special case of generalized linear models for binomial and multinomial responses (both ordinal and nominal), the adjusted score approaches to mean and median bias reduction have been found to return estimates with improved frequentist properties, that are also always finite, even in cases where the maximum likelihood estimates are infinite (e.g. complete and quasi-complete separation; see Kosmidis and Firth, 2020 <doi:10.1093/biomet/asaa052>, for a proof for mean bias reduction in logistic regression).
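As a quick illustration of the fitting method and S3 methods described in the Description above, here is a minimal usage sketch; it is not part of the DESCRIPTION file itself, and it only uses the lizards data shipped with the package and the same 'method' and 'type' arguments that appear in the package's own examples further down.

## Fit a binomial GLM by maximum likelihood, then refit it with the mean
## bias-reducing adjusted score equations via the 'brglmFit' method.
library("brglm2")
lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time
lizards_ML <- glm(lizards_f, family = binomial(logit), data = lizards)
lizards_BR <- update(lizards_ML, method = "brglmFit", type = "AS_mean")
## Reduced-bias estimates and Wald-type confidence intervals from the
## provided S3 methods
coef(lizards_BR)
confint(lizards_BR)

Other 'type' values that appear in the sources below, such as "AS_median", "correction", and "MPL_Jeffreys", can be swapped into the same call.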
9 | URL: https://github.com/ikosmidis/brglm2 10 | BugReports: https://github.com/ikosmidis/brglm2/issues 11 | Depends: R (>= 3.3.0) 12 | Imports: MASS, stats, Matrix, graphics, nnet, enrichwith, numDeriv 13 | License: GPL-3 14 | Encoding: UTF-8 15 | LazyData: true 16 | RoxygenNote: 7.2.3 17 | Roxygen: list(markdown = TRUE) 18 | Suggests: detectseparation, knitr, rmarkdown, covr, tinytest, VGAM, brglm, mbrglm 19 | VignetteBuilder: knitr 20 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(coef,bracl) 4 | S3method(coef,brglmFit) 5 | S3method(coef,brglmFit_expo) 6 | S3method(coef,brmultinom) 7 | S3method(coef,brnb) 8 | S3method(confint,brglmFit) 9 | S3method(confint,brmultinom) 10 | S3method(confint,brnb) 11 | S3method(expo,brglmFit) 12 | S3method(expo,glm) 13 | S3method(fitted,bracl) 14 | S3method(fitted,brmultinom) 15 | S3method(logLik,brmultinom) 16 | S3method(model.matrix,brmultinom) 17 | S3method(ordinal_superiority,bracl) 18 | S3method(predict,bracl) 19 | S3method(predict,brmultinom) 20 | S3method(print,brglmFit_expo) 21 | S3method(print,brmultinom) 22 | S3method(print,brnb) 23 | S3method(print,summary.bracl) 24 | S3method(print,summary.brglmFit) 25 | S3method(print,summary.brmultinom) 26 | S3method(print,summary.brnb) 27 | S3method(residuals,bracl) 28 | S3method(residuals,brmultinom) 29 | S3method(simulate,brmultinom) 30 | S3method(simulate,brnb) 31 | S3method(summary,bracl) 32 | S3method(summary,brglmFit) 33 | S3method(summary,brmultinom) 34 | S3method(summary,brnb) 35 | S3method(vcov,bracl) 36 | S3method(vcov,brglmFit) 37 | S3method(vcov,brmultinom) 38 | S3method(vcov,brnb) 39 | export(bracl) 40 | export(brglmControl) 41 | export(brglmFit) 42 | export(brglm_control) 43 | export(brglm_fit) 44 | export(brmultinom) 45 | export(brnb) 46 | export(check_infinite_estimates) 47 | export(detect_separation) 48 | export(expo) 49 | export(mis) 50 | export(ordinal_superiority) 51 | import(MASS) 52 | import(enrichwith) 53 | import(stats) 54 | import(Matrix) 55 | importFrom(graphics,plot) 56 | importFrom(nnet,class.ind) 57 | importFrom(numDeriv,grad) 58 | useDynLib(brglm2) 59 | -------------------------------------------------------------------------------- /R/brglm2-defunct.R: -------------------------------------------------------------------------------- 1 | #' Defunct Functions in package \pkg{brglm2} 2 | #' 3 | #' The functions or variables listed here are no longer part of 4 | #' \pkg{brglm2}. 5 | #' 6 | #' 7 | #' @param ... arguments to be passed to functions and methods. 8 | #' 9 | #' 10 | #' @details 11 | #' 12 | #' * [detect_separation()]: This function is defunct from \pkg{brglm2} 13 | #' since version 0.8.0. A new version of [detect_separation()] is now 14 | #' maintained in the 15 | #' [\pkg{detectseparation}](https://cran.r-project.org/package=detectseparation) 16 | #' R package. 17 | #' 18 | #' * [check_infinite_estimates()] is defunct from \pkg{brglm2} since 19 | #' version 0.8.0. An new version of [check_infinite_estimates()] is 20 | #' now maintained in the 21 | #' [\pkg{detectseparation}](https://cran.r-project.org/package=detectseparation) 22 | #' R package. 23 | #' 24 | #' @name brglm2-defunct 25 | NULL 26 | 27 | #' @rdname brglm2-defunct 28 | #' @export 29 | check_infinite_estimates <- function(...) 
{ 30 | function_moved_to_new_package(gsub("\\(|\\)", "", deparse(match.call()[1])), 31 | "0.8.0", 32 | "brglm2", 33 | "detectseparation") 34 | } 35 | 36 | #' @rdname brglm2-defunct 37 | #' @export 38 | detect_separation <- function(...) { 39 | function_moved_to_new_package(gsub("\\(|\\)", "", deparse(match.call()[1])), 40 | "0.8.0", 41 | "brglm2", 42 | "detectseparation") 43 | } 44 | -------------------------------------------------------------------------------- /R/brglm2-package.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2020- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | #' 17 | #' brglm2: Bias Reduction in Generalized Linear Models 18 | #' 19 | #' Estimation and inference from generalized linear models using 20 | #' implicit and explicit bias reduction methods (Kosmidis, 2014), and 21 | #' other penalized maximum likelihood methods. Currently supported 22 | #' methods include the mean bias-reducing adjusted scores approach in 23 | #' Firth (1993) and Kosmidis & Firth (2009), the median bias-reduction 24 | #' adjusted scores approach in Kenne Pagui et al. (2017), the 25 | #' correction of the asymptotic bias in Cordeiro & McCullagh (1991), 26 | #' the mixed bias-reduction adjusted scores approach in Kosmidis et al 27 | #' (2020), maximum penalized likelihood with powers of the Jeffreys 28 | #' prior as penalty, and maximum likelihood. 29 | #' 30 | #' 31 | #' In the special case of generalized linear models for binomial, 32 | #' Poisson and multinomial responses (both nominal and ordinal), mean 33 | #' and median bias reduction and maximum penalized likelihood return 34 | #' estimates with improved frequentist properties, that are also 35 | #' always finite, even in cases where the maximum likelihood estimates 36 | #' are infinite (e.g. complete and quasi-complete separation in 37 | #' multinomial regression). Estimation in all cases takes place via a 38 | #' modified Fisher scoring algorithm, and S3 methods for the 39 | #' construction of confidence intervals for the reduced-bias estimates 40 | #' are provided. 41 | #' 42 | #' The core model fitters are implemented by the functions 43 | #' [brglm_fit()] (univariate generalized linear models), 44 | #' [brmultinom()] (baseline category logit models for nominal 45 | #' multinomial responses), [bracl()] (adjacent category logit models 46 | #' for ordinal multinomial responses), and [brnb()] for negative 47 | #' binomial regression. 48 | #' 49 | #' @details 50 | #' 51 | #' 52 | #' The similarly named **brglm** R package can only handle generalized 53 | #' linear models with binomial responses. Special care has been taken 54 | #' when developing **brglm2** in order not to have conflicts when the 55 | #' user loads **brglm2** and **brglm** simultaneously. 
The development 56 | #' and maintenance of the two packages will continue in parallel, 57 | #' until **brglm2** incorporates all **brglm** functionality and 58 | #' provides an appropriate wrapper to the [brglm::brglm()] function. 59 | #' 60 | #' @author Ioannis Kosmidis `[aut, cre]` \email{ioannis.kosmidis@warwick.ac.uk} 61 | #' 62 | #' @seealso 63 | #' 64 | #' [brglm_fit()], [brmultinom()], [bracl()] 65 | #' 66 | #' @references 67 | #' 68 | #' Kosmidis I, Firth D (2021). Jeffreys-prior penalty, finiteness 69 | #' and shrinkage in binomial-response generalized linear 70 | #' models. *Biometrika*, **108**, 71-82. \doi{10.1093/biomet/asaa052}. 71 | #' 72 | #' Cordeiro G M, McCullagh P (1991). Bias correction in generalized 73 | #' linear models. *Journal of the Royal Statistical Society. Series B 74 | #' (Methodological)*, **53**, 629-643. \doi{10.1111/j.2517-6161.1991.tb01852.x}. 75 | #' 76 | #' Firth D (1993). Bias reduction of maximum likelihood estimates, 77 | #' Biometrika, **80**, 27-38. \doi{10.2307/2336755}. 78 | #' 79 | #' Kenne Pagui E C, Salvan A, Sartori N (2017). Median bias 80 | #' reduction of maximum likelihood estimates. *Biometrika*, **104**, 81 | #' 923–938. \doi{10.1093/biomet/asx046}. 82 | #' 83 | #' Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 84 | #' reduction in generalized linear models. *Statistics and Computing*, 85 | #' **30**, 43-59. \doi{10.1007/s11222-019-09860-6}. 86 | #' 87 | #' Kosmidis I, Firth D (2009). Bias reduction in exponential family 88 | #' nonlinear models. *Biometrika*, **96**, 793-804. \doi{10.1093/biomet/asp055}. 89 | #' 90 | #' Kosmidis I, Firth D (2010). A generic algorithm for reducing 91 | #' bias in parametric estimation. *Electronic Journal of Statistics*, 92 | #' **4**, 1097-1112. \doi{10.1214/10-EJS579}. 93 | #' 94 | #' Kosmidis I (2014). Bias in parametric estimation: reduction and 95 | #' useful side-effects. *WIRE Computational Statistics*, **6**, 96 | #' 185-196. \doi{10.1002/wics.1296}. 97 | #' 98 | #' @docType package 99 | #' @aliases brglm2-package 100 | #' @name brglm2 101 | #' @import stats 102 | #' @import enrichwith 103 | #' @import Matrix 104 | #' @import MASS 105 | #' @importFrom graphics plot 106 | #' @importFrom nnet class.ind 107 | #' @importFrom numDeriv grad 108 | #' @useDynLib brglm2 109 | #' 110 | NULL 111 | 112 | ## NAMESPACE should have import(stats), import(Matrix) 113 | 114 | 115 | ## Suggestion by Kurt Hornik to avoid a warning related to the binding 116 | ## of n which is evaluated by family$initialize 117 | if (getRversion() >= "2.15.1") globalVariables(c("n", "lambda")) 118 | 119 | #' @export 120 | ordinal_superiority <- function(object, formula, data, 121 | measure = c("gamma", "Delta"), 122 | level = 0.95, 123 | bc = FALSE) { 124 | UseMethod("ordinal_superiority") 125 | } 126 | 127 | #' @export 128 | expo <- function(object, type = c("ML", "correction", "AS_median", "Lylesetal2012"), level = 0.95) { 129 | UseMethod("expo") 130 | } 131 | -------------------------------------------------------------------------------- /R/mis-link.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2018- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 
7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | #' A [`"link-glm"`][make.link] object for misclassified responses in binomial regression models 17 | #' 18 | #' [mis()] is a [`"link-glm"`][make.link] object that specifies the link function in Neuhaus (1999, expression (8)) for handling misclassified responses in binomial regression models using maximum likelihood. A prior specification of the sensitivity and specificity is required. 19 | #' 20 | #' @param link the baseline link to be used. 21 | #' @param sensitivity the probability of observing a success given that a success actually took place given any covariate values. 22 | #' @param specificity the probability of observing a failure given that a failure actually took place given any covariate values. 23 | #' 24 | #' @details 25 | #' 26 | #' `sensitivity + specificity` should be greater than or equal to 1, 27 | #' otherwise it is implied that the procedure producing the responses 28 | #' performs worse than chance in terms of misclassification. 29 | #' 30 | #' @references 31 | #' 32 | #' Neuhaus J M (1999). Bias and efficiency loss due to misclassified 33 | #' responses in binary regression. Biometrika, **86**, 843-855. 34 | #' \url{https://www.jstor.org/stable/2673589}. 35 | #' 36 | #' @seealso [glm()], [brglm_fit()] 37 | #' 38 | #' @examples 39 | #' 40 | #' ## Define a few links with some misclassification 41 | #' logit_mis <- mis(link = "logit", sensitivity = 0.9, specificity = 0.9) 42 | #' 43 | #' lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time 44 | #' 45 | #' lizardsML <- glm(lizards_f, family = binomial(logit), data = lizards) 46 | #' 47 | #' lizardsML_mis <- update(lizardsML, family = binomial(logit_mis), 48 | #' start = coef(lizardsML)) 49 | #' 50 | #' ## A notable change in the coefficients is noted here compared to when 51 | #' ## specificity and sensitivity are 1 52 | #' coef(lizardsML) 53 | #' coef(lizardsML_mis) 54 | #' 55 | #' ## Bias reduction is also possible 56 | #' update(lizardsML_mis, method = "brglmFit", type = "AS_mean", 57 | #' start = coef(lizardsML)) 58 | #' 59 | #' update(lizardsML_mis, method = "brglmFit", type = "AS_median", 60 | #' start = coef(lizardsML)) 61 | #' 62 | #' @export 63 | mis <- function(link = "logit", sensitivity = 1, specificity = 1) { 64 | link <- make.link(link) 65 | linkfun <- function(mu) { 66 | link$linkfun((mu -1 + specificity) / (sensitivity + specificity - 1)) 67 | } 68 | linkinv <- function(eta) { 69 | (sensitivity + specificity - 1) * link$linkinv(eta) + 1 - specificity 70 | } 71 | mu.eta <- function(eta) { 72 | (sensitivity + specificity - 1) * link$mu.eta(eta) 73 | } 74 | valideta <- function(eta) { 75 | TRUE 76 | } 77 | structure(list(linkfun = linkfun, 78 | linkinv = linkinv, 79 | mu.eta = mu.eta, 80 | valideta = valideta, 81 | name = "miss"), 82 | class = "link-glm") 83 | } 84 | 85 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2016- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License 
as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | unless_null <- function(x, if_null) { 17 | if (is.null(x)) { 18 | if_null 19 | } else { 20 | x 21 | } 22 | } 23 | 24 | 25 | 26 | get_type_description <- function(type, parenthesized = TRUE) { 27 | pp <- function(txt) { 28 | ifelse(parenthesized, paste0("(", txt, ")"), txt) 29 | } 30 | switch(type, 31 | "ML" = pp("maximum likelihood"), 32 | "correction" = pp("bias correction"), 33 | "AS_mean" = pp("mean bias-reducing adjusted score equations"), 34 | "AS_median" = pp("median bias-reducing adjusted score equations"), 35 | "AS_mixed" = pp("mixed bias-reducing adjusted score equations"), 36 | "MPL_Jeffreys" = pp("maximum penalized likelihood with Jeffreys'-prior penalty"), 37 | "Lylesetal2012" = pp("Lyles et al., 2012; doi: 10.1016/j.jspi.2012.05.005"), 38 | "correction*" = pp("explicit mean bias correction with a multiplicative adjustment"), 39 | "correction+" = pp("explicit mean bias correction with an additive adjustment") 40 | ) 41 | } 42 | -------------------------------------------------------------------------------- /R/warnings.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2020- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | 17 | function_moves_to_new_package <- function(function_name, 18 | removal_version, 19 | current_pkg, 20 | new_pkg, 21 | extra_message = NULL) { 22 | function_name <- paste0("'", function_name, "'") 23 | current_pkg <- paste0("'", current_pkg, "'") 24 | new_pkg <- paste0("'", new_pkg, "'") 25 | msg <- paste(function_name, 26 | "will be removed from", 27 | current_pkg, 28 | "at version", 29 | paste0(removal_version, "."), 30 | "A new version of", 31 | function_name, 32 | "is now maintained in the", 33 | new_pkg, 34 | "package.", 35 | extra_message) 36 | .Deprecated(msg = msg, package = current_pkg) 37 | } 38 | 39 | function_moved_to_new_package <- function(function_name, 40 | removal_version, 41 | current_pkg, 42 | new_pkg, 43 | extra_message = NULL) { 44 | function_name <- paste0("'", function_name, "'") 45 | current_pkg <- paste0("'", current_pkg, "'") 46 | new_pkg <- paste0("'", new_pkg, "'") 47 | msg <- paste(function_name, 48 | "has been removed from", 49 | current_pkg, 50 | "at version", 51 | paste0(removal_version, "."), 52 | "A new version of", 53 | function_name, 54 | "is now maintained in the", 55 | new_pkg, 56 | "package.", 57 | extra_message) 58 | .Defunct(msg = msg, package = current_pkg) 59 | } 60 | -------------------------------------------------------------------------------- /R/zzz_conventions.R: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2017- Ioannis Kosmidis 2 | 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 or 3 of the License 6 | # (at your option). 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 
12 | # 13 | # A copy of the GNU General Public License is available at 14 | # http://www.r-project.org/Licenses/ 15 | 16 | ## Some convenience names 17 | 18 | #' @rdname brglmControl 19 | #' @export 20 | brglm_control <- brglmControl 21 | 22 | #' @rdname brglmFit 23 | #' @export 24 | brglm_fit <- brglmFit 25 | 26 | #### Method conventions 27 | 28 | #' @rdname residuals.brmultinom 29 | #' @method residuals bracl 30 | #' @export 31 | residuals.bracl <- residuals.brmultinom 32 | 33 | -------------------------------------------------------------------------------- /data/aids.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/aids.rda -------------------------------------------------------------------------------- /data/alligators.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/alligators.rda -------------------------------------------------------------------------------- /data/coalition.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/coalition.rda -------------------------------------------------------------------------------- /data/endometrial.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/endometrial.rda -------------------------------------------------------------------------------- /data/enzymes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/enzymes.rda -------------------------------------------------------------------------------- /data/hepatitis.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/hepatitis.rda -------------------------------------------------------------------------------- /data/lizards.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/lizards.rda -------------------------------------------------------------------------------- /data/stemcell.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/data/stemcell.rda -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | year <- sub("-.*", "", meta$Date) 2 | note <- sprintf("R package version %s", meta$Version) 3 | 4 | citHeader("To cite package 'brglm2' in publications use at least one of the following, as appropriate. 
The finiteness and shrinkage properties of the reduced-bias estimator that 'brglm2' computes by default for logistic regression are established in Kosmidis & Firth (2021).") 5 | 6 | c(bibentry(bibtype = "Manual", 7 | title = "{brglm2}: Bias Reduction in Generalized Linear Models", 8 | author = c(person(given = "Ioannis", family = "Kosmidis")), 9 | year = year, 10 | note = note, 11 | url = "https://CRAN.R-project.org/package=brglm2"), 12 | bibentry(bibtype = "article", 13 | title = "Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models", 14 | author = c(person(given = "Ioannis", family = "Kosmidis"), 15 | person(given = "David", family = "Firth")), 16 | year = 2021, 17 | journal = "Biometrika", 18 | volume = "108", 19 | pages = "71-82", 20 | url = "https://doi.org/10.1093/biomet/asaa052"), 21 | bibentry(bibtype = "article", 22 | title = "Mean and median bias reduction in generalized linear models", 23 | author = c(person(given = "Ioannis", family = "Kosmidis"), 24 | person(given = c("Euloge", "Clovis"), family = c("Kenne Pagui")), 25 | person(given = "Nicola", family = "Sartori")), 26 | year = 2020, 27 | journal = "Statistics and Computing", 28 | volume = "30", 29 | pages = "43-59", 30 | url = "https://doi.org/10.1007/s11222-019-09860-6")) 31 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Agresti 2 | Ames 3 | Andreson 4 | Biometrics 5 | Biometrika 6 | CMD 7 | Caterina 8 | Codecov 9 | Cordeiro 10 | DOI 11 | Guo 12 | Heinze 13 | ICPSR 14 | Jeffreys 15 | Jeffreys’ 16 | Jefreys 17 | Kateri 18 | Kenne 19 | Laver 20 | Licence 21 | Lyles 22 | McCullagh 23 | Nelder 24 | Neuhaus 25 | Pagui 26 | Palmgren 27 | Raphson 28 | Salvan 29 | Schemper 30 | TWD 31 | al 32 | arteria 33 | arxiv 34 | asaa 35 | asx 36 | behaviour 37 | biomet 38 | bracl 39 | brglm 40 | brglmFit 41 | brmultinom 42 | brnb 43 | bugreports 44 | ciep 45 | codebase 46 | coercible 47 | cran 48 | detectseparation 49 | df 50 | diag 51 | doi 52 | enrichwith 53 | equivariance 54 | et 55 | finiteness 56 | foodchoice 57 | frac 58 | fract 59 | github 60 | grahami 61 | http 62 | https 63 | infty 64 | iw 65 | jj 66 | ldots 67 | leftarrow 68 | lpSolveAPI 69 | misclassified 70 | mutagenicity 71 | nd 72 | neovasculation 73 | neovasculization 74 | numst 75 | opalinus 76 | overdispered 77 | pre 78 | priori 79 | pulsality 80 | quinoline 81 | resid 82 | revertant 83 | roxygen 84 | th 85 | tinytest 86 | traceback 87 | useR 88 | uterina 89 | vdots 90 | -------------------------------------------------------------------------------- /inst/brpr/brpr.R: -------------------------------------------------------------------------------- 1 | ## Bias-reduced Poisson and multinomial log-linear models 2 | ## using maximum penalized likelihood as in Firth (1993) Biometrika. 3 | ## 4 | ## Author: David Firth, d.firth@warwick.ac.uk, 2 Sep 2010 5 | ## 6 | ## NOT A POLISHED PIECE OF PUBLIC-USE SOFTWARE! Provided "as is", 7 | ## licensed under GPL2. NO WARRANTY OF FITNESS FOR ANY PURPOSE! 8 | ## 9 | ## This code still has lots of rough edges, some of which are 10 | ## indicated by the embedded comments. 11 | ## 12 | ## The intention is to include a more polished implementation as part 13 | ## of the CRAN package "brglm", in due course.
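## Illustrative usage sketch (not part of the original brpr.R sources): the
## call below is adapted from inst/brpr/examples.R further down and shows the
## intended calling pattern, with 'fixed.totals' indexing the groups of
## observations whose response totals are fixed, so that the Poisson fit
## corresponds to a reduced-bias multinomial logit fit:
##
##   library("MASS")  # provides the housing data used in examples.R
##   housbrpr <- brpr(Freq ~ Infl * Type * Cont + Sat * (Infl + Type + Cont),
##                    data = housing, fixed.totals = Infl:Type:Cont)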
14 | 15 | ## Arguments are as for a Poisson log-linear glm, 16 | ## with one new argument: 17 | ## -- "fixed.totals", a factor indexing groups of observations 18 | ## whose response totals are fixed (for example, this might be 19 | ## the rows of a table). 20 | ## If fixed.totals is NULL (the default), pure Poisson sampling (with 21 | ## no totals fixed) is assumed. 22 | 23 | brpr <- function(formula, 24 | data, 25 | subset, 26 | na.action, 27 | offset = NULL, 28 | control = glm.control(...), 29 | contrasts = NULL, 30 | fixed.totals = NULL, 31 | weights = NULL, 32 | ...) { 33 | ## The "weights" and "offset" arguments here can only be NULL -- anything 34 | ## else is an error (for now, at least) 35 | if (!is.null(weights)) stop("The weights argument here can only be NULL") 36 | if (!is.null(offset)) stop("The offset argument here can only be NULL") 37 | ## work needed here, to allow the use of offset as an argument? 38 | 39 | ## Initial setup as in glm (except that `prior weights' are not used here, 40 | ## and an offset (if any) must be specified through the formula rather 41 | ## than (as is also allowed by glm) as a separate argument: 42 | call <- match.call() 43 | theFormula <- formula 44 | if (missing(data)) 45 | data <- environment(formula) 46 | if (is.null(call$epsilon)) control$epsilon <- 1e-8 47 | ## Is 1e-8 too stringent? Work needed here! 48 | fixed.totals <- eval(substitute(fixed.totals), envir = data) 49 | mf <- match.call(expand.dots = FALSE) 50 | m <- match(c("formula", "data", "subset", "na.action", 51 | "offset", "fixed.totals"), names(mf), 0) 52 | mf <- mf[c(1, m)] 53 | mf$drop.unused.levels <- TRUE 54 | mf[[1]] <- as.name("model.frame") 55 | mf <- eval(mf, parent.frame()) 56 | mt <- attr(mf, "terms") 57 | Y <- model.response(mf, "numeric") 58 | 59 | ## Correct the fixed.totals variable for subsetting, NA treatment, etc. 60 | if (!is.null(fixed.totals)) { 61 | fixed.totals <- mf$"(fixed.totals)" 62 | if (!is.factor(fixed.totals)) stop("fixed.totals must be a factor") 63 | rowTotals <- as.vector(tapply(Y, fixed.totals, sum))[fixed.totals] 64 | } 65 | 66 | ## Do an initial fit of the model, with constant-adjusted counts 67 | adj <- 0.5 68 | X <- model.matrix(formula, data = mf) 69 | offset <- model.offset(mf) 70 | nFixed <- if (is.null(fixed.totals)) 0 else nlevels(fixed.totals) 71 | mf$y.adj <- Y + adj * (ncol(X) - nFixed)/nrow(X) 72 | formula <- update(formula, y.adj ~ .) 
73 | op <- options(warn = -1) 74 | fit <- glm.fit(X, mf$y.adj, family = poisson(), offset = offset, 75 | control = glm.control(maxit = 1)) 76 | options(op) 77 | ## Update the model iteratively, refining the adjustment at each iteration 78 | criterion <- 1 79 | iter <- 1 80 | coefs <- coef(fit) 81 | Xwork <- X[ , !is.na(coefs), drop = FALSE] 82 | muAdj <- fitted(fit) 83 | coefs <- na.omit(coefs) 84 | while (criterion > control$epsilon && iter < control$maxit) { 85 | iter <- iter + 1 86 | if (!is.null(fixed.totals)){ 87 | muTotals <- as.vector(tapply(muAdj, fixed.totals, 88 | sum))[fixed.totals] 89 | mu <- muAdj * rowTotals / muTotals 90 | } else { ## case fixed.totals is NULL 91 | mu <- muAdj 92 | } 93 | W.X <- sqrt(mu) * Xwork 94 | XWXinv <- chol2inv(chol(crossprod(W.X))) 95 | coef.se <- sqrt(diag(XWXinv)) 96 | h <- diag(Xwork %*% XWXinv %*% t(mu * Xwork)) 97 | mf$y.adj <- Y + h * adj 98 | z <- log(muAdj) + (mf$y.adj - muAdj)/muAdj 99 | lsfit <- lm.wfit(Xwork, z, muAdj, offset = offset) 100 | criterion <- max((abs(lsfit$coefficients - coefs))/coef.se) 101 | if (control$trace) cat("Iteration ", iter, 102 | ": largest (scaled) coefficient change is ", 103 | criterion, "\n", sep = "") 104 | coefs <- lsfit$coefficients 105 | muAdj <- exp(lsfit$fitted.values) 106 | } 107 | 108 | ## The object `lsfit' uses *adjusted* counts, fitted values, etc. -- 109 | ## so we need to correct various things before returning (so that, for example, 110 | ## deviance and standard errors are correct). 111 | fit$coefficients[!is.na(fit$coefficients)] <- coefs 112 | fit$y <- Y 113 | fit$fitted.values <- fit$weights <- mu 114 | fit$linear.predictors <- log(mu) 115 | dev.resids <- poisson()$dev.resids 116 | wt <- fit$prior.weights 117 | fit$deviance <- sum(dev.resids(Y, mu, wt)) 118 | fit$null.deviance <- sum(dev.resids(Y, mean(Y), wt)) 119 | fit$aic <- NA # sum((poisson()$aic)(Y, 1, mu, wt, 0)) ?? work needed! 120 | fit$residuals <- Y/mu - 1 121 | 122 | ## Next bit is straight from glm -- not sure it applies here (work needed!) 123 | if (any(offset) && attr(mt, "intercept") > 0) { 124 | fit$null.deviance <- glm.fit(x = X[, "(Intercept)", drop = FALSE], 125 | y = Y, weights = rep(1, nrow(mf)), 126 | offset = offset, family = poisson(), 127 | intercept = TRUE)$deviance 128 | } 129 | 130 | fit$iter <- iter 131 | fit$model <- mf 132 | fit$na.action <- attr(mf, "na.action") 133 | fit$x <- X 134 | fit$qr <- qr(sqrt(mu) * X) 135 | fit$R <- qr.R(fit$qr, complete = TRUE) 136 | ## The "effects" component of the fit object is almost certainly not 137 | ## correct as is -- but where does it actually get used? (work needed!) 138 | ## rownames(fit$R) <- colnames(fit$R) ## FIX THIS! 139 | fit <- c(fit, list(call = call, formula = theFormula, terms = mt, 140 | data = data, offset = offset, method = "brpr", 141 | contrasts = attr(X, "contrasts"), 142 | xlevels = .getXlevels(mt, mf))) 143 | class(fit) <- c("glm", "lm") 144 | return(fit) 145 | } 146 | -------------------------------------------------------------------------------- /inst/brpr/examples.R: -------------------------------------------------------------------------------- 1 | ## Some examples of the use of "brpr" to fit a multinomial logit 2 | ## model by penalized (reduced-bias) maximum likelihood. 3 | ## 4 | ## Examples prepared by I. 
Kosmidis, University College London 5 | 6 | source(system.file("inst", "brpr/brpr.R", package = "brglm2")) 7 | 8 | library(pmlr) 9 | library(MASS) # for data(housing) 10 | library(brglm) # for the penalizedDeviance profileModel objective 11 | library(gnm) # for pickCoef and expandCategorical used in coefinds and expandMult 12 | 13 | ## coefinds takes as input the glm object that brpr results and 14 | ## returns the location of the coefficients for the fitted multinomial 15 | ## model. 16 | coefinds <- function(obj) { 17 | require(gnm) 18 | cats <- as.character(obj$call$formula[3]) 19 | cats <- strsplit(gsub(" ", "", cats), "\\*\\(")[[1]][1] 20 | cats <- rev(strsplit(cats, "\\+")[[1]])[1] 21 | cc <- pickCoef(obj, cats) 22 | cc 23 | } 24 | 25 | ## expandMult will epxand the multinomial data set so that all 26 | ## available categories are represented in each covariate class. If 27 | ## there are non-available categories in the original data set they 28 | ## are included in the resulted data frame and are assigned frequency 29 | ## zero. 30 | expandMult <- function(data, catvar, countsvar) { 31 | require(gnm) 32 | temp <- expandCategorical(data = data, catvar = catvar, 33 | sep = ".", countvar = "CcCounts", 34 | idvar = "id", as.ordered = FALSE, 35 | group = TRUE) 36 | ## Get the correct category counts 37 | temp[[countsvar]] <- temp[[countsvar]] * temp$CcCounts 38 | temp$CcCounts <- temp$id <- NULL 39 | temp 40 | } 41 | 42 | 43 | ##################################################################### 44 | ## Analysis of the Coppenhagen housing data set in the MASS library 45 | ##################################################################### 46 | data(housing) 47 | contrasts(housing$Sat) <- contr.treatment(3, base = 1) 48 | 49 | housbrpr <- brpr(Freq ~ Infl * Type * Cont + 50 | Sat * (Infl + Type + Cont), data = housing, 51 | fixed.totals = Infl:Type:Cont) 52 | houspmlr <- pmlr(Sat ~ Infl + Type + Cont, weights = Freq, 53 | data = housing, method = "wald") 54 | 55 | ## The estimates are the same at least up to 5 decimals 56 | coef(housbrpr)[coefinds(housbrpr)] 57 | houspmlr$coefficients 58 | 59 | 60 | ##################################################################### 61 | ## Analysis of the hepatitis data set in Bull et. al. (2007) 62 | ##################################################################### 63 | data(hepatitis) 64 | ## Construct a variable with the multinomial categories according to 65 | ## the HCV and nonABC columns 66 | hepat <- hepatitis 67 | hepat$type <- with(hepat, factor(1 - HCV*nonABC + HCV + 2 * nonABC)) 68 | hepat$type <- factor(hepat$type, labels = c("noDisease", "C", "nonABC")) 69 | contrasts(hepat$type) <- contr.treatment(3, base = 1) 70 | ## compare the result with the one that pmlr gives 71 | hepatnew <- expandMult(data = hepat, catvar = "type", countsvar = "counts") 72 | heppmlr <- pmlr(type ~ group + time + group:time, 73 | data = hepatnew, weights = counts, method = "wald", 74 | penalized = TRUE) 75 | hepbrpr <- brpr(counts ~ group*time + type*(group*time), 76 | fixed.totals = group:time, data = hepatnew, 77 | epsilon = 1e-14) ## very strict epsilon 78 | 79 | ## The estimates are the same at least up to 5 decimals 80 | coef(hepbrpr)[coefinds(hepbrpr)] 81 | heppmlr$coefficients 82 | 83 | ## Speed comparison (it takes a while to run the next 2 lines!) 84 | ## Shows that brpr with epsilon 1e-14 is roughly 5 times faster 85 | ## than pmlr for this data set. 
86 | system.time(for (i in 1:100) { 87 | pmlr(type ~ group + time + group:time, 88 | data = hepatnew, weights = counts, method = "wald", 89 | penalized = TRUE) }) 90 | system.time(for (i in 1:100) { 91 | brpr(counts ~ group*time + type*(group*time), 92 | fixed.totals = group:time, data = hepatnew, 93 | epsilon = 1e-14) }) 94 | 95 | ## Profile confidence intervals based on the penalized likelihood 96 | hepbrprCIs <- confintModel(hepbrpr, objective = "penalizedDeviance", 97 | quantile = qchisq(0.95, 1), 98 | X = model.matrix(hepbrpr), 99 | which = coefinds(hepbrpr), 100 | method = "zoom", 101 | endpoint.tolerance = 1e-04) 102 | heppmlr <- pmlr(type ~ group + time + group:time, 103 | data = hepatnew, weights = counts, 104 | method = "likelihood", penalized = TRUE) 105 | nn <- dimnames(heppmlr$coefficients) 106 | coefnames <- c(paste(nn[[2]], nn[[3]][1], sep = ":"), 107 | paste(nn[[2]], nn[[3]][2], sep = ":")) 108 | heppmlrCIs <- cbind(heppmlr$CI.lower, heppmlr$CI.upper) 109 | rownames(heppmlrCIs) <- coefnames 110 | 111 | # The confidence intervals from pmlr and profileModel seem to agree 112 | # only with slight differences. But as far as those differences are 113 | # concerned, I trust confintModel's result; those confidence intervals 114 | # are obtained through binary search. 115 | heppmlrCIs 116 | hepbrprCIs 117 | 118 | 119 | ##################################################################### 120 | ## Analysis of the enzymes data set in ?pmlr 121 | ##################################################################### 122 | data(enzymes) 123 | ## Exclude patients in Group 4 (post-necrotic cirrhosis) 124 | enzymes <- enzymes[enzymes$Group != 4,] 125 | ## Center and scale covariates 126 | AST <- scale(log(enzymes$AST)) 127 | ALT <- scale(log(enzymes$ALT)) 128 | GLDH <- scale(log(enzymes$GLDH)) 129 | OCT <- scale(log(enzymes$OCT)) 130 | enzymes <- data.frame(Patient = enzymes$Patient, 131 | Group = enzymes$Group, AST, ALT, GLDH, OCT) 132 | ## Remove 10 observations to create separation 133 | enzymes <- enzymes[-c(9, 18, 33, 58, 61, 77, 94, 97, 99, 100),] 134 | ## Multinomial: acute viral hepatitis and aggressive chronic hepatits 135 | ## vs. persistent chronic hepatitis 136 | ## Assign Group 2 (persistent chronic hepatitis) as baseline category 137 | enzymes$Group <- factor(enzymes$Group, levels=c("2","1","3")) 138 | ## Re-express data set in an appropriate form for brpr 139 | enzymes$counts <- rep(1, nrow(enzymes)) 140 | enzymes$ind <- factor(1:131) 141 | enz <- expandMult(enzymes, "Group", "counts") 142 | enzpmlr <- pmlr(Group ~ AST + GLDH, weights = counts, 143 | data = enz, method = "wald") 144 | enzbrpr <- brpr(counts ~ -1 + ind + Group * (AST + GLDH), 145 | fixed.totals = ind, data = enz, 146 | epsilon = 1e-14) 147 | ## brpr appears slower than pmlr in this case, most probably because 148 | ## of the large number of nuisances. 149 | 150 | ## The estimates are the same at least up to 5 decimals 151 | coef(enzbrpr)[coefinds(enzbrpr)] 152 | enzpmlr$coefficients 153 | -------------------------------------------------------------------------------- /inst/brpr/timings.R: -------------------------------------------------------------------------------- 1 | ## Some examples of the use of "brpr" to fit a multinomial logit 2 | ## model by penalized (reduced-bias) maximum likelihood. 3 | ## 4 | ## Examples prepared by I. 
Kosmidis, University College London 5 | 6 | source(system.file("inst", "brpr/brpr.R", package = "brglm2")) 7 | 8 | library("pmlr") 9 | library("MASS") # for data(housing) 10 | library("gnm") # for pickCoef and expandCategorical used in coefinds and expandMult 11 | 12 | ## coefinds takes as input the glm object that brpr results and 13 | ## returns the location of the coefficients for the fitted multinomial 14 | ## model. 15 | coefinds <- function(obj) { 16 | require(gnm) 17 | cats <- as.character(obj$call$formula[3]) 18 | cats <- strsplit(gsub(" ", "", cats), "\\*\\(")[[1]][1] 19 | cats <- rev(strsplit(cats, "\\+")[[1]])[1] 20 | cc <- pickCoef(obj, cats) 21 | cc 22 | } 23 | 24 | ## expandMult will epxand the multinomial data set so that all 25 | ## available categories are represented in each covariate class. If 26 | ## there are non-available categories in the original data set they 27 | ## are included in the resulted data frame and are assigned frequency 28 | ## zero. 29 | expandMult <- function(data, catvar, countsvar) { 30 | require(gnm) 31 | temp <- expandCategorical(data = data, catvar = catvar, 32 | sep = ".", countvar = "CcCounts", 33 | idvar = "id", as.ordered = FALSE, 34 | group = TRUE) 35 | ## Get the correct category counts 36 | temp[[countsvar]] <- temp[[countsvar]] * temp$CcCounts 37 | temp$CcCounts <- temp$id <- NULL 38 | temp 39 | } 40 | 41 | timing <- function(..., R = 10) { 42 | system.time(replicate(R, ...)) 43 | } 44 | 45 | ##################################################################### 46 | ## Analysis of the Coppenhagen housing data set in the MASS library 47 | ##################################################################### 48 | data("housing", package = "MASS") 49 | contrasts(housing$Sat) <- contr.treatment(3, base = 1) 50 | 51 | timing(housbrpr <- brpr(Freq ~ Infl * Type * Cont + 52 | Sat * (Infl + Type + Cont), data = housing, 53 | fixed.totals = Infl:Type:Cont, epsilon = 1e-06), R = 50) 54 | timing(houspmlr <- pmlr(Sat ~ Infl + Type + Cont, weights = Freq, 55 | data = housing, method = "wald"), R = 50) 56 | timing(housbrmultinom <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 57 | data = housing, epsilon = 1e-06), R = 50) 58 | 59 | ##################################################################### 60 | ## Analysis of the hepatitis data set in Bull et. al. 
(2007)
61 | #####################################################################
62 | data("hepatitis", package = "pmlr")
63 | ## Construct a variable with the multinomial categories according to
64 | ## the HCV and nonABC columns
65 | hepat <- hepatitis
66 | hepat$type <- with(hepat, factor(1 - HCV*nonABC + HCV + 2 * nonABC))
67 | hepat$type <- factor(hepat$type, labels = c("noDisease", "C", "nonABC"))
68 | contrasts(hepat$type) <- contr.treatment(3, base = 1)
69 | ## compare the result with the one that pmlr gives
70 | hepatnew <- expandMult(data = hepat, catvar = "type", countsvar = "counts")
71 | 
72 | timing(heppmlr <- pmlr(type ~ group + time + group:time,
73 |                        data = hepatnew, weights = counts, method = "wald",
74 |                        penalized = TRUE), R = 50)
75 | timing(hepbrmultinom <- brmultinom(type ~ group + time + group:time,
76 |                                    data = hepatnew, weights = counts,
77 |                                    epsilon = 1e-06), R = 50)
78 | timing(hepbrpr <- brpr(counts ~ group*time + type*(group*time),
79 |                        fixed.totals = group:time, data = hepatnew,
80 |                        epsilon = 1e-06), R = 50)
81 | 
82 | #####################################################################
83 | ## Analysis of the enzymes data set in ?pmlr
84 | #####################################################################
85 | data("enzymes", package = "pmlr")
86 | ## Exclude patients in Group 4 (post-necrotic cirrhosis)
87 | enzymes <- enzymes[enzymes$Group != 4,]
88 | ## Center and scale covariates
89 | AST <- scale(log(enzymes$AST))
90 | ALT <- scale(log(enzymes$ALT))
91 | GLDH <- scale(log(enzymes$GLDH))
92 | OCT <- scale(log(enzymes$OCT))
93 | enzymes <- data.frame(Patient = enzymes$Patient,
94 |                       Group = enzymes$Group, AST, ALT, GLDH, OCT)
95 | ## Remove 10 observations to create separation
96 | enzymes <- enzymes[-c(9, 18, 33, 58, 61, 77, 94, 97, 99, 100),]
97 | ## Multinomial: acute viral hepatitis and aggressive chronic hepatitis
98 | ## vs.
persistent chronic hepatitis 99 | ## Assign Group 2 (persistent chronic hepatitis) as baseline category 100 | enzymes$Group <- factor(enzymes$Group, levels=c("2","1","3")) 101 | ## Re-express data set in an appropriate form for brpr 102 | enzymes$counts <- rep(1, nrow(enzymes)) 103 | enzymes$ind <- factor(1:131) 104 | enz <- expandMult(enzymes, "Group", "counts") 105 | timing(enzpmlr <- pmlr(Group ~ AST + GLDH, weights = counts, 106 | data = enz, method = "wald"), R = 10) 107 | timing(enzbrpr <- brpr(counts ~ -1 + ind + Group * (AST + GLDH), 108 | fixed.totals = ind, data = enz, 109 | epsilon = 1e-14), R = 10) 110 | timing(enzbrmultinom <- brmultinom(Group ~ AST + GLDH, weights = counts, 111 | data = enz, 112 | epsilon = 1e-14), R = 10) 113 | ## max(abs(matrix(enzbrpr$fitted.values, ncol = 3, byrow = TRUE) - enzbrmultinom$fitted.values)) 114 | 115 | 116 | timing <- function(..., R = 10) { 117 | system.time(replicate(R, ...)) 118 | } 119 | -------------------------------------------------------------------------------- /inst/glmbias_code+results/alligator_simulation_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/inst/glmbias_code+results/alligator_simulation_results.rda -------------------------------------------------------------------------------- /inst/glmbias_code+results/birth_weight_simulation_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/inst/glmbias_code+results/birth_weight_simulation_results.rda -------------------------------------------------------------------------------- /inst/glmbias_code+results/clotting_simulation_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ikosmidis/brglm2/60a8c7bb46f468f36bbfc7fe37e6cdb4805e5d85/inst/glmbias_code+results/clotting_simulation_results.rda -------------------------------------------------------------------------------- /inst/glmbias_code+results/glmbias_gamma_simulation.R: -------------------------------------------------------------------------------- 1 | ## simulation study: clotting dataset 2 | 3 | library("brglm2") 4 | source("glmbias_simulation_functions.R") 5 | 6 | clotting <- data.frame( 7 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 8 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 9 | lot = factor(c(rep(1, 9), rep(2, 9)))) 10 | 11 | clot_ML <- glm(conc ~ lot*log(u), data = clotting, family = Gamma(link="log"), 12 | method = "brglmFit", type = "ML", maxit=1000, epsilon = 1e-8) 13 | 14 | truepar <- c(coef(clot_ML),clot_ML$dispersion) 15 | clotting_simulation_results <- sim_clot_log(10000, clotting, truepar, 123) 16 | clotting_simulation_results <- summarySim(clotting_simulation_results) 17 | 18 | bias <- clotting_simulation_results$BIAS 19 | sd <- clotting_simulation_results$SD 20 | rib <- clotting_simulation_results$RIB 21 | pu <- clotting_simulation_results$PU 22 | mae <- clotting_simulation_results$MAE 23 | cov <- clotting_simulation_results$COV_b 24 | mleg <- cbind(bias[1, ], sd[1, ], rib[1, ], pu[1, ], mae[1, ], cov[1, ]) 25 | mle <- cbind(bias[2, ], sd[2, ], rib[2, ], pu[2, ], mae[2, ], cov[2, ]) 26 | meanBR <- cbind(bias[4, ], sd[4, ], rib[4, ], pu[4, ], mae[4, ], cov[4, ]) 27 | medianBR <- cbind(bias[5, ], sd[5, ], rib[5, ], pu[5, 
], mae[5, ], cov[5, ]) 28 | meanmixed <- cbind(bias[6, ], sd[6, ], rib[6, ], pu[6, ], mae[6, ], cov[6, ]) 29 | 30 | save(mle, meanBR, medianBR, meanmixed, file = "clotting_simulation_results.rda") 31 | -------------------------------------------------------------------------------- /inst/glmbias_code+results/glmbias_logistic_simulation.R: -------------------------------------------------------------------------------- 1 | ## simulation study: infant birth weights 2 | 3 | library("MASS") 4 | library("brglm2") 5 | 6 | ## Prepare the birth weight data set 7 | bwt <- with(birthwt, { 8 | age <- age 9 | racewhite <- ifelse(race==1,1,0) 10 | smoke <- smoke 11 | ptl <- ifelse(ptl>0,1,0) 12 | ptd <- factor(ptl > 0) 13 | ht <- ht 14 | loglwt <- log(lwt) 15 | data.frame(normwt = 1-low, age, racewhite, smoke, ptl,ht,loglwt,ftv) 16 | }) 17 | bwt <- subset(bwt, subset = (ftv==0), select= -c(ftv)) 18 | 19 | ## true models 20 | bwt_ml <- glm(normwt ~ ., family = binomial, data = bwt) 21 | beta <- coef(bwt_ml) 22 | 23 | ## random seed 24 | set.seed(435) 25 | 26 | ## simulated response variable 27 | Nsim <- 10000 28 | simdata <- simulate(bwt_ml, nsim = Nsim) 29 | 30 | ml <- br <- bc <- mbr <- ml_se <- br_se <- bc_se <- mbr_se <- separation <- 31 | matrix(NA, nrow = Nsim, ncol = length(beta)) 32 | 33 | for (i in 1:Nsim) { 34 | current_data <- within(bwt, { normwt <- simdata[[i]] }) 35 | if (i%%100 == 0) print(i) 36 | ml_fit <- update(bwt_ml, data = current_data) 37 | br_fit <- update(bwt_ml, method = "brglmFit", type = "AS_mean", data = current_data) 38 | bc_fit <- update(bwt_ml, method = "brglmFit", type = "correction", data = current_data) 39 | mbr_fit <- update(bwt_ml, method = "brglmFit", type = "AS_median", data = current_data) 40 | sep_fit <- update(ml_fit, method = "detect_separation") 41 | 42 | sum_ml <- summary(ml_fit) 43 | sum_brmean <- summary(br_fit) 44 | sum_bcorr <- summary(bc_fit) 45 | sum_brmedian <- summary(mbr_fit) 46 | 47 | ml[i, ] <- sum_ml$coef[, 1] 48 | ml_se[i, ] <- sum_ml$coef[, 2] 49 | br[i, ] <- sum_brmean$coef[, 1] 50 | br_se[i, ] <- sum_brmean$coef[, 2] 51 | bc[i, ] <- sum_bcorr$coef[, 1] 52 | bc_se[i, ] <- sum_bcorr$coef[, 2] 53 | mbr[i, ] <- sum_brmedian$coef[, 1] 54 | mbr_se[i, ] <- sum_brmedian$coef[, 2] 55 | separation[i, ] <- sep_fit$betas 56 | } 57 | 58 | ml.inc <- apply(separation, 1, function(b) all(b == 0)) 59 | 60 | ## bias in beta parameterization 61 | bias.beta <- data.frame(ml = colMeans(ml[ml.inc, ]), 62 | br = colMeans(br), 63 | mbr = colMeans(mbr)) - beta 64 | ## bias in psi parameterization 65 | bias.psi <- data.frame(ml = colMeans(exp(ml[ml.inc, ])), 66 | br = colMeans(exp(br)), 67 | mbr = colMeans(exp(mbr))) - exp(beta) 68 | ## Coverage of 95% Wald confidence intervals 69 | ml.ci.l <- ml[ml.inc, ] - qnorm(0.975) * ml_se[ml.inc, ] 70 | ml.ci.u <- ml[ml.inc, ] + qnorm(0.975) * ml_se[ml.inc, ] 71 | br.ci.l <- br - qnorm(0.975) * br_se 72 | br.ci.u <- br + qnorm(0.975) * br_se 73 | mbr.ci.l <- mbr - qnorm(0.975) * mbr_se 74 | mbr.ci.u <- mbr + qnorm(0.975) * mbr_se 75 | coverage <- data.frame(ml = rowMeans(t(ml.ci.l) < beta & t(ml.ci.u) > beta), 76 | br = rowMeans(t(br.ci.l) < beta & t(br.ci.u) > beta), 77 | mbr = rowMeans(t(mbr.ci.l) < beta & t(mbr.ci.u) > beta)) 78 | ## probability of underestimation 79 | PU <- data.frame(ml = rowMeans(t(ml[ml.inc, ]) < beta), 80 | br = rowMeans(t(br) < beta), 81 | mbr = rowMeans(t(mbr) < beta)) 82 | 83 | save(bias.beta, bias.psi, PU, coverage, file = "birth_weight_simulation_results.rda") 84 | 
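85 | ## A minimal sketch of how the saved summaries could be inspected afterwards;
86 | ## it assumes only that "birth_weight_simulation_results.rda" has been produced
87 | ## by the save() call above, and uses the object names saved there.
88 | ## load("birth_weight_simulation_results.rda")
89 | ## round(cbind(bias.beta, coverage, PU), 3)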
-------------------------------------------------------------------------------- /inst/glmbias_code+results/glmbias_multinomial_simulation.R: -------------------------------------------------------------------------------- 1 | ###################### 2 | ## Simulation study ## 3 | ###################### 4 | 5 | library("plyr") 6 | library("doMC") 7 | library("brglm2") 8 | library("nnet") 9 | 10 | ## Use 18 cores for the simulation 11 | registerDoMC(18) 12 | 13 | all_ml <- brmultinom(foodchoice ~ size + lake , weights = freq, 14 | data = alligators, 15 | ref = 1, 16 | type = "ML") 17 | true_coefs <- coef(all_ml) 18 | 19 | agresti_contrasts <- list(lake = contr.treatment(levels(alligators$lake), 20 | base = 4), 21 | size = contr.treatment(levels(alligators$size), 22 | base = 2)) 23 | 24 | ## Contrasts matrix 25 | mat <- cbind(c(1, 1, 1, 0, 0), 26 | c(0, -1, 0, 0, 0), 27 | c(0, 0, -1, 1, 0), 28 | c(0, 0, -1, 0, 1), 29 | c(0, 0, -1, 0, 0)) 30 | 31 | ## coef(all_ml_agresti) %*% mat - true_coefs 32 | 33 | 34 | ## Model specific and not at all efficient! 35 | simulate_alligators <- function(fitted, total_factor = 1) { 36 | nams <- apply(alligators[c("lake", "gender", "size")], 1, 37 | paste0, collapse = "|") 38 | totals <- round(total_factor * tapply(alligators$freq, nams, sum)) 39 | ## this is ordered 40 | oo <- order(nams) 41 | al <- alligators[oo, ] 42 | fitted <- fitted[oo, ] 43 | inds <- which(!duplicated(al[, c("lake", "gender", "size")])) 44 | covariate_settings <- al[inds, c("lake", "gender", "size")] 45 | freq <- sapply(seq.int(totals), function(j) rmultinom(1, totals[j], 46 | fitted[inds[j], ])) 47 | out <- data.frame(foodchoice = colnames(fitted), 48 | covariate_settings[rep(rownames(covariate_settings), 49 | each = nrow(freq)),] , 50 | freq = c(freq)) 51 | out$foodchoice <- factor(out$foodchoice, 52 | levels = levels(alligators$foodchoice)) 53 | out 54 | } 55 | 56 | nsimu <- 10000 57 | factors <- c(0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 3, 4, 5) 58 | 59 | res <- as.list(numeric(length(factors))) 60 | 61 | for (k in seq_along(factors)) { 62 | set.seed(123) 63 | seeds <- sample(seq.int(nsimu*1000), nsimu, replace = FALSE) 64 | datasets <- llply(seeds, function(seed) { 65 | set.seed(seed) 66 | dat <- simulate_alligators(fitted(all_ml), 67 | total_factor = factors[k]) 68 | }, .parallel = TRUE) 69 | res[[k]] <- llply(seq_along(datasets), function(j) { 70 | current_data <- datasets[[j]] 71 | ml_fit <- multinom(foodchoice ~ size + lake , weights = freq, 72 | data = current_data, trace = FALSE) 73 | ml_c <- coef(ml_fit) 74 | ml_se <- summary(ml_fit)$standard.errors 75 | br_mean_fit <- brmultinom(foodchoice ~ size + lake , 76 | weights = freq, 77 | data = current_data, ref = "Fish", 78 | trace = FALSE, 79 | type = "AS_mean") 80 | br_mean_c <- coef(br_mean_fit) 81 | br_mean_se <- summary(br_mean_fit)$standard.errors 82 | br_median_fit <- brmultinom(foodchoice ~ size + lake , 83 | weights = freq, 84 | data = current_data, ref = "Fish", 85 | trace = FALSE, 86 | type = "AS_median") 87 | br_median_c <- coef(br_median_fit) 88 | br_median_se <- summary(br_median_fit)$standard.errors 89 | br_median_gamma_fit <- brmultinom(foodchoice ~ size + lake , 90 | weights = freq, 91 | data = current_data, 92 | ref = "Fish", 93 | contrasts = agresti_contrasts, 94 | trace = FALSE, 95 | type = "AS_median") 96 | br_median_gamma_c <- coef(br_median_gamma_fit) %*% mat 97 | colnames(br_median_gamma_c) <- colnames(ml_c) 98 | if (j %% 10 == 0) cat(factors[k], "|", j, "\n") 99 | list(ml = ml_c, 100 | br_mean = br_mean_c, 101 | 
br_median = br_median_c, 102 | br_median_gamma = br_median_gamma_c, 103 | ml_se = ml_se, br_mean_se = br_mean_se, 104 | br_median_se = br_median_se) 105 | }, .parallel = TRUE) 106 | } 107 | 108 | ml_coefs <- lapply(res, function(x) lapply(x, "[[", "ml")) 109 | ml_ses <- lapply(res, function(x) lapply(x, "[[", "ml_se")) 110 | ml_infinite <- lapply(ml_ses, function(x) 111 | sapply(x, function(y) any(is.na(y)) | any(y > 100, 112 | na.rm = TRUE))) 113 | ml_prob_separation <- sapply(ml_infinite, mean) 114 | ml_bias <- lapply(ml_coefs, function(x) 100*(Reduce("+", x)/nsimu - 115 | true_coefs)/true_coefs) 116 | ml_pu <- lapply(ml_coefs, function(x) 100*Reduce("+", lapply(x, "<", 117 | true_coefs))/nsimu) 118 | ## Mean bias reduction 119 | br_mean_coefs <- lapply(res, function(x) lapply(x, "[[", "br_mean")) 120 | br_mean_bias <- lapply(br_mean_coefs, function(x) 100*(Reduce("+", x)/ 121 | nsimu - true_coefs)/true_coefs) 122 | br_mean_pu <- lapply(br_mean_coefs, function(x) 100*Reduce("+", 123 | lapply(x, "<", true_coefs))/nsimu) 124 | ## Median bias reduction 125 | br_median_coefs <- lapply(res, function(x) 126 | lapply(x, "[[", "br_median")) 127 | br_median_bias <- lapply(br_median_coefs, 128 | function(x) 100*(Reduce("+", x)/ 129 | nsimu - true_coefs)/true_coefs) 130 | br_median_pu <- lapply(br_median_coefs, function(x) 100*Reduce("+", 131 | lapply(x, "<", true_coefs))/nsimu) 132 | ## Median bias reduction 133 | br_median_gamma_coefs <- lapply(res, function(x) 134 | lapply(x, "[[", "br_median_gamma")) 135 | br_median_gamma_bias <- lapply(br_median_gamma_coefs, 136 | function(x) 100*(Reduce("+", x)/nsimu - 137 | true_coefs)/true_coefs) 138 | br_median_gamma_pu <- lapply(br_median_gamma_coefs, 139 | function(x) 100*Reduce("+", 140 | lapply(x, "<", true_coefs))/nsimu) 141 | ## Prepare for plotting 142 | ml_bias_matrix <- do.call("rbind", ml_bias) 143 | categories <- rownames(ml_bias_matrix) 144 | ml_results <- stack(as.data.frame(ml_bias_matrix)) 145 | ml_results$category <- categories 146 | ml_results$factor <- rep(factors, each = 4) 147 | ml_results$method <- "ml" 148 | ml_results$pu <- stack(as.data.frame(do.call("rbind", ml_pu)))$values 149 | br_mean_bias_matrix <- do.call("rbind", br_mean_bias) 150 | categories <- rownames(br_mean_bias_matrix) 151 | br_mean_results <- stack(as.data.frame(br_mean_bias_matrix)) 152 | br_mean_results$category <- categories 153 | br_mean_results$factor <- rep(factors, each = 4) 154 | br_mean_results$method <- "mean BR" 155 | br_mean_results$pu <- stack(as.data.frame(do.call("rbind", br_mean_pu)))$values 156 | br_median_bias_matrix <- do.call("rbind", br_median_bias) 157 | categories <- rownames(br_median_bias_matrix) 158 | br_median_results <- stack(as.data.frame(br_median_bias_matrix)) 159 | br_median_results$category <- categories 160 | br_median_results$factor <- rep(factors, each = 4) 161 | br_median_results$method <- "median BR" 162 | br_median_results$pu <- stack(as.data.frame(do.call("rbind", br_median_pu)))$values 163 | br_median_gamma_bias_matrix <- do.call("rbind", br_median_gamma_bias) 164 | categories <- rownames(br_median_gamma_bias_matrix) 165 | br_median_gamma_results <- stack(as.data.frame(br_median_gamma_bias_matrix)) 166 | br_median_gamma_results$category <- categories 167 | br_median_gamma_results$factor <- rep(factors, each = 4) 168 | br_median_gamma_results$method <- "median BR gamma" 169 | br_median_gamma_results$pu <- stack(as.data.frame(do.call("rbind", br_median_gamma_pu)))$values 170 | 171 | 172 | save(br_mean_results, br_median_results, 
br_median_gamma_results, file = "alligator_simulation_results.rda") 173 | -------------------------------------------------------------------------------- /inst/mbrpr/mbrpr.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | ### Median bias reduction in poisson regression ############ 3 | ############################################################# 4 | mod1 <- function(X,mu,InfoInv) { 5 | X<-as.matrix(X) 6 | n <- nrow(X) 7 | p <- ncol(X) 8 | nu_r_s_t <- array(0,c(p,p,p)) 9 | k1 <- k2 <- k3 <- rep(NA,p) 10 | for(r in 1:p) { 11 | nu_r_s_t[r,,] = t(X)%*%(X[,r]*mu*X) 12 | } 13 | k2 <- 1/diag(InfoInv) 14 | barA <- InfoInv*k2 15 | for (r in 1:p) { 16 | sum_s1 <- rep(0,p) 17 | sum_s3 <- rep(0,p) 18 | nu.tu <- InfoInv-outer(InfoInv[,r]*k2[r],InfoInv[,r]) 19 | for (s in 1:p){ 20 | sum_s1[s] <- sum(diag(nu.tu%*%(nu_r_s_t[s,,]))) 21 | sum_s3[s] <- sum((barA[r,]%*%nu_r_s_t[s,,])*barA[r,]) 22 | } 23 | k1[r] <- -0.5*sum(sum_s1*barA[r,]) 24 | k3[r] <- sum(sum_s3*barA[r,]) 25 | } 26 | return(-k1/k2+k3/(6*k2^2)) 27 | } 28 | 29 | 30 | mbrpr <- function(par,y,X,eps=1e-06,maxit=500) { 31 | step <- .Machine$integer.max 32 | nIter <- 0 33 | test <- TRUE 34 | while ( test & (nIter < maxit)) { 35 | nIter <- nIter + 1 36 | eta = drop(X%*%par) 37 | mu <- exp(eta) 38 | info <- t(X)%*%(W<-diag(mu))%*%X 39 | score <- t(X)%*%(y-mu) 40 | InfoInv <- try(chol2inv(chol(info)),TRUE) 41 | if(failedInv <- inherits(InfoInv, "try-error")) { 42 | warning("failed to invert the information matrix: iteration stopped prematurely") 43 | break 44 | } 45 | mod <- mod1(X, mu, InfoInv) 46 | modscore <- score + info%*%mod 47 | y.adj <- X%*%(par+mod)+ solve(diag(mu))%*%(y-mu) 48 | par <- InfoInv%*%t(X)%*%(W%*%y.adj) 49 | test <- sqrt(crossprod(drop(modscore))) > eps 50 | } 51 | converged <- nIter < maxit 52 | list(coefficients=drop(par),converged=converged,nIter=nIter,InfoInv=InfoInv) 53 | } 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /inst/tinytest/brpr.R: -------------------------------------------------------------------------------- 1 | ## Bias-reduced Poisson and multinomial log-linear models 2 | ## using maximum penalized likelihood as in Firth (1993) Biometrika. 3 | ## 4 | ## Author: David Firth, d.firth@warwick.ac.uk, 2 Sep 2010 5 | ## 6 | ## NOT A POLISHED PIECE OF PUBLIC-USE SOFTWARE! Provided "as is", 7 | ## licensed under GPL2. NO WARRANTY OF FITNESS FOR ANY PURPOSE! 8 | ## 9 | ## This code still has lots of rough edges, some of which are 10 | ## indicated by the embedded comments. 11 | ## 12 | ## The intention is to include a more polished implementation as part 13 | ## of the CRAN package "brglm", in due course. 14 | 15 | ## Arguments are as for a Poisson log-linear glm, 16 | ## with one new argument: 17 | ## -- "fixed.totals", a factor indexing groups of observations 18 | ## whose response totals are fixed (for example, this might be 19 | ## the rows of a table). 20 | ## If fixed.totals is NULL (the default), pure Poisson sampling (with 21 | ## no totals fixed) is assumed. 22 | 23 | brpr <- function(formula, 24 | data, 25 | subset, 26 | na.action, 27 | offset = NULL, 28 | control = glm.control(...), 29 | contrasts = NULL, 30 | fixed.totals = NULL, 31 | weights = NULL, 32 | ...) 
{ 33 | ## The "weights" and "offset" arguments here can only be NULL -- anything 34 | ## else is an error (for now, at least) 35 | if (!is.null(weights)) stop("The weights argument here can only be NULL") 36 | if (!is.null(offset)) stop("The offset argument here can only be NULL") 37 | ## work needed here, to allow the use of offset as an argument? 38 | 39 | ## Initial setup as in glm (except that `prior weights' are not used here, 40 | ## and an offset (if any) must be specified through the formula rather 41 | ## than (as is also allowed by glm) as a separate argument: 42 | call <- match.call() 43 | theFormula <- formula 44 | if (missing(data)) 45 | data <- environment(formula) 46 | if (is.null(call$epsilon)) control$epsilon <- 1e-8 47 | ## Is 1e-8 too stringent? Work needed here! 48 | fixed.totals <- eval(substitute(fixed.totals), envir = data) 49 | mf <- match.call(expand.dots = FALSE) 50 | m <- match(c("formula", "data", "subset", "na.action", 51 | "offset", "fixed.totals"), names(mf), 0) 52 | mf <- mf[c(1, m)] 53 | mf$drop.unused.levels <- TRUE 54 | mf[[1]] <- as.name("model.frame") 55 | mf <- eval(mf, parent.frame()) 56 | mt <- attr(mf, "terms") 57 | Y <- model.response(mf, "numeric") 58 | 59 | ## Correct the fixed.totals variable for subsetting, NA treatment, etc. 60 | if (!is.null(fixed.totals)) { 61 | fixed.totals <- mf$"(fixed.totals)" 62 | if (!is.factor(fixed.totals)) stop("fixed.totals must be a factor") 63 | rowTotals <- as.vector(tapply(Y, fixed.totals, sum))[fixed.totals] 64 | } 65 | 66 | ## Do an initial fit of the model, with constant-adjusted counts 67 | adj <- 0.5 68 | X <- model.matrix(formula, data = mf) 69 | offset <- model.offset(mf) 70 | nFixed <- if (is.null(fixed.totals)) 0 else nlevels(fixed.totals) 71 | mf$y.adj <- Y + adj * (ncol(X) - nFixed)/nrow(X) 72 | formula <- update(formula, y.adj ~ .) 73 | op <- options(warn = -1) 74 | fit <- glm.fit(X, mf$y.adj, family = poisson(), offset = offset, 75 | control = glm.control(maxit = 1)) 76 | options(op) 77 | ## Update the model iteratively, refining the adjustment at each iteration 78 | criterion <- 1 79 | iter <- 1 80 | coefs <- coef(fit) 81 | Xwork <- X[ , !is.na(coefs), drop = FALSE] 82 | muAdj <- fitted(fit) 83 | coefs <- na.omit(coefs) 84 | while (criterion > control$epsilon && iter < control$maxit) { 85 | iter <- iter + 1 86 | if (!is.null(fixed.totals)){ 87 | muTotals <- as.vector(tapply(muAdj, fixed.totals, 88 | sum))[fixed.totals] 89 | mu <- muAdj * rowTotals / muTotals 90 | } else { ## case fixed.totals is NULL 91 | mu <- muAdj 92 | } 93 | W.X <- sqrt(mu) * Xwork 94 | XWXinv <- chol2inv(chol(crossprod(W.X))) 95 | coef.se <- sqrt(diag(XWXinv)) 96 | h <- diag(Xwork %*% XWXinv %*% t(mu * Xwork)) 97 | mf$y.adj <- Y + h * adj 98 | z <- log(muAdj) + (mf$y.adj - muAdj)/muAdj 99 | lsfit <- lm.wfit(Xwork, z, muAdj, offset = offset) 100 | criterion <- max((abs(lsfit$coefficients - coefs))/coef.se) 101 | if (control$trace) cat("Iteration ", iter, 102 | ": largest (scaled) coefficient change is ", 103 | criterion, "\n", sep = "") 104 | coefs <- lsfit$coefficients 105 | muAdj <- exp(lsfit$fitted.values) 106 | } 107 | 108 | ## The object `lsfit' uses *adjusted* counts, fitted values, etc. -- 109 | ## so we need to correct various things before returning (so that, for example, 110 | ## deviance and standard errors are correct). 
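## Specifically, the code below (i) restores the converged coefficients into
## the initial glm fit, (ii) replaces the adjusted response, fitted values and
## working weights with the original counts Y and the final mu (already
## rescaled to match the fixed totals when fixed.totals is supplied), and
## (iii) recomputes the deviance, null deviance and working residuals from Y
## and mu rather than from the adjusted counts.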
111 | fit$coefficients[!is.na(fit$coefficients)] <- coefs 112 | fit$y <- Y 113 | fit$fitted.values <- fit$weights <- mu 114 | fit$linear.predictors <- log(mu) 115 | dev.resids <- poisson()$dev.resids 116 | wt <- fit$prior.weights 117 | fit$deviance <- sum(dev.resids(Y, mu, wt)) 118 | fit$null.deviance <- sum(dev.resids(Y, mean(Y), wt)) 119 | fit$aic <- NA # sum((poisson()$aic)(Y, 1, mu, wt, 0)) ?? work needed! 120 | fit$residuals <- Y/mu - 1 121 | 122 | ## Next bit is straight from glm -- not sure it applies here (work needed!) 123 | if (any(offset) && attr(mt, "intercept") > 0) { 124 | fit$null.deviance <- glm.fit(x = X[, "(Intercept)", drop = FALSE], 125 | y = Y, weights = rep(1, nrow(mf)), 126 | offset = offset, family = poisson(), 127 | intercept = TRUE)$deviance 128 | } 129 | 130 | fit$iter <- iter 131 | fit$model <- mf 132 | fit$na.action <- attr(mf, "na.action") 133 | fit$x <- X 134 | fit$qr <- qr(sqrt(mu) * X) 135 | fit$R <- qr.R(fit$qr, complete = TRUE) 136 | ## The "effects" component of the fit object is almost certainly not 137 | ## correct as is -- but where does it actually get used? (work needed!) 138 | ## rownames(fit$R) <- colnames(fit$R) ## FIX THIS! 139 | fit <- c(fit, list(call = call, formula = theFormula, terms = mt, 140 | data = data, offset = offset, method = "brpr", 141 | contrasts = attr(X, "contrasts"), 142 | xlevels = .getXlevels(mt, mf))) 143 | class(fit) <- c("glm", "lm") 144 | return(fit) 145 | } 146 | -------------------------------------------------------------------------------- /inst/tinytest/mbrpr.R: -------------------------------------------------------------------------------- 1 | ############################################################# 2 | ### Median bias reduction in poisson regression ############ 3 | ############################################################# 4 | mod1 <- function(X,mu,InfoInv) { 5 | X<-as.matrix(X) 6 | n <- nrow(X) 7 | p <- ncol(X) 8 | nu_r_s_t <- array(0,c(p,p,p)) 9 | k1 <- k2 <- k3 <- rep(NA,p) 10 | for(r in 1:p) { 11 | nu_r_s_t[r,,] = t(X)%*%(X[,r]*mu*X) 12 | } 13 | k2 <- 1/diag(InfoInv) 14 | barA <- InfoInv*k2 15 | for (r in 1:p) { 16 | sum_s1 <- rep(0,p) 17 | sum_s3 <- rep(0,p) 18 | nu.tu <- InfoInv-outer(InfoInv[,r]*k2[r],InfoInv[,r]) 19 | for (s in 1:p){ 20 | sum_s1[s] <- sum(diag(nu.tu%*%(nu_r_s_t[s,,]))) 21 | sum_s3[s] <- sum((barA[r,]%*%nu_r_s_t[s,,])*barA[r,]) 22 | } 23 | k1[r] <- -0.5*sum(sum_s1*barA[r,]) 24 | k3[r] <- sum(sum_s3*barA[r,]) 25 | } 26 | return(-k1/k2+k3/(6*k2^2)) 27 | } 28 | 29 | 30 | mbrpr <- function(par,y,X,eps=1e-06,maxit=500) { 31 | step <- .Machine$integer.max 32 | nIter <- 0 33 | test <- TRUE 34 | while ( test & (nIter < maxit)) { 35 | nIter <- nIter + 1 36 | eta = drop(X%*%par) 37 | mu <- exp(eta) 38 | info <- t(X)%*%(W<-diag(mu))%*%X 39 | score <- t(X)%*%(y-mu) 40 | InfoInv <- try(chol2inv(chol(info)),TRUE) 41 | if(failedInv <- inherits(InfoInv, "try-error")) { 42 | warning("failed to invert the information matrix: iteration stopped prematurely") 43 | break 44 | } 45 | mod <- mod1(X, mu, InfoInv) 46 | modscore <- score + info%*%mod 47 | y.adj <- X%*%(par+mod)+ solve(diag(mu))%*%(y-mu) 48 | par <- InfoInv%*%t(X)%*%(W%*%y.adj) 49 | test <- sqrt(crossprod(drop(modscore))) > eps 50 | } 51 | converged <- nIter < maxit 52 | list(coefficients=drop(par),converged=converged,nIter=nIter,InfoInv=InfoInv) 53 | } 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /inst/tinytest/test-binomial.R: 
-------------------------------------------------------------------------------- 1 | library("brglm") 2 | data("lizards", package = "brglm2") 3 | links <- lapply(c("logit", "probit", "cloglog", "cauchit"), make.link) 4 | 5 | tol <- 1e-08 6 | for (l in seq_along(links)) { 7 | expect_warning( 8 | lizardsBRlegacy <- brglm(cbind(grahami, opalinus) ~ height + diameter + 9 | light + time, family = binomial(links[[l]]), 10 | data = lizards, method = "brglm.fit", 11 | br.epsilon = 1e-10, br.maxit = 1000) 12 | ) 13 | 14 | expect_warning( 15 | lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + 16 | light + time, family = binomial(links[[l]]), data=lizards, 17 | method = "brglmFit", epsilon = 1e-10, maxit = 1000) 18 | ) 19 | ## glm with brglm.fit method and brglm_0 return the same coefficients for the lizards when link is links[[l]]$name) 20 | expect_equal(coef(lizardsBR), coef(lizardsBRlegacy), tolerance = tol) 21 | } 22 | 23 | 24 | ## Performance comparisons BR versus ML 25 | ## link1 <- "cauchit" 26 | ## system.time(replicate(100, {lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, method = "brglmFit", epsilon = 1e-10, maxit = 1000, type = "ML")})) 27 | 28 | ## system.time(replicate(100, {lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, method = "glm.fit", epsilon = 1e-10, maxit = 1000)})) 29 | 30 | ## link1 <- "cauchit" 31 | ## system.time(replicate(100, {lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, method = "brglmFit", epsilon = 1e-10, maxit = 1000)})) 32 | 33 | ## system.time(replicate(100, {lizardsBR <- brglm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(link1), data=lizards, br.epsilon = 1e-10, br.maxit = 1000)})) 34 | -------------------------------------------------------------------------------- /inst/tinytest/test-bracl.R: -------------------------------------------------------------------------------- 1 | library("VGAM") 2 | 3 | ### tests for bracl and its agreement with other methods 4 | 5 | ## Stem cell research data from Agresti (2017, Analysis of ordinal data, Table 4.1) 6 | freq <- c(34, 41, 58, 21, 30, 64, 7 | 67, 83, 63, 52, 52, 50, 8 | 30, 23, 15, 24, 18, 16, 9 | 25, 14, 12, 15, 11, 11) 10 | fund_research <- factor(rep(c("definitely", "probably", "probably not", "definitely not"), each = 6), 11 | levels = c("definitely", "probably", "probably not", "definitely not"), 12 | ordered = TRUE) 13 | gender <- factor(rep(rep(c("female", "male"), each = 3), 4), levels = c("male", "female")) 14 | ## gender <- rep(rep(c(1, 0), each = 3), 4) 15 | religion <- factor(rep(c("fundamentalist", "moderate", "liberal"), 8), 16 | levels = c("fundamentalist", "moderate", "liberal"), 17 | ordered = TRUE) 18 | stemcell <- data.frame(frequency = freq, research = fund_research, gender = gender, religion = religion) 19 | 20 | ff <- matrix(freq, nrow = 4, byrow = TRUE) 21 | re <- c(1, 2, 3, 1, 2, 3) 22 | ge <- c(1, 1, 1, 0, 0, 0) 23 | 24 | ## With proportional odds 25 | suppressWarnings(fit_vgam_p <- vglm(cbind(ff[1, ], ff[2, ], ff[3, ], ff[4, ]) ~ re + ge, family = acat(reverse = TRUE, parallel = TRUE))) 26 | 27 | 28 | expect_warning( 29 | fit_bracl_p <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = stemcell, type = "ML", parallel = TRUE) 30 | ) 31 | 32 | 33 | fit_vgam <- vglm(cbind(ff[1, ], ff[2, ], ff[3, ], ff[4, ]) ~ re + 
ge, family = acat(reverse = TRUE, parallel = FALSE))
34 | ## Without proportional odds
35 | expect_warning(
36 |     fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = stemcell, type = "ML")
37 | )
38 | 
39 | tol <- 1e-06
40 | ## VGAM::vglm and bracl return the same coefficients
41 | expect_equal(unname(coef(fit_vgam)), unname(coef(fit_bracl)), tolerance = tol)
42 | expect_equal(unname(coef(fit_vgam_p)), unname(coef(fit_bracl_p)), tolerance = tol)
43 | 
44 | ## Difference in deviance is the same with VGAM::vglm and bracl
45 | expect_equal(logLik(fit_vgam) - logLik(fit_vgam_p),
46 |              unclass(logLik(fit_bracl) - logLik(fit_bracl_p)), tolerance = tol,
47 |              check.attributes = FALSE)
48 | 
49 | 
50 | ## logLik returns the correct df for bracl
51 | expect_identical(attr(logLik(fit_bracl), "df"), as.integer(9))
52 | expect_identical(attr(logLik(fit_bracl_p), "df"), as.integer(5))
53 | 
54 | ## bracl returns the correct fitted values
55 | expect_equal(fitted(fit_bracl)[1:6, ], fit_vgam@fitted.values, tolerance = tol,
56 |              check.attributes = FALSE)
57 | expect_equal(fitted(fit_bracl_p)[1:6, ], fit_vgam_p@fitted.values, tolerance = tol,
58 |              check.attributes = FALSE)
59 | 
60 | shu <- function(dat) dat[sample(seq.int(nrow(dat)), nrow(dat)), ]
61 | 
62 | ## bracl results are invariant to shuffling of the data
63 | for (j in 1:10) {
64 |     expect_warning(
65 |         fit_bracl_p_r <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = shu(stemcell), type = "ML", parallel = TRUE)
66 |     )
67 | 
68 |     expect_warning(
69 |         fit_bracl_r <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, data = shu(stemcell), type = "ML", parallel = FALSE)
70 |     )
71 | 
72 |     expect_equal(coef(fit_bracl), coef(fit_bracl_r), tolerance = tol)
73 |     expect_equal(coef(fit_bracl_p), coef(fit_bracl_p_r), tolerance = tol)
74 | }
75 | 
76 | tol <- 1e-02
77 | ## vcov method for bracl returns the correct vcov matrix
78 | expect_equal(vcov(fit_vgam_p), vcov(fit_bracl_p), tolerance = tol, check.attributes = FALSE)
79 | expect_equal(vcov(fit_vgam), vcov(fit_bracl), tolerance = tol, check.attributes = FALSE)
80 | 
81 | s1 <- summary(fit_bracl)
82 | s1p <- summary(fit_bracl_p)
83 | s2 <- summary(fit_vgam)
84 | s2p <- summary(fit_vgam_p)
85 | 
86 | tol <- 1e-06
87 | ## summary method for bracl returns the correct coef mat
88 | expect_equal(coef(s1), s2@coef3, tolerance = tol, check.attributes = FALSE)
89 | expect_equal(coef(s1p), s2p@coef3, tolerance = tol, check.attributes = FALSE)
90 | 
91 | newdata <- expand.grid(gender = c("male", "female"), religion = c("moderate", "fundamentalist"))
92 | ## predict.bracl works as expected
93 | pp <- predict(fit_bracl_p, newdata = stemcell, type = "probs")
94 | p <- predict(fit_bracl, newdata = stemcell, type = "probs")
95 | expect_equal(predict(fit_vgam_p, type = "response"),
96 |              pp[19:24, ],
97 |              tolerance = 1e-06, check.attributes = FALSE)
98 | expect_equal(predict(fit_vgam, type = "response"),
99 |              p[19:24, ],
100 |              tolerance = 1e-06, check.attributes = FALSE)
101 | 
102 | ## no intercept returns warning
103 | expect_warning(
104 |     fit_bracl_p_r <- bracl(research ~ -1 + as.numeric(religion) + gender, weights = frequency, data = shu(stemcell), type = "ML", parallel = FALSE)
105 | )
106 | 
107 | ## prediction with NAs works
108 | newd <- newdata
109 | newd[3, 2] <- NA
110 | expect_true(is.na(predict(fit_bracl_p, newd, "class")[3]))
111 | expect_true(all(is.na(predict(fit_bracl_p, newd, "probs")[3, ])))
112 | 
113 | 
114 | 
115 | ## simulate from bracl
objects returns a data frame with expected characteristics 116 | simu_df <- simulate(fit_bracl_p) 117 | nam_mf <- names(model.frame(fit_bracl_p)) 118 | nam_simu <- names(simu_df) 119 | expect_identical(nrow(simu_df), 120 | nrow(stemcell) * nlevels(stemcell$research)) 121 | expect_identical(levels(simu_df$research), 122 | levels(stemcell$research)) 123 | expect_identical(is.ordered(simu_df$research), 124 | is.ordered(stemcell$research)) 125 | expect_identical(nam_mf[!(nam_mf %in% nam_simu)], 126 | "(weights)") 127 | expect_identical(nam_simu[!(nam_simu %in% nam_mf)], 128 | as.character(fit_bracl_p$call$weights)) 129 | expect_identical(nam_simu[(nam_simu %in% nam_mf)], 130 | nam_mf[(nam_mf %in% nam_simu)]) 131 | 132 | ## simulate from bracl objects returns a data frame with expected characteristics 133 | simu_df <- simulate(fit_bracl) 134 | nam_mf <- names(model.frame(fit_bracl)) 135 | nam_simu <- names(simu_df) 136 | expect_identical(nrow(simu_df), 137 | nrow(stemcell) * nlevels(stemcell$research)) 138 | expect_identical(levels(simu_df$research), 139 | levels(stemcell$research)) 140 | expect_identical(is.ordered(simu_df$research), 141 | is.ordered(stemcell$research)) 142 | expect_identical(nam_mf[!(nam_mf %in% nam_simu)], 143 | "(weights)") 144 | expect_identical(nam_simu[!(nam_simu %in% nam_mf)], 145 | as.character(fit_bracl$call$weights)) 146 | expect_identical(nam_simu[(nam_simu %in% nam_mf)], 147 | nam_mf[(nam_mf %in% nam_simu)]) 148 | -------------------------------------------------------------------------------- /inst/tinytest/test-brglmControl.R: -------------------------------------------------------------------------------- 1 | b_control <- brglmControl() 2 | 3 | ## the object brglmControl() returns is as expected 4 | expect_identical(b_control$epsilon, 1e-06) 5 | expect_identical(b_control$maxit, 100) 6 | expect_false(b_control$trace) 7 | expect_null(b_control$response_adjustment) 8 | expect_equal(b_control$Trans, expression(dispersion)) 9 | expect_equal(b_control$inverseTrans, expression(transformed_dispersion)) 10 | expect_equal(b_control$transformation, "identity") 11 | expect_equal(b_control$slowit, 1) 12 | expect_equal(b_control$max_step_factor, 12) 13 | 14 | b_control <- brglmControl(epsilon = 1e-02, trace = TRUE, response_adjustment = c(0.3, 0.2)) 15 | ## the object brglmControl returns with defaults is as expected 16 | expect_identical(b_control$epsilon, 1e-02) 17 | expect_identical(b_control$maxit, 100) 18 | expect_true(b_control$trace) 19 | expect_identical(b_control$response_adjustment, c(0.3, 0.2)) 20 | expect_equal(b_control$Trans, expression(dispersion)) 21 | expect_equal(b_control$inverseTrans, expression(transformed_dispersion)) 22 | expect_equal(b_control$transformation, "identity") 23 | expect_equal(b_control$slowit, 1) 24 | expect_equal(b_control$max_step_factor, 12) 25 | 26 | data("coalition", package = "brglm2") 27 | mod <- glm(duration ~ fract + numst2, family = Gamma, data = coalition, method = "brglmFit") 28 | expect_warning(mod <- update(mod, maxit = 7, transformation = "sqrt"), "brglmFit: algorithm did not converge") 29 | expect_equal(mod$iter, 7) 30 | expect_equal(mod$transformation, "sqrt") 31 | 32 | b_control <- brglmControl(epsilon = 1e-02, ABCDEFG123 = 1, response_adjustment = c(0.3, 0.2), trace = TRUE, ) 33 | ## the object brglmControl returns with defaults is as expected 34 | expect_identical(b_control$epsilon, 1e-02) 35 | expect_identical(b_control$maxit, 100) 36 | expect_true(b_control$trace) 37 | expect_identical(b_control$response_adjustment, 
c(0.3, 0.2)) 38 | expect_equal(b_control$Trans, expression(dispersion)) 39 | expect_equal(b_control$inverseTrans, expression(transformed_dispersion)) 40 | expect_equal(b_control$transformation, "identity") 41 | expect_equal(b_control$slowit, 1) 42 | expect_equal(b_control$max_step_factor, 12) 43 | 44 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 45 | clotting <- data.frame( 46 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 47 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 48 | lot = factor(c(rep(1, 9), rep(2, 9)))) 49 | 50 | clot_formula <- conc ~ lot*log(u) + I(2*log(u)) 51 | 52 | ## check_aliasing option works as expected 53 | expect_error(glm(clot_formula, data = clotting, family = Gamma(), method = brglm_fit, check_aliasing = FALSE), 54 | pattern = "NA/NaN/Inf in foreign") 55 | mod <- glm(clot_formula, data = clotting, family = Gamma(), method = brglm_fit, check_aliasing = TRUE) 56 | expect_true(is.na(coef(mod)["I(2 * log(u))"])) 57 | ## check defaults 58 | mod <- glm(clot_formula, data = clotting, family = Gamma(), method = brglm_fit) 59 | expect_true(is.na(coef(mod)["I(2 * log(u))"])) 60 | 61 | ## data("sex2", package = "logistf") 62 | ## ## brglmControl arguments can be passed directly from the brglmFit call 63 | ## expect_warning(glm(case ~ dia, data = sex2, family = binomial(), method = brglm_fit)) 64 | ## ## Set response_adjustment to 2 just to avoid non-integer successes warning 65 | ## m1 <- glm(case ~ dia, data = sex2, family = binomial(), method = brglm_fit, response_adjustment = 2) 66 | ## expect_equal(m1$iter, 7, tolerance = .Machine$double.eps/2) 67 | -------------------------------------------------------------------------------- /inst/tinytest/test-brnp.R: -------------------------------------------------------------------------------- 1 | library("MASS") 2 | 3 | salmonella <- data.frame(freq = c(15, 16, 16, 27, 33, 20, 4 | 21, 18, 26, 41, 38, 27, 5 | 29, 21, 33, 60, 41, 42), 6 | dose = rep(c(0, 10, 33, 100, 333, 1000), 3), 7 | observation = rep(1:3, each = 6)) 8 | 9 | 10 | salmonella_fm <- freq ~ dose + log(dose+10) 11 | 12 | fitML_glmnb <- glm.nb(salmonella_fm, data = salmonella) 13 | fitML <- brnb(salmonella_fm, data = salmonella, link = "log", transformation = "identity", type = "ML") 14 | fitBR_mean <- update(fitML, type = "AS_mean") 15 | fitBR_median <- update(fitML, type = "AS_median") 16 | fitBR_mixed <- update(fitML, type = "AS_mixed") 17 | fitBC_mean <- update(fitML, type = "correction") 18 | 19 | ## brnp returns the same estimates as MASS::glm.nb 20 | expect_equal(coef(fitML), coef(fitML_glmnb), tol = 1e-06) 21 | 22 | ## brnp returns mean BR and median BR estimates as in Kenne Pagui et al (2021+) 23 | expect_equal(coef(fitBR_mean, model = "full"), 24 | c(2.216, -0.001, 0.309, 0.065), 25 | check.attributes = FALSE, tol = 1e-03) 26 | expect_equal(coef(fitBR_median, model = "full"), 27 | c(2.211, -0.001, 0.309, 0.069), 28 | check.attributes = FALSE, tol = 1e-03) 29 | expect_equal(coef(fitBC_mean, model = "full"), 30 | c(2.210, -0.001, 0.311, 0.063), 31 | check.attributes = FALSE, tol = 1e-03) 32 | 33 | ## fit using weights 34 | 35 | duptimes <- c(2,1,3,5,rep(1,14)) 36 | idx <- rep(1:nrow(salmonella), duptimes) 37 | dupsalmonella <- salmonella[idx,] 38 | 39 | fitMLe <- update(fitML, data = dupsalmonella) 40 | fitMLw <- update(fitML, weights = duptimes, data = salmonella) 41 | 42 | fitBR_mediane <- update(fitML, data = dupsalmonella, type = "AS_median") 43 | fitBR_medianw <- update(fitML, weights = duptimes, 
data = salmonella, type = "AS_median") 44 | 45 | fitBR_meane <- update(fitML, data = dupsalmonella, type = "AS_mean") 46 | fitBR_meanw <- update(fitML, weights = duptimes, data = salmonella, type = "AS_mean") 47 | 48 | fitBR_mixede <- update(fitML, data = dupsalmonella, type = "AS_mixed") 49 | fitBR_mixedw <- update(fitML, weights = duptimes, data = salmonella, type = "AS_mixed") 50 | 51 | fitBCe <- update(fitML, data = dupsalmonella, type = "AS_mixed") 52 | fitBCw <- update(fitML, weights = duptimes, data = salmonella, type = "AS_mixed") 53 | 54 | fitJe <- update(fitML, data = dupsalmonella, type = "MPL_Jeffreys", 55 | transformation = "inverse") 56 | fitJw <- update(fitML, weights = duptimes, data = salmonella, type = "MPL_Jeffreys", 57 | transformation = "inverse") 58 | 59 | ## prior weights work as expected 60 | ## all numerical results are the same 61 | expect_equal(coef(fitMLe, "full"), coef(fitMLw, "full"), tolerance = 1e-10) 62 | expect_equal(coef(fitBR_mediane, "full"), coef(fitBR_medianw, "full"), tolerance = 1e-10) 63 | expect_equal(coef(fitBR_mixede, "full"), coef(fitBR_mixedw, "full"), tolerance = 1e-10) 64 | expect_equal(coef(fitBR_meane, "full"), coef(fitBR_meanw, "full"), tolerance = 1e-10) 65 | expect_equal(coef(fitJe, "full"), coef(fitJw, "full"), tolerance = 1e-10) 66 | 67 | ## Dispersion transformations 68 | ## dispersion transformations work as expected for ML/mixed BR/median BR 69 | for (f0 in list(fitML, fitBR_median, fitBR_mixed)) { 70 | fsqrt <- update(f0, transformation = "sqrt") 71 | flog <- update(f0, transformation = "log") 72 | expect_equal(coef(fsqrt, model = "dispersion"), 73 | sqrt(coef(f0, model = "dispersion")), 74 | tol = 1e-05, check.attributes = FALSE) 75 | expect_equal(coef(flog, model = "dispersion"), 76 | log(coef(f0, model = "dispersion")), 77 | tol = 1e-05, check.attributes = FALSE) 78 | } 79 | 80 | 81 | ## dispersion transformations work as expected for mean BR 82 | f0 <- fitBR_mean 83 | fsqrt <- update(f0, transformation = "sqrt") 84 | flog <- update(f0, transformation = "log") 85 | expect_false(isTRUE(all.equal(coef(fsqrt, model = "dispersion"), 86 | sqrt(1 / coef(f0, model = "dispersion")), 87 | tol = 1e-04, check.attributes = FALSE))) 88 | expect_false(isTRUE(all.equal(coef(flog, model = "dispersion"), 89 | -log(coef(f0, model = "dispersion")), 90 | tol = 1e-04, check.attributes = FALSE))) 91 | 92 | ## error is produced for not implemented transformations 93 | expect_error(update(fitBR_mean, transformation = "asd")) 94 | 95 | -------------------------------------------------------------------------------- /inst/tinytest/test-checkinfinite.R: -------------------------------------------------------------------------------- 1 | ## endometrial data from Heinze \& Schemper (2002) (see ?endometrial) 2 | data("endometrial", package = "brglm2") 3 | expect_warning({ 4 | endometrial_ml <- glm(HG ~ I(-NV) + PI + EH, data = endometrial, 5 | family = binomial("cloglog")) 6 | }) 7 | 8 | expect_error(cie <- check_infinite_estimates(endometrial_ml)) 9 | -------------------------------------------------------------------------------- /inst/tinytest/test-correction.R: -------------------------------------------------------------------------------- 1 | library("enrichwith") 2 | 3 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 
300-2) 4 | clotting <- data.frame( 5 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 6 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 7 | lot = factor(c(rep(1, 9), rep(2, 9)))) 8 | 9 | mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma) 10 | emod <- enrich(mod, with = "all") 11 | 12 | coefs <- coef(emod, model = "mean") 13 | disp <- coef(emod, model = "dispersion") 14 | coefs_bc <- c(coefs, disp) - get_bias_function(emod)(coefs, disp) 15 | attributes(coefs_bc) <- NULL 16 | 17 | mod_bc <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, method = "brglmFit", type = "correction") 18 | 19 | ## bias corrected estimates computed using enrichwith are the same as those when having type = 'correction' in brglmFit 20 | expect_equal(unname(coefs_bc), unname(coef(mod_bc, model = "full")), check.attributes = FALSE, tolerance = 1e-06) 21 | 22 | -------------------------------------------------------------------------------- /inst/tinytest/test-dispersion.R: -------------------------------------------------------------------------------- 1 | data("anorexia", package = "MASS") 2 | 3 | anorexML <- glm(Postwt ~ Prewt + Treat+ offset(Prewt), 4 | family = gaussian, data = anorexia) 5 | anorexBR <- update(anorexML, method = "brglmFit", type = "AS_mean") 6 | 7 | tol <- sqrt(.Machine$double.eps) 8 | ## dispersion_ML is the usual biased estimate for the residual variance 9 | expect_equal(anorexBR$dispersion_ML, sum((anorexia$Postwt - fitted(anorexML))^2)/nrow(anorexia), 10 | tolerance = 1e-06) 11 | 12 | ## dispersion is the usual bias-corrected estimate for the residual variance 13 | expect_equal(anorexBR$dispersion, sum((anorexia$Postwt - fitted(anorexML))^2)/(nrow(anorexia) - length(coef(anorexML))), tolerance = 1e-06) 14 | 15 | 16 | ## context("dispersion parameter estimation") 17 | 18 | ## set.seed(123) 19 | ## N <- 20 20 | ## x <- matrix(rnorm(2*N), N, 2) 21 | ## y <- 0.3 + drop(x %*% c(-3, 1)) + rnorm(N, 0.4) 22 | 23 | ## fitML <- glm(y ~ x, family = gaussian()) 24 | ## fitBR <- update(fitML, method = "brglmFit") 25 | 26 | ## tol <- sqrt(.Machine$double.eps) 27 | ## test_that("dispersionML is the usual biased estimate for the residual variance", 28 | ## expect_equal(fitBR$dispersionML, sum((fitML$y - fitted(fitML))^2)/N)) 29 | 30 | ## test_that("dispersion is the usual bias-corrected estimate for the residual variance", 31 | ## expect_equal(fitBR$dispersion, sum((fitML$y - fitted(fitML))^2)/(N - 3))) 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /inst/tinytest/test-expo.R: -------------------------------------------------------------------------------- 1 | ## source(system.file("inst", "brglm0/brglm0.R", package = "brglm2")) 2 | data("lizards", package = "brglm2") 3 | 4 | lfit <- glm(cbind(grahami, opalinus) ~ height + diameter + 5 | light + time, family = binomial(), data=lizards, 6 | method = "brglmFit", type = "ML", epsilon = 1e-10, maxit = 1000) 7 | 8 | info <- get_information_function(lfit) 9 | 10 | start_methods <- c("AS_mixed", "AS_mean", "AS_median", "correction", "MPL_Jeffreys", "ML") 11 | keep <- c("coef", "se", "ci") 12 | 13 | ## ML 14 | or_est_ml <- exp(coef(lfit)) 15 | or_ses_ml <- sqrt(diag(diag(or_est_ml) %*% vcov(lfit) %*% diag(or_est_ml))) 16 | ml <- expo(update(lfit, type = "ML"), type = "ML") 17 | for (met in start_methods) { 18 | obj <- expo(update(lfit, type = met), type = "ML") 19 | expect_identical(ml[keep], obj[keep]) 20 | } 21 | expect_equal(coef(ml), 
or_est_ml) 22 | expect_equal(ml$se, or_ses_ml, check.attributes = FALSE) 23 | expect_equal(ml$ci, exp(confint(lfit))) 24 | 25 | ## Lyles et al (2012) 26 | lfit_mix <- update(lfit, type = "AS_mixed") 27 | or_est_bc1 <- exp(coef(lfit_mix)) * exp(-diag(vcov(lfit_mix))/2) 28 | or_ses_bc1 <- sqrt(diag(diag(or_est_bc1) %*% solve(info(log(or_est_bc1))) %*% diag(or_est_bc1))) 29 | for (met in start_methods) { 30 | bc1 <- expo(update(lfit_mix, type = met), type = "Lylesetal2012") 31 | expect_equal(coef(bc1), or_est_bc1) 32 | expect_equal(bc1$se, or_ses_bc1, check.attributes = FALSE) 33 | } 34 | 35 | ## correction* 36 | or_est_bc2 <- exp(coef(lfit_mix)) / (1 + diag(vcov(lfit_mix))/2) 37 | or_ses_bc2 <- sqrt(diag(diag(or_est_bc2) %*% solve(info(log(or_est_bc2))) %*% diag(or_est_bc2))) 38 | for (met in start_methods) { 39 | bc2 <- expo(update(lfit_mix, type = met), type = "correction*") 40 | expect_equal(coef(bc2), or_est_bc2) 41 | expect_equal(bc2$se, or_ses_bc2, check.attributes = FALSE) 42 | } 43 | 44 | ## correction+ 45 | or_est_bc3 <- exp(coef(lfit_mix)) * (1 - diag(vcov(lfit_mix))/2) 46 | or_ses_bc3 <- sqrt(diag(diag(or_est_bc3) %*% solve(info(log(or_est_bc3))) %*% diag(or_est_bc3))) 47 | for (met in start_methods) { 48 | bc3 <- expo(update(lfit, type = met), type = "correction+") 49 | expect_equal(coef(bc3), or_est_bc3) 50 | expect_equal(bc3$se, or_ses_bc3, check.attributes = FALSE) 51 | } 52 | 53 | ## AS median 54 | lfit_med <- update(lfit, type = "AS_median") 55 | or_est_med <- exp(coef(lfit_med)) 56 | or_ses_med <- sqrt(diag(diag(or_est_med) %*% solve(info(log(or_est_med))) %*% diag(or_est_med))) 57 | for (met in start_methods) { 58 | med <- expo(update(lfit_med, type = met), type = "AS_median") 59 | expect_equal(coef(med), or_est_med) 60 | expect_equal(med$se, or_ses_med, check.attributes = FALSE) 61 | } 62 | 63 | ## starting from a glm object 64 | expo_methods <- c("ML", "correction*", "correction+", "Lylesetal2012", "AS_median") 65 | lfit_glm <- glm(cbind(grahami, opalinus) ~ height + diameter + 66 | light + time, family = binomial(), data=lizards) 67 | for (met in expo_methods) { 68 | out_brglmFit <- expo(lfit_med, type = met)[keep] 69 | out_glm <- expo(lfit_glm, type = met)[keep] 70 | expect_equal(out_brglmFit, out_glm, tolerance = 1e-06) 71 | } 72 | 73 | expect_stdout(print(expo(lfit_glm)), "Odds ratios") 74 | 75 | ## ## Interpretation 76 | ## set.seed(123) 77 | ## dat <- data.frame(y = rexp(10), x = rnorm(10)) 78 | ## mod <- glm(y ~ x, family = Gamma("log"), data = dat) 79 | ## expo_mod <- expo(mod, type = "ML") 80 | ## expect_stdout(print(expo_mod), "Multiplicative effects to the mean") 81 | 82 | ## set.seed(111) 83 | ## dat <- data.frame(y = exp(rnorm(10)), x = rnorm(10)) 84 | ## mod <- glm(y ~ x, family = inverse.gaussian("log"), data = dat) 85 | ## expo_mod <- expo(mod, type = "ML") 86 | ## expect_stdout(print(expo_mod), "Multiplicative effects to the mean") 87 | 88 | ## set.seed(111) 89 | ## dat <- data.frame(x = rexp(10), y = rpois(10, 10)) 90 | ## mod <- glm(y ~ x, family = poisson("log"), data = dat) 91 | ## expo_mod <- expo(mod, type = "ML") 92 | ## expect_stdout(print(expo_mod), "Multiplicative effects to the mean") 93 | 94 | ## set.seed(111) 95 | ## dat <- data.frame(x = rexp(100, 0.5)) 96 | ## dat$y <- rbinom(100, 1, exp(-1 -0.2 * dat$x)) 97 | ## mod <- glm(y ~ x, family = binomial("log"), data = dat) 98 | ## expo_mod <- expo(mod, type = "ML") 99 | ## expect_stdout(print(expo_mod), "Relative risks") 100 | 101 | 102 | ## library(parallel) 103 | ## nsimu <- 1000 104 | ## 
set.seed(123) 105 | ## Y <- simulate(lfit, nsimu) 106 | ## true_psi <- exp(coef(lfit)) 107 | ## X <- model.matrix(lfit) 108 | 109 | ## methods <- c("ML", "correction+", "correction*", "Lylesetal2012", "AS_median") 110 | ## results <- as.list(numeric(length(methods))) 111 | ## names(results) <- methods 112 | ## for (met in methods) { 113 | ## results[[met]] <- mclapply(1:nsimu, function(k) { 114 | ## temp_data <- lizards 115 | ## temp_data[c("grahami", "opalinus")] <- Y[[k]] 116 | ## mod <- update(lfit, data = temp_data) 117 | ## expo(mod, met) 118 | ## }, mc.cores = 8) 119 | ## } 120 | 121 | ## get_bias <- function(res, truth) rowMeans(sapply(res, function(x) x$coef - truth)) 122 | ## get_pu <- function(res, truth) rowMeans(sapply(res, function(x) x$coef < truth)) 123 | ## get_mse <- function(res, truth) rowMeans(sapply(res, function(x) (x$coef - truth)^2)) 124 | ## get_avgv <- function(res, truth) rowMeans(sapply(res, function(x) x$se^2)) 125 | ## get_pu <- function(res, truth) rowMeans(sapply(res, function(x) x$coef < truth)) 126 | ## get_coverage <- function(res, truth) { 127 | ## rowMeans(sapply(res, function(x) { 128 | ## (x$ci[, 1] < truth) & (x$ci[, 2] > truth) 129 | ## })) 130 | ## } 131 | 132 | ## round(sapply(results, get_bias, truth = true_psi), 2) 133 | ## round(sapply(results, get_mse, truth = true_psi), 2) 134 | ## round(sapply(results, get_pu, truth = true_psi), 2) 135 | ## round(sapply(results, get_coverage, truth = true_psi), 2) 136 | ## vars <- sapply(results, get_mse, truth = true_psi) - sapply(results, get_bias, truth = true_psi)^2 137 | ## avgvars <- sapply(results, get_avgv, truth = true_psi) 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /inst/tinytest/test-gamma.R: -------------------------------------------------------------------------------- 1 | library("MASS") 2 | 3 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 4 | clotting <- data.frame( 5 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 6 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 7 | lot = factor(c(rep(1, 9), rep(2, 9)))) 8 | mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma) 9 | 10 | tol <- sqrt(.Machine$double.eps) 11 | ## ML estimate of gamma dispersion from brglmFit and from MASS::gamma.dispersion are the same 12 | expect_equal(update(mod, method = "brglmFit", epsilon = 1e-10)$dispersion_ML, MASS::gamma.dispersion(mod), tolerance = tol) 13 | 14 | ## ML estimate of gamma shape from brglmFit and from MASS::gamma.dispersion are the same 15 | expect_equal(1/update(mod, method = "brglmFit", epsilon = 1e-10, transformation = "inverse")$dispersion_ML, gamma.shape(mod)$alpha, tolerance = tol) 16 | -------------------------------------------------------------------------------- /inst/tinytest/test-jeffreys.R: -------------------------------------------------------------------------------- 1 | library("numDeriv") 2 | library("brglm") 3 | 4 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 
300-2) 5 | clotting <- data.frame( 6 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 7 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 8 | lot = factor(c(rep(1, 9), rep(2, 9)))) 9 | mod <- enrich(glm(conc ~ lot*log(u), data = clotting, family = Gamma())) 10 | 11 | ifun <- get_information_function(mod) 12 | 13 | penloglik <- function(pars, X, y) { 14 | p <- ncol(X) 15 | beta <- pars[1:p] 16 | dispersion <- pars[p + 1] 17 | eta <- X %*% beta 18 | mu <- 1/eta 19 | sum(dgamma(y, shape = 1/dispersion, scale = mu * dispersion, log = TRUE)) + 0.5 * log(det(ifun(beta, dispersion))) 20 | } 21 | 22 | modjef <- update(mod, method = "brglmFit", type = "MPL_Jeffreys", epsilon = 1e-15) 23 | 24 | ## the numerical gradient of the penalized log-likelihood is almost zero when evaluated at the estimates from type = 'MPL_Jeffreys 25 | expect_equal(grad(penloglik, 26 | x = coef(modjef, model = "full"), 27 | X = model.matrix(mod), 28 | y = model.response(mod$model)), rep(0, 5), tolerance = 1.5 * 1e-05) 29 | 30 | ## the numerical gradient of the penalized log-likelihood matches that from type = 'MPL_Jeffreys' 31 | expect_warning(g1 <- update(mod, method = "brglmFit", type = "MPL_Jeffreys", maxit = 0, start = coef(mod, model = "full"))$grad) 32 | g2 <- grad(penloglik, 33 | x = coef(mod, model = "full"), 34 | X = model.matrix(mod), 35 | y = model.response(mod$model)) 36 | expect_equal(unname(g1), g2, tolerance = 1e-05) 37 | 38 | ## source(system.file("inst", "brglm0/brglm0.R", package = "brglm2")) 39 | data("lizards", package = "brglm2") 40 | 41 | links <- lapply(c("logit", "probit", "cloglog", "cauchit"), make.link) 42 | 43 | 44 | tol <- 1e-08 45 | for (l in seq_along(links)) { 46 | expect_warning( 47 | lizardsBRlegacy <- brglm(cbind(grahami, opalinus) ~ height + diameter + 48 | light + time, family = binomial(links[[l]]), data=lizards, 49 | method = "brglm.fit", br.epsilon = 1e-10, br.maxit = 1000, pl = TRUE) 50 | ) 51 | 52 | expect_warning( 53 | lizardsBR <- glm(cbind(grahami, opalinus) ~ height + diameter + 54 | light + time, family = binomial(links[[l]]), data=lizards, 55 | method = "brglmFit", epsilon = 1e-10, maxit = 1000, type = "MPL_Jeffreys") 56 | ) 57 | ## glm with brglm.fit method and brglm_0 return the same coefficients for the lizards when link is links[[l]]$name) 58 | expect_equal(coef(lizardsBR), coef(lizardsBRlegacy), tolerance = tol) 59 | } 60 | -------------------------------------------------------------------------------- /inst/tinytest/test-median-binomial.R: -------------------------------------------------------------------------------- 1 | library("mbrglm") 2 | 3 | data("lizards", package = "brglm2") 4 | data("endometrial",package = "brglm2") 5 | 6 | links <- lapply(c("logit", "probit", "cloglog", "cauchit"), make.link) 7 | 8 | tol <- 1e-10 9 | mbrglmControl <- mbrglm.control(mbr.epsilon = 1e-10, mbr.maxit = 1000) 10 | 11 | for (l in seq_along(links)) { 12 | ## Lizards 13 | lizardsFormula <- cbind(grahami, opalinus) ~ height + diameter + light + time 14 | lizardsMBRlegacy <- mbrglm(lizardsFormula, family = binomial(links[[l]]), data = lizards, 15 | method = "mbrglm.fit", 16 | control.mbrglm = mbrglmControl) 17 | expect_warning(lizardsMBR <- glm(lizardsFormula, family = binomial(links[[l]]), data = lizards, 18 | method = "brglmFit", type="AS_median", epsilon = 1e-10, maxit = 1000)) 19 | 20 | ## Endometrial 21 | endoFormula <- HG ~ NV + PI + EH 22 | expect_warning({ 23 | endoMBRlegacy <- mbrglm(endoFormula, family = binomial(links[[l]]), data = 
endometrial, 24 | method = "mbrglm.fit", 25 | control.mbrglm = mbrglmControl) 26 | endoMBR <- glm(endoFormula, family = binomial(links[[l]]), data = endometrial, 27 | method = "brglmFit", type = "AS_median", epsilon = 1e-10, maxit = 1000) 28 | }) 29 | 30 | c1 <- coef(summary(endoMBRlegacy)) 31 | c2 <- coef(summary(endoMBR)) 32 | 33 | ## glm with brglmFit method and mbrglm return the same coefficients when link is" links[[l]]$name 34 | expect_equal(coef(lizardsMBR), coef(lizardsMBRlegacy), tolerance = tol) 35 | expect_equal(c1, c2, tolerance = tol) 36 | } 37 | 38 | -------------------------------------------------------------------------------- /inst/tinytest/test-median-dispersion.R: -------------------------------------------------------------------------------- 1 | data("anorexia", package = "MASS") 2 | 3 | anorexML <- glm(Postwt ~ Prewt + Treat+ offset(Prewt), 4 | family = gaussian, data = anorexia) 5 | anorexBR <- update(anorexML, method = "brglmFit") 6 | anorexMBR <- update(anorexML, method = "brglmFit", control = list(type="AS_median")) 7 | 8 | tol <- sqrt(.Machine$double.eps) 9 | 10 | ## dispersion is RSS over residual degrees of freedom minus 2/3" 11 | expect_equal(anorexMBR$dispersion, sum((anorexia$Postwt - fitted(anorexML))^2)/(nrow(anorexia) - length(coef(anorexML)) - 2/3)) 12 | 13 | -------------------------------------------------------------------------------- /inst/tinytest/test-median-poisson.R: -------------------------------------------------------------------------------- 1 | ## mbrpr is supplied under the permission of M. Zambelli, and is part 2 | ## of his unpublished undergraduate thesis submitted in 2016 at the 3 | ## Department of Statisitcal Science, University of Padova 4 | 5 | ## source(system.file("mbrpr", "mbrpr.R", package = "brglm2")) 6 | source("mbrpr.R") 7 | 8 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 9 | counts <- c(18, 17, 15, 20, 10, 20, 25, 13, 12) 10 | outcome <- gl(3, 1,9) 11 | treatment <- gl(3, 3) 12 | dobson <- data.frame(counts, outcome, treatment) 13 | 14 | tol <- 1e-06 15 | 16 | ## MBR estimates and std. 
errors from brglmFit and from mbrpr are the same for poisson 17 | expect_warning( 18 | br1 <- summary(fit<-glm(counts ~ outcome + treatment, family = poisson(), method = "brglmFit", 19 | type="AS_median")) 20 | ) 21 | 22 | X <- model.matrix(fit) 23 | y <- fit$y 24 | br2 <- mbrpr(par = rep(0, ncol(X)), y = y, X = X, eps = 1e-10, maxit = 500) 25 | c1 <- coef(br1)[, c(1,2)] 26 | dimnames(c1) <- NULL 27 | c2 <- cbind(br2$coefficients,sqrt(diag(br2$InfoInv))) 28 | expect_equal(c1,c2, tolerance = tol) 29 | 30 | -------------------------------------------------------------------------------- /inst/tinytest/test-mis.R: -------------------------------------------------------------------------------- 1 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 2 | 3 | logit_mis <- mis(link = "logit", sensitivity = 1, specificity = 1) 4 | probit_mis <- mis(link = "probit", sensitivity = 1, specificity = 1) 5 | 6 | expect_identical(class(logit_mis), "link-glm") 7 | expect_identical(class(probit_mis), "link-glm") 8 | 9 | lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time 10 | lizards_logit <- glm(lizards_f, family = binomial(logit), data = lizards) 11 | lizards_probit <- glm(lizards_f, family = binomial(probit), data = lizards) 12 | 13 | lizards_logit_mis <- update(lizards_logit, family = binomial(logit_mis), 14 | start = coef(lizards_logit)) 15 | lizards_probit_mis <- update(lizards_probit, family = binomial(probit_mis), 16 | start = coef(lizards_probit)) 17 | 18 | ## mis link with sensitivity and specificity 1 are the same as original links 19 | expect_equal(coef(lizards_logit), coef(lizards_logit_mis), tolerance = 1e-06) 20 | expect_equal(coef(lizards_probit), coef(lizards_probit_mis), tolerance = 1e-06) 21 | 22 | -------------------------------------------------------------------------------- /inst/tinytest/test-multinom-binom.R: -------------------------------------------------------------------------------- 1 | ## Tolerance for comparisons 2 | tolerance <- 1e-05 3 | 4 | data("lizards") 5 | 6 | lizards_grahami <- lizards[, c("grahami", "height", "diameter", "light", "time")] 7 | lizards_grahami <- lizards_grahami[rep(seq.int(nrow(lizards_grahami)), lizards_grahami$grahami), ] 8 | lizards_grahami$species <- "grahami" 9 | lizards_grahami$grahami <- NULL 10 | lizards_opalinus <- lizards[, c("opalinus", "height", "diameter", "light", "time")] 11 | lizards_opalinus <- lizards_opalinus[rep(seq.int(nrow(lizards_opalinus)), lizards_opalinus$opalinus), ] 12 | lizards_opalinus$species <- "opalinus" 13 | lizards_opalinus$opalinus <- NULL 14 | lizards1 <- rbind(lizards_grahami, lizards_opalinus) 15 | lizards1$species <- factor(lizards1$species, levels = c("opalinus", "grahami")) 16 | 17 | expect_warning({ 18 | model1 <- glm(formula = species ~ height + diameter + light + time, family = binomial(logit), 19 | data = lizards1, 20 | method = "brglmFit") 21 | model2 <- brmultinom(species ~ height + diameter + light + time, data = lizards1) 22 | }) 23 | 24 | tolerance <- 1e-06 25 | ## coefficients from the binomial fit match those from the multinomial model with the poisson trick 26 | expect_equal(coef(model1), coef(model2)[1,], tol = tolerance) 27 | 28 | -------------------------------------------------------------------------------- /inst/tinytest/test-poisson.R: -------------------------------------------------------------------------------- 1 | ## source(system.file("brpr", "brpr.R", package = "brglm2")) 2 | source("brpr.R") 3 | 4 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 5 
| counts <- c(18,17,15,20,10,20,25,13,12) 6 | outcome <- gl(3,1,9) 7 | treatment <- gl(3,3) 8 | dobson <- data.frame(counts, outcome, treatment) 9 | 10 | tol <- 1e-06 11 | ## BR estimates and std. errors from brglmFit and from brpr are the same for poisson 12 | expect_warning({ 13 | br1 <- summary(glm(counts ~ outcome + treatment, family = poisson(), method = "brglmFit")) 14 | br2 <- summary(brpr(counts ~ outcome + treatment, data = dobson)) 15 | }) 16 | c1 <- coef(br1) 17 | c2 <- coef(br2) 18 | c2 <- c2[rownames(c1), ] 19 | expect_equal(c1,c2, tolerance = tol) 20 | 21 | 22 | ## ## mbest::firthglm.fit crashes for this example 23 | ## ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 24 | ## clotting <- data.frame( 25 | ## u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 26 | ## conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 27 | ## lot = factor(c(rep(1, 9), rep(2, 9)))) 28 | ## mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma()) 29 | 30 | ## tol <- sqrt(.Machine$double.eps) 31 | ## test_that("BR estimates from brglmFit and from mbest::firthglm.fit are the same for gamma", { 32 | ## br1 <- update(mod, method = "firthglm.fit") 33 | ## br2 <- update(mod, method = "brglmFit") 34 | ## c1 <- coef(br1) 35 | ## c2 <- coef(br2) 36 | ## c2 <- c2[names(c1)] 37 | ## expect_equal(c1,c2, tolerance = tol) 38 | ## }) 39 | 40 | 41 | -------------------------------------------------------------------------------- /inst/tinytest/test-print.R: -------------------------------------------------------------------------------- 1 | ## source(system.file("inst", "brglm0/brglm0.R", package = "brglm2")) 2 | data("lizards", package = "brglm2") 3 | 4 | 5 | types <- list("ML" = "(maximum likelihood)", 6 | "correction" = "(bias correction)", 7 | "AS_mean" = "(mean bias-reducing adjusted score equations)", 8 | "AS_median" = "(median bias-reducing adjusted score equations)", 9 | "AS_mixed" = "(mixed bias-reducing adjusted score equations)", 10 | "MPL_Jeffreys" = "(maximum penalized likelihood with Jeffreys'-prior penalty)") 11 | 12 | 13 | for (type in names(types)) { 14 | liz <- glm(cbind(grahami, opalinus) ~ height + diameter + light + time, family = binomial(), 15 | data = lizards, 16 | method = "brglmFit", type = type) 17 | expect_stdout(print(liz), "Coefficients") 18 | expect_stdout(print(liz), "Degrees of Freedom") 19 | expect_stdout(print(liz), "Null Deviance") 20 | expect_equal(liz$type, type) 21 | summ <- summary(liz) 22 | expect_true(all(class(summ) %in% c("summary.brglmFit", "summary.glm"))) 23 | expect_stdout(print(summ), "Type of estimator:") 24 | expect_stdout(print(summ), type) 25 | expect_stdout(print(summ), types[[type]]) 26 | } 27 | 28 | 29 | ## brnb 30 | salmonella <- data.frame(freq = c(15, 16, 16, 27, 33, 20, 31 | 21, 18, 26, 41, 38, 27, 32 | 29, 21, 33, 60, 41, 42), 33 | dose = rep(c(0, 10, 33, 100, 333, 1000), 3), 34 | observation = rep(1:3, each = 6)) 35 | salmonella_fm <- freq ~ dose + log(dose + 10) 36 | fit_brnb <- brnb(salmonella_fm, data = salmonella, 37 | link = "log", transformation = "inverse", type = "ML") 38 | summ <- summary(fit_brnb) 39 | expect_stdout(print(summ), "Type of estimator:") 40 | expect_stdout(print(summ), "ML") 41 | expect_stdout(print(summ), "(maximum likelihood)") 42 | 43 | ## brmultinom 44 | data("housing", package = "MASS") 45 | fit_brmultinom <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 46 | data = housing, type = "ML", ref = 1) 47 | summ <- summary(fit_brmultinom) 48 | expect_stdout(print(summ), "Type 
of estimator:") 49 | expect_stdout(print(summ), "ML") 50 | expect_stdout(print(summ), "(maximum likelihood)") 51 | 52 | 53 | ## bracl 54 | data("stemcell", package = "brglm2") 55 | fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 56 | data = stemcell, type = "ML") 57 | summ <- summary(fit_bracl) 58 | expect_stdout(print(summ), "Type of estimator:") 59 | expect_stdout(print(summ), "ML") 60 | expect_stdout(print(summ), "(maximum likelihood)") 61 | -------------------------------------------------------------------------------- /inst/tinytest/test-singular.R: -------------------------------------------------------------------------------- 1 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 2 | clotting <- data.frame( 3 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 4 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 5 | lot = factor(c(rep(1, 9), rep(2, 9)))) 6 | mod <- glm(conc ~ lot*log(u) + I(2*log(u)), data = clotting, family = Gamma) 7 | 8 | X <- model.matrix(mod) 9 | Y <- mod$y 10 | ## brglmFit returns an error if singular.ok = TRUE 11 | expect_error(brglm_fit(X, Y, family = Gamma(), singular.ok = FALSE), 12 | pattern = "singular fit encountered") 13 | expect_true(is.na(coef(brglm_fit(X, Y, family = Gamma(), singular.ok = TRUE))["I(2 * log(u))"])) 14 | -------------------------------------------------------------------------------- /inst/tinytest/test-start.R: -------------------------------------------------------------------------------- 1 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 300-2) 2 | clotting <- data.frame( 3 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 4 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 5 | lot = factor(c(rep(1, 9), rep(2, 9)))) 6 | mod <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, epsilon = 1e-10, maxit = 1000) 7 | 8 | mod1 <- update(mod, method = "brglmFit", start = c(coef(mod)*0.9, 1), epsilon = 1e-10, maxit = 1000) 9 | mod2 <- update(mod, method = "brglmFit", start = c(coef(mod)*0.9), epsilon = 1e-10, maxit = 1000) 10 | 11 | tol <- 1e-03 12 | ## start argument is passed correctly in brglmFit" 13 | expect_equal(coef(mod), coef(mod1), tolerance = tol) 14 | expect_equal(coef(mod), coef(mod2), tolerance = tol) 15 | -------------------------------------------------------------------------------- /inst/tinytest/test-transformation.R: -------------------------------------------------------------------------------- 1 | ## A Gamma example, from McCullagh & Nelder (1989, pp. 
300-2) 2 | clotting <- data.frame( 3 | u = c(5,10,15,20,30,40,60,80,100, 5,10,15,20,30,40,60,80,100), 4 | conc = c(118,58,42,35,27,25,21,19,18,69,35,26,21,18,16,13,12,12), 5 | lot = factor(c(rep(1, 9), rep(2, 9)))) 6 | mod_identity <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 7 | method = "brglm_fit", type = "ML", 8 | transformation = "identity") 9 | mod_log <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 10 | method = "brglm_fit", type = "ML", 11 | transformation = "log") 12 | mod_sqrt <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 13 | method = "brglm_fit", type = "ML", 14 | transformation = "sqrt") 15 | mod_inverse <- glm(conc ~ lot*log(u), data = clotting, family = Gamma, 16 | method = "brglm_fit", type = "ML", 17 | transformation = "inverse") 18 | 19 | c_identity <- coef(mod_identity, model = "full") 20 | c_log <- coef(mod_log, model = "full") 21 | c_log[5] <- exp(c_log[5]) 22 | c_sqrt <- coef(mod_sqrt, model = "full") 23 | c_sqrt[5] <- c_sqrt[5]^2 24 | c_inverse <- coef(mod_inverse, model = "full") 25 | c_inverse[5] <- 1/c_inverse[5] 26 | 27 | tol <- 1e-08 28 | ## ML estimate of gamma dispersion from brglmFit is invariant to trasnformation 29 | expect_equal(c_identity, c_log, tolerance = tol, check.attributes = FALSE) 30 | expect_equal(c_identity, c_sqrt, tolerance = tol, check.attributes = FALSE) 31 | expect_equal(c_identity, c_inverse, tolerance = tol, check.attributes = FALSE) 32 | -------------------------------------------------------------------------------- /man/aids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{aids} 5 | \alias{aids} 6 | \title{The effects of AZT in slowing the development of AIDS symptoms} 7 | \format{ 8 | A data frame with 4 rows and 4 variables: 9 | \itemize{ 10 | \item \code{symptomatic}: counts of veterans showing AIDS symptoms during the 3-year study 11 | \item \code{asymptomatic}: counts of veterans not showing AIDS symptoms during the 3-year study 12 | \item \code{race}: the race of the veterans with levels \code{"White"} and \code{"Black"} 13 | \item \code{AZT_use}: whether the veterans received AZT immediately (\code{"Yes"}) 14 | or waited until their T cells showed severe immune weakness (\code{"No"}) 15 | } 16 | } 17 | \source{ 18 | The data set is analyzed in Agresti (2002, Subsection 5.4.2). Its 19 | original source is New York Times, Feb. 15, 1991. 20 | } 21 | \usage{ 22 | aids 23 | } 24 | \description{ 25 | The data is from a 3-year study on the effects of AZT in slowing the 26 | development of AIDS symptoms. 338 veterans whose immune systems 27 | were beginning to falter after infection with the AIDS virus were 28 | randomly assigned either to receive AZT immediately or to wait 29 | until their T cells showed severe immune weakness. 30 | } 31 | \references{ 32 | Agresti A (2002). \emph{Categorical Data Analysis}. Wiley Series in 33 | Probability and Statistics. Wiley. 
34 | } 35 | \seealso{ 36 | \code{\link[=brmultinom]{brmultinom()}} 37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /man/alligators.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{alligators} 5 | \alias{alligators} 6 | \title{Alligator food choice data} 7 | \format{ 8 | A data frame with 80 rows and 5 variables: 9 | \itemize{ 10 | \item \code{foodchoice}: primary food type, in volume, found in an alligator’s stomach, with levels \code{fish}, \code{invertebrate},\code{reptile}, \code{bird}, \code{other} 11 | \item \code{lake}: lake of capture with levels \code{Hancock}, \code{Oklawaha}, \code{Trafford}, \code{George}. 12 | \item \code{gender}: gender of the alligator with levels \code{Male} and \code{Female} 13 | \item \code{size}: size of the alligator with levels \verb{<=2.3} meters long and \verb{>2.3} meters long 14 | \item \code{freq}: number of alligators for each foodchoice, lake, gender and size combination 15 | } 16 | } 17 | \source{ 18 | The alligators data set is analyzed in Agresti (2002, Subsection 7.1.2). 19 | } 20 | \usage{ 21 | alligators 22 | } 23 | \description{ 24 | Alligator food choice data 25 | } 26 | \references{ 27 | Agresti A (2002). \emph{Categorical Data Analysis}. Wiley Series in 28 | Probability and Statistics. Wiley. 29 | } 30 | \seealso{ 31 | \code{\link[=brmultinom]{brmultinom()}} 32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/bracl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bracl.R 3 | \name{bracl} 4 | \alias{bracl} 5 | \title{Bias reduction for adjacent category logit models for ordinal 6 | responses using the Poisson trick.} 7 | \usage{ 8 | bracl( 9 | formula, 10 | data, 11 | weights, 12 | subset, 13 | na.action, 14 | parallel = FALSE, 15 | contrasts = NULL, 16 | model = TRUE, 17 | x = TRUE, 18 | control = list(...), 19 | ... 20 | ) 21 | } 22 | \arguments{ 23 | \item{formula}{ 24 | a formula expression as for regression models, of the form 25 | \code{response ~ predictors}. The response should be a factor 26 | (preferably an ordered factor), which will be interpreted as an 27 | ordinal response, with levels ordered as in the factor. 28 | The model must have an intercept: attempts to remove one will 29 | lead to a warning and be ignored. An offset may be used. See the 30 | documentation of \code{\link{formula}} for other details. 31 | } 32 | 33 | \item{data}{ 34 | an optional data frame, list or environment in which to interpret 35 | the variables occurring in \code{formula}. 36 | } 37 | 38 | \item{weights}{ 39 | optional case weights in fitting. Default to 1. 40 | } 41 | 42 | \item{subset}{ 43 | expression saying which subset of the rows of the data should be used 44 | in the fit. All observations are included by default. 45 | } 46 | 47 | \item{na.action}{ 48 | a function to filter missing data. 49 | } 50 | 51 | \item{parallel}{if \code{FALSE} (default), then a non-proportional odds 52 | adjacent category model is fit, assuming different effects per 53 | category; if \code{TRUE} then a proportional odds adjacent category 54 | model is fit. 
See Details.} 55 | 56 | \item{contrasts}{ 57 | a list of contrasts to be used for some or all of 58 | the factors appearing as variables in the model formula. 59 | } 60 | 61 | \item{model}{ 62 | logical for whether the model matrix should be returned. 63 | } 64 | 65 | \item{x}{should the model matrix be included with in the result 66 | (default is \code{TRUE}).} 67 | 68 | \item{control}{a list of parameters for controlling the fitting 69 | process. See \code{\link[=brglmControl]{brglmControl()}} for details.} 70 | 71 | \item{...}{arguments to be used to form the default \code{control} 72 | argument if it is not supplied directly.} 73 | } 74 | \description{ 75 | \code{\link[=bracl]{bracl()}} is a wrapper of \code{\link[=brglmFit]{brglmFit()}} that fits adjacent category 76 | logit models with or without proportional odds using implicit and 77 | explicit bias reduction methods. See Kosmidis & Firth (2011) for 78 | details. 79 | } 80 | \details{ 81 | The \code{\link[=bracl]{bracl()}} function fits adjacent category models, which assume 82 | multinomial observations with probabilities with proportional odds 83 | of the form 84 | 85 | \deqn{\log\frac{\pi_{ij}}{\pi_{ij + 1}} = \alpha_j + \beta^T x_i}{log(pi[i, j]/pi[i, j+1]) = alpha[j] + sum(beta * x[i, ])} 86 | 87 | or with non-proportional odds of the form 88 | 89 | \deqn{\log\frac{\pi_{ij}}{\pi_{ij + 1}} = \alpha_j + \beta_j^T x_i}{log(pi[i, j]/pi[i, j+1]) = alpha[j] + sum(beta[j, ] * x[i, ])} 90 | 91 | where \eqn{x_i}{x[i, ]} is a vector of covariates and \eqn{\pi_{ij}}{pi[i, j]} is the 92 | probability that category \eqn{j} is observed at the covariate setting \eqn{i}. 93 | } 94 | \examples{ 95 | 96 | data("stemcell", package = "brglm2") 97 | 98 | # Adjacent category logit (non-proportional odds) 99 | fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 100 | data = stemcell, type = "ML") 101 | # Adjacent category logit (proportional odds) 102 | fit_bracl_p <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 103 | data = stemcell, type = "ML", parallel = TRUE) 104 | 105 | 106 | } 107 | \references{ 108 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 109 | reduction in generalized linear models. \emph{Statistics and Computing}, 110 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 111 | 112 | Agresti, A (2010). \emph{Analysis of Ordinal Categorical Data} (2nd 113 | edition). Wiley Series in Probability and Statistics. Wiley. 114 | 115 | Albert A, Anderson J A (1984). On the Existence of Maximum 116 | Likelihood Estimates in Logistic Regression Models. \emph{Biometrika}, 117 | \strong{71}, 1-10. \doi{10.2307/2336390}. 118 | 119 | Kosmidis I, Firth D (2011). Multinomial logit bias reduction 120 | via the Poisson log-linear model. \emph{Biometrika}, \strong{98}, 121 | 755-759. \doi{10.1093/biomet/asr026}. 122 | 123 | Palmgren J (1981). The Fisher Information Matrix for Log Linear 124 | Models Arguing Conditionally on Observed Explanatory 125 | Variables. \emph{Biometrika}, \strong{68}, 126 | 563-566. \doi{10.1093/biomet/68.2.563}. 
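The adjacent category logits in the Details section above can be checked directly against fitted probabilities. A minimal sketch follows, reusing the stemcell example from this help page and assuming that predict() with type = "probs" returns the fitted category probabilities, as it does for nnet::multinom():

library("brglm2")
data("stemcell", package = "brglm2")
fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency,
                   data = stemcell, type = "ML")
## Fitted category probabilities pi[i, j], one row per row of stemcell
probs <- predict(fit_bracl, type = "probs")
## Implied adjacent category log-odds log(pi[i, j] / pi[i, j + 1])
adjacent_logits <- log(probs[, -ncol(probs)] / probs[, -1])
head(adjacent_logits)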
127 | } 128 | \seealso{ 129 | \code{\link[nnet:multinom]{nnet::multinom()}}, \code{\link[=brmultinom]{brmultinom()}} 130 | } 131 | \author{ 132 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 133 | } 134 | -------------------------------------------------------------------------------- /man/brglm2-defunct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglm2-defunct.R 3 | \name{brglm2-defunct} 4 | \alias{brglm2-defunct} 5 | \alias{check_infinite_estimates} 6 | \alias{detect_separation} 7 | \title{Defunct Functions in package \pkg{brglm2}} 8 | \usage{ 9 | check_infinite_estimates(...) 10 | 11 | detect_separation(...) 12 | } 13 | \arguments{ 14 | \item{...}{arguments to be passed to functions and methods.} 15 | } 16 | \description{ 17 | The functions or variables listed here are no longer part of 18 | \pkg{brglm2}. 19 | } 20 | \details{ 21 | \itemize{ 22 | \item \code{\link[=detect_separation]{detect_separation()}}: This function is defunct from \pkg{brglm2} 23 | since version 0.8.0. A new version of \code{\link[=detect_separation]{detect_separation()}} is now 24 | maintained in the 25 | \href{https://cran.r-project.org/package=detectseparation}{\pkg{detectseparation}} 26 | R package. 27 | \item \code{\link[=check_infinite_estimates]{check_infinite_estimates()}} is defunct from \pkg{brglm2} since 28 | version 0.8.0. An new version of \code{\link[=check_infinite_estimates]{check_infinite_estimates()}} is 29 | now maintained in the 30 | \href{https://cran.r-project.org/package=detectseparation}{\pkg{detectseparation}} 31 | R package. 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/brglm2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglm2-package.R 3 | \docType{package} 4 | \name{brglm2} 5 | \alias{brglm2} 6 | \alias{brglm2-package} 7 | \title{brglm2: Bias Reduction in Generalized Linear Models} 8 | \description{ 9 | Estimation and inference from generalized linear models using 10 | implicit and explicit bias reduction methods (Kosmidis, 2014), and 11 | other penalized maximum likelihood methods. Currently supported 12 | methods include the mean bias-reducing adjusted scores approach in 13 | Firth (1993) and Kosmidis & Firth (2009), the median bias-reduction 14 | adjusted scores approach in Kenne Pagui et al. (2017), the 15 | correction of the asymptotic bias in Cordeiro & McCullagh (1991), 16 | the mixed bias-reduction adjusted scores approach in Kosmidis et al 17 | (2020), maximum penalized likelihood with powers of the Jeffreys 18 | prior as penalty, and maximum likelihood. 19 | } 20 | \details{ 21 | In the special case of generalized linear models for binomial, 22 | Poisson and multinomial responses (both nominal and ordinal), mean 23 | and median bias reduction and maximum penalized likelihood return 24 | estimates with improved frequentist properties, that are also 25 | always finite, even in cases where the maximum likelihood estimates 26 | are infinite (e.g. complete and quasi-complete separation in 27 | multinomial regression). Estimation in all cases takes place via a 28 | modified Fisher scoring algorithm, and S3 methods for the 29 | construction of confidence intervals for the reduced-bias estimates 30 | are provided. 
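A minimal sketch of the point above, using the endometrial data shipped with the package (object names are illustrative):

library("brglm2")
data("endometrial", package = "brglm2")
## Maximum likelihood: quasi-complete separation in NV leads to an effectively
## infinite estimate, reported as a very large coefficient and standard error
modML <- glm(HG ~ NV + PI + EH, family = binomial("logit"), data = endometrial)
## Mean bias reduction via the brglmFit method returns finite estimates
modBR <- update(modML, method = "brglmFit", type = "AS_mean")
summary(modBR)
## Wald-type confidence intervals through the confint method for "brglmFit" objects
confint(modBR)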
31 | 32 | The core model fitters are implemented by the functions 33 | \code{\link[=brglm_fit]{brglm_fit()}} (univariate generalized linear models), 34 | \code{\link[=brmultinom]{brmultinom()}} (baseline category logit models for nominal 35 | multinomial responses), \code{\link[=bracl]{bracl()}} (adjacent category logit models 36 | for ordinal multinomial responses), and \code{\link[=brnb]{brnb()}} for negative 37 | binomial regression. 38 | 39 | The similarly named \strong{brglm} R package can only handle generalized 40 | linear models with binomial responses. Special care has been taken 41 | when developing \strong{brglm2} in order not to have conflicts when the 42 | user loads \strong{brglm2} and \strong{brglm} simultaneously. The development 43 | and maintenance of the two packages will continue in parallel, 44 | until \strong{brglm2} incorporates all \strong{brglm} functionality and 45 | provides an appropriate wrapper to the \code{\link[brglm:brglm]{brglm::brglm()}} function. 46 | } 47 | \references{ 48 | Kosmidis I, Firth D (2021). Jeffreys-prior penalty, finiteness 49 | and shrinkage in binomial-response generalized linear 50 | models. \emph{Biometrika}, \strong{108}, 71-82. \doi{10.1093/biomet/asaa052}. 51 | 52 | Cordeiro G M, McCullagh P (1991). Bias correction in generalized 53 | linear models. \emph{Journal of the Royal Statistical Society. Series B 54 | (Methodological)}, \strong{53}, 629-643. \doi{10.1111/j.2517-6161.1991.tb01852.x}. 55 | 56 | Firth D (1993). Bias reduction of maximum likelihood estimates, 57 | Biometrika, \strong{80}, 27-38. \doi{10.2307/2336755}. 58 | 59 | Kenne Pagui E C, Salvan A, Sartori N (2017). Median bias 60 | reduction of maximum likelihood estimates. \emph{Biometrika}, \strong{104}, 61 | 923–938. \doi{10.1093/biomet/asx046}. 62 | 63 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 64 | reduction in generalized linear models. \emph{Statistics and Computing}, 65 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 66 | 67 | Kosmidis I, Firth D (2009). Bias reduction in exponential family 68 | nonlinear models. \emph{Biometrika}, \strong{96}, 793-804. \doi{10.1093/biomet/asp055}. 69 | 70 | Kosmidis I, Firth D (2010). A generic algorithm for reducing 71 | bias in parametric estimation. \emph{Electronic Journal of Statistics}, 72 | \strong{4}, 1097-1112. \doi{10.1214/10-EJS579}. 73 | 74 | Kosmidis I (2014). Bias in parametric estimation: reduction and 75 | useful side-effects. \emph{WIRE Computational Statistics}, \strong{6}, 76 | 185-196. \doi{10.1002/wics.1296}. 77 | } 78 | \seealso{ 79 | \code{\link[=brglm_fit]{brglm_fit()}}, \code{\link[=brmultinom]{brmultinom()}}, \code{\link[=bracl]{bracl()}} 80 | } 81 | \author{ 82 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 83 | } 84 | -------------------------------------------------------------------------------- /man/brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{brmultinom} 4 | \alias{brmultinom} 5 | \title{Bias reduction for multinomial response models using the 6 | Poisson trick.} 7 | \usage{ 8 | brmultinom( 9 | formula, 10 | data, 11 | weights, 12 | subset, 13 | na.action, 14 | contrasts = NULL, 15 | ref = 1, 16 | model = TRUE, 17 | x = TRUE, 18 | control = list(...), 19 | ... 
20 | ) 21 | } 22 | \arguments{ 23 | \item{formula}{ 24 | a formula expression as for regression models, of the form 25 | \code{response ~ predictors}. The response should be a factor or a 26 | matrix with K columns, which will be interpreted as counts for each of 27 | K classes. 28 | A log-linear model is fitted, with coefficients zero for the first 29 | class. An offset can be included: it should be a numeric matrix with K columns 30 | if the response is either a matrix with K columns or a factor with K >= 2 31 | classes, or a numeric vector for a response factor with 2 levels. 32 | See the documentation of \code{\link{formula}()} for other details. 33 | } 34 | 35 | \item{data}{ 36 | an optional data frame in which to interpret the variables occurring 37 | in \code{formula}. 38 | } 39 | 40 | \item{weights}{ 41 | optional case weights in fitting. 42 | } 43 | 44 | \item{subset}{ 45 | expression saying which subset of the rows of the data should be used 46 | in the fit. All observations are included by default. 47 | } 48 | 49 | \item{na.action}{ 50 | a function to filter missing data. 51 | } 52 | 53 | \item{contrasts}{ 54 | a list of contrasts to be used for some or all of 55 | the factors appearing as variables in the model formula. 56 | } 57 | 58 | \item{ref}{the reference category to use for multinomial 59 | regression. Either an integer, in which case 60 | \code{levels(response)[ref]} is used as a baseline, or a character 61 | string. Default is 1.} 62 | 63 | \item{model}{ 64 | logical. If true, the model frame is saved as component \code{model} 65 | of the returned object. 66 | } 67 | 68 | \item{x}{should the model matrix be included in the result 69 | (default is \code{TRUE}).} 70 | 71 | \item{control}{a list of parameters for controlling the fitting 72 | process. See \code{\link[=brglmControl]{brglmControl()}} for details.} 73 | 74 | \item{...}{arguments to be used to form the default \code{control} 75 | argument if it is not supplied directly.} 76 | } 77 | \description{ 78 | \code{\link[=brmultinom]{brmultinom()}} is a wrapper of \code{\link[=brglmFit]{brglmFit()}} that fits multinomial 79 | regression models using implicit and explicit bias reduction 80 | methods. See Kosmidis & Firth (2011) for details. 81 | } 82 | \details{ 83 | The models \code{\link[=brmultinom]{brmultinom()}} handles are also known as 84 | baseline-category logit models (see, Agresti, 2002, Section 7.1), 85 | because they model the log-odds of every category against a 86 | baseline category. The user can control which baseline (or 87 | reference) category is used via the \code{ref} argument. By default 88 | \code{\link[=brmultinom]{brmultinom()}} uses the first category as reference. 89 | 90 | The maximum likelihood estimates for the parameters of 91 | baseline-category logit models have infinite components with 92 | positive probability, which can result in problems in their 93 | estimation and the use of inferential procedures (e.g. Wald 94 | tests). Albert and Anderson (1984) have categorized the possible 95 | data patterns for such models into the exclusive and exhaustive 96 | categories of complete separation, quasi-complete separation and 97 | overlap, and showed that infinite maximum likelihood estimates 98 | result when complete or quasi-complete separation occurs. 
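A minimal sketch of the separation issue described above, using an artificial three-category response in which category "c" never occurs when x = 0, so some maximum likelihood estimates diverge while mean bias reduction stays finite (the data and object names are illustrative):

library("brglm2")
toy <- data.frame(y = factor(c("a", "b", "c", "a", "b", "c", "a", "b", "a", "b")),
                  x = c(0, 0, 1, 0, 1, 1, 1, 0, 0, 1))
## Maximum likelihood: expect convergence warnings and very large estimates
## in the equation for category "c" (quasi-complete separation)
toyML <- brmultinom(y ~ x, data = toy, type = "ML")
## Mean bias reduction: estimates are finite
toyBR <- brmultinom(y ~ x, data = toy, type = "AS_mean")
coef(toyML)
coef(toyBR)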
99 | 100 | The adjusted score approaches to bias reduction that \code{\link[=brmultinom]{brmultinom()}} 101 | implements for \code{type = "AS_mean"} and \code{type = "AS_median"} are 102 | alternatives to maximum likelihood that result in estimates with 103 | smaller asymptotic mean and median bias, respectively, that are 104 | also \emph{always} finite, even in cases of complete or quasi-complete 105 | separation. 106 | 107 | \code{\link[=brmultinom]{brmultinom()}} is a wrapper of \code{\link[=brglmFit]{brglmFit()}} that fits multinomial 108 | logit regression models through the 'Poisson trick' (see, for 109 | example, Palmgren, 1981; Kosmidis & Firth, 2011). 110 | 111 | The implementation relies on the construction of an extended model 112 | matrix for the log-linear model and constraints on the sums of the 113 | Poisson means. Specifically, a log-linear model is fitted on a 114 | \href{https://en.wikipedia.org/wiki/Kronecker_product}{Kronecker product} of the 115 | original model matrix \code{X} implied by the formula, augmented by 116 | \code{nrow(X)} dummy variables. 117 | 118 | The extended model matrix is sparse, and the 119 | \href{https://cran.r-project.org/package=Matrix}{\pkg{Matrix}} package is 120 | used for its effective storage. 121 | 122 | While \code{\link[=brmultinom]{brmultinom()}} can be used for analyses using multinomial 123 | regression models, the current implementation is more of a proof of 124 | concept and is not expected to scale well with either of \code{nrow(X)}, 125 | \code{ncol(X)} or the number of levels in the categorical response. 126 | } 127 | \examples{ 128 | ## The housing data analysis from ?MASS::housing 129 | 130 | data("housing", package = "MASS") 131 | 132 | # Maximum likelihood using nnet::multinom 133 | houseML1nnet <- nnet::multinom(Sat ~ Infl + Type + Cont, weights = Freq, 134 | data = housing) 135 | # Maximum likelihood using brmultinom with baseline category 'Low' 136 | houseML1 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 137 | data = housing, type = "ML", ref = 1) 138 | # The estimates are numerically the same as houseML0 139 | all.equal(coef(houseML1nnet), coef(houseML1), tolerance = 1e-04) 140 | 141 | # Maximum likelihood using brmultinom with 'High' as baseline 142 | houseML3 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 143 | data = housing, type = "ML", ref = 3) 144 | # The fitted values are the same as houseML1 145 | all.equal(fitted(houseML3), fitted(houseML1), tolerance = 1e-10) 146 | 147 | # Bias reduction 148 | houseBR3 <- update(houseML3, type = "AS_mean") 149 | # Bias correction 150 | houseBC3 <- update(houseML3, type = "correction") 151 | 152 | ## Reproducing Bull et al. (2002, Table 3) 153 | data("hepatitis", package = "brglm2") 154 | 155 | # Construct a variable with the multinomial categories according to 156 | # the HCV and nonABC columns 157 | hepat <- hepatitis 158 | hepat$type <- with(hepat, factor(1 - HCV * nonABC + HCV + 2 * nonABC)) 159 | hepat$type <- factor(hepat$type, labels = c("noDisease", "C", "nonABC")) 160 | contrasts(hepat$type) <- contr.treatment(3, base = 1) 161 | 162 | # Maximum likelihood estimation fails to converge because some estimates are infinite 163 | hepML <- brmultinom(type ~ group * time, data = hepat, weights = counts, type = "ML", slowit = 0.1) 164 | 165 | # Mean bias reduction returns finite estimates 166 | hep_meanBR <- brmultinom(type ~ group * time, data = hepat, weights = counts, type = "AS_mean") 167 | # The estimates in Bull et al. 
(2002, Table 3, DOI: 10.1016/S0167-9473(01)00048-2) 168 | coef(hep_meanBR) 169 | 170 | # Median bias reduction also returns finite estimates, which are a bit larger in absolute value 171 | hep_medianBR <- brmultinom(type ~ group * time, data = hepat, weights = counts, type = "AS_median") 172 | coef(hep_medianBR) 173 | 174 | } 175 | \references{ 176 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 177 | reduction in generalized linear models. \emph{Statistics and Computing}, 178 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 179 | 180 | Agresti A (2002). \emph{Categorical data analysis} (2nd edition). Wiley 181 | Series in Probability and Statistics. Wiley. 182 | 183 | Albert A, Anderson J A (1984). On the Existence of Maximum 184 | Likelihood Estimates in Logistic Regression Models. \emph{Biometrika}, 185 | \strong{71} 1--10. \doi{10.2307/2336390}. 186 | 187 | Kosmidis I, Firth D (2011). Multinomial logit bias reduction 188 | via the Poisson log-linear model. \emph{Biometrika}, \strong{98}, 755-759. 189 | \doi{10.1093/biomet/asr026}. 190 | 191 | Palmgren, J (1981). The Fisher Information Matrix for Log Linear 192 | Models Arguing Conditionally on Observed Explanatory 193 | Variables. \emph{Biometrika}, \strong{68}, 563-566. 194 | \doi{10.1093/biomet/68.2.563}. 195 | } 196 | \seealso{ 197 | \code{\link[nnet:multinom]{nnet::multinom()}}, \code{\link[=bracl]{bracl()}} for adjacent category logit models for ordinal responses 198 | } 199 | \author{ 200 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 201 | } 202 | -------------------------------------------------------------------------------- /man/coalition.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{coalition} 5 | \alias{coalition} 6 | \title{Coalition data} 7 | \format{ 8 | A data frame with 314 rows and the 7 variables "duration", 9 | "ciep12", "invest", "fract", "polar", "numst2", and "crisis". For 10 | variable descriptions, please refer to King et al (1990). 11 | } 12 | \usage{ 13 | coalition 14 | } 15 | \description{ 16 | This data set contains survival data on government coalitions in 17 | parliamentary democracies (Belgium, Canada, Denmark, Finland, 18 | France, Iceland, Ireland, Israel, Italy, Netherlands, Norway, 19 | Portugal, Spain, Sweden, and the United Kingdom) for the period 20 | 1945-1987. For parsimony, country indicator variables are omitted 21 | in the sample data. 22 | } 23 | \note{ 24 | Data is as it is provided by the 25 | \href{https://cran.r-project.org/package=Zelig}{\pkg{Zeilig}} R package. 26 | } 27 | \references{ 28 | King G, Alt J E, Burns N E, Laver M. (1990). A Unified 29 | Model of Cabinet Dissolution in Parliamentary 30 | Democracies. \emph{American Journal of Political Science}, \strong{34}, 31 | 846-870. \doi{10.2307/2111401}. 32 | 33 | King G, Alt J E, Burns N E, Laver M. ICPSR 34 | Publication Related Archive, 1115. 
35 | } 36 | \seealso{ 37 | \code{\link[=brglm_fit]{brglm_fit()}} 38 | } 39 | \keyword{datasets} 40 | -------------------------------------------------------------------------------- /man/coef.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{coef.brglmFit} 4 | \alias{coef.brglmFit} 5 | \title{Extract model coefficients from \code{\link[=brglmFit]{"brglmFit"}} objects} 6 | \usage{ 7 | \method{coef}{brglmFit}(object, model = c("mean", "full", "dispersion"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object for which the extraction of model coefficients is 11 | meaningful.} 12 | 13 | \item{model}{one of \code{"mean"} (default), \code{"dispersion"}, `"full", 14 | to return the estimates of the parameters in the linear 15 | prediction only, the estimate of the dispersion parameter only, 16 | or both, respectively.} 17 | 18 | \item{...}{other arguments.} 19 | } 20 | \description{ 21 | Extract model coefficients from \code{\link[=brglmFit]{"brglmFit"}} objects 22 | } 23 | \details{ 24 | See \code{\link[=coef]{coef()}} for more details. 25 | } 26 | \seealso{ 27 | \code{\link[=coef]{coef()}} 28 | } 29 | -------------------------------------------------------------------------------- /man/coef.brglmFit_expo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expo.R 3 | \name{coef.brglmFit_expo} 4 | \alias{coef.brglmFit_expo} 5 | \title{Extract estimates from \code{\link[=brglmFit_expo]{"brglmFit_expo"}} objects} 6 | \usage{ 7 | \method{coef}{brglmFit_expo}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object for which the extraction of model coefficients is 11 | meaningful.} 12 | 13 | \item{...}{other arguments.} 14 | } 15 | \description{ 16 | Extract estimates from \code{\link[=brglmFit_expo]{"brglmFit_expo"}} objects 17 | } 18 | -------------------------------------------------------------------------------- /man/coef.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{coef.brnb} 4 | \alias{coef.brnb} 5 | \title{Extract model coefficients from \code{\link[=brnb]{"brnb"}} objects} 6 | \usage{ 7 | \method{coef}{brnb}(object, model = c("mean", "full", "dispersion"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object for which the extraction of model coefficients is 11 | meaningful.} 12 | 13 | \item{model}{one of \code{"mean"} (default), \code{"full"}, \code{"dispersion"}, 14 | to return the estimates of the parameters in the linear 15 | prediction only, or both, the estimate of the dispersion 16 | parameter only, respectively.} 17 | 18 | \item{...}{other arguments.} 19 | } 20 | \description{ 21 | Extract model coefficients from \code{\link[=brnb]{"brnb"}} objects 22 | } 23 | \details{ 24 | See \code{\link[=coef]{coef()}} for more details. 
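A minimal sketch of the \code{model} argument, with a salmonella data frame constructed inline for illustration (mirroring the package tests):

library("brglm2")
salmonella <- data.frame(freq = c(15, 16, 16, 27, 33, 20,
                                  21, 18, 26, 41, 38, 27,
                                  29, 21, 33, 60, 41, 42),
                         dose = rep(c(0, 10, 33, 100, 333, 1000), 3))
fit_brnb <- brnb(freq ~ dose + log(dose + 10), data = salmonella,
                 link = "log", transformation = "inverse", type = "ML")
coef(fit_brnb, model = "mean")        # regression parameters only (default)
coef(fit_brnb, model = "dispersion")  # dispersion parameter only
coef(fit_brnb, model = "full")        # regression and dispersion parameters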
25 | } 26 | -------------------------------------------------------------------------------- /man/confint.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{confint.brglmFit} 4 | \alias{confint.brglmFit} 5 | \title{Method for computing confidence intervals for one or more 6 | regression parameters in a \code{\link[=brglmFit]{"brglmFit"}} object} 7 | \usage{ 8 | \method{confint}{brglmFit}(object, parm, level = 0.95, ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted model object.} 12 | 13 | \item{parm}{a specification of which parameters are to be given 14 | confidence intervals, either a vector of numbers or a vector of 15 | names. If missing, all parameters are considered.} 16 | 17 | \item{level}{the confidence level required.} 18 | 19 | \item{...}{additional argument(s) for methods.} 20 | } 21 | \description{ 22 | Method for computing confidence intervals for one or more 23 | regression parameters in a \code{\link[=brglmFit]{"brglmFit"}} object 24 | } 25 | -------------------------------------------------------------------------------- /man/confint.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{confint.brmultinom} 4 | \alias{confint.brmultinom} 5 | \title{Method for computing confidence intervals for one or more 6 | regression parameters in a \code{\link[=brmultinom]{"brmultinom"}} object} 7 | \usage{ 8 | \method{confint}{brmultinom}(object, parm, level = 0.95, ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted model object.} 12 | 13 | \item{parm}{a specification of which parameters are to be given 14 | confidence intervals, either a vector of numbers or a vector of 15 | names. If missing, all parameters are considered.} 16 | 17 | \item{level}{the confidence level required.} 18 | 19 | \item{...}{additional argument(s) for methods.} 20 | } 21 | \description{ 22 | Method for computing confidence intervals for one or more 23 | regression parameters in a \code{\link[=brmultinom]{"brmultinom"}} object 24 | } 25 | -------------------------------------------------------------------------------- /man/confint.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{confint.brnb} 4 | \alias{confint.brnb} 5 | \title{Method for computing Wald confidence intervals for one or more 6 | regression parameters in a \code{\link[=brnb]{"brnb"}} object} 7 | \usage{ 8 | \method{confint}{brnb}(object, parm, level = 0.95, ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted model object.} 12 | 13 | \item{parm}{a specification of which parameters are to be given 14 | confidence intervals, either a vector of numbers or a vector of 15 | names. 
If missing, all parameters are considered.} 16 | 17 | \item{level}{the confidence level required.} 18 | 19 | \item{...}{additional argument(s) for methods.} 20 | } 21 | \description{ 22 | Method for computing Wald confidence intervals for one or more 23 | regression parameters in a \code{\link[=brnb]{"brnb"}} object 24 | } 25 | -------------------------------------------------------------------------------- /man/endometrial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{endometrial} 5 | \alias{endometrial} 6 | \title{Histology grade and risk factors for 79 cases of endometrial cancer} 7 | \format{ 8 | A data frame with 79 rows and 4 variables: 9 | \itemize{ 10 | \item \code{NV}: neovasculization with coding 0 for absent and 1 for present 11 | \item \code{PI}: pulsality index of arteria uterina 12 | \item \code{EH}: endometrium height 13 | \item \code{HG} histology grade with coding 0 for low grade and 1 for high grade 14 | } 15 | } 16 | \source{ 17 | The packaged data set was downloaded in \code{.dat} format from 18 | \url{https://users.stat.ufl.edu/~aa/glm/data/}. The latter link 19 | provides the data sets used in Agresti (2015). 20 | 21 | \if{html}{\out{
}}\preformatted{The endometrial data set was first analyzed in Heinze and 22 | Schemper (2002), and was originally provided by Dr 23 | E. Asseryanis from the Medical University of Vienna. 24 | }\if{html}{\out{
}} 25 | } 26 | \usage{ 27 | endometrial 28 | } 29 | \description{ 30 | Histology grade and risk factors for 79 cases of endometrial cancer 31 | } 32 | \references{ 33 | Agresti A (2015). \emph{Foundations of Linear and Generalized Linear 34 | Models}. Wiley Series in Probability and Statistics. Wiley. 35 | 36 | Heinze G, Schemper M (2002). A Solution to the Problem of 37 | Separation in Logistic Regression. \emph{Statistics in Medicine}, 38 | \strong{21}, 2409–2419. \doi{10.1002/sim.1047}. 39 | 40 | Kosmidis I, Firth D (2021). Jeffreys-prior penalty, finiteness 41 | and shrinkage in binomial-response generalized linear 42 | models. \emph{Biometrika}, \strong{108}, 71-82. \doi{10.1093/biomet/asaa052}. 43 | } 44 | \seealso{ 45 | \code{\link[=brglm_fit]{brglm_fit()}} 46 | } 47 | \keyword{datasets} 48 | -------------------------------------------------------------------------------- /man/enzymes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{enzymes} 5 | \alias{enzymes} 6 | \title{Liver Enzyme Data} 7 | \format{ 8 | A data frame with 218 rows and the following 6 columns: 9 | \itemize{ 10 | \item \code{Patient}: Patient ID 11 | \item \code{Group}: Four diagnostic groups were considered: acute viral 12 | hepatitis (1), persistent chronic hepatitis (2), aggressive chronic 13 | hepatitis (3) and post-necrotic cirrhosis (4). 14 | \item \code{AST}: Aspartate aminotransferase (in U/L) 15 | \item \code{ALT}: Alanine aminotransferase (in U/L) 16 | \item \code{GLDH}: Glutamate dehydrogenase (in U/L) 17 | \item \code{OCT}: Ornithine carbamyltransferase (in U/L) 18 | } 19 | } 20 | \source{ 21 | Data from Albert and Harris (1984, Chapter 5, Appendix I), and is 22 | also provided by the 23 | \href{https://cran.r-project.org/package=pmlr}{\pkg{pmlr}} R package. 24 | } 25 | \usage{ 26 | enzymes 27 | } 28 | \description{ 29 | Liver enzyme data collected from 218 patients with liver disease 30 | (Plomteux, 1980). The laboratory profile consists of enzymatic 31 | activity measured for four liver enzymes: aspartate 32 | aminotransferase (\code{AST}), alanine aminotransferase (\code{ALT}), 33 | glutamate dehydrogenase (\code{GLDH}) and ornithine carbamyltransferase 34 | (\code{OCT}). 35 | } 36 | \references{ 37 | Albert A, Harris E K (1984). \emph{Multivariate Interpretation of 38 | Clinical Laboratory Data}. Dekker: New York. 39 | 40 | Plomteux G (1980). Multivariate analysis of an enzyme profile for 41 | the differential diagnosis of viral hepatitis. \emph{Clinical 42 | Chemistry}, \strong{26}, 1897-1899. 
43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /man/expo.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expo.R 3 | \name{expo.brglmFit} 4 | \alias{expo.brglmFit} 5 | \alias{brglmFit_expo} 6 | \alias{expo} 7 | \alias{expo.glm} 8 | \title{Estimate the exponential of parameters of generalized linear models 9 | using various methods} 10 | \usage{ 11 | \method{expo}{brglmFit}( 12 | object, 13 | type = c("correction*", "correction+", "Lylesetal2012", "AS_median", "ML"), 14 | level = 0.95 15 | ) 16 | 17 | \method{expo}{glm}( 18 | object, 19 | type = c("correction*", "correction+", "Lylesetal2012", "AS_median", "ML"), 20 | level = 0.95 21 | ) 22 | } 23 | \arguments{ 24 | \item{object}{an object of class \code{\link[=brglmFit]{"brglmFit"}} or 25 | \code{\link[=glm]{"glm"}}.} 26 | 27 | \item{type}{the type of correction to be used. The available 28 | options are \code{"correction*"} (explicit mean bias correction with 29 | a multiplicative adjustment), \code{"correction+"} (explicit mean 30 | bias correction with an additive adjustment), \code{"Lylesetal2012"} 31 | (explicit median bias correction using the multiplicative 32 | adjustment in Lyles et al., 2012), \code{"AS_median"} (median bias 33 | reduction), and \code{"ML"} (maximum likelihood). See Details.} 34 | 35 | \item{level}{the confidence level required. Default is \code{0.95}.} 36 | } 37 | \value{ 38 | a list inheriting from class \code{\link[=brglmFit_expo]{"brglmFit_expo"}} with 39 | components \code{coef} (the estimates of the exponentiated 40 | regression parameters), \code{se} (the corresponding estimated 41 | standard errors for the exponentiated parameters), \code{ci} 42 | (confidence intervals of level \code{level} for the exponentiated 43 | parameters), and \code{type} for the \code{type} of correction that has 44 | been requested. 45 | } 46 | \description{ 47 | The \code{\link[=expo]{expo()}} method uses the supplied \code{\link[=brglmFit]{"brglmFit"}} or 48 | \code{\link[=glm]{"glm"}} object to estimate the exponential of parameters of 49 | generalized linear models with maximum likelihood or various mean 50 | and median bias reduction methods. \code{\link[=expo]{expo()}} is useful for computing 51 | (corrected) estimates of the multiplicative impact of a unit 52 | increase in a covariate on the mean of a Poisson log-linear model 53 | (\code{family = poisson("log")} in \code{\link[=glm]{glm()}}) while adjusting for other 54 | covariates, the odds ratio associated with a unit increase in a 55 | covariate in a logistic regression model (\code{family = binomial("logit")} in \code{\link[=glm]{glm()}}) while adjusting for other 56 | covariates, the relative risk associated with a unit increase in a 57 | covariate in a relative risk regression model (\code{family = binomial("log")} in \code{\link[=glm]{glm()}}) while adjusting for other covariates, 58 | among others. 59 | } 60 | \details{ 61 | The supported methods through the \code{type} argument are: 62 | \itemize{ 63 | \item \code{"ML"}: the estimates of the exponentiated parameters are 64 | \eqn{\exp(\hat\theta_j)}, where \eqn{\hat\theta_j} is the maximum 65 | likelihood estimate of the \eqn{j}th regression parameter. 
66 | \item \code{"correction*"}: the estimates of the exponentiated parameters 67 | are \eqn{\exp(\hat\theta_j) / (1 + \hat{v}_j / 2)}, where 68 | \eqn{\hat\theta_j} is the estimate of the \eqn{j}th regression 69 | parameter using \code{type = "AS_mixed"} in \code{\link[=brglmFit]{brglmFit()}}. 70 | \item \code{"correction+"}: the estimates of the exponentiated parameters 71 | are \eqn{\exp(\hat\theta_j) (1 - \hat{v}_j / 2)}, where 72 | \eqn{\hat\theta_j} is the estimate of the \eqn{j}th regression 73 | parameter using \code{type = "AS_mixed"} in \code{\link[=brglmFit]{brglmFit()}}. 74 | \item \code{"Lylesetal2012"}: the estimates of the exponentiated parameters 75 | are \eqn{\exp(\hat\theta_j) exp(- \hat{v}_j / 2)}, where 76 | \eqn{\hat\theta_j} is the estimate of the \eqn{j}th regression 77 | parameter using \code{type = "AS_mixed"} in \code{\link[=brglmFit]{brglmFit()}}. This estimator 78 | has been proposed in Lyles et al. (2012). 79 | \item \code{"AS_median"}: the estimates of the exponentiated parameters are 80 | \eqn{\exp(\hat\theta_j)}, where \eqn{\hat\theta_j} is the estimate 81 | of the \eqn{j}th regression parameter using \code{type = "AS_median"} in 82 | \code{\link[=brglmFit]{brglmFit()}}. 83 | } 84 | 85 | \code{"correction*"} and \code{"correction+"} are based on multiplicative and 86 | additive adjustments, respectively, of the exponential of a 87 | reduced-bias estimator (like the ones coming from \code{\link[=brglmFit]{brglmFit()}} with 88 | \code{type = "AS_mixed"}, \code{type = "AS_mean"}, and \code{type = "correction"}). The form of those adjustments results from the 89 | expression of the first-term in the mean bias expansion of the 90 | exponential of a reduced-bias estimator. See, for example, Di 91 | Caterina & Kosmidis (2019, expression 12) for the general form of 92 | the first-term of the mean bias of a smooth transformation of a 93 | reduced-bias estimator. 94 | 95 | The estimators from \code{"correction+"}, \code{"correction*"}, 96 | \code{"Lylesetal2012"} have asymptotic mean bias of order smaller than 97 | than of the maximum likelihood estimator. The estimators from 98 | \code{"AS_median"} are asymptotically closed to being median unbiased 99 | than the maximum likelihood estimator is. 100 | 101 | Estimated standard errors are computed using the delta method, 102 | where both the Jacobin and the information matrix are evaluated at 103 | the logarithm of the estimates of the exponentiated parameters. 104 | 105 | Confidence intervals results by taking the exponential of the 106 | limits of standard Wald-type intervals computed at the logarithm of 107 | the estimates of the exponentiated parameters. 
108 | } 109 | \examples{ 110 | 111 | ## The lizards example from ?brglm::brglm 112 | lizardsML <- glm(cbind(grahami, opalinus) ~ height + diameter + 113 | light + time, family = binomial(logit), data = lizards, 114 | method = "glm.fit") 115 | # Get estimates, standard errors, and confidence intervals of odds 116 | # ratios with various methods 117 | expo(lizardsML, type = "ML") 118 | expo(lizardsML, type = "correction*") 119 | expo(lizardsML, type = "Lylesetal2012") 120 | expo(lizardsML, type = "correction+") 121 | expo(lizardsML, type = "AS_median") 122 | 123 | ## Example from ?glm 124 | ## Dobson (1990) Page 93: Randomized Controlled Trial : 125 | counts <- c(18,17,15,20,10,20,25,13,12) 126 | outcome <- gl(3,1,9) 127 | treatment <- gl(3,3) 128 | glm.D93 <- glm(counts ~ outcome + treatment, family = poisson()) 129 | expo(glm.D93, type = "correction*") 130 | 131 | } 132 | \references{ 133 | Di Caterina C, Kosmidis I (2019). Location-Adjusted Wald Statistics for Scalar 134 | Parameters. \emph{Computational Statistics & Data Analysis}, \strong{138}, 135 | 126-142. \doi{10.1016/j.csda.2019.04.004}. 136 | 137 | Kosmidis I, Kenne Pagui E C, Sartori N (2020). Mean and median bias 138 | reduction in generalized linear models. \emph{Statistics and Computing}, 139 | \strong{30}, 43-59. \doi{10.1007/s11222-019-09860-6}. 140 | 141 | Cordeiro G M, McCullagh P (1991). Bias correction in generalized 142 | linear models. \emph{Journal of the Royal Statistical Society. Series B 143 | (Methodological)}, \strong{53}, 629-643. \doi{10.1111/j.2517-6161.1991.tb01852.x}. 144 | 145 | Lyles R H, Guo Y, Greenland S (2012). Reducing bias and mean 146 | squared error associated with regression-based odds ratio 147 | estimators. \emph{Journal of Statistical Planning and Inference}, 148 | \strong{142} 3235–3241. \doi{10.1016/j.jspi.2012.05.005}. 149 | } 150 | \seealso{ 151 | \code{\link[=brglm_fit]{brglm_fit()}} and and \code{\link[=brglm_control]{brglm_control()}} 152 | } 153 | \author{ 154 | Ioannis Kosmidis \verb{[aut, cre]} \email{ioannis.kosmidis@warwick.ac.uk} 155 | } 156 | -------------------------------------------------------------------------------- /man/hepatitis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{hepatitis} 5 | \alias{hepatitis} 6 | \title{Post-transfusion hepatitis: impact of non-A, non-B hepatitis 7 | surrogate tests} 8 | \format{ 9 | A data frame with 28 rows and the following 6 columns: 10 | \itemize{ 11 | \item \code{city}: Subjects were recruited from 3 Canadian Red Cross Society 12 | Blood Centres and 13 university-affiliated hospitals in 3 cities: 13 | Toronto, Hamilton and Winnipeg. 14 | \item \code{group}: Eligible subjects were assigned to one of two allogenic 15 | blood recipient groups. One group received products that had only 16 | routine Canadian transfusion-transmissible disease marker screening 17 | (no-withhold). The other group received only products that were 18 | not positive for NANB surrogate markers (withhold). 19 | \item \code{time}: Hepatitis C (HCV) screening was introduced in Canada in 20 | May, 1990. Subjects were recruited into the study before (pre) and 21 | after (post) the introduction of anti-HCV testing. 22 | \item \code{HCV}: Post-transfusion HCV hepatitis present (1) or absent (0). 
23 | \item \code{nonABC}: Post-transfusion non-A, non-B, non-C hepatitis present (1) or absent (0) 24 | \item \code{counts}: Number of subjects 25 | } 26 | } 27 | \source{ 28 | Data is from Blajchman et al. (1995), also analyzed in Bull et 29 | al. (2002), and is also provided by the 30 | \href{https://cran.r-project.org/package=pmlr}{\pkg{pmlr}} R package. 31 | } 32 | \usage{ 33 | hepatitis 34 | } 35 | \description{ 36 | Data from a randomized double-blind trial to assess whether 37 | withholding donor blood positive for the non-A, non-B (\code{"NANB"}) 38 | surrogate markers would reduce the frequency of post-transfusion 39 | hepatitis. The dataset contains \code{4588} subjects enrolled from 1988 40 | to 1992 into two study groups that received allogenic blood from 41 | which units positive for NANB surrogate markers were withheld (n = 42 | \code{2311}) or not withheld (n = \code{2277}). Subjects were followed up 43 | for 6 months and assessed for the presence of post-transfusion 44 | hepatitis. 45 | } 46 | \references{ 47 | Bull S B, Mak C, Greenwood C M T (2002). A modified score function 48 | estimator for multinomial logistic regression in small 49 | samples. \emph{Computational Statistics & Data Analysis}, \strong{39}, 50 | 57-74. \doi{10.1016/S0167-9473(01)00048-2} 51 | 52 | Blajchman M A, Bull S B and Feinman S V (1995). Post-transfusion 53 | hepatitis: impact of non-A, non-B hepatitis surrogate tests. \emph{The 54 | Lancet}, \strong{345}, 21--25. \doi{10.1016/S0140-6736(95)91153-7} 55 | } 56 | \keyword{datasets} 57 | -------------------------------------------------------------------------------- /man/lizards.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{lizards} 5 | \alias{lizards} 6 | \title{Habitat preferences of lizards} 7 | \format{ 8 | A data frame with 23 rows and 6 columns: 9 | \itemize{ 10 | \item \code{grahami}. count of grahami lizards 11 | \item \code{opalinus}. count of opalinus lizards 12 | \item \code{height}. a factor with levels \verb{<5ft}, \verb{>=5ft} 13 | \item \code{diameter}. a factor with levels \verb{<=2in}, \verb{>2in} 14 | \item \code{light}. a factor with levels \code{sunny}, \code{shady} 15 | \item \code{time}. a factor with levels \code{early}, \code{midday}, \code{late} 16 | } 17 | 18 | The variables \code{grahami} and \code{opalinus} are counts of two lizard 19 | species at two different perch heights, two different perch 20 | diameters, in sun and in shade, at three times of day. 21 | } 22 | \source{ 23 | McCullagh P, Nelder J A (1989) \emph{Generalized Linear 24 | Models} (2nd Edition). London: Chapman and Hall. 25 | 26 | Originally from 27 | 28 | \if{html}{\out{
<div class="sourceCode">}}\preformatted{Schoener T W (1970) Nonsynchronous spatial overlap of lizards 29 | in patchy habitats. _Ecology_ *51*, 408-418. 30 | }\if{html}{\out{</div>
}} 31 | } 32 | \usage{ 33 | lizards 34 | } 35 | \description{ 36 | Habitat preferences of lizards 37 | } 38 | \seealso{ 39 | \code{\link[=brglm_fit]{brglm_fit()}} 40 | } 41 | \keyword{datasets} 42 | -------------------------------------------------------------------------------- /man/mis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mis-link.R 3 | \name{mis} 4 | \alias{mis} 5 | \title{A \code{\link[=make.link]{"link-glm"}} object for misclassified responses in binomial regression models} 6 | \usage{ 7 | mis(link = "logit", sensitivity = 1, specificity = 1) 8 | } 9 | \arguments{ 10 | \item{link}{the baseline link to be used.} 11 | 12 | \item{sensitivity}{the probability of observing a success given that a success actually took place given any covariate values.} 13 | 14 | \item{specificity}{the probability of observing a failure given that a failure actually took place given any covariate values.} 15 | } 16 | \description{ 17 | \code{\link[=mis]{mis()}} is a \code{\link[=make.link]{"link-glm"}} object that specifies the link function in Neuhaus (1999, expression (8)) for handling misclassified responses in binomial regression models using maximum likelihood. A prior specification of the sensitivity and specificity is required. 18 | } 19 | \details{ 20 | \code{sensitivity + specificity} should be greater or equal to 1, 21 | otherwise it is implied that the procedure producing the responses 22 | performs worse than chance in terms of misclassification. 23 | } 24 | \examples{ 25 | 26 | ## Define a few links with some misclassification 27 | logit_mis <- mis(link = "logit", sensitivity = 0.9, specificity = 0.9) 28 | 29 | lizards_f <- cbind(grahami, opalinus) ~ height + diameter + light + time 30 | 31 | lizardsML <- glm(lizards_f, family = binomial(logit), data = lizards) 32 | 33 | lizardsML_mis <- update(lizardsML, family = binomial(logit_mis), 34 | start = coef(lizardsML)) 35 | 36 | ## A notable change is coefficients is noted here compared to when 37 | ## specificity and sensitity are 1 38 | coef(lizardsML) 39 | coef(lizardsML_mis) 40 | 41 | ## Bias reduction is also possible 42 | update(lizardsML_mis, method = "brglmFit", type = "AS_mean", 43 | start = coef(lizardsML)) 44 | 45 | update(lizardsML_mis, method = "brglmFit", type = "AS_median", 46 | start = coef(lizardsML)) 47 | 48 | } 49 | \references{ 50 | Neuhaus J M (1999). Bias and efficiency loss due to misclassified 51 | responses in binary regression. Biometrika, \strong{86}, 843-855. 52 | \url{https://www.jstor.org/stable/2673589}. 53 | } 54 | \seealso{ 55 | \code{\link[=glm]{glm()}}, \code{\link[=brglm_fit]{brglm_fit()}} 56 | } 57 | -------------------------------------------------------------------------------- /man/ordinal_superiority.bracl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ordinal_superiority.R 3 | \name{ordinal_superiority.bracl} 4 | \alias{ordinal_superiority.bracl} 5 | \alias{ordinal_superiority} 6 | \title{Ordinal superiority scores of Agresti and Kateri (2017)} 7 | \usage{ 8 | \method{ordinal_superiority}{bracl}( 9 | object, 10 | formula, 11 | data, 12 | measure = c("gamma", "Delta"), 13 | level = 0.95, 14 | bc = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{a fitted object from an ordinal regression 19 | model. 
Currently only models from class \code{\link[=bracl]{"bracl"}} are supported.} 20 | 21 | \item{formula}{a RHS formula indicating the group variable to use.} 22 | 23 | \item{data}{an optional data frame in which to look for variables 24 | with which to compute ordinal superiority measures. If 25 | omitted, an attempt is made to use the data that produced 26 | \code{object}.} 27 | 28 | \item{measure}{either \code{"gamma"} (default) or \code{"Delta"}, specifying 29 | the ordinal superiority measure to be returned.} 30 | 31 | \item{level}{the confidence level required when computing 32 | confidence intervals for the ordinal superiority measures.} 33 | 34 | \item{bc}{logical. If \code{FALSE} (default) then the ordinal 35 | superiority measures are computed using the estimates in 36 | \code{object}. If \code{TRUE} then the ordinal superiority measure 37 | estimates are corrected for mean bias.} 38 | } 39 | \description{ 40 | \code{\link[=ordinal_superiority]{ordinal_superiority()}} is a method for the estimation and 41 | inference about model-based ordinal superiority scores introduced 42 | in Agresti and Kateri (2017, Section 5) from fitted objects. The 43 | mean bias of the estimates of the ordinal superiority scores can be 44 | corrected. 45 | } 46 | \examples{ 47 | 48 | data("stemcell", package = "brglm2") 49 | 50 | # Adjacent category logit (proportional odds) 51 | stem <- within(stemcell, {nreligion = as.numeric(religion)}) 52 | fit_bracl_p <- bracl(research ~ nreligion + gender, weights = frequency, 53 | data = stem, type = "ML", parallel = TRUE) 54 | 55 | # Estimates and 95\% confidence intervals for the probabilities that the response 56 | # category for gender "female" is higher than the response category for gender "male", 57 | # while adjusting for religion. 58 | ordinal_superiority(fit_bracl_p, ~ gender) 59 | 60 | \dontrun{ 61 | # And their (very-similar in value here) bias corrected versions 62 | # with 99\% CIs 63 | ordinal_superiority(fit_bracl_p, ~ gender, bc = TRUE, level = 0.99) 64 | # Note that the object is refitted with type = "AS_mean" 65 | 66 | } 67 | 68 | } 69 | \references{ 70 | Agresti, A., Kateri, M. (2017). Ordinal probability effect measures 71 | for group comparisons in multinomial cumulative link models. 72 | \emph{Biometrics}, \strong{73} 214-219. \doi{10.1111/biom.12565}. 73 | } 74 | -------------------------------------------------------------------------------- /man/predict.bracl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bracl.R 3 | \name{predict.bracl} 4 | \alias{predict.bracl} 5 | \title{Predict method for \link{bracl} fits} 6 | \usage{ 7 | \method{predict}{bracl}(object, newdata, type = c("class", "probs"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted object of class inheriting from \code{\link[=bracl]{"bracl"}}.} 11 | 12 | \item{newdata}{optionally, a data frame in which to look for 13 | variables with which to predict. If omitted, the fitted linear 14 | predictors are used.} 15 | 16 | \item{type}{the type of prediction required. 
The default is 17 | \code{"class"}, which produces predictions of the response category 18 | at the covariate values supplied in \code{"newdata"}, selecting the 19 | category with the largest probability; the alternative 20 | \code{"probs"} returns all category probabilities at the covariate 21 | values supplied in \code{newdata}.} 22 | 23 | \item{...}{further arguments passed to or from other methods.} 24 | } 25 | \value{ 26 | If \code{type = "class"} a vector with the predicted response 27 | categories; if \code{type = "probs"} a matrix of probabilities for all 28 | response categories at \code{newdata}. 29 | } 30 | \description{ 31 | Obtain class and probability predictions from a fitted adjacent 32 | category logits model. 33 | } 34 | \details{ 35 | If \code{newdata} is omitted the predictions are based on the data 36 | used for the fit. 37 | } 38 | \examples{ 39 | 40 | data("stemcell", package = "brglm2") 41 | 42 | # Adjacent category logit (non-proportional odds) 43 | fit_bracl <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 44 | data = stemcell, type = "ML") 45 | # Adjacent category logit (proportional odds) 46 | fit_bracl_p <- bracl(research ~ as.numeric(religion) + gender, weights = frequency, 47 | data = stemcell, type = "ML", parallel = TRUE) 48 | 49 | # New data 50 | newdata <- expand.grid(gender = c("male", "female"), 51 | religion = c("liberal", "moderate", "fundamentalist")) 52 | 53 | # Predictions 54 | sapply(c("class", "probs"), function(what) predict(fit_bracl, newdata, what)) 55 | sapply(c("class", "probs"), function(what) predict(fit_bracl_p, newdata, what)) 56 | 57 | } 58 | -------------------------------------------------------------------------------- /man/predict.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{predict.brmultinom} 4 | \alias{predict.brmultinom} 5 | \title{Predict method for \link{brmultinom} fits} 6 | \usage{ 7 | \method{predict}{brmultinom}(object, newdata, type = c("class", "probs"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted object of class inheriting from 11 | \code{\link[=brmultinom]{"brmultinom"}}.} 12 | 13 | \item{newdata}{optionally, a data frame in which to look for 14 | variables with which to predict. If omitted, the fitted linear 15 | predictors are used.} 16 | 17 | \item{type}{the type of prediction required. The default is 18 | \code{"class"}, which produces predictions of the response category 19 | at the covariate values supplied in \code{"newdata"}, selecting the 20 | category with the largest probability; the alternative 21 | \code{"probs"} returns all category probabilities at the covariate 22 | values supplied in \code{newdata}.} 23 | 24 | \item{...}{further arguments passed to or from other methods.} 25 | } 26 | \value{ 27 | If \code{type = "class"} a vector with the predicted response 28 | categories; if \code{type = "probs"} a matrix of probabilities for all 29 | response categories at \code{newdata}. 30 | } 31 | \description{ 32 | Obtain class and probability predictions from a fitted baseline 33 | category logits model. 34 | } 35 | \details{ 36 | If \code{newdata} is omitted the predictions are based on the data used 37 | for the fit. 
38 | } 39 | \examples{ 40 | 41 | data("housing", package = "MASS") 42 | 43 | # Maximum likelihood using brmultinom with baseline category 'Low' 44 | houseML1 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 45 | data = housing, type = "ML", ref = 1) 46 | 47 | # New data 48 | newdata <- expand.grid(Infl = c("Low", "Medium"), 49 | Type = c("Tower", "Atrium", "Terrace"), 50 | Cont = c("Low", NA, "High")) 51 | 52 | ## Predictions 53 | sapply(c("class", "probs"), function(what) predict(houseML1, newdata, what)) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/residuals.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R, R/zzz_conventions.R 3 | \name{residuals.brmultinom} 4 | \alias{residuals.brmultinom} 5 | \alias{residuals.bracl} 6 | \title{Residuals for multinomial logistic regression and adjacent category logit models} 7 | \usage{ 8 | \method{residuals}{brmultinom}(object, type = c("pearson", "response", "deviance", "working"), ...) 9 | 10 | \method{residuals}{bracl}(object, type = c("pearson", "response", "deviance", "working"), ...) 11 | } 12 | \arguments{ 13 | \item{object}{the object coming out of \code{\link[=bracl]{bracl()}} and 14 | \code{\link[=brmultinom]{brmultinom()}}.} 15 | 16 | \item{type}{the type of residuals which should be returned. The 17 | options are: \code{"pearson"} (default), \code{"response"}, \code{"deviance"}, 18 | \code{"working"}. See Details.} 19 | 20 | \item{...}{Currently not used.} 21 | } 22 | \description{ 23 | Residuals for multinomial logistic regression and adjacent category logit models 24 | } 25 | \details{ 26 | The residuals computed are the residuals from the equivalent 27 | Poisson log-linear model fit, organized in a form that matches the 28 | output of \code{fitted(object, type = "probs")}. As a result, the output 29 | is residuals defined in terms of the object and expected 30 | multinomial counts. 31 | } 32 | \seealso{ 33 | brmultinom bracl 34 | } 35 | -------------------------------------------------------------------------------- /man/simulate.brmultinom.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brmultinom.R 3 | \name{simulate.brmultinom} 4 | \alias{simulate.brmultinom} 5 | \title{Method for simulating a data set from \code{\link[=brmultinom]{"brmultinom"}} and \code{\link[=bracl]{"bracl"}} 6 | objects} 7 | \usage{ 8 | \method{simulate}{brmultinom}(object, ...) 9 | } 10 | \arguments{ 11 | \item{object}{an object of class \code{\link[=brmultinom]{"brmultinom"}} or \code{\link[=bracl]{"bracl"}}.} 12 | 13 | \item{...}{currently not used.} 14 | } 15 | \value{ 16 | A \code{\link[=data.frame]{"data.frame"}} with \code{object$ncat} times the rows that 17 | \code{model.frame(object)} have and the same variables. If \code{weights} has 18 | been specified in the call that generated \code{object}, then the 19 | simulate frequencies will populate the weights variable. Otherwise, 20 | the resulting \link{data.frame} will have a \code{".weights"} variable with 21 | the simulated multinomial counts. 
22 | } 23 | \description{ 24 | Method for simulating a data set from \code{\link[=brmultinom]{"brmultinom"}} and \code{\link[=bracl]{"bracl"}} 25 | objects 26 | } 27 | \examples{ 28 | 29 | ## Multinomial logistic regression 30 | data("housing", package = "MASS") 31 | houseML1 <- brmultinom(Sat ~ Infl + Type + Cont, weights = Freq, 32 | data = housing, type = "ML", ref = 1) 33 | simulate(houseML1) 34 | 35 | ## Adjacent-category logits 36 | data("stemcell", package = "brglm2") 37 | stemML1 <- bracl(research ~ religion + gender, weights = frequency, 38 | data = stemcell, type = "ML") 39 | 40 | simulate(stemML1) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/simulate.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{simulate.brnb} 4 | \alias{simulate.brnb} 5 | \title{Simulate Responses} 6 | \usage{ 7 | \method{simulate}{brnb}(object, nsim = 1, seed = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object representing a fitted model.} 11 | 12 | \item{nsim}{number of response vectors to simulate. Defaults to 1.} 13 | 14 | \item{seed}{an object specifying if and how the random number 15 | generator should be initialized; see \code{\link[=set.seed]{set.seed()}} for details.} 16 | 17 | \item{...}{extra arguments to be passed to methods. Not currently 18 | used.} 19 | } 20 | \description{ 21 | Simulate one or more responses from the distribution corresponding 22 | to a fitted model \code{\link[=brnb]{"brnb"}} object. 23 | } 24 | \examples{ 25 | # Example in Saha, K., & Paul, S. (2005). Bias-corrected maximum 26 | # likelihood estimator of the negative binomial dispersion 27 | # parameter. Biometrics, 61, 179--185. 28 | # 29 | # Frequency distribution of red mites on apple leaves. 
30 | nomites <- 0:8 31 | noleaves <- c(70, 38, 17, 10, 9, 3, 2, 1, 0) 32 | fit_glmnb <- MASS::glm.nb(nomites~1,link="identity",weights = noleaves) 33 | fit_brnb <- brnb(nomites ~ 1, link = "identity", transformation = "inverse", 34 | type = "ML",weights = noleaves) 35 | ## Let us simulate 10 response vectors 36 | sim_glmnb <- simulate(fit_glmnb, nsim = 10, seed = 123) 37 | sim_brnb <- simulate(fit_brnb, nsim = 10, seed = 123) 38 | # The results from glm.nb and brnb with type = "ML" are 39 | # exactly the same 40 | all.equal(sim_glmnb, sim_brnb, check.attributes = FALSE) 41 | } 42 | -------------------------------------------------------------------------------- /man/stemcell.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{stemcell} 5 | \alias{stemcell} 6 | \title{Opinion on Stem Cell Research and Religious Fundamentalism} 7 | \format{ 8 | A data frame with 24 rows and 4 variables: 9 | \itemize{ 10 | \item \code{research}: opinion about funding stem cell research with levels \code{definitely}, \code{probably}, \verb{probably not}, \verb{definitely not} 11 | \item \code{gender}: the gender of the respondent with levels \code{female} and \code{male} 12 | \item \code{religion}: the fundamentalism/liberalism of one’s religious beliefs with levels \code{fundamentalist}, \code{moderate}, 13 | \code{liberal} 14 | \code{frequency}: the number of times a respondent fell in each of the combinations of levels for \code{research}, \code{religion} and \code{gender} 15 | } 16 | } 17 | \source{ 18 | The \code{stemcell} data set is analyzed in Agresti (2010, Subsection 4.1.5). 19 | } 20 | \usage{ 21 | stemcell 22 | } 23 | \description{ 24 | A data set from the 2006 General Social Survey that shows the 25 | relationship in the United States between opinion about funding 26 | stem cell research and the fundamentalism/liberalism of one’s 27 | religious beliefs, stratified by gender. 28 | } 29 | \references{ 30 | Agresti A (2010). \emph{Analysis of Ordinal Categorical Data} (2nd edition). Wiley Series in 31 | Probability and Statistics. Wiley. 32 | } 33 | \seealso{ 34 | \code{\link[=bracl]{bracl()}} 35 | } 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/summary.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{summary.brglmFit} 4 | \alias{summary.brglmFit} 5 | \alias{print.summary.brglmFit} 6 | \title{\code{\link[=summary]{summary()}} method for \link{brglmFit} objects} 7 | \usage{ 8 | \method{summary}{brglmFit}( 9 | object, 10 | dispersion = NULL, 11 | correlation = FALSE, 12 | symbolic.cor = FALSE, 13 | ... 14 | ) 15 | 16 | \method{print}{summary.brglmFit}( 17 | x, 18 | digits = max(3L, getOption("digits") - 3L), 19 | symbolic.cor = x$symbolic.cor, 20 | signif.stars = getOption("show.signif.stars"), 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{object}{an object of class \code{"glm"}, usually, a result of a 26 | call to \code{\link[stats]{glm}}.} 27 | 28 | \item{dispersion}{the dispersion parameter for the family used. 
29 | Either a single numerical value or \code{NULL} (the default), when 30 | it is inferred from \code{object} (see \sQuote{Details}).} 31 | 32 | \item{correlation}{logical; if \code{TRUE}, the correlation matrix of 33 | the estimated parameters is returned and printed.} 34 | 35 | \item{symbolic.cor}{logical. If \code{TRUE}, print the correlations in 36 | a symbolic form (see \code{\link[stats]{symnum}}) rather than as numbers.} 37 | 38 | \item{...}{further arguments passed to or from other methods.} 39 | 40 | \item{x}{an object of class \code{"summary.glm"}, usually, a result of a 41 | call to \code{summary.glm}.} 42 | 43 | \item{digits}{the number of significant digits to use when printing.} 44 | 45 | \item{signif.stars}{logical. If \code{TRUE}, \sQuote{significance stars} 46 | are printed for each coefficient.} 47 | } 48 | \description{ 49 | \code{\link[=summary]{summary()}} method for \link{brglmFit} objects 50 | } 51 | \details{ 52 | The interface of the summary method for \code{\link[=brglmFit]{"brglmFit"}} 53 | objects is identical to that of \code{\link[=glm]{"glm"}} objects. The summary 54 | method for \code{\link[=brglmFit]{"brglmFit"}} objects computes the p-values of the 55 | individual Wald statistics based on the standard normal 56 | distribution, unless the family is Gaussian, in which case a t 57 | distribution with appropriate degrees of freedom is used. 58 | } 59 | \examples{ 60 | ## For examples see `examples(brglmFit)` 61 | 62 | } 63 | \seealso{ 64 | \code{\link[=summary.glm]{summary.glm()}} and \code{\link[=glm]{glm()}} 65 | } 66 | -------------------------------------------------------------------------------- /man/summary.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{summary.brnb} 4 | \alias{summary.brnb} 5 | \alias{print.summary.brnb} 6 | \title{\code{\link[=summary]{summary()}} method for \code{\link[=brnb]{"brnb"}} objects} 7 | \usage{ 8 | \method{summary}{brnb}(object, ...) 9 | 10 | \method{print}{summary.brnb}(x, digits = max(3, getOption("digits") - 3), ...) 11 | } 12 | \arguments{ 13 | \item{object}{an object of class \code{\link[=brnb]{"brnb"}}, typically, a 14 | result of a call to \code{\link[=brnb]{brnb()}}.} 15 | 16 | \item{...}{further arguments passed to or from other methods.} 17 | 18 | \item{x}{an object of class \code{\link[=summary.brnb]{"summary.brnb"}}, 19 | usually, a result of a call to \link{summary.brnb}.} 20 | 21 | \item{digits}{the number of significant digits to use when printing.} 22 | } 23 | \description{ 24 | \code{\link[=summary]{summary()}} method for \code{\link[=brnb]{"brnb"}} objects 25 | } 26 | \details{ 27 | The interface of the summary method for \code{\link[=brnb]{"brnb"}} 28 | objects is similar to that of \code{\link[=brglmFit]{"brglmFit"}} objects 29 | with additional information. 30 | 31 | \if{html}{\out{
<div class="sourceCode">}}\preformatted{p-values of the individual Wald statistics are based on the 32 | standard normal distribution. 33 | }\if{html}{\out{</div>
}} 34 | } 35 | \examples{ 36 | # For examples see examples(brnb) 37 | 38 | } 39 | \seealso{ 40 | \code{\link[=summary.brglmFit]{summary.brglmFit()}} and \code{\link[=glm]{glm()}} 41 | } 42 | -------------------------------------------------------------------------------- /man/vcov.brglmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brglmFit.R 3 | \name{vcov.brglmFit} 4 | \alias{vcov.brglmFit} 5 | \title{Return the variance-covariance matrix for the regression parameters 6 | in a \code{\link[=brglmFit]{brglmFit()}} object} 7 | \usage{ 8 | \method{vcov}{brglmFit}(object, model = c("mean", "full", "dispersion"), complete = TRUE, ...) 9 | } 10 | \arguments{ 11 | \item{object}{ 12 | a fitted model object, typically. Sometimes also a 13 | \code{\link{summary}()} object of such a fitted model. 14 | } 15 | 16 | \item{model}{character specifying for which component of the model coefficients should be extracted.} 17 | 18 | \item{complete}{for the \code{aov}, \code{lm}, \code{glm}, \code{mlm}, and where 19 | applicable \code{summary.lm} etc methods: logical indicating if the 20 | full variance-covariance matrix should be returned also in case of 21 | an over-determined system where some coefficients are undefined and 22 | \code{\link[stats]{coef}(.)} contains \code{NA}s correspondingly. When 23 | \code{complete = TRUE}, \code{vcov()} is compatible with 24 | \code{coef()} also in this singular case.} 25 | 26 | \item{...}{ 27 | additional arguments for method functions. For the 28 | \code{\link[stats]{glm}} method this can be used to pass a 29 | \code{dispersion} parameter.} 30 | } 31 | \description{ 32 | Return the variance-covariance matrix for the regression parameters 33 | in a \code{\link[=brglmFit]{brglmFit()}} object 34 | } 35 | \details{ 36 | The options for \code{model} are \code{"mean"} for mean regression parameters 37 | only (default), \code{"dispersion"} for the dispersion parameter (or the 38 | transformed dispersion; see \code{\link[=brglm_control]{brglm_control()}}), and \code{"full"} for 39 | both the mean regression and the (transformed) dispersion 40 | parameters. 41 | } 42 | -------------------------------------------------------------------------------- /man/vcov.brnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brnb.R 3 | \name{vcov.brnb} 4 | \alias{vcov.brnb} 5 | \title{Extract model variance-covariance matrix from \code{\link[=brnb]{"brnb"}} objects} 6 | \usage{ 7 | \method{vcov}{brnb}(object, model = c("mean", "full", "dispersion"), complete = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{\link[=brnb]{"brnb"}}, typically, a result of a call to \code{\link[=brnb]{brnb()}}.} 11 | 12 | \item{model}{character specifying for which component of the model variance-covariance matrix should be extracted.} 13 | 14 | \item{complete}{for the \code{aov}, \code{lm}, \code{glm}, \code{mlm}, and where 15 | applicable \code{summary.lm} etc methods: logical indicating if the 16 | full variance-covariance matrix should be returned also in case of 17 | an over-determined system where some coefficients are undefined and 18 | \code{\link[stats]{coef}(.)} contains \code{NA}s correspondingly. 
When 19 | \code{complete = TRUE}, \code{vcov()} is compatible with 20 | \code{coef()} also in this singular case.} 21 | 22 | \item{...}{ 23 | additional arguments for method functions. For the 24 | \code{\link[stats]{glm}} method this can be used to pass a 25 | \code{dispersion} parameter.} 26 | } 27 | \description{ 28 | Extract model variance-covariance matrix from \code{\link[=brnb]{"brnb"}} objects 29 | } 30 | \details{ 31 | The options for \code{model} are \code{"mean"} for mean regression only 32 | (default), \code{"dispersion"} for the dispersion parameter (in a 33 | chosen transformation; see \code{\link[=brglmControl]{brglmControl()}}, and \code{"full"} for both 34 | the mean regression and the (transformed) dispersion parameters. 35 | See \code{\link[=vcov]{vcov()}} for more details. 36 | } 37 | \seealso{ 38 | \code{\link[=vcov]{vcov()}} 39 | } 40 | -------------------------------------------------------------------------------- /src/expectations.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void expectedValues(double *mu, 6 | double *k, 7 | int *ymax, 8 | int *n, 9 | double *Es2, 10 | double *Es2y, 11 | double *Es1s2, 12 | double *Es3) { 13 | 14 | 15 | int r, s; 16 | double temp1, temp2, temp3, temp4, dens; 17 | double tempa, tempb, tempc; 18 | for(r = 0; r < *n; r++) { 19 | temp1 = 0.0; 20 | temp2 = 0.0; 21 | temp3 = 0.0; 22 | temp4 = 0.0; 23 | tempa = 0.0; 24 | tempb = 0.0; 25 | tempc = 0.0; 26 | for(s = 0; s <= *ymax; s++) { 27 | if (s > 1 ) { 28 | tempa += (s-1) / (*k * (s-1) + 1); 29 | tempb += R_pow_di((s-1), 2) / R_pow_di((*k * (s-1) + 1), 2); 30 | tempc += R_pow_di((s-1), 3) / R_pow_di((*k * (s-1) + 1), 3); 31 | } 32 | dens = dnbinom_mu(s, 1/k[0], mu[r], 0); 33 | temp1 += dens*tempa*tempb; 34 | temp2 += dens*tempb; 35 | temp3 += dens*tempc; 36 | temp4 += dens*tempb*s; 37 | } 38 | Es2[r] = temp2; 39 | Es2y[r] = temp4; 40 | Es3[r] = temp3; 41 | Es1s2[r] = temp1; 42 | } 43 | } 44 | 45 | 46 | -------------------------------------------------------------------------------- /src/hats.c: -------------------------------------------------------------------------------- 1 | void hatsc (int* n, int* p, double* x, double* invfisherinf, double* w, double* hat) 2 | { 3 | int i, j, o, pos1, pos2, pos3; 4 | double summi ; /*summ[*n],*/ 5 | for (o=0;o<*n;o++) { 6 | /*summ[o] = 0 ;*/ 7 | summi = 0; 8 | pos1 = o * *p ; 9 | for (i=0;i<*p;i++) { 10 | pos2 = i * *p; 11 | pos3 = pos1 + i; 12 | for (j=0;j<*p;j++) { 13 | /*summ[o] += x[o * *p + i]*x[o * *p + j]*invfisherinf[j + *p *i]; */ 14 | /*summi += x[o * *p + i]*x[o * *p + j]*invfisherinf[j + *p *i];*/ 15 | summi += x[pos3]*x[pos1 + j]*invfisherinf[j + pos2]; 16 | } 17 | } 18 | /*hat[o] = summ[o] * w[o];*/ 19 | hat[o] = summi * w[o]; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include // for NULL 2 | #include 3 | 4 | /* .C calls */ 5 | extern void expectedValues(void *, void *, void *, void *, void *, void *, void *, void *); 6 | 7 | static const R_CMethodDef CEntries[] = { 8 | {"expectedValues", (DL_FUNC) &expectedValues, 8}, 9 | {NULL, NULL, 0} 10 | }; 11 | 12 | void R_init_brglm2(DllInfo *dll) 13 | { 14 | R_registerRoutines(dll, CEntries, NULL, NULL, NULL); 15 | R_useDynamicSymbols(dll, FALSE); 16 | } 17 | -------------------------------------------------------------------------------- 
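The registration in init.c makes the C routine `expectedValues` callable from R through the `.C()` interface. The following is a rough, hypothetical sketch of such a call (the argument values are made up and this is not necessarily how the package invokes the routine internally); the arguments follow the C signature above, with the last four vectors acting as output buffers:

```r
## Illustrative only: mock inputs matching the signature of expectedValues()
library(brglm2)                 # loads the shared object that registers the routine
n <- 5L
mu <- rep(2.5, n)               # negative binomial means, one per observation
k <- 0.4                        # dispersion parameter (scalar)
ymax <- 100L                    # upper limit of the sum over response values
out <- .C("expectedValues",
          mu = as.double(mu), k = as.double(k),
          ymax = as.integer(ymax), n = as.integer(n),
          Es2 = double(n), Es2y = double(n), Es1s2 = double(n), Es3 = double(n),
          PACKAGE = "brglm2")
str(out[c("Es2", "Es2y", "Es1s2", "Es3")])  # the computed expectations
```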
/tests/tinytest.R: -------------------------------------------------------------------------------- 1 | 2 | if ( requireNamespace("tinytest", quietly=TRUE) ){ 3 | tinytest::test_package("brglm2") 4 | } 5 | 6 | -------------------------------------------------------------------------------- /vignettes/adjacent.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Adjacent category logit models using **brglm2**" 3 | author: "[Ioannis Kosmidis](https://www.ikosmidis.com)" 4 | date: "05 February 2019" 5 | output: rmarkdown::html_vignette 6 | bibliography: brglm2.bib 7 | nocite: | 8 | @kosmidis:2019 9 | vignette: > 10 | %\VignetteIndexEntry{Adjacent category logit models using **brglm2**} 11 | %\VignetteEngine{knitr::rmarkdown} 12 | %\VignetteEncoding{UTF-8} 13 | --- 14 | 15 | ```{r setup, include = FALSE} 16 | knitr::opts_chunk$set( 17 | collapse = TRUE, 18 | comment = "#>", 19 | fig.width = 6, 20 | fig.height = 6 21 | ) 22 | ``` 23 | 24 | # **bracl** 25 | The [**brglm2**](https://github.com/ikosmidis/brglm2) R package provides `bracl()` which is a wrapper of `brglmFit()` for fitting adjacent category models for ordinal responses using either maximum likelihood or maximum penalized likelihood or any of the various bias reduction methods described in `brglmFit()`. There is a formal equivalence between adjacent category logit models for ordinal responses and multinomial logistic regression models (see, e.g. the [Multinomial logistic regression using brglm2](https://cran.r-project.org/package=brglm2/vignettes/multinomial.html) vignette and the `brmultinom()` function). `bracl()` utilizes that equivalence and fits the corresponding Poisson log-linear model, by appropriately re-scaling the Poisson means to match the multinomial totals (a.k.a. the "Poisson trick"). The mathematical details and algorithm on using the Poisson trick for mean-bias reduction are given in @kosmidis:11. 26 | 27 | 28 | 29 | # Citation 30 | If you found this vignette or **brglm2**, in general, useful, please consider citing **brglm2** and the associated paper. You can find information on how to do this by typing `citation("brglm2")`. 31 | 32 | # Opinion on stem cell research and religious fundamentalism 33 | 34 | The `stemcell` data set ships with **brglm2**. @agresti:15[, Section 4.1] provides a detailed description of the variables recorded in this data set (see also `?stemcell`). 35 | ```{r, echo = TRUE} 36 | library("brglm2") 37 | data("stemcell", package = "brglm2") 38 | stem <- within(stemcell, religion <- as.numeric(religion)) 39 | ``` 40 | ## Maximum likelihood estimation 41 | The following chunk of code fits an adjacent category logit model with proportional odds and reproduces @agresti:10[, Table 4.2]. Note that the intercept parameters are different because @agresti:10[, Table 4.2] uses different contrasts for the intercept parameters. 42 | ```{r, echo = TRUE} 43 | stem_formula <- research ~ religion + gender 44 | stemcells_ml <- bracl(stem_formula, weights = frequency, data = stem, 45 | parallel = TRUE, type = "ML") 46 | summary(stemcells_ml) 47 | ``` 48 | `stemcells_ml` is an object inheriting from 49 | ```{r, echo = TRUE} 50 | class(stemcells_ml) 51 | ``` 52 | **brglm2** implements `print`, `coef`, `fitted`, `predict`, `summary`, `vcov` and `logLik` methods for objects of class `"bracl"`. 53 | 54 | 55 | We can check if a model with non-proportional odds fits the data equally well by fitting it and carrying out a likelihood ratio test. 
56 | ```{r, echo = TRUE} 57 | stemcells_ml_full <- bracl(stem_formula, weights = frequency, data = stemcell, 58 | parallel = FALSE, type = "ML") 59 | summary(stemcells_ml_full) 60 | ``` 61 | The value of the log likelihood ratio statistic here is 62 | ```{r, echo = TRUE} 63 | (lrt <- deviance(stemcells_ml) - deviance(stemcells_ml_full)) 64 | ``` 65 | and has an asymptotic chi-squared distribution with 66 | ```{r, echo = TRUE} 67 | (df1 <- df.residual(stemcells_ml) - df.residual(stemcells_ml_full)) 68 | ``` 69 | The p-value from testing the hypothesis that `stemcells_ml_full` is an as good fit as `stemcells_ml` is 70 | ```{r, echo = TRUE} 71 | pchisq(lrt, df1, lower.tail = FALSE) 72 | ``` 73 | hence, the simpler model is found to be as adequate as the full model is. 74 | 75 | # Mean and median bias reduction 76 | We can use `bracl()` to fit the adjacent category model using estimators with smaller mean or median bias. For mean bias reduction we do 77 | ```{r, echo = TRUE} 78 | summary(update(stemcells_ml, type = "AS_mean")) 79 | ``` 80 | and for median 81 | ```{r, echo = TRUE} 82 | summary(update(stemcells_ml, type = "AS_median")) 83 | ``` 84 | The estimates from mean and median bias reduction are similar to the maximum likelihood ones, indicating that estimation bias is not a major issue here. 85 | 86 | # Prediction 87 | We can predict the category probabilities using the `predict()` method 88 | ```{r, echo = TRUE} 89 | predict(stemcells_ml, type = "probs") 90 | ``` 91 | 92 | # Relevant resources 93 | `?brglmFit` and `?brglm_control` provide descriptions of the various bias reduction methods supported in **brglm2**. The [`iteration`](https://cran.r-project.org/package=brglm2/brglm2.pdf) vignette describes the iteration and gives the mathematical details for the bias-reducing adjustments to the score functions for generalized linear models. 94 | 95 | 96 | # Citation 97 | If you found this vignette or **brglm2**, in general, useful, please consider citing **brglm2** and the associated paper. You can find information on how to do this by typing `citation("brglm2")`. 98 | 99 | 100 | # References 101 | -------------------------------------------------------------------------------- /vignettes/brglm2.bib: -------------------------------------------------------------------------------- 1 | @Article{kosmidis:2019, 2 | title = {Mean and median bias reduction in generalized linear models}, 3 | author = {Ioannis Kosmidis and Euloge Clovis {Kenne Pagui} and Nicola Sartori}, 4 | year = 2020, 5 | journal = {Statistics and Computing}, 6 | volume = 30, 7 | pages = {43--59}, 8 | url = {https://doi.org/10.1007/s11222-019-09860-6}, 9 | } 10 | 11 | @Book{agresti:02, 12 | Title = {Categorical Data Analysis}, 13 | Author = {Agresti, A.}, 14 | Publisher = {Wiley}, 15 | Year = 2002 16 | } 17 | 18 | @Book{agresti:15, 19 | title = {Foundations of Linear and Generalized Linear Models}, 20 | publisher = {Wiley}, 21 | year = 2015, 22 | author = {Agresti, A.}, 23 | series = {Wiley Series in Probability and Statistics} 24 | } 25 | 26 | @Book{agresti:10, 27 | title = {Analysis of Ordinal Categorical Data}, 28 | publisher = {Wiley}, 29 | edition = {2nd}, 30 | year = 2010, 31 | author = {Agresti, A.}, 32 | series = {Wiley Series in Probability and Statistics} 33 | } 34 | 35 | @Article{albert:84, 36 | title = {On the existence of maximum likelihood estimates in 37 | logistic regression models}, 38 | author = {Albert, A. and Anderson, J. 
A.}, 39 | journal = {Biometrika}, 40 | year = 1984, 41 | number = 1, 42 | pages = {1--10}, 43 | volume = 71 44 | } 45 | 46 | @Article{cordeiro:91, 47 | Title = {Bias correction in generalized linear models}, 48 | Author = {Cordeiro, G.~M. and McCullagh, P.}, 49 | Journal = {Journal of the Royal Statistical Society, Series B: 50 | Methodological}, 51 | Year = 1991, 52 | Number = 3, 53 | Pages = {629--643}, 54 | Volume = 53 55 | } 56 | 57 | @Article{cox:68, 58 | Title = {A General Definition of Residuals (with Discussion)}, 59 | Author = {Cox, D. R. and Snell, E. J.}, 60 | Journal = {Journal of the Royal Statistical Society, Series B: 61 | Methodological}, 62 | Year = 1968, 63 | Pages = {248--275}, 64 | Volume = 30 65 | } 66 | 67 | @Article{firth:93, 68 | Title = {Bias Reduction of maximum likelihood estimates}, 69 | Author = {Firth, D.}, 70 | Journal = Biometrika, 71 | Year = 1993, 72 | Number = 1, 73 | Pages = {27--38}, 74 | Volume = 80 75 | } 76 | 77 | @Article{heinze:02, 78 | Title = {A solution to the problem of separation in logistic 79 | regression}, 80 | Author = {Heinze, G. and Schemper, M.}, 81 | Journal = {Statistics in Medicine}, 82 | Year = 2002, 83 | Pages = {2409--2419}, 84 | Volume = 21 85 | } 86 | 87 | @Article{kenne:17, 88 | title = {Median bias reduction of maximum likelihood 89 | estimates}, 90 | author = {Kenne Pagui, E. C. and Salvan, A. and Sartori, N.}, 91 | journal = {Biometrika}, 92 | volume = 104, 93 | issue = 4, 94 | pages = {923-938}, 95 | year = 2017, 96 | Url = {https://doi.org/10.1093/biomet/asx046} 97 | } 98 | 99 | @PhdThesis{konis:07, 100 | author = {Kjell Konis}, 101 | title = {Linear programming algorithms for detecting 102 | separated data in binary logistic regression models}, 103 | school = {University of Oxford}, 104 | year = 2007, 105 | type = {DPhil}, 106 | url = 107 | {https://ora.ox.ac.uk/objects/uuid:8f9ee0d0-d78e-4101-9ab4-f9cbceed2a2a} 108 | } 109 | 110 | @PhdThesis{kosmidis:07, 111 | Title = {Bias reduction in exponential family nonlinear 112 | models}, 113 | Author = {Kosmidis, I.}, 114 | School = {Department of Statistics, University of Warwick}, 115 | Year = 2007, 116 | Url = 117 | {http://www.ucl.ac.uk/~ucakiko/files/ikosmidis_thesis.pdf} 118 | } 119 | 120 | @Article{kosmidis:09, 121 | Title = {Bias reduction in exponential family nonlinear 122 | models}, 123 | Author = {Kosmidis, I. and Firth, D.}, 124 | Journal = {Biometrika}, 125 | Year = 2009, 126 | Number = 4, 127 | Pages = {793--804}, 128 | Volume = 96, 129 | Doi = {10.1093/biomet/asp055}, 130 | Eprint = 131 | {http://biomet.oxfordjournals.org/cgi/reprint/96/4/793.pdf}, 132 | Url = 133 | {http://biomet.oxfordjournals.org/cgi/content/abstract/96/4/793} 134 | } 135 | 136 | @Article{kosmidis:10, 137 | Title = {A generic algorithm for reducing bias in parametric 138 | estimation}, 139 | Author = {Kosmidis, I. and Firth, D.}, 140 | Journal = {Electronic Journal of Statistics}, 141 | Year = 2010, 142 | Pages = {1097--1112}, 143 | Volume = 4, 144 | Doi = {10.1214/10-EJS579}, 145 | Url = {https://dx.doi.org/10.1214/10-EJS579} 146 | } 147 | 148 | @Article{kosmidis:11, 149 | Title = {Multinomial logit bias reduction via the Poisson 150 | log-linear model}, 151 | Author = {Kosmidis, I. 
and Firth, D.}, 152 | Journal = {Biometrika}, 153 | Year = 2011, 154 | Number = 3, 155 | Pages = {755-759}, 156 | Volume = 98, 157 | Owner = {yiannis}, 158 | Timestamp = {2010.05.16} 159 | } 160 | 161 | @Article{kosmidis:14, 162 | Title = {Bias in parametric estimation: reduction and useful 163 | side-effects}, 164 | Author = {Kosmidis, I.}, 165 | Journal = {Wiley Interdisciplinary Reviews: Computational 166 | Statistics}, 167 | Year = 2014, 168 | Number = 3, 169 | Pages = {185--196}, 170 | Volume = 6, 171 | Doi = {10.1002/wics.1296}, 172 | ISSN = {1939-0068}, 173 | Keywords = {jackknife/bootstrap, indirect inference, penalized 174 | likelihood, infinite estimates, separation in models 175 | with categorical responses}, 176 | Publisher = {John Wiley \& Sons, Inc.}, 177 | Url = {https://doi.org/10.1002/wics.1296} 178 | } 179 | 180 | @Article{kosmidis:14a, 181 | Title = {Improved estimation in cumulative link models}, 182 | Author = {Kosmidis, I.}, 183 | Journal = {Journal of the Royal Statistical Society, Series B: 184 | Methodological}, 185 | Year = 2014, 186 | Number = 1, 187 | Pages = {169--196}, 188 | Volume = 76, 189 | Doi = {10.1111/rssb.12025}, 190 | ISSN = {1467-9868}, 191 | Keywords = {Adjusted counts, Adjusted score equations, Ordinal 192 | response models, Reduction of bias, Shrinkage}, 193 | Url = {https://doi.org/10.1111/rssb.12025} 194 | } 195 | 196 | @article{lesaffre:89, 197 | URL = {https://www.jstor.org/stable/2345845}, 198 | author = {E. Lesaffre and A. Albert}, 199 | journal = {Journal of the Royal Statistical Society. Series B 200 | (Methodological)}, 201 | number = 1, 202 | pages = {109-116}, 203 | publisher = {[Royal Statistical Society, Wiley]}, 204 | title = {Partial Separation in Logistic Discrimination}, 205 | volume = 51, 206 | year = 1989 207 | } 208 | 209 | @book{mccullagh:89, 210 | author = "McCullagh, P. and Nelder, J. A.", 211 | year = 1989, 212 | edition = {2nd}, 213 | title = "Generalized Linear Models", 214 | address = "London", 215 | publisher = "Chapman and Hall" 216 | } 217 | 218 | @article{kenne:20, 219 | Author = {{Kenne Pagui}, E. C. and Salvan, A. and Sartori, N.}, 220 | Journal = {eprint arXiv:2011.02784}, 221 | Title = {Accurate inference in negative binomial regression}, 222 | Url = {https://arxiv.org/abs/2011.02784}, 223 | Year = 2020 224 | } 225 | 226 | @article{magolin:89, 227 | author = {Barry H. Margolin and Byung Soo Kim and Kenneth J. 
Risko}, 228 | title = {The Ames Salmonella/Microsome Mutagenicity Assay: 229 | Issues of Inference and Validation}, 230 | journal = {Journal of the American Statistical Association}, 231 | volume = 84, 232 | number = 407, 233 | pages = {651-661}, 234 | year = 1989, 235 | doi = {10.1080/01621459.1989.10478817} 236 | } 237 | 238 | @article{kosmidis+firth:21, 239 | author = {Kosmidis, Ioannis and Firth, David}, 240 | title = "{Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models}", 241 | journal = {Biometrika}, 242 | volume = {108}, 243 | number = {1}, 244 | pages = {71-82}, 245 | year = {2020}, 246 | url = {https://doi.org/10.1093/biomet/asaa052} 247 | } 248 | -------------------------------------------------------------------------------- /vignettes/expo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Estimating the exponential of regression parameters using **brglm2**" 3 | author: "[Ioannis Kosmidis](https://www.ikosmidis.com)" 4 | date: "03 February 2023" 5 | output: rmarkdown::html_vignette 6 | bibliography: brglm2.bib 7 | vignette: > 8 | %\VignetteIndexEntry{Estimating the exponential of regression parameters using **brglm2**} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | comment = "#>", 17 | fig.width = 6, 18 | fig.height = 6 19 | ) 20 | ``` 21 | 22 | # The `expo()` method 23 | 24 | The [**brglm2**](https://github.com/ikosmidis/brglm2) R package provides the `expo()` method for estimating exponentiated parameters of generalized linear models using various methods. 25 | 26 | The `expo()` method uses a supplied `"brglmFit"` or `"glm"` object to estimate exponentiated parameters of generalized linear models with maximum likelihood or various mean and median bias reduction methods. `expo()` is useful for computing (corrected) estimates of the multiplicative impact of a unit increase on a covariate on the mean of a Poisson log-linear model (`family = poisson("log")` in `glm()`) while adjusting for other covariates, the odds ratio associated with a unit increase on a covariate in a logistic regression model (`family = binomial("logit")` in `glm()`) while adjusting for other covariates, the relative risk associated with a unit increase on a covariate in a relative risk regression model (`family = binomial("log")` in `glm()`) while adjusting for other covariates, among others. 27 | 28 | The vignette demonstrates the use of `expo()` and the associated methods by reproducing part of the analyses in @agresti:02[, Section 5.4.2] on the effects of AZT in slowing the development of AIDS symptoms. 29 | 30 | # AIDS and AZT use 31 | 32 | The data analyzed in @agresti:02[, Section 5.4.2] is from a 3-year 33 | study on the effects of AZT in slowing the development of AIDS 34 | symptoms. 338 veterans whose immune systems were beginning to falter 35 | after infection with the AIDS virus were randomly assigned either to 36 | receive AZT immediately or to wait until their T cells showed severe 37 | immune weakness. See `?aids` for more details. 38 | 39 | The `aids` data set cross-classifies the veterans' race (`race`), whether they received AZT immediately (`AZT`), and whether they developed AIDS symptoms during the 3-year study (`symptomatic` and `asymptomatic`). 
40 | ```{r, echo = TRUE} 41 | library("brglm2") 42 | data("aids", package = "brglm2") 43 | aids 44 | ``` 45 | 46 | We now use a logistic regression model to model the probability of developing symptoms in terms of `AZT` and `race`, and reproduce part of the compute output in @agresti:02[, Table 5.6]. 47 | ```{r, echo = TRUE} 48 | aids_mod <- glm(cbind(symptomatic, asymptomatic) ~ AZT + race, 49 | family = binomial(), data = aids) 50 | summary(aids_mod) 51 | ``` 52 | 53 | The Wald test for the hypothesis of conditional independence of AZT treatment and development of AIDS symptoms, controlling for race, returns a p-value of `r round(coef(summary(aids_mod))["AZTYes", "Pr(>|z|)"], 3)`, showing evidence of association. 54 | 55 | The predicted probabilities for each combination of levels 56 | 57 | 58 | The maximum likelihood estimates of the odds ratio between immediate AZT use and development of AIDS symptoms can be inferred from `aids_mod` through the `expo()` method, which also estimates standard errors using the delta method, and returns approximate 95% confidence intervals (see `?expo` for details). 59 | ```{r, echo = TRUE} 60 | expo(aids_mod, type = "ML") 61 | ``` 62 | 63 | As noted in @agresti:02[, Section 5.4.2], for each race, the estimated 64 | odds of symptoms are half as high for those who took AZT immediately, 65 | with value $0.49$ and a nominally 95\% Wald confidence interval 66 | $(0.28, 0.84)$. 67 | 68 | The `expo()` method can be used to estimate the odds ratios using three methods that return estimates of the odds ratios with asymptotically smaller mean bias than the maximum likelihood estimator 69 | ```{r, echo = TRUE} 70 | expo(aids_mod, type = "correction*") 71 | expo(aids_mod, type = "Lylesetal2012") 72 | expo(aids_mod, type = "correction+") 73 | ``` 74 | and one method that returns estimates of the odds ratios with asymptotically smaller median bias than the maximum likelihood estimator 75 | ```{r, echo = TRUE} 76 | expo(aids_mod, type = "AS_median") 77 | ``` 78 | 79 | The estimated odds ratios and associated inferences from the methods that correct for mean and median bias are similar to those from maximum likelihood. 80 | 81 | # Infinite odds ratio estimates 82 | 83 | When `expo()` is called with `type = correction*`, `type = correction+`, `type = Lylesetal2012`, and `type = AS_median`, then the estimates of the odds ratios can be shown to be always finite and greater than zero. The reason is that the corresponding odds-ratio estimators depend on regression parameter estimates that are finite even if the maximum likelihood estimates are infinite. See, @kosmidis:2019 and @kosmidis+firth:21 for details. 84 | 85 | As an example, consider the estimated odds ratios from a logistic regression model fitted on the `endometrial` data set using maximum likelihood. 86 | ```{r, echo = TRUE} 87 | data("endometrial", package = "brglm2") 88 | endometrialML <- glm(HG ~ NV + PI + EH, data = endometrial, family = binomial()) 89 | endometrialML 90 | ``` 91 | 92 | The estimate of the coefficient for `NV` is in reality infinite as it can be verified using the [**detectseparation**](https://cran.r-project.org/package=detectseparation) R package 93 | ```{r, echo = TRUE} 94 | library("detectseparation") 95 | update(endometrialML, method = detect_separation) 96 | ``` 97 | and a naive estimate of the associated odds ratio while controlling for `PI` and `EH` is `r exp(coef(endometrialML)["NV"])`, which is in reality infinite. 
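To illustrate what drives the finiteness of the corrected estimators, here is a minimal sketch (not the package's internal code) of the `"correction*"` recipe described in `?expo`, computed by hand for `NV` from a mean-bias-reduced fit; it assumes, for illustration, that the quantity $\hat{v}_j$ in that recipe is the corresponding diagonal element of `vcov()`.
```{r, echo = TRUE}
## Hand-rolled sketch of the "correction*" recipe (illustrative assumptions above)
endometrial_br <- update(endometrialML, method = "brglmFit", type = "AS_mixed")
theta_NV <- coef(endometrial_br)["NV"]          # finite reduced-bias estimate
v_NV <- diag(vcov(endometrial_br))["NV"]        # assumed estimate of its variance
exp(theta_NV) / (1 + v_NV / 2)                  # finite odds-ratio estimate
```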
98 | 99 | In contrast, `expo()` returns finite reduced-mean-bias estimates of the odds ratios 100 | ```{r, echo = TRUE} 101 | expo(endometrialML, type = "correction*") 102 | expo(endometrialML, type = "correction+") 103 | expo(endometrialML, type = "Lylesetal2012") 104 | ``` 105 | 106 | # `brglmFit` objects 107 | 108 | The `expo()` method also works seamlessly with `brglmFit` objects, returning the same results as above. For example, 109 | ```{r, echo = TRUE} 110 | aids_mod_br <- update(aids_mod, method = "brglmFit") 111 | expo(aids_mod_br, type = "correction*") 112 | ``` 113 | 114 | 115 | # References 116 | -------------------------------------------------------------------------------- /vignettes/multinomial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Multinomial logistic regression using **brglm2**" 3 | author: "[Ioannis Kosmidis](https://www.ikosmidis.com)" 4 | date: "01 July 2017" 5 | output: rmarkdown::html_vignette 6 | bibliography: brglm2.bib 7 | vignette: > 8 | %\VignetteIndexEntry{Multinomial logistic regression using **brglm2**} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | ```{r setup, include = FALSE} 14 | knitr::opts_chunk$set( 15 | collapse = TRUE, 16 | comment = "#>", 17 | fig.width = 6, 18 | fig.height = 6 19 | ) 20 | ``` 21 | 22 | 23 | # **brmultinom** 24 | The [**brglm2**](https://github.com/ikosmidis/brglm2) R package provides `brmultinom()` which is a wrapper of `brglmFit` for fitting multinomial logistic regression models (a.k.a. baseline category logit models) using either maximum likelihood or maximum penalized likelihood or any of the various bias reduction methods described in `brglmFit()`. `brmultinom()` uses the equivalent Poisson log-linear model, by appropriately re-scaling the Poisson means to match the multinomial totals (a.k.a. the "Poisson trick"). The mathematical details and algorithm on using the Poisson trick for mean-bias reduction are given in @kosmidis:11. 25 | 26 | This vignette illustrates the use of `brmultinom()` and of the associated methods, using the alligator food choice example in @agresti:02[, Section 7.1]. 27 | 28 | # Alligator data 29 | 30 | The alligator data set ships with **brglm2**. @agresti:02[, Section 7.1] provides a detailed description of the variables recorded in the data set. 31 | ```{r, echo = TRUE} 32 | library("brglm2") 33 | data("alligators", package = "brglm2") 34 | ``` 35 | 36 | ## Maximum likelihood estimation 37 | 38 | The following chunk of code reproduces @agresti:02[, Table 7.4]. Note that in order to get the estimates and standard errors reported in the latter table, we have to explicitly specify the contrasts that @agresti:02 uses. 
39 | ```{r, echo = TRUE} 40 | agresti_contrasts <- list(lake = contr.treatment(levels(alligators$lake), base = 4), 41 | size = contr.treatment(levels(alligators$size), base = 2)) 42 | all_ml <- brmultinom(foodchoice ~ size + lake , weights = freq, 43 | data = alligators, 44 | contrasts = agresti_contrasts, 45 | ref = 1, 46 | type = "ML") 47 | all_ml_summary <- summary(all_ml) 48 | ## Estimated regression parameters 49 | round(all_ml_summary$coefficients, 2) 50 | ## Estimated standard errors 51 | round(all_ml_summary$standard.errors, 2) 52 | ``` 53 | 54 | ## Mean and median bias reduction 55 | 56 | Fitting the model using mean-bias reducing adjusted score equations gives 57 | ```{r, echo = TRUE} 58 | all_mean <- update(all_ml, type = "AS_mean") 59 | summary(all_mean) 60 | ``` 61 | The corresponding fit using median-bias reducing adjusted score equations is 62 | ```{r, echo = TRUE} 63 | all_median <- update(all_ml, type = "AS_median") 64 | summary(all_median) 65 | ``` 66 | 67 | The estimates and the estimated standard errors from bias reduction 68 | are close to those for maximum likelihood. As a result, it is unlikely 69 | that either mean or median bias is of any real consequence for this 70 | particular model and data combination. 71 | 72 | # Infinite estimates and multinomial logistic regression 73 | 74 | Let's scale the frequencies in `alligators` by 3 in order to get a sparser data set. The differences between maximum likelihood and mean and median bias reduction should be more apparent on the resulting data set. Here we have to "slow-down" the Fisher scoring iteration (by scaling the step-size), because otherwise the Fisher information matrix quickly gets numerically rank-deficient. The reason is data separation [@albert:84]. 75 | ```{r, echo = TRUE, error = TRUE} 76 | all_ml_sparse <- update(all_ml, weights = round(freq/3), slowit = 0.1) 77 | summary(all_ml_sparse) 78 | ``` 79 | Specifically, judging from the estimated standard errors, the estimates for `(Intercept)`, `lakeHancock`, `lakeOklawaha` and `lakeTrafford` for `Reptile` and `lakeHancock` for `Bird` seem to be infinite. 80 | 81 | To quickly check if that's indeed the case we can use the `check_infinite_estimates()` method of the [**detectseparation**](https://cran.r-project.org/package=detectseparation) R package. 82 | ```{r, echo = TRUE} 83 | library("detectseparation") 84 | se_ratios <- check_infinite_estimates(all_ml_sparse) 85 | plot(se_ratios) 86 | ``` 87 | 88 | Some of the estimated standard errors diverge as the number of Fisher scoring iterations increases, which is evidence of complete or quasi-complete separation [@lesaffre:89]. 89 | 90 | In contrast, both mean and median bias reduction result in finite 91 | estimates 92 | ```{r, echo = TRUE} 93 | all_mean_sparse <- update(all_ml_sparse, type = "AS_mean") 94 | summary(all_mean_sparse) 95 | 96 | all_median_sparse <- update(all_ml_sparse, type = "AS_median") 97 | summary(all_median_sparse) 98 | ``` 99 | 100 | 101 | 102 | # Relevant resources 103 | `?brglmFit` and `?brglm_control` contain quick descriptions of the various bias reduction methods supported in **brglm2**. The [`iteration`](https://cran.r-project.org/package=brglm2/brglm2.pdf) vignette describes the iteration and gives the mathematical details for the bias-reducing adjustments to the score functions for generalized linear models. 104 | 105 | # Citation 106 | If you found this vignette or **brglm2**, in general, useful, please consider citing **brglm2** and the associated paper. 
You can find information on how to do this by typing `citation("brglm2")`. 107 | 108 | # References 109 | --------------------------------------------------------------------------------