├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── S3_confint.R ├── S3_emmeans.R ├── S3_glance.R ├── S3_nobs.R ├── S3_predict.R ├── S3_print.R ├── S3_summary.R ├── S3_tidy.R ├── S3_update.R ├── S3_vcov.R ├── data.R ├── estimatr.R ├── estimatr_difference_in_means.R ├── estimatr_horvitz_thompson.R ├── estimatr_iv_robust.R ├── estimatr_lh_robust.R ├── estimatr_lm_lin.R ├── estimatr_lm_robust.R ├── helper_cis_pvals.R ├── helper_clean_model_data.R ├── helper_condition_pr_matrix.R ├── helper_extract.R ├── helper_lm_robust_fit.R ├── helper_na_omit_detailed.R ├── helper_parse_arguments.R ├── helper_return.R ├── helper_starprep.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── cran-comments.md ├── data-raw ├── STAR_public_use.dta └── build-alo-star-for-lin.R ├── data └── alo_star_men.rda ├── estimatr.Rproj ├── man ├── alo_star_men.Rd ├── commarobust.Rd ├── declaration_to_condition_pr_mat.Rd ├── difference_in_means.Rd ├── estimatr.Rd ├── estimatr_glancers.Rd ├── estimatr_tidiers.Rd ├── extract.lm_robust.Rd ├── gen_pr_matrix_cluster.Rd ├── horvitz_thompson.Rd ├── iv_robust.Rd ├── lh_robust.Rd ├── lm_lin.Rd ├── lm_robust.Rd ├── lm_robust_fit.Rd ├── na.omit_detailed.data.frame.Rd ├── permutations_to_condition_pr_mat.Rd ├── predict.lm_robust.Rd ├── reexports.Rd └── starprep.Rd ├── src ├── Makevars ├── Makevars.win ├── RcppExports.cpp ├── horvitz_thompson_variance.cpp ├── lm_robust_helper.cpp └── naomit.cpp ├── tests ├── sleep.R ├── sleep.Rout.save ├── testthat.R └── testthat │ ├── helper-lm-robust-se.R │ ├── helper-return-cleaners.R │ ├── helper-se-types.R │ ├── mtcars.csv │ ├── run-stata-areg-models.do │ ├── run-stata-iv-models.do │ ├── run-stata-models.do │ ├── stata-ests.txt │ ├── stata-fe-ests.txt │ ├── stata-iv-diagnostics.txt │ ├── stata-iv-ests.txt │ ├── test-arg-checking.R │ ├── test-condition-pr-matrix.R │ ├── test-condition1-condition2.R │ ├── test-difference-in-means.R │ ├── test-horvitz-thompson.R │ ├── test-iv-robust-fes.R │ ├── test-iv-robust.R │ ├── test-lh-robust.R │ ├── test-lm-cluster.R │ ├── test-lm-lin.R │ ├── test-lm-robust-fes.R │ ├── test-lm-robust-helper.R │ ├── test-lm-robust.R │ ├── test-lm-robust_emmeans.R │ ├── test-lm-robust_margins.R │ ├── test-modelsummary.R │ ├── test-na-omit-details.R │ ├── test-replicate-HT-middleton.R │ ├── test-replicate-lin2013.R │ ├── test-return.R │ ├── test-s3-methods.R │ ├── test-sig-testing.R │ ├── test-starprep.R │ ├── test-stata-output.R │ ├── test-texreg.R │ ├── test-zzz.R │ └── test-zzzbroom.R └── vignettes ├── absorbing-fixed-effects.Rmd ├── benchmarking-estimatr.Rmd ├── emmeans-examples.Rmd ├── estimatr-in-the-tidyverse.Rmd ├── estimatr.bib ├── getting-started.Rmd ├── lm_speed.png ├── lm_speed_covars.png ├── mathematical-notes.Rmd ├── regression-tables.Rmd ├── simulations-debias-dim.rda ├── simulations-debiasing-dim.Rmd ├── simulations-ols-var.rda ├── simulations-ols-variance.Rmd └── stata-wls-hat.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 2 | README.Rmd 3 | README.md 4 | index.Rmd 5 | index.md 6 | ^.*\.Rproj$ 7 | ^\.Rproj\.user$ 8 | appveyor.yml 9 | ^appveyor\.yml$ 10 | CONDUCT.md 11 | ^vignettes$ 12 | ^docs$ 13 | ^.github$ 14 | ^docs/templates/.*$ 15 | deploy.sh 16 | _pkgdown.yml 17 | update_repo.R 18 | ^data-raw$ 19 | ^_pkgdown\.yml$ 20 | 
^tests/testthat/mtcars\.csv$ 21 | ^tests/testthat/run-stata-models\.do$ 22 | ^tests/testthat/test-zzzbroom\.R$ 23 | ^estimatr_.*\.tar\.gz$ 24 | ^tests/testthat/test-texreg\.R$ 25 | ^tests/testthat/test-gtsummary\.R$ 26 | ^cran-comments\.md$ 27 | ^tests/testthat/test-zzz\.R$ 28 | ^CRAN-RELEASE$ 29 | ^pkgdown$ 30 | ^\.github$ 31 | ^CRAN-SUBMISSION$ 32 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: R-CMD-check.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | R-CMD-check: 14 | runs-on: ${{ matrix.config.os }} 15 | 16 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | config: 22 | - {os: macos-latest, r: 'release'} 23 | - {os: windows-latest, r: 'release'} 24 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 25 | - {os: ubuntu-latest, r: 'release'} 26 | - {os: ubuntu-latest, r: 'oldrel-1'} 27 | 28 | env: 29 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 30 | R_KEEP_PKG_SOURCE: yes 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | 35 | - uses: r-lib/actions/setup-pandoc@v2 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | http-user-agent: ${{ matrix.config.http-user-agent }} 41 | use-public-rspm: true 42 | 43 | - uses: r-lib/actions/setup-r-dependencies@v2 44 | with: 45 | extra-packages: any::rcmdcheck 46 | needs: check 47 | 48 | - uses: r-lib/actions/check-r-package@v2 49 | with: 50 | upload-snapshots: true 51 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 52 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | issue_comment: 3 | types: [created] 4 | name: Commands 5 | jobs: 6 | document: 7 | if: startsWith(github.event.comment.body, '/document') 8 | name: document 9 | runs-on: macOS-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: r-lib/actions/pr-fetch@master 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | - uses: r-lib/actions/setup-r@master 16 | - name: Install dependencies 17 | run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' 18 | - name: Document 19 | run: Rscript -e 'roxygen2::roxygenise()' 20 | - name: commit 21 | run: | 22 | git add man/\* NAMESPACE 23 | git commit -m 'Document' 24 | - uses: r-lib/actions/pr-push@master 25 | with: 26 | repo-token: ${{ secrets.GITHUB_TOKEN }} 27 | style: 28 | if: startsWith(github.event.comment.body, '/style') 29 | name: style 30 | runs-on: macOS-latest 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: r-lib/actions/pr-fetch@master 34 | with: 35 | repo-token: ${{ secrets.GITHUB_TOKEN }} 36 | - uses: r-lib/actions/setup-r@master 37 | - name: Install dependencies 38 | run: Rscript -e 'install.packages("styler")' 39 | - name: Style 40 | run: Rscript -e 
'styler::style_pkg()' 41 | - name: commit 42 | run: | 43 | git add \*.R 44 | git commit -m 'Style' 45 | - uses: r-lib/actions/pr-push@master 46 | with: 47 | repo-token: ${{ secrets.GITHUB_TOKEN }} 48 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr, any::xml2 28 | needs: coverage 29 | 30 | - name: Test coverage 31 | run: | 32 | cov <- covr::package_coverage( 33 | quiet = FALSE, 34 | clean = FALSE, 35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 36 | ) 37 | covr::to_cobertura(cov) 38 | shell: Rscript {0} 39 | 40 | - uses: codecov/codecov-action@v4 41 | with: 42 | # Fail if error if not on PR, or if on PR and token is given 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 44 | file: ./cobertura.xml 45 | plugin: noop 46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | # Example code in package build process 5 | *-Ex.R 6 | # RStudio files 7 | .Rproj.user/ 8 | # produced vignettes 9 | vignettes/*.html 10 | vignettes/*.pdf 11 | vignettes/.* 12 | docs/ 13 | .Rproj.user 14 | tests/testthat/*.pdf 15 | .DS_Store 16 | 17 | # compiled files 18 | src/*.o 19 | src/*.so 20 | src/*.dll 21 | inst/doc 22 | *_cache/ 23 | docs 24 | -------------------------------------------------------------------------------- /CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at [abuse@declaredesign.org](mailto:abuse@declaredesign.org). All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: estimatr 2 | Type: Package 3 | Title: Fast Estimators for Design-Based Inference 4 | Version: 1.0.4 5 | Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@gmail.com", role = c("aut", "cre")), 6 | person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")), 7 | person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")), 8 | person("Macartan", "Humphreys", email = "macartan@gmail.com", role = c("aut")), 9 | person("Luke", "Sonnet", email = "luke.sonnet@gmail.com", role = c("aut")), 10 | person("Neal", "Fultz", email = "nfultz@gmail.com", role = c("ctb")), 11 | person("Lily", "Medina", email = "lilymiru@gmail.com", role = c("ctb")), 12 | person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb"))) 13 | Description: Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . 14 | URL: https://declaredesign.org/r/estimatr/, https://github.com/DeclareDesign/estimatr 15 | BugReports: https://github.com/DeclareDesign/estimatr/issues 16 | License: MIT + file LICENSE 17 | Depends: R (>= 3.6.0) 18 | Imports: 19 | Formula, 20 | generics, 21 | methods, 22 | Rcpp (>= 0.12.16), 23 | rlang (>= 0.2.0) 24 | LinkingTo: Rcpp, RcppEigen 25 | Encoding: UTF-8 26 | RoxygenNote: 7.3.1 27 | LazyData: true 28 | Suggests: 29 | fabricatr (>= 0.10.0), 30 | randomizr (>= 0.20.0), 31 | AER, 32 | clubSandwich, 33 | emmeans (>= 1.4), 34 | estimability, 35 | margins, 36 | modelsummary, 37 | prediction, 38 | RcppEigen, 39 | sandwich, 40 | stargazer, 41 | testthat, 42 | car 43 | Enhances: 44 | texreg 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015-2018 2 | COPYRIGHT HOLDER: Graeme Blair, Jasper Cooper, Alexander Coppock, Macartan Humphreys, and Luke Sonnet 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(confint,difference_in_means) 4 | S3method(confint,horvitz_thompson) 5 | S3method(confint,iv_robust) 6 | S3method(confint,lh) 7 | S3method(confint,lh_robust) 8 | S3method(confint,lm_robust) 9 | S3method(glance,difference_in_means) 10 | S3method(glance,horvitz_thompson) 11 | S3method(glance,iv_robust) 12 | S3method(glance,lh_robust) 13 | S3method(glance,lm_robust) 14 | S3method(nobs,difference_in_means) 15 | S3method(nobs,horvitz_thompson) 16 | S3method(nobs,iv_robust) 17 | S3method(nobs,lh_robust) 18 | S3method(nobs,lm_robust) 19 | S3method(nobs,summary.lm_robust) 20 | 
S3method(predict,iv_robust) 21 | S3method(predict,lm_robust) 22 | S3method(print,difference_in_means) 23 | S3method(print,horvitz_thompson) 24 | S3method(print,iv_robust) 25 | S3method(print,lh) 26 | S3method(print,lh_robust) 27 | S3method(print,lm_robust) 28 | S3method(print,summary.iv_robust) 29 | S3method(print,summary.lh) 30 | S3method(print,summary.lh_robust) 31 | S3method(print,summary.lm_robust) 32 | S3method(summary,difference_in_means) 33 | S3method(summary,horvitz_thompson) 34 | S3method(summary,iv_robust) 35 | S3method(summary,lh) 36 | S3method(summary,lh_robust) 37 | S3method(summary,lm_robust) 38 | S3method(tidy,difference_in_means) 39 | S3method(tidy,horvitz_thompson) 40 | S3method(tidy,iv_robust) 41 | S3method(tidy,lh) 42 | S3method(tidy,lh_robust) 43 | S3method(tidy,lm_robust) 44 | S3method(update,iv_robust) 45 | S3method(vcov,difference_in_means) 46 | S3method(vcov,horvitz_thompson) 47 | S3method(vcov,iv_robust) 48 | S3method(vcov,lm_robust) 49 | export(commarobust) 50 | export(declaration_to_condition_pr_mat) 51 | export(difference_in_means) 52 | export(extract.iv_robust) 53 | export(extract.lm_robust) 54 | export(gen_pr_matrix_cluster) 55 | export(glance) 56 | export(horvitz_thompson) 57 | export(iv_robust) 58 | export(lh_robust) 59 | export(lm_lin) 60 | export(lm_robust) 61 | export(lm_robust_fit) 62 | export(permutations_to_condition_pr_mat) 63 | export(starprep) 64 | export(tidy) 65 | importFrom(Formula,Formula) 66 | importFrom(Formula,as.Formula) 67 | importFrom(Rcpp,evalCpp) 68 | importFrom(generics,glance) 69 | importFrom(generics,tidy) 70 | importFrom(methods,className) 71 | importFrom(methods,isGeneric) 72 | importFrom(methods,setGeneric) 73 | importFrom(methods,setMethod) 74 | importFrom(rlang,"%||%") 75 | importFrom(rlang,enquo) 76 | importFrom(rlang,enquos) 77 | importFrom(rlang,eval_tidy) 78 | importFrom(rlang,f_rhs) 79 | importFrom(rlang,quo) 80 | importFrom(rlang,quo_get_expr) 81 | importFrom(rlang,quo_is_missing) 82 | importFrom(rlang,quo_set_expr) 83 | importFrom(rlang,quos) 84 | importFrom(rlang,sym) 85 | importFrom(stats,.checkMFClasses) 86 | importFrom(stats,.getXlevels) 87 | importFrom(stats,coef) 88 | importFrom(stats,complete.cases) 89 | importFrom(stats,confint) 90 | importFrom(stats,delete.response) 91 | importFrom(stats,df.residual) 92 | importFrom(stats,fitted.values) 93 | importFrom(stats,formula) 94 | importFrom(stats,getCall) 95 | importFrom(stats,lm) 96 | importFrom(stats,lm.fit) 97 | importFrom(stats,model.extract) 98 | importFrom(stats,model.frame) 99 | importFrom(stats,model.frame.default) 100 | importFrom(stats,model.matrix) 101 | importFrom(stats,model.matrix.default) 102 | importFrom(stats,model.matrix.lm) 103 | importFrom(stats,model.response) 104 | importFrom(stats,na.omit) 105 | importFrom(stats,na.pass) 106 | importFrom(stats,nobs) 107 | importFrom(stats,pchisq) 108 | importFrom(stats,pf) 109 | importFrom(stats,printCoefmat) 110 | importFrom(stats,pt) 111 | importFrom(stats,qt) 112 | importFrom(stats,reformulate) 113 | importFrom(stats,resid) 114 | importFrom(stats,residuals) 115 | importFrom(stats,sd) 116 | importFrom(stats,setNames) 117 | importFrom(stats,terms) 118 | importFrom(stats,update) 119 | importFrom(stats,var) 120 | importFrom(stats,vcov) 121 | importFrom(stats,weighted.mean) 122 | importFrom(stats,weights) 123 | importFrom(utils,getS3method) 124 | importFrom(utils,packageVersion) 125 | useDynLib(estimatr, .registration = TRUE) 126 | -------------------------------------------------------------------------------- 
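A minimal sketch of the S3 interface registered in the NAMESPACE above (tidy, glance, confint, vcov, nobs, summary), assuming base R's mtcars data and an arbitrary formula — illustrative only, not a file shipped in the package:

    library(estimatr)
    fit <- lm_robust(mpg ~ hp + wt, data = mtcars, se_type = "HC2")
    summary(fit)               # dispatches summary.lm_robust / print.summary.lm_robust
    tidy(fit)                  # data.frame: term, estimate, std.error, statistic, p.value, conf.low, conf.high, df, outcome
    glance(fit)                # one-row fit summary: r.squared, adj.r.squared, statistic, p.value, df.residual, nobs, se_type
    confint(fit, level = 0.90) # confint.lm_robust
    vcov(fit)                  # robust variance-covariance matrix
    nobs(fit)                  # number of observations used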
/NEWS.md: -------------------------------------------------------------------------------- 1 | # estimatr 1.0.4 2 | 3 | * Test suite changes for M1 mac stay current on CRAN. 4 | 5 | # estimatr 1.0.2 6 | 7 | * Minor documentation changes to stay current on CRAN. 8 | 9 | # estimatr 1.0.0 10 | 11 | * Version bump to coincide with DeclareDesign package version 1.0.0 12 | * Tests edited 13 | 14 | # estimatr 0.30.6 15 | 16 | * Fix tests to address CRAN failures 17 | 18 | # estimatr 0.30.4 19 | 20 | * Bug fix of tidy handling of conf.level 21 | * Bug fix of lh_robust tidy 22 | 23 | # estimatr 0.30.2 24 | 25 | * Remove lfe from tests 26 | 27 | # estimatr 0.30.0 28 | 29 | * Test suite changes (skip if not installed for checking against other packages) 30 | 31 | # estimatr 0.28.0 32 | 33 | * Test suite changes 34 | 35 | # estimatr 0.26.0 36 | 37 | * Test suite changes 38 | 39 | # estimatr 0.24.0 40 | 41 | * tidy: rename nobs, nclusters, nblocks 42 | * tidy: new arguments conf.int, conf.level 43 | * Added `update.iv_robust()` 44 | * Bug fix regarding fixed effects with large numbers 45 | 46 | # estimatr 0.22.0 47 | 48 | * Bug fixes 49 | 50 | # estimatr 0.20.0 51 | 52 | * Added support for `emmeans` (thanks @rvlenth)! 53 | * Fixed bug when estimating `diagnostics` in `iv_robust()` without explicitly specifying `se_type` (issue #310) 54 | * Support for `rlang` 0.4.0 55 | 56 | # estimatr 0.18.0 57 | 58 | * Fixed bug where collinear covariates caused fixed effects estimator to crash (issue #294) 59 | * Added `glance.lh_robust()` and fixed some issues with printing and summarizing `lh_robust()` objects (issues #295 and #296) 60 | * Fixes CRAN errors in testing with new `clubSandwich` package 61 | 62 | # estimatr 0.16.0 63 | 64 | * Add `diagnostics` to `iv_robust()` 65 | * Add `glance()` methods for all estimators 66 | * Add `lh_robust()` for easy interface to `car::linearHypothesis()` 67 | * Fixed minor bug with a formula such as `is.na(var)` in the `covariates` formula in `lm_lin()` (issue #283) 68 | 69 | # estimatr 0.14.0 70 | 71 | * Removes `broom` hack for `tidy` method and instead relies on importing `generics` 72 | 73 | # estimatr 0.12.0 74 | 75 | * Fixed ambiguity about how interacted covariates were centered in `lm_lin` 76 | * A series of fixes for bugs that occurred with multiple outcomes (multivariate regression): 77 | * Fixed bug pointed out by James Pustejovsky via the `sandwich` version 2.5-0 and off-diagonal blocks of multivariate regression vcov matrices 78 | * Fixed bugs in `lm_lin` preventing multivariate regression 79 | * Fixed bug that truncated degrees of freedom with "CR2" standard errors 80 | * Fixed bug that returned incorrect R-squared for the second or later outcomes 81 | * Fixed bug preventing integration with latest version of `margins` 82 | * Fixed bug with `difference_in_means` when using `condition1` and `condition2` to subset a treatment vector with more than two treatment conditions. Previous estimates and standard errors were incorrect. 
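A minimal sketch of the `lm_lin` centering behavior referenced in the 0.12.0 notes above, using the package's `alo_star_men` data (the exact call is illustrative; column names come from the data documentation later in this dump): covariates supplied as a one-sided formula are centered (suffix `_c`) and interacted with the treatment indicator from the main formula.

    library(estimatr)
    data(alo_star_men)
    # gpa0 is centered to gpa0_c and interacted with the ssp treatment indicator
    fit_lin <- lm_lin(GPA_year1 ~ ssp, covariates = ~ gpa0, data = alo_star_men)
    tidy(fit_lin)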
83 | 84 | # estimatr 0.10.0 85 | 86 | * Changed names of confidence interval columns in tidied data from `ci.lower` and `ci.upper` to `conf.low` and `conf.high` to be in line with other tidy methods 87 | * Added support for `fixed_effects` that are just one block 88 | * Added support for specifing `condition_prs` in `horvitz_thompson()` as a single number 89 | * Added t- and z-statistics to output 90 | * Limit unnecessary messaging in `horvitz_thompson()` 91 | 92 | # estimatr 0.8.0 93 | 94 | * Added support for absorbing fixed effects in `lm_robust` and `iv_robust` 95 | * Added `commarobust` and `starprep` for stargazer integration 96 | * Added `texreg` support for 2SLS IV models 97 | * Fixed bugs for incorrect F-statistics with robust standard errors 98 | * Refactor of main fitting engine for linear models 99 | 100 | # estimatr 0.6.0 101 | 102 | * Added support for multivariate linear models 103 | * Added support for instrumental variables regression 104 | * Major change to name of object output elements to mostly match with `broom::tidy` 105 | * old -> new 106 | * `coefficient_names` -> `term` 107 | * `se` -> `std.error` 108 | * `p` -> `p.values` 109 | * `ci_lower` -> `ci.lower` 110 | * `ci_upper` -> `ci.upper` 111 | * All of the above changes are also made to the column names on the output of `tidy`; furthermore for `tidy` objects one further name change from `coefficients` -> `estimate` has been made 112 | * Fixed bug that caused variances, standard errors, and p-values to be wrong for weighted "CR2" variance estimation 113 | * Fixed incorrect estimates when both weights and blocks were passed to `difference_in_means` 114 | * Rewrite NSE handling to be done by `rlang` 115 | * Rewrite `na.omit` handler in R 116 | * Major refactor of C++ underlying regression estimators 117 | 118 | # estimatr 0.4.0 119 | 120 | * Changed suffix added to centered variables in `lm_lin()` from `_bar` to `_c` 121 | * Added all vignettes to `.Rbuildignore`, only available on website now 122 | * Fixed `lm_robust_helper.cpp` algorithm to not catch own exception and to deal with `valgrind` memory errors 123 | * Bugfix where passing a formula as an object within a function would fail 124 | * Simplified some tests for various CRAN test platforms 125 | 126 | # estimatr 0.2.0 127 | 128 | * First **CRAN** upload 129 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | ht_covar_partial <- function(y1, y0, p10, p1, p0) { 5 | .Call(`_estimatr_ht_covar_partial`, y1, y0, p10, p1, p0) 6 | } 7 | 8 | ht_var_partial <- function(y, p) { 9 | .Call(`_estimatr_ht_var_partial`, y, p) 10 | } 11 | 12 | demeanMat2 <- function(what, fes, weights, start_col, eps) { 13 | .Call(`_estimatr_demeanMat2`, what, fes, weights, start_col, eps) 14 | } 15 | 16 | AtA <- function(A) { 17 | .Call(`_estimatr_AtA`, A) 18 | } 19 | 20 | Kr <- function(A, B) { 21 | .Call(`_estimatr_Kr`, A, B) 22 | } 23 | 24 | lm_solver <- function(X, y, try_cholesky) { 25 | .Call(`_estimatr_lm_solver`, X, y, try_cholesky) 26 | } 27 | 28 | lm_variance <- function(X, Xunweighted, XtX_inv, ei, weight_mean, cluster, J, ci, se_type, which_covs, fe_rank) { 29 | .Call(`_estimatr_lm_variance`, X, Xunweighted, XtX_inv, ei, weight_mean, cluster, J, ci, se_type, which_covs, fe_rank) 30 | } 31 | 32 | naomitwhy <- 
function(df, recursive_subset) { 33 | .Call(`_estimatr_naomitwhy`, df, recursive_subset) 34 | } 35 | 36 | -------------------------------------------------------------------------------- /R/S3_confint.R: -------------------------------------------------------------------------------- 1 | confint_lm_like <- function(object, 2 | parm = NULL, 3 | level = NULL, 4 | ...) { 5 | cis <- get_ci_mat(object, level) 6 | 7 | if (!is.null(parm)) { 8 | cis <- cis[parm, , drop = FALSE] 9 | } 10 | 11 | return(cis) 12 | } 13 | 14 | #' @export 15 | confint.lm_robust <- confint_lm_like 16 | 17 | #' @export 18 | confint.iv_robust <- confint_lm_like 19 | 20 | #' @importFrom stats confint 21 | #' @export 22 | confint.lh_robust <- function(object, parm = NULL, level = NULL, ...) { 23 | rbind(confint(object$lm_robust, parm = parm, level = level, ...), tidy(object$lh, ...)) 24 | } 25 | 26 | #' @export 27 | confint.lh <- confint_lm_like 28 | 29 | #' @export 30 | confint.difference_in_means <- function(object, 31 | parm = NULL, 32 | level = NULL, 33 | ...) { 34 | cis <- get_ci_mat(object, level) 35 | 36 | return(cis) 37 | } 38 | 39 | #' @export 40 | confint.horvitz_thompson <- function(object, 41 | parm = NULL, 42 | level = NULL, 43 | ...) { 44 | cis <- get_ci_mat(object, level, ttest = FALSE) 45 | 46 | return(cis) 47 | } 48 | 49 | 50 | ## internal method that builds confidence intervals and labels the matrix to be returned 51 | get_ci_mat <- function(object, level, ttest = TRUE) { 52 | if (!is.null(level)) { 53 | if (!is.null(object[["alpha"]])) { 54 | object[["alpha"]] <- NULL 55 | } 56 | object <- add_cis_pvals(object, alpha = 1 - level, ci = TRUE, ttest = ttest) 57 | } else { 58 | level <- 1 - object$alpha 59 | } 60 | 61 | cis <- cbind( 62 | as.vector(object$conf.low), 63 | as.vector(object$conf.high) 64 | ) 65 | 66 | if (is.matrix(object$conf.low)) { 67 | ny <- ncol(object$conf.low) 68 | p <- nrow(object$conf.low) 69 | rownames(cis) <- paste0( 70 | rep(object$outcome, each = p), 71 | ":", 72 | rep(object$term, times = ny) 73 | ) 74 | } else { 75 | rownames(cis) <- object$term 76 | } 77 | 78 | colnames(cis) <- paste((1 - level) / 2 * c(100, -100) + c(0, 100), "%") 79 | 80 | return(cis) 81 | } 82 | -------------------------------------------------------------------------------- /R/S3_emmeans.R: -------------------------------------------------------------------------------- 1 | ### Support for emmeans package 2 | # 3 | # Note: the recover_data and emm_basis methods are registered dynamically 4 | # (see zzz.R). So these functions are not exported 5 | 6 | #' @importFrom utils getS3method 7 | 8 | recover_data.lm_robust <- function(object, ...) { 9 | data <- getS3method("recover_data", "lm")(object, ...) 10 | if (object$rank < object$k) # rank-deficient. Need to pass dataset to emm_basis 11 | attr(data, "pass.it.on") <- TRUE 12 | data 13 | } 14 | 15 | emm_basis.lm_robust <- function(object, trms, xlev, grid, ...) { 16 | # coef() works right for lm but coef.aov tosses out NAs 17 | bhat <- coef(object) 18 | n.mult <- ifelse(is.matrix(bhat), ncol(bhat), 1) # columns in mult response 19 | m <- suppressWarnings(model.frame(trms, grid, na.action = na.pass, xlev = xlev)) 20 | X <- model.matrix(trms, m, contrasts.arg = object$contrasts) 21 | V <- emmeans::.my.vcov(object, ...) 
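  # (Assumption, for clarity: .my.vcov() is an unexported emmeans helper; it is
  # expected to return vcov(object) unless the caller supplies an alternative
  # covariance matrix or function via emmeans' `vcov.` argument.)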
22 | 23 | if (!anyNA(bhat)) 24 | nbasis <- estimability::all.estble 25 | else { 26 | desmat <- model.matrix(trms, data = attr(object, "data")) 27 | nbasis <- estimability::nonest.basis(desmat) 28 | } 29 | misc <- list() 30 | if (n.mult > 1) { # multivariate case. Need to expand some matrices 31 | eye <- diag(n.mult) 32 | X <- kronecker(eye, X) 33 | nbasis <- kronecker(eye, nbasis) 34 | if(is.null(colnames(bhat))) 35 | colnames(bhat) <- seq_len(n.mult) 36 | misc$ylevs <- list(rep.meas = colnames(bhat)) 37 | bhat <- as.numeric(bhat) # stretch coefs into a vector 38 | } 39 | dfargs <- list(df = object$df.residual) 40 | dffun <- function(k, dfargs) dfargs$df 41 | list(X = X, bhat = bhat, nbasis = nbasis, V = V, 42 | dffun = dffun, dfargs = dfargs, misc = misc) 43 | } 44 | -------------------------------------------------------------------------------- /R/S3_glance.R: -------------------------------------------------------------------------------- 1 | # Helpers to retrieve values 2 | retrieve_value <- function(x, what) if(exists(what, x)) x[[what]] else NA_real_ 3 | retrieve_fstatistic <- function(x) { 4 | if (exists("fstatistic", x)) { 5 | data.frame( 6 | statistic = x[["fstatistic"]][1], 7 | p.value = pf( 8 | x[["fstatistic"]][1], 9 | x[["fstatistic"]][2], 10 | x[["fstatistic"]][3], 11 | lower.tail = FALSE 12 | ) 13 | ) 14 | } else { 15 | data.frame(statistic = NA_real_, p.value = NA_real_) 16 | } 17 | } 18 | 19 | #' @importFrom generics glance 20 | #' @export 21 | generics::glance 22 | 23 | #' Glance at an estimatr object 24 | #' @name estimatr_glancers 25 | #' @templateVar class lm_robust 26 | #' @return For \code{glance.lm_robust}, a data.frame with columns: 27 | #' \item{r.squared}{the \eqn{R^2}, 28 | #' \deqn{R^2 = 1 - Sum(e[i]^2) / Sum((y[i] - y^*)^2),} where \eqn{y^*} 29 | #' is the mean of \eqn{y[i]} if there is an intercept and zero otherwise, 30 | #' and \eqn{e[i]} is the ith residual.} 31 | #' \item{adj.r.squared}{the \eqn{R^2} but penalized for having more parameters, \code{rank}} 32 | #' \item{se_type}{the standard error type specified by the user} 33 | #' \item{statistic}{the value of the F-statistic} 34 | #' \item{p.value}{p-value from the F test} 35 | #' \item{df.residual}{residual degrees of freedom} 36 | #' \item{nobs}{the number of observations used} 37 | #' 38 | #' @param x An object returned by one of the estimators 39 | #' @param ... extra arguments (not used) 40 | #' 41 | #' @export 42 | #' @family estimatr glancers 43 | #' @seealso [generics::glance()], [estimatr::lm_robust()], [estimatr::lm_lin()], [estimatr::iv_robust()], [estimatr::difference_in_means()], [estimatr::horvitz_thompson()] 44 | #' @md 45 | glance.lm_robust <- function(x, ...) { 46 | 47 | if (length(x[["outcome"]]) > 1) { 48 | stop("Cannot use `glance` on linear models with multiple responses.") 49 | } 50 | 51 | ret <- cbind( 52 | data.frame( 53 | r.squared = x[["r.squared"]], 54 | adj.r.squared = x[["adj.r.squared"]] 55 | ), 56 | retrieve_fstatistic(x), 57 | data.frame( 58 | df.residual = x[["df"]][1], 59 | nobs = as.integer(x[["nobs"]]), 60 | se_type = x[["se_type"]], 61 | stringsAsFactors = FALSE 62 | ) 63 | ) 64 | 65 | rownames(ret) <- NULL 66 | 67 | ret 68 | } 69 | 70 | #' @rdname estimatr_glancers 71 | #' @templateVar class lh_robust 72 | #' @return For \code{glance.lh_robust}, we glance the \code{lm_robust} component only. 
You can access the linear hypotheses as a data.frame directly from the \code{lh} component of the \code{lh_robust} object. 73 | #' 74 | #' @export 75 | #' @family estimatr glancers 76 | glance.lh_robust <- function(x, ...) { 77 | glance(x[["lm_robust"]]) 78 | } 79 | 80 | #' @rdname estimatr_glancers 81 | #' @templateVar class iv_robust 82 | #' @return For \code{glance.iv_robust}, a data.frame with columns: 83 | #' \item{r.squared}{The \eqn{R^2} of the second stage regression} 84 | #' \item{adj.r.squared}{The \eqn{R^2} but penalized for having more parameters, \code{rank}} 85 | #' \item{df.residual}{residual degrees of freedom} 86 | #' \item{nobs}{the number of observations used} 87 | #' \item{se_type}{the standard error type specified by the user} 88 | #' \item{statistic}{the value of the F-statistic} 89 | #' \item{p.value}{p-value from the F test} 90 | #' \item{statistic.weakinst}{the value of the first-stage F-statistic, useful for the weak instruments test; only reported if there is only one endogenous variable} 91 | #' \item{p.value.weakinst}{p-value from the first-stage F test, a test of weak instruments; only reported if there is only one endogenous variable} 92 | #' \item{statistic.endogeneity}{the value of the F-statistic for the test of endogeneity; often called the Wu-Hausman statistic; with robust standard errors, we employ the regression-based test} 93 | #' \item{p.value.endogeneity}{p-value from the F-test for endogeneity} 94 | #' \item{statistic.overid}{the value of the chi-squared statistic for the test of instrument correlation with the error term; only reported with overidentification} 95 | #' \item{p.value.overid}{p-value from the chi-squared test; only reported with overidentification} 96 | #' 97 | #' @export 98 | #' @family estimatr glancers 99 | glance.iv_robust <- function(x, ...)
{ 100 | 101 | if (length(x[["outcome"]]) > 1) { 102 | stop("Cannot use `glance` on linear models with multiple responses.") 103 | } 104 | 105 | ret <- cbind( 106 | data.frame( 107 | r.squared = x[["r.squared"]], 108 | adj.r.squared = x[["adj.r.squared"]], 109 | df.residual = x[["df.residual"]], 110 | nobs = as.integer(x[["nobs"]]), 111 | se_type = x[["se_type"]], 112 | stringsAsFactors = FALSE 113 | ), 114 | retrieve_fstatistic(x), 115 | if (exists("diagnostic_first_stage_fstatistic", x) && length(x[["diagnostic_first_stage_fstatistic"]]) == 4) { 116 | data.frame( 117 | statistic.weakinst = x[["diagnostic_first_stage_fstatistic"]]["value"], 118 | p.value.weakinst = x[["diagnostic_first_stage_fstatistic"]]["p.value"] 119 | ) 120 | } else { 121 | data.frame(statistic.weakinst = NA_real_, p.value.weakinst = NA_real_) 122 | }, 123 | if (exists("diagnostic_endogeneity_test", x)) { 124 | data.frame( 125 | statistic.endogeneity = x[["diagnostic_endogeneity_test"]]["value"], 126 | p.value.endogeneity = x[["diagnostic_endogeneity_test"]]["p.value"] 127 | ) 128 | } else { 129 | data.frame(statistic.endogeneity = NA_real_, p.value.endogeneity = NA_real_) 130 | }, 131 | if (exists("diagnostic_overid_test", x)) { 132 | data.frame( 133 | statistic.overid = x[["diagnostic_overid_test"]]["value"], 134 | p.value.overid = x[["diagnostic_overid_test"]]["p.value"] 135 | ) 136 | } else { 137 | data.frame(statistic.overid = NA_real_, p.value.overid = NA_real_) 138 | } 139 | ) 140 | 141 | ret 142 | } 143 | 144 | #' @rdname estimatr_glancers 145 | #' @templateVar class difference_in_means 146 | #' @return For \code{glance.difference_in_means}, a data.frame with columns: 147 | #' \item{design}{the design used, and therefore the estimator used} 148 | #' \item{df}{the degrees of freedom} 149 | #' \item{nobs}{the number of observations used} 150 | #' \item{nblocks}{the number of blocks, if used} 151 | #' \item{nclusters}{the number of clusters, if used} 152 | #' \item{condition2}{the second, "treatment", condition} 153 | #' \item{condition1}{the first, "control", condition} 154 | #' 155 | #' @export 156 | #' @family estimatr glancers 157 | glance.difference_in_means <- function(x, ...) { 158 | data.frame( 159 | design = x[["design"]], 160 | df = x[["df"]], 161 | nobs = as.integer(x[["nobs"]]), 162 | nblocks = retrieve_value(x, "nblocks"), 163 | nclusters = retrieve_value(x, "nclusters"), 164 | condition2 = x[["condition2"]], 165 | condition1 = x[["condition1"]], 166 | stringsAsFactors = FALSE 167 | ) 168 | } 169 | 170 | #' @rdname estimatr_glancers 171 | #' @templateVar class horvitz_thompson 172 | #' @return For \code{glance.horvitz_thompson}, a data.frame with columns: 173 | #' \item{nobs}{the number of observations used} 174 | #' \item{se_type}{the type of standard error estimator used} 175 | #' \item{condition2}{the second, "treatment", condition} 176 | #' \item{condition1}{the first, "control", condition} 177 | #' 178 | #' @export 179 | #' @family estimatr glancers 180 | glance.horvitz_thompson <- function(x, ...) { 181 | data.frame( 182 | nobs = as.integer(x[["nobs"]]), 183 | se_type = x[["se_type"]], 184 | condition2 = x[["condition2"]], 185 | condition1 = x[["condition1"]], 186 | stringsAsFactors = FALSE 187 | ) 188 | } 189 | -------------------------------------------------------------------------------- /R/S3_nobs.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | nobs.lm_robust <- function(object, ...)
object$nobs 3 | 4 | #' @export 5 | nobs.lh_robust <- function(object, ...) object$nobs 6 | 7 | #' @export 8 | nobs.iv_robust <- function(object, ...) object$nobs 9 | 10 | #' @export 11 | nobs.summary.lm_robust <- nobs.lm_robust 12 | 13 | #' @export 14 | nobs.horvitz_thompson <- function(object, ...) object$nobs 15 | 16 | #' @export 17 | nobs.difference_in_means <- function(object, ...) object$nobs 18 | 19 | -------------------------------------------------------------------------------- /R/S3_print.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.lm_robust <- function(x, ...) { 3 | print(summarize_tidy(x)) 4 | } 5 | 6 | #' @export 7 | print.iv_robust <- function(x, ...) { 8 | print(summarize_tidy(x)) 9 | } 10 | 11 | print_summary_lm_like <- function(x, 12 | digits, 13 | signif.stars = getOption("show.signif.stars"), 14 | ...) { 15 | cat( 16 | "\nCall:\n", 17 | paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"), 18 | "\n\n", 19 | sep = "" 20 | ) 21 | if (x$weighted) { 22 | cat("Weighted, ") 23 | } 24 | cat("Standard error type: ", x$se_type, "\n") 25 | 26 | if (x$rank < x$k) { 27 | singularities <- x$k - x$rank 28 | cat( 29 | "\nCoefficients: (", 30 | singularities, 31 | " not defined because the design matrix is rank deficient)\n", 32 | sep = "" 33 | ) 34 | } else { 35 | cat("\nCoefficients:\n") 36 | } 37 | 38 | print(coef(x), digits = digits) 39 | 40 | fstat <- if (is.numeric(x[["fstatistic"]])) { 41 | paste( 42 | "\nF-statistic:", formatC(x$fstatistic[1L], digits = digits), 43 | "on", x$fstatistic[2L], "and", x$fstatistic[3L], 44 | "DF, p-value:", 45 | format.pval(pf( 46 | x$fstatistic[1L], 47 | x$fstatistic[2L], 48 | x$fstatistic[3L], 49 | lower.tail = FALSE 50 | ), digits = digits) 51 | ) 52 | } else NULL 53 | 54 | cat( 55 | "\nMultiple R-squared: ", formatC(x$r.squared, digits = digits), 56 | ",\tAdjusted R-squared: ", formatC(x$adj.r.squared, digits = digits), 57 | fstat 58 | ) 59 | 60 | if (is.numeric(x[["proj_fstatistic"]])) { 61 | cat( 62 | "\nMultiple R-squared (proj. model): ", 63 | formatC(x$proj_r.squared, digits = digits), 64 | ",\tAdjusted R-squared (proj. model): ", 65 | formatC(x$proj_adj.r.squared, digits = digits), 66 | "\nF-statistic (proj. model):", 67 | formatC(x$proj_fstatistic[1L], digits = digits), 68 | "on", x$proj_fstatistic[2L], "and", x$proj_fstatistic[3L], 69 | "DF, p-value:", 70 | format.pval(pf( 71 | x$proj_fstatistic[1L], 72 | x$proj_fstatistic[2L], 73 | x$proj_fstatistic[3L], 74 | lower.tail = FALSE 75 | ), digits = digits) 76 | ) 77 | } 78 | cat("\n") 79 | 80 | if (is.numeric(x[["diagnostic_endogeneity_test"]])) { 81 | cat("\nDiagnostics:\n") 82 | printCoefmat( 83 | build_ivreg_diagnostics_mat(x), 84 | cs.ind = 1L:2L, 85 | tst.ind = 3L, 86 | has.Pvalue = TRUE, 87 | P.values = TRUE, 88 | digits = digits, 89 | signif.stars = signif.stars, 90 | na.print = "NA", 91 | ... 92 | ) 93 | } 94 | invisible(x) 95 | } 96 | 97 | #' @export 98 | print.summary.lm_robust <- function(x, 99 | digits = max(3L, getOption("digits") - 3L), 100 | signif.stars = getOption("show.signif.stars"), 101 | ...) { 102 | print_summary_lm_like(x, digits, ...) 103 | } 104 | 105 | #' @export 106 | print.summary.iv_robust <- function(x, 107 | digits = max(3L, getOption("digits") - 3L), 108 | signif.stars = getOption("show.signif.stars"), 109 | ...) { 110 | print_summary_lm_like(x, digits, signif.stars, ...) 111 | } 112 | 113 | #' @export 114 | print.difference_in_means <- function(x, ...) 
{ 115 | cat("Design: ", x$design, "\n") 116 | print(summarize_tidy(x)) 117 | } 118 | 119 | 120 | #' @export 121 | print.horvitz_thompson <- function(x, ...) { 122 | print(summarize_tidy(x)) 123 | } 124 | 125 | #' @export 126 | print.lh <- function(x, ...) { 127 | print(summary(x)) 128 | } 129 | 130 | #' @export 131 | print.lh_robust <- function(x, ...) { 132 | lnames <- names(x) 133 | for (i in seq_along(x)) { 134 | cat("$", lnames[i], "\n", sep = "") 135 | print(x[[i]]) 136 | cat("\n") 137 | } 138 | invisible(x) 139 | } 140 | 141 | #' @export 142 | print.summary.lh_robust <- function(x, 143 | digits = max(3L, getOption("digits") - 3L), 144 | ...){ 145 | lnames <- names(x) 146 | for (i in seq_along(x)) { 147 | cat("$", lnames[i], "\n", sep = "") 148 | print(summary(x[[i]]), digits = digits) 149 | cat("\n") 150 | } 151 | } 152 | 153 | #' @export 154 | print.summary.lh <- function(x, 155 | digits = max(3L, getOption("digits") - 3L), 156 | ...){ 157 | class(x) <- NULL 158 | print(x, digits = digits) 159 | } 160 | -------------------------------------------------------------------------------- /R/S3_summary.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | summary.lm_robust <- function(object, ...) { 3 | if (is.matrix(coef(object))) { 4 | ny <- ncol(coef(object)) 5 | 6 | ret <- setNames( 7 | vector("list", ny), 8 | paste("Response", object$outcome) 9 | ) 10 | 11 | mat_objs <- c( 12 | "coefficients", 13 | "std.error", 14 | "statistic", 15 | "df", 16 | "conf.low", 17 | "conf.high", 18 | "p.value" 19 | ) 20 | 21 | vec_objs <- c( 22 | "outcome", 23 | "r.squared", 24 | "adj.r.squared", 25 | "res_var" 26 | ) 27 | 28 | all_models <- object 29 | 30 | for (i in seq(ny)) { 31 | for (nm in names(object)) { 32 | if (nm %in% mat_objs) { 33 | object[[nm]] <- all_models[[nm]][, i, drop = TRUE] 34 | } else if (nm %in% vec_objs) { 35 | object[[nm]] <- all_models[[nm]][i] 36 | } else if (nm == "fstatistic") { 37 | object[[nm]] <- all_models[[nm]][c(i, ny + 1:2)] 38 | } 39 | } 40 | object$call$formula[[2L]] <- object$terms[[2L]] <- 41 | as.name(all_models$outcome[i]) 42 | ret[[i]] <- summary(object, ...) 43 | } 44 | 45 | class(ret) <- "listof" 46 | } else { 47 | ret <- summary_lm_model(object) 48 | } 49 | 50 | ret 51 | } 52 | 53 | #' @export 54 | summary.iv_robust <- function(object, ...) 
{ 55 | summary_lm_model(object) 56 | } 57 | 58 | #' @export 59 | summary.lh_robust <- function(object,...){ 60 | class(object) <- "summary.lh_robust" 61 | object 62 | } 63 | 64 | #' @export 65 | summary.lh <- function(object,...){ 66 | summary_lh_object <- summarize_tidy(simplify_lh_outcome(object)) 67 | class(summary_lh_object) <- "summary.lh" 68 | summary_lh_object 69 | } 70 | 71 | 72 | summary_lm_model <- function(object) { 73 | 74 | out_values <- c( 75 | "call", 76 | "k", 77 | "rank", 78 | "df.residual", 79 | "res_var", 80 | "weighted", 81 | "se_type", 82 | "fes", 83 | "r.squared", 84 | "adj.r.squared", 85 | "fstatistic" 86 | ) 87 | # Different returns if fixed effects in the output 88 | if (object[["fes"]]) { 89 | out_values <- c( 90 | out_values, 91 | "proj_r.squared", 92 | "proj_adj.r.squared", 93 | "proj_fstatistic" 94 | ) 95 | } 96 | 97 | # Different returns if fixed effects in the output 98 | if (is.numeric(object[["diagnostic_endogeneity_test"]])) { 99 | out_values <- c( 100 | out_values, 101 | "diagnostic_first_stage_fstatistic", 102 | "diagnostic_endogeneity_test", 103 | "diagnostic_overid_test" 104 | ) 105 | } 106 | 107 | return_list <- object[out_values] 108 | 109 | # Split into two lists if multivariate linear model 110 | 111 | return_list[["coefficients"]] <- summarize_tidy(object) 112 | return_list[["nobs"]] <- nobs(object) 113 | 114 | class(return_list) <- "summary.lm_robust" 115 | return(return_list) 116 | } 117 | 118 | 119 | #' @export 120 | summary.difference_in_means <- function(object, ...) { 121 | return(list( 122 | coefficients = summarize_tidy(object), 123 | design = object$design 124 | )) 125 | } 126 | 127 | 128 | #' @export 129 | summary.horvitz_thompson <- function(object, ...) { 130 | return(list(coefficients = summarize_tidy(object, "z"))) 131 | } 132 | 133 | summarize_tidy <- function(object, test = "t", ...) { 134 | remove_cols <- c("term", "outcome") 135 | 136 | # Ugly so that summary(fit)$coefficients matches lm() 137 | tidy_out <- tidy(object, ...) 138 | colnames(tidy_out)[2:8] <- 139 | c( 140 | "Estimate", 141 | "Std. Error", 142 | paste0(test, " value"), 143 | paste0("Pr(>|", test, "|)"), 144 | "CI Lower", 145 | "CI Upper", 146 | "DF" 147 | ) 148 | tidy_mat <- as.matrix(tidy_out[, !(names(tidy_out) %in% remove_cols)]) 149 | 150 | ny <- length(object$outcome) 151 | p <- length(object$term) 152 | if (ny > 1) { 153 | rownames(tidy_mat) <- paste0( 154 | rep(object$outcome, each = p), 155 | ":", 156 | rep(object$term, times = ny) 157 | ) 158 | } else { 159 | rownames(tidy_mat) <- object$term 160 | } 161 | 162 | return(tidy_mat) 163 | } 164 | -------------------------------------------------------------------------------- /R/S3_tidy.R: -------------------------------------------------------------------------------- 1 | #' @importFrom generics tidy 2 | #' @export 3 | generics::tidy 4 | 5 | tidy_data_frame <- function(x, 6 | conf.int = TRUE, 7 | conf.level = NULL, 8 | ...) 
{ 9 | vec_cols <- c( 10 | "coefficients", 11 | "std.error", 12 | "statistic", 13 | "p.value", 14 | "conf.low", 15 | "conf.high", 16 | "df" 17 | ) 18 | 19 | if(!conf.int){ 20 | vec_cols <- c( 21 | "coefficients", 22 | "std.error", 23 | "statistic", 24 | "p.value", 25 | "df" 26 | ) 27 | } 28 | 29 | tidy_mat <- do.call("cbind", lapply(x[vec_cols], as.vector)) 30 | vec_cols[vec_cols == "coefficients"] <- "estimate" 31 | colnames(tidy_mat) <- vec_cols 32 | 33 | return_frame <- data.frame( 34 | term = x[["term"]], 35 | tidy_mat, 36 | outcome = rep(x[["outcome"]], each = length(x[["term"]])), 37 | stringsAsFactors = FALSE 38 | ) 39 | 40 | rownames(return_frame) <- NULL 41 | 42 | if(!is.null(conf.level) && conf.int){ 43 | ci <- stats::confint(x, level = conf.level, ...) 44 | if (all(row.names(ci) == return_frame$term)) { 45 | return_frame$conf.low <- ci[, 1] 46 | return_frame$conf.high <- ci[, 2] 47 | } 48 | } 49 | return(return_frame) 50 | } 51 | 52 | warn_singularities <- function(x) { 53 | if (x$rank < x$k) { 54 | singularities <- x$k - x$rank 55 | what <- ifelse(singularities > 1, " coefficients ", " coefficient ") 56 | message( 57 | singularities, what, 58 | " not defined because the design matrix is rank deficient\n" 59 | ) 60 | } 61 | } 62 | 63 | #' Tidy an estimatr object 64 | #' @name estimatr_tidiers 65 | #' @templateVar class lm_robust 66 | #' @return A data.frame with columns for coefficient names, estimates, standard 67 | #' errors, confidence intervals, p-values, degrees of freedom, and the 68 | #' name of the outcome variable 69 | #' 70 | #' @param x An object returned by one of the estimators 71 | #' @param conf.int Logical indicating whether or not to include a 72 | #' confidence interval in the tidied output. Defaults to ‘TRUE’. 73 | #' @param conf.level The confidence level to use for the confidence 74 | #' interval if ‘conf.int = TRUE’. Must be strictly greater than 0 and less 75 | #' than 1. Defaults to 0.95, which corresponds to a 95 percent confidence 76 | #' interval. 77 | #' @param ... extra arguments (not used) 78 | #' 79 | #' @export 80 | #' @family estimatr tidiers 81 | #' @seealso [generics::tidy()], [estimatr::lm_robust()], [estimatr::iv_robust()], [estimatr::difference_in_means()], [estimatr::horvitz_thompson()] 82 | #' @md 83 | tidy.lm_robust <- function(x, 84 | conf.int = TRUE, 85 | conf.level = NULL, 86 | ...) { 87 | warn_singularities(x) 88 | tidy_data_frame(x, conf.int = conf.int, conf.level = conf.level, ...) 89 | } 90 | 91 | #' @rdname estimatr_tidiers 92 | #' @templateVar class iv_robust 93 | #' 94 | #' @export 95 | #' @family estimatr tidiers 96 | tidy.iv_robust <- function(x, conf.int = TRUE, conf.level = NULL, ...) { 97 | warn_singularities(x) 98 | tidy_data_frame(x, conf.int = conf.int, conf.level = conf.level, ...) 99 | } 100 | 101 | #' @rdname estimatr_tidiers 102 | #' @templateVar class difference_in_means 103 | #' 104 | #' @export 105 | #' @family estimatr tidiers 106 | tidy.difference_in_means <- tidy_data_frame 107 | 108 | #' @rdname estimatr_tidiers 109 | #' @templateVar class horvitz_thompson 110 | #' 111 | #' @export 112 | #' @family estimatr tidiers 113 | tidy.horvitz_thompson <- tidy_data_frame 114 | 115 | #' @rdname estimatr_tidiers 116 | #' @templateVar class lh_robust 117 | #' 118 | #' @export 119 | #' @family estimatr tidiers 120 | tidy.lh_robust <- function(x, 121 | conf.int = TRUE, 122 | conf.level = NULL, 123 | ...) 
{ 124 | rbind(tidy(x$lm_robust, conf.int = conf.int, conf.level = conf.level, ...), 125 | tidy(x$lh, conf.int = conf.int, conf.level = conf.level, ...)) 126 | } 127 | 128 | #' @rdname estimatr_tidiers 129 | #' @templateVar class lh 130 | #' 131 | #' @export 132 | #' @family estimatr tidiers 133 | tidy.lh <- function(x, 134 | conf.int = TRUE, 135 | conf.level = NULL, 136 | ...) { 137 | tidy_data_frame(simplify_lh_outcome(x), conf.int = conf.int, conf.level = conf.level, ...) 138 | } 139 | 140 | # Simplifies the `lh` outcome column for tidy.lh and print.lh 141 | simplify_lh_outcome <- function(x) { 142 | x_list <- as.list(x) 143 | x_list[["outcome"]] <- unique(x_list[["outcome"]]) 144 | class(x_list) <- "lh" 145 | x_list 146 | } 147 | 148 | -------------------------------------------------------------------------------- /R/S3_update.R: -------------------------------------------------------------------------------- 1 | #' @importFrom Formula Formula 2 | #' @importFrom stats getCall 3 | #' @export 4 | update.iv_robust <- function(object, formula., ..., evaluate = TRUE) { 5 | if (is.null(call <- getCall(object))) 6 | stop("need an object with call component") 7 | extras <- match.call(expand.dots = FALSE)$... 8 | if (!missing(formula.)) 9 | call$formula <- formula(update(Formula(formula(object)), formula.)) 10 | if (length(extras)) { 11 | existing <- !is.na(match(names(extras), names(call))) 12 | for (a in names(extras)[existing]) call[[a]] <- extras[[a]] 13 | if (any(!existing)) { 14 | call <- c(as.list(call), extras[!existing]) 15 | call <- as.call(call) 16 | } 17 | } 18 | if (evaluate) 19 | eval(call, parent.frame()) 20 | else call 21 | } 22 | -------------------------------------------------------------------------------- /R/S3_vcov.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | vcov.lm_robust <- function(object, complete = TRUE, ...) { 3 | vcov_simple(object, complete = complete) 4 | } 5 | 6 | #' @export 7 | vcov.iv_robust <- vcov.lm_robust 8 | 9 | #' @export 10 | vcov.difference_in_means <- function(object, ...) { 11 | return(object$vcov) 12 | } 13 | 14 | #' @export 15 | vcov.horvitz_thompson <- vcov.difference_in_means 16 | 17 | 18 | # Helper function for extracting vcov when it is just an element in the object list 19 | vcov_simple <- function(object, complete) { 20 | if (is.null(object$vcov)) { 21 | stop( 22 | "Object must have vcov matrix. Try setting `return_vcov = TRUE` in ", 23 | "the estimator function." 24 | ) 25 | } 26 | if (complete && (object$rank < object$k)) { 27 | vc <- matrix(NA_real_, object$k, object$k, 28 | dimnames = list(object$term, object$term)) 29 | j <- which(!is.na(coef(object, complete = TRUE))) 30 | vc[j, j] <- object$vcov 31 | return(vc) 32 | } else { 33 | return(object$vcov) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Replication data for Lin 2013 2 | #' 3 | #' A dataset containing the data to replicate: 4 | #' Lin, Winston. 2013. "Agnostic notes on regression adjustments to experimental 5 | #' data: Reexamining Freedman's critique." The Annals of Applied Statistics. 6 | #' Stat. 7(1): 295-318. doi:10.1214/12-AOAS583. 7 | #' https://projecteuclid.org/euclid.aoas/1365527200. 
8 | #' 9 | #' This data was originally taken from the following paper, subset to men who 10 | #' showed up to college, were in one of the arms with the support condition, 11 | #' and had GPA data for their first year in college. 12 | #' 13 | #' Angrist, Joshua, Daniel Lang, and Philip Oreopoulos. 2009. "Incentives and 14 | #' Services for College Achievement: Evidence from a Randomized Trial." American 15 | #' Economic Journal: Applied Economics 1(1): 136-63. 16 | #' https://www.aeaweb.org/articles?id=10.1257/app.1.1.136 17 | #' 18 | #' @format A data frame with educational treatments and outcomes: 19 | #' \describe{ 20 | #' \item{gpa0}{high school GPA} 21 | #' \item{sfsp}{financial incentives and support treatment} 22 | #' \item{ssp}{support only treatment} 23 | #' \item{GPA_year1}{college GPA year 1} 24 | #' \item{GPA_year2}{college GPA year 2} 25 | #' } 26 | #' @source \url{https://www.aeaweb.org/articles?id=10.1257/app.1.1.136} 27 | "alo_star_men" 28 | -------------------------------------------------------------------------------- /R/estimatr.R: -------------------------------------------------------------------------------- 1 | #' estimatr 2 | #' 3 | #' @description Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . 4 | #' 5 | #' @docType package 6 | #' @useDynLib estimatr, .registration = TRUE 7 | #' @importFrom Rcpp evalCpp 8 | #' @importFrom stats sd var model.matrix.default pt qt var weighted.mean lm 9 | #' vcov model.frame.default model.response complete.cases terms reformulate 10 | #' update model.extract setNames delete.response .checkMFClasses model.frame 11 | #' model.matrix na.pass nobs coef pf .getXlevels df.residual fitted.values 12 | #' formula model.matrix.lm resid weights lm.fit na.omit pchisq printCoefmat 13 | #' residuals 14 | #' @importFrom methods setGeneric setMethod isGeneric className 15 | #' @importFrom Formula as.Formula 16 | #' @importFrom rlang enquos enquo eval_tidy quo_get_expr quo_set_expr quo_is_missing sym quo 17 | #' @name estimatr 18 | "_PACKAGE" 19 | -------------------------------------------------------------------------------- /R/estimatr_lh_robust.R: -------------------------------------------------------------------------------- 1 | #' Linear Hypothesis for Ordinary Least Squares with Robust Standard Errors 2 | #' 3 | #' @description This function fits a linear model with robust standard errors and performs linear hypothesis test. 4 | #' @param ... Other arguments to be passed to \code{\link{lm_robust}} 5 | #' @param data A \code{data.frame} 6 | #' @param linear_hypothesis A character string or a matrix specifying combination, to be passed to the hypothesis.matrix argument of car::linearHypothesis 7 | #' See \code{\link[car]{linearHypothesis}} for more details. 8 | #' @details 9 | #' 10 | #' This function is a wrapper for \code{\link{lm_robust}} and for 11 | #' \code{\link[car]{linearHypothesis}}. It first runs \code{lm_robust} and 12 | #' next passes \code{"lm_robust"} object as an argument to \code{linearHypothesis}. 
13 | #' 14 | #' @return An object of class \code{"lh_robust"} containing the following two components: 15 | #' 16 | #' \item{lm_robust}{an object as returned by \code{lm_robust}.} 17 | #' \item{lh}{A data frame with most of its columns pulled from \code{linearHypothesis}' output.} 18 | #' 19 | #' The only analysis directly performed by \code{lh_robust} is a \code{t-test} for the null hypothesis of no effects of the linear combination of coefficients as specified by the user. 20 | #' All other output components are either extracted from \code{linearHypothesis} or \code{lm_robust}. 21 | #' 22 | #' The original output returned by \code{linearHypothesis} is added as an attribute under the \code{"linear_hypothesis"} attribute. 23 | #' 24 | #' @examples 25 | #' 26 | #' library(fabricatr) 27 | #' dat <- fabricate( 28 | #' N = 40, 29 | #' y = rpois(N, lambda = 4), 30 | #' x = rnorm(N), 31 | #' z = rbinom(N, 1, prob = 0.4), 32 | #' clusterID = sample(1:4, 40, replace = TRUE) 33 | #' ) 34 | #' 35 | #' # Default variance estimator is HC2 robust standard errors 36 | #' lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 37 | #' 38 | #' # The linear hypothesis argument can be specified equivalently as: 39 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z = 2x") 40 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "2*x +1*z") 41 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 42 | #' 43 | #' # Also recovers other sorts of standard errors just as specified in \code{\link{lm_robust}} 44 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "classical") 45 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "HC1") 46 | #' 47 | #' # Can tidy() main output and subcomponents into a data.frame 48 | #' lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 49 | #' tidy(lhro) 50 | #' tidy(lhro$lm_robust) 51 | #' tidy(lhro$lh) 52 | #' 53 | #' # Can use summary() to get more statistics on the main output and subcomponents. 
54 | #' summary(lhro) 55 | #' summary(lhro$lm_robust) 56 | #' summary(lhro$lh) 57 | #' 58 | #' @importFrom rlang quos eval_tidy 59 | #' 60 | #' @export 61 | #' 62 | lh_robust <- function(..., data, linear_hypothesis) { 63 | 64 | requireNamespace("car") 65 | 66 | # fit lm_robust model 67 | lm_robust_fit <- lm_robust(..., data = data) 68 | 69 | alpha <- eval_tidy(quos(...)$alpha) 70 | if (is.null(alpha)) { 71 | alpha <- 0.05 72 | } 73 | 74 | # calculate linear hypothesis 75 | car_lht <- car::linearHypothesis( 76 | lm_robust_fit, hypothesis.matrix = linear_hypothesis, level = 1 - alpha) 77 | 78 | estimate <- drop(attr(car_lht, "value")) 79 | std.error <- sqrt(diag(attr(car_lht, "vcov"))) 80 | 81 | # this df is not in general correct, but unclear what to replace it with 82 | df <- lm_robust_fit$df.residual 83 | 84 | statistic <- estimate / std.error 85 | p.value <- 2 * pt(abs(statistic), df, lower.tail = FALSE) 86 | ci <- estimate + std.error %o% qt(c(alpha / 2, 1 - alpha / 2), df) 87 | 88 | return_lh_robust <- data.frame( 89 | coefficients = estimate, 90 | std.error = std.error, 91 | statistic = statistic, 92 | p.value = p.value, 93 | alpha = alpha, 94 | conf.low = ci[, 1], 95 | conf.high = ci[, 2], 96 | df = df, 97 | term = linear_hypothesis, 98 | outcome = lm_robust_fit$outcome 99 | ) 100 | 101 | attr(return_lh_robust, "linear_hypothesis") <- car_lht 102 | class(return_lh_robust) <- c("lh", "data.frame") 103 | 104 | return_lm_robust <- lm_robust_fit 105 | return_lm_robust[["call"]] <- match.call() 106 | 107 | return(structure( 108 | list(lm_robust = return_lm_robust, lh = return_lh_robust), 109 | class = "lh_robust" 110 | )) 111 | 112 | } 113 | -------------------------------------------------------------------------------- /R/helper_cis_pvals.R: -------------------------------------------------------------------------------- 1 | # Internal method takes the results and adds p-values and confidence intervals 2 | add_cis_pvals <- function(return_frame, alpha, ci, ttest = TRUE) { 3 | if (ci) { 4 | if (alpha <= 0 || alpha >= 1) { 5 | stop("`alpha` must be numeric between 0 and 1") 6 | } 7 | 8 | return_frame$statistic <- with(return_frame, coefficients / std.error) 9 | 10 | if (ttest) { 11 | if (any(return_frame$df <= 0, na.rm = TRUE)) { 12 | warning( 13 | "Some degrees of freedom have been estimated as negative or zero\n", 14 | "p-values and confidence intervals may not be calculated" 15 | ) 16 | 17 | return_frame$df <- ifelse(return_frame$df <= 0, NA, return_frame$df) 18 | } 19 | 20 | return_frame$p.value <- with( 21 | return_frame, 22 | 2 * pt(abs(statistic), df = df, lower.tail = FALSE) 23 | ) 24 | 25 | crit_se <- with(return_frame, qt(1 - alpha / 2, df = df) * std.error) 26 | } else { 27 | return_frame$p.value <- with( 28 | return_frame, 29 | 2 * pnorm(abs(statistic), lower.tail = FALSE) 30 | ) 31 | 32 | crit_se <- with(return_frame, qnorm(1 - alpha / 2) * std.error) 33 | 34 | return_frame$df <- NA 35 | } 36 | 37 | return_frame$conf.low <- with(return_frame, coefficients - crit_se) 38 | return_frame$conf.high <- with(return_frame, coefficients + crit_se) 39 | 40 | return(as.list(return_frame)) 41 | } else { 42 | return_frame$p.value <- NA 43 | return_frame$statistic <- NA 44 | return_frame$conf.low <- NA 45 | return_frame$conf.high <- NA 46 | 47 | return(as.list(return_frame)) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /R/helper_clean_model_data.R: -------------------------------------------------------------------------------- 1 | # 
library(estimatr) 2 | # f <- function(w) { 3 | # dat <- data.frame(x = rnorm(10), y = rnorm(10)) 4 | # lm_robust(y ~ x, data = dat, w = w) 5 | # } 6 | # f(NULL) 7 | # f(1:10) 8 | 9 | 10 | # Internal method to process data 11 | #' @importFrom rlang f_rhs %||% 12 | clean_model_data <- function(data, datargs, estimator = "") { 13 | 14 | # if data exists, evaluate it 15 | data <- if (quo_is_missing(data)) NULL else eval_tidy(data) 16 | 17 | if (getOption("estimatr.debug.clean_model_data", FALSE)) browser() 18 | 19 | mfargs <- Filter(Negate(quo_is_missing), datargs) 20 | 21 | m_formula <- eval_tidy(mfargs[["formula"]]) 22 | m_formula_env <- environment(m_formula) 23 | 24 | stopifnot("`formula` argument must be a formula"=inherits(m_formula, "formula")) 25 | 26 | # From this point on we never use the environment of anything 27 | # in mfargs as we always evaluate in `data` explicitly 28 | # Therefore we can just change it to a list that can take 29 | # expressions without environments attached to them 30 | mfargs <- as.list(mfargs) 31 | 32 | args_ignored <- c("fixed_effects", "se_type") 33 | # For each ... that would go to model.fram .default, early eval, 34 | # save to formula env, and point to it 35 | # subset is also non-standard eval 36 | to_process <- setdiff( 37 | names(mfargs), 38 | c( 39 | setdiff(names(formals(stats::model.frame.default)), "subset"), 40 | args_ignored 41 | ) 42 | ) 43 | 44 | for (da in to_process) { 45 | name <- sprintf(".__%s%%%d__", da, sample.int(.Machine$integer.max, 1)) 46 | m_formula_env[[name]] <- eval_tidy(mfargs[[da]], data = data) 47 | mfargs[[da]] <- sym(name) 48 | } 49 | 50 | if ("fixed_effects" %in% names(mfargs)) { 51 | name <- sprintf(".__fixed_effects%%%d__", sample.int(.Machine$integer.max, 1)) 52 | m_formula_env[[name]] <- sapply( 53 | stats::model.frame.default( 54 | mfargs[["fixed_effects"]], 55 | data = data, 56 | na.action = NULL 57 | ), 58 | FUN = as.factor 59 | ) 60 | mfargs[["fixed_effects"]] <- sym(name) 61 | } 62 | 63 | condition_pr <- NULL 64 | if ("condition_pr" %in% names(mfargs) && 65 | length(eval(mfargs[["condition_pr"]], m_formula_env)) == 1) { 66 | condition_pr <- eval(mfargs[["condition_pr"]], m_formula_env) 67 | mfargs[["condition_pr"]] <- NULL 68 | } 69 | 70 | mfargs[["formula"]] <- Formula::as.Formula(m_formula) 71 | 72 | # Get model frame 73 | mf <- eval_tidy(quo((stats::model.frame)( 74 | !!!mfargs, 75 | data = data, 76 | na.action = na.omit_detailed.data.frame, 77 | drop.unused.levels = TRUE 78 | ))) 79 | 80 | local({ 81 | na.action <- attr(mf, "na.action") 82 | why_omit <- attr(na.action, "why_omit") 83 | 84 | # Warn if missingness in ancillary variables 85 | missing_warning <- c( 86 | "Some observations have missingness in the %s variable(s) but not in ", 87 | "the outcome or covariates. These observations have been dropped." 
88 | ) 89 | 90 | to_check_if_missing <- c( 91 | "cluster", "condition_pr", "block", "weights", "fixed_effects" 92 | ) 93 | 94 | for (x in to_check_if_missing) { 95 | if (!is.null(why_omit[[sprintf("(%s)", x)]])) { 96 | warning(sprintf(missing_warning, x)) 97 | } 98 | } 99 | }) 100 | 101 | if (!is.null(attr(terms(mf), "Formula_without_dot"))) { 102 | formula <- attr(terms(mf), "Formula_without_dot") 103 | } else { 104 | formula <- eval_tidy(mfargs[["formula"]]) # unwrap quosure => a formula 105 | } 106 | 107 | ret <- list( 108 | outcome = model.response(mf, type = "numeric"), 109 | design_matrix = model.matrix(terms(formula, rhs = 1), data = mf), 110 | formula = formula 111 | ) 112 | 113 | if (estimator == "iv") { 114 | if (length(formula)[2] != 2) { 115 | stop( 116 | "Must specify a `formula` with both regressors and instruments. For ", 117 | "example, `formula = y ~ x1 + x2 | x1 + z2` where x1 and x2 are the ", 118 | "regressors and z1 and z2 are the instruments.\n\nSee ?iv_robust." 119 | ) 120 | } 121 | ret[["instrument_matrix"]] <- model.matrix(terms(formula, rhs = 2), data = mf) 122 | ret[["terms_regressors"]] <- terms(formula, rhs = 1) 123 | } else if (estimator %in% c("ht", "dim")) { 124 | ret[["original_treatment"]] <- mf[, colnames(mf) == all.vars(terms(mf)[[3]])[1]] 125 | } 126 | 127 | ret[["weights"]] <- model.extract(mf, "weights") 128 | if (any(ret[["weights"]] < 0)) { 129 | stop("`weights` must not be negative") 130 | } 131 | 132 | ret[["cluster"]] <- model.extract(mf, "cluster") 133 | if (!(class(ret[["cluster"]]) %in% c("factor", "integer")) && 134 | !is.null(ret[["cluster"]])) { 135 | ret[["cluster"]] <- as.factor(ret[["cluster"]]) 136 | } 137 | 138 | ret[["block"]] <- model.extract(mf, "block") 139 | 140 | ret[["condition_pr"]] <- if (is.numeric(condition_pr)) 141 | rep(condition_pr, nrow(ret[["design_matrix"]])) 142 | else 143 | model.extract(mf, "condition_pr") 144 | 145 | ret[["fixed_effects"]] <- model.extract(mf, "fixed_effects") 146 | # If there is NA in the blocks and only one block, returns vector not matrix 147 | # so coerce to matrix 148 | if (is.character(ret[["fixed_effects"]])) { 149 | ret[["fixed_effects"]] <- as.matrix(ret[["fixed_effects"]]) 150 | } 151 | 152 | if (any(ret[["condition_pr"]] <= 0 | ret[["condition_pr"]] > 1)) { 153 | stop( 154 | "`condition_prs` must be a vector of positive values no greater than 1" 155 | ) 156 | } 157 | 158 | ret[["terms"]] <- attr(mf, "terms") 159 | dcs <- attr(ret[["terms"]], "dataClasses") 160 | # Clobber auxiliary variables in dataClasses for margins 161 | drop_vars <- c("(fixed_effects)", "(condition_pr)", "(block)", "(cluster)") 162 | attr(ret[["terms"]], "dataClasses") <- dcs[setdiff(names(dcs), drop_vars)] 163 | ret[["xlevels"]] <- .getXlevels(ret[["terms"]], mf) 164 | if (is.character(ret[["fixed_effects"]])) { 165 | ret[["felevels"]] <- lapply(as.data.frame(ret[["fixed_effects"]]), unique) 166 | } 167 | 168 | return(ret) 169 | } 170 | 171 | demean_fes <- function(model_data) { 172 | fe.ints <- apply(model_data[["fixed_effects"]], 2, function(x) match(x, unique(x))) 173 | 174 | eps <- 1e-8 175 | weights <- model_data[["weights"]] %||% rep(1, nrow(model_data[["design_matrix"]])) 176 | has_int <- attr(model_data$terms, "intercept") 177 | 178 | 179 | demeaned <- list() 180 | 181 | # save names 182 | demeaned[["outcome"]] <- demeanMat2(as.matrix(model_data[["outcome"]]), fe.ints, weights, 0, eps) 183 | dimnames(demeaned[["outcome"]]) <- dimnames(model_data[["outcome"]]) 184 | model_data[["outcome"]] <- 
demeaned[["outcome"]] 185 | 186 | demeaned[["design_matrix"]] <- demeanMat2(model_data[["design_matrix"]], fe.ints, weights, has_int, eps) 187 | new_names <- dimnames(model_data[["design_matrix"]]) 188 | new_names[[2]] <- new_names[[2]][new_names[[2]] != "(Intercept)"] 189 | dimnames(demeaned[["design_matrix"]]) <- new_names 190 | model_data[["design_matrix"]] <- demeaned[["design_matrix"]] 191 | 192 | if (is.numeric(model_data[["instrument_matrix"]])) { 193 | demeaned[["instrument_matrix"]] <- demeanMat2(model_data[["instrument_matrix"]], fe.ints, weights, has_int, eps) 194 | model_data[["instrument_matrix"]] <- demeaned[["instrument_matrix"]] 195 | } 196 | 197 | 198 | model_data[["fe_levels"]] <- apply(fe.ints, 2, max) - 1 199 | 200 | return(model_data) 201 | } 202 | -------------------------------------------------------------------------------- /R/helper_extract.R: -------------------------------------------------------------------------------- 1 | # This code modified from 2 | # https://github.com/leifeld/texreg/blob/master/R/extract.R (no LICENSE) 3 | #' Extract model data for \pkg{texreg} package 4 | #' @rdname extract.lm_robust 5 | #' 6 | #' @description Prepares a \code{"lm_robust"} or \code{"iv_robust"} object for the \pkg{texreg} 7 | #' package. This is largely a clone of the \code{extract.lm} 8 | #' method. 9 | #' 10 | #' @param model an object of class \code{\link{lm_robust}} or \code{"iv_robust"} 11 | #' @param include.ci logical. Defaults to TRUE 12 | #' @param include.rsquared logical. Defaults to TRUE 13 | #' @param include.adjrs logical. Defaults to TRUE 14 | #' @param include.nobs logical. Defaults to TRUE 15 | #' @param include.fstatistic logical. Defaults to TRUE 16 | #' @param include.rmse logical. Defaults to TRUE 17 | #' @param include.nclusts logical. Defaults to TRUE if clusters in \code{model} 18 | #' @param ... unused 19 | #' 20 | extract.robust_default <- function(model, 21 | include.ci = TRUE, 22 | include.rsquared = TRUE, 23 | include.adjrs = TRUE, 24 | include.nobs = TRUE, 25 | include.fstatistic = FALSE, 26 | include.rmse = TRUE, 27 | include.nclusts = TRUE, 28 | ...) 
{ 29 | s <- tidy(model) 30 | 31 | names <- s[["term"]] 32 | co <- s[["estimate"]] 33 | se <- s[["std.error"]] 34 | pval <- s[["p.value"]] 35 | cilow <- numeric() 36 | ciupper <- numeric() 37 | if (include.ci) { 38 | cilow <- s[["conf.low"]] 39 | ciupper <- s[["conf.high"]] 40 | } 41 | 42 | rs <- model$r.squared # extract R-squared 43 | adj <- model$adj.r.squared # extract adjusted R-squared 44 | n <- nobs(model) # extract number of observations 45 | 46 | gof <- numeric() 47 | gof.names <- character() 48 | gof.decimal <- logical() 49 | if (include.rsquared) { 50 | gof <- c(gof, rs) 51 | gof.names <- c(gof.names, "R$^2$") 52 | gof.decimal <- c(gof.decimal, TRUE) 53 | } 54 | if (include.adjrs) { 55 | gof <- c(gof, adj) 56 | gof.names <- c(gof.names, "Adj.\ R$^2$") 57 | gof.decimal <- c(gof.decimal, TRUE) 58 | } 59 | if (include.nobs) { 60 | gof <- c(gof, n) 61 | gof.names <- c(gof.names, "Num.\ obs.") 62 | gof.decimal <- c(gof.decimal, FALSE) 63 | } 64 | if (include.fstatistic) { 65 | fstat <- model[["fstatistic"]][[1]] 66 | gof <- c(gof, fstat) 67 | gof.names <- c(gof.names, "F statistic") 68 | gof.decimal <- c(gof.decimal, TRUE) 69 | } 70 | if (include.rmse && !is.null(model[["res_var"]])) { 71 | rmse <- sqrt(model[["res_var"]]) 72 | gof <- c(gof, rmse) 73 | gof.names <- c(gof.names, "RMSE") 74 | gof.decimal <- c(gof.decimal, TRUE) 75 | } 76 | if (include.nclusts && model[["clustered"]]) { 77 | rmse <- sqrt(model[["res_var"]]) 78 | gof <- c(gof, model[["nclusters"]]) 79 | gof.names <- c(gof.names, "N Clusters") 80 | gof.decimal <- c(gof.decimal, FALSE) 81 | } 82 | 83 | tr <- texreg::createTexreg( 84 | coef.names = names, 85 | coef = co, 86 | se = se, 87 | pvalues = pval, 88 | ci.low = cilow, 89 | ci.up = ciupper, 90 | gof.names = gof.names, 91 | gof = gof, 92 | gof.decimal = gof.decimal 93 | ) 94 | return(tr) 95 | } 96 | 97 | #' @rdname extract.lm_robust 98 | #' 99 | #' @export 100 | extract.lm_robust <- extract.robust_default 101 | 102 | #' @rdname extract.lm_robust 103 | #' 104 | #' @export 105 | extract.iv_robust <- extract.robust_default 106 | 107 | -------------------------------------------------------------------------------- /R/helper_na_omit_detailed.R: -------------------------------------------------------------------------------- 1 | #' Extra logging on na.omit handler 2 | #' 3 | #' @param object a data.frame 4 | #' 5 | #' @return a normal \code{omit} object, with the extra attribute \code{why_omit}, 6 | #' which contains the leftmost column containing an NA for each row that was dropped, by 7 | #' column name, if any were dropped. 
8 | #' 9 | #' @seealso \code{\link{na.omit}} 10 | na.omit_detailed.data.frame <- function(object){ 11 | 12 | naomitwhy(object, function(x, w) x[w, , drop=FALSE]) 13 | 14 | } 15 | 16 | -------------------------------------------------------------------------------- /R/helper_parse_arguments.R: -------------------------------------------------------------------------------- 1 | # This function parses condition names for HT and DiM estimators 2 | parse_conditions <- function(treatment, condition1, condition2, estimator) { 3 | if (is.factor(treatment)) { 4 | condition_names <- levels(droplevels(treatment)) 5 | } else { 6 | condition_names <- sort(unique(treatment)) 7 | } 8 | 9 | if (any(!(c(condition1, condition2) %in% condition_names))) { 10 | stop("`condition1` and `condition2` must be values found in the treatment") 11 | } 12 | 13 | n_conditions <- length(condition_names) 14 | 15 | conditions <- list(NULL, NULL) 16 | 17 | if (n_conditions > 2) { 18 | if (is.null(condition1) || is.null(condition2)) { 19 | stop( 20 | "Treatment has > 2 values; must specify both `condition1` and ", 21 | "`condition2` or use a treatment with only 2 values" 22 | ) 23 | } else { 24 | conditions[1:2] <- c(condition1, condition2) 25 | } 26 | } else if (n_conditions == 2) { 27 | if (is.null(condition1) && is.null(condition2)) { 28 | conditions[1:2] <- condition_names 29 | } else if (!is.null(condition2) && is.null(condition1)) { 30 | conditions[1:2] <- c(setdiff(condition_names, condition2), condition2) 31 | } else if (!is.null(condition1) && is.null(condition2)) { 32 | conditions[1:2] <- c(condition1, setdiff(condition_names, condition1)) 33 | } else { 34 | conditions[1:2] <- c(condition1, condition2) 35 | } 36 | } else if (n_conditions == 1) { 37 | # Allowable for HT estimator 38 | if (estimator != "horvitz_thompson") { 39 | stop( 40 | "Must have more than one value in treatment unless using Horvitz-", 41 | "Thompson estimator" 42 | ) 43 | } 44 | 45 | if (is.null(condition1) && is.null(condition2)) { 46 | conditions[2] <- condition_names 47 | } else if (!is.null(condition2)) { 48 | conditions[2] <- condition2 49 | } else if (!is.null(condition1)) { 50 | conditions[1] <- condition1 51 | } 52 | } 53 | 54 | return(conditions) 55 | } 56 | 57 | # This function ensures that blocks and clusters have been specified correctly 58 | check_clusters_blocks <- function(data) { 59 | if (!is.null(data$cluster)) { 60 | one_block_per_clust <- 61 | tapply(data$block, data$cluster, function(x) all(x == x[1])) 62 | 63 | # Check that clusters nest within blocks 64 | if (any(!one_block_per_clust)) { 65 | stop("All `clusters` must be contained within `blocks`") 66 | } 67 | 68 | # get number of clusters per block 69 | clust_per_block <- tapply( 70 | data$cluster, 71 | data$block, 72 | function(x) length(unique(x)) 73 | ) 74 | } else { 75 | clust_per_block <- tabulate(as.factor(data$block)) 76 | } 77 | 78 | return(clust_per_block) 79 | } 80 | -------------------------------------------------------------------------------- /R/helper_return.R: -------------------------------------------------------------------------------- 1 | # This file has helper functions for returning the lists from various estimators 2 | lm_return <- function(return_list, model_data, formula) { 3 | if (!is.null(model_data)) { 4 | return_list[["contrasts"]] <- attr(model_data$design_matrix, "contrasts") 5 | return_list[["terms"]] <- model_data$terms 6 | return_list[["xlevels"]] <- model_data$xlevels 7 | return_list[["felevels"]] <- model_data$felevels 8 | 
return_list[["weights"]] <- model_data$weights 9 | if (is.matrix(model_data$outcome) && 10 | is.character(colnames(model_data$outcome))) { 11 | return_list[["outcome"]] <- colnames(model_data$outcome) 12 | } else { 13 | return_list[["outcome"]] <- deparse(formula[[2]], nlines = 5) 14 | } 15 | } 16 | 17 | # Name and flatten objects 18 | if (is.matrix(return_list[["std.error"]]) && 19 | ncol(return_list[["std.error"]]) > 1) { 20 | dimnames(return_list[["std.error"]]) <- dimnames(return_list[["coefficients"]]) 21 | } else { 22 | return_list[["coefficients"]] <- drop(return_list[["coefficients"]]) 23 | nms <- c("std.error", "statistic", "p.value", "df", "conf.low", "conf.high") 24 | for (nm in nms) { 25 | if (length(return_list[[nm]]) > 1 || !is.na(return_list[[nm]])) { 26 | return_list[[nm]] <- setNames( 27 | drop(return_list[[nm]]), 28 | names(return_list[["coefficients"]]) 29 | ) 30 | } 31 | } 32 | } 33 | if (return_list[["weighted"]]) { 34 | names(return_list[["weights"]]) <- if (is.matrix(return_list[["fitted.values"]])) 35 | rownames(return_list[["fitted.values"]]) 36 | else names(return_list[["fitted.values"]]) 37 | } 38 | return_list[["fitted.values"]] <- drop(return_list[["fitted.values"]]) 39 | return_list[["ei.iv"]] <- drop(return_list[["ei.iv"]]) 40 | return_list[["residuals"]] <- drop(return_list[["residuals"]]) 41 | return(return_list) 42 | } 43 | 44 | dim_like_return <- function(return_list, alpha, formula, conditions) { 45 | return_list[["alpha"]] <- alpha 46 | 47 | # get "max" condition to account for case with only 1 condition 48 | treat_condition <- conditions[[2]] 49 | 50 | # now we add the condition 2 value to coefficient name like it were a factor 51 | 52 | # Only add label if conditions aren't 0/1 53 | add_label <- !(conditions[[2]] == 1 && conditions[[1]] == 0) 54 | # If horvitz_thompson and there is only one treatment, add_label will be NA 55 | # In this case, we add the non-null value if it's condition 2 56 | if (is.na(add_label)) { 57 | add_label <- !is.null(conditions[[2]]) 58 | } 59 | 60 | fterms <- terms(formula) 61 | coef_name <- labels(fterms) 62 | 63 | if (add_label) { 64 | return_list[["term"]] <- paste0( 65 | coef_name, 66 | conditions[[2]] 67 | ) 68 | } else { 69 | return_list[["term"]] <- coef_name 70 | } 71 | 72 | return_list[["outcome"]] <- deparse(formula[[2]], nlines = 5) 73 | 74 | names(return_list[["coefficients"]]) <- 75 | names(return_list[["std.error"]]) <- 76 | names(return_list[["p.value"]]) <- 77 | names(return_list[["df"]]) <- return_list[["term"]] 78 | 79 | return_list[["condition2"]] <- conditions[[2]] 80 | return_list[["condition1"]] <- conditions[[1]] 81 | 82 | return_list[["vcov"]] <- matrix( 83 | data = return_list[["std.error"]] ^ 2, 84 | dimnames = list(return_list[["term"]], return_list[["term"]]) 85 | ) 86 | 87 | return(return_list) 88 | } 89 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # Some of this is code modified from 2 | # https://github.com/atahk/bucky/blob/master/R/zzz.R (GPL 3.0) 3 | .onLoad <- function(libname, pkgname) { 4 | if (suppressWarnings(requireNamespace("texreg", quietly = TRUE))) { 5 | setGeneric("extract", function(model, ...) 
standardGeneric("extract"), 6 | package = "texreg" 7 | ) 8 | setMethod("extract", 9 | signature = className("lm_robust", pkgname), 10 | definition = extract.lm_robust 11 | ) 12 | setMethod("extract", 13 | signature = className("iv_robust", pkgname), 14 | definition = extract.iv_robust 15 | ) 16 | } 17 | if(requireNamespace("emmeans", quietly = TRUE)) { 18 | emmeans::.emm_register("lm_robust", pkgname) 19 | } 20 | invisible() 21 | } 22 | 23 | #' @importFrom utils packageVersion 24 | .onAttach <- function(libname, pkgname) { 25 | if (isNamespaceLoaded("broom") && packageVersion("broom") <= "0.5.0") { 26 | packageStartupMessage( 27 | "Warning: the `broom` package version 0.5.0 or earlier is loaded.\n", 28 | "Please upgrade `broom` or `tidy` methods may not work as expected." 29 | ) 30 | } 31 | invisible() 32 | } 33 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | title: "estimatr: Fast Estimators for Design-Based Inference" 4 | --- 5 | 6 | 7 | 8 | ```{r, echo = FALSE} 9 | set.seed(42) 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | message = FALSE, 13 | comment = "#>", 14 | fig.path = "README-" 15 | ) 16 | options(digits = 2) 17 | ``` 18 | 19 | [![CRAN status](https://www.r-pkg.org/badges/version/estimatr)](https://cran.r-project.org/package=estimatr) 20 | [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/grand-total/estimatr?color=green)](https://r-pkg.org/pkg/estimatr) 21 | [![Build status](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml) 22 | [![Codecov test coverage](https://codecov.io/gh/DeclareDesign/estimatr/graph/badge.svg)](https://app.codecov.io/gh/DeclareDesign/estimatr) 23 | [![Replications](https://softwarecite.com/badge/estimatr)](https://softwarecite.com/package/estimatr) 24 | 25 | **estimatr** is an `R` package providing a range of commonly-used linear estimators, designed for speed and for ease-of-use. Users can easily recover robust, cluster-robust, and other design appropriate estimates. We include two functions that implement means estimators, `difference_in_means()` and `horvitz_thompson()`, and three linear regression estimators, `lm_robust()`, `lm_lin()`, and `iv_robust()`. In each case, users can choose an estimator to reflect cluster-randomized, block-randomized, and block-and-cluster-randomized designs. The [Getting Started Guide](https://declaredesign.org/r/estimatr/articles/getting-started.html) describes each estimator provided by **estimatr** and how it can be used in your analysis. 26 | 27 | You can also see the multiple ways you can [get regression tables out of estimatr](https://declaredesign.org/r/estimatr/articles/regression-tables.html) using commonly used `R` packages such as `texreg` and `stargazer`. Fast estimators also enable fast simulation of research designs to learn about their properties (see [DeclareDesign](https://declaredesign.org)). 28 | 29 | ## Installing estimatr 30 | 31 | To install the latest stable release of **estimatr**, please ensure that you are running version 3.5 or later of R and run the following code: 32 | 33 | ```{r, eval=F} 34 | install.packages("estimatr") 35 | ``` 36 | 37 | ## Easy to use 38 | 39 | Once the package is installed, getting appropriate estimates and standard errors is now both fast and easy. 
40 | 41 | ```{r, eval = TRUE, echo=-1} 42 | set.seed(42) 43 | library(estimatr) 44 | 45 | # sample data from cluster-randomized experiment 46 | library(fabricatr) 47 | library(randomizr) 48 | dat <- fabricate( 49 | N = 100, 50 | y = rnorm(N), 51 | clusterID = sample(letters[1:10], size = N, replace = TRUE), 52 | z = cluster_ra(clusterID) 53 | ) 54 | 55 | # robust standard errors 56 | res_rob <- lm_robust(y ~ z, data = dat) 57 | # tidy dataframes on command! 58 | tidy(res_rob) 59 | 60 | # cluster robust standard errors 61 | res_cl <- lm_robust(y ~ z, data = dat, clusters = clusterID) 62 | # standard summary view also available 63 | summary(res_cl) 64 | 65 | # matched-pair design learned from blocks argument 66 | data(sleep) 67 | res_dim <- difference_in_means(extra ~ group, data = sleep, blocks = ID) 68 | ``` 69 | 70 | The [Getting Started Guide](/r/estimatr/articles/getting-started.html) has more examples and uses, as do the reference pages. The [Mathematical Notes](/r/estimatr/articles/mathematical-notes.html) provide more information about what each estimator is doing under the hood. 71 | 72 | ## Fast to use 73 | 74 | Getting estimates and robust standard errors is also faster than it used to be. Compare our package to using `lm()` and the `sandwich` package to get HC2 standard errors. More speed comparisons are available [here](https://declaredesign.org/r/estimatr/articles/benchmarking-estimatr.html). Furthermore, with many blocks (or fixed effects), users can use the `fixed_effects` argument of `lm_robust` with HC1 standard errors to greatly improve estimation speed. More on [fixed effects here](https://declaredesign.org/r/estimatr/articles/absorbing-fixed-effects.html). 75 | 76 | ```{r, echo=-1} 77 | set.seed(1) 78 | dat <- data.frame(X = matrix(rnorm(2000*50), 2000), y = rnorm(2000)) 79 | 80 | library(microbenchmark) 81 | library(lmtest) 82 | library(sandwich) 83 | mb <- microbenchmark( 84 | `estimatr` = lm_robust(y ~ ., data = dat), 85 | `lm + sandwich` = { 86 | lo <- lm(y ~ ., data = dat) 87 | coeftest(lo, vcov = vcovHC(lo, type = 'HC2')) 88 | } 89 | ) 90 | ``` 91 | ```{r, echo = FALSE} 92 | d <- summary(mb)[, c("expr", "median")] 93 | names(d) <- c("estimatr", "median run-time (ms)") 94 | knitr::kable(d) 95 | ``` 96 | 97 | --- 98 | 99 | This project is generously supported by a grant from the [Laura and John Arnold Foundation](http://www.arnoldfoundation.org) and seed funding from [Evidence in Governance and Politics (EGAP)](http://egap.org). 
100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | estimatr: Fast Estimators for Design-Based Inference 2 | ================ 3 | 4 | 5 | 6 | [![CRAN 7 | status](https://www.r-pkg.org/badges/version/estimatr)](https://cran.r-project.org/package=estimatr) 8 | [![CRAN RStudio mirror 9 | downloads](https://cranlogs.r-pkg.org/badges/grand-total/estimatr?color=green)](https://r-pkg.org/pkg/estimatr) 10 | [![Build 11 | status](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml) 12 | [![Codecov test 13 | coverage](https://codecov.io/gh/DeclareDesign/estimatr/graph/badge.svg)](https://app.codecov.io/gh/DeclareDesign/estimatr) 14 | [![Replications](https://softwarecite.com/badge/estimatr)](https://softwarecite.com/package/estimatr) 15 | 16 | **estimatr** is an `R` package providing a range of commonly-used linear 17 | estimators, designed for speed and for ease-of-use. Users can easily 18 | recover robust, cluster-robust, and other design appropriate estimates. 19 | We include two functions that implement means estimators, 20 | `difference_in_means()` and `horvitz_thompson()`, and three linear 21 | regression estimators, `lm_robust()`, `lm_lin()`, and `iv_robust()`. In 22 | each case, users can choose an estimator to reflect cluster-randomized, 23 | block-randomized, and block-and-cluster-randomized designs. The [Getting 24 | Started 25 | Guide](https://declaredesign.org/r/estimatr/articles/getting-started.html) 26 | describes each estimator provided by **estimatr** and how it can be used 27 | in your analysis. 28 | 29 | You can also see the multiple ways you can [get regression tables out of 30 | estimatr](https://declaredesign.org/r/estimatr/articles/regression-tables.html) 31 | using commonly used `R` packages such as `texreg` and `stargazer`. Fast 32 | estimators also enable fast simulation of research designs to learn 33 | about their properties (see [DeclareDesign](https://declaredesign.org)). 34 | 35 | ## Installing estimatr 36 | 37 | To install the latest stable release of **estimatr**, please ensure that 38 | you are running version 3.5 or later of R and run the following code: 39 | 40 | ``` r 41 | install.packages("estimatr") 42 | ``` 43 | 44 | ## Easy to use 45 | 46 | Once the package is installed, getting appropriate estimates and 47 | standard errors is now both fast and easy. 48 | 49 | ``` r 50 | library(estimatr) 51 | 52 | # sample data from cluster-randomized experiment 53 | library(fabricatr) 54 | library(randomizr) 55 | dat <- fabricate( 56 | N = 100, 57 | y = rnorm(N), 58 | clusterID = sample(letters[1:10], size = N, replace = TRUE), 59 | z = cluster_ra(clusterID) 60 | ) 61 | 62 | # robust standard errors 63 | res_rob <- lm_robust(y ~ z, data = dat) 64 | # tidy dataframes on command! 
65 | tidy(res_rob) 66 | #> term estimate std.error statistic p.value conf.low conf.high df 67 | #> 1 (Intercept) 0.065 0.14 0.46 0.64 -0.21 0.34 98 68 | #> 2 z -0.067 0.21 -0.32 0.75 -0.48 0.35 98 69 | #> outcome 70 | #> 1 y 71 | #> 2 y 72 | 73 | # cluster robust standard errors 74 | res_cl <- lm_robust(y ~ z, data = dat, clusters = clusterID) 75 | # standard summary view also available 76 | summary(res_cl) 77 | #> 78 | #> Call: 79 | #> lm_robust(formula = y ~ z, data = dat, clusters = clusterID) 80 | #> 81 | #> Standard error type: CR2 82 | #> 83 | #> Coefficients: 84 | #> Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF 85 | #> (Intercept) 0.0653 0.145 0.452 0.678 -0.358 0.489 3.53 86 | #> z -0.0670 0.202 -0.331 0.750 -0.544 0.410 7.05 87 | #> 88 | #> Multiple R-squared: 0.00105 , Adjusted R-squared: -0.00915 89 | #> F-statistic: 0.11 on 1 and 9 DF, p-value: 0.748 90 | 91 | # matched-pair design learned from blocks argument 92 | data(sleep) 93 | res_dim <- difference_in_means(extra ~ group, data = sleep, blocks = ID) 94 | ``` 95 | 96 | The [Getting Started Guide](/r/estimatr/articles/getting-started.html) 97 | has more examples and uses, as do the reference pages. The [Mathematical 98 | Notes](/r/estimatr/articles/mathematical-notes.html) provide more 99 | information about what each estimator is doing under the hood. 100 | 101 | ## Fast to use 102 | 103 | Getting estimates and robust standard errors is also faster than it used 104 | to be. Compare our package to using `lm()` and the `sandwich` package to 105 | get HC2 standard errors. More speed comparisons are available 106 | [here](https://declaredesign.org/r/estimatr/articles/benchmarking-estimatr.html). 107 | Furthermore, with many blocks (or fixed effects), users can use the 108 | `fixed_effects` argument of `lm_robust` with HC1 standard errors to 109 | greatly improve estimation speed. More on [fixed effects 110 | here](https://declaredesign.org/r/estimatr/articles/absorbing-fixed-effects.html). 111 | 112 | ``` r 113 | dat <- data.frame(X = matrix(rnorm(2000*50), 2000), y = rnorm(2000)) 114 | 115 | library(microbenchmark) 116 | library(lmtest) 117 | library(sandwich) 118 | mb <- microbenchmark( 119 | `estimatr` = lm_robust(y ~ ., data = dat), 120 | `lm + sandwich` = { 121 | lo <- lm(y ~ ., data = dat) 122 | coeftest(lo, vcov = vcovHC(lo, type = 'HC2')) 123 | } 124 | ) 125 | #> Warning in microbenchmark(estimatr = lm_robust(y ~ ., data = dat), `lm + 126 | #> sandwich` = {: less accurate nanosecond times to avoid potential integer 127 | #> overflows 128 | ``` 129 | 130 | | estimatr | median run-time (ms) | 131 | |:--------------|---------------------:| 132 | | estimatr | 6 | 133 | | lm + sandwich | 21 | 134 | 135 | ------------------------------------------------------------------------ 136 | 137 | This project is generously supported by a grant from the [Laura and John 138 | Arnold Foundation](http://www.arnoldfoundation.org) and seed funding 139 | from [Evidence in Governance and Politics (EGAP)](http://egap.org). 
140 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://declaredesign.org/r/estimatr 2 | 3 | template: 4 | bootstrap: 5 5 | bslib: 6 | primary: "#0054AD" 7 | border-radius: 0.5rem 8 | btn-border-radius: 0.25rem 9 | 10 | development: 11 | mode: auto 12 | 13 | navbar: 14 | right: 15 | - text: "Software" 16 | menu: 17 | - text: DeclareDesign 18 | href: http://declaredesign.org/r/declaredesign/ 19 | - text: randomizr 20 | href: https://declaredesign.org/r/randomizr/ 21 | - text: fabricatr 22 | href: https://declaredesign.org/r/fabricatr/ 23 | - text: estimatr 24 | href: https://declaredesign.org/r/estimatr/ 25 | - text: rdss 26 | href: https://declaredesign.org/r/rdss/ 27 | - text: DesignLibrary 28 | href: https://declaredesign.org/r/designlibrary/ 29 | - text: DesignWizard 30 | href: https://eos.wzb.eu/ipi/DDWizard/ 31 | - text: declaredesign.org 32 | href: https://declaredesign.org 33 | left: 34 | - text: Getting started 35 | href: articles/getting-started.html 36 | - text: Reference 37 | href: reference/index.html 38 | - text: Articles 39 | menu: 40 | - text: Getting started using estimatr 41 | href: articles/getting-started.html 42 | - text: Absorbing Fixed Effects with estimatr 43 | href: articles/absorbing-fixed-effects.html 44 | - text: estimatr in the Tidyverse 45 | href: articles/estimatr-in-the-tidyverse.html 46 | - text: Benchmarking estimatr 47 | href: articles/benchmarking-estimatr.html 48 | - text: Examples with emmeans 49 | href: articles/emmeans-examples.html 50 | - text: Mathematical notes for estimatr 51 | href: articles/mathematical-notes.html 52 | - text: Regression Tables with estimatr 53 | href: articles/regression-tables.html 54 | - text: Simulations - Debiasing Difference-in-Means 55 | href: articles/simulations-debiasing-dim.html 56 | - text: Simulations - OLS and Variance 57 | href: articles/simulations-ols-variance.html 58 | - text: How Stata's hat matrix differs with weights 59 | href: articles/stata-wls-hat.html 60 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Submission 2 | 3 | Fixes M1 Mac errors. 4 | 5 | There are no changes to worse in reverse depends. 6 | 7 | We have checked this in win builder and on OS X, linux, and Windows in release, oldrel, and devel on Github Actions with no issues. 8 | 9 | -------------------------------------------------------------------------------- /data-raw/STAR_public_use.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/data-raw/STAR_public_use.dta -------------------------------------------------------------------------------- /data-raw/build-alo-star-for-lin.R: -------------------------------------------------------------------------------- 1 | # This script takes the dta file from the ALO replication data at this url: 2 | # https://www.aeaweb.org/articles?id=10.1257/app.1.1.136 3 | # and turns it in to the .rda we export with the package 4 | # Full citation: 5 | # Angrist, Joshua, Daniel Lang, and Philip Oreopoulos. 2009. "Incentives and 6 | # Services for College Achievement: Evidence from a Randomized Trial." American 7 | # Economic Journal: Applied Economics 1(1): 136-63. 
8 | 9 | # wd should be package root 10 | 11 | dat <- 12 | foreign::read.dta( 13 | "data-raw/STAR_public_use.dta" 14 | ) 15 | 16 | alo_star_men <- dat[ 17 | dat$sex == "M" & 18 | !is.na(dat$GPA_year1) & 19 | (dat$sfsp == 1 | dat$ssp == 1) & 20 | dat$noshow == 0, 21 | c("gpa0", "sfsp", "ssp", "GPA_year1", "GPA_year2") 22 | ] 23 | 24 | devtools::use_data( 25 | alo_star_men, 26 | internal = F, 27 | overwrite = T 28 | ) 29 | -------------------------------------------------------------------------------- /data/alo_star_men.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/data/alo_star_men.rda -------------------------------------------------------------------------------- /estimatr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/alo_star_men.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{alo_star_men} 5 | \alias{alo_star_men} 6 | \title{Replication data for Lin 2013} 7 | \format{ 8 | A data frame with educational treatments and outcomes: 9 | \describe{ 10 | \item{gpa0}{high school GPA} 11 | \item{sfsp}{financial incentives and support treatment} 12 | \item{ssp}{support only treatment} 13 | \item{GPA_year1}{college GPA year 1} 14 | \item{GPA_year2}{college GPA year 2} 15 | } 16 | } 17 | \source{ 18 | \url{https://www.aeaweb.org/articles?id=10.1257/app.1.1.136} 19 | } 20 | \usage{ 21 | alo_star_men 22 | } 23 | \description{ 24 | A dataset containing the data to replicate: 25 | Lin, Winston. 2013. "Agnostic notes on regression adjustments to experimental 26 | data: Reexamining Freedman's critique." The Annals of Applied Statistics. 27 | Stat. 7(1): 295-318. doi:10.1214/12-AOAS583. 28 | https://projecteuclid.org/euclid.aoas/1365527200. 29 | } 30 | \details{ 31 | This data was originally taken from the following paper, subset to men who 32 | showed up to college, were in one of the arms with the support condition, 33 | and had GPA data for their first year in college. 34 | 35 | Angrist, Joshua, Daniel Lang, and Philip Oreopoulos. 2009. "Incentives and 36 | Services for College Achievement: Evidence from a Randomized Trial." American 37 | Economic Journal: Applied Economics 1(1): 136-63. 
38 | https://www.aeaweb.org/articles?id=10.1257/app.1.1.136 39 | } 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/commarobust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_starprep.R 3 | \name{commarobust} 4 | \alias{commarobust} 5 | \title{Build lm_robust object from lm fit} 6 | \usage{ 7 | commarobust(model, se_type = NULL, clusters = NULL, ci = TRUE, alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{model}{an lm model object} 11 | 12 | \item{se_type}{The sort of standard error sought. If \code{clusters} is 13 | not specified the options are "HC0", "HC1" (or "stata", the equivalent), 14 | "HC2" (default), "HC3", or "classical". If \code{clusters} is specified the 15 | options are "CR0", "CR2" (default), or "stata". Can also specify "none", 16 | which may speed up estimation of the coefficients.} 17 | 18 | \item{clusters}{A vector corresponding to the clusters in the data.} 19 | 20 | \item{ci}{logical. Whether to compute and return p-values and confidence 21 | intervals, TRUE by default.} 22 | 23 | \item{alpha}{The significance level, 0.05 by default.} 24 | } 25 | \value{ 26 | an \code{\link{lm_robust}} object. 27 | } 28 | \description{ 29 | Build lm_robust object from lm fit 30 | } 31 | \examples{ 32 | lmo <- lm(mpg ~ hp, data = mtcars) 33 | 34 | # Default HC2 35 | commarobust(lmo) 36 | 37 | commarobust(lmo, se_type = "HC3") 38 | 39 | commarobust(lmo, se_type = "stata", clusters = mtcars$carb) 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/declaration_to_condition_pr_mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_condition_pr_matrix.R 3 | \name{declaration_to_condition_pr_mat} 4 | \alias{declaration_to_condition_pr_mat} 5 | \title{Builds condition probability matrices for Horvitz-Thompson estimation from 6 | \pkg{randomizr} declaration} 7 | \usage{ 8 | declaration_to_condition_pr_mat( 9 | ra_declaration, 10 | condition1 = NULL, 11 | condition2 = NULL, 12 | prob_matrix = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{ra_declaration}{An object of class \code{"ra_declaration"}, generated 17 | by the \code{\link[randomizr]{declare_ra}} function in \pkg{randomizr}. This 18 | object contains the experimental design that will be represented in a 19 | condition probability matrix} 20 | 21 | \item{condition1}{The name of the first condition, often the control group. If \code{NULL}, 22 | defaults to first condition in randomizr declaration. Either both \code{condition1} 23 | and \code{condition2} have to be specified or both left as \code{NULL}.} 24 | 25 | \item{condition2}{The name of the second condition, often the treatment group. If \code{NULL}, 26 | defaults to second condition in randomizr declaration. Either both \code{condition1} 27 | and \code{condition2} have to be specified or both left as \code{NULL}.} 28 | 29 | \item{prob_matrix}{An optional probability matrix to override the one in 30 | \code{ra_declaration}} 31 | } 32 | \value{ 33 | a numeric 2n*2n matrix of marginal and joint condition treatment 34 | probabilities to be passed to the \code{condition_pr_mat} argument of 35 | \code{\link{horvitz_thompson}}. See details. 
36 | } 37 | \description{ 38 | Builds condition probability matrices for Horvitz-Thompson estimation from 39 | \pkg{randomizr} declaration 40 | } 41 | \details{ 42 | This function takes a \code{"ra_declaration"}, generated 43 | by the \code{\link[randomizr]{declare_ra}} function in \pkg{randomizr} and 44 | returns a 2n*2n matrix that can be used to fully specify the design for 45 | \code{\link{horvitz_thompson}} estimation. This is done by passing this 46 | matrix to the \code{condition_pr_mat} argument of 47 | \code{\link{horvitz_thompson}}. 48 | 49 | Currently, this function can learn the condition probability matrix for a 50 | wide variety of randomizations: simple, complete, simple clustered, complete 51 | clustered, blocked, block-clustered. 52 | 53 | A condition probability matrix is made up of four submatrices, each of which 54 | corresponds to the 55 | joint and marginal probability that each observation is in one of the two 56 | treatment conditions. 57 | 58 | The upper-left quadrant is an n*n matrix. On the diagonal is the marginal 59 | probability of being in condition 1, often control, for every unit 60 | (Pr(Z_i = Condition1) where Z represents the vector of treatment conditions). 61 | The off-diagonal elements are the joint probabilities of each unit being in 62 | condition 1 with each other unit, Pr(Z_i = Condition1, Z_j = Condition1) 63 | where i indexes the rows and j indexes the columns. 64 | 65 | The upper-right quadrant is also an n*n matrix. On the diagonal is the joint 66 | probability of a unit being in condition 1 and condition 2, often the 67 | treatment, and thus is always 0. The off-diagonal elements are the joint 68 | probability of unit i being in condition 1 and unit j being in condition 2, 69 | Pr(Z_i = Condition1, Z_j = Condition2). 70 | 71 | The lower-left quadrant is also an n*n matrix. On the diagonal is the joint 72 | probability of a unit being in condition 1 and condition 2, and thus is 73 | always 0. The off-diagonal elements are the joint probability of unit i 74 | being in condition 2 and unit j being in condition 1, 75 | Pr(Z_i = Condition2, Z_j = Condition1). 76 | 77 | The lower-right quadrant is an n*n matrix. On the diagonal is the marginal 78 | probability of being in condition 2, often treatment, for every unit 79 | (Pr(Z_i = Condition2)). The off-diagonal elements are the joint probability 80 | of each unit being in condition 2 together, 81 | Pr(Z_i = Condition2, Z_j = Condition2). 
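As a small worked illustration of this structure (a minimal sketch; the exact row and column labels produced may differ), consider complete random assignment of exactly one of two units to condition 2:

\preformatted{
decl <- randomizr::declare_ra(N = 2, m = 1)
pr_mat <- declaration_to_condition_pr_mat(decl)
# Up to labelling, the resulting 4*4 matrix has
#   upper-left diagonal:      Pr(Z_i = Condition1)                   = 0.5
#   upper-left off-diagonal:  Pr(Z_i = Condition1, Z_j = Condition1) = 0
#   upper-right off-diagonal: Pr(Z_i = Condition1, Z_j = Condition2) = 0.5
#   lower-right diagonal:     Pr(Z_i = Condition2)                   = 0.5
#   lower-right off-diagonal: Pr(Z_i = Condition2, Z_j = Condition2) = 0
}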
82 | } 83 | \examples{ 84 | 85 | # Learn condition probability matrix from complete blocked design 86 | library(randomizr) 87 | n <- 100 88 | dat <- data.frame( 89 | blocks = sample(letters[1:10], size = n, replace = TRUE), 90 | y = rnorm(n) 91 | ) 92 | 93 | # Declare complete blocked randomization 94 | bl_declaration <- declare_ra(blocks = dat$blocks, prob = 0.4, simple = FALSE) 95 | # Get probabilities 96 | block_pr_mat <- declaration_to_condition_pr_mat(bl_declaration, 0, 1) 97 | # Do randomiztion 98 | dat$z <- conduct_ra(bl_declaration) 99 | 100 | horvitz_thompson(y ~ z, data = dat, condition_pr_mat = block_pr_mat) 101 | 102 | # When you pass a declaration to horvitz_thompson, this function is called 103 | 104 | # Equivalent to above call 105 | horvitz_thompson(y ~ z, data = dat, ra_declaration = bl_declaration) 106 | 107 | } 108 | \seealso{ 109 | \code{\link{permutations_to_condition_pr_mat}} 110 | } 111 | -------------------------------------------------------------------------------- /man/estimatr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimatr.R 3 | \docType{package} 4 | \name{estimatr} 5 | \alias{estimatr-package} 6 | \alias{estimatr} 7 | \title{estimatr} 8 | \description{ 9 | Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://declaredesign.org/r/estimatr/} 15 | \item \url{https://github.com/DeclareDesign/estimatr} 16 | \item Report bugs at \url{https://github.com/DeclareDesign/estimatr/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Graeme Blair \email{graeme.blair@gmail.com} 22 | 23 | Authors: 24 | \itemize{ 25 | \item Jasper Cooper \email{jjc2247@columbia.edu} 26 | \item Alexander Coppock \email{alex.coppock@yale.edu} 27 | \item Macartan Humphreys \email{macartan@gmail.com} 28 | \item Luke Sonnet \email{luke.sonnet@gmail.com} 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Neal Fultz \email{nfultz@gmail.com} [contributor] 34 | \item Lily Medina \email{lilymiru@gmail.com} [contributor] 35 | \item Russell Lenth \email{russell-lenth@uiowa.edu} [contributor] 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /man/estimatr_glancers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_glance.R 3 | \name{estimatr_glancers} 4 | \alias{estimatr_glancers} 5 | \alias{glance.lm_robust} 6 | \alias{glance.lh_robust} 7 | \alias{glance.iv_robust} 8 | \alias{glance.difference_in_means} 9 | \alias{glance.horvitz_thompson} 10 | \title{Glance at an estimatr object} 11 | \usage{ 12 | \method{glance}{lm_robust}(x, ...) 13 | 14 | \method{glance}{lh_robust}(x, ...) 15 | 16 | \method{glance}{iv_robust}(x, ...) 17 | 18 | \method{glance}{difference_in_means}(x, ...) 19 | 20 | \method{glance}{horvitz_thompson}(x, ...) 
21 | } 22 | \arguments{ 23 | \item{x}{An object returned by one of the estimators} 24 | 25 | \item{...}{extra arguments (not used)} 26 | } 27 | \value{ 28 | For \code{glance.lm_robust}, a data.frame with columns: 29 | \item{r.squared}{the \eqn{R^2}, 30 | \deqn{R^2 = 1 - Sum(e[i]^2) / Sum((y[i] - y^*)^2),} where \eqn{y^*} 31 | is the mean of \eqn{y[i]} if there is an intercept and zero otherwise, 32 | and \eqn{e[i]} is the ith residual.} 33 | \item{adj.r.squared}{the \eqn{R^2} but penalized for having more parameters, \code{rank}} 34 | \item{se_type}{the standard error type specified by the user} 35 | \item{statistic}{the value of the F-statistic} 36 | \item{p.value}{p-value from the F test} 37 | \item{df.residual}{residual degrees of freedom} 38 | \item{nobs}{the number of observations used} 39 | 40 | For \code{glance.lh_robust}, we glance the \code{lm_robust} component only. You can access the linear hypotheses as a data.frame directy from the \code{lh} component of the \code{lh_robust} object 41 | 42 | For \code{glance.iv_robust}, a data.frame with columns: 43 | \item{r.squared}{The \eqn{R^2} of the second stage regression} 44 | \item{adj.r.squared}{The \eqn{R^2} but penalized for having more parameters, \code{rank}} 45 | \item{df.residual}{residual degrees of freedom} 46 | \item{N}{the number of observations used} 47 | \item{se_type}{the standard error type specified by the user} 48 | \item{statistic}{the value of the F-statistic} 49 | \item{p.value}{p-value from the F test} 50 | \item{statistic.weakinst}{the value of the first stage F-statistic, useful for the weak instruments test; only reported if there is only one endogenous variable} 51 | \item{p.value.weakinst}{p-value from the first-stage F test, a test of weak instruments; only reported if there is only one endogenous variable} 52 | \item{statistic.endogeneity}{the value of the F-statistic for the test of endogeneity; often called the Wu-Hausman statistic, with robust standard errors, we employ the regression based test} 53 | \item{p.value.endogeneity}{p-value from the F-test for endogeneity} 54 | \item{statistic.overid}{the value of the chi-squared statistic for the test of instrument correlation with the error term; only reported with overidentification} 55 | \item{p.value.overid}{p-value from the chi-squared test; only reported with overidentification} 56 | 57 | For \code{glance.difference_in_means}, a data.frame with columns: 58 | \item{design}{the design used, and therefore the estimator used} 59 | \item{df}{the degrees of freedom} 60 | \item{nobs}{the number of observations used} 61 | \item{nblocks}{the number of blocks, if used} 62 | \item{nclusters}{the number of clusters, if used} 63 | \item{condition2}{the second, "treatment", condition} 64 | \item{condition1}{the first, "control", condition} 65 | 66 | For \code{glance.horvitz_thompson}, a data.frame with columns: 67 | \item{nobs}{the number of observations used} 68 | \item{se_type}{the type of standard error estimator used} 69 | \item{condition2}{the second, "treatment", condition} 70 | \item{condition1}{the first, "control", condition} 71 | } 72 | \description{ 73 | Glance at an estimatr object 74 | } 75 | \seealso{ 76 | \code{\link[generics:glance]{generics::glance()}}, \code{\link[=lm_robust]{lm_robust()}}, \code{\link[=lm_lin]{lm_lin()}}, \code{\link[=iv_robust]{iv_robust()}}, \code{\link[=difference_in_means]{difference_in_means()}}, \code{\link[=horvitz_thompson]{horvitz_thompson()}} 77 | } 78 | \concept{estimatr glancers} 79 | 
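\examples{
# A minimal sketch of glancing at an lm_robust fit (illustrative data only;
# the returned one-row data.frame has the columns documented above)
dat <- data.frame(y = rnorm(20), x = rnorm(20))
fit <- lm_robust(y ~ x, data = dat)
glance(fit)
}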
-------------------------------------------------------------------------------- /man/estimatr_tidiers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_tidy.R 3 | \name{estimatr_tidiers} 4 | \alias{estimatr_tidiers} 5 | \alias{tidy.lm_robust} 6 | \alias{tidy.iv_robust} 7 | \alias{tidy.difference_in_means} 8 | \alias{tidy.horvitz_thompson} 9 | \alias{tidy.lh_robust} 10 | \alias{tidy.lh} 11 | \title{Tidy an estimatr object} 12 | \usage{ 13 | \method{tidy}{lm_robust}(x, conf.int = TRUE, conf.level = NULL, ...) 14 | 15 | \method{tidy}{iv_robust}(x, conf.int = TRUE, conf.level = NULL, ...) 16 | 17 | \method{tidy}{difference_in_means}(x, conf.int = TRUE, conf.level = NULL, ...) 18 | 19 | \method{tidy}{horvitz_thompson}(x, conf.int = TRUE, conf.level = NULL, ...) 20 | 21 | \method{tidy}{lh_robust}(x, conf.int = TRUE, conf.level = NULL, ...) 22 | 23 | \method{tidy}{lh}(x, conf.int = TRUE, conf.level = NULL, ...) 24 | } 25 | \arguments{ 26 | \item{x}{An object returned by one of the estimators} 27 | 28 | \item{conf.int}{Logical indicating whether or not to include a 29 | confidence interval in the tidied output. Defaults to ‘TRUE’.} 30 | 31 | \item{conf.level}{The confidence level to use for the confidence 32 | interval if ‘conf.int = TRUE’. Must be strictly greater than 0 and less 33 | than 1. Defaults to 0.95, which corresponds to a 95 percent confidence 34 | interval.} 35 | 36 | \item{...}{extra arguments (not used)} 37 | } 38 | \value{ 39 | A data.frame with columns for coefficient names, estimates, standard 40 | errors, confidence intervals, p-values, degrees of freedom, and the 41 | name of the outcome variable 42 | } 43 | \description{ 44 | Tidy an estimatr object 45 | } 46 | \seealso{ 47 | \code{\link[generics:tidy]{generics::tidy()}}, \code{\link[=lm_robust]{lm_robust()}}, \code{\link[=iv_robust]{iv_robust()}}, \code{\link[=difference_in_means]{difference_in_means()}}, \code{\link[=horvitz_thompson]{horvitz_thompson()}} 48 | } 49 | \concept{estimatr tidiers} 50 | -------------------------------------------------------------------------------- /man/extract.lm_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_extract.R 3 | \name{extract.robust_default} 4 | \alias{extract.robust_default} 5 | \alias{extract.lm_robust} 6 | \alias{extract.iv_robust} 7 | \title{Extract model data for \pkg{texreg} package} 8 | \usage{ 9 | extract.robust_default( 10 | model, 11 | include.ci = TRUE, 12 | include.rsquared = TRUE, 13 | include.adjrs = TRUE, 14 | include.nobs = TRUE, 15 | include.fstatistic = FALSE, 16 | include.rmse = TRUE, 17 | include.nclusts = TRUE, 18 | ... 19 | ) 20 | 21 | extract.lm_robust( 22 | model, 23 | include.ci = TRUE, 24 | include.rsquared = TRUE, 25 | include.adjrs = TRUE, 26 | include.nobs = TRUE, 27 | include.fstatistic = FALSE, 28 | include.rmse = TRUE, 29 | include.nclusts = TRUE, 30 | ... 31 | ) 32 | 33 | extract.iv_robust( 34 | model, 35 | include.ci = TRUE, 36 | include.rsquared = TRUE, 37 | include.adjrs = TRUE, 38 | include.nobs = TRUE, 39 | include.fstatistic = FALSE, 40 | include.rmse = TRUE, 41 | include.nclusts = TRUE, 42 | ... 43 | ) 44 | } 45 | \arguments{ 46 | \item{model}{an object of class \code{\link{lm_robust}} or \code{"iv_robust"}} 47 | 48 | \item{include.ci}{logical. 
Defaults to TRUE} 49 | 50 | \item{include.rsquared}{logical. Defaults to TRUE} 51 | 52 | \item{include.adjrs}{logical. Defaults to TRUE} 53 | 54 | \item{include.nobs}{logical. Defaults to TRUE} 55 | 56 | \item{include.fstatistic}{logical. Defaults to FALSE} 57 | 58 | \item{include.rmse}{logical. Defaults to TRUE} 59 | 60 | \item{include.nclusts}{logical. Defaults to TRUE if clusters in \code{model}} 61 | 62 | \item{...}{unused} 63 | } 64 | \description{ 65 | Prepares a \code{"lm_robust"} or \code{"iv_robust"} object for the \pkg{texreg} 66 | package. This is largely a clone of the \code{extract.lm} 67 | method. 68 | } 69 | -------------------------------------------------------------------------------- /man/gen_pr_matrix_cluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_condition_pr_matrix.R 3 | \name{gen_pr_matrix_cluster} 4 | \alias{gen_pr_matrix_cluster} 5 | \title{Generate condition probability matrix given clusters and probabilities} 6 | \usage{ 7 | gen_pr_matrix_cluster(clusters, treat_probs, simple) 8 | } 9 | \arguments{ 10 | \item{clusters}{A vector of clusters} 11 | 12 | \item{treat_probs}{A vector of treatment (condition 2) probabilities} 13 | 14 | \item{simple}{A boolean for whether the assignment is a random sample 15 | assignment (TRUE, default) or complete random assignment (FALSE)} 16 | } 17 | \value{ 18 | a numeric 2n*2n matrix of marginal and joint condition treatment 19 | probabilities to be passed to the \code{condition_pr_mat} argument of 20 | \code{\link{horvitz_thompson}}. 21 | } 22 | \description{ 23 | Generate condition probability matrix given clusters and probabilities 24 | } 25 | \seealso{ 26 | \code{\link{declaration_to_condition_pr_mat}} 27 | } 28 | -------------------------------------------------------------------------------- /man/lh_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimatr_lh_robust.R 3 | \name{lh_robust} 4 | \alias{lh_robust} 5 | \title{Linear Hypothesis for Ordinary Least Squares with Robust Standard Errors} 6 | \usage{ 7 | lh_robust(..., data, linear_hypothesis) 8 | } 9 | \arguments{ 10 | \item{...}{Other arguments to be passed to \code{\link{lm_robust}}} 11 | 12 | \item{data}{A \code{data.frame}} 13 | 14 | \item{linear_hypothesis}{A character string or a matrix specifying a combination of coefficients, to be passed to the hypothesis.matrix argument of car::linearHypothesis. 15 | See \code{\link[car]{linearHypothesis}} for more details.} 16 | } 17 | \value{ 18 | An object of class \code{"lh_robust"} containing the following two components: 19 | 20 | \item{lm_robust}{an object as returned by \code{lm_robust}.} 21 | \item{lh}{A data frame with most of its columns pulled from \code{linearHypothesis}' output.} 22 | 23 | The only analysis directly performed by \code{lh_robust} is a \code{t-test} for the null hypothesis of no effect of the linear combination of coefficients as specified by the user. 24 | All other output components are either extracted from \code{linearHypothesis} or \code{lm_robust}. 25 | 26 | The original output returned by \code{linearHypothesis} is stored in the \code{"linear_hypothesis"} attribute. 27 | } 28 | \description{ 29 | This function fits a linear model with robust standard errors and performs a linear hypothesis test. 
30 | } 31 | \details{ 32 | This function is a wrapper for \code{\link{lm_robust}} and for 33 | \code{\link[car]{linearHypothesis}}. It first runs \code{lm_robust} and 34 | then passes the resulting \code{"lm_robust"} object as an argument to \code{linearHypothesis}. 35 | } 36 | \examples{ 37 | 38 | library(fabricatr) 39 | dat <- fabricate( 40 | N = 40, 41 | y = rpois(N, lambda = 4), 42 | x = rnorm(N), 43 | z = rbinom(N, 1, prob = 0.4), 44 | clusterID = sample(1:4, 40, replace = TRUE) 45 | ) 46 | 47 | # Default variance estimator is HC2 robust standard errors 48 | lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 49 | 50 | # The linear hypothesis argument can be specified in several ways, e.g.: 51 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z = 2x") 52 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "2*x +1*z") 53 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 54 | 55 | # Also recovers other sorts of standard errors just as specified in \code{\link{lm_robust}} 56 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "classical") 57 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "HC1") 58 | 59 | # Can tidy() main output and subcomponents into a data.frame 60 | lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 61 | tidy(lhro) 62 | tidy(lhro$lm_robust) 63 | tidy(lhro$lh) 64 | 65 | # Can use summary() to get more statistics on the main output and subcomponents. 66 | summary(lhro) 67 | summary(lhro$lm_robust) 68 | summary(lhro$lh) 69 | 70 | } 71 | -------------------------------------------------------------------------------- /man/lm_robust_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_lm_robust_fit.R 3 | \name{lm_robust_fit} 4 | \alias{lm_robust_fit} 5 | \title{Internal method that creates linear fits} 6 | \usage{ 7 | lm_robust_fit( 8 | y, 9 | X, 10 | yoriginal = NULL, 11 | Xoriginal = NULL, 12 | weights, 13 | cluster, 14 | fixed_effects = NULL, 15 | ci = TRUE, 16 | se_type, 17 | has_int, 18 | alpha = 0.05, 19 | return_vcov = TRUE, 20 | return_fit = TRUE, 21 | try_cholesky = FALSE, 22 | iv_stage = list(0) 23 | ) 24 | } 25 | \arguments{ 26 | \item{y}{numeric outcome vector} 27 | 28 | \item{X}{numeric design matrix} 29 | 30 | \item{yoriginal}{numeric outcome vector, unprojected if there are fixed effects} 31 | 32 | \item{Xoriginal}{numeric design matrix, unprojected if there are fixed effects. 
Any column named \code{"(Intercept)"} will be dropped} 33 | 34 | \item{weights}{numeric weights vector} 35 | 36 | \item{cluster}{numeric cluster vector} 37 | 38 | \item{fixed_effects}{character matrix of fixed effect groups} 39 | 40 | \item{ci}{boolean that when TRUE returns confidence intervals and p-values} 41 | 42 | \item{se_type}{character denoting which kind of SEs to return} 43 | 44 | \item{has_int}{logical, whether the model has an intercept, used for \eqn{R^2}} 45 | 46 | \item{alpha}{numeric denoting the test size for confidence intervals} 47 | 48 | \item{return_vcov}{logical, whether to return the vcov matrix for later usage} 49 | 50 | \item{return_fit}{logical, whether to return fitted values} 51 | 52 | \item{try_cholesky}{logical, whether to try using a cholesky decomposition to solve LS instead of a QR decomposition} 53 | 54 | \item{iv_stage}{list of length two, the first element denotes the stage of 2SLS IV estimation, where 0 is used for OLS. The second element is only used for the second stage of 2SLS and has the first stage design matrix. For OLS, the default is \code{list(0)}; for the first stage of 2SLS, \code{list(1)}; for the second stage of 2SLS, \code{list(2, first_stage_design_mat)}.} 55 | } 56 | \description{ 57 | Internal method that creates linear fits 58 | } 59 | -------------------------------------------------------------------------------- /man/na.omit_detailed.data.frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_na_omit_detailed.R 3 | \name{na.omit_detailed.data.frame} 4 | \alias{na.omit_detailed.data.frame} 5 | \title{Extra logging on na.omit handler} 6 | \usage{ 7 | na.omit_detailed.data.frame(object) 8 | } 9 | \arguments{ 10 | \item{object}{a data.frame} 11 | } 12 | \value{ 13 | a normal \code{omit} object, with the extra attribute \code{why_omit}, 14 | which contains the leftmost column containing an NA for each row that was dropped, by 15 | column name, if any were dropped. 16 | } 17 | \description{ 18 | Extra logging on na.omit handler 19 | } 20 | \seealso{ 21 | \code{\link{na.omit}} 22 | } 23 | -------------------------------------------------------------------------------- /man/permutations_to_condition_pr_mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_condition_pr_matrix.R 3 | \name{permutations_to_condition_pr_mat} 4 | \alias{permutations_to_condition_pr_mat} 5 | \title{Builds condition probability matrices for Horvitz-Thompson estimation from 6 | permutation matrix} 7 | \usage{ 8 | permutations_to_condition_pr_mat(permutations) 9 | } 10 | \arguments{ 11 | \item{permutations}{A matrix where the rows are units and the columns are 12 | different treatment permutations; treated units must be represented with a 13 | 1 and control units with a 0} 14 | } 15 | \value{ 16 | a numeric 2n*2n matrix of marginal and joint condition treatment 17 | probabilities to be passed to the \code{condition_pr_mat} argument of 18 | \code{\link{horvitz_thompson}}. 
19 | } 20 | \description{ 21 | Builds condition probability matrices for Horvitz-Thompson estimation from 22 | permutation matrix 23 | } 24 | \details{ 25 | This function takes a matrix of permutations, for example from 26 | the \code{\link[randomizr]{obtain_permutation_matrix}} function in 27 | \pkg{randomizr} or through simulation and returns a 2n*2n matrix that can 28 | be used to fully specify the design for \code{\link{horvitz_thompson}} 29 | estimation. You can read more about these matrices in the documentation for 30 | the \code{\link{declaration_to_condition_pr_mat}} function. 31 | 32 | This is done by passing this matrix to the \code{condition_pr_mat} argument 33 | of 34 | } 35 | \examples{ 36 | 37 | # Complete randomization 38 | perms <- replicate(1000, sample(rep(0:1, each = 50))) 39 | comp_pr_mat <- permutations_to_condition_pr_mat(perms) 40 | 41 | # Arbitrary randomization 42 | possible_treats <- cbind( 43 | c(1, 1, 0, 1, 0, 0, 0, 1, 1, 0), 44 | c(0, 1, 1, 0, 1, 1, 0, 1, 0, 1), 45 | c(1, 0, 1, 1, 1, 1, 1, 0, 0, 0) 46 | ) 47 | arb_pr_mat <- permutations_to_condition_pr_mat(possible_treats) 48 | # Simulating a column to be realized treatment 49 | z <- possible_treats[, sample(ncol(possible_treats), size = 1)] 50 | y <- rnorm(nrow(possible_treats)) 51 | horvitz_thompson(y ~ z, condition_pr_mat = arb_pr_mat) 52 | 53 | } 54 | \seealso{ 55 | \code{\link[randomizr]{declare_ra}}, 56 | \code{\link{declaration_to_condition_pr_mat}} 57 | } 58 | -------------------------------------------------------------------------------- /man/predict.lm_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_predict.R 3 | \name{predict.lm_robust} 4 | \alias{predict.lm_robust} 5 | \title{Predict method for \code{lm_robust} object} 6 | \usage{ 7 | \method{predict}{lm_robust}( 8 | object, 9 | newdata, 10 | se.fit = FALSE, 11 | interval = c("none", "confidence", "prediction"), 12 | alpha = 0.05, 13 | na.action = na.pass, 14 | pred.var = NULL, 15 | weights, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{object}{an object of class 'lm_robust'} 21 | 22 | \item{newdata}{a data frame in which to look for variables with which to predict} 23 | 24 | \item{se.fit}{logical. Whether standard errors are required, default = FALSE} 25 | 26 | \item{interval}{type of interval calculation. Can be abbreviated, default = none} 27 | 28 | \item{alpha}{numeric denoting the test size for confidence intervals} 29 | 30 | \item{na.action}{function determining what should be done with missing 31 | values in newdata. The default is to predict NA.} 32 | 33 | \item{pred.var}{the variance(s) for future observations to be assumed for 34 | prediction intervals.} 35 | 36 | \item{weights}{variance weights for prediction. This can be a numeric 37 | vector or a bare (unquoted) name of the weights variable in the supplied 38 | newdata.} 39 | 40 | \item{...}{other arguments, unused} 41 | } 42 | \description{ 43 | Predict method for \code{lm_robust} object 44 | } 45 | \details{ 46 | Produces predicted values, obtained by evaluating the regression 47 | function in the frame \code{newdata} for fits from \code{lm_robust} and 48 | \code{lm_lin}. If the logical se.fit is TRUE, standard errors of the 49 | predictions are calculated. Setting intervals specifies computation of 50 | confidence or prediction (tolerance) intervals at the specified level, 51 | sometimes referred to as narrow vs. wide intervals. 
52 | 53 | The equation used for the standard error of a prediction given a row of 54 | data \eqn{x} is: 55 | 56 | \eqn{\sqrt{x \Sigma x'}}, 57 | 58 | where \eqn{\Sigma} is the estimated variance-covariance matrix from 59 | \code{lm_robust}. 60 | 61 | The prediction intervals are for a single observation at each case in 62 | \code{newdata} with error variance(s) \code{pred.var}. The default is to assume 63 | that future observations have the same error variance as those used for 64 | fitting, which is taken from the fitted \code{\link{lm_robust}} object. If 65 | weights is supplied, the inverse of this is used as a scale factor. If the 66 | fit was weighted, the default is to assume constant prediction variance, 67 | with a warning. 68 | } 69 | \examples{ 70 | 71 | # Set seed 72 | set.seed(42) 73 | 74 | # Simulate data 75 | n <- 10 76 | dat <- data.frame(y = rnorm(n), x = rnorm(n)) 77 | 78 | # Fit lm 79 | lm_out <- lm_robust(y ~ x, data = dat) 80 | # Get predicted fits 81 | fits <- predict(lm_out, newdata = dat) 82 | # With standard errors and confidence intervals 83 | fits <- predict(lm_out, newdata = dat, se.fit = TRUE, interval = "confidence") 84 | 85 | # Use new data as well 86 | new_dat <- data.frame(x = runif(n, 5, 8)) 87 | predict(lm_out, newdata = new_dat) 88 | 89 | # You can also supply custom variance weights for prediction intervals 90 | new_dat$w <- runif(n) 91 | predict(lm_out, newdata = new_dat, weights = w, interval = "prediction") 92 | 93 | } 94 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_glance.R, R/S3_tidy.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{glance} 7 | \alias{tidy} 8 | \title{Objects exported from other packages} 9 | \keyword{internal} 10 | \description{ 11 | These objects are imported from other packages. Follow the links 12 | below to see their documentation. 13 | 14 | \describe{ 15 | \item{generics}{\code{\link[generics]{glance}}, \code{\link[generics]{tidy}}} 16 | }} 17 | 18 | -------------------------------------------------------------------------------- /man/starprep.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_starprep.R 3 | \name{starprep} 4 | \alias{starprep} 5 | \title{Prepare model fits for stargazer} 6 | \usage{ 7 | starprep( 8 | ..., 9 | stat = c("std.error", "statistic", "p.value", "ci", "df"), 10 | se_type = NULL, 11 | clusters = NULL, 12 | alpha = 0.05 13 | ) 14 | } 15 | \arguments{ 16 | \item{...}{a list of lm_robust or lm objects} 17 | 18 | \item{stat}{either "std.error" (the default), "statistic" (the t-statistic), "p.value", "ci", or "df"} 19 | 20 | \item{se_type}{(optional) if any of the objects are lm objects, what standard 21 | errors should be used. Must only be one type and will be used for all lm 22 | objects passed to starprep. See \code{commarobust} for more.} 23 | 24 | \item{clusters}{(optional) if any of the objects are lm objects, what clusters 25 | should be used, if any. Must only be one vector and will 26 | be used for all lm objects passed to starprep. 
See \code{commarobust} for more.} 27 | 28 | \item{alpha}{(optional) if any of the objects are lm objects, what significance level 29 | should be used for the p-values or confidence intervals} 30 | } 31 | \value{ 32 | a list of vectors of extracted statistics for stargazers 33 | } 34 | \description{ 35 | Prepare model fits for stargazer 36 | } 37 | \details{ 38 | Used to help extract statistics from lists of model fits for stargazer. 39 | Prefers lm_robust objects, but because \code{stargazer} does not work with \code{lm_robust} 40 | objects, \code{starprep} can also take \code{lm} objects and calls \code{commarobust} to get 41 | the preferred, robust statistics. 42 | } 43 | \examples{ 44 | 45 | library(stargazer) 46 | 47 | lm1 <- lm(mpg ~ hp, data = mtcars) 48 | lm2 <- lm(mpg ~ hp + wt, data = mtcars) 49 | 50 | # Use default "HC2" standard errors 51 | stargazer(lm1, lm2, 52 | se = starprep(lm1, lm2), 53 | p = starprep(lm1, lm2, stat = "p.value"), 54 | omit.stat = "f") 55 | # NB: We remove the F-stat because stargazer only can use original F-stat 56 | # which uses classical SEs 57 | 58 | # Use default "CR2" standard errors with clusters 59 | stargazer(lm1, lm2, 60 | se = starprep(lm1, lm2, clusters = mtcars$carb), 61 | p = starprep(lm1, lm2, clusters = mtcars$carb, stat = "p.value"), 62 | omit.stat = "f") 63 | 64 | # Can also specify significance levels and different standard errors 65 | stargazer(lm1, lm2, 66 | ci.custom = starprep(lm1, lm2, se_type = "HC3", alpha = 0.1, stat = "ci"), 67 | omit.stat = "f") 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // ht_covar_partial 15 | double ht_covar_partial(const Eigen::VectorXd& y1, const Eigen::VectorXd& y0, const Eigen::MatrixXd& p10, const Eigen::VectorXd& p1, const Eigen::VectorXd& p0); 16 | RcppExport SEXP _estimatr_ht_covar_partial(SEXP y1SEXP, SEXP y0SEXP, SEXP p10SEXP, SEXP p1SEXP, SEXP p0SEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type y1(y1SEXP); 21 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type y0(y0SEXP); 22 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type p10(p10SEXP); 23 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type p1(p1SEXP); 24 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type p0(p0SEXP); 25 | rcpp_result_gen = Rcpp::wrap(ht_covar_partial(y1, y0, p10, p1, p0)); 26 | return rcpp_result_gen; 27 | END_RCPP 28 | } 29 | // ht_var_partial 30 | double ht_var_partial(const 
Eigen::VectorXd& y, const Eigen::MatrixXd& p); 31 | RcppExport SEXP _estimatr_ht_var_partial(SEXP ySEXP, SEXP pSEXP) { 32 | BEGIN_RCPP 33 | Rcpp::RObject rcpp_result_gen; 34 | Rcpp::RNGScope rcpp_rngScope_gen; 35 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type y(ySEXP); 36 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type p(pSEXP); 37 | rcpp_result_gen = Rcpp::wrap(ht_var_partial(y, p)); 38 | return rcpp_result_gen; 39 | END_RCPP 40 | } 41 | // demeanMat2 42 | Eigen::ArrayXXd demeanMat2(const Eigen::MatrixXd& what, const Rcpp::IntegerMatrix& fes, const Rcpp::NumericVector& weights, const int& start_col, const double& eps); 43 | RcppExport SEXP _estimatr_demeanMat2(SEXP whatSEXP, SEXP fesSEXP, SEXP weightsSEXP, SEXP start_colSEXP, SEXP epsSEXP) { 44 | BEGIN_RCPP 45 | Rcpp::RObject rcpp_result_gen; 46 | Rcpp::RNGScope rcpp_rngScope_gen; 47 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type what(whatSEXP); 48 | Rcpp::traits::input_parameter< const Rcpp::IntegerMatrix& >::type fes(fesSEXP); 49 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type weights(weightsSEXP); 50 | Rcpp::traits::input_parameter< const int& >::type start_col(start_colSEXP); 51 | Rcpp::traits::input_parameter< const double& >::type eps(epsSEXP); 52 | rcpp_result_gen = Rcpp::wrap(demeanMat2(what, fes, weights, start_col, eps)); 53 | return rcpp_result_gen; 54 | END_RCPP 55 | } 56 | // AtA 57 | Eigen::MatrixXd AtA(const Eigen::MatrixXd& A); 58 | RcppExport SEXP _estimatr_AtA(SEXP ASEXP) { 59 | BEGIN_RCPP 60 | Rcpp::RObject rcpp_result_gen; 61 | Rcpp::RNGScope rcpp_rngScope_gen; 62 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type A(ASEXP); 63 | rcpp_result_gen = Rcpp::wrap(AtA(A)); 64 | return rcpp_result_gen; 65 | END_RCPP 66 | } 67 | // Kr 68 | Eigen::MatrixXd Kr(const Eigen::MatrixXd& A, const Eigen::MatrixXd& B); 69 | RcppExport SEXP _estimatr_Kr(SEXP ASEXP, SEXP BSEXP) { 70 | BEGIN_RCPP 71 | Rcpp::RObject rcpp_result_gen; 72 | Rcpp::RNGScope rcpp_rngScope_gen; 73 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type A(ASEXP); 74 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type B(BSEXP); 75 | rcpp_result_gen = Rcpp::wrap(Kr(A, B)); 76 | return rcpp_result_gen; 77 | END_RCPP 78 | } 79 | // lm_solver 80 | List lm_solver(const Eigen::Map& X, const Eigen::Map& y, const bool& try_cholesky); 81 | RcppExport SEXP _estimatr_lm_solver(SEXP XSEXP, SEXP ySEXP, SEXP try_choleskySEXP) { 82 | BEGIN_RCPP 83 | Rcpp::RObject rcpp_result_gen; 84 | Rcpp::RNGScope rcpp_rngScope_gen; 85 | Rcpp::traits::input_parameter< const Eigen::Map& >::type X(XSEXP); 86 | Rcpp::traits::input_parameter< const Eigen::Map& >::type y(ySEXP); 87 | Rcpp::traits::input_parameter< const bool& >::type try_cholesky(try_choleskySEXP); 88 | rcpp_result_gen = Rcpp::wrap(lm_solver(X, y, try_cholesky)); 89 | return rcpp_result_gen; 90 | END_RCPP 91 | } 92 | // lm_variance 93 | List lm_variance(Eigen::Map& X, const Rcpp::Nullable& Xunweighted, const Eigen::Map& XtX_inv, const Eigen::Map& ei, const double weight_mean, const Rcpp::Nullable& cluster, const int& J, const bool& ci, const String se_type, const std::vector& which_covs, const int& fe_rank); 94 | RcppExport SEXP _estimatr_lm_variance(SEXP XSEXP, SEXP XunweightedSEXP, SEXP XtX_invSEXP, SEXP eiSEXP, SEXP weight_meanSEXP, SEXP clusterSEXP, SEXP JSEXP, SEXP ciSEXP, SEXP se_typeSEXP, SEXP which_covsSEXP, SEXP fe_rankSEXP) { 95 | BEGIN_RCPP 96 | Rcpp::RObject rcpp_result_gen; 97 | Rcpp::RNGScope rcpp_rngScope_gen; 98 | 
Rcpp::traits::input_parameter< Eigen::Map& >::type X(XSEXP); 99 | Rcpp::traits::input_parameter< const Rcpp::Nullable& >::type Xunweighted(XunweightedSEXP); 100 | Rcpp::traits::input_parameter< const Eigen::Map& >::type XtX_inv(XtX_invSEXP); 101 | Rcpp::traits::input_parameter< const Eigen::Map& >::type ei(eiSEXP); 102 | Rcpp::traits::input_parameter< const double >::type weight_mean(weight_meanSEXP); 103 | Rcpp::traits::input_parameter< const Rcpp::Nullable& >::type cluster(clusterSEXP); 104 | Rcpp::traits::input_parameter< const int& >::type J(JSEXP); 105 | Rcpp::traits::input_parameter< const bool& >::type ci(ciSEXP); 106 | Rcpp::traits::input_parameter< const String >::type se_type(se_typeSEXP); 107 | Rcpp::traits::input_parameter< const std::vector& >::type which_covs(which_covsSEXP); 108 | Rcpp::traits::input_parameter< const int& >::type fe_rank(fe_rankSEXP); 109 | rcpp_result_gen = Rcpp::wrap(lm_variance(X, Xunweighted, XtX_inv, ei, weight_mean, cluster, J, ci, se_type, which_covs, fe_rank)); 110 | return rcpp_result_gen; 111 | END_RCPP 112 | } 113 | // naomitwhy 114 | DataFrame naomitwhy(DataFrame df, Function recursive_subset); 115 | RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP recursive_subsetSEXP) { 116 | BEGIN_RCPP 117 | Rcpp::RObject rcpp_result_gen; 118 | Rcpp::RNGScope rcpp_rngScope_gen; 119 | Rcpp::traits::input_parameter< DataFrame >::type df(dfSEXP); 120 | Rcpp::traits::input_parameter< Function >::type recursive_subset(recursive_subsetSEXP); 121 | rcpp_result_gen = Rcpp::wrap(naomitwhy(df, recursive_subset)); 122 | return rcpp_result_gen; 123 | END_RCPP 124 | } 125 | 126 | static const R_CallMethodDef CallEntries[] = { 127 | {"_estimatr_ht_covar_partial", (DL_FUNC) &_estimatr_ht_covar_partial, 5}, 128 | {"_estimatr_ht_var_partial", (DL_FUNC) &_estimatr_ht_var_partial, 2}, 129 | {"_estimatr_demeanMat2", (DL_FUNC) &_estimatr_demeanMat2, 5}, 130 | {"_estimatr_AtA", (DL_FUNC) &_estimatr_AtA, 1}, 131 | {"_estimatr_Kr", (DL_FUNC) &_estimatr_Kr, 2}, 132 | {"_estimatr_lm_solver", (DL_FUNC) &_estimatr_lm_solver, 3}, 133 | {"_estimatr_lm_variance", (DL_FUNC) &_estimatr_lm_variance, 11}, 134 | {"_estimatr_naomitwhy", (DL_FUNC) &_estimatr_naomitwhy, 2}, 135 | {NULL, NULL, 0} 136 | }; 137 | 138 | RcppExport void R_init_estimatr(DllInfo *dll) { 139 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 140 | R_useDynamicSymbols(dll, FALSE); 141 | } 142 | -------------------------------------------------------------------------------- /src/horvitz_thompson_variance.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | // [[Rcpp::plugins(cpp11)]] 3 | 4 | #include 5 | using namespace Rcpp; 6 | 7 | // These functions help compute the variance for the Horvitz-Thompson estimators 8 | // TODO use symmetry and matrices to improve speed 9 | 10 | // [[Rcpp::export]] 11 | double ht_covar_partial(const Eigen::VectorXd & y1, 12 | const Eigen::VectorXd & y0, 13 | const Eigen::MatrixXd & p10, 14 | const Eigen::VectorXd & p1, 15 | const Eigen::VectorXd & p0) { 16 | double cov_total = 0.0; 17 | 18 | for (int i = 0; i < y1.size(); ++i) { 19 | for(int j = 0; j < y0.size(); ++j) { 20 | if(p10(i, j) == 0) { 21 | cov_total += y1(i) * y0(j) * (p10(i, j) - p1(i) * p0(j)); 22 | } else { 23 | cov_total += y1(i) * y0(j) * (p10(i, j) - p1(i) * p0(j)) / p10(i, j); 24 | } 25 | } 26 | } 27 | 28 | return cov_total; 29 | } 30 | 31 | // [[Rcpp::export]] 32 | double ht_var_partial(const Eigen::VectorXd & y, 33 | const Eigen::MatrixXd & 
p) { 34 | double var_total = 0.0; 35 | 36 | for (int i = 0; i < y.size(); ++i) { 37 | for(int j = 0; j < y.size(); ++j) { 38 | if(i != j) { 39 | if (p(i, j) == 0) { 40 | var_total += y(i) * y(j) * (p(i, j) - p(i,i) * p(j,j)) + 41 | std::pow(y(i), 2) * p(i, i) / 2.0 + std::pow(y(j), 2) * p(j, j) / 2.0; 42 | } else { 43 | var_total += y(i) * y(j) * (p(i, j) - p(i,i) * p(j,j)) / p(i, j); 44 | } 45 | 46 | } 47 | } 48 | } 49 | 50 | return var_total; 51 | } 52 | -------------------------------------------------------------------------------- /src/naomit.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | // [[Rcpp::plugins(cpp11)]] 3 | 4 | #include 5 | using namespace Rcpp; 6 | 7 | 8 | template 9 | Vector generic_logical_subset_impl( Vector xin, LogicalVector w){ 10 | return xin[w] ; 11 | } 12 | 13 | SEXP generic_logical_subset( SEXP xin , LogicalVector w){ 14 | RCPP_RETURN_VECTOR(generic_logical_subset_impl, xin, w) ; 15 | } 16 | 17 | // [[Rcpp::export]] 18 | DataFrame naomitwhy(DataFrame df, Function recursive_subset) { 19 | int m = df.nrow(); 20 | int n = df.ncol(); 21 | 22 | Function isna("is.na"); 23 | 24 | CharacterVector df_names = df.names(); 25 | 26 | LogicalVector omit = LogicalVector(m); 27 | 28 | int omit_count = 0; 29 | 30 | List why_omit(n); 31 | why_omit.names() = df_names; 32 | LogicalVector why_omit_idx(n); 33 | 34 | 35 | for (int j = 0; j < n; j++) { 36 | 37 | std::vector why_omit_j; 38 | 39 | LogicalVector v_isna = isna(df[j]); 40 | 41 | for(int ii = m; ii < LENGTH(v_isna); ){ 42 | for(int i = 0; i < m; i++, ii++) 43 | v_isna[i] |= v_isna[ii]; 44 | } 45 | 46 | for(int i = 0; i < m; i++){ 47 | if(v_isna[i]){ 48 | if(!omit[i]){ 49 | why_omit_j.push_back(i + 1); 50 | } 51 | omit[i] = true; 52 | }; 53 | } 54 | 55 | if(why_omit_j.size() > 0){ 56 | why_omit[j] = wrap(why_omit_j); 57 | why_omit_idx[j] = true; 58 | omit_count += why_omit_j.size(); 59 | } 60 | } 61 | 62 | if(omit_count == 0){ return(df); } 63 | 64 | IntegerVector omit_idx = IntegerVector(omit_count); 65 | for(int i = 0, ii=0; ii < omit_count; i++){ 66 | if(omit[i]) omit_idx[ii++] = i+1; 67 | } 68 | 69 | CharacterVector rownames = df.attr("row.names"); 70 | omit_idx.attr("names") = rownames[omit]; 71 | 72 | omit_idx.attr("why_omit") = why_omit[why_omit_idx]; 73 | omit_idx.attr("class") = CharacterVector::create("omit", "detailed"); 74 | 75 | omit = !omit; 76 | 77 | List out(n); 78 | 79 | for(int i = 0; i < n; i++){ 80 | SEXP dfi = df(i); 81 | if(LENGTH(dfi) == m){ 82 | out[i] = generic_logical_subset(dfi, omit); 83 | } else { 84 | out[i] = recursive_subset(dfi, omit); 85 | } 86 | } 87 | 88 | out.names() = df_names; 89 | out.attr("row.names") = rownames[omit]; 90 | out.attr("na.action") = omit_idx; 91 | out.attr("class") = df.attr("class"); 92 | 93 | return(out); 94 | } 95 | 96 | 97 | // require(microbenchmark) 98 | // df <- expand.grid(x=c(1:100, NA), y=c(1:5, NA), z=c(1:8, NA), q=c(NA,2:5)) 99 | // df2 <- na.omit(df) 100 | // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") 101 | // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms") 102 | 103 | // df <- rbind(df, df2, df) 104 | // df2 <- rbind(df2, df2, df2) 105 | // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") 106 | // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms") 107 | 108 | // df <- cbind(df, df,df) 109 | // 
df2 <- cbind(df2, df2, df2) 110 | // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") 111 | // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms") 112 | 113 | // sleep[c("sleep", "foo")] = list(sleep, matrix(1:40, 20)) 114 | // sleep[cbind(c(1,5,9), c(2,1,3))] <- NA 115 | // sleep$sleep[cbind(1+c(1,5,9), c(2,1,3))] <- NA 116 | // sleep$foo[12,1] <- NA 117 | // attributes(estimatr:::na.omit_detailed.data.frame(sleep)) 118 | -------------------------------------------------------------------------------- /tests/sleep.R: -------------------------------------------------------------------------------- 1 | library(estimatr) 2 | lm_robust(extra~group, sleep) 3 | -------------------------------------------------------------------------------- /tests/sleep.Rout.save: -------------------------------------------------------------------------------- 1 | 2 | R version 3.5.0 (2018-04-23) -- "Joy in Playing" 3 | Copyright (C) 2018 The R Foundation for Statistical Computing 4 | Platform: x86_64-apple-darwin15.6.0 (64-bit) 5 | 6 | R is free software and comes with ABSOLUTELY NO WARRANTY. 7 | You are welcome to redistribute it under certain conditions. 8 | Type 'license()' or 'licence()' for distribution details. 9 | 10 | R is a collaborative project with many contributors. 11 | Type 'contributors()' for more information and 12 | 'citation()' on how to cite R or R packages in publications. 13 | 14 | Type 'demo()' for some demos, 'help()' for on-line help, or 15 | 'help.start()' for an HTML browser interface to help. 16 | Type 'q()' to quit R. 17 | 18 | > library(estimatr) 19 | > lm_robust(extra~group, sleep) 20 | Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF 21 | (Intercept) 0.75 0.5657345 1.325710 0.20151554 -0.4385641 1.938564 18 22 | group2 1.58 0.8490910 1.860813 0.07918671 -0.2038740 3.363874 18 23 | > 24 | > proc.time() 25 | user system elapsed 26 | 0.782 0.085 0.851 27 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(estimatr) 3 | 4 | test_check("estimatr") 5 | -------------------------------------------------------------------------------- /tests/testthat/helper-lm-robust-se.R: -------------------------------------------------------------------------------- 1 | ## BMlmSE.R implements Bell-McCaffrey standard errors 2 | ## This code is taken from Michal Kolesar 3 | ## https://github.com/kolesarm/Robust-Small-Sample-Standard-Errors 4 | ## Only changed the name of one function, df 5 | 6 | #' Compute the inverse square root of a symmetric matrix 7 | #' @param A matrix 8 | MatSqrtInverse <- function(A) { 9 | ei <- eigen(A, symmetric = TRUE) 10 | 11 | if (min(ei$values) <= 0) { 12 | warning("Gram matrix doesn't appear to be positive definite") 13 | } 14 | 15 | d <- pmax(ei$values, 0) 16 | d2 <- 1 / sqrt(d) 17 | d2[d == 0] <- 0 18 | ## diag(d2) is d2 x d2 identity if d2 is scalar, instead we want 1x1 matrix 19 | ei$vectors %*% (if (length(d2) == 1) d2 else diag(d2)) %*% t(ei$vectors) 20 | } 21 | 22 | #' Compute Bell-McCaffrey Standard Errors 23 | #' @param model Fitted model returned by the \code{lm} function 24 | #' @param clustervar Factor variable that defines clusters. If \code{NULL} (or 25 | #' not supplied), the command computes heteroscedasticity-robust standard 26 | #' errors, rather than cluster-robust standard errors. 
27 | #' @param ell A vector of the same length as the dimension of covariates, 28 | #' specifying which linear combination \eqn{\ell'\beta} of coefficients 29 | #' \eqn{\beta} to compute. If \code{NULL}, compute standard errors for each 30 | #' regressor coefficient 31 | #' @param IK Logical flag only relevant if cluster-robust standard errors are 32 | #' being computed. Specifies whether to compute the degrees-of-freedom 33 | #' adjustment using the Imbens-Kolesár method (if \code{TRUE}), or the 34 | #' Bell-McCaffrey method (if \code{FALSE}) 35 | #' @return Returns a list with the following components \describe{ 36 | #' 37 | #' \item{vcov}{Variance-covariance matrix estimator. For the case without 38 | #' clustering, it corresponds to the HC2 estimator (see MacKinnon and White, 39 | #' 1985 and the reference manual for the \code{sandwich} package). For the case 40 | #' with clustering, it corresponds to a generalization of the HC2 estimator, 41 | #' called LZ2 in Imbens and Kolesár.} 42 | #' 43 | #' \item{dof}{Degrees-of-freedom adjustment} 44 | #' 45 | #' \item{se}{Standard error} 46 | #' 47 | #' \item{adj.se}{Adjusted standard errors. For \beta_j, they are defined as 48 | #' \code{adj.se[j]=sqrt(vcov[j,j]se*qt(0.975,df=dof)} so that the Bell-McCaffrey 49 | #' confidence intervals are given as \code{coefficients(fm)[j] +- 1.96* adj.se=} 50 | #' 51 | #' \item{se.Stata}{Square root of the cluster-robust variance estimator used in 52 | #' STATA} 53 | #' 54 | #' } 55 | #' @examples 56 | #' ## No clustering: 57 | #' set.seed(42) 58 | #' x <- sin(1:10) 59 | #' y <- rnorm(10) 60 | #' fm <- lm(y~x) 61 | #' BMlmSE(fm) 62 | #' ## Clustering, defining the first six observations to be in cluster 1, the 63 | #' #next two in cluster 2, and the last three in cluster three. 
64 | #' clustervar <- as.factor(c(rep(1, 6), rep(2, 2), rep(3, 2))) 65 | #' BMlmSE(fm, clustervar) 66 | BMlmSE <- function(model, clustervar=NULL, ell=NULL, IK=TRUE) { 67 | X <- model.matrix(model) 68 | sum.model <- summary.lm(model) 69 | n <- sum(sum.model$df[1:2]) 70 | K <- model$rank 71 | XXinv <- sum.model$cov.unscaled # XX^{-1} 72 | u <- residuals(model) 73 | 74 | ## Compute DoF given G'*Omega*G without calling eigen as suggested by 75 | ## Winston Lin 76 | DoF <- function(GG) 77 | sum(diag(GG)) ^ 2 / sum(GG * GG) 78 | ## Previously: 79 | ## lam <- eigen(GG, only.values=TRUE)$values 80 | ## sum(lam)^2/sum(lam^2) 81 | 82 | ## no clustering 83 | if (is.null(clustervar)) { 84 | Vhat <- sandwich::vcovHC(model, type = "HC2") 85 | Vhat.Stata <- Vhat * NA 86 | 87 | M <- diag(n) - X %*% XXinv %*% t(X) # annihilator matrix 88 | ## G'*Omega*G 89 | GOG <- function(ell) { 90 | Xtilde <- drop(X %*% XXinv %*% ell / sqrt(diag(M))) 91 | crossprod(M * Xtilde) 92 | } 93 | } else { 94 | if (!is.factor(clustervar)) stop("'clustervar' must be a factor") 95 | 96 | ## Stata 97 | S <- length(levels(clustervar)) # number clusters 98 | uj <- apply(u * X, 2, function(x) tapply(x, clustervar, sum)) 99 | Vhat.Stata <- S / (S - 1) * (n - 1) / (n - K) * 100 | sandwich::sandwich(model, meat = crossprod(uj) / n) 101 | 102 | ## HC2 103 | tXs <- function(s) { 104 | Xs <- X[clustervar == s, , drop = FALSE] 105 | MatSqrtInverse(diag(NROW(Xs)) - Xs %*% XXinv %*% t(Xs)) %*% Xs 106 | } 107 | tX <- lapply(levels(clustervar), tXs) # list of matrices 108 | 109 | tu <- split(u, clustervar) 110 | tutX <- sapply(seq_along(tu), function(i) crossprod(tu[[i]], tX[[i]])) 111 | Vhat <- sandwich::sandwich(model, meat = tcrossprod(tutX) / n) 112 | 113 | ## DOF adjustment 114 | tHs <- function(s) { 115 | Xs <- X[clustervar == s, , drop = FALSE] 116 | index <- which(clustervar == s) 117 | ss <- outer(rep(0, n), index) # n x ns matrix of 0 118 | ss[cbind(index, 1:length(index))] <- 1 119 | ss - X %*% XXinv %*% t(Xs) 120 | } 121 | tH <- lapply(levels(clustervar), tHs) # list of matrices 122 | 123 | Moulton <- function() { 124 | ## Moulton estimates 125 | ns <- tapply(u, clustervar, length) 126 | ssr <- sum(u ^ 2) 127 | rho <- max((sum(sapply(seq_along(tu), function(i) 128 | sum(tu[[i]] %o% tu[[i]]))) - ssr) / (sum(ns ^ 2) - n), 0) 129 | c(sig.eps = max(ssr / n - rho, 0), rho = rho) 130 | } 131 | 132 | GOG <- function(ell) { 133 | G <- sapply( 134 | seq_along(tX), 135 | function(i) tH[[i]] %*% tX[[i]] %*% XXinv %*% ell 136 | ) 137 | GG <- crossprod(G) 138 | 139 | ## IK method 140 | if (IK == TRUE) { 141 | Gsums <- apply( 142 | G, 2, 143 | function(x) tapply(x, clustervar, sum) 144 | ) # Z'*G 145 | GG <- Moulton()[1] * GG + Moulton()[2] * crossprod(Gsums) 146 | } 147 | GG 148 | } 149 | } 150 | 151 | if (!is.null(ell)) { 152 | se <- drop(sqrt(crossprod(ell, Vhat) %*% ell)) 153 | dof <- DoF(GOG(ell)) 154 | se.Stata <- drop(sqrt(crossprod(ell, Vhat.Stata) %*% ell)) 155 | } else { 156 | se <- sqrt(diag(Vhat)) 157 | dof <- sapply(seq(K), function(k) DoF(GOG(diag(K)[, k]))) 158 | se.Stata <- sqrt(diag(Vhat.Stata)) 159 | } 160 | names(dof) <- names(se) 161 | 162 | list( 163 | vcov = Vhat, dof = dof, adj.se = se * qt(0.975, df = dof) / qnorm(0.975), 164 | se = se, se.Stata = se.Stata 165 | ) 166 | } 167 | 168 | -------------------------------------------------------------------------------- /tests/testthat/helper-return-cleaners.R: -------------------------------------------------------------------------------- 1 | # This fn removes calls from 
function returns to make testing easier 2 | rmcall <- function(obj) { 3 | if (!is.null(obj[["call"]])) { 4 | obj[["call"]] <- NULL 5 | } 6 | return(obj) 7 | } 8 | 9 | # Casts conditions as character objects for equality purposes 10 | condchr <- function(obj) { 11 | obj[["condition2"]] <- as.character(obj[["condition2"]]) 12 | obj[["condition1"]] <- as.character(obj[["condition1"]]) 13 | 14 | obj 15 | } 16 | -------------------------------------------------------------------------------- /tests/testthat/helper-se-types.R: -------------------------------------------------------------------------------- 1 | # se_types for various files 2 | 3 | se_types <- c("classical", "HC0", "HC1", "HC2", "HC3") 4 | cr_se_types <- c("CR0", "stata", "CR2") 5 | -------------------------------------------------------------------------------- /tests/testthat/mtcars.csv: -------------------------------------------------------------------------------- 1 | "mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" 2 | 21,6,160,110,3.9,2.62,16.46,0,1,4,4 3 | 21,6,160,110,3.9,2.875,17.02,0,1,4,4 4 | 22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 5 | 21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 6 | 18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 7 | 18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 8 | 14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 9 | 24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 10 | 22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 11 | 19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 12 | 17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 13 | 16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 14 | 17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 15 | 15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 16 | 10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 17 | 10.4,8,460,215,3,5.424,17.82,0,0,3,4 18 | 14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 19 | 32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 20 | 30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 21 | 33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 22 | 21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 23 | 15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 24 | 15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 25 | 13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 26 | 19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 27 | 27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 28 | 26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 29 | 30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 30 | 15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 31 | 19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 32 | 15,8,301,335,3.54,3.57,14.6,0,1,5,8 33 | 21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 34 | -------------------------------------------------------------------------------- /tests/testthat/run-stata-areg-models.do: -------------------------------------------------------------------------------- 1 | // This file fits many models in stata and outputs the estimates for comparison with estimatr 2 | 3 | clear all 4 | import delimited mtcars.csv 5 | 6 | gen w = drat / 5 7 | 8 | file open outf using stata-fe-ests.txt, write r 9 | 10 | // xtset carb 11 | // xtreg mpg hp, fe 12 | 13 | areg mpg hp, absorb(carb) 14 | mat V=e(V) 15 | file write outf _n "classical" _tab (V[1,1]) _tab (e(F)) 16 | 17 | areg mpg hp, absorb(carb) vce(robust) 18 | mat V=e(V) 19 | file write outf _n "HC1" _tab (V[1,1]) _tab (e(F)) 20 | 21 | areg mpg hp, absorb(carb) vce(cluster cyl) 22 | mat V=e(V) 23 | file write outf _n "stata_cl" _tab (V[1,1]) _tab (e(F)) 24 | 25 | areg mpg hp [aweight=w], absorb(carb) 26 | predict hii, hat 27 | mat V=e(V) 28 | file write outf _n "classicalw" _tab (V[1,1]) _tab (e(F)) 29 | 30 | areg mpg hp [aweight=w], absorb(carb) vce(robust) 31 | mat V=e(V) 32 | file write outf _n "HC1w" _tab (V[1,1]) _tab (e(F)) 33 | 34 | areg mpg hp 
[aweight=w], absorb(carb) vce(cluster cyl) 35 | mat V=e(V) 36 | file write outf _n "stata_clw" _tab (V[1,1]) _tab (e(F)) 37 | 38 | file close outf 39 | -------------------------------------------------------------------------------- /tests/testthat/run-stata-iv-models.do: -------------------------------------------------------------------------------- 1 | // This file fits many models in stata and outputs the estimates for comparison with estimatr 2 | 3 | clear all 4 | import delimited mtcars.csv 5 | 6 | gen w = drat / 5 7 | 8 | file open outf using stata-iv-ests.txt, write r 9 | 10 | ivregress 2sls mpg (hp am = wt gear), small 11 | mat V=e(V) 12 | file write outf _n "classical" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 13 | 14 | ivregress 2sls mpg (hp am = wt gear), small rob 15 | mat V=e(V) 16 | file write outf _n "rob" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 17 | 18 | ivregress 2sls mpg (hp am = wt gear), small vce(cluster cyl) 19 | mat V=e(V) 20 | file write outf _n "cl" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 21 | 22 | ivregress 2sls mpg (hp am = wt gear) [aweight = w], small 23 | mat V=e(V) 24 | file write outf _n "classical_w" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 25 | 26 | ivregress 2sls mpg (hp am = wt gear) [aweight = w], small rob 27 | mat V=e(V) 28 | file write outf _n "rob_w" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 29 | 30 | ivregress 2sls mpg (hp am = wt gear) [aweight = w], small vce(cluster cyl) 31 | mat V=e(V) 32 | file write outf _n "cl_w" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 33 | 34 | file close outf 35 | 36 | 37 | cap file close outfdiag 38 | file open outfdiag using stata-iv-diagnostics.txt, write r 39 | 40 | #delimit ; 41 | local formulae = `" 42 | "(hp = wt)" 43 | "(hp am = wt gear)" 44 | "gear (hp = wt)" 45 | "gear (hp = wt am)" 46 | "' ; 47 | local options = `" 48 | "small" 49 | "rob" 50 | "cluster(cyl)" 51 | "small noconstant" 52 | "rob noconstant" 53 | "cluster(cyl) noconstant" 54 | "' ; 55 | local weights = `" 56 | "" 57 | "[aweight = w]" 58 | "' ; 59 | #delimit cr 60 | 61 | foreach f in `formulae' { 62 | display "`f'" 63 | foreach opt in `options' { 64 | foreach w in `weights' { 65 | ivregress 2sls mpg `f' `w', `opt' 66 | estat firststage, all 67 | mat singleresults=r(singleresults) 68 | local rows = rowsof(singleresults) 69 | forvalues i=1/`rows' { 70 | cap file write outfdiag "`f';`w';`opt';" "weak`i'" ";" (singleresults[`i',5]) ";" (singleresults[`i',6]) ";" (singleresults[`i',4]) ";" (singleresults[`i',7]) _n 71 | } 72 | estat endogenous, forceweights 73 | if strpos("`opt'", "rob") > 0 | strpos("`opt'", "cluster") > 0 { 74 | file write outfdiag "`f';`w';`opt';" "endog" ";" (r(regFdf_n)) ";" (r(regFdf_d)) ";" (r(regF)) ";" (r(p_regF)) _n 75 | cap estat overid, forceweights 76 | file write outfdiag "`f';`w';`opt';" "overid" ";" (r(df)) ";.;" (r(score)) ";" (r(p_score)) _n 77 | } 78 | else { 79 | file write outfdiag "`f';`w';`opt';" "endog" ";" (r(df)) ";" (r(wudf_r)) ";" (r(wu)) ";" (r(p_wu)) _n 80 | cap estat overid, forceweights 81 | file write outfdiag "`f';`w';`opt';" "overid" ";" (r(df)) ";.;" (r(sargan)) ";" (r(p_sargan)) _n 82 | } 83 | } 84 | 85 | } 86 | 87 | } 88 | 89 | file close outfdiag 90 | 
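For orientation, a rough sketch of the estimatr calls these Stata benchmarks are compared against (an editorial illustration, not the actual test code; the se_type labels follow the conventions in helper-se-types.R above):

library(estimatr)
mt <- read.csv("mtcars.csv")
mt$w <- mt$drat / 5

# ivregress 2sls mpg (hp am = wt gear), small                 -> classical SEs
iv_robust(mpg ~ hp + am | wt + gear, data = mt, se_type = "classical")
# ..., small rob                                              -> HC1 robust SEs
iv_robust(mpg ~ hp + am | wt + gear, data = mt, se_type = "HC1")
# ..., small vce(cluster cyl)                                 -> Stata-style clustered SEs
iv_robust(mpg ~ hp + am | wt + gear, data = mt, clusters = cyl, se_type = "stata")
# the [aweight = w] variants additionally pass weights = w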
-------------------------------------------------------------------------------- /tests/testthat/run-stata-models.do: -------------------------------------------------------------------------------- 1 | // This file fits many models in stata and outputs the estimates for comparison with estimatr 2 | 3 | clear all 4 | import delimited mtcars.csv 5 | 6 | gen w = drat / 5 7 | 8 | file open outf using stata-ests.txt, write r 9 | 10 | reg mpg hp 11 | mat V=e(V) 12 | file write outf _n "classical" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 13 | 14 | reg mpg hp, vce(robust) 15 | mat V=e(V) 16 | file write outf _n "HC1" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 17 | 18 | reg mpg hp, vce(hc2) 19 | mat V=e(V) 20 | file write outf _n "HC2" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 21 | 22 | reg mpg hp, vce(hc3) 23 | mat V=e(V) 24 | file write outf _n "HC3" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 25 | 26 | reg mpg hp, vce(cluster cyl) 27 | mat V=e(V) 28 | file write outf _n "stata_cl" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 29 | 30 | reg mpg hp [aweight=w] 31 | predict hii, hat 32 | mat V=e(V) 33 | file write outf _n "classicalw" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 34 | 35 | reg mpg hp [aweight=w], vce(robust) 36 | mat V=e(V) 37 | file write outf _n "HC1w" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 38 | 39 | reg mpg hp [aweight=w], vce(hc2) 40 | mat V=e(V) 41 | file write outf _n "HC2w" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 42 | 43 | reg mpg hp [aweight=w], vce(hc3) 44 | mat V=e(V) 45 | file write outf _n "HC3w" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 46 | 47 | reg mpg hp [aweight=w], vce(cluster cyl) 48 | mat V=e(V) 49 | file write outf _n "stata_clw" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 50 | 51 | file close outf 52 | -------------------------------------------------------------------------------- /tests/testthat/stata-ests.txt: -------------------------------------------------------------------------------- 1 | 2 | classical .0001024 2.669698 30 45.459797 3 | HC1 .00018388 4.3123302 30 25.315348 4 | HC2 .00021652 4.8093019 30 21.499286 5 | HC3 .00027562 5.8084222 30 16.889294 6 | stata_cl .00034882 14.922475 2 13.345142 7 | classicalw .00009984 2.4918295 30 48.196413 8 | HC1w .00017632 4.0902569 30 27.29069 9 | HC2w .00020717 4.5180983 30 23.226908 10 | HC3w .00026334 5.4069309 30 18.2733 11 | stata_clw .00031337 13.155473 2 15.355597 -------------------------------------------------------------------------------- /tests/testthat/stata-fe-ests.txt: -------------------------------------------------------------------------------- 1 | 2 | classical .00024844 20.439199 3 | HC1 .00018026 28.169159 4 | stata_cl .00021845 23.24499 5 | classicalw .00023978 20.31818 6 | HC1w .00019399 25.11446 7 | stata_clw .00018777 25.946624 -------------------------------------------------------------------------------- /tests/testthat/stata-iv-ests.txt: -------------------------------------------------------------------------------- 1 | 2 | classical .0001602 2.2793852 5.2396632 44.808289 .75605596 .73923223 3.0776867 3 | rob .00015781 2.2003453 4.7873499 41.366981 .75605596 .73923223 3.0776867 4 | cl .00004096 .7950331 .31612954 108.37591 .75605596 .73923223 3.0776867 5 | classical_w .00015649 2.3424049 5.2400158 46.736059 .76415188 .74788649 3.0971208 6 | rob_w .00014704 2.0834808 4.5677404 45.410829 .76415188 .74788649 3.0971208 7 | cl_w .00006651 .64892154 .54670656 
81.595912 .76415188 .74788649 3.0971208 -------------------------------------------------------------------------------- /tests/testthat/test-arg-checking.R: -------------------------------------------------------------------------------- 1 | context("Estimator - Arg checking fails as expected.") 2 | 3 | test_that("#349 Early fail when formula is a string", { 4 | expect_error( 5 | estimatr::lm_robust("mpg~hp", data = mtcars, cluster = wt), 6 | "formula" 7 | ) 8 | expect_length( 9 | estimatr::lm_robust(mpg~hp, data = mtcars, cluster = wt), 10 | 29 11 | ) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-condition1-condition2.R: -------------------------------------------------------------------------------- 1 | context("Helper - Condition parsing for difference estimators") 2 | 3 | 4 | test_that("Condition arguments behave as expected", { 5 | n <- 40 6 | dat <- data.frame( 7 | y = rnorm(n), 8 | bl = rep(1:5, each = 8), 9 | z = 1:4, 10 | ps = runif(n) 11 | ) 12 | 13 | horvitz_thompson(y ~ z, data = dat, subset = z <= 2, condition_prs = ps) 14 | 15 | # Subsetting and just selecting two conditions 16 | expect_identical( 17 | horvitz_thompson(y ~ z, data = dat, subset = z <= 2, condition_prs = ps), 18 | horvitz_thompson(y ~ z, data = dat, condition1 = 1L, condition2 = 2L, condition_prs = ps) 19 | ) 20 | 21 | expect_identical( 22 | difference_in_means(y ~ z, data = dat, subset = z <= 2), 23 | difference_in_means(y ~ z, data = dat, condition1 = 1L, condition2 = 2L) 24 | ) 25 | 26 | expect_identical( 27 | difference_in_means(y ~ z, data = dat, subset = z <= 2, blocks = bl), 28 | difference_in_means(y ~ z, data = dat, condition1 = 1L, condition2 = 2L, blocks = bl) 29 | ) 30 | 31 | # Subsetting and just selecting two conditions 32 | expect_identical( 33 | tidy(horvitz_thompson( 34 | y ~ z, 35 | data = dat, 36 | condition1 = 3, 37 | condition2 = 4, 38 | condition_prs = rep(0.5, nrow(dat)) 39 | ))[c("estimate", "std.error")], 40 | tidy(horvitz_thompson( 41 | y ~ z, 42 | data = dat, 43 | condition1 = 4, 44 | condition2 = 3, 45 | condition_prs = rep(0.5, nrow(dat)) 46 | ))[c("estimate", "std.error")] * c(-1, 1) 47 | ) 48 | 49 | expect_identical( 50 | tidy(difference_in_means( 51 | y ~ z, 52 | data = dat, 53 | condition1 = 2, 54 | condition2 = 1 55 | ))[c("estimate", "std.error")], 56 | tidy(difference_in_means( 57 | y ~ z, 58 | data = dat, 59 | condition1 = 1, 60 | condition2 = 2 61 | ))[c("estimate", "std.error")] * c(-1, 1) 62 | ) 63 | 64 | # Errors if not specifying both 65 | expect_error( 66 | horvitz_thompson( 67 | y ~ z, 68 | data = dat, 69 | condition1 = 4, 70 | condition_prs = ps 71 | ), 72 | "condition1" 73 | ) 74 | expect_error( 75 | horvitz_thompson( 76 | y ~ z, 77 | data = dat, 78 | condition2 = 4, 79 | condition_prs = ps 80 | ), 81 | "condition1" 82 | ) 83 | expect_error( 84 | horvitz_thompson( 85 | y ~ z, 86 | data = dat, 87 | condition_prs = ps 88 | ), 89 | "condition1" 90 | ) 91 | 92 | expect_error( 93 | difference_in_means( 94 | y ~ z, 95 | data = dat, 96 | condition1 = 4 97 | ), 98 | "condition1" 99 | ) 100 | expect_error( 101 | difference_in_means( 102 | y ~ z, 103 | data = dat, 104 | condition2 = 4 105 | ), 106 | "condition1" 107 | ) 108 | expect_error( 109 | difference_in_means( 110 | y ~ z, 111 | data = dat 112 | ), 113 | "condition1" 114 | ) 115 | 116 | 117 | # Specifying only one works with binary treatment 118 | dat$z <- c("Treated", "Control") 119 | expect_identical( 120 | difference_in_means( 121 | y ~ z, 122 | 
data = dat, 123 | condition1 = "Treated" 124 | ), 125 | difference_in_means( 126 | y ~ z, 127 | data = dat, 128 | condition1 = "Treated", 129 | condition2 = "Control" 130 | ) 131 | ) 132 | 133 | expect_identical( 134 | horvitz_thompson( 135 | y ~ z, 136 | data = dat, 137 | condition1 = "Treated" 138 | ), 139 | horvitz_thompson( 140 | y ~ z, 141 | data = dat, 142 | condition1 = "Treated", 143 | condition2 = "Control" 144 | ) 145 | ) 146 | expect_identical( 147 | difference_in_means( 148 | y ~ z, 149 | data = dat, 150 | condition2 = "Treated" 151 | ), 152 | difference_in_means( 153 | y ~ z, 154 | data = dat, 155 | condition2 = "Treated", 156 | condition1 = "Control" 157 | ) 158 | ) 159 | 160 | expect_identical( 161 | horvitz_thompson( 162 | y ~ z, 163 | data = dat, 164 | condition2 = "Treated" 165 | ), 166 | horvitz_thompson( 167 | y ~ z, 168 | data = dat, 169 | condition2 = "Treated", 170 | condition1 = "Control" 171 | ) 172 | ) 173 | 174 | # Works with factor 175 | dat$z <- factor(c("T", "C")) 176 | # Must pass string! 177 | difference_in_means(y ~ z, condition2 = "T", data = dat) 178 | # Errors if not found 179 | expect_error( 180 | difference_in_means( 181 | y ~ z, 182 | condition2 = 1, 183 | data = dat 184 | ), 185 | "`condition1` and `condition2` must be values found in the treatment" 186 | ) 187 | 188 | dat$z <- 1 189 | expect_error( 190 | difference_in_means(y ~ z, data = dat), 191 | "Must have more than one value in treatment unless using Horvitz" 192 | ) 193 | }) 194 | -------------------------------------------------------------------------------- /tests/testthat/test-iv-robust-fes.R: -------------------------------------------------------------------------------- 1 | context("Estimator - iv_robust, fixed effects") 2 | 3 | set.seed(43) 4 | N <- 20 5 | dat <- data.frame( 6 | Y = rnorm(N), 7 | X1 = rnorm(N), 8 | X2 = rnorm(N), 9 | Z = rbinom(N, 1, .5), 10 | B = factor(rep(1:2, times = c(8, 12))), 11 | B2 = factor(rep(1:4, times = c(3, 3, 4, 10))), 12 | cl = sample(1:4, size = N, replace = T), 13 | w = runif(N) 14 | ) 15 | dat$Xdup <- dat$X 16 | dat$Bdup <- dat$B 17 | 18 | test_that("FE matches with multiple FEs and covars", { 19 | 20 | for (se_type in se_types) { 21 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = se_type) 22 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = se_type) 23 | 24 | 25 | expect_equivalent( 26 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 27 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 28 | ) 29 | 30 | expect_equivalent( 31 | ro$fitted.values, 32 | rfo$fitted.values 33 | ) 34 | 35 | expect_equal( 36 | ro[c("r.squared", "adj.r.squared")], 37 | rfo[c("r.squared", "adj.r.squared")] 38 | ) 39 | 40 | # weights 41 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = se_type) 42 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = se_type) 43 | 44 | expect_equivalent( 45 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 46 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 47 | ) 48 | 49 | expect_equivalent( 50 | ro$fitted.values, 51 | rfo$fitted.values 52 | ) 53 | 54 | expect_equal( 55 | ro[c("r.squared", "adj.r.squared")], 56 | rfo[c("r.squared", "adj.r.squared")] 57 | ) 58 | } 59 | 60 | for (se_type in cr_se_types) { 61 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, se_type = se_type) 62 | rfo 
<- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = se_type) 63 | 64 | expect_equivalent( 65 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 66 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 67 | ) 68 | 69 | expect_equivalent( 70 | ro$fitted.values, 71 | rfo$fitted.values 72 | ) 73 | 74 | expect_equal( 75 | ro[c("r.squared", "adj.r.squared")], 76 | rfo[c("r.squared", "adj.r.squared")] 77 | ) 78 | 79 | # weights 80 | if (se_type %in% c("CR2", "CR3")) { 81 | expect_error( 82 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type), 83 | "Cannot use `fixed_effects` with weighted CR2" 84 | ) 85 | } else { 86 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = se_type) 87 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type) 88 | 89 | expect_equivalent( 90 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 91 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 92 | ) 93 | 94 | expect_equivalent( 95 | ro$fitted.values, 96 | rfo$fitted.values 97 | ) 98 | 99 | expect_equal( 100 | ro[c("r.squared", "adj.r.squared")], 101 | rfo[c("r.squared", "adj.r.squared")] 102 | ) 103 | } 104 | } 105 | }) 106 | 107 | test_that("IV FE warns about diagnostics", { 108 | 109 | expect_warning( 110 | iv_robust(mpg ~ hp | wt, data = mtcars, fixed_effects = cyl, diagnostics = TRUE), 111 | "Will not return `diagnostics` if `fixed_effects` are used." 112 | ) 113 | 114 | }) 115 | -------------------------------------------------------------------------------- /tests/testthat/test-lh-robust.R: -------------------------------------------------------------------------------- 1 | context("Estimator - lh_robust") 2 | set.seed(40) 3 | N <- 40 4 | dat <- data.frame( 5 | Y = rnorm(N), 6 | Y2 = rnorm(N), 7 | Z = rbinom(N, 1, .5), 8 | X = rnorm(N), 9 | B = factor(rep(1:2, times = c(8, 12))), 10 | cl = sample(1:4, size = N, replace = T), 11 | w = runif(N) 12 | ) 13 | 14 | # se tests 15 | test_that("lh_robust works with all se types", { 16 | skip_if_not_installed("car") 17 | for (se_type in se_types) { 18 | lhro <- 19 | tidy( 20 | lh_robust( 21 | mpg ~ cyl + disp, 22 | data = mtcars, 23 | linear_hypothesis = "cyl + disp = 0", 24 | se_type = se_type 25 | ) 26 | ) 27 | lmro <- 28 | lm_robust(mpg ~ cyl + disp, data = mtcars, se_type = se_type) 29 | linHyp <- 30 | car::linearHypothesis(lmro, hypothesis.matrix = "cyl + disp = 0") 31 | 32 | expect_equal(lhro$std.error[lhro$term == "cyl + disp = 0"], 33 | sqrt(as.numeric(attr(linHyp , "vcov")))) 34 | } 35 | }) 36 | 37 | test_that("lh_robust with clusters works for all se_types ", { 38 | skip_if_not_installed("car") 39 | for (se_type in cr_se_types) { 40 | lhro <- 41 | tidy( 42 | lh_robust( 43 | Y ~ Z * X, 44 | data = dat, 45 | clusters = cl, 46 | linear_hypothesis = "Z + Z:X = 0", 47 | se_type = se_type 48 | ) 49 | ) 50 | lmro <- 51 | lm_robust(Y ~ Z * X, 52 | data = dat, 53 | se_type = se_type, 54 | clusters = cl) 55 | linHyp <- 56 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 57 | expect_equal(lhro$std.error[lhro$term == "Z + Z:X = 0"], 58 | sqrt(as.numeric(attr(linHyp , "vcov")))) 59 | } 60 | }) 61 | 62 | test_that("lh_robust matches lm_robust with fixed effects", { 63 | skip_if_not_installed("car") 64 | 65 | lhro <- 66 | lh_robust( 67 | Y ~ Z * X, 68 | data = dat, 69 | fixed_effects = ~ B, 70 | linear_hypothesis = c("Z + Z:X = 
0") 71 | ) 72 | lmro <- lm_robust(Y ~ Z * X, data = dat, fixed_effects = ~ B) 73 | linHyp <- 74 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 75 | tidy_lhro <- tidy(lhro) 76 | 77 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 78 | sqrt(as.numeric(attr(linHyp , "vcov")))) 79 | 80 | }) 81 | 82 | test_that("lh_robust matches lm_robust with weights", { 83 | skip_if_not_installed("car") 84 | 85 | lhro <- 86 | lh_robust( 87 | Y ~ Z * X, 88 | data = dat, 89 | weights = w, 90 | linear_hypothesis = c("Z + Z:X = 0") 91 | ) 92 | tidy_lhro <- tidy(lhro) 93 | lmro <- lm_robust(Y ~ Z * X, data = dat, weights = w) 94 | linHyp <- 95 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 96 | 97 | 98 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 99 | sqrt(as.numeric(attr(linHyp , "vcov")))) 100 | 101 | }) 102 | 103 | test_that("lh_robust matches lm_robust with subsetted data.frame", { 104 | skip_if_not_installed("car") 105 | 106 | lhro <- 107 | lh_robust(Y ~ Z * X, 108 | data = dat, 109 | subset = B == 1, 110 | linear_hypothesis = c("Z + Z:X = 0")) 111 | tidy_lhro <- tidy(lhro) 112 | lmro <- lm_robust(Y ~ Z * X, data = dat, subset = B == 1) 113 | linHyp <- 114 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 115 | 116 | 117 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 118 | sqrt(as.numeric(attr(linHyp , "vcov")))) 119 | 120 | }) 121 | 122 | test_that("lh_robust matches lm_robust with subsetted data.frame", { 123 | skip_if_not_installed("car") 124 | 125 | lhro <- 126 | lh_robust(Y ~ Z * X, 127 | data = dat, 128 | subset = B == 1, 129 | linear_hypothesis = c("Z + Z:X = 0")) 130 | tidy_lhro <- tidy(lhro) 131 | lmro <- lm_robust(Y ~ Z * X, data = dat, subset = B == 1) 132 | linHyp <- 133 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 134 | 135 | 136 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 137 | sqrt(as.numeric(attr(linHyp , "vcov")))) 138 | 139 | }) 140 | 141 | # lh test 142 | test_that("returns error when no linear hypothesis is specified", { 143 | expect_error(lh_robust(Y ~ Z * X, data = dat)) 144 | }) 145 | -------------------------------------------------------------------------------- /tests/testthat/test-lm-robust-helper.R: -------------------------------------------------------------------------------- 1 | # N <- 10000 2 | # dat <- data.frame(y = rnorm(N), x1 = rnorm(N), x2 = rnorm(N)) 3 | # X <- model.matrix.default(~ x1 + x2, data = dat) 4 | # y <- dat$y 5 | # fit <- lm(y ~ x1 + x2, data = dat) 6 | # 7 | # # Speed Tests ------------------------------------------------------------- 8 | # 9 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "classical"))) 10 | # 11 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC0"))) 12 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC1"))) 13 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC2"))) 14 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC3"))) 15 | # 16 | # 17 | -------------------------------------------------------------------------------- /tests/testthat/test-lm-robust_emmeans.R: -------------------------------------------------------------------------------- 1 | context("S3 - emmeans") 2 | 3 | 4 | 5 | test_that("emmeans can work with lm_robust objects", { 6 | skip_if_not_installed("emmeans") 7 | library(emmeans) 8 | lmr <- lm_robust(mpg ~ factor(cyl) * hp + wt, data = 
mtcars) 9 | 10 | rg <- emmeans::ref_grid(lmr) 11 | expect_equal(class(rg)[1], "emmGrid") 12 | 13 | grid <- rg@grid 14 | expect_equal(nrow(grid), 3) 15 | expect_equal(sum(grid$.wgt.), 32) 16 | expect_equal(predict(rg)[1], 17.424, tolerance = .01) 17 | }) 18 | 19 | test_that("lm_robust multivariate model works with emmeans", { 20 | skip_if_not_installed("emmeans") 21 | library(emmeans) 22 | lmr <- lm_robust(yield ~ Block + Variety, data = emmeans::MOats) 23 | emm <- emmeans(lmr, "rep.meas") 24 | expect_equal(summary(emm)$emmean[4], 123.4, tolerance = 0.1) 25 | }) 26 | 27 | test_that("lm_robust model with rank deficiency works with emmeans", { 28 | skip_if_not_installed("emmeans") 29 | library(emmeans) 30 | lmr <- lm_robust(log(breaks) ~ wool * tension, data = warpbreaks, subset = -(19:30)) 31 | pred <- predict(ref_grid(lmr)) 32 | expect_true(is.na(pred[5])) 33 | expect_equal(length(pred), 6) 34 | expect_equal(sum(is.na(pred)), 1) 35 | }) 36 | 37 | # Not testing emmeans package capabilities themselves. If we can construct the 38 | # reference grid correctly, we are basically OK. 39 | # Pretty much anything else that could fail would happen in the emmeans package, 40 | # not in the support methods in this package. 41 | 42 | -------------------------------------------------------------------------------- /tests/testthat/test-lm-robust_margins.R: -------------------------------------------------------------------------------- 1 | context("Helper - lm_robust margins") 2 | 3 | 4 | 5 | mv <- c("AME", "SE", "z", "p") 6 | 7 | test_that("lm robust can work with margins", { 8 | skip_if_not_installed("margins") 9 | x <- lm(mpg ~ cyl * hp + wt, data = mtcars) 10 | lmr <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars) 11 | 12 | # Note old package vce defaults to delta 13 | # new margins on github defaults to none with our obj 14 | lm_sum_marg <- summary( 15 | margins::margins( 16 | x, 17 | vcov = sandwich::vcovHC(x, type = "HC2"), 18 | vce = "delta" 19 | ) 20 | ) 21 | 22 | lmr_sum_marg <- margins:::summary.margins(margins::margins(lmr, vce = "delta")) 23 | 24 | # Close enough with HC2? 
25 | expect_equal( 26 | lm_sum_marg[, mv], 27 | lmr_sum_marg[, mv], 28 | tolerance = 0.01 29 | ) 30 | 31 | # Close with classical 32 | lmr_class <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars, se_type = "classical") 33 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "delta")) 34 | lmc <- margins:::summary.margins(margins::margins(x, vce = "delta")) 35 | expect_equal( 36 | lmc[, mv], 37 | lmrc[, mv], 38 | tolerance = 0.01 39 | ) 40 | 41 | # Works with other vce 42 | set.seed(42) 43 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "bootstrap", iterations = 10L)) 44 | expect_true(!any(is.na(lmrc))) 45 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "simulation", iterations = 10L)) 46 | expect_true(!any(is.na(lmrc))) 47 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "simulation", iterations = 10L)) 48 | expect_true(!any(is.na(lmrc))) 49 | }) 50 | 51 | test_that("lm robust + weights can work with margins", { 52 | skip_if_not_installed("margins") 53 | x <- lm(mpg ~ cyl * hp, data = mtcars, weights = wt) 54 | x2 <- lm_robust(mpg ~ cyl * hp, data = mtcars, weights = wt, se_type = "classical") 55 | expect_equal(margins::marginal_effects(x), margins::marginal_effects(x2)) 56 | 57 | 58 | suppressWarnings( 59 | {lmc <- round(margins:::summary.margins(margins::margins(x, vce = "delta"))[, mv], 3)} 60 | ) 61 | 62 | suppressWarnings( 63 | {lmr <- round(margins:::summary.margins(margins::margins(x2, vce = "delta"))[, mv], 3)} 64 | ) 65 | 66 | expect_equal(lmc, lmr) 67 | }) 68 | 69 | test_that("lm robust + cluster can work with margins", { 70 | skip_if_not_installed("margins") 71 | # works but throws a lot of warnings 72 | x <- lm(mpg ~ cyl * hp + wt, data = mtcars) 73 | x2 <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars, clusters = am) 74 | 75 | lmc <- round(margins:::summary.margins(margins::margins(x, vce = "delta"))[, mv], 8) 76 | 77 | expect_warning( 78 | lmr <- round(margins:::summary.margins(margins::margins(x2, vce = "delta"))[, mv], 8), 79 | NA 80 | ) 81 | 82 | # With rounding 83 | expect_equal(lmc[, 1], lmr[, 1]) 84 | expect_true( 85 | !any(lmc[, 2] == lmr[, 2]) 86 | ) 87 | 88 | # Works with character cluster (avoided terms(mod) "dataClasses" problem) 89 | mtcars$testc <- letters[1:4] 90 | expect_error( 91 | margins::margins(lm_robust(mpg ~ cyl * hp + wt, data = mtcars, clusters = testc)), 92 | NA 93 | ) 94 | }) 95 | 96 | 97 | test_that("lm lin can work with margins", { 98 | skip_if_not_installed("margins") 99 | data("alo_star_men") 100 | lml <- lm_lin(GPA_year1 ~ ssp, ~ gpa0, data = alo_star_men, se_type = "classical") 101 | 102 | alo_star_men$gpa0_tilde <- alo_star_men$gpa0 - mean(alo_star_men$gpa0) 103 | 104 | lmo <- lm(GPA_year1 ~ ssp * gpa0_tilde, data = alo_star_men) 105 | 106 | lml_sum <- margins:::summary.margins(margins::margins(lml, vce = "delta")) 107 | lmo_sum <- margins:::summary.margins(margins::margins(lmo, vce = "delta")) 108 | 109 | expect_equal( 110 | round(lml_sum[, 4], 5), 111 | round(lmo_sum[, 4], 5) 112 | ) 113 | }) 114 | -------------------------------------------------------------------------------- /tests/testthat/test-modelsummary.R: -------------------------------------------------------------------------------- 1 | # This file is ignored by .Rbuildignore to keep from suggesting gt and modelsummary 2 | 3 | context("S3 - modelsummary works") 4 | 5 | test_that("modelsummary works with glance", { 6 | skip_if_not_installed("modelsummary") 7 | 8 | library(modelsummary) 9 | 10 | 
set.seed(5) 11 | 12 | model1 <- lm_robust(mpg ~ am, mtcars) 13 | model2 <- lm_robust(mpg ~ am, mtcars, clusters = cyl) 14 | model3 <- lm_lin(mpg ~ am, ~ cyl, mtcars) 15 | 16 | mso <- modelsummary(list(model1, model2, model3), output = "data.frame") 17 | 18 | expect_equal(colnames(mso), c("part", "term", "statistic", 19 | "(1)", "(2)", "(3)")) 20 | 21 | expect_equal(nrow(mso), 15L) 22 | 23 | expect_equal(ncol(mso), 6L) 24 | 25 | # iv_robust 26 | model1 <- iv_robust(mpg ~ am | gear, mtcars) 27 | model2 <- iv_robust(mpg ~ am | gear, mtcars, clusters = cyl, diagnostics = TRUE) 28 | 29 | mso <- modelsummary(list(model1, model2), 30 | gof_omit = c("N|[sS]tatistic|p.value|p{1}"), 31 | output = "data.frame") 32 | 33 | expect_equal(nrow(mso), 10) 34 | 35 | expect_equal(ncol(mso), 5) 36 | 37 | # difference_in_means 38 | model1 <- difference_in_means(mpg ~ am, mtcars) 39 | model2 <- difference_in_means(mpg ~ am, mtcars, blocks = vs) 40 | mso <- modelsummary(list(model1, model2), output = "data.frame") 41 | 42 | 43 | # horvitz_thompson 44 | model1 <- horvitz_thompson(mpg ~ am, mtcars) 45 | model2 <- horvitz_thompson(mpg ~ am, mtcars, blocks = vs) 46 | 47 | mso <-modelsummary(list(model1, model2), output = "data.frame") 48 | 49 | expect_equal(nrow(mso), 6) 50 | 51 | expect_equal(ncol(mso), 5) 52 | 53 | }) 54 | 55 | -------------------------------------------------------------------------------- /tests/testthat/test-na-omit-details.R: -------------------------------------------------------------------------------- 1 | 2 | context("Helper - na.omit_detailed") 3 | 4 | df <- expand.grid(Y = c(1:5, NA), Z = c(LETTERS, NA)) 5 | 6 | stock <- na.omit(df) 7 | detailed <- na.omit_detailed.data.frame(df) 8 | 9 | stock_action <- attr(stock, "na.action") 10 | detailed_action <- attr(detailed, "na.action") 11 | 12 | test_that("Omits are the same", { 13 | expect_equal( 14 | as.vector(stock_action), 15 | as.vector(detailed_action) 16 | ) 17 | }) 18 | 19 | test_that("Row names are set correctly", { 20 | expect_identical( 21 | names(stock_action), 22 | names(detailed_action) 23 | ) 24 | }) 25 | 26 | test_that("Logic for nested dfs and lists holds", { 27 | df$X <- list(x = c(NA, 2:nrow(df))) 28 | df$Xmat <- matrix(rep(c(1, NA, 3:nrow(df)), 2), nrow(df)) 29 | 30 | stock <- na.omit(df) 31 | detailed <- na.omit_detailed.data.frame(df) 32 | 33 | stock_action <- attr(stock, "na.action") 34 | detailed_action <- attr(detailed, "na.action") 35 | 36 | expect_identical( 37 | names(stock_action), 38 | names(detailed_action) 39 | ) 40 | }) 41 | -------------------------------------------------------------------------------- /tests/testthat/test-replicate-HT-middleton.R: -------------------------------------------------------------------------------- 1 | context("Verification - HT matches Joel Middleton code") 2 | 3 | test_that("We match Joel's estimator", { 4 | 5 | # Code from Joel Middleton 6 | n <- 400 7 | 8 | # simple random assignment 9 | d.00 <- diag(rep(1, n)) 10 | dmat <- rbind( 11 | cbind(d.00, -d.00) 12 | , cbind(-d.00, d.00) 13 | ) 14 | d.tilde <- diag(rep(2, 2 * n)) 15 | d.00 <- matrix(rep(-0.001251564, n ^ 2), ncol = n) + diag(rep(1 + 0.001251564, n)) 16 | pmat <- diag(rep(.5, 2 * n)) %*% (dmat + 1) %*% diag(rep(.5, 2 * n)) 17 | pmat.true <- pmat 18 | pmat[pmat == 0] <- 1 19 | d.tilde.wt <- d.tilde / pmat 20 | 21 | # complete random assignment 22 | dmat.CR <- round(rbind( 23 | cbind(d.00, -d.00) 24 | , cbind(-d.00, d.00) 25 | ), 10) 26 | pmat.CR <- diag(rep(.5, 2 * n)) %*% (dmat.CR + 1) %*% diag(rep(.5, 2 * n)) 27 | 
pmat.CR.true <- pmat.CR 28 | pmat.CR[pmat.CR == 0] <- 1 29 | d.tilde.CR <- dmat.CR + (dmat.CR == -1) + diag(rep(1, 2 * n)) 30 | d.tilde.wt.CR <- d.tilde.CR / pmat.CR 31 | 32 | # ourpmat <- declaration_to_condition_pr_mat(randomizr::declare_ra(N = 400, prob = 0.5, simple = F)) 33 | # pmat.CR.true[1:5, 1:5] 34 | # ourpmat[1:5, 1:5] 35 | # pmat.CR.true[401:410, 401:410] 36 | # ourpmat[401:410, 401:410] 37 | # pmat.CR.true[401:410, 1:10] 38 | # ourpmat[401:410,1:10] 39 | 40 | ## DGP with truly random 0.5 chance for each unit being treated 41 | dat <- 42 | data.frame( 43 | p = 0.5, 44 | z = rbinom(n, 1, 0.5), 45 | y0 = rnorm(n, sd = 3) 46 | ) 47 | 48 | # Constant treatment effects, SRS 49 | dat$y1 <- dat$y0 + 3 50 | Y <- c(-dat$y0, dat$y1) 51 | R <- c(1 - dat$z, dat$z) 52 | pi.inv <- (c(rep(1 / (1 - dat$p)), rep(1 / dat$p))) 53 | 54 | ht_est <- sum(R * pi.inv * Y) / n 55 | y1.hat <- dat$y0 + ht_est 56 | y0.hat <- dat$y1 - ht_est 57 | # true_ses_ht <- sqrt(t(Y)%*%dmat%*%Y)/n 58 | Y.hat <- R * Y + (1 - R) * c(-y0.hat, y1.hat) 59 | se_ht <- sqrt(t(Y * R) %*% d.tilde.wt %*% (Y * R)) / n 60 | se_constant_ht <- sqrt(t(Y.hat) %*% dmat %*% Y.hat) / n 61 | 62 | dat$y <- ifelse(dat$z == 1, dat$y1, dat$y0) 63 | 64 | # Simple random assignment 65 | ht_decl_o <- horvitz_thompson( 66 | y ~ z, 67 | data = dat, 68 | ra_declaration = randomizr::declare_ra( 69 | N = nrow(dat), 70 | prob = dat$p[1], 71 | simple = TRUE 72 | ) 73 | ) 74 | 75 | # Second way to do same estimator, since it's SRS 76 | ht_prob_o <- horvitz_thompson( 77 | y ~ z, 78 | data = dat, 79 | condition_prs = p 80 | ) 81 | 82 | expect_equal( 83 | tidy(ht_decl_o)[, c("estimate", "std.error")], 84 | tidy(ht_prob_o)[, c("estimate", "std.error")] 85 | ) 86 | expect_equivalent( 87 | as.numeric(tidy(ht_decl_o)[, c("estimate", "std.error")]), 88 | c(ht_est, se_ht) 89 | ) 90 | 91 | # Now with constant effects assumption 92 | ht_const_o <- horvitz_thompson( 93 | y ~ z, 94 | data = dat, 95 | ra_declaration = randomizr::declare_ra( 96 | N = nrow(dat), 97 | prob = dat$p[1], 98 | simple = TRUE 99 | ), 100 | se_type = "constant" 101 | ) 102 | 103 | expect_equivalent( 104 | as.numeric(tidy(ht_const_o)[, c("estimate", "std.error")]), 105 | c(ht_est, se_constant_ht) 106 | ) 107 | 108 | ## Constant treatment effects, CRS 109 | dat$z <- sample(rep(0:1, each = n / 2)) 110 | dat$y <- ifelse(dat$z == 1, dat$y1, dat$y0) 111 | 112 | R <- c(1 - dat$z, dat$z) 113 | pi.inv <- (c(rep(1 / (1 - dat$p)), rep(1 / dat$p))) 114 | 115 | ht_comp_est <- sum(R * pi.inv * Y) / n 116 | Y.hat <- R * Y + (1 - R) * c(-y0.hat, y1.hat) 117 | se_comp_ht <- sqrt(t(Y * R) %*% d.tilde.wt.CR %*% (Y * R)) / n 118 | se_comp_constant_ht <- sqrt(t(Y.hat) %*% dmat.CR %*% Y.hat) / n 119 | 120 | 121 | # complete random assignment 122 | ht_comp_decl_o <- horvitz_thompson( 123 | y ~ z, 124 | data = dat, 125 | ra_declaration = randomizr::declare_ra( 126 | N = nrow(dat), 127 | prob = dat$p[1], 128 | simple = FALSE 129 | ), 130 | return_condition_pr_mat = T 131 | ) 132 | # ht_comp_decl_o$condition_pr_mat[1:5, 1:5] 133 | # pmat.CR.true[1:5, 1:5] 134 | 135 | # Don't match right now because pmats are diff 136 | # expect_equal( 137 | # tidy(ht_comp_decl_o)[, c("estimate", "std.error")], 138 | # c(ht_comp_est, se_comp_ht) 139 | # ) 140 | 141 | # Does match if I use JM's pmat 142 | ht_comp_decl_o <- horvitz_thompson( 143 | y ~ z, 144 | data = dat, 145 | condition_pr_mat = pmat.CR.true 146 | ) 147 | expect_equivalent( 148 | as.numeric(tidy(ht_comp_decl_o)[, c("estimate", "std.error")]), 149 | c(ht_comp_est, 
se_comp_ht) 150 | ) 151 | 152 | # Now with constant effects assumption 153 | # ht_comp_const_o <- horvitz_thompson( 154 | # y ~ z, 155 | # data = dat, 156 | # ra_declaration = randomizr::declare_ra( 157 | # N = nrow(dat), 158 | # prob = dat$p[1], 159 | # simple = FALSE 160 | # ), 161 | # se_type = "constant" 162 | # ) 163 | # 164 | # expect_equivalent( 165 | # tidy(ht_comp_const_o)[, c("estimate", "std.error")], 166 | # c(ht_comp_est, se_comp_constant_ht) 167 | # ) 168 | 169 | 170 | # ht_comp_const_o <- horvitz_thompson( 171 | # y ~ z, 172 | # data = dat, 173 | # condition_pr_mat = pmat.CR.true, 174 | # se_type = "constant" 175 | # ) 176 | # expect_equivalent( 177 | # tidy(ht_comp_const_o)[, c("estimate", "std.error")], 178 | # c(ht_comp_est, se_comp_constant_ht) 179 | # ) 180 | 181 | # Not matching so we error 182 | expect_error( 183 | horvitz_thompson( 184 | y ~ z, 185 | data = dat, 186 | condition_pr_mat = pmat.CR.true, 187 | se_type = "constant" 188 | ), 189 | "`se_type` = 'constant' only supported for simple" 190 | ) 191 | }) 192 | -------------------------------------------------------------------------------- /tests/testthat/test-replicate-lin2013.R: -------------------------------------------------------------------------------- 1 | context("Verification - lm_lin replicates Lin 2013") 2 | # Lin paper available here: www.stat.berkeley.edu/~winston/agnostic.pdf 3 | # Citation: 4 | # Lin, Winston. 2013. "Agnostic notes on regression adjustments to experimental 5 | # data: Reexamining Freedman’s critique." The Annals of Applied Statistics. 6 | # Stat. 7(1): 295-318. doi:10.1214/12-AOAS583. 7 | # https://projecteuclid.org/euclid.aoas/1365527200. 8 | 9 | test_that("lm_lin recreates Lin 2013 Table 2", { 10 | data("alo_star_men") 11 | 12 | ## Table 2 13 | # Lin uses "classic sandwich," or in our package, HC0 14 | 15 | # unadjusted, Lin est = -0.036, se = 0.158 16 | expect_equivalent( 17 | round( 18 | tidy( 19 | lm_robust( 20 | GPA_year1 ~ sfsp, 21 | data = alo_star_men, 22 | se_type = "HC0" 23 | ) 24 | )[2, c("estimate", "std.error")], 25 | 3 26 | ), 27 | c(-0.036, 0.158) 28 | ) 29 | 30 | 31 | # usual adjusted for HS gpa, Lin est = -0.083, se = 0.146 32 | expect_equivalent( 33 | unlist(round( 34 | tidy( 35 | lm_robust( 36 | GPA_year1 ~ sfsp + gpa0, 37 | data = alo_star_men, 38 | se_type = "HC0" 39 | ) 40 | )[2, c("estimate", "std.error")], 41 | 3 42 | )), 43 | c(-0.083, 0.146) 44 | ) 45 | 46 | # interaction adjusted, Lin est = -0.081, se = 0.146 47 | expect_equivalent( 48 | unlist(round( 49 | tidy( 50 | lm_lin( 51 | GPA_year1 ~ sfsp, 52 | covariates = ~ gpa0, 53 | data = alo_star_men, 54 | se_type = "HC0" 55 | ) 56 | )[2, c("estimate", "std.error")], 57 | 3 58 | )), 59 | c(-0.081, 0.146) 60 | ) 61 | }) 62 | 63 | 64 | ## Table 3 too long to run 65 | rep_table_3 <- FALSE 66 | 67 | if (rep_table_3) { 68 | data("alo_star_men") 69 | 70 | ## Table 3 71 | samp_dat <- alo_star_men 72 | its <- 250000 73 | set.seed(161235) 74 | check_cover <- function(obj, point = 0) { 75 | return(obj$conf.low[2] < point & obj$conf.high[2] > point) 76 | } 77 | ci_dist <- function(obj) { 78 | return(obj$conf.high[2] - obj$conf.low[2]) 79 | } 80 | ci_custom <- function(obj) { 81 | return(list( 82 | conf.high = coef(obj)[2] + obj$std.error[2] * 1.96, 83 | conf.low = coef(obj)[2] - obj$std.error[2] * 1.96 84 | )) 85 | } 86 | 87 | ses <- c("HC0", "HC1", "HC2", "HC3") 88 | 89 | ests <- matrix( 90 | NA, 91 | nrow = its, 92 | ncol = 3 93 | ) 94 | sd_mats <- cover_mats <- width_mats <- 95 | array( 96 | NA, 97 | dim = c(its, 
length(ses), 3) 98 | ) 99 | for (i in 1:its) { 100 | samp_dat$sfsp <- sample(samp_dat$sfsp) 101 | sd_mat <- cover_mat <- width_mat <- 102 | matrix( 103 | NA, 104 | nrow = length(ses), 105 | ncol = 3 106 | ) 107 | for (j in seq_along(ses)) { 108 | unadj <- lm_robust( 109 | GPA_year1 ~ sfsp, 110 | data = samp_dat, 111 | se_type = ses[j] 112 | ) 113 | tradadj <- lm_robust( 114 | GPA_year1 ~ sfsp + gpa0, 115 | data = samp_dat, 116 | se_type = ses[j] 117 | ) 118 | intadj <- lm_lin( 119 | GPA_year1 ~ sfsp, 120 | covariates = ~ gpa0, 121 | data = samp_dat, 122 | se_type = ses[j] 123 | ) 124 | 125 | sd_mat[j, ] <- c(unadj$std.error[2], tradadj$std.error[2], intadj$std.error[2]) 126 | cover_mat[j, ] <- c( 127 | check_cover(ci_custom(unadj)), 128 | check_cover(ci_custom(tradadj)), 129 | check_cover(ci_custom(intadj)) 130 | ) 131 | width_mat[j, ] <- c( 132 | ci_dist(ci_custom(unadj)), 133 | ci_dist(ci_custom(tradadj)), 134 | ci_dist(ci_custom(intadj)) 135 | ) 136 | } 137 | 138 | ests[i, ] <- c(coef(unadj)[2], coef(tradadj)[2], coef(intadj)[2]) 139 | sd_mats[i, , ] <- sd_mat 140 | cover_mats[i, , ] <- cover_mat 141 | width_mats[i, , ] <- width_mat 142 | 143 | 144 | if (i %% 1000 == 0) print(i) 145 | } 146 | 147 | 148 | # Panel A 149 | colMeans(ests) 150 | # Panel B 151 | apply(sd_mats, c(2, 3), mean) - apply(ests, 2, sd) 152 | # Panel C 153 | apply(sd_mats, c(2, 3), sd) 154 | # Panel D, not replicated because he uses normal dist. while we use t dist, all slightly larger 155 | apply(cover_mats, c(2, 3), mean) 156 | # Panel E, not replicated because he uses normal dist. while we use t dist, all slightly larger 157 | apply(width_mats, c(2, 3), mean) 158 | } 159 | -------------------------------------------------------------------------------- /tests/testthat/test-return.R: -------------------------------------------------------------------------------- 1 | context("Output - test similiarity across estimators") 2 | 3 | test_that("Structure of output is the same", { 4 | n <- 40 5 | dat <- data.frame( 6 | y = rnorm(n), 7 | z = c(0, 0, rep(0:1, times = 9)), 8 | x = rnorm(n), 9 | bl = rep(1:4, each = 10), 10 | cl = rep(1:20, each = 2) 11 | ) 12 | 13 | # Should be in all estimator returns 14 | in_return <- 15 | c( 16 | "coefficients", 17 | "std.error", 18 | "df", 19 | "p.value", 20 | "conf.low", 21 | "conf.high", 22 | "outcome", 23 | "alpha", 24 | "nobs" 25 | ) 26 | 27 | lmr_o <- lm_robust(y ~ z, data = dat) 28 | lmr_cl_o <- lm_robust(y ~ z, data = dat, clusters = cl) 29 | lml_o <- lm_lin(y ~ z, ~ x, data = dat) 30 | lml_cl_o <- lm_lin(y ~ z, ~ x, data = dat) 31 | # Major branching for diff estimators is for blocks 32 | ht_o <- horvitz_thompson(y ~ z, data = dat) 33 | ht_bl_o <- horvitz_thompson(y ~ z, blocks = bl, data = dat) 34 | dim_o <- difference_in_means(y ~ z, data = dat) 35 | dim_bl_o <- difference_in_means(y ~ z, blocks = bl, data = dat) 36 | 37 | expect_true(all(in_return %in% names(lmr_o))) 38 | expect_true(all(in_return %in% names(lmr_cl_o))) 39 | expect_true(all(in_return %in% names(lml_o))) 40 | expect_true(all(in_return %in% names(lml_cl_o))) 41 | expect_true(all(in_return %in% names(dim_o))) 42 | expect_true(all(in_return %in% names(dim_bl_o))) 43 | expect_true(all(in_return %in% names(ht_o))) 44 | expect_true(all(in_return %in% names(ht_bl_o))) 45 | 46 | expect_equal( 47 | colnames(tidy(lmr_o)), 48 | colnames(tidy(lmr_cl_o)), 49 | colnames(tidy(lml_o)), 50 | colnames(tidy(lml_cl_o)), 51 | colnames(tidy(ht_o)), 52 | colnames(tidy(ht_bl_o)), 53 | colnames(tidy(dim_o)), 54 | 
colnames(tidy(dim_bl_o)) 55 | ) 56 | 57 | expect_equal( 58 | difference_in_means(y ~ z - 1, data = dat)$term, 59 | "z" 60 | ) 61 | }) 62 | 63 | # test_that("Warns properly if df is negative or 0", { 64 | # dat = data.frame(y = 1, z = 1, p = .5) 65 | # I can't come up with a meaningful test for this now 66 | # This should never happen I don't think 67 | # expect_warning( 68 | # (y ~ z, data = dat, condition_prs = p), 69 | # "Estimated negative or zero degrees of freedom" 70 | # ) 71 | # }) 72 | -------------------------------------------------------------------------------- /tests/testthat/test-sig-testing.R: -------------------------------------------------------------------------------- 1 | context("Helper - significance testing") 2 | 3 | test_that("Errors properly", { 4 | dat <- data.frame( 5 | mps = rep(1:4, each = 2), 6 | y = rnorm(8), 7 | z = c(0, 1) 8 | ) 9 | 10 | expect_warning( 11 | lm_lin(y ~ z, ~ factor(mps), data = dat), 12 | "Some degrees of freedom have been estimated as negative or zero" 13 | ) 14 | 15 | expect_error( 16 | lm_robust(y ~ z, data = dat, alpha = 10), 17 | "`alpha` must be numeric between 0 and 1" 18 | ) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-stata-output.R: -------------------------------------------------------------------------------- 1 | context("Verification - lm and iv match Stata") 2 | 3 | test_that("lm_robust matches stata", { 4 | 5 | # write.csv(mtcars, 6 | # file = 'tests/testthat/mtcars.csv', 7 | # row.names = F) 8 | 9 | stata_ests <- read.table( 10 | "stata-ests.txt", 11 | col.names = c("model", "se1", "se2", "df", "fstat"), 12 | stringsAsFactors = FALSE 13 | ) 14 | 15 | mtcars$w <- mtcars$drat / 5 16 | 17 | estimatr_mat <- matrix(NA, 10, 4) 18 | 19 | lm_c <- lm_robust(mpg ~ hp, data = mtcars, se_type = "classical") 20 | estimatr_mat[1, ] <- c(lm_c$std.error ^ 2, lm_c$df[2], lm_c$fstatistic[1]) 21 | lm_hc1 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC1") 22 | estimatr_mat[2, ] <- c(lm_hc1$std.error ^ 2, lm_hc1$df[2], lm_hc1$fstatistic[1]) 23 | lm_hc2 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC2") 24 | estimatr_mat[3, ] <- c(lm_hc2$std.error ^ 2, lm_hc2$df[2], lm_hc2$fstatistic[1]) 25 | lm_hc3 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC3") 26 | estimatr_mat[4, ] <- c(lm_hc3$std.error ^ 2, lm_hc3$df[2], lm_hc3$fstatistic[1]) 27 | 28 | lm_stata <- lm_robust(mpg ~ hp, clusters = cyl, data = mtcars, se_type = "stata") 29 | estimatr_mat[5, ] <- c(lm_stata$std.error ^ 2, lm_stata$df[2], lm_stata$fstatistic[1]) 30 | 31 | lm_c_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "classical") 32 | estimatr_mat[6, ] <- c(lm_c_w$std.error ^ 2, lm_c_w$df[2], lm_c_w$fstatistic[1]) 33 | lm_hc1_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC1") 34 | estimatr_mat[7, ] <- c(lm_hc1_w$std.error ^ 2, lm_hc1_w$df[2], lm_hc1_w$fstatistic[1]) 35 | lm_hc2_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC2") 36 | estimatr_mat[8, ] <- c(lm_hc2_w$std.error ^ 2, lm_hc2_w$df[2], lm_hc2_w$fstatistic[1]) 37 | lm_hc3_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC3") 38 | estimatr_mat[9, ] <- c(lm_hc3_w$std.error ^ 2, lm_hc3_w$df[2], lm_hc3_w$fstatistic[1]) 39 | lm_stata_w <- lm_robust(mpg ~ hp, clusters = cyl, weights = w, data = mtcars, se_type = "stata") 40 | estimatr_mat[10, ] <- c(lm_stata_w$std.error ^ 2, lm_stata_w$df[2], lm_stata_w$fstatistic[1]) 41 | 42 | # All look numerically identical except for HC2 and HC3 with 
weights which 43 | # have non-negligible difference. This is due to differences in how the hat 44 | # matrix is built that are still unresolved 45 | 46 | # Therefore rows 8 and 9 will have larger differences 47 | expect_true( 48 | max(abs(estimatr_mat[c(1:7, 10), 1:4] - apply(stata_ests[c(1:7, 10), c(3, 2, 4, 5)], 2, as.numeric))) < 1e-5 49 | ) 50 | 51 | }) 52 | 53 | 54 | 55 | test_that("iv_robust matches stata", { 56 | 57 | skip_if_not_installed("AER") 58 | 59 | # write.csv(mtcars, 60 | # file = 'tests/testthat/mtcars.csv', 61 | # row.names = F) 62 | 63 | stata_ests <- read.table( 64 | "stata-iv-ests.txt", 65 | col.names = c("model", "v1", "v2", "v3", "fstat", "r2", "r2_a", "rmse"), 66 | stringsAsFactors = FALSE 67 | ) 68 | 69 | mtcars$w <- mtcars$drat / 5 70 | 71 | estimatr_mat <- matrix(NA, 6, 7) 72 | iv_c <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, se_type = "classical") 73 | estimatr_mat[1, ] <- c(iv_c$std.error ^ 2, iv_c$fstatistic[1], iv_c$r.squared, iv_c$adj.r.squared, sqrt(iv_c$res_var)) 74 | iv_hc1 <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, se_type = "HC1") 75 | estimatr_mat[2, ] <- c(iv_hc1$std.error ^ 2, iv_hc1$fstatistic[1], iv_hc1$r.squared, iv_hc1$adj.r.squared, sqrt(iv_hc1$res_var)) 76 | iv_stata <- iv_robust(mpg ~ hp + am | wt + gear, clusters = cyl, data = mtcars, se_type = "stata") 77 | estimatr_mat[3, ] <- c(iv_stata$std.error ^ 2, iv_stata$fstatistic[1], iv_stata$r.squared, iv_stata$adj.r.squared, sqrt(iv_stata$res_var)) 78 | 79 | iv_c_w <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, weights = w, se_type = "classical") 80 | estimatr_mat[4, ] <- c(iv_c_w$std.error ^ 2, iv_c_w$fstatistic[1], iv_c_w$r.squared, iv_c_w$adj.r.squared, sqrt(iv_c_w$res_var)) 81 | iv_hc1_w <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, weights = w, se_type = "HC1") 82 | estimatr_mat[5, ] <- c(iv_hc1_w$std.error ^ 2, iv_hc1_w$fstatistic[1], iv_hc1_w$r.squared, iv_hc1_w$adj.r.squared, sqrt(iv_hc1_w$res_var)) 83 | iv_stata_w <- iv_robust(mpg ~ hp + am | wt + gear, clusters = cyl, weights = w, data = mtcars, se_type = "stata") 84 | estimatr_mat[6, ] <- c(iv_stata_w$std.error ^ 2, iv_stata_w$fstatistic[1], iv_stata_w$r.squared, iv_stata_w$adj.r.squared, sqrt(iv_stata_w$res_var)) 85 | 86 | expect_true( 87 | max(abs(estimatr_mat[, 1] - as.numeric(stata_ests[, 4]))) < 2e-05 88 | ) 89 | 90 | expect_true( 91 | max(abs(estimatr_mat[, 4] - as.numeric(stata_ests[, 5]))) < 3e-05 92 | ) 93 | 94 | # Note, RMSE is different for stata with weights than ivreg or iv_robust 95 | expect_true( 96 | max(abs(estimatr_mat[, 5:6] - stata_ests[, 6:7])) < 4e-08 97 | ) 98 | 99 | ivrego_w <- AER::ivreg(mpg ~ hp + am | wt + gear, data = mtcars, weights = w) 100 | 101 | expect_equal( 102 | ivrego_w$sigma, 103 | sqrt(iv_c_w$res_var) 104 | ) 105 | expect_equal( 106 | ivrego_w$sigma, 107 | sqrt(iv_hc1_w$res_var) 108 | ) 109 | expect_equal( 110 | ivrego_w$sigma, 111 | sqrt(iv_stata_w$res_var) 112 | ) 113 | 114 | }) 115 | -------------------------------------------------------------------------------- /tests/testthat/test-texreg.R: -------------------------------------------------------------------------------- 1 | # This file is ignored by .Rbuildignore to keep from suggesting texreg 2 | 3 | context("S3 - texreg builds") 4 | 5 | test_that("texreg extension works", { 6 | 7 | model2 <- lm_robust(extra~group, sleep, clusters = ID) 8 | 9 | capture.output(treg <- extract(model2, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)) 10 | expect_is( 11 | treg, 12 | "texreg" 13 | ) 14 | 15 | # Defaults to 
having CIs 16 | expect_true(grepl("-0.53;", texreg::texreg(model2))) 17 | 18 | # Remove to get SEs 19 | expect_true(grepl("\\(0.57\\)", texreg::texreg(model2, include.ci = FALSE))) 20 | 21 | # Has Nclust 22 | expect_true(grepl("N Clusters.*10", texreg::texreg(model2))) 23 | 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-zzz.R: -------------------------------------------------------------------------------- 1 | context("zzz.R - .onLoad") 2 | 3 | test_that("onLoad makes generics if texreg is present", { 4 | 5 | e <- environment(.onLoad) 6 | 7 | environment(.onLoad) <- new.env(parent = parent.env(e)) 8 | environment(.onLoad)$extract.lm_robust <- e$extract.lm_robust 9 | 10 | expect_null(.onLoad("estimatr", "estimatr")) 11 | environment(.onLoad) <- e 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-zzzbroom.R: -------------------------------------------------------------------------------- 1 | # added to .Rbuildignore to keep CRAN from complaining that broom 2 | # isn't a SUGGESTed package 3 | context("zzzbroom.R - .onAttach") 4 | 5 | test_that(".onLoad message if old version of 'broom' is installed", { 6 | skip_if_not_installed("broom") 7 | skip_if(packageVersion("broom") > "0.5.0") 8 | library(broom) 9 | expect_message( 10 | .onLoad("estimatr", "estimatr"), 11 | "the `broom` package version 0.5.0 or earlier is loaded" 12 | ) 13 | }) 14 | -------------------------------------------------------------------------------- /vignettes/absorbing-fixed-effects.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Absorbing Fixed Effects with estimatr" 3 | output: 4 | html_document: 5 | df_print: paged 6 | vignette: | 7 | %\VignetteIndexEntry{Absorbing Fixed Effects with estimatr} 8 | \usepackage[utf8]{inputenc} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = TRUE) 14 | ``` 15 | 16 | Whether analyzing a block-randomized experiment or adding fixed effects for a panel model, absorbing group means can speed up estimation time. The `fixed_effects` argument in both `lm_robust` and `iv_robust` allows you to do just that, although the speed gains are greatest with "HC1" standard errors. Specifying fixed effects is really simple. 17 | 18 | ```{r} 19 | library(estimatr) 20 | lmr_out <- lm_robust(mpg ~ hp, data = mtcars, fixed_effects = ~ cyl) 21 | lmr_out 22 | lmr_out$fixed_effects 23 | ``` 24 | 25 | Before proceeding, three quick notes: 26 | 27 | * Most of the speed gains occur when estimating "HC1" robust standard errors, or "stata" standard errors when there is clustering. This is because most of the speed gains come from avoiding inverting a large matrix of group dummies, but this step is still necessary for "HC2", "HC3", and "CR2" standard errors. 28 | * While you can specify multiple sets of fixed effects, such as `fixed_effects = ~ year + country`, please ensure that your model is well-specified if you do so. If there are dependencies or overlapping groups across multiple sets of fixed effects, we cannot guarantee the correct degrees of freedom. 29 | * For now, weighted "CR2" estimation is not possible with fixed_effects. 30 | 31 | ## Speed gains 32 | 33 | In general, our speed gains will be greatest as the number of groups/fixed effects is large relative to the number of observations. Imagine we have 300 matched-pairs in an experiment. 
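As a quick check before benchmarking: absorbing the fixed effects changes how the estimates are computed, not what they are, so the coefficients on the retained covariates match the dummy-variable specification. Here is a minimal sketch using the `mtcars` example from above (the object names are purely illustrative):

```{r}
# The absorbed fit and the dummy-variable fit agree on the shared coefficient
with_dummies <- lm_robust(mpg ~ hp + factor(cyl), data = mtcars)
absorbed <- lm_robust(mpg ~ hp, data = mtcars, fixed_effects = ~ cyl)
all.equal(unname(coef(with_dummies)["hp"]), unname(coef(absorbed)["hp"]))
```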
34 | 35 | ```{r, message=FALSE} 36 | # Load packages for comparison 37 | library(microbenchmark) 38 | library(sandwich) 39 | library(lmtest) 40 | 41 | # Create matched-pairs dataset using fabricatr 42 | set.seed(40) 43 | library(fabricatr) 44 | dat <- fabricate( 45 | blocks = add_level(N = 300), 46 | indiv = add_level(N = 2, z = sample(0:1), y = rnorm(N) + z) 47 | ) 48 | head(dat) 49 | 50 | # With HC2 51 | microbenchmark( 52 | `base + sandwich` = { 53 | lo <- lm(y ~ z + factor(blocks), dat) 54 | coeftest(lo, vcov = vcovHC(lo, type = "HC2")) 55 | }, 56 | `lm_robust` = lm_robust(y ~ z + factor(blocks), dat), 57 | `lm_robust + fes` = lm_robust(y ~ z, data = dat, fixed_effects = ~ blocks), 58 | times = 50 59 | ) 60 | ``` 61 | 62 | Speed gains are *considerably* greater with HC1 standard errors. This is because we need to get the hat matrix for HC2, HC3, and CR2 standard errors, which requires inverting that large matrix of dummies we previously avoided doing. HC0, HC1, CR0, and CRstata standard errors do not require this inversion. 63 | 64 | ```{r} 65 | # With HC1 66 | microbenchmark( 67 | `base + sandwich` = { 68 | lo <- lm(y ~ z + factor(blocks), dat) 69 | coeftest(lo, vcov = vcovHC(lo, type = "HC1")) 70 | }, 71 | `lm_robust` = lm_robust( 72 | y ~ z + factor(blocks), 73 | dat, 74 | se_type = "HC1" 75 | ), 76 | `lm_robust + fes` = lm_robust( 77 | y ~ z, 78 | data = dat, 79 | fixed_effects = ~ blocks, 80 | se_type = "HC1" 81 | ), 82 | times = 50 83 | ) 84 | ``` 85 | -------------------------------------------------------------------------------- /vignettes/emmeans-examples.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Examples with emmeans" 3 | author: "Russ Lenth" 4 | date: "8/3/2019" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, eval = FALSE) 10 | ``` 11 | The **emmeans** package provides a variety of *post hoc* analyses such 12 | as obtaining estimated marginal means (EMMs) and comparisons thereof, 13 | displaying these results in a graph, and a number of related tasks. 14 | 15 | This vignette illustrates basic uses of **emmeans** with `lm_robust` 16 | objects. For more details, refer to the **emmeans** package itself 17 | and its vignettes. 18 | 19 | ### A factorial experiment 20 | The `warpbreaks` dataset provided in base R has the results of a two-factor 21 | experiment. We start by fitting a model 22 | ```{r} 23 | library(estimatr) 24 | 25 | warp.rlm <- lm_robust(log(breaks) ~ wool * tension, data = warpbreaks) 26 | ``` 27 | Typical use of `emmeans()` is to obtain predictions, or marginal means thereof, 28 | via a formula of the form `~ primary.variables | by.variables`: 29 | ```{r} 30 | library(emmeans) 31 | 32 | emm <- emmeans(warp.rlm, ~ tension | wool) 33 | class(emm) 34 | str(emm) 35 | emm 36 | ``` 37 | These results may be plotted as side-by-side intervals or as an interaction-style plot: 38 | ```{r} 39 | plot(emm) 40 | emmip(emm, wool ~ tension, CIs = TRUE) 41 | ``` 42 | 43 | This particular example has a response transformation. 
That transformation is 44 | detected and we may back-transform to the original scale: 45 | ```{r} 46 | confint(emm, type = "response") 47 | ``` 48 | We may do comparisons and other contrasts: 49 | ```{r} 50 | pairs(emm) # pairwise comparisons 51 | contrast(emm, "trt.vs.ctrl", ref = "L", type = "response", adjust = "mvt") 52 | ``` 53 | Note that with a log transformation, it is possible to back-transform 54 | comparisons, and they become ratios. With other transformations, back-transforming comparisons in this way 55 | is not possible. 56 | 57 | ### Rank-deficient models 58 | Let's create a variation on this example where one cell is omitted: 59 | ```{r} 60 | warpi.rlm <- update(warp.rlm, subset = -(37:48)) 61 | (rgi <- ref_grid(warpi.rlm)) 62 | summary(rgi) 63 | ``` 64 | Note that the empty cell is detected and flagged as non-estimable. 65 | 66 | Some additional explanation is in order here. EMMs are based on a *reference grid*, defined 67 | as the grid created by all possible combinations of factor levels, together with 68 | the mean of each numerical predictor. The reference grid here (`rgi`) is also an 69 | `"emmGrid"` object just like the previous `emm`. 70 | The grid itself is available as a data frame via the `grid` member, and 71 | you can verify that the above results match those of the `predict` function 72 | for the model: 73 | ```{r} 74 | predict(warpi.rlm, newdata = rgi@grid, se.fit = TRUE) 75 | ``` 76 | There is one exception for the empty cell. I will leave it as a user exercise to demonstrate that if we were to use different contrasts when fitting `warpi.rlm`, 77 | the predictions will be the same *except* for the empty cell. 78 | 79 | ### Multivariate models 80 | If there is a multivariate response, it is treated as another factor that 81 | is crossed with the other factors in the model. To illustrate, consider 82 | the dataset `MOats`, provided in **emmeans**: 83 | ```{r} 84 | MOats.rlm <- lm_robust(yield ~ Block + Variety, data = MOats) 85 | ref_grid(MOats.rlm) 86 | ``` 87 | By default, the pseudo-factor is named `rep.meas`, but we can change it 88 | if we like: 89 | ```{r} 90 | emmeans(MOats.rlm, pairwise ~ nitro, mult.name = "nitro") 91 | ``` 92 | This illustrates an additional feature of `emmeans`: we can put a contrast method on the left side of a formula. 93 | 94 | ### Afterword 95 | There are numerous capabilities of **emmeans** not illustrated here. See that package's 96 | help files and vignettes. Using `vignette("basics", "emmeans")` is a good starting point. 97 | -------------------------------------------------------------------------------- /vignettes/estimatr-in-the-tidyverse.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "estimatr in the Tidyverse" 3 | author: "Shikhar Singh" 4 | output: 5 | html_document: 6 | df_print: paged 7 | vignette: > 8 | %\VignetteIndexEntry{estimatr in the Tidyverse} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | 14 | ```{r, echo=FALSE} 15 | library(knitr) 16 | ``` 17 | 18 | `estimatr` is for (fast) OLS and IV regression with robust standard errors. This document shows how `estimatr` integrates with RStudio's `tidyverse` suite of packages. 19 | 20 | We use the Swiss Fertility and Socioeconomic Indicators data (available in R, description [here](https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/swiss.html)) to show how `lm_robust` works with `dplyr`, `ggplot2`, and `purrr`. 
What is shown for `lm_robust` here typically applies to the other `estimatr` functions as well (`difference_in_means`, `lm_lin`, `iv_robust`, and `horvitz_thompson`). 21 | 22 | # Getting tidy 23 | 24 | The first step to the tidyverse is turning model output into data we can manipulate. The `tidy` function converts an `lm_robust` object into a data.frame. 25 | 26 | ```{r, tidy = T, message = F, warning=T} 27 | library(estimatr) 28 | fit <- lm_robust(Fertility ~ Agriculture + Catholic, data = swiss) 29 | tidy(fit) 30 | ``` 31 | 32 | # Data manipulation with `dplyr` 33 | 34 | Once a regression fit is a data.frame, you can use any of the `dplyr` "verbs" for data manipulation, like `mutate`, `filter`, `select`, `summarise`, `group_by`, and `arrange` (more on this [here](https://dplyr.tidyverse.org)). 35 | 36 | ```{r, tidy = TRUE, message = FALSE, warning = FALSE} 37 | library(tidyverse) 38 | 39 | # lm_robust and filter 40 | fit %>% tidy %>% filter(term == "Agriculture") 41 | 42 | # lm_robust and select 43 | fit %>% tidy %>% select(term, estimate, std.error) 44 | 45 | # lm_robust and mutate 46 | fit %>% tidy %>% mutate(t_stat = estimate/ std.error, 47 | significant = p.value <= 0.05) 48 | ``` 49 | 50 | # Data visualization with `ggplot2` 51 | 52 | `ggplot2` offers a number of data visualization tools that are compatible with `estimatr`. 53 | 54 | 1. Make a coefficient plot: 55 | 56 | ```{r, tidy = T, message=F, warning=F} 57 | fit %>% 58 | tidy %>% 59 | filter(term != "(Intercept)") %>% 60 | ggplot(aes(y = term, x = estimate)) + 61 | geom_vline(xintercept = 0, linetype = 2) + 62 | geom_point() + 63 | geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, height = 0.1)) + 64 | theme_bw() 65 | ``` 66 | 67 | 2. Plot CIs based on robust variance estimates (rather than the "classical" variance estimates) using the `geom_smooth` and `stat_smooth` functions. 68 | 69 | ```{r, tidy = T, message=F, warning=F} 70 | library(ggplot2) 71 | ggplot(swiss, aes(x = Agriculture, y = Fertility)) + 72 | geom_point() + 73 | geom_smooth(method = "lm_robust") + 74 | theme_bw() 75 | ``` 76 | 77 | Note that the functional form can include polynomials. For instance, if the model is $Fertility \sim Agriculture + Agriculture^2 + Agriculture^3$, we can model this in the following way: 78 | 79 | ```{r, tidy = T, message=F, warning=F} 80 | library(ggplot2) 81 | ggplot(swiss, aes(x = Agriculture, y = Fertility)) + 82 | geom_point() + 83 | geom_smooth(method = "lm_robust", 84 | formula = y ~ poly(x, 3, raw = TRUE)) + 85 | theme_bw() 86 | ``` 87 | 88 | # Bootstrap using `rsample` 89 | 90 | The `rsample` package provides tools for bootstrapping: 91 | 92 | ```{r, tidy =T, message = F, warning=F} 93 | library(rsample) 94 | 95 | boot_out <- 96 | bootstraps(data = swiss, 500)$splits %>% 97 | map(~ lm_robust(Fertility ~ Catholic + Agriculture, data = analysis(.))) %>% 98 | map(tidy) %>% 99 | bind_rows(.id = "bootstrap_replicate") 100 | kable(head(boot_out)) 101 | ``` 102 | 103 | `boot_out` is a data.frame that contains estimates from each bootstrapped sample. We can then use `dplyr` functions to summarize the bootstraps, `tidyr` functions to reshape the estimates, and `GGally::ggpairs` to visualize them. 
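For instance, simple percentile confidence intervals are a single `dplyr` summary away (a small sketch; the 95% level and the plain percentile method are just illustrative choices):

```{r, message = FALSE, warning = FALSE}
# Percentile 95% intervals across the bootstrap replicates (illustrative only)
boot_out %>%
  group_by(term) %>%
  summarise(conf.low = quantile(estimate, 0.025),
            conf.high = quantile(estimate, 0.975))
```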
104 | 105 | ```{r, tidy =T, message = F, warning=F} 106 | boot_out %>% 107 | group_by(term) %>% 108 | summarise(boot_se = sd(estimate)) 109 | 110 | # To visualize the sampling distribution 111 | 112 | library(GGally) 113 | boot_out %>% 114 | select(bootstrap_replicate, term, estimate) %>% 115 | spread(key = term, value = estimate) %>% 116 | select(-bootstrap_replicate) %>% 117 | ggpairs(lower = list(continuous = wrap("points", alpha = 0.1))) + 118 | theme_bw() 119 | ``` 120 | 121 | # Multiple models using `purrr` 122 | 123 | `purrr` provides tools to perform the same operation on every element of a vector. For instance, we may want to estimate a model on different subsets of data. We can use the `map` function to do this. 124 | 125 | ```{r} 126 | library(purrr) 127 | 128 | # Running the same model for highly educated and less educated cantons/districts 129 | 130 | two_subsets <- 131 | swiss %>% 132 | mutate(HighlyEducated = as.numeric(Education > 8)) %>% 133 | split(.$HighlyEducated) %>% 134 | map( ~ lm_robust(Fertility ~ Catholic, data = .)) %>% 135 | map(tidy) %>% 136 | bind_rows(.id = "HighlyEducated") 137 | 138 | kable(two_subsets, digits =2) 139 | ``` 140 | 141 | Alternatively, we might want to regress different dependent variables on the same independent variable. `map` can be used along with `estimatr` functions for this purpose as well. 142 | 143 | ```{r, tidy = T, message=F, warning=F} 144 | three_outcomes <- 145 | c("Fertility", "Education", "Agriculture") %>% 146 | map(~ formula(paste0(., " ~ Catholic"))) %>% 147 | map(~ lm_robust(., data = swiss)) %>% 148 | map_df(tidy) 149 | 150 | kable(three_outcomes, digits =2) 151 | ``` 152 | 153 | Using `ggplot2`, we can make a coefficient plot: 154 | 155 | ```{r, tidy= T} 156 | 157 | three_outcomes %>% 158 | filter(term == "Catholic") %>% 159 | ggplot(aes(x = estimate, y = outcome)) + 160 | geom_vline(xintercept = 0, linetype = 2) + 161 | geom_point() + 162 | geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, height = 0.1)) + 163 | ggtitle("Slopes with respect to `Catholic`") + 164 | theme_bw() 165 | ``` 166 | 167 | # Concluding thoughts 168 | 169 | Using `estimatr` functions in the tidyverse is easy once the model outputs have been turned into data.frames. We accomplish this with the `tidy` function. After that, so many summary and visualization possibilities open up. Happy tidying! 
170 | 171 | 172 | -------------------------------------------------------------------------------- /vignettes/estimatr.bib: -------------------------------------------------------------------------------- 1 | @article{abadieetal2017, 2 | title={A class of unbiased estimators of the average treatment effect in randomized experiments}, 3 | author={Abadie, Alberto and Athey, Susan and Imbens, Guido W and Wooldridge, Jeffrey}, 4 | journal={arXiv Pre-print}, 5 | year={2017}, 6 | url={https://arxiv.org/abs/1710.02926v2} 7 | } 8 | 9 | 10 | @article{aronowmiddleton2013, 11 | title={A class of unbiased estimators of the average treatment effect in randomized experiments}, 12 | author={Aronow, Peter M and Middleton, Joel A}, 13 | journal={Journal of Causal Inference}, 14 | volume={1}, 15 | number={1}, 16 | pages={135--154}, 17 | year={2013}, 18 | url={https://doi.org/10.1515/jci-2012-0009} 19 | } 20 | 21 | @article{aronowsamii2017, 22 | title={Estimating average causal effects under interference between units}, 23 | author={Aronow, Peter M and Samii, Cyrus}, 24 | journal={Annals of Applied Statistics}, 25 | pages={forthcoming}, 26 | year={2017}, 27 | url={https://arxiv.org/abs/1305.6156v3} 28 | } 29 | 30 | @article{bellmccaffrey2002, 31 | title={Bias reduction in standard errors for linear regression with multi-stage samples}, 32 | author={Bell, Robert M and McCaffrey, Daniel F}, 33 | journal={Survey Methodology}, 34 | volume={28}, 35 | number={2}, 36 | pages={169--182}, 37 | year={2002} 38 | } 39 | 40 | @article{freedman2008, 41 | title={On regression adjustments in experiments with several treatments}, 42 | author={Freedman, David A}, 43 | journal={The Annals of Applied Statistics}, 44 | pages={176--196}, 45 | year={2008}, 46 | publisher={JSTOR}, 47 | url={https://doi.org/10.1214/07-AOAS143} 48 | } 49 | 50 | @book{gerbergreen2012, 51 | title={Field Experiments: Design, Analysis, and Interpretation}, 52 | author={Gerber, Alan S and Green, Donald P}, 53 | year={2012}, 54 | publisher={W.W. 
Norton}, 55 | address={New York}, 56 | isbn={978-0-393-97995-4} 57 | } 58 | 59 | @article{imaietal2009, 60 | title={The essential role of pair matching in cluster-randomized experiments, with application to the Mexican universal health insurance evaluation}, 61 | author={Imai, Kosuke and King, Gary and Nall, Clayton}, 62 | journal={Statistical Science}, 63 | volume={24}, 64 | number={1}, 65 | pages={29--53}, 66 | year={2009}, 67 | publisher={Institute of Mathematical Statistics}, 68 | url={https://doi.org/10.1214/08-STS274} 69 | } 70 | 71 | @article{imbenskolesar2016, 72 | title={Robust standard errors in small samples: Some practical advice}, 73 | author={Imbens, Guido W and Kolesar, Michal}, 74 | journal={Review of Economics and Statistics}, 75 | volume={98}, 76 | number={4}, 77 | pages={701--712}, 78 | year={2016}, 79 | publisher={MIT Press}, 80 | url={https://doi.org/10.1162/REST_a_00552} 81 | } 82 | 83 | @article{lin2013, 84 | title={Agnostic notes on regression adjustments to experimental data: Reexamining Freedman's critique}, 85 | author={Lin, Winston}, 86 | journal={The Annals of Applied Statistics}, 87 | volume={7}, 88 | number={1}, 89 | pages={295--318}, 90 | year={2013}, 91 | publisher={Institute of Mathematical Statistics}, 92 | url={https://doi.org/10.1214/12-AOAS583} 93 | } 94 | 95 | @article{livalliant2009, 96 | title={Survey weighted hat matrix and leverages}, 97 | author={Li, Jianzhu and Valliant, Richard}, 98 | journal={Survey Methodology}, 99 | volume={35}, 100 | number={1}, 101 | pages={15--24}, 102 | year={2009}, 103 | publisher={Statistics Canada} 104 | } 105 | 106 | @article{longervin2000, 107 | title={Using heteroscedasticity consistent standard errors in the linear regression model}, 108 | author={Long, J Scott and Ervin, Laurie H}, 109 | journal={The American Statistician}, 110 | volume={54}, 111 | number={3}, 112 | pages={217--224}, 113 | year={2000}, 114 | publisher={Taylor \& Francis Group}, 115 | url={https://doi.org/10.1080/00031305.2000.10474549} 116 | } 117 | 118 | @article{mackinnonwhite1985, 119 | title = {Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties}, 120 | author = {MacKinnon, James and White, Halbert}, 121 | year = {1985}, 122 | journal = {Journal of Econometrics}, 123 | volume = {29}, 124 | number = {3}, 125 | pages = {305-325}, 126 | url = {https://doi.org/10.1016/0304-4076(85)90158-7} 127 | } 128 | 129 | @article{middletonaronow2015, 130 | title={Unbiased estimation of the average treatment effect in cluster-randomized experiments}, 131 | author={Middleton, Joel A and Aronow, Peter M}, 132 | journal={Statistics, Politics and Policy}, 133 | volume={6}, 134 | number={1-2}, 135 | pages={39--75}, 136 | year={2015}, 137 | url={https://doi.org/10.1515/spp-2013-0002} 138 | } 139 | 140 | @article{pustejovskytipton2016, 141 | title={Small-Sample Methods for Cluster-Robust Variance Estimation and Hypothesis Testing in Fixed Effects Models}, 142 | author={Pustejovsky, James E and Tipton, Elizabeth}, 143 | journal={Journal of Business \& Economic Statistics}, 144 | year={2018}, 145 | number={4}, 146 | volume={36}, 147 | publisher={Taylor \& Francis}, 148 | url={https://doi.org/10.1080/07350015.2016.1247004} 149 | } 150 | 151 | @article{romanowolf2017, 152 | title={Resurrecting weighted least squares}, 153 | author={Romano, Joseph P and Wolf, Michael}, 154 | journal={Journal of Econometrics}, 155 | volume={197}, 156 | number={1}, 157 | pages={1--19}, 158 | year={2017}, 159 | publisher={Elsevier}, 160 | 
url={https://doi.org/10.1016/j.jeconom.2016.10.003} 161 | } 162 | 163 | @article{samiiaronow2012, 164 | title={On equivalencies between design-based and regression-based variance estimators for randomized experiments}, 165 | author={Samii, Cyrus and Aronow, Peter M}, 166 | journal={Statistics and Probability Letters}, 167 | year={2012}, 168 | volume={82}, 169 | number={2}, 170 | url={https://doi.org/10.1016/j.spl.2011.10.024} 171 | } 172 | -------------------------------------------------------------------------------- /vignettes/lm_speed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/lm_speed.png -------------------------------------------------------------------------------- /vignettes/lm_speed_covars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/lm_speed_covars.png -------------------------------------------------------------------------------- /vignettes/regression-tables.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Regression Tables with estimatr" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Regression Tables with estimatr} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | Preparing regression tables with estimatr is possible with all of the major R-to-LaTeX packages, including `texreg`, `modelsummary`, `stargazer`, `xtable`, and `huxtable`. 11 | 12 | ## Setup 13 | 14 | First we'll load both `estimatr` and `magrittr` (for pipes). 15 | 16 | ```{r} 17 | library(estimatr) 18 | library(magrittr) 19 | ``` 20 | 21 | ## Texreg 22 | 23 | Texreg operates directly on an `lm_robust` object. If you would like standard errors instead of confidence intervals, use `include.ci = FALSE`. 24 | 25 | ```{r, message = FALSE} 26 | library(texreg) 27 | fit <- lm_robust(mpg ~ disp, data = mtcars) 28 | texreg(fit, include.ci = FALSE) 29 | ``` 30 | 31 | ## modelsummary 32 | 33 | `modelsummary` operates directly on `lm_robust` objects: 34 | 35 | ```{r, message = FALSE, eval = FALSE} 36 | library(modelsummary) 37 | fit1 <- lm_robust(mpg ~ disp, data = mtcars) 38 | fit2 <- lm_robust(mpg ~ hp, data = mtcars) 39 | modelsummary(list(fit1, fit2)) 40 | ``` 41 | 42 | To learn how to customize the output table and/or statistics, please consult the `modelsummary` README file on Github: https://github.com/vincentarelbundock/modelsummary 43 | 44 | ## Stargazer 45 | 46 | Stargazer has to be *tricked* -- there's unfortunately no way around this unless something in the stargazer package changes. The maintainer has indicated that he has no active plans to update stargazer in the future. First, run the regression using `lm`. Then you can pass the `lm` fits to `starprep`, a function which transforms the `lm` fits into `lm_robust` fits and then prepares the appropriate statistic you requested. The `starprep` function defaults to returning standard errors and uses `lm_robust` defaults for standard errors (robust HC2 SEs are the default in both `lm_robust` and `starprep`). Then you pass the `lm` fit to stargazer and then pass `starprep(fit)` to the `se` argument in `stargazer`. This process works great with multiple fits. 
47 | 48 | ```{r, message = FALSE, eval = FALSE} 49 | library(stargazer) 50 | fit_1 <- lm(mpg ~ disp, data = mtcars) 51 | fit_2 <- lm(mpg ~ hp, data = mtcars) 52 | stargazer(fit_1, fit_2, se = starprep(fit_1, fit_2)) 53 | ``` 54 | ```{r, message = FALSE, echo = FALSE} 55 | library(stargazer) 56 | fit_1 <- lm(mpg ~ disp, data = mtcars) 57 | fit_2 <- lm(mpg ~ hp, data = mtcars) 58 | stargazer(fit_1, fit_2, se = starprep(fit_1, fit_2), header = FALSE) 59 | ``` 60 | 61 | Below are some more examples, although we hide the output for brevity. 62 | 63 | ```{r, eval = FALSE} 64 | # Can also specify clusters and standard error type 65 | stargazer( 66 | fit_1, fit_2, 67 | se = starprep(fit_1, fit_2, clusters = mtcars$cyl, se_type = "stata") 68 | ) 69 | 70 | # Can also precompute robust objects to save computation time 71 | # using `commarobust` 72 | fit_1_r <- commarobust(fit_1) 73 | fit_2_r <- commarobust(fit_2) 74 | stargazer(fit_1, fit_2, 75 | se = starprep(fit_1_r, fit_2_r), 76 | p = starprep(fit_1_r, fit_2_r, stat = "p.value")) 77 | 78 | # can also easily get robust confidence intervals 79 | stargazer(fit_1, fit_2, 80 | ci.custom = starprep(fit_1_r, fit_2_r, stat = "ci")) 81 | ``` 82 | 83 | ## xtable 84 | 85 | `xtable` works directly on a data.frame, so we just have to prep the `lm_robust` output with the `tidy` function. 86 | 87 | ```{r} 88 | library(xtable) 89 | fit <- lm_robust(mpg ~ disp, data = mtcars) 90 | fit %>% tidy %>% xtable() 91 | ``` 92 | 93 | ## huxtable 94 | 95 | `huxtable`, too, works on a data.frame, so all we have to do is prep with `tidy`. 96 | 97 | ```{r, message=FALSE} 98 | library(huxtable) 99 | fit <- lm_robust(mpg ~ disp, data = mtcars) 100 | fit %>% tidy %>% hux() %>% print_latex() 101 | ``` 102 | 103 | 104 | -------------------------------------------------------------------------------- /vignettes/simulations-debias-dim.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/simulations-debias-dim.rda -------------------------------------------------------------------------------- /vignettes/simulations-ols-var.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/simulations-ols-var.rda --------------------------------------------------------------------------------