├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pr-commands.yaml │ └── test-coverage.yaml ├── .gitignore ├── CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── S3_confint.R ├── S3_emmeans.R ├── S3_glance.R ├── S3_nobs.R ├── S3_predict.R ├── S3_print.R ├── S3_summary.R ├── S3_tidy.R ├── S3_update.R ├── S3_vcov.R ├── data.R ├── estimatr.R ├── estimatr_difference_in_means.R ├── estimatr_horvitz_thompson.R ├── estimatr_iv_robust.R ├── estimatr_lh_robust.R ├── estimatr_lm_lin.R ├── estimatr_lm_robust.R ├── helper_cis_pvals.R ├── helper_clean_model_data.R ├── helper_condition_pr_matrix.R ├── helper_extract.R ├── helper_lm_robust_fit.R ├── helper_na_omit_detailed.R ├── helper_parse_arguments.R ├── helper_return.R ├── helper_starprep.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── cran-comments.md ├── data-raw ├── STAR_public_use.dta └── build-alo-star-for-lin.R ├── data └── alo_star_men.rda ├── estimatr.Rproj ├── man ├── alo_star_men.Rd ├── commarobust.Rd ├── declaration_to_condition_pr_mat.Rd ├── difference_in_means.Rd ├── estimatr.Rd ├── estimatr_glancers.Rd ├── estimatr_tidiers.Rd ├── extract.lm_robust.Rd ├── gen_pr_matrix_cluster.Rd ├── horvitz_thompson.Rd ├── iv_robust.Rd ├── lh_robust.Rd ├── lm_lin.Rd ├── lm_robust.Rd ├── lm_robust_fit.Rd ├── na.omit_detailed.data.frame.Rd ├── permutations_to_condition_pr_mat.Rd ├── predict.lm_robust.Rd ├── reexports.Rd └── starprep.Rd ├── src ├── Makevars ├── Makevars.win ├── RcppExports.cpp ├── horvitz_thompson_variance.cpp ├── lm_robust_helper.cpp └── naomit.cpp ├── tests ├── sleep.R ├── sleep.Rout.save ├── testthat.R └── testthat │ ├── helper-lm-robust-se.R │ ├── helper-return-cleaners.R │ ├── helper-se-types.R │ ├── mtcars.csv │ ├── run-stata-areg-models.do │ ├── run-stata-iv-models.do │ ├── run-stata-models.do │ ├── stata-ests.txt │ ├── stata-fe-ests.txt │ ├── stata-iv-diagnostics.txt │ ├── stata-iv-ests.txt │ ├── test-arg-checking.R │ ├── test-condition-pr-matrix.R │ ├── test-condition1-condition2.R │ ├── test-difference-in-means.R │ ├── test-horvitz-thompson.R │ ├── test-iv-robust-fes.R │ ├── test-iv-robust.R │ ├── test-lh-robust.R │ ├── test-lm-cluster.R │ ├── test-lm-lin.R │ ├── test-lm-robust-fes.R │ ├── test-lm-robust-helper.R │ ├── test-lm-robust.R │ ├── test-lm-robust_emmeans.R │ ├── test-lm-robust_margins.R │ ├── test-modelsummary.R │ ├── test-na-omit-details.R │ ├── test-replicate-HT-middleton.R │ ├── test-replicate-lin2013.R │ ├── test-return.R │ ├── test-s3-methods.R │ ├── test-sig-testing.R │ ├── test-starprep.R │ ├── test-stata-output.R │ ├── test-texreg.R │ ├── test-zzz.R │ └── test-zzzbroom.R └── vignettes ├── absorbing-fixed-effects.Rmd ├── benchmarking-estimatr.Rmd ├── emmeans-examples.Rmd ├── estimatr-in-the-tidyverse.Rmd ├── estimatr.bib ├── getting-started.Rmd ├── lm_speed.png ├── lm_speed_covars.png ├── mathematical-notes.Rmd ├── regression-tables.Rmd ├── simulations-debias-dim.rda ├── simulations-debiasing-dim.Rmd ├── simulations-ols-var.rda ├── simulations-ols-variance.Rmd └── stata-wls-hat.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 2 | README.Rmd 3 | README.md 4 | index.Rmd 5 | index.md 6 | ^.*\.Rproj$ 7 | ^\.Rproj\.user$ 8 | appveyor.yml 9 | ^appveyor\.yml$ 10 | CONDUCT.md 11 | ^vignettes$ 12 | ^docs$ 13 | ^.github$ 14 | ^docs/templates/.*$ 15 | deploy.sh 16 | _pkgdown.yml 17 | update_repo.R 18 | ^data-raw$ 19 | ^_pkgdown\.yml$ 20 | 
^tests/testthat/mtcars\.csv$ 21 | ^tests/testthat/run-stata-models\.do$ 22 | ^tests/testthat/test-zzzbroom\.R$ 23 | ^estimatr_.*\.tar\.gz$ 24 | ^tests/testthat/test-texreg\.R$ 25 | ^tests/testthat/test-gtsummary\.R$ 26 | ^cran-comments\.md$ 27 | ^tests/testthat/test-zzz\.R$ 28 | ^CRAN-RELEASE$ 29 | ^pkgdown$ 30 | ^\.github$ 31 | ^CRAN-SUBMISSION$ 32 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: R-CMD-check.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | R-CMD-check: 14 | runs-on: ${{ matrix.config.os }} 15 | 16 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | config: 22 | - {os: macos-latest, r: 'release'} 23 | - {os: windows-latest, r: 'release'} 24 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 25 | - {os: ubuntu-latest, r: 'release'} 26 | - {os: ubuntu-latest, r: 'oldrel-1'} 27 | 28 | env: 29 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 30 | R_KEEP_PKG_SOURCE: yes 31 | 32 | steps: 33 | - uses: actions/checkout@v4 34 | 35 | - uses: r-lib/actions/setup-pandoc@v2 36 | 37 | - uses: r-lib/actions/setup-r@v2 38 | with: 39 | r-version: ${{ matrix.config.r }} 40 | http-user-agent: ${{ matrix.config.http-user-agent }} 41 | use-public-rspm: true 42 | 43 | - uses: r-lib/actions/setup-r-dependencies@v2 44 | with: 45 | extra-packages: any::rcmdcheck 46 | needs: check 47 | 48 | - uses: r-lib/actions/check-r-package@v2 49 | with: 50 | upload-snapshots: true 51 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 52 | -------------------------------------------------------------------------------- /.github/workflows/pr-commands.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | issue_comment: 3 | types: [created] 4 | name: Commands 5 | jobs: 6 | document: 7 | if: startsWith(github.event.comment.body, '/document') 8 | name: document 9 | runs-on: macOS-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: r-lib/actions/pr-fetch@master 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | - uses: r-lib/actions/setup-r@master 16 | - name: Install dependencies 17 | run: Rscript -e 'install.packages(c("remotes", "roxygen2"))' -e 'remotes::install_deps(dependencies = TRUE)' 18 | - name: Document 19 | run: Rscript -e 'roxygen2::roxygenise()' 20 | - name: commit 21 | run: | 22 | git add man/\* NAMESPACE 23 | git commit -m 'Document' 24 | - uses: r-lib/actions/pr-push@master 25 | with: 26 | repo-token: ${{ secrets.GITHUB_TOKEN }} 27 | style: 28 | if: startsWith(github.event.comment.body, '/style') 29 | name: style 30 | runs-on: macOS-latest 31 | steps: 32 | - uses: actions/checkout@v2 33 | - uses: r-lib/actions/pr-fetch@master 34 | with: 35 | repo-token: ${{ secrets.GITHUB_TOKEN }} 36 | - uses: r-lib/actions/setup-r@master 37 | - name: Install dependencies 38 | run: Rscript -e 'install.packages("styler")' 39 | - name: Style 40 | run: Rscript -e 
'styler::style_pkg()' 41 | - name: commit 42 | run: | 43 | git add \*.R 44 | git commit -m 'Style' 45 | - uses: r-lib/actions/pr-push@master 46 | with: 47 | repo-token: ${{ secrets.GITHUB_TOKEN }} 48 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr, any::xml2 28 | needs: coverage 29 | 30 | - name: Test coverage 31 | run: | 32 | cov <- covr::package_coverage( 33 | quiet = FALSE, 34 | clean = FALSE, 35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 36 | ) 37 | covr::to_cobertura(cov) 38 | shell: Rscript {0} 39 | 40 | - uses: codecov/codecov-action@v4 41 | with: 42 | # Fail if error if not on PR, or if on PR and token is given 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 44 | file: ./cobertura.xml 45 | plugin: noop 46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | # Example code in package build process 5 | *-Ex.R 6 | # RStudio files 7 | .Rproj.user/ 8 | # produced vignettes 9 | vignettes/*.html 10 | vignettes/*.pdf 11 | vignettes/.* 12 | docs/ 13 | .Rproj.user 14 | tests/testthat/*.pdf 15 | .DS_Store 16 | 17 | # compiled files 18 | src/*.o 19 | src/*.so 20 | src/*.dll 21 | inst/doc 22 | *_cache/ 23 | docs 24 | -------------------------------------------------------------------------------- /CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at [abuse@declaredesign.org](mailto:abuse@declaredesign.org). All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: estimatr 2 | Type: Package 3 | Title: Fast Estimators for Design-Based Inference 4 | Version: 1.0.4 5 | Authors@R: c(person("Graeme", "Blair", email = "graeme.blair@gmail.com", role = c("aut", "cre")), 6 | person("Jasper", "Cooper", email = "jjc2247@columbia.edu", role = c("aut")), 7 | person("Alexander", "Coppock", email = "alex.coppock@yale.edu", role = c("aut")), 8 | person("Macartan", "Humphreys", email = "macartan@gmail.com", role = c("aut")), 9 | person("Luke", "Sonnet", email = "luke.sonnet@gmail.com", role = c("aut")), 10 | person("Neal", "Fultz", email = "nfultz@gmail.com", role = c("ctb")), 11 | person("Lily", "Medina", email = "lilymiru@gmail.com", role = c("ctb")), 12 | person("Russell", "Lenth", email = "russell-lenth@uiowa.edu", role = c("ctb"))) 13 | Description: Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . 14 | URL: https://declaredesign.org/r/estimatr/, https://github.com/DeclareDesign/estimatr 15 | BugReports: https://github.com/DeclareDesign/estimatr/issues 16 | License: MIT + file LICENSE 17 | Depends: R (>= 3.6.0) 18 | Imports: 19 | Formula, 20 | generics, 21 | methods, 22 | Rcpp (>= 0.12.16), 23 | rlang (>= 0.2.0) 24 | LinkingTo: Rcpp, RcppEigen 25 | Encoding: UTF-8 26 | RoxygenNote: 7.3.1 27 | LazyData: true 28 | Suggests: 29 | fabricatr (>= 0.10.0), 30 | randomizr (>= 0.20.0), 31 | AER, 32 | clubSandwich, 33 | emmeans (>= 1.4), 34 | estimability, 35 | margins, 36 | modelsummary, 37 | prediction, 38 | RcppEigen, 39 | sandwich, 40 | stargazer, 41 | testthat, 42 | car 43 | Enhances: 44 | texreg 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015-2018 2 | COPYRIGHT HOLDER: Graeme Blair, Jasper Cooper, Alexander Coppock, Macartan Humphreys, and Luke Sonnet 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(confint,difference_in_means) 4 | S3method(confint,horvitz_thompson) 5 | S3method(confint,iv_robust) 6 | S3method(confint,lh) 7 | S3method(confint,lh_robust) 8 | S3method(confint,lm_robust) 9 | S3method(glance,difference_in_means) 10 | S3method(glance,horvitz_thompson) 11 | S3method(glance,iv_robust) 12 | S3method(glance,lh_robust) 13 | S3method(glance,lm_robust) 14 | S3method(nobs,difference_in_means) 15 | S3method(nobs,horvitz_thompson) 16 | S3method(nobs,iv_robust) 17 | S3method(nobs,lh_robust) 18 | S3method(nobs,lm_robust) 19 | S3method(nobs,summary.lm_robust) 20 | 
S3method(predict,iv_robust) 21 | S3method(predict,lm_robust) 22 | S3method(print,difference_in_means) 23 | S3method(print,horvitz_thompson) 24 | S3method(print,iv_robust) 25 | S3method(print,lh) 26 | S3method(print,lh_robust) 27 | S3method(print,lm_robust) 28 | S3method(print,summary.iv_robust) 29 | S3method(print,summary.lh) 30 | S3method(print,summary.lh_robust) 31 | S3method(print,summary.lm_robust) 32 | S3method(summary,difference_in_means) 33 | S3method(summary,horvitz_thompson) 34 | S3method(summary,iv_robust) 35 | S3method(summary,lh) 36 | S3method(summary,lh_robust) 37 | S3method(summary,lm_robust) 38 | S3method(tidy,difference_in_means) 39 | S3method(tidy,horvitz_thompson) 40 | S3method(tidy,iv_robust) 41 | S3method(tidy,lh) 42 | S3method(tidy,lh_robust) 43 | S3method(tidy,lm_robust) 44 | S3method(update,iv_robust) 45 | S3method(vcov,difference_in_means) 46 | S3method(vcov,horvitz_thompson) 47 | S3method(vcov,iv_robust) 48 | S3method(vcov,lm_robust) 49 | export(commarobust) 50 | export(declaration_to_condition_pr_mat) 51 | export(difference_in_means) 52 | export(extract.iv_robust) 53 | export(extract.lm_robust) 54 | export(gen_pr_matrix_cluster) 55 | export(glance) 56 | export(horvitz_thompson) 57 | export(iv_robust) 58 | export(lh_robust) 59 | export(lm_lin) 60 | export(lm_robust) 61 | export(lm_robust_fit) 62 | export(permutations_to_condition_pr_mat) 63 | export(starprep) 64 | export(tidy) 65 | importFrom(Formula,Formula) 66 | importFrom(Formula,as.Formula) 67 | importFrom(Rcpp,evalCpp) 68 | importFrom(generics,glance) 69 | importFrom(generics,tidy) 70 | importFrom(methods,className) 71 | importFrom(methods,isGeneric) 72 | importFrom(methods,setGeneric) 73 | importFrom(methods,setMethod) 74 | importFrom(rlang,"%||%") 75 | importFrom(rlang,enquo) 76 | importFrom(rlang,enquos) 77 | importFrom(rlang,eval_tidy) 78 | importFrom(rlang,f_rhs) 79 | importFrom(rlang,quo) 80 | importFrom(rlang,quo_get_expr) 81 | importFrom(rlang,quo_is_missing) 82 | importFrom(rlang,quo_set_expr) 83 | importFrom(rlang,quos) 84 | importFrom(rlang,sym) 85 | importFrom(stats,.checkMFClasses) 86 | importFrom(stats,.getXlevels) 87 | importFrom(stats,coef) 88 | importFrom(stats,complete.cases) 89 | importFrom(stats,confint) 90 | importFrom(stats,delete.response) 91 | importFrom(stats,df.residual) 92 | importFrom(stats,fitted.values) 93 | importFrom(stats,formula) 94 | importFrom(stats,getCall) 95 | importFrom(stats,lm) 96 | importFrom(stats,lm.fit) 97 | importFrom(stats,model.extract) 98 | importFrom(stats,model.frame) 99 | importFrom(stats,model.frame.default) 100 | importFrom(stats,model.matrix) 101 | importFrom(stats,model.matrix.default) 102 | importFrom(stats,model.matrix.lm) 103 | importFrom(stats,model.response) 104 | importFrom(stats,na.omit) 105 | importFrom(stats,na.pass) 106 | importFrom(stats,nobs) 107 | importFrom(stats,pchisq) 108 | importFrom(stats,pf) 109 | importFrom(stats,printCoefmat) 110 | importFrom(stats,pt) 111 | importFrom(stats,qt) 112 | importFrom(stats,reformulate) 113 | importFrom(stats,resid) 114 | importFrom(stats,residuals) 115 | importFrom(stats,sd) 116 | importFrom(stats,setNames) 117 | importFrom(stats,terms) 118 | importFrom(stats,update) 119 | importFrom(stats,var) 120 | importFrom(stats,vcov) 121 | importFrom(stats,weighted.mean) 122 | importFrom(stats,weights) 123 | importFrom(utils,getS3method) 124 | importFrom(utils,packageVersion) 125 | useDynLib(estimatr, .registration = TRUE) 126 | -------------------------------------------------------------------------------- 
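A minimal sketch of the S3 interface registered in the NAMESPACE above (tidy, glance, confint, vcov, nobs, summary), assuming base R's mtcars data and an arbitrary formula — illustrative only, not a file shipped in the package:

    library(estimatr)
    fit <- lm_robust(mpg ~ hp + wt, data = mtcars, se_type = "HC2")
    summary(fit)               # dispatches summary.lm_robust / print.summary.lm_robust
    tidy(fit)                  # data.frame: term, estimate, std.error, statistic, p.value, conf.low, conf.high, df, outcome
    glance(fit)                # one-row fit summary: r.squared, adj.r.squared, statistic, p.value, df.residual, nobs, se_type
    confint(fit, level = 0.90) # confint.lm_robust
    vcov(fit)                  # robust variance-covariance matrix
    nobs(fit)                  # number of observations used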
/NEWS.md: -------------------------------------------------------------------------------- 1 | # estimatr 1.0.4 2 | 3 | * Test suite changes for M1 mac stay current on CRAN. 4 | 5 | # estimatr 1.0.2 6 | 7 | * Minor documentation changes to stay current on CRAN. 8 | 9 | # estimatr 1.0.0 10 | 11 | * Version bump to coincide with DeclareDesign package version 1.0.0 12 | * Tests edited 13 | 14 | # estimatr 0.30.6 15 | 16 | * Fix tests to address CRAN failures 17 | 18 | # estimatr 0.30.4 19 | 20 | * Bug fix of tidy handling of conf.level 21 | * Bug fix of lh_robust tidy 22 | 23 | # estimatr 0.30.2 24 | 25 | * Remove lfe from tests 26 | 27 | # estimatr 0.30.0 28 | 29 | * Test suite changes (skip if not installed for checking against other packages) 30 | 31 | # estimatr 0.28.0 32 | 33 | * Test suite changes 34 | 35 | # estimatr 0.26.0 36 | 37 | * Test suite changes 38 | 39 | # estimatr 0.24.0 40 | 41 | * tidy: rename nobs, nclusters, nblocks 42 | * tidy: new arguments conf.int, conf.level 43 | * Added `update.iv_robust()` 44 | * Bug fix regarding fixed effects with large numbers 45 | 46 | # estimatr 0.22.0 47 | 48 | * Bug fixes 49 | 50 | # estimatr 0.20.0 51 | 52 | * Added support for `emmeans` (thanks @rvlenth)! 53 | * Fixed bug when estimating `diagnostics` in `iv_robust()` without explicitly specifying `se_type` (issue #310) 54 | * Support for `rlang` 0.4.0 55 | 56 | # estimatr 0.18.0 57 | 58 | * Fixed bug where collinear covariates caused fixed effects estimator to crash (issue #294) 59 | * Added `glance.lh_robust()` and fixed some issues with printing and summarizing `lh_robust()` objects (issues #295 and #296) 60 | * Fixes CRAN errors in testing with new `clubSandwich` package 61 | 62 | # estimatr 0.16.0 63 | 64 | * Add `diagnostics` to `iv_robust()` 65 | * Add `glance()` methods for all estimators 66 | * Add `lh_robust()` for easy interface to `car::linearHypothesis()` 67 | * Fixed minor bug with a formula such as `is.na(var)` in the `covariates` formula in `lm_lin()` (issue #283) 68 | 69 | # estimatr 0.14.0 70 | 71 | * Removes `broom` hack for `tidy` method and instead relies on importing `generics` 72 | 73 | # estimatr 0.12.0 74 | 75 | * Fixed ambiguity about how interacted covariates were centered in `lm_lin` 76 | * A series of fixes for bugs that occurred with multiple outcomes (multivariate regression): 77 | * Fixed bug pointed out by James Pustejovsky via the `sandwich` version 2.5-0 and off-diagonal blocks of multivariate regression vcov matrices 78 | * Fixed bugs in `lm_lin` preventing multivariate regression 79 | * Fixed bug that truncated degrees of freedom with "CR2" standard errors 80 | * Fixed bug that returned incorrect R-squared for the second or later outcomes 81 | * Fixed bug preventing integration with latest version of `margins` 82 | * Fixed bug with `difference_in_means` when using `condition1` and `condition2` to subset a treatment vector with more than two treatment conditions. Previous estimates and standard errors were incorrect. 
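A minimal sketch of the `lm_lin` centering behavior referenced in the 0.12.0 notes above, using the package's `alo_star_men` data (the exact call is illustrative; column names come from the data documentation later in this dump): covariates supplied as a one-sided formula are centered (suffix `_c`) and interacted with the treatment indicator from the main formula.

    library(estimatr)
    data(alo_star_men)
    # gpa0 is centered to gpa0_c and interacted with the ssp treatment indicator
    fit_lin <- lm_lin(GPA_year1 ~ ssp, covariates = ~ gpa0, data = alo_star_men)
    tidy(fit_lin)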
83 | 84 | # estimatr 0.10.0 85 | 86 | * Changed names of confidence interval columns in tidied data from `ci.lower` and `ci.upper` to `conf.low` and `conf.high` to be in line with other tidy methods 87 | * Added support for `fixed_effects` that are just one block 88 | * Added support for specifing `condition_prs` in `horvitz_thompson()` as a single number 89 | * Added t- and z-statistics to output 90 | * Limit unnecessary messaging in `horvitz_thompson()` 91 | 92 | # estimatr 0.8.0 93 | 94 | * Added support for absorbing fixed effects in `lm_robust` and `iv_robust` 95 | * Added `commarobust` and `starprep` for stargazer integration 96 | * Added `texreg` support for 2SLS IV models 97 | * Fixed bugs for incorrect F-statistics with robust standard errors 98 | * Refactor of main fitting engine for linear models 99 | 100 | # estimatr 0.6.0 101 | 102 | * Added support for multivariate linear models 103 | * Added support for instrumental variables regression 104 | * Major change to name of object output elements to mostly match with `broom::tidy` 105 | * old -> new 106 | * `coefficient_names` -> `term` 107 | * `se` -> `std.error` 108 | * `p` -> `p.values` 109 | * `ci_lower` -> `ci.lower` 110 | * `ci_upper` -> `ci.upper` 111 | * All of the above changes are also made to the column names on the output of `tidy`; furthermore for `tidy` objects one further name change from `coefficients` -> `estimate` has been made 112 | * Fixed bug that caused variances, standard errors, and p-values to be wrong for weighted "CR2" variance estimation 113 | * Fixed incorrect estimates when both weights and blocks were passed to `difference_in_means` 114 | * Rewrite NSE handling to be done by `rlang` 115 | * Rewrite `na.omit` handler in R 116 | * Major refactor of C++ underlying regression estimators 117 | 118 | # estimatr 0.4.0 119 | 120 | * Changed suffix added to centered variables in `lm_lin()` from `_bar` to `_c` 121 | * Added all vignettes to `.Rbuildignore`, only available on website now 122 | * Fixed `lm_robust_helper.cpp` algorithm to not catch own exception and to deal with `valgrind` memory errors 123 | * Bugfix where passing a formula as an object within a function would fail 124 | * Simplified some tests for various CRAN test platforms 125 | 126 | # estimatr 0.2.0 127 | 128 | * First **CRAN** upload 129 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | ht_covar_partial <- function(y1, y0, p10, p1, p0) { 5 | .Call(`_estimatr_ht_covar_partial`, y1, y0, p10, p1, p0) 6 | } 7 | 8 | ht_var_partial <- function(y, p) { 9 | .Call(`_estimatr_ht_var_partial`, y, p) 10 | } 11 | 12 | demeanMat2 <- function(what, fes, weights, start_col, eps) { 13 | .Call(`_estimatr_demeanMat2`, what, fes, weights, start_col, eps) 14 | } 15 | 16 | AtA <- function(A) { 17 | .Call(`_estimatr_AtA`, A) 18 | } 19 | 20 | Kr <- function(A, B) { 21 | .Call(`_estimatr_Kr`, A, B) 22 | } 23 | 24 | lm_solver <- function(X, y, try_cholesky) { 25 | .Call(`_estimatr_lm_solver`, X, y, try_cholesky) 26 | } 27 | 28 | lm_variance <- function(X, Xunweighted, XtX_inv, ei, weight_mean, cluster, J, ci, se_type, which_covs, fe_rank) { 29 | .Call(`_estimatr_lm_variance`, X, Xunweighted, XtX_inv, ei, weight_mean, cluster, J, ci, se_type, which_covs, fe_rank) 30 | } 31 | 32 | naomitwhy <- 
function(df, recursive_subset) { 33 | .Call(`_estimatr_naomitwhy`, df, recursive_subset) 34 | } 35 | 36 | -------------------------------------------------------------------------------- /R/S3_confint.R: -------------------------------------------------------------------------------- 1 | confint_lm_like <- function(object, 2 | parm = NULL, 3 | level = NULL, 4 | ...) { 5 | cis <- get_ci_mat(object, level) 6 | 7 | if (!is.null(parm)) { 8 | cis <- cis[parm, , drop = FALSE] 9 | } 10 | 11 | return(cis) 12 | } 13 | 14 | #' @export 15 | confint.lm_robust <- confint_lm_like 16 | 17 | #' @export 18 | confint.iv_robust <- confint_lm_like 19 | 20 | #' @importFrom stats confint 21 | #' @export 22 | confint.lh_robust <- function(object, parm = NULL, level = NULL, ...) { 23 | rbind(confint(object$lm_robust, parm = parm, level = level, ...), tidy(object$lh, ...)) 24 | } 25 | 26 | #' @export 27 | confint.lh <- confint_lm_like 28 | 29 | #' @export 30 | confint.difference_in_means <- function(object, 31 | parm = NULL, 32 | level = NULL, 33 | ...) { 34 | cis <- get_ci_mat(object, level) 35 | 36 | return(cis) 37 | } 38 | 39 | #' @export 40 | confint.horvitz_thompson <- function(object, 41 | parm = NULL, 42 | level = NULL, 43 | ...) { 44 | cis <- get_ci_mat(object, level, ttest = FALSE) 45 | 46 | return(cis) 47 | } 48 | 49 | 50 | ## internal method that builds confidence intervals and labels the matrix to be returned 51 | get_ci_mat <- function(object, level, ttest = TRUE) { 52 | if (!is.null(level)) { 53 | if (!is.null(object[["alpha"]])) { 54 | object[["alpha"]] <- NULL 55 | } 56 | object <- add_cis_pvals(object, alpha = 1 - level, ci = TRUE, ttest = ttest) 57 | } else { 58 | level <- 1 - object$alpha 59 | } 60 | 61 | cis <- cbind( 62 | as.vector(object$conf.low), 63 | as.vector(object$conf.high) 64 | ) 65 | 66 | if (is.matrix(object$conf.low)) { 67 | ny <- ncol(object$conf.low) 68 | p <- nrow(object$conf.low) 69 | rownames(cis) <- paste0( 70 | rep(object$outcome, each = p), 71 | ":", 72 | rep(object$term, times = ny) 73 | ) 74 | } else { 75 | rownames(cis) <- object$term 76 | } 77 | 78 | colnames(cis) <- paste((1 - level) / 2 * c(100, -100) + c(0, 100), "%") 79 | 80 | return(cis) 81 | } 82 | -------------------------------------------------------------------------------- /R/S3_emmeans.R: -------------------------------------------------------------------------------- 1 | ### Support for emmeans package 2 | # 3 | # Note: the recover_data and emm_basis methods are registered dynamically 4 | # (see zzz.R). So these functions are not exported 5 | 6 | #' @importFrom utils getS3method 7 | 8 | recover_data.lm_robust <- function(object, ...) { 9 | data <- getS3method("recover_data", "lm")(object, ...) 10 | if (object$rank < object$k) # rank-deficient. Need to pass dataset to emm_basis 11 | attr(data, "pass.it.on") <- TRUE 12 | data 13 | } 14 | 15 | emm_basis.lm_robust <- function(object, trms, xlev, grid, ...) { 16 | # coef() works right for lm but coef.aov tosses out NAs 17 | bhat <- coef(object) 18 | n.mult <- ifelse(is.matrix(bhat), ncol(bhat), 1) # columns in mult response 19 | m <- suppressWarnings(model.frame(trms, grid, na.action = na.pass, xlev = xlev)) 20 | X <- model.matrix(trms, m, contrasts.arg = object$contrasts) 21 | V <- emmeans::.my.vcov(object, ...) 
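  # (Assumption, for clarity: .my.vcov() is an unexported emmeans helper; it is
  # expected to return vcov(object) unless the caller supplies an alternative
  # covariance matrix or function via emmeans' `vcov.` argument.)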
22 | 23 | if (!anyNA(bhat)) 24 | nbasis <- estimability::all.estble 25 | else { 26 | desmat <- model.matrix(trms, data = attr(object, "data")) 27 | nbasis <- estimability::nonest.basis(desmat) 28 | } 29 | misc <- list() 30 | if (n.mult > 1) { # multivariate case. Need to expand some matrices 31 | eye <- diag(n.mult) 32 | X <- kronecker(eye, X) 33 | nbasis <- kronecker(eye, nbasis) 34 | if(is.null(colnames(bhat))) 35 | colnames(bhat) <- seq_len(n.mult) 36 | misc$ylevs <- list(rep.meas = colnames(bhat)) 37 | bhat <- as.numeric(bhat) # stretch coefs into a vector 38 | } 39 | dfargs <- list(df = object$df.residual) 40 | dffun <- function(k, dfargs) dfargs$df 41 | list(X = X, bhat = bhat, nbasis = nbasis, V = V, 42 | dffun = dffun, dfargs = dfargs, misc = misc) 43 | } 44 | -------------------------------------------------------------------------------- /R/S3_glance.R: -------------------------------------------------------------------------------- 1 | # Helpers to retrieve values 2 | retrieve_value <- function(x, what) if(exists(what, x)) x[[what]] else NA_real_ 3 | retrieve_fstatistic <- function(x) { 4 | if (exists("fstatistic", x)) { 5 | data.frame( 6 | statistic = x[["fstatistic"]][1], 7 | p.value = pf( 8 | x[["fstatistic"]][1], 9 | x[["fstatistic"]][2], 10 | x[["fstatistic"]][3], 11 | lower.tail = FALSE 12 | ) 13 | ) 14 | } else { 15 | data.frame(statistic = NA_real_, p.value = NA_real_) 16 | } 17 | } 18 | 19 | #' @importFrom generics glance 20 | #' @export 21 | generics::glance 22 | 23 | #' Glance at an estimatr object 24 | #' @name estimatr_glancers 25 | #' @templateVar class lm_robust 26 | #' @return For \code{glance.lm_robust}, a data.frame with columns: 27 | #' \item{r.squared}{the \eqn{R^2}, 28 | #' \deqn{R^2 = 1 - Sum(e[i]^2) / Sum((y[i] - y^*)^2),} where \eqn{y^*} 29 | #' is the mean of \eqn{y[i]} if there is an intercept and zero otherwise, 30 | #' and \eqn{e[i]} is the ith residual.} 31 | #' \item{adj.r.squared}{the \eqn{R^2} but penalized for having more parameters, \code{rank}} 32 | #' \item{se_type}{the standard error type specified by the user} 33 | #' \item{statistic}{the value of the F-statistic} 34 | #' \item{p.value}{p-value from the F test} 35 | #' \item{df.residual}{residual degrees of freedom} 36 | #' \item{nobs}{the number of observations used} 37 | #' 38 | #' @param x An object returned by one of the estimators 39 | #' @param ... extra arguments (not used) 40 | #' 41 | #' @export 42 | #' @family estimatr glancers 43 | #' @seealso [generics::glance()], [estimatr::lm_robust()], [estimatr::lm_lin()], [estimatr::iv_robust()], [estimatr::difference_in_means()], [estimatr::horvitz_thompson()] 44 | #' @md 45 | glance.lm_robust <- function(x, ...) { 46 | 47 | if (length(x[["outcome"]]) > 1) { 48 | stop("Cannot use `glance` on linear models with multiple responses.") 49 | } 50 | 51 | ret <- cbind( 52 | data.frame( 53 | r.squared = x[["r.squared"]], 54 | adj.r.squared = x[["adj.r.squared"]] 55 | ), 56 | retrieve_fstatistic(x), 57 | data.frame( 58 | df.residual = x[["df"]][1], 59 | nobs = as.integer(x[["nobs"]]), 60 | se_type = x[["se_type"]], 61 | stringsAsFactors = FALSE 62 | ) 63 | ) 64 | 65 | rownames(ret) <- NULL 66 | 67 | ret 68 | } 69 | 70 | #' @rdname estimatr_glancers 71 | #' @templateVar class lh_robust 72 | #' @return For \code{glance.lh_robust}, we glance the \code{lm_robust} component only. 
You can access the linear hypotheses as a data.frame directly from the \code{lh} component of the \code{lh_robust} object. 73 | #' 74 | #' @export 75 | #' @family estimatr glancers 76 | glance.lh_robust <- function(x, ...) { 77 | glance(x[["lm_robust"]]) 78 | } 79 | 80 | #' @rdname estimatr_glancers 81 | #' @templateVar class iv_robust 82 | #' @return For \code{glance.iv_robust}, a data.frame with columns: 83 | #' \item{r.squared}{The \eqn{R^2} of the second stage regression} 84 | #' \item{adj.r.squared}{The \eqn{R^2} but penalized for having more parameters, \code{rank}} 85 | #' \item{df.residual}{residual degrees of freedom} 86 | #' \item{nobs}{the number of observations used} 87 | #' \item{se_type}{the standard error type specified by the user} 88 | #' \item{statistic}{the value of the F-statistic} 89 | #' \item{p.value}{p-value from the F test} 90 | #' \item{statistic.weakinst}{the value of the first-stage F-statistic, useful for the weak instruments test; only reported if there is only one endogenous variable} 91 | #' \item{p.value.weakinst}{p-value from the first-stage F test, a test of weak instruments; only reported if there is only one endogenous variable} 92 | #' \item{statistic.endogeneity}{the value of the F-statistic for the test of endogeneity; often called the Wu-Hausman statistic; with robust standard errors, we employ the regression-based test} 93 | #' \item{p.value.endogeneity}{p-value from the F-test for endogeneity} 94 | #' \item{statistic.overid}{the value of the chi-squared statistic for the test of instrument correlation with the error term; only reported with overidentification} 95 | #' \item{p.value.overid}{p-value from the chi-squared test; only reported with overidentification} 96 | #' 97 | #' @export 98 | #' @family estimatr glancers 99 | glance.iv_robust <- function(x, ...)
{ 100 | 101 | if (length(x[["outcome"]]) > 1) { 102 | stop("Cannot use `glance` on linear models with multiple responses.") 103 | } 104 | 105 | ret <- cbind( 106 | data.frame( 107 | r.squared = x[["r.squared"]], 108 | adj.r.squared = x[["adj.r.squared"]], 109 | df.residual = x[["df.residual"]], 110 | nobs = as.integer(x[["nobs"]]), 111 | se_type = x[["se_type"]], 112 | stringsAsFactors = FALSE 113 | ), 114 | retrieve_fstatistic(x), 115 | if (exists("diagnostic_first_stage_fstatistic", x) && length(x[["diagnostic_first_stage_fstatistic"]]) == 4) { 116 | data.frame( 117 | statistic.weakinst = x[["diagnostic_first_stage_fstatistic"]]["value"], 118 | p.value.weakinst = x[["diagnostic_first_stage_fstatistic"]]["p.value"] 119 | ) 120 | } else { 121 | data.frame(statistic.weakinst = NA_real_, p.value.weakinst = NA_real_) 122 | }, 123 | if (exists("diagnostic_endogeneity_test", x)) { 124 | data.frame( 125 | statistic.endogeneity = x[["diagnostic_endogeneity_test"]]["value"], 126 | p.value.endogeneity = x[["diagnostic_endogeneity_test"]]["p.value"] 127 | ) 128 | } else { 129 | data.frame(statistic.endogeneity = NA_real_, p.value.endogeneity = NA_real_) 130 | }, 131 | if (exists("diagnostic_overid_test", x)) { 132 | data.frame( 133 | statistic.overid = x[["diagnostic_overid_test"]]["value"], 134 | p.value.overid = x[["diagnostic_overid_test"]]["p.value"] 135 | ) 136 | } else { 137 | data.frame(statistic.overid = NA_real_, p.value.overid = NA_real_) 138 | } 139 | ) 140 | 141 | ret 142 | } 143 | 144 | #' @rdname estimatr_glancers 145 | #' @templateVar class difference_in_means 146 | #' @return For \code{glance.difference_in_means}, a data.frame with columns: 147 | #' \item{design}{the design used, and therefore the estimator used} 148 | #' \item{df}{the degrees of freedom} 149 | #' \item{nobs}{the number of observations used} 150 | #' \item{nblocks}{the number of blocks, if used} 151 | #' \item{nclusters}{the number of clusters, if used} 152 | #' \item{condition2}{the second, "treatment", condition} 153 | #' \item{condition1}{the first, "control", condition} 154 | #' 155 | #' @export 156 | #' @family estimatr glancers 157 | glance.difference_in_means <- function(x, ...) { 158 | data.frame( 159 | design = x[["design"]], 160 | df = x[["df"]], 161 | nobs = as.integer(x[["nobs"]]), 162 | nblocks = retrieve_value(x, "nblocks"), 163 | nclusters = retrieve_value(x, "nclusters"), 164 | condition2 = x[["condition2"]], 165 | condition1 = x[["condition1"]], 166 | stringsAsFactors = FALSE 167 | ) 168 | } 169 | 170 | #' @rdname estimatr_glancers 171 | #' @templateVar class horvitz_thompson 172 | #' @return For \code{glance.horvitz_thompson}, a data.frame with columns: 173 | #' \item{nobs}{the number of observations used} 174 | #' \item{se_type}{the type of standard error estimator used} 175 | #' \item{condition2}{the second, "treatment", condition} 176 | #' \item{condition1}{the first, "control", condition} 177 | #' 178 | #' @export 179 | #' @family estimatr glancers 180 | glance.horvitz_thompson <- function(x, ...) { 181 | data.frame( 182 | nobs = as.integer(x[["nobs"]]), 183 | se_type = x[["se_type"]], 184 | condition2 = x[["condition2"]], 185 | condition1 = x[["condition1"]], 186 | stringsAsFactors = FALSE 187 | ) 188 | } 189 | -------------------------------------------------------------------------------- /R/S3_nobs.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | nobs.lm_robust <- function(object, ...)
object$nobs 3 | 4 | #' @export 5 | nobs.lh_robust <- function(object, ...) object$nobs 6 | 7 | #' @export 8 | nobs.iv_robust <- function(object, ...) object$nobs 9 | 10 | #' @export 11 | nobs.summary.lm_robust <- nobs.lm_robust 12 | 13 | #' @export 14 | nobs.horvitz_thompson <- function(object, ...) object$nobs 15 | 16 | #' @export 17 | nobs.difference_in_means <- function(object, ...) object$nobs 18 | 19 | -------------------------------------------------------------------------------- /R/S3_print.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.lm_robust <- function(x, ...) { 3 | print(summarize_tidy(x)) 4 | } 5 | 6 | #' @export 7 | print.iv_robust <- function(x, ...) { 8 | print(summarize_tidy(x)) 9 | } 10 | 11 | print_summary_lm_like <- function(x, 12 | digits, 13 | signif.stars = getOption("show.signif.stars"), 14 | ...) { 15 | cat( 16 | "\nCall:\n", 17 | paste(deparse(x$call, nlines = 5), sep = "\n", collapse = "\n"), 18 | "\n\n", 19 | sep = "" 20 | ) 21 | if (x$weighted) { 22 | cat("Weighted, ") 23 | } 24 | cat("Standard error type: ", x$se_type, "\n") 25 | 26 | if (x$rank < x$k) { 27 | singularities <- x$k - x$rank 28 | cat( 29 | "\nCoefficients: (", 30 | singularities, 31 | " not defined because the design matrix is rank deficient)\n", 32 | sep = "" 33 | ) 34 | } else { 35 | cat("\nCoefficients:\n") 36 | } 37 | 38 | print(coef(x), digits = digits) 39 | 40 | fstat <- if (is.numeric(x[["fstatistic"]])) { 41 | paste( 42 | "\nF-statistic:", formatC(x$fstatistic[1L], digits = digits), 43 | "on", x$fstatistic[2L], "and", x$fstatistic[3L], 44 | "DF, p-value:", 45 | format.pval(pf( 46 | x$fstatistic[1L], 47 | x$fstatistic[2L], 48 | x$fstatistic[3L], 49 | lower.tail = FALSE 50 | ), digits = digits) 51 | ) 52 | } else NULL 53 | 54 | cat( 55 | "\nMultiple R-squared: ", formatC(x$r.squared, digits = digits), 56 | ",\tAdjusted R-squared: ", formatC(x$adj.r.squared, digits = digits), 57 | fstat 58 | ) 59 | 60 | if (is.numeric(x[["proj_fstatistic"]])) { 61 | cat( 62 | "\nMultiple R-squared (proj. model): ", 63 | formatC(x$proj_r.squared, digits = digits), 64 | ",\tAdjusted R-squared (proj. model): ", 65 | formatC(x$proj_adj.r.squared, digits = digits), 66 | "\nF-statistic (proj. model):", 67 | formatC(x$proj_fstatistic[1L], digits = digits), 68 | "on", x$proj_fstatistic[2L], "and", x$proj_fstatistic[3L], 69 | "DF, p-value:", 70 | format.pval(pf( 71 | x$proj_fstatistic[1L], 72 | x$proj_fstatistic[2L], 73 | x$proj_fstatistic[3L], 74 | lower.tail = FALSE 75 | ), digits = digits) 76 | ) 77 | } 78 | cat("\n") 79 | 80 | if (is.numeric(x[["diagnostic_endogeneity_test"]])) { 81 | cat("\nDiagnostics:\n") 82 | printCoefmat( 83 | build_ivreg_diagnostics_mat(x), 84 | cs.ind = 1L:2L, 85 | tst.ind = 3L, 86 | has.Pvalue = TRUE, 87 | P.values = TRUE, 88 | digits = digits, 89 | signif.stars = signif.stars, 90 | na.print = "NA", 91 | ... 92 | ) 93 | } 94 | invisible(x) 95 | } 96 | 97 | #' @export 98 | print.summary.lm_robust <- function(x, 99 | digits = max(3L, getOption("digits") - 3L), 100 | signif.stars = getOption("show.signif.stars"), 101 | ...) { 102 | print_summary_lm_like(x, digits, ...) 103 | } 104 | 105 | #' @export 106 | print.summary.iv_robust <- function(x, 107 | digits = max(3L, getOption("digits") - 3L), 108 | signif.stars = getOption("show.signif.stars"), 109 | ...) { 110 | print_summary_lm_like(x, digits, signif.stars, ...) 111 | } 112 | 113 | #' @export 114 | print.difference_in_means <- function(x, ...) 
{ 115 | cat("Design: ", x$design, "\n") 116 | print(summarize_tidy(x)) 117 | } 118 | 119 | 120 | #' @export 121 | print.horvitz_thompson <- function(x, ...) { 122 | print(summarize_tidy(x)) 123 | } 124 | 125 | #' @export 126 | print.lh <- function(x, ...) { 127 | print(summary(x)) 128 | } 129 | 130 | #' @export 131 | print.lh_robust <- function(x, ...) { 132 | lnames <- names(x) 133 | for (i in seq_along(x)) { 134 | cat("$", lnames[i], "\n", sep = "") 135 | print(x[[i]]) 136 | cat("\n") 137 | } 138 | invisible(x) 139 | } 140 | 141 | #' @export 142 | print.summary.lh_robust <- function(x, 143 | digits = max(3L, getOption("digits") - 3L), 144 | ...){ 145 | lnames <- names(x) 146 | for (i in seq_along(x)) { 147 | cat("$", lnames[i], "\n", sep = "") 148 | print(summary(x[[i]]), digits = digits) 149 | cat("\n") 150 | } 151 | } 152 | 153 | #' @export 154 | print.summary.lh <- function(x, 155 | digits = max(3L, getOption("digits") - 3L), 156 | ...){ 157 | class(x) <- NULL 158 | print(x, digits = digits) 159 | } 160 | -------------------------------------------------------------------------------- /R/S3_summary.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | summary.lm_robust <- function(object, ...) { 3 | if (is.matrix(coef(object))) { 4 | ny <- ncol(coef(object)) 5 | 6 | ret <- setNames( 7 | vector("list", ny), 8 | paste("Response", object$outcome) 9 | ) 10 | 11 | mat_objs <- c( 12 | "coefficients", 13 | "std.error", 14 | "statistic", 15 | "df", 16 | "conf.low", 17 | "conf.high", 18 | "p.value" 19 | ) 20 | 21 | vec_objs <- c( 22 | "outcome", 23 | "r.squared", 24 | "adj.r.squared", 25 | "res_var" 26 | ) 27 | 28 | all_models <- object 29 | 30 | for (i in seq(ny)) { 31 | for (nm in names(object)) { 32 | if (nm %in% mat_objs) { 33 | object[[nm]] <- all_models[[nm]][, i, drop = TRUE] 34 | } else if (nm %in% vec_objs) { 35 | object[[nm]] <- all_models[[nm]][i] 36 | } else if (nm == "fstatistic") { 37 | object[[nm]] <- all_models[[nm]][c(i, ny + 1:2)] 38 | } 39 | } 40 | object$call$formula[[2L]] <- object$terms[[2L]] <- 41 | as.name(all_models$outcome[i]) 42 | ret[[i]] <- summary(object, ...) 43 | } 44 | 45 | class(ret) <- "listof" 46 | } else { 47 | ret <- summary_lm_model(object) 48 | } 49 | 50 | ret 51 | } 52 | 53 | #' @export 54 | summary.iv_robust <- function(object, ...) 
{ 55 | summary_lm_model(object) 56 | } 57 | 58 | #' @export 59 | summary.lh_robust <- function(object,...){ 60 | class(object) <- "summary.lh_robust" 61 | object 62 | } 63 | 64 | #' @export 65 | summary.lh <- function(object,...){ 66 | summary_lh_object <- summarize_tidy(simplify_lh_outcome(object)) 67 | class(summary_lh_object) <- "summary.lh" 68 | summary_lh_object 69 | } 70 | 71 | 72 | summary_lm_model <- function(object) { 73 | 74 | out_values <- c( 75 | "call", 76 | "k", 77 | "rank", 78 | "df.residual", 79 | "res_var", 80 | "weighted", 81 | "se_type", 82 | "fes", 83 | "r.squared", 84 | "adj.r.squared", 85 | "fstatistic" 86 | ) 87 | # Different returns if fixed effects in the output 88 | if (object[["fes"]]) { 89 | out_values <- c( 90 | out_values, 91 | "proj_r.squared", 92 | "proj_adj.r.squared", 93 | "proj_fstatistic" 94 | ) 95 | } 96 | 97 | # Different returns if fixed effects in the output 98 | if (is.numeric(object[["diagnostic_endogeneity_test"]])) { 99 | out_values <- c( 100 | out_values, 101 | "diagnostic_first_stage_fstatistic", 102 | "diagnostic_endogeneity_test", 103 | "diagnostic_overid_test" 104 | ) 105 | } 106 | 107 | return_list <- object[out_values] 108 | 109 | # Split into two lists if multivariate linear model 110 | 111 | return_list[["coefficients"]] <- summarize_tidy(object) 112 | return_list[["nobs"]] <- nobs(object) 113 | 114 | class(return_list) <- "summary.lm_robust" 115 | return(return_list) 116 | } 117 | 118 | 119 | #' @export 120 | summary.difference_in_means <- function(object, ...) { 121 | return(list( 122 | coefficients = summarize_tidy(object), 123 | design = object$design 124 | )) 125 | } 126 | 127 | 128 | #' @export 129 | summary.horvitz_thompson <- function(object, ...) { 130 | return(list(coefficients = summarize_tidy(object, "z"))) 131 | } 132 | 133 | summarize_tidy <- function(object, test = "t", ...) { 134 | remove_cols <- c("term", "outcome") 135 | 136 | # Ugly so that summary(fit)$coefficients matches lm() 137 | tidy_out <- tidy(object, ...) 138 | colnames(tidy_out)[2:8] <- 139 | c( 140 | "Estimate", 141 | "Std. Error", 142 | paste0(test, " value"), 143 | paste0("Pr(>|", test, "|)"), 144 | "CI Lower", 145 | "CI Upper", 146 | "DF" 147 | ) 148 | tidy_mat <- as.matrix(tidy_out[, !(names(tidy_out) %in% remove_cols)]) 149 | 150 | ny <- length(object$outcome) 151 | p <- length(object$term) 152 | if (ny > 1) { 153 | rownames(tidy_mat) <- paste0( 154 | rep(object$outcome, each = p), 155 | ":", 156 | rep(object$term, times = ny) 157 | ) 158 | } else { 159 | rownames(tidy_mat) <- object$term 160 | } 161 | 162 | return(tidy_mat) 163 | } 164 | -------------------------------------------------------------------------------- /R/S3_tidy.R: -------------------------------------------------------------------------------- 1 | #' @importFrom generics tidy 2 | #' @export 3 | generics::tidy 4 | 5 | tidy_data_frame <- function(x, 6 | conf.int = TRUE, 7 | conf.level = NULL, 8 | ...) 
{ 9 | vec_cols <- c( 10 | "coefficients", 11 | "std.error", 12 | "statistic", 13 | "p.value", 14 | "conf.low", 15 | "conf.high", 16 | "df" 17 | ) 18 | 19 | if(!conf.int){ 20 | vec_cols <- c( 21 | "coefficients", 22 | "std.error", 23 | "statistic", 24 | "p.value", 25 | "df" 26 | ) 27 | } 28 | 29 | tidy_mat <- do.call("cbind", lapply(x[vec_cols], as.vector)) 30 | vec_cols[vec_cols == "coefficients"] <- "estimate" 31 | colnames(tidy_mat) <- vec_cols 32 | 33 | return_frame <- data.frame( 34 | term = x[["term"]], 35 | tidy_mat, 36 | outcome = rep(x[["outcome"]], each = length(x[["term"]])), 37 | stringsAsFactors = FALSE 38 | ) 39 | 40 | rownames(return_frame) <- NULL 41 | 42 | if(!is.null(conf.level) && conf.int){ 43 | ci <- stats::confint(x, level = conf.level, ...) 44 | if (all(row.names(ci) == return_frame$term)) { 45 | return_frame$conf.low <- ci[, 1] 46 | return_frame$conf.high <- ci[, 2] 47 | } 48 | } 49 | return(return_frame) 50 | } 51 | 52 | warn_singularities <- function(x) { 53 | if (x$rank < x$k) { 54 | singularities <- x$k - x$rank 55 | what <- ifelse(singularities > 1, " coefficients ", " coefficient ") 56 | message( 57 | singularities, what, 58 | " not defined because the design matrix is rank deficient\n" 59 | ) 60 | } 61 | } 62 | 63 | #' Tidy an estimatr object 64 | #' @name estimatr_tidiers 65 | #' @templateVar class lm_robust 66 | #' @return A data.frame with columns for coefficient names, estimates, standard 67 | #' errors, confidence intervals, p-values, degrees of freedom, and the 68 | #' name of the outcome variable 69 | #' 70 | #' @param x An object returned by one of the estimators 71 | #' @param conf.int Logical indicating whether or not to include a 72 | #' confidence interval in the tidied output. Defaults to ‘TRUE’. 73 | #' @param conf.level The confidence level to use for the confidence 74 | #' interval if ‘conf.int = TRUE’. Must be strictly greater than 0 and less 75 | #' than 1. Defaults to 0.95, which corresponds to a 95 percent confidence 76 | #' interval. 77 | #' @param ... extra arguments (not used) 78 | #' 79 | #' @export 80 | #' @family estimatr tidiers 81 | #' @seealso [generics::tidy()], [estimatr::lm_robust()], [estimatr::iv_robust()], [estimatr::difference_in_means()], [estimatr::horvitz_thompson()] 82 | #' @md 83 | tidy.lm_robust <- function(x, 84 | conf.int = TRUE, 85 | conf.level = NULL, 86 | ...) { 87 | warn_singularities(x) 88 | tidy_data_frame(x, conf.int = conf.int, conf.level = conf.level, ...) 89 | } 90 | 91 | #' @rdname estimatr_tidiers 92 | #' @templateVar class iv_robust 93 | #' 94 | #' @export 95 | #' @family estimatr tidiers 96 | tidy.iv_robust <- function(x, conf.int = TRUE, conf.level = NULL, ...) { 97 | warn_singularities(x) 98 | tidy_data_frame(x, conf.int = conf.int, conf.level = conf.level, ...) 99 | } 100 | 101 | #' @rdname estimatr_tidiers 102 | #' @templateVar class difference_in_means 103 | #' 104 | #' @export 105 | #' @family estimatr tidiers 106 | tidy.difference_in_means <- tidy_data_frame 107 | 108 | #' @rdname estimatr_tidiers 109 | #' @templateVar class horvitz_thompson 110 | #' 111 | #' @export 112 | #' @family estimatr tidiers 113 | tidy.horvitz_thompson <- tidy_data_frame 114 | 115 | #' @rdname estimatr_tidiers 116 | #' @templateVar class lh_robust 117 | #' 118 | #' @export 119 | #' @family estimatr tidiers 120 | tidy.lh_robust <- function(x, 121 | conf.int = TRUE, 122 | conf.level = NULL, 123 | ...) 
{ 124 | rbind(tidy(x$lm_robust, conf.int = conf.int, conf.level = conf.level, ...), 125 | tidy(x$lh, conf.int = conf.int, conf.level = conf.level, ...)) 126 | } 127 | 128 | #' @rdname estimatr_tidiers 129 | #' @templateVar class lh 130 | #' 131 | #' @export 132 | #' @family estimatr tidiers 133 | tidy.lh <- function(x, 134 | conf.int = TRUE, 135 | conf.level = NULL, 136 | ...) { 137 | tidy_data_frame(simplify_lh_outcome(x), conf.int = conf.int, conf.level = conf.level, ...) 138 | } 139 | 140 | # Simplifies the `lh` outcome column for tidy.lh and print.lh 141 | simplify_lh_outcome <- function(x) { 142 | x_list <- as.list(x) 143 | x_list[["outcome"]] <- unique(x_list[["outcome"]]) 144 | class(x_list) <- "lh" 145 | x_list 146 | } 147 | 148 | -------------------------------------------------------------------------------- /R/S3_update.R: -------------------------------------------------------------------------------- 1 | #' @importFrom Formula Formula 2 | #' @importFrom stats getCall 3 | #' @export 4 | update.iv_robust <- function(object, formula., ..., evaluate = TRUE) { 5 | if (is.null(call <- getCall(object))) 6 | stop("need an object with call component") 7 | extras <- match.call(expand.dots = FALSE)$... 8 | if (!missing(formula.)) 9 | call$formula <- formula(update(Formula(formula(object)), formula.)) 10 | if (length(extras)) { 11 | existing <- !is.na(match(names(extras), names(call))) 12 | for (a in names(extras)[existing]) call[[a]] <- extras[[a]] 13 | if (any(!existing)) { 14 | call <- c(as.list(call), extras[!existing]) 15 | call <- as.call(call) 16 | } 17 | } 18 | if (evaluate) 19 | eval(call, parent.frame()) 20 | else call 21 | } 22 | -------------------------------------------------------------------------------- /R/S3_vcov.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | vcov.lm_robust <- function(object, complete = TRUE, ...) { 3 | vcov_simple(object, complete = complete) 4 | } 5 | 6 | #' @export 7 | vcov.iv_robust <- vcov.lm_robust 8 | 9 | #' @export 10 | vcov.difference_in_means <- function(object, ...) { 11 | return(object$vcov) 12 | } 13 | 14 | #' @export 15 | vcov.horvitz_thompson <- vcov.difference_in_means 16 | 17 | 18 | # Helper function for extracting vcov when it is just an element in the object list 19 | vcov_simple <- function(object, complete) { 20 | if (is.null(object$vcov)) { 21 | stop( 22 | "Object must have vcov matrix. Try setting `return_vcov = TRUE` in ", 23 | "the estimator function." 24 | ) 25 | } 26 | if (complete && (object$rank < object$k)) { 27 | vc <- matrix(NA_real_, object$k, object$k, 28 | dimnames = list(object$term, object$term)) 29 | j <- which(!is.na(coef(object, complete = TRUE))) 30 | vc[j, j] <- object$vcov 31 | return(vc) 32 | } else { 33 | return(object$vcov) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Replication data for Lin 2013 2 | #' 3 | #' A dataset containing the data to replicate: 4 | #' Lin, Winston. 2013. "Agnostic notes on regression adjustments to experimental 5 | #' data: Reexamining Freedman's critique." The Annals of Applied Statistics. 6 | #' Stat. 7(1): 295-318. doi:10.1214/12-AOAS583. 7 | #' https://projecteuclid.org/euclid.aoas/1365527200. 
8 | #' 9 | #' This data was originally taken from the following paper, subset to men who 10 | #' showed up to college, were in one of the arms with the support condition, 11 | #' and had GPA data for their first year in college. 12 | #' 13 | #' Angrist, Joshua, Daniel Lang, and Philip Oreopoulos. 2009. "Incentives and 14 | #' Services for College Achievement: Evidence from a Randomized Trial." American 15 | #' Economic Journal: Applied Economics 1(1): 136-63. 16 | #' https://www.aeaweb.org/articles?id=10.1257/app.1.1.136 17 | #' 18 | #' @format A data frame with educational treatments and outcomes: 19 | #' \describe{ 20 | #' \item{gpa0}{high school GPA} 21 | #' \item{sfsp}{financial incentives and support treatment} 22 | #' \item{ssp}{support only treatment} 23 | #' \item{GPA_year1}{college GPA year 1} 24 | #' \item{GPA_year2}{college GPA year 2} 25 | #' } 26 | #' @source \url{https://www.aeaweb.org/articles?id=10.1257/app.1.1.136} 27 | "alo_star_men" 28 | -------------------------------------------------------------------------------- /R/estimatr.R: -------------------------------------------------------------------------------- 1 | #' estimatr 2 | #' 3 | #' @description Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . 4 | #' 5 | #' @docType package 6 | #' @useDynLib estimatr, .registration = TRUE 7 | #' @importFrom Rcpp evalCpp 8 | #' @importFrom stats sd var model.matrix.default pt qt var weighted.mean lm 9 | #' vcov model.frame.default model.response complete.cases terms reformulate 10 | #' update model.extract setNames delete.response .checkMFClasses model.frame 11 | #' model.matrix na.pass nobs coef pf .getXlevels df.residual fitted.values 12 | #' formula model.matrix.lm resid weights lm.fit na.omit pchisq printCoefmat 13 | #' residuals 14 | #' @importFrom methods setGeneric setMethod isGeneric className 15 | #' @importFrom Formula as.Formula 16 | #' @importFrom rlang enquos enquo eval_tidy quo_get_expr quo_set_expr quo_is_missing sym quo 17 | #' @name estimatr 18 | "_PACKAGE" 19 | -------------------------------------------------------------------------------- /R/estimatr_lh_robust.R: -------------------------------------------------------------------------------- 1 | #' Linear Hypothesis for Ordinary Least Squares with Robust Standard Errors 2 | #' 3 | #' @description This function fits a linear model with robust standard errors and performs linear hypothesis test. 4 | #' @param ... Other arguments to be passed to \code{\link{lm_robust}} 5 | #' @param data A \code{data.frame} 6 | #' @param linear_hypothesis A character string or a matrix specifying combination, to be passed to the hypothesis.matrix argument of car::linearHypothesis 7 | #' See \code{\link[car]{linearHypothesis}} for more details. 8 | #' @details 9 | #' 10 | #' This function is a wrapper for \code{\link{lm_robust}} and for 11 | #' \code{\link[car]{linearHypothesis}}. It first runs \code{lm_robust} and 12 | #' next passes \code{"lm_robust"} object as an argument to \code{linearHypothesis}. 
13 | #' 14 | #' @return An object of class \code{"lh_robust"} containing the following two components: 15 | #' 16 | #' \item{lm_robust}{an object as returned by \code{lm_robust}.} 17 | #' \item{lh}{A data frame with most of its columns pulled from \code{linearHypothesis}' output.} 18 | #' 19 | #' The only analysis directly performed by \code{lh_robust} is a \code{t-test} for the null hypothesis of no effects of the linear combination of coefficients as specified by the user. 20 | #' All other output components are either extracted from \code{linearHypothesis} or \code{lm_robust}. 21 | #' 22 | #' The original output returned by \code{linearHypothesis} is added as an attribute under the \code{"linear_hypothesis"} attribute. 23 | #' 24 | #' @examples 25 | #' 26 | #' library(fabricatr) 27 | #' dat <- fabricate( 28 | #' N = 40, 29 | #' y = rpois(N, lambda = 4), 30 | #' x = rnorm(N), 31 | #' z = rbinom(N, 1, prob = 0.4), 32 | #' clusterID = sample(1:4, 40, replace = TRUE) 33 | #' ) 34 | #' 35 | #' # Default variance estimator is HC2 robust standard errors 36 | #' lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 37 | #' 38 | #' # The linear hypothesis argument can be specified equivalently as: 39 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z = 2x") 40 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "2*x +1*z") 41 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 42 | #' 43 | #' # Also recovers other sorts of standard errors just as specified in \code{\link{lm_robust}} 44 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "classical") 45 | #' lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "HC1") 46 | #' 47 | #' # Can tidy() main output and subcomponents into a data.frame 48 | #' lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 49 | #' tidy(lhro) 50 | #' tidy(lhro$lm_robust) 51 | #' tidy(lhro$lh) 52 | #' 53 | #' # Can use summary() to get more statistics on the main output and subcomponents. 
54 | #' summary(lhro) 55 | #' summary(lhro$lm_robust) 56 | #' summary(lhro$lh) 57 | #' 58 | #' @importFrom rlang quos eval_tidy 59 | #' 60 | #' @export 61 | #' 62 | lh_robust <- function(..., data, linear_hypothesis) { 63 | 64 | requireNamespace("car") 65 | 66 | # fit lm_robust model 67 | lm_robust_fit <- lm_robust(..., data = data) 68 | 69 | alpha <- eval_tidy(quos(...)$alpha) 70 | if (is.null(alpha)) { 71 | alpha <- 0.05 72 | } 73 | 74 | # calculate linear hypothesis 75 | car_lht <- car::linearHypothesis( 76 | lm_robust_fit, hypothesis.matrix = linear_hypothesis, level = 1 - alpha) 77 | 78 | estimate <- drop(attr(car_lht, "value")) 79 | std.error <- sqrt(diag(attr(car_lht, "vcov"))) 80 | 81 | # this df is not in general correct, but unclear what to replace it with 82 | df <- lm_robust_fit$df.residual 83 | 84 | statistic <- estimate / std.error 85 | p.value <- 2 * pt(abs(statistic), df, lower.tail = FALSE) 86 | ci <- estimate + std.error %o% qt(c(alpha / 2, 1 - alpha / 2), df) 87 | 88 | return_lh_robust <- data.frame( 89 | coefficients = estimate, 90 | std.error = std.error, 91 | statistic = statistic, 92 | p.value = p.value, 93 | alpha = alpha, 94 | conf.low = ci[, 1], 95 | conf.high = ci[, 2], 96 | df = df, 97 | term = linear_hypothesis, 98 | outcome = lm_robust_fit$outcome 99 | ) 100 | 101 | attr(return_lh_robust, "linear_hypothesis") <- car_lht 102 | class(return_lh_robust) <- c("lh", "data.frame") 103 | 104 | return_lm_robust <- lm_robust_fit 105 | return_lm_robust[["call"]] <- match.call() 106 | 107 | return(structure( 108 | list(lm_robust = return_lm_robust, lh = return_lh_robust), 109 | class = "lh_robust" 110 | )) 111 | 112 | } 113 | -------------------------------------------------------------------------------- /R/helper_cis_pvals.R: -------------------------------------------------------------------------------- 1 | # Internal method takes the results and adds p-values and confidence intervals 2 | add_cis_pvals <- function(return_frame, alpha, ci, ttest = TRUE) { 3 | if (ci) { 4 | if (alpha <= 0 || alpha >= 1) { 5 | stop("`alpha` must be numeric between 0 and 1") 6 | } 7 | 8 | return_frame$statistic <- with(return_frame, coefficients / std.error) 9 | 10 | if (ttest) { 11 | if (any(return_frame$df <= 0, na.rm = TRUE)) { 12 | warning( 13 | "Some degrees of freedom have been estimated as negative or zero\n", 14 | "p-values and confidence intervals may not be calculated" 15 | ) 16 | 17 | return_frame$df <- ifelse(return_frame$df <= 0, NA, return_frame$df) 18 | } 19 | 20 | return_frame$p.value <- with( 21 | return_frame, 22 | 2 * pt(abs(statistic), df = df, lower.tail = FALSE) 23 | ) 24 | 25 | crit_se <- with(return_frame, qt(1 - alpha / 2, df = df) * std.error) 26 | } else { 27 | return_frame$p.value <- with( 28 | return_frame, 29 | 2 * pnorm(abs(statistic), lower.tail = FALSE) 30 | ) 31 | 32 | crit_se <- with(return_frame, qnorm(1 - alpha / 2) * std.error) 33 | 34 | return_frame$df <- NA 35 | } 36 | 37 | return_frame$conf.low <- with(return_frame, coefficients - crit_se) 38 | return_frame$conf.high <- with(return_frame, coefficients + crit_se) 39 | 40 | return(as.list(return_frame)) 41 | } else { 42 | return_frame$p.value <- NA 43 | return_frame$statistic <- NA 44 | return_frame$conf.low <- NA 45 | return_frame$conf.high <- NA 46 | 47 | return(as.list(return_frame)) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /R/helper_clean_model_data.R: -------------------------------------------------------------------------------- 1 | # 
library(estimatr) 2 | # f <- function(w) { 3 | # dat <- data.frame(x = rnorm(10), y = rnorm(10)) 4 | # lm_robust(y ~ x, data = dat, w = w) 5 | # } 6 | # f(NULL) 7 | # f(1:10) 8 | 9 | 10 | # Internal method to process data 11 | #' @importFrom rlang f_rhs %||% 12 | clean_model_data <- function(data, datargs, estimator = "") { 13 | 14 | # if data exists, evaluate it 15 | data <- if (quo_is_missing(data)) NULL else eval_tidy(data) 16 | 17 | if (getOption("estimatr.debug.clean_model_data", FALSE)) browser() 18 | 19 | mfargs <- Filter(Negate(quo_is_missing), datargs) 20 | 21 | m_formula <- eval_tidy(mfargs[["formula"]]) 22 | m_formula_env <- environment(m_formula) 23 | 24 | stopifnot("`formula` argument must be a formula"=inherits(m_formula, "formula")) 25 | 26 | # From this point on we never use the environment of anything 27 | # in mfargs as we always evaluate in `data` explicitly 28 | # Therefore we can just change it to a list that can take 29 | # expressions without environments attached to them 30 | mfargs <- as.list(mfargs) 31 | 32 | args_ignored <- c("fixed_effects", "se_type") 33 | # For each ... that would go to model.fram .default, early eval, 34 | # save to formula env, and point to it 35 | # subset is also non-standard eval 36 | to_process <- setdiff( 37 | names(mfargs), 38 | c( 39 | setdiff(names(formals(stats::model.frame.default)), "subset"), 40 | args_ignored 41 | ) 42 | ) 43 | 44 | for (da in to_process) { 45 | name <- sprintf(".__%s%%%d__", da, sample.int(.Machine$integer.max, 1)) 46 | m_formula_env[[name]] <- eval_tidy(mfargs[[da]], data = data) 47 | mfargs[[da]] <- sym(name) 48 | } 49 | 50 | if ("fixed_effects" %in% names(mfargs)) { 51 | name <- sprintf(".__fixed_effects%%%d__", sample.int(.Machine$integer.max, 1)) 52 | m_formula_env[[name]] <- sapply( 53 | stats::model.frame.default( 54 | mfargs[["fixed_effects"]], 55 | data = data, 56 | na.action = NULL 57 | ), 58 | FUN = as.factor 59 | ) 60 | mfargs[["fixed_effects"]] <- sym(name) 61 | } 62 | 63 | condition_pr <- NULL 64 | if ("condition_pr" %in% names(mfargs) && 65 | length(eval(mfargs[["condition_pr"]], m_formula_env)) == 1) { 66 | condition_pr <- eval(mfargs[["condition_pr"]], m_formula_env) 67 | mfargs[["condition_pr"]] <- NULL 68 | } 69 | 70 | mfargs[["formula"]] <- Formula::as.Formula(m_formula) 71 | 72 | # Get model frame 73 | mf <- eval_tidy(quo((stats::model.frame)( 74 | !!!mfargs, 75 | data = data, 76 | na.action = na.omit_detailed.data.frame, 77 | drop.unused.levels = TRUE 78 | ))) 79 | 80 | local({ 81 | na.action <- attr(mf, "na.action") 82 | why_omit <- attr(na.action, "why_omit") 83 | 84 | # Warn if missingness in ancillary variables 85 | missing_warning <- c( 86 | "Some observations have missingness in the %s variable(s) but not in ", 87 | "the outcome or covariates. These observations have been dropped." 
88 | ) 89 | 90 | to_check_if_missing <- c( 91 | "cluster", "condition_pr", "block", "weights", "fixed_effects" 92 | ) 93 | 94 | for (x in to_check_if_missing) { 95 | if (!is.null(why_omit[[sprintf("(%s)", x)]])) { 96 | warning(sprintf(missing_warning, x)) 97 | } 98 | } 99 | }) 100 | 101 | if (!is.null(attr(terms(mf), "Formula_without_dot"))) { 102 | formula <- attr(terms(mf), "Formula_without_dot") 103 | } else { 104 | formula <- eval_tidy(mfargs[["formula"]]) # unwrap quosure => a formula 105 | } 106 | 107 | ret <- list( 108 | outcome = model.response(mf, type = "numeric"), 109 | design_matrix = model.matrix(terms(formula, rhs = 1), data = mf), 110 | formula = formula 111 | ) 112 | 113 | if (estimator == "iv") { 114 | if (length(formula)[2] != 2) { 115 | stop( 116 | "Must specify a `formula` with both regressors and instruments. For ", 117 | "example, `formula = y ~ x1 + x2 | x1 + z2` where x1 and x2 are the ", 118 | "regressors and z1 and z2 are the instruments.\n\nSee ?iv_robust." 119 | ) 120 | } 121 | ret[["instrument_matrix"]] <- model.matrix(terms(formula, rhs = 2), data = mf) 122 | ret[["terms_regressors"]] <- terms(formula, rhs = 1) 123 | } else if (estimator %in% c("ht", "dim")) { 124 | ret[["original_treatment"]] <- mf[, colnames(mf) == all.vars(terms(mf)[[3]])[1]] 125 | } 126 | 127 | ret[["weights"]] <- model.extract(mf, "weights") 128 | if (any(ret[["weights"]] < 0)) { 129 | stop("`weights` must not be negative") 130 | } 131 | 132 | ret[["cluster"]] <- model.extract(mf, "cluster") 133 | if (!(class(ret[["cluster"]]) %in% c("factor", "integer")) && 134 | !is.null(ret[["cluster"]])) { 135 | ret[["cluster"]] <- as.factor(ret[["cluster"]]) 136 | } 137 | 138 | ret[["block"]] <- model.extract(mf, "block") 139 | 140 | ret[["condition_pr"]] <- if (is.numeric(condition_pr)) 141 | rep(condition_pr, nrow(ret[["design_matrix"]])) 142 | else 143 | model.extract(mf, "condition_pr") 144 | 145 | ret[["fixed_effects"]] <- model.extract(mf, "fixed_effects") 146 | # If there is NA in the blocks and only one block, returns vector not matrix 147 | # so coerce to matrix 148 | if (is.character(ret[["fixed_effects"]])) { 149 | ret[["fixed_effects"]] <- as.matrix(ret[["fixed_effects"]]) 150 | } 151 | 152 | if (any(ret[["condition_pr"]] <= 0 | ret[["condition_pr"]] > 1)) { 153 | stop( 154 | "`condition_prs` must be a vector of positive values no greater than 1" 155 | ) 156 | } 157 | 158 | ret[["terms"]] <- attr(mf, "terms") 159 | dcs <- attr(ret[["terms"]], "dataClasses") 160 | # Clobber auxiliary variables in dataClasses for margins 161 | drop_vars <- c("(fixed_effects)", "(condition_pr)", "(block)", "(cluster)") 162 | attr(ret[["terms"]], "dataClasses") <- dcs[setdiff(names(dcs), drop_vars)] 163 | ret[["xlevels"]] <- .getXlevels(ret[["terms"]], mf) 164 | if (is.character(ret[["fixed_effects"]])) { 165 | ret[["felevels"]] <- lapply(as.data.frame(ret[["fixed_effects"]]), unique) 166 | } 167 | 168 | return(ret) 169 | } 170 | 171 | demean_fes <- function(model_data) { 172 | fe.ints <- apply(model_data[["fixed_effects"]], 2, function(x) match(x, unique(x))) 173 | 174 | eps <- 1e-8 175 | weights <- model_data[["weights"]] %||% rep(1, nrow(model_data[["design_matrix"]])) 176 | has_int <- attr(model_data$terms, "intercept") 177 | 178 | 179 | demeaned <- list() 180 | 181 | # save names 182 | demeaned[["outcome"]] <- demeanMat2(as.matrix(model_data[["outcome"]]), fe.ints, weights, 0, eps) 183 | dimnames(demeaned[["outcome"]]) <- dimnames(model_data[["outcome"]]) 184 | model_data[["outcome"]] <- 
demeaned[["outcome"]] 185 | 186 | demeaned[["design_matrix"]] <- demeanMat2(model_data[["design_matrix"]], fe.ints, weights, has_int, eps) 187 | new_names <- dimnames(model_data[["design_matrix"]]) 188 | new_names[[2]] <- new_names[[2]][new_names[[2]] != "(Intercept)"] 189 | dimnames(demeaned[["design_matrix"]]) <- new_names 190 | model_data[["design_matrix"]] <- demeaned[["design_matrix"]] 191 | 192 | if (is.numeric(model_data[["instrument_matrix"]])) { 193 | demeaned[["instrument_matrix"]] <- demeanMat2(model_data[["instrument_matrix"]], fe.ints, weights, has_int, eps) 194 | model_data[["instrument_matrix"]] <- demeaned[["instrument_matrix"]] 195 | } 196 | 197 | 198 | model_data[["fe_levels"]] <- apply(fe.ints, 2, max) - 1 199 | 200 | return(model_data) 201 | } 202 | -------------------------------------------------------------------------------- /R/helper_extract.R: -------------------------------------------------------------------------------- 1 | # This code modified from 2 | # https://github.com/leifeld/texreg/blob/master/R/extract.R (no LICENSE) 3 | #' Extract model data for \pkg{texreg} package 4 | #' @rdname extract.lm_robust 5 | #' 6 | #' @description Prepares a \code{"lm_robust"} or \code{"iv_robust"} object for the \pkg{texreg} 7 | #' package. This is largely a clone of the \code{extract.lm} 8 | #' method. 9 | #' 10 | #' @param model an object of class \code{\link{lm_robust}} or \code{"iv_robust"} 11 | #' @param include.ci logical. Defaults to TRUE 12 | #' @param include.rsquared logical. Defaults to TRUE 13 | #' @param include.adjrs logical. Defaults to TRUE 14 | #' @param include.nobs logical. Defaults to TRUE 15 | #' @param include.fstatistic logical. Defaults to TRUE 16 | #' @param include.rmse logical. Defaults to TRUE 17 | #' @param include.nclusts logical. Defaults to TRUE if clusters in \code{model} 18 | #' @param ... unused 19 | #' 20 | extract.robust_default <- function(model, 21 | include.ci = TRUE, 22 | include.rsquared = TRUE, 23 | include.adjrs = TRUE, 24 | include.nobs = TRUE, 25 | include.fstatistic = FALSE, 26 | include.rmse = TRUE, 27 | include.nclusts = TRUE, 28 | ...) 
{ 29 | s <- tidy(model) 30 | 31 | names <- s[["term"]] 32 | co <- s[["estimate"]] 33 | se <- s[["std.error"]] 34 | pval <- s[["p.value"]] 35 | cilow <- numeric() 36 | ciupper <- numeric() 37 | if (include.ci) { 38 | cilow <- s[["conf.low"]] 39 | ciupper <- s[["conf.high"]] 40 | } 41 | 42 | rs <- model$r.squared # extract R-squared 43 | adj <- model$adj.r.squared # extract adjusted R-squared 44 | n <- nobs(model) # extract number of observations 45 | 46 | gof <- numeric() 47 | gof.names <- character() 48 | gof.decimal <- logical() 49 | if (include.rsquared) { 50 | gof <- c(gof, rs) 51 | gof.names <- c(gof.names, "R$^2$") 52 | gof.decimal <- c(gof.decimal, TRUE) 53 | } 54 | if (include.adjrs) { 55 | gof <- c(gof, adj) 56 | gof.names <- c(gof.names, "Adj.\ R$^2$") 57 | gof.decimal <- c(gof.decimal, TRUE) 58 | } 59 | if (include.nobs) { 60 | gof <- c(gof, n) 61 | gof.names <- c(gof.names, "Num.\ obs.") 62 | gof.decimal <- c(gof.decimal, FALSE) 63 | } 64 | if (include.fstatistic) { 65 | fstat <- model[["fstatistic"]][[1]] 66 | gof <- c(gof, fstat) 67 | gof.names <- c(gof.names, "F statistic") 68 | gof.decimal <- c(gof.decimal, TRUE) 69 | } 70 | if (include.rmse && !is.null(model[["res_var"]])) { 71 | rmse <- sqrt(model[["res_var"]]) 72 | gof <- c(gof, rmse) 73 | gof.names <- c(gof.names, "RMSE") 74 | gof.decimal <- c(gof.decimal, TRUE) 75 | } 76 | if (include.nclusts && model[["clustered"]]) { 77 | rmse <- sqrt(model[["res_var"]]) 78 | gof <- c(gof, model[["nclusters"]]) 79 | gof.names <- c(gof.names, "N Clusters") 80 | gof.decimal <- c(gof.decimal, FALSE) 81 | } 82 | 83 | tr <- texreg::createTexreg( 84 | coef.names = names, 85 | coef = co, 86 | se = se, 87 | pvalues = pval, 88 | ci.low = cilow, 89 | ci.up = ciupper, 90 | gof.names = gof.names, 91 | gof = gof, 92 | gof.decimal = gof.decimal 93 | ) 94 | return(tr) 95 | } 96 | 97 | #' @rdname extract.lm_robust 98 | #' 99 | #' @export 100 | extract.lm_robust <- extract.robust_default 101 | 102 | #' @rdname extract.lm_robust 103 | #' 104 | #' @export 105 | extract.iv_robust <- extract.robust_default 106 | 107 | -------------------------------------------------------------------------------- /R/helper_na_omit_detailed.R: -------------------------------------------------------------------------------- 1 | #' Extra logging on na.omit handler 2 | #' 3 | #' @param object a data.frame 4 | #' 5 | #' @return a normal \code{omit} object, with the extra attribute \code{why_omit}, 6 | #' which contains the leftmost column containing an NA for each row that was dropped, by 7 | #' column name, if any were dropped. 
8 | #' 9 | #' @seealso \code{\link{na.omit}} 10 | na.omit_detailed.data.frame <- function(object){ 11 | 12 | naomitwhy(object, function(x, w) x[w, , drop=FALSE]) 13 | 14 | } 15 | 16 | -------------------------------------------------------------------------------- /R/helper_parse_arguments.R: -------------------------------------------------------------------------------- 1 | # This function parses condition names for HT and DiM estimators 2 | parse_conditions <- function(treatment, condition1, condition2, estimator) { 3 | if (is.factor(treatment)) { 4 | condition_names <- levels(droplevels(treatment)) 5 | } else { 6 | condition_names <- sort(unique(treatment)) 7 | } 8 | 9 | if (any(!(c(condition1, condition2) %in% condition_names))) { 10 | stop("`condition1` and `condition2` must be values found in the treatment") 11 | } 12 | 13 | n_conditions <- length(condition_names) 14 | 15 | conditions <- list(NULL, NULL) 16 | 17 | if (n_conditions > 2) { 18 | if (is.null(condition1) || is.null(condition2)) { 19 | stop( 20 | "Treatment has > 2 values; must specify both `condition1` and ", 21 | "`condition2` or use a treatment with only 2 values" 22 | ) 23 | } else { 24 | conditions[1:2] <- c(condition1, condition2) 25 | } 26 | } else if (n_conditions == 2) { 27 | if (is.null(condition1) && is.null(condition2)) { 28 | conditions[1:2] <- condition_names 29 | } else if (!is.null(condition2) && is.null(condition1)) { 30 | conditions[1:2] <- c(setdiff(condition_names, condition2), condition2) 31 | } else if (!is.null(condition1) && is.null(condition2)) { 32 | conditions[1:2] <- c(condition1, setdiff(condition_names, condition1)) 33 | } else { 34 | conditions[1:2] <- c(condition1, condition2) 35 | } 36 | } else if (n_conditions == 1) { 37 | # Allowable for HT estimator 38 | if (estimator != "horvitz_thompson") { 39 | stop( 40 | "Must have more than one value in treatment unless using Horvitz-", 41 | "Thompson estimator" 42 | ) 43 | } 44 | 45 | if (is.null(condition1) && is.null(condition2)) { 46 | conditions[2] <- condition_names 47 | } else if (!is.null(condition2)) { 48 | conditions[2] <- condition2 49 | } else if (!is.null(condition1)) { 50 | conditions[1] <- condition1 51 | } 52 | } 53 | 54 | return(conditions) 55 | } 56 | 57 | # This function ensures that blocks and clusters have been specified correctly 58 | check_clusters_blocks <- function(data) { 59 | if (!is.null(data$cluster)) { 60 | one_block_per_clust <- 61 | tapply(data$block, data$cluster, function(x) all(x == x[1])) 62 | 63 | # Check that clusters nest within blocks 64 | if (any(!one_block_per_clust)) { 65 | stop("All `clusters` must be contained within `blocks`") 66 | } 67 | 68 | # get number of clusters per block 69 | clust_per_block <- tapply( 70 | data$cluster, 71 | data$block, 72 | function(x) length(unique(x)) 73 | ) 74 | } else { 75 | clust_per_block <- tabulate(as.factor(data$block)) 76 | } 77 | 78 | return(clust_per_block) 79 | } 80 | -------------------------------------------------------------------------------- /R/helper_return.R: -------------------------------------------------------------------------------- 1 | # This file has helper functions for returning the lists from various estimators 2 | lm_return <- function(return_list, model_data, formula) { 3 | if (!is.null(model_data)) { 4 | return_list[["contrasts"]] <- attr(model_data$design_matrix, "contrasts") 5 | return_list[["terms"]] <- model_data$terms 6 | return_list[["xlevels"]] <- model_data$xlevels 7 | return_list[["felevels"]] <- model_data$felevels 8 | 
return_list[["weights"]] <- model_data$weights 9 | if (is.matrix(model_data$outcome) && 10 | is.character(colnames(model_data$outcome))) { 11 | return_list[["outcome"]] <- colnames(model_data$outcome) 12 | } else { 13 | return_list[["outcome"]] <- deparse(formula[[2]], nlines = 5) 14 | } 15 | } 16 | 17 | # Name and flatten objects 18 | if (is.matrix(return_list[["std.error"]]) && 19 | ncol(return_list[["std.error"]]) > 1) { 20 | dimnames(return_list[["std.error"]]) <- dimnames(return_list[["coefficients"]]) 21 | } else { 22 | return_list[["coefficients"]] <- drop(return_list[["coefficients"]]) 23 | nms <- c("std.error", "statistic", "p.value", "df", "conf.low", "conf.high") 24 | for (nm in nms) { 25 | if (length(return_list[[nm]]) > 1 || !is.na(return_list[[nm]])) { 26 | return_list[[nm]] <- setNames( 27 | drop(return_list[[nm]]), 28 | names(return_list[["coefficients"]]) 29 | ) 30 | } 31 | } 32 | } 33 | if (return_list[["weighted"]]) { 34 | names(return_list[["weights"]]) <- if (is.matrix(return_list[["fitted.values"]])) 35 | rownames(return_list[["fitted.values"]]) 36 | else names(return_list[["fitted.values"]]) 37 | } 38 | return_list[["fitted.values"]] <- drop(return_list[["fitted.values"]]) 39 | return_list[["ei.iv"]] <- drop(return_list[["ei.iv"]]) 40 | return_list[["residuals"]] <- drop(return_list[["residuals"]]) 41 | return(return_list) 42 | } 43 | 44 | dim_like_return <- function(return_list, alpha, formula, conditions) { 45 | return_list[["alpha"]] <- alpha 46 | 47 | # get "max" condition to account for case with only 1 condition 48 | treat_condition <- conditions[[2]] 49 | 50 | # now we add the condition 2 value to coefficient name like it were a factor 51 | 52 | # Only add label if conditions aren't 0/1 53 | add_label <- !(conditions[[2]] == 1 && conditions[[1]] == 0) 54 | # If horvitz_thompson and there is only one treatment, add_label will be NA 55 | # In this case, we add the non-null value if it's condition 2 56 | if (is.na(add_label)) { 57 | add_label <- !is.null(conditions[[2]]) 58 | } 59 | 60 | fterms <- terms(formula) 61 | coef_name <- labels(fterms) 62 | 63 | if (add_label) { 64 | return_list[["term"]] <- paste0( 65 | coef_name, 66 | conditions[[2]] 67 | ) 68 | } else { 69 | return_list[["term"]] <- coef_name 70 | } 71 | 72 | return_list[["outcome"]] <- deparse(formula[[2]], nlines = 5) 73 | 74 | names(return_list[["coefficients"]]) <- 75 | names(return_list[["std.error"]]) <- 76 | names(return_list[["p.value"]]) <- 77 | names(return_list[["df"]]) <- return_list[["term"]] 78 | 79 | return_list[["condition2"]] <- conditions[[2]] 80 | return_list[["condition1"]] <- conditions[[1]] 81 | 82 | return_list[["vcov"]] <- matrix( 83 | data = return_list[["std.error"]] ^ 2, 84 | dimnames = list(return_list[["term"]], return_list[["term"]]) 85 | ) 86 | 87 | return(return_list) 88 | } 89 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # Some of this is code modified from 2 | # https://github.com/atahk/bucky/blob/master/R/zzz.R (GPL 3.0) 3 | .onLoad <- function(libname, pkgname) { 4 | if (suppressWarnings(requireNamespace("texreg", quietly = TRUE))) { 5 | setGeneric("extract", function(model, ...) 
standardGeneric("extract"), 6 | package = "texreg" 7 | ) 8 | setMethod("extract", 9 | signature = className("lm_robust", pkgname), 10 | definition = extract.lm_robust 11 | ) 12 | setMethod("extract", 13 | signature = className("iv_robust", pkgname), 14 | definition = extract.iv_robust 15 | ) 16 | } 17 | if(requireNamespace("emmeans", quietly = TRUE)) { 18 | emmeans::.emm_register("lm_robust", pkgname) 19 | } 20 | invisible() 21 | } 22 | 23 | #' @importFrom utils packageVersion 24 | .onAttach <- function(libname, pkgname) { 25 | if (isNamespaceLoaded("broom") && packageVersion("broom") <= "0.5.0") { 26 | packageStartupMessage( 27 | "Warning: the `broom` package version 0.5.0 or earlier is loaded.\n", 28 | "Please upgrade `broom` or `tidy` methods may not work as expected." 29 | ) 30 | } 31 | invisible() 32 | } 33 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | title: "estimatr: Fast Estimators for Design-Based Inference" 4 | --- 5 | 6 | 7 | 8 | ```{r, echo = FALSE} 9 | set.seed(42) 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | message = FALSE, 13 | comment = "#>", 14 | fig.path = "README-" 15 | ) 16 | options(digits = 2) 17 | ``` 18 | 19 | [![CRAN status](https://www.r-pkg.org/badges/version/estimatr)](https://cran.r-project.org/package=estimatr) 20 | [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/grand-total/estimatr?color=green)](https://r-pkg.org/pkg/estimatr) 21 | [![Build status](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml) 22 | [![Codecov test coverage](https://codecov.io/gh/DeclareDesign/estimatr/graph/badge.svg)](https://app.codecov.io/gh/DeclareDesign/estimatr) 23 | [![Replications](https://softwarecite.com/badge/estimatr)](https://softwarecite.com/package/estimatr) 24 | 25 | **estimatr** is an `R` package providing a range of commonly-used linear estimators, designed for speed and for ease-of-use. Users can easily recover robust, cluster-robust, and other design appropriate estimates. We include two functions that implement means estimators, `difference_in_means()` and `horvitz_thompson()`, and three linear regression estimators, `lm_robust()`, `lm_lin()`, and `iv_robust()`. In each case, users can choose an estimator to reflect cluster-randomized, block-randomized, and block-and-cluster-randomized designs. The [Getting Started Guide](https://declaredesign.org/r/estimatr/articles/getting-started.html) describes each estimator provided by **estimatr** and how it can be used in your analysis. 26 | 27 | You can also see the multiple ways you can [get regression tables out of estimatr](https://declaredesign.org/r/estimatr/articles/regression-tables.html) using commonly used `R` packages such as `texreg` and `stargazer`. Fast estimators also enable fast simulation of research designs to learn about their properties (see [DeclareDesign](https://declaredesign.org)). 28 | 29 | ## Installing estimatr 30 | 31 | To install the latest stable release of **estimatr**, please ensure that you are running version 3.5 or later of R and run the following code: 32 | 33 | ```{r, eval=F} 34 | install.packages("estimatr") 35 | ``` 36 | 37 | ## Easy to use 38 | 39 | Once the package is installed, getting appropriate estimates and standard errors is now both fast and easy. 
40 | 41 | ```{r, eval = TRUE, echo=-1} 42 | set.seed(42) 43 | library(estimatr) 44 | 45 | # sample data from cluster-randomized experiment 46 | library(fabricatr) 47 | library(randomizr) 48 | dat <- fabricate( 49 | N = 100, 50 | y = rnorm(N), 51 | clusterID = sample(letters[1:10], size = N, replace = TRUE), 52 | z = cluster_ra(clusterID) 53 | ) 54 | 55 | # robust standard errors 56 | res_rob <- lm_robust(y ~ z, data = dat) 57 | # tidy dataframes on command! 58 | tidy(res_rob) 59 | 60 | # cluster robust standard errors 61 | res_cl <- lm_robust(y ~ z, data = dat, clusters = clusterID) 62 | # standard summary view also available 63 | summary(res_cl) 64 | 65 | # matched-pair design learned from blocks argument 66 | data(sleep) 67 | res_dim <- difference_in_means(extra ~ group, data = sleep, blocks = ID) 68 | ``` 69 | 70 | The [Getting Started Guide](/r/estimatr/articles/getting-started.html) has more examples and uses, as do the reference pages. The [Mathematical Notes](/r/estimatr/articles/mathematical-notes.html) provide more information about what each estimator is doing under the hood. 71 | 72 | ## Fast to use 73 | 74 | Getting estimates and robust standard errors is also faster than it used to be. Compare our package to using `lm()` and the `sandwich` package to get HC2 standard errors. More speed comparisons are available [here](https://declaredesign.org/r/estimatr/articles/benchmarking-estimatr.html). Furthermore, with many blocks (or fixed effects), users can use the `fixed_effects` argument of `lm_robust` with HC1 standard errors to greatly improve estimation speed. More on [fixed effects here](https://declaredesign.org/r/estimatr/articles/absorbing-fixed-effects.html). 75 | 76 | ```{r, echo=-1} 77 | set.seed(1) 78 | dat <- data.frame(X = matrix(rnorm(2000*50), 2000), y = rnorm(2000)) 79 | 80 | library(microbenchmark) 81 | library(lmtest) 82 | library(sandwich) 83 | mb <- microbenchmark( 84 | `estimatr` = lm_robust(y ~ ., data = dat), 85 | `lm + sandwich` = { 86 | lo <- lm(y ~ ., data = dat) 87 | coeftest(lo, vcov = vcovHC(lo, type = 'HC2')) 88 | } 89 | ) 90 | ``` 91 | ```{r, echo = FALSE} 92 | d <- summary(mb)[, c("expr", "median")] 93 | names(d) <- c("estimatr", "median run-time (ms)") 94 | knitr::kable(d) 95 | ``` 96 | 97 | --- 98 | 99 | This project is generously supported by a grant from the [Laura and John Arnold Foundation](http://www.arnoldfoundation.org) and seed funding from [Evidence in Governance and Politics (EGAP)](http://egap.org). 
100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | estimatr: Fast Estimators for Design-Based Inference 2 | ================ 3 | 4 | 5 | 6 | [![CRAN 7 | status](https://www.r-pkg.org/badges/version/estimatr)](https://cran.r-project.org/package=estimatr) 8 | [![CRAN RStudio mirror 9 | downloads](https://cranlogs.r-pkg.org/badges/grand-total/estimatr?color=green)](https://r-pkg.org/pkg/estimatr) 10 | [![Build 11 | status](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/DeclareDesign/estimatr/actions/workflows/R-CMD-check.yaml) 12 | [![Codecov test 13 | coverage](https://codecov.io/gh/DeclareDesign/estimatr/graph/badge.svg)](https://app.codecov.io/gh/DeclareDesign/estimatr) 14 | [![Replications](https://softwarecite.com/badge/estimatr)](https://softwarecite.com/package/estimatr) 15 | 16 | **estimatr** is an `R` package providing a range of commonly-used linear 17 | estimators, designed for speed and for ease-of-use. Users can easily 18 | recover robust, cluster-robust, and other design appropriate estimates. 19 | We include two functions that implement means estimators, 20 | `difference_in_means()` and `horvitz_thompson()`, and three linear 21 | regression estimators, `lm_robust()`, `lm_lin()`, and `iv_robust()`. In 22 | each case, users can choose an estimator to reflect cluster-randomized, 23 | block-randomized, and block-and-cluster-randomized designs. The [Getting 24 | Started 25 | Guide](https://declaredesign.org/r/estimatr/articles/getting-started.html) 26 | describes each estimator provided by **estimatr** and how it can be used 27 | in your analysis. 28 | 29 | You can also see the multiple ways you can [get regression tables out of 30 | estimatr](https://declaredesign.org/r/estimatr/articles/regression-tables.html) 31 | using commonly used `R` packages such as `texreg` and `stargazer`. Fast 32 | estimators also enable fast simulation of research designs to learn 33 | about their properties (see [DeclareDesign](https://declaredesign.org)). 34 | 35 | ## Installing estimatr 36 | 37 | To install the latest stable release of **estimatr**, please ensure that 38 | you are running version 3.5 or later of R and run the following code: 39 | 40 | ``` r 41 | install.packages("estimatr") 42 | ``` 43 | 44 | ## Easy to use 45 | 46 | Once the package is installed, getting appropriate estimates and 47 | standard errors is now both fast and easy. 48 | 49 | ``` r 50 | library(estimatr) 51 | 52 | # sample data from cluster-randomized experiment 53 | library(fabricatr) 54 | library(randomizr) 55 | dat <- fabricate( 56 | N = 100, 57 | y = rnorm(N), 58 | clusterID = sample(letters[1:10], size = N, replace = TRUE), 59 | z = cluster_ra(clusterID) 60 | ) 61 | 62 | # robust standard errors 63 | res_rob <- lm_robust(y ~ z, data = dat) 64 | # tidy dataframes on command! 
65 | tidy(res_rob) 66 | #> term estimate std.error statistic p.value conf.low conf.high df 67 | #> 1 (Intercept) 0.065 0.14 0.46 0.64 -0.21 0.34 98 68 | #> 2 z -0.067 0.21 -0.32 0.75 -0.48 0.35 98 69 | #> outcome 70 | #> 1 y 71 | #> 2 y 72 | 73 | # cluster robust standard errors 74 | res_cl <- lm_robust(y ~ z, data = dat, clusters = clusterID) 75 | # standard summary view also available 76 | summary(res_cl) 77 | #> 78 | #> Call: 79 | #> lm_robust(formula = y ~ z, data = dat, clusters = clusterID) 80 | #> 81 | #> Standard error type: CR2 82 | #> 83 | #> Coefficients: 84 | #> Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF 85 | #> (Intercept) 0.0653 0.145 0.452 0.678 -0.358 0.489 3.53 86 | #> z -0.0670 0.202 -0.331 0.750 -0.544 0.410 7.05 87 | #> 88 | #> Multiple R-squared: 0.00105 , Adjusted R-squared: -0.00915 89 | #> F-statistic: 0.11 on 1 and 9 DF, p-value: 0.748 90 | 91 | # matched-pair design learned from blocks argument 92 | data(sleep) 93 | res_dim <- difference_in_means(extra ~ group, data = sleep, blocks = ID) 94 | ``` 95 | 96 | The [Getting Started Guide](/r/estimatr/articles/getting-started.html) 97 | has more examples and uses, as do the reference pages. The [Mathematical 98 | Notes](/r/estimatr/articles/mathematical-notes.html) provide more 99 | information about what each estimator is doing under the hood. 100 | 101 | ## Fast to use 102 | 103 | Getting estimates and robust standard errors is also faster than it used 104 | to be. Compare our package to using `lm()` and the `sandwich` package to 105 | get HC2 standard errors. More speed comparisons are available 106 | [here](https://declaredesign.org/r/estimatr/articles/benchmarking-estimatr.html). 107 | Furthermore, with many blocks (or fixed effects), users can use the 108 | `fixed_effects` argument of `lm_robust` with HC1 standard errors to 109 | greatly improve estimation speed. More on [fixed effects 110 | here](https://declaredesign.org/r/estimatr/articles/absorbing-fixed-effects.html). 111 | 112 | ``` r 113 | dat <- data.frame(X = matrix(rnorm(2000*50), 2000), y = rnorm(2000)) 114 | 115 | library(microbenchmark) 116 | library(lmtest) 117 | library(sandwich) 118 | mb <- microbenchmark( 119 | `estimatr` = lm_robust(y ~ ., data = dat), 120 | `lm + sandwich` = { 121 | lo <- lm(y ~ ., data = dat) 122 | coeftest(lo, vcov = vcovHC(lo, type = 'HC2')) 123 | } 124 | ) 125 | #> Warning in microbenchmark(estimatr = lm_robust(y ~ ., data = dat), `lm + 126 | #> sandwich` = {: less accurate nanosecond times to avoid potential integer 127 | #> overflows 128 | ``` 129 | 130 | | estimatr | median run-time (ms) | 131 | |:--------------|---------------------:| 132 | | estimatr | 6 | 133 | | lm + sandwich | 21 | 134 | 135 | ------------------------------------------------------------------------ 136 | 137 | This project is generously supported by a grant from the [Laura and John 138 | Arnold Foundation](http://www.arnoldfoundation.org) and seed funding 139 | from [Evidence in Governance and Politics (EGAP)](http://egap.org). 
140 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://declaredesign.org/r/estimatr 2 | 3 | template: 4 | bootstrap: 5 5 | bslib: 6 | primary: "#0054AD" 7 | border-radius: 0.5rem 8 | btn-border-radius: 0.25rem 9 | 10 | development: 11 | mode: auto 12 | 13 | navbar: 14 | right: 15 | - text: "Software" 16 | menu: 17 | - text: DeclareDesign 18 | href: http://declaredesign.org/r/declaredesign/ 19 | - text: randomizr 20 | href: https://declaredesign.org/r/randomizr/ 21 | - text: fabricatr 22 | href: https://declaredesign.org/r/fabricatr/ 23 | - text: estimatr 24 | href: https://declaredesign.org/r/estimatr/ 25 | - text: rdss 26 | href: https://declaredesign.org/r/rdss/ 27 | - text: DesignLibrary 28 | href: https://declaredesign.org/r/designlibrary/ 29 | - text: DesignWizard 30 | href: https://eos.wzb.eu/ipi/DDWizard/ 31 | - text: declaredesign.org 32 | href: https://declaredesign.org 33 | left: 34 | - text: Getting started 35 | href: articles/getting-started.html 36 | - text: Reference 37 | href: reference/index.html 38 | - text: Articles 39 | menu: 40 | - text: Getting started using estimatr 41 | href: articles/getting-started.html 42 | - text: Absorbing Fixed Effects with estimatr 43 | href: articles/absorbing-fixed-effects.html 44 | - text: estimatr in the Tidyverse 45 | href: articles/estimatr-in-the-tidyverse.html 46 | - text: Benchmarking estimatr 47 | href: articles/benchmarking-estimatr.html 48 | - text: Examples with emmeans 49 | href: articles/emmeans-examples.html 50 | - text: Mathematical notes for estimatr 51 | href: articles/mathematical-notes.html 52 | - text: Regression Tables with estimatr 53 | href: articles/regression-tables.html 54 | - text: Simulations - Debiasing Difference-in-Means 55 | href: articles/simulations-debiasing-dim.html 56 | - text: Simulations - OLS and Variance 57 | href: articles/simulations-ols-variance.html 58 | - text: How Stata's hat matrix differs with weights 59 | href: articles/stata-wls-hat.html 60 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Submission 2 | 3 | Fixes M1 Mac errors. 4 | 5 | There are no changes to worse in reverse depends. 6 | 7 | We have checked this in win builder and on OS X, linux, and Windows in release, oldrel, and devel on Github Actions with no issues. 8 | 9 | -------------------------------------------------------------------------------- /data-raw/STAR_public_use.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/data-raw/STAR_public_use.dta -------------------------------------------------------------------------------- /data-raw/build-alo-star-for-lin.R: -------------------------------------------------------------------------------- 1 | # This script takes the dta file from the ALO replication data at this url: 2 | # https://www.aeaweb.org/articles?id=10.1257/app.1.1.136 3 | # and turns it in to the .rda we export with the package 4 | # Full citation: 5 | # Angrist, Joshua, Daniel Lang, and Philip Oreopoulos. 2009. "Incentives and 6 | # Services for College Achievement: Evidence from a Randomized Trial." American 7 | # Economic Journal: Applied Economics 1(1): 136-63. 
8 | 9 | # wd should be package root 10 | 11 | dat <- 12 | foreign::read.dta( 13 | "data-raw/STAR_public_use.dta" 14 | ) 15 | 16 | alo_star_men <- dat[ 17 | dat$sex == "M" & 18 | !is.na(dat$GPA_year1) & 19 | (dat$sfsp == 1 | dat$ssp == 1) & 20 | dat$noshow == 0, 21 | c("gpa0", "sfsp", "ssp", "GPA_year1", "GPA_year2") 22 | ] 23 | 24 | devtools::use_data( 25 | alo_star_men, 26 | internal = F, 27 | overwrite = T 28 | ) 29 | -------------------------------------------------------------------------------- /data/alo_star_men.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/data/alo_star_men.rda -------------------------------------------------------------------------------- /estimatr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/alo_star_men.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{alo_star_men} 5 | \alias{alo_star_men} 6 | \title{Replication data for Lin 2013} 7 | \format{ 8 | A data frame with educational treatments and outcomes: 9 | \describe{ 10 | \item{gpa0}{high school GPA} 11 | \item{sfsp}{financial incentives and support treatment} 12 | \item{ssp}{support only treatment} 13 | \item{GPA_year1}{college GPA year 1} 14 | \item{GPA_year2}{college GPA year 2} 15 | } 16 | } 17 | \source{ 18 | \url{https://www.aeaweb.org/articles?id=10.1257/app.1.1.136} 19 | } 20 | \usage{ 21 | alo_star_men 22 | } 23 | \description{ 24 | A dataset containing the data to replicate: 25 | Lin, Winston. 2013. "Agnostic notes on regression adjustments to experimental 26 | data: Reexamining Freedman's critique." The Annals of Applied Statistics. 27 | Stat. 7(1): 295-318. doi:10.1214/12-AOAS583. 28 | https://projecteuclid.org/euclid.aoas/1365527200. 29 | } 30 | \details{ 31 | This data was originally taken from the following paper, subset to men who 32 | showed up to college, were in one of the arms with the support condition, 33 | and had GPA data for their first year in college. 34 | 35 | Angrist, Joshua, Daniel Lang, and Philip Oreopoulos. 2009. "Incentives and 36 | Services for College Achievement: Evidence from a Randomized Trial." American 37 | Economic Journal: Applied Economics 1(1): 136-63. 
38 | https://www.aeaweb.org/articles?id=10.1257/app.1.1.136 39 | } 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /man/commarobust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_starprep.R 3 | \name{commarobust} 4 | \alias{commarobust} 5 | \title{Build lm_robust object from lm fit} 6 | \usage{ 7 | commarobust(model, se_type = NULL, clusters = NULL, ci = TRUE, alpha = 0.05) 8 | } 9 | \arguments{ 10 | \item{model}{an lm model object} 11 | 12 | \item{se_type}{The sort of standard error sought. If \code{clusters} is 13 | not specified the options are "HC0", "HC1" (or "stata", the equivalent), 14 | "HC2" (default), "HC3", or "classical". If \code{clusters} is specified the 15 | options are "CR0", "CR2" (default), or "stata". Can also specify "none", 16 | which may speed up estimation of the coefficients.} 17 | 18 | \item{clusters}{A vector corresponding to the clusters in the data.} 19 | 20 | \item{ci}{logical. Whether to compute and return p-values and confidence 21 | intervals, TRUE by default.} 22 | 23 | \item{alpha}{The significance level, 0.05 by default.} 24 | } 25 | \value{ 26 | an \code{\link{lm_robust}} object. 27 | } 28 | \description{ 29 | Build lm_robust object from lm fit 30 | } 31 | \examples{ 32 | lmo <- lm(mpg ~ hp, data = mtcars) 33 | 34 | # Default HC2 35 | commarobust(lmo) 36 | 37 | commarobust(lmo, se_type = "HC3") 38 | 39 | commarobust(lmo, se_type = "stata", clusters = mtcars$carb) 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/declaration_to_condition_pr_mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_condition_pr_matrix.R 3 | \name{declaration_to_condition_pr_mat} 4 | \alias{declaration_to_condition_pr_mat} 5 | \title{Builds condition probability matrices for Horvitz-Thompson estimation from 6 | \pkg{randomizr} declaration} 7 | \usage{ 8 | declaration_to_condition_pr_mat( 9 | ra_declaration, 10 | condition1 = NULL, 11 | condition2 = NULL, 12 | prob_matrix = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{ra_declaration}{An object of class \code{"ra_declaration"}, generated 17 | by the \code{\link[randomizr]{declare_ra}} function in \pkg{randomizr}. This 18 | object contains the experimental design that will be represented in a 19 | condition probability matrix} 20 | 21 | \item{condition1}{The name of the first condition, often the control group. If \code{NULL}, 22 | defaults to first condition in randomizr declaration. Either both \code{condition1} 23 | and \code{condition2} have to be specified or both left as \code{NULL}.} 24 | 25 | \item{condition2}{The name of the second condition, often the treatment group. If \code{NULL}, 26 | defaults to second condition in randomizr declaration. Either both \code{condition1} 27 | and \code{condition2} have to be specified or both left as \code{NULL}.} 28 | 29 | \item{prob_matrix}{An optional probability matrix to override the one in 30 | \code{ra_declaration}} 31 | } 32 | \value{ 33 | a numeric 2n*2n matrix of marginal and joint condition treatment 34 | probabilities to be passed to the \code{condition_pr_mat} argument of 35 | \code{\link{horvitz_thompson}}. See details. 
36 | } 37 | \description{ 38 | Builds condition probability matrices for Horvitz-Thompson estimation from 39 | \pkg{randomizr} declaration 40 | } 41 | \details{ 42 | This function takes a \code{"ra_declaration"}, generated 43 | by the \code{\link[randomizr]{declare_ra}} function in \pkg{randomizr} and 44 | returns a 2n*2n matrix that can be used to fully specify the design for 45 | \code{\link{horvitz_thompson}} estimation. This is done by passing this 46 | matrix to the \code{condition_pr_mat} argument of 47 | \code{\link{horvitz_thompson}}. 48 | 49 | Currently, this function can learn the condition probability matrix for a 50 | wide variety of randomizations: simple, complete, simple clustered, complete 51 | clustered, blocked, block-clustered. 52 | 53 | A condition probability matrix is made up of four submatrices, each of which 54 | corresponds to the 55 | joint and marginal probability that each observation is in one of the two 56 | treatment conditions. 57 | 58 | The upper-left quadrant is an n*n matrix. On the diagonal is the marginal 59 | probability of being in condition 1, often control, for every unit 60 | (Pr(Z_i = Condition1) where Z represents the vector of treatment conditions). 61 | The off-diagonal elements are the joint probabilities of each unit being in 62 | condition 1 with each other unit, Pr(Z_i = Condition1, Z_j = Condition1) 63 | where i indexes the rows and j indexes the columns. 64 | 65 | The upper-right quadrant is also an n*n matrix. On the diagonal is the joint 66 | probability of a unit being in condition 1 and condition 2, often the 67 | treatment, and thus is always 0. The off-diagonal elements are the joint 68 | probability of unit i being in condition 1 and unit j being in condition 2, 69 | Pr(Z_i = Condition1, Z_j = Condition2). 70 | 71 | The lower-left quadrant is also an n*n matrix. On the diagonal is the joint 72 | probability of a unit being in condition 1 and condition 2, and thus is 73 | always 0. The off-diagonal elements are the joint probability of unit i 74 | being in condition 2 and unit j being in condition 1, 75 | Pr(Z_i = Condition2, Z_j = Condition1). 76 | 77 | The lower-right quadrant is an n*n matrix. On the diagonal is the marginal 78 | probability of being in condition 2, often treatment, for every unit 79 | (Pr(Z_i = Condition2)). The off-diagonal elements are the joint probability 80 | of each unit being in condition 2 together, 81 | Pr(Z_i = Condition2, Z_j = Condition2). 
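As a small worked illustration of this structure (a minimal sketch; the exact row and column labels produced may differ), consider complete random assignment of exactly one of two units to condition 2:

\preformatted{
decl <- randomizr::declare_ra(N = 2, m = 1)
pr_mat <- declaration_to_condition_pr_mat(decl)
# Up to labelling, the resulting 4*4 matrix has
#   upper-left diagonal:      Pr(Z_i = Condition1)                   = 0.5
#   upper-left off-diagonal:  Pr(Z_i = Condition1, Z_j = Condition1) = 0
#   upper-right off-diagonal: Pr(Z_i = Condition1, Z_j = Condition2) = 0.5
#   lower-right diagonal:     Pr(Z_i = Condition2)                   = 0.5
#   lower-right off-diagonal: Pr(Z_i = Condition2, Z_j = Condition2) = 0
}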
82 | } 83 | \examples{ 84 | 85 | # Learn condition probability matrix from complete blocked design 86 | library(randomizr) 87 | n <- 100 88 | dat <- data.frame( 89 | blocks = sample(letters[1:10], size = n, replace = TRUE), 90 | y = rnorm(n) 91 | ) 92 | 93 | # Declare complete blocked randomization 94 | bl_declaration <- declare_ra(blocks = dat$blocks, prob = 0.4, simple = FALSE) 95 | # Get probabilities 96 | block_pr_mat <- declaration_to_condition_pr_mat(bl_declaration, 0, 1) 97 | # Do randomiztion 98 | dat$z <- conduct_ra(bl_declaration) 99 | 100 | horvitz_thompson(y ~ z, data = dat, condition_pr_mat = block_pr_mat) 101 | 102 | # When you pass a declaration to horvitz_thompson, this function is called 103 | 104 | # Equivalent to above call 105 | horvitz_thompson(y ~ z, data = dat, ra_declaration = bl_declaration) 106 | 107 | } 108 | \seealso{ 109 | \code{\link{permutations_to_condition_pr_mat}} 110 | } 111 | -------------------------------------------------------------------------------- /man/estimatr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimatr.R 3 | \docType{package} 4 | \name{estimatr} 5 | \alias{estimatr-package} 6 | \alias{estimatr} 7 | \title{estimatr} 8 | \description{ 9 | Fast procedures for small set of commonly-used, design-appropriate estimators with robust standard errors and confidence intervals. Includes estimators for linear regression, instrumental variables regression, difference-in-means, Horvitz-Thompson estimation, and regression improving precision of experimental estimates by interacting treatment with centered pre-treatment covariates introduced by Lin (2013) . 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://declaredesign.org/r/estimatr/} 15 | \item \url{https://github.com/DeclareDesign/estimatr} 16 | \item Report bugs at \url{https://github.com/DeclareDesign/estimatr/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Graeme Blair \email{graeme.blair@gmail.com} 22 | 23 | Authors: 24 | \itemize{ 25 | \item Jasper Cooper \email{jjc2247@columbia.edu} 26 | \item Alexander Coppock \email{alex.coppock@yale.edu} 27 | \item Macartan Humphreys \email{macartan@gmail.com} 28 | \item Luke Sonnet \email{luke.sonnet@gmail.com} 29 | } 30 | 31 | Other contributors: 32 | \itemize{ 33 | \item Neal Fultz \email{nfultz@gmail.com} [contributor] 34 | \item Lily Medina \email{lilymiru@gmail.com} [contributor] 35 | \item Russell Lenth \email{russell-lenth@uiowa.edu} [contributor] 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /man/estimatr_glancers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_glance.R 3 | \name{estimatr_glancers} 4 | \alias{estimatr_glancers} 5 | \alias{glance.lm_robust} 6 | \alias{glance.lh_robust} 7 | \alias{glance.iv_robust} 8 | \alias{glance.difference_in_means} 9 | \alias{glance.horvitz_thompson} 10 | \title{Glance at an estimatr object} 11 | \usage{ 12 | \method{glance}{lm_robust}(x, ...) 13 | 14 | \method{glance}{lh_robust}(x, ...) 15 | 16 | \method{glance}{iv_robust}(x, ...) 17 | 18 | \method{glance}{difference_in_means}(x, ...) 19 | 20 | \method{glance}{horvitz_thompson}(x, ...) 
21 | } 22 | \arguments{ 23 | \item{x}{An object returned by one of the estimators} 24 | 25 | \item{...}{extra arguments (not used)} 26 | } 27 | \value{ 28 | For \code{glance.lm_robust}, a data.frame with columns: 29 | \item{r.squared}{the \eqn{R^2}, 30 | \deqn{R^2 = 1 - Sum(e[i]^2) / Sum((y[i] - y^*)^2),} where \eqn{y^*} 31 | is the mean of \eqn{y[i]} if there is an intercept and zero otherwise, 32 | and \eqn{e[i]} is the ith residual.} 33 | \item{adj.r.squared}{the \eqn{R^2} but penalized for having more parameters, \code{rank}} 34 | \item{se_type}{the standard error type specified by the user} 35 | \item{statistic}{the value of the F-statistic} 36 | \item{p.value}{p-value from the F test} 37 | \item{df.residual}{residual degrees of freedom} 38 | \item{nobs}{the number of observations used} 39 | 40 | For \code{glance.lh_robust}, we glance the \code{lm_robust} component only. You can access the linear hypotheses as a data.frame directy from the \code{lh} component of the \code{lh_robust} object 41 | 42 | For \code{glance.iv_robust}, a data.frame with columns: 43 | \item{r.squared}{The \eqn{R^2} of the second stage regression} 44 | \item{adj.r.squared}{The \eqn{R^2} but penalized for having more parameters, \code{rank}} 45 | \item{df.residual}{residual degrees of freedom} 46 | \item{N}{the number of observations used} 47 | \item{se_type}{the standard error type specified by the user} 48 | \item{statistic}{the value of the F-statistic} 49 | \item{p.value}{p-value from the F test} 50 | \item{statistic.weakinst}{the value of the first stage F-statistic, useful for the weak instruments test; only reported if there is only one endogenous variable} 51 | \item{p.value.weakinst}{p-value from the first-stage F test, a test of weak instruments; only reported if there is only one endogenous variable} 52 | \item{statistic.endogeneity}{the value of the F-statistic for the test of endogeneity; often called the Wu-Hausman statistic, with robust standard errors, we employ the regression based test} 53 | \item{p.value.endogeneity}{p-value from the F-test for endogeneity} 54 | \item{statistic.overid}{the value of the chi-squared statistic for the test of instrument correlation with the error term; only reported with overidentification} 55 | \item{p.value.overid}{p-value from the chi-squared test; only reported with overidentification} 56 | 57 | For \code{glance.difference_in_means}, a data.frame with columns: 58 | \item{design}{the design used, and therefore the estimator used} 59 | \item{df}{the degrees of freedom} 60 | \item{nobs}{the number of observations used} 61 | \item{nblocks}{the number of blocks, if used} 62 | \item{nclusters}{the number of clusters, if used} 63 | \item{condition2}{the second, "treatment", condition} 64 | \item{condition1}{the first, "control", condition} 65 | 66 | For \code{glance.horvitz_thompson}, a data.frame with columns: 67 | \item{nobs}{the number of observations used} 68 | \item{se_type}{the type of standard error estimator used} 69 | \item{condition2}{the second, "treatment", condition} 70 | \item{condition1}{the first, "control", condition} 71 | } 72 | \description{ 73 | Glance at an estimatr object 74 | } 75 | \seealso{ 76 | \code{\link[generics:glance]{generics::glance()}}, \code{\link[=lm_robust]{lm_robust()}}, \code{\link[=lm_lin]{lm_lin()}}, \code{\link[=iv_robust]{iv_robust()}}, \code{\link[=difference_in_means]{difference_in_means()}}, \code{\link[=horvitz_thompson]{horvitz_thompson()}} 77 | } 78 | \concept{estimatr glancers} 79 | 
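\examples{
# A minimal sketch of glancing at an lm_robust fit (illustrative data only;
# the returned one-row data.frame has the columns documented above)
dat <- data.frame(y = rnorm(20), x = rnorm(20))
fit <- lm_robust(y ~ x, data = dat)
glance(fit)
}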
-------------------------------------------------------------------------------- /man/estimatr_tidiers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_tidy.R 3 | \name{estimatr_tidiers} 4 | \alias{estimatr_tidiers} 5 | \alias{tidy.lm_robust} 6 | \alias{tidy.iv_robust} 7 | \alias{tidy.difference_in_means} 8 | \alias{tidy.horvitz_thompson} 9 | \alias{tidy.lh_robust} 10 | \alias{tidy.lh} 11 | \title{Tidy an estimatr object} 12 | \usage{ 13 | \method{tidy}{lm_robust}(x, conf.int = TRUE, conf.level = NULL, ...) 14 | 15 | \method{tidy}{iv_robust}(x, conf.int = TRUE, conf.level = NULL, ...) 16 | 17 | \method{tidy}{difference_in_means}(x, conf.int = TRUE, conf.level = NULL, ...) 18 | 19 | \method{tidy}{horvitz_thompson}(x, conf.int = TRUE, conf.level = NULL, ...) 20 | 21 | \method{tidy}{lh_robust}(x, conf.int = TRUE, conf.level = NULL, ...) 22 | 23 | \method{tidy}{lh}(x, conf.int = TRUE, conf.level = NULL, ...) 24 | } 25 | \arguments{ 26 | \item{x}{An object returned by one of the estimators} 27 | 28 | \item{conf.int}{Logical indicating whether or not to include a 29 | confidence interval in the tidied output. Defaults to ‘TRUE’.} 30 | 31 | \item{conf.level}{The confidence level to use for the confidence 32 | interval if ‘conf.int = TRUE’. Must be strictly greater than 0 and less 33 | than 1. Defaults to 0.95, which corresponds to a 95 percent confidence 34 | interval.} 35 | 36 | \item{...}{extra arguments (not used)} 37 | } 38 | \value{ 39 | A data.frame with columns for coefficient names, estimates, standard 40 | errors, confidence intervals, p-values, degrees of freedom, and the 41 | name of the outcome variable 42 | } 43 | \description{ 44 | Tidy an estimatr object 45 | } 46 | \seealso{ 47 | \code{\link[generics:tidy]{generics::tidy()}}, \code{\link[=lm_robust]{lm_robust()}}, \code{\link[=iv_robust]{iv_robust()}}, \code{\link[=difference_in_means]{difference_in_means()}}, \code{\link[=horvitz_thompson]{horvitz_thompson()}} 48 | } 49 | \concept{estimatr tidiers} 50 | -------------------------------------------------------------------------------- /man/extract.lm_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_extract.R 3 | \name{extract.robust_default} 4 | \alias{extract.robust_default} 5 | \alias{extract.lm_robust} 6 | \alias{extract.iv_robust} 7 | \title{Extract model data for \pkg{texreg} package} 8 | \usage{ 9 | extract.robust_default( 10 | model, 11 | include.ci = TRUE, 12 | include.rsquared = TRUE, 13 | include.adjrs = TRUE, 14 | include.nobs = TRUE, 15 | include.fstatistic = FALSE, 16 | include.rmse = TRUE, 17 | include.nclusts = TRUE, 18 | ... 19 | ) 20 | 21 | extract.lm_robust( 22 | model, 23 | include.ci = TRUE, 24 | include.rsquared = TRUE, 25 | include.adjrs = TRUE, 26 | include.nobs = TRUE, 27 | include.fstatistic = FALSE, 28 | include.rmse = TRUE, 29 | include.nclusts = TRUE, 30 | ... 31 | ) 32 | 33 | extract.iv_robust( 34 | model, 35 | include.ci = TRUE, 36 | include.rsquared = TRUE, 37 | include.adjrs = TRUE, 38 | include.nobs = TRUE, 39 | include.fstatistic = FALSE, 40 | include.rmse = TRUE, 41 | include.nclusts = TRUE, 42 | ... 43 | ) 44 | } 45 | \arguments{ 46 | \item{model}{an object of class \code{\link{lm_robust}} or \code{"iv_robust"}} 47 | 48 | \item{include.ci}{logical. 
Defaults to TRUE} 49 | 50 | \item{include.rsquared}{logical. Defaults to TRUE} 51 | 52 | \item{include.adjrs}{logical. Defaults to TRUE} 53 | 54 | \item{include.nobs}{logical. Defaults to TRUE} 55 | 56 | \item{include.fstatistic}{logical. Defaults to FALSE} 57 | 58 | \item{include.rmse}{logical. Defaults to TRUE} 59 | 60 | \item{include.nclusts}{logical. Defaults to TRUE if clusters in \code{model}} 61 | 62 | \item{...}{unused} 63 | } 64 | \description{ 65 | Prepares a \code{"lm_robust"} or \code{"iv_robust"} object for the \pkg{texreg} 66 | package. This is largely a clone of the \code{extract.lm} 67 | method. 68 | } 69 | -------------------------------------------------------------------------------- /man/gen_pr_matrix_cluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_condition_pr_matrix.R 3 | \name{gen_pr_matrix_cluster} 4 | \alias{gen_pr_matrix_cluster} 5 | \title{Generate condition probability matrix given clusters and probabilities} 6 | \usage{ 7 | gen_pr_matrix_cluster(clusters, treat_probs, simple) 8 | } 9 | \arguments{ 10 | \item{clusters}{A vector of clusters} 11 | 12 | \item{treat_probs}{A vector of treatment (condition 2) probabilities} 13 | 14 | \item{simple}{A boolean for whether the assignment is a random sample 15 | assignment (TRUE, default) or complete random assignment (FALSE)} 16 | } 17 | \value{ 18 | a numeric 2n*2n matrix of marginal and joint condition treatment 19 | probabilities to be passed to the \code{condition_pr_mat} argument of 20 | \code{\link{horvitz_thompson}}. 21 | } 22 | \description{ 23 | Generate condition probability matrix given clusters and probabilities 24 | } 25 | \seealso{ 26 | \code{\link{declaration_to_condition_pr_mat}} 27 | } 28 | -------------------------------------------------------------------------------- /man/lh_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimatr_lh_robust.R 3 | \name{lh_robust} 4 | \alias{lh_robust} 5 | \title{Linear Hypothesis for Ordinary Least Squares with Robust Standard Errors} 6 | \usage{ 7 | lh_robust(..., data, linear_hypothesis) 8 | } 9 | \arguments{ 10 | \item{...}{Other arguments to be passed to \code{\link{lm_robust}}} 11 | 12 | \item{data}{A \code{data.frame}} 13 | 14 | \item{linear_hypothesis}{A character string or a matrix specifying a combination of coefficients, to be passed to the hypothesis.matrix argument of car::linearHypothesis. 15 | See \code{\link[car]{linearHypothesis}} for more details.} 16 | } 17 | \value{ 18 | An object of class \code{"lh_robust"} containing the following two components: 19 | 20 | \item{lm_robust}{an object as returned by \code{lm_robust}.} 21 | \item{lh}{A data frame with most of its columns pulled from \code{linearHypothesis}' output.} 22 | 23 | The only analysis directly performed by \code{lh_robust} is a \code{t-test} for the null hypothesis of no effect of the linear combination of coefficients as specified by the user. 24 | All other output components are either extracted from \code{linearHypothesis} or \code{lm_robust}. 25 | 26 | The original output returned by \code{linearHypothesis} is stored in the \code{"linear_hypothesis"} attribute. 27 | } 28 | \description{ 29 | This function fits a linear model with robust standard errors and performs a linear hypothesis test. 
30 | } 31 | \details{ 32 | This function is a wrapper for \code{\link{lm_robust}} and for 33 | \code{\link[car]{linearHypothesis}}. It first runs \code{lm_robust} and 34 | then passes the resulting \code{"lm_robust"} object as an argument to \code{linearHypothesis}. 35 | } 36 | \examples{ 37 | 38 | library(fabricatr) 39 | dat <- fabricate( 40 | N = 40, 41 | y = rpois(N, lambda = 4), 42 | x = rnorm(N), 43 | z = rbinom(N, 1, prob = 0.4), 44 | clusterID = sample(1:4, 40, replace = TRUE) 45 | ) 46 | 47 | # Default variance estimator is HC2 robust standard errors 48 | lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 49 | 50 | # The linear hypothesis argument can be specified in several ways, e.g.: 51 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z = 2x") 52 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "2*x +1*z") 53 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 54 | 55 | # Also recovers other sorts of standard errors just as specified in \code{\link{lm_robust}} 56 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "classical") 57 | lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0", se_type = "HC1") 58 | 59 | # Can tidy() main output and subcomponents into a data.frame 60 | lhro <- lh_robust(y ~ x + z, data = dat, linear_hypothesis = "z + 2x = 0") 61 | tidy(lhro) 62 | tidy(lhro$lm_robust) 63 | tidy(lhro$lh) 64 | 65 | # Can use summary() to get more statistics on the main output and subcomponents. 66 | summary(lhro) 67 | summary(lhro$lm_robust) 68 | summary(lhro$lh) 69 | 70 | } 71 | -------------------------------------------------------------------------------- /man/lm_robust_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_lm_robust_fit.R 3 | \name{lm_robust_fit} 4 | \alias{lm_robust_fit} 5 | \title{Internal method that creates linear fits} 6 | \usage{ 7 | lm_robust_fit( 8 | y, 9 | X, 10 | yoriginal = NULL, 11 | Xoriginal = NULL, 12 | weights, 13 | cluster, 14 | fixed_effects = NULL, 15 | ci = TRUE, 16 | se_type, 17 | has_int, 18 | alpha = 0.05, 19 | return_vcov = TRUE, 20 | return_fit = TRUE, 21 | try_cholesky = FALSE, 22 | iv_stage = list(0) 23 | ) 24 | } 25 | \arguments{ 26 | \item{y}{numeric outcome vector} 27 | 28 | \item{X}{numeric design matrix} 29 | 30 | \item{yoriginal}{numeric outcome vector, unprojected if there are fixed effects} 31 | 32 | \item{Xoriginal}{numeric design matrix, unprojected if there are fixed effects. 
Any column named \code{"(Intercept)"} will be dropped} 33 | 34 | \item{weights}{numeric weights vector} 35 | 36 | \item{cluster}{numeric cluster vector} 37 | 38 | \item{fixed_effects}{character matrix of fixed effect groups} 39 | 40 | \item{ci}{boolean that when TRUE returns confidence intervals and p-values} 41 | 42 | \item{se_type}{character denoting which kind of SEs to return} 43 | 44 | \item{has_int}{logical, whether the model has an intercept, used for \eqn{R^2}} 45 | 46 | \item{alpha}{numeric denoting the test size for confidence intervals} 47 | 48 | \item{return_vcov}{logical, whether to return the vcov matrix for later usage} 49 | 50 | \item{return_fit}{logical, whether to return fitted values} 51 | 52 | \item{try_cholesky}{logical, whether to try using a cholesky decomposition to solve LS instead of a QR decomposition} 53 | 54 | \item{iv_stage}{list of length two, the first element denotes the stage of 2SLS IV estimation, where 0 is used for OLS. The second element is only used for the second stage of 2SLS and has the first stage design matrix. For OLS, the default is \code{list(0)}; for the first stage of 2SLS, \code{list(1)}; for the second stage of 2SLS, \code{list(2, first_stage_design_mat)}.} 55 | } 56 | \description{ 57 | Internal method that creates linear fits 58 | } 59 | -------------------------------------------------------------------------------- /man/na.omit_detailed.data.frame.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_na_omit_detailed.R 3 | \name{na.omit_detailed.data.frame} 4 | \alias{na.omit_detailed.data.frame} 5 | \title{Extra logging on na.omit handler} 6 | \usage{ 7 | na.omit_detailed.data.frame(object) 8 | } 9 | \arguments{ 10 | \item{object}{a data.frame} 11 | } 12 | \value{ 13 | a normal \code{omit} object, with the extra attribute \code{why_omit}, 14 | which contains the leftmost column containing an NA for each row that was dropped, by 15 | column name, if any were dropped. 16 | } 17 | \description{ 18 | Extra logging on na.omit handler 19 | } 20 | \seealso{ 21 | \code{\link{na.omit}} 22 | } 23 | -------------------------------------------------------------------------------- /man/permutations_to_condition_pr_mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_condition_pr_matrix.R 3 | \name{permutations_to_condition_pr_mat} 4 | \alias{permutations_to_condition_pr_mat} 5 | \title{Builds condition probability matrices for Horvitz-Thompson estimation from 6 | permutation matrix} 7 | \usage{ 8 | permutations_to_condition_pr_mat(permutations) 9 | } 10 | \arguments{ 11 | \item{permutations}{A matrix where the rows are units and the columns are 12 | different treatment permutations; treated units must be represented with a 13 | 1 and control units with a 0} 14 | } 15 | \value{ 16 | a numeric 2n*2n matrix of marginal and joint condition treatment 17 | probabilities to be passed to the \code{condition_pr_mat} argument of 18 | \code{\link{horvitz_thompson}}. 
19 | } 20 | \description{ 21 | Builds condition probability matrices for Horvitz-Thompson estimation from 22 | permutation matrix 23 | } 24 | \details{ 25 | This function takes a matrix of permutations, for example from 26 | the \code{\link[randomizr]{obtain_permutation_matrix}} function in 27 | \pkg{randomizr} or through simulation and returns a 2n*2n matrix that can 28 | be used to fully specify the design for \code{\link{horvitz_thompson}} 29 | estimation. You can read more about these matrices in the documentation for 30 | the \code{\link{declaration_to_condition_pr_mat}} function. 31 | 32 | This is done by passing this matrix to the \code{condition_pr_mat} argument 33 | of 34 | } 35 | \examples{ 36 | 37 | # Complete randomization 38 | perms <- replicate(1000, sample(rep(0:1, each = 50))) 39 | comp_pr_mat <- permutations_to_condition_pr_mat(perms) 40 | 41 | # Arbitrary randomization 42 | possible_treats <- cbind( 43 | c(1, 1, 0, 1, 0, 0, 0, 1, 1, 0), 44 | c(0, 1, 1, 0, 1, 1, 0, 1, 0, 1), 45 | c(1, 0, 1, 1, 1, 1, 1, 0, 0, 0) 46 | ) 47 | arb_pr_mat <- permutations_to_condition_pr_mat(possible_treats) 48 | # Simulating a column to be realized treatment 49 | z <- possible_treats[, sample(ncol(possible_treats), size = 1)] 50 | y <- rnorm(nrow(possible_treats)) 51 | horvitz_thompson(y ~ z, condition_pr_mat = arb_pr_mat) 52 | 53 | } 54 | \seealso{ 55 | \code{\link[randomizr]{declare_ra}}, 56 | \code{\link{declaration_to_condition_pr_mat}} 57 | } 58 | -------------------------------------------------------------------------------- /man/predict.lm_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_predict.R 3 | \name{predict.lm_robust} 4 | \alias{predict.lm_robust} 5 | \title{Predict method for \code{lm_robust} object} 6 | \usage{ 7 | \method{predict}{lm_robust}( 8 | object, 9 | newdata, 10 | se.fit = FALSE, 11 | interval = c("none", "confidence", "prediction"), 12 | alpha = 0.05, 13 | na.action = na.pass, 14 | pred.var = NULL, 15 | weights, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{object}{an object of class 'lm_robust'} 21 | 22 | \item{newdata}{a data frame in which to look for variables with which to predict} 23 | 24 | \item{se.fit}{logical. Whether standard errors are required, default = FALSE} 25 | 26 | \item{interval}{type of interval calculation. Can be abbreviated, default = none} 27 | 28 | \item{alpha}{numeric denoting the test size for confidence intervals} 29 | 30 | \item{na.action}{function determining what should be done with missing 31 | values in newdata. The default is to predict NA.} 32 | 33 | \item{pred.var}{the variance(s) for future observations to be assumed for 34 | prediction intervals.} 35 | 36 | \item{weights}{variance weights for prediction. This can be a numeric 37 | vector or a bare (unquoted) name of the weights variable in the supplied 38 | newdata.} 39 | 40 | \item{...}{other arguments, unused} 41 | } 42 | \description{ 43 | Predict method for \code{lm_robust} object 44 | } 45 | \details{ 46 | Produces predicted values, obtained by evaluating the regression 47 | function in the frame \code{newdata} for fits from \code{lm_robust} and 48 | \code{lm_lin}. If the logical se.fit is TRUE, standard errors of the 49 | predictions are calculated. Setting intervals specifies computation of 50 | confidence or prediction (tolerance) intervals at the specified level, 51 | sometimes referred to as narrow vs. wide intervals. 
52 | 53 | The equation used for the standard error of a prediction given a row of 54 | data \eqn{x} is: 55 | 56 | \eqn{\sqrt{x \Sigma x'}}, 57 | 58 | where \eqn{\Sigma} is the estimated variance-covariance matrix from 59 | \code{lm_robust}. 60 | 61 | The prediction intervals are for a single observation at each case in 62 | \code{newdata} with error variance(s) \code{pred.var}. The default is to assume 63 | that future observations have the same error variance as those used for 64 | fitting, which is taken from the fitted \code{\link{lm_robust}} object. If 65 | weights is supplied, the inverse of this is used as a scale factor. If the 66 | fit was weighted, the default is to assume constant prediction variance, 67 | with a warning. 68 | } 69 | \examples{ 70 | 71 | # Set seed 72 | set.seed(42) 73 | 74 | # Simulate data 75 | n <- 10 76 | dat <- data.frame(y = rnorm(n), x = rnorm(n)) 77 | 78 | # Fit lm 79 | lm_out <- lm_robust(y ~ x, data = dat) 80 | # Get predicted fits 81 | fits <- predict(lm_out, newdata = dat) 82 | # With standard errors and confidence intervals 83 | fits <- predict(lm_out, newdata = dat, se.fit = TRUE, interval = "confidence") 84 | 85 | # Use new data as well 86 | new_dat <- data.frame(x = runif(n, 5, 8)) 87 | predict(lm_out, newdata = new_dat) 88 | 89 | # You can also supply custom variance weights for prediction intervals 90 | new_dat$w <- runif(n) 91 | predict(lm_out, newdata = new_dat, weights = w, interval = "prediction") 92 | 93 | } 94 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/S3_glance.R, R/S3_tidy.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{glance} 7 | \alias{tidy} 8 | \title{Objects exported from other packages} 9 | \keyword{internal} 10 | \description{ 11 | These objects are imported from other packages. Follow the links 12 | below to see their documentation. 13 | 14 | \describe{ 15 | \item{generics}{\code{\link[generics]{glance}}, \code{\link[generics]{tidy}}} 16 | }} 17 | 18 | -------------------------------------------------------------------------------- /man/starprep.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/helper_starprep.R 3 | \name{starprep} 4 | \alias{starprep} 5 | \title{Prepare model fits for stargazer} 6 | \usage{ 7 | starprep( 8 | ..., 9 | stat = c("std.error", "statistic", "p.value", "ci", "df"), 10 | se_type = NULL, 11 | clusters = NULL, 12 | alpha = 0.05 13 | ) 14 | } 15 | \arguments{ 16 | \item{...}{a list of lm_robust or lm objects} 17 | 18 | \item{stat}{either "std.error" (the default), "statistic" (the t-statistic), "p.value", "ci", or "df"} 19 | 20 | \item{se_type}{(optional) if any of the objects are lm objects, what standard 21 | errors should be used. Must only be one type and will be used for all lm 22 | objects passed to starprep. See \code{commarobust} for more.} 23 | 24 | \item{clusters}{(optional) if any of the objects are lm objects, what clusters 25 | should be used, if any. Must only be one vector and will 26 | be used for all lm objects passed to starprep. 
See \code{commarobust} for more.} 27 | 28 | \item{alpha}{(optional) if any of the objects are lm objects, what significance level 29 | should be used for the p-values or confidence intervals} 30 | } 31 | \value{ 32 | a list of vectors of extracted statistics for stargazers 33 | } 34 | \description{ 35 | Prepare model fits for stargazer 36 | } 37 | \details{ 38 | Used to help extract statistics from lists of model fits for stargazer. 39 | Prefers lm_robust objects, but because \code{stargazer} does not work with \code{lm_robust} 40 | objects, \code{starprep} can also take \code{lm} objects and calls \code{commarobust} to get 41 | the preferred, robust statistics. 42 | } 43 | \examples{ 44 | 45 | library(stargazer) 46 | 47 | lm1 <- lm(mpg ~ hp, data = mtcars) 48 | lm2 <- lm(mpg ~ hp + wt, data = mtcars) 49 | 50 | # Use default "HC2" standard errors 51 | stargazer(lm1, lm2, 52 | se = starprep(lm1, lm2), 53 | p = starprep(lm1, lm2, stat = "p.value"), 54 | omit.stat = "f") 55 | # NB: We remove the F-stat because stargazer only can use original F-stat 56 | # which uses classical SEs 57 | 58 | # Use default "CR2" standard errors with clusters 59 | stargazer(lm1, lm2, 60 | se = starprep(lm1, lm2, clusters = mtcars$carb), 61 | p = starprep(lm1, lm2, clusters = mtcars$carb, stat = "p.value"), 62 | omit.stat = "f") 63 | 64 | # Can also specify significance levels and different standard errors 65 | stargazer(lm1, lm2, 66 | ci.custom = starprep(lm1, lm2, se_type = "HC3", alpha = 0.1, stat = "ci"), 67 | omit.stat = "f") 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | 2 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // ht_covar_partial 15 | double ht_covar_partial(const Eigen::VectorXd& y1, const Eigen::VectorXd& y0, const Eigen::MatrixXd& p10, const Eigen::VectorXd& p1, const Eigen::VectorXd& p0); 16 | RcppExport SEXP _estimatr_ht_covar_partial(SEXP y1SEXP, SEXP y0SEXP, SEXP p10SEXP, SEXP p1SEXP, SEXP p0SEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type y1(y1SEXP); 21 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type y0(y0SEXP); 22 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type p10(p10SEXP); 23 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type p1(p1SEXP); 24 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type p0(p0SEXP); 25 | rcpp_result_gen = Rcpp::wrap(ht_covar_partial(y1, y0, p10, p1, p0)); 26 | return rcpp_result_gen; 27 | END_RCPP 28 | } 29 | // ht_var_partial 30 | double ht_var_partial(const 
Eigen::VectorXd& y, const Eigen::MatrixXd& p); 31 | RcppExport SEXP _estimatr_ht_var_partial(SEXP ySEXP, SEXP pSEXP) { 32 | BEGIN_RCPP 33 | Rcpp::RObject rcpp_result_gen; 34 | Rcpp::RNGScope rcpp_rngScope_gen; 35 | Rcpp::traits::input_parameter< const Eigen::VectorXd& >::type y(ySEXP); 36 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type p(pSEXP); 37 | rcpp_result_gen = Rcpp::wrap(ht_var_partial(y, p)); 38 | return rcpp_result_gen; 39 | END_RCPP 40 | } 41 | // demeanMat2 42 | Eigen::ArrayXXd demeanMat2(const Eigen::MatrixXd& what, const Rcpp::IntegerMatrix& fes, const Rcpp::NumericVector& weights, const int& start_col, const double& eps); 43 | RcppExport SEXP _estimatr_demeanMat2(SEXP whatSEXP, SEXP fesSEXP, SEXP weightsSEXP, SEXP start_colSEXP, SEXP epsSEXP) { 44 | BEGIN_RCPP 45 | Rcpp::RObject rcpp_result_gen; 46 | Rcpp::RNGScope rcpp_rngScope_gen; 47 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type what(whatSEXP); 48 | Rcpp::traits::input_parameter< const Rcpp::IntegerMatrix& >::type fes(fesSEXP); 49 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type weights(weightsSEXP); 50 | Rcpp::traits::input_parameter< const int& >::type start_col(start_colSEXP); 51 | Rcpp::traits::input_parameter< const double& >::type eps(epsSEXP); 52 | rcpp_result_gen = Rcpp::wrap(demeanMat2(what, fes, weights, start_col, eps)); 53 | return rcpp_result_gen; 54 | END_RCPP 55 | } 56 | // AtA 57 | Eigen::MatrixXd AtA(const Eigen::MatrixXd& A); 58 | RcppExport SEXP _estimatr_AtA(SEXP ASEXP) { 59 | BEGIN_RCPP 60 | Rcpp::RObject rcpp_result_gen; 61 | Rcpp::RNGScope rcpp_rngScope_gen; 62 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type A(ASEXP); 63 | rcpp_result_gen = Rcpp::wrap(AtA(A)); 64 | return rcpp_result_gen; 65 | END_RCPP 66 | } 67 | // Kr 68 | Eigen::MatrixXd Kr(const Eigen::MatrixXd& A, const Eigen::MatrixXd& B); 69 | RcppExport SEXP _estimatr_Kr(SEXP ASEXP, SEXP BSEXP) { 70 | BEGIN_RCPP 71 | Rcpp::RObject rcpp_result_gen; 72 | Rcpp::RNGScope rcpp_rngScope_gen; 73 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type A(ASEXP); 74 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type B(BSEXP); 75 | rcpp_result_gen = Rcpp::wrap(Kr(A, B)); 76 | return rcpp_result_gen; 77 | END_RCPP 78 | } 79 | // lm_solver 80 | List lm_solver(const Eigen::Map& X, const Eigen::Map& y, const bool& try_cholesky); 81 | RcppExport SEXP _estimatr_lm_solver(SEXP XSEXP, SEXP ySEXP, SEXP try_choleskySEXP) { 82 | BEGIN_RCPP 83 | Rcpp::RObject rcpp_result_gen; 84 | Rcpp::RNGScope rcpp_rngScope_gen; 85 | Rcpp::traits::input_parameter< const Eigen::Map& >::type X(XSEXP); 86 | Rcpp::traits::input_parameter< const Eigen::Map& >::type y(ySEXP); 87 | Rcpp::traits::input_parameter< const bool& >::type try_cholesky(try_choleskySEXP); 88 | rcpp_result_gen = Rcpp::wrap(lm_solver(X, y, try_cholesky)); 89 | return rcpp_result_gen; 90 | END_RCPP 91 | } 92 | // lm_variance 93 | List lm_variance(Eigen::Map& X, const Rcpp::Nullable& Xunweighted, const Eigen::Map& XtX_inv, const Eigen::Map& ei, const double weight_mean, const Rcpp::Nullable& cluster, const int& J, const bool& ci, const String se_type, const std::vector& which_covs, const int& fe_rank); 94 | RcppExport SEXP _estimatr_lm_variance(SEXP XSEXP, SEXP XunweightedSEXP, SEXP XtX_invSEXP, SEXP eiSEXP, SEXP weight_meanSEXP, SEXP clusterSEXP, SEXP JSEXP, SEXP ciSEXP, SEXP se_typeSEXP, SEXP which_covsSEXP, SEXP fe_rankSEXP) { 95 | BEGIN_RCPP 96 | Rcpp::RObject rcpp_result_gen; 97 | Rcpp::RNGScope rcpp_rngScope_gen; 98 | 
Rcpp::traits::input_parameter< Eigen::Map& >::type X(XSEXP); 99 | Rcpp::traits::input_parameter< const Rcpp::Nullable& >::type Xunweighted(XunweightedSEXP); 100 | Rcpp::traits::input_parameter< const Eigen::Map& >::type XtX_inv(XtX_invSEXP); 101 | Rcpp::traits::input_parameter< const Eigen::Map& >::type ei(eiSEXP); 102 | Rcpp::traits::input_parameter< const double >::type weight_mean(weight_meanSEXP); 103 | Rcpp::traits::input_parameter< const Rcpp::Nullable& >::type cluster(clusterSEXP); 104 | Rcpp::traits::input_parameter< const int& >::type J(JSEXP); 105 | Rcpp::traits::input_parameter< const bool& >::type ci(ciSEXP); 106 | Rcpp::traits::input_parameter< const String >::type se_type(se_typeSEXP); 107 | Rcpp::traits::input_parameter< const std::vector& >::type which_covs(which_covsSEXP); 108 | Rcpp::traits::input_parameter< const int& >::type fe_rank(fe_rankSEXP); 109 | rcpp_result_gen = Rcpp::wrap(lm_variance(X, Xunweighted, XtX_inv, ei, weight_mean, cluster, J, ci, se_type, which_covs, fe_rank)); 110 | return rcpp_result_gen; 111 | END_RCPP 112 | } 113 | // naomitwhy 114 | DataFrame naomitwhy(DataFrame df, Function recursive_subset); 115 | RcppExport SEXP _estimatr_naomitwhy(SEXP dfSEXP, SEXP recursive_subsetSEXP) { 116 | BEGIN_RCPP 117 | Rcpp::RObject rcpp_result_gen; 118 | Rcpp::RNGScope rcpp_rngScope_gen; 119 | Rcpp::traits::input_parameter< DataFrame >::type df(dfSEXP); 120 | Rcpp::traits::input_parameter< Function >::type recursive_subset(recursive_subsetSEXP); 121 | rcpp_result_gen = Rcpp::wrap(naomitwhy(df, recursive_subset)); 122 | return rcpp_result_gen; 123 | END_RCPP 124 | } 125 | 126 | static const R_CallMethodDef CallEntries[] = { 127 | {"_estimatr_ht_covar_partial", (DL_FUNC) &_estimatr_ht_covar_partial, 5}, 128 | {"_estimatr_ht_var_partial", (DL_FUNC) &_estimatr_ht_var_partial, 2}, 129 | {"_estimatr_demeanMat2", (DL_FUNC) &_estimatr_demeanMat2, 5}, 130 | {"_estimatr_AtA", (DL_FUNC) &_estimatr_AtA, 1}, 131 | {"_estimatr_Kr", (DL_FUNC) &_estimatr_Kr, 2}, 132 | {"_estimatr_lm_solver", (DL_FUNC) &_estimatr_lm_solver, 3}, 133 | {"_estimatr_lm_variance", (DL_FUNC) &_estimatr_lm_variance, 11}, 134 | {"_estimatr_naomitwhy", (DL_FUNC) &_estimatr_naomitwhy, 2}, 135 | {NULL, NULL, 0} 136 | }; 137 | 138 | RcppExport void R_init_estimatr(DllInfo *dll) { 139 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 140 | R_useDynamicSymbols(dll, FALSE); 141 | } 142 | -------------------------------------------------------------------------------- /src/horvitz_thompson_variance.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | // [[Rcpp::plugins(cpp11)]] 3 | 4 | #include 5 | using namespace Rcpp; 6 | 7 | // These functions help compute the variance for the Horvitz-Thompson estimators 8 | // TODO use symmetry and matrices to improve speed 9 | 10 | // [[Rcpp::export]] 11 | double ht_covar_partial(const Eigen::VectorXd & y1, 12 | const Eigen::VectorXd & y0, 13 | const Eigen::MatrixXd & p10, 14 | const Eigen::VectorXd & p1, 15 | const Eigen::VectorXd & p0) { 16 | double cov_total = 0.0; 17 | 18 | for (int i = 0; i < y1.size(); ++i) { 19 | for(int j = 0; j < y0.size(); ++j) { 20 | if(p10(i, j) == 0) { 21 | cov_total += y1(i) * y0(j) * (p10(i, j) - p1(i) * p0(j)); 22 | } else { 23 | cov_total += y1(i) * y0(j) * (p10(i, j) - p1(i) * p0(j)) / p10(i, j); 24 | } 25 | } 26 | } 27 | 28 | return cov_total; 29 | } 30 | 31 | // [[Rcpp::export]] 32 | double ht_var_partial(const Eigen::VectorXd & y, 33 | const Eigen::MatrixXd & 
p) { 34 | double var_total = 0.0; 35 | 36 | for (int i = 0; i < y.size(); ++i) { 37 | for(int j = 0; j < y.size(); ++j) { 38 | if(i != j) { 39 | if (p(i, j) == 0) { 40 | var_total += y(i) * y(j) * (p(i, j) - p(i,i) * p(j,j)) + 41 | std::pow(y(i), 2) * p(i, i) / 2.0 + std::pow(y(j), 2) * p(j, j) / 2.0; 42 | } else { 43 | var_total += y(i) * y(j) * (p(i, j) - p(i,i) * p(j,j)) / p(i, j); 44 | } 45 | 46 | } 47 | } 48 | } 49 | 50 | return var_total; 51 | } 52 | -------------------------------------------------------------------------------- /src/naomit.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | // [[Rcpp::plugins(cpp11)]] 3 | 4 | #include 5 | using namespace Rcpp; 6 | 7 | 8 | template 9 | Vector generic_logical_subset_impl( Vector xin, LogicalVector w){ 10 | return xin[w] ; 11 | } 12 | 13 | SEXP generic_logical_subset( SEXP xin , LogicalVector w){ 14 | RCPP_RETURN_VECTOR(generic_logical_subset_impl, xin, w) ; 15 | } 16 | 17 | // [[Rcpp::export]] 18 | DataFrame naomitwhy(DataFrame df, Function recursive_subset) { 19 | int m = df.nrow(); 20 | int n = df.ncol(); 21 | 22 | Function isna("is.na"); 23 | 24 | CharacterVector df_names = df.names(); 25 | 26 | LogicalVector omit = LogicalVector(m); 27 | 28 | int omit_count = 0; 29 | 30 | List why_omit(n); 31 | why_omit.names() = df_names; 32 | LogicalVector why_omit_idx(n); 33 | 34 | 35 | for (int j = 0; j < n; j++) { 36 | 37 | std::vector why_omit_j; 38 | 39 | LogicalVector v_isna = isna(df[j]); 40 | 41 | for(int ii = m; ii < LENGTH(v_isna); ){ 42 | for(int i = 0; i < m; i++, ii++) 43 | v_isna[i] |= v_isna[ii]; 44 | } 45 | 46 | for(int i = 0; i < m; i++){ 47 | if(v_isna[i]){ 48 | if(!omit[i]){ 49 | why_omit_j.push_back(i + 1); 50 | } 51 | omit[i] = true; 52 | }; 53 | } 54 | 55 | if(why_omit_j.size() > 0){ 56 | why_omit[j] = wrap(why_omit_j); 57 | why_omit_idx[j] = true; 58 | omit_count += why_omit_j.size(); 59 | } 60 | } 61 | 62 | if(omit_count == 0){ return(df); } 63 | 64 | IntegerVector omit_idx = IntegerVector(omit_count); 65 | for(int i = 0, ii=0; ii < omit_count; i++){ 66 | if(omit[i]) omit_idx[ii++] = i+1; 67 | } 68 | 69 | CharacterVector rownames = df.attr("row.names"); 70 | omit_idx.attr("names") = rownames[omit]; 71 | 72 | omit_idx.attr("why_omit") = why_omit[why_omit_idx]; 73 | omit_idx.attr("class") = CharacterVector::create("omit", "detailed"); 74 | 75 | omit = !omit; 76 | 77 | List out(n); 78 | 79 | for(int i = 0; i < n; i++){ 80 | SEXP dfi = df(i); 81 | if(LENGTH(dfi) == m){ 82 | out[i] = generic_logical_subset(dfi, omit); 83 | } else { 84 | out[i] = recursive_subset(dfi, omit); 85 | } 86 | } 87 | 88 | out.names() = df_names; 89 | out.attr("row.names") = rownames[omit]; 90 | out.attr("na.action") = omit_idx; 91 | out.attr("class") = df.attr("class"); 92 | 93 | return(out); 94 | } 95 | 96 | 97 | // require(microbenchmark) 98 | // df <- expand.grid(x=c(1:100, NA), y=c(1:5, NA), z=c(1:8, NA), q=c(NA,2:5)) 99 | // df2 <- na.omit(df) 100 | // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") 101 | // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms") 102 | 103 | // df <- rbind(df, df2, df) 104 | // df2 <- rbind(df2, df2, df2) 105 | // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") 106 | // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms") 107 | 108 | // df <- cbind(df, df,df) 109 | // 
df2 <- cbind(df2, df2, df2) 110 | // microbenchmark(stock=na.omit(df), ours=estimatr:::na.omit_detailed.data.frame(df), unit="ms") 111 | // microbenchmark(stock=na.omit(df2), ours=estimatr:::na.omit_detailed.data.frame(df2), unit="ms") 112 | 113 | // sleep[c("sleep", "foo")] = list(sleep, matrix(1:40, 20)) 114 | // sleep[cbind(c(1,5,9), c(2,1,3))] <- NA 115 | // sleep$sleep[cbind(1+c(1,5,9), c(2,1,3))] <- NA 116 | // sleep$foo[12,1] <- NA 117 | // attributes(estimatr:::na.omit_detailed.data.frame(sleep)) 118 | -------------------------------------------------------------------------------- /tests/sleep.R: -------------------------------------------------------------------------------- 1 | library(estimatr) 2 | lm_robust(extra~group, sleep) 3 | -------------------------------------------------------------------------------- /tests/sleep.Rout.save: -------------------------------------------------------------------------------- 1 | 2 | R version 3.5.0 (2018-04-23) -- "Joy in Playing" 3 | Copyright (C) 2018 The R Foundation for Statistical Computing 4 | Platform: x86_64-apple-darwin15.6.0 (64-bit) 5 | 6 | R is free software and comes with ABSOLUTELY NO WARRANTY. 7 | You are welcome to redistribute it under certain conditions. 8 | Type 'license()' or 'licence()' for distribution details. 9 | 10 | R is a collaborative project with many contributors. 11 | Type 'contributors()' for more information and 12 | 'citation()' on how to cite R or R packages in publications. 13 | 14 | Type 'demo()' for some demos, 'help()' for on-line help, or 15 | 'help.start()' for an HTML browser interface to help. 16 | Type 'q()' to quit R. 17 | 18 | > library(estimatr) 19 | > lm_robust(extra~group, sleep) 20 | Estimate Std. Error t value Pr(>|t|) CI Lower CI Upper DF 21 | (Intercept) 0.75 0.5657345 1.325710 0.20151554 -0.4385641 1.938564 18 22 | group2 1.58 0.8490910 1.860813 0.07918671 -0.2038740 3.363874 18 23 | > 24 | > proc.time() 25 | user system elapsed 26 | 0.782 0.085 0.851 27 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(estimatr) 3 | 4 | test_check("estimatr") 5 | -------------------------------------------------------------------------------- /tests/testthat/helper-lm-robust-se.R: -------------------------------------------------------------------------------- 1 | ## BMlmSE.R implements Bell-McCaffrey standard errors 2 | ## This code is taken from Michal Kolesar 3 | ## https://github.com/kolesarm/Robust-Small-Sample-Standard-Errors 4 | ## Only changed the name of one function, df 5 | 6 | #' Compute the inverse square root of a symmetric matrix 7 | #' @param A matrix 8 | MatSqrtInverse <- function(A) { 9 | ei <- eigen(A, symmetric = TRUE) 10 | 11 | if (min(ei$values) <= 0) { 12 | warning("Gram matrix doesn't appear to be positive definite") 13 | } 14 | 15 | d <- pmax(ei$values, 0) 16 | d2 <- 1 / sqrt(d) 17 | d2[d == 0] <- 0 18 | ## diag(d2) is d2 x d2 identity if d2 is scalar, instead we want 1x1 matrix 19 | ei$vectors %*% (if (length(d2) == 1) d2 else diag(d2)) %*% t(ei$vectors) 20 | } 21 | 22 | #' Compute Bell-McCaffrey Standard Errors 23 | #' @param model Fitted model returned by the \code{lm} function 24 | #' @param clustervar Factor variable that defines clusters. If \code{NULL} (or 25 | #' not supplied), the command computes heteroscedasticity-robust standard 26 | #' errors, rather than cluster-robust standard errors. 
27 | #' @param ell A vector of the same length as the dimension of covariates, 28 | #' specifying which linear combination \eqn{\ell'\beta} of coefficients 29 | #' \eqn{\beta} to compute. If \code{NULL}, compute standard errors for each 30 | #' regressor coefficient 31 | #' @param IK Logical flag only relevant if cluster-robust standard errors are 32 | #' being computed. Specifies whether to compute the degrees-of-freedom 33 | #' adjustment using the Imbens-Kolesár method (if \code{TRUE}), or the 34 | #' Bell-McCaffrey method (if \code{FALSE}) 35 | #' @return Returns a list with the following components \describe{ 36 | #' 37 | #' \item{vcov}{Variance-covariance matrix estimator. For the case without 38 | #' clustering, it corresponds to the HC2 estimator (see MacKinnon and White, 39 | #' 1985 and the reference manual for the \code{sandwich} package). For the case 40 | #' with clustering, it corresponds to a generalization of the HC2 estimator, 41 | #' called LZ2 in Imbens and Kolesár.} 42 | #' 43 | #' \item{dof}{Degrees-of-freedom adjustment} 44 | #' 45 | #' \item{se}{Standard error} 46 | #' 47 | #' \item{adj.se}{Adjusted standard errors. For \beta_j, they are defined as 48 | #' \code{adj.se[j]=sqrt(vcov[j,j]se*qt(0.975,df=dof)} so that the Bell-McCaffrey 49 | #' confidence intervals are given as \code{coefficients(fm)[j] +- 1.96* adj.se=} 50 | #' 51 | #' \item{se.Stata}{Square root of the cluster-robust variance estimator used in 52 | #' STATA} 53 | #' 54 | #' } 55 | #' @examples 56 | #' ## No clustering: 57 | #' set.seed(42) 58 | #' x <- sin(1:10) 59 | #' y <- rnorm(10) 60 | #' fm <- lm(y~x) 61 | #' BMlmSE(fm) 62 | #' ## Clustering, defining the first six observations to be in cluster 1, the 63 | #' #next two in cluster 2, and the last three in cluster three. 
64 | #' clustervar <- as.factor(c(rep(1, 6), rep(2, 2), rep(3, 2))) 65 | #' BMlmSE(fm, clustervar) 66 | BMlmSE <- function(model, clustervar=NULL, ell=NULL, IK=TRUE) { 67 | X <- model.matrix(model) 68 | sum.model <- summary.lm(model) 69 | n <- sum(sum.model$df[1:2]) 70 | K <- model$rank 71 | XXinv <- sum.model$cov.unscaled # XX^{-1} 72 | u <- residuals(model) 73 | 74 | ## Compute DoF given G'*Omega*G without calling eigen as suggested by 75 | ## Winston Lin 76 | DoF <- function(GG) 77 | sum(diag(GG)) ^ 2 / sum(GG * GG) 78 | ## Previously: 79 | ## lam <- eigen(GG, only.values=TRUE)$values 80 | ## sum(lam)^2/sum(lam^2) 81 | 82 | ## no clustering 83 | if (is.null(clustervar)) { 84 | Vhat <- sandwich::vcovHC(model, type = "HC2") 85 | Vhat.Stata <- Vhat * NA 86 | 87 | M <- diag(n) - X %*% XXinv %*% t(X) # annihilator matrix 88 | ## G'*Omega*G 89 | GOG <- function(ell) { 90 | Xtilde <- drop(X %*% XXinv %*% ell / sqrt(diag(M))) 91 | crossprod(M * Xtilde) 92 | } 93 | } else { 94 | if (!is.factor(clustervar)) stop("'clustervar' must be a factor") 95 | 96 | ## Stata 97 | S <- length(levels(clustervar)) # number clusters 98 | uj <- apply(u * X, 2, function(x) tapply(x, clustervar, sum)) 99 | Vhat.Stata <- S / (S - 1) * (n - 1) / (n - K) * 100 | sandwich::sandwich(model, meat = crossprod(uj) / n) 101 | 102 | ## HC2 103 | tXs <- function(s) { 104 | Xs <- X[clustervar == s, , drop = FALSE] 105 | MatSqrtInverse(diag(NROW(Xs)) - Xs %*% XXinv %*% t(Xs)) %*% Xs 106 | } 107 | tX <- lapply(levels(clustervar), tXs) # list of matrices 108 | 109 | tu <- split(u, clustervar) 110 | tutX <- sapply(seq_along(tu), function(i) crossprod(tu[[i]], tX[[i]])) 111 | Vhat <- sandwich::sandwich(model, meat = tcrossprod(tutX) / n) 112 | 113 | ## DOF adjustment 114 | tHs <- function(s) { 115 | Xs <- X[clustervar == s, , drop = FALSE] 116 | index <- which(clustervar == s) 117 | ss <- outer(rep(0, n), index) # n x ns matrix of 0 118 | ss[cbind(index, 1:length(index))] <- 1 119 | ss - X %*% XXinv %*% t(Xs) 120 | } 121 | tH <- lapply(levels(clustervar), tHs) # list of matrices 122 | 123 | Moulton <- function() { 124 | ## Moulton estimates 125 | ns <- tapply(u, clustervar, length) 126 | ssr <- sum(u ^ 2) 127 | rho <- max((sum(sapply(seq_along(tu), function(i) 128 | sum(tu[[i]] %o% tu[[i]]))) - ssr) / (sum(ns ^ 2) - n), 0) 129 | c(sig.eps = max(ssr / n - rho, 0), rho = rho) 130 | } 131 | 132 | GOG <- function(ell) { 133 | G <- sapply( 134 | seq_along(tX), 135 | function(i) tH[[i]] %*% tX[[i]] %*% XXinv %*% ell 136 | ) 137 | GG <- crossprod(G) 138 | 139 | ## IK method 140 | if (IK == TRUE) { 141 | Gsums <- apply( 142 | G, 2, 143 | function(x) tapply(x, clustervar, sum) 144 | ) # Z'*G 145 | GG <- Moulton()[1] * GG + Moulton()[2] * crossprod(Gsums) 146 | } 147 | GG 148 | } 149 | } 150 | 151 | if (!is.null(ell)) { 152 | se <- drop(sqrt(crossprod(ell, Vhat) %*% ell)) 153 | dof <- DoF(GOG(ell)) 154 | se.Stata <- drop(sqrt(crossprod(ell, Vhat.Stata) %*% ell)) 155 | } else { 156 | se <- sqrt(diag(Vhat)) 157 | dof <- sapply(seq(K), function(k) DoF(GOG(diag(K)[, k]))) 158 | se.Stata <- sqrt(diag(Vhat.Stata)) 159 | } 160 | names(dof) <- names(se) 161 | 162 | list( 163 | vcov = Vhat, dof = dof, adj.se = se * qt(0.975, df = dof) / qnorm(0.975), 164 | se = se, se.Stata = se.Stata 165 | ) 166 | } 167 | 168 | -------------------------------------------------------------------------------- /tests/testthat/helper-return-cleaners.R: -------------------------------------------------------------------------------- 1 | # This fn removes calls from 
function returns to make testing easier 2 | rmcall <- function(obj) { 3 | if (!is.null(obj[["call"]])) { 4 | obj[["call"]] <- NULL 5 | } 6 | return(obj) 7 | } 8 | 9 | # Casts conditions as character objects for equality purposes 10 | condchr <- function(obj) { 11 | obj[["condition2"]] <- as.character(obj[["condition2"]]) 12 | obj[["condition1"]] <- as.character(obj[["condition1"]]) 13 | 14 | obj 15 | } 16 | -------------------------------------------------------------------------------- /tests/testthat/helper-se-types.R: -------------------------------------------------------------------------------- 1 | # se_types for various files 2 | 3 | se_types <- c("classical", "HC0", "HC1", "HC2", "HC3") 4 | cr_se_types <- c("CR0", "stata", "CR2") 5 | -------------------------------------------------------------------------------- /tests/testthat/mtcars.csv: -------------------------------------------------------------------------------- 1 | "mpg","cyl","disp","hp","drat","wt","qsec","vs","am","gear","carb" 2 | 21,6,160,110,3.9,2.62,16.46,0,1,4,4 3 | 21,6,160,110,3.9,2.875,17.02,0,1,4,4 4 | 22.8,4,108,93,3.85,2.32,18.61,1,1,4,1 5 | 21.4,6,258,110,3.08,3.215,19.44,1,0,3,1 6 | 18.7,8,360,175,3.15,3.44,17.02,0,0,3,2 7 | 18.1,6,225,105,2.76,3.46,20.22,1,0,3,1 8 | 14.3,8,360,245,3.21,3.57,15.84,0,0,3,4 9 | 24.4,4,146.7,62,3.69,3.19,20,1,0,4,2 10 | 22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2 11 | 19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4 12 | 17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4 13 | 16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3 14 | 17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3 15 | 15.2,8,275.8,180,3.07,3.78,18,0,0,3,3 16 | 10.4,8,472,205,2.93,5.25,17.98,0,0,3,4 17 | 10.4,8,460,215,3,5.424,17.82,0,0,3,4 18 | 14.7,8,440,230,3.23,5.345,17.42,0,0,3,4 19 | 32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1 20 | 30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2 21 | 33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1 22 | 21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1 23 | 15.5,8,318,150,2.76,3.52,16.87,0,0,3,2 24 | 15.2,8,304,150,3.15,3.435,17.3,0,0,3,2 25 | 13.3,8,350,245,3.73,3.84,15.41,0,0,3,4 26 | 19.2,8,400,175,3.08,3.845,17.05,0,0,3,2 27 | 27.3,4,79,66,4.08,1.935,18.9,1,1,4,1 28 | 26,4,120.3,91,4.43,2.14,16.7,0,1,5,2 29 | 30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2 30 | 15.8,8,351,264,4.22,3.17,14.5,0,1,5,4 31 | 19.7,6,145,175,3.62,2.77,15.5,0,1,5,6 32 | 15,8,301,335,3.54,3.57,14.6,0,1,5,8 33 | 21.4,4,121,109,4.11,2.78,18.6,1,1,4,2 34 | -------------------------------------------------------------------------------- /tests/testthat/run-stata-areg-models.do: -------------------------------------------------------------------------------- 1 | // This file fits many models in stata and outputs the estimates for comparison with estimatr 2 | 3 | clear all 4 | import delimited mtcars.csv 5 | 6 | gen w = drat / 5 7 | 8 | file open outf using stata-fe-ests.txt, write r 9 | 10 | // xtset carb 11 | // xtreg mpg hp, fe 12 | 13 | areg mpg hp, absorb(carb) 14 | mat V=e(V) 15 | file write outf _n "classical" _tab (V[1,1]) _tab (e(F)) 16 | 17 | areg mpg hp, absorb(carb) vce(robust) 18 | mat V=e(V) 19 | file write outf _n "HC1" _tab (V[1,1]) _tab (e(F)) 20 | 21 | areg mpg hp, absorb(carb) vce(cluster cyl) 22 | mat V=e(V) 23 | file write outf _n "stata_cl" _tab (V[1,1]) _tab (e(F)) 24 | 25 | areg mpg hp [aweight=w], absorb(carb) 26 | predict hii, hat 27 | mat V=e(V) 28 | file write outf _n "classicalw" _tab (V[1,1]) _tab (e(F)) 29 | 30 | areg mpg hp [aweight=w], absorb(carb) vce(robust) 31 | mat V=e(V) 32 | file write outf _n "HC1w" _tab (V[1,1]) _tab (e(F)) 33 | 34 | areg mpg hp 
[aweight=w], absorb(carb) vce(cluster cyl) 35 | mat V=e(V) 36 | file write outf _n "stata_clw" _tab (V[1,1]) _tab (e(F)) 37 | 38 | file close outf 39 | -------------------------------------------------------------------------------- /tests/testthat/run-stata-iv-models.do: -------------------------------------------------------------------------------- 1 | // This file fits many models in stata and outputs the estimates for comparison with estimatr 2 | 3 | clear all 4 | import delimited mtcars.csv 5 | 6 | gen w = drat / 5 7 | 8 | file open outf using stata-iv-ests.txt, write r 9 | 10 | ivregress 2sls mpg (hp am = wt gear), small 11 | mat V=e(V) 12 | file write outf _n "classical" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 13 | 14 | ivregress 2sls mpg (hp am = wt gear), small rob 15 | mat V=e(V) 16 | file write outf _n "rob" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 17 | 18 | ivregress 2sls mpg (hp am = wt gear), small vce(cluster cyl) 19 | mat V=e(V) 20 | file write outf _n "cl" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 21 | 22 | ivregress 2sls mpg (hp am = wt gear) [aweight = w], small 23 | mat V=e(V) 24 | file write outf _n "classical_w" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 25 | 26 | ivregress 2sls mpg (hp am = wt gear) [aweight = w], small rob 27 | mat V=e(V) 28 | file write outf _n "rob_w" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 29 | 30 | ivregress 2sls mpg (hp am = wt gear) [aweight = w], small vce(cluster cyl) 31 | mat V=e(V) 32 | file write outf _n "cl_w" _tab (V[1,1]) _tab (V[2,2]) _tab (V[3,3]) _tab (e(F)) _tab (e(r2)) _tab (e(r2_a)) _tab (e(rmse)) 33 | 34 | file close outf 35 | 36 | 37 | cap file close outfdiag 38 | file open outfdiag using stata-iv-diagnostics.txt, write r 39 | 40 | #delimit ; 41 | local formulae = `" 42 | "(hp = wt)" 43 | "(hp am = wt gear)" 44 | "gear (hp = wt)" 45 | "gear (hp = wt am)" 46 | "' ; 47 | local options = `" 48 | "small" 49 | "rob" 50 | "cluster(cyl)" 51 | "small noconstant" 52 | "rob noconstant" 53 | "cluster(cyl) noconstant" 54 | "' ; 55 | local weights = `" 56 | "" 57 | "[aweight = w]" 58 | "' ; 59 | #delimit cr 60 | 61 | foreach f in `formulae' { 62 | display "`f'" 63 | foreach opt in `options' { 64 | foreach w in `weights' { 65 | ivregress 2sls mpg `f' `w', `opt' 66 | estat firststage, all 67 | mat singleresults=r(singleresults) 68 | local rows = rowsof(singleresults) 69 | forvalues i=1/`rows' { 70 | cap file write outfdiag "`f';`w';`opt';" "weak`i'" ";" (singleresults[`i',5]) ";" (singleresults[`i',6]) ";" (singleresults[`i',4]) ";" (singleresults[`i',7]) _n 71 | } 72 | estat endogenous, forceweights 73 | if strpos("`opt'", "rob") > 0 | strpos("`opt'", "cluster") > 0 { 74 | file write outfdiag "`f';`w';`opt';" "endog" ";" (r(regFdf_n)) ";" (r(regFdf_d)) ";" (r(regF)) ";" (r(p_regF)) _n 75 | cap estat overid, forceweights 76 | file write outfdiag "`f';`w';`opt';" "overid" ";" (r(df)) ";.;" (r(score)) ";" (r(p_score)) _n 77 | } 78 | else { 79 | file write outfdiag "`f';`w';`opt';" "endog" ";" (r(df)) ";" (r(wudf_r)) ";" (r(wu)) ";" (r(p_wu)) _n 80 | cap estat overid, forceweights 81 | file write outfdiag "`f';`w';`opt';" "overid" ";" (r(df)) ";.;" (r(sargan)) ";" (r(p_sargan)) _n 82 | } 83 | } 84 | 85 | } 86 | 87 | } 88 | 89 | file close outfdiag 90 | 
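For orientation, a rough sketch of the estimatr calls these Stata benchmarks are compared against (an editorial illustration, not the actual test code; the se_type labels follow the conventions in helper-se-types.R above):

library(estimatr)
mt <- read.csv("mtcars.csv")
mt$w <- mt$drat / 5

# ivregress 2sls mpg (hp am = wt gear), small                 -> classical SEs
iv_robust(mpg ~ hp + am | wt + gear, data = mt, se_type = "classical")
# ..., small rob                                              -> HC1 robust SEs
iv_robust(mpg ~ hp + am | wt + gear, data = mt, se_type = "HC1")
# ..., small vce(cluster cyl)                                 -> Stata-style clustered SEs
iv_robust(mpg ~ hp + am | wt + gear, data = mt, clusters = cyl, se_type = "stata")
# the [aweight = w] variants additionally pass weights = w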
-------------------------------------------------------------------------------- /tests/testthat/run-stata-models.do: -------------------------------------------------------------------------------- 1 | // This file fits many models in stata and outputs the estimates for comparison with estimatr 2 | 3 | clear all 4 | import delimited mtcars.csv 5 | 6 | gen w = drat / 5 7 | 8 | file open outf using stata-ests.txt, write r 9 | 10 | reg mpg hp 11 | mat V=e(V) 12 | file write outf _n "classical" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 13 | 14 | reg mpg hp, vce(robust) 15 | mat V=e(V) 16 | file write outf _n "HC1" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 17 | 18 | reg mpg hp, vce(hc2) 19 | mat V=e(V) 20 | file write outf _n "HC2" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 21 | 22 | reg mpg hp, vce(hc3) 23 | mat V=e(V) 24 | file write outf _n "HC3" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 25 | 26 | reg mpg hp, vce(cluster cyl) 27 | mat V=e(V) 28 | file write outf _n "stata_cl" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 29 | 30 | reg mpg hp [aweight=w] 31 | predict hii, hat 32 | mat V=e(V) 33 | file write outf _n "classicalw" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 34 | 35 | reg mpg hp [aweight=w], vce(robust) 36 | mat V=e(V) 37 | file write outf _n "HC1w" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 38 | 39 | reg mpg hp [aweight=w], vce(hc2) 40 | mat V=e(V) 41 | file write outf _n "HC2w" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 42 | 43 | reg mpg hp [aweight=w], vce(hc3) 44 | mat V=e(V) 45 | file write outf _n "HC3w" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 46 | 47 | reg mpg hp [aweight=w], vce(cluster cyl) 48 | mat V=e(V) 49 | file write outf _n "stata_clw" _tab (V[1,1]) _tab (V[2,2]) _tab (e(df_r)) _tab (e(F)) 50 | 51 | file close outf 52 | -------------------------------------------------------------------------------- /tests/testthat/stata-ests.txt: -------------------------------------------------------------------------------- 1 | 2 | classical .0001024 2.669698 30 45.459797 3 | HC1 .00018388 4.3123302 30 25.315348 4 | HC2 .00021652 4.8093019 30 21.499286 5 | HC3 .00027562 5.8084222 30 16.889294 6 | stata_cl .00034882 14.922475 2 13.345142 7 | classicalw .00009984 2.4918295 30 48.196413 8 | HC1w .00017632 4.0902569 30 27.29069 9 | HC2w .00020717 4.5180983 30 23.226908 10 | HC3w .00026334 5.4069309 30 18.2733 11 | stata_clw .00031337 13.155473 2 15.355597 -------------------------------------------------------------------------------- /tests/testthat/stata-fe-ests.txt: -------------------------------------------------------------------------------- 1 | 2 | classical .00024844 20.439199 3 | HC1 .00018026 28.169159 4 | stata_cl .00021845 23.24499 5 | classicalw .00023978 20.31818 6 | HC1w .00019399 25.11446 7 | stata_clw .00018777 25.946624 -------------------------------------------------------------------------------- /tests/testthat/stata-iv-ests.txt: -------------------------------------------------------------------------------- 1 | 2 | classical .0001602 2.2793852 5.2396632 44.808289 .75605596 .73923223 3.0776867 3 | rob .00015781 2.2003453 4.7873499 41.366981 .75605596 .73923223 3.0776867 4 | cl .00004096 .7950331 .31612954 108.37591 .75605596 .73923223 3.0776867 5 | classical_w .00015649 2.3424049 5.2400158 46.736059 .76415188 .74788649 3.0971208 6 | rob_w .00014704 2.0834808 4.5677404 45.410829 .76415188 .74788649 3.0971208 7 | cl_w .00006651 .64892154 .54670656 
81.595912 .76415188 .74788649 3.0971208 -------------------------------------------------------------------------------- /tests/testthat/test-arg-checking.R: -------------------------------------------------------------------------------- 1 | context("Estimator - Arg checking fails as expected.") 2 | 3 | test_that("#349 Early fail when formula is a string", { 4 | expect_error( 5 | estimatr::lm_robust("mpg~hp", data = mtcars, cluster = wt), 6 | "formula" 7 | ) 8 | expect_length( 9 | estimatr::lm_robust(mpg~hp, data = mtcars, cluster = wt), 10 | 29 11 | ) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-condition1-condition2.R: -------------------------------------------------------------------------------- 1 | context("Helper - Condition parsing for difference estimators") 2 | 3 | 4 | test_that("Condition arguments behave as expected", { 5 | n <- 40 6 | dat <- data.frame( 7 | y = rnorm(n), 8 | bl = rep(1:5, each = 8), 9 | z = 1:4, 10 | ps = runif(n) 11 | ) 12 | 13 | horvitz_thompson(y ~ z, data = dat, subset = z <= 2, condition_prs = ps) 14 | 15 | # Subsetting and just selecting two conditions 16 | expect_identical( 17 | horvitz_thompson(y ~ z, data = dat, subset = z <= 2, condition_prs = ps), 18 | horvitz_thompson(y ~ z, data = dat, condition1 = 1L, condition2 = 2L, condition_prs = ps) 19 | ) 20 | 21 | expect_identical( 22 | difference_in_means(y ~ z, data = dat, subset = z <= 2), 23 | difference_in_means(y ~ z, data = dat, condition1 = 1L, condition2 = 2L) 24 | ) 25 | 26 | expect_identical( 27 | difference_in_means(y ~ z, data = dat, subset = z <= 2, blocks = bl), 28 | difference_in_means(y ~ z, data = dat, condition1 = 1L, condition2 = 2L, blocks = bl) 29 | ) 30 | 31 | # Subsetting and just selecting two conditions 32 | expect_identical( 33 | tidy(horvitz_thompson( 34 | y ~ z, 35 | data = dat, 36 | condition1 = 3, 37 | condition2 = 4, 38 | condition_prs = rep(0.5, nrow(dat)) 39 | ))[c("estimate", "std.error")], 40 | tidy(horvitz_thompson( 41 | y ~ z, 42 | data = dat, 43 | condition1 = 4, 44 | condition2 = 3, 45 | condition_prs = rep(0.5, nrow(dat)) 46 | ))[c("estimate", "std.error")] * c(-1, 1) 47 | ) 48 | 49 | expect_identical( 50 | tidy(difference_in_means( 51 | y ~ z, 52 | data = dat, 53 | condition1 = 2, 54 | condition2 = 1 55 | ))[c("estimate", "std.error")], 56 | tidy(difference_in_means( 57 | y ~ z, 58 | data = dat, 59 | condition1 = 1, 60 | condition2 = 2 61 | ))[c("estimate", "std.error")] * c(-1, 1) 62 | ) 63 | 64 | # Errors if not specifying both 65 | expect_error( 66 | horvitz_thompson( 67 | y ~ z, 68 | data = dat, 69 | condition1 = 4, 70 | condition_prs = ps 71 | ), 72 | "condition1" 73 | ) 74 | expect_error( 75 | horvitz_thompson( 76 | y ~ z, 77 | data = dat, 78 | condition2 = 4, 79 | condition_prs = ps 80 | ), 81 | "condition1" 82 | ) 83 | expect_error( 84 | horvitz_thompson( 85 | y ~ z, 86 | data = dat, 87 | condition_prs = ps 88 | ), 89 | "condition1" 90 | ) 91 | 92 | expect_error( 93 | difference_in_means( 94 | y ~ z, 95 | data = dat, 96 | condition1 = 4 97 | ), 98 | "condition1" 99 | ) 100 | expect_error( 101 | difference_in_means( 102 | y ~ z, 103 | data = dat, 104 | condition2 = 4 105 | ), 106 | "condition1" 107 | ) 108 | expect_error( 109 | difference_in_means( 110 | y ~ z, 111 | data = dat 112 | ), 113 | "condition1" 114 | ) 115 | 116 | 117 | # Specifying only one works with binary treatment 118 | dat$z <- c("Treated", "Control") 119 | expect_identical( 120 | difference_in_means( 121 | y ~ z, 122 | 
data = dat, 123 | condition1 = "Treated" 124 | ), 125 | difference_in_means( 126 | y ~ z, 127 | data = dat, 128 | condition1 = "Treated", 129 | condition2 = "Control" 130 | ) 131 | ) 132 | 133 | expect_identical( 134 | horvitz_thompson( 135 | y ~ z, 136 | data = dat, 137 | condition1 = "Treated" 138 | ), 139 | horvitz_thompson( 140 | y ~ z, 141 | data = dat, 142 | condition1 = "Treated", 143 | condition2 = "Control" 144 | ) 145 | ) 146 | expect_identical( 147 | difference_in_means( 148 | y ~ z, 149 | data = dat, 150 | condition2 = "Treated" 151 | ), 152 | difference_in_means( 153 | y ~ z, 154 | data = dat, 155 | condition2 = "Treated", 156 | condition1 = "Control" 157 | ) 158 | ) 159 | 160 | expect_identical( 161 | horvitz_thompson( 162 | y ~ z, 163 | data = dat, 164 | condition2 = "Treated" 165 | ), 166 | horvitz_thompson( 167 | y ~ z, 168 | data = dat, 169 | condition2 = "Treated", 170 | condition1 = "Control" 171 | ) 172 | ) 173 | 174 | # Works with factor 175 | dat$z <- factor(c("T", "C")) 176 | # Must pass string! 177 | difference_in_means(y ~ z, condition2 = "T", data = dat) 178 | # Errors if not found 179 | expect_error( 180 | difference_in_means( 181 | y ~ z, 182 | condition2 = 1, 183 | data = dat 184 | ), 185 | "`condition1` and `condition2` must be values found in the treatment" 186 | ) 187 | 188 | dat$z <- 1 189 | expect_error( 190 | difference_in_means(y ~ z, data = dat), 191 | "Must have more than one value in treatment unless using Horvitz" 192 | ) 193 | }) 194 | -------------------------------------------------------------------------------- /tests/testthat/test-iv-robust-fes.R: -------------------------------------------------------------------------------- 1 | context("Estimator - iv_robust, fixed effects") 2 | 3 | set.seed(43) 4 | N <- 20 5 | dat <- data.frame( 6 | Y = rnorm(N), 7 | X1 = rnorm(N), 8 | X2 = rnorm(N), 9 | Z = rbinom(N, 1, .5), 10 | B = factor(rep(1:2, times = c(8, 12))), 11 | B2 = factor(rep(1:4, times = c(3, 3, 4, 10))), 12 | cl = sample(1:4, size = N, replace = T), 13 | w = runif(N) 14 | ) 15 | dat$Xdup <- dat$X 16 | dat$Bdup <- dat$B 17 | 18 | test_that("FE matches with multiple FEs and covars", { 19 | 20 | for (se_type in se_types) { 21 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, se_type = se_type) 22 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, se_type = se_type) 23 | 24 | 25 | expect_equivalent( 26 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 27 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 28 | ) 29 | 30 | expect_equivalent( 31 | ro$fitted.values, 32 | rfo$fitted.values 33 | ) 34 | 35 | expect_equal( 36 | ro[c("r.squared", "adj.r.squared")], 37 | rfo[c("r.squared", "adj.r.squared")] 38 | ) 39 | 40 | # weights 41 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), data = dat, weights = w, se_type = se_type) 42 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, data = dat, weights = w, se_type = se_type) 43 | 44 | expect_equivalent( 45 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 46 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 47 | ) 48 | 49 | expect_equivalent( 50 | ro$fitted.values, 51 | rfo$fitted.values 52 | ) 53 | 54 | expect_equal( 55 | ro[c("r.squared", "adj.r.squared")], 56 | rfo[c("r.squared", "adj.r.squared")] 57 | ) 58 | } 59 | 60 | for (se_type in cr_se_types) { 61 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, se_type = se_type) 62 | rfo 
<- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, se_type = se_type) 63 | 64 | expect_equivalent( 65 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 66 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 67 | ) 68 | 69 | expect_equivalent( 70 | ro$fitted.values, 71 | rfo$fitted.values 72 | ) 73 | 74 | expect_equal( 75 | ro[c("r.squared", "adj.r.squared")], 76 | rfo[c("r.squared", "adj.r.squared")] 77 | ) 78 | 79 | # weights 80 | if (se_type %in% c("CR2", "CR3")) { 81 | expect_error( 82 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type), 83 | "Cannot use `fixed_effects` with weighted CR2" 84 | ) 85 | } else { 86 | ro <- iv_robust(Y ~ X1 + X2 + factor(B) + factor(B2) | Z + X2 + factor(B) + factor(B2), clusters = cl, data = dat, weights = w, se_type = se_type) 87 | rfo <- iv_robust(Y ~ X1 + X2 | Z + X2, fixed_effects = ~ B + B2, clusters = cl, data = dat, weights = w, se_type = se_type) 88 | 89 | expect_equivalent( 90 | tidy(ro)[ro$term %in% c("X1", "X2"), ], 91 | tidy(rfo)[rfo$term %in% c("X1", "X2"), ] 92 | ) 93 | 94 | expect_equivalent( 95 | ro$fitted.values, 96 | rfo$fitted.values 97 | ) 98 | 99 | expect_equal( 100 | ro[c("r.squared", "adj.r.squared")], 101 | rfo[c("r.squared", "adj.r.squared")] 102 | ) 103 | } 104 | } 105 | }) 106 | 107 | test_that("IV FE warns about diagnostics", { 108 | 109 | expect_warning( 110 | iv_robust(mpg ~ hp | wt, data = mtcars, fixed_effects = cyl, diagnostics = TRUE), 111 | "Will not return `diagnostics` if `fixed_effects` are used." 112 | ) 113 | 114 | }) 115 | -------------------------------------------------------------------------------- /tests/testthat/test-lh-robust.R: -------------------------------------------------------------------------------- 1 | context("Estimator - lh_robust") 2 | set.seed(40) 3 | N <- 40 4 | dat <- data.frame( 5 | Y = rnorm(N), 6 | Y2 = rnorm(N), 7 | Z = rbinom(N, 1, .5), 8 | X = rnorm(N), 9 | B = factor(rep(1:2, times = c(8, 12))), 10 | cl = sample(1:4, size = N, replace = T), 11 | w = runif(N) 12 | ) 13 | 14 | # se tests 15 | test_that("lh_robust works with all se types", { 16 | skip_if_not_installed("car") 17 | for (se_type in se_types) { 18 | lhro <- 19 | tidy( 20 | lh_robust( 21 | mpg ~ cyl + disp, 22 | data = mtcars, 23 | linear_hypothesis = "cyl + disp = 0", 24 | se_type = se_type 25 | ) 26 | ) 27 | lmro <- 28 | lm_robust(mpg ~ cyl + disp, data = mtcars, se_type = se_type) 29 | linHyp <- 30 | car::linearHypothesis(lmro, hypothesis.matrix = "cyl + disp = 0") 31 | 32 | expect_equal(lhro$std.error[lhro$term == "cyl + disp = 0"], 33 | sqrt(as.numeric(attr(linHyp , "vcov")))) 34 | } 35 | }) 36 | 37 | test_that("lh_robust with clusters works for all se_types ", { 38 | skip_if_not_installed("car") 39 | for (se_type in cr_se_types) { 40 | lhro <- 41 | tidy( 42 | lh_robust( 43 | Y ~ Z * X, 44 | data = dat, 45 | clusters = cl, 46 | linear_hypothesis = "Z + Z:X = 0", 47 | se_type = se_type 48 | ) 49 | ) 50 | lmro <- 51 | lm_robust(Y ~ Z * X, 52 | data = dat, 53 | se_type = se_type, 54 | clusters = cl) 55 | linHyp <- 56 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 57 | expect_equal(lhro$std.error[lhro$term == "Z + Z:X = 0"], 58 | sqrt(as.numeric(attr(linHyp , "vcov")))) 59 | } 60 | }) 61 | 62 | test_that("lh_robust matches lm_robust with fixed effects", { 63 | skip_if_not_installed("car") 64 | 65 | lhro <- 66 | lh_robust( 67 | Y ~ Z * X, 68 | data = dat, 69 | fixed_effects = ~ B, 70 | linear_hypothesis = c("Z + Z:X = 
0") 71 | ) 72 | lmro <- lm_robust(Y ~ Z * X, data = dat, fixed_effects = ~ B) 73 | linHyp <- 74 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 75 | tidy_lhro <- tidy(lhro) 76 | 77 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 78 | sqrt(as.numeric(attr(linHyp , "vcov")))) 79 | 80 | }) 81 | 82 | test_that("lh_robust matches lm_robust with weights", { 83 | skip_if_not_installed("car") 84 | 85 | lhro <- 86 | lh_robust( 87 | Y ~ Z * X, 88 | data = dat, 89 | weights = w, 90 | linear_hypothesis = c("Z + Z:X = 0") 91 | ) 92 | tidy_lhro <- tidy(lhro) 93 | lmro <- lm_robust(Y ~ Z * X, data = dat, weights = w) 94 | linHyp <- 95 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 96 | 97 | 98 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 99 | sqrt(as.numeric(attr(linHyp , "vcov")))) 100 | 101 | }) 102 | 103 | test_that("lh_robust matches lm_robust with subsetted data.frame", { 104 | skip_if_not_installed("car") 105 | 106 | lhro <- 107 | lh_robust(Y ~ Z * X, 108 | data = dat, 109 | subset = B == 1, 110 | linear_hypothesis = c("Z + Z:X = 0")) 111 | tidy_lhro <- tidy(lhro) 112 | lmro <- lm_robust(Y ~ Z * X, data = dat, subset = B == 1) 113 | linHyp <- 114 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 115 | 116 | 117 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 118 | sqrt(as.numeric(attr(linHyp , "vcov")))) 119 | 120 | }) 121 | 122 | test_that("lh_robust matches lm_robust with subsetted data.frame", { 123 | skip_if_not_installed("car") 124 | 125 | lhro <- 126 | lh_robust(Y ~ Z * X, 127 | data = dat, 128 | subset = B == 1, 129 | linear_hypothesis = c("Z + Z:X = 0")) 130 | tidy_lhro <- tidy(lhro) 131 | lmro <- lm_robust(Y ~ Z * X, data = dat, subset = B == 1) 132 | linHyp <- 133 | car::linearHypothesis(lmro, hypothesis.matrix = "Z + Z:X = 0") 134 | 135 | 136 | expect_equal(tidy_lhro$std.error[tidy_lhro$term == "Z + Z:X = 0"], 137 | sqrt(as.numeric(attr(linHyp , "vcov")))) 138 | 139 | }) 140 | 141 | # lh test 142 | test_that("returns error when no linear hypothesis is specified", { 143 | expect_error(lh_robust(Y ~ Z * X, data = dat)) 144 | }) 145 | -------------------------------------------------------------------------------- /tests/testthat/test-lm-robust-helper.R: -------------------------------------------------------------------------------- 1 | # N <- 10000 2 | # dat <- data.frame(y = rnorm(N), x1 = rnorm(N), x2 = rnorm(N)) 3 | # X <- model.matrix.default(~ x1 + x2, data = dat) 4 | # y <- dat$y 5 | # fit <- lm(y ~ x1 + x2, data = dat) 6 | # 7 | # # Speed Tests ------------------------------------------------------------- 8 | # 9 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "classical"))) 10 | # 11 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC0"))) 12 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC1"))) 13 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC2"))) 14 | # system.time(replicate(500, lm_robust_helper(X = X, y = dat$y, type = "HC3"))) 15 | # 16 | # 17 | -------------------------------------------------------------------------------- /tests/testthat/test-lm-robust_emmeans.R: -------------------------------------------------------------------------------- 1 | context("S3 - emmeans") 2 | 3 | 4 | 5 | test_that("emmeans can work with lm_robust objects", { 6 | skip_if_not_installed("emmeans") 7 | library(emmeans) 8 | lmr <- lm_robust(mpg ~ factor(cyl) * hp + wt, data = 
mtcars) 9 | 10 | rg <- emmeans::ref_grid(lmr) 11 | expect_equal(class(rg)[1], "emmGrid") 12 | 13 | grid <- rg@grid 14 | expect_equal(nrow(grid), 3) 15 | expect_equal(sum(grid$.wgt.), 32) 16 | expect_equal(predict(rg)[1], 17.424, tolerance = .01) 17 | }) 18 | 19 | test_that("lm_robust multivariate model works with emmeans", { 20 | skip_if_not_installed("emmeans") 21 | library(emmeans) 22 | lmr <- lm_robust(yield ~ Block + Variety, data = emmeans::MOats) 23 | emm <- emmeans(lmr, "rep.meas") 24 | expect_equal(summary(emm)$emmean[4], 123.4, tolerance = 0.1) 25 | }) 26 | 27 | test_that("lm_robust model with rank deficiency works with emmeans", { 28 | skip_if_not_installed("emmeans") 29 | library(emmeans) 30 | lmr <- lm_robust(log(breaks) ~ wool * tension, data = warpbreaks, subset = -(19:30)) 31 | pred <- predict(ref_grid(lmr)) 32 | expect_true(is.na(pred[5])) 33 | expect_equal(length(pred), 6) 34 | expect_equal(sum(is.na(pred)), 1) 35 | }) 36 | 37 | # Not testing emmeans package capabilities themselves. If we can construct the 38 | # reference grid correctly, we are basically OK. 39 | # Pretty much anything else that could fail would happen in the emmeans package, 40 | # not in the support methods in this package. 41 | 42 | -------------------------------------------------------------------------------- /tests/testthat/test-lm-robust_margins.R: -------------------------------------------------------------------------------- 1 | context("Helper - lm_robust margins") 2 | 3 | 4 | 5 | mv <- c("AME", "SE", "z", "p") 6 | 7 | test_that("lm robust can work with margins", { 8 | skip_if_not_installed("margins") 9 | x <- lm(mpg ~ cyl * hp + wt, data = mtcars) 10 | lmr <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars) 11 | 12 | # Note old package vce defaults to delta 13 | # new margins on github defaults to none with our obj 14 | lm_sum_marg <- summary( 15 | margins::margins( 16 | x, 17 | vcov = sandwich::vcovHC(x, type = "HC2"), 18 | vce = "delta" 19 | ) 20 | ) 21 | 22 | lmr_sum_marg <- margins:::summary.margins(margins::margins(lmr, vce = "delta")) 23 | 24 | # Close enough with HC2? 
25 | expect_equal( 26 | lm_sum_marg[, mv], 27 | lmr_sum_marg[, mv], 28 | tolerance = 0.01 29 | ) 30 | 31 | # Close with classical 32 | lmr_class <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars, se_type = "classical") 33 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "delta")) 34 | lmc <- margins:::summary.margins(margins::margins(x, vce = "delta")) 35 | expect_equal( 36 | lmc[, mv], 37 | lmrc[, mv], 38 | tolerance = 0.01 39 | ) 40 | 41 | # Works with other vce 42 | set.seed(42) 43 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "bootstrap", iterations = 10L)) 44 | expect_true(!any(is.na(lmrc))) 45 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "simulation", iterations = 10L)) 46 | expect_true(!any(is.na(lmrc))) 47 | lmrc <- margins:::summary.margins(margins::margins(lmr_class, vce = "simulation", iterations = 10L)) 48 | expect_true(!any(is.na(lmrc))) 49 | }) 50 | 51 | test_that("lm robust + weights can work with margins", { 52 | skip_if_not_installed("margins") 53 | x <- lm(mpg ~ cyl * hp, data = mtcars, weights = wt) 54 | x2 <- lm_robust(mpg ~ cyl * hp, data = mtcars, weights = wt, se_type = "classical") 55 | expect_equal(margins::marginal_effects(x), margins::marginal_effects(x2)) 56 | 57 | 58 | suppressWarnings( 59 | {lmc <- round(margins:::summary.margins(margins::margins(x, vce = "delta"))[, mv], 3)} 60 | ) 61 | 62 | suppressWarnings( 63 | {lmr <- round(margins:::summary.margins(margins::margins(x2, vce = "delta"))[, mv], 3)} 64 | ) 65 | 66 | expect_equal(lmc, lmr) 67 | }) 68 | 69 | test_that("lm robust + cluster can work with margins", { 70 | skip_if_not_installed("margins") 71 | # works but throws a lot of warnings 72 | x <- lm(mpg ~ cyl * hp + wt, data = mtcars) 73 | x2 <- lm_robust(mpg ~ cyl * hp + wt, data = mtcars, clusters = am) 74 | 75 | lmc <- round(margins:::summary.margins(margins::margins(x, vce = "delta"))[, mv], 8) 76 | 77 | expect_warning( 78 | lmr <- round(margins:::summary.margins(margins::margins(x2, vce = "delta"))[, mv], 8), 79 | NA 80 | ) 81 | 82 | # With rounding 83 | expect_equal(lmc[, 1], lmr[, 1]) 84 | expect_true( 85 | !any(lmc[, 2] == lmr[, 2]) 86 | ) 87 | 88 | # Works with character cluster (avoided terms(mod) "dataClasses" problem) 89 | mtcars$testc <- letters[1:4] 90 | expect_error( 91 | margins::margins(lm_robust(mpg ~ cyl * hp + wt, data = mtcars, clusters = testc)), 92 | NA 93 | ) 94 | }) 95 | 96 | 97 | test_that("lm lin can work with margins", { 98 | skip_if_not_installed("margins") 99 | data("alo_star_men") 100 | lml <- lm_lin(GPA_year1 ~ ssp, ~ gpa0, data = alo_star_men, se_type = "classical") 101 | 102 | alo_star_men$gpa0_tilde <- alo_star_men$gpa0 - mean(alo_star_men$gpa0) 103 | 104 | lmo <- lm(GPA_year1 ~ ssp * gpa0_tilde, data = alo_star_men) 105 | 106 | lml_sum <- margins:::summary.margins(margins::margins(lml, vce = "delta")) 107 | lmo_sum <- margins:::summary.margins(margins::margins(lmo, vce = "delta")) 108 | 109 | expect_equal( 110 | round(lml_sum[, 4], 5), 111 | round(lmo_sum[, 4], 5) 112 | ) 113 | }) 114 | -------------------------------------------------------------------------------- /tests/testthat/test-modelsummary.R: -------------------------------------------------------------------------------- 1 | # This file is ignored by .Rbuildignore to keep from suggesting gt and modelsummary 2 | 3 | context("S3 - modelsummary works") 4 | 5 | test_that("modelsummary works with glance", { 6 | skip_if_not_installed("modelsummary") 7 | 8 | library(modelsummary) 9 | 10 | 
set.seed(5) 11 | 12 | model1 <- lm_robust(mpg ~ am, mtcars) 13 | model2 <- lm_robust(mpg ~ am, mtcars, clusters = cyl) 14 | model3 <- lm_lin(mpg ~ am, ~ cyl, mtcars) 15 | 16 | mso <- modelsummary(list(model1, model2, model3), output = "data.frame") 17 | 18 | expect_equal(colnames(mso), c("part", "term", "statistic", 19 | "(1)", "(2)", "(3)")) 20 | 21 | expect_equal(nrow(mso), 15L) 22 | 23 | expect_equal(ncol(mso), 6L) 24 | 25 | # iv_robust 26 | model1 <- iv_robust(mpg ~ am | gear, mtcars) 27 | model2 <- iv_robust(mpg ~ am | gear, mtcars, clusters = cyl, diagnostics = TRUE) 28 | 29 | mso <- modelsummary(list(model1, model2), 30 | gof_omit = c("N|[sS]tatistic|p.value|p{1}"), 31 | output = "data.frame") 32 | 33 | expect_equal(nrow(mso), 10) 34 | 35 | expect_equal(ncol(mso), 5) 36 | 37 | # difference_in_means 38 | model1 <- difference_in_means(mpg ~ am, mtcars) 39 | model2 <- difference_in_means(mpg ~ am, mtcars, blocks = vs) 40 | mso <- modelsummary(list(model1, model2), output = "data.frame") 41 | 42 | 43 | # horvitz_thompson 44 | model1 <- horvitz_thompson(mpg ~ am, mtcars) 45 | model2 <- horvitz_thompson(mpg ~ am, mtcars, blocks = vs) 46 | 47 | mso <-modelsummary(list(model1, model2), output = "data.frame") 48 | 49 | expect_equal(nrow(mso), 6) 50 | 51 | expect_equal(ncol(mso), 5) 52 | 53 | }) 54 | 55 | -------------------------------------------------------------------------------- /tests/testthat/test-na-omit-details.R: -------------------------------------------------------------------------------- 1 | 2 | context("Helper - na.omit_detailed") 3 | 4 | df <- expand.grid(Y = c(1:5, NA), Z = c(LETTERS, NA)) 5 | 6 | stock <- na.omit(df) 7 | detailed <- na.omit_detailed.data.frame(df) 8 | 9 | stock_action <- attr(stock, "na.action") 10 | detailed_action <- attr(detailed, "na.action") 11 | 12 | test_that("Omits are the same", { 13 | expect_equal( 14 | as.vector(stock_action), 15 | as.vector(detailed_action) 16 | ) 17 | }) 18 | 19 | test_that("Row names are set correctly", { 20 | expect_identical( 21 | names(stock_action), 22 | names(detailed_action) 23 | ) 24 | }) 25 | 26 | test_that("Logic for nested dfs and lists holds", { 27 | df$X <- list(x = c(NA, 2:nrow(df))) 28 | df$Xmat <- matrix(rep(c(1, NA, 3:nrow(df)), 2), nrow(df)) 29 | 30 | stock <- na.omit(df) 31 | detailed <- na.omit_detailed.data.frame(df) 32 | 33 | stock_action <- attr(stock, "na.action") 34 | detailed_action <- attr(detailed, "na.action") 35 | 36 | expect_identical( 37 | names(stock_action), 38 | names(detailed_action) 39 | ) 40 | }) 41 | -------------------------------------------------------------------------------- /tests/testthat/test-replicate-HT-middleton.R: -------------------------------------------------------------------------------- 1 | context("Verification - HT matches Joel Middleton code") 2 | 3 | test_that("We match Joel's estimator", { 4 | 5 | # Code from Joel Middleton 6 | n <- 400 7 | 8 | # simple random assignment 9 | d.00 <- diag(rep(1, n)) 10 | dmat <- rbind( 11 | cbind(d.00, -d.00) 12 | , cbind(-d.00, d.00) 13 | ) 14 | d.tilde <- diag(rep(2, 2 * n)) 15 | d.00 <- matrix(rep(-0.001251564, n ^ 2), ncol = n) + diag(rep(1 + 0.001251564, n)) 16 | pmat <- diag(rep(.5, 2 * n)) %*% (dmat + 1) %*% diag(rep(.5, 2 * n)) 17 | pmat.true <- pmat 18 | pmat[pmat == 0] <- 1 19 | d.tilde.wt <- d.tilde / pmat 20 | 21 | # complete random assignment 22 | dmat.CR <- round(rbind( 23 | cbind(d.00, -d.00) 24 | , cbind(-d.00, d.00) 25 | ), 10) 26 | pmat.CR <- diag(rep(.5, 2 * n)) %*% (dmat.CR + 1) %*% diag(rep(.5, 2 * n)) 27 | 
pmat.CR.true <- pmat.CR 28 | pmat.CR[pmat.CR == 0] <- 1 29 | d.tilde.CR <- dmat.CR + (dmat.CR == -1) + diag(rep(1, 2 * n)) 30 | d.tilde.wt.CR <- d.tilde.CR / pmat.CR 31 | 32 | # ourpmat <- declaration_to_condition_pr_mat(randomizr::declare_ra(N = 400, prob = 0.5, simple = F)) 33 | # pmat.CR.true[1:5, 1:5] 34 | # ourpmat[1:5, 1:5] 35 | # pmat.CR.true[401:410, 401:410] 36 | # ourpmat[401:410, 401:410] 37 | # pmat.CR.true[401:410, 1:10] 38 | # ourpmat[401:410,1:10] 39 | 40 | ## DGP with truly random 0.5 chance for each unit being treated 41 | dat <- 42 | data.frame( 43 | p = 0.5, 44 | z = rbinom(n, 1, 0.5), 45 | y0 = rnorm(n, sd = 3) 46 | ) 47 | 48 | # Constant treatment effects, SRS 49 | dat$y1 <- dat$y0 + 3 50 | Y <- c(-dat$y0, dat$y1) 51 | R <- c(1 - dat$z, dat$z) 52 | pi.inv <- (c(rep(1 / (1 - dat$p)), rep(1 / dat$p))) 53 | 54 | ht_est <- sum(R * pi.inv * Y) / n 55 | y1.hat <- dat$y0 + ht_est 56 | y0.hat <- dat$y1 - ht_est 57 | # true_ses_ht <- sqrt(t(Y)%*%dmat%*%Y)/n 58 | Y.hat <- R * Y + (1 - R) * c(-y0.hat, y1.hat) 59 | se_ht <- sqrt(t(Y * R) %*% d.tilde.wt %*% (Y * R)) / n 60 | se_constant_ht <- sqrt(t(Y.hat) %*% dmat %*% Y.hat) / n 61 | 62 | dat$y <- ifelse(dat$z == 1, dat$y1, dat$y0) 63 | 64 | # Simple random assignment 65 | ht_decl_o <- horvitz_thompson( 66 | y ~ z, 67 | data = dat, 68 | ra_declaration = randomizr::declare_ra( 69 | N = nrow(dat), 70 | prob = dat$p[1], 71 | simple = TRUE 72 | ) 73 | ) 74 | 75 | # Second way to do same estimator, since it's SRS 76 | ht_prob_o <- horvitz_thompson( 77 | y ~ z, 78 | data = dat, 79 | condition_prs = p 80 | ) 81 | 82 | expect_equal( 83 | tidy(ht_decl_o)[, c("estimate", "std.error")], 84 | tidy(ht_prob_o)[, c("estimate", "std.error")] 85 | ) 86 | expect_equivalent( 87 | as.numeric(tidy(ht_decl_o)[, c("estimate", "std.error")]), 88 | c(ht_est, se_ht) 89 | ) 90 | 91 | # Now with constant effects assumption 92 | ht_const_o <- horvitz_thompson( 93 | y ~ z, 94 | data = dat, 95 | ra_declaration = randomizr::declare_ra( 96 | N = nrow(dat), 97 | prob = dat$p[1], 98 | simple = TRUE 99 | ), 100 | se_type = "constant" 101 | ) 102 | 103 | expect_equivalent( 104 | as.numeric(tidy(ht_const_o)[, c("estimate", "std.error")]), 105 | c(ht_est, se_constant_ht) 106 | ) 107 | 108 | ## Constant treatment effects, CRS 109 | dat$z <- sample(rep(0:1, each = n / 2)) 110 | dat$y <- ifelse(dat$z == 1, dat$y1, dat$y0) 111 | 112 | R <- c(1 - dat$z, dat$z) 113 | pi.inv <- (c(rep(1 / (1 - dat$p)), rep(1 / dat$p))) 114 | 115 | ht_comp_est <- sum(R * pi.inv * Y) / n 116 | Y.hat <- R * Y + (1 - R) * c(-y0.hat, y1.hat) 117 | se_comp_ht <- sqrt(t(Y * R) %*% d.tilde.wt.CR %*% (Y * R)) / n 118 | se_comp_constant_ht <- sqrt(t(Y.hat) %*% dmat.CR %*% Y.hat) / n 119 | 120 | 121 | # complete random assignment 122 | ht_comp_decl_o <- horvitz_thompson( 123 | y ~ z, 124 | data = dat, 125 | ra_declaration = randomizr::declare_ra( 126 | N = nrow(dat), 127 | prob = dat$p[1], 128 | simple = FALSE 129 | ), 130 | return_condition_pr_mat = T 131 | ) 132 | # ht_comp_decl_o$condition_pr_mat[1:5, 1:5] 133 | # pmat.CR.true[1:5, 1:5] 134 | 135 | # Don't match right now because pmats are diff 136 | # expect_equal( 137 | # tidy(ht_comp_decl_o)[, c("estimate", "std.error")], 138 | # c(ht_comp_est, se_comp_ht) 139 | # ) 140 | 141 | # Does match if I use JM's pmat 142 | ht_comp_decl_o <- horvitz_thompson( 143 | y ~ z, 144 | data = dat, 145 | condition_pr_mat = pmat.CR.true 146 | ) 147 | expect_equivalent( 148 | as.numeric(tidy(ht_comp_decl_o)[, c("estimate", "std.error")]), 149 | c(ht_comp_est, 
se_comp_ht) 150 | ) 151 | 152 | # Now with constant effects assumption 153 | # ht_comp_const_o <- horvitz_thompson( 154 | # y ~ z, 155 | # data = dat, 156 | # ra_declaration = randomizr::declare_ra( 157 | # N = nrow(dat), 158 | # prob = dat$p[1], 159 | # simple = FALSE 160 | # ), 161 | # se_type = "constant" 162 | # ) 163 | # 164 | # expect_equivalent( 165 | # tidy(ht_comp_const_o)[, c("estimate", "std.error")], 166 | # c(ht_comp_est, se_comp_constant_ht) 167 | # ) 168 | 169 | 170 | # ht_comp_const_o <- horvitz_thompson( 171 | # y ~ z, 172 | # data = dat, 173 | # condition_pr_mat = pmat.CR.true, 174 | # se_type = "constant" 175 | # ) 176 | # expect_equivalent( 177 | # tidy(ht_comp_const_o)[, c("estimate", "std.error")], 178 | # c(ht_comp_est, se_comp_constant_ht) 179 | # ) 180 | 181 | # Not matching so we error 182 | expect_error( 183 | horvitz_thompson( 184 | y ~ z, 185 | data = dat, 186 | condition_pr_mat = pmat.CR.true, 187 | se_type = "constant" 188 | ), 189 | "`se_type` = 'constant' only supported for simple" 190 | ) 191 | }) 192 | -------------------------------------------------------------------------------- /tests/testthat/test-replicate-lin2013.R: -------------------------------------------------------------------------------- 1 | context("Verification - lm_lin replicates Lin 2013") 2 | # Lin paper available here: www.stat.berkeley.edu/~winston/agnostic.pdf 3 | # Citation: 4 | # Lin, Winston. 2013. "Agnostic notes on regression adjustments to experimental 5 | # data: Reexamining Freedman’s critique." The Annals of Applied Statistics. 6 | # Stat. 7(1): 295-318. doi:10.1214/12-AOAS583. 7 | # https://projecteuclid.org/euclid.aoas/1365527200. 8 | 9 | test_that("lm_lin recreates Lin 2013 Table 2", { 10 | data("alo_star_men") 11 | 12 | ## Table 2 13 | # Lin uses "classic sandwich," or in our package, HC0 14 | 15 | # unadjusted, Lin est = -0.036, se = 0.158 16 | expect_equivalent( 17 | round( 18 | tidy( 19 | lm_robust( 20 | GPA_year1 ~ sfsp, 21 | data = alo_star_men, 22 | se_type = "HC0" 23 | ) 24 | )[2, c("estimate", "std.error")], 25 | 3 26 | ), 27 | c(-0.036, 0.158) 28 | ) 29 | 30 | 31 | # usual adjusted for HS gpa, Lin est = -0.083, se = 0.146 32 | expect_equivalent( 33 | unlist(round( 34 | tidy( 35 | lm_robust( 36 | GPA_year1 ~ sfsp + gpa0, 37 | data = alo_star_men, 38 | se_type = "HC0" 39 | ) 40 | )[2, c("estimate", "std.error")], 41 | 3 42 | )), 43 | c(-0.083, 0.146) 44 | ) 45 | 46 | # interaction adjusted, Lin est = -0.081, se = 0.146 47 | expect_equivalent( 48 | unlist(round( 49 | tidy( 50 | lm_lin( 51 | GPA_year1 ~ sfsp, 52 | covariates = ~ gpa0, 53 | data = alo_star_men, 54 | se_type = "HC0" 55 | ) 56 | )[2, c("estimate", "std.error")], 57 | 3 58 | )), 59 | c(-0.081, 0.146) 60 | ) 61 | }) 62 | 63 | 64 | ## Table 3 too long to run 65 | rep_table_3 <- FALSE 66 | 67 | if (rep_table_3) { 68 | data("alo_star_men") 69 | 70 | ## Table 3 71 | samp_dat <- alo_star_men 72 | its <- 250000 73 | set.seed(161235) 74 | check_cover <- function(obj, point = 0) { 75 | return(obj$conf.low[2] < point & obj$conf.high[2] > point) 76 | } 77 | ci_dist <- function(obj) { 78 | return(obj$conf.high[2] - obj$conf.low[2]) 79 | } 80 | ci_custom <- function(obj) { 81 | return(list( 82 | conf.high = coef(obj)[2] + obj$std.error[2] * 1.96, 83 | conf.low = coef(obj)[2] - obj$std.error[2] * 1.96 84 | )) 85 | } 86 | 87 | ses <- c("HC0", "HC1", "HC2", "HC3") 88 | 89 | ests <- matrix( 90 | NA, 91 | nrow = its, 92 | ncol = 3 93 | ) 94 | sd_mats <- cover_mats <- width_mats <- 95 | array( 96 | NA, 97 | dim = c(its, 
length(ses), 3) 98 | ) 99 | for (i in 1:its) { 100 | samp_dat$sfsp <- sample(samp_dat$sfsp) 101 | sd_mat <- cover_mat <- width_mat <- 102 | matrix( 103 | NA, 104 | nrow = length(ses), 105 | ncol = 3 106 | ) 107 | for (j in seq_along(ses)) { 108 | unadj <- lm_robust( 109 | GPA_year1 ~ sfsp, 110 | data = samp_dat, 111 | se_type = ses[j] 112 | ) 113 | tradadj <- lm_robust( 114 | GPA_year1 ~ sfsp + gpa0, 115 | data = samp_dat, 116 | se_type = ses[j] 117 | ) 118 | intadj <- lm_lin( 119 | GPA_year1 ~ sfsp, 120 | covariates = ~ gpa0, 121 | data = samp_dat, 122 | se_type = ses[j] 123 | ) 124 | 125 | sd_mat[j, ] <- c(unadj$std.error[2], tradadj$std.error[2], intadj$std.error[2]) 126 | cover_mat[j, ] <- c( 127 | check_cover(ci_custom(unadj)), 128 | check_cover(ci_custom(tradadj)), 129 | check_cover(ci_custom(intadj)) 130 | ) 131 | width_mat[j, ] <- c( 132 | ci_dist(ci_custom(unadj)), 133 | ci_dist(ci_custom(tradadj)), 134 | ci_dist(ci_custom(intadj)) 135 | ) 136 | } 137 | 138 | ests[i, ] <- c(coef(unadj)[2], coef(tradadj)[2], coef(intadj)[2]) 139 | sd_mats[i, , ] <- sd_mat 140 | cover_mats[i, , ] <- cover_mat 141 | width_mats[i, , ] <- width_mat 142 | 143 | 144 | if (i %% 1000 == 0) print(i) 145 | } 146 | 147 | 148 | # Panel A 149 | colMeans(ests) 150 | # Panel B 151 | apply(sd_mats, c(2, 3), mean) - apply(ests, 2, sd) 152 | # Panel C 153 | apply(sd_mats, c(2, 3), sd) 154 | # Panel D, not replicated because he uses normal dist. while we use t dist, all slightly larger 155 | apply(cover_mats, c(2, 3), mean) 156 | # Panel E, not replicated because he uses normal dist. while we use t dist, all slightly larger 157 | apply(width_mats, c(2, 3), mean) 158 | } 159 | -------------------------------------------------------------------------------- /tests/testthat/test-return.R: -------------------------------------------------------------------------------- 1 | context("Output - test similiarity across estimators") 2 | 3 | test_that("Structure of output is the same", { 4 | n <- 40 5 | dat <- data.frame( 6 | y = rnorm(n), 7 | z = c(0, 0, rep(0:1, times = 9)), 8 | x = rnorm(n), 9 | bl = rep(1:4, each = 10), 10 | cl = rep(1:20, each = 2) 11 | ) 12 | 13 | # Should be in all estimator returns 14 | in_return <- 15 | c( 16 | "coefficients", 17 | "std.error", 18 | "df", 19 | "p.value", 20 | "conf.low", 21 | "conf.high", 22 | "outcome", 23 | "alpha", 24 | "nobs" 25 | ) 26 | 27 | lmr_o <- lm_robust(y ~ z, data = dat) 28 | lmr_cl_o <- lm_robust(y ~ z, data = dat, clusters = cl) 29 | lml_o <- lm_lin(y ~ z, ~ x, data = dat) 30 | lml_cl_o <- lm_lin(y ~ z, ~ x, data = dat) 31 | # Major branching for diff estimators is for blocks 32 | ht_o <- horvitz_thompson(y ~ z, data = dat) 33 | ht_bl_o <- horvitz_thompson(y ~ z, blocks = bl, data = dat) 34 | dim_o <- difference_in_means(y ~ z, data = dat) 35 | dim_bl_o <- difference_in_means(y ~ z, blocks = bl, data = dat) 36 | 37 | expect_true(all(in_return %in% names(lmr_o))) 38 | expect_true(all(in_return %in% names(lmr_cl_o))) 39 | expect_true(all(in_return %in% names(lml_o))) 40 | expect_true(all(in_return %in% names(lml_cl_o))) 41 | expect_true(all(in_return %in% names(dim_o))) 42 | expect_true(all(in_return %in% names(dim_bl_o))) 43 | expect_true(all(in_return %in% names(ht_o))) 44 | expect_true(all(in_return %in% names(ht_bl_o))) 45 | 46 | expect_equal( 47 | colnames(tidy(lmr_o)), 48 | colnames(tidy(lmr_cl_o)), 49 | colnames(tidy(lml_o)), 50 | colnames(tidy(lml_cl_o)), 51 | colnames(tidy(ht_o)), 52 | colnames(tidy(ht_bl_o)), 53 | colnames(tidy(dim_o)), 54 | 
colnames(tidy(dim_bl_o)) 55 | ) 56 | 57 | expect_equal( 58 | difference_in_means(y ~ z - 1, data = dat)$term, 59 | "z" 60 | ) 61 | }) 62 | 63 | # test_that("Warns properly if df is negative or 0", { 64 | # dat = data.frame(y = 1, z = 1, p = .5) 65 | # I can't come up with a meaningful test for this now 66 | # This should never happen I don't think 67 | # expect_warning( 68 | # (y ~ z, data = dat, condition_prs = p), 69 | # "Estimated negative or zero degrees of freedom" 70 | # ) 71 | # }) 72 | -------------------------------------------------------------------------------- /tests/testthat/test-sig-testing.R: -------------------------------------------------------------------------------- 1 | context("Helper - significance testing") 2 | 3 | test_that("Errors properly", { 4 | dat <- data.frame( 5 | mps = rep(1:4, each = 2), 6 | y = rnorm(8), 7 | z = c(0, 1) 8 | ) 9 | 10 | expect_warning( 11 | lm_lin(y ~ z, ~ factor(mps), data = dat), 12 | "Some degrees of freedom have been estimated as negative or zero" 13 | ) 14 | 15 | expect_error( 16 | lm_robust(y ~ z, data = dat, alpha = 10), 17 | "`alpha` must be numeric between 0 and 1" 18 | ) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-stata-output.R: -------------------------------------------------------------------------------- 1 | context("Verification - lm and iv match Stata") 2 | 3 | test_that("lm_robust matches stata", { 4 | 5 | # write.csv(mtcars, 6 | # file = 'tests/testthat/mtcars.csv', 7 | # row.names = F) 8 | 9 | stata_ests <- read.table( 10 | "stata-ests.txt", 11 | col.names = c("model", "se1", "se2", "df", "fstat"), 12 | stringsAsFactors = FALSE 13 | ) 14 | 15 | mtcars$w <- mtcars$drat / 5 16 | 17 | estimatr_mat <- matrix(NA, 10, 4) 18 | 19 | lm_c <- lm_robust(mpg ~ hp, data = mtcars, se_type = "classical") 20 | estimatr_mat[1, ] <- c(lm_c$std.error ^ 2, lm_c$df[2], lm_c$fstatistic[1]) 21 | lm_hc1 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC1") 22 | estimatr_mat[2, ] <- c(lm_hc1$std.error ^ 2, lm_hc1$df[2], lm_hc1$fstatistic[1]) 23 | lm_hc2 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC2") 24 | estimatr_mat[3, ] <- c(lm_hc2$std.error ^ 2, lm_hc2$df[2], lm_hc2$fstatistic[1]) 25 | lm_hc3 <- lm_robust(mpg ~ hp, data = mtcars, se_type = "HC3") 26 | estimatr_mat[4, ] <- c(lm_hc3$std.error ^ 2, lm_hc3$df[2], lm_hc3$fstatistic[1]) 27 | 28 | lm_stata <- lm_robust(mpg ~ hp, clusters = cyl, data = mtcars, se_type = "stata") 29 | estimatr_mat[5, ] <- c(lm_stata$std.error ^ 2, lm_stata$df[2], lm_stata$fstatistic[1]) 30 | 31 | lm_c_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "classical") 32 | estimatr_mat[6, ] <- c(lm_c_w$std.error ^ 2, lm_c_w$df[2], lm_c_w$fstatistic[1]) 33 | lm_hc1_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC1") 34 | estimatr_mat[7, ] <- c(lm_hc1_w$std.error ^ 2, lm_hc1_w$df[2], lm_hc1_w$fstatistic[1]) 35 | lm_hc2_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC2") 36 | estimatr_mat[8, ] <- c(lm_hc2_w$std.error ^ 2, lm_hc2_w$df[2], lm_hc2_w$fstatistic[1]) 37 | lm_hc3_w <- lm_robust(mpg ~ hp, data = mtcars, weights = w, se_type = "HC3") 38 | estimatr_mat[9, ] <- c(lm_hc3_w$std.error ^ 2, lm_hc3_w$df[2], lm_hc3_w$fstatistic[1]) 39 | lm_stata_w <- lm_robust(mpg ~ hp, clusters = cyl, weights = w, data = mtcars, se_type = "stata") 40 | estimatr_mat[10, ] <- c(lm_stata_w$std.error ^ 2, lm_stata_w$df[2], lm_stata_w$fstatistic[1]) 41 | 42 | # All look numerically identical except for HC2 and HC3 with 
weights which 43 | # have non-negligible difference. This is due to differences in how the hat 44 | # matrix is built that are still unresolved 45 | 46 | # Therefore rows 8 and 9 will have larger differences 47 | expect_true( 48 | max(abs(estimatr_mat[c(1:7, 10), 1:4] - apply(stata_ests[c(1:7, 10), c(3, 2, 4, 5)], 2, as.numeric))) < 1e-5 49 | ) 50 | 51 | }) 52 | 53 | 54 | 55 | test_that("iv_robust matches stata", { 56 | 57 | skip_if_not_installed("AER") 58 | 59 | # write.csv(mtcars, 60 | # file = 'tests/testthat/mtcars.csv', 61 | # row.names = F) 62 | 63 | stata_ests <- read.table( 64 | "stata-iv-ests.txt", 65 | col.names = c("model", "v1", "v2", "v3", "fstat", "r2", "r2_a", "rmse"), 66 | stringsAsFactors = FALSE 67 | ) 68 | 69 | mtcars$w <- mtcars$drat / 5 70 | 71 | estimatr_mat <- matrix(NA, 6, 7) 72 | iv_c <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, se_type = "classical") 73 | estimatr_mat[1, ] <- c(iv_c$std.error ^ 2, iv_c$fstatistic[1], iv_c$r.squared, iv_c$adj.r.squared, sqrt(iv_c$res_var)) 74 | iv_hc1 <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, se_type = "HC1") 75 | estimatr_mat[2, ] <- c(iv_hc1$std.error ^ 2, iv_hc1$fstatistic[1], iv_hc1$r.squared, iv_hc1$adj.r.squared, sqrt(iv_hc1$res_var)) 76 | iv_stata <- iv_robust(mpg ~ hp + am | wt + gear, clusters = cyl, data = mtcars, se_type = "stata") 77 | estimatr_mat[3, ] <- c(iv_stata$std.error ^ 2, iv_stata$fstatistic[1], iv_stata$r.squared, iv_stata$adj.r.squared, sqrt(iv_stata$res_var)) 78 | 79 | iv_c_w <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, weights = w, se_type = "classical") 80 | estimatr_mat[4, ] <- c(iv_c_w$std.error ^ 2, iv_c_w$fstatistic[1], iv_c_w$r.squared, iv_c_w$adj.r.squared, sqrt(iv_c_w$res_var)) 81 | iv_hc1_w <- iv_robust(mpg ~ hp + am | wt + gear, data = mtcars, weights = w, se_type = "HC1") 82 | estimatr_mat[5, ] <- c(iv_hc1_w$std.error ^ 2, iv_hc1_w$fstatistic[1], iv_hc1_w$r.squared, iv_hc1_w$adj.r.squared, sqrt(iv_hc1_w$res_var)) 83 | iv_stata_w <- iv_robust(mpg ~ hp + am | wt + gear, clusters = cyl, weights = w, data = mtcars, se_type = "stata") 84 | estimatr_mat[6, ] <- c(iv_stata_w$std.error ^ 2, iv_stata_w$fstatistic[1], iv_stata_w$r.squared, iv_stata_w$adj.r.squared, sqrt(iv_stata_w$res_var)) 85 | 86 | expect_true( 87 | max(abs(estimatr_mat[, 1] - as.numeric(stata_ests[, 4]))) < 2e-05 88 | ) 89 | 90 | expect_true( 91 | max(abs(estimatr_mat[, 4] - as.numeric(stata_ests[, 5]))) < 3e-05 92 | ) 93 | 94 | # Note, RMSE is different for stata with weights than ivreg or iv_robust 95 | expect_true( 96 | max(abs(estimatr_mat[, 5:6] - stata_ests[, 6:7])) < 4e-08 97 | ) 98 | 99 | ivrego_w <- AER::ivreg(mpg ~ hp + am | wt + gear, data = mtcars, weights = w) 100 | 101 | expect_equal( 102 | ivrego_w$sigma, 103 | sqrt(iv_c_w$res_var) 104 | ) 105 | expect_equal( 106 | ivrego_w$sigma, 107 | sqrt(iv_hc1_w$res_var) 108 | ) 109 | expect_equal( 110 | ivrego_w$sigma, 111 | sqrt(iv_stata_w$res_var) 112 | ) 113 | 114 | }) 115 | -------------------------------------------------------------------------------- /tests/testthat/test-texreg.R: -------------------------------------------------------------------------------- 1 | # This file is ignored by .Rbuildignore to keep from suggesting texreg 2 | 3 | context("S3 - texreg builds") 4 | 5 | test_that("texreg extension works", { 6 | 7 | model2 <- lm_robust(extra~group, sleep, clusters = ID) 8 | 9 | capture.output(treg <- extract(model2, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)) 10 | expect_is( 11 | treg, 12 | "texreg" 13 | ) 14 | 15 | # Defaults to 
having CIs 16 | expect_true(grepl("-0.53;", texreg::texreg(model2))) 17 | 18 | # Remove to get SEs 19 | expect_true(grepl("\\(0.57\\)", texreg::texreg(model2, include.ci = FALSE))) 20 | 21 | # Has Nclust 22 | expect_true(grepl("N Clusters.*10", texreg::texreg(model2))) 23 | 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-zzz.R: -------------------------------------------------------------------------------- 1 | context("zzz.R - .onLoad") 2 | 3 | test_that("onLoad makes generics if texreg is present", { 4 | 5 | e <- environment(.onLoad) 6 | 7 | environment(.onLoad) <- new.env(parent = parent.env(e)) 8 | environment(.onLoad)$extract.lm_robust <- e$extract.lm_robust 9 | 10 | expect_null(.onLoad("estimatr", "estimatr")) 11 | environment(.onLoad) <- e 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test-zzzbroom.R: -------------------------------------------------------------------------------- 1 | # added to .Rbuildignore to keep CRAN from complaining that broom 2 | # isn't a SUGGESTed package 3 | context("zzzbroom.R - .onAttach") 4 | 5 | test_that(".onLoad message if old version of 'broom' is installed", { 6 | skip_if_not_installed("broom") 7 | skip_if(packageVersion("broom") > "0.5.0") 8 | library(broom) 9 | expect_message( 10 | .onLoad("estimatr", "estimatr"), 11 | "the `broom` package version 0.5.0 or earlier is loaded" 12 | ) 13 | }) 14 | -------------------------------------------------------------------------------- /vignettes/absorbing-fixed-effects.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Absorbing Fixed Effects with estimatr" 3 | output: 4 | html_document: 5 | df_print: paged 6 | vignette: | 7 | %\VignetteIndexEntry{Absorbing Fixed Effects with estimatr} 8 | \usepackage[utf8]{inputenc} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = TRUE) 14 | ``` 15 | 16 | Whether analyzing a block-randomized experiment or adding fixed effects for a panel model, absorbing group means can speed up estimation time. The `fixed_effects` argument in both `lm_robust` and `iv_robust` allows you to do just that, although the speed gains are greatest with "HC1" standard errors. Specifying fixed effects is really simple. 17 | 18 | ```{r} 19 | library(estimatr) 20 | lmr_out <- lm_robust(mpg ~ hp, data = mtcars, fixed_effects = ~ cyl) 21 | lmr_out 22 | lmr_out$fixed_effects 23 | ``` 24 | 25 | Before proceeding, three quick notes: 26 | 27 | * Most of the speed gains occur when estimating "HC1" robust standard errors, or "stata" standard errors when there is clustering. This is because most of the speed gains come from avoiding inverting a large matrix of group dummies, but this step is still necessary for "HC2", "HC3", and "CR2" standard errors. 28 | * While you can specify multiple sets of fixed effects, such as `fixed_effects = ~ year + country`, please ensure that your model is well-specified if you do so. If there are dependencies or overlapping groups across multiple sets of fixed effects, we cannot guarantee the correct degrees of freedom. 29 | * For now, weighted "CR2" estimation is not possible with fixed_effects. 30 | 31 | ## Speed gains 32 | 33 | In general, our speed gains will be greatest as the number of groups/fixed effects is large relative to the number of observations. Imagine we have 300 matched-pairs in an experiment. 
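As a quick check before benchmarking: absorbing the fixed effects changes how the estimates are computed, not what they are, so the coefficients on the retained covariates match the dummy-variable specification. Here is a minimal sketch using the `mtcars` example from above (the object names are purely illustrative):

```{r}
# The absorbed fit and the dummy-variable fit agree on the shared coefficient
with_dummies <- lm_robust(mpg ~ hp + factor(cyl), data = mtcars)
absorbed <- lm_robust(mpg ~ hp, data = mtcars, fixed_effects = ~ cyl)
all.equal(unname(coef(with_dummies)["hp"]), unname(coef(absorbed)["hp"]))
```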
34 | 35 | ```{r, message=FALSE} 36 | # Load packages for comparison 37 | library(microbenchmark) 38 | library(sandwich) 39 | library(lmtest) 40 | 41 | # Create matched-pairs dataset using fabricatr 42 | set.seed(40) 43 | library(fabricatr) 44 | dat <- fabricate( 45 | blocks = add_level(N = 300), 46 | indiv = add_level(N = 2, z = sample(0:1), y = rnorm(N) + z) 47 | ) 48 | head(dat) 49 | 50 | # With HC2 51 | microbenchmark( 52 | `base + sandwich` = { 53 | lo <- lm(y ~ z + factor(blocks), dat) 54 | coeftest(lo, vcov = vcovHC(lo, type = "HC2")) 55 | }, 56 | `lm_robust` = lm_robust(y ~ z + factor(blocks), dat), 57 | `lm_robust + fes` = lm_robust(y ~ z, data = dat, fixed_effects = ~ blocks), 58 | times = 50 59 | ) 60 | ``` 61 | 62 | Speed gains are *considerably* greater with HC1 standard errors. This is because we need to get the hat matrix for HC2, HC3, and CR2 standard errors, which requires inverting that large matrix of dummies we previously avoided doing. HC0, HC1, CR0, and CRstata standard errors do not require this inversion. 63 | 64 | ```{r} 65 | # With HC1 66 | microbenchmark( 67 | `base + sandwich` = { 68 | lo <- lm(y ~ z + factor(blocks), dat) 69 | coeftest(lo, vcov = vcovHC(lo, type = "HC1")) 70 | }, 71 | `lm_robust` = lm_robust( 72 | y ~ z + factor(blocks), 73 | dat, 74 | se_type = "HC1" 75 | ), 76 | `lm_robust + fes` = lm_robust( 77 | y ~ z, 78 | data = dat, 79 | fixed_effects = ~ blocks, 80 | se_type = "HC1" 81 | ), 82 | times = 50 83 | ) 84 | ``` 85 | -------------------------------------------------------------------------------- /vignettes/emmeans-examples.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Examples with emmeans" 3 | author: "Russ Lenth" 4 | date: "8/3/2019" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE, eval = FALSE) 10 | ``` 11 | The **emmeans** package provides a variety of *post hoc* analyses such 12 | as obtaining estimated marginal means (EMMs) and comparisons thereof, 13 | displaying these results in a graph, and a number of related tasks. 14 | 15 | This vignette illustrates basic uses of **emmeans** with `lm_robust` 16 | objects. For more details, refer to the **emmeans** package itself 17 | and its vignettes. 18 | 19 | ### A factorial experiment 20 | The `warpbreaks` dataset provided in base R has the results of a two-factor 21 | experiment. We start by fitting a model 22 | ```{r} 23 | library(estimatr) 24 | 25 | warp.rlm <- lm_robust(log(breaks) ~ wool * tension, data = warpbreaks) 26 | ``` 27 | Typical use of `emmeans()` is to obtain predictions, or marginal means thereof, 28 | via a formula of the form `~ primary.variables | by.variables`: 29 | ```{r} 30 | library(emmeans) 31 | 32 | emm <- emmeans(warp.rlm, ~ tension | wool) 33 | class(emm) 34 | str(emm) 35 | emm 36 | ``` 37 | These results may be plotted as side-by-side intervals or as an interaction-style plot: 38 | ```{r} 39 | plot(emm) 40 | emmip(emm, wool ~ tension, CIs = TRUE) 41 | ``` 42 | 43 | This particular example has a response transformation. 
That transformation is 44 | detected and we may back-transform to the original scale: 45 | ```{r} 46 | confint(emm, type = "response") 47 | ``` 48 | We may do comparisons and other contrasts: 49 | ```{r} 50 | pairs(emm) # pairwise comparisons 51 | contrast(emm, "trt.vs.ctrl", ref = "L", type = "response", adjust = "mvt") 52 | ``` 53 | Note that with a log transformation, it is possible to back-transform 54 | comparisons, and they become ratios. With other transformations, back-transforming comparisons in this way 55 | is not possible. 56 | 57 | ### Rank-deficient models 58 | Let's create a variation on this example where one cell is omitted: 59 | ```{r} 60 | warpi.rlm <- update(warp.rlm, subset = -(37:48)) 61 | (rgi <- ref_grid(warpi.rlm)) 62 | summary(rgi) 63 | ``` 64 | Note that the empty cell is detected and flagged as non-estimable. 65 | 66 | Some additional explanation is in order here. EMMs are based on a *reference grid*, defined 67 | as the grid created by all possible combinations of factor levels, together with 68 | the mean of each numerical predictor. The reference grid here (`rgi`) is also an 69 | `"emmGrid"` object just like the previous `emm`. 70 | The grid itself is available as a data frame via the `grid` member, and 71 | you can verify that the above results match those of the `predict` function 72 | for the model: 73 | ```{r} 74 | predict(warpi.rlm, newdata = rgi@grid, se.fit = TRUE) 75 | ``` 76 | There is one exception for the empty cell. I will leave it as a user exercise to demonstrate that if we were to use different contrasts when fitting `warpi.rlm`, 77 | the predictions will be the same *except* for the empty cell. 78 | 79 | ### Multivariate models 80 | If there is a multivariate response, it is treated as another factor that 81 | is crossed with the other factors in the model. To illustrate, consider 82 | the dataset `MOats`, provided in **emmeans**: 83 | ```{r} 84 | MOats.rlm <- lm_robust(yield ~ Block + Variety, data = MOats) 85 | ref_grid(MOats.rlm) 86 | ``` 87 | By default, the pseudo-factor is named `rep.meas`, but we can change it 88 | if we like: 89 | ```{r} 90 | emmeans(MOats.rlm, pairwise ~ nitro, mult.name = "nitro") 91 | ``` 92 | This illustrates an additional feature of `emmeans`: we can put a contrast method on the left side of a formula. 93 | 94 | ### Afterword 95 | There are numerous capabilities of **emmeans** not illustrated here. See that package's 96 | help files and vignettes. Using `vignette("basics", "emmeans")` is a good starting point. 97 | -------------------------------------------------------------------------------- /vignettes/estimatr-in-the-tidyverse.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "estimatr in the Tidyverse" 3 | author: "Shikhar Singh" 4 | output: 5 | html_document: 6 | df_print: paged 7 | vignette: > 8 | %\VignetteIndexEntry{estimatr in the Tidyverse} 9 | %\VignetteEngine{knitr::rmarkdown} 10 | %\VignetteEncoding{UTF-8} 11 | --- 12 | 13 | 14 | ```{r, echo=FALSE} 15 | library(knitr) 16 | ``` 17 | 18 | `estimatr` is for (fast) OLS and IV regression with robust standard errors. This document shows how `estimatr` integrates with RStudio's `tidyverse` suite of packages. 19 | 20 | We use the Swiss Fertility and Socioeconomic Indicators data (available in R, description [here](https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/swiss.html)) to show how `lm_robust` works with `dplyr`, `ggplot2`, and `purrr`. 
What is shown for `lm_robust` here typically applies to the other `estimatr` functions as well (`difference_in_means`, `lm_lin`, `iv_robust`, and `horvitz_thompson`). 21 | 22 | # Getting tidy 23 | 24 | The first step to the tidyverse is turning model output into data we can manipulate. The `tidy` function converts an `lm_robust` object into a data.frame. 25 | 26 | ```{r, tidy = T, message = F, warning=T} 27 | library(estimatr) 28 | fit <- lm_robust(Fertility ~ Agriculture + Catholic, data = swiss) 29 | tidy(fit) 30 | ``` 31 | 32 | # Data manipulation with `dplyr` 33 | 34 | Once a regression fit is a data.frame, you can use any of the `dplyr` "verbs" for data manipulation, like `mutate`, `filter`, `select`, `summarise`, `group_by`, and `arrange` (more on this [here](https://dplyr.tidyverse.org)). 35 | 36 | ```{r, tidy = TRUE, message = FALSE, warning = FALSE} 37 | library(tidyverse) 38 | 39 | # lm_robust and filter 40 | fit %>% tidy %>% filter(term == "Agriculture") 41 | 42 | # lm_robust and select 43 | fit %>% tidy %>% select(term, estimate, std.error) 44 | 45 | # lm_robust and mutate 46 | fit %>% tidy %>% mutate(t_stat = estimate/ std.error, 47 | significant = p.value <= 0.05) 48 | ``` 49 | 50 | # Data visualization with `ggplot2` 51 | 52 | `ggplot2` offers a number of data visualization tools that are compatible with `estimatr`. 53 | 54 | 1. Make a coefficient plot: 55 | 56 | ```{r, tidy = T, message=F, warning=F} 57 | fit %>% 58 | tidy %>% 59 | filter(term != "(Intercept)") %>% 60 | ggplot(aes(y = term, x = estimate)) + 61 | geom_vline(xintercept = 0, linetype = 2) + 62 | geom_point() + 63 | geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, height = 0.1)) + 64 | theme_bw() 65 | ``` 66 | 67 | 2. Plot CIs based on robust variance estimates (rather than the "classical" variance estimates) using the `geom_smooth` and `stat_smooth` functions. 68 | 69 | ```{r, tidy = T, message=F, warning=F} 70 | library(ggplot2) 71 | ggplot(swiss, aes(x = Agriculture, y = Fertility)) + 72 | geom_point() + 73 | geom_smooth(method = "lm_robust") + 74 | theme_bw() 75 | ``` 76 | 77 | Note that the functional form can include polynomials. For instance, if the model is $Fertility \sim Agriculture + Agriculture^2 + Agriculture^3$, we can model this in the following way: 78 | 79 | ```{r, tidy = T, message=F, warning=F} 80 | library(ggplot2) 81 | ggplot(swiss, aes(x = Agriculture, y = Fertility)) + 82 | geom_point() + 83 | geom_smooth(method = "lm_robust", 84 | formula = y ~ poly(x, 3, raw = TRUE)) + 85 | theme_bw() 86 | ``` 87 | 88 | # Bootstrap using `rsample` 89 | 90 | The `rsample` package provides tools for bootstrapping: 91 | 92 | ```{r, tidy =T, message = F, warning=F} 93 | library(rsample) 94 | 95 | boot_out <- 96 | bootstraps(data = swiss, 500)$splits %>% 97 | map(~ lm_robust(Fertility ~ Catholic + Agriculture, data = analysis(.))) %>% 98 | map(tidy) %>% 99 | bind_rows(.id = "bootstrap_replicate") 100 | kable(head(boot_out)) 101 | ``` 102 | 103 | `boot_out` is a data.frame that contains estimates from each bootstrapped sample. We can then use `dplyr` functions to summarize the bootstraps, `tidyr` functions to reshape the estimates, and `GGally::ggpairs` to visualize them. 
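For instance, simple percentile confidence intervals are a single `dplyr` summary away (a small sketch; the 95% level and the plain percentile method are just illustrative choices):

```{r, message = FALSE, warning = FALSE}
# Percentile 95% intervals across the bootstrap replicates (illustrative only)
boot_out %>%
  group_by(term) %>%
  summarise(conf.low = quantile(estimate, 0.025),
            conf.high = quantile(estimate, 0.975))
```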
104 | 105 | ```{r, tidy =T, message = F, warning=F} 106 | boot_out %>% 107 | group_by(term) %>% 108 | summarise(boot_se = sd(estimate)) 109 | 110 | # To visualize the sampling distribution 111 | 112 | library(GGally) 113 | boot_out %>% 114 | select(bootstrap_replicate, term, estimate) %>% 115 | spread(key = term, value = estimate) %>% 116 | select(-bootstrap_replicate) %>% 117 | ggpairs(lower = list(continuous = wrap("points", alpha = 0.1))) + 118 | theme_bw() 119 | ``` 120 | 121 | # Multiple models using `purrr` 122 | 123 | `purrr` provides tools to perform the same operation on every element of a vector. For instance, we may want to estimate a model on different subsets of data. We can use the `map` function to do this. 124 | 125 | ```{r} 126 | library(purrr) 127 | 128 | # Running the same model for highly educated and less educated cantons/districts 129 | 130 | two_subsets <- 131 | swiss %>% 132 | mutate(HighlyEducated = as.numeric(Education > 8)) %>% 133 | split(.$HighlyEducated) %>% 134 | map( ~ lm_robust(Fertility ~ Catholic, data = .)) %>% 135 | map(tidy) %>% 136 | bind_rows(.id = "HighlyEducated") 137 | 138 | kable(two_subsets, digits =2) 139 | ``` 140 | 141 | Alternatively, we might want to regress different dependent variables on the same independent variable. `map` can be used along with `estimatr` functions for this purpose as well. 142 | 143 | ```{r, tidy = T, message=F, warning=F} 144 | three_outcomes <- 145 | c("Fertility", "Education", "Agriculture") %>% 146 | map(~ formula(paste0(., " ~ Catholic"))) %>% 147 | map(~ lm_robust(., data = swiss)) %>% 148 | map_df(tidy) 149 | 150 | kable(three_outcomes, digits =2) 151 | ``` 152 | 153 | Using `ggplot2`, we can make a coefficient plot: 154 | 155 | ```{r, tidy= T} 156 | 157 | three_outcomes %>% 158 | filter(term == "Catholic") %>% 159 | ggplot(aes(x = estimate, y = outcome)) + 160 | geom_vline(xintercept = 0, linetype = 2) + 161 | geom_point() + 162 | geom_errorbarh(aes(xmin = conf.low, xmax = conf.high, height = 0.1)) + 163 | ggtitle("Slopes with respect to `Catholic`") + 164 | theme_bw() 165 | ``` 166 | 167 | # Concluding thoughts 168 | 169 | Using `estimatr` functions in the tidyverse is easy once the model outputs have been turned into data.frames. We accomplish this with the `tidy` function. After that, so many summary and visualization possibilities open up. Happy tidying! 
170 | 171 | 172 | -------------------------------------------------------------------------------- /vignettes/estimatr.bib: -------------------------------------------------------------------------------- 1 | @article{abadieetal2017, 2 | title={A class of unbiased estimators of the average treatment effect in randomized experiments}, 3 | author={Abadie, Alberto and Athey, Susan and Imbens, Guido W and Wooldridge, Jeffrey}, 4 | journal={arXiv Pre-print}, 5 | year={2017}, 6 | url={https://arxiv.org/abs/1710.02926v2} 7 | } 8 | 9 | 10 | @article{aronowmiddleton2013, 11 | title={A class of unbiased estimators of the average treatment effect in randomized experiments}, 12 | author={Aronow, Peter M and Middleton, Joel A}, 13 | journal={Journal of Causal Inference}, 14 | volume={1}, 15 | number={1}, 16 | pages={135--154}, 17 | year={2013}, 18 | url={https://doi.org/10.1515/jci-2012-0009} 19 | } 20 | 21 | @article{aronowsamii2017, 22 | title={Estimating average causal effects under interference between units}, 23 | author={Aronow, Peter M and Samii, Cyrus}, 24 | journal={Annals of Applied Statistics}, 25 | pages={forthcoming}, 26 | year={2017}, 27 | url={https://arxiv.org/abs/1305.6156v3} 28 | } 29 | 30 | @article{bellmccaffrey2002, 31 | title={Bias reduction in standard errors for linear regression with multi-stage samples}, 32 | author={Bell, Robert M and McCaffrey, Daniel F}, 33 | journal={Survey Methodology}, 34 | volume={28}, 35 | number={2}, 36 | pages={169--182}, 37 | year={2002} 38 | } 39 | 40 | @article{freedman2008, 41 | title={On regression adjustments in experiments with several treatments}, 42 | author={Freedman, David A}, 43 | journal={The Annals of Applied Statistics}, 44 | pages={176--196}, 45 | year={2008}, 46 | publisher={JSTOR}, 47 | url={https://doi.org/10.1214/07-AOAS143} 48 | } 49 | 50 | @book{gerbergreen2012, 51 | title={Field Experiments: Design, Analysis, and Interpretation}, 52 | author={Gerber, Alan S and Green, Donald P}, 53 | year={2012}, 54 | publisher={W.W. 
Norton}, 55 | address={New York}, 56 | isbn={978-0-393-97995-4} 57 | } 58 | 59 | @article{imaietal2009, 60 | title={The essential role of pair matching in cluster-randomized experiments, with application to the Mexican universal health insurance evaluation}, 61 | author={Imai, Kosuke and King, Gary and Nall, Clayton}, 62 | journal={Statistical Science}, 63 | volume={24}, 64 | number={1}, 65 | pages={29--53}, 66 | year={2009}, 67 | publisher={Institute of Mathematical Statistics}, 68 | url={https://doi.org/10.1214/08-STS274} 69 | } 70 | 71 | @article{imbenskolesar2016, 72 | title={Robust standard errors in small samples: Some practical advice}, 73 | author={Imbens, Guido W and Kolesar, Michal}, 74 | journal={Review of Economics and Statistics}, 75 | volume={98}, 76 | number={4}, 77 | pages={701--712}, 78 | year={2016}, 79 | publisher={MIT Press}, 80 | url={https://doi.org/10.1162/REST_a_00552} 81 | } 82 | 83 | @article{lin2013, 84 | title={Agnostic notes on regression adjustments to experimental data: Reexamining Freedman's critique}, 85 | author={Lin, Winston}, 86 | journal={The Annals of Applied Statistics}, 87 | volume={7}, 88 | number={1}, 89 | pages={295--318}, 90 | year={2013}, 91 | publisher={Institute of Mathematical Statistics}, 92 | url={https://doi.org/10.1214/12-AOAS583} 93 | } 94 | 95 | @article{livalliant2009, 96 | title={Survey weighted hat matrix and leverages}, 97 | author={Li, Jianzhu and Valliant, Richard}, 98 | journal={Survey Methodology}, 99 | volume={35}, 100 | number={1}, 101 | pages={15--24}, 102 | year={2009}, 103 | publisher={Statistics Canada} 104 | } 105 | 106 | @article{longervin2000, 107 | title={Using heteroscedasticity consistent standard errors in the linear regression model}, 108 | author={Long, J Scott and Ervin, Laurie H}, 109 | journal={The American Statistician}, 110 | volume={54}, 111 | number={3}, 112 | pages={217--224}, 113 | year={2000}, 114 | publisher={Taylor \& Francis Group}, 115 | url={https://doi.org/10.1080/00031305.2000.10474549} 116 | } 117 | 118 | @article{mackinnonwhite1985, 119 | title = {Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties}, 120 | author = {MacKinnon, James and White, Halbert}, 121 | year = {1985}, 122 | journal = {Journal of Econometrics}, 123 | volume = {29}, 124 | number = {3}, 125 | pages = {305-325}, 126 | url = {https://doi.org/10.1016/0304-4076(85)90158-7} 127 | } 128 | 129 | @article{middletonaronow2015, 130 | title={Unbiased estimation of the average treatment effect in cluster-randomized experiments}, 131 | author={Middleton, Joel A and Aronow, Peter M}, 132 | journal={Statistics, Politics and Policy}, 133 | volume={6}, 134 | number={1-2}, 135 | pages={39--75}, 136 | year={2015}, 137 | url={https://doi.org/10.1515/spp-2013-0002} 138 | } 139 | 140 | @article{pustejovskytipton2016, 141 | title={Small-Sample Methods for Cluster-Robust Variance Estimation and Hypothesis Testing in Fixed Effects Models}, 142 | author={Pustejovsky, James E and Tipton, Elizabeth}, 143 | journal={Journal of Business \& Economic Statistics}, 144 | year={2018}, 145 | number={4}, 146 | volume={36}, 147 | publisher={Taylor \& Francis}, 148 | url={https://doi.org/10.1080/07350015.2016.1247004} 149 | } 150 | 151 | @article{romanowolf2017, 152 | title={Resurrecting weighted least squares}, 153 | author={Romano, Joseph P and Wolf, Michael}, 154 | journal={Journal of Econometrics}, 155 | volume={197}, 156 | number={1}, 157 | pages={1--19}, 158 | year={2017}, 159 | publisher={Elsevier}, 160 | 
url={https://doi.org/10.1016/j.jeconom.2016.10.003} 161 | } 162 | 163 | @article{samiiaronow2012, 164 | title={On equivalencies between design-based and regression-based variance estimators for randomized experiments}, 165 | author={Samii, Cyrus and Aronow, Peter M}, 166 | journal={Statistics and Probability Letters}, 167 | year={2012}, 168 | volume={82}, 169 | number={2}, 170 | url={https://doi.org/10.1016/j.spl.2011.10.024} 171 | } 172 | -------------------------------------------------------------------------------- /vignettes/lm_speed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/lm_speed.png -------------------------------------------------------------------------------- /vignettes/lm_speed_covars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/lm_speed_covars.png -------------------------------------------------------------------------------- /vignettes/regression-tables.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Regression Tables with estimatr" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Regression Tables with estimatr} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | Preparing regression tables with estimatr is possible with all of the major R-to-LaTeX packages, including `texreg`, `modelsummary`, `stargazer`, `xtable`, and `huxtable`. 11 | 12 | ## Setup 13 | 14 | First we'll load both `estimatr` and `magrittr` (for pipes). 15 | 16 | ```{r} 17 | library(estimatr) 18 | library(magrittr) 19 | ``` 20 | 21 | ## Texreg 22 | 23 | Texreg operates directly on an `lm_robust` object. If you would like standard errors instead of confidence intervals, use `include.ci = FALSE`. 24 | 25 | ```{r, message = FALSE} 26 | library(texreg) 27 | fit <- lm_robust(mpg ~ disp, data = mtcars) 28 | texreg(fit, include.ci = FALSE) 29 | ``` 30 | 31 | ## modelsummary 32 | 33 | `modelsummary` operates directly on `lm_robust` objects: 34 | 35 | ```{r, message = FALSE, eval = FALSE} 36 | library(modelsummary) 37 | fit1 <- lm_robust(mpg ~ disp, data = mtcars) 38 | fit2 <- lm_robust(mpg ~ hp, data = mtcars) 39 | modelsummary(list(fit1, fit2)) 40 | ``` 41 | 42 | To learn how to customize the output table and/or statistics, please consult the `modelsummary` README file on Github: https://github.com/vincentarelbundock/modelsummary 43 | 44 | ## Stargazer 45 | 46 | Stargazer has to be *tricked* -- there's unfortunately no way around this unless something in the stargazer package changes. The maintainer has indicated that he has no active plans to update stargazer in the future. First, run the regression using `lm`. Then you can pass the `lm` fits to `starprep`, a function which transforms the `lm` fits into `lm_robust` fits and then prepares the appropriate statistic you requested. The `starprep` function defaults to returning standard errors and uses `lm_robust` defaults for standard errors (robust HC2 SEs are the default in both `lm_robust` and `starprep`). Then you pass the `lm` fit to stargazer and then pass `starprep(fit)` to the `se` argument in `stargazer`. This process works great with multiple fits. 
47 | 48 | ```{r, message = FALSE, eval = FALSE} 49 | library(stargazer) 50 | fit_1 <- lm(mpg ~ disp, data = mtcars) 51 | fit_2 <- lm(mpg ~ hp, data = mtcars) 52 | stargazer(fit_1, fit_2, se = starprep(fit_1, fit_2)) 53 | ``` 54 | ```{r, message = FALSE, echo = FALSE} 55 | library(stargazer) 56 | fit_1 <- lm(mpg ~ disp, data = mtcars) 57 | fit_2 <- lm(mpg ~ hp, data = mtcars) 58 | stargazer(fit_1, fit_2, se = starprep(fit_1, fit_2), header = FALSE) 59 | ``` 60 | 61 | Below are some more examples, although we hide the output for brevity. 62 | 63 | ```{r, eval = FALSE} 64 | # Can also specify clusters and standard error type 65 | stargazer( 66 | fit_1, fit_2, 67 | se = starprep(fit_1, fit_2, clusters = mtcars$cyl, se_type = "stata") 68 | ) 69 | 70 | # Can also precompute robust objects to save computation time 71 | # using `commarobust` 72 | fit_1_r <- commarobust(fit_1) 73 | fit_2_r <- commarobust(fit_2) 74 | stargazer(fit_1, fit_2, 75 | se = starprep(fit_1_r, fit_2_r), 76 | p = starprep(fit_1_r, fit_2_r, stat = "p.value")) 77 | 78 | # can also easily get robust confidence intervals 79 | stargazer(fit_1, fit_2, 80 | ci.custom = starprep(fit_1_r, fit_2_r, stat = "ci")) 81 | ``` 82 | 83 | ## xtable 84 | 85 | `xtable` works directly on a data.frame, so we just have to prep the `lm_robust` output with the `tidy` function. 86 | 87 | ```{r} 88 | library(xtable) 89 | fit <- lm_robust(mpg ~ disp, data = mtcars) 90 | fit %>% tidy %>% xtable() 91 | ``` 92 | 93 | ## huxtable 94 | 95 | `huxtable`, too, works on a data.frame, so all we have to do is prep with `tidy`. 96 | 97 | ```{r, message=FALSE} 98 | library(huxtable) 99 | fit <- lm_robust(mpg ~ disp, data = mtcars) 100 | fit %>% tidy %>% hux() %>% print_latex() 101 | ``` 102 | 103 | 104 | -------------------------------------------------------------------------------- /vignettes/simulations-debias-dim.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/simulations-debias-dim.rda -------------------------------------------------------------------------------- /vignettes/simulations-ols-var.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeclareDesign/estimatr/10a18f124ee96e133993a9a6a68ae678b7d8d051/vignettes/simulations-ols-var.rda --------------------------------------------------------------------------------