├── templates ├── README.docx ├── README.pdf └── README.tex ├── assets ├── warningSmall.png ├── project-number.png ├── project-submit.png ├── publish-button.png ├── aearct-doi-citation.png ├── aer_programs_by_year.png ├── change-status-button.png ├── icpsr-start-process.png ├── upload-import-icpsr.png ├── project-data-type-icpsr.png ├── project-license-icpsr.png ├── project-metadata-icpsr.png ├── project-related-icpsr.png ├── recall-submission-icpsr.png ├── thumbnail_website_logo.jpg ├── figure_software_years_pct.png ├── project-description-icpsr.png ├── project-methodology-icpsr.png ├── project-related-icpsr-modal1.png ├── project-related-icpsr-modal2.png ├── project-related-icpsr-modal3.png ├── mermaid-diagram-20200713113652.png ├── project-scope-of-project-icpsr.png ├── warningSmall.svg └── css │ └── style.scss ├── code ├── codebook_autos.pdf ├── 02_codebook_plaintext.do ├── 01_codebook_fancy.do ├── 03_codebook_dataMaid.Rmd ├── 04_codebook_SAS.Rmd ├── 03_codebook_dataMaid.md ├── 02_codebook_plaintext.txt ├── 02_codebook_plaintext.md └── 01_codebook_fancy.md ├── data-deposit-aea-guidance.md ├── citations ├── guidance_data_citations.pdf ├── guidance_data_citations_biblatex.pdf ├── chicago-authordate-aea.tex ├── chicago-authordate-aea.sty ├── paper.bib ├── guidance_data_citations.tex ├── guidance_data_citations_biblatex.tex └── references.bib ├── sample-language-report.md ├── .gitignore ├── 2021-01-05-Answers-to-questions.md ├── addtl-data-citation-guidance.md ├── _includes ├── citation-block.html ├── header.html ├── footer.html └── toc.html ├── _config.yml ├── disclaimer.md ├── template-README.html ├── third-party-request.md ├── protocol-3rd-party-replication.md ├── step-by-step.md ├── template_inquiry_about_DOI_by_editor.md ├── _layouts ├── redirect.html ├── default.html └── withtoc.html ├── sample-report-3rd-1.md ├── Licensing_guidance.md ├── updating-data-deposit.md ├── sample-report.md ├── sharing-restricted-data.md ├── README.md ├── 
preparing-for-data-deposit.md ├── data-deposit-aea.md ├── FAQ.md ├── LICENSE.md └── LICENSE-template.md /templates/README.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/templates/README.docx -------------------------------------------------------------------------------- /templates/README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/templates/README.pdf -------------------------------------------------------------------------------- /assets/warningSmall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/warningSmall.png -------------------------------------------------------------------------------- /code/codebook_autos.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/code/codebook_autos.pdf -------------------------------------------------------------------------------- /assets/project-number.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-number.png -------------------------------------------------------------------------------- /assets/project-submit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-submit.png -------------------------------------------------------------------------------- /assets/publish-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/publish-button.png 
-------------------------------------------------------------------------------- /assets/aearct-doi-citation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/aearct-doi-citation.png -------------------------------------------------------------------------------- /assets/aer_programs_by_year.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/aer_programs_by_year.png -------------------------------------------------------------------------------- /assets/change-status-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/change-status-button.png -------------------------------------------------------------------------------- /assets/icpsr-start-process.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/icpsr-start-process.png -------------------------------------------------------------------------------- /assets/upload-import-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/upload-import-icpsr.png -------------------------------------------------------------------------------- /assets/project-data-type-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-data-type-icpsr.png -------------------------------------------------------------------------------- /assets/project-license-icpsr.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-license-icpsr.png -------------------------------------------------------------------------------- /assets/project-metadata-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-metadata-icpsr.png -------------------------------------------------------------------------------- /assets/project-related-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-related-icpsr.png -------------------------------------------------------------------------------- /assets/recall-submission-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/recall-submission-icpsr.png -------------------------------------------------------------------------------- /assets/thumbnail_website_logo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/thumbnail_website_logo.jpg -------------------------------------------------------------------------------- /assets/figure_software_years_pct.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/figure_software_years_pct.png -------------------------------------------------------------------------------- /assets/project-description-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-description-icpsr.png 
-------------------------------------------------------------------------------- /assets/project-methodology-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-methodology-icpsr.png -------------------------------------------------------------------------------- /data-deposit-aea-guidance.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data deposit guidance" 3 | layout: redirect 4 | redirect: data-deposit-aea.html 5 | --- 6 | -------------------------------------------------------------------------------- /assets/project-related-icpsr-modal1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-related-icpsr-modal1.png -------------------------------------------------------------------------------- /assets/project-related-icpsr-modal2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-related-icpsr-modal2.png -------------------------------------------------------------------------------- /assets/project-related-icpsr-modal3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-related-icpsr-modal3.png -------------------------------------------------------------------------------- /citations/guidance_data_citations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/citations/guidance_data_citations.pdf -------------------------------------------------------------------------------- /assets/mermaid-diagram-20200713113652.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/mermaid-diagram-20200713113652.png -------------------------------------------------------------------------------- /assets/project-scope-of-project-icpsr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/assets/project-scope-of-project-icpsr.png -------------------------------------------------------------------------------- /citations/guidance_data_citations_biblatex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AEADataEditor/aea-de-guidance/HEAD/citations/guidance_data_citations_biblatex.pdf -------------------------------------------------------------------------------- /sample-language-report.md: -------------------------------------------------------------------------------- 1 | This file has been moved [here](https://github.com/AEADataEditor/replication-template/blob/master/sample-language-report.md). Please adjust your links. 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | */ado/* 2 | */weaver*/* 3 | *.RData 4 | *.Rhistory 5 | *.aux 6 | *.bbl 7 | *.bcf 8 | *.blg 9 | *.log 10 | *.out 11 | *.run.xml 12 | *.synctex.gz 13 | .DS_Store 14 | -------------------------------------------------------------------------------- /2021-01-05-Answers-to-questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A discussion about various things" 3 | layout: redirect 4 | redirect: https://aeadataeditor.github.io/posts/2021-01-05-answers-to-questions 5 | --- 6 | 7 | -------------------------------------------------------------------------------- /addtl-data-citation-guidance.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Additional data citation guidance" 3 | layout: redirect 4 | redirect: https://social-science-data-editors.github.io/guidance/addtl-data-citation-guidance.html 5 | --- 6 | -------------------------------------------------------------------------------- /assets/warningSmall.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /_includes/citation-block.html: -------------------------------------------------------------------------------- 1 |

2 | Cite this page as: Vilhuber, Lars. . "{{ page.title }}". {{ site.title }}. Accessed at on . 3 | 4 |

5 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-minimal 2 | title: AEA Data and Code Guidance 3 | description: Guidance for authors wishing to create data and code supplements, and for replicators. 4 | logo: assets/thumbnail_website_logo.jpg 5 | show_downloads: false 6 | -------------------------------------------------------------------------------- /disclaimer.md: -------------------------------------------------------------------------------- 1 | These pages provide *unofficial and preliminary* information as it is being developed by the AEA Data Editor. 2 | No information should be construed as official policy of the American Economic Association. For official 3 | policy, please see https://www.aeaweb.org. -------------------------------------------------------------------------------- /template-README.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | This page has permanently moved, please 7 |

click here

. 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /citations/chicago-authordate-aea.tex: -------------------------------------------------------------------------------- 1 | % $Id: chicago-authordate-aea.sty,v 0.1 2018-10-03 18:06:54 vilhuber Exp $ 2 | % This is a biblatex style file, meant to correct the display of DOI 3 | 4 | %%%% change display of DOI fields 5 | 6 | \DeclareFieldFormat{doi}{% 7 | \ifhyperref 8 | {\href{https://doi.org/#1}{\nolinkurl{https://doi.org/#1}}} 9 | {\nolinkurl{https://doi.org/#1}}} 10 | -------------------------------------------------------------------------------- /_includes/header.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/css/style.scss: -------------------------------------------------------------------------------- 1 | --- 2 | --- 3 | 4 | @import "{{ site.theme }}"; 5 | 6 | $base-color: #924046; 7 | a { 8 | color: $base-color; 9 | } 10 | 11 | a:hover, a:focus { 12 | color: $base-color; 13 | font-weight: normal; 14 | text-decoration: darken($base-color, 10% ) 15 | } 16 | 17 | body { 18 | color: #222222; 19 | } 20 | 21 | footer { 22 | color: #c5c2c2; 23 | } 24 | 25 | .footeremph { 26 | color: #222222; 27 | text-decoration: underline; 28 | } 29 | 30 | .wrapper { 31 | width: 1200px; 32 | } 33 | section { 34 | width: 800px; 35 | } -------------------------------------------------------------------------------- /citations/chicago-authordate-aea.sty: -------------------------------------------------------------------------------- 1 | % $Id: chicago-authordate-aea.sty,v 0.1 2018-10-03 18:06:54 vilhuber Exp $ 2 | % This is a biblatex style file, meant to correct the display of DOI 3 | 4 | \ProvidesPackage{chicago-authordate-aea}[2018-10-03 v 0.1 biblatex 5 | bibliography style] 6 | 7 | \RequirePackage{biblatex} 8 | 9 | 10 | %%%% change display of 
DOI fields 11 | 12 | \DeclareFieldFormat{doi}{% 13 | \ifhyperref 14 | {\href{https://doi.org/#1}{\nolinkurl{https://doi.org/#1}}} 15 | {\nolinkurl{https://doi.org/#1}}} 16 | 17 | \endinput 18 | -------------------------------------------------------------------------------- /_includes/footer.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/02_codebook_plaintext.do: -------------------------------------------------------------------------------- 1 | // Stata codebook example 2 | /*** 3 | # Codebook example for STATA 4 | 5 | The source code for this file is [here](02_codebook_plaintext.do). 6 | 7 | ## Simple example 8 | 9 | The following is [perfectly acceptable content](02_codebook_plaintext.txt), 10 | but not necessarily pretty to view. 11 | The core code only requires native commands. Note that it is important that 12 | output be to a plaintext log file, as SMCL (Stata's fancy log format) 13 | is not portable. 14 | ***/ 15 | capture close log 16 | sysdir set PLUS "./ado/" 17 | set more 1 18 | set linesize 147 19 | log using "02_codebook_plaintext.txt", replace text 20 | // not run: use my_input_data 21 | di 22 | sysuse auto 23 | /*==== File structure ====*/ 24 | describe 25 | /*==== Summary statistics ====*/ 26 | codebook 27 | qui log c 28 | // also translate this program 29 | // This requires a bit of setup, see 01_codebook_fancy.do 30 | // markdoc "02_codebook_plaintext.do", replace export(md) install 31 | -------------------------------------------------------------------------------- /third-party-request.md: -------------------------------------------------------------------------------- 1 | # Template to be sent to a third-party replication service 2 | 3 | ## Prepare email 4 | 5 | --- 6 | 7 | Dear XXX, 8 | 9 | Could I ask you to conduct a reproducibility check of a manuscript for us? 
If yes, please prepare a reproducibility report (see below) for the following manuscript, code, and data: 10 | 11 | > Manuscript: Attached. 12 | 13 | > README: Attached. 14 | 15 | > Code and data: (URL) 16 | 17 | Instructions on [how to access pre-publication openICPSR](https://github.com/labordynamicsinstitute/replicability-training/blob/master/openICPSR_training.md) are available. 18 | 19 | Please file your report via reply-all to this email, within the next 10 days. 20 | 21 | Thank you very much in advance. 22 | 23 | --- 24 | 25 | ## Steps for journal 26 | 27 | - [ ] Share openICPSR deposit with the email on file for the replication service 28 | - [ ] Mark the JIRA ticket as outsourced 29 | - [ ] Use the subject line [[AEAREP-XXX] Third party verification request [MANUSCRIPT NUMBER]](mailto:somebody@here.com?subject=[AEAREP-XXX]%20Third%20party%20verification%20request%20[MANUSCRIPT%20NUMBER]) 30 | - [ ] Attach the manuscript and the README provided by the authors 31 | - [ ] CC dataeditor-queue@aeapubs.org 32 | -------------------------------------------------------------------------------- /code/01_codebook_fancy.do: -------------------------------------------------------------------------------- 1 | // Stata codebook example 2 | /*** 3 | # Codebook example for STATA 4 | The source code for this file is [here](01_codebook_fancy.do). 5 | 6 | ## Prettier example 7 | This is a slightly more involved example, but illustrates the core code. 8 | 9 | ## Requirements 10 | This fancier example leverages the [`markdoc`](http://haghish.com/statistics/stata-blog/reproducible-research/markdoc.php) package as of 2018-10-01. 11 | Here we install it locally to this project. 
12 | ***/ 13 | set more 1 14 | set linesize 147 15 | qui shell mkdir ado 16 | sysdir set PLUS "./ado/" 17 | // The following lines need to be run the first time this code is run (not ideal) 18 | // not run: net install github, from("https://haghish.github.io/github/") 19 | // not run: github install haghish/markdoc 20 | // not run: markdoc, install 21 | 22 | /*** 23 | Once the markdoc package is installed, we can create marginally fancier 24 | codebooks as well (see [the output](01_codebook_fancy.md)). 25 | 26 | ## Fancy introduction 27 | For instance we could write a fancy introduction here. 28 | 29 | ## File structure 30 | We can now describe the file structure. 31 | ***/ 32 | sysuse auto 33 | describe 34 | /*** 35 | ## Summary statistics 36 | ***/ 37 | codebook 38 | // also translate this program 39 | // markdoc "01_codebook_fancy.do", replace export(md) install 40 | -------------------------------------------------------------------------------- /protocol-3rd-party-replication.md: -------------------------------------------------------------------------------- 1 | # Protocol for Third-party Verifications 2 | 3 | The AEA's [Protocol for Third-party Verifications](https://www.aeaweb.org/journals/data/policy-third-party) is available on the [AEA website](https://www.aeaweb.org/journals/data/policy-third-party). 4 | 5 | > Alternate protocols are possible, but should be verified with the AEA Data Editor prior to engaging any resources. 6 | 7 | 8 | ## Template email 9 | > Dear [REPLICATOR], 10 | > 11 | > I would like to ask you to conduct a reproducibility verification for the American Economic Association. You have been identified as somebody with access to the right data and resources to conduct this important part of the AEA's publication process. If you are willing, please prepare a reproducibility report (see below) for the following manuscript, code, and data: 12 | > 13 | > Manuscript: Attached. 14 | > 15 | > README: Attached. 
16 | > 17 | > Code and data: [CODELOCATION] 18 | > 19 | > Instructions on how to [access pre-publication openICPSR deposits](https://labordynamicsinstitute.github.io/replicability-training-curriculum/using-openicpsr-projects-prior-to-publication.html) are available. 20 | > 21 | > While you conduct the reproducibility checks, I ask that you not communicate with the authors on this topic. Please let us know if you believe you have any conflicts of interest in this matter which would prevent you from conducting an objective check on the reproducibility of the archive. 22 | > 23 | > Please file your report via reply-all to this email, within the next 10 days. 24 | > 25 | > Thank you very much in advance. -------------------------------------------------------------------------------- /step-by-step.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Step by step guidance 3 | layout: withtoc 4 | --- 5 | 6 | The following steps outline what you should expect after conditional acceptance of your manuscript, in compliance with the [AEA Data and Code Availability Policy](https://www.aeaweb.org/journals/policies/data-code): 7 | 8 | 1. [Prepare your data and code replication package](preparing-for-data-deposit.md) (including data citations and provenance information) 9 | 2. [Provide metadata and upload the replication package](data-deposit-aea.md), for verification and subsequently publication. 10 | 3. Submit the [Data and Code Availability Form](https://www.aeaweb.org/journals/forms/data-code-availability) together with your manuscript native files as instructed, and as per guidelines at your journal (for example, [AER guidelines](https://www.aeaweb.org/journals/aer/submissions/accepted-articles/styleguide)). 11 | 4. The editorial office assigns the manuscript to the AEA Data Editor. 12 | 5. 
The AEA Data Editor team downloads materials, [conducts reproducibility checks](https://social-science-data-editors.github.io/guidance/Verification_guidance.html), and writes a [report](https://github.com/AEADataEditor/replication-template/blob/master/REPLICATION.md). 13 | 6. The report is communicated to the editorial office and the Editor of the journal. 14 | - If accepted, the manuscript is copy-edited, and published together with the data deposit as provided by the author. 15 | - If changes need to be made, the report is communicated to the authors, who make changes, until the replication package is accepted. 16 | 17 | 18 | ![Diagram](assets/mermaid-diagram-20200713113652.png) 19 | -------------------------------------------------------------------------------- /template_inquiry_about_DOI_by_editor.md: -------------------------------------------------------------------------------- 1 | (This template can be used by the Data Editor to contact data providers) 2 | 3 | Dear [name], 4 | 5 | The American Economic Association has a data and code availability policy that applies to researchers who submit articles 6 | for publication to our journals. Inter alia, this policy requests that authors provide accurate and persistent information 7 | about how other researchers can find and access data, whether that data be created by the authors or by third parties. Elements 8 | of such information are persistent URLs or Digital Object Identifiers (DOI), description of access conditions, and terms of use. 9 | 10 | One of our authors has used your data, and we are inquiring about any such information you could provide either to us or to 11 | authors upon request. 
We are looking for 12 | - DOI for specific datasets or files 13 | - URLs or DOI pointing to licenses (or, if the files are license-free, a statement to that effect) 14 | - URLs or DOI pointing to terms of use - the conditions under which researchers may use and possibly republish the data 15 | (this may be the same as the license) 16 | - URLs, DOI, or information regarding how long you will maintain the datasets you make available 17 | If you have such information available, we would be grateful if you could point us in the right direction. If you do not yet have 18 | such information, any plans you might have would be of interest to us. 19 | 20 | This is a new initiative at the AEA, and we understand that historically, much of what we are looking for above was not part of 21 | standard practice in our research community. We are trying to improve the overall transparency and thus reproducibility of economics, 22 | and this is part of that effort. 23 | 24 | I would be available to discuss this further with you and your institution. 25 | 26 | Sincerely, 27 | 28 | Data Editor of the American Economic Association 29 | -------------------------------------------------------------------------------- /code/03_codebook_dataMaid.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example codebook in R" 3 | author: "Lars Vilhuber" 4 | date: "October 1, 2018" 5 | output: 6 | html_document: 7 | keep_md: yes 8 | --- 9 | 10 | ```{r setup, include=FALSE} 11 | knitr::opts_chunk$set(echo = TRUE) 12 | packages <- c("dataMaid") 13 | ``` 14 | 15 | ## Pretty example 16 | 17 | There are a variety of ways one can describe data in R, but one of the more convenient ways is to use the [`dataMaid`](https://cran.r-project.org/web/packages/dataMaid/index.html) package on CRAN. 18 | 19 | ### Installing dataMaid 20 | `dataMaid` is on CRAN, so we install it from there. 
21 | ```{r install} 22 | # from https://www.r-bloggers.com/loading-andor-installing-packages-programmatically/ 23 | is_installed <- function(mypkg) is.element(mypkg, installed.packages()[,1]) 24 | load_or_install<-function(package_names) 25 | { 26 | for(package_name in package_names) 27 | { 28 | if(!is_installed(package_name)) 29 | { 30 | install.packages(package_name,repos="http://lib.stat.cmu.edu/R/CRAN") 31 | } 32 | library(package_name,character.only=TRUE,quietly=TRUE,verbose=FALSE) 33 | } 34 | } 35 | # calling our two functions: 36 | load_or_install(packages) 37 | ``` 38 | 39 | Then we can easily load it, and use the `makeCodebook()` command. 40 | ```{r sample,message=FALSE} 41 | library(foreign) 42 | # we use the same dataset as for the Stata example 43 | autos <- read.dta("http://www.stata-press.com/data/r9/auto.dta") 44 | library(dataMaid) 45 | makeDataReport(autos, 46 | replace=TRUE, 47 | openResult = FALSE, 48 | codebook = TRUE, 49 | file="codebook_autos.Rmd") 50 | ``` 51 | 52 | The codebook will be called [codebook_autos.pdf](codebook_autos.pdf), but the intermediate RMarkdown file [codebook_autos.Rmd](codebook_autos.Rmd) can also be tweaked. 53 | 54 | ### References 55 | ```{r,results='asis'} 56 | citation(package="dataMaid") 57 | ``` 58 | 59 | -------------------------------------------------------------------------------- /_layouts/redirect.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 | {% if site.logo %} 15 | Logo 16 | {% endif %} 17 |

{{ site.title | default: site.github.repository_name }}

18 | 19 |

{{ site.description | default: site.github.project_tagline }}

20 | 21 | {% if site.github.is_project_page %} 22 | 23 | {% endif %} 24 | 25 | {% if site.github.is_user_page %} 26 |

View My GitHub Profile

27 | {% endif %} 28 | {% if site.show_downloads %} 29 | 34 | {% endif %} 35 |
36 |
37 | 38 | This page has permanently moved, please 39 |

click here

. 40 | 41 | 42 | 43 |
44 | 47 |
48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /code/04_codebook_SAS.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example codebook in SAS" 3 | author: "Lars Vilhuber" 4 | date: "October 1, 2018" 5 | output: 6 | html_document: 7 | keep_md: yes 8 | --- 9 | 10 | ```{r setup, include=FALSE} 11 | knitr::opts_chunk$set(echo = TRUE) 12 | switch(Sys.info()[['sysname']], 13 | Windows= {saspath <- "C:/Program Files/SASHome/SASFoundation/9.4/sas.exe"}, 14 | Linux = {saspath <- system("which sas", intern=TRUE)}, 15 | Darwin = {saspath <- "/Applications/SAS/fixme"}) 16 | sasopts <- " -ls 75" 17 | packages <- c("SASmarkdown") 18 | 19 | ``` 20 | 21 | ```{r install,echo=FALSE,message=FALSE} 22 | # from https://www.r-bloggers.com/loading-andor-installing-packages-programmatically/ 23 | is_installed <- function(mypkg) is.element(mypkg, installed.packages()[,1]) 24 | load_or_install<-function(package_names) 25 | { 26 | for(package_name in package_names) 27 | { 28 | if(!is_installed(package_name)) 29 | { 30 | install.packages(package_name,repos="http://lib.stat.cmu.edu/R/CRAN") 31 | } 32 | library(package_name,character.only=TRUE,quietly=TRUE,verbose=FALSE) 33 | } 34 | } 35 | # calling our two functions: 36 | load_or_install(packages) 37 | knitr::opts_chunk$set(engine.path=saspath, 38 | engine.opts=sasopts, comment="") 39 | ``` 40 | 41 | ## Get the auto.dta file 42 | ```{r, engine='sashtml', engine.path=saspath, engine.opts=sasopts, comment=NA, results='asis'} 43 | /* this next step is a kludge */ 44 | /* options SSLCALISTLOC="/usr/lib64/R/library/RCurl/CurlSSL/cacert.pem"; 45 | proc http method="get" out=auto url="http://www.stata-press.com/data/r9/auto.dta"; 46 | run;*/ 47 | x wget -O auto.dta "http://www.stata-press.com/data/r9/auto.dta" 2>/dev/null 1>&2; 48 | 49 | 50 | ``` 51 | 52 | ## Create the codebook 53 | ```{r, engine='sashtml', engine.path=saspath, 
engine.opts=sasopts, comment=NA, results='asis'} 54 | filename auto "auto.dta"; 55 | proc import datafile=auto out=auto dbms=dta; 56 | run; 57 | proc contents; 58 | proc means; 59 | run; 60 | 61 | ``` 62 | 63 | ## References 64 | - “SAS Using R Markdown (Windows).” n.d. Accessed October 5, 2018. https://www.ssc.wisc.edu/~hemken/SASworkshops/Markdown/SASmarkdown.html. 65 | -------------------------------------------------------------------------------- /sample-report-3rd-1.md: -------------------------------------------------------------------------------- 1 | # Code Verification - Restricted Access 2 | The following report was provided by a third party verifier, who conducted the verification on behalf of the AEA Data Editor. 3 | 4 | 1) Edits to run_all.do 5 | 6 | a) Change “`file name.do`” to “`file_name.do`” 7 | 8 | b) Change “`rdabc_analysis.do`” to “`rd_abc_analysis.do`” 9 | 10 | 2) Edits to `rd_abc_analysis.do` 11 | 12 | a) Change `${prog_dir}sumstats.do` to `“${prog_dir}/sumstats.do”` (add 13 | quotation marks and forward slash “`/`”) (all platforms) 14 | 15 | > Note by DATA EDITOR: please use forward slash throughout and consistently, this works on any platform in Stata 16 | 17 | 18 | 4) Tables 19 | 20 | - Table 1: In the directory for logs, I did not find the file “`sumstats_all.log`” as stated in the README file. I found `sumstats_rdabc.log`, `sumstats_rd.log` and 21 | `sumstats_rkd.log`. I was able to find all values for the variables listed under the RD Sample column in Table 1 though the section order of the output in the log file is somewhat different. 22 | - Table 2: See attached file “`t2_covariates.xlsx`” and the areas that are highlighted in yellow that are different from the manuscript. The differences are not numerous and are minor and do not change the interpretation. (Image provided) 23 | - Table 3: All numbers from “`t3_cross_section.xlsx`” match manuscript. 
24 | - Table 4: See attached file “`t4_heterogeneity.xlsx`” and the areas that are highlighted in yellow. Differences mostly in the point estimates at the 3rd decimal place with some differences in standard errors as well. 25 | - Table 5: See attached file “`t5_otherdata.xlsx`” and the areas that are highlighted in yellow. Minor differences driven by sample size changes. 26 | - Table 6: All numbers in “`table6.xlsx`” match manuscript. 27 | - Table A.1: All numbers in “`tablea1.xlsx`” match manuscript. 28 | - Table A.2: All numbers in “`tablea2.xlsx`” match manuscript. 29 | - Table A.4: All numbers needed to match the manuscript were found in `sumstats_rd.log`, `sumstats_rdabc.log` and `sumstats_rkd.log`. 30 | 31 | > NOTE from AEA Data Editor: This is normally a failure - we expect to have clearly marked output enter a table. 32 | 33 | 5) Figures: I reviewed all the generated PDF files that contained all figures and they visually matched to the manuscript. 34 | -------------------------------------------------------------------------------- /code/03_codebook_dataMaid.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example codebook in R" 3 | author: "Lars Vilhuber" 4 | date: "October 1, 2018" 5 | output: 6 | html_document: 7 | keep_md: yes 8 | --- 9 | 10 | 11 | 12 | ## Pretty example 13 | 14 | There are a variety of ways one can describe data in R, but one of the more convenient ways is to use the [`dataMaid`](https://cran.r-project.org/web/packages/dataMaid/index.html) package on CRAN. 15 | 16 | ### Installing dataMaid 17 | `dataMaid` is on CRAN, so we install it from there. 
18 | 19 | ```r 20 | # from https://www.r-bloggers.com/loading-andor-installing-packages-programmatically/ 21 | is_installed <- function(mypkg) is.element(mypkg, installed.packages()[,1]) 22 | load_or_install<-function(package_names) 23 | { 24 | for(package_name in package_names) 25 | { 26 | if(!is_installed(package_name)) 27 | { 28 | install.packages(package_name,repos="http://lib.stat.cmu.edu/R/CRAN") 29 | } 30 | library(package_name,character.only=TRUE,quietly=TRUE,verbose=FALSE) 31 | } 32 | } 33 | # calling our two functions: 34 | load_or_install(packages) 35 | ``` 36 | 37 | Then we can easily load it, and use the `makeCodebook()` command. 38 | 39 | ```r 40 | library(foreign) 41 | # we use the same dataset as for the Stata example 42 | autos <- read.dta("http://www.stata-press.com/data/r9/auto.dta") 43 | library(dataMaid) 44 | makeDataReport(autos, 45 | replace=TRUE, 46 | openResult = FALSE, 47 | codebook = TRUE, 48 | file="codebook_autos.Rmd") 49 | ``` 50 | 51 | The codebook will be called [codebook_autos.pdf](codebook_autos.pdf), but the intermediate RMarkdown file [codebook_autos.Rmd](codebook_autos.Rmd) can also be tweaked. 52 | 53 | ### References 54 | 55 | ```r 56 | citation(package="dataMaid") 57 | ``` 58 | 59 | 60 | To cite package 'dataMaid' in publications use: 61 | 62 | Anne Helby Petersen and Claus Thorn Ekstrøm (2018). dataMaid: A 63 | Suite of Checks for Identification of Potential Errors in a Data 64 | Frame as Part of the Data Screening Process. R package version 65 | 1.1.2. 
https://CRAN.R-project.org/package=dataMaid 66 | 67 | A BibTeX entry for LaTeX users is 68 | 69 | @Manual{, 70 | title = {dataMaid: A Suite of Checks for Identification of Potential Errors in a 71 | Data Frame as Part of the Data Screening Process}, 72 | author = {Anne Helby Petersen and Claus Thorn Ekstrøm}, 73 | year = {2018}, 74 | note = {R package version 1.1.2}, 75 | url = {https://CRAN.R-project.org/package=dataMaid}, 76 | } 77 | 78 | -------------------------------------------------------------------------------- /_layouts/default.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | {% seo %} 9 | 10 | 13 | {% include header.html %} 14 | 15 | 16 | 17 |
18 |
19 |

{{ site.title | default: site.github.repository_name }}

20 | 21 | {% if site.logo %} 22 | Logo 23 | {% endif %} 24 | 25 |

{{ site.description | default: site.github.project_tagline }}

26 | 27 | {% if site.github.is_project_page %} 28 | 29 | {% endif %} 30 | 31 | {% if site.github.is_user_page %} 32 |

View My GitHub Profile

33 | {% endif %} 34 | 35 | {% if site.show_downloads %} 36 | 41 | {% endif %} 42 |
43 |
44 | 45 | {{ content }} 46 | 47 |
48 | {% include footer.html %} 49 |
50 | 51 | {% if site.google_analytics %} 52 | 60 | {% endif %} 61 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /templates/README.tex: -------------------------------------------------------------------------------- 1 | % Options for packages loaded elsewhere 2 | \PassOptionsToPackage{unicode}{hyperref} 3 | \PassOptionsToPackage{hyphens}{url} 4 | % 5 | \documentclass[ 6 | ]{article} 7 | \usepackage{lmodern} 8 | \usepackage{amssymb,amsmath} 9 | \usepackage{ifxetex,ifluatex} 10 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 11 | \usepackage[T1]{fontenc} 12 | \usepackage[utf8]{inputenc} 13 | \usepackage{textcomp} % provide euro and other symbols 14 | \else % if luatex or xetex 15 | \usepackage{unicode-math} 16 | \defaultfontfeatures{Scale=MatchLowercase} 17 | \defaultfontfeatures[\rmfamily]{Ligatures=TeX,Scale=1} 18 | \fi 19 | % Use upquote if available, for straight quotes in verbatim environments 20 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{} 21 | \IfFileExists{microtype.sty}{% use microtype if available 22 | \usepackage[]{microtype} 23 | \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts 24 | }{} 25 | \makeatletter 26 | \@ifundefined{KOMAClassName}{% if non-KOMA class 27 | \IfFileExists{parskip.sty}{% 28 | \usepackage{parskip} 29 | }{% else 30 | \setlength{\parindent}{0pt} 31 | \setlength{\parskip}{6pt plus 2pt minus 1pt}} 32 | }{% if KOMA class 33 | \KOMAoptions{parskip=half}} 34 | \makeatother 35 | \usepackage{xcolor} 36 | \IfFileExists{xurl.sty}{\usepackage{xurl}}{} % add URL line breaks if available 37 | \IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}} 38 | \hypersetup{ 39 | hidelinks, 40 | pdfcreator={LaTeX via pandoc}} 41 | \urlstyle{same} % disable monospaced font for URLs 42 | \usepackage{longtable,booktabs} 43 | % Correct order of tables after \paragraph or \subparagraph 44 | \usepackage{etoolbox} 45 | \makeatletter 46 | 
\patchcmd\longtable{\par}{\if@noskipsec\mbox{}\fi\par}{}{} 47 | \makeatother 48 | % Allow footnotes in longtable head/foot 49 | \IfFileExists{footnotehyper.sty}{\usepackage{footnotehyper}}{\usepackage{footnote}} 50 | \makesavenoteenv{longtable} 51 | \setlength{\emergencystretch}{3em} % prevent overfull lines 52 | \providecommand{\tightlist}{% 53 | \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 54 | \setcounter{secnumdepth}{-\maxdimen} % remove section numbering 55 | 56 | \author{} 57 | \date{} 58 | 59 | \begin{document} 60 | 61 | \hypertarget{template-readme-and-guidance}{% 62 | \section{Template README and 63 | Guidance}\label{template-readme-and-guidance}} 64 | 65 | \begin{quote} 66 | As of 2020-05-22, the AEA-specific README has been superseded by a README endorsed by multiple data editors in economics. Please see 67 | \url{https://social-science-data-editors.github.io/guidance/template-README.html} for the latest version. 68 | \end{quote} 69 | 70 | 71 | 72 | \end{document} 73 | -------------------------------------------------------------------------------- /_layouts/withtoc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | {% seo %} 9 | 10 | 13 | 14 | {% include header.html %} 15 | 16 | 17 | 18 |
19 |
20 |

{{ site.title | default: site.github.repository_name }}

21 | 22 | {% if site.logo %} 23 | Logo 24 | {% endif %} 25 | 26 |

{{ site.description | default: site.github.project_tagline }}

27 | 28 | {% if site.github.is_project_page %} 29 | 30 | {% endif %} 31 | 32 | {% if site.github.is_user_page %} 33 |

View My GitHub Profile

34 | {% endif %} 35 | 36 | {% if site.show_downloads %} 37 | 42 | {% endif %} 43 |
44 |
45 | {% if page.title %} 46 |

{{ page.title }}

47 | {% endif %} 48 |
49 | On this page: 50 | {% include toc.html html=content sanitize=true class="inline_toc" id="my_toc" h_min=3 h_max=3 %} 51 |
52 | {{ content }} 53 | 54 |
55 | {% include footer.html %} 56 |
57 | 58 | {% if site.google_analytics %} 59 | 67 | {% endif %} 68 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /Licensing_guidance.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: withtoc 3 | title: Licensing Guidance 4 | --- 5 | 6 | > This site does not provide legal guidance. The information below is provided for discussion and as a suggestion only. Authors should consult with a qualified party, such as a university counsel or a lawyer, as appropriate. 7 | 8 | 9 | When depositing in a Code and Data Repository, a license needs to be chosen. If not chosen through the menu, a license file (typically named `LICENSE.txt` or `LICENSE.md`) needs to be provided. 10 | 11 | ### Some guidance on licensing of databases and software: 12 | 13 | See the [Social Science Data Editors' Licensing Guidance](https://social-science-data-editors.github.io/guidance/Licensing_guidance.html) and resources linked therein for more extensive guidance. 14 | 15 | ### Openness required 16 | 17 | For the purpose of replicability, the AEA will insist on an open license that allows for replication by researchers unconnected to the original parties, to the extent allowed by other agreements and the law. 18 | 19 | ### Data-only repositories 20 | 21 | We suggest the [![License: CC BY 4.0](https://img.shields.io/badge/License%20-CC%20BY%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by/4.0/) [Creative Commons Attribution 4.0 International Public License](http://creativecommons.org/licenses/by/4.0/) or the more liberal 22 | [![License: CC0 1.0](https://img.shields.io/badge/License%20-CC0%201.0-lightgrey.svg)](https://creativecommons.org/publicdomain/zero/1.0/) [CC0 1.0 Universal (CC0 1.0) Public Domain Dedication](https://creativecommons.org/publicdomain/zero/1.0/).
23 | 24 | At the AEA Data and Code Repository, [![License: CC BY 4.0](https://img.shields.io/badge/License%20-CC%20BY%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by/4.0/) [Creative Commons Attribution 4.0 International Public License](http://creativecommons.org/licenses/by/4.0/) is the default license. 25 | 26 | ### Code-only repositories 27 | 28 | "Code" or "Software" refers to any statements or instructions to be used directly or indirectly in a computer in order to bring about a certain result, and may include interpretive, object or source code. The CC-BY license is [not recommended for software](https://creativecommons.org/faq/#Can_I_apply_a_Creative_Commons_license_to_software.3F). 29 | 30 | We suggest the [![License: Modified BSD](https://img.shields.io/badge/License-BSD-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) [3-Clause (Modified) BSD License](https://opensource.org/licenses/BSD-3-Clause), though other open-source licenses are equally acceptable (see [https://opensource.org/licenses](https://opensource.org/licenses)). 31 | 32 | 33 | ### Repositories with both Code and Data 34 | 35 | Many repositories contain both code and databases. In that case, the repository might contain files under different licenses. For instance, some components may come with more restrictive licenses (MIT License for software from third parties) or more lenient licenses (CC0 license for own code), with a third license for databases. 36 | 37 | #### Dual-license Example 38 | 39 | We provide an example of a dual-license setup, suitable for use by depositors to the AEA's Data and Code Repository (see [LICENSE-template](LICENSE-template.md)).
It combines 40 | - [![License: Modified BSD](https://img.shields.io/badge/License-BSD-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause) [Modified BSD License](https://opensource.org/licenses/BSD-3-Clause), applies to all code, scripts, programs, and SOFTWARE 41 | - [![License: CC BY 4.0](https://img.shields.io/badge/License%20-CC%20BY%204.0-lightgrey.svg)](http://creativecommons.org/licenses/by/4.0/) [Creative Commons Attribution 4.0 International Public License](https://creativecommons.org/licenses/by/4.0/), applies to databases, images, tables, text, and any other objects 42 | 43 | > NOTE: Authors must explicitly upload this license to their deposit, as `LICENSE.txt`, and choose the `Other license` option when publishing their repository. 44 | 45 | #### A different example 46 | 47 | A more complex implementation of the multi-license setup can be found on the [Social Science Data Editors website](https://social-science-data-editors.github.io/guidance/Licensing_guidance.html#dual-license-setup). -------------------------------------------------------------------------------- /citations/paper.bib: -------------------------------------------------------------------------------- 1 | @online{citation-machine, 2 | author = {Chegg}, 3 | title = {Citation Machine: Chicago Manual Of Style 17th Edition (Author Date) format citation generator for journal article}, 4 | howpublished = {\url{http://www.citationmachine.net/chicago-17-author-date}}, 5 | institution = {Citation Machine}, 6 | year = {2018}, 7 | urldate = {2018-10-02}, 8 | note = {(accessed on 2018-10-02)}, 9 | } 10 | 11 | @online{ChicagoManualofStyleChicagoManualStyle2018, 12 | timestamp = {2018-10-02T15:39:27Z}, 13 | title = {Author-Date: Sample Citations}, 14 | abstract = {Find it. Write it. 
Cite it.}, 15 | language = {en}, 16 | urldate = {2018-10-02}, 17 | note = {(accessed on 2018-10-02)}, 18 | url = {https://www.chicagomanualofstyle.org}, 19 | howpublished = {\url{https://www.chicagomanualofstyle.org/tools_citationguide/citation-guide-2.html}}, 20 | author = {{Chicago Manual of Style Online}}, 21 | year = {2018}, 22 | file = {Snapshot:/home/vilhuber/Zotero/storage/IVA5WXJV/citation-guide-2.html:text/html} 23 | } 24 | 25 | % 26 | % Old-style 27 | % 28 | 29 | 30 | @techreport{duflopande2006, 31 | author = {Esther Duflo and Rohini Pande}, 32 | year = 2006, 33 | title = {Dams, Poverty, Public Goods and Malaria Incidence in India}, 34 | institution = {Murray Research Archive}, 35 | type = {}, 36 | number = {Version V2, UNF:5:obNHHq1gtV400a4T+Xrp9g==}, 37 | note = {\url{http://hdl.handle.net/1902.1/IOJHHXOOLZ}} 38 | } 39 | 40 | @techreport{leiss1999, 41 | author = {Leiss, Amelia}, 42 | year = {1999}, 43 | title = {Arms Transfers to Developing Countries, 1945--1968}, 44 | institution = {Inter-University Consortium for Political and Social Research}, 45 | type = {}, 46 | number = {ICPSR05404-v1}, 47 | address = {Ann Arbor, MI}, 48 | note = {DOI: 10.3886/ICPSR05404 (accessed February 8, 2011).}, 49 | } 50 | 51 | % 52 | % New Style 53 | % 54 | @techreport{duflopande2006-new, 55 | author = {Esther Duflo and Rohini Pande}, 56 | year = 2006, 57 | title = {Dams, Poverty, Public Goods and Malaria Incidence in India}, 58 | institution = {Murray Research Archive}, 59 | type = {}, 60 | number = {Version V2, UNF:5:obNHHq1gtV400a4T+Xrp9g==}, 61 | url = {http://hdl.handle.net/1902.1/IOJHHXOOLZ} 62 | } 63 | 64 | @techreport{leiss1999-new, 65 | author = {Leiss, Amelia}, 66 | year = {1999}, 67 | title = {Arms Transfers to Developing Countries, 1945--1968}, 68 | institution = {Inter-University Consortium for Political and Social Research}, 69 | type = {}, 70 | urldate = {February 8, 2011}, 71 | number = {ICPSR05404-v1}, 72 | address = {Ann Arbor, MI}, 73 | doi = 
{10.3886/ICPSR05404}, 74 | } 75 | 76 | 77 | % 78 | % OTHER 79 | % 80 | 81 | @online{dataone-cite, 82 | author = {DataONE}, 83 | howpublished = {\url{https://www.dataone.org/citing-dataone}}, 84 | note = {(accessed on 2018-08-10)}, 85 | year = {2011}, 86 | title = {Data Citation and Attribution} 87 | } 88 | 89 | @online{dataone-l09, 90 | author = {DataONE}, 91 | title = {DataONE Tutorial on Data Citation}, 92 | howpublished = {\url{http://www.dataone.org/sites/all/documents/L09\_DataCitation.pptx}}, 93 | institution = {DataONE}, 94 | note = {(accessed on 2018-08-10)}, 95 | year = {2011}, 96 | urldate = {2018-05-22}, 97 | url = {http://www.dataone.org/sites/all/documents/L09_DataCitation.pptx}, 98 | 99 | } 100 | 101 | @online{icpsr-data-cite, 102 | author = {ICPSR}, 103 | howpublished = {\url{https://www.icpsr.umich.edu/icpsrweb/ICPSR/curation/citations.jsp}}, 104 | institution = {ICPSR}, 105 | note = {accessed on 2018-08-10}, 106 | year = {2018}, 107 | title = {Data Citations}, 108 | url = {https://www.icpsr.umich.edu/icpsrweb/ICPSR/curation/citations.jsp}, 109 | } 110 | 111 | 112 | @online{aeadatarefs, 113 | author = {{American Economic Association}}, 114 | title = {Sample References}, 115 | howpublished = {\url{https://www.aeaweb.org/journals/policies/sample-references}}, 116 | year = {2018}, 117 | note = {(accessed on 2018-08-10)}, 118 | } 119 | 120 | @online{force11declaration, 121 | author = "Martone, M. 
(ed.)", 122 | title = "Data Citation Synthesis Group: Joint Declaration of Data Citation Principles", 123 | journal = "FORCE11", 124 | doi = {10.25490/a97f-egyk}, 125 | url = {https://doi.org/10.25490/a97f-egyk}, 126 | year = 2014 127 | 128 | } 129 | -------------------------------------------------------------------------------- /updating-data-deposit.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: withtoc 3 | title: "Updating Materials available on the AEA Data and Code Repository" 4 | --- 5 | 6 | Once an article has been published in an [AEA journal](https://www.aeaweb.org/journals/), the associated code and data deposit will also have been published in the [AEA Data and Code Repository](https://www.openicpsr.org/openicpsr/search/aea/studies). Both publications are considered permanent. 7 | 8 | For a variety of reasons, it may become necessary or desirable to update the code and data deposit associated with a published manuscript. Reasons may include 9 | 10 | - the code or the data have been updated to more accurately reproduce the results in the manuscript 11 | - the code or instructions have been updated to more easily reproduce the results in the manuscript 12 | - data which previously could not be made available is now redistributable 13 | 14 | This document describes a few considerations and practical matters related to such an update. 15 | 16 | ## Important: No Replacement 17 | 18 | It is important to note that the previously published deposit will remain available. Each deposit has a version number (`V1`,`V2`, etc.). When updating a deposit, a new version is created. All versions connect to each other: If a user finds the `V1` version, there is an indication that a `V2` exists, and vice-versa. 19 | 20 | We are still exploring how to accurately reflect such updates on the article landing page at the [AEA journals](https://www.aeaweb.org/journals/) website. 
21 | 22 | ## Prerequisites: Deposit Ownership 23 | 24 | If the materials were deposited with the AEA prior to the [announcement](https://www.aeaweb.org/news/member-announcements-july-16-2019) of the [2019 Data and Code Availability Policy](https://www.aeaweb.org/journals/policies/data-code), the "owner" of the deposit is the AEA Data Editor. When updating the deposit, authors should [request that the AEA Data Editor share the deposit with them](mailto:dataeditor@aeapubs.org?subject=Request%20for%20access%20to%20prior%20deposit). 25 | 26 | If the deposit was made after July 2019, chances are that one of the authors retains the ability to publish updates. No intervention by the AEA Data Editor is usually needed. 27 | 28 | Note that in all cases, the AEA Data Editor will need to review the materials provided. 29 | 30 | ## Identifying that the Deposit has been Updated 31 | 32 | All data and code deposits are required to have a README. For updates, the README should clearly state (within the first paragraph, or as a banner) that it was updated after publication. Suggested language: 33 | 34 | > The data and code in this deposit have been updated after publication of the article. For changes made, see below. 35 | 36 | After an introductory paragraph, a clearly identified section or paragraph called "Changes made" should also be added. This should briefly and succinctly describe the changes made. Examples: 37 | 38 | > The code has been simplified, and better instructions provided. All results are the same. 39 | 40 | > Permission was obtained from the data provider to post additional data. Figures 5, 8, and 10 are now reproducible with this archive. 41 | 42 | ## Uploading Changes 43 | 44 | Authors should consult the generic [AEA Deposit Instructions](https://www.openicpsr.org/openicpsr/aea/deposit-instructions) and [supplementary guidance](data-deposit-aea-guidance.md).
In particular, we encourage authors to update and enrich any metadata previously not entered, such as geographic scope and time periods covered by the data. 45 | 46 | - If replacing files, you will first need to delete the original file. 47 | - We strongly suggest not to replace anything that does not need replacing (surgical replacement rather than bulldozer replacement) 48 | - Do not upload ZIP files. All files need to be expanded. Authors can use the "Import from ZIP" functionality. OSX users should be aware of [this issue](https://aeadataeditor.github.io/aea-de-guidance/FAQ.html#what-is-that-__macosx-folder-which-seems-to-contain-a-second-copy-of-all-the--replication-files-i-am-not-sure-why-this-folder-exists). 49 | 50 | ## Publishing Changes 51 | 52 | When publishing a deposit, a new version number is automatically assigned. In addition, a `version title` can be provided. Suggested: 53 | 54 | > Post-publication update fixing various issues 55 | 56 | or 57 | 58 | > Post-publication update providing additional data 59 | 60 | ## Review 61 | 62 | The AEA Data Editor will review the metadata associated with the new version. However, we are not currently able to provide the same level of pre-publication verification of computational reproducibility afforded to new submissions, except in select cases. -------------------------------------------------------------------------------- /sample-report.md: -------------------------------------------------------------------------------- 1 | Key: [ISSUE-N] 2 | Replication report for [AUTHORS] "[TITLE]" 3 | 4 | # README file 5 | The Readme file was clear and concise, albeit very sparse. 6 | 7 | However, it does not identify the absence of certain data necessary to produce Appendix Tables A11-A13, 8 | nor how such data could be accessed. 9 | 10 | # Data 11 | The data used is the [Magnificent Microdata for Replication (MMR)] data. 
12 | - There seems to be no information provided where to access the data (not in the README, not in Appendix A). 13 | - Summary statistics are presented in Table A1. 14 | - Datasets contain many variables that are not labeled. Several are quite intuitive (names are quasi-labels), others are not ("e1, m1") 15 | - Figures A11-A13 rely on raw confidential data. No information is provided as to who can access the raw data, 16 | where it can be accessed, etc. 17 | 18 | The data do not seem to be cited. 19 | 20 | SUGGESTION: Please add a data citation to the references, see https://www.aeaweb.org/journals/policies/sample-references for guidance. 21 | 22 | SUGGESTION: Please add information on how and where both the more public data and the raw data can be accessed. 23 | 24 | 25 | # Programs 26 | 27 | ## General notes 28 | Numerous changes to path names were necessary in the individual programs in order to be able to run the code and produce output. 29 | 30 | SUGGESTION: It may be helpful to set up a configuration file and include it at the top of the master programs. 31 | 32 | Programs to generate Figures A11-A13 were not provided. Note that this is required by AEA Data Availability Policy. 33 | 34 | SUGGESTION: Please add the missing programs. 35 | 36 | 37 | ## Installation of Dependencies 38 | 39 | In addition to the installations mentioned in the ReadMe, I also had to install other software for the following programs to run: 40 | 41 | - table5.do required me to install the command "estout" 42 | - figure1_2.do required me to install the command "coefplot" 43 | - master_appendix_tables required me to install the command "ftools" 44 | 45 | These installations would be helpful to either include in the code or make note of in the Readme 46 | 47 | SUGGESTION: Provide a single "setup" program that installs all identified dependencies. 
48 | 49 | ## Configuration: 50 | 51 | - table3.do required me to set matsize 11000; this could be helpful to either change in the code or note in the Readme 52 | 53 | ## Problems encountered 54 | Programs that Failed to Run: 55 | - There was an issue with appendix_table11.do that stopped it from running to completion: 56 | 1. The matsize was too small, so I wrote "set matsize 11000" 57 | 2. The code threw the error "child_age not found" on line 204, preventing the program from running to completion 58 | 59 | # Findings 60 | 61 | ## Article Figures 62 | - Figure 1: created by master_figures.do and figure1_2.do, figures look the same 63 | - Figure 2: created by master_figures.do and figure1_2.do, figures look the same 64 | - Figure 3: created by master_figures and figure3.do, figures look the same 65 | 66 | ## Appendix Figures 67 | - Figure A2: created by master_appendix_figures.do and appendix_figure2.do, figures look the same 68 | - Figure A3: created by master_appendix_figures.do and appendix_figure3.do, figures look the same 69 | - Figure A4: no program provided 70 | - Figures A11-A13: no programs provided because of restricted data 71 | 72 | ## Article Tables 73 | - Table 1: created by master_tables.do and table1.do, visual inspection of numbers made, all numbers match 74 | - Table 2: created by master_tables.do and table9.do, visual inspection of numbers made: 75 | Discrepancies: 76 | Column 2, Row 1 of article: The standard error generated by the code is 0.046, while the article has a standard error of 0.763 77 | 78 | ## Appendix Tables 79 | - Table A1: created by master_appendix_tables.do and appendix_table1.do, visual inspection of numbers made: 80 | Discrepancies: 81 | Column 4, Row 20 of article: The value generated by the code is 0.338, while the article has a value of 0.388 82 | - Table A2: created by master_appendix_tables.do and appendix_table2.do, visual inspection of numbers made: 83 | Discrepancies: 84 | Column 2, Row 4 of article: The standard 
error generated by the code is 0.067, the article has a standard error of 0.090 85 | Column 4, Row 4 of article: The standard error generated by the code is 0.086, the article has a standard error of 0.115 86 | Column 6, Row 4 of article: The standard error generated by the code is 0.063, the article has a standard error of 0.085 87 | Column 2, Row 6 of article: The standard error generated by the code is 0.224, the article has a standard error of 0.300 88 | Column 4, Row 6 of article: The standard error generated by the code is 0.214, the article has a standard error of 0.288 89 | [10 more lines with discrepancies] 90 | -------------------------------------------------------------------------------- /_includes/toc.html: -------------------------------------------------------------------------------- 1 | {% capture tocWorkspace %} 2 | {% comment %} 3 | Version 1.0.7 4 | https://github.com/allejo/jekyll-toc 5 | 6 | "...like all things liquid - where there's a will, and ~36 hours to spare, there's usually a/some way" ~jaybe 7 | 8 | Usage: 9 | {% include toc.html html=content sanitize=true class="inline_toc" id="my_toc" h_min=2 h_max=3 %} 10 | 11 | Parameters: 12 | * html (string) - the HTML of compiled markdown generated by kramdown in Jekyll 13 | 14 | Optional Parameters: 15 | * sanitize (bool) : false - when set to true, the headers will be stripped of any HTML in the TOC 16 | * class (string) : '' - a CSS class assigned to the TOC 17 | * id (string) : '' - an ID to assigned to the TOC 18 | * h_min (int) : 1 - the minimum TOC header level to use; any header lower than this value will be ignored 19 | * h_max (int) : 6 - the maximum TOC header level to use; any header greater than this value will be ignored 20 | * ordered (bool) : false - when set to true, an ordered list will be outputted instead of an unordered list 21 | * item_class (string) : '' - add custom class(es) for each list item; has support for '%level%' placeholder, which is the current heading level 22 
| * baseurl (string) : '' - add a base url to the TOC links for when your TOC is on another page than the actual content 23 | * anchor_class (string) : '' - add custom class(es) for each anchor element 24 | 25 | Output: 26 | An ordered or unordered list representing the table of contents of a markdown block. This snippet will only 27 | generate the table of contents and will NOT output the markdown given to it 28 | {% endcomment %} 29 | 30 | {% capture my_toc %}{% endcapture %} 31 | {% assign orderedList = include.ordered | default: false %} 32 | {% assign minHeader = include.h_min | default: 1 %} 33 | {% assign maxHeader = include.h_max | default: 6 %} 34 | {% assign nodes = include.html | split: ' maxHeader %} 47 | {% continue %} 48 | {% endif %} 49 | 50 | {% if firstHeader %} 51 | {% assign firstHeader = false %} 52 | {% assign minHeader = headerLevel %} 53 | {% endif %} 54 | 55 | {% assign indentAmount = headerLevel | minus: minHeader | add: 1 %} 56 | {% assign _workspace = node | split: '' | first }}>{% endcapture %} 71 | {% assign header = _workspace[0] | replace: _hAttrToStrip, '' %} 72 | 73 | {% assign space = '' %} 74 | {% for i in (1..indentAmount) %} 75 | {% assign space = space | prepend: ' ' %} 76 | {% endfor %} 77 | 78 | {% unless include.item_class == blank %} 79 | {% capture listItemClass %}{:.{{ include.item_class | replace: '%level%', headerLevel }}}{% endcapture %} 80 | {% endunless %} 81 | 82 | {% capture my_toc %}{{ my_toc }} 83 | {{ space }}{{ listModifier }} {{ listItemClass }} [{% if include.sanitize %}{{ header | strip_html }}{% else %}{{ header }}{% endif %}]({% if include.baseurl %}{{ include.baseurl }}{% endif %}#{{ html_id }}){% if include.anchor_class %}{:.{{ include.anchor_class }}}{% endif %}{% endcapture %} 84 | {% endfor %} 85 | 86 | {% if include.class %} 87 | {% capture my_toc %}{:.{{ include.class }}} 88 | {{ my_toc | lstrip }}{% endcapture %} 89 | {% endif %} 90 | 91 | {% if include.id %} 92 | {% capture my_toc %}{: #{{ 
include.id }}} 93 | {{ my_toc | lstrip }}{% endcapture %} 94 | {% endif %} 95 | {% endcapture %}{% assign tocWorkspace = '' %}{{ my_toc | markdownify | strip }} -------------------------------------------------------------------------------- /sharing-restricted-data.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: withtoc 3 | title: Sharing restricted-access data with the AEA Data Editor 4 | --- 5 | 6 | In certain cases, you may be able to share data you obtained from somebody ("data provider") with the AEA Data Editor for the purpose of conducting reproducibility checks, without later publishing the data ("privately sharing data"). This page describes some tips and methods of doing so, and considerations. Please read it carefully. 7 | 8 | ### Permissions 9 | 10 | First and foremost, you must have the rights to *privately* share data. This will be noted in the data use agreement (DUA), license, or non-disclosure agreement (NDA) that you signed to obtain access to the data from the data provider. In general, 11 | 12 | - if you signed an NDA, it is unlikely that you can share data yourself. However, the AEA Data Editor may sign the same or a similar NDA with the data provider. Note that this should always be noted in the **public** README, as this may also apply to any other future replicator. 13 | - if you signed a DUA, read it carefully, as it may explicitly allow or deny you the right to redistribute the data. 14 | - In some cases, you may need to explicitly ask your data provider. Some will allow data to be shared privately between subscribers (examples in the past have included S&P and Bureau Van Dijk, but you should ask). The AEA Data Editor may be able to facilitate the conversation. 15 | - if the data you obtained (via download or purchase) had a license attached to it, read it carefully. Common licenses include CC-BY and variants thereof, which allow for redistribution. Others may not.
16 | - you may have acquired full rights to the data, by collecting it yourself (but careful: participant consent!) or by outright purchasing the data. However, not every purchase gives you full rights to the data! Read your purchase agreement (license). 17 | - In some cases, data usage rights may apply to your "research group" or your "institution." It may be possible to temporarily include the AEA Data Editor as a member of your "research group" for the purpose of conducting reproducibility checks. In other cases, rights are automatically granted to anybody within your institution (your agency, company, or university) while at the same time automatically being limited to that same institution. 18 | 19 | ### Documenting restrictions and instructions 20 | 21 | Whatever restrictions are imposed on the data typically convey to other replicators as well. All restrictions should be documented in the published README, in the section about "[Data Availability and Provenance Statements](https://social-science-data-editors.github.io/template_README/template-README.html#data-availability-and-provenance-statements)." 22 | 23 | If you filled out an application form, it may be useful to include the filled-out application form with the replication package, or to provide the information needed for a replicator to fill out the application form. The AEA Data Editor may need the same information to acquire data that you cannot share directly. 24 | 25 | ### Signaling availability 26 | 27 | AEA authors are required to submit a [Data and Code Availability Form](https://www.aeaweb.org/journals/forms/data-code-availability), which has an option to signal the ability to privately provide data to the AEA Data Editor. Please alert the editor as early as possible about this - in most cases, agreements can take a while to put in place. 
28 | 29 | ### Sharing data with Data Editor 30 | 31 | #### Preferred contractual arrangement 32 | 33 | The AEA Data Editor is an employee of an educational institution, not the AEA itself (see [main page](https://aeadataeditor.github.io/) for the current AEA Data Editor's affiliation). Thus, all access rights are constrained by rules at that educational institution. 34 | 35 | - If an NDA needs to be signed, the use of [Cornell’s Individual Standard Non-Disclosure Agreement](https://researchservices.cornell.edu/resources/individual-standard-non-disclosure-agreement-nda) is the most time-efficient. Deviations thereof may need approval by the host institution's office of sponsored research and may delay signature of the agreement. 36 | 37 | 38 | - If the AEA Data Editor needs to request the data from the original data provider independently, you should provide enough information to fill out all required forms. 39 | 40 | - If IRB approval was required for you to access the data, then there are a few considerations to take into account. 41 | 42 | - In general, the AEA Data Editor does **not** need separate IRB approval, since the work of the AEA Data Editor does not constitute "engagement" (formal involvement in research) because, as described in 45CFR46 (Common Rule) "*the services performed do not merit professional recognition or publication privileges*" [[1](https://www.hhs.gov/ohrp/regulations-and-policy/guidance/guidance-on-engagement-of-institutions/index.html)] (the AEA Data Editor does not publish any of the work they conduct as part of the reproducibility checks) 43 | - However, in some cases, the data provider may still require formal consideration by the IRB at the AEA Data Editor's institution. Our IRB (at this time, Cornell University) has agreed to review such requests if necessary. 
44 | 45 | #### Preferred data access 46 | 47 | - If the data do not contain personal identifiers, then in general, transfer is straightforward, but check with your DUA/NDA/IRB approval/exemption. The AEA Data Editor uses IT systems that are university-controlled, with access protected by VPN and individual sign-on, both using 2FA. 48 | - If the data do require higher security protocols, the AEA Data Editor has access to [high-security remote-access environments at Cornell University](https://ciser.cornell.edu/data/secure-data-services/cradc/), though setting up access may require additional time (and may require approval by Cornell's Office of Sponsored Research). 49 | - If your DUA/NDA allows it, you can share data with the AEA Data Editor using other means, such as Dropbox, Google Drive, OneDrive, etc. 50 | - If necessary, you can also provide the AEA Data Editor with remote access to your own computing infrastructure (remote login to compute nodes or virtual enclaves). 51 | 52 | All of these methods have been successfully used in the past. 53 | 54 | 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unofficial guidance on various topics by the AEA Data Editor 2 | 3 | 4 | These web pages provide unofficial and developing guidance on the implementation of the American Economic Association (AEA)'s Data and Code Availability Policy. We also provide links to [generic guidance](https://social-science-data-editors.github.io/guidance/) being developed by a loose collective ("guild") of data editors and people in a similar role at various social science journals. 5 | 6 | Follow @aeadata 7 | 8 | --- 9 | 10 | ## Order in which AEA authors should read these resources 11 | 12 | > 1. Start with the **[official Data and Code Availability Policy](https://www.aeaweb.org/journals/policies/data-code)** 13 | > 2. 
Follow the **[step-by-step guidance](step-by-step.md)** to walk you through preparing and uploading your replication packages, and what to expect afterwards. 14 | 15 | ## Additional guidance 16 | 17 | In addition to the detailed [step-by-step guidance](step-by-step.md), the following sites contain useful information: 18 | 19 | > 1. Look for general guidance at the **[Social Science Data Editors pages](https://social-science-data-editors.github.io/guidance/)** 20 | > 2. Read the **[AEA's FAQ](https://www.aeaweb.org/journals/policies/data-code/faq)** 21 | > 3. Have a look at the **[draft FAQ on this site](FAQ.md)** for thorny issues 22 | 23 | Comments are welcome, please file them as [issues](https://github.com/AEADataEditor/aea-de-guidance/issues) in our Github repo. 24 | 25 | --- 26 | 27 | ## Guidance on creating replicable data and program archives 28 | 29 | How should researchers create replicable data and program archives? How should such archives be structured, how documented, and where should they be located? 30 | 31 | --- 32 | 33 | > **[README template](https://social-science-data-editors.github.io/guidance/template-README.html)** 34 | 35 | > **[Preparing for a data and code deposit](preparing-for-data-deposit.md)** 36 | 37 | --- 38 | 39 | - **Common guidance**: Several economics and other social science data editors are collaborating on this. See [https://social-science-data-editors.github.io/guidance/](https://social-science-data-editors.github.io/guidance/) for more detailed guidance. Your replication package should be able to be submitted to any journal. 40 | - A **[README template](https://social-science-data-editors.github.io/guidance/template-README.html)** recommended by multiple data editors in economics can be used as a starting point for a data and code archive's master README, describing where the data comes from, what resources are necessary to reproduce the paper's results, and how to run the code. 
41 | - **Licensing**: Issues about licensing are complex, we suggest the [Social Science Data Editor's discussion of licensing](https://social-science-data-editors.github.io/guidance/Licensing_guidance.html). The [AEA Data and Code Repository](https://www.openicpsr.org/openicpsr/aea) defaults to a [Creative Commons Attribution 4.0 International](https://creativecommons.org/licenses/by/4.0/) (CC-BY) license; however, we suggest that authors choose a [hybrid license](LICENSE-template.md) when their repository contains data, code and software, as the CC-BY license is [not recommended for software](https://creativecommons.org/faq/#Can_I_apply_a_Creative_Commons_license_to_software.3F). 42 | 43 | ## Guidance on data citations 44 | 45 | Data citation is critical for documenting data provenance, and the **AEA requires data citations**. But data citations can also be hard. 46 | 47 | - General guidance can be found on the [Social Science Data Editor's page on the topic](https://social-science-data-editors.github.io/guidance/Data_citation_guidance.html) 48 | - Some particularly thorny issues and their solutions can be found [on this website](https://social-science-data-editors.github.io/guidance/addtl-data-citation-guidance.html). 49 | 50 | ## Guidance on depositing in the AEA Data and Code Repository 51 | 52 | --- 53 | 54 | > **[Detailed guidance](data-deposit-aea.md)** 55 | 56 | --- 57 | 58 | The AEA migrated to a new data and code repository in July 2019. 
See 59 | 60 | - the [official Data and Code Availability Policy](https://www.aeaweb.org/journals/policies/data-code), 61 | - the associated [FAQ](https://www.aeaweb.org/journals/policies/data-code/faq), 62 | - [detailed additional guidance on these pages](data-deposit-aea.md), and the [draft FAQ on this site](FAQ.md) 63 | 64 | ## Guidance on testing replicability of code 65 | 66 | --- 67 | 68 | > **[Our replication template report](https://github.com/AEADataEditor/replication-template/blob/master/REPLICATION.md)** 69 | 70 | --- 71 | 72 | The code and data that have been archived should be reproducible and replicable. How do we test that? 73 | 74 | - Generic guidance is provided at the [Social Science Data Editors' Guidance](https://social-science-data-editors.github.io/guidance/) website. 75 | - At the AEA, 76 | - we use **[this template](https://github.com/AEADataEditor/replication-template/blob/master/REPLICATION.md)** to guide our replicators. 77 | - [Example 1](sample-report.md) 78 | - We assess 79 | - software availability 80 | - data availability 81 | - code availability and clarity 82 | - needed computational resources 83 | - time needed to acquire or use all of the above, and conduct the reproducibility check 84 | - When some of the conditions are not met with our own resources, we may ask others to conduct a reproducibility exercise for us. 85 | - Our [PROTOCOL](https://www.aeaweb.org/journals/data/policy-third-party) describes how we might request reports. 86 | - [Sample report 1](sample-report-3rd-1.md) 87 | - We may ask others to do so because 88 | - They are experts 89 | - They have access to the software 90 | - They have access to the data 91 | - They have access to computational resources needed 92 | - **No reproducibility check is discarded out of hand** 93 | 94 | 95 | ## Frequently Asked Questions 96 | 97 | - Start by consulting the [FAQ](https://www.aeaweb.org/journals/policies/data-code/faq) on the AEA Website. 
This generally applies specifically to procedures at the AEA journals. 98 | - AEA-specific updates and additions will first appear in the [draft FAQ on this site](FAQ.md) 99 | - More general questions will have answers at the [Social Science Data Editors FAQ](https://social-science-data-editors.github.io/guidance/FAQ.html) 100 | 101 | 102 | 103 | ## Most Recent Tweets 104 | 105 | Tweets by AeaData 106 | 107 | -------------------------------------------------------------------------------- /citations/guidance_data_citations.tex: -------------------------------------------------------------------------------- 1 | % AEJ-Article.tex for AEA last revised 22 June 2011 2 | %%%%%!TeX TXS-program:bibliography = txs:///biber 3 | \documentclass[AEJ]{AEA} 4 | 5 | %%%%%% NOTE FROM OVERLEAF: The mathtime package is no longer publicly available nor distributed. We recommend using a different font package e.g. mathptmx if you'd like to use a Times font. 6 | % \usepackage{mathptmx} 7 | 8 | % The mathtime package uses a Times font instead of Computer Modern. 9 | % Uncomment the line below if you wish to use the mathtime package: 10 | %\usepackage[cmbold]{mathtime} 11 | % Note that miktex, by default, configures the mathtime package to use commercial fonts 12 | % which you may not have. If you would like to use mathtime but you are seeing error 13 | % messages about missing fonts (mtex.pfb, mtsy.pfb, or rmtmi.pfb) then please see 14 | % the technical support document at http://www.aeaweb.org/templates/technical_support.pdf 15 | % for instructions on fixing this problem. 16 | 17 | % Note: you may use either harvard or natbib (but not both) to provide a wider 18 | % variety of citation commands than latex supports natively. See below. 
19 | 20 | % Uncomment the next line to use the natbib package with bibtex 21 | \usepackage{natbib} 22 | %\usepackage[style=chicago-authordate,sorting=ydnt,maxnames=10,backend=biber,natbib=true]{biblatex} 23 | %\addbibresource{paper.bib} 24 | %\addbibresource{references.bib} 25 | 26 | \usepackage{hyperref} 27 | \usepackage{listings} 28 | \usepackage{acronym} 29 | \usepackage[names]{xcolor} 30 | % Uncomment the next line to use the harvard package with bibtex 31 | %\usepackage[abbr]{harvard} 32 | 33 | % This command determines the leading (vertical space between lines) in draft mode 34 | % with 1.5 corresponding to "double" spacing. 35 | \draftSpacing{1.5} 36 | 37 | %% Acronyms 38 | \acrodef{AEA}{American Economic Association} 39 | \acrodef{DOI}{Digital Object Identifier} 40 | \acrodef{FAIR}{Findable, Accessible, Interoperable, Re-usable} 41 | \acrodef{PSID}{Panel Study of Income Dynamics} 42 | \acrodef{HRS}{Health and Retirement Study} 43 | 44 | % reset colors 45 | \definecolor{darkblue}{rgb}{0 0 255} 46 | \hypersetup{colorlinks,breaklinks,citecolor=darkblue,linkcolor=darkblue,urlcolor=darkblue} 47 | \begin{document} 48 | 49 | \title{Formatting Data Citations - BibTeX Version} 50 | \shortTitle{BibTeX Data Citations} 51 | \author{Lars Vilhuber\thanks{% 52 | Vilhuber: Cornell University, lars.vilhuber@cornell.edu.}} 53 | \date{\today} 54 | \pubMonth{Month} 55 | \pubYear{Year} 56 | \pubVolume{Vol} 57 | \pubIssue{Issue} 58 | \JEL{} 59 | \Keywords{} 60 | 61 | \begin{abstract} 62 | We illustrate how to create data citations with \LaTeX{} and BibTeX. 63 | \end{abstract} 64 | 65 | \maketitle 66 | The purpose of scientific publishing is the dissemination of robust research findings, exposing them to the scrutiny of peers. Key to this endeavor is documenting the provenance of those findings. For empirical articles, the foundations on which they reside are external to the article, and often to the journal, in which they are published. 
In consequence, there is a need to properly cite the digital inputs to our published output and to properly curate those inputs. 67 | 68 | 69 | \section{Data Citations} 70 | Properly referencing data goes beyond just reproducibility - it is also proper scientific writing style. In the same way that we use bibliographic references to ``printed'' resources, we should also be using such references for data resources, to give and receive credit where credit is due. Not referencing an article or book is at best an oversight, and at worst plagiarism - and the same should apply to data objects. Numerous guides and tutorials exist (ICPSR, Force11, \cite{dataone-l09}). 71 | 72 | \subsection{What to cite} 73 | 74 | In a nutshell, every dataset is to be cited. This is true for the main article as well as online appendices. In the past, use of data or code has been acknowledged in footnotes, and only rarely through bibliographic references. However, if the dataset is used, it should appear in the bibliography. The same is true for code reused from previous papers, or provided by authors. 75 | 76 | \subsection{How to cite} 77 | 78 | The AEA uses the Chicago style for citations and bibliographies \citep{aeadatarefs}. However, the Chicago Style Manual \citep{citation-machine,ChicagoManualofStyleChicagoManualStyle2018} does not provide examples for data citations, and neither does the Citation Style Language\footnote{\url{https://citationstyles.org/}} used by applications like Zotero\footnote{\url{https://www.zotero.org/}} and Mendeley Desktop\footnote{\url{https://www.mendeley.com/download-desktop/}}. 79 | 80 | 81 | DataONE \citep{dataone-cite} suggests content and style that resemble the generic working paper or article citation style (adapted to Chicago style): 82 | \begin{quote}\tt 83 | Westbrook JW, Kitajima K, Burleigh JG, Kress WJ, Erickson DL, Wright SJ (2011) Data from: What makes a leaf tough? 
Patterns of correlated evolution between leaf toughness traits and demographic rates among 197 shade-tolerant woody species in a neotropical forest. Dryad Digital Repository. http://dx.doi.org/10.5061/dryad.8525 84 | \end{quote} 85 | ICPSR \citep{icpsr-data-cite} notes that a citation should include the following items: 86 | \begin{itemize} 87 | \item Title 88 | \item Author 89 | \item Date 90 | \item Version 91 | \item Persistent identifier (such as the Digital Object Identifier, Uniform Resource Name URN, or Handle System) 92 | \end{itemize} 93 | and provides a few examples, with some additional modifiers: 94 | \begin{quote}\tt 95 | Esther Duflo; Rohini Pande, 2006, ``Dams, Poverty, Public Goods and Malaria Incidence in India'', http://hdl.handle.net/1902.1/IOJHHXOOLZ UNF:5:obNHHq1gtV400a4T+Xrp9g== Murray Research Archive [Distributor] V2 [Version] 96 | \end{quote} 97 | Finally, the AEA style guide \citep{aeadatarefs} suggests 98 | \begin{quote}\tt 99 | Leiss, Amelia. 1999. ``Arms Transfers to Developing Countries, 1945--1968.'' 100 | Inter-University Consortium for Political and Social Research, Ann Arbor, MI. 101 | ICPSR05404-v1. doi:10.3886/ICPSR05404 (accessed February 8, 2011). 102 | \end{quote} 103 | 104 | \subsection{Software} 105 | 106 | As part of our activities, the AEA prepress department has started the process of updating AEA templates available through such software.\footnote{For the technically inclined, this process involves updating an existing style or creating a new style on \url{https://citationstyles.org/} and \url{https://github.com/citation-style-language/styles}, from where it propagates to a large number of software packages.} 107 | 108 | \paragraph{BibLaTeX} 109 | Users of BibLaTeX should consult the companion document. 
110 | 111 | \paragraph{BibTeX} 112 | For users of BibTeX, a generic database entry might look like 113 | \lstset{language=} 114 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=30,lastline=38]{paper.bib} 115 | % 116 | or 117 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=40,lastline=49]{paper.bib} 118 | % 119 | and thus generate ``\citet{duflopande2006}'' and ``\citet{leiss1999}'' and the bibliographic entry in the References when using the pre-2018 \texttt{aea.bst} provided by the \ac{AEA}. Note the use of the note field to encapsulate the information. The \texttt{number} field contains the key identifying information: version and (in this case), the UNF number generated by the Dataverse software. 120 | 121 | A modification to the \texttt{aea.bst} file, tentatively named \href{aea-mod.bst}{\texttt{aea-mod.bst}}, might allow for a cleaner implementation: 122 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=54,lastline=62]{paper.bib} 123 | and 124 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=64,lastline=74]{paper.bib} 125 | which will generate ``\citet{duflopande2006-new}'' and ``\citet{leiss1999-new}''. Note that we have not used the access date for either dataset, since both use persistent identifiers (handle or DOI). 126 | 127 | 128 | \newpage 129 | % Remove or comment out the next two lines if you are not using bibtex. 130 | \bibliographystyle{aea-mod} 131 | \bibliography{paper,references} 132 | 133 | 134 | 135 | \end{document} 136 | 137 | -------------------------------------------------------------------------------- /citations/guidance_data_citations_biblatex.tex: -------------------------------------------------------------------------------- 1 | % AEJ-Article.tex for AEA last revised 22 June 2011 2 | %!TeX TXS-program:bibliography = txs:///biber 3 | \documentclass[AEJ]{AEA} 4 | 5 | %%%%%% NOTE FROM OVERLEAF: The mathtime package is no longer publicly available nor distributed. 
We recommend using a different font package e.g. mathptmx if you'd like to use a Times font. 6 | % \usepackage{mathptmx} 7 | 8 | % The mathtime package uses a Times font instead of Computer Modern. 9 | % Uncomment the line below if you wish to use the mathtime package: 10 | %\usepackage[cmbold]{mathtime} 11 | % Note that miktex, by default, configures the mathtime package to use commercial fonts 12 | % which you may not have. If you would like to use mathtime but you are seeing error 13 | % messages about missing fonts (mtex.pfb, mtsy.pfb, or rmtmi.pfb) then please see 14 | % the technical support document at http://www.aeaweb.org/templates/technical_support.pdf 15 | % for instructions on fixing this problem. 16 | 17 | % Note: you may use either harvard or natbib (but not both) to provide a wider 18 | % variety of citation commands than latex supports natively. See below. 19 | 20 | % Uncomment the next line to use the natbib package with bibtex 21 | %\usepackage{natbib} 22 | %\usepackage[style=chicago-authordate,sorting=nyt,maxnames=3,doi=true,backend=biber,natbib=true]{biblatex} 23 | \usepackage[authordate,backend=biber]{biblatex-chicago} 24 | \newcommand{\citep}{\parencite} 25 | \newcommand{\citet}{\textcite} 26 | \input{chicago-authordate-aea.tex} 27 | \addbibresource{paper.bib} 28 | \addbibresource{references.bib} 29 | 30 | \usepackage{hyperref} 31 | \usepackage{listings} 32 | \usepackage{acronym} 33 | \usepackage[names]{xcolor} 34 | % Uncomment the next line to use the harvard package with bibtex 35 | %\usepackage[abbr]{harvard} 36 | 37 | % This command determines the leading (vertical space between lines) in draft mode 38 | % with 1.5 corresponding to "double" spacing. 
39 | \draftSpacing{1.5} 40 | 41 | %% Acronyms 42 | \acrodef{AEA}{American Economic Association} 43 | \acrodef{DOI}{Digital Object Identifier} 44 | \acrodef{FAIR}{Findable, Accessible, Interoperable, Re-usable} 45 | \acrodef{PSID}{Panel Study of Income Dynamics} 46 | \acrodef{HRS}{Health and Retirement Study} 47 | 48 | % reset colors 49 | \definecolor{darkblue}{rgb}{0 0 255} 50 | \hypersetup{colorlinks,breaklinks,citecolor=darkblue,linkcolor=darkblue,urlcolor=darkblue} 51 | \begin{document} 52 | 53 | \title{Formatting Data Citations - BibLaTeX Version} 54 | \shortTitle{BibLaTeX Data Citations} 55 | \author{Lars Vilhuber\thanks{% 56 | Vilhuber: Cornell University, lars.vilhuber@cornell.edu.}} 57 | \date{\today} 58 | \pubMonth{Month} 59 | \pubYear{Year} 60 | \pubVolume{Vol} 61 | \pubIssue{Issue} 62 | \JEL{} 63 | \Keywords{} 64 | 65 | \begin{abstract} 66 | We illustrate how to create data citations with \LaTeX{} and BibLaTeX. 67 | \end{abstract} 68 | 69 | \maketitle 70 | The purpose of scientific publishing is the dissemination of robust research findings, exposing them to the scrutiny of peers. Key to this endeavor is documenting the provenance of those findings. For empirical articles, the foundations on which they reside are external to the article, and often to the journal, in which they are published. In consequence, there is a need to properly cite the digital inputs to our published output and to properly curate those inputs. 71 | 72 | 73 | \section{Data Citations} 74 | Properly referencing data goes beyond just reproducibility - it is also proper scientific writing style. In the same way that we use bibliographic references to ``printed'' resources, we should also be using such references for data resources, to give and receive credit where credit is due. Not referencing an article or book is at best an oversight, and at worst plagiarism - and the same should apply to data objects. Numerous guides and tutorials exist (ICPSR, Force11, \citet{dataone-l09}). 
75 | 76 | \subsection{What to cite} 77 | 78 | In a nutshell, every dataset is to be cited. This is true for the main article as well as online appendices. In the past, use of data or code has been acknowledged in footnotes, and only rarely through bibliographic references. However, if the dataset is used, it should appear in the bibliography. The same is true for code reused from previous papers, or provided by authors. 79 | 80 | \subsection{How to cite} 81 | 82 | The AEA uses the Chicago style for citations and bibliographies \citep{aeadatarefs}. However, the Chicago Style Manual \citep{citation-machine,ChicagoManualofStyleChicagoManualStyle2018} does not provide examples for data citations, and neither does the Citation Style Language\footnote{\url{https://citationstyles.org/}} used by applications like Zotero\footnote{\url{https://www.zotero.org/}} and Mendeley Desktop\footnote{\url{https://www.mendeley.com/download-desktop/}}. 83 | 84 | 85 | \citet{dataone-cite} suggests content and style that resemble the generic working paper or article citation style (adapted to Chicago style): 86 | \begin{quote}\tt 87 | Westbrook JW, Kitajima K, Burleigh JG, Kress WJ, Erickson DL, Wright SJ (2011) Data from: What makes a leaf tough? Patterns of correlated evolution between leaf toughness traits and demographic rates among 197 shade-tolerant woody species in a neotropical forest. Dryad Digital Repository. 
http://dx.doi.org/10.5061/dryad.8525 88 | \end{quote} 89 | ICPSR \citep{icpsr-data-cite} notes that a citation should include the following items: 90 | \begin{itemize} 91 | \item Title 92 | \item Author 93 | \item Date 94 | \item Version 95 | \item Persistent identifier (such as the Digital Object Identifier, Uniform Resource Name URN, or Handle System) 96 | \end{itemize} 97 | and provides a few examples, with some additional modifiers: 98 | \begin{quote}\tt 99 | Esther Duflo; Rohini Pande, 2006, ``Dams, Poverty, Public Goods and Malaria Incidence in India'', http://hdl.handle.net/1902.1/IOJHHXOOLZ UNF:5:obNHHq1gtV400a4T+Xrp9g== Murray Research Archive [Distributor] V2 [Version] 100 | \end{quote} 101 | Finally, the AEA style guide \citep{aeadatarefs} suggests 102 | \begin{quote}\tt 103 | Leiss, Amelia. 1999. ``Arms Transfers to Developing Countries, 1945--1968.'' 104 | Inter-University Consortium for Political and Social Research, Ann Arbor, MI. 105 | ICPSR05404-v1. doi:10.3886/ICPSR05404 (accessed February 8, 2011). 106 | \end{quote} 107 | 108 | \subsection{Software} 109 | 110 | As part of our activities, the AEA prepress department has started the process of updating AEA templates available through such software.\footnote{For the technically inclined, this process involves updating an existing style or creating a new style on \url{https://citationstyles.org/} and \url{https://github.com/citation-style-language/styles}, from where it propagates to a large number of software packages.} 111 | 112 | \paragraph{BibTeX} 113 | Users of BibTeX should consult the companion document. 
114 | 115 | \paragraph{BibLaTeX} 116 | \lstset{basicstyle=\small\ttfamily} 117 | For users of BibLaTeX, both the \texttt{biblatex} package with option \texttt{style=chicago-authordate} 118 | \begin{lstlisting}[language=TeX] 119 | \usepackage[style=chicago-authordate,doi=true,backend=biber,natbib=true]{biblatex} 120 | \end{lstlisting} 121 | as well as the \texttt{biblatex-chicago} package with option \texttt{authordate} 122 | \begin{lstlisting}[language=TeX] 123 | \usepackage[authordate,backend=biber]{biblatex-chicago} 124 | % to achieve natbib compatibility, optionally define these aliases 125 | \newcommand{\citep}{\parencite} 126 | \newcommand{\citet}{\textcite} 127 | \end{lstlisting} 128 | will yield satisfactory results, with one exception: \ac{DOI} should be formatted as the full URL. We achieve this with a minor modification: 129 | \lstinputlisting[numbers=left,language=TeX]{chicago-authordate-aea.tex} 130 | 131 | A generic BibLaTeX database entry might look like 132 | \lstset{language=} 133 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=30,lastline=38]{paper.bib} 134 | % 135 | or 136 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=40,lastline=49]{paper.bib} 137 | % 138 | and thus generate ``\citet{duflopande2006}'' and ``\citet{leiss1999}'' as well as 139 | % 140 | \begin{quote} 141 | \fullcite{duflopande2006} 142 | \end{quote} 143 | % 144 | and 145 | % 146 | \begin{quote} 147 | \fullcite{leiss1999} 148 | \end{quote} 149 | % 150 | when using standard ``\texttt{chicago-authordate}'' options to \texttt{biblatex}. Note the use of the note field to encapsulate the information. The \texttt{number} field contains the key identifying information: version and (in this case), the UNF number generated by the Dataverse software. 
151 | 152 | A cleaner implementation: 153 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=54,lastline=62]{paper.bib} 154 | and 155 | \lstinputlisting[basicstyle=\small\ttfamily,firstline=64,lastline=74]{paper.bib} 156 | which will generate ``\citet{duflopande2006-new}'' and ``\citet{leiss1999-new}'' as well as 157 | % 158 | \begin{quote} 159 | \fullcite{duflopande2006-new} 160 | \end{quote} 161 | % 162 | and 163 | % 164 | \begin{quote} 165 | \fullcite{leiss1999-new} 166 | \end{quote} 167 | % 168 | Note that we have not used the access date for either dataset, since both use persistent identifiers (handle or DOI). 169 | 170 | \newpage 171 | % Remove or comment out the next two lines if you are not using bibtex. 172 | %\bibliographystyle{aea-mod} 173 | %\bibliography{paper,references} 174 | 175 | \printbibliography[title={References}] 176 | % The appendix command is issued once, prior to all appendices, if any. 177 | %\appendix 178 | 179 | \end{document} 180 | 181 | -------------------------------------------------------------------------------- /preparing-for-data-deposit.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Preparing your files for verification 3 | layout: withtoc 4 | --- 5 | 6 | This document describes how to best **prepare a replication package** for an AEA journal. Much of the guidance here 7 | is not specific to our journals - in fact, the document links to other websites for tutorials, best practices, etc. The best moment to do the preparation described here is, in fact, when you start the project, not once you have had your manuscript conditionally accepted. However, all steps here can and have been successfully performed at the point of conditional acceptance. 8 | 9 | ### Data Citations 10 | 11 | All manuscripts will be checked for data citations. If you have not done so, now is the time to add them to your 12 | manuscript. 
13 | 14 | #### What to cite 15 | 16 | All datasets actively used in your replication package must be cited. Just as with any other literature, this serves to properly identify the provenance of the information you use, and attribution of credit to the original creator. Data you created should also be cited, either by pointing to a data deposit you made elsewhere, or by pointing to your own (forthcoming) data deposit at the AEA. 17 | 18 | - [Guidance for data citations](https://social-science-data-editors.github.io/guidance/addtl-data-citation-guidance.html#distributor) 19 | - [Guidance on how to cite the data you collected](data-deposit-aea.html#citing-your-deposit) 20 | 21 | 22 | #### Where to cite 23 | 24 | Every citation has an in-text entry (`Smith (2020)` or `(Smith, 2020)`), and a list entry in the Reference section (see the [Chicago Manual of Style Quick Guide](https://www.chicagomanualofstyle.org/tools_citationguide/citation-guide-2.html)). This is true for data citations as well. 25 | 26 | - Your data citations should be listed in the Reference section of the manuscript. 27 | 28 | In rare circumstances, when there are too many data citations to accommodate succinctly, data citations for sources can be deferred to an online data appendix. In all cases, for clarity, all data citations should **also** appear in the README, including with a separate Reference section. 29 | 30 | #### Data citations and Data Availability Statements 31 | 32 | In many cases, the data citation for a public-use dataset has a unique URL that is sufficient for downloading the dataset. However, in many other cases where the URL does not lead directly to the dataset, a separate "Data Availability Statement" needs to be provided, as part of the README. A simple data citation is not sufficient. See [additional guidance on data availability statements](https://social-science-data-editors.github.io/guidance/Requested_information_dcas.html). 
33 | 34 | ### Describing the contents of your replication package 35 | 36 | Every replication package requires a document outlining where the data comes from, what data is provided, what requirements are needed to run the code in the replication package, how to run the code, what results to expect, and where to find the results. This is conventionally called the "README". 37 | 38 | - The AEA requires that the README follow a prescribed schema. Please use the [template README for social science replication packages](https://social-science-data-editors.github.io/guidance/template-README.html). 39 | 40 | Follow the instructions within the template README, and provide it as part of your replication package. 41 | 42 | ### Ideal structure of a replication package 43 | 44 | The AEA uses the openICPSR platform for replication packages. The platform allows users to download complete "deposits", or only subdirectories thereof. 45 | 46 | > Users should not upload ZIP packages as files - rather, ZIP files can be used to structure code and data, but should be unzipped on the platform ("import from ZIP"). 47 | 48 | The code and data should run as downloaded from openICPSR, without further manual modifications (creating empty subdirectories programmatically is acceptable). Because code tends to be small, but data can be large, we strongly advise to not commingle data and code - interested researchers can download the code directory by itself if they wish, without also downloading a potentially very large data directory. 49 | 50 | A simple template might be 51 | ``` 52 | README.pdf 53 | data/ 54 | raw/ 55 | cps0001.dat 56 | analysis/ 57 | combined_data.dta 58 | combined_data.csv 59 | combined_data_codebook.pdf 60 | code/ 61 | 01_create/ 62 | 01_readcps.R 63 | 02_readfred.R 64 | 02_analysis/ 65 | 01_table1-5.R 66 | 02_figures1-4.R 67 | results/ 68 | table1.tex 69 | table2.tex 70 | ... 
71 | figure1.pdf 72 | figure2.pdf 73 | ``` 74 | 75 | If your paper uses restricted-access data, clearly separate the restricted from the open-access data, both in terms of the raw data as well as the processed data: 76 | 77 | ``` 78 | data/ 79 | raw/ 80 | cps0001.dat 81 | confidential/ 82 | ssa.csv 83 | conf_analysis/ 84 | confidential_combined.dta 85 | ``` 86 | 87 | Keep in mind that you may be able to provide a subset of your replication package privately to the AEA Data Editor, see the [Sharing restricted-access data with the AEA Data Editor](sharing-restricted-data.md) page. 88 | 89 | #### No manual modifications 90 | 91 | The replication package should reproduce the tables and figures, as well as any in-text numbers, by **running code without manual intervention**. 92 | 93 | > The only exception to this rule is a single change to set a small number of program and data directory paths. 94 | 95 | While running a small number of distinct programs separately is acceptable (in some cases even desirable), it is not acceptable to require replicators to manually enter numbers, or configure parameter files, in order to reproduce the tables and figures in the paper. Nevertheless, it should be clear from the manuscript and the code how a replicator might deviate from the tables and figures in the paper. 96 | 97 | #### Structure in the presence of confidential (unpublished) data 98 | 99 | When the replication package relies on confidential data that cannot be shared, authors will have to 100 | 101 | - prepare a confidential (partial) replication package, to be archived wherever the confidential data is kept (see [this FAQ](https://social-science-data-editors.github.io/guidance/FAQ.html#how-can-i-ensure-that-the-confidential-data-is-preserved)) 102 | - this would contain the contents of `data/confidential` and possibly `data/conf_analysis` from the example above. 
103 | - prepare a non-confidential replication package that contains all code, and any data that is not subject to publication controls 104 | - this would contain the contents of `data/raw`, `data/analysis`, `code/`, and for reference, `results/` from the example above. 105 | - ensure that replicators have detailed instructions on how to combine the two packages 106 | - specify which (if any) of the results in their paper can be reproduced without the confidential data. 107 | 108 | Authors might want to investigate the possibility of providing "fake" data that might allow replicators to run code, without obtaining meaningful results (functionality test). 109 | 110 | The [Social Science Editors' FAQ](https://social-science-data-editors.github.io/guidance/FAQ.html#i-use-confidential-data-i-am-allowed-to-provide-the-data-to-the-data-editor-for-the-purpose-of-replication-but-you-are-not-allowed-to-publish-the-data-how-do-i-proceed) describes a related issue. 111 | 112 | 113 | 114 | #### Considering the replicator 115 | 116 | The replicator of your package is likely to be less qualified than you are. After all, you are publishing something novel. 
117 | 118 | You should assume 119 | 120 | - that the replicator has basic knowledge in how to run your software package, if the software is commonly used in economics 121 | - Stata, Matlab, some others are commonly used 122 | - Compiled or new computer languages are much less likely to be widely used, even if they are used in your subdiscipline 123 | 124 | ![software usage](assets/figure_software_years_pct.png) 125 | 126 | You can assume 127 | 128 | - that the replicator can manipulate a top-level configuration file 129 | - for instance, to set a base directory 130 | - but not setting a base directory at the top of 25 different files 131 | 132 | You should NOT assume 133 | 134 | - that the replicator will use the same type of operating system 135 | - describe any *hard* requirements, but do not impose any *fake* requirements 136 | - most Stata, Matlab, SAS, R, Python, etc. can run on any operating system, unless you hard-code platform-specific commands into your code 137 | - that the replicator has any of your packages/modules/etc. installed 138 | - provide a setup program to install these (not manual instructions). 139 | - provide copies of such packages/modules when the package repository does not allow you to specify a version 140 | - provide a container/ Docker image/ VM that comprises all the necessary software and libraries 141 | - that the replicator will run the software the same way you do 142 | - some software can be run in different ways (interactive, batch, etc.) and may behave differently depending on how it is run. 143 | - For instance, Stata will change the working directory to that of the program being run in batch mode, but not if running interactively 144 | - For instance, running R code using RStudio may behave differently than running it with `Rscript` 145 | 146 | For less frequently used software, provide a URL where the software can be obtained. 
147 | - essentially, if not listed in the figure above, provide information on how to obtain software 148 | - if using commercial compilers, we also suggest compiling your code using open-source or free compilers (including any free performance packages, such as Intel MKL), even if the resulting code is not the most efficient. 149 | - as of 2021, the AEA Data Editor has access to the software on [this list](https://ciser.cornell.edu/computing/computing-account-software/), and any open-source (free) software that can be installed on Windows, Linux, and macOS. 150 | 151 | ### Re-run your replication package 152 | 153 | Ideally, once you have prepared your replication package, you should re-run the code, in a clean environment, possibly a fresh computer, to ensure that (a) the package is, in fact, reproducible with minimal interaction, and (b) the results are numerically identical. 154 | 155 | - Wherever possible, we strongly encourage running in batch (non-interactive) mode. 156 | 157 | ### Preparing to upload 158 | 159 | Once you are done preparing your replication package, you should upload it: 160 | 161 | - if you have received a conditional acceptance, your replication package **must** be in a trusted repository. The default trusted repository is the [AEA Data and Code Repository](https://www.openicpsr.org/openicpsr/search/aea/studies). Other trusted repositories are acceptable (see [list](https://social-science-data-editors.github.io/guidance/Requested_information_hosting.html#trusted-repositories)), but replication packages should meet the [display guidelines](guidelines-other-repositories.md). 162 | - if you have confidential data that you want to transmit to the AEA Data Editor but do not want published, communicate with the AEA Data Editor directly (see [this FAQ](https://www.aeaweb.org/journals/data/faq#restricted)). 
163 | - if you have received instructions during the revise-and-resubmit process to have a reproducibility check conducted, you may use the AEA Data and Code Repository, but other methods are also acceptable. Do not forget, however, that once the paper is accepted, it **must** be made available on a trusted repository - other methods are then no longer acceptable. 164 | 165 | ### Final checklist 166 | 167 | Before proceeding, do check: 168 | 169 | - [ ] your manuscript includes data citations 170 | - [ ] you have prepared a README that provides all the relevant information, as per the [README template](https://social-science-data-editors.github.io/guidance/template-README.html) 171 | - [ ] your data and code deposit contains all code, including code to read in raw data, *even when the data cannot be provided*. 172 | - [ ] your replication package has been re-executed, and reproduces the tables and figures in your manuscript faithfully. 173 | 174 | ### Next step 175 | 176 | If you are ready, you can proceed to [upload to the AEA Data and Code Repository](data-deposit-aea.md). 177 | 178 | -------------------------------------------------------------------------------- /code/02_codebook_plaintext.txt: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------------------------------------------------------------------------- 2 | name: 3 | log: /mnt/local/slow_home/vilhuber/Workspace-non-encrypted/git/AEA/aea-de-guidance/code/02_codebook_plaintext.txt 4 | log type: text 5 | opened on: 1 Oct 2018, 17:33:59 6 | 7 | . // not run: use my_input_data 8 | . di 9 | 10 | 11 | . sysuse auto 12 | (1978 Automobile Data) 13 | 14 | . /*==== File structure ====*/ 15 | . 
describe 16 | 17 | Contains data from /usr/local/stata14/ado/base/a/auto.dta 18 | obs: 74 1978 Automobile Data 19 | vars: 12 13 Apr 2014 17:45 20 | size: 3,182 (_dta has notes) 21 | --------------------------------------------------------------------------------------------------------------------------------------------------- 22 | storage display value 23 | variable name type format label variable label 24 | --------------------------------------------------------------------------------------------------------------------------------------------------- 25 | make str18 %-18s Make and Model 26 | price int %8.0gc Price 27 | mpg int %8.0g Mileage (mpg) 28 | rep78 int %8.0g Repair Record 1978 29 | headroom float %6.1f Headroom (in.) 30 | trunk int %8.0g Trunk space (cu. ft.) 31 | weight int %8.0gc Weight (lbs.) 32 | length int %8.0g Length (in.) 33 | turn int %8.0g Turn Circle (ft.) 34 | displacement int %8.0g Displacement (cu. in.) 35 | gear_ratio float %6.2f Gear Ratio 36 | foreign byte %8.0g origin Car type 37 | --------------------------------------------------------------------------------------------------------------------------------------------------- 38 | Sorted by: foreign 39 | 40 | . /*==== Summary statistics ====*/ 41 | . codebook 42 | 43 | --------------------------------------------------------------------------------------------------------------------------------------------------- 44 | make Make and Model 45 | --------------------------------------------------------------------------------------------------------------------------------------------------- 46 | 47 | type: string (str18), but longest is str17 48 | 49 | unique values: 74 missing "": 0/74 50 | 51 | examples: "Cad. Deville" 52 | "Dodge Magnum" 53 | "Merc. XR-7" 54 | "Pont. 
Catalina" 55 | 56 | warning: variable has embedded blanks 57 | 58 | --------------------------------------------------------------------------------------------------------------------------------------------------- 59 | price Price 60 | --------------------------------------------------------------------------------------------------------------------------------------------------- 61 | 62 | type: numeric (int) 63 | 64 | range: [3291,15906] units: 1 65 | unique values: 74 missing .: 0/74 66 | 67 | mean: 6165.26 68 | std. dev: 2949.5 69 | 70 | percentiles: 10% 25% 50% 75% 90% 71 | 3895 4195 5006.5 6342 11385 72 | 73 | --------------------------------------------------------------------------------------------------------------------------------------------------- 74 | mpg Mileage (mpg) 75 | --------------------------------------------------------------------------------------------------------------------------------------------------- 76 | 77 | type: numeric (int) 78 | 79 | range: [12,41] units: 1 80 | unique values: 21 missing .: 0/74 81 | 82 | mean: 21.2973 83 | std. dev: 5.7855 84 | 85 | percentiles: 10% 25% 50% 75% 90% 86 | 14 18 20 25 29 87 | 88 | --------------------------------------------------------------------------------------------------------------------------------------------------- 89 | rep78 Repair Record 1978 90 | --------------------------------------------------------------------------------------------------------------------------------------------------- 91 | 92 | type: numeric (int) 93 | 94 | range: [1,5] units: 1 95 | unique values: 5 missing .: 5/74 96 | 97 | tabulation: Freq. Value 98 | 2 1 99 | 8 2 100 | 30 3 101 | 18 4 102 | 11 5 103 | 5 . 104 | 105 | --------------------------------------------------------------------------------------------------------------------------------------------------- 106 | headroom Headroom (in.) 
107 | --------------------------------------------------------------------------------------------------------------------------------------------------- 108 | 109 | type: numeric (float) 110 | 111 | range: [1.5,5] units: .1 112 | unique values: 8 missing .: 0/74 113 | 114 | tabulation: Freq. Value 115 | 4 1.5 116 | 13 2 117 | 14 2.5 118 | 13 3 119 | 15 3.5 120 | 10 4 121 | 4 4.5 122 | 1 5 123 | 124 | --------------------------------------------------------------------------------------------------------------------------------------------------- 125 | trunk Trunk space (cu. ft.) 126 | --------------------------------------------------------------------------------------------------------------------------------------------------- 127 | 128 | type: numeric (int) 129 | 130 | range: [5,23] units: 1 131 | unique values: 18 missing .: 0/74 132 | 133 | mean: 13.7568 134 | std. dev: 4.2774 135 | 136 | percentiles: 10% 25% 50% 75% 90% 137 | 8 10 14 17 20 138 | 139 | --------------------------------------------------------------------------------------------------------------------------------------------------- 140 | weight Weight (lbs.) 141 | --------------------------------------------------------------------------------------------------------------------------------------------------- 142 | 143 | type: numeric (int) 144 | 145 | range: [1760,4840] units: 10 146 | unique values: 64 missing .: 0/74 147 | 148 | mean: 3019.46 149 | std. dev: 777.194 150 | 151 | percentiles: 10% 25% 50% 75% 90% 152 | 2020 2240 3190 3600 4060 153 | 154 | --------------------------------------------------------------------------------------------------------------------------------------------------- 155 | length Length (in.) 
156 | --------------------------------------------------------------------------------------------------------------------------------------------------- 157 | 158 | type: numeric (int) 159 | 160 | range: [142,233] units: 1 161 | unique values: 47 missing .: 0/74 162 | 163 | mean: 187.932 164 | std. dev: 22.2663 165 | 166 | percentiles: 10% 25% 50% 75% 90% 167 | 157 170 192.5 204 218 168 | 169 | --------------------------------------------------------------------------------------------------------------------------------------------------- 170 | turn Turn Circle (ft.) 171 | --------------------------------------------------------------------------------------------------------------------------------------------------- 172 | 173 | type: numeric (int) 174 | 175 | range: [31,51] units: 1 176 | unique values: 18 missing .: 0/74 177 | 178 | mean: 39.6486 179 | std. dev: 4.39935 180 | 181 | percentiles: 10% 25% 50% 75% 90% 182 | 34 36 40 43 45 183 | 184 | --------------------------------------------------------------------------------------------------------------------------------------------------- 185 | displacement Displacement (cu. in.) 186 | --------------------------------------------------------------------------------------------------------------------------------------------------- 187 | 188 | type: numeric (int) 189 | 190 | range: [79,425] units: 1 191 | unique values: 31 missing .: 0/74 192 | 193 | mean: 197.297 194 | std. 
dev: 91.8372 195 | 196 | percentiles: 10% 25% 50% 75% 90% 197 | 97 119 196 250 350 198 | 199 | --------------------------------------------------------------------------------------------------------------------------------------------------- 200 | gear_ratio Gear Ratio 201 | --------------------------------------------------------------------------------------------------------------------------------------------------- 202 | 203 | type: numeric (float) 204 | 205 | range: [2.19,3.89] units: .01 206 | unique values: 36 missing .: 0/74 207 | 208 | mean: 3.01486 209 | std. dev: .456287 210 | 211 | percentiles: 10% 25% 50% 75% 90% 212 | 2.43 2.73 2.955 3.37 3.72 213 | 214 | --------------------------------------------------------------------------------------------------------------------------------------------------- 215 | foreign Car type 216 | --------------------------------------------------------------------------------------------------------------------------------------------------- 217 | 218 | type: numeric (byte) 219 | label: origin 220 | 221 | range: [0,1] units: 1 222 | unique values: 2 missing .: 0/74 223 | 224 | tabulation: Freq. Numeric Label 225 | 52 0 Domestic 226 | 22 1 Foreign 227 | 228 | . qui log c 229 | -------------------------------------------------------------------------------- /data-deposit-aea.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: withtoc 3 | title: Guidance on how to deposit data at the AEA Data and Code Repository 4 | --- 5 | 6 | ### Tutorial 7 | 8 | For a video tutorial on this process, see [this Youtube video](https://youtu.be/MnEl1shX18Y). 
9 | 10 | ### Start the deposit process 11 | 12 | Go to the [AEA Data and Code Repository](https://www.openicpsr.org/openicpsr/aea), and start the process: 13 | 14 | [![Start process](assets/icpsr-start-process.png)](https://www.openicpsr.org/openicpsr/aea) 15 | 16 | --- 17 | 18 | ### Checklist for Metadata 19 | 20 | **Required**: 21 | 22 | - [ ] Title (Suggested: "*Data and code for: (NAME OF PAPER)*") 23 | - [ ] "Principal Investigators" (=Authors; these need not be in the same order). Please ensure that all authors have affiliations (if not affiliated: "Independent Researcher") 24 | - [ ] Abstract (Suggested: The abstract from the article and/or a note that this is data and/or code accompanying the article) 25 | - [ ] Subject Terms (e.g., "Machine Learning", "Randomized Control Trial", "Nudges", ...) 26 | - [ ] JEL Classification (can be the same as article) 27 | - [ ] Manuscript Number (your Scholar One tracking number as assigned by the editorial office, e.g., "AER-2019-0000") 28 | 29 | **Conditionally required** 30 | 31 | *Most deposits will also need to provide the following metadata elements. In some cases, it may not make sense to fill out (for instance, a laboratory experiment may have no meaningful "geographic coverage"). These elements contribute to better inclusion in search engines.* 32 | 33 | - [ ] Geographic coverage (e.g, "United States", "Florida, U.S.", "Indonesia", ...) 34 | - [ ] Time period(s) (e.g., "1982-2008") 35 | - [ ] Collection date(s) 36 | - [ ] Universe (e.g., "All households in Canada", "Manufacturing establishments in Indonesia", ...) 37 | - [ ] Data Type(s) 38 | 39 | **Suggested** 40 | 41 | *The following elements are suggested for certain types of data, and may not apply to all types of data.* 42 | 43 | - [ ] Data Source 44 | - [ ] Units of Observation 45 | - [ ] Any additional metadata elements 46 | 47 | --- 48 | 49 | 50 | Start by providing the metadata (descriptors) for the data and code you are uploading. 
51 | 52 | 53 | ### Details on Filling Out Metadata 54 | 55 | 56 | #### Describe the project 57 | 58 | ![screenshot of project description](assets/project-description-icpsr.png) 59 | 60 | - The **title** should be "`Data and Code for: [Title of article]`" 61 | - The **authors** should be those who compiled the data and code. This may differ (if necessary) from the article. 62 | - The **summary** might be short. It can include the **abstract** of the article itself. It does not need to include information on the related article (which has its own field). 63 | - Identify any **funding sources** here - the information can be queried by some funders, and can assist with your award reporting. 64 | 65 | 66 | #### Scope of project section 67 | 68 | To fill out the required metadata elements **Subject Terms**, **JEL Classification**, and **Manuscript Number**, open the "Scope of Project" section: 69 | 70 | ![metadata of project](assets/project-metadata-icpsr.png) 71 | 72 | **Click on each + to open the related section:** 73 | 74 | 75 | ![scope of project](assets/project-scope-of-project-icpsr.png) 76 | 77 | - Authors **MUST** provide additional subject terms (keywords). You do not need to repeat JEL codes. 78 | - Authors **MUST** provide JEL codes (under "Scope of Project") 79 | - Authors **MUST** provide the **Manuscript Number**, (your Scholar One tracking number as assigned by the editorial office, e.g., "AER-2019-0000") as this will allow us to properly connect the repository with the manuscript. 80 | - Where appropriate, authors are **REQUIRED** to define 81 | - the geographical scope(s) 82 | - the time period(s) 83 | - the universe(s) 84 | - data type(s) 85 | - Most fields are repeatable, please enter as many values as needed. For instance, if subsets of the data cover different periods (e.g., `1999-2019` and `2004-2019`). Just click "add value" next to the time period field for each time period. 
86 | - This information can also be provided when only code is made available. 87 | - When only code is produced, authors should choose `data type = program source code`: ![program source code](assets/project-data-type-icpsr.png) 88 | 89 | #### Methodology section 90 | 91 | ![methodology section](assets/project-methodology-icpsr.png) 92 | 93 | - Methodology is particularly relevant for survey or experimental data: 94 | - response rates, sampling rates, etc. 95 | - We ENCOURAGE all authors to define 96 | - the unit of observation (e.g. individual, firm, establishment, county, country) 97 | 98 | #### Related publications section 99 | 100 | ![related publications](assets/project-related-icpsr.png) 101 | 102 | - The AEA editorial office will provide an entry for this field that links back to the **published manuscript** - authors do not need to add any reference to the manuscript anywhere in the deposit form (other than the Manuscript Number) 103 | - Authors are encouraged to link back to working papers or related publications that have used or will use this (same!) data. 104 | - If code is derived from or continues to be updated on a Git repository (Github, Gitlab, Bitbucket, etc.), authors can link to it here. 105 | - Future functionality will automatically list articles (including articles by third parties) that cite the data. 106 | 107 | 108 | 109 | ### Uploading 110 | 111 | Once the metadata is completed, authors can upload files. 112 | 113 | Upload files in the way you expect the files to be organized in order to run the code. 114 | 115 | --- 116 | 117 | #### Checklist for Uploading 118 | 119 | - [ ] README is in PDF or TXT format 120 | - [ ] DO NOT UPLOAD A ZIP FILE - IMPORT IT! 121 | - [ ] Do not upload manuscripts, appendices, responses to editors, etc. 122 | - [ ] Directory structure does not contain redundant directories 123 | - [ ] Do not upload data that you do not have the rights to publish! 
124 | 125 | --- 126 | 127 | #### Some caveats 128 | 129 | - If the **UNCOMPRESSED** contents of the deposit (the **UNZIPPED** size of the ZIP file) are larger than **30GB**, please send an email to the AEA Data Editor to request an increase in the quota. Reasonable requests will be authorized. Size of the deposit is never a reason not to provide materials, as we have found solutions for every single case so far. 130 | - If you have **more than 1,000 files** in your deposit, talk to us before uploading. 131 | - The Import functionality can handle ZIP files, but cannot handle other compression formats (RAR,7z, etc.). Please convert to ZIP before importing. 132 | 133 | #### Restricted-access data 134 | 135 | Do not upload data that you do not want published! Contact the AEA Data Editor if you are able to share data for reproducibility checks that cannot be published. Consult the [Sharing restricted-access data with the AEA Data Editor](sharing-restricted-data.md) page. 136 | 137 | 138 | #### Tips 139 | 140 | ![screenshot of upload and import options](assets/upload-import-icpsr.png) 141 | 142 | - It is possible to **IMPORT a ZIP file** (do **NOT** upload a ZIP file - no ZIP files should be visible in the deposit). Replicators will be downloading a ZIP file that preserves the directory structure. 143 | - A well prepared ZIP file has NO folder in the root 144 | - macOS users should [see our FAQ on this topic](https://aeadataeditor.github.io/aea-de-guidance/FAQ.html#what-is-that-__macosx-folder-which-seems-to-contain-a-second-copy-of-all-the--replication-files-i-am-not-sure-why-this-folder-exists) 145 | - Instructional videos: [macOS](https://www.youtube.com/watch?v=fCfVu55YsJg), [Windows](https://youtu.be/MnEl1shX18Y?t=135) 146 | - Please upload the README (in PDF or TXT) as the very first file - ensuring that it can be found easily by browsers of the archive. 
147 | - It is OK to upload Markdown or Word documents in addition to, but not instead of the PDF or TXT version 148 | - Please upload the README to the root of the repository - any data and code can be in subdirectories, but it is easier to find the README if it is not in subdirectories. 149 | - There should be no duplicate README files in the repository 150 | 151 | #### Ideal structure 152 | 153 | Your deposit should have 154 | 155 | - no redundant directories: the first thing you should see is the README and any subdirectories 156 | - there should be no ZIP files! 157 | - the structure should be as you last ran the code 158 | 159 | > [NOTE] The AEA staff will not re-arrange or otherwise restructure your deposit in any way. What you see in the deposit interface is what others will see once it is published. 160 | 161 | You should see something like this: 162 | ``` 163 | data_directory/ 164 | prog_directory/ 165 | README.pdf 166 | LICENSE.txt 167 | ``` 168 | (the `LICENSE.txt` is optional if you want to adopt one of the standard openICPSR licenses upon publication. See [our licensing guidance](Licensing_guidance.md) for other options). 169 | 170 | ### Submitting to the Data Editor 171 | 172 | Once you are satisfied that all data files are present, are complete, and all metadata is satisfactory, including all required elements filled out, you should **submit** the deposit, by changing the **status** of the deposit: 173 | 174 | ![submit project](assets/project-submit.png) 175 | 176 | Choose "Submit to AEA" under "Change Status". 177 | 178 | Should you have forgotten something, you can "recall" the submission. 179 | 180 | 181 | ### Citing Your Deposit 182 | 183 | At present (2020), the openICPSR repository does not display the Digital Object Identifier (DOI) that will be associated with your deposit. However, it can be deduced easily. 
184 | 185 | - Each openICPSR project has a number (e.g., "109622"), that might show up on the right panel: 186 | 187 | ![Image of number](assets/project-number.png) 188 | 189 | - if the openICPSR project has not been published, then the DOI will be "http://doi.org/10.3886/E" + number + "V1" (e.g. **http://doi.org/10.3886/E109622V1**) 190 | - if the project has already been published before, and you are updating it, then the "V1" will be incremented. See [our FAQ](FAQ.md) 191 | - You should then cite your deposit as follows (see [AEA Sample References](https://www.aeaweb.org/journals/policies/sample-references)): 192 | 193 | | | 194 | |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| 195 | | **Romer, Christina D., and David H. Romer**. 2010. "Replication data for: The Macroeconomic Effects of Tax Changes: Estimates Based on a New Measure of Fiscal Shocks." *American Economic Association* [publisher], Inter-university Consortium for Political and Social Research [distributor]. https://doi.org/10.3886/E112357V1. | 196 | | | 197 | 198 | #### Give it a try: 199 | 200 |
201 | 202 | 203 | 204 | 205 | 206 | 207 |
Article title:
Authors:
Project number:
Version number:
208 |
209 | 210 |

211 | 212 |
213 |

214 |
215 | 216 | 231 | 232 | 233 | ### Ready to submit manuscript 234 | 235 | Once you have completed the deposit, you are now ready to submit the manuscript native files, together with the [Data and Code Availability Form](https://www.aeaweb.org/journals/forms/data-code-availability), as per the journal's guidelines ([AER guidelines here](https://www.aeaweb.org/journals/aer/submissions/accepted-articles/styleguide)). 236 | 237 | -------------------------------------------------------------------------------- /FAQ.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: withtoc 3 | title: "Frequently Asked Questions" 4 | --- 5 | ... although some are not frequently asked, but might nevertheless be useful. Below questions and answers in random order. Please be sure to check out the [official list of FAQ](https://www.aeaweb.org/journals/data/faq) first. Should you have other questions not appearing on either page, please [create a new issue on Github](https://github.com/AEADataEditor/aea-de-guidance/issues/new), ask the question on [Twitter](https://twitter.com/aeadata), or send an email to the [AEA Data Editor](mailto:dataeditor@aeapubs.org). 6 | 7 | ### What is the DOI of my openICPSR deposit? I have not yet published it, but am asked to add a citation to it in my manuscript? 8 | 9 | Generically, each openICPSR project has a number (e.g., "109622"), that might show up on the right panel: 10 | ![Image of number](assets/project-number.png) 11 | Then 12 | 13 | - if the openICPSR project has not been published, then the DOI will be "http://doi.org/10.3886/E" + number + "V1" (e.g. 
http://doi.org/10.3886/E109622V1) 14 | - if the openICPSR project has already been published, then the CURRENT DOI is shown on the relevant page, but if there are any revisions the Data Editor has asked for, then the to-be-cited DOI would be the next version, e.g., "http://doi.org/10.3886/E" + number + "V3" if the current version is V2 and the next version would be V3. 15 | 16 | Give it a try: 17 | 18 | 19 |
20 | 21 | 22 | 23 | 24 | 25 | 26 |
Article title:
Authors:
Project number:
Version number:
27 |
28 | 29 |

30 | 31 |
32 |

33 |
34 | 35 | 50 | 51 | ### How do I cite my own data and code supplement? 52 | 53 | If you created your own data (experiments, surveys, etc.), you should do one of two things: 54 | 55 | - If you believe that you will re-use the data as-is, and in particular if you would like others to also use the data, we strongly suggest creating a separate data deposit at a data repository. This deposit does not need to be at the AEA Data and Code Repository - it can be at any trusted repository. Have a look at the [Social Science Data Editor's guide to Data and Code Hosting](https://social-science-data-editors.github.io/guidance/Requested_information_hosting.html). Once deposited, and published, the data should be cited in your manuscript, in accordance with the [AEA Sample References](https://www.aeaweb.org/journals/policies/sample-references). 56 | - If you only intend to write this one manuscript with the data, you should cite the manuscript's companion data and code deposit at the AEA Data and Code repository. Since that deposit is (typically) not yet published, you need to manually construct the reference, as per the [AEA Sample References](https://www.aeaweb.org/journals/policies/sample-references) (an example is given there). The DOI for your forthcoming data and code deposit can be constructed as outlined in the previous FAQ. 57 | 58 | ### Should we keep the data and directory structure as we used it ourselves or should we set up the files in a way that would make replication as straightforward as possible? 59 | 60 | > ... the directory structure has gotten a little clunky over the years working on this project... 61 | 62 | The Data and Code Availability Policy says: 63 | 64 | > "Files uploaded to the AEA Data and Code Repository should retain the file names as originally executed or used, their original file format, and their original "grouping" in terms of directories." 
65 | 66 | You should feel free to reorganize, but you should ensure that, when we run the reorganized files, they produce the **same results that are reported in the paper**. Or put differently, the numbers in the paper should be produced by the reorganized files. We are not trying to reproduce your historical path to the paper, only the current state of the paper. 67 | 68 | Such restructuring may also be appropriate if you have a very sophisticated reproducible setup in your lab or group. A replicator does not need all sorts of fancy dynamic setup scripts that are very relevant in a lab, but unnecessarily complicate the process for a replicator. You should attempt to simplify the final setup to make it easy for anybody to run this particular project, once. 69 | 70 | 71 | ### The paper uses confidential data, covering [geography] for period [2001-2015]. The repository only contains code. Should the repository metadata be filled out for the data characteristics, even if the repository only has code? 72 | 73 | [Answer from ICPSR] I think it still makes sense to complete as much metadata as possible. There are syntax files specific to the data available through a restricted-use agreement. The metadata are for increasing findability of the data collection -- even if only the syntax are in the repository. It's useful to know the data analyzed with the syntax are about a specific geographic coverage for a specific time period. 74 | 75 | ### I use confidential data. I am allowed to provide the data to the Data Editor for the purpose of replication, but you are not allowed to publish the data. How do I proceed? 76 | 77 | [Moved to main FAQ](https://www.aeaweb.org/journals/data/faq#confidential) 78 | 79 | ### We already use git/svn/GitHub/GitLab/BitBucket/etc. Do you facilitate integration of existing version-controlled code to the AEA repo? Or even planned functionality for linking out directly to such projects where they can be found online?
80 | 81 | [Moved to main FAQ](https://www.aeaweb.org/journals/data/faq#existing) 82 | 83 | 84 | ### Some econometrics papers might be accompanied by (for example) an R or Stata package (perhaps published on CRAN or SSC). What about surfacing references to associated packages more prominently? 85 | 86 | [Moved to main FAQ](https://www.aeaweb.org/journals/data/faq#package) 87 | 88 | 89 | ### Do you support Docker/ Jupyter/ etc.? 90 | 91 | [Moved to main FAQ](https://www.aeaweb.org/journals/data/faq#support) 92 | 93 | ### I have been told by the Data Editor to remove PSID data from my submitted materials. What do I do? 94 | 95 | [Moved to main FAQ](https://www.aeaweb.org/journals/data/faq#psid) 96 | 97 | ### Aligning AEA RCT Registry and AEA Data and Code Repository 98 | 99 | The [AEA RCT registry](https://www.socialscienceregistry.org) has a field that codes whether data associated with a registration is publicly available. Many authors will have this coded as "non public" prior to the publication of the replication package. When the replication package is about to be published on the [AEA Data and Code Repository](https://www.openicpsr.org/openicpsr/aea), this field needs to be updated. Only the authors of the registry can update this field. Steps to follow: 100 | 101 | - Log in to the [AEA RCT registry](https://www.socialscienceregistry.org) and select your registration 102 | - Change the field to "public" / "published" 103 | - [Compute the DOI](#what-is-the-doi-of-my-openicpsr-deposit-i-have-not-yet-published-it-but-am-asked-to-add-a-citation-to-it-in-my-manuscript) of your forthcoming replication package publication and enter the resulting DOI in the URL field. 104 | - **Do not use** the URL of the openICPSR project in the browser address bar! 
105 | 106 | [EXTRA] You should also record the RCT DOI as a related publication of your deposit on the [AEA Data and Code Repository](https://www.openicpsr.org/openicpsr/aea): 107 | 108 | - The RCT registry will show the DOI of your registration at the bottom of its public page. [Example](https://www.socialscienceregistry.org/trials/156): 109 | 110 | ![RCT DOI](assets/aearct-doi-citation.png) 111 | 112 | - You can then enter that DOI (e.g., `10.1257/rct.156-1.1`) into the "Related Publication" field of the deposit on the AEA Data and Code Repository: 113 | 114 | ![Entering related publication](assets/project-related-icpsr.png) 115 | 116 | - Choose the "Import via DOI" button: 117 | 118 | ![Selecting import via DOI](assets/project-related-icpsr-modal1.png) 119 | 120 | - Fill in the DOI (e.g., `10.1257/rct.156-1.1`) and press "Import": 121 | 122 | ![Importing via DOI](assets/project-related-icpsr-modal2.png) 123 | 124 | - Select "`is supplemented by`" and press "Save and Apply" 125 | 126 | ![Selecting relationship](assets/project-related-icpsr-modal3.png) 127 | 128 | ### I was asked to modify files in my repository (not yet published) but I cannot upload or edit anything 129 | 130 | When you first submitted to the AEA, your deposit became locked. There are two ways it can be edited: 131 | 132 | #### You can "recall" the submission 133 | 134 | On the right, under "Change status", choose "Recall submission" 135 | 136 | ![recall](assets/recall-submission-icpsr.png) 137 | You should then be able to upload and make changes. 138 | 139 | Once you are done, choose "Re-submit" from the same menu. 140 | 141 | #### The Data Editor staff can request revisions 142 | 143 | If you received a notice via the openICPSR communication log requesting revisions, you should be able to make modifications as outlined in the request. You should be all set. 144 | 145 | Again, once you are done, choose "Re-submit" from the same menu as above.
146 | 147 | ### I was wondering whether (and how) I can update the published repository for our paper. I was contacted by a researcher who is doing a replication ... couple of minor issues ... forgotten to include two auxiliary datasets in the repository without which one of the programs does not run successfully. 148 | 149 | First off, excellent initiative. Our team cannot always conduct a full replication (not all data may be accessible, not enough time, no access to the software). We appreciate it when others are able to do that work, and when authors then correct the replication package. 150 | 151 | Updating the repository is actually very easy, and updates like these are exactly why we moved to the openICPSR repository for this. We have a policy on how changes are then recorded, see [https://www.aeaweb.org/journals/data/policy-revisions](https://www.aeaweb.org/journals/data/policy-revisions). 152 | 153 | 154 | 1) Log back onto your openICPSR deposit. If you don't remember, simply click on the "Share Data" link on openICPSR, and it will show you your deposits. 155 | 156 | 2) You *may* need to click on "Create new version" - depends on when the deposit was initially created (applies for all deposits made after July 2020). 157 | 158 | 3) Update the README as [per the policy](https://www.aeaweb.org/journals/data/policy-revisions). Authors should list the files added, any changes made to the programs, and ideally the reason why. No more than a paragraph. 159 | 160 | 4) Once you have updated all files (remember to update the README), choose "Submit to AEA" in "Change Status". 161 | 162 | 5) The AEA Data Editor will review that the criteria of the Revision Policy are satisfied, but conduct no other checks. 163 | 164 | 6) In most cases, the article will remain linked to the V1 deposit ("version of record"), but anybody navigating there will see a banner indicating that a more recent version exists (the V2 deposit).
165 | 166 | ### I have a paper that uses data from 14 different sources. How do I comply with requirement for data citations and fit within page limits? (for instance in Papers and Proceedings). 167 | 168 | We understand page limits; here are possible workarounds, in decreasing order of preference: 169 | 170 | - refer to an (online) appendix for the details, and have an appendix-only bibliography (may not be an option for Papers and Proceedings) 171 | - refer to the README in the deposit, which has a bibliography like a real paper (see the [README template](https://social-science-data-editors.github.io/template_README/)) 172 | 173 | ### Are authors allowed to reuse the data once the data is published on the AER webpage and what are the specific conditions? 174 | 175 | As of Jan 2021, this question has two answers, pending final migration of archival replication packages: 176 | 177 | #### If the deposit is still downloaded from the AEA website (download URL starts with "https://www.aeaweb.org") 178 | 179 | The copyright of older deposits was transferred (© AEA), together with the manuscript, to the AEA. However, we encourage re-use, and permission is automatically granted to any user of these deposits to use and re-publish them, under acknowledgement (citation) of the *authors'* original paper and replication package. See [sample references](https://www.aeaweb.org/journals/policies/sample-references), example no. 4. 180 | 181 | #### If the deposit is downloaded from the AEA Data and Code Repository at openICPSR (download link redirects to a page at the Repository) 182 | 183 | Migrated deposits are (© AEA), newer deposits remain copyrighted by the original authors (unless otherwise stated in the deposit's LICENSE file). Most deposits are under a [mixed CC-BY/BSD](https://aeadataeditor.github.io/aea-de-guidance/LICENSE-template.html) license, or under a [CC-BY](https://creativecommons.org/licenses/by/4.0/) license.
184 | 185 | ![License on openICPSR website](assets/project-license-icpsr.png) 186 | 187 | Both licenses allow for re-use and re-distribution, under acknowledgement (citation) of the DOI of the replication package, see [sample references](https://www.aeaweb.org/journals/policies/sample-references), example no. 3. 188 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | ## Creative Commons Attribution 4.0 International Public License 3 | 4 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. 5 | 6 | ### Section 1 – Definitions. 7 | 8 | a. __Adapted Material__ means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. 9 | 10 | b. __Adapter's License__ means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. 11 | 12 | c. 
__Copyright and Similar Rights__ means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. 13 | 14 | d. __Effective Technological Measures__ means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. 15 | 16 | e. __Exceptions and Limitations__ means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. 17 | 18 | f. __Licensed Material__ means the artistic or literary work, database, or other material to which the Licensor applied this Public License. 19 | 20 | g. __Licensed Rights__ means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. 21 | 22 | h. __Licensor__ means the individual(s) or entity(ies) granting rights under this Public License. 23 | 24 | i. __Share__ means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. 25 | 26 | j. 
__Sui Generis Database Rights__ means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. 27 | 28 | k. __You__ means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. 29 | 30 | ### Section 2 – Scope. 31 | 32 | a. ___License grant.___ 33 | 34 | 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: 35 | 36 | A. reproduce and Share the Licensed Material, in whole or in part; and 37 | 38 | B. produce, reproduce, and Share Adapted Material. 39 | 40 | 2. __Exceptions and Limitations.__ For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 41 | 42 | 3. __Term.__ The term of this Public License is specified in Section 6(a). 43 | 44 | 4. __Media and formats; technical modifications allowed.__ The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 45 | 46 | 5. __Downstream recipients.__ 47 | 48 | A. 
__Offer from the Licensor – Licensed Material.__ Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. 49 | 50 | B. __No downstream restrictions.__ You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 51 | 52 | 6. __No endorsement.__ Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). 53 | 54 | b. ___Other rights.___ 55 | 56 | 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 57 | 58 | 2. Patent and trademark rights are not licensed under this Public License. 59 | 60 | 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties. 61 | 62 | ### Section 3 – License Conditions. 63 | 64 | Your exercise of the Licensed Rights is expressly made subject to the following conditions. 65 | 66 | a. ___Attribution.___ 67 | 68 | 1. 
If You Share the Licensed Material (including in modified form), You must: 69 | 70 | A. retain the following if it is supplied by the Licensor with the Licensed Material: 71 | 72 | i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); 73 | 74 | ii. a copyright notice; 75 | 76 | iii. a notice that refers to this Public License; 77 | 78 | iv. a notice that refers to the disclaimer of warranties; 79 | 80 | v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; 81 | 82 | B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and 83 | 84 | C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 85 | 86 | 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 87 | 88 | 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 89 | 90 | 4. If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. 91 | 92 | ### Section 4 – Sui Generis Database Rights. 93 | 94 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: 95 | 96 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; 97 | 98 | b. 
if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and 99 | 100 | c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. 101 | 102 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. 103 | 104 | ### Section 5 – Disclaimer of Warranties and Limitation of Liability. 105 | 106 | a. __Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.__ 107 | 108 | b. __To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.__ 109 | 110 | c. 
The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. 111 | 112 | ### Section 6 – Term and Termination. 113 | 114 | a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. 115 | 116 | b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 117 | 118 | 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 119 | 120 | 2. upon express reinstatement by the Licensor. 121 | 122 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. 123 | 124 | c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. 125 | 126 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. 127 | 128 | ### Section 7 – Other Terms and Conditions. 129 | 130 | a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. 131 | 132 | b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. 133 | 134 | ### Section 8 – Interpretation. 135 | 136 | a. 
For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. 137 | 138 | b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. 139 | 140 | c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. 141 | 142 | d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. 143 | 144 | > Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at [creativecommons.org/policies](http://creativecommons.org/policies), Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses. 
145 | > 146 | > Creative Commons may be contacted at creativecommons.org 147 | -------------------------------------------------------------------------------- /code/02_codebook_plaintext.md: -------------------------------------------------------------------------------- 1 | Codebook example for STATA 2 | ========================== 3 | 4 | The source code for this file is [here](02_codebook_plaintext.do). 5 | 6 | Simple example 7 | -------------- 8 | 9 | The following is [perfectly acceptable 10 | content](02_codebook_plaintext.txt), but not necessarily pretty to view. 11 | The core code only requires native commands. Note that it is important 12 | that output be to a plaintext log file, as SMCL (Stata's fancy log 13 | format) is not portable. 14 | 15 | . capture close log 16 | 17 | . sysdir set PLUS "./ado/" 18 | 19 | . set more 1 20 | 21 | . set linesize 147 22 | 23 | . log using "02_codebook_plaintext.txt", replace text 24 | --------------------------------------------------------------------------------------------------------------------------------------------------- 25 | name: 26 | log: /mnt/local/slow_home/vilhuber/Workspace-non-encrypted/git/AEA/aea-de-guidance/code/02_codebook_plaintext.txt 27 | log type: text 28 | opened on: 1 Oct 2018, 17:33:59 29 | 30 | . di 31 | 32 | 33 | . sysuse auto 34 | (1978 Automobile Data) 35 | 36 | . 
describe 37 | 38 | Contains data from /usr/local/stata14/ado/base/a/auto.dta 39 | obs: 74 1978 Automobile Data 40 | vars: 12 13 Apr 2014 17:45 41 | size: 3,182 (_dta has notes) 42 | --------------------------------------------------------------------------------------------------------------------------------------------------- 43 | storage display value 44 | variable name type format label variable label 45 | --------------------------------------------------------------------------------------------------------------------------------------------------- 46 | make str18 %-18s Make and Model 47 | price int %8.0gc Price 48 | mpg int %8.0g Mileage (mpg) 49 | rep78 int %8.0g Repair Record 1978 50 | headroom float %6.1f Headroom (in.) 51 | trunk int %8.0g Trunk space (cu. ft.) 52 | weight int %8.0gc Weight (lbs.) 53 | length int %8.0g Length (in.) 54 | turn int %8.0g Turn Circle (ft.) 55 | displacement int %8.0g Displacement (cu. in.) 56 | gear_ratio float %6.2f Gear Ratio 57 | foreign byte %8.0g origin Car type 58 | --------------------------------------------------------------------------------------------------------------------------------------------------- 59 | Sorted by: foreign 60 | 61 | . codebook 62 | 63 | --------------------------------------------------------------------------------------------------------------------------------------------------- 64 | make Make and Model 65 | --------------------------------------------------------------------------------------------------------------------------------------------------- 66 | 67 | type: string (str18), but longest is str17 68 | 69 | unique values: 74 missing "": 0/74 70 | 71 | examples: "Cad. Deville" 72 | "Dodge Magnum" 73 | "Merc. XR-7" 74 | "Pont. 
Catalina" 75 | 76 | warning: variable has embedded blanks 77 | 78 | --------------------------------------------------------------------------------------------------------------------------------------------------- 79 | price Price 80 | --------------------------------------------------------------------------------------------------------------------------------------------------- 81 | 82 | type: numeric (int) 83 | 84 | range: [3291,15906] units: 1 85 | unique values: 74 missing .: 0/74 86 | 87 | mean: 6165.26 88 | std. dev: 2949.5 89 | 90 | percentiles: 10% 25% 50% 75% 90% 91 | 3895 4195 5006.5 6342 11385 92 | 93 | --------------------------------------------------------------------------------------------------------------------------------------------------- 94 | mpg Mileage (mpg) 95 | --------------------------------------------------------------------------------------------------------------------------------------------------- 96 | 97 | type: numeric (int) 98 | 99 | range: [12,41] units: 1 100 | unique values: 21 missing .: 0/74 101 | 102 | mean: 21.2973 103 | std. dev: 5.7855 104 | 105 | percentiles: 10% 25% 50% 75% 90% 106 | 14 18 20 25 29 107 | 108 | --------------------------------------------------------------------------------------------------------------------------------------------------- 109 | rep78 Repair Record 1978 110 | --------------------------------------------------------------------------------------------------------------------------------------------------- 111 | 112 | type: numeric (int) 113 | 114 | range: [1,5] units: 1 115 | unique values: 5 missing .: 5/74 116 | 117 | tabulation: Freq. Value 118 | 2 1 119 | 8 2 120 | 30 3 121 | 18 4 122 | 11 5 123 | 5 . 124 | 125 | --------------------------------------------------------------------------------------------------------------------------------------------------- 126 | headroom Headroom (in.) 
127 | --------------------------------------------------------------------------------------------------------------------------------------------------- 128 | 129 | type: numeric (float) 130 | 131 | range: [1.5,5] units: .1 132 | unique values: 8 missing .: 0/74 133 | 134 | tabulation: Freq. Value 135 | 4 1.5 136 | 13 2 137 | 14 2.5 138 | 13 3 139 | 15 3.5 140 | 10 4 141 | 4 4.5 142 | 1 5 143 | 144 | --------------------------------------------------------------------------------------------------------------------------------------------------- 145 | trunk Trunk space (cu. ft.) 146 | --------------------------------------------------------------------------------------------------------------------------------------------------- 147 | 148 | type: numeric (int) 149 | 150 | range: [5,23] units: 1 151 | unique values: 18 missing .: 0/74 152 | 153 | mean: 13.7568 154 | std. dev: 4.2774 155 | 156 | percentiles: 10% 25% 50% 75% 90% 157 | 8 10 14 17 20 158 | 159 | --------------------------------------------------------------------------------------------------------------------------------------------------- 160 | weight Weight (lbs.) 161 | --------------------------------------------------------------------------------------------------------------------------------------------------- 162 | 163 | type: numeric (int) 164 | 165 | range: [1760,4840] units: 10 166 | unique values: 64 missing .: 0/74 167 | 168 | mean: 3019.46 169 | std. dev: 777.194 170 | 171 | percentiles: 10% 25% 50% 75% 90% 172 | 2020 2240 3190 3600 4060 173 | 174 | --------------------------------------------------------------------------------------------------------------------------------------------------- 175 | length Length (in.) 
176 | --------------------------------------------------------------------------------------------------------------------------------------------------- 177 | 178 | type: numeric (int) 179 | 180 | range: [142,233] units: 1 181 | unique values: 47 missing .: 0/74 182 | 183 | mean: 187.932 184 | std. dev: 22.2663 185 | 186 | percentiles: 10% 25% 50% 75% 90% 187 | 157 170 192.5 204 218 188 | 189 | --------------------------------------------------------------------------------------------------------------------------------------------------- 190 | turn Turn Circle (ft.) 191 | --------------------------------------------------------------------------------------------------------------------------------------------------- 192 | 193 | type: numeric (int) 194 | 195 | range: [31,51] units: 1 196 | unique values: 18 missing .: 0/74 197 | 198 | mean: 39.6486 199 | std. dev: 4.39935 200 | 201 | percentiles: 10% 25% 50% 75% 90% 202 | 34 36 40 43 45 203 | 204 | --------------------------------------------------------------------------------------------------------------------------------------------------- 205 | displacement Displacement (cu. in.) 206 | --------------------------------------------------------------------------------------------------------------------------------------------------- 207 | 208 | type: numeric (int) 209 | 210 | range: [79,425] units: 1 211 | unique values: 31 missing .: 0/74 212 | 213 | mean: 197.297 214 | std. 
dev: 91.8372 215 | 216 | percentiles: 10% 25% 50% 75% 90% 217 | 97 119 196 250 350 218 | 219 | --------------------------------------------------------------------------------------------------------------------------------------------------- 220 | gear_ratio Gear Ratio 221 | --------------------------------------------------------------------------------------------------------------------------------------------------- 222 | 223 | type: numeric (float) 224 | 225 | range: [2.19,3.89] units: .01 226 | unique values: 36 missing .: 0/74 227 | 228 | mean: 3.01486 229 | std. dev: .456287 230 | 231 | percentiles: 10% 25% 50% 75% 90% 232 | 2.43 2.73 2.955 3.37 3.72 233 | 234 | --------------------------------------------------------------------------------------------------------------------------------------------------- 235 | foreign Car type 236 | --------------------------------------------------------------------------------------------------------------------------------------------------- 237 | 238 | type: numeric (byte) 239 | label: origin 240 | 241 | range: [0,1] units: 1 242 | unique values: 2 missing .: 0/74 243 | 244 | tabulation: Freq. Numeric Label 245 | 52 0 Domestic 246 | 22 1 Foreign 247 | -------------------------------------------------------------------------------- /LICENSE-template.md: -------------------------------------------------------------------------------- 1 | (save as "LICENSE.txt" as part of your deposit) 2 | 3 | ``` 4 | Modified BSD License (https://opensource.org/licenses/BSD-3-Clause) 5 | - applies to all code, scripts, programs, and SOFTWARE. 6 | This means any statements or instructions to be used directly or 7 | indirectly in a computer in order to bring about a certain result, 8 | and may include interpretive, object or source code.
9 | 10 | Creative Commons Attribution 4.0 International Public License 11 | (https://creativecommons.org/licenses/by/4.0/) 12 | - applies to databases, images, tables, text, and any other objects 13 | 14 | COPYRIGHT 15 | 16 | ================================================================= 17 | Modified BSD License 18 | ================================================================= 19 | 20 | Redistribution and use in source and binary forms, with or without modification, 21 | are permitted provided that the following conditions are met: 22 | 23 | 1. Redistributions of source code must retain the above copyright notice, this 24 | list of conditions and the following disclaimer. 25 | 26 | 2. Redistributions in binary form must reproduce the above copyright notice, 27 | this list of conditions and the following disclaimer in the documentation and/or 28 | other materials provided with the distribution. 29 | 30 | 3. Neither the name of the copyright holder nor the names of its contributors 31 | may be used to endorse or promote products derived from this software without 32 | specific prior written permission. 33 | 34 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 35 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 36 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 37 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 38 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 39 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 40 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 41 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 42 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 43 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
44 | 45 | 46 | ================================================================= 47 | Creative Commons Attribution 4.0 International Public License 48 | ================================================================= 49 | 50 | By exercising the Licensed Rights (defined below), You accept and agree to be 51 | bound by the terms and conditions of this Creative Commons Attribution 4.0 52 | International Public License ("Public License"). To the extent this Public 53 | License may be interpreted as a contract, You are granted the Licensed Rights in 54 | consideration of Your acceptance of these terms and conditions, and the Licensor 55 | grants You such rights in consideration of benefits the Licensor receives from 56 | making the Licensed Material available under these terms and conditions. 57 | 58 | Section 1 – Definitions. 59 | 60 | Adapted Material means material subject to Copyright and Similar Rights that is 61 | derived from or based upon the Licensed Material and in which the Licensed 62 | Material is translated, altered, arranged, transformed, or otherwise modified in 63 | a manner requiring permission under the Copyright and Similar Rights held by the 64 | Licensor. For purposes of this Public License, where the Licensed Material is a 65 | musical work, performance, or sound recording, Adapted Material is always 66 | produced where the Licensed Material is synched in timed relation with a moving 67 | image. Adapter's License means the license You apply to Your Copyright and 68 | Similar Rights in Your contributions to Adapted Material in accordance with the 69 | terms and conditions of this Public License. Copyright and Similar Rights means 70 | copyright and/or similar rights closely related to copyright including, without 71 | limitation, performance, broadcast, sound recording, and Sui Generis Database 72 | Rights, without regard to how the rights are labeled or categorized. 
For 73 | purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are 74 | not Copyright and Similar Rights. Effective Technological Measures means those 75 | measures that, in the absence of proper authority, may not be circumvented under 76 | laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty 77 | adopted on December 20, 1996, and/or similar international agreements. 78 | Exceptions and Limitations means fair use, fair dealing, and/or any other 79 | exception or limitation to Copyright and Similar Rights that applies to Your use 80 | of the Licensed Material. Licensed Material means the artistic or literary work, 81 | database, or other material to which the Licensor applied this Public License. 82 | Licensed Rights means the rights granted to You subject to the terms and 83 | conditions of this Public License, which are limited to all Copyright and 84 | Similar Rights that apply to Your use of the Licensed Material and that the 85 | Licensor has authority to license. Licensor means the individual(s) or 86 | entity(ies) granting rights under this Public License. Share means to provide 87 | material to the public by any means or process that requires permission under 88 | the Licensed Rights, such as reproduction, public display, public performance, 89 | distribution, dissemination, communication, or importation, and to make material 90 | available to the public including in ways that members of the public may access 91 | the material from a place and at a time individually chosen by them. Sui Generis 92 | Database Rights means rights other than copyright resulting from Directive 93 | 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the 94 | legal protection of databases, as amended and/or succeeded, as well as other 95 | essentially equivalent rights anywhere in the world. You means the individual or 96 | entity exercising the Licensed Rights under this Public License. 
Your has a 97 | corresponding meaning. Section 2 – Scope. 98 | 99 | License grant. Subject to the terms and conditions of this Public License, the 100 | Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, 101 | non-exclusive, irrevocable license to exercise the Licensed Rights in the 102 | Licensed Material to: reproduce and Share the Licensed Material, in whole or in 103 | part; and produce, reproduce, and Share Adapted Material. Exceptions and 104 | Limitations. For the avoidance of doubt, where Exceptions and Limitations apply 105 | to Your use, this Public License does not apply, and You do not need to comply 106 | with its terms and conditions. Term. The term of this Public License is 107 | specified in Section 6(a). Media and formats; technical modifications allowed. 108 | The Licensor authorizes You to exercise the Licensed Rights in all media and 109 | formats whether now known or hereafter created, and to make technical 110 | modifications necessary to do so. The Licensor waives and/or agrees not to 111 | assert any right or authority to forbid You from making technical modifications 112 | necessary to exercise the Licensed Rights, including technical modifications 113 | necessary to circumvent Effective Technological Measures. For purposes of this 114 | Public License, simply making modifications authorized by this Section 2(a)(4) 115 | never produces Adapted Material. Downstream recipients. Offer from the Licensor 116 | – Licensed Material. Every recipient of the Licensed Material automatically 117 | receives an offer from the Licensor to exercise the Licensed Rights under the 118 | terms and conditions of this Public License. No downstream restrictions. You may 119 | not offer or impose any additional or different terms or conditions on, or apply 120 | any Effective Technological Measures to, the Licensed Material if doing so 121 | restricts exercise of the Licensed Rights by any recipient of the Licensed 122 | Material. 
No endorsement. Nothing in this Public License constitutes or may be 123 | construed as permission to assert or imply that You are, or that Your use of the 124 | Licensed Material is, connected with, or sponsored, endorsed, or granted 125 | official status by, the Licensor or others designated to receive attribution as 126 | provided in Section 3(a)(1)(A)(i). Other rights. 127 | 128 | Moral rights, such as the right of integrity, are not licensed under this Public 129 | License, nor are publicity, privacy, and/or other similar personality rights; 130 | however, to the extent possible, the Licensor waives and/or agrees not to assert 131 | any such rights held by the Licensor to the limited extent necessary to allow 132 | You to exercise the Licensed Rights, but not otherwise. Patent and trademark 133 | rights are not licensed under this Public License. To the extent possible, the 134 | Licensor waives any right to collect royalties from You for the exercise of the 135 | Licensed Rights, whether directly or through a collecting society under any 136 | voluntary or waivable statutory or compulsory licensing scheme. In all other 137 | cases the Licensor expressly reserves any right to collect such royalties. 138 | Section 3 – License Conditions. 139 | 140 | Your exercise of the Licensed Rights is expressly made subject to the following 141 | conditions. 142 | 143 | Attribution. 
144 | 145 | If You Share the Licensed Material (including in modified form), You must: 146 | 147 | retain the following if it is supplied by the Licensor with the Licensed 148 | Material: identification of the creator(s) of the Licensed Material and any 149 | others designated to receive attribution, in any reasonable manner requested by 150 | the Licensor (including by pseudonym if designated); a copyright notice; a 151 | notice that refers to this Public License; a notice that refers to the 152 | disclaimer of warranties; a URI or hyperlink to the Licensed Material to the 153 | extent reasonably practicable; indicate if You modified the Licensed Material 154 | and retain an indication of any previous modifications; and indicate the 155 | Licensed Material is licensed under this Public License, and include the text 156 | of, or the URI or hyperlink to, this Public License. You may satisfy the 157 | conditions in Section 3(a)(1) in any reasonable manner based on the medium, 158 | means, and context in which You Share the Licensed Material. For example, it may 159 | be reasonable to satisfy the conditions by providing a URI or hyperlink to a 160 | resource that includes the required information. If requested by the Licensor, 161 | You must remove any of the information required by Section 3(a)(1)(A) to the 162 | extent reasonably practicable. If You Share Adapted Material You produce, the 163 | Adapter's License You apply must not prevent recipients of the Adapted Material 164 | from complying with this Public License. Section 4 – Sui Generis Database 165 | Rights. 
166 | 167 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your 168 | use of the Licensed Material: 169 | 170 | for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, 171 | reuse, reproduce, and Share all or a substantial portion of the contents of the 172 | database; if You include all or a substantial portion of the database contents 173 | in a database in which You have Sui Generis Database Rights, then the database 174 | in which You have Sui Generis Database Rights (but not its individual contents) 175 | is Adapted Material; and You must comply with the conditions in Section 3(a) if 176 | You Share all or a substantial portion of the contents of the database. For the 177 | avoidance of doubt, this Section 4 supplements and does not replace Your 178 | obligations under this Public License where the Licensed Rights include other 179 | Copyright and Similar Rights. Section 5 – Disclaimer of Warranties and 180 | Limitation of Liability. 181 | 182 | Unless otherwise separately undertaken by the Licensor, to the extent possible, 183 | the Licensor offers the Licensed Material as-is and as-available, and makes no 184 | representations or warranties of any kind concerning the Licensed Material, 185 | whether express, implied, statutory, or other. This includes, without 186 | limitation, warranties of title, merchantability, fitness for a particular 187 | purpose, non-infringement, absence of latent or other defects, accuracy, or the 188 | presence or absence of errors, whether or not known or discoverable. Where 189 | disclaimers of warranties are not allowed in full or in part, this disclaimer 190 | may not apply to You. 
To the extent possible, in no event will the Licensor be 191 | liable to You on any legal theory (including, without limitation, negligence) or 192 | otherwise for any direct, special, indirect, incidental, consequential, 193 | punitive, exemplary, or other losses, costs, expenses, or damages arising out of 194 | this Public License or use of the Licensed Material, even if the Licensor has 195 | been advised of the possibility of such losses, costs, expenses, or damages. 196 | Where a limitation of liability is not allowed in full or in part, this 197 | limitation may not apply to You. The disclaimer of warranties and limitation of 198 | liability provided above shall be interpreted in a manner that, to the extent 199 | possible, most closely approximates an absolute disclaimer and waiver of all 200 | liability. Section 6 – Term and Termination. 201 | 202 | This Public License applies for the term of the Copyright and Similar Rights 203 | licensed here. However, if You fail to comply with this Public License, then 204 | Your rights under this Public License terminate automatically. Where Your right 205 | to use the Licensed Material has terminated under Section 6(a), it reinstates: 206 | 207 | automatically as of the date the violation is cured, provided it is cured within 208 | 30 days of Your discovery of the violation; or upon express reinstatement by the 209 | Licensor. For the avoidance of doubt, this Section 6(b) does not affect any 210 | right the Licensor may have to seek remedies for Your violations of this Public 211 | License. For the avoidance of doubt, the Licensor may also offer the Licensed 212 | Material under separate terms or conditions or stop distributing the Licensed 213 | Material at any time; however, doing so will not terminate this Public License. 214 | Sections 1, 5, 6, 7, and 8 survive termination of this Public License. Section 7 215 | – Other Terms and Conditions. 
216 | 217 | The Licensor shall not be bound by any additional or different terms or 218 | conditions communicated by You unless expressly agreed. Any arrangements, 219 | understandings, or agreements regarding the Licensed Material not stated herein 220 | are separate from and independent of the terms and conditions of this Public 221 | License. Section 8 – Interpretation. 222 | 223 | For the avoidance of doubt, this Public License does not, and shall not be 224 | interpreted to, reduce, limit, restrict, or impose conditions on any use of the 225 | Licensed Material that could lawfully be made without permission under this 226 | Public License. To the extent possible, if any provision of this Public License 227 | is deemed unenforceable, it shall be automatically reformed to the minimum 228 | extent necessary to make it enforceable. If the provision cannot be reformed, it 229 | shall be severed from this Public License without affecting the enforceability 230 | of the remaining terms and conditions. No term or condition of this Public 231 | License will be waived and no failure to comply consented to unless expressly 232 | agreed to by the Licensor. Nothing in this Public License constitutes or may be 233 | interpreted as a limitation upon, or waiver of, any privileges and immunities 234 | that apply to the Licensor or You, including from the legal processes of any 235 | jurisdiction or authority. 236 | ``` -------------------------------------------------------------------------------- /code/01_codebook_fancy.md: -------------------------------------------------------------------------------- 1 | Codebook example for STATA 2 | ========================== 3 | 4 | The source code for this file is [here](01_codebook_fancy.do). 5 | 6 | Prettier example 7 | ---------------- 8 | 9 | This is a slightly more involved example, but it illustrates the core code.
10 | 11 | Requirements 12 | ------------ 13 | 14 | This fancier example leverages the 15 | [`markdoc`](http://haghish.com/statistics/stata-blog/reproducible-research/markdoc.php) 16 | package as of 2018-10-01. Here we install it locally to this project. 17 | 18 | . set more 1 19 | 20 | . set linesize 147 21 | 22 | . qui shell mkdir ado 23 | 24 | . sysdir set PLUS "./ado/" 25 | 26 | Once the markdoc package is installed, we can create marginally fancier 27 | codebooks as well (see [the output](01_codebook_fancy.md)). 28 | 29 | Fancy introduction 30 | ------------------ 31 | 32 | For instance we could write a fancy introduction here. 33 | 34 | File structure 35 | -------------- 36 | 37 | We can now describe the file structure. 38 | 39 | . sysuse auto 40 | (1978 Automobile Data) 41 | 42 | . describe 43 | 44 | Contains data from /usr/local/stata14/ado/base/a/auto.dta 45 | obs: 74 1978 Automobile Data 46 | vars: 12 13 Apr 2014 17:45 47 | size: 3,182 (_dta has notes) 48 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 49 | storage display value 50 | variable name type format label variable label 51 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 52 | make str18 %-18s Make and Model 53 | price int %8.0gc Price 54 | mpg int %8.0g Mileage (mpg) 55 | rep78 int %8.0g Repair Record 1978 56 | headroom float %6.1f Headroom (in.) 57 | trunk int %8.0g Trunk space (cu. ft.) 58 | weight int %8.0gc Weight (lbs.) 59 | length int %8.0g Length (in.) 60 | turn int %8.0g Turn Circle (ft.) 61 | displacement int %8.0g Displacement (cu. in.)
62 | gear_ratio float %6.2f Gear Ratio 63 | foreign byte %8.0g origin Car type 64 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 65 | Sorted by: foreign 66 | 67 | Summary statistics 68 | ------------------ 69 | 70 | . codebook 71 | 72 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 73 | make Make and Model 74 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 75 | 76 | type: string (str18), but longest is str17 77 | 78 | unique values: 74 missing "": 0/74 79 | 80 | examples: "Cad. Deville" 81 | "Dodge Magnum" 82 | "Merc. XR-7" 83 | "Pont. Catalina" 84 | 85 | warning: variable has embedded blanks 86 | 87 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 88 | price Price 89 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 90 | 91 | type: numeric (int) 92 | 93 | range: [3291,15906] units: 1 94 | unique values: 74 missing .: 0/74 95 | 96 | mean: 6165.26 97 | std. 
dev: 2949.5 98 | 99 | percentiles: 10% 25% 50% 75% 90% 100 | 3895 4195 5006.5 6342 11385 101 | 102 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 103 | mpg Mileage (mpg) 104 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 105 | 106 | type: numeric (int) 107 | 108 | range: [12,41] units: 1 109 | unique values: 21 missing .: 0/74 110 | 111 | mean: 21.2973 112 | std. dev: 5.7855 113 | 114 | percentiles: 10% 25% 50% 75% 90% 115 | 14 18 20 25 29 116 | 117 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 118 | rep78 Repair Record 1978 119 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 120 | 121 | type: numeric (int) 122 | 123 | range: [1,5] units: 1 124 | unique values: 5 missing .: 5/74 125 | 126 | tabulation: Freq. Value 127 | 2 1 128 | 8 2 129 | 30 3 130 | 18 4 131 | 11 5 132 | 5 . 133 | 134 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 135 | headroom Headroom (in.) 136 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 137 | 138 | type: numeric (float) 139 | 140 | range: [1.5,5] units: .1 141 | unique values: 8 missing .: 0/74 142 | 143 | tabulation: Freq. 
Value 144 | 4 1.5 145 | 13 2 146 | 14 2.5 147 | 13 3 148 | 15 3.5 149 | 10 4 150 | 4 4.5 151 | 1 5 152 | 153 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 154 | trunk Trunk space (cu. ft.) 155 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 156 | 157 | type: numeric (int) 158 | 159 | range: [5,23] units: 1 160 | unique values: 18 missing .: 0/74 161 | 162 | mean: 13.7568 163 | std. dev: 4.2774 164 | 165 | percentiles: 10% 25% 50% 75% 90% 166 | 8 10 14 17 20 167 | 168 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 169 | weight Weight (lbs.) 170 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 171 | 172 | type: numeric (int) 173 | 174 | range: [1760,4840] units: 10 175 | unique values: 64 missing .: 0/74 176 | 177 | mean: 3019.46 178 | std. dev: 777.194 179 | 180 | percentiles: 10% 25% 50% 75% 90% 181 | 2020 2240 3190 3600 4060 182 | 183 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 184 | length Length (in.) 185 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 186 | 187 | type: numeric (int) 188 | 189 | range: [142,233] units: 1 190 | unique values: 47 missing .: 0/74 191 | 192 | mean: 187.932 193 | std. 
dev: 22.2663 194 | 195 | percentiles: 10% 25% 50% 75% 90% 196 | 157 170 192.5 204 218 197 | 198 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 199 | turn Turn Circle (ft.) 200 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 201 | 202 | type: numeric (int) 203 | 204 | range: [31,51] units: 1 205 | unique values: 18 missing .: 0/74 206 | 207 | mean: 39.6486 208 | std. dev: 4.39935 209 | 210 | percentiles: 10% 25% 50% 75% 90% 211 | 34 36 40 43 45 212 | 213 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 214 | displacement Displacement (cu. in.) 215 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 216 | 217 | type: numeric (int) 218 | 219 | range: [79,425] units: 1 220 | unique values: 31 missing .: 0/74 221 | 222 | mean: 197.297 223 | std. dev: 91.8372 224 | 225 | percentiles: 10% 25% 50% 75% 90% 226 | 97 119 196 250 350 227 | 228 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 229 | gear_ratio Gear Ratio 230 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 231 | 232 | type: numeric (float) 233 | 234 | range: [2.19,3.89] units: .01 235 | unique values: 36 missing .: 0/74 236 | 237 | mean: 3.01486 238 | std. 
dev: .456287 239 | 240 | percentiles: 10% 25% 50% 75% 90% 241 | 2.43 2.73 2.955 3.37 3.72 242 | 243 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 244 | foreign Car type 245 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 246 | 247 | type: numeric (byte) 248 | label: origin 249 | 250 | range: [0,1] units: 1 251 | unique values: 2 missing .: 0/74 252 | 253 | tabulation: Freq. Numeric Label 254 | 52 0 Domestic 255 | 22 1 Foreign 256 | -------------------------------------------------------------------------------- /citations/references.bib: -------------------------------------------------------------------------------- 1 | @techreport{FORCE11FAIRDATAPRINCIPLES, 2 | timestamp = {2017-05-26T18:19:14Z}, 3 | title = {THE {FAIR} DATA PRINCIPLES}, 4 | urldate = {2017-05-26}, 5 | url = {https://www.force11.org/group/fairgroup/fairprinciples}, 6 | author = {{FORCE11}} 7 | } 8 | 9 | 10 | 11 | @INPROCEEDINGS{Coffman2017-si, 12 | title = "Replications: A Proposal to Increase their Visibility and 13 | Promote them", 14 | author = "Coffman, Lucas and Niederle, Muriel and Wilson, Alistair J", 15 | year = 2017, 16 | conference = "American Economic Association Meetings" 17 | } 18 | 19 | @ARTICLE{Clemens2017-zj, 20 | title = "The meaning of failed replications: A review and proposal", 21 | author = "Clemens, M A", 22 | abstract = "Abstract The welcome rise of replication tests in economics has 23 | not been accompanied by a consensus standard for determining 24 | what constitutes a replication. 
A discrepant replication, in 25 | current usage of the term, can signal anything from an 26 | unremarkable disagreement over", 27 | journal = "Journal of Economic Surveys", 28 | publisher = "Wiley Online Library", 29 | volume = 31, 30 | number = 1, 31 | year = 2017, 32 | keywords = "Ethics; Open data; Replication; Robustness; Transparency" 33 | } 34 | 35 | @MISC{American_Economic_Association2008-az, 36 | title = "Data Availability Policy", 37 | author = "{American Economic Association}", 38 | year = 2008, 39 | howpublished = "\url{https://www.aeaweb.org/journals/policies/data-availability-policy}", 40 | note = "(accessed: 2017-04-06)" 41 | } 42 | 43 | @MISC{Hoeffler2017-aa, 44 | title = "Replication and Economics Journal Policies", 45 | author = "Hoeffler, Jan H", 46 | abstract = "We investigate the impact of the introduction of replication 47 | policies for leading journals in economics on citations. As has 48 | previously been shown for other social sciences, there is an 49 | indication that the introduction of a replication policy 50 | increases the number of citations for a journal, presumably 51 | because readers use the data for their own investigation, 52 | possibly also because of a reliability effect. We see our 53 | results as an incentive for journals to introduce and enforce 54 | replication policies. Lamentably, only a minority of journals 55 | so far enforce their policies in a way that ensures 56 | replicability of most of the empirical work. 
With several 57 | examples we show how replication becomes difficult if policies 58 | are not enforced, and we suggest a pool of replicability 59 | editors as a solution: Since it would be too much to expect 60 | from journals to have experts for every single topic and 61 | software package, a joint effort of journals for such a pool of 62 | experts could help to ensure each empirical study is published 63 | with data, code, and instructions how to use them together such 64 | that all published results can easily be replicated. Reviewers 65 | can join the effort for replicability by following the 66 | principles of the Agenda for Open Research and refuse to 67 | comprehensively review empirical work that does not guarantee 68 | fully replicable empirical results. Further study is needed to 69 | investigate the citation impact on single articles, and we 70 | suggest a design for such research.", 71 | month = jan, 72 | year = 2017, 73 | conference = "ASSA Annual Meeting" 74 | } 75 | 76 | % The entry below contains non-ASCII chars that could not be converted 77 | % to a LaTeX equivalent. 78 | @ARTICLE{Lagoze2017-qv, 79 | title = "Making confidential data part of reproducible research", 80 | author = "Lagoze, C and Vilhuber, L", 81 | abstract = "The rise of data-centric research practices has uncovered 82 | shortcomings in the traditional scholarly communication system. 83 | The foundation of that system, the peer-reviewed 84 | publication, ``[the] selective distribution of ink on paper, or… 85 | electronic facsimiles of the same'' (Bourne, et al., 2011), does 86 | not adequately support what has become an essential element of 87 | scholarship; the reproducibility of research results.
That is, 88 | duplicating a ...", 89 | journal = "Chance", 90 | year = 2017 91 | } 92 | 93 | @ARTICLE{Camerer2016-kl, 94 | title = "Evaluating replicability of laboratory experiments in economics", 95 | author = "Camerer, Colin F and Dreber, Anna and Forsell, Eskil and Ho, 96 | Teck-Hua and Huber, J{\"u}rgen and Johannesson, Magnus and 97 | Kirchler, Michael and Almenberg, Johan and Altmejd, Adam and 98 | Chan, Taizan and Heikensten, Emma and Holzmeister, Felix and 99 | Imai, Taisuke and Isaksson, Siri and Nave, Gideon and Pfeiffer, 100 | Thomas and Razen, Michael and Wu, Hang", 101 | abstract = "The replicability of some scientific findings has recently been 102 | called into question. To contribute data about replicability in 103 | economics, we replicated 18 studies published in the American 104 | Economic Review and the Quarterly Journal of Economics between 105 | 2011 and 2014. All of these replications followed predefined 106 | analysis plans that were made publicly available beforehand, and 107 | they all have a statistical power of at least 90\% to detect the 108 | original effect size at the 5\% significance level. We found a 109 | significant effect in the same direction as in the original study 110 | for 11 replications (61\%); on average, the replicated effect 111 | size is 66\% of the original. 
The replicability rate varies 112 | between 67\% and 78\% for four additional replicability 113 | indicators, including a prediction market measure of peer 114 | beliefs.", 115 | journal = "Science", 116 | volume = 351, 117 | number = 6280, 118 | pages = "1433--1436", 119 | month = mar, 120 | year = 2016, 121 | language = "en" 122 | } 123 | 124 | @MISC{Foote2017-uc, 125 | title = "{MobZ} - Replication archive for a re-examination of Local Labor 126 | Market Definitions", 127 | author = "Foote, Andrew and Kutzbach, Mark and Vilhuber, Lars", 128 | month = apr, 129 | year = 2017 130 | } 131 | 132 | @TECHREPORT{Joskow2015-hd, 133 | title = "President's Letter, Alfred P. Sloan Foundation Annual Report 134 | 2014", 135 | author = "Joskow, Paul L", 136 | institution = "Alfred P. Sloan Foundation", 137 | month = sep, 138 | year = 2015 139 | } 140 | 141 | @MISC{Duvendack2017-js, 142 | title = "What is Meant by `Replication' and Why Does It Encounter 143 | Resistance in Economics?", 144 | author = "Duvendack, Maren and Palmer-Jones, Richard and Reed, W Robert", 145 | abstract = "This paper discusses recent trends in the use of replications 146 | in economics. We identify a number of sources of progress, 147 | including the results of recent replication studies that have 148 | attempted to identify replication rates within the discipline. 149 | These studies generally find that replication rates are 150 | relatively low, though they may be higher for laboratory 151 | experiments in economics. We also identify two web-based 152 | resources for replications, the Replication in Economics wiki 153 | and The Replication Network. We then consider obstacles to 154 | undertaking replication studies in economics. Two obstacles are 155 | the lack of publishing outlets and difficulties in obtaining 156 | data and code for published studies.
We identify journals that 157 | publish replication studies and that ``regularly'' include data 158 | and code as supplementary files for their published research. 159 | Finally, we highlight replication initiatives in psychology and 160 | political science, behind which economics appears to lag. 161 | Whether this is because the problems that beset those 162 | disciplines are less severe in economics, or because economics 163 | is more resistant to replications, is arguable.", 164 | month = jan, 165 | year = 2017, 166 | keywords = "Replication; data sharing; publication bias", 167 | conference = "ASSA Annual Meeting" 168 | } 169 | 170 | @ARTICLE{Nature_Scientific_Data2016-hl, 171 | title = "Nature Scientific Data recommended repositories", 172 | author = "{Nature Scientific Data}", 173 | abstract = "Spreadsheet listing data repositories that are recommended by 174 | Scientific Data (Springer Nature) as being suitable for hosting 175 | data associated with peer-reviewed articles. Please see the 176 | repository list on Scientific Data's website for the most up to 177 | date list.", 178 | journal = "figshare", 179 | month = dec, 180 | year = 2016 181 | } 182 | 183 | @MISC{Hamermesh2017-kq, 184 | title = "What is Replication? The Possibly Exemplary Example of Labor 185 | Economics", 186 | author = "Hamermesh, Daniel", 187 | month = jan, 188 | year = 2017, 189 | conference = "ASSA Annual Meetings" 190 | } 191 | 192 | @ARTICLE{Moffitt2016-wl, 193 | title = "Report: American Economic Association Committee on Statistics 194 | ({AEAStat})", 195 | author = "Moffitt, Robert", 196 | journal = "American Economic Review", 197 | volume = 106, 198 | number = 5, 199 | pages = "788--793", 200 | month = may, 201 | year = 2016 202 | } 203 | 204 | @ARTICLE{Gentzkow2014-zd, 205 | title = "Competition and Ideological Diversity: Historical Evidence from 206 | {US} Newspapers", 207 | author = "Gentzkow, Matthew and Shapiro, Jesse M and Sinkinson, Michael", 208 | journal = "Am. Econ. 
Rev.", 209 | volume = 104, 210 | number = 10, 211 | pages = "3073--3114", 212 | month = oct, 213 | year = 2014 214 | } 215 | 216 | @ARTICLE{Wilson2016-bt, 217 | title = "Good Enough Practices in Scientific Computing", 218 | author = "Wilson, Greg and Bryan, Jennifer and Cranston, Karen and 219 | Kitzes, Justin and Nederbragt, Lex and Teal, Tracy K", 220 | abstract = "We present a set of computing tools and techniques that 221 | every researcher can and should adopt. These recommendations 222 | synthesize inspiration from our own work, from the 223 | experiences of the thousands of people who have taken part 224 | in Software Carpentry and Data Carpentry workshops over the 225 | past six years, and from a variety of other guides. Unlike 226 | some other guides, our recommendations are aimed 227 | specifically at people who are new to research computing.", 228 | month = aug, 229 | year = 2016, 230 | archivePrefix = "arXiv", 231 | primaryClass = "cs.SE", 232 | eprint = "1609.00037" 233 | } 234 | 235 | @MISC{Elsevier2014-gl, 236 | title = "The case for Data in Brief", 237 | author = "{Elsevier}", 238 | abstract = "Data in Brief articles describe research data that you've 239 | made publicly available either through a repository or 240 | directly in your Data in...", 241 | month = jun, 242 | year = 2014, 243 | howpublished = "\url{https://www.journals.elsevier.com/data-in-brief/submit-your-data-description-paper/case-for-the-data-in-brief}", 244 | note = "Accessed: 2017-4-6" 245 | } 246 | 247 | @MISC{Open_Science_Framework2017-zc, 248 | title = "Badges to Acknowledge Open Practices Wiki", 249 | author = "{Open Science Framework}", 250 | abstract = "The aim is to specify a standard by which we can say that a 251 | scientific study has been conducted in accordance with 252 | open-science principles and provide visual icons to allow 253 | advertising of such good behaviours. 
| Hosted on the Open 254 | Science Framework", 255 | year = 2017, 256 | howpublished = "\url{https://osf.io/tvyxz/wiki/home/}", 257 | note = "Accessed: 2017-10-18" 258 | } 259 | 260 | @MISC{Simms2017-dx, 261 | title = "{NSF} {EAGER} {DMPRoadmap}: Making Data Management Plans Actionable", 262 | author = "Simms, Stephanie", 263 | month = sep, 264 | year = 2017 265 | } 266 | 267 | @MISC{Australian_National_Data_Service_ANDS2017-re, 268 | title = "Data citation", 269 | author = "{Australian National Data Service (ANDS)}", 270 | month = jan, 271 | year = 2017, 272 | howpublished = "\url{http://www.ands.org.au/__data/assets/pdf_file/0005/724334/Data-citation.pdf}", 273 | note = "Accessed: 2017-4-18" 274 | } 275 | 276 | @ARTICLE{Stodden2016-uc, 277 | title = "Enhancing reproducibility for computational methods", 278 | author = "Stodden, Victoria and McNutt, Marcia and Bailey, David H and 279 | Deelman, Ewa and Gil, Yolanda and Hanson, Brooks and Heroux, 280 | Michael A and Ioannidis, John P A and Taufer, Michela", 281 | journal = "Science", 282 | volume = 354, 283 | number = 6317, 284 | pages = "1240--1241", 285 | month = dec, 286 | year = 2016, 287 | language = "en" 288 | } 289 | 290 | @ARTICLE{Altman2013-fl, 291 | title = "The Evolution of Data Citation: From Principles to Implementation", 292 | author = "Altman, Micah and Crosas, Merc{\`e}", 293 | journal = "IASSIST Quarterly", 294 | pages = "62--70", 295 | year = 2013 296 | } 297 | 298 | @TECHREPORT{Bollen2015-vb, 299 | title = "Social, Behavioral, and Economic Sciences Perspectives on 300 | Robust and Reliable Science", 301 | author = "Bollen, Kenneth and Cacioppo, John T and Kaplan, Robert M and 302 | Krosnick, Jon A and Olds, James L", 303 | institution = "Subcommittee on Replicability in Science, National Science 304 | Foundation Directorate for Social, Behavioral, and Economic 305 | Sciences.", 306 | month = may, 307 | year = 2015 308 | } 309 | 310 | @TECHREPORT{Gentzkow2014-va, 311 | title = "Code and data for
the social sciences: A practitioner's guide", 312 | author = "Gentzkow, M and Shapiro, Jesse", 313 | abstract = "What does it mean to do empirical social science? Asking good 314 | questions. Digging up novel data. Designing statistical 315 | analysis. Writing up results. For many of us, most of the 316 | time, what it means is writing and debugging code. We write 317 | code to clean data, to transform data, to", 318 | series = "mimeo", 319 | url = "https://web.stanford.edu/~gentzkow/research/CodeAndData.pdf", 320 | institution = "University of Chicago", 321 | year = 2014 322 | } 323 | 324 | @MISC{Fuentes2016-wz, 325 | title = "Reproducible Research in {JASA}", 326 | author = "Fuentes, Montse", 327 | abstract = "JASA is leading the effort to establish publication standards 328 | that improve research quality and reproducibility.", 329 | month = jul, 330 | year = 2016, 331 | howpublished = "\url{http://magazine.amstat.org/blog/2016/07/01/jasa-reproducible16/}", 332 | note = "Accessed: 2017-4-4" 333 | } 334 | 335 | @MISC{Gentzkow2016-gn, 336 | title = "Circulation of {US} Daily Newspapers, 1924, Audit Bureau of 337 | Circulations. (Version V4)", 338 | author = "Gentzkow, M and Shapiro, J and Sinkinson, M", 339 | publisher = "ICPSR - Inter-university Consortium for Political and Social 340 | Research.", 341 | year = 2016 342 | } 343 | --------------------------------------------------------------------------------