├── Figs
├── cc-zero.png
├── iso_8601.png
├── example_Rmd.png
├── phd101212s.png
├── example_commit.png
├── example_repo.png
├── example_history.png
├── example_Rmd_source.png
├── example_commit_zoom.png
└── example_repo_zoom.png
├── fonts
├── texgyreheros-bold.otf
├── texgyreheros-italic.otf
├── texgyreheros-regular.otf
├── texgyreheroscn-bold.otf
├── texgyreheros-bolditalic.otf
├── texgyreheroscn-italic.otf
├── texgyreheroscn-regular.otf
├── texgyreheroscn-bolditalic.otf
├── MANIFEST-TeX-Gyre-Heros.txt
└── README-TeX-Gyre-Heros.txt
├── .gitignore
├── License.md
├── Ruby
└── createVersionWithNotes.rb
├── Makefile
├── R
├── spreadsheets.R
└── spreadsheets_scripts.R
├── ReadMe.md
├── header.tex
└── repro_research.tex
/Figs/cc-zero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/cc-zero.png
--------------------------------------------------------------------------------
/Figs/iso_8601.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/iso_8601.png
--------------------------------------------------------------------------------
/Figs/example_Rmd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/example_Rmd.png
--------------------------------------------------------------------------------
/Figs/phd101212s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/phd101212s.png
--------------------------------------------------------------------------------
/Figs/example_commit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/example_commit.png
--------------------------------------------------------------------------------
/Figs/example_repo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/example_repo.png
--------------------------------------------------------------------------------
/Figs/example_history.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/example_history.png
--------------------------------------------------------------------------------
/Figs/example_Rmd_source.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/example_Rmd_source.png
--------------------------------------------------------------------------------
/Figs/example_commit_zoom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/example_commit_zoom.png
--------------------------------------------------------------------------------
/Figs/example_repo_zoom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/Figs/example_repo_zoom.png
--------------------------------------------------------------------------------
/fonts/texgyreheros-bold.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheros-bold.otf
--------------------------------------------------------------------------------
/fonts/texgyreheros-italic.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheros-italic.otf
--------------------------------------------------------------------------------
/fonts/texgyreheros-regular.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheros-regular.otf
--------------------------------------------------------------------------------
/fonts/texgyreheroscn-bold.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheroscn-bold.otf
--------------------------------------------------------------------------------
/fonts/texgyreheros-bolditalic.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheros-bolditalic.otf
--------------------------------------------------------------------------------
/fonts/texgyreheroscn-italic.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheroscn-italic.otf
--------------------------------------------------------------------------------
/fonts/texgyreheroscn-regular.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheroscn-regular.otf
--------------------------------------------------------------------------------
/fonts/texgyreheroscn-bolditalic.otf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kbroman/Talk_ReproRes/HEAD/fonts/texgyreheroscn-bolditalic.otf
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *_withnotes.*
2 | *.aux
3 | *.log
4 | *.nav
5 | *.out
6 | *.pdf
7 | *.snm
8 | *.toc
9 | *.vrb
10 | Examples/
11 | intro_text.txt
--------------------------------------------------------------------------------
/License.md:
--------------------------------------------------------------------------------
1 | To the extent possible under law,
2 | [Karl Broman](http://github.com/kbroman) has waived all copyright and
3 | related or neighboring rights to
4 | “[Steps toward reproducible research](https://github.com/kbroman/Talk_ReproRes)”.
5 | This work is published from the United States.
6 |
7 | [CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/)
8 |
--------------------------------------------------------------------------------
/Ruby/createVersionWithNotes.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | # modify LaTeX file so that it creates a handout with notes
3 |
4 | ifilename = ARGV.length > 0 ? ARGV[0] : "openaccess.tex"
5 | ofilename = ARGV.length > 1 ? ARGV[1] : ifilename.sub(".tex", "_withnotes.tex")
6 |
7 | ifile = File.open(ifilename)
8 | ofile = File.open(ofilename, "w")
9 |
10 | do_1st_sub = true
11 | do_2nd_sub = true
12 | ifile.readlines.each do |z|
13 | if do_1st_sub and /\\setbeameroption{hide notes}/ =~ z
14 | z = z.sub("hide", "show")
15 | do_1st_sub = false
16 | end
17 |
18 | if do_2nd_sub and /\\documentclass\[aspectratio=169,12pt,t\]{beamer}/ =~ z
19 | z = z.sub("[aspectratio=169,12pt,t]", "[aspectratio=169,12pt,t,handout]")
20 | do_2nd_sub = false
21 | end
22 |
23 | if do_2nd_sub and /\\documentclass\[12pt,t\]{beamer}/ =~ z
24 | z = z.sub("[12pt,t]", "[12pt,t,handout]")
25 | do_2nd_sub = false
26 | end
27 |
28 | ofile.write("\\def\\notescolors{1}\n") if /^\\input{/ =~ z # add line saying to use notes colors
29 |
30 | ofile.write(z)
31 | end
32 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | STEM = repro_research
2 | R_OPTS=--no-save --no-restore --no-init-file --no-site-file
3 |
4 | $(STEM).pdf: $(STEM).tex header.tex Figs/data_dict.pdf Figs/spreadsheet_g_v_mg.pdf
5 | xelatex $<
6 |
7 | notes: $(STEM)_withnotes.pdf
8 | all: $(STEM).pdf notes web
9 |
10 | Figs/data_dict.pdf: R/spreadsheets.R
11 | cd $(
61 | [CC0 1.0 Universal](http://creativecommons.org/publicdomain/zero/1.0/)
62 |
--------------------------------------------------------------------------------
/header.tex:
--------------------------------------------------------------------------------
1 | % header.tex: boring LaTeX/Beamer details + macros (theme, fonts, colors, footline, listings style, shorthands)
2 |
3 | % get rid of junk: plain default theme, no navigation symbols
4 | \usetheme{default}
5 | \beamertemplatenavigationsymbolsempty
6 | \hypersetup{pdfpagemode=UseNone} % don't show bookmarks on initial view
7 |
8 |
9 | % font: sans family loaded by file name from fonts/ (fontspec requires XeLaTeX or LuaLaTeX)
10 | \usepackage{fontspec}
11 | \setsansfont
12 | [ ExternalLocation = fonts/ ,
13 | UprightFont = *-regular ,
14 | BoldFont = *-bold ,
15 | ItalicFont = *-italic ,
16 | BoldItalicFont = *-bolditalic ]{texgyreheros}
17 | \setbeamerfont{note page}{family*=pplx,size=\footnotesize} % Palatino for notes
18 | % "TeX Gyre Heros can be used as a replacement for Helvetica"
19 | % I've placed them in fonts/; alternatively you can install them
20 | % permanently on your system as follows:
21 | % Download http://www.gust.org.pl/projects/e-foundry/tex-gyre/heros/qhv2.004otf.zip
22 | % In Unix, unzip it into ~/.fonts
23 | % In Mac, unzip it, double-click the .otf files, and install using "FontBook"
24 |
25 | % named colors (the RGB model takes 0-255 integers; the rgb model takes 0-1 fractions)
26 | \definecolor{offwhite}{RGB}{255,250,240}
27 | \definecolor{gray}{RGB}{155,155,155}
28 | \definecolor{purple}{RGB}{177,13,201}
29 | \definecolor{green}{RGB}{46,204,64}
30 |
31 | \definecolor{background}{RGB}{255,255,255}
32 | \definecolor{foreground}{RGB}{24,24,24}
33 | \definecolor{title}{RGB}{27,94,134}
34 | \definecolor{subtitle}{RGB}{22,175,124}
35 | \definecolor{hilit}{RGB}{122,0,128}
36 | \definecolor{vhilit}{RGB}{255,0,128}
37 | \definecolor{codehilit}{RGB}{255,0,128}
38 | \definecolor{lolit}{RGB}{95,95,95}
39 | \definecolor{myyellow}{rgb}{1,1,0.7}
40 | \definecolor{nhilit}{RGB}{128,0,128} % hilit color in notes
41 | \definecolor{nvhilit}{RGB}{255,0,128} % vhilit for notes
42 |
43 | \newcommand{\hilit}{\color{hilit}} % color switches: each expands to \color{<name>}, so it lasts until the enclosing group ends
44 | \newcommand{\vhilit}{\color{vhilit}}
45 | \newcommand{\nhilit}{\color{nhilit}}
46 | \newcommand{\nvhilit}{\color{nvhilit}}
47 | \newcommand{\lolit}{\color{lolit}}
48 |
49 | % use those colors in the beamer theme; sub-items are drawn smaller, in lolit, with an en-dash marker
50 | \setbeamercolor{titlelike}{fg=title}
51 | \setbeamercolor{subtitle}{fg=subtitle}
52 | \setbeamercolor{institute}{fg=lolit}
53 | \setbeamercolor{normal text}{fg=foreground,bg=background}
54 | \setbeamercolor{item}{fg=foreground} % color of bullets
55 | \setbeamercolor{subitem}{fg=lolit}
56 | \setbeamercolor{itemize/enumerate subbody}{fg=lolit}
57 | \setbeamertemplate{itemize subitem}{{\textendash}}
58 | \setbeamerfont{itemize/enumerate subbody}{size=\footnotesize}
59 | \setbeamerfont{itemize/enumerate subitem}{size=\footnotesize}
60 |
61 | % page number: \insertframenumber in \scriptsize, lolit color, pushed to the right edge of the footline
62 | \setbeamertemplate{footline}{%
63 | \raisebox{5pt}{\makebox[\paperwidth]{\hfill\makebox[20pt]{\lolit
64 | \scriptsize\insertframenumber}}}\hspace*{5pt}}
65 |
66 | % add a bit of space at the top of the notes page (sets \parskip to 12pt). NOTE(review): the beamer manual documents \addtobeamertemplate{name}{pre}{post}; only one text argument is given here, so the blank line that follows presumably supplies the post-text as \par -- keep that blank line; TODO confirm
67 | \addtobeamertemplate{note page}{\setlength{\parskip}{12pt}}
68 |
69 | % default link color: colored links, with URLs in the hilit color
70 | \hypersetup{colorlinks, urlcolor={hilit}}
71 |
72 | \lstset{language=bash,
73 | basicstyle=\ttfamily\scriptsize,
74 | frame=single,
75 | commentstyle=,
76 | backgroundcolor=\color{offwhite},
77 | showspaces=false,
78 | showstringspaces=false
79 | } % listings defaults: bash, small typewriter text, single frame, off-white background; assumes the listings package is loaded by the including document -- TODO confirm
80 |
81 |
82 | % a few macros: list shorthands (\bi/\ei, \be/\ee; \bbi/\bbe add 24pt above and 8pt item separation; \sbi/\sbe switch to 9pt type) plus graphics and text helpers
83 | \newcommand{\bi}{\begin{itemize}}
84 | \newcommand{\bbi}{\vspace{24pt} \begin{itemize} \itemsep8pt}
85 | \newcommand{\ei}{\end{itemize}}
86 | \newcommand{\be}{\begin{enumerate}}
87 | \newcommand{\bbe}{\vspace{24pt} \begin{enumerate} \itemsep8pt}
88 | \newcommand{\ee}{\end{enumerate}}
89 | \newcommand{\sbi}{\begin{itemize} \fontsize{9pt}{9.5}\selectfont}
90 | \newcommand{\sbe}{\begin{enumerate} \fontsize{9pt}{9.5}\selectfont}
91 | \newcommand{\ig}{\includegraphics}
92 | \newcommand{\subt}[1]{{\footnotesize \color{subtitle} {#1}}}
93 | \newcommand{\ttsm}{\tt \small}
94 | \newcommand{\ttfn}{\tt \footnotesize}
95 | \newcommand{\figh}[2]{\centerline{\includegraphics[height=#2\textheight]{#1}}} % \figh{file}{frac}: centered image, height = frac of \textheight
96 | \newcommand{\figw}[2]{\centerline{\includegraphics[width=#2\textwidth]{#1}}} % \figw{file}{frac}: centered image, width = frac of \textwidth
97 |
--------------------------------------------------------------------------------
/R/spreadsheets_scripts.R:
--------------------------------------------------------------------------------
1 | # spreadsheet examples related to "everything with a script"
2 | library(broman)
3 | # excel_fig() is not defined in this script; presumably it comes from the broman package loaded above
4 | hilit_color <- "#ff0080"
5 | # output paths below are relative ("../Figs/"), so the working directory must be a sibling of Figs/
6 | # 1. a few values in g where the rest are in mg
7 | # cbind() of a character id column with numeric columns gives a character matrix, so the quoted "NA" is just text
8 | mat <- cbind(id=paste0("DO-", 121:130),
9 | "Rt Kidney wt"=c(294, 296, "NA", 513, 381,
10 | 225, 262, 231, 263, 266),
11 | "Rt Adipose wt"=c(757, 583, 834, 808, 780,
12 | 1.066, 1.03, 0.687, 0.932, 985),
13 | "Liver wt"=c(930, 439, 527, 600, 493,
14 | 355, 512, 497, 580, 906))
15 |
16 | pdf("../Figs/spreadsheet_g_v_mg.pdf", height=5, width=10)
17 | excel_fig(mat, col_names=TRUE)
18 | dev.off()
19 | # same sheet with C7:C10 highlighted: the four "Rt Adipose wt" entries below 2 (presumably sheet row 1 holds the column names)
20 | pdf("../Figs/spreadsheet_g_v_mg_hilit.pdf", height=5, width=10)
21 | excel_fig(mat, col_names=TRUE, hilitcells=paste0("C", 7:10), hilitcolor=hilit_color)
22 | dev.off()
23 |
24 |
25 | # 2. spreadsheets with differing column names
26 | mat1 <- cbind(id=paste0("DO-", 121:130),
27 | "glucose.mg.dl.0"=
28 | c(99.165552,185.801584,198.455624,263.25608,192.393456,
29 | 154.019344,102.5792,117.175488,89.984016,122.060536),
30 | "glucose.mg.dl.5"=
31 | c(349.303552, 297.39256, 530.638888, 597.734728, 456.421472,
32 | 329.351368, 220.232344, 377.789856, 257.193912, 290.859544),
33 | "glucose.mg.dl.15"=
34 | c(286.092208, 439.000096, 614.155552, 568.189016, 653.76564,
35 | 482.49468, 265.897792, 371.315696, 172.146992, 270.966216),
36 | "glucose.mg.dl.30"=
37 | c(312.047704, 362.251872, 647.468048, 552.886456, 744.345024,
38 | 431.407672, 202.693256, 431.996232, 221.056328, 247.65924))
39 | # mat2: same four time points as mat1, but the column names drop the "mg.dl" part
40 | mat2 <- cbind(id=paste0("DO-", 221:230),
41 | "glucose.0"=
42 | c(145.742786, 138.010378, 138.219362, 100.445504, 121.030428,
43 | 118.418128, 117.4777, 98.773632, 122.44107, 129.024066),
44 | "glucose.5"=
45 | c(206.452638, 342.866944, 407.443, 310.944638, 290.41196,
46 | 189.524934, 395.321928, 149.452252, 260.63174, 417.317494),
47 | "glucose.15"=
48 | c(216.640608, 339.836676, 336.858654, 384.97722, 345.740474,
49 | 159.692468, 448.612848, 245.637138, 231.008258, 294.173672),
50 | "glucose.30"=
51 | c(299.55501, 276.148802, 235.501414, 308.907044, 313.818168,
52 | 144.488882, 310.369932, 317.423142, 202.272958, 275.52185))
53 | mat3 <- cbind(id=paste0("DO-", 321:330),
54 | "glucose.0"=
55 | c(66.839405, 98.12509, 94.68305, 121.051535, 122.95695,
56 | 201.447755, 130.025425, 143.60919, 125.29262, 135.61874),
57 | "insulin.0"=
58 | c(0.04, 0.51185, 1.7812, 0.0882, 0.19155,
59 | 0.7454, 0.0509, 0.23435, 0.04, 0.91275),
60 | "glucose.5"=
61 | c(246.685995, 246.25574, 448.1068, 407.355505, 298.193665,
62 | 386.51887, 477.302675, 438.88705, 543.74634, 393.03416),
63 | "insulin.5"=
64 | c(0.04, 1.4062, 1.0248, 0.63475, 0.6467,
65 | 0.6081, 0.166, 0.70505, 1.7366, 3.73095))
66 | # one PDF per example matrix
67 | pdf("../Figs/spreadsheet_colnames1.pdf", height=5, width=8.5)
68 | excel_fig(mat1, col_names=TRUE)
69 | dev.off()
70 |
71 | pdf("../Figs/spreadsheet_colnames2.pdf", height=5, width=8.5)
72 | excel_fig(mat2, col_names=TRUE)
73 | dev.off()
74 |
75 | # mat3 (defined above) alternates glucose and insulin columns at each time point
76 | # 3. spreadsheets with differing order of columns
77 | pdf("../Figs/spreadsheet_colnames3.pdf", height=5, width=8.5)
78 | excel_fig(mat3, col_names=TRUE)
79 | dev.off()
80 |
81 | # 4. metadata file useful for scripts
82 | metadata <- cbind(short_name= c("mouse", "sex", "sac_date", "num_islets",
83 | "Ins_per_islet", "Glu_0min", "Ins_0min",
84 | "Glu_tAUC", "Glu_iAUC", "Ins_tAUC"),
85 | file=rep(c("wave2_sheet1.csv","ex_vivo_waves1-3.csv", "gtt2.csv"), c(3,2,5)),
86 | from_column=c("mouse #", "sex", "sac date", "# islets", "IC",
87 | "glucose.mg.dl.0", "insulin.ng.ml.0",
88 | "glucose.mg.dl.tAUC", "glucose.mg.dl.iAUC",
89 | "insulin.ng.ml.tAUC"),
90 | id_column=rep(c("mouse", "id"), c(5,5)),
91 | column_offset=rep(0,10))
92 | # ten rows, one per variable: short name, source file, source column name, ID column name, and a column offset (all 0)
93 |
94 | pdf("../Figs/file_metadata_example.pdf", height=5, width=10, pointsize=10)
95 | excel_fig(metadata, col_names=TRUE)
96 | dev.off()
97 |
--------------------------------------------------------------------------------
/fonts/MANIFEST-TeX-Gyre-Heros.txt:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | ############ The TeX Gyre Collection of Fonts ############
3 | ############ The font Heros ############
4 | ###########################################################################
5 |
6 | Font: TeX Gyre Heros
7 | Authors: Bogus\l{}aw Jackowski and Janusz M. Nowacki
8 | Version: 2.004
9 | Date: 30 X 2009
10 | Downloads: http://www.gust.org.pl/projects/e-foundry/tex-gyre/heros
11 |
12 | Copyright 2007--2009 for TeX Gyre extensions by B. Jackowski
13 | and J.M. Nowacki (on behalf of TeX Users Groups). Vietnamese
14 | characters were added by Han The Thanh.
15 |
16 | This work can be freely used and distributed under
17 | the GUST Font License (GFL -- see GUST-FONT-LICENSE.txt)
18 | which is actually an instance of the LaTeX Project Public License
19 | (LPPL -- see http://www.latex-project.org/lppl.txt ).
20 |
21 | This work has the maintenance status "maintained". The Current Maintainer
22 | of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki.
23 |
24 | Below, in three sections required by the GUST Font License,
25 | font names and file names specific for the TeX Gyre Heros
26 | Family of Fonts are listed.
27 |
28 | NOTE: the names of the directories are not subject to the renaming
29 | restrictions.
30 |
31 | 1. Fonts whose names should be changed in derived works as requested
32 | by clause 1 of GUST-FONT-LICENSE.txt
33 |
34 | 1.1 PostScript names
35 | 1.1.1 PostScript family names
36 | TeXGyreHeros
37 | TeXGyreHerosCondensed
38 |
39 | 1.1.2 PostScript font and full names
40 | TeXGyreHeros-Regular
41 | TeXGyreHeros-Italic
42 | TeXGyreHeros-Bold
43 | TeXGyreHeros-BoldItalic
44 | TeXGyreHerosCondensed-Regular
45 | TeXGyreHerosCondensed-Italic
46 | TeXGyreHerosCondensed-Bold
47 | TeXGyreHerosCondensed-BoldItalic
48 |
49 | 1.2 PFM names
50 | TeXGyreHeros
51 | TeXGyreHerosCondensed
52 |
53 | 1.3 OTF full menu names
54 | TeX Gyre Heros Regular
55 | TeX Gyre Heros Italic
56 | TeX Gyre Heros Bold
57 | TeX Gyre Heros Bold Italic
58 | TeX Gyre Heros Cn Regular
59 | TeX Gyre Heros Cn Italic
60 | TeX Gyre Heros Cn Bold
61 | TeX Gyre Heros Cn Bold Italic
62 |
63 | 1.4 OTF ATM menu names
64 | TeXGyreHeros Regular
65 | TeXGyreHeros Italic
66 | TeXGyreHeros Bold
67 | TeXGyreHeros BoldItalic
68 | TeXGyreHerosCn Regular
69 | TeXGyreHerosCn Italic
70 | TeXGyreHerosCn Bold
71 | TeXGyreHerosCn BoldItalic
72 |
73 | 2. Files whose names should be changed in derived works as requested
74 | by clause 1 of GUST-FONT-LICENSE.txt
75 |
76 | 2.1 fonts/afm/public/tex-gyre/qhvb.afm
77 | fonts/afm/public/tex-gyre/qhvbi.afm
78 | fonts/afm/public/tex-gyre/qhvr.afm
79 | fonts/afm/public/tex-gyre/qhvri.afm
80 | fonts/afm/public/tex-gyre/qhvcb.afm
81 | fonts/afm/public/tex-gyre/qhvcbi.afm
82 | fonts/afm/public/tex-gyre/qhvcr.afm
83 | fonts/afm/public/tex-gyre/qhvcri.afm
84 | fonts/map/dvips/tex-gyre/qhv.map
85 | fonts/map/dvips/tex-gyre/qhv-cs.map
86 | fonts/map/dvips/tex-gyre/qhv-ec.map
87 | fonts/map/dvips/tex-gyre/qhv-l7x.map
88 | fonts/map/dvips/tex-gyre/qhv-qx.map
89 | fonts/map/dvips/tex-gyre/qhv-rm.map
90 | fonts/map/dvips/tex-gyre/qhv-t5.map
91 | fonts/map/dvips/tex-gyre/qhv-texnansi.map
92 | fonts/map/dvips/tex-gyre/qhv-ts1.map
93 | fonts/enc/dvips/tex-gyre/q-cs.enc
94 | fonts/enc/dvips/tex-gyre/q-cs-sc.enc
95 | fonts/enc/dvips/tex-gyre/q-ec.enc
96 | fonts/enc/dvips/tex-gyre/q-ec-sc.enc
97 | fonts/enc/dvips/tex-gyre/q-l7x.enc
98 | fonts/enc/dvips/tex-gyre/q-l7x-sc.enc
99 | fonts/enc/dvips/tex-gyre/q-qx.enc
100 | fonts/enc/dvips/tex-gyre/q-qx-sc.enc
101 | fonts/enc/dvips/tex-gyre/q-rm.enc
102 | fonts/enc/dvips/tex-gyre/q-rm-sc.enc
103 | fonts/enc/dvips/tex-gyre/q-t5.enc
104 | fonts/enc/dvips/tex-gyre/q-t5-sc.enc
105 | fonts/enc/dvips/tex-gyre/q-texnansi.enc
106 | fonts/enc/dvips/tex-gyre/q-texnansi-sc.enc
107 | fonts/enc/dvips/tex-gyre/q-ts1.enc
108 | fonts/opentype/public/tex-gyre/texgyreheros-bold.otf
109 | fonts/opentype/public/tex-gyre/texgyreheros-bolditalic.otf
110 | fonts/opentype/public/tex-gyre/texgyreheros-regular.otf
111 | fonts/opentype/public/tex-gyre/texgyreheros-italic.otf
112 | fonts/opentype/public/tex-gyre/texgyreheroscn-bold.otf
113 | fonts/opentype/public/tex-gyre/texgyreheroscn-bolditalic.otf
114 | fonts/opentype/public/tex-gyre/texgyreheroscn-regular.otf
115 | fonts/opentype/public/tex-gyre/texgyreheroscn-italic.otf
116 | fonts/tfm/public/tex-gyre/cs-qhvb.tfm
117 | fonts/tfm/public/tex-gyre/cs-qhvbi.tfm
118 | fonts/tfm/public/tex-gyre/cs-qhvr.tfm
119 | fonts/tfm/public/tex-gyre/cs-qhvri.tfm
120 | fonts/tfm/public/tex-gyre/cs-qhvb-sc.tfm
121 | fonts/tfm/public/tex-gyre/cs-qhvbi-sc.tfm
122 | fonts/tfm/public/tex-gyre/cs-qhvr-sc.tfm
123 | fonts/tfm/public/tex-gyre/cs-qhvri-sc.tfm
124 | fonts/tfm/public/tex-gyre/cs-qhvcb.tfm
125 | fonts/tfm/public/tex-gyre/cs-qhvcbi.tfm
126 | fonts/tfm/public/tex-gyre/cs-qhvcr.tfm
127 | fonts/tfm/public/tex-gyre/cs-qhvcri.tfm
128 | fonts/tfm/public/tex-gyre/cs-qhvcb-sc.tfm
129 | fonts/tfm/public/tex-gyre/cs-qhvcbi-sc.tfm
130 | fonts/tfm/public/tex-gyre/cs-qhvcr-sc.tfm
131 | fonts/tfm/public/tex-gyre/cs-qhvcri-sc.tfm
132 | fonts/tfm/public/tex-gyre/ec-qhvb.tfm
133 | fonts/tfm/public/tex-gyre/ec-qhvbi.tfm
134 | fonts/tfm/public/tex-gyre/ec-qhvr.tfm
135 | fonts/tfm/public/tex-gyre/ec-qhvri.tfm
136 | fonts/tfm/public/tex-gyre/ec-qhvb-sc.tfm
137 | fonts/tfm/public/tex-gyre/ec-qhvbi-sc.tfm
138 | fonts/tfm/public/tex-gyre/ec-qhvr-sc.tfm
139 | fonts/tfm/public/tex-gyre/ec-qhvri-sc.tfm
140 | fonts/tfm/public/tex-gyre/ec-qhvcb.tfm
141 | fonts/tfm/public/tex-gyre/ec-qhvcbi.tfm
142 | fonts/tfm/public/tex-gyre/ec-qhvcr.tfm
143 | fonts/tfm/public/tex-gyre/ec-qhvcri.tfm
144 | fonts/tfm/public/tex-gyre/ec-qhvcb-sc.tfm
145 | fonts/tfm/public/tex-gyre/ec-qhvcbi-sc.tfm
146 | fonts/tfm/public/tex-gyre/ec-qhvcr-sc.tfm
147 | fonts/tfm/public/tex-gyre/ec-qhvcri-sc.tfm
148 | fonts/tfm/public/tex-gyre/el-qhvb.tfm
149 | fonts/tfm/public/tex-gyre/el-qhvbi.tfm
150 | fonts/tfm/public/tex-gyre/el-qhvr.tfm
151 | fonts/tfm/public/tex-gyre/el-qhvri.tfm
152 | fonts/tfm/public/tex-gyre/el-qhvb-sc.tfm
153 | fonts/tfm/public/tex-gyre/el-qhvbi-sc.tfm
154 | fonts/tfm/public/tex-gyre/el-qhvr-sc.tfm
155 | fonts/tfm/public/tex-gyre/el-qhvri-sc.tfm
156 | fonts/tfm/public/tex-gyre/el-qhvcb.tfm
157 | fonts/tfm/public/tex-gyre/el-qhvcbi.tfm
158 | fonts/tfm/public/tex-gyre/el-qhvcr.tfm
159 | fonts/tfm/public/tex-gyre/el-qhvcri.tfm
160 | fonts/tfm/public/tex-gyre/el-qhvcb-sc.tfm
161 | fonts/tfm/public/tex-gyre/el-qhvcbi-sc.tfm
162 | fonts/tfm/public/tex-gyre/el-qhvcr-sc.tfm
163 | fonts/tfm/public/tex-gyre/el-qhvcri-sc.tfm
164 | fonts/tfm/public/tex-gyre/l7x-qhvb.tfm
165 | fonts/tfm/public/tex-gyre/l7x-qhvbi.tfm
166 | fonts/tfm/public/tex-gyre/l7x-qhvr.tfm
167 | fonts/tfm/public/tex-gyre/l7x-qhvri.tfm
168 | fonts/tfm/public/tex-gyre/l7x-qhvb-sc.tfm
169 | fonts/tfm/public/tex-gyre/l7x-qhvbi-sc.tfm
170 | fonts/tfm/public/tex-gyre/l7x-qhvr-sc.tfm
171 | fonts/tfm/public/tex-gyre/l7x-qhvri-sc.tfm
172 | fonts/tfm/public/tex-gyre/l7x-qhvcb.tfm
173 | fonts/tfm/public/tex-gyre/l7x-qhvcbi.tfm
174 | fonts/tfm/public/tex-gyre/l7x-qhvcr.tfm
175 | fonts/tfm/public/tex-gyre/l7x-qhvcri.tfm
176 | fonts/tfm/public/tex-gyre/l7x-qhvcb-sc.tfm
177 | fonts/tfm/public/tex-gyre/l7x-qhvcbi-sc.tfm
178 | fonts/tfm/public/tex-gyre/l7x-qhvcr-sc.tfm
179 | fonts/tfm/public/tex-gyre/l7x-qhvcri-sc.tfm
180 | fonts/tfm/public/tex-gyre/qx-qhvb.tfm
181 | fonts/tfm/public/tex-gyre/qx-qhvbi.tfm
182 | fonts/tfm/public/tex-gyre/qx-qhvr.tfm
183 | fonts/tfm/public/tex-gyre/qx-qhvri.tfm
184 | fonts/tfm/public/tex-gyre/qx-qhvb-sc.tfm
185 | fonts/tfm/public/tex-gyre/qx-qhvbi-sc.tfm
186 | fonts/tfm/public/tex-gyre/qx-qhvr-sc.tfm
187 | fonts/tfm/public/tex-gyre/qx-qhvri-sc.tfm
188 | fonts/tfm/public/tex-gyre/qx-qhvcb.tfm
189 | fonts/tfm/public/tex-gyre/qx-qhvcbi.tfm
190 | fonts/tfm/public/tex-gyre/qx-qhvcr.tfm
191 | fonts/tfm/public/tex-gyre/qx-qhvcri.tfm
192 | fonts/tfm/public/tex-gyre/qx-qhvcb-sc.tfm
193 | fonts/tfm/public/tex-gyre/qx-qhvcbi-sc.tfm
194 | fonts/tfm/public/tex-gyre/qx-qhvcr-sc.tfm
195 | fonts/tfm/public/tex-gyre/qx-qhvcri-sc.tfm
196 | fonts/tfm/public/tex-gyre/rm-qhvb.tfm
197 | fonts/tfm/public/tex-gyre/rm-qhvbi.tfm
198 | fonts/tfm/public/tex-gyre/rm-qhvr.tfm
199 | fonts/tfm/public/tex-gyre/rm-qhvri.tfm
200 | fonts/tfm/public/tex-gyre/rm-qhvb-sc.tfm
201 | fonts/tfm/public/tex-gyre/rm-qhvbi-sc.tfm
202 | fonts/tfm/public/tex-gyre/rm-qhvr-sc.tfm
203 | fonts/tfm/public/tex-gyre/rm-qhvri-sc.tfm
204 | fonts/tfm/public/tex-gyre/rm-qhvcb.tfm
205 | fonts/tfm/public/tex-gyre/rm-qhvcbi.tfm
206 | fonts/tfm/public/tex-gyre/rm-qhvcr.tfm
207 | fonts/tfm/public/tex-gyre/rm-qhvcri.tfm
208 | fonts/tfm/public/tex-gyre/rm-qhvcb-sc.tfm
209 | fonts/tfm/public/tex-gyre/rm-qhvcbi-sc.tfm
210 | fonts/tfm/public/tex-gyre/rm-qhvcr-sc.tfm
211 | fonts/tfm/public/tex-gyre/rm-qhvcri-sc.tfm
212 | fonts/tfm/public/tex-gyre/t5-qhvb.tfm
213 | fonts/tfm/public/tex-gyre/t5-qhvbi.tfm
214 | fonts/tfm/public/tex-gyre/t5-qhvr.tfm
215 | fonts/tfm/public/tex-gyre/t5-qhvri.tfm
216 | fonts/tfm/public/tex-gyre/t5-qhvb-sc.tfm
217 | fonts/tfm/public/tex-gyre/t5-qhvbi-sc.tfm
218 | fonts/tfm/public/tex-gyre/t5-qhvr-sc.tfm
219 | fonts/tfm/public/tex-gyre/t5-qhvri-sc.tfm
220 | fonts/tfm/public/tex-gyre/t5-qhvcb.tfm
221 | fonts/tfm/public/tex-gyre/t5-qhvcbi.tfm
222 | fonts/tfm/public/tex-gyre/t5-qhvcr.tfm
223 | fonts/tfm/public/tex-gyre/t5-qhvcri.tfm
224 | fonts/tfm/public/tex-gyre/t5-qhvcb-sc.tfm
225 | fonts/tfm/public/tex-gyre/t5-qhvcbi-sc.tfm
226 | fonts/tfm/public/tex-gyre/t5-qhvcr-sc.tfm
227 | fonts/tfm/public/tex-gyre/t5-qhvcri-sc.tfm
228 | fonts/tfm/public/tex-gyre/texnansi-qhvb.tfm
229 | fonts/tfm/public/tex-gyre/texnansi-qhvbi.tfm
230 | fonts/tfm/public/tex-gyre/texnansi-qhvr.tfm
231 | fonts/tfm/public/tex-gyre/texnansi-qhvri.tfm
232 | fonts/tfm/public/tex-gyre/texnansi-qhvb-sc.tfm
233 | fonts/tfm/public/tex-gyre/texnansi-qhvbi-sc.tfm
234 | fonts/tfm/public/tex-gyre/texnansi-qhvr-sc.tfm
235 | fonts/tfm/public/tex-gyre/texnansi-qhvri-sc.tfm
236 | fonts/tfm/public/tex-gyre/texnansi-qhvcb.tfm
237 | fonts/tfm/public/tex-gyre/texnansi-qhvcbi.tfm
238 | fonts/tfm/public/tex-gyre/texnansi-qhvcr.tfm
239 | fonts/tfm/public/tex-gyre/texnansi-qhvcri.tfm
240 | fonts/tfm/public/tex-gyre/texnansi-qhvcb-sc.tfm
241 | fonts/tfm/public/tex-gyre/texnansi-qhvcbi-sc.tfm
242 | fonts/tfm/public/tex-gyre/texnansi-qhvcr-sc.tfm
243 | fonts/tfm/public/tex-gyre/texnansi-qhvcri-sc.tfm
244 | fonts/tfm/public/tex-gyre/ts1-qhvb.tfm
245 | fonts/tfm/public/tex-gyre/ts1-qhvbi.tfm
246 | fonts/tfm/public/tex-gyre/ts1-qhvr.tfm
247 | fonts/tfm/public/tex-gyre/ts1-qhvri.tfm
248 | fonts/tfm/public/tex-gyre/ts1-qhvcb.tfm
249 | fonts/tfm/public/tex-gyre/ts1-qhvcbi.tfm
250 | fonts/tfm/public/tex-gyre/ts1-qhvcr.tfm
251 | fonts/tfm/public/tex-gyre/ts1-qhvcri.tfm
252 | fonts/type1/public/tex-gyre/qhvb.pfb
253 | fonts/type1/public/tex-gyre/qhvb.pfm
254 | fonts/type1/public/tex-gyre/qhvbi.pfb
255 | fonts/type1/public/tex-gyre/qhvbi.pfm
256 | fonts/type1/public/tex-gyre/qhvr.pfb
257 | fonts/type1/public/tex-gyre/qhvr.pfm
258 | fonts/type1/public/tex-gyre/qhvri.pfb
259 | fonts/type1/public/tex-gyre/qhvri.pfm
260 | fonts/type1/public/tex-gyre/qhvcb.pfb
261 | fonts/type1/public/tex-gyre/qhvcb.pfm
262 | fonts/type1/public/tex-gyre/qhvcbi.pfb
263 | fonts/type1/public/tex-gyre/qhvcbi.pfm
264 | fonts/type1/public/tex-gyre/qhvcr.pfb
265 | fonts/type1/public/tex-gyre/qhvcr.pfm
266 | fonts/type1/public/tex-gyre/qhvcri.pfb
267 | fonts/type1/public/tex-gyre/qhvcri.pfm
268 |
269 | 2.2 tex/latex/tex-gyre/il2qhv.fd
270 | tex/latex/tex-gyre/il2qhvc.fd
271 | tex/latex/tex-gyre/l7xqhv.fd
272 | tex/latex/tex-gyre/l7xqhvc.fd
273 | tex/latex/tex-gyre/ly1qhv.fd
274 | tex/latex/tex-gyre/ly1qhvc.fd
275 | tex/latex/tex-gyre/ot1qhv.fd
276 | tex/latex/tex-gyre/ot1qhvc.fd
277 | tex/latex/tex-gyre/ot4qhv.fd
278 | tex/latex/tex-gyre/ot4qhvc.fd
279 | tex/latex/tex-gyre/qswiss.sty
280 | tex/latex/tex-gyre/qxqhv.fd
281 | tex/latex/tex-gyre/qxqhvc.fd
282 | tex/latex/tex-gyre/t1qhv.fd
283 | tex/latex/tex-gyre/t1qhvc.fd
284 | tex/latex/tex-gyre/t5qhv.fd
285 | tex/latex/tex-gyre/t5qhvc.fd
286 | tex/latex/tex-gyre/tgheros.sty
287 | tex/latex/tex-gyre/ts1qhv.fd
288 | tex/latex/tex-gyre/ts1qhvc.fd
289 |
290 | 2.3 doc/fonts/tex-gyre/MANIFEST-TeX-Gyre-Heros.txt
291 | doc/fonts/tex-gyre/README-TeX-Gyre-Heros.txt
292 | doc/fonts/tex-gyre/goadb999.nam
293 | doc/fonts/tex-gyre/qhv-hist.txt
294 | doc/fonts/tex-gyre/qhv-info.pdf
295 | doc/fonts/tex-gyre/qhv-test.tex
296 | doc/fonts/tex-gyre/qhv-test.pdf
297 | doc/fonts/tex-gyre/qhvb.fea
298 | doc/fonts/tex-gyre/qhvbi.fea
299 | doc/fonts/tex-gyre/qhvcr.fea
300 | doc/fonts/tex-gyre/qhvcri.fea
301 | doc/fonts/tex-gyre/qhvcb.fea
302 | doc/fonts/tex-gyre/qhvcbi.fea
303 | doc/fonts/tex-gyre/qhvr.fea
304 | doc/fonts/tex-gyre/qhvri.fea
305 |
306 | 3. Files whose names need not be changed in derived works as requested
307 | by clause 1 of GUST-FONT-LICENSE.txt
308 |
309 | doc/fonts/tex-gyre/GUST-FONT-LICENSE.txt
310 |
311 |
--------------------------------------------------------------------------------
/fonts/README-TeX-Gyre-Heros.txt:
--------------------------------------------------------------------------------
1 | ###########################################################################
2 | ############ The TeX Gyre Collection of Fonts ############
3 | ############ The font Heros ############
4 | ###########################################################################
5 |
6 | Font: TeX Gyre Heros
7 | Authors: Bogus\l{}aw Jackowski and Janusz M. Nowacki
8 | Version: 2.004
9 | Date: 30 X 2009
10 | Downloads: http://www.gust.org.pl/projects/e-foundry/tex-gyre/heros
11 |
12 | License:
13 | % Copyright 2007--2009 for TeX Gyre extensions by B. Jackowski
14 | % and J.M. Nowacki (on behalf of TeX Users Groups).
15 | % Vietnamese characters were added by Han The Thanh.
16 | %
17 | % This work can be freely used and distributed under
18 | % the GUST Font License (GFL -- see GUST-FONT-LICENSE.txt)
19 | % which is actually an instance of the LaTeX Project Public License
20 | % (LPPL -- see http://www.latex-project.org/lppl.txt ).
21 | %
22 | % This work has the maintenance status "maintained". The Current Maintainer
23 | % of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki.
24 | %
25 | % This work consists of the files listed
26 | % in the MANIFEST-TeX-Gyre-Heros.txt file.
27 |
28 | ###########################################################################
29 | ############ A BRIEF DESCRIPTION OF THE PACKAGE ############
30 | ###########################################################################
31 |
32 | The current package contains the most recent version of the TeX Gyre
33 | Heros family of fonts in the PostScript Type 1 and OpenType formats.
34 | TeX Gyre Heros is based on the URW Nimbus Sans L kindly released by
35 | URW++ Design and Development Inc. under GFL (independently of the GPL
36 | release accompanying Ghostscript). The Vietnamese glyphs were added by
37 | Han The Thanh.
38 |
39 | TeX Gyre Heros can be used as a replacement for a popular font Helvetica,
40 | also known as Swiss (prepared by Max Miedinger with Eduard Hoffmann, 1957,
41 | at the Haas Type Foundry).
42 |
43 | Note that the widths of nearly all glyphs were made consistent with the Adobe
44 | metric data (for the glyphs from the Adobe Standard Encoding):
45 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hv______.afm
46 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hvo_____.afm
47 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hvb_____.afm
48 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hvbo____.afm
49 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hvn_____.afm
50 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hvno____.afm
51 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hvnb____.afm
52 | ftp://ftp.adobe.com/pub/adobe/type/win/all/afmfiles/base35/hvnbo___.afm
53 |
54 | There are, however, a few exceptions (all of which we consider Adobe's
55 | mistakes):
56 | font glyph Adobe width TG width
57 | --------------------------------------------
58 | qhvb Lslash 611 716
59 | " lslash 278 418
60 | --------------------------------------------
61 | qhvbi Lslash 611 716
62 | " lslash 278 418
63 | --------------------------------------------
64 | qhvr Lslash 556 627
65 | " dotlessi 278 222
66 | " exclamdown 333 278
67 | " lslash 222 305
68 | " oslash 611 556
69 | " questiondown 611 556
70 | --------------------------------------------
71 | qhvri Lslash 556 628
72 | " dotlessi 278 222
73 | " exclamdown 333 278
74 | " lslash 222 305
75 | " questiondown 611 556
76 | --------------------------------------------
77 | qhcb Lslash 501 618
78 | " lslash 228 402
79 | --------------------------------------------
80 | qhcbi Lslash 501 618
81 | " lslash 228 402
82 | --------------------------------------------
83 | qhcr Lslash 456 552
84 | " dotlessi 228 182
85 | " exclamdown 273 228
86 | " lslash 182 314
87 | " oslash 501 456
88 | " questiondown 501 456
89 | --------------------------------------------
90 | qhcri Lslash 456 552
91 | " dotlessi 228 182
92 | " exclamdown 273 228
93 | " lslash 182 314
94 | " questiondown 501 456
95 |
96 |
97 | * * *
98 |
99 | The TeX Gyre project, following the Latin Modern project, aims at providing
100 | a rich collection of diacritical characters in the attempt to cover as many
101 | Latin-based scripts as possible. To our knowledge, the repertoire of
102 | characters covers all European languages as well as some other Latin-based
103 | alphabets such as Vietnamese and Navajo; at the request of users, recent
104 | extensions (following the enhancement of the Latin Modern collection)
105 | provide glyphs sufficient for typesetting of romanized transliterations
106 | of Arabic and Sanskrit scripts. We have frequently used the information
107 | presented by Michael Everson at the ``The Alphabets of Europe''
108 | ( http://www.evertype.com/alphabets/ ) web site. If you know about European
109 | languages that are not covered completely or if some glyphs have apparently
110 | wrong shapes -- please let us know. Note, however, that the Greek glyphs
111 | are provisional in character.
112 |
113 | The TeX Gyre Project was launched and is supported by TeX USERS GROUPS
114 | (CS TUG, DANTE eV, GUST, NTG, TUG India, TUG). Hearty thanks to the
115 | representatives of these groups and also to all people who helped with
116 | comments, ideas, remarks, bug reports, objections, hints, consolations, etc.
117 |
118 | * * *
119 |
120 | The TeX Gyre Heros family consists of 8 text fonts: regular,
121 | italic, bold and bold italic (qhvr, qhvri, qhvb, qhvbi)
122 | and the condensed variants (qhvcr, qhvcri, qhvcb, qhvcbi).
123 |
124 | The TeX Gyre Heros family can be freely used and distributed
125 | under the GUST Font License (see above) which is actually
126 | an instance of the LaTeX Project Public License
127 | (LPPL; see http://www.latex-project.org/lppl.txt ).
128 |
129 | * * *
130 |
131 | The package consists of the files in the directories conforming
132 | to the TeX Directory Structure (v. 1.1). The directories contain:
133 |
134 | doc/fonts/tex-gyre this file, manifest, licence, test files,
135 | and, moreover, selected files used as input
136 | for generating OTFs (meant as a technical
137 | documentation of the OTFs)
138 | tex/latex/tex-gyre support for LaTeX (*.fd and *.sty files,
139 | prepared by Marcin Woli\'nski)
140 | fonts/enc/dvips/tex-gyre support for dvips (*.enc files);
141 | NOTE: all fonts of the TeX Gyre family
142 | share the same *.enc files with
143 | a few exceptions: CS, QX, and RM encodings
144 | for TeX Gyre Cursor (monospace) differ
145 | from the standard ones (because of the
146 | compatibility with Computer Modern
147 | typewriter fonts requested by users),
148 | and, moreover, CS, L7x, QX and RM encodings
149 | for TeX Gyre Chorus exploit exceptionally
150 | the `lslash_lslash' ligature
151 | fonts/map/dvips/tex-gyre support for dvips (*.map files)
152 | fonts/opentype/public/tex-gyre fonts in the OpenType format (*.otf files)
153 | fonts/type1/public/tex-gyre PostScript (Type 1) font files and printer
154 | font metric files (*.pfb and *.pfm,
155 | respectively);
156 | fonts/tfm/public/tex-gyre TeX font metric files (*.tfm) for:
157 | -- CS (CSTUG) encoding (cs-*.tfm),
158 | -- EC (Cork) encoding (ec-*.tfm),
159 | -- L7x (Lithuanian) encoding (l7x-*.tfm),
160 | -- QX (GUST) encoding (qx-*.tfm),
161 | -- RM (Regular Math or OT1) encoding (rm-*.tfm),
162 | -- Y&Y's TeX'n'ANSI aka LY1 encoding
163 | (texnansi-*.tfm),
164 | -- T5 (Vietnamese) encoding (t5-*.tfm),
165 | -- Text Companion for EC fonts aka TS1
166 | (ts1-*.tfm).
167 | Encodings CS, EC, L7x, QX, RM, Y&Y, and T5
168 | have their cap-small-caps counterparts
169 | (*-sc.tfm).
170 | fonts/afm/public/tex-gyre Adobe font metric files (*.afm);
171 |
172 | Email contact: Bogus\l{}aw Jackowski aka Jacko, B_Jackowski@gust.org.pl
173 |
174 | * * *
175 |
176 | In ConTeXt, support for TeX Gyre Collection can be found in the typescript
177 | definition files:
178 |
179 | ... /tex/context/base/type-enc.tex
180 | ... /tex/context/base/type-syn.tex
181 | ... /tex/context/base/type-exa.tex
182 | ... /tex/context/base/type-map.tex
183 |
184 | Additional encoding and map files can be found under:
185 |
186 | ... /texmf/fonts/map/pdftex/context
187 | ... /texmf/fonts/enc/pdftex/context
188 |
189 | * * *
190 |
191 | All eight font files, qhvb, qhvbi, qhvr, qhvri, qhvcb, qhvcbi, qhvcr, qhvcri,
192 | contain the same repertoire of 1089 characters, namely (these are the names
193 | used in Type 1 fonts):
194 |
195 | A a a.sc Aacute aacute aacute.sc Abreve abreve abreve.sc Abreveacute
196 | abreveacute abreveacute.sc Abrevedotbelow abrevedotbelow
197 | abrevedotbelow.sc Abrevegrave abrevegrave abrevegrave.sc Abrevehookabove
198 | abrevehookabove abrevehookabove.sc Abrevetilde abrevetilde abrevetilde.sc
199 | Acaron acaron acaron.sc Acircumflex acircumflex acircumflex.sc
200 | Acircumflexacute acircumflexacute acircumflexacute.sc Acircumflexdotbelow
201 | acircumflexdotbelow acircumflexdotbelow.sc Acircumflexgrave
202 | acircumflexgrave acircumflexgrave.sc Acircumflexhookabove
203 | acircumflexhookabove acircumflexhookabove.sc Acircumflextilde
204 | acircumflextilde acircumflextilde.sc Acute acute acute.dup acute.ts1
205 | Acutecomb acutecomb Adblgrave adblgrave adblgrave.sc Adieresis adieresis
206 | adieresis.sc Adotbelow adotbelow adotbelow.sc AE ae AE.dup ae.dup ae.sc
207 | AEacute aeacute aeacute.sc Agrave agrave agrave.sc Ahookabove ahookabove
208 | ahookabove.sc Alpha alpha Amacron amacron amacron.sc ampersand anglearc
209 | angleleft angleright Aogonek aogonek aogonek.sc Aogonekacute aogonekacute
210 | aogonekacute.sc approxequal Aring aring aring.sc Aringacute aringacute
211 | aringacute.sc arrowdown arrowleft arrowright arrowup asciicircum
212 | asciitilde asterisk asteriskmath at at.alt Atilde atilde atilde.sc B b
213 | b.sc backslash baht bar Beta beta bigcircle blanksymbol born braceleft
214 | braceright bracketleft bracketright Breve breve breve.ts1 Breveacute
215 | breveacute brevebelow brevebelowcomb brevebelowinverted
216 | brevebelowinvertedcomb Brevecomb brevecomb Brevegrave brevegrave
217 | Brevehookabove brevehookabove Breveinverted breveinverted
218 | Breveinvertedcomb breveinvertedcomb Brevetilde brevetilde brokenbar
219 | bullet C c c.sc Cacute cacute cacute.sc Caron caron caron.ts1 Caroncomb
220 | caroncomb Ccaron ccaron ccaron.sc Ccedilla ccedilla ccedilla.sc
221 | Ccircumflex ccircumflex ccircumflex.sc Cdotaccent cdotaccent
222 | cdotaccent.sc cedilla cedilla.dup cent cent.oldstyle centigrade Chi chi
223 | Circumflex circumflex circumflex.dup Circumflexacute circumflexacute
224 | Circumflexcomb circumflexcomb Circumflexgrave circumflexgrave
225 | Circumflexhookabove circumflexhookabove Circumflextilde circumflextilde
226 | colon colonmonetary comma commaaccent commaaccentcomb copyleft copyright
227 | copyright.alt currency cwm cwmascender cwmcapital cyrBreve cyrbreve
228 | cyrFlex cyrflex D d d.sc dagger daggerdbl dblbracketleft dblbracketright
229 | dblGrave dblgrave dblgrave.ts1 dblGravecomb dblgravecomb dblverticalbar
230 | Dcaron dcaron dcaron.sc Dcroat dcroat dcroat.sc Ddotbelow ddotbelow
231 | ddotbelow.sc degree Delta delta diameter died Dieresis dieresis
232 | dieresis.dup dieresis.ts1 Dieresisacute dieresisacute Dieresiscaron
233 | dieresiscaron Dieresiscomb dieresiscomb Dieresisgrave dieresisgrave
234 | discount divide divorced Dlinebelow dlinebelow dlinebelow.sc dollar
235 | dollar.oldstyle dong Dotaccent dotaccent Dotaccentcomb dotaccentcomb
236 | dotbelow dotbelowcomb dotlessi dotlessi.sc dotlessj dotlessj.dup
237 | dotlessj.sc E e e.sc Eacute eacute eacute.sc Ebreve ebreve ebreve.sc
238 | Ecaron ecaron ecaron.sc Ecircumflex ecircumflex ecircumflex.sc
239 | Ecircumflexacute ecircumflexacute ecircumflexacute.sc Ecircumflexdotbelow
240 | ecircumflexdotbelow ecircumflexdotbelow.sc Ecircumflexgrave
241 | ecircumflexgrave ecircumflexgrave.sc Ecircumflexhookabove
242 | ecircumflexhookabove ecircumflexhookabove.sc Ecircumflextilde
243 | ecircumflextilde ecircumflextilde.sc Edblgrave edblgrave edblgrave.sc
244 | Edieresis edieresis edieresis.sc Edotaccent edotaccent edotaccent.sc
245 | Edotbelow edotbelow edotbelow.sc Egrave egrave egrave.sc Ehookabove
246 | ehookabove ehookabove.sc eight eight.oldstyle eight.prop
247 | eight.taboldstyle ell ellipsis Emacron emacron emacron.sc emdash endash
248 | Eng eng eng.sc Eogonek eogonek eogonek.sc Eogonekacute eogonekacute
249 | eogonekacute.sc Epsilon epsilon epsilon.alt equal Ereversed ereversed
250 | ereversed.sc estimated Eta eta Eth eth eth.sc Etilde etilde etilde.sc
251 | eturned eturned.sc Euro exclam exclamdown F f f.sc f_k ff ffi ffl fi five
252 | five.oldstyle five.prop five.taboldstyle fl florin four four.oldstyle
253 | four.prop four.taboldstyle fraction fraction.alt G g g.sc Gacute gacute
254 | gacute.sc Gamma gamma Gbreve gbreve gbreve.sc Gcaron gcaron gcaron.sc
255 | Gcedilla gcedilla Gcircumflex gcircumflex gcircumflex.sc Gcommaaccent
256 | gcommaaccent gcommaaccent.sc Gdotaccent gdotaccent gdotaccent.sc
257 | Germandbls germandbls germandbls.dup germandbls.sc gnaborretni Grave
258 | grave grave.ts1 Gravecomb gravecomb greater greaterequal
259 | greaterorequalslant guarani guillemotleft guillemotright guilsinglleft
260 | guilsinglright H h h.sc Hbar hbar hbar.sc Hbrevebelow hbrevebelow
261 | hbrevebelow.sc Hcircumflex hcircumflex hcircumflex.sc Hdieresis hdieresis
262 | hdieresis.sc Hdotbelow hdotbelow hdotbelow.sc Hookabove hookabove
263 | Hookabovecomb hookabovecomb horn Htilde htilde htilde.sc Hungarumlaut
264 | hungarumlaut hungarumlaut.ts1 Hungarumlautcomb hungarumlautcomb hyphen
265 | hyphen.alt hyphen.dup hyphen.prop hyphendbl hyphendbl.alt I i i.sc Iacute
266 | iacute iacute.sc Ibreve ibreve ibreve.sc Icaron icaron icaron.sc
267 | Icircumflex icircumflex icircumflex.sc Idblgrave idblgrave idblgrave.sc
268 | Idieresis idieresis idieresis.sc Idieresisacute idieresisacute
269 | idieresisacute.sc Idotaccent idotaccent.sc Idotbelow idotbelow
270 | idotbelow.sc Igrave igrave igrave.sc Ihookabove ihookabove ihookabove.sc
271 | IJ ij ij.sc Imacron imacron Imacron.alt imacron.alt imacron.alt.sc
272 | imacron.sc infinity interrobang Iogonek iogonek iogonek.sc Iogonekacute
273 | iogonekacute iogonekacute.sc Iota iota Itilde itilde itilde.sc J j j.sc
274 | J_caron Jacute jacute jacute.sc jcaron jcaron.sc Jcircumflex jcircumflex
275 | jcircumflex.sc K k k.sc Kappa kappa Kcedilla kcedilla Kcommaaccent
276 | kcommaaccent kcommaaccent.sc L l l.sc Lacute lacute lacute.sc Lambda
277 | lambda Lcaron lcaron lcaron.sc Lcedilla lcedilla Lcommaaccent
278 | lcommaaccent lcommaaccent.sc Ldot ldot ldot.sc Ldotbelow ldotbelow
279 | ldotbelow.sc Ldotbelowmacron ldotbelowmacron ldotbelowmacron.sc leaf less
280 | lessequal lessorequalslant linebelow linebelowcomb lira logicalnot longs
281 | lozenge lscript Lslash lslash lslash.sc Ltilde ltilde ltilde.sc M m m.sc
282 | Macron macron Macron.alt macron.alt macron.dup macron.ts1 macronbelow
283 | macronbelowcomb Macroncomb macroncomb married Mdotbelow mdotbelow
284 | mdotbelow.sc mho minus minusplus Mu mu mu.alt multiply musicalnote N n
285 | n.sc Nacute nacute nacute.sc naira nbspace Ncaron ncaron ncaron.sc
286 | Ncedilla ncedilla Ncommaaccent ncommaaccent ncommaaccent.sc Ndotaccent
287 | ndotaccent ndotaccent.sc Ndotbelow ndotbelow ndotbelow.sc nine
288 | nine.oldstyle nine.prop nine.taboldstyle notequal Ntilde ntilde ntilde.sc
289 | Nu nu numbersign numero O o o.sc Oacute oacute oacute.sc Obreve obreve
290 | obreve.sc Ocaron ocaron ocaron.sc Ocircumflex ocircumflex ocircumflex.sc
291 | Ocircumflexacute ocircumflexacute ocircumflexacute.sc Ocircumflexdotbelow
292 | ocircumflexdotbelow ocircumflexdotbelow.sc Ocircumflexgrave
293 | ocircumflexgrave ocircumflexgrave.sc Ocircumflexhookabove
294 | ocircumflexhookabove ocircumflexhookabove.sc Ocircumflextilde
295 | ocircumflextilde ocircumflextilde.sc Odblgrave odblgrave odblgrave.sc
296 | Odieresis odieresis odieresis.sc Odotbelow odotbelow odotbelow.sc OE oe
297 | OE.dup oe.dup oe.sc ogonek Ograve ograve ograve.sc ohm Ohookabove
298 | ohookabove ohookabove.sc Ohorn ohorn ohorn.sc Ohornacute ohornacute
299 | ohornacute.sc Ohorndotbelow ohorndotbelow ohorndotbelow.sc Ohorngrave
300 | ohorngrave ohorngrave.sc Ohornhookabove ohornhookabove ohornhookabove.sc
301 | Ohorntilde ohorntilde ohorntilde.sc Ohungarumlaut ohungarumlaut
302 | ohungarumlaut.sc Omacron omacron omacron.sc Omega omega Omicron omicron
303 | one one.oldstyle one.prop one.superior one.taboldstyle onehalf onequarter
304 | Oogonek oogonek oogonek.sc Oogonekacute oogonekacute oogonekacute.sc
305 | openbullet ordfeminine ordmasculine Orogate orogate orogate.sc Oslash
306 | oslash Oslash.dup oslash.dup oslash.sc Oslashacute oslashacute
307 | oslashacute.sc Otilde otilde otilde.sc P p p.sc paragraph paragraph.alt
308 | parenleft parenright partialdiff percent period periodcentered permyriad
309 | perthousand perthousandzero peso Phi phi phi.alt Pi pi pi.alt plus
310 | plusminus Psi psi published Q q q.sc question questiondown
311 | quillbracketleft quillbracketright quotedbl quotedblbase quotedblbase.ts1
312 | quotedblleft quotedblright quoteleft quoteleft.dup quoteright
313 | quoteright.dup quotesinglbase quotesinglbase.ts1 quotesingle
314 | quotesingle.ts1 R r r.sc Racute racute racute.sc radical Rcaron rcaron
315 | rcaron.sc Rcedilla rcedilla Rcommaaccent rcommaaccent rcommaaccent.sc
316 | Rdblgrave rdblgrave rdblgrave.sc Rdotaccent rdotaccent rdotaccent.sc
317 | Rdotbelow rdotbelow rdotbelow.sc Rdotbelowmacron rdotbelowmacron
318 | rdotbelowmacron.sc recipe referencemark registered registered.alt Rho rho
319 | rho.alt Ring ring Ringacute ringacute Ringcomb ringcomb ringhalfleft
320 | ringhalfright S s s.sc Sacute sacute sacute.sc Scaron scaron scaron.sc
321 | Scedilla scedilla scedilla.sc schwa Scircumflex scircumflex
322 | scircumflex.sc Scommaaccent scommaaccent scommaaccent.sc Sdotbelow
323 | sdotbelow sdotbelow.sc section semicolon servicemark seven seven.oldstyle
324 | seven.prop seven.taboldstyle sfthyphen Sigma sigma sigma1 six
325 | six.oldstyle six.prop six.taboldstyle slash space star sterling summation
326 | suppress T t t.sc Tau tau Tcaron tcaron tcaron.sc Tcedilla tcedilla
327 | tcedilla.sc Tcommaaccent tcommaaccent tcommaaccent.sc Tdieresis tdieresis
328 | tdieresis.sc Tdotbelow tdotbelow tdotbelow.sc Theta theta theta.alt Thorn
329 | thorn thorn.sc three three.oldstyle three.prop three.superior
330 | three.taboldstyle threequarters threequartersemdash tie tieaccentcapital
331 | tieaccentcapital.new tieaccentlowercase tieaccentlowercase.new Tilde
332 | tilde tilde.dup tildebelow tildebelowcomb Tildecomb tildecomb tildelow
333 | Tlinebelow tlinebelow tlinebelow.sc trademark Ttilde ttilde ttilde.sc
334 | twelveudash two two.oldstyle two.prop two.superior two.taboldstyle U u
335 | u.sc Uacute uacute uacute.sc Ubreve ubreve ubreve.sc Ubrevebelowinverted
336 | ubrevebelowinverted ubrevebelowinverted.sc Ucaron ucaron ucaron.sc
337 | Ucircumflex ucircumflex ucircumflex.sc Udblgrave udblgrave udblgrave.sc
338 | Udieresis udieresis udieresis.sc Udieresisacute udieresisacute
339 | udieresisacute.sc Udieresiscaron udieresiscaron udieresiscaron.sc
340 | Udieresisgrave udieresisgrave udieresisgrave.sc Udotbelow udotbelow
341 | udotbelow.sc Ugrave ugrave ugrave.sc Uhookabove uhookabove uhookabove.sc
342 | Uhorn uhorn uhorn.sc Uhornacute uhornacute uhornacute.sc Uhorndotbelow
343 | uhorndotbelow uhorndotbelow.sc Uhorngrave uhorngrave uhorngrave.sc
344 | Uhornhookabove uhornhookabove uhornhookabove.sc Uhorntilde uhorntilde
345 | uhorntilde.sc Uhungarumlaut uhungarumlaut uhungarumlaut.sc Umacron
346 | umacron umacron.sc underscore undertie undertieinverted uni2010 uni2011
347 | uni2423 Uogonek uogonek uogonek.sc Upsilon upsilon Uring uring uring.sc
348 | Utilde utilde utilde.sc V v v.sc W w w.sc Wacute wacute wacute.sc
349 | Wcircumflex wcircumflex wcircumflex.sc Wdieresis wdieresis wdieresis.sc
350 | weierstrass Wgrave wgrave wgrave.sc won X x x.sc Xi xi Y y y.sc Yacute
351 | yacute yacute.sc Ycircumflex ycircumflex ycircumflex.sc Ydieresis
352 | ydieresis ydieresis.sc Ydotbelow ydotbelow ydotbelow.sc yen Ygrave ygrave
353 | ygrave.sc Yhookabove yhookabove yhookabove.sc Ytilde ytilde ytilde.sc Z z
354 | z.sc Zacute zacute zacute.sc Zcaron zcaron zcaron.sc Zdotaccent
355 | zdotaccent zdotaccent.sc Zdotbelow zdotbelow zdotbelow.sc zero
356 | zero.oldstyle zero.prop zero.slash zero.taboldstyle Zeta zeta
357 |
--------------------------------------------------------------------------------
/repro_research.tex:
--------------------------------------------------------------------------------
1 | \documentclass[aspectratio=169,12pt,t]{beamer}
2 | \usepackage{graphicx}
3 | \setbeameroption{hide notes}
4 | \setbeamertemplate{note page}[plain]
5 | \usepackage{listings}
6 |
7 | \input{header.tex}
8 |
9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
10 | % end of header
11 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
12 |
13 | % title info
14 | \title{Steps toward reproducible research}
15 | \author{\href{https://kbroman.org}{Karl Broman}}
16 | \institute{Biostatistics \& Medical Informatics, UW{\textendash}Madison}
17 | \date{\href{https://kbroman.org}{\tt \scriptsize \color{foreground} kbroman.org}
18 | \\[-4pt]
19 | \href{https://github.com/kbroman}{\tt \scriptsize \color{foreground} github.com/kbroman}
20 | \\[-4pt]
21 | \href{https://rstats.me/@kbroman}{\tt \scriptsize \color{foreground} @kbroman@rstats.me}
22 | \\[2pt]
23 | \scriptsize {\lolit Slides:} \href{https://bit.ly/steps2rr}{\tt
24 | \color{foreground} bit.ly/steps2rr}
25 | }
26 |
27 |
28 | \begin{document}
29 |
30 | % title slide
31 | {
32 | \setbeamertemplate{footline}{} % no page number here
33 | \frame{
34 | \titlepage
35 |
36 | \note{This lecture is based on slides for a talk I've given a whole
37 | bunch of times.
38 |
39 | Source: {\tt https://github.com/kbroman/Talk\_ReproRes} \\
40 | These slides, with notes: {\tt https://bit.ly/steps2rr} \\
41 | Full slides without notes: {\tt https://bit.ly/steps2rr\_nonotes}
42 |
43 | By ``reproducible research,'' I'm referring to ``computational
44 | reproducibility,'' by which I mean that the data and code for a
45 | project are packaged together in a way that they can be handed to
46 | someone else, who can rerun the code and get the same
47 | results---the same figures and tables. This is surprisingly hard
48 | to do, and it's even more difficult in the context of a
49 | collaboration between two or more data analysts.
50 | }
51 | } }
52 |
53 |
54 | \begin{frame}[fragile,c]{}
55 |
56 | \begin{center}
57 | \begin{minipage}[c]{9.3cm}
58 | \begin{semiverbatim}
59 | \lstset{basicstyle=\normalsize}
60 | \begin{lstlisting}[linewidth=9.3cm]
61 | Karl -- this is very interesting,
62 | however you used an old version of
63 | the data (n=143 rather than n=226).
64 |
65 | I'm really sorry you did all that
66 | work on the incomplete dataset.
67 |
68 | Bruce
69 | \end{lstlisting}
70 | \end{semiverbatim}
71 | \end{minipage}
72 | \end{center}
73 |
74 | \note{I'm an applied statistician; my goal is to help people make
75 | sense of their data. I have a lot of collaborators, and there's
76 | nothing I enjoy more than puzzling over their data. So I write a lot of
77 | reports, describing what I've done and what I've learned.
78 |
79 | This is an email I got from a collaborator,
80 | in response to an analysis report that I had sent him.
81 | It's always a bit of a shock to get an email like this: what have I
82 | done? Why am I working with the wrong data, and where is the right data?
83 |
84 | But what he didn't know is that by this point in my life, I'd
85 | adopted a reproducible workflow.
86 | Because I'd set things up carefully, I could just substitute in the
87 | newer dataset, type a single command (``{\tt make}'') to rerun the
88 | analyses, and get the revised report.
89 |
90 | This is a reproducibility success story. We all make mistakes, but
91 | if our projects are reproducible, we can nimbly recover from those
92 | mistakes.
93 |
94 | There is a second important lesson here: At the start of such
95 | reports, I always include a paragraph about our shared goals, along
96 | with some brief data summaries. By doing so, he immediately saw that
97 | I had an old version of the data. If I hadn't done so, we might
98 | never have discovered my error.
99 | }
100 | \end{frame}
101 |
102 |
103 | \begin{frame}[c]{}
104 | \centering
105 | {\Large The results in Table 1 don't seem to \\[12pt]
106 | correspond to those in Figure 2.}
107 |
108 | \note{My computational life is not entirely rosy. This is the sort of
109 | email that will freak me out.}
110 | \end{frame}
111 |
112 |
113 | \begin{frame}[c]{}
114 | \centerline{\Large Where did we get this data file?}
115 |
116 | \note{Record the provenance of all data or metadata files.}
117 | \end{frame}
118 |
119 |
120 |
121 | \begin{frame}[c]{}
122 | \centerline{\Large Why did I omit those samples?}
123 |
124 | \note{I may decide to omit a few samples. Will I record {\nhilit why}
125 | I omitted those particular samples?}
126 | \end{frame}
127 |
128 |
129 |
130 | \begin{frame}[c]{}
131 | \centerline{\Large Which image goes with which experiment?}
132 |
133 | \note{For experimental biologists, it can be tricky to keep track of
134 | the vast set of images and experiments they perform.}
135 | \end{frame}
136 |
137 |
138 |
139 | \begin{frame}[c]{}
140 | \centerline{\Large How did I make that figure?}
141 |
142 | \note{Sometimes, in the midst of a bout of exploratory data analysis,
143 | I'll create some exciting graph and have a heck of a time
144 | reproducing it afterwards.}
145 | \end{frame}
146 |
147 |
148 | \begin{frame}[c]{}
149 | \centerline{\Large In what order do I run these scripts?}
150 |
151 | \note{Sometimes the process of data file manipulation and data
152 | cleaning gets spread across a bunch of scripts that need to be
153 | executed in a particular order. Will I record this information? Is
154 | it obvious what script does what?}
155 | \end{frame}
156 |
157 |
158 |
159 | \begin{frame}[c]{}
160 | \centerline{\Large ``Your script is now giving an error.''}
161 |
162 | \note{It was working last week. Well, last month, at least.
163 |
164 | How easy is it to go back through that script's history to see when
165 | and why it stopped working?}
166 | \end{frame}
167 |
168 |
169 |
170 | \begin{frame}[c]{}
171 | \centerline{\Large ``The attached is similar to the code we used.''}
172 |
173 | \note{From an email in response to my request for code used for a
174 | paper.}
175 | \end{frame}
176 |
177 |
178 |
179 | \begin{frame}[c]{Reproducible research}
180 |
181 | \begin{quotation}
182 | {\normalfont
183 | organize the data and code in a way \\[4pt]
184 | that you can hand them to someone else \\[4pt]
185 | and they can re-run the code \\[4pt]
186 | and get the same results \\[4pt]
187 | \quad (the same figures and tables)
188 | }
189 | \end{quotation}
190 |
191 | \note{
192 | To reiterate my definition of reproducible research:
193 | it's about assembling and organizing the data and code
194 | so that they can be re-run to give the same results.
195 | }
196 | \end{frame}
197 |
198 |
199 |
200 |
201 |
202 | \begin{frame}[c]{}
203 |
204 |
205 | \centering
206 | \Large
207 |
208 | Reproducible
209 |
210 | \bigskip
211 |
212 | {\color{lolit} vs.}
213 |
214 | \bigskip
215 |
216 | \only<1>{Replicable}
217 | \only<2 | handout:0>{{\color{background} p} Correct {\color{background} p}}
218 |
219 | \note{Computational work is
220 | {\color{nhilit} reproducible} if the data and code are organized in
221 | a way that they can be handed to someone else, who can rerun the
222 | code and get the same results---the same figures and tables.
223 | {\color{nhilit} Replicable} is more stringent: can
224 | someone repeat the experiment and get the same results?
225 |
226 | Reproducibility is a minimal standard. That something is
227 | reproducible doesn't imply that it is correct. The code may have bugs. The
228 | methods may be poorly behaved. There could be experimental
229 | artifacts.
230 |
231 | (But reproducibility is probably associated with correctness.)
232 |
233 | Note that some scientists say replicable for what I call
234 | reproducible, and vice versa.
235 | }
236 | \end{frame}
237 |
238 |
239 |
240 | \begin{frame}[c]{}
241 |
242 | \centerline{\Large \href{https://kbroman.org/steps2rr}{\tt \color{title}
243 | kbroman.org/steps2rr}}
244 |
245 | \note{It was a long, hard process for me to move from my old standard
246 | practice to a fully reproducible workflow. In thinking through that
247 | process, I wrote down my thoughts on the basic steps to take towards
248 | full reproducibility. This forms the basis of what I'll present here.
249 | }
250 | \end{frame}
251 |
252 |
253 | \begin{frame}[c]{}
254 |
255 | \centering
256 | \large
257 |
258 | A little bit reproducible \\
259 | is better than not reproducible.
260 |
261 | \vspace{12mm}
262 |
263 | A little bit open \\
264 | is better than not open.
265 |
266 | \vspace{12mm}
267 |
268 | Strive to make each project \\
269 | a bit better organized than the last.
270 |
271 | \note{
272 | While it's good to strive for full reproducibility, it can be difficult
273 | to achieve. But partially reproducible is better than not-at-all
274 | reproducible. Similarly, making data and code partially open is
275 | better than nothing.
276 |
277 | Don't try to change every aspect of your workflow all at once. Focus
278 | on revising one aspect at a time. When you get to the end of a
279 | project, you may be dissatisfied with the state of things, but don't
280 | give up. Try to make each project a bit better organized and
281 | reproducible than the last.
282 | }
283 |
284 | \end{frame}
285 |
286 |
287 |
288 |
289 |
290 | \begin{frame}[c]{Organize your project}
291 |
292 | \begin{center}
293 | \large
294 | \only<1|handout:0>{
295 | File organization and naming \\
296 | are powerful weapons against chaos.
297 | }
298 | \only<2>{
299 | Your closest collaborator is you six months ago, \\
300 | but you don't reply to emails.
301 | }
302 | \only<3|handout:0>{Have sympathy for your future self.}
303 | \end{center}
304 |
305 | \hfill
306 | {\lolit
307 | \only<1|handout:0>{{\textendash} \href{https://jennybryan.org}{Jenny Bryan}}
308 | \only<2>{(paraphrasing \href{https://twitter.com/kcranstn/status/370914072511791104}{Mark Holder})}
309 | \only<3|handout:0>{}
310 | }
311 |
312 | \note{The first thing to do is to make your project
313 | understandable to others (or yourself, later, when you try to figure
314 | out what it was that you did).
315 | }
316 | \end{frame}
317 |
318 |
319 |
320 | \begin{frame}[fragile,c]{Organize your project}
321 |
322 | \begin{center}
323 | \begin{minipage}[c]{10.3cm}
324 | \begin{semiverbatim}
325 | \lstset{basicstyle=\normalsize}
326 | \begin{lstlisting}[linewidth=10.3cm]
327 | RawData/ Notes/
328 | DerivedData/ Refs/
329 |
330 | Python/ ReadMe.txt
331 | R/ ToDo.txt
332 | Ruby/ Makefile
333 |
334 | Analysis/
335 | Figures/
336 | \end{lstlisting}
337 | \end{semiverbatim}
338 | \end{minipage}
339 | \end{center}
340 |
341 | \note{
342 | Segregate all the materials for a project in one directory/folder on
343 | your hard drive.
344 |
345 | There will be a lot of files. Organize them in a meaningful way.
346 |
347 | This is the way I organize a project directory. The key principles
348 | are to put everything related to a project in a common directory,
349 | but then to separate data from code and separate raw data from
350 | processed data.
351 |
352 | Write {\tt ReadMe} files to explain what's what. Make sure they stay
353 | current.
354 | }
355 |
356 | \end{frame}
357 |
358 |
359 |
360 |
361 | \begin{frame}[fragile,c]{Chaos}
362 |
363 | \begin{center}
364 | \begin{minipage}[c]{11.33cm}
365 | \begin{semiverbatim}
366 | \lstset{basicstyle=\scriptsize}
367 | \begin{lstlisting}[linewidth=11.33cm]
368 | AimeeNullSims/ Deuterium/ Ping/
369 | AimeeResults/ ExtractData4Gary/ Ping2/
370 | AnnotationFiles/ FromAimee/ Ping3/
371 | Brian/ GoldStandard/ Ping4/
372 | Chr6_extrageno/ HumanGWAS/ Play/
373 | Chr6_segdis/ Insulin/ Prdm9/
374 | ChrisPlaisier/ Int2_for_Mark/ RBM_PlasmaUrine_2012-03-08/
375 | Code4Aimee/ Islet_2011-05/ Slco1a6/
376 | CompAnnot/ MappingProbes/ StudyLineupMethods/
377 | CondScans/ MultiProbes/ kidney_chr6.R
378 | D2O_2012-02-14/ NewMap/ pck2_sucla2.R
379 | D2O_cellcycle/ Notes/ penalties.txt
380 | D2Ocorr/ NullSims/ transeQTL4Lude/
381 | Data4Aimee/ NullSims_2009-09-10/
382 | Data4Tram/ PepIns_2012-02-09/
383 | \end{lstlisting}
384 | \end{semiverbatim}
385 | \end{minipage}
386 | \end{center}
387 |
388 | \note{
389 | This is a folder on my hard drive, for the project that led me to
390 | reassess my life.
391 | }
392 |
393 | \end{frame}
394 |
395 |
396 | \begin{frame}[fragile,c]{Choose good names for things}
397 |
398 | \begin{center}
399 | \begin{minipage}[c]{13.7cm}
400 | \begin{semiverbatim}
401 | \lstset{basicstyle=\scriptsize}
402 | \begin{lstlisting}[linewidth=13.7cm]
403 | betw_tissue_corr.R expr_scatterplot_allprobes.R gve_similarity_alltissues.R
404 | coatcolor_lod.R expr_scatterplots_dup.R gve_similarity.R
405 | colors.R expr_scatterplots_mix.R gve_supp.R
406 | cover_fig.R expr_scatterplots_swap.R insulin_lod.R
407 | eqtl_counts_10.R expr_swaps.R local_eqtl_locations.R
408 | eqtl_counts.R func.R my_plot_map.R
409 | eve_hist.R genotype_plates.R my_plot_scanone.R
410 | eve_scheme.R gve_hist.R sex_vs_X.R
411 | eve_similarity.R gve_new.R xchr_fig.R
412 | eve_similarity_supp.R gve.R xist_and_y.R
413 | expr_corr_dup.R gve_scheme.R
414 | expr_corr_mix.R gve_similarity_2ndbest.R
415 | \end{lstlisting}
416 | \end{semiverbatim}
417 | \end{minipage}
418 | \end{center}
419 |
420 | \note{
421 | You'll have a lot of files. In addition to organizing them in
422 | subfolders, it's important to choose good names for them.
423 |
424 | The names of these files largely explain their contents, but
425 | they're also left rather disorganized.
426 | }
427 |
428 | \end{frame}
429 |
430 |
431 | \begin{frame}[fragile,c]{Choose good names for things}
432 |
433 | \begin{center}
434 | \begin{minipage}[c]{8.3cm}
435 | \begin{semiverbatim}
436 | \lstset{basicstyle=\normalsize}
437 | \begin{lstlisting}[linewidth=8.3cm]
438 | fig1.png fig5.png
439 | fig10.png fig6.png
440 | fig2.png fig7.png
441 | fig3.png fig8.png
442 | fig4.png fig9.png
443 | \end{lstlisting}
444 | \end{semiverbatim}
445 | \end{minipage}
446 | \end{center}
447 |
448 | \note{
449 | These names are well organized, but you have to remember the order
450 | of all of the figures to find the one you want.
451 |
452 | And note that, alphabetically, figure 10 ends up between figure 1
453 | and figure 2.
454 | }
455 |
456 | \end{frame}
457 |
458 |
459 |
460 |
461 |
462 |
463 | \begin{frame}[c]{Choose good names for things}
464 |
465 | \bbi
466 | \item Machine readable
467 | \bi
468 | \item No spaces
469 | \item No special characters except {\textunderscore} and
470 | {\tt -}
471 | \ei
472 |
473 | \item Human readable
474 | \bi
475 | \item Explain the contents
476 | \ei
477 |
478 | \item Consistent
479 | \bi
480 | \item Name similar files in a similar way
481 | \ei
482 |
483 | \item Make use of computer's sorting
484 | \bi
485 | \item pad numbers with 0's (e.g., {\tt 01}, {\tt 02}, ...)
486 | \item start with general grouping, then more specific
487 | \item dates like {\tt 2019-05-14}
488 | \ei
489 |
490 | \ei
491 |
492 | \note{
493 | You want the names to be easy to handle in software, which
494 | generally means no spaces or special characters except for
495 | underscore and hyphen (which are useful for separating words).
496 |
497 | But you want the names to explain the files' contents, so that you
498 | don't have to open the files to figure out what they are.
499 |
500 | Consistency is important: if you have a bunch of similar files, you
501 | should have some system for naming them.
502 |
503 | And make use of the computer's sort of files, by padding numbers
504 | with 0's (so that 10 appears after 9 rather than before 2) and
505 | organizing the files into groups.
506 |
507 | Dates should always be written as {\tt YYYY-MM-DD}, so that when sorted
508 | they are in order by date.
509 | }
510 |
511 | \end{frame}
512 |
513 |
514 | \begin{frame}[c]{}
515 |
516 | \vspace{24pt}
517 |
518 | \figh{Figs/iso_8601.png}{0.8}
519 |
520 | \vfill
521 |
522 | \hfill {\tt \footnotesize \lolit \href{http://xkcd.com/1179/}{xkcd.com/1179}}
523 |
524 | \note{Go with the xkcd format for writing dates, for ease of sorting.
525 | }
526 | \end{frame}
527 |
528 |
529 | \begin{frame}[fragile,c]{Choose good names for things}
530 |
531 | \begin{center}
532 | \begin{minipage}[c]{10.3cm}
533 | \begin{semiverbatim}
534 | \lstset{basicstyle=\normalsize}
535 | \begin{lstlisting}[linewidth=10.3cm]
536 | 0_vcf2db.R
537 | 1_prep_geno.R
538 | 2_prep_pheno_clin.R
539 | 2_prep_pheno_otu.R
540 | 3_prep_covar.R
541 | 4_prep_analysis_pheno_clin.R
542 | 4_prep_analysis_pheno_otu.R
543 | 5_scans.R
544 | 6_grab_peaks.R
545 | 7_find_nearby_peaks.R
546 | \end{lstlisting}
547 | \end{semiverbatim}
548 | \end{minipage}
549 | \end{center}
550 |
551 | \note{
552 | Here's an example to take advantage of the way the computer sorts
553 | files: a set of R scripts, which show up in the order they are used.
554 | }
555 |
556 | \end{frame}
557 |
558 |
559 |
560 | \begin{frame}[c]{No ``{\hilit final}'' in file names}
561 |
562 | \vspace*{3mm}
563 |
564 | \centering
565 |
566 | % comic from http://www.phdcomics.com/comics/archive.php?comicid=1531
567 | \figh{Figs/phd101212s.png}{0.8}
568 |
569 | \note{
570 | Never include ``final'' in a file name.
571 | }
572 |
573 | \end{frame}
574 |
575 |
576 |
577 | \begin{frame}[fragile,c]{No ``{\hilit final}'' in file names}
578 |
579 |
580 | \addtocounter{framenumber}{-1}
581 |
582 |
583 | \begin{center}
584 | \begin{minipage}[c]{9.5cm}
585 | \begin{semiverbatim}
586 | \lstset{basicstyle=\tiny}
587 | \begin{lstlisting}[escapechar=!,linewidth=9.5cm]
588 | !{\color{foreground}{Deprecated/ hypo_prcomp.RData}!
589 | !{\color{foreground}{ReadMe.txt islet_int1_final.RData}!
590 | !{\color{foreground}{adipose_int1_final.RData islet_int2_final.RData}!
591 | !{\color{foreground}{adipose_int2_final.RData islet_mlratio_final.RData}!
592 | !{\color{foreground}{adipose_mlratio_final.RData islet_mlratio_nqrank_final.RData}!
593 | !{\color{foreground}{adipose_mlratio_nqrank_final.RData islet_prcomp.RData}!
594 | !{\color{foreground}{adipose_prcomp.RData kidney_int1_final.RData}!
595 | !{\color{foreground}{aligned_geno_with_pmap.RData kidney_int2_final.RData}!
596 | !{\color{foreground}{batches_final.RData kidney_mlratio_final.RData}!
597 | !{\color{foreground}{batches_raw_final.RData kidney_mlratio_nqrank_final.RData}!
598 | !{\color{foreground}{cpl_final.RData kidney_prcomp.RData}!
599 | !{\color{foreground}{d2o_final.RData lipomics_final_rev2.RData}!
600 | !{\color{foreground}{gastroc_int1_final.RData liverTG_final.RData}!
601 | !{\color{foreground}{gastroc_int2_final.RData liver_int1_final.RData}!
602 | !{\color{foreground}{gastroc_mlratio_final.RData liver_int2_final.RData}!
603 | !{\color{foreground}{gastroc_mlratio_nqrank_final.RData liver_mlratio_final.RData}!
604 | !{\color{foreground}{gastroc_prcomp.RData liver_mlratio_nqrank_final.RData}!
605 | !{\color{foreground}{hypo_int1_final.RData liver_prcomp.RData}!
606 | !{\color{foreground}{hypo_int2_final.RData mirna_final.RData}!
607 | !{\color{foreground}{hypo_mlratio_final.RData necropsy_final_rev2.RData}!
608 | !{\color{foreground}{hypo_mlratio_final_old.RData plasmaurine_final_rev.RData}!
609 | !{\color{foreground}{hypo_mlratio_nqrank_final.RData pmark.RData}!
610 | !{\color{foreground}{hypo_mlratio_nqrank_final_old.RData rbm_final.RData}!
611 | !{\color{foreground}{hypo_omit.RData}!
612 | \end{lstlisting}
613 | \end{semiverbatim}
614 | \end{minipage}
615 | \end{center}
616 |
617 |
618 |
619 | \end{frame}
620 |
621 |
622 | \begin{frame}[fragile,c]{No ``{\hilit final}'' in file names}
623 |
624 |
625 | \begin{center}
626 | \begin{minipage}[c]{9.5cm}
627 | \begin{semiverbatim}
628 | \lstset{basicstyle=\tiny}
629 | \begin{lstlisting}[escapechar=!,linewidth=9.5cm]
630 | !{\color{foreground}{Deprecated/ hypo_prcomp.RData}!
631 | !{\color{foreground}{ReadMe.txt islet_int1_final.RData}!
632 | !{\color{foreground}{adipose_int1_final.RData islet_int2_final.RData}!
633 | !{\color{foreground}{adipose_int2_final.RData islet_mlratio_final.RData}!
634 | !{\color{foreground}{adipose_mlratio_final.RData islet_mlratio_nqrank_final.RData}!
635 | !{\color{foreground}{adipose_mlratio_nqrank_final.RData islet_prcomp.RData}!
636 | !{\color{foreground}{adipose_prcomp.RData kidney_int1_final.RData}!
637 | !{\color{foreground}{aligned_geno_with_pmap.RData kidney_int2_final.RData}!
638 | !{\color{foreground}{batches_final.RData kidney_mlratio_final.RData}!
639 | !{\color{foreground}{batches_raw_final.RData kidney_mlratio_nqrank_final.RData}!
640 | !{\color{foreground}{cpl_final.RData kidney_prcomp.RData}!
641 | !{\color{foreground}{d2o_final.RData }!!{\color{vhilit} lipomics_final_rev2.RData}!
642 | !{\color{foreground}{gastroc_int1_final.RData liverTG_final.RData}!
643 | !{\color{foreground}{gastroc_int2_final.RData liver_int1_final.RData}!
644 | !{\color{foreground}{gastroc_mlratio_final.RData liver_int2_final.RData}!
645 | !{\color{foreground}{gastroc_mlratio_nqrank_final.RData liver_mlratio_final.RData}!
646 | !{\color{foreground}{gastroc_prcomp.RData liver_mlratio_nqrank_final.RData}!
647 | !{\color{foreground}{hypo_int1_final.RData liver_prcomp.RData}!
648 | !{\color{foreground}{hypo_int2_final.RData mirna_final.RData}!
649 | !{\color{foreground}{hypo_mlratio_final.RData }!!{\color{vhilit} necropsy_final_rev2.RData}!
650 | !{\color{vhilit}{hypo_mlratio_final_old.RData plasmaurine_final_rev.RData}!
651 | !{\color{foreground}{hypo_mlratio_nqrank_final.RData pmark.RData}!
652 | !{\color{vhilit}{hypo_mlratio_nqrank_final_old.RData }!!{\color{foreground} rbm_final.RData}!
653 | !{\color{foreground}{hypo_omit.RData}!
654 | \end{lstlisting}
655 | \end{semiverbatim}
656 | \end{minipage}
657 | \end{center}
658 |
659 | \note{
660 | This is an actual directory on my computer. If you include
661 | {\tt final} in a file name, there's a risk that you'll end up with
662 | {\tt final{\textunderscore}rev},
663 | {\tt final{\textunderscore}rev2}, and
664 | {\tt final{\textunderscore}old}.
665 |
666 | Another problem here is that the files aren't organized very well.
667 | }
668 |
669 |
670 | \end{frame}
671 |
672 |
673 | \begin{frame}[fragile,c]{Choose good names for things}
674 |
675 |
676 | \begin{center}
677 | \begin{minipage}[c]{9.5cm}
678 | \begin{semiverbatim}
679 | \lstset{basicstyle=\tiny}
680 | \begin{lstlisting}[escapechar=!,linewidth=9.5cm]
681 | batches_raw_v1.rds geneexpr_mlratio_gastroc_v2.rds
682 | batches_v1.rds geneexpr_mlratio_hypo_v1.rds
683 | clinical_cpl_v2.rds geneexpr_mlratio_hypo_v2.rds
684 | clinical_d2o_v2.rds geneexpr_mlratio_islet_v2.rds
685 | clinical_lipomics_v4.rds geneexpr_mlratio_kidney_v2.rds
686 | clinical_liverTG_v2.rds geneexpr_mlratio_liver_v2.rds
687 | clinical_mirna_v2.rds geneexpr_mlratio_nqrank_adipose_v2.rds
688 | clinical_necropsy_v4.rds geneexpr_mlratio_nqrank_gastroc_v2.rds
689 | clinical_plasmaurine_v3.rds geneexpr_mlratio_nqrank_hypo_v1.rds
690 | clinical_rbm_v2.rds geneexpr_mlratio_nqrank_hypo_v2.rds
691 | Deprecated/ geneexpr_mlratio_nqrank_islet_v2.rds
692 | geneexpr_int1_adipose_v2.rds geneexpr_mlratio_nqrank_kidney_v2.rds
693 | geneexpr_int1_gastroc_v2.rds geneexpr_mlratio_nqrank_liver_v2.rds
694 | geneexpr_int1_hypo_v2.rds geneexpr_omit_hypo.rds
695 | geneexpr_int1_islet_v2.rds geneexpr_prcomp_adipose_v2.rds
696 | geneexpr_int1_kidney_v2.rds geneexpr_prcomp_gastroc_v2.rds
697 | geneexpr_int1_liver_v2.rds geneexpr_prcomp_hypo_v2.rds
698 | geneexpr_int2_adipose_v2.rds geneexpr_prcomp_islet_v2.rds
699 | geneexpr_int2_gastroc_v2.rds geneexpr_prcomp_kidney_v2.rds
700 | geneexpr_int2_hypo_v2.rds geneexpr_prcomp_liver_v2.rds
701 | geneexpr_int2_islet_v2.rds geno_aligned_w_pmap.rds
702 | geneexpr_int2_kidney_v2.rds geno_pmark.rds
703 | geneexpr_int2_liver_v2.rds ReadMe.txt
704 | geneexpr_mlratio_adipose_v2.rds
705 | \end{lstlisting}
706 | \end{semiverbatim}
707 | \end{minipage}
708 | \end{center}
709 |
710 | \note{
711 | This is the same set of files, renamed. Using {\tt
712 | clinical{\textunderscore}} and {\tt geneexpr{\textunderscore}}
713 | brings similar files together.
714 |
715 | A lot of files, but less forbidding.
716 | }
717 |
718 | \end{frame}
719 |
720 |
721 |
722 | \begin{frame}[c]{Document your work}
723 |
724 | \bbi
725 | \item What is all of this stuff?
726 | \item What was your analysis process?
727 | \vspace{1cm}
728 | \item[$\boldsymbol{\rightarrow}$] {\large {\tt ReadMe} files}
729 | \ei
730 |
731 | \note{
732 | An overall {\tt ReadMe} file plus an additional such file in each
733 | directory.
734 |
735 | Well-named files and directories make everything easier.
736 |
737 | Also, keep the documentation current. There's nothing worse than
738 | documentation that is out of date and doesn't match the contents.
739 | }
740 |
741 |
742 | \end{frame}
743 |
744 |
745 |
746 |
747 | \begin{frame}[c]{Organizing data in spreadsheets}
748 |
749 |
750 | \figw{Figs/bad_spreadsheet.pdf}{1.0}
751 |
752 |
753 |
754 | \note{
755 | How you organize your data within files can have a big impact on
756 | how easy they are to work with.
757 |
758 | You can probably figure out what the numbers mean here,
759 | particularly if I tell you that there were triplicate
760 | measurements under two treatments (1 min or 5 min) of cells that
761 | were either normal or mutant and came from mouse strains B6 or
762 | BTBR.
763 |
764 | But it's hard to tell a computer program about the data structure
765 | here.
766 | }
767 | \end{frame}
768 |
769 |
770 |
771 | \begin{frame}[c]{Organizing data in spreadsheets}
772 |
773 |
774 | \figh{Figs/good_spreadsheet.pdf}{0.8}
775 |
776 | \note{
777 | This is the first few rows of a reorganized version of the data,
778 | as a rectangle where the rows are individual measurements and the
779 | columns are variables.
780 |
781 | This is maybe less pretty, but it's much easier to work with.
782 | }
783 |
784 | \end{frame}
785 |
786 |
787 |
788 |
789 | \begin{frame}[c]{Organizing data in spreadsheets}
790 |
791 |
792 | \bbi
793 | \item Make it a rectangle
794 | \item Individual measurements as rows; variables as columns
795 | \item Single header row
796 | \item One item per cell
797 | \item No empty cells
798 | \item No calculations in the raw data
799 | \item No highlighting or coloring as data
800 | \ei
801 |
802 | \vspace{8mm}
803 |
804 | \hfill
805 | \href{https://doi.org/gdz6cm}{\footnotesize
806 | \lolit \tt Broman and Woo (2018) Am Stat 72:2-10 \\
807 | \hfill doi.org/gdz6cm}
808 |
809 | \note{
810 | Here are some key principles for organizing data in spreadsheets:
811 | make a rectangle with a single header row.
812 |
813 | Never do calculations in your raw data file. If you're doing
814 | analyses or making charts in Excel, do so in a copy of the data
815 | file. Every time you open the raw data file, there's a risk that
816 | you'll mess things up.
817 | }
818 | \end{frame}
819 |
820 |
821 |
822 |
823 | \begin{frame}[c]{}
824 |
825 | \begin{center}
826 | \Large
827 |
828 |
829 | ``What the heck is `{\hilit \tt FAD{\textunderscore}NAD SI 8.3{\textunderscore}3.3G}'?''
830 |
831 | \end{center}
832 |
833 | \note{
834 | Sometimes the columns in your data files have meaning only to you.
835 |
836 | If the data analyst can't connect to the measurements, they're just
837 | columns of numbers.
838 | }
839 |
840 | \end{frame}
841 |
842 |
843 |
844 |
845 | \begin{frame}[c]{Metadata}
846 |
847 | \bbi
848 | \item Create a data dictionary
849 | \bi
850 | \item Explain each column
851 | \item Include different versions of the variable names (compact vs descriptive)
852 | \item Units
853 | \item Allowable values
854 | \ei
855 | \item The metadata are data
856 | \bi
857 | \item Make it a rectangle
858 | \ei
859 | \ei
860 |
861 | \note{
862 | Clear metadata is critical for others to be able to understand
863 | your data. In particular, make a data dictionary that describes
864 | the variables. In addition to a description of each column, I like
865 | to have short and longer versions of the names for use in data
866 | visualizations, as the column names themselves can be cryptic.
867 |
868 | These metadata are data, and so rather than make a Word
869 | document describing the data, I personally would prefer to have
870 | another data file with the metadata.
871 | }
872 |
873 | \end{frame}
874 |
875 |
876 |
877 |
878 | \begin{frame}[c]{Data dictionary}
879 |
880 |
881 | \figw{Figs/data_dict.pdf}{1.0}
882 |
883 |
884 | \note{
885 | Here's an example data dictionary. You might also include units
886 | and information about possible valid values.
887 | }
888 |
889 | \end{frame}
890 |
891 |
892 |
893 |
894 |
895 | \begin{frame}[c]{Everything with a script}
896 |
897 | \centering
898 | \large
899 | If you do something once, \\
900 | you'll do it 1000 times.
901 |
902 | \note{The most basic principle for reproducible research is: do
903 | everything via code.
904 |
905 | Downloading data from the web, converting an Excel file to CSV,
906 | renaming columns/variables, omitting bad samples or data points...do
907 | all of this with scripts.
908 |
909 | You may be tempted to open up a data file and hand-edit. But if you
910 | get a revised version of that file, you'll need to do it again. And
911 | it'll be harder to figure out what it was that you did.
912 |
913 | Some things are more cumbersome via code, but in the long run you'll
914 | save time.
915 | }
916 | \end{frame}
917 |
918 |
919 |
920 | \begin{frame}[c]{Small corrections}
921 |
922 | \only<1|handout 0>{\figw{Figs/spreadsheet_g_v_mg.pdf}{0.95}}
923 | \only<2>{\figw{Figs/spreadsheet_g_v_mg_hilit.pdf}{0.95}}
924 |
925 |
926 | \note{
927 | Here is a case where a few values were in grams rather than
928 | milligrams. You might be tempted to hand-edit the file. It would
929 | be better to handle it in your script. Even better would be to go
930 | back to your collaborator and have them fix the primary data.
931 | }
932 | \end{frame}
933 |
934 |
935 |
936 |
937 | \begin{frame}{Differing column names}
938 |
939 | \includegraphics[height=0.8\textheight]{Figs/spreadsheet_colnames1.pdf}
940 |
941 | \vspace*{-0.6\textheight}
942 | \hspace*{0.1\textwidth}
943 | \includegraphics[height=0.8\textheight]{Figs/spreadsheet_colnames2.pdf}
944 |
945 | \note{
946 | Here the column names have been changed between two data files.
947 | You again might be tempted to hand-edit the files to match, but if
948 | you do that once, you'll be doing that every time the files are
949 | updated.
950 | }
951 | \end{frame}
952 |
953 |
954 | \begin{frame}{Differing column order}
955 |
956 | \includegraphics[height=0.8\textheight]{Figs/spreadsheet_colnames1.pdf}
957 |
958 | \vspace*{-0.6\textheight}
959 | \hspace*{0.1\textwidth}
960 | \includegraphics[height=0.8\textheight]{Figs/spreadsheet_colnames2.pdf}
961 |
962 | \vspace*{-0.6\textheight}
963 | \hspace*{0.2\textwidth}
964 | \includegraphics[height=0.8\textheight]{Figs/spreadsheet_colnames3.pdf}
965 |
966 | \note{
967 | Now the order of the columns has changed!
968 | Again, we need to be able to handle these sorts of changes.
969 |
970 | This also emphasizes the importance of relying on the names rather
971 | than positions of columns (or rows).
972 |
973 | I once had a project where the data were in a 500-worksheet excel
974 | file, one sheet per subject. Each sheet had a complex layout where
975 | you had to pick out various values from different places. And the
976 | order of the rows was different in the middle hundred sheets,
977 | versus the other 400 sheets.
978 | }
979 | \end{frame}
980 |
981 |
982 |
983 | \begin{frame}[c]{Metadata solution}
984 |
985 | \figh{Figs/file_metadata_example}{0.8}
986 |
987 | \note{
988 | My solution to these problems was to create a metadata file that
989 | indicated the names of the variables, which files to find them in,
990 | what their names were in each file, and what the individual ID
991 | names were in those files. I also needed an ``offset'' column,
992 | because in some cases it was like ``the column two to the right
993 | of the column name \_\_\_\_\_.''
994 | }
995 | \end{frame}
996 |
997 |
998 |
999 |
1000 | \begin{frame}{}
1001 |
1002 | \centering \Large
1003 |
1004 | \bigskip \bigskip \bigskip \bigskip
1005 | \bigskip \bigskip
1006 |
1007 | ``In what form would you like the data?''
1008 |
1009 | \only<2>{
1010 | \bigskip \bigskip \bigskip \bigskip
1011 |
1012 | \emph{\lolit The answer should always be} \\[10pt]
1013 | {\vhilit ``In its present form.''}
1014 | }
1015 |
1016 | \note{
1017 | To further emphasize here: the data scientist is always in a
1018 | better position to fix data formatting issues programmatically.
1019 |
1020 | If you're unhappy with the state of your collaborators' data
1021 | files, don't have them fix them, because they'll likely do so ``by
1022 | hand'' at the risk of introducing errors. Rather, deal with the
1023 | data files as they come, but then work with your collaborators to
1024 | develop a better system for the future.
1025 | }
1026 | \end{frame}
1027 |
1028 |
1029 |
1030 |
1031 |
1032 | \begin{frame}[c]{Reproducible reports}
1033 |
1034 |
1035 | \vspace*{8mm}
1036 |
1037 | \vspace*{-0.05\textheight}
1038 | \figw{Figs/example_Rmd.png}{0.92}
1039 | \onslide<2|handout 0>{
1040 | \vspace*{-0.70\textheight}
1041 | \hspace*{0.06\textwidth}
1042 | \figw{Figs/example_Rmd_source.png}{0.92}
1043 | }
1044 |
1045 | \note{I {\nhilit love} R Markdown for making reproducible reports that
1046 | document the full details of my analysis. R Markdown mixes Markdown
1047 | (for light-weight markup of text) and R code chunks; when processed
1048 | with knitr, the R code is executed and results inserted into the
1049 | final document.
1050 |
1051 | With these informal reports, I seek to fully capture the entirety of
1052 | my data explorations and decisions.
1053 |
1054 | Python people should look at Jupyter notebooks.
1055 | }
1056 | \end{frame}
1057 |
1058 |
1059 |
1060 |
1061 | \begin{frame}[fragile,c]{Automate the process (GNU Make)}
1062 |
1063 | \begin{center}
1064 | \begin{minipage}[c]{13.8cm}
1065 | \begin{semiverbatim}
1066 | \lstset{basicstyle=\footnotesize}
1067 | \begin{lstlisting}[escapechar=!,linewidth=13.8cm]
1068 | !{\color{foreground}{R/analysis.html}}!: !{\color{foreground}{R/analysis.Rmd Data/cleandata.csv}}!
1069 | !{\color{foreground}{ cd R;R -e "rmarkdown::render('analysis.Rmd')"}!
1070 |
1071 | Data/cleandata.csv: R/prepData.R RawData/rawdata.csv
1072 | cd R;R CMD BATCH prepData.R
1073 |
1074 | RawData/rawdata.csv: Python/xls2csv.py RawData/rawdata.xls
1075 | Python/xls2csv.py RawData/rawdata.xls > RawData/rawdata.csv
1076 | \end{lstlisting}
1077 | \end{semiverbatim}
1078 | \end{minipage}
1079 | \end{center}
1080 |
1081 | \note{GNU Make is an old (and rather quirky) tool for automating the
1082 | process of building computer programs. But it's useful much more
1083 | broadly, and I find it valuable for automating the full process of
1084 | data file manipulation, data cleaning, and analysis.
1085 |
1086 | In addition to {\nhilit automating} a complex process, it also
1087 | {\nhilit documents} the process, including the dependencies among
1088 | data files and scripts.
1089 | }
1090 | \end{frame}
1091 |
1092 |
1093 |
1094 | \begin{frame}[fragile,c]{Automate the process (GNU Make)}
1095 |
1096 | \addtocounter{framenumber}{-1}
1097 |
1098 | \begin{center}
1099 | \begin{minipage}[c]{13.8cm}
1100 | \begin{semiverbatim}
1101 | \lstset{basicstyle=\footnotesize}
1102 | \begin{lstlisting}[escapechar=!,linewidth=13.8cm]
1103 | !{\color{codehilit}{R/analysis.html}}!: !{\color{foreground}{R/analysis.Rmd Data/cleandata.csv}}!
1104 | !{\color{foreground}{ cd R;R -e "rmarkdown::render('analysis.Rmd')"}!
1105 |
1106 | Data/cleandata.csv: R/prepData.R RawData/rawdata.csv
1107 | cd R;R CMD BATCH prepData.R
1108 |
1109 | RawData/rawdata.csv: Python/xls2csv.py RawData/rawdata.xls
1110 | Python/xls2csv.py RawData/rawdata.xls > RawData/rawdata.csv
1111 | \end{lstlisting}
1112 | \end{semiverbatim}
1113 | \end{minipage}
1114 | \end{center}
1115 |
1116 | \end{frame}
1117 |
1118 |
1119 |
1120 | \begin{frame}[fragile,c]{Automate the process (GNU Make)}
1121 |
1122 | \addtocounter{framenumber}{-1}
1123 |
1124 | \begin{center}
1125 | \begin{minipage}[c]{13.8cm}
1126 | \begin{semiverbatim}
1127 | \lstset{basicstyle=\footnotesize}
1128 | \begin{lstlisting}[escapechar=!,linewidth=13.8cm]
1129 | !{\color{foreground}{R/analysis.html}}!: !{\color{codehilit}{R/analysis.Rmd Data/cleandata.csv}}!
1130 | !{\color{foreground}{ cd R;R -e "rmarkdown::render('analysis.Rmd')"}!
1131 |
1132 | Data/cleandata.csv: R/prepData.R RawData/rawdata.csv
1133 | cd R;R CMD BATCH prepData.R
1134 |
1135 | RawData/rawdata.csv: Python/xls2csv.py RawData/rawdata.xls
1136 | Python/xls2csv.py RawData/rawdata.xls > RawData/rawdata.csv
1137 | \end{lstlisting}
1138 | \end{semiverbatim}
1139 | \end{minipage}
1140 | \end{center}
1141 | \end{frame}
1142 |
1143 |
1144 |
1145 | \begin{frame}[fragile,c]{Automate the process (GNU Make)}
1146 |
1147 | \addtocounter{framenumber}{-1}
1148 |
1149 | \begin{center}
1150 | \begin{minipage}[c]{13.8cm}
1151 | \begin{semiverbatim}
1152 | \lstset{basicstyle=\footnotesize}
1153 | \begin{lstlisting}[escapechar=!,linewidth=13.8cm]
1154 | !{\color{foreground}{R/analysis.html}}!: !{\color{foreground}{R/analysis.Rmd Data/cleandata.csv}}!
1155 | !{\color{codehilit}{ cd R;R -e "rmarkdown::render('analysis.Rmd')"}}!
1156 |
1157 | Data/cleandata.csv: R/prepData.R RawData/rawdata.csv
1158 | cd R;R CMD BATCH prepData.R
1159 |
1160 | RawData/rawdata.csv: Python/xls2csv.py RawData/rawdata.xls
1161 | Python/xls2csv.py RawData/rawdata.xls > RawData/rawdata.csv
1162 | \end{lstlisting}
1163 | \end{semiverbatim}
1164 | \end{minipage}
1165 | \end{center}
1166 | \end{frame}
1167 |
1168 |
1169 |
1170 |
1171 |
1172 |
1173 |
1174 | \begin{frame}[c]{Write modular code}
1175 |
1176 | \bbi
1177 | \item Modular code is easier to understand, maintain, and reuse.
1178 | \item Turn repeated code into functions
1179 | \item Combine useful functions into a package or module
1180 | \ei
1181 |
1182 | \note{
1183 | Another important step towards reproducibility is to revise your
1184 | code to make it more clear.
1185 |
1186 | The single most important step towards clear code is to pull out
1187 | complex or repeated code as a separate function.
1188 | This makes your code easier to read and maintain.
1189 |
1190 | Next, combine those functions together into a package or module.
1191 | It's surprisingly easy to create an R package (see {\tt
1192 | https://kbroman.org/pkg\_primer}) and it's even easier to make a
1193 | Python module.
1194 |
1195 | When writing functions, try to write them in a somewhat-general
1196 | way and then pull them out of the project as a separate package or
1197 | module, so that you (and/or others) may reuse them for other
1198 | purposes.
1199 | }
1200 |
1201 | \end{frame}
1202 |
1203 |
1204 |
1205 | \begin{frame}[c]{Keeping track of versions}
1206 |
1207 | \bbi
1208 | \item Google drive / Dropbox / Box
1209 |
1210 | \item Version numbers in file names
1211 |
1212 | \item Formal version control (e.g., git/GitHub)
1213 | \bi
1214 | \item Browse changes
1215 | \item Try new things without fear of breaking what works
1216 | \item Jump to the state of the project at any time point
1217 | \item Merge simultaneous changes from multiple people
1218 | \ei
1219 | \ei
1220 |
1221 |
1222 | \note{
1223 | We all struggle to keep track of versions of things.
1224 |
1225 | Shared drives (like google drive, dropbox, and box) often keep
1226 | track of past versions, but usually there's a time limit (like
1227 | 30 days or a year).
1228 |
1229 | You can make copies of a file with a version number appended to
1230 | the name. You might zip up a directory and include the date in the
1231 | zipped file.
1232 |
1233 | Formal version control has a number of advantages, including ease
1234 | of browsing the history or jumping to a particular time point.
1235 | The ability to merge simultaneous changes from multiple users is a
1236 | key advantage.
1237 |
1238 | git can be hard to learn; it's designed for pretty hard-core
1239 | programmers. But there are growing learning resources, and the
1240 | long-term payoff is considerable. For collaborative projects, the
1241 | payoff is immediate.
1242 | }
1243 |
1244 | \end{frame}
1245 |
1246 |
1247 |
1248 |
1249 | \begin{frame}[c]{Version control (git/GitHub)}
1250 |
1251 | \only<1>{\addtocounter{framenumber}{-1}}
1252 |
1253 | \centering
1254 |
1255 | \only<1-2>{\figh{Figs/example_repo}{0.80}}
1256 | \onslide<2>{
1257 | \vspace{-0.65\textheight}
1258 | \figh{Figs/example_repo_zoom}{0.55}
1259 | }
1260 | \end{frame}
1261 |
1262 |
1263 | \begin{frame}[c]{Version control (git/GitHub)}
1264 |
1265 | \vspace*{3mm}
1266 |
1267 | \centering
1268 |
1269 | \only<1|handout 0>{\figh{Figs/example_history}{0.80}}
1270 | \only<2>{\figh{Figs/example_commit}{0.80}}
1271 | \only<3|handout 0>{\figh{Figs/example_commit_zoom}{0.80}}
1272 |
1273 | \note{
1274 | git has a steep learning curve, but ultimately I think you'll find
1275 | it really helpful.
1276 |
1277 | The big selling point is in collaboration: merging changes from
1278 | collaborators, and keeping your work synchronized.
1279 |
1280 | Longer term, there's great value in having the entire history of
1281 | changes to your project. If something stops working, you can go
1282 | back to any point in that history to see when it stopped working and
1283 | why.
1284 |
1285 | With git, you can also work on new features or analyses without fear
1286 | of breaking the parts that are currently working well.
1287 | }
1288 | \end{frame}
1289 |
1290 |
1291 |
1292 |
1293 | \begin{frame}[c]{Backups}
1294 |
1295 | \bbi
1296 | \item Multiple places, including off-site
1297 |
1298 | \item Automatic
1299 | \ei
1300 |
1301 | \note{
1302 | I can't emphasize enough the importance of backups. And you must
1303 | have a copy off-site. And if it's not automatic, it won't happen.
1304 | }
1305 |
1306 | \end{frame}
1307 |
1308 |
1309 |
1310 |
1311 |
1312 |
1313 | \begin{frame}{License your software}
1314 |
1315 | \vspace{60pt}
1316 |
1317 | \centerline{\large Pick a license, any license}
1318 |
1319 | \vspace{18pt}
1320 |
1321 | \hfill
1322 | {\textendash} \href{https://blog.codinghorror.com/pick-a-license-any-license/}{Jeff Atwood}
1323 |
1324 | \note{
1325 | If you don't pick a license for your software, no one else can use it.
1326 |
1327 | So if you want to distribute your code so that others can reproduce
1328 | your analyses, you need to pick a license, any license.
1329 |
1330 | I choose between the MIT license and the GPL.
1331 |
1332 | Don't use the Creative Commons licenses for code. But feel free to
1333 | use them for other things.
1334 | }
1335 | \end{frame}
1336 |
1337 |
1338 |
1339 |
1340 |
1341 | \begin{frame}[c]{Share your stuff}
1342 |
1343 |
1344 | \bbi
1345 | \item Code
1346 | \bi
1347 | \item GitHub / BitBucket
1348 | \item Zenodo (archival, with DOIs)
1349 | \ei
1350 |
1351 | \item Data
1352 | \bi
1353 | \item Domain-specific repository {\lolit (e.g., dbGAP)}
1354 | \item General repository {\lolit (e.g., github, figshare, zenodo, datadryad)}
1355 | \item Institutional repository
1356 | \ei
1357 |
1358 | \ei
1359 |
1360 | \note{
1361 | A reproducible workflow is valuable even if you don't intend to
1362 | share your work with others.
1363 |
1364 | But if you do want to share, it's best to place things at a
1365 | third-party site. Ideally one that can be trusted as an archive
1366 | and that provides DOIs.
1367 |
1368 | Place code at GitHub (or the similar site, BitBucket). The only problem
1369 | is that it can't necessarily be trusted to still be there 5 years from now.
1370 | There's an easy way to have ``releases'' archived at zenodo.org
1371 | automatically, with a DOI. So I recommend that.
1372 |
1373 | For data, it's probably best to use a domain-specific repository,
1374 | if there is an appropriate one. Otherwise, use a general repository such as
1375 | github, figshare, zenodo, or datadryad. Again, github is not ideal
1376 | because it's not archival and doesn't give DOIs.
1377 | }
1378 |
1379 | \end{frame}
1380 |
1381 |
1382 |
1383 |
1384 |
1385 |
1386 | \begin{frame}[c]{Summary}
1387 |
1388 | \begin{enumerate}
1389 | \item Organize your project
1390 | \item Choose good names for things
1391 | \item Document what's what
1392 | \item Organize data as a rectangle
1393 | \item Metadata is data
1394 | \item Everything with a script
1395 | \item Even better: reproducible reports
1396 | \item Automate the process {\lolit (GNU Make)}
1397 | \item Write modular code {\lolit (functions and packages)}
1398 | \item Use version control {\lolit (git/GitHub)}
1399 | \item License your software
1400 | \item Share your data and code
1401 | \end{enumerate}
1402 |
1403 |
1404 | \note{
1405 | Summaries are always good.
1406 |
1407 | Again, don't try to change everything at once.
1408 | Reproducibility can be surprisingly hard and requires a daily
1409 | commitment. And here I'm just thinking about a project with a single
1410 | data analyst. A collaboration with multiple analysts is yet harder.
1411 | }
1412 | \end{frame}
1413 |
1414 |
1415 | \begin{frame}[c]{Other considerations}
1416 |
1417 | \begin{itemize}
1418 | \itemsep12pt
1419 | \item Testing
1420 | \begin{itemize}
1421 | \item[] {\lolit are you getting the right answers?}
1422 | \end{itemize}
1423 | \item Software versions
1424 | \begin{itemize}
1425 | \item[] {\lolit will your stuff work when dependencies change?}
1426 | \end{itemize}
1427 | \item Large-scale computations
1428 | \begin{itemize}
1429 | \item[] {\lolit computation time + dependence on cluster environment}
1430 | \end{itemize}
1431 | \item Collaborations
1432 | \begin{itemize}
1433 | \item[] {\lolit coordinating who does what and where things live}
1434 | \end{itemize}
1435 | \end{itemize}
1436 |
1437 | \note{I've focused on issues for small-scale, single-investigator
1438 | projects, and even with that limited scope, I've not covered
1439 | everything.}
1440 |
1441 | \end{frame}
1442 |
1443 |
1444 |
1445 | \begin{frame}[c]{}
1446 |
1447 | \centering
1448 | \vspace{80pt}
1449 |
1450 | It's not that we don't test our code, \\[8pt]
1451 | it's that we don't store our tests \\
1452 | so they can be re-run automatically.
1453 |
1454 | \vspace{36pt}
1455 |
1456 | \hfill \lolit {\textendash} Hadley Wickham \hspace{13mm}
1457 |
1458 | \vspace{45pt}
1459 | {\footnotesize
1460 | \hfill \href{http://journal.r-project.org/archive/2011-1/RJournal_2011-1_Wickham.pdf}{R Journal 3(1):5{\textendash}10, 2011}
1461 | }
1462 |
1463 | \note{
1464 | This is from Hadley's paper about his {\tt testthat} package.
1465 | }
1466 | \end{frame}
1467 |
1468 |
1469 | \begin{frame}[c]{Testing and debugging}
1470 |
1471 | \hspace{2cm} find a bug \hspace{2mm} $\longrightarrow$ \hspace{3mm} {\hilit write a test}
1472 |
1473 | \bigskip
1474 | \bigskip
1475 | \bigskip
1476 | \bigskip
1477 |
1478 | \onslide<2>{
1479 | \hspace{2cm} fix a bug \hspace{4.3mm} $\longrightarrow$ \hspace{3mm} {\hilit look for other instances of that mistake}
1480 | }
1481 |
1482 | \note{
1483 | A few quick points about software testing: the goal is to identify
1484 | bugs earlier when they are easier to fix. (It is closer in time to
1485 | when you introduced them.)
1486 |
1487 | When you find a bug, first write a test to reproduce the problem.
1488 | Then it's clear when you've fixed it.
1489 |
1490 | When you do fix it, search through the rest of the code base to see
1491 | if you can find other instances of that mistake. We often make the
1492 | same mistakes repeatedly.
1493 | }
1494 |
1495 | \end{frame}
1496 |
1497 |
1498 |
1499 | \begin{frame}[c]{Collaboration}
1500 |
1501 | \bbi
1502 | \item Do more, by working in parallel
1503 | \item Do more, through diversity of ideas and skills
1504 | \item Reproducible pipelines have immediate advantages
1505 | \item Tests of reproducibility
1506 | \item Code review
1507 | \ei
1508 |
1509 | \note{
1510 | Collaboration has a lot of advantages, including for
1511 | reproducibility efforts.
1512 |
1513 | It can be useful to have a pair of people regularly review each
1514 | other's code, but it can be hard to get your busy friends to pay
1515 | attention to your little project. But if you are working together
1516 | on a project, you can more naturally build in some code review.
1517 |
1518 | Moreover, you can explicitly test the reproducibility of your
1519 | analyses, by having your collaborator rerun your work, and vice
1520 | versa.
1521 | }
1522 |
1523 | \end{frame}
1524 |
1525 |
1526 |
1527 | \begin{frame}[c]{Challenges in collaborations}
1528 |
1529 | \bbi
1530 | {\only<2-|handout 0>{\lolit }
1531 | \item Shared vision?
1532 | \item Compromise
1533 | \item Coordination
1534 | \item Communication
1535 | \item Sharing code and data
1536 | \item Synchronization
1537 | }
1538 | \onslide<2->{\item Weakest link?}
1539 | \ei
1540 |
1541 | \note{
1542 | Collaboration also has challenges.
1543 |
1544 | Do you have a shared vision for the reproducibility of the
1545 | project? You'll no doubt need to make some compromises about how
1546 | things are done: you can't both just do things the way you've
1547 | always done them. Careful coordination and regular communication
1548 | are key.
1549 |
1550 | And then there are the technical challenges of how to share the
1551 | code and data and make sure your two working projects remain in
1552 | sync.
1553 |
1554 | In a sense, the reproducibility of a collaborative project is
1555 | dependent on the weakest link. If one collaborator refuses to
1556 | fully participate and share their work, the chain is broken.
1557 | }
1558 |
1559 | \end{frame}
1560 |
1561 |
1562 |
1563 |
1564 |
1565 | \begin{frame}[c]{}
1566 |
1567 |
1568 | \begin{center}
1569 | \Large
1570 | {\color{title} Challenges} \\[24pt]
1571 | {\lolit \large (totally hypothetical)}
1572 | \end{center}
1573 |
1574 | \note{
1575 | A collaboration like this will pose many challenges. The following are
1576 | {\nhilit totally hypothetical}. Really.
1577 | }
1578 |
1579 | \end{frame}
1580 |
1581 |
1582 |
1583 |
1584 |
1585 | \begin{frame}[c]{}
1586 |
1587 | \begin{center}
1588 | \Large
1589 |
1590 | ``Could we meet to talk about the data file structure?'' \\[36pt]
1591 | \onslide<2->{``No.''}
1592 | \end{center}
1593 |
1594 | \note{
1595 | Say the first of many sets of data is set up in a way that is
1596 | complicated to handle, both in data entry and for analysis. Will
1597 | your collaborator work with you to refine things?
1598 |
1599 | Or will every new data file require a day of work, so that it can be
1600 | combined with prior data?
1601 | }
1602 |
1603 | \end{frame}
1604 |
1605 |
1606 |
1607 |
1608 |
1609 |
1610 | \begin{frame}[c]{}
1611 |
1612 | \begin{center}
1613 | \Large
1614 |
1615 |
1616 | ``Wait, these results seem to be based \\
1617 | on the older SNP map.''
1618 |
1619 | \end{center}
1620 |
1621 | \note{
1622 | It can be hard to keep in sync across groups in a multi-site
1623 | project. If a problem is discovered and some aspect of data
1624 | preprocessing needs to be redone, will this get communicated to all
1625 | analysis teams, so that relevant analyses get rerun as needed?
1626 | }
1627 |
1628 | \end{frame}
1629 |
1630 |
1631 |
1632 |
1633 |
1634 | \begin{frame}[c]{}
1635 |
1636 | \begin{center}
1637 | \Large
1638 |
1639 |
1640 | ``Could you write the methods section?'' \\[36pt]
1641 | ``But I didn't do the work, \\
1642 | and we don't have the code that was used.''
1643 |
1644 |
1645 |
1646 | \end{center}
1647 |
1648 | \note{
1649 | Are all teams sharing their work with each other?
1650 | }
1651 |
1652 | \end{frame}
1653 |
1654 |
1655 |
1656 |
1657 |
1658 | \begin{frame}[c]{}
1659 |
1660 | \begin{center}
1661 | \Large
1662 |
1663 |
1664 | ``My data analyst has taken a job at Google.''
1665 |
1666 | \end{center}
1667 |
1668 | \note{
1669 | What happens if a key data analyst leaves the project?
1670 | }
1671 |
1672 | \end{frame}
1673 |
1674 |
1675 |
1676 | \begin{frame}[c]{}
1677 |
1678 | \begin{center}
1679 | \Large
1680 |
1681 |
1682 | ``Could you do these analyses? X said they would, but they're not
1683 | responding to my emails.''
1684 |
1685 | \end{center}
1686 |
1687 | \note{
1688 | Everyone has multiple things going on, and sometimes there is a need
1689 | for rush analyses, say for a grant submission or conference
1690 | presentation. Is there a shared understanding of who will do what
1691 | when, and how emergencies can be handled?
1692 |
1693 | The organization of a project often depends on the worst day you
1694 | spent on it. If you need to do a bunch of stuff last-minute, will
1695 | you leave the project directory in a mess, or will you clean up
1696 | after yourself?
1697 | }
1698 |
1699 | \end{frame}
1700 |
1701 |
1702 |
1703 |
1704 | \begin{frame}[c]{Shared vision}
1705 |
1706 | \bbi
1707 | \item Publication
1708 | \item Code \& data sharing
1709 | \item Who will do what
1710 | \item Timeline
1711 | \item Ongoing sharing of methods, results
1712 | \ei
1713 |
1714 | \note{
1715 | Critical for a successful collaboration is that the collaborators
1716 | have a shared vision for the project. We often think mainly about
1717 | being in agreement on the approach to publication and co-authorship.
1718 | But perhaps more difficult is coming to an agreement on data and
1719 | code sharing (what, where, and when?), on who will do what, on how
1720 | soon it will be done, and on the ongoing sharing, among collaborators,
1721 | of detailed methods and results.
1722 | }
1723 |
1724 | \end{frame}
1725 |
1726 |
1727 |
1728 | \begin{frame}[c]{Shared workspace}
1729 |
1730 | \bbi
1731 | \item Project structure
1732 | \item Data and metadata formats
1733 | \item Software environment
1734 | \item Automated sync {\lolit (or it won't happen)}
1735 | \ei
1736 |
1737 | \note{
1738 | Also important is the technology or engineering of sharing. Can
1739 | the collaborators agree on the project structure, data and
1740 | metadata formats, and the software environment?
1741 |
1742 | Some groups may use R and some python. This should not pose a problem.
1743 |
1744 | A key issue is how to keep the multiple groups' work in sync. It
1745 | is best that this can be done automatically. Experience
1746 | demonstrates that if the synchronization approach requires some manual
1747 | steps, they will not be done consistently.
1748 | }
1749 |
1750 | \end{frame}
1751 |
1752 |
1753 |
1754 |
1755 | \begin{frame}[c]{Technology for sharing}
1756 |
1757 | \bbi
1758 | \item Data
1759 | \bi
1760 | \item figshare
1761 | \item dropbox / box / google drive
1762 | \ei
1763 | \item Code
1764 | \bi
1765 | \item github / bitbucket
1766 | \ei
1767 | \item Pipeline / workflow
1768 | \bi
1769 | \item make / drake / snakemake / rake
1770 | \ei
1771 | \item Full environment
1772 | \bi
1773 | \item docker containers
1774 | \item \href{https://mybinder.org}{\tt mybinder.org} /
1775 | \href{https://wholetale.org}{\tt wholetale.org}
1776 | \ei
1777 | \ei
1778 |
1779 | \note{
1780 | I must admit to not being totally confident about what advice to
1781 | give, regarding the tools to use for sharing data and code among
1782 | collaborators.
1783 |
1784 | For sharing data, simple options include posting large files on a
1785 | data repository like figshare, or using a cloud drive like dropbox,
1786 | box, or google drive.
1787 |
1788 | For sharing code, I prefer to use a version control system like git,
1789 | with github, bitbucket, or a locally-managed equivalent.
1790 |
1791 | For sharing the analysis pipeline or workflow, one can
1792 | incorporate a system like make (or drake, snakemake, or rake) with
1793 | the code.
1794 |
1795 | The full software environment could be replicated across teams using
1796 | docker containers. Binder and Whole Tale are two systems for making
1797 | this easier.
1798 | }
1799 |
1800 | \end{frame}
1801 |
1802 |
1803 |
1804 |
1805 | \begin{frame}[c]{}
1806 |
1807 | \begin{center}
1808 | \large
1809 | The most important tool is the {\hilit mindset},\\
1810 | when starting, that the end product \\
1811 | will be reproducible.
1812 | \end{center}
1813 |
1814 | \hfill
1815 | {\lolit
1816 | {\textendash} \href{https://odin.mdacc.tmc.edu/~kabaggerly/}{Keith Baggerly}
1817 | }
1818 |
1819 | \note{So true. Desire for reproducibility is step one.
1820 | }
1821 | \end{frame}
1822 |
1823 |
1824 |
1825 | \begin{frame}[c]{}
1826 |
1827 | \begin{center}
1828 | \large
1829 | The second-most important tool is {\hilit training}.
1830 | \end{center}
1831 |
1832 | \vspace*{12mm}
1833 |
1834 | \hfill
1835 | {\lolit
1836 | {\textendash} \href{https://kbroman.org}{me} \hspace*{20mm}
1837 | }
1838 |
1839 | \note{
1840 | I've long felt that the key need, in getting computational
1841 | scientists to adopt a reproducible workflow, is training. For the
1842 | most part, all of the software tools are available, but many people
1843 | haven't incorporated them into their daily work.
1844 | }
1845 | \end{frame}
1846 |
1847 |
1848 |
1849 |
1850 | \begin{frame}[c]{}
1851 |
1852 | \Large
1853 |
1854 | Slides: \href{https://bit.ly/steps2rr.pdf}{\tt bit.ly/steps2rr} \quad
1855 | \includegraphics[height=5mm]{Figs/cc-zero.png}
1856 |
1857 | \vspace{10mm}
1858 |
1859 | \href{https://kbroman.org}{\tt kbroman.org}
1860 |
1861 | \vspace{10mm}
1862 |
1863 | \href{https://github.com/kbroman}{\tt github.com/kbroman}
1864 |
1865 | \vspace{10mm}
1866 |
1867 | \href{https://rstats.me/@kbroman}{\tt @kbroman@rstats.me}
1868 |
1869 |
1870 | \note{
1871 | Here's where you can find me, as well as the slides for this talk.
1872 | }
1873 | \end{frame}
1874 |
1875 |
1876 |
1877 |
1878 | \end{document}
1879 |
--------------------------------------------------------------------------------