├── .github └── workflows │ └── publish.yml ├── .gitignore ├── _freeze ├── posts │ ├── 2022-10-27-build-website │ │ ├── diamond-sizes │ │ │ ├── execute-results │ │ │ │ └── html.json │ │ │ └── figure-html │ │ │ │ └── plot-smaller-diamonds-1.png │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── fig-mtcars-1.png │ │ │ └── unnamed-chunk-4-1.png │ ├── 2022-11-01-command-line-part-1 │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-11-03-command-line-part-2 │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-11-15-object-oriented-programming │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-11-17-r-pkg-dev-part-1 │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ └── unnamed-chunk-2-1.png │ ├── 2022-11-29-purrr-fun-programming │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-12-01-pkgdown-pkg-website │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-12-01-targets-proj-workflows │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-12-06-gettingdata-api │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-12-08-relational-databases │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-14-1.png │ │ │ ├── unnamed-chunk-15-1.png │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-21-1.png │ │ │ └── unnamed-chunk-22-1.png │ ├── 2022-12-13-flexdashboard │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-12-15-web-rvest │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-12-20-dealing-with-large-data │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ └── unnamed-chunk-30-1.png │ └── 2022-12-20-profiling-r-code │ │ └── index │ │ └── execute-results │ │ └── html.json ├── projects │ ├── 
2022-10-27-project-1 │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-11-08-project-2 │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-11-22-project-3 │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ └── 2022-12-06-project-4 │ │ └── index │ │ └── execute-results │ │ └── html.json ├── schedule │ └── execute-results │ │ └── html.json ├── site_libs │ ├── clipboard │ │ └── clipboard.min.js │ └── quarto-listing │ │ ├── list.min.js │ │ └── quarto-listing.js └── syllabus │ └── execute-results │ └── html.json ├── _post_template.qmd ├── _quarto.yml ├── data ├── Chinook.sqlite ├── SRR1039508_subset_1.fastq ├── SRR1039509_subset_1.fastq ├── SRR1039512_subset_1.fastq ├── SRR1039513_subset_1.fastq └── nycflights13 │ └── nycflights13.sqlite ├── images ├── cool_icon.png ├── course_img.R ├── course_img.png ├── quarto_schematic.png ├── rstudio-hello.png ├── rstudio-qmd-how-it-works.png ├── rstudio-render-button.png ├── rstudio-render-on-save.png ├── rstudio-render.png ├── rstudio-source-visual.png ├── screen │ ├── BuildMenu.png │ ├── BuildPackage.png │ ├── BuildSourcePackage.png │ ├── ConfigureBuild.png │ ├── ConfigureButton.png │ ├── EmptyRScript.png │ ├── FileMenu.png │ ├── HelloPackage.png │ ├── InstallRestart.png │ ├── NewDirectory.png │ ├── NewPackageOpening.png │ ├── NewProject.png │ ├── PackageBuilt.png │ ├── PackageName.png │ ├── SaveRScript.png │ └── UseDevTools.png ├── shiny.png ├── ssh_protocol.png ├── targets │ ├── Addins.png │ ├── ModifyShortcuts.png │ └── ShortcutsMenu.png └── terminal.png ├── index.qmd ├── jhustatprogramming2022.Rproj ├── lectures.qmd ├── posts ├── 2022-10-27-build-website │ ├── diamond-sizes.qmd │ └── index.qmd ├── 2022-11-01-command-line-part-1 │ ├── analysis.R │ ├── combined_names.txt │ ├── index.qmd │ ├── package_names.txt │ ├── r_release.txt │ ├── release_names.txt │ ├── secret_directory │ │ └── team_standings.csv │ ├── soccer_directory │ │ └── team_standings.csv │ ├── team_standings.csv │ 
└── team_standings_3.csv ├── 2022-11-03-command-line-part-2 │ ├── creatures │ │ ├── basilisk.dat │ │ ├── minotaur.dat │ │ └── unicorn.dat │ ├── index.qmd │ ├── lengths.txt │ └── proteins │ │ ├── cubane.pdb │ │ ├── ethane.pdb │ │ ├── methane.pdb │ │ ├── octane.pdb │ │ ├── pentane.pdb │ │ └── propane.pdb ├── 2022-11-08-version-control-part-1 │ └── index.qmd ├── 2022-11-10-version-control-part-2 │ └── index.qmd ├── 2022-11-15-object-oriented-programming │ └── index.qmd ├── 2022-11-17-r-pkg-dev-part-1 │ └── index.qmd ├── 2022-11-22-r-pkg-dev-part-2 │ └── index.qmd ├── 2022-11-29-purrr-fun-programming │ └── index.qmd ├── 2022-12-01-pkgdown-pkg-website │ └── index.qmd ├── 2022-12-01-targets-proj-workflows │ └── index.qmd ├── 2022-12-06-gettingdata-api │ └── index.qmd ├── 2022-12-08-relational-databases │ ├── data │ │ └── survey.db │ ├── index.qmd │ └── survey.db ├── 2022-12-13-flexdashboard │ ├── Diamond_dashboard_example.Rmd │ ├── GeyserFlexDashboard.Rmd │ ├── MPGFlexDashboard.Rmd │ ├── example_dashboard.png │ ├── example_dashboard_code.png │ └── index.qmd ├── 2022-12-15-web-rvest │ ├── images │ │ ├── selectorgadget-click.png │ │ ├── selectorgadget-hover.png │ │ ├── selectorgadget-remove.png │ │ └── selectorgadget-too-many.png │ └── index.qmd ├── 2022-12-20-dealing-with-large-data │ └── index.qmd ├── 2022-12-20-profiling-r-code │ └── index.qmd └── _metadata.yml ├── profile.jpg ├── projects.qmd ├── projects ├── 2022-10-27-project-1 │ └── index.qmd ├── 2022-11-08-project-2 │ └── index.qmd ├── 2022-11-22-project-3 │ └── index.qmd └── 2022-12-06-project-4 │ └── index.qmd ├── resources.qmd ├── schedule.qmd ├── styles.css └── syllabus.qmd /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | push: 4 | branches: main 5 | 6 | name: Quarto Publish 7 | 8 | jobs: 9 | build-deploy: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | steps: 14 | - name: Check out 
repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Set up Quarto 18 | uses: quarto-dev/quarto-actions/setup@v2 19 | 20 | - name: Render and Publish 21 | uses: quarto-dev/quarto-actions/publish@v2 22 | with: 23 | target: gh-pages 24 | env: 25 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | posts/2022-12-20-profiling-r-code/*.out 6 | 7 | /.quarto/ 8 | /_site/ 9 | -------------------------------------------------------------------------------- /_freeze/posts/2022-10-27-build-website/diamond-sizes/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "fd97ffafb0210365b2f8581ea1a02600", 3 | "result": { 4 | "markdown": "---\ntitle: \"Diamond sizes\"\ndate: 2022-09-12\nformat: html\ndraft: true\n---\n\n\n\n\nWe have data about 53940 diamonds.\nOnly 126 are larger than 2.5 carats.\nThe distribution of the remainder is shown below:\n\n\n::: {.cell}\n::: {.cell-output-display}\n![](diamond-sizes_files/figure-html/plot-smaller-diamonds-1.png){width=672}\n:::\n:::\n", 5 | "supporting": [ 6 | "diamond-sizes_files" 7 | ], 8 | "filters": [ 9 | "rmarkdown/pagebreak.lua" 10 | ], 11 | "includes": {}, 12 | "engineDependencies": {}, 13 | "preserve": {}, 14 | "postProcess": true 15 | } 16 | } -------------------------------------------------------------------------------- /_freeze/posts/2022-10-27-build-website/diamond-sizes/figure-html/plot-smaller-diamonds-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-10-27-build-website/diamond-sizes/figure-html/plot-smaller-diamonds-1.png 
-------------------------------------------------------------------------------- /_freeze/posts/2022-10-27-build-website/index/figure-html/fig-mtcars-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-10-27-build-website/index/figure-html/fig-mtcars-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-27-build-website/index/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-10-27-build-website/index/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-11-17-r-pkg-dev-part-1/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "393463ff329c030cc8a5c6211657afc4", 3 | "result": { 4 | "markdown": "---\ntitle: \"Building R packages\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Introduction to building and documenting R packages\"\ndate: 2022-11-17\ncategories: [module 2, week 4, R, programming, documentation, R package, functions]\n---\n\n\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following materials:**\n\n1. \n2. 
\n\n:::\n\n### Prerequisites \n\nBefore starting you **must** install two additional packages:\n\n* `devtools` - this provides many additional tools for building packages\n* `roxygen2` - this provides tools for writing documentation\n\nYou can do this by calling\n\n\n::: {.cell}\n\n```{.r .cell-code}\ninstall.packages(c(\"devtools\", \"roxygen2\"))\n```\n:::\n\n\nor use the \"Install Packages...\" option from the \"Tools\" menu in RStudio.\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n- \n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n- Create an empty R package within RStudio\n- Design a R function and write documentation\n- Describe what a `DESCRIPTION` file is and what goes in it\n- Be able to build and install a R package\n\n:::\n\n# Introduction\n\nThis lesson will cover how to build R packages using R and RStudio. Using RStudio for this lesson will be critical as RStudio includes a number of tools that make building R packages *much* simpler. \n\nFor the purposes of demonstration in this lesson, we will be building a package called `greetings` that has a single function called `hello()`. The `hello()` function takes a single argument called `name` (which is required) and makes a plot containing a message directed at `name`. 
For example,\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(greetings)\nhello(\"Stephanie\")\n```\n\n::: {.cell-output-display}\n![](index_files/figure-html/unnamed-chunk-2-1.png){width=672}\n:::\n:::\n\n\nAdmittedly, this is not a useful package, but it allows us to demonstrate all of the necessary ingredients for building a simple R package.\n\n## Create a New R Package Project\n\n\nCreating a new R packages begins with creating a new RStudio project.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![A screenshot of starting a New Project in R](../../images/screen/NewProject.png){fig-align='center' width=476}\n:::\n:::\n\n\n\nYou should choose **New Directory** as this will be a brand new project (not an existing project).\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![A screenshot of starting a New Directory for a New Project in R](../../images/screen/NewDirectory.png){fig-align='center' width=541}\n:::\n:::\n\n\nNext, you should choose the \"Project Type\" as **R Package using devtools** (you may need to scroll down a little in that menu).\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Create a R package using devtools](../../images/screen/UseDevtools.png){fig-align='center' width=536}\n:::\n:::\n\n\nFinally, you should specify the name of your package. For this example, we will use **greetings** as the name of the package. Also you should double check the name of the sub-directory listed under \"Create project as subdirectory of\" is a directory that you can find. \n\n\n:::{.callout-tip}\n\n### Note \n\nThe name of this directory **should not have any spaces in its name**. \n\n:::\n\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Package name](../../images/screen/PackageName.png){fig-align='center' width=540}\n:::\n:::\n\n\n\nClick \"Create Project\" and allow R and RStudio to restart. You should get a brand new session. 
You will also see a window with a series of tabs. One of those tabs will be called **Build** and that will be important as we build our package.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Build menu](../../images/screen/BuildMenu.png){fig-align='center' width=781}\n:::\n:::\n\n\n\n## Configure Build Tools\n\nThe next step after creating a new project is to configure your build tools. Click on the **Build** tab and then **More** and then **Configure Build Tools...**.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Configure Build tools](../../images/screen/ConfigureBuild.png){fig-align='center' width=430}\n:::\n:::\n\n\nIn the next screen, you should make sure that the check box for **Generate documentation with Roxygen** is checked. Then click the **Configure...** button.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Click the Configure button](../../images/screen/ConfigureButton.png){fig-align='center' width=550}\n:::\n:::\n\n\nIn the next menu, make sure to check the check box for **Install and Restart**.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Check box for Install and Restart](../../images/screen/InstallRestart.png){fig-align='center' width=545}\n:::\n:::\n\n\n\nThen click \"OK\" and then \"OK\" again to exit the options menu.\n\n\n\n\n## R Package Files\n\nIn this session, there will be the following files listed in the file browser.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Files are listed in the File browser](../../images/screen/FileMenu.png){fig-align='center' width=362}\n:::\n:::\n\n\n\nThe files we will focus on here are \n\n* the `DESCRIPTION` file; and \n* any files in the `R` sub-directory. 
This package will only have one R script in the `R` sub-directory.\n\nThere is no need to worry about the other files for now.\n\n# Edit main files\n\nNow, we need to write the R code and documentation for our one function in this package. \n\n## Add a R Script file\n\nFirst, create an R script in which the R code will go. You can do this by clicking on **File > New File > R Script**. \n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Saving a new R Script file called hello.R](../../images/screen/SaveRScript.png){fig-align='center' width=487}\n:::\n:::\n\n\n:::{.callout-tip}\n\n### Note \n\nMake sure that your R script is saved inside the `R/` sub-directory.\n\n:::\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![R Script file must be in the R/ sub-directory](../../images/screen/EmptyRScript.png){fig-align='center' width=1130}\n:::\n:::\n\n\nNext, once you have your R Script created, you can start to write the function and the **documentation**. \n\n:::{.callout-tip}\n\n### Note \n\nThe idea is that when you write a function in a R package, just above the function is the **documentation**. The function is written in the usual way and the documentation is written using a special style.\n\n:::\n\n## Documentation\n\nLet's start with the documentation. Here is the documentation for the `hello()` function.\n\n```r\n#' Print a Greeting\n#'\n#' Print a greeting for a custom name\n#'\n#' @details This function make a plot with a greeting to the name passed as an argument to the function\n#' \n#' @param name character, name of person to whom greeting should be directed\n#'\n#' @return nothing useful is returned.\n#'\n#' @import ggplot2\n#' @export\n#'\n#' @examples\n#' hello(\"Chris\")\n#'\n\n```\n\nWe will take each line of documentation in order:\n\n1. The first line is a short title for the function\n\n2. The next line is the \"description\" line and should be a slightly longer description of what the function does. 
Generally, this line is one sentence.\n\n3. This line contains the first **Roxygen directive**, which is `@details`. This directive indicates that the text that comes afterwards has detailed information about the function. \n\n4. The next Roxygen directive is the `@param` directive. This indicates the name of the parameter that the function will accept. In this case, this is the name to which the greeting will be directed.\n\n5. The `@return` directive indicates what the function returns to the user. This function does not return anything useful, but it is still useful to indicate that.\n\n6. This function requires the `ggplot()` function and associated plotting functions. Therefore we need to use the `@import` directive to indicate that we need to **import** all of the functions in the `ggplot2` package.\n\n7. We want to indicate with the `@export` directive that this function should be visible to the user (i.e. we want the user to call this function). Therefore, the function should be **exported** to the user. More complex packages may have many functions and not all of them will be functions that the user will need to call. In addition, any function that is exported is required to have documentation.\n\n8. Under the `@examples` directive, you can put R code that demonstrates how to call the function. Here, we provide a simple example of how to use the `hello()` function.\n\nOnce the documentation is written, we can write the code for the function itself. 
The complete R script file looks as follows.\n\n\n```r\n#' Print a Greeting\n#'\n#' Print a greeting for a custom name\n#'\n#' @details This function make a plot with a greeting to the name passed as an argument to the function\n#'\n#' @param name character, name of person to whom greeting should be directed\n#'\n#' @return nothing useful is returned.\n#'\n#' @import ggplot2\n#' @export\n#'\n#' @examples\n#' hello(\"Chris\")\n#'\nhello <- function(name) {\n message <- paste0(\"Hello, \", name, \"!\")\n ggplot() +\n geom_text(aes(0, 0), label = message, size = 4) +\n theme_minimal()\n\n}\n```\n\n:::{.callout-tip}\n\n### Note \n\nIn the function we do not actually plot any data. \nWe just use the `ggplot()` function to set up a plot window so that we \ncan add the message using `geom_text()`.\n\n:::\n\n\n## Editing the DESCRIPTION file\n\nAfter writing the code and documentation we need to edit the `DESCRIPTION` \nfile for the package. This contains metadata about the package. Here is \nthe final `DESCRIPTION` file for the package.\n\n```r\nPackage: greetings\nTitle: Displays a greeting plot\nVersion: 0.0.0.9000\nAuthors@R: \n person(given = \"Stephanie\",\n family = \"Hicks\",\n role = c(\"aut\", \"cre\"),\n email = \"shicks19@jhu.edu\", \n comment = c(ORCID = \"0000-0002-5682-5998\"))\nDescription: This package displays a nice greeting for a custom name.\nImports: ggplot2\nLicense: GPL (>= 3)\nEncoding: UTF-8\nLazyData: true\nRoxygen: list(markdown = TRUE)\nRoxygenNote: 7.2.1\n```\n\nWe can go through each field one at a time:\n\n1. `Package` is just the name of the package. In this case it is `greetings`.\n\n2. `Title` is a short description of the package.\n\n3. `Version` is the version number. This is an initial development version, so we use `0.0.0.9000`.\n\n4. `Authors@R` indicates the author of the package (this is you!). This package only has one author but packages can have multiple authors. Look at the help file for `person()` to see how this is specified.\n\n5. 
`Description` provides a multi-sentence description of what the package does.\n\n6. `Imports` is only needed because the package imports the functions from the `ggplot2` package. You will need to add this line explicitly to the `DESCRIPTION` file.\n\n7. `License` indicates the legal license for the package. This should be an open source license and we use the GNU General Public License Version 3 here. You can read more about [R package licenses](https://www.r-project.org/Licenses/). Every R package **must** have a license.\n\nThe remaining fields are auto-generated by RStudio and you don't need to worry about them for now.\n\n# Build and Install\n\nOnce you have the code, documentation, and `DESCRIPTION` file written, you can build the package and install it in order to try it out.\n\n## Within RStudio\n\nIn the **Build** tab, click the button labeled **Install and Restart**.\n\n:::{.callout-tip}\n\n### Note\n\nOn more recent versions of RStudio, it might just say \"Install\", not \"Install and Restart\". \n\n:::\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Click the Build tab to install and restart](../../images/screen/BuildPackage.png){fig-align='center' width=648}\n:::\n:::\n\n\nClicking this button will\n\n1. Build the R package\n\n2. Install the R package on your system\n\n3. Restart the R session\n\n4. Load your package using the `library()` function.\n\nOnce this is done, you can call the `hello()` function and see the results.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Now you can load and use the hello() function](../../images/screen/HelloPackage.png){fig-align='center' width=232}\n:::\n:::\n\n\n\n## Build Source Package\n\nOnce the package is completed, you must build a source package so that it can be distributed to others. 
This can be done in the **Build** menu and clicking **Build source package**.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Build the source package](../../images/screen/BuildSourcePackage.png){fig-align='center' width=548}\n:::\n:::\n\n\nThis will produce a file with a `.tar.gz` extension. This is the **package source file**. \n\nYou should see a screen that looks something like this.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![Source file is built (file ends in a .tar.gz)](../../images/screen/PackageBuilt.png){fig-align='center' width=551}\n:::\n:::\n\n\n\nOnce your package is built, you can send to others and they will be able to install it. The package source file would also be the file that would be uploaded to CRAN if you were submitting a package to CRAN.\n\n:::{.callout-tip}\n\n### Pro-tip\n\nIf you are interested, you can also use the `usethis` package to create, build, document, and install a R package: \n\n- \n\nFWIW, this is how I create R packages.\n\n:::\n\n## Install from GitHub\n\nYou can also install an R package that's available on GitHub. For example, here is the `greetings` package on my personal GitHub page \n- \n\nTo install this, we can use the `remotes::install_github(repo=\"username/repo\")` function: \n\n\n::: {.cell}\n\n```{.r .cell-code}\nremotes::install_github(repo = \"stephaniehicks/greetings\")\n```\n:::\n\n\n# Final thoughts\n\n:::{.callout-tip}\n\n### Pro-tip\n\n- You can create multiple `.R` files in the `/R` folder. Typically, its best to have one `.R` file for each of your **exported** functions. For the **non-exported** functions, you can place into one file (e.g. `utils.R`, etc). You can read more about this here . \n- You will want to check your package with `R CMD check` or `devtools::check()`. You can read more about this here . \n- When you define `print` methods with generic functions that exist in base R (e.g. 
`print.object`), you need use the variable argument (`...`)\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint.object <- function(x, ...){\n}\n```\n:::\n\ninstead of \n\n::: {.cell}\n\n```{.r .cell-code}\nprint.object <- function(x){\n}\n```\n:::\n\n\nThis is because `print()` in base R has a set of arguments and to let your own S3 method pass the `R CMD check` you need allow for the same arguments as with the base R generic. Otherwise, you'll get warnings about \"checking S3 generic/method consistency ...\". \n\n:::\n\n\n# Post-lecture materials\n\n\n### Additional Resources\n\n::: callout-tip\n\n- \n- \n- \n- \n\n:::\n", 5 | "supporting": [ 6 | "index_files" 7 | ], 8 | "filters": [ 9 | "rmarkdown/pagebreak.lua" 10 | ], 11 | "includes": {}, 12 | "engineDependencies": {}, 13 | "preserve": {}, 14 | "postProcess": true 15 | } 16 | } -------------------------------------------------------------------------------- /_freeze/posts/2022-11-17-r-pkg-dev-part-1/index/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-11-17-r-pkg-dev-part-1/index/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-12-01-pkgdown-pkg-website/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "79a121b1b77d176aba50b7fad15ea8ef", 3 | "result": { 4 | "markdown": "---\ntitle: \"Package development with `pkgdown`\"\nauthor: \n - name: Boyi Guo\n url: https://boyiguo1.github.io\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Building a website for R software packages\"\ndate: 2022-12-01\ncategories: [module 3, week 6, packages, pkgdown]\n---\n\n\n\n\n# Pre-lecture 
materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n\n**Before class, you can prepare by reading the following materials:**\n\n1. \n:::\n\n### Prerequisites\nBefore starting you must install the following additional packages:\n\n* `pkgdown` - the R package that helps you to build a package website with little effort\n* `usethis` - an automation package that simplifies project creation and setup\n\nYou can do this by calling\n\n\n::: {.cell}\n\n```{.r .cell-code}\ninstall.packages(c(\"usethis\", \"pkgdown\"))\n```\n:::\n\n\nor use the “Install Packages…” option from the “Tools” menu in RStudio.\n\nYou also need to have a `GitHub` account and set up your access from your local computer to `GitHub.com`. If you have forgotten how, please revisit the [previous lecture](https://www.stephaniehicks.com/jhustatprogramming2022/posts/2022-11-10-version-control-part-2/).\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n- \n- \n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n- build a website for your package\n- deploy your website on GitHub\n:::\n\n# Why do we want a website for our packages?\nThe primary reason to make a website for our packages is to make them more popular. Having a publicly available website is similar to creating a home for your package, where people know where to find your package. Reciprocally, you can tell others a more stable address, i.e. your website link, to find your package.\n\nMoreover, compared to the default reference manual, a website provides more *interactivity* and *accessibility*. It is easier to navigate through different functions via clickable links or to follow through a vignette with a table of contents in a point-and-click system. A website is also a more integrated resource that keeps not only the reference manual and vignettes, but also the change-log and development history. 
All of this makes it easier for the users to understand and appreciate your work.\n\nIn addition, a website is a great place to market yourself and show your personality. Unlike a `GitHub` repo, which always follows the same template, a website is the place where you have more freedom to showcase your creativity. You can customize the website and have more space to explain.\n\n# What is `pkgdown`?\nWhile creating a website can be a daunting task for people who don't have extensive web programming experience, `pkgdown` provides a shortcut. `pkgdown` can automatically generate a website for an R package, containing references for the enclosed functions and any vignettes that exist, within two lines of code (slight exaggeration). It also helps to deploy the website to the `GitHub` server. Amazingly, `pkgdown` facilitates automatic updates of the website following any changes made to the package that are pushed to `GitHub`.\n\n\n\n# How to use `pkgdown`?\n\n## Live demonstration\nWhile the `pkgdown` website provides a comprehensive [walkthrough](https://pkgdown.r-lib.org/articles/pkgdown.html) for those who set up their `GitHub` access using a [personal access token](https://usethis.r-lib.org/articles/git-credentials.html), it is slightly tricky for those whose access is [set up with SSH](https://www.stephaniehicks.com/jhustatprogramming2022/posts/2022-11-10-version-control-part-2/#ssh-background-and-setup). During this lecture, we will demonstrate live how to deploy the website, specifically for SSH `GitHub` access.\n\n::: callout-tip\n### Did you try `pkgdown::deploy_to_branch()`?\nIf you run into a problem when running `usethis::use_pkgdown_github_pages()` and get stuck, you should try to understand what the function does by reading its manual `?usethis::use_pkgdown_github_pages()`. Is it possible to create the necessary `gh-pages` branch using `pkgdown::deploy_to_branch()`? Don't forget to set up the GitHub Action by calling `usethis::use_github_action(\"pkgdown\")`. 
Now you should be able to access your website via **github_account_name.github.io/pkg_name**\n:::\n\n## Website customization\nThere are many customizations possible. Nevertheless, there is no point-and-click system for it. You need to manually edit `_pkgdown.yml` following certain syntax. Please refer to the `pkgdown` customization documentation.\n\n\n# Summary\nIn this lecture, we\n\n* introduce the package `pkgdown` that automates website creation\n* demonstrate how to use `pkgdown` to create a website for an R package\n\n
\n\n### Additional Resources\n\n::: callout-tip\n- Create hexagon sticker for your package \n- Explore badge accessibility\n:::\n\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/posts/2022-12-01-targets-proj-workflows/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "f3916a40f4b28c5d84c85e270993c980", 3 | "result": { 4 | "markdown": "---\ntitle: \"Reproducible Workflows with `targets`\"\nauthor: \n - name: Boyi Guo\n url: https://boyiguo1.github.io\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"A Make-like pipeline tool for creating reproducible workflows in R\"\ndate: 2022-12-01\ncategories: [module 3, week 6, project management, targets]\nexecute:\n eval: false\n---\n\n\n\n\n\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following materials:**\n\n1. \n2. \n3. \n:::\n\n### Prerequisites\nBefore starting you must install the following additional packages:\n\n* `targets` - the R Workflows package\n* `usethis` - an automation package that simplifies project creation and setup\n* `renv` - a package manager in R\n\nYou can do this by calling\n\n\n::: {.cell}\n\n```{.r .cell-code}\ninstall.packages( c(\"usethis\", \"targets\", \"renv\"))\n```\n:::\n\n\nor use the “Install Packages…” option from the “Tools” menu in RStudio.\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n- https://books.ropensci.org/targets/walkthrough.html\n\n\n### Special Message from Boyi\nTo students attending online: \n\n
\n\nI apologize for the scraping sound I made during last lecture.\n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n- set up `targets` analytic pipeline\n- write and run a data analysis with `targets`\n- replicate and retrieve analysis results from a `targets` workflow\n:::\n\n# What is `targets`?\n
\n\n`targets` is not the grocery store. Instead, it is an R package that helps you manage your analysis. You can think of `targets` as a butler who helps you manage your analytics. Its services include\n\n* managing the order of your analysis steps so you won't be confused about them when reproducing an analysis\n* saving the output of each analytic procedure so you don't have to wait for repeated runs of unchanged results\n* monitoring changes in your code so you can rerun only the analytic tasks affected by those changes\n* reproducing the whole analysis with a click so you don't have to run multiple scripts\n\n# Why do we use `targets`?\n\n\n`targets` helps us manage analytic workflows more efficiently, and hence improves productivity with minimal effort. To put it simply, managing file names for your code or saved objects can be very painful, but `targets` can handle that for you when used in combination with a version control system such as `git`. \n\n
\n\n# How to use `targets`\n\n[The {targets} R package user manual](https://books.ropensci.org/targets/) is a great resource for learning how to use `targets`. The introductory `targets` tutorial is well documented in [*Chapter 2 Walkthrough*](https://books.ropensci.org/targets/walkthrough.html). Instead of going through the chapter with you, I will focus on some tricks that are not discussed in the user manual. \n\n## Set up a `targets` workflow\n\n::: {.cell}\n\n```{.r .cell-code}\n# Start a new R project\nusethis::create_project(\"targets_eg\")\n# Config target workflow\ntargets::use_targets()\n```\n:::\n\n\n### (Optional) Version control packages with `renv`\n\n::: {.cell}\n\n```{.r .cell-code}\n# Config renv system\nrenv::init()\ntargets::tar_renv()\n```\n:::\n\n\nIf other people open this project on a different computer, `renv` will automatically install all the necessary packages, in particular the same versions of those packages.\n\n::: callout-tip\n### Important `renv` functions\nIdeally, you should keep track of your R packages in every analysis, similar to how you version control your files using `git`. You may need to call the following functions periodically, e.g. after you add/remove necessary packages.\n\n* `targets::tar_renv()` updates `_targets_packages.R` by gathering all packages in your analytic workflow \n* `renv::status()` shows which packages are outdated or not recorded\n* `renv::snapshot()` updates your package version numbers by taking a snapshot of your project library\n* `renv::restore()` restores all missing packages or packages whose version number doesn't match the most recent snapshot.\n\nFor more information, visit <https://rstudio.github.io/renv/>\n:::\n\n## Set up keyboard shortcuts\n`targets` provides some `addins` to help users navigate through workflow management with a point-and-click system. 
For example, if you click on the `Addins` button in the tool bar (highlighted in the screen capture below), which is located at the top of the RStudio window, you can see many options that help you work with `targets`.\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![A screenshot of addins for `targets`](../../images/targets/Addins.png){fig-align='center' width=316}\n:::\n:::\n\n\nWith these addins, you don't necessarily have to remember all the functions to run `targets`, such as `targets::tar_make()`, `targets::tar_load()`, `targets::tar_visnetwork()`, etc.\n\nIf you prefer keyboard shortcuts, you can set them up for these commonly used functions. In order to do that, you need to go to `Tools` -> `Modify Keyboard Shortcuts`.\n\n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![A screenshot of how to modify keyboard shortcuts](../../images/targets/ModifyShortcuts.png){fig-align='center' width=382}\n:::\n:::\n\n\nWithin the pop-up keyboard shortcuts menu, you can search for `addin`, `targets`, or a specific `targets` addin function, e.g. `Load target at cursor`, in the search box. You can customize the keyboard shortcut by clicking on the input box within the `Shortcut` column. \n\n\n::: {.cell layout-align=\"center\"}\n::: {.cell-output-display}\n![A screenshot of keyboard shortcuts menu](../../images/targets/ShortcutsMenu.png){fig-align='center' width=801}\n:::\n:::\n\n\n\n# Summary\n`targets` is a workflow management powerhouse. It offers much more utility than we covered today. [The {targets} R package user manual](https://books.ropensci.org/targets/) does an excellent job of explaining how to set up parallel computing with the system, how to work with markdown systems (I managed my dissertation writing in `targets`), and much more. \n\nNevertheless, I need to warn you that learning `targets` could be intimidating at the beginning because of the setup process and new syntax. 
It may take multiple iterations or projects until you are comfortable using it. \n\n
\n\nBut it is very rewarding and can save you a lot of time in the long run! It is a worthy investment of time. \n\n\n### Additional Resources\n\n::: callout-tip\n- `targets` website \n- The `targetopic`, a `targets` ecosystem \n- A tutorial & reproducible example on calculating residential segregation indices with decennial US census data \n:::\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- 
/_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-12-08-relational-databases/index/figure-html/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-12-20-dealing-with-large-data/index/figure-html/unnamed-chunk-30-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/_freeze/posts/2022-12-20-dealing-with-large-data/index/figure-html/unnamed-chunk-30-1.png -------------------------------------------------------------------------------- /_freeze/projects/2022-10-27-project-1/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": 
"65cdce42923fcb65489d67e811a7ae0d", 3 | "result": { 4 | "markdown": "---\ntitle: \"Project 1\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Building a website and practicing with command-line tools\"\ndate: 2022-10-27\ncategories: [project 1, projects]\n---\n\n# Background\n\n\n**Due date: November 11 at 11:59pm**\n\nThe goal of this assignment is to practice some of the skills we have been learning about in class around Quarto, command-line, and version control by building and deploying a website. You also are asked to practice with some command-line skills more formally. \n\n### To submit your project \n\nPlease use this Quarto file (`.qmd`) and fill in the requested components by adding the URLs pointing to the private and public repositories and deployed websites. Render this file to a **HTML file** and submit your HTML file to the dropbox on CoursePlus. Please **show all your code**, if relevant to a section.\n\n\n# Part 1\n\nComplete the Git & GitHub Fundamentals Starter course. The link to create a private GitHub repository for yourself to complete the course will be posted in CoursePlus. When you are done, add the link to the GitHub repo here: \n\n- **Link to your GitHub repository**: [Delete this text and replace the text with the link to the private GitHub repo you created above]\n\n# Part 2\n\n1. Read this blogpost titled [Building a brand as a scientist](https://www.stephaniehicks.com/blog/building-a-brand-as-a-scientist). \n2. Reflect on the questions in the \"Defining your brand\" section. \n3. 
Write at most two paragraphs (4-6 sentences) here answering one (or more) of the questions asked in the section above.\n\n\n# Part 3\n\nNext, with the reflections from Part 2 in mind, you will create a public GitHub repository on your own GitHub account and build a small website to introduce yourself to others in the course. You will also create a small data analysis on one of the webpages to practice literate programming in [Quarto](https://quarto.org). \n\n![](../../images/quarto_schematic.png)\n\n## 1. Create a GitHub repo for your website\n\nCreate a new public GitHub repository titled `biostat840-intro-<firstname>-<lastname>` (where you replace `<firstname>` with your first name and `<lastname>` with your last name) in your own personal GitHub account (e.g. `https://github.com/<username>/biostat840-intro-<firstname>-<lastname>`).\n\n## 2. Build a website using Quarto\n\nCreate a new project locally within RStudio and build a website for yourself. Your website should include the following: \n\n1. A home/landing page. This is the page that someone will land on when they visit your website. At minimum it should include your name, a short summary about yourself (max 2-3 sentences), and a picture of something you enjoy doing for fun (or a picture of yourself if you are comfortable sharing one). \n2. A page titled 'About'. This page should describe who you are in greater detail. It could include your professional interests and your educational and/or professional background and/or experience. It could also include any personal information you feel comfortable sharing on the website. \n3. A data analysis page called 'Example analysis'. You can pick any dataset you wish to analyze. In this webpage, you will analyze a dataset and summarize the results. The requirements for this webpage are the following: \n - You must describe the question you aim to answer with the data and data analysis. \n - You must describe who the intended audience for the data analysis is. \n - You must describe and link to where the original data that you chose come from. 
\n - You must include a link to a data dictionary for the data or create one inside the webpage. \n - Your analysis must include some minimal form of data wrangling using at least five different functions from `dplyr` or `tidyr`. \n - Your analysis should include at least three plots using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). \n - Plots should have titles, subtitles, captions, and human-understandable axis labels. \n - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). \n - Your analysis must include one image or table (not one you created yourself, but one you have saved locally or one from the web). \n - Your analysis must include at least two different [callout blocks](https://quarto.org/docs/authoring/callouts.html).\n - Your analysis must include a `.bib` file, which you use to reference at least three unique citations. For example, a citation could be to the website or paper where the original data came from, or it could be to a paper describing a method you are using to analyze the data. \n - Your analysis must include the use of at least 1 [margin content](https://www.stephaniehicks.com/jhustatprogramming2022/posts/2022-10-27-build-website/#margin-content). \n - You must summarize your analysis and/or results with a paragraph (4-6 sentences).\n - At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, and `ggplot2`) to help the TA verify that you met all the requirements described above. \n\n## 3. Include a `README.md` file\n\nYour local repository should include a `README.md` file describing who the author of the website is and a link to the website after it has been deployed. Other things you might include are the technical details for how the website was created and/or deployed. \n\n## 4. 
Deploy your website\n\nDeploy your website using [Quarto Pub](https://quarto.org/docs/publishing/quarto-pub.html), [GitHub pages](https://quarto.org/docs/publishing/github-pages.html), or [Netlify](https://quarto.org/docs/publishing/netlify.html). (**Note**: Deploying your website to RPubs will not be accepted).\n\n## 5. Share your website\n\nGo to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created. Also, list the URLs below for the purposes of grading. \n\nAs you read the introductions from other folks in the class, feel free to comment/reply using Discussion board.\n\n- **Link to your GitHub repository**: [Delete this text and replace the text with the link to the public GitHub repo you created above for your website]\n\n- **Link to your deployed website**: [Delete this and replace the text with the link to the public deployed website you created above]\n\n# Part 4\n\n1. Use [`wget`](https://www.gnu.org/software/wget) to download four files that end in `.fastq` from [here](https://github.com/stephaniehicks/jhustatprogramming2022/tree/main/data). \n2. Create a directory to download the data. The top level directory should be called `raw_data` and there should be a sub-level directory called `fastq`. The command you write should force the creation of both directories at the same time if either of them do not exist yet.\n3. Move all the `.fastq` files into the `fastq` sub-level directory. \n4. Write a for loop in the shell that iterates over each `.fastq` file. For each `.fastq` file, do the following. In the first 1000 rows for each file, count the number of lines where the \"@\" symbol appears. Your final output should be four numbers printed to the screen. 
\n\n\n```{bash}\n# Add your solution here\n\n```\n\n", 5 | "supporting": [ 6 | "index_files" 7 | ], 8 | "filters": [], 9 | "includes": {} 10 | } 11 | } -------------------------------------------------------------------------------- /_freeze/projects/2022-11-08-project-2/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "6ed61b7588ac8280cca826e6d2369dc8", 3 | "result": { 4 | "markdown": "---\ntitle: \"Project 2\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Building an R package and practicing with S3\"\ndate: 2022-11-08\ncategories: [project 2, projects]\n---\n\n\n# Background\n\n\n**Due date: November 25 at 11:59pm**\n\nThe goal of this homework is to write a set of functions and put them into an R package so that other people can easily use the functions in their own data analyses after installing the package. In addition, they would receive documentation on how to use the functions.\n\nIn addition to building the R package, you will also build a S3 class for your package, and create a vignette where you demonstrate the functions in your R package with an example dataset from TidyTuesday.\n\nFinally, we will practice our command-line and version control skills by submitting the assignment through GitHub Classroom. \n\n### To submit your project\n\n- The link to create a private GitHub repository for yourself to complete Project 2 will be posted in CoursePlus (**Note**: this creates an empty repository and you need to push your code in your locate remote repository to GitHub when ready).\n- Build your R package locally and then push the files to the private Github repository that you created for yourself via GitHub Classroom. 
\n- The TA will grade the R package by cloning the repository, installing it, and checking for all the things described below. **It must be installable without any errors**. \n\n# Part 1: Create an R package\n\n\n## Part 1A: Cosine and sine transformation \n\nThe cosine and sine of a number can be written as an infinite series expansion of the form\n\n$$\n\\cos(x) = 1 - \\frac{x^2}{2!} + \\frac{x^4}{4!} - \\frac{x^6}{6!} \\cdots\n$$\n\n$$\n\\sin(x) = x - \\frac{x^3}{3!} + \\frac{x^5}{5!} - \\frac{x^7}{7!} \\cdots\n$$\n\n\nWrite two functions that compute the cosine and sine (respectively) of a number using the truncated series expansion. Each function should take two arguments:\n\n- `x`: the number to be transformed\n- `k`: the number of terms to be used in the series expansion beyond the constant 1. The value of `k` is always $\\geq 1$.\n\n:::{.callout-tip}\n\n### Notes\n\n- You can assume that the input value `x` will always be a *single* number. \n- You can assume that the value `k` will always be an integer $\\geq 1$. 
\n- Do not use the `cos()` or `sin()` functions in R.\n\n:::\n\n\n::: {.cell}\n\n```{.r .cell-code}\nfn_cos <- function(x, k) {\n # Add your solution here\n}\n\nfn_sin <- function(x, k) { \n # Add your solution here\n}\n```\n:::\n\n\n## Part 1B: Calculating confidence intervals\n\nWrite the following set of functions:\n\n* `sample_mean()`, which calculates the sample mean\n\n$$\n\\bar{x} = \\frac{1}{N} \\sum_{i=1}^N x_i\n$$\n\n* `sample_sd()`, which calculates the sample standard deviation\n\n$$\ns = \\sqrt{\\frac{1}{N-1} \\sum_{i=1}^N (x_i - \\overline{x})^2}\n$$\n\n* `calculate_CI()`, which calculates the confidence interval of a sample mean and returns a named vector of length 2, where the first value is the `lower_bound` and the second value is the `upper_bound`.\n\n$$\n\\bar{x} \\pm t_{\\alpha/2, N-1} s_{\\bar{x}}\n$$ \n\n:::{.callout-tip}\n\n### Notes\n\n- You can assume that the input value `x` will always be a *vector* of numbers of length *N*.\n- Do not use the `mean()` and `sd()` functions in R.\n\n:::\n\n\n::: {.cell}\n\n```{.r .cell-code}\nsample_mean <- function(x) {\n # Add your solution here\n}\n\nsample_sd <- function(x) {\n # Add your solution here\n}\n\ncalculate_CI <- function(x, conf = 0.95) {\n # Add your solution here\n}\n```\n:::\n\n\n## Part 1C: Put functions into an R package\n\nCreate an R package for the functions you wrote from Part 1A and 1B. Your package will have three exported functions for users to call (see below). You will need to write documentation for each function that you export. 
Your package should include the functions: \n\n* `fn_cos()`, which computes the approximation to the cosine function (**exported**)\n* `fn_sin()`, which computes the approximation to the sine function (**exported**)\n* `sample_mean()`, which calculates the sample mean (**not exported**)\n* `sample_sd()`, which calculates the sample standard deviation (**not exported**)\n* `calculate_CI()`, which calculates the confidence intervals from simulated data (**exported**)\n\n:::{.callout-tip}\n\n### Notes\n\n* Remember that you should only export the functions that you want the user to use.\n* Functions that are *not* exported do not require any documentation.\n* Each exported function should have at least **one example** of its usage (using the `@example` directive in the documentation).\n* In the functions in your package, consider using control structures and including checks (e.g. `is.na()`, `is.numeric()`, `if()`) to make sure the input is as you expect it to be. For example, try to break the function with unexpected values that a user might provide (e.g. providing a negative value to a log transformation). This can help you identify the ways the function can fail and how to guard against them. \n* Your package should be installable without any warnings or errors.\n\n:::\n\n\n# Part 2: Create a S3 class as part of your package\n\nIn this part, you will create a new S3 class called `ci_class` (confidence interval class) to be used in your R package. You will \n\n1. Create a constructor function for the `ci_class` called `make_ci_class()`. \n2. Create a `print()` method to work with the `ci_class` to return a message with the name of the class and the number of observations in the S3 object. \n3. Modify the `calculate_CI()` function to work with the `ci_class` and still return a `lower_bound` and `upper_bound`. 
\n\nFor example, this is what the output of your code might look like: \n\n```\n> set.seed(1234)\n> x <- rnorm(100)\n> obj <- make_ci_class(x)\n> print(obj) # explicitly using the print() method\n#> a ci_class with 100 observations\n> obj # using autoprinting\n#> a ci_class with 100 observations\n```\n\nCalculate a 90% confidence interval:\n\n```\n> calculate_CI(obj, conf = 0.90)\n#> lower_bound upper_bound \n#> -0.32353231 0.01000883\n```\n\n\n# Part 3: Create supporting documents as part of your package\n\n## Part 3A: Create a vignette\n\nIn this part, you will create a vignette where you demonstrate the functions in your R package. \nSpecifically, you will create a R Markdown and put it in a folder called \"vignettes\" within your R package. The purpose of a vignette is to demonstrate the functions of your package in a longer tutorial instead of just short examples within the documentation of your functions (i.e. using the `@example` directive in the documentation). \n\n:::{.callout-tip}\n\n### Note \n\nYou might find the `use_vignette()` [function](https://usethis.r-lib.org/reference/use_vignette.html) from the `usethis` R package helpful. \n\n:::\n\n\n## Part 3B: Create a `README.md` file \n\nCreate a `README.md` file in the R package, which will be useful to readers when they learn about your package. The readme must include: \n\n- The title of package\n- The author of the package\n- A goal / description of the package\n- A list of **exported** functions that are in the package. Briefly describe each function. \n- A basic example with one of the functions. \n\n:::{.callout-tip}\n\n### Note \n\nYou might find the `use_readme_md()` [function](https://usethis.r-lib.org/reference/use_readme_rmd.html) from the `usethis` R package helpful. \n\n:::\n\n\n## Part 3C: Demonstrate `fn_cos()`\n\nIn the vignette, make a plot and show the output of your function `fn_cos(x,k)` and how it approximates the `cos(x)` function from base R as $k$ increases. 
\n\n:::{.callout-tip}\n\n### Notes\n\n- The x-axis should range between 0 and 10. \n- The y-axis should be the output from `fn_cos(x,k)` or `cos(x)`.\n- Plot the output from `cos(x)` as points on the graph. \n- Plot the output from `fn_cos(x,k)` as lines on the graph. \n- Show 5 lines for values `k` = 1, 3, 5, 7, 9. Each line should be a different color. \n\n:::\n\n\n## Part 3D: Demonstrate `fn_sin()`\n\nRepeat a similar task and make a similar plot as in Part 3C, but here using `fn_sin()` instead of `fn_cos()`. \n\n\n## Part 3E: Demonstrate `calculate_CI()`\n\nThe goal here is to demonstrate the `calculate_CI()` function in your package inside the vignette with some example data from [TidyTuesday](https://www.tidytuesday.com). However, part of the requirement is to also wrangle and plot the data. At the end of the section, you must demonstrate how to apply `calculate_CI()` as an example to the data. \n\nOther requirements for this part of vignette are the following: \n\n1. Pick any dataset you wish from [TidyTuesday](https://www.tidytuesday.com) to analyze. \n - You must describe what is the question you aim to answer with the data and data analysis. \n - You must describe and link to where the original data come from that you chose.\n - You must include a link to a data dictionary for the data or create one inside the webpage. \n2. Load the data into R (you must show the code from this section)\n - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. \n - Saves the data only once (not each time you knit/render the document). \n - Read in the data locally each time you knit/render. \n3. Your analysis must include some form of data wrangling and data visualization. \n - You must use at least eight different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. 
\n - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). \n - Plots should have titles, subtitles, captions, and human-understandable axis labels. \n4. Apply the function `calculate_CI()` at least once in the vignette. \n - Summarize and interpret the results in 1-2 sentences.\n5. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA with respect to making sure you met all the requirements described above. \n \n\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/projects/2022-11-22-project-3/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "e803a6dbcb8776f24b9e11c6c9a515c0", 3 | "result": { 4 | "markdown": "---\ntitle: \"Project 3\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Building websites for R packages; practice functional programming and APIs\"\ndate: 2022-11-29\ncategories: [project 3, projects]\n---\n\n\n# Background\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(tidyverse)\n```\n:::\n\n\n\n\n**Due date: December 13 at 11:59pm**\n\nThe goal of this assignment is to practice building websites for R packages, along with practice functional programming and using APIs. \n\n### To submit your project\n\nIn both parts below, you will need to create two separate github repositories for yourself. The links to create the repositories will be in CoursePlus. 
\n\nThe first one (Part 1) will be a public repository to build a website for an R package. It is public because you will need to deploy the website. \n\nThe second one (Part 2) will be a private repository to practice using two different APIs, practice functional programming, and build data analyses. \n\n# Part 1\n\nHere, we will practice using [`pkgdown`](https://pkgdown.r-lib.org). Using any R package with a GitHub repository (that does not already have a pkgdown website), use `pkgdown` to create a website for the software package. \n\n\n:::{.callout-note}\n\nThis could even be a package that you have written (or are working on creating right now). Otherwise, this could be a package that you have used previously or you can pick one you are not familiar with and just want to know more about! \n\nIt should **not** be the package you created in Project 2 for this course. \n\n:::\n\n## Part 1A: Create website locally \n\nFork the GitHub repository from the original location to your own GitHub account. Clone the repository to your local computer. \n\nUse `usethis` and `pkgdown` to create a website locally for the R package of your choice. \n\n## Part 1B: Customize the website\n\nHere, you need to customize the website in **at least 5 ways**. How you customize is up to you. The `pkgdown` website has lots of suggestions for you to try out! \n\n## Part 1C: Create an example data analysis\n\nIn this part, you will create a data analysis (or a case study) where you demonstrate the functions in the R package. Specifically, you will add [another article or vignette](https://pkgdown.r-lib.org/articles/customise.html#navbar) titled \"Example analysis\" inside the `/vignettes` folder. \n\nSimilar to Project 2, you must pick out a data set from [TidyTuesday](https://www.tidytuesday.com) **that you have not worked with before** (i.e. not in a previous project or assignment from this class or from 776, but other classes or personal projects are acceptable). 
You must also demonstrate wrangling and plotting the data. Finally, your example analysis must also demonstrate at least 2 functions from the R package in some way in the vignette. \n\nOther requirements for this part of the vignette are the following: \n\n1. Pick any data set you wish from [TidyTuesday](https://www.tidytuesday.com) to analyze. \n - You must describe the question you aim to answer with the data and data analysis. \n - You must describe and link to where the original data that you chose come from.\n - You must include a link to a data dictionary for the data or create one inside the webpage. \n2. Load the data into R\n - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. \n - Save the data only once (not each time you knit/render the document). \n - Read in the data locally each time you knit/render. \n3. Your analysis must include some form of data wrangling and data visualization. \n - You must use at least six different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. \n - You **must use at least two functions from `purrr`**. \n - Your analysis should include at least three plots using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). \n - Plots should have titles, subtitles, captions, and human-understandable axis labels. \n - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). \n4. Apply **at least 2 functions from the R package** in the vignette. \n5. Summarize and interpret the results in 1-2 sentences.\n6. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA verify that you met all the requirements described above. 
\n\n## Part 1D: Create a `README.md` file \n\nIf the package does not already include one, create and include a `README.md` file in the folder where the R package and pkgdown files are on your computer and add the following information below. \n\nIf it already has a `README.md` file, just edit the top of the file with the following information: \n\n- Include a URL to the GitHub link to where the original R package came from. \n- Include a URL to the deployed website that you will do in Part 1E, but it should be something like `https://jhu-statprogramming-fall-2022.github.io/biostat840-project3-pkgdown-`.\n- Include a description of the 5 things you customized in your `pkgdown` website (excluding adding the example data analysis from Part 1C).\n\nThe readme must also include (if it does not already): \n\n- The title of package\n- The original author of the package (and you who made the website and example data analysis)\n- A goal / description of the package\n- A list of **exported** functions that are in the package. Briefly describe each function. \n- A basic example with one of the functions. \n\n## Part 1E: Deploy the website\n\nThe link to create a public GitHub repository for yourself to complete this part of Project 3 will be posted in CoursePlus. This creates an empty GitHub repository. \n\nWhen ready, deploy the website. \n\n:::{.callout-note}\n\nYou need to modify the template code that is provided to you from GitHub when you set the remote. 
There will already be a remote `origin` (from where you cloned the remote repository to your local repository), which you can see with \n\n``` {.bash filename=\"Bash\"}\ngit remote -v\n```\n\nTo change where you push your code, instead of (you will see this in the template code from GitHub when you create the public repository)\n\n``` {.bash filename=\"Bash\"}\ngit remote add origin \n```\n\nYou want to use something like \n\n``` {.bash filename=\"Bash\"}\ngit remote add upstream \n```\n\nand when you push your code, you want to use `git push -u upstream main`, for example (not `git push -u origin main`).\n\n:::\n\n# Part 2\n\nHere, we will practice using APIs and making data visualizations. \n\nFor this part of Project 3, you need to create a private GitHub repository for yourself, which will be posted in CoursePlus. This creates an empty GitHub repository. You need to show all your code and submit both the `.qmd` file and the rendered HTML file. \n\n:::{.callout-note}\n\nWhen you use an API, you want to figure out the data you want to extract and then save it locally so that you are not using the API each time you knit or render your data analysis. \n\nMost APIs have limits on the number of times you can ping it in a given hour and your IP address can be blocked if you try to ping it too many times within a short time. \n\n:::\n\n## Part 2A\n\nThe first API we will use is `tidycensus` (), which is an R package that allows users to interface with a select number of the US Census Bureau’s data APIs and return tidyverse-ready data frames, optionally with simple feature geometry included.\n\nThe goal of this part is to create a data analysis (or a case study) using the US Census Bureau’s data. \n\nOther requirements for this part are the following: \n\n1. You must describe what is the question you aim to answer with the data and data analysis. \n2. You must use at least three different calls to the `tidycensus` API to extract out different datasets. 
For example, these could be across years, locations, or variables. \n - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. \n - Save the data only once (not each time you knit/render the document). \n - Read in the data locally each time you knit/render. \n \n3. Your analysis must include some form of data wrangling and data visualization. \n - You must use at least six different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. \n - You **must use at least two functions from `purrr`**. \n - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). \n - Plots should have titles, subtitles, captions, and human-understandable axis labels. \n - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). \n4. Summarize and interpret the results in 1-2 sentences.\n5. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA with respect to making sure you met all the requirements described above. \n\n## Part 2B \n\nThe second API we will use is the [Covid Act Now Data API](https://covidactnow.org/data-api). \n\nThe goal of this part is to create a data analysis (or a case study) using the Covid Act Now Data API. \n\nOther requirements for this part are the following: \n\n1. You must describe what is the question you aim to answer with the data and data analysis. \n2. You must use at least three different calls to the Covid Act Now Data API to extract out different datasets. For example, these could be across counties, etc. \n - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. 
\n - Save the data only once (not each time you knit/render the document). \n - Read in the data locally each time you knit/render. \n3. Your analysis must include some form of data wrangling and data visualization. \n - You must use at least six different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. \n - You **must use at least two functions from `purrr`**. \n - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). \n - Plots should have titles, subtitles, captions, and human-understandable axis labels. \n - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). \n4. Summarize and interpret the results in 1-2 sentences.\n5. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA with respect to making sure you met all the requirements described above. \n6. Push your code and rendered HTML to the private repository that you created for yourself. 
\n", 5 | "supporting": [ 6 | "index_files" 7 | ], 8 | "filters": [ 9 | "rmarkdown/pagebreak.lua" 10 | ], 11 | "includes": {}, 12 | "engineDependencies": {}, 13 | "preserve": {}, 14 | "postProcess": true 15 | } 16 | } -------------------------------------------------------------------------------- /_freeze/projects/2022-12-06-project-4/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "26df576038c8702e18011e25f815b739", 3 | "result": { 4 | "markdown": "---\ntitle: \"Project 4\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Building static and interactive dashboards\"\ndate: 2022-12-13\ncategories: [project 4, projects]\n---\n\n\n# Background\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(tidyverse)\n```\n:::\n\n\n\n**Due date: December 23 at 11:59pm**\n\nThe goal of this assignment is to practice building interactive dashboards \nand building effective data visualizations to communicate to an audience. \n\n![](../../images/shiny.png){width=\"30%\"}\n\n### To submit your project\n\n- Create a public github repository for yourself. The link to create the repository will be in CoursePlus. \n- Follow the instructions below and push all your code to this github repository. \n- Then, you will also need to deploy your dashboard. As an example, here is a deployed interactive dashboard from the lecture we had on dashboards. \n\n- \n\n- You must include a `README.md` in your github repository with your source code that includes a link to your deployed dashboard. \n\n# Part 1: Identify the data\n\nPick a dataset from one of the datasets that you used in Project 3. You can also pick a different dataset if you wish, but to help minimize work in this project, you are encouraged to pick a dataset that you already are familiar with from Project 3. 
\n\nOnce you identify the dataset, save the data locally in your project repository to be able to load into R:\n\n - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. \n - Save the data only once. \n - Read in the data locally each time you knit/render. \n \n# Part 2: Design the interactive dashboard\n\nUsing the data analysis that you created from Project 3, think about the following topics and questions from Project 3. You do not have to formally answer any of the questions right now, but use them to help design your dashboard.\n\n- **Motivation and Overview**: What are the goals and motivation of the data analysis?\n- **Related Work**: Anything that inspired you, such as a paper, a web site, or something we discussed in class.\n- **Data Analytic Questions**: What question(s) are you trying to answer with the data and data analysis? Did the questions change as you began to explore the data? What new questions did you consider in the course of your analysis? What variables seemed important or not important? \n- **Audience**: Who is the target audience for your analysis? Should they be expected to have a specific background or knowledge? \n- **Data**: What are the original data sources? Is there a data dictionary or did you create a table yourself? \n- **Exploratory Data Analysis**: What visualizations did you use to look at your data in different ways? Did you consider statistical models (e.g. inference or prediction)? How did you decide? Were there any major changes to your ideas? How did you reach these conclusions? \n- **Data visualization**: What data analytic components (e.g. tables, plots, etc) would be useful to show in a static format versus an interactive format? \n- **Narrative and Summary**: What did you learn from the data and data analysis? How did you answer the questions? How can you justify your answers? What are the key/important takeaways for the audience? 
What are the limitations of the analyses?\n\n\n# Part 3: Build an interactive dashboard \n\nBuild an interactive dashboard with the following criteria. Outside of the following criteria, create a dashboard that effectively communicates the key ideas about the data or data analysis. You are strongly encouraged to spend time exploring `flexdashboard` and `shiny` to customize your dashboard beyond the following criteria. You are welcome to get inspired by dashboards you find online or the one we discussed in class: \n\n- \n\nSpecific criteria your dashboard must have: \n\n- `About` tab: This must describe the purpose of the dashboard and a link or original location of the data. \n- `The Data` tab: A description of the data along with a table of the dataset similar to the [one we discussed in class](https://rsconnect.biostat.jhsph.edu/ocs-bp-school-shootings-dashboard/#section-the-data). \n- Two tabs with static content. Within one of the tabs for the static content, there must be additional tabs. \n- Two tabs with interactive content. One of these tabs needs to include some type of interactive plots. The other tab can be any other type of interactive content. \n- `Analysis` tab. This tab should contain the analysis you built for Project 3. \n\n# Part 4: Make a two minute video \n\nMake a two minute (max!) screencast with narration showing highlights of your data analysis and a demo of your dashboard. There are several ways to do this, but one way is to join a zoom room, share your screen, and record yourself. When you are done, upload the video to YouTube or Vimeo and embed it into the dashboard. \n\n:::{.callout-tip}\n\nThere are several ways to do this, but I like to embed an `iframe` into the `.Rmd`\n\n- \n\n:::\n\nUse principles of good storytelling and presentations to get your key points across. \n\n- Focus the majority of your screencast on your main contributions rather than on technical details. 
\n- What do you feel is the best part of your data analysis and dashboard? \n- What insights did you gain? \n- What is the single most important thing you would like your audience to take away? Make sure it is upfront and center rather than at the end.\n\n# Part 5: Deploy dashboard and push code to Github \n\n- Using the public github repository that you created from CoursePlus, push your source code for this dashboard to GitHub. \n- Deploy the website using [shinyapps.io](https://www.shinyapps.io). As an example, here is a deployed interactive dashboard from our lecture on dashboards.\n\n- \n\n- Include a `README.md` file in the GitHub repository with your name and a link to the deployed dashboard. \n- Share a link to your github repo with your code and your deployed interactive dashboard on CoursePlus. \n\n\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/schedule/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "dadc9669e2fee62e40cc60f3136e67e6", 3 | "result": { 4 | "markdown": "---\ntitle: \"Schedule\"\ndescription: |\n Schedule and course materials for Statistical Programming Paradigms and Workflows (JHSPH Biostatistics 140.840 Fall 2022).\ntbl-colwidths: [15,10, 55, 20]\n---\n\n\n## Schedule and course materials\n\n\nFor Rmd files, go to the [course GitHub repository](https://github.com/stephaniehicks/jhustatprogramming2022) and navigate the directories, or best of all to clone the repo and navigate within RStudio.\n\n::: column-page\n| Week | Dates | Topics | Projects |\n|---|---|---|---|---|\n| **Module 1** | | **Statistical programming at the command-line** | |\n| | | |\n| Week 1 | Oct 27 | Course introduction \\[[syllabus](syllabus.qmd)\\] | 🌴 Project 
1 \\[[html](projects/2022-10-27-project-1/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-10-27-project-1/index.qmd)\\] |\n| | | Building websites with `quarto` \\[[html](posts/2022-10-27-build-website/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-10-27-build-website/index.qmd)\\] | | \n| | | | |\n| | | | |\n| Week 2 | Nov 1 | Introduction to the command-line \\[[html](posts/2022-11-01-command-line-part-1/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-01-command-line-part-1/index.qmd)\\] | |\n| | | | | \n| | Nov 3 | More advanced command-line tools \\[[html](posts/2022-11-03-command-line-part-2/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-03-command-line-part-2/index.qmd)\\] | |\n| | | | | \n| Week 3 | Nov 8 | Version control (git) \\[[html](posts/2022-11-08-version-control-part-1/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-08-version-control-part-1/index.qmd)\\] | 🌴 Project 2 \\[[html](projects/2022-11-08-project-2/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-11-08-project-2/index.qmd)\\] | \n| | | | | | | |\n| | Nov 10 | Version control (GitHub) \\[[html](posts/2022-11-10-version-control-part-2/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-10-version-control-part-2/index.qmd)\\] | |\n| | | | | \n| | Nov 11 | | 🍂 Project 1 due | \n| | | | | \n| **Module 2** | | **R software development** | | \n| | | |\n| Week 4 | Nov 15 | Object Oriented Programming \\[[html](posts/2022-11-15-object-oriented-programming/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-15-object-oriented-programming/index.qmd)\\] | | \n| | | | |\n| 
| Nov 17 | R package software development \\[[html](posts/2022-11-17-r-pkg-dev-part-1/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-17-r-pkg-dev-part-1/index.qmd)\\] | | \n| | | | | \n| Week 5 | Nov 22 | 🦃 Class is canceled | | \n| | | | | \n| | Nov 24 | 🦃 No lecture, enjoy the break! | | \n| | | | |\n| | Nov 25 | | |\n| | | | |\n| **Module 3** | | **Advanced programming paradigms** | | \n| | | | | \n| Week 6 | Nov 29 | Functional programming with `purrr` \\[[html](posts/2022-11-29-purrr-fun-programming/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-29-purrr-fun-programming/index.qmd)\\] | 🍂 Project 2 due
🌴 Project 3 \\[[html](projects/2022-11-22-project-3/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-11-22-project-3/index.qmd)\\] |\n| | | | | \n| | Dec 1 | Project management with `targets` \\[[html](posts/2022-12-01-targets-proj-workflows/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-01-targets-proj-workflows/index.qmd)\\] | |\n| | | Package website with `pkgdown` \\[[html](posts/2022-12-01-pkgdown-pkg-website/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-01-pkgdown-pkg-website/index.qmd)\\] | |\n| | | | | \n| Week 7 | Dec 6 | Retrieving data from APIs \\[[html](posts/2022-12-06-gettingdata-api/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-06-gettingdata-api/index.qmd)\\] | | \n| | | | |\n| | | |\n| | Dec 8 | Relational databases and SQL basics \\[[html](posts/2022-12-08-relational-databases/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-08-relational-databases/index.qmd)\\] | | \n| | | |\n| **Module 4** | | **Interactive web apps and dashboards** | | \n| | | |\n| Week 8 | Dec 13 | Building dashboards with `flexdashboard` and `shinydashboard` \\[[html](posts/2022-12-13-flexdashboard/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-13-flexdashboard/index.qmd)\\] | 🍂 Project 3 due
🌴 Project 4 \\[[html](projects/2022-12-06-project-4/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-12-06-project-4/index.qmd)\\] | \n| | | |\n| | Dec 15 | Scraping data from the web with `rvest` \\[[html](posts/2022-12-15-web-rvest/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-15-web-rvest/index.qmd)\\] | | \n| | | |\n| **Module 5** | | **Dealing with \"big\" data** | | \n| | | |\n| Week 9 | Dec 20 | Profiling R code \\[[html](posts/2022-12-20-profiling-r-code/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-20-profiling-r-code/index.qmd)\\] | | \n| | | Strategies for dealing with large data \\[[html](posts/2022-12-20-dealing-with-large-data/index.qmd)\\] \\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-20-dealing-with-large-data/index.qmd)\\] | | \n| | | |\n| | Dec 22 | ❄️ Class is canceled | | \n| | | |\n| | Dec 23 | | 🍂 Project 4 due |\n:::\n\n\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/site_libs/clipboard/clipboard.min.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * clipboard.js v2.0.10 3 | * https://clipboardjs.com/ 4 | * 5 | * Licensed MIT © Zeno Rocha 6 | */ 7 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return o}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),c=n.n(e);function a(t){try{return document.execCommand(t)}catch(t){return}}var f=function(t){t=c()(t);return a("cut"),t};var l=function(t){var e,n,o,r=1 { 5 | if (categoriesLoaded) { 6 | activateCategory(category); 7 | setCategoryHash(category); 8 | } 9 | }; 10 | 11 | window["quarto-listing-loaded"] = () => { 12 | // Process any existing hash 13 | const hash = getHash(); 14 | 15 | if (hash) { 16 | // If there is a category, switch to that 17 | if (hash.category) { 18 | activateCategory(hash.category); 19 | } 20 | // Paginate a specific listing 21 | const listingIds = Object.keys(window["quarto-listings"]); 22 | for (const listingId of listingIds) { 23 | const page = hash[getListingPageKey(listingId)]; 24 | if (page) { 25 | showPage(listingId, page); 26 | } 27 | } 28 | } 29 | 30 | const listingIds = Object.keys(window["quarto-listings"]); 31 | for (const listingId of listingIds) { 32 | // The actual list 33 | const list = window["quarto-listings"][listingId]; 34 | 35 | // Update the handlers for pagination events 36 | refreshPaginationHandlers(listingId); 37 | 38 | // Render any visible items that need it 39 | renderVisibleProgressiveImages(list); 40 | 41 | // Whenever the list is updated, we also need to 42 | // attach handlers to the new pagination elements 43 | // and refresh any newly visible items. 
44 | list.on("updated", function () { 45 | renderVisibleProgressiveImages(list); 46 | setTimeout(() => refreshPaginationHandlers(listingId)); 47 | 48 | // Show or hide the no matching message 49 | toggleNoMatchingMessage(list); 50 | }); 51 | } 52 | }; 53 | 54 | window.document.addEventListener("DOMContentLoaded", function (_event) { 55 | // Attach click handlers to categories 56 | const categoryEls = window.document.querySelectorAll( 57 | ".quarto-listing-category .category" 58 | ); 59 | 60 | for (const categoryEl of categoryEls) { 61 | const category = categoryEl.getAttribute("data-category"); 62 | categoryEl.onclick = () => { 63 | activateCategory(category); 64 | setCategoryHash(category); 65 | }; 66 | } 67 | 68 | // Attach a click handler to the category title 69 | // (there should be only one, but since it is a class name, handle N) 70 | const categoryTitleEls = window.document.querySelectorAll( 71 | ".quarto-listing-category-title" 72 | ); 73 | for (const categoryTitleEl of categoryTitleEls) { 74 | categoryTitleEl.onclick = () => { 75 | activateCategory(""); 76 | setCategoryHash(""); 77 | }; 78 | } 79 | 80 | categoriesLoaded = true; 81 | }); 82 | 83 | function toggleNoMatchingMessage(list) { 84 | const selector = `#${list.listContainer.id} .listing-no-matching`; 85 | const noMatchingEl = window.document.querySelector(selector); 86 | if (noMatchingEl) { 87 | if (list.visibleItems.length === 0) { 88 | noMatchingEl.classList.remove("d-none"); 89 | } else { 90 | if (!noMatchingEl.classList.contains("d-none")) { 91 | noMatchingEl.classList.add("d-none"); 92 | } 93 | } 94 | } 95 | } 96 | 97 | function setCategoryHash(category) { 98 | setHash({ category }); 99 | } 100 | 101 | function setPageHash(listingId, page) { 102 | const currentHash = getHash() || {}; 103 | currentHash[getListingPageKey(listingId)] = page; 104 | setHash(currentHash); 105 | } 106 | 107 | function getListingPageKey(listingId) { 108 | return `${listingId}-page`; 109 | } 110 | 111 | function 
refreshPaginationHandlers(listingId) { 112 | const listingEl = window.document.getElementById(listingId); 113 | const paginationEls = listingEl.querySelectorAll( 114 | ".pagination li.page-item:not(.disabled) .page.page-link" 115 | ); 116 | for (const paginationEl of paginationEls) { 117 | paginationEl.onclick = (sender) => { 118 | setPageHash(listingId, sender.target.getAttribute("data-i")); 119 | showPage(listingId, sender.target.getAttribute("data-i")); 120 | return false; 121 | }; 122 | } 123 | } 124 | 125 | function renderVisibleProgressiveImages(list) { 126 | // Run through the visible items and render any progressive images 127 | for (const item of list.visibleItems) { 128 | const itemEl = item.elm; 129 | if (itemEl) { 130 | const progressiveImgs = itemEl.querySelectorAll( 131 | `img[${kProgressiveAttr}]` 132 | ); 133 | for (const progressiveImg of progressiveImgs) { 134 | const srcValue = progressiveImg.getAttribute(kProgressiveAttr); 135 | if (srcValue) { 136 | progressiveImg.setAttribute("src", srcValue); 137 | } 138 | progressiveImg.removeAttribute(kProgressiveAttr); 139 | } 140 | } 141 | } 142 | } 143 | 144 | function getHash() { 145 | // Hashes are of the form 146 | // #name:value|name1:value1|name2:value2 147 | const currentUrl = new URL(window.location); 148 | const hashRaw = currentUrl.hash ? 
currentUrl.hash.slice(1) : undefined; 149 | return parseHash(hashRaw); 150 | } 151 | 152 | const kAnd = "&"; 153 | const kEquals = "="; 154 | 155 | function parseHash(hash) { 156 | if (!hash) { 157 | return undefined; 158 | } 159 | const hasValuesStrs = hash.split(kAnd); 160 | const hashValues = hasValuesStrs 161 | .map((hashValueStr) => { 162 | const vals = hashValueStr.split(kEquals); 163 | if (vals.length === 2) { 164 | return { name: vals[0], value: vals[1] }; 165 | } else { 166 | return undefined; 167 | } 168 | }) 169 | .filter((value) => { 170 | return value !== undefined; 171 | }); 172 | 173 | const hashObj = {}; 174 | hashValues.forEach((hashValue) => { 175 | hashObj[hashValue.name] = decodeURIComponent(hashValue.value); 176 | }); 177 | return hashObj; 178 | } 179 | 180 | function makeHash(obj) { 181 | return Object.keys(obj) 182 | .map((key) => { 183 | return `${key}${kEquals}${obj[key]}`; 184 | }) 185 | .join(kAnd); 186 | } 187 | 188 | function setHash(obj) { 189 | const hash = makeHash(obj); 190 | window.history.pushState(null, null, `#${hash}`); 191 | } 192 | 193 | function showPage(listingId, page) { 194 | const list = window["quarto-listings"][listingId]; 195 | if (list) { 196 | list.show((page - 1) * list.page + 1, list.page); 197 | } 198 | } 199 | 200 | function activateCategory(category) { 201 | // Deactivate existing categories 202 | const activeEls = window.document.querySelectorAll( 203 | ".quarto-listing-category .category.active" 204 | ); 205 | for (const activeEl of activeEls) { 206 | activeEl.classList.remove("active"); 207 | } 208 | 209 | // Activate this category 210 | const categoryEl = window.document.querySelector( 211 | `.quarto-listing-category .category[data-category='${category}'` 212 | ); 213 | if (categoryEl) { 214 | categoryEl.classList.add("active"); 215 | } 216 | 217 | // Filter the listings to this category 218 | filterListingCategory(category); 219 | } 220 | 221 | function filterListingCategory(category) { 222 | const 
listingIds = Object.keys(window["quarto-listings"]); 223 | for (const listingId of listingIds) { 224 | const list = window["quarto-listings"][listingId]; 225 | if (list) { 226 | if (category === "") { 227 | // resets the filter 228 | list.filter(); 229 | } else { 230 | // filter to this category 231 | list.filter(function (item) { 232 | const itemValues = item.values(); 233 | if (itemValues.categories !== null) { 234 | const categories = itemValues.categories.split(","); 235 | return categories.includes(category); 236 | } else { 237 | return false; 238 | } 239 | }); 240 | } 241 | } 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /_post_template.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Add title" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Add title" 9 | date: 2022-12-01 10 | draft: true 11 | categories: [add here] 12 | --- 13 | 14 | 15 | 16 | # Pre-lecture materials 17 | 18 | ### Read ahead 19 | 20 | ::: callout-note 21 | ## Read ahead 22 | 23 | **Before class, you can prepare by reading the following materials:** 24 | 25 | 1. Add here. 26 | 2. Add here. 27 | ::: 28 | 29 | ### Acknowledgements 30 | 31 | Material for this lecture was borrowed and adopted from 32 | 33 | - Add here. 34 | 35 | # Learning objectives 36 | 37 | ::: callout-note 38 | # Learning objectives 39 | 40 | **At the end of this lesson you will:** 41 | 42 | - Add here. 43 | ::: 44 | 45 | # Add lecture here 46 | 47 | # Post-lecture materials 48 | 49 | ### Final Questions 50 | 51 | Here are some post-lecture questions to help you think about the material discussed. 52 | 53 | ::: callout-note 54 | ### Questions 55 | 56 | 1. Add here. 57 | ::: 58 | 59 | ### Additional Resources 60 | 61 | ::: callout-tip 62 | - Add here. 
63 | ::: 64 | -------------------------------------------------------------------------------- /_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | render: 4 | - "*.qmd" 5 | - "!posts/2022-12-13-flexdashboard/*.Rmd" 6 | 7 | website: 8 | title: "Statistical Programming Paradigms and Workflows (BSPH 140.840)" 9 | description: "test test" 10 | navbar: 11 | right: 12 | - text: "Home" 13 | file: index.qmd 14 | - text: "General Information" 15 | menu: 16 | - text: "Syllabus" 17 | href: syllabus.qmd 18 | - text: "Schedule" 19 | href: schedule.qmd 20 | - text: "Course Materials" 21 | menu: 22 | - text: "Lectures" 23 | href: lectures.qmd 24 | - text: "Projects" 25 | href: projects.qmd 26 | - text: "Resources" 27 | href: resources.qmd 28 | - icon: github 29 | href: https://github.com/ 30 | - icon: twitter 31 | href: https://twitter.com 32 | format: 33 | html: 34 | theme: simplex 35 | toc: true 36 | css: styles.css 37 | code-overflow: wrap 38 | 39 | editor: source 40 | 41 | execute: 42 | freeze: auto 43 | -------------------------------------------------------------------------------- /data/Chinook.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/data/Chinook.sqlite -------------------------------------------------------------------------------- /data/nycflights13/nycflights13.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/data/nycflights13/nycflights13.sqlite -------------------------------------------------------------------------------- /images/cool_icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/cool_icon.png -------------------------------------------------------------------------------- /images/course_img.R: -------------------------------------------------------------------------------- 1 | # devtools::install_github("koenderks/aRtsy") 2 | 3 | library(aRtsy) 4 | canvas_stripes(colors = colorPalette("sky"), n=300, H = .3) 5 | 6 | -------------------------------------------------------------------------------- /images/course_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/course_img.png -------------------------------------------------------------------------------- /images/quarto_schematic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/quarto_schematic.png -------------------------------------------------------------------------------- /images/rstudio-hello.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/rstudio-hello.png -------------------------------------------------------------------------------- /images/rstudio-qmd-how-it-works.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/rstudio-qmd-how-it-works.png -------------------------------------------------------------------------------- /images/rstudio-render-button.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/rstudio-render-button.png -------------------------------------------------------------------------------- /images/rstudio-render-on-save.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/rstudio-render-on-save.png -------------------------------------------------------------------------------- /images/rstudio-render.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/rstudio-render.png -------------------------------------------------------------------------------- /images/rstudio-source-visual.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/rstudio-source-visual.png -------------------------------------------------------------------------------- /images/screen/BuildMenu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/BuildMenu.png -------------------------------------------------------------------------------- /images/screen/BuildPackage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/BuildPackage.png -------------------------------------------------------------------------------- /images/screen/BuildSourcePackage.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/BuildSourcePackage.png -------------------------------------------------------------------------------- /images/screen/ConfigureBuild.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/ConfigureBuild.png -------------------------------------------------------------------------------- /images/screen/ConfigureButton.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/ConfigureButton.png -------------------------------------------------------------------------------- /images/screen/EmptyRScript.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/EmptyRScript.png -------------------------------------------------------------------------------- /images/screen/FileMenu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/FileMenu.png -------------------------------------------------------------------------------- /images/screen/HelloPackage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/HelloPackage.png 
-------------------------------------------------------------------------------- /images/screen/InstallRestart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/InstallRestart.png -------------------------------------------------------------------------------- /images/screen/NewDirectory.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/NewDirectory.png -------------------------------------------------------------------------------- /images/screen/NewPackageOpening.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/NewPackageOpening.png -------------------------------------------------------------------------------- /images/screen/NewProject.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/NewProject.png -------------------------------------------------------------------------------- /images/screen/PackageBuilt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/PackageBuilt.png -------------------------------------------------------------------------------- /images/screen/PackageName.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/PackageName.png -------------------------------------------------------------------------------- /images/screen/SaveRScript.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/SaveRScript.png -------------------------------------------------------------------------------- /images/screen/UseDevTools.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/screen/UseDevTools.png -------------------------------------------------------------------------------- /images/shiny.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/shiny.png -------------------------------------------------------------------------------- /images/ssh_protocol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/ssh_protocol.png -------------------------------------------------------------------------------- /images/targets/Addins.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/targets/Addins.png -------------------------------------------------------------------------------- /images/targets/ModifyShortcuts.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/targets/ModifyShortcuts.png -------------------------------------------------------------------------------- /images/targets/ShortcutsMenu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/targets/ShortcutsMenu.png -------------------------------------------------------------------------------- /images/terminal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/images/terminal.png -------------------------------------------------------------------------------- /index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Welcome to Statistical Programming Paradigms and Workflows!" 3 | image: images/course_img.png 4 | about: 5 | template: jolla 6 | links: 7 | - icon: twitter 8 | text: Twitter 9 | href: https://twitter.com/stephaniehicks 10 | - icon: github 11 | text: Github 12 | href: https://github.com/stephaniehicks 13 | --- 14 | 15 | Welcome to Statistical Programming Paradigms and Workflows at Johns Hopkins Bloomberg School of Public Health! 16 | 17 | ## What is this course? 18 | 19 | This course covers advanced statistical computing programming paradigms and workflows required for the research and application of statistical methods. Includes the basics of programming in unix and/or using command-line tools, introduction to version control, advanced R and tidyverse skills, introduction to creating R packages with documentation, working with relational databases, introduction to functional programming, getting and using data from APIs, introduction to Shiny and dashboards. 
Topics in statistical data analysis provide working examples. 20 | 21 | ## Getting started 22 | 23 | I suggest that you start by looking over the [Syllabus](syllabus.qmd) and [Schedule](schedule.qmd) under **General Information**. After that, start with the Lectures content in the given order. 24 | 25 | ## Acknowledgements 26 | 27 | This course was developed and is maintained by [Stephanie Hicks](https://www.stephaniehicks.com). 28 | 29 | The following individuals have either contributed to improving the course or had materials adapted from their courses: [Roger D. Peng](https://github.com/rdpeng), [Andreas Handel](https://www.andreashandel.com), [Naim Rashid](https://naimurashid.github.io), [Michael Love](https://github.com/mikelove). 30 | 31 | The image above was generated with [aRtsy](https://github.com/koenderks/aRtsy). 32 | 33 | The course materials are licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/). Linked and embedded materials are governed by their own licenses. I assume that all external materials used or embedded here are covered under the educational fair use policy. If this is not the case and any material displayed here violates copyright, please let me know and I will remove it.
34 | -------------------------------------------------------------------------------- /jhustatprogramming2022.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /lectures.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Lectures" 3 | listing: 4 | contents: posts 5 | sort: "date desc" 6 | type: default 7 | categories: true 8 | sort-ui: false 9 | filter-ui: false 10 | page-layout: full 11 | title-block-banner: false 12 | --- 13 | 14 | -------------------------------------------------------------------------------- /posts/2022-10-27-build-website/diamond-sizes.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Diamond sizes" 3 | date: 2022-09-12 4 | format: html 5 | draft: true 6 | --- 7 | 8 | ```{r} 9 | #| label: setup 10 | #| include: false 11 | library(tidyverse) 12 | smaller <- diamonds |> 13 | filter(carat <= 2.5) 14 | ``` 15 | 16 | We have data about `r nrow(diamonds)` diamonds. 17 | Only `r nrow(diamonds) - nrow(smaller)` are larger than 2.5 carats. 
18 | The distribution of the remainder is shown below: 19 | 20 | ```{r} 21 | #| label: plot-smaller-diamonds 22 | #| echo: false 23 | smaller |> 24 | ggplot(aes(carat)) + 25 | geom_freqpoly(binwidth = 0.01) 26 | ``` 27 | -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/analysis.R: -------------------------------------------------------------------------------- 1 | head(mtcars) -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/combined_names.txt: -------------------------------------------------------------------------------- 1 | Funny-looking Kid 2 | Unsuffered Consequences 3 | Great Pumpkin 4 | December Snowflakes 5 | Gift-Getting Season 6 | Easter Beagle 7 | Roasted Marshmallows 8 | Trick or Treat 9 | Security Blanket 10 | Masked Marvel 11 | Good Sport 12 | Frisbee Sailing 13 | Warm Puppy 14 | Spring Dance 15 | Sock it to Me 16 | Pumpkin Helmet 17 | Smooth Sidewalk 18 | Full of Ingredients 19 | World-Famous Astronaut 20 | Fire Safety 21 | Wooden Christmas Tree 22 | Very Secure Dishes 23 | Very, Very Secure Dishes 24 | Supposedly Educational 25 | Bug in Your Hair 26 | Sincere Pumpkin Patch 27 | Another Canoe 28 | You Stupid Darkness 29 | Single Candle 30 | Short Summer 31 | Kite Eating Tree 32 | -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/package_names.txt: -------------------------------------------------------------------------------- 1 | 14. RJDBC 2 | 36. mlflow 3 | 58. alfr 4 | 10. aweek 5 | 31. BIGDAWG 6 | 22. vqtl 7 | 29. sspline 8 | 64. viridisLite 9 | 39. mev 10 | 66. SuppDists 11 | 15. MIAmaxent 12 | 31. BIGDAWG 13 | 29. sspline 14 | 99. Survgini 15 | 30. logNormReg 16 | 27. gLRTH 17 | 60. Eagle 18 | 71. kfigr 19 | 83. WPKDE 20 | 72. overlapping 21 | 90. widyr 22 | 33. tailr 23 | 40. MaxentVariableSelection 24 | 11. hdnom 25 | 26. blink 26 | 33. 
tailr 27 | 72. overlapping 28 | 18. gazepath 29 | 52. ClimMobTools 30 | 16. randtests 31 | 12. ltxsparklines 32 | 91. rgw 33 | 35. fermicatsR 34 | 44. expstudies 35 | 65. mined 36 | 81. mgcViz 37 | 21. corclass 38 | 68. AzureStor 39 | 45. solitude 40 | 9. pAnalysis 41 | 42. OptimaRegion 42 | 61. PropScrRand 43 | 74. crsra 44 | 65. mined 45 | 94. ICAOD 46 | 48. geoknife 47 | 45. solitude 48 | 67. tictactoe 49 | 46. cbsem 50 | 93. PathSelectMP 51 | 80. SpatioTemporal 52 | 23. disparityfilter 53 | 96. poisbinom 54 | 49. SemiParSampleSel 55 | 76. errorlocate 56 | 17. ASIP 57 | 88. SphericalK 58 | 5. pls 59 | 84. BIOMASS 60 | 59. AdMit 61 | 28. splithalfr 62 | 89. foretell 63 | 25. RPyGeo 64 | 50. mbir 65 | 51. interplot 66 | 6. BinOrdNonNor 67 | 47. SMARTp 68 | 38. BenfordTests 69 | 79. mvShapiroTest 70 | 77. SetMethods 71 | 92. BioCircos 72 | 53. MVB 73 | 2. odk 74 | 86. mongolite 75 | 55. hindexcalculator 76 | 4. TIMP 77 | 97. AnalyzeTS 78 | 41. rstudioapi 79 | 87. WGScan 80 | 57. generalhoslem 81 | 63. dagitty 82 | 69. FField 83 | 13. MaXact 84 | 24. SCRT 85 | 95. TSeriesMMA 86 | 73. VineCopula 87 | 82. breakfast 88 | 7. bayesbio 89 | 34. ibd 90 | 8. MVTests 91 | 19. mcmcabn 92 | 43. accept 93 | 78. sybilccFBA 94 | 62. lue 95 | 100. addhaz 96 | 56. MARSS 97 | 70. rsed 98 | 68. AzureStor 99 | 37. CombinePValue 100 | 85. edfReader 101 | 20. rless 102 | 75. pmdplyr 103 | 32. SPEDInstabR 104 | 3. redcapAPI 105 | 1. cyclocomp 106 | 70. rsed 107 | 98. SmallCountRounding 108 | 54. 
OxyBS 109 | -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/r_release.txt: -------------------------------------------------------------------------------- 1 | Funny-looking Kid 2 | -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/release_names.txt: -------------------------------------------------------------------------------- 1 | Unsuffered Consequences 2 | Great Pumpkin 3 | December Snowflakes 4 | Gift-Getting Season 5 | Easter Beagle 6 | Roasted Marshmallows 7 | Trick or Treat 8 | Security Blanket 9 | Masked Marvel 10 | Good Sport 11 | Frisbee Sailing 12 | Warm Puppy 13 | Spring Dance 14 | Sock it to Me 15 | Pumpkin Helmet 16 | Smooth Sidewalk 17 | Full of Ingredients 18 | World-Famous Astronaut 19 | Fire Safety 20 | Wooden Christmas Tree 21 | Very Secure Dishes 22 | Very, Very Secure Dishes 23 | Supposedly Educational 24 | Bug in Your Hair 25 | Sincere Pumpkin Patch 26 | Another Canoe 27 | You Stupid Darkness 28 | Single Candle 29 | Short Summer 30 | Kite Eating Tree 31 | -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/secret_directory/team_standings.csv: -------------------------------------------------------------------------------- 1 | "Standing","Team" 2 | 1,"Spain" 3 | 2,"Netherlands" 4 | 3,"Germany" 5 | 4,"Uruguay" 6 | 5,"Argentina" 7 | 6,"Brazil" 8 | 7,"Ghana" 9 | 8,"Paraguay" 10 | 9,"Japan" 11 | 10,"Chile" 12 | 11,"Portugal" 13 | 12,"USA" 14 | 13,"England" 15 | 14,"Mexico" 16 | 15,"South Korea" 17 | 16,"Slovakia" 18 | 17,"Ivory Coast" 19 | 18,"Slovenia" 20 | 19,"Switzerland" 21 | 20,"South Africa" 22 | 21,"Australia" 23 | 22,"New Zealand" 24 | 23,"Serbia" 25 | 24,"Denmark" 26 | 25,"Greece" 27 | 26,"Italy" 28 | 27,"Nigeria" 29 | 28,"Algeria" 30 | 29,"France" 31 | 30,"Honduras" 32 | 31,"Cameroon" 33 | 32,"North Korea" 34 | 
-------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/soccer_directory/team_standings.csv: -------------------------------------------------------------------------------- 1 | "Standing","Team" 2 | 1,"Spain" 3 | 2,"Netherlands" 4 | 3,"Germany" 5 | 4,"Uruguay" 6 | 5,"Argentina" 7 | 6,"Brazil" 8 | 7,"Ghana" 9 | 8,"Paraguay" 10 | 9,"Japan" 11 | 10,"Chile" 12 | 11,"Portugal" 13 | 12,"USA" 14 | 13,"England" 15 | 14,"Mexico" 16 | 15,"South Korea" 17 | 16,"Slovakia" 18 | 17,"Ivory Coast" 19 | 18,"Slovenia" 20 | 19,"Switzerland" 21 | 20,"South Africa" 22 | 21,"Australia" 23 | 22,"New Zealand" 24 | 23,"Serbia" 25 | 24,"Denmark" 26 | 25,"Greece" 27 | 26,"Italy" 28 | 27,"Nigeria" 29 | 28,"Algeria" 30 | 29,"France" 31 | 30,"Honduras" 32 | 31,"Cameroon" 33 | 32,"North Korea" 34 | -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/team_standings.csv: -------------------------------------------------------------------------------- 1 | "Standing","Team" 2 | 1,"Spain" 3 | 2,"Netherlands" 4 | 3,"Germany" 5 | 4,"Uruguay" 6 | 5,"Argentina" 7 | 6,"Brazil" 8 | 7,"Ghana" 9 | 8,"Paraguay" 10 | 9,"Japan" 11 | 10,"Chile" 12 | 11,"Portugal" 13 | 12,"USA" 14 | 13,"England" 15 | 14,"Mexico" 16 | 15,"South Korea" 17 | 16,"Slovakia" 18 | 17,"Ivory Coast" 19 | 18,"Slovenia" 20 | 19,"Switzerland" 21 | 20,"South Africa" 22 | 21,"Australia" 23 | 22,"New Zealand" 24 | 23,"Serbia" 25 | 24,"Denmark" 26 | 25,"Greece" 27 | 26,"Italy" 28 | 27,"Nigeria" 29 | 28,"Algeria" 30 | 29,"France" 31 | 30,"Honduras" 32 | 31,"Cameroon" 33 | 32,"North Korea" 34 | -------------------------------------------------------------------------------- /posts/2022-11-01-command-line-part-1/team_standings_3.csv: -------------------------------------------------------------------------------- 1 | "Standing","Team" 2 | 1,"Spain" 3 | 2,"Netherlands" 4 | 3,"Germany" 5 | 4,"Uruguay" 6 | 5,"Argentina" 7 
| 6,"Brazil" 8 | 7,"Ghana" 9 | 8,"Paraguay" 10 | 9,"Japan" 11 | 10,"Chile" 12 | 11,"Portugal" 13 | 12,"USA" 14 | 13,"England" 15 | 14,"Mexico" 16 | 15,"South Korea" 17 | 16,"Slovakia" 18 | 17,"Ivory Coast" 19 | 18,"Slovenia" 20 | 19,"Switzerland" 21 | 20,"South Africa" 22 | 21,"Australia" 23 | 22,"New Zealand" 24 | 23,"Serbia" 25 | 24,"Denmark" 26 | 25,"Greece" 27 | 26,"Italy" 28 | 27,"Nigeria" 29 | 28,"Algeria" 30 | 29,"France" 31 | 30,"Honduras" 32 | 31,"Cameroon" 33 | 32,"North Korea" 34 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/creatures/basilisk.dat: -------------------------------------------------------------------------------- 1 | COMMON NAME: basilisk 2 | CLASSIFICATION: basiliscus vulgaris 3 | UPDATED: 1745-05-02 4 | CCCCAACGAG 5 | GAAACAGATC 6 | ATTAGAAGAT 7 | CTGTCGCGAA 8 | CCGCACCTCT 9 | CCTATCTACA 10 | TGTTTGTCTC 11 | TGGGTGGGGA 12 | TCCATAGGCA 13 | GCATTACCAG 14 | CACCCTACGA 15 | TAAGGACTTC 16 | CGTCAGAGAT 17 | TTCCTGGTAT 18 | TATACAGCTC 19 | CTAGTGTTAT 20 | CCAGTTTGTG 21 | TCGTCCCATA 22 | GCCAGCAAGA 23 | GCCAAACAAA 24 | AGCCGGGTCG 25 | CTTTACCTTA 26 | AAGCCGAGGG 27 | GGGTGGTACG 28 | CCGAACATAA 29 | ACGCTTTAAC 30 | GTCCCTCCAG 31 | GCTGATAATC 32 | GTTTAAGCAC 33 | ACGTGGTCTA 34 | GAGTTGCGCT 35 | TACCGGACAA 36 | ACCTGCGCGT 37 | TGAAAGGGTT 38 | ACGCCTGGTA 39 | CGAAATAAGA 40 | CGAACCCCAG 41 | GACCCAGCAG 42 | ACAAAGGAAC 43 | GTGCTAGGCC 44 | CATATAATCA 45 | GGTAGATCGA 46 | TCTCTCTCCT 47 | AAGTTGTGGT 48 | CAAACAGGCG 49 | CGCGCTAGTT 50 | GGGTATGCCC 51 | GCCCAATAAC 52 | TGGTGGGGCC 53 | TGTGTGTAAA 54 | CTTCGATCAA 55 | TTTTGCAATT 56 | TATGTGCAGC 57 | TAACTGAATA 58 | TGTGATGTGT 59 | TGGAACCCGG 60 | ACTTCCTTTA 61 | CATTTCGCGC 62 | CCTAGTGTTT 63 | CAAGCGTCGC 64 | TGAGGTTATG 65 | ATTTATAGGA 66 | CACACTCAGC 67 | CGCGAACACA 68 | CAGTGTCATG 69 | ACTAAGTAGA 70 | ATCACCCATA 71 | TTCTCGTTAG 72 | TGCGATTGTA 73 | CCGCTTGAAC 74 | CTCGTATGCC 75 | TGGACCCGCT 76 | TCGATTTACA 77 | GCAATTCGAC 78 | 
TGGTGGTGAT 79 | TAACTTGAAG 80 | GATATGGTTT 81 | CGGTACCGAA 82 | AAGGGTCGCG 83 | CAAGTGTTCC 84 | CGGGACAATA 85 | GTTCTGCTAA 86 | GATAAGTATG 87 | TGCCGACTTA 88 | CCCGACCGTC 89 | TAGGTTATAA 90 | GGCACAACCG 91 | CTTCACTGTA 92 | GAGGTGTACA 93 | AGGATCCGTT 94 | GCGCGGGCGG 95 | CAGTCTATGT 96 | TTTTCGACAC 97 | TGGACTGCTT 98 | CCCTTTGAGG 99 | GTGGATTTTT 100 | CGTAACGGGT 101 | GAGTCCCTTG 102 | TGATTCCAGA 103 | CACGGGTTGC 104 | CGGGCGCTAC 105 | CCGCCGACGC 106 | CAGGCTTGGC 107 | AAGAGCGGTA 108 | TAAGGGCTCG 109 | CTACCACTTG 110 | TACTAGACTG 111 | ATTTGCGGAC 112 | GGTAGTAGCG 113 | CGGTGATCCC 114 | TATCATACGG 115 | GGTTTGGTGT 116 | ATGAGCCCTG 117 | CTGTCTTTTC 118 | TAGCGTACCA 119 | CAGATTGAAC 120 | TCCTCTTCTC 121 | GCGTTTCAGC 122 | GGTTAAAATT 123 | GCGATCCTCT 124 | CAGACCTTTG 125 | GTTGCCTTCG 126 | GTCCTTCCAC 127 | GCCTTCCTAT 128 | GATAGTAGCC 129 | TCACCTTCCA 130 | TCTCAAATCC 131 | CTCCCGCGAC 132 | CTTTGATTGT 133 | ACCGTTCAAG 134 | CCCTGACATA 135 | CCCACTCAAT 136 | ATTGACCCCT 137 | GCCGTATAGA 138 | CGTTGTCGGG 139 | ATAGCCAGAA 140 | CCAGTACTCT 141 | CTATCGGAAG 142 | CGTGACAGGC 143 | CCAAATACCT 144 | CGTTTAAAGC 145 | TGGACGCATC 146 | ACACTTGCTT 147 | GACTAACAGA 148 | GTGCTGGCCC 149 | GTGTTATTCA 150 | GCTAATGCAT 151 | CATGGGATCA 152 | GACGCCTGTA 153 | TATACAATTT 154 | TCAACTGCTC 155 | GAACTACTCG 156 | CCAAAACACT 157 | TTCGACTAGG 158 | CGATCGACCA 159 | AGAAAAGAGA 160 | CTGGACTGCT 161 | TACAGAGGTT 162 | TGAACTTTAC 163 | CGCGCCCACC 164 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/creatures/minotaur.dat: -------------------------------------------------------------------------------- 1 | COMMON NAME: minotaur 2 | CLASSIFICATION: bos hominus 3 | UPDATED: 1765-02-17 4 | CCCGAAGGAC 5 | CGACATCTCT 6 | TCTGAGCCCC 7 | GTCAAAACAC 8 | TTGAATCGTC 9 | TGCGGGTCGC 10 | TGCCGGTGTC 11 | TATCAGTCCC 12 | CACGTTCTCG 13 | GAAGCCCCCG 14 | TTAACCGGCA 15 | ATATGAGTCC 16 | GATCATATCG 17 | AGAGCGAGGT 18 | GTACCCGGGC 19 | CTTTTCTGCA 20 
| GCTCTGCAGC 21 | TACCTATGCG 22 | GACCCCGGTC 23 | TTCGATTGTT 24 | GACTACCTCT 25 | GCTAGGCATT 26 | CCGTTACCAA 27 | CAGAATCAGA 28 | AGTCACTATA 29 | ACTGAGTCAC 30 | GGTTTCTCCC 31 | AGCAACGGTT 32 | GAAAGGGTTA 33 | ACTGGGACTT 34 | TCACGGATTG 35 | GGTCGTTTAT 36 | TATCCGCTCC 37 | CCTGTGTTAG 38 | AGCTCAGTCC 39 | ATTGGACTCG 40 | TCTCGTCAAT 41 | GACATTTCGC 42 | ATATAAATTA 43 | CCGTCCAGGG 44 | TCGCATTTAT 45 | ATCTTAGTCT 46 | GAGGGATGGC 47 | TAAGTCTCTC 48 | TCGGCTTGAT 49 | CGGTCTGCAT 50 | TGTAATTAAA 51 | TGTAACTCCG 52 | TCTTACCTCG 53 | GACAATTGAG 54 | CGCTGGGTAA 55 | TATTACGTAG 56 | CATCTATTTA 57 | AAGAGGCTCA 58 | CCCGGTCAAA 59 | AGAGACTGAC 60 | CGGCCGGACG 61 | AGCGTTGAAC 62 | GGACCTCCTT 63 | CAGGCCTATG 64 | TCTCTCTAGC 65 | CAATAACAGG 66 | TCAAGTCCGA 67 | TATCGCACAA 68 | TTATCTGCTA 69 | ATAATCCATA 70 | AGACGGGATG 71 | TTGGGAAAAA 72 | ATCGGAGTTC 73 | ATTGCCAGGA 74 | AAGCATTGAA 75 | GTGGTTCCCG 76 | GCCACTAGTA 77 | CAGTCGACGC 78 | TGCGAGAGTA 79 | CGATCAGGCG 80 | CCCTAGGACG 81 | GGTTGGACCG 82 | AGTGAACGAA 83 | GCGATTGATG 84 | CCCAGTATTG 85 | GTTTCGGGTC 86 | CCACCAGGAA 87 | CAATGTTGGA 88 | CTTTAGCACG 89 | ACAGGAATCA 90 | ATCGTTTCGT 91 | AATGGTGGCA 92 | CTTTGGGGTT 93 | GCAGGCCCGC 94 | CTGTGCCTTC 95 | AACGGTCGGC 96 | AAATAGGACA 97 | GAGATTTCCT 98 | GCCTCAAACA 99 | TCGCAAGACG 100 | ATTTATCCGG 101 | GAGTCCCAGT 102 | GGGTTCTCGA 103 | GTTTAAGCTG 104 | CAAGCATCCT 105 | GTTACCCCGA 106 | GTGCTGACAG 107 | GCACGAGACC 108 | AGGATTTATC 109 | GGGCACGCGC 110 | CCATCTGTAA 111 | TTTTGTTTTG 112 | GCGTAGAGGC 113 | ATTGCAATTT 114 | AAAAGGTCTC 115 | TTGCGCTGCC 116 | AGTAAGGTCC 117 | CAACGAAGAC 118 | TAGCCCGATC 119 | TCTGCATTCA 120 | AGAGTGACTT 121 | GTCGGTACCG 122 | GCTTAATTAC 123 | AGAGCCGGTC 124 | TCCATTCGAG 125 | ATGAGCATTC 126 | ACATGCGGGG 127 | ATGCTGAGAG 128 | GTTACCAGCG 129 | TGGGGGGCGG 130 | GCTAGCTGAG 131 | TGACTAGCAA 132 | TGTACTCACA 133 | CATATCGCAA 134 | GTAGGGAAGC 135 | ACCTCCTCCT 136 | TTGTTCGTCA 137 | CGGACGTGTC 138 | TAAGGGTAAT 139 | GGCAAGTGCT 140 | TCGATCGCTC 141 | AATTTTCGTA 142 | 
TATTTACCAC 143 | ACAAATAAGG 144 | TATACCCCTC 145 | GCCAACAATA 146 | ACCTGAATAG 147 | CGGAGCGCTC 148 | CGGTCTCCGC 149 | AATGAATCCT 150 | GGTCACAAAA 151 | CGATGCGGCA 152 | ATTGGGGCTA 153 | ATGGGATCCC 154 | GCATACCGCC 155 | ATTCCTCTCC 156 | CCTTACTTAG 157 | TATTGTGCTG 158 | AATACGACAT 159 | ATAGGCATTA 160 | AGGCACTCGT 161 | CTATAATGAG 162 | TCCAGTCCCA 163 | GCCTTCACGG 164 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/creatures/unicorn.dat: -------------------------------------------------------------------------------- 1 | COMMON NAME: unicorn 2 | CLASSIFICATION: equus monoceros 3 | UPDATED: 1738-11-24 4 | AGCCGGGTCG 5 | CTTTACCTTA 6 | AAGCCGAGGG 7 | GGGTGGTACG 8 | CCGAACATAA 9 | ACGCTTTAAC 10 | GTCCCTCCAG 11 | GCTGATAATC 12 | GTTTAAGCAC 13 | ACGTGGTCTA 14 | GAGTTGCGCT 15 | TACCGGACAA 16 | ACCTGCGCGT 17 | TGAAAGGGTT 18 | ACGCCTGGTA 19 | CGAAATAAGA 20 | CGAACCCCAG 21 | GACCCAGCAG 22 | ACAAAGGAAC 23 | GTGCTAGGCC 24 | AGCCGGGTCG 25 | CTTTACCTTA 26 | AAGCCGAGGG 27 | GGGTGGTACG 28 | CCGAACATAA 29 | ACGCTTTAAC 30 | GTCCCTCCAG 31 | GCTGATAATC 32 | GTTTAAGCAC 33 | ACGTGGTCTA 34 | GAGTTGCGCT 35 | TACCGGACAA 36 | ACCTGCGCGT 37 | TGAAAGGGTT 38 | ACGCCTGGTA 39 | CGAAATAAGA 40 | CGAACCCCAG 41 | GACCCAGCAG 42 | ACAAAGGAAC 43 | GTGCTAGGCC 44 | CATATAATCA 45 | GGTAGATCGA 46 | TCTCTCTCCT 47 | AAGTTGTGGT 48 | CAAACAGGCG 49 | CGCGCTAGTT 50 | GGGTATGCCC 51 | GCCCAATAAC 52 | TGGTGGGGCC 53 | TGTGTGTAAA 54 | CTTCGATCAA 55 | TTTTGCAATT 56 | TATGTGCAGC 57 | TAACTGAATA 58 | TGTGATGTGT 59 | TGGAACCCGG 60 | ACTTCCTTTA 61 | CATTTCGCGC 62 | CCTAGTGTTT 63 | CAAGCGTCGC 64 | TGAGGTTATG 65 | ATTTATAGGA 66 | CACACTCAGC 67 | CGCGAACACA 68 | CAGTGTCATG 69 | ACTAAGTAGA 70 | ATCACCCATA 71 | TTCTCGTTAG 72 | TGCGATTGTA 73 | CCGCTTGAAC 74 | CTCGTATGCC 75 | TGGACCCGCT 76 | TCGATTTACA 77 | GCAATTCGAC 78 | TGGTGGTGAT 79 | TAACTTGAAG 80 | GATATGGTTT 81 | CGGTACCGAA 82 | AAGGGTCGCG 83 | CAAGTGTTCC 84 | CGGGACAATA 85 | GTTCTGCTAA 86 | 
GATAAGTATG 87 | TGCCGACTTA 88 | CCCGACCGTC 89 | TAGGTTATAA 90 | GGCACAACCG 91 | CTTCACTGTA 92 | GAGGTGTACA 93 | AGGATCCGTT 94 | GCGCGGGCGG 95 | CAGTCTATGT 96 | TTTTCGACAC 97 | TGGACTGCTT 98 | CCCTTTGAGG 99 | GTGGATTTTT 100 | CGTAACGGGT 101 | GAGTCCCTTG 102 | TGATTCCAGA 103 | CACGGGTTGC 104 | CGGGCGCTAC 105 | CCGCCGACGC 106 | CAGGCTTGGC 107 | AAGAGCGGTA 108 | TAAGGGCTCG 109 | CTACCACTTG 110 | TACTAGACTG 111 | ATTTGCGGAC 112 | GGTAGTAGCG 113 | CGGTGATCCC 114 | TATCATACGG 115 | GGTTTGGTGT 116 | ATGAGCCCTG 117 | CTGTCTTTTC 118 | TAGCGTACCA 119 | CAGATTGAAC 120 | TCCTCTTCTC 121 | GCGTTTCAGC 122 | GGTTAAAATT 123 | GCGATCCTCT 124 | CAGACCTTTG 125 | GTTGCCTTCG 126 | GTCCTTCCAC 127 | GCCTTCCTAT 128 | GATAGTAGCC 129 | TCACCTTCCA 130 | TCTCAAATCC 131 | CTCCCGCGAC 132 | CTTTGATTGT 133 | ACCGTTCAAG 134 | CCCTGACATA 135 | CCCACTCAAT 136 | ATTGACCCCT 137 | GCCGTATAGA 138 | CGTTGTCGGG 139 | ATAGCCAGAA 140 | CCAGTACTCT 141 | CTATCGGAAG 142 | CGTGACAGGC 143 | CCAAATACCT 144 | CGTTTAAAGC 145 | TGGACGCATC 146 | ACACTTGCTT 147 | GACTAACAGA 148 | GTGCTGGCCC 149 | GTGTTATTCA 150 | GCTAATGCAT 151 | CATGGGATCA 152 | GACGCCTGTA 153 | TATACAATTT 154 | TCAACTGCTC 155 | GAACTACTCG 156 | CCAAAACACT 157 | TTCGACTAGG 158 | CGATCGACCA 159 | AGAAAAGAGA 160 | CTGGACTGCT 161 | TACAGAGGTT 162 | TGAACTTTAC 163 | CGCGCCCACC 164 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/lengths.txt: -------------------------------------------------------------------------------- 1 | 20 proteins/cubane.pdb 2 | 12 proteins/ethane.pdb 3 | 9 proteins/methane.pdb 4 | 30 proteins/octane.pdb 5 | 21 proteins/pentane.pdb 6 | 15 proteins/propane.pdb 7 | 107 total 8 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/proteins/cubane.pdb: -------------------------------------------------------------------------------- 1 | COMPND CUBANE 2 | AUTHOR DAVE WOODCOCK 95 12 06 3 | ATOM 1 C 1 
0.789 -0.852 0.504 1.00 0.00 4 | ATOM 2 C 1 -0.161 -1.104 -0.624 1.00 0.00 5 | ATOM 3 C 1 -1.262 -0.440 0.160 1.00 0.00 6 | ATOM 4 C 1 -0.289 -0.202 1.284 1.00 0.00 7 | ATOM 5 C 1 1.203 0.513 -0.094 1.00 0.00 8 | ATOM 6 C 1 0.099 1.184 0.694 1.00 0.00 9 | ATOM 7 C 1 -0.885 0.959 -0.460 1.00 0.00 10 | ATOM 8 C 1 0.236 0.283 -1.269 1.00 0.00 11 | ATOM 9 H 1 1.410 -1.631 0.942 1.00 0.00 12 | ATOM 10 H 1 -0.262 -2.112 -1.024 1.00 0.00 13 | ATOM 11 H 1 -2.224 -0.925 0.328 1.00 0.00 14 | ATOM 12 H 1 -0.468 -0.501 2.315 1.00 0.00 15 | ATOM 13 H 1 2.224 0.892 -0.134 1.00 0.00 16 | ATOM 14 H 1 0.240 2.112 1.251 1.00 0.00 17 | ATOM 15 H 1 -1.565 1.730 -0.831 1.00 0.00 18 | ATOM 16 H 1 0.472 0.494 -2.315 1.00 0.00 19 | TER 17 1 20 | END 21 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/proteins/ethane.pdb: -------------------------------------------------------------------------------- 1 | COMPND ETHANE 2 | AUTHOR DAVE WOODCOCK 95 12 18 3 | ATOM 1 C 1 -0.752 0.001 -0.141 1.00 0.00 4 | ATOM 2 C 1 0.752 -0.001 0.141 1.00 0.00 5 | ATOM 3 H 1 -1.158 0.991 0.070 1.00 0.00 6 | ATOM 4 H 1 -1.240 -0.737 0.496 1.00 0.00 7 | ATOM 5 H 1 -0.924 -0.249 -1.188 1.00 0.00 8 | ATOM 6 H 1 1.158 -0.991 -0.070 1.00 0.00 9 | ATOM 7 H 1 0.924 0.249 1.188 1.00 0.00 10 | ATOM 8 H 1 1.240 0.737 -0.496 1.00 0.00 11 | TER 9 1 12 | END 13 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/proteins/methane.pdb: -------------------------------------------------------------------------------- 1 | COMPND METHANE 2 | AUTHOR DAVE WOODCOCK 95 12 18 3 | ATOM 1 C 1 0.257 -0.363 0.000 1.00 0.00 4 | ATOM 2 H 1 0.257 0.727 0.000 1.00 0.00 5 | ATOM 3 H 1 0.771 -0.727 0.890 1.00 0.00 6 | ATOM 4 H 1 0.771 -0.727 -0.890 1.00 0.00 7 | ATOM 5 H 1 -0.771 -0.727 0.000 1.00 0.00 8 | TER 6 1 9 | END 10 | -------------------------------------------------------------------------------- 
/posts/2022-11-03-command-line-part-2/proteins/octane.pdb: -------------------------------------------------------------------------------- 1 | COMPND OCTANE 2 | AUTHOR DAVE WOODCOCK 96 01 05 3 | ATOM 1 C 1 -4.397 0.370 -0.255 1.00 0.00 4 | ATOM 2 C 1 -3.113 -0.447 -0.421 1.00 0.00 5 | ATOM 3 C 1 -1.896 0.386 -0.007 1.00 0.00 6 | ATOM 4 C 1 -0.611 -0.426 -0.198 1.00 0.00 7 | ATOM 5 C 1 0.608 0.405 0.216 1.00 0.00 8 | ATOM 6 C 1 1.892 -0.400 0.001 1.00 0.00 9 | ATOM 7 C 1 3.113 0.429 0.414 1.00 0.00 10 | ATOM 8 C 1 4.397 -0.374 0.199 1.00 0.00 11 | ATOM 9 H 1 -4.502 0.681 0.785 1.00 0.00 12 | ATOM 10 H 1 -5.254 -0.243 -0.537 1.00 0.00 13 | ATOM 11 H 1 -4.357 1.252 -0.895 1.00 0.00 14 | ATOM 12 H 1 -3.009 -0.741 -1.467 1.00 0.00 15 | ATOM 13 H 1 -3.172 -1.337 0.206 1.00 0.00 16 | ATOM 14 H 1 -1.992 0.668 1.044 1.00 0.00 17 | ATOM 15 H 1 -1.849 1.286 -0.621 1.00 0.00 18 | ATOM 16 H 1 -0.515 -0.707 -1.248 1.00 0.00 19 | ATOM 17 H 1 -0.659 -1.326 0.417 1.00 0.00 20 | ATOM 18 H 1 0.520 0.671 1.270 1.00 0.00 21 | ATOM 19 H 1 0.645 1.314 -0.386 1.00 0.00 22 | ATOM 20 H 1 1.979 -0.666 -1.054 1.00 0.00 23 | ATOM 21 H 1 1.855 -1.309 0.604 1.00 0.00 24 | ATOM 22 H 1 3.030 0.696 1.467 1.00 0.00 25 | ATOM 23 H 1 3.155 1.337 -0.188 1.00 0.00 26 | ATOM 24 H 1 4.493 -0.641 -0.854 1.00 0.00 27 | ATOM 25 H 1 4.368 -1.282 0.801 1.00 0.00 28 | ATOM 26 H 1 5.254 0.230 0.498 1.00 0.00 29 | TER 27 1 30 | END 31 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/proteins/pentane.pdb: -------------------------------------------------------------------------------- 1 | COMPND PENTANE 2 | AUTHOR DAVE WOODCOCK 95 12 18 3 | ATOM 1 C 1 2.484 -0.389 0.322 1.00 0.00 4 | ATOM 2 C 1 1.261 0.350 -0.243 1.00 0.00 5 | ATOM 3 C 1 -0.027 -0.348 0.199 1.00 0.00 6 | ATOM 4 C 1 -1.249 0.421 -0.326 1.00 0.00 7 | ATOM 5 C 1 -2.536 -0.311 0.047 1.00 0.00 8 | ATOM 6 H 1 2.471 -1.420 -0.033 1.00 0.00 9 | ATOM 7 H 1 2.443 -0.371 1.412 1.00 0.00 
10 | ATOM 8 H 1 3.393 0.112 -0.016 1.00 0.00 11 | ATOM 9 H 1 1.324 0.350 -1.332 1.00 0.00 12 | ATOM 10 H 1 1.271 1.378 0.122 1.00 0.00 13 | ATOM 11 H 1 -0.074 -0.384 1.288 1.00 0.00 14 | ATOM 12 H 1 -0.048 -1.362 -0.205 1.00 0.00 15 | ATOM 13 H 1 -1.183 0.500 -1.412 1.00 0.00 16 | ATOM 14 H 1 -1.259 1.420 0.112 1.00 0.00 17 | ATOM 15 H 1 -2.608 -0.407 1.130 1.00 0.00 18 | ATOM 16 H 1 -2.540 -1.303 -0.404 1.00 0.00 19 | ATOM 17 H 1 -3.393 0.254 -0.321 1.00 0.00 20 | TER 18 1 21 | END 22 | -------------------------------------------------------------------------------- /posts/2022-11-03-command-line-part-2/proteins/propane.pdb: -------------------------------------------------------------------------------- 1 | COMPND PROPANE 2 | AUTHOR DAVE WOODCOCK 95 12 18 3 | ATOM 1 C 1 1.241 0.444 0.349 1.00 0.00 4 | ATOM 2 C 1 -0.011 -0.441 0.333 1.00 0.00 5 | ATOM 3 C 1 -1.176 0.296 -0.332 1.00 0.00 6 | ATOM 4 H 1 1.516 0.699 -0.675 1.00 0.00 7 | ATOM 5 H 1 2.058 -0.099 0.827 1.00 0.00 8 | ATOM 6 H 1 1.035 1.354 0.913 1.00 0.00 9 | ATOM 7 H 1 -0.283 -0.691 1.359 1.00 0.00 10 | ATOM 8 H 1 0.204 -1.354 -0.225 1.00 0.00 11 | ATOM 9 H 1 -0.914 0.551 -1.359 1.00 0.00 12 | ATOM 10 H 1 -1.396 1.211 0.219 1.00 0.00 13 | ATOM 11 H 1 -2.058 -0.345 -0.332 1.00 0.00 14 | TER 12 1 15 | END 16 | -------------------------------------------------------------------------------- /posts/2022-11-22-r-pkg-dev-part-2/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "R Package Development: Testing" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Introduction to unit testing when building R packages." 
9 | date: 2022-11-22 10 | draft: true 11 | categories: [module 2, week 5, R, package development, testing, programming] 12 | --- 13 | 14 | 15 | 16 | # Pre-lecture materials 17 | 18 | ### Read ahead 19 | 20 | ::: callout-note 21 | ## Read ahead 22 | 23 | **Before class, you can prepare by reading the following materials:** 24 | 25 | 1. Add here. 26 | 2. Add here. 27 | ::: 28 | 29 | ### Acknowledgements 30 | 31 | Material for this lecture was borrowed and adopted from 32 | 33 | - Add here. 34 | 35 | # Learning objectives 36 | 37 | ::: callout-note 38 | # Learning objectives 39 | 40 | **At the end of this lesson you will:** 41 | 42 | - Add here. 43 | ::: 44 | 45 | # Add lecture here 46 | 47 | # Post-lecture materials 48 | 49 | ### Final Questions 50 | 51 | Here are some post-lecture questions to help you think about the material discussed. 52 | 53 | ::: callout-note 54 | ### Questions 55 | 56 | 1. Add here. 57 | ::: 58 | 59 | ### Additional Resources 60 | 61 | ::: callout-tip 62 | - Add here. 63 | ::: 64 | -------------------------------------------------------------------------------- /posts/2022-11-29-purrr-fun-programming/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Functional Programming with `purrr`" 3 | author: 4 | - name: Boyi Guo 5 | url: https://boyiguo1.github.io 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Introduction to tools to work with functions and vectors in R" 9 | date: 2022-11-29 10 | categories: [module 3, week 6, functions, functional, programming, purrr] 11 | --- 12 | 13 | 14 | 15 | # Pre-lecture materials 16 | 17 | ### Read ahead 18 | 19 | ::: callout-note 20 | ## Read ahead 21 | 22 | **Before class, you can prepare by reading the following materials:** 23 | 24 | 1. 25 | 2. 
26 | ::: 27 | 28 | 29 | ### Prerequisites 30 | Before starting you must install the additional package: 31 | 32 | * `purrr` - this provides a consistent functional programming interface to work with functions and vectors 33 | 34 | You can do this by calling 35 | 36 | ```{r} 37 | #| eval: false 38 | install.packages("purrr") 39 | ``` 40 | 41 | or use the “Install Packages…” option from the “Tools” menu in RStudio. 42 | 43 | 44 | 45 | ### Acknowledgements 46 | 47 | Material for this lecture was borrowed and adopted from 48 | 49 | - 50 | - 51 | - 52 | 53 | # Learning objectives 54 | 55 | ::: callout-note 56 | # Learning objectives 57 | 58 | **At the end of this lesson you will:** 59 | 60 | - Be familiar with the concept of _functional programming_ 61 | - Get comfortable with the major functions in `purrr`, e.g. the `map` family, `reduce` 62 | - Write your loops with `map` functions instead of the `for` loop 63 | ::: 64 | 65 | # Functional Programming 66 | ## The characteristics 67 | At its core, functional programming treats functions like any other data structure, i.e. as **first class functions**. 68 | 69 | > In R, this means that you can do many of the things with a function that you can do with a vector: you can assign them to variables, store them in lists, pass them as arguments to other functions, create them inside functions, and even return them as the result of a function. 70 | 71 | ## What do you mean?
72 | 73 | * Assign a function to a variable 74 | ```{r} 75 | foo <- function(){ 76 | return("This is foo.") 77 | } 78 | class(foo) 79 | ``` 80 | 81 | * Store functions in a list 82 | ```{r} 83 | foo_list <- list( 84 | fun_1 = function() return("foo_1"), 85 | fun_2 = function() return("foo_2") 86 | ) 87 | 88 | str(foo_list) 89 | ``` 90 | 91 | * Pass functions as arguments to other functions 92 | ```{r} 93 | shell <- function(f) f() 94 | shell(foo_list$fun_1) 95 | shell(foo_list$fun_2) 96 | ``` 97 | 98 | * Create functions inside of functions & return them as the result of a function 99 | ```{r} 100 | foo_wrap <- function(){ 101 | foo_2 <- function(){ 102 | return("This is foo_2.") 103 | } 104 | return(foo_2) 105 | } 106 | 107 | foo_wrap() 108 | (foo_wrap())() 109 | ``` 110 | 111 | The bottom line: you can manipulate functions in the same way as you can a vector or a matrix. 112 | 113 | ## Why is functional programming important? 114 | Functional programming introduces a new style of programming, namely **functional style**. Broadly speaking, this programming style encourages programmers to write a big function as many smaller isolated functions, where each function addresses one specific task. 115 | 116 | 117 | 118 | As a by-product, **functional style** motivates more human-readable and reusable code. 119 | ```{r} 120 | #| eval: false 121 | 122 | "data_set.csv" |> 123 | import_data_from_file() |> 124 | data_cleaning() |> 125 | run_regression() |> 126 | model_diagnostics() |> 127 | model_visualization() 128 | 129 | "data_set2.csv" |> 130 | import_data_from_file() |> 131 | data_cleaning() |> 132 | run_different_regression() |> 133 | model_diagnostics() |> 134 | model_visualization() 135 | ``` 136 | 137 | ::: callout-tip 138 | ### Pipe operators 139 | R provides some pipe operators to make code readable, e.g. `|>` from the base R, `%>%` from the package `magrittr`.
These pipe operators operate like a pipe, piping the output from the previous function (left hand side of the pipe operator) to the following function (right hand side of the pipe operator). The pipe operator `|>` was introduced in R 4.1.0 and requires no loading of additional packages, unlike `%>%`. 140 | 141 | A keyboard shortcut to type a pipe operator in RStudio is `shift+cmd+m` for Mac or `shift+ctrl+m` in Windows. 142 | ::: 143 | 144 | # `purrr`: the functional programming toolkit 145 | 146 | 147 | 148 | The R package `purrr`, as one important component of the [`tidyverse`](https://www.tidyverse.org/), provides an interface to manipulate vectors in the _functional style_. 149 | 150 | > `purrr` enhances R’s functional programming (FP) toolkit by providing a complete and consistent set of tools for working with functions and vectors. 151 | 152 | ::: callout-tip 153 | ### `purrr` cheatsheet 154 | It is very difficult, if not impossible, to remember all functions that a package offers as well as their use cases. Hence, `purrr` developers offer a nice compact cheatsheet with visualizations at . Similar cheatsheets are available for other `tidyverse` packages. 155 | ::: 156 | 157 | The most popular function in `purrr` is `map()`, which iterates over the supplied data structure and applies a function during the iterations. Besides the `map` function, `purrr` also offers a series of useful functions to manipulate the `list` data structure. 158 | 159 | 160 | ## The `map` family 161 | The `map` family of functions provides a convenient way to iterate through vectors or lists and apply functions during this iteration. Depending on the dimension of the input and the format of the output, there are many different variants of the basic `map` function.
162 | 163 | ::: callout-tip 164 | ### How does `map` relate to functional programming 165 | Because their arguments include functions (`.f`) besides data (`.x`), `map` functions are considered a convenient interface to implement functional programming. 166 | ::: 167 | 168 | 169 | ### `map` as a for loop 170 | ```{r} 171 | library(purrr) 172 | 173 | triple <- function(x) x * 3 174 | 175 | # for loop 176 | loop_ret <- list() 177 | for(i in 1:3){ 178 | loop_ret[i] <- triple(i) 179 | } 180 | 181 | # map implementation 182 | map_eg1 <- map(.x = 1:3, .f = triple) 183 | map_eg2 <- map(.x = 1:3, .f = ~triple(.x)) 184 | map_eg3 <- map(.x = 1:3, .f = function(x) triple(x)) 185 | 186 | identical(loop_ret,map_eg1) 187 | identical(loop_ret,map_eg2) 188 | identical(loop_ret,map_eg3) 189 | ``` 190 | 191 | ### `map` with a data frame 192 | ```{r} 193 | tmp_dat <- data.frame( 194 | x = 1:5, 195 | y = 6:10 196 | ) 197 | 198 | tmp_dat |> 199 | map(.f = mean) 200 | 201 | # Alternatively 202 | # map(.x = tmp_dat, .f = mean) 203 | ``` 204 | 205 | ::: callout-tip 206 | ### `data.frame` vs `list` 207 | `data.frame` is a special case of `list`, where each column is one item of the list. Don't confuse this with each row being an item.
208 | ```{r} 209 | class(tmp_dat) 210 | typeof(tmp_dat) 211 | ``` 212 | ::: 213 | 214 | ### Extra arguments for functions 215 | ```{r} 216 | tmp_dat2 <- as.list(tmp_dat) 217 | tmp_dat2$y[6] <- NA 218 | str(tmp_dat2) 219 | 220 | tmp_dat2 |> map(.f = mean) # No extra arguments 221 | tmp_dat2 |> 222 | map(.f = mean, na.rm = TRUE) # With extra arguments 223 | tmp_dat2 |> 224 | map(.f = function(x, remove_na) mean(x, na.rm = remove_na), 225 | remove_na = TRUE) 226 | ``` 227 | 228 | ### Stratified analysis with `map` 229 | We use the `mtcars` dataset from the package `datasets` to demonstrate. 230 | ```{r} 231 | library(datasets) 232 | str(mtcars) 233 | 234 | unique(mtcars$cyl) # different numbers of cylinders 235 | ``` 236 | 237 | We are interested in the average miles per gallon for vehicles with different numbers of cylinders. 238 | ```{r} 239 | # Create a dataset for cylinders level 240 | str_dat <- mtcars |> split(mtcars$cyl) 241 | length(str_dat) 242 | str(str_dat) 243 | 244 | str_dat |> 245 | map(.f = ~mean(.x$mpg)) 246 | ``` 247 | ### Matrix as the output 248 | The `map` family includes functions that organize the output in different data structures, whose names follow the pattern `map_*`. As we've seen, the `map` function returns a list. The following functions will return a vector of a specific kind, e.g. `map_lgl` returns a vector of logical variables, `map_chr` returns a vector of strings. It is also possible to return the results as data frames by row binding (`map_dfr`) or column binding (`map_dfc`). 249 | 250 | ```{r} 251 | str_dat |> 252 | map_dbl(.f = ~mean(.x$mpg)) # returns a vector of doubles 253 | 254 | str_dat |> 255 | map_dfr(.f = ~colMeans(.x)) # return a data frame by row binding 256 | 257 | str_dat |> 258 | map_dfc(.f = ~colMeans(.x)) # return a data frame by col binding 259 | ``` 260 | 261 | 262 | ### Multiple Input 263 | It is possible that an operation requires a pair of variables as input.
While it is still manageable in `map` to achieve this, there are better options provided in `purrr`, specifically `map2` and `pmap`. 264 | 265 | ```{r} 266 | map_avg <- map_dbl(.x = mtcars, .f = mean) 267 | 268 | map2_avg <- map2_dbl(.x = mtcars, 269 | .y = list(weight = 1/nrow(mtcars)), 270 | .f = ~sum(.x*.y)) 271 | identical(map_avg, map2_avg) 272 | 273 | pmap_avg <- pmap_dbl(list(x = mtcars, 274 | y = list(weight = 1/(2*nrow(mtcars))), 275 | z = list(weight2 = 2)), 276 | .f = ~sum(..1*..2*..3)) 277 | identical(map_avg, pmap_avg) 278 | 279 | # Use element names in pmap 280 | mtcars$weight <- 1/2 281 | mtcars$weight2 <- 2 282 | pmap_eg2 <- pmap_dbl(mtcars, 283 | .f = function(mpg, weight, weight2, ...){ 284 | mpg * weight * weight2 285 | }) 286 | 287 | identical(pmap_eg2, mtcars$mpg) 288 | ``` 289 | 290 | ### No output 291 | It is possible that some operations don't need any output during the iteration, e.g. saving the dataset. In this case, `map` will force an output, e.g. `NULL`. One can consider using `walk` instead. The function `walk` applies the function exactly as `map` does, but it returns its input invisibly instead of a list of results, so nothing is printed. 292 | ```{R} 293 | tmp_fldr <- tempdir() 294 | 295 | 296 | map2(.x = str_dat, 297 | .y = 1:length(str_dat), 298 | .f = ~saveRDS(.x, 299 | file = paste0(tmp_fldr, "/",.y, ".rds")) 300 | ) 301 | 302 | # No output 303 | walk2(.x = str_dat, 304 | .y = (1:length(str_dat)), 305 | .f = ~saveRDS(.x, 306 | file = paste0(tmp_fldr, "/",.y, ".rds")) 307 | ) 308 | ``` 309 | 310 | ## Other functions in `purrr` 311 | ### `reduce` and `accumulate` 312 | `purrr` also provides functions to summarize a list by a preferred operator, namely `reduce`. Its variant `accumulate` provides the history of this reduction process.
313 | 314 | 315 | ```{r} 316 | mtcars$weight <- 1/(2*nrow(mtcars)) 317 | mtcars$weight2 <- 2 318 | reduce_eg <- 319 | pmap_dbl(mtcars, 320 | .f = function(mpg, weight, weight2, ...){ 321 | mpg * weight * weight2 322 | }) |> 323 | reduce(`+`) 324 | 325 | pmap_dbl(mtcars, 326 | .f = function(mpg, weight, weight2, ...){ 327 | mpg * weight * weight2 328 | })|> 329 | head() |> # Only show the first 6 operations 330 | accumulate(`+`) 331 | ``` 332 | 333 | ### Working with list 334 | 335 | Let's move to the `purrr` cheatsheet at . 336 | 337 | # Summary 338 | * Introduction to functional programming. 339 | * The R package `purrr` provides a nice interface to functional programming and list manipulation. 340 | * The function `map` and its alternatives `map_*` provide a neat way to iterate over a list or vector with the output in different data structures. 341 | * The functions `map2` and `pmap` allow having more than one list as input. 342 | * The function `walk` and its alternatives `walk2`, `walk_*` do not provide any output. 343 | * The functions `reduce` and `accumulate` help to summarize a list with a preferred operator or function. 344 | 345 | 346 | 347 | # Post-lecture materials 348 | 349 | ::: callout-note 350 | ### Questions 351 | 1. What do `imap` and `iwalk` do? In this lecture note, can you find the one example that could be rewritten with `imap` or `iwalk`? Hint: see the sub-section named _No output_ 352 | 353 | 1. Is there any function in the R base package that provides a nice interface for functional programming? Hint: `?with`, `?within` 354 | 355 | 2. Can you write a section of code to demonstrate the central limit theorem primarily using the `purrr` package and/or using the R base package?
356 | ::: 357 | 358 | ### Additional Resources 359 | 360 | ::: callout-tip 361 | - 362 | - 363 | ::: 364 | -------------------------------------------------------------------------------- /posts/2022-12-01-pkgdown-pkg-website/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Package development with `pkgdown`" 3 | author: 4 | - name: Boyi Guo 5 | url: https://boyiguo1.github.io 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Building a website for R software packages" 9 | date: 2022-12-01 10 | categories: [module 3, week 6, packages, pkgdown] 11 | --- 12 | 13 | 14 | 15 | # Pre-lecture materials 16 | 17 | ### Read ahead 18 | 19 | ::: callout-note 20 | ## Read ahead 21 | 22 | 23 | **Before class, you can prepare by reading the following materials:** 24 | 25 | 1. 26 | ::: 27 | 28 | ### Prerequisites 29 | Before starting you must install the additional package: 30 | 31 | * `pkgdown` - the R package that helps you to build a package website with little efforts 32 | * `usethis` - an automation package that simplifies project creation and setup 33 | 34 | You can do this by calling 35 | 36 | ```{r} 37 | #| eval: false 38 | install.packages(c("usethis", "pkgdown")) 39 | ``` 40 | 41 | or use the “Install Packages…” option from the “Tools” menu in RStudio. 42 | 43 | You also need to have a `GitHub` account and set up your access from your local computer to `GitHub.com`. If you forget, please re-visit your [previous lecture](https://www.stephaniehicks.com/jhustatprogramming2022/posts/2022-11-10-version-control-part-2/). 
44 | 45 | ### Acknowledgements 46 | 47 | Material for this lecture was borrowed and adopted from 48 | 49 | - 50 | - 51 | 52 | # Learning objectives 53 | 54 | ::: callout-note 55 | # Learning objectives 56 | 57 | **At the end of this lesson you will:** 58 | 59 | - build a website for your package 60 | - deploy your website on GitHub 61 | ::: 62 | 63 | # Why do we want a website for our packages? 64 | The primary reason to make a website for our packages is to make them more popular. Having a publicly available website is similar to creating a home for your package, so people know where to find your package. Reciprocally, you can tell others a more stable address, i.e. your website link, to find your package. 65 | 66 | Moreover, compared to the default reference manual, a website provides more *interactivity* and *accessibility*. It is easier to navigate through different functions via clickable links or to follow through a vignette with a table of contents in a point-and-click system. A website is also a more integrated resource that keeps not only the reference manual and vignettes, but also the change-log and development history. All of these make it easier for the users to understand and appreciate your work. 67 | 68 | In addition, a website is a great place to market yourself and show your personality. Unlike a `GitHub` repo, which follows the same template, a website is the place where you have more freedom to showcase your creativity. You can customize the website and have more space to explain. 69 | 70 | # What is `pkgdown`? 71 | While creating a website can be a daunting task for people who don't have extensive web programming experience, `pkgdown` provides a shortcut. `pkgdown` can automatically generate a website for an R package, containing references to the enclosed functions and any vignettes that exist, within two lines of code (slight exaggeration). It also helps to deploy the website to a `GitHub` server.
Amazingly, `pkgdown` facilitates automatic updates of the website following any changes made to the package that are pushed to `GitHub`. 72 | 73 | 74 | 75 | # How to use `pkgdown`? 76 | 77 | ## Live demonstration 78 | While the `pkgdown` website provides a comprehensive [walkthrough](https://pkgdown.r-lib.org/articles/pkgdown.html) for those who set up their `GitHub` access using a [personal access token](https://usethis.r-lib.org/articles/git-credentials.html), it is slightly tricky for those whose access is [set up with SSH](https://www.stephaniehicks.com/jhustatprogramming2022/posts/2022-11-10-version-control-part-2/#ssh-background-and-setup). During this lecture, we give a live demonstration of how to deploy the website, specifically for SSH `GitHub` access. 79 | 80 | ::: callout-tip 81 | ### Did you try `pkgdown::deploy_to_branch()`? 82 | If you run into a problem when running `usethis::use_pkgdown_github_pages()` and get stuck, you should try to understand what the function does by reading its manual `?usethis::use_pkgdown_github_pages()`. Is it possible to create the necessary `gh-pages` branch using `pkgdown::deploy_to_branch()`? Don't forget to set up the GitHub Action by calling `usethis::use_github_action("pkgdown")`. Now you should be able to access your website via **github_account_name.github.io/pkg_name** 83 | ::: 84 | 85 | ## Website customization 86 | There are a lot of customizations possible. Nevertheless, there is no point-and-click system for it. You need to manually edit `_pkgdown.yml` following a certain syntax. Please refer to . 87 | 88 | 89 | # Summary 90 | In this lecture, we 91 | 92 | * introduce the package `pkgdown` that automates website creation 93 | * demonstrate how to use `pkgdown` to create a website for an R package 94 | 95 |
96 | 97 | ### Additional Resources 98 | 99 | ::: callout-tip 100 | - Create hexagon sticker for your package 101 | - Explore badge accessibility 102 | ::: 103 | 104 | -------------------------------------------------------------------------------- /posts/2022-12-01-targets-proj-workflows/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Reproducibile Workflows with `targets`" 3 | author: 4 | - name: Boyi Guo 5 | url: https://boyiguo1.github.io 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "A Make-line pipeline tool for creating reproducible workflows in R" 9 | date: 2022-12-01 10 | categories: [module 3, week 6, project management, targets] 11 | execute: 12 | eval: false 13 | --- 14 | 15 | ```{r, setup, include=FALSE} 16 | knitr::opts_chunk$set( 17 | eval = FALSE 18 | ) 19 | ``` 20 | 21 | 22 | 23 | # Pre-lecture materials 24 | 25 | ### Read ahead 26 | 27 | ::: callout-note 28 | ## Read ahead 29 | 30 | **Before class, you can prepare by reading the following materials:** 31 | 32 | 1. 33 | 2. 34 | 3. 35 | ::: 36 | 37 | ### Prerequisites 38 | Before starting you must install the additional package: 39 | 40 | * `targets` - the R Workflows package 41 | * `usethis` - an automation package that simplifies project creation and setup 42 | * `renv` - a package manager in R 43 | 44 | You can do this by calling 45 | 46 | ```{r} 47 | #| eval: false 48 | install.packages( c("usethis", "targets", "renv")) 49 | ``` 50 | 51 | or use the “Install Packages…” option from the “Tools” menu in RStudio. 52 | 53 | ### Acknowledgements 54 | 55 | Material for this lecture was borrowed and adopted from 56 | 57 | - https://books.ropensci.org/targets/walkthrough.html 58 | 59 | 60 | ### Special Message from Boyi 61 | To students attending online: 62 | 63 |
64 | 65 | I apologize for the scraping sound I made during last lecture. 66 | 67 | # Learning objectives 68 | 69 | ::: callout-note 70 | # Learning objectives 71 | 72 | **At the end of this lesson you will:** 73 | 74 | - set up `targets` analytic pipeline 75 | - write and run a data analysis with `targets` 76 | - replicate and retrieve analysis results from a `targets` workflow 77 | ::: 78 | 79 | # What is `targets`? 80 |
81 | 82 | `targets` is not the grocery store. Instead, it is an R package that helps you to manage your analysis. You can think of `targets` as a butler who helps you to manage your analytics. Its services include 83 | 84 | * to manage the ordering of your analysis so you won't be confused about the steps when reproducing an analysis 85 | * to save the output of each analytic procedure so you don't have to wait for repetitive runs of static results 86 | * to monitor the changes in your code so you can update only the analytic tasks affected by those changes 87 | * to reproduce the whole analysis with a click so you don't have to run multiple scripts 88 | 89 | # Why do we use `targets`? 90 | 91 | 92 | `targets` helps us be more efficient at managing analytic workflows, and hence improves productivity with minimal effort. Let me put it simply: managing file names for your code or saved objects can be very painful. But `targets` can help you handle that when used in combination with a version control system such as `git`. 93 | 94 |
95 | 96 | # How to use `targets` 97 | 98 | [The {targets} R package user manual](https://books.ropensci.org/targets/) is a great source to learn how to use `targets`. The intro level of `targets` tutorial is well documented in [*Chapter 2 Walkthrough*](https://books.ropensci.org/targets/walkthrough.html). Instead going through the chapter with you, I will focus on some tricks that is not discussed in the user manual. 99 | 100 | ## Set up a `targets` workflow 101 | ```{r} 102 | # Start a new R project 103 | usethis::create_project("targets_eg") 104 | # Config target workflow 105 | targets::use_targets() 106 | ``` 107 | 108 | ### (Optional) Version control packages with `renv` 109 | ```{r} 110 | # Config renv system 111 | renv::init() 112 | targets::tar_renv() 113 | ``` 114 | 115 | If other people opens up this project on a different computer, `renv` will automatically install all the necessary packages, especially the same versions of those packages. 116 | 117 | ::: callout-tip 118 | ### Important `renv` functions 119 | Idealistically, you need to keep track of your R packages in every analysis, similar to you version control your files using `git`. You may need to call the following functions periodically, i.e. after you add/remove necessary packages. 120 | 121 | * `targets::tar_renv()` updates `_targets_packages.R` by gathering all packages in your analytic workflow 122 | * `renv::status()` shows which packages are outdated or not recorded 123 | * `renv::snapshot()` updates your packages version number by taking a snapshot of your project library 124 | * `renv::restore()` restores all missing packages or packages whose version number doesn't match with the most updated snapshot. 125 | 126 | For more information, visit 127 | ::: 128 | 129 | ## Set up keyboard shortcuts 130 | `targets` provide some `addins` to help users navigate through workflow management with a click-and-point system. 
For example, if you click on the `Addins` button in the tool bar (highlighted in the screen capture below) which locates on the top of the RStudio window, you can see many options that help you to work with `targets` 131 | 132 | ```{r} 133 | #| eval: true 134 | #| echo: false 135 | #| fig-cap: 'A screenshot of addins for `targets`' 136 | #| fig-align: 'center' 137 | knitr::include_graphics("../../images/targets/Addins.png") 138 | ``` 139 | 140 | With these addins, you don't necessarily have to remember all the functions to run `targets`, such as `targets::tar_make()`, `targets::tar_load()`, `targets::tar_visnetwork()`, etc. 141 | 142 | If you prefer keyboard shortcuts, you can set up for these commonly used functions. In order to do that, you need to go to `Tools` -> `Modify Keyboard Shortcuts`. 143 | 144 | 145 | ```{r} 146 | #| eval: true 147 | #| echo: false 148 | #| fig-cap: 'A screenshot of how to modify keyboard shortcuts' 149 | #| fig-align: 'center' 150 | knitr::include_graphics("../../images/targets/ModifyShortcuts.png") 151 | ``` 152 | 153 | With in the pop-up keyboard shortcuts menu, you can search `addin` or `targets` or a specific `target` addin function, e.g. `Load target at cursor` in the search box. You can customize the keyboard shortcut by clicking on the input box within the `Shortcut` column. 154 | 155 | ```{r} 156 | #| eval: true 157 | #| echo: false 158 | #| fig-cap: 'A screenshot of keyboard shortcuts menu' 159 | #| fig-align: 'center' 160 | knitr::include_graphics("../../images/targets/ShortcutsMenu.png") 161 | ``` 162 | 163 | 164 | # Summary 165 | `targets` is a workflow management powerhouse. It offers much more utility than we covered today. [The {targets} R package user manual](https://books.ropensci.org/targets/) does an excellent job on explaining how to set up parallel computing with the system, to work with markdown systems (I managed my dissertation writing in `targets`), and many more. 
166 | 167 | Nevertheless, I need to warn you that learning `targets` could be intimidating at the beginning because of the setup process and new syntax. It may take multiple iterations or projects until you are comfortable using it. 168 | 169 |
170 | 171 | But it is very rewarding and can save you a lot of time in the long run! It is a worthy investment of time. 172 | 173 | 174 | ### Additional Resources 175 | 176 | ::: callout-tip 177 | - `targets` website 178 | - The `targetopic`, a `targets` ecosystem 179 | - A tutorial & reproducible example on calculating residential segregation indices with decennial US census data 180 | ::: 181 | -------------------------------------------------------------------------------- /posts/2022-12-08-relational-databases/data/survey.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-08-relational-databases/data/survey.db -------------------------------------------------------------------------------- /posts/2022-12-08-relational-databases/survey.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-08-relational-databases/survey.db -------------------------------------------------------------------------------- /posts/2022-12-13-flexdashboard/Diamond_dashboard_example.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Diamonds distribution dashboard" 3 | output: flexdashboard::flex_dashboard 4 | runtime: shiny 5 | --- 6 | 7 | ```{r setup, include = FALSE} 8 | library(ggplot2) 9 | library(dplyr) 10 | knitr::opts_chunk$set(fig.width = 5, fig.asp = 1/3) 11 | ``` 12 | 13 | ## Column 1 14 | 15 | ### Carat 16 | 17 | ```{r} 18 | ggplot(diamonds, aes(carat)) + geom_histogram(binwidth = 0.1) 19 | ``` 20 | 21 | ### Cut 22 | 23 | ```{r} 24 | ggplot(diamonds, aes(cut)) + geom_bar() 25 | ``` 26 | 27 | ### Colour 28 | 29 | ```{r} 30 | ggplot(diamonds, aes(color)) + geom_bar() 31 | ``` 32 | 33 | ## Column 2 34 | 35 | 
### The largest diamonds 36 | 37 | ```{r} 38 | diamonds %>% 39 | arrange(desc(carat)) %>% 40 | head(100) %>% 41 | select(carat, cut, color, price) %>% 42 | DT::datatable() 43 | ``` -------------------------------------------------------------------------------- /posts/2022-12-13-flexdashboard/GeyserFlexDashboard.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Old Faithful Eruptions" 3 | output: flexdashboard::flex_dashboard 4 | runtime: shiny 5 | --- 6 | 7 | ```{r global, include=FALSE} 8 | # load data in 'global' chunk so it can be shared by all users of the dashboard 9 | library(datasets) 10 | data(faithful) 11 | ``` 12 | 13 | Column {.sidebar} 14 | ----------------------------------------------------------------------- 15 | 16 | Waiting time between eruptions and the duration of the eruption for the 17 | Old Faithful geyser in Yellowstone National Park, Wyoming, USA. 18 | 19 | ```{r} 20 | selectInput("n_breaks", label = "Number of bins:", 21 | choices = c(10, 20, 35, 50), selected = 20) 22 | 23 | sliderInput("bw_adjust", label = "Bandwidth adjustment:", 24 | min = 0.2, max = 2, value = 1, step = 0.2) 25 | ``` 26 | 27 | Column 28 | ----------------------------------------------------------------------- 29 | 30 | ### Geyser Eruption Duration 31 | 32 | ```{r} 33 | renderPlot({ 34 | hist(faithful$eruptions, probability = TRUE, breaks = as.numeric(input$n_breaks), 35 | xlab = "Duration (minutes)", main = "Geyser Eruption Duration") 36 | 37 | dens <- density(faithful$eruptions, adjust = input$bw_adjust) 38 | lines(dens, col = "blue") 39 | }) 40 | ``` 41 | 42 | -------------------------------------------------------------------------------- /posts/2022-12-13-flexdashboard/MPGFlexDashboard.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Mile per gallon investigation" 3 | output: flexdashboard::flex_dashboard 4 | runtime: shiny 5 | source_code: embed 6 | 
--- 7 | 8 | ```{r global, include=FALSE} 9 | # load data in 'global' chunk so it can be shared by all users of the dashboard 10 | mpgData <- mtcars 11 | mpgData$am <- factor(mpgData$am, labels = c("Automatic", "Manual")) 12 | ``` 13 | 14 | Column {.sidebar} 15 | ----------------------------------------------------------------------- 16 | 17 | To see how miles per gallon varies across car attribute. 18 | 19 | ```{r} 20 | selectInput("variable", "Variable:", 21 | c("Cylinders (cyl)" = "cyl", 22 | "Transmission (am)" = "am", 23 | "Gears (gear)" = "gear")) 24 | 25 | checkboxInput("outliers", "Show outliers", TRUE) 26 | ``` 27 | 28 | Column 29 | ----------------------------------------------------------------------- 30 | 31 | ### Miles per gallon (mpg) 32 | 33 | ```{r} 34 | formulaText <- reactive({ 35 | paste("mpg ~", input$variable) 36 | }) 37 | 38 | # Generate a plot of the requested variable against mpg ---- 39 | # and only exclude outliers if requested 40 | renderPlot({ 41 | boxplot(as.formula(formulaText()), 42 | data = mpgData, 43 | outline = input$outliers, 44 | col = "#75AADB", pch = 19, main=formulaText()) 45 | }) 46 | 47 | ``` 48 | 49 | -------------------------------------------------------------------------------- /posts/2022-12-13-flexdashboard/example_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-13-flexdashboard/example_dashboard.png -------------------------------------------------------------------------------- /posts/2022-12-13-flexdashboard/example_dashboard_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-13-flexdashboard/example_dashboard_code.png 
-------------------------------------------------------------------------------- /posts/2022-12-15-web-rvest/images/selectorgadget-click.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-15-web-rvest/images/selectorgadget-click.png -------------------------------------------------------------------------------- /posts/2022-12-15-web-rvest/images/selectorgadget-hover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-15-web-rvest/images/selectorgadget-hover.png -------------------------------------------------------------------------------- /posts/2022-12-15-web-rvest/images/selectorgadget-remove.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-15-web-rvest/images/selectorgadget-remove.png -------------------------------------------------------------------------------- /posts/2022-12-15-web-rvest/images/selectorgadget-too-many.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/posts/2022-12-15-web-rvest/images/selectorgadget-too-many.png -------------------------------------------------------------------------------- /posts/_metadata.yml: -------------------------------------------------------------------------------- 1 | # options specified here will apply to all posts in this folder 2 | 3 | # freeze computational output 4 | # (see https://quarto.org/docs/projects/code-execution.html#freeze) 5 | freeze: true 6 | 7 | # Enable banner style 
title blocks 8 | title-block-banner: true 9 | 10 | # Default for table of contents 11 | toc: true 12 | toc-title: Table of contents 13 | toc-location: left 14 | -------------------------------------------------------------------------------- /profile.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatprogramming2022/56ba8223305d93ac9da8e88f4673e924cae4faa0/profile.jpg -------------------------------------------------------------------------------- /projects.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Projects" 3 | listing: 4 | contents: projects 5 | sort: "date desc" 6 | type: default 7 | categories: true 8 | sort-ui: false 9 | filter-ui: false 10 | page-layout: full 11 | title-block-banner: false 12 | --- 13 | -------------------------------------------------------------------------------- /projects/2022-10-27-project-1/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Project 1" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Building a website and practicing with command-line tools" 9 | date: 2022-10-27 10 | categories: [project 1, projects] 11 | --- 12 | 13 | # Background 14 | 15 | 16 | **Due date: November 11 at 11:59pm** 17 | 18 | The goal of this assignment is to practice some of the skills we have been learning about in class around Quarto, command-line, and version control by building and deploying a website. You also are asked to practice with some command-line skills more formally. 19 | 20 | ### To submit your project 21 | 22 | Please use this Quarto file (`.qmd`) and fill in the requested components by adding the URLs pointing to the private and public repositories and deployed websites. 
Render this file to an **HTML file** and submit your HTML file to the dropbox on CoursePlus. Please **show all your code**, if relevant to a section. 23 | 24 | 25 | # Part 1 26 | 27 | Complete the Git & GitHub Fundamentals Starter course. The link to create a private GitHub repository for yourself to complete the course will be posted in CoursePlus. When you are done, add the link to the GitHub repo here: 28 | 29 | - **Link to your GitHub repository**: [Delete this text and replace the text with the link to the private GitHub repo you created above] 30 | 31 | # Part 2 32 | 33 | 1. Read this blogpost titled [Building a brand as a scientist](https://www.stephaniehicks.com/blog/building-a-brand-as-a-scientist). 34 | 2. Reflect on the questions in the "Defining your brand" section. 35 | 3. Write two paragraphs (4-6 sentences) max here answering one (or more) of the questions asked in the section above. 36 | 37 | 38 | # Part 3 39 | 40 | Next, with the reflections from Part 2 in mind, you will create a public GitHub repository on your own GitHub account and build a small website to introduce yourself to others in the course. You will also create a small data analysis on one of the webpages to practice literate programming in [Quarto](https://quarto.org). 41 | 42 | ![](../../images/quarto_schematic.png) 43 | 44 | ## 1. Create a GitHub repo for your website 45 | 46 | Create a new public GitHub repository titled `biostat840-intro-<firstname>-<lastname>` (where you replace `<firstname>` with your first name and `<lastname>` with your last name) in your own personal GitHub account (e.g. `https://github.com/<username>/biostat840-intro-<firstname>-<lastname>`). 47 | 48 | ## 2. Build a website using Quarto 49 | 50 | Create a new project locally within RStudio and build a website for yourself. Your website should include the following: 51 | 52 | 1. A home/landing page. This is the home page that someone will land on when they visit your website.
At minimum it should include your name, a short summary about yourself (max 2-3 sentences), and a picture of something you enjoy doing for fun (or a picture of yourself if you are comfortable sharing one). 53 | 2. A page titled 'About'. This page should describe who you are in greater detail. It could include your professional interests and your educational and/or professional background and/or experience. It could also include any personal information you feel comfortable sharing on the website. 54 | 3. A data analysis page called 'Example analysis'. You can pick any dataset you wish to analyze. In this webpage, you will analyze a dataset and summarize the results. The requirements for this webpage are the following: 55 | - You must describe what is the question you aim to answer with the data and data analysis. 56 | - You must describe who is the intended audience for the data analysis. 57 | - You must describe and link to where the original data come from that you chose. 58 | - You must include a link to a data dictionary for the data or create one inside the webpage. 59 | - Your analysis must include some minimal form of data wrangling with you using at least five different functions from `dplyr` or `tidyr`. 60 | - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). 61 | - Plots should have titles, subtitles, captions, and human-understandable axis labels. 62 | - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). 63 | - Your analysis must include one image or table (not one you created yourself, but one you have saved locally or one from the web). 64 | - Your analysis must include at least two different [callout blocks](https://quarto.org/docs/authoring/callouts.html). 65 | - Your analysis must include a `.bib` file, which you use to reference at least three unique citations.
For example, it could be to a website or paper from where the original data came from or it could be to a paper describing a method you are using to analyze the data. 66 | - Your analysis must include the use of at least 1 [margin content](https://www.stephaniehicks.com/jhustatprogramming2022/posts/2022-10-27-build-website/#margin-content). 67 | - You must summarize your analysis and/or results with a paragraph (4-6 sentences). 68 | - At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, and `ggplot2`) to help the TA with respect to making sure you met all the requirements described above. 69 | 70 | ## 3. Include a `README.md` file 71 | 72 | Your local repository should include a `README.md` file describing who is the author of the website and a link to the website after it has been deployed. Other things you might include are the technical details for how the website was created and/or deployed. 73 | 74 | ## 4. Deploy your website 75 | 76 | Deploy your website using [Quarto Pub](https://quarto.org/docs/publishing/quarto-pub.html), [GitHub pages](https://quarto.org/docs/publishing/github-pages.html), or [Netlify](https://quarto.org/docs/publishing/netlify.html). (**Note**: Deploying your website to RPubs will not be accepted). 77 | 78 | ## 5. Share your website 79 | 80 | Go to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created. Also, list the URLs below for the purposes of grading. 81 | 82 | As you read the introductions from other folks in the class, feel free to comment/reply using Discussion board. 
83 | 84 | - **Link to your GitHub repository**: [Delete this text and replace the text with the link to the public GitHub repo you created above for your website] 85 | 86 | - **Link to your deployed website**: [Delete this and replace the text with the link to the public deployed website you created above] 87 | 88 | # Part 4 89 | 90 | 1. Use [`wget`](https://www.gnu.org/software/wget) to download four files that end in `.fastq` from [here](https://github.com/stephaniehicks/jhustatprogramming2022/tree/main/data). 91 | 2. Create a directory to download the data. The top level directory should be called `raw_data` and there should be a sub-level directory called `fastq`. The command you write should force the creation of both directories at the same time if either of them do not exist yet. 92 | 3. Move all the `.fastq` files into the `fastq` sub-level directory. 93 | 4. Write a for loop in the shell that iterates over each `.fastq` file. For each `.fastq` file, do the following. In the first 1000 rows for each file, count the number of lines where the "@" symbol appears. Your final output should be four numbers printed to the screen. 
94 | 95 | ```{bash} 96 | # Add your solution here 97 | 98 | ``` 99 | 100 | -------------------------------------------------------------------------------- /projects/2022-11-08-project-2/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Project 2" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Building an R package and practicing with S3" 9 | date: 2022-11-08 10 | categories: [project 2, projects] 11 | --- 12 | 13 | # Background 14 | 15 | 16 | **Due date: November 25 at 11:59pm** 17 | 18 | The goal of this homework is to write a set of functions and put them into an R package so that other people can easily use the functions in their own data analyses after installing the package. In addition, they would receive documentation on how to use the functions. 19 | 20 | In addition to building the R package, you will also build an S3 class for your package, and create a vignette where you demonstrate the functions in your R package with an example dataset from TidyTuesday. 21 | 22 | Finally, we will practice our command-line and version control skills by submitting the assignment through GitHub Classroom. 23 | 24 | ### To submit your project 25 | 26 | - The link to create a private GitHub repository for yourself to complete Project 2 will be posted in CoursePlus (**Note**: this creates an empty repository and you need to push the code from your local repository to GitHub when ready). 27 | - Build your R package locally and then push the files to the private GitHub repository that you created for yourself via GitHub Classroom. 28 | - The TA will grade the R package by cloning the repository, installing it, and checking for all the things described below. **It must be installable without any errors**.
29 | 30 | # Part 1: Create an R package 31 | 32 | 33 | ## Part 1A: Cosine and sine transformation 34 | 35 | The cosine and sine of a number can be written as an infinite series expansion of the form 36 | 37 | $$ 38 | \cos(x) = 1 - \frac{x^2}{2!} + \frac{x^4}{4!} - \frac{x^6}{6!} + \cdots 39 | $$ 40 | 41 | $$ 42 | \sin(x) = x - \frac{x^3}{3!} + \frac{x^5}{5!} - \frac{x^7}{7!} + \cdots 43 | $$ 44 | 45 | 46 | Write two functions that compute the cosine and sine (respectively) of a number using the truncated series expansion. Each function should take two arguments: 47 | 48 | - `x`: the number to be transformed 49 | - `k`: the number of terms to be used in the series expansion beyond the constant 1. The value of `k` is always $\geq 1$. 50 | 51 | :::{.callout-tip} 52 | 53 | ### Notes 54 | 55 | - You can assume that the input value `x` will always be a *single* number. 56 | - You can assume that the value `k` will always be an integer $\geq 1$. 57 | - Do not use the `cos()` or `sin()` functions in R. 58 | 59 | ::: 60 | 61 | ```{r} 62 | fn_cos <- function(x, k) { 63 | # Add your solution here 64 | } 65 | 66 | fn_sin <- function(x, k) { 67 | # Add your solution here 68 | } 69 | ``` 70 | 71 | ## Part 1B: Calculating confidence intervals 72 | 73 | Write the following set of functions: 74 | 75 | * `sample_mean()`, which calculates the sample mean 76 | 77 | $$ 78 | \bar{x} = \frac{1}{N} \sum_{i=1}^N x_i 79 | $$ 80 | 81 | * `sample_sd()`, which calculates the sample standard deviation 82 | 83 | $$ 84 | s = \sqrt{\frac{1}{N-1} \sum_{i=1}^N (x_i - \overline{x})^2} 85 | $$ 86 | 87 | * `calculate_CI()`, which calculates the confidence intervals of a sample mean and returns a named vector of length 2, where the first value is the `lower_bound`, the second value is the `upper_bound`.
88 | 89 | $$ 90 | \bar{x} \pm t_{\alpha/2, N-1} s_{\bar{x}} 91 | $$ 92 | 93 | :::{.callout-tip} 94 | 95 | ### Notes 96 | 97 | - You can assume that the input value `x` will always be a *vector* of numbers of length *N*. 98 | - Do not use the `mean()` and `sd()` functions in R. 99 | 100 | ::: 101 | 102 | ```{r} 103 | sample_mean <- function(x) { 104 | # Add your solution here 105 | } 106 | 107 | sample_sd <- function(x) { 108 | # Add your solution here 109 | } 110 | 111 | calculate_CI <- function(x, conf = 0.95) { 112 | # Add your solution here 113 | } 114 | ``` 115 | 116 | ## Part 1C: Put functions into an R package 117 | 118 | Create an R package for the functions you wrote from Part 1A and 1B. Your package will have three exported functions for users to call (see below). You will need to write documentation for each function that you export. Your package should include the functions: 119 | 120 | * `fn_cos()`, which computes the approximation to the cosine function (**exported**) 121 | * `fn_sin()`, which computes the approximation to the sine function (**exported**) 122 | * `sample_mean()`, which calculates the sample mean (**not exported**) 123 | * `sample_sd()`, which calculates the sample standard deviation (**not exported**) 124 | * `calculate_CI()`, which calculates the confidence intervals from simulated data (**exported**) 125 | 126 | :::{.callout-tip} 127 | 128 | ### Notes 129 | 130 | * Remember that you should only export the functions that you want the user to use. 131 | * Functions that are *not* exported do not require any documentation. 132 | * Each exported function should have at least **one example** of its usage (using the `@example` directive in the documentation). 133 | * In the functions in your package, consider using control structures and include checks (e.g. `is.na()`, `is.numeric()`, `if()`) to make sure the input is as you expect it to be. For example, try to break the the function with unexpected values that a user might provide (e.g. 
providing a negative value to a log transformation). This can help guide you on ways to address the possible ways to break the function. 134 | * Your package should be installable without any warnings or errors. 135 | 136 | ::: 137 | 138 | 139 | # Part 2: Create an S3 class as part of your package 140 | 141 | In this part, you will create a new S3 class called `ci_class` (confidence interval class) to be used in your R package. You will 142 | 143 | 1. Create a constructor function for the `ci_class` called `make_ci_class()`. 144 | 2. Create a `print()` method to work with the `ci_class` to return a message with the name of the class and the number of observations in the S3 object. 145 | 3. Modify the `calculate_CI()` function to work with the `ci_class` and still return a `lower_bound` and `upper_bound`. 146 | 147 | For example, this is what the output of your code might look like: 148 | 149 | ``` 150 | > set.seed(1234) 151 | > x <- rnorm(100) 152 | > obj <- make_ci_class(x) 153 | > print(obj) # explicitly using the print() method 154 | #> a ci_class with 100 observations 155 | > obj # using autoprinting 156 | #> a ci_class with 100 observations 157 | ``` 158 | 159 | Calculate a 90% confidence interval: 160 | 161 | ``` 162 | > calculate_CI(obj, conf = 0.90) 163 | #> lower_bound upper_bound 164 | #> -0.32353231 0.01000883 165 | ``` 166 | 167 | 168 | # Part 3: Create supporting documents as part of your package 169 | 170 | ## Part 3A: Create a vignette 171 | 172 | In this part, you will create a vignette where you demonstrate the functions in your R package. 173 | Specifically, you will create an R Markdown document and put it in a folder called "vignettes" within your R package. The purpose of a vignette is to demonstrate the functions of your package in a longer tutorial instead of just short examples within the documentation of your functions (i.e. using the `@example` directive in the documentation).
174 | 175 | :::{.callout-tip} 176 | 177 | ### Note 178 | 179 | You might find the `use_vignette()` [function](https://usethis.r-lib.org/reference/use_vignette.html) from the `usethis` R package helpful. 180 | 181 | ::: 182 | 183 | 184 | ## Part 3B: Create a `README.md` file 185 | 186 | Create a `README.md` file in the R package, which will be useful to readers when they learn about your package. The readme must include: 187 | 188 | - The title of package 189 | - The author of the package 190 | - A goal / description of the package 191 | - A list of **exported** functions that are in the package. Briefly describe each function. 192 | - A basic example with one of the functions. 193 | 194 | :::{.callout-tip} 195 | 196 | ### Note 197 | 198 | You might find the `use_readme_md()` [function](https://usethis.r-lib.org/reference/use_readme_rmd.html) from the `usethis` R package helpful. 199 | 200 | ::: 201 | 202 | 203 | ## Part 3C: Demonstrate `fn_cos()` 204 | 205 | In the vignette, make a plot and show the output of your function `fn_cos(x,k)` and how it approximates the `cos(x)` function from base R as $k$ increases. 206 | 207 | :::{.callout-tip} 208 | 209 | ### Notes 210 | 211 | - The x-axis should range between 0 and 10. 212 | - The y-axis should be the output from `fn_cos(x,k)` or `cos(x)`. 213 | - Plot the output from `cos(x)` as points on the graph. 214 | - Plot the output from `fn_cos(x,k)` as lines on the graph. 215 | - Show 5 lines for values `k` = 1, 3, 5, 7, 9. Each line should be a different color. 216 | 217 | ::: 218 | 219 | 220 | ## Part 3D: Demonstrate `fn_sin()` 221 | 222 | Repeat a similar task and make a similar plot as in Part 3C, but here using `fn_sin()` instead of `fn_cos()`. 223 | 224 | 225 | ## Part 3E: Demonstrate `calculate_CI()` 226 | 227 | The goal here is to demonstrate the `calculate_CI()` function in your package inside the vignette with some example data from [TidyTuesday](https://www.tidytuesday.com). 
However, part of the requirement is to also wrangle and plot the data. At the end of the section, you must demonstrate how to apply `calculate_CI()` as an example to the data. 228 | 229 | Other requirements for this part of vignette are the following: 230 | 231 | 1. Pick any dataset you wish from [TidyTuesday](https://www.tidytuesday.com) to analyze. 232 | - You must describe what is the question you aim to answer with the data and data analysis. 233 | - You must describe and link to where the original data come from that you chose. 234 | - You must include a link to a data dictionary for the data or create one inside the webpage. 235 | 2. Load the data into R (you must show the code from this section) 236 | - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. 237 | - Saves the data only once (not each time you knit/render the document). 238 | - Read in the data locally each time you knit/render. 239 | 3. Your analysis must include some form of data wrangling and data visualization. 240 | - You must use at least eight different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. 241 | - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). 242 | - Plots should have titles, subtitles, captions, and human-understandable axis labels. 243 | 4. Apply the function `calculate_CI()` at least once in the vignette. 244 | - Summarize and interpret the results in 1-2 sentences. 245 | 5. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA with respect to making sure you met all the requirements described above. 
246 | 247 | 248 | -------------------------------------------------------------------------------- /projects/2022-11-22-project-3/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Project 3" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Building websites for R packages; practice functional programming and APIs" 9 | date: 2022-11-29 10 | categories: [project 3, projects] 11 | --- 12 | 13 | # Background 14 | 15 | ```{r} 16 | #| message: false 17 | #| warning: false 18 | library(tidyverse) 19 | ``` 20 | 21 | 22 | 23 | **Due date: December 13 at 11:59pm** 24 | 25 | The goal of this assignment is to practice building websites for R packages, along with practicing functional programming and using APIs. 26 | 27 | ### To submit your project 28 | 29 | For the two parts below, you will need to create two separate GitHub repositories for yourself. The links to create the repositories will be in CoursePlus. 30 | 31 | The first one (Part 1) will be a public repository to build a website for an R package. It is public because you will need to deploy the website. 32 | 33 | The second one (Part 2) will be a private repository to practice using two different APIs, practice functional programming, and build data analyses. 34 | 35 | # Part 1 36 | 37 | Here, we will practice using [`pkgdown`](https://pkgdown.r-lib.org). Using any R package with a GitHub repository (that does not already have a pkgdown website), use `pkgdown` to create a website for the software package. 38 | 39 | 40 | :::{.callout-note} 41 | 42 | This could even be a package that you have written (or are working on creating right now). Otherwise, this could be a package that you have used previously or you can pick one you are not familiar with and just want to know more about!
43 | 44 | It should **not** be the package you created in Project 2 for this course. 45 | 46 | ::: 47 | 48 | ## Part 1A: Create website locally 49 | 50 | Fork the GitHub repository from the original location to your own GitHub account. Clone the repository to your local computer. 51 | 52 | Use `usethis` and `pkgdown` to create a website locally for the R package of your choice. 53 | 54 | ## Part 1B: Customize the website 55 | 56 | Here, you need to customize the website in **at least 5 ways**. How you customize is up to you. The `pkgdown` website has lots of suggestions for you to try out! 57 | 58 | ## Part 1C: Create an example data analysis 59 | 60 | In this part, you will create a data analysis (or a case study) where you demonstrate the functions in the R package. Specifically, you will add [another article or vignette](https://pkgdown.r-lib.org/articles/customise.html#navbar) titled "Example analysis" inside the `/vignettes` folder. 61 | 62 | Similar to Project 2, you must pick out a data set from [TidyTuesday](https://www.tidytuesday.com) **that you have not worked with before** (i.e. not in a previous project or assignment from this class or from 776, but other classes or personal projects are acceptable). You must also demonstrate wrangling and plotting the data. Finally, your example analysis, must also demonstrate at least 2 functions from the R package in some way in the vignette. 63 | 64 | Other requirements for this part of vignette are the following: 65 | 66 | 1. Pick any data set you wish from [TidyTuesday](https://www.tidytuesday.com) to analyze. 67 | - You must describe what is the question you aim to answer with the data and data analysis. 68 | - You must describe and link to where the original data come from that you chose. 69 | - You must include a link to a data dictionary for the data or create one inside the webpage. 70 | 2. Load the data into R 71 | - In this step, you must test if a directory named `data` exists locally. 
If it does not, write an R function that creates it programmatically. 72 | - Save the data only once (not each time you knit/render the document). 73 | - Read in the data locally each time you knit/render. 74 | 3. Your analysis must include some form of data wrangling and data visualization. 75 | - You must use at least six different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. 76 | - You **must use at least two functions from `purrr`**. 77 | - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). 78 | - Plots should have titles, subtitles, captions, and human-understandable axis labels. 79 | - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). 80 | 4. Apply **at least 2 functions from the R package** in the vignette. 81 | 5. Summarize and interpret the results in 1-2 sentences. 82 | 6. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA with respect to making sure you met all the requirements described above. 83 | 84 | ## Part 1D: Create a `README.md` file 85 | 86 | If the package does not already include one, create and include a `README.md` file in the folder where the R package and pkgdown files are on your computer and add the following information below. 87 | 88 | If it already has a `README.md` file, just edit the top of the file with the following information: 89 | 90 | - Include a URL to the GitHub link to where the original R package came from. 91 | - Include a URL to the website that you will deploy in Part 1E; it should be something like `https://jhu-statprogramming-fall-2022.github.io/biostat840-project3-pkgdown-<username>`. 92 | - Include a description of the 5 things you customized in your `pkgdown` website (excluding adding the example data analysis from Part 1C).
93 | 94 | The readme must also include (if it does not already): 95 | 96 | - The title of package 97 | - The original author of the package (and you who made the website and example data analysis) 98 | - A goal / description of the package 99 | - A list of **exported** functions that are in the package. Briefly describe each function. 100 | - A basic example with one of the functions. 101 | 102 | ## Part 1E: Deploy the website 103 | 104 | The link to create a public GitHub repository for yourself to complete this part of Project 3 will be posted in CoursePlus. This creates an empty GitHub repository. 105 | 106 | When ready, deploy the website. 107 | 108 | :::{.callout-note} 109 | 110 | You need to modify the template code that is provided to you from GitHub when you set the remote. There will already be a remote `origin` (from where you cloned the remote repository to your local repository), which you can see with 111 | 112 | ``` {.bash filename="Bash"} 113 | git remote -v 114 | ``` 115 | 116 | To change where you push your code, instead of (you will see this in the template code from GitHub when you create the public repository) 117 | 118 | ``` {.bash filename="Bash"} 119 | git remote add origin 120 | ``` 121 | 122 | You want to use something like 123 | 124 | ``` {.bash filename="Bash"} 125 | git remote add upstream 126 | ``` 127 | 128 | and when you push your code, you want to use `git push -u upstream main`, for example (not `git push -u origin main`). 129 | 130 | ::: 131 | 132 | # Part 2 133 | 134 | Here, we will practice using APIs and making data visualizations. 135 | 136 | For this part of Project 3, you need to create a private GitHub repository for yourself, which will be posted in CoursePlus. This creates an empty GitHub repository. You need to show all your code and submit both the `.qmd` file and the rendered HTML file. 
137 | 138 | :::{.callout-note} 139 | 140 | When you use an API, you want to figure out the data you want to extract and then save it locally so that you are not using the API each time you knit or render your data analysis. 141 | 142 | Most APIs have limits on the number of times you can ping it in a given hour and your IP address can be blocked if you try to ping it too many times within a short time. 143 | 144 | ::: 145 | 146 | ## Part 2A 147 | 148 | The first API we will use is `tidycensus` (), which is an R package that allows users to interface with a select number of the US Census Bureau’s data APIs and return tidyverse-ready data frames, optionally with simple feature geometry included. 149 | 150 | The goal of this part is to create a data analysis (or a case study) using the US Census Bureau’s data. 151 | 152 | Other requirements for this part are the following: 153 | 154 | 1. You must describe what is the question you aim to answer with the data and data analysis. 155 | 2. You must use at least three different calls to the `tidycensus` API to extract out different datasets. For example, these could be across years, locations, or variables. 156 | - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. 157 | - Saves the data only once (not each time you knit/render the document). 158 | - Read in the data locally each time you knit/render. 159 | 160 | 3. Your analysis must include some form of data wrangling and data visualization. 161 | - You must use at least six different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. 162 | - You **must use at least two functions from `purrr`**. 163 | - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). 
164 | - Plots should have titles, subtitles, captions, and human-understandable axis labels. 165 | - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). 166 | 4. Summarize and interpret the results in 1-2 sentences. 167 | 5. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA with respect to making sure you met all the requirements described above. 168 | 169 | ## Part 2B 170 | 171 | The second API we will use is the [Covid Act Now Data API](https://covidactnow.org/data-api). 172 | 173 | The goal of this part is to create a data analysis (or a case study) using the Covid Act Now Data API. 174 | 175 | Other requirements for this part are the following: 176 | 177 | 1. You must describe what is the question you aim to answer with the data and data analysis. 178 | 2. You must use at least three different calls to the Covid Act Now Data API to extract out different datasets. For example, these could be across counties, etc. 179 | - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. 180 | - Save the data only once (not each time you knit/render the document). 181 | - Read in the data locally each time you knit/render. 182 | 3. Your analysis must include some form of data wrangling and data visualization. 183 | - You must use at least six different functions from `dplyr`, `tidyr`, `lubridate`, `stringr`, or `forcats`. 184 | - You **must use at least two functions from `purrr`**. 185 | - Your analysis should include at least three plots with you using at least three different `geom_*()` functions from `ggplot2` (or another package with `geom_*()` functions). 186 | - Plots should have titles, subtitles, captions, and human-understandable axis labels. 187 | - At least one plot should use a type of faceting (`facet_grid()` or `facet_wrap()`). 188 | 4. 
Summarize and interpret the results in 1-2 sentences. 189 | 5. At the end of the data analysis, list out each of the functions you used from each of the packages (`dplyr`, `tidyr`, `ggplot2`, etc) to help the TA with respect to making sure you met all the requirements described above. 190 | 6. Push your code and rendered HTML to the private repository that you created for yourself. 191 | -------------------------------------------------------------------------------- /projects/2022-12-06-project-4/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Project 4" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Building static and interactive dashboards" 9 | date: 2022-12-13 10 | categories: [project 4, projects] 11 | --- 12 | 13 | # Background 14 | 15 | ```{r} 16 | #| message: false 17 | #| warning: false 18 | library(tidyverse) 19 | ``` 20 | 21 | 22 | **Due date: December 23 at 11:59pm** 23 | 24 | The goal of this assignment is to practice building interactive dashboards 25 | and building effective data visualizations to communicate to an audience. 26 | 27 | ![](../../images/shiny.png){width="30%"} 28 | 29 | ### To submit your project 30 | 31 | - Create a public github repository for yourself. The link to create the repository will be in CoursePlus. 32 | - Follow the instructions below and push all your code to this github repository. 33 | - Then, you will also need to deploy your dashboard. As an example, here is a deployed interactive dashboard from the lecture we had on dashboards. 34 | 35 | - 36 | 37 | - You must include a `README.md` in your github repository with your source code that includes a link to your deployed dashboard. 38 | 39 | # Part 1: Identify the data 40 | 41 | Pick a dataset from one of the datasets that you used in Project 3. 
You can also pick a different dataset if you wish, but to help minimize work in this project, you are encouraged to pick a dataset that you already are familiar with from Project 3. 42 | 43 | Once you identify the dataset, save the data locally in your project repository to be able to load into R: 44 | 45 | - In this step, you must test if a directory named `data` exists locally. If it does not, write an R function that creates it programmatically. 46 | - Save the data only once. 47 | - Read in the data locally each time you knit/render. 48 | 49 | # Part 2: Design the interactive dashboard 50 | 51 | Using the data analysis that you created from Project 3, think about the following topics and questions from Project 3. You do not have to formally answer any of the questions right now, but use them to help design your dashboard. 52 | 53 | - **Motivation and Overview**: What are the goals and motivation of the data analysis? 54 | - **Related Work**: Anything that inspired you, such as a paper, a web site, or something we discussed in class. 55 | - **Data Analytic Questions**: What question(s) are you trying to answer with the data and data analysis? Did the questions change as you began to explore the data? What new questions did you consider in the course of your analysis? What variables seemed important or not important? 56 | - **Audience**: Who is the target audience for your analysis? Should they be expected to have a specific background or knowledge? 57 | - **Data**: What are the original data sources? Is there a data dictionary or did you create a table yourself? 58 | - **Exploratory Data Analysis**: What visualizations did you use to look at your data in different ways? Did you consider statistical models (e.g. inference or prediction)? How did you decide? Were there any major changes to your ideas? How did you reach these conclusions? 59 | - **Data visualization**: What data analytic components (e.g. 
tables, plots, etc) would be useful to show in a static format versus an interactive format? 60 | - **Narrative and Summary**: What did you learn from the data and data analysis? How did you answer the questions? How can you justify your answers? What are the key/important takeaways for the audience? What are the limitations of the analyses? 61 | 62 | 63 | # Part 3: Build an interactive dashboard 64 | 65 | Build an interactive dashboard with the following criteria. Outside of the following criteria, create a dashboard that effectively communicates the key ideas about the data or data analysis. You are strongly encouraged to spend time exploring `flexdashboard` and `shiny` to customize your dashboard beyond the following criteria. You are welcomed to get inspired by dashboards you find online or the one we discussed in class: 66 | 67 | - 68 | 69 | Specific criteria your dashboard must have: 70 | 71 | - `About` tab: This must describe the purpose of the dashboard and a link or original location of the data. 72 | - `The Data` tab: A description of the data along with a table of the dataset similar to the [one we discussed in class](https://rsconnect.biostat.jhsph.edu/ocs-bp-school-shootings-dashboard/#section-the-data). 73 | - Two tabs with static content. Within one of the tabs for the static content, there must be additional tabs. 74 | - Two tabs with interactive content. One of these tabs need to include some type of interactive plots. The other tab can be any other type of interactive content. 75 | - `Analysis` tab. This tab should contain the analysis you built for Project 3. 76 | 77 | # Part 4: Make a two minute video 78 | 79 | Make a two minute (max!) screencast with narration showing highlights of your data analysis and a demo of your dashboard. There are several ways to do this, but one way is to join a zoom room, share your screen, and record yourself. When you are done, upload the video to YouTube or Vimeo and embed it into the dashboard. 
80 | 81 | :::{.callout-tip} 82 | 83 | There are several ways to do this, but I like to embed an `iframe` into the `.Rmd` 84 | 85 | - 86 | 87 | ::: 88 | 89 | Use principles of good storytelling and presentations to get your key points across. 90 | 91 | - Focus the majority of your screencast on your main contributions rather than on technical details. 92 | - What do you feel is the best part of your data analysis and dashboard? 93 | - What insights did you gain? 94 | - What is the single most important thing you would like your audience to take away? Make sure it is upfront and center rather than at the end. 95 | 96 | # Part 5: Deploy dashboard and push code to Github 97 | 98 | - Using the public github repository that you created from CoursePlus, push your source code for this dashboard to GitHub. 99 | - Deploy the website using [shinyapps.io](https://www.shinyapps.io). As an example, here is a deployed interactive dashboard from our lecture on dashboards. 100 | 101 | - 102 | 103 | - Include a `README.md` file in the GitHub repository with your name and a link to the deployed dashboard. 104 | - Share a link to your github repo with your code and your deployed interactive dashboard on CoursePlus. 105 | 106 | 107 | -------------------------------------------------------------------------------- /resources.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Resources" 3 | --- 4 | 5 | 6 | # Learning R 7 | 8 | - Big Book of R: https://www.bigbookofr.com 9 | - List of resources to learn R (but also Python, SQL, Javascript): https://github.com/delabj/datacamp_alternatives/blob/master/index.md 10 | - learnr4free. Resources (books, videos, interactive websites, papers) to learn R. 
Some of the resources are beginner-friendly and start with the installation process: https://www.learnr4free.com/en 11 | - Data Science with R by Danielle Navarro: https://robust-tools.djnavarro.net -------------------------------------------------------------------------------- /schedule.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Schedule" 3 | description: | 4 | Schedule and course materials for Statistical Programming Paradigms and Workflows (JHSPH Biostatistics 140.840 Fall 2022). 5 | tbl-colwidths: [15,10, 55, 20] 6 | --- 7 | 8 | ## Schedule and course materials 9 | 10 | 11 | For Rmd files, go to the [course GitHub repository](https://github.com/stephaniehicks/jhustatprogramming2022) and navigate the directories, or best of all to clone the repo and navigate within RStudio. 12 | 13 | ::: column-page 14 | | Week | Dates | Topics | Projects | 15 | |---|---|---|---|---| 16 | | **Module 1** | | **Statistical programming at the command-line** | | 17 | | | | | 18 | | Week 1 | Oct 27 | Course introduction \[[syllabus](syllabus.qmd)\] | `r emojifont::emoji('palm_tree')` Project 1 \[[html](projects/2022-10-27-project-1/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-10-27-project-1/index.qmd)\] | 19 | | | | Building websites with `quarto` \[[html](posts/2022-10-27-build-website/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-10-27-build-website/index.qmd)\] | | 20 | | | | | | 21 | | | | | | 22 | | Week 2 | Nov 1 | Introduction to the command-line \[[html](posts/2022-11-01-command-line-part-1/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-01-command-line-part-1/index.qmd)\] | | 23 | | | | | | 24 | | | Nov 3 | More advanced command-line tools \[[html](posts/2022-11-03-command-line-part-2/index.qmd)\] 
\[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-03-command-line-part-2/index.qmd)\] | | 25 | | | | | | 26 | | Week 3 | Nov 8 | Version control (git) \[[html](posts/2022-11-08-version-control-part-1/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-08-version-control-part-1/index.qmd)\] | `r emojifont::emoji('palm_tree')` Project 2 \[[html](projects/2022-11-08-project-2/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-11-08-project-2/index.qmd)\] | 27 | | | | | | | | | 28 | | | Nov 10 | Version control (GitHub) \[[html](posts/2022-11-10-version-control-part-2/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-10-version-control-part-2/index.qmd)\] | | 29 | | | | | | 30 | | | Nov 11 | | `r emojifont::emoji('fallen_leaf')` Project 1 due | 31 | | | | | | 32 | | **Module 2** | | **R software development** | | 33 | | | | | 34 | | Week 4 | Nov 15 | Object Oriented Programming \[[html](posts/2022-11-15-object-oriented-programming/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-15-object-oriented-programming/index.qmd)\] | | 35 | | | | | | 36 | | | Nov 17 | R package software development \[[html](posts/2022-11-17-r-pkg-dev-part-1/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-17-r-pkg-dev-part-1/index.qmd)\] | | 37 | | | | | | 38 | | Week 5 | Nov 22 | `r emojifont::emoji('turkey')` Class is canceled | | 39 | | | | | | 40 | | | Nov 24 | `r emojifont::emoji('turkey')` No lecture, enjoy the break! 
| | 41 | | | | | | 42 | | | Nov 25 | | | 43 | | | | | | 44 | | **Module 3** | | **Advanced programming paradigms** | | 45 | | | | | | 46 | | Week 6 | Nov 29 | Functional programming with `purrr` \[[html](posts/2022-11-29-purrr-fun-programming/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-11-29-purrr-fun-programming/index.qmd)\] | `r emojifont::emoji('fallen_leaf')` Project 2 due
`r emojifont::emoji('palm_tree')` Project 3 \[[html](projects/2022-11-22-project-3/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-11-22-project-3/index.qmd)\] | 47 | | | | | | 48 | | | Dec 1 | Project management with `targets` \[[html](posts/2022-12-01-targets-proj-workflows/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-01-targets-proj-workflows/index.qmd)\] | | 49 | | | | Package website with `pkgdown` \[[html](posts/2022-12-01-pkgdown-pkg-website/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-01-pkgdown-pkg-website/index.qmd)\] | | 50 | | | | | | 51 | | Week 7 | Dec 6 | Retrieving data from APIs \[[html](posts/2022-12-06-gettingdata-api/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-06-gettingdata-api/index.qmd)\] | | 52 | | | | | | 53 | | | | | 54 | | | Dec 8 | Relational databases and SQL basics \[[html](posts/2022-12-08-relational-databases/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-08-relational-databases/index.qmd)\] | | 55 | | | | | 56 | | **Module 4** | | **Interactive web apps and dashboards** | | 57 | | | | | 58 | | Week 8 | Dec 13 | Building dashboards with `flexdashboard` and `shinydashboard` \[[html](posts/2022-12-13-flexdashboard/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-13-flexdashboard/index.qmd)\] | `r emojifont::emoji('fallen_leaf')` Project 3 due
`r emojifont::emoji('palm_tree')` Project 4 \[[html](projects/2022-12-06-project-4/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/projects/2022-12-06-project-4/index.qmd)\] | 59 | | | | | 60 | | | Dec 15 | Scraping data from the web with `rvest` \[[html](posts/2022-12-15-web-rvest/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-15-web-rvest/index.qmd)\] | | 61 | | | | | 62 | | **Module 5** | | **Dealing with "big" data** | | 63 | | | | | 64 | | Week 9 | Dec 20 | Profiling R code \[[html](posts/2022-12-20-profiling-r-code/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-20-profiling-r-code/index.qmd)\] | | 65 | | | | Strategies for dealing with large data \[[html](posts/2022-12-20-dealing-with-large-data/index.qmd)\] \[[Qmd](https://github.com/stephaniehicks/jhustatprogramming2022/blob/main/posts/2022-12-20-dealing-with-large-data/index.qmd)\] | | 66 | | | | | 67 | | | Dec 22 | `r emojifont::emoji('snowflake')` Class is canceled | | 68 | | | | | 69 | | | Dec 23 | | `r emojifont::emoji('fallen_leaf')` Project 4 due | 70 | ::: 71 | 72 | 73 | -------------------------------------------------------------------------------- /styles.css: -------------------------------------------------------------------------------- 1 | /* css styles */ 2 | --------------------------------------------------------------------------------