├── .github
    └── workflows
    │   └── publish.yml
├── .gitignore
├── README.md
├── _freeze
    ├── posts
    │   ├── 2022-08-30-introduction-to-gitgithub
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-08-30-introduction-to-r-and-rstudio
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-01-literate-programming
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       └── plot2-1.png
    │   ├── 2022-09-01-reference-management
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-01-reproducible-research
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-01-reproducible-reserach
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-06-managing-data-frames-with-tidyverse
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-06-reading-and-writing-data
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-08-joining-data-in-r
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-08-tidy-data-and-the-tidyverse
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-13-ggplot2-plotting-system-part-1
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-1-1.png
    │   │   │       ├── unnamed-chunk-14-1.png
    │   │   │       ├── unnamed-chunk-15-1.png
    │   │   │       ├── unnamed-chunk-16-1.png
    │   │   │       ├── unnamed-chunk-17-1.png
    │   │   │       ├── unnamed-chunk-19-1.png
    │   │   │       ├── unnamed-chunk-2-1.png
    │   │   │       ├── unnamed-chunk-20-1.png
    │   │   │       ├── unnamed-chunk-21-1.png
    │   │   │       ├── unnamed-chunk-22-1.png
    │   │   │       ├── unnamed-chunk-23-1.png
    │   │   │       ├── unnamed-chunk-24-1.png
    │   │   │       ├── unnamed-chunk-26-1.png
    │   │   │       ├── unnamed-chunk-27-1.png
    │   │   │       ├── unnamed-chunk-28-1.png
    │   │   │       ├── unnamed-chunk-29-1.png
    │   │   │       ├── unnamed-chunk-30-1.png
    │   │   │       ├── unnamed-chunk-31-1.png
    │   │   │       ├── unnamed-chunk-32-1.png
    │   │   │       ├── unnamed-chunk-33-1.png
    │   │   │       ├── unnamed-chunk-34-1.png
    │   │   │       ├── unnamed-chunk-35-1.png
    │   │   │       ├── unnamed-chunk-36-1.png
    │   │   │       ├── unnamed-chunk-37-1.png
    │   │   │       ├── unnamed-chunk-4-1.png
    │   │   │       └── unnamed-chunk-5-1.png
    │   ├── 2022-09-13-plotting-systems
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-1-1.png
    │   │   │       ├── unnamed-chunk-2-1.png
    │   │   │       ├── unnamed-chunk-3-1.png
    │   │   │       ├── unnamed-chunk-5-1.png
    │   │   │       └── unnamed-chunk-8-1.png
    │   ├── 2022-09-15-ggplot2-plotting-system-part-2
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-10-1.png
    │   │   │       ├── unnamed-chunk-11-1.png
    │   │   │       ├── unnamed-chunk-12-1.png
    │   │   │       ├── unnamed-chunk-13-1.png
    │   │   │       ├── unnamed-chunk-14-1.png
    │   │   │       ├── unnamed-chunk-15-1.png
    │   │   │       ├── unnamed-chunk-16-1.png
    │   │   │       ├── unnamed-chunk-17-1.png
    │   │   │       ├── unnamed-chunk-18-1.png
    │   │   │       ├── unnamed-chunk-19-1.png
    │   │   │       ├── unnamed-chunk-20-1.png
    │   │   │       ├── unnamed-chunk-21-1.png
    │   │   │       ├── unnamed-chunk-22-1.png
    │   │   │       ├── unnamed-chunk-3-1.png
    │   │   │       ├── unnamed-chunk-4-1.png
    │   │   │       ├── unnamed-chunk-5-1.png
    │   │   │       ├── unnamed-chunk-6-1.png
    │   │   │       ├── unnamed-chunk-8-1.png
    │   │   │       └── unnamed-chunk-9-1.png
    │   ├── 2022-09-20-r-nuts-and-bolts
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-22-control-structures
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-22-functions
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-27-loop-functions
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-29-debugging-r-code
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-29-error-handling-and-generation
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-10-04-working-with-dates-and-times
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-34-1.png
    │   │   │       ├── unnamed-chunk-35-1.png
    │   │   │       ├── unnamed-chunk-36-1.png
    │   │   │       ├── unnamed-chunk-37-1.png
    │   │   │       ├── unnamed-chunk-38-1.png
    │   │   │       ├── unnamed-chunk-39-1.png
    │   │   │       ├── unnamed-chunk-40-1.png
    │   │   │       └── unnamed-chunk-41-1.png
    │   ├── 2022-10-06-regular-expressions
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-10-11-working-with-factors
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-18-1.png
    │   │   │       ├── unnamed-chunk-19-1.png
    │   │   │       ├── unnamed-chunk-20-1.png
    │   │   │       ├── unnamed-chunk-21-1.png
    │   │   │       ├── unnamed-chunk-22-1.png
    │   │   │       ├── unnamed-chunk-23-1.png
    │   │   │       ├── unnamed-chunk-24-1.png
    │   │   │       ├── unnamed-chunk-24-2.png
    │   │   │       ├── unnamed-chunk-25-1.png
    │   │   │       ├── unnamed-chunk-26-1.png
    │   │   │       ├── unnamed-chunk-26-2.png
    │   │   │       ├── unnamed-chunk-27-1.png
    │   │   │       ├── unnamed-chunk-27-2.png
    │   │   │       ├── unnamed-chunk-28-1.png
    │   │   │       ├── unnamed-chunk-28-2.png
    │   │   │       ├── unnamed-chunk-29-1.png
    │   │   │       ├── unnamed-chunk-29-2.png
    │   │   │       ├── unnamed-chunk-30-1.png
    │   │   │       ├── unnamed-chunk-30-2.png
    │   │   │       └── unnamed-chunk-31-1.png
    │   ├── 2022-10-13-working-with-text-sentiment-analysis
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-22-1.png
    │   │   │       ├── unnamed-chunk-23-1.png
    │   │   │       ├── unnamed-chunk-24-1.png
    │   │   │       ├── unnamed-chunk-26-1.png
    │   │   │       ├── unnamed-chunk-27-1.png
    │   │   │       ├── unnamed-chunk-28-1.png
    │   │   │       ├── unnamed-chunk-35-1.png
    │   │   │       ├── unnamed-chunk-36-1.png
    │   │   │       ├── unnamed-chunk-37-1.png
    │   │   │       └── unnamed-chunk-38-1.png
    │   ├── 2022-10-18-best-practices-data-analyses
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-10-1.png
    │   │   │       ├── unnamed-chunk-11-1.png
    │   │   │       ├── unnamed-chunk-12-1.png
    │   │   │       ├── unnamed-chunk-13-1.png
    │   │   │       ├── unnamed-chunk-14-1.png
    │   │   │       ├── unnamed-chunk-15-1.png
    │   │   │       ├── unnamed-chunk-16-1.png
    │   │   │       ├── unnamed-chunk-17-1.png
    │   │   │       ├── unnamed-chunk-18-1.png
    │   │   │       ├── unnamed-chunk-19-1.png
    │   │   │       ├── unnamed-chunk-2-1.png
    │   │   │       ├── unnamed-chunk-20-1.png
    │   │   │       ├── unnamed-chunk-21-1.png
    │   │   │       ├── unnamed-chunk-22-1.png
    │   │   │       ├── unnamed-chunk-23-1.png
    │   │   │       ├── unnamed-chunk-24-1.png
    │   │   │       ├── unnamed-chunk-25-1.png
    │   │   │       ├── unnamed-chunk-26-1.png
    │   │   │       ├── unnamed-chunk-28-1.png
    │   │   │       ├── unnamed-chunk-29-1.png
    │   │   │       ├── unnamed-chunk-3-1.png
    │   │   │       ├── unnamed-chunk-4-1.png
    │   │   │       ├── unnamed-chunk-5-1.png
    │   │   │       ├── unnamed-chunk-6-1.png
    │   │   │       ├── unnamed-chunk-7-1.png
    │   │   │       ├── unnamed-chunk-8-1.png
    │   │   │       └── unnamed-chunk-9-1.png
    │   ├── 2022-10-20-python-for-r-users
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       ├── unnamed-chunk-25-1.png
    │   │   │       └── unnamed-chunk-26-1.png
    │   ├── post-with-code
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   └── welcome
    │   │   └── index
    │   │       └── execute-results
    │   │           └── html.json
    ├── projects
    │   ├── 2022-08-30-project-0
    │   │   ├── index
    │   │   │   └── execute-results
    │   │   │   │   └── html.json
    │   │   └── project-0
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   ├── 2022-09-06-project-1
    │   │   └── index
    │   │   │   ├── execute-results
    │   │   │       └── html.json
    │   │   │   └── figure-html
    │   │   │       └── unnamed-chunk-20-1.png
    │   ├── 2022-09-15-project-2
    │   │   └── index
    │   │   │   └── execute-results
    │   │   │       └── html.json
    │   └── 2022-10-04-project-3
    │   │   └── index
    │   │       └── execute-results
    │   │           └── html.json
    ├── schedule
    │   └── execute-results
    │   │   └── html.json
    ├── site_libs
    │   ├── clipboard
    │   │   └── clipboard.min.js
    │   └── quarto-listing
    │   │   ├── list.min.js
    │   │   └── quarto-listing.js
    └── syllabus
    │   └── execute-results
    │       └── html.json
├── _post_template.qmd
├── _quarto.yml
├── data
    ├── 2016-07-19.csv.bz2
    ├── b_lyrics.RDS
    ├── bmi_pm25_no2_sim.csv
    ├── chicago.rds
    ├── chocolate.RDS
    ├── flights.csv
    ├── maacs_sim.csv
    ├── sales.RDS
    ├── storms_2004.csv.gz
    ├── team_standings.csv
    ├── ts_lyrics.RDS
    ├── tuesdata_rainfall.RDS
    └── tuesdata_temperature.RDS
├── images
    ├── cool_icon.png
    ├── dominici_ehp.png
    ├── dspipeline.png
    ├── happygitwithr.png
    ├── lippman.png
    ├── peng_preface.png
    └── phdversioncontrol.gif
├── index.qmd
├── jhustatcomputing2022.Rproj
├── lectures.qmd
├── posts
    ├── 2022-08-30-introduction-to-gitgithub
    │   └── index.qmd
    ├── 2022-08-30-introduction-to-r-and-rstudio
    │   └── index.qmd
    ├── 2022-09-01-literate-programming
    │   ├── index.qmd
    │   └── my-refs.bib
    ├── 2022-09-01-reference-management
    │   ├── index.qmd
    │   └── my-refs.bib
    ├── 2022-09-01-reproducible-research
    │   └── index.qmd
    ├── 2022-09-06-managing-data-frames-with-tidyverse
    │   └── index.qmd
    ├── 2022-09-06-reading-and-writing-data
    │   └── index.qmd
    ├── 2022-09-08-joining-data-in-r
    │   └── index.qmd
    ├── 2022-09-08-tidy-data-and-the-tidyverse
    │   └── index.qmd
    ├── 2022-09-13-ggplot2-plotting-system-part-1
    │   └── index.qmd
    ├── 2022-09-13-plotting-systems
    │   └── index.qmd
    ├── 2022-09-15-ggplot2-plotting-system-part-2
    │   └── index.qmd
    ├── 2022-09-20-r-nuts-and-bolts
    │   └── index.qmd
    ├── 2022-09-22-control-structures
    │   └── index.qmd
    ├── 2022-09-22-functions
    │   └── index.qmd
    ├── 2022-09-27-loop-functions
    │   └── index.qmd
    ├── 2022-09-29-debugging-r-code
    │   └── index.qmd
    ├── 2022-09-29-error-handling-and-generation
    │   └── index.qmd
    ├── 2022-10-04-working-with-dates-and-times
    │   └── index.qmd
    ├── 2022-10-06-regular-expressions
    │   └── index.qmd
    ├── 2022-10-11-working-with-factors
    │   └── index.qmd
    ├── 2022-10-13-working-with-text-sentiment-analysis
    │   └── index.qmd
    ├── 2022-10-18-best-practices-data-analyses
    │   └── index.qmd
    ├── 2022-10-20-python-for-r-users
    │   └── index.qmd
    ├── _metadata.yml
    └── welcome
    │   └── index.qmd
├── profile.jpg
├── projects.qmd
├── projects
    ├── 2022-08-30-project-0
    │   └── index.qmd
    ├── 2022-09-06-project-1
    │   └── index.qmd
    ├── 2022-09-15-project-2
    │   └── index.qmd
    └── 2022-10-04-project-3
    │   └── index.qmd
├── resources.qmd
├── schedule.qmd
├── scripts
    └── make_flametree_icon.R
├── styles.css
├── syllabus.qmd
└── videos
    ├── downloadRMac.gif
    ├── downloadRStudio.gif
    ├── downloadRWindows.gif
    └── versionstring.gif


/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | # Render the Quarto site and publish it to the gh-pages branch.
 2 | # Triggered by every push to main, and can also be started manually
 3 | # from the Actions tab (workflow_dispatch).
 4 | on:
 5 |   workflow_dispatch:
 6 |   push:
 7 |     branches: main
 8 | 
 9 | name: Quarto Publish
10 | 
11 | jobs:
12 |   build-deploy:
13 |     runs-on: ubuntu-latest
14 |     permissions:
15 |       # Write access so the publish step can push to the gh-pages target.
16 |       contents: write
17 |     steps:
18 |       - name: Check out repository
19 |         # checkout@v2 ran on the retired Node 12 runtime; v4 is a drop-in
20 |         # replacement here because no inputs are passed to the action.
21 |         uses: actions/checkout@v4
22 | 
23 |       - name: Set up Quarto
24 |         uses: quarto-dev/quarto-actions/setup@v2
25 | 
26 |       # NOTE(review): no R toolchain is installed in this job, so rendering
27 |       # presumably relies on the committed _freeze/ results — confirm.
28 |       - name: Render and Publish
29 |         uses: quarto-dev/quarto-actions/publish@v2
30 |         with:
31 |           target: gh-pages
32 |         env:
33 |           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # R / RStudio session files (presumably per-user state; not meant to be shared)
2 | .Rproj.user
3 | .Rhistory
4 | .RData
5 | .Ruserdata
6 | 
7 | # NOTE(review): these look like Quarto's working directory and rendered site output — confirm
8 | /.quarto/
9 | /_site/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Statistical Computing
 2 | 
 3 | This repo is for the Johns Hopkins BSPH course titled 'Statistical Computing' (BSPH 140.776.01) in Fall 2022.
 4 | 
 5 | ## Instructor
 6 | 
 7 | -   Stephanie C. Hicks (<https://www.stephaniehicks.com>)
 8 | 
 9 | ## Teaching Assistants
10 | 
11 | -   Phyllis Wei
12 | -   Joe Sartini
13 | 
14 | # Bugs
15 | 
16 | GitHub pull requests are welcome for bug fixes.
17 | 


--------------------------------------------------------------------------------
/_freeze/posts/2022-08-30-introduction-to-gitgithub/index/execute-results/html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "hash": "82133b65958f2f7d2f5fb41a48634859",
 3 |   "result": {
 4 |     "markdown": "---\ntitle: \"Introduction to git/GitHub\"\nauthor: \n  - name: Stephanie Hicks\n    url: https://stephaniehicks.com\n    affiliation: Department of Biostatistics, Johns Hopkins\n    affiliation_url: https://publichealth.jhu.edu\ndescription: \"Version control is a game changer; or how I learned to love git/GitHub\"\ndate: 2022-08-30\nimage: \"../../images/happygitwithr.png\"\ncategories: [module 1, week 1, programming, version control, git, GitHub]\n---\n\n\n<!-- Add interesting quote -->\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following materials:**\n\n1.  [Happy Git with R](https://happygitwithr.com) from Jenny Bryan\n2.  [Chapter on git and GitHub in `dsbook`](https://rafalab.github.io/dsbook/git.html) from Rafael Irizarry\n:::\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n-   <https://andreashandel.github.io/MADAcourse>\n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n-   Know what Git and GitHub are.\n-   Know why one might want to use them.\n-   Have created and set up a GitHub account.\n:::\n\n# Introduction to git/GitHub\n\nThis document gives a brief explanation of GitHub and how we will use it for this course.\n\n### git\n\n*Git* is what is called a *version control system* for file management. 
The main idea is that as you (and your collaborators) work on a project, the software tracks, and records any changes made by anyone.\n\n-   Similar to the \"track changes\" features in Microsoft Word, but more rigorous, powerful, and scaled up to multiple files\n-   Great for solo or collaborative work\n\n### GitHub\n\n*GitHub* is a hosting service on internet for git-aware folders and projects\n\n-   Similar to the DropBox or Google, but more structured, powerful, and programmatic\n-   Great for solo or collaborative work!\n-   Technically *GitHub* is distinct from *Git*. However, *GitHub* is in some sense the interface and *Git* the underlying engine (a bit like *RStudio* and *R*).\n\nSince we will only be using *Git* through *GitHub*, I tend to not distinguish between the two. In the following, I refer to all of it as just *GitHub*. Note that other interfaces to *Git* exist, e.g., *Bitbucket*, but *GitHub* is the most widely used one.\n\n### Why use git/GitHub?\n\nYou want to use *GitHub* to avoid this:\n\n\n::: {.cell}\n::: {.cell-output-display}\n![How not to use GitHub [image from PhD Comics]](../../images/phdversioncontrol.gif){width=80%}\n:::\n:::\n\n\n\\[[Source: PhD Comics](http://phdcomics.com/comics/archive_print.php?comicid=1531)\\]\n\n*GitHub* gives you a clean way to track your projects. It is also very well suited to collaborative work. Historically, version control was used for software development. However, it has become broader and is now used for many types of projects, including data science projects.\n\nTo learn a bit more about Git/GitHub and why you might want to use it, [read this article by Jenny Bryan](https://peerj.com/preprints/3159/).\n\n**Note her explanation of what's special with the `README.md` file on GitHub.**\n\n### What to (not) do\n\n**GitHub is ideal if** you have a project with a fair number of files, most of those files are text files (such as code, LaTeX, (R)markdown, etc.) 
and different people work on different parts of the project.\n\n**GitHub is less useful if** you have a lot of non-text files (e.g. Word or Powerpoint) and different team members might want to edit the same document at the same time. In that instance, a solution like Google Docs, Word+Dropbox, Word+Onedrive, etc. might be better.\n\n### How to use Git/GitHub\n\nGit and GitHub are fundamentally **based on commands you type into the command line**. Lots of online resources show you how to use the command line. This is the most powerful, and the way I almost always interact with git/GitHub. However, many folks find this the most confusing way to use git/GitHub. Alternatively, there are graphical interfaces.\n\n-   [GitHub itself](https://desktop.github.com/) provides a graphical interface with basic functionality.\n-   RStudio also has [Git/GitHub integration](https://happygitwithr.com/rstudio-git-github.html). Of course this only works for R project GitHub integration.\n-   There are also third party GitHub clients with many advanced features, most of which you won't need initially, but might eventually.\n\n**Note**: As a student, you can (and should) upgrade to the Pro version of GitHub for free (i.e. access to unlimited private repositories is one benefit), see the [GitHub student developer pack](https://education.github.com/pack) on how to do this.\n\n# Getting Started\n\nOne of my favorite resources for getting started with git/GitHub is the Happy Git with R from Jenny Bryan:\n\n-   <https://happygitwithr.com>\n\n\n::: {.cell}\n::: {.cell-output-display}\n![A screenshot of the Happy Git with R online book from Jenny Bryan](../../images/happygitwithr.png){width=80%}\n:::\n:::\n\n\nIt truly is one of the **best resources** out there for getting started with git/GitHub, especially with the integration to RStudio. 
Therefore, at this point, I will encourage all of you to go read through the online book.\n\nSome of you may only need to skim it, others will need to spend some time reading through it. Either way, I will bet that you won't regret the time investment.\n\n# Using git/GitHub in our course\n\nIn this course, you will use git/GitHub in the following ways:\n\n1.  Project 0 (optional) - You will create a website introducing yourself to folks in the course and deploy it on GitHub.\n2.  Projects 1-3 - You can practice using git locally (on your compute environment) to track your changes over time and, if you wish (but highly suggested), you can practice pushing your project solutions to a private GitHub repository on your GitHub account (i.e. `git add`, `git commit`, `git push`, `git pull`, etc.).\n\nLearning these skills will be useful down the road if you ever work collaboratively on a project (i.e. writing code as a group). In this scenario, you will use the skills you have been practicing in your projects to work together as a team in a single GitHub repository.\n\n# Post-lecture materials\n\n### Final Questions\n\nHere are some post-lecture questions to help you think about the material discussed.\n\n::: callout-note\n### Questions\n\n1.  What is version control?\n\n2.  What is the difference between git and GitHub?\n\n3.  What other version control software/tools are available besides git?\n:::\n\n### Additional Resources\n\n::: callout-tip\n-   [git and GitHub in the `dsbook`](https://rafalab.github.io/dsbook/git.html) by Rafael Irizarry\n:::\n\n## rtistry\n\n\n::: {.cell .fig-cap-location-top}\n::: {.cell-output-display}\n![](https://github.com/djnavarro/art/raw/master/static/gallery/flametree/extra/001_flametree_20_13.jpg)\n:::\n:::\n\n\n\\['Flametree' from Danielle Navarro <https://art.djnavarro.net>\\]\n",
 5 |     "supporting": [],
 6 |     "filters": [
 7 |       "rmarkdown/pagebreak.lua"
 8 |     ],
 9 |     "includes": {},
10 |     "engineDependencies": {},
11 |     "preserve": {},
12 |     "postProcess": true
13 |   }
14 | }


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-01-literate-programming/index/figure-html/plot2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-01-literate-programming/index/figure-html/plot2-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-01-reference-management/index/execute-results/html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "hash": "5e45cd6390aa1160c6f9121529e1dd49",
 3 |   "result": {
 4 |     "markdown": "---\ntitle: \"Reference management\"\nauthor: \n  - name: Stephanie Hicks\n    url: https://stephaniehicks.com\n    affiliation: Department of Biostatistics, Johns Hopkins\n    affiliation_url: https://publichealth.jhu.edu\ndescription: \"How to use citations and incorporate references from a bibliography in R Markdown.\"\ndate: 2022-09-01\ncategories: [module 1, week 1, R Markdown, programming]\nbibliography: my-refs.bib\n---\n\n\n<!-- Add interesting quote -->\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following materials:**\n\n1.  Authoring in [R Markdown from RStudio](https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html)\n2.  Citations from [Reproducible Research in R](https://monashdatafluency.github.io/r-rep-res/citations.html) from the [Monash Data Fluency](https://monashdatafluency.github.io) initiative\n3.  Bibliography from [R Markdown Cookbook](https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html)\n:::\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n-   <https://andreashandel.github.io/MADAcourse>\n-   <https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html>\n-   <https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html>\n-   <https://monashdatafluency.github.io/r-rep-res/citations.html>\n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n-   Know what types of bibliography file formats can be used in a R Markdown file\n-   Learn how to add citations to a R Markdown file\n-   Know how to change the citation style (e.g. APA, Chicago, etc)\n:::\n\n# Introduction\n\nFor almost any data analysis, especially if it is meant for publication in the academic literature, you will have to cite other people's work and include the references (bibliographies or citations) in your work. 
In this class, you are likely to need to include references and cite other people's work like in a regular research paper.\n\nR provides nice function `citation()` that helps us generating citation blob for R packages that we have used. Let's try generating citation text for rmarkdown package by using the following command\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncitation(\"rmarkdown\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n\nTo cite the 'rmarkdown' package in publications, please use:\n\n  JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi\n  and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and\n  Winston Chang and Richard Iannone (2022). rmarkdown: Dynamic\n  Documents for R. R package version 2.15. URL\n  https://rmarkdown.rstudio.com.\n\n  Yihui Xie and J.J. Allaire and Garrett Grolemund (2018). R Markdown:\n  The Definitive Guide. Chapman and Hall/CRC. ISBN 9781138359338. URL\n  https://bookdown.org/yihui/rmarkdown.\n\n  Yihui Xie and Christophe Dervieux and Emily Riederer (2020). R\n  Markdown Cookbook. Chapman and Hall/CRC. ISBN 9780367563837. URL\n  https://bookdown.org/yihui/rmarkdown-cookbook.\n\nTo see these entries in BibTeX format, use 'print(<citation>,\nbibtex=TRUE)', 'toBibtex(.)', or set\n'options(citation.bibtex.max=999)'.\n```\n:::\n:::\n\n\nI assume you are familiar with how citing references works, and hopefully, you are already using a reference manager. If not, let me know in the discussion boards.\n\nTo have something that plays well with R Markdown, you need file format that stores all the references. 
Click here to learn more other possible file formats available to you to use within a R Markdown file:\n\n-   <https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html>\n\n### Citation management software\n\nAs you can see, there are ton of file formats including `.medline` (MEDLINE), `.bib` (BibTeX), `.ris` (RIS), `.enl` (EndNote).\n\nI will not discuss underlying citational management software itself, but I will talk briefly how you might create one of these file formats.\n\nIf you recall the output from `citation(\"rmarkdown\")` above, we might consider manually copying and pasting the output into a citation management software, but instead we can use `write_bib()` function from `knitr` package to create a bibliography file ending in `.bib`.\n\nLet's run the following code in order to generate a `my-refs.bib` file\n\n\n::: {.cell}\n\n```{.r .cell-code}\nknitr::write_bib(\"rmarkdown\", file = \"my-refs.bib\")\n```\n:::\n\n\nNow we can see we have the file saved locally.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlist.files()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] \"index.qmd\"       \"index.rmarkdown\" \"my-refs.bib\"    \n```\n:::\n:::\n\n\nIf you open up the `my-refs.bib` file, you will see\n\n    @Manual{R-rmarkdown,\n      title = {rmarkdown: Dynamic Documents for R},\n      author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},\n      year = {2021},\n      note = {R package version 2.8},\n      url = {https://CRAN.R-project.org/package=rmarkdown},\n    }\n\n    @Book{rmarkdown2018,\n      title = {R Markdown: The Definitive Guide},\n      author = {Yihui Xie and J.J. 
Allaire and Garrett Grolemund},\n      publisher = {Chapman and Hall/CRC},\n      address = {Boca Raton, Florida},\n      year = {2018},\n      note = {ISBN 9781138359338},\n      url = {https://bookdown.org/yihui/rmarkdown},\n    }\n\n    @Book{rmarkdown2020,\n      title = {R Markdown Cookbook},\n      author = {Yihui Xie and Christophe Dervieux and Emily Riederer},\n      publisher = {Chapman and Hall/CRC},\n      address = {Boca Raton, Florida},\n      year = {2020},\n      note = {ISBN 9780367563837},\n      url = {https://bookdown.org/yihui/rmarkdown-cookbook},\n    }\n\n::: resources\n**Note there are three keys that we will use later on**:\n\n-   `R-rmarkdown`\n-   `rmarkdown2018`\n-   `rmarkdown2020`\n:::\n\n### Linking `.bib` file with `.rmd` (and `.qmd`) files\n\nIn order to use references within a R Markdown file, you will need to specify the name and a location of a bibliography file using the bibliography metadata field in a YAML metadata section. For example:\n\n``` yaml\n---\ntitle: \"My top ten favorite R packages\"\noutput: html_document\nbibliography: my-refs.bib\n---\n```\n\nYou can include multiple reference files using the following syntax, alternatively you can concatenate two bib files into one.\n\n``` yaml\n---\nbibliography: [\"my-refs1.bib\", \"my-refs2.bib\"]\n---\n```\n\n### Inline citation\n\nNow we can start using those bib keys that we have learned just before, using the following syntax\n\n-   `[@key]` for single citation\n-   `[@key1; @key2]` multiple citation can be separated by semi-colon\n-   `[-@key]` in order to suppress author name, and just display the year\n-   `[see @key1 p 12; also this ref @key2]` is also a valid syntax\n\nLet's start by citing the `rmarkdown` package using the following code and press `Knit` button:\n\n------------------------------------------------------------------------\n\nI have been using the amazing Rmarkdown package [@R-rmarkdown]! 
I should also go and read [@rmarkdown2018; and @rmarkdown2020] books.\n\n------------------------------------------------------------------------\n\nPretty cool, eh??\n\n### Citation styles\n\nBy default, Pandoc will use a Chicago author-date format for citations and references.\n\nTo use another style, you will need to specify a CSL (Citation Style Language) file in the `csl` metadata field, e.g.,\n\n``` yaml\n---\ntitle: \"My top ten favorite R packages\"\noutput: html_document\nbibliography: my-refs.bib\ncsl: biomed-central.csl\n---\n```\n\n::: resources\nTo find your required formats, we recommend using the [Zotero Style Repository](https://www.zotero.org/styles), which makes it easy to search for and download your desired style.\n:::\n\nCSL files can be tweaked to meet custom formatting requirements. For example, we can change the number of authors required before \"et al.\" is used to abbreviate them. This can be simplified through the use of visual editors such as the one available at https://editor.citationstyles.org.\n\n### Other cool features\n\n#### Add an item to a bibliography without using it\n\nBy default, the bibliography will only display items that are directly referenced in the document. If you want to include items in the bibliography without actually citing them in the body text, you can define a dummy nocite metadata field and put the citations there.\n\n``` yaml\n---\nnocite: |\n  @item1, @item2\n---\n```\n\n#### Add all items to the bibliography\n\nIf we do not wish to explicitly state all of the items within the bibliography but would still like to show them in our references, we can use the following syntax:\n\n``` yaml\n---\nnocite: '@*'\n---\n```\n\nThis will force all items to be displayed in the bibliography.\n\n::: resources\nYou can also have an appendix appear after bibliography. 
For more on this, see:\n\n-   <https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html>\n:::\n\n# Other useful tips\n\nWe have learned that inside your file that contains all your references (e.g. `my-refs.bib`), typically each reference gets a key, which is a shorthand that is generated by the reference manager or you can create yourself.\n\nFor instance, I use a format of lower-case first author last name followed by 4 digit year for each reference followed by a keyword (e.g., name of a software package). Alternatively, you can omit the keyword. But note that if I cite a paper by the same first author that was published in the same year, then a lower case letter is added to the end. For instance, for a paper that I wrote as 1st author in 2022, my bibtex key might be `hicks2022` or `hicks2022a`. You can decide what scheme to use, just pick one and use it *forever*.\n\nIn your R Markdown document, you can then cite the reference by adding the key, such as `...in the paper by Hicks et al. [@hicks2022]...`.\n\n# Post-lecture materials\n\n### Practice\n\nHere are some post-lecture tasks to practice some of the material discussed.\n\n::: callout-note\n### Questions\n\n**Try out the following:**\n\n1.  What do you notice that's different when you run `citation(\"tidyverse\")` (compared to `citation(\"rmarkdown\")`)?\n\n2.  Install the following packages:\n\n\n::: {.cell}\n\n```{.r .cell-code}\ninstall.packages(c(\"bibtex\", \"RefManageR\"))\n```\n:::\n\n\nWhat do they do? How might they be helpful to you in terms of reference management?\n\n3.  Instead of using a `.bib` file, try using a different bibliography file format in an R Markdown document.\n\n4.  Practice using a different CSL file to change the citation style.\n:::\n\n### Additional Resources\n\n::: callout-tip\n-   Add here.\n:::\n\n## rtistry\n\n\n::: {.cell .fig-cap-location-top}\n\n:::\n\n\n\\[Add here.\\]\n",
 5 |     "supporting": [],
 6 |     "filters": [
 7 |       "rmarkdown/pagebreak.lua"
 8 |     ],
 9 |     "includes": {},
10 |     "engineDependencies": {},
11 |     "preserve": {},
12 |     "postProcess": true
13 |   }
14 | }


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-23-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-23-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-26-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-26-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-27-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-27-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-28-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-28-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-29-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-29-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-30-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-30-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-31-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-31-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-32-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-32-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-33-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-33-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-34-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-34-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-35-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-35-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-36-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-36-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-37-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-37-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-plotting-systems/index/execute-results/html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "hash": "98d9a4042479184cca076761ad82ddfe",
 3 |   "result": {
 4 |     "markdown": "---\ntitle: \"Plotting Systems\"\nauthor: \n  - name: Stephanie Hicks\n    url: https://stephaniehicks.com\n    affiliation: Department of Biostatistics, Johns Hopkins\n    affiliation_url: https://publichealth.jhu.edu\ndescription: \"Overview of three plotting systems in R\"\ndate: 2022-09-13\ndraft: true\ncategories: [module 1, week 3, R, programming, ggplot2, data viz]\n---\n\n\n> The data may not contain the answer. \n> And, if you torture the data long enough,\n> it will tell you anything. ---*John W. Tukey*\n\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following materials:**\n\n1.  <https://r4ds.had.co.nz/data-visualisation>\n2. Paul Murrell (2011). *R Graphics*, CRC Press.\n3. Hadley Wickham (2009). *ggplot2*, Springer.\n4. Deepayan Sarkar (2008). *Lattice: Multivariate Data Visualization with R*, Springer.\n\n:::\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n-   <https://rdpeng.github.io/Biostat776/lecture-plotting-systems>\n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n-   Be able to identify and describe the three plotting systems in R\n:::\n\n# Plotting Systems\n\nThere are **three different plotting systems in R** and they each have different characteristics and modes of operation. \n\n:::{.callout-tip}\n\n### Important \n\nThe three systems are \n\n1. The base plotting system\n2. The lattice system\n3. The ggplot2 system \n\n**This course will focus primarily on the ggplot2 plotting system**. The other two systems are presented for context.\n\n:::\n\n\n## The Base Plotting System\n\nThe **base plotting system** is the original plotting system for R. The basic model is sometimes **referred to as the \"artist's palette\" model**. \n\nThe idea is you start with a blank canvas and build up from there. 
\n\nIn more R-specific terms, you **typically start with `plot()` function** (or similar plot creating function) to *initiate* a plot and then *annotate* the plot with various annotation functions (`text`, `lines`, `points`, `axis`)\n\nThe base plotting system is **often the most convenient plotting system** to use because it mirrors how we sometimes think of building plots and analyzing data. \n\nIf we do not have a completely well-formed idea of how we want to look at some data, often we will start by \"throwing some data on the page\" and then slowly add more information to it as our thought process evolves. \n\n:::{.callout-tip}\n\n### Example\n\nWe might look at a simple scatterplot and then decide to add a linear regression line or a smoother to it to highlight the trends.\n\n\n::: {.cell}\n\n```{.r .cell-code}\ndata(airquality)\nwith(airquality, {\n        plot(Temp, Ozone)\n        lines(loess.smooth(Temp, Ozone))\n})\n```\n\n::: {.cell-output-display}\n![Scatterplot with loess curve](index_files/figure-html/unnamed-chunk-1-1.png){width=480}\n:::\n:::\n\n\n:::\n\nIn the code above: \n\n- The `plot()` function creates the initial plot and draws the points (circles) on the canvas.\n- The `lines` function is used to annotate or add to the plot (in this case it adds a loess smoother to the scatterplot).\n\nNext, we use the `plot()` function to draw the points on the scatterplot and then use the `main` argument to add a main title to the plot. \n\n\n::: {.cell}\n\n```{.r .cell-code}\ndata(airquality)\nwith(airquality, {\n        plot(Temp, Ozone, main = \"my plot\")\n        lines(loess.smooth(Temp, Ozone))\n})\n```\n\n::: {.cell-output-display}\n![Scatterplot with loess curve](index_files/figure-html/unnamed-chunk-2-1.png){width=480}\n:::\n:::\n\n\n:::{.callout-tip}\n\n### Note\n\nOne downside with constructing base plots is that you **cannot go backwards once the plot has started**. 
\n\nIt is possible that you could start down the road of constructing a plot and realize later (when it is too late) that you do not have enough room to add a y-axis label or something like that.\n\n:::\n\nIf you have a specific plot in mind, there is then a need to **plan in advance** to make sure, for example, that you have set your margins to be the right size to fit all of the annotations that you may want to include. \n\nWhile the base plotting system is nice in that it gives you the flexibility to specify these kinds of details to painstaking accuracy, **sometimes it would be nice if the system could just figure it out for you**.\n\n:::{.callout-tip}\n\n### Note\n\nAnother downside of the base plotting system is that it is **difficult to describe or translate a plot to others because there is no clear graphical language or grammar** that can be used to communicate what you have done. \n\nThe only real way to describe what you have done in a base plot is to just list the series of commands/functions that you have executed, which is not a particularly compact way of communicating things. \n\nThis is one problem that the `ggplot2` package attempts to address.\n\n:::\n\n\n:::{.callout-tip}\n\n### Example\n\nAnother typical base plot is constructed with the following code.\n\n\n::: {.cell}\n\n```{.r .cell-code}\ndata(cars)\n\n## Create the plot / draw canvas\nwith(cars, plot(speed, dist))\n\n## Add annotation\ntitle(\"Speed vs. 
Stopping distance\")\n```\n\n::: {.cell-output-display}\n![Base plot with title](index_files/figure-html/unnamed-chunk-3-1.png){width=480}\n:::\n:::\n\n\n:::\n\nWe will go into more detail on what these functions do in later lessons.\n\n\n## The Lattice System\n\nThe **lattice plotting system** is implemented in the `lattice` R package which comes with every installation of R (although it is not loaded by default).\n\nTo **use the lattice plotting functions**, you must first load the `lattice` package with the `library` function.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(lattice)\n```\n:::\n\n\nWith the lattice system, **plots are created with a single function call**, such as `xyplot()` or `bwplot()`. \n\nThere is **no real distinction between functions that create or initiate plots** and **functions that annotate plots** because it all happens at once.\n\nLattice plots tend to be **most useful for conditioning types of plots**, i.e. looking at how `y` changes with `x` across levels of `z`. \n\n- e.g. these types of plots are useful for looking at multi-dimensional data and often allow you to squeeze a lot of information into a single window or page.\n\nAnother aspect of lattice that makes it different from base plotting is that **things like margins and spacing are set automatically**. \n\nThis is possible because the entire plot is specified at once via a single function call, so all of the available information needed to figure out the spacing and margins is already there.\n\n\n:::{.callout-tip}\n\n### Example\n\nHere is a lattice plot that looks at the relationship between life expectancy and income and how that relationship varies by region in the United States. 
\n\n\n::: {.cell}\n\n```{.r .cell-code}\nstate <- data.frame(state.x77, region = state.region)\nxyplot(Life.Exp ~ Income | region, data = state, layout = c(4, 1))\n```\n\n::: {.cell-output-display}\n![Lattice plot](index_files/figure-html/unnamed-chunk-5-1.png){width=768}\n:::\n:::\n\n\n:::\n\nYou can see that the entire plot was generated by the call to `xyplot()` and all of the data for the plot were stored in the `state` data frame. \n\nThe **plot itself contains four panels**---one for each region---and **within each panel is a scatterplot** of life expectancy and income. \n\nThe notion of *panels* comes up a lot with lattice plots because you typically have many panels in a lattice plot (each panel typically represents a *condition*, like \"region\").\n\n:::{.callout-tip}\n\n### Note\n\nDownsides with the lattice system \n\n- It can sometimes be very **awkward to specify an entire plot** in a single function call (you end up with functions with many many arguments). \n- **Annotation in panels in plots is not especially intuitive** and can be difficult to explain. In particular, the use of custom panel functions and subscripts can be difficult to wield and requires intense preparation. \n- Once a plot is created, **you cannot \"add\" to the plot** (but of course you can just make it again with modifications).\n\n:::\n\n\n## The ggplot2 System\n\nThe **ggplot2 plotting system** attempts to split the difference between base and lattice in a number of ways. \n\n:::{.callout-tip}\n\n### Note\n\nTaking cues from lattice, the ggplot2 system automatically deals with spacings, text, titles but also allows you to annotate by \"adding\" to a plot.\n\n:::\n\nThe ggplot2 system is implemented in the `ggplot2` package (part of the `tidyverse` package), which is available from CRAN (it does not come with R). 
\n\nYou can install it from CRAN via\n\n\n::: {.cell}\n\n```{.r .cell-code}\ninstall.packages(\"ggplot2\")\n```\n:::\n\n\nand then load it into R via the `library()` function.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(ggplot2)\n```\n:::\n\n\nSuperficially, the `ggplot2` functions are similar to `lattice`, but the system is generally easier and more intuitive to use. \n\nThe defaults used in `ggplot2` make many choices for you, but you can still customize plots to your heart's desire.\n\n:::{.callout-tip}\n\n### Example\n\nA typical plot with the `ggplot2` package looks as follows.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(tidyverse)\ndata(mpg)\nmpg %>%\n  ggplot(aes(displ, hwy)) + \n  geom_point()\n```\n\n::: {.cell-output-display}\n![ggplot2 plot](index_files/figure-html/unnamed-chunk-8-1.png){width=576}\n:::\n:::\n\n\n:::\n\nThere are additional functions in `ggplot2` that allow you to make arbitrarily sophisticated plots.\n\nWe will discuss more about this in the next lecture. \n\n",
 5 |     "supporting": [
 6 |       "index_files"
 7 |     ],
 8 |     "filters": [
 9 |       "rmarkdown/pagebreak.lua"
10 |     ],
11 |     "includes": {},
12 |     "engineDependencies": {},
13 |     "preserve": {},
14 |     "postProcess": true
15 |   }
16 | }


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-34-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-34-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-35-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-35-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-36-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-36-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-37-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-37-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-38-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-38-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-39-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-39-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-40-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-40-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-41-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-41-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-23-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-23-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-2.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-25-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-25-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-2.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-2.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-2.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-2.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-2.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-31-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-31-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-23-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-23-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-26-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-26-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-27-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-27-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-28-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-28-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-35-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-35-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-36-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-36-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-37-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-37-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-38-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-38-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-23-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-23-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-25-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-25-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-26-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-26-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-28-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-28-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-29-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-29-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-25-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-25-1.png


--------------------------------------------------------------------------------
/_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-26-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-26-1.png


--------------------------------------------------------------------------------
/_freeze/posts/post-with-code/index/execute-results/html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "hash": "60b3d422ef5002b26ad42abeb118098e",
 3 |   "result": {
 4 |     "markdown": "---\ntitle: \"Post With Code\"\nauthor: \"Harlow Malloc\"\ndate: \"2022-08-09\"\ndraft: TRUE\ncategories: [news, code, analysis]\nimage: \"image.jpg\"\n---\n\n\nThis is a post with executable code.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n1 + 1\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2\n```\n:::\n:::\n",
 5 |     "supporting": [],
 6 |     "filters": [
 7 |       "rmarkdown/pagebreak.lua"
 8 |     ],
 9 |     "includes": {},
10 |     "engineDependencies": {},
11 |     "preserve": {},
12 |     "postProcess": true
13 |   }
14 | }


--------------------------------------------------------------------------------
/_freeze/projects/2022-08-30-project-0/index/execute-results/html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "hash": "7ab882c06a83c8de87a2b2e2b92c17a9",
 3 |   "result": {
 4 |     "markdown": "---\ntitle: \"Project 0 (optional)\"\nauthor: \n  - name: Stephanie Hicks\n    url: https://stephaniehicks.com\n    affiliation: Department of Biostatistics, Johns Hopkins\n    affiliation_url: https://publichealth.jhu.edu\ndescription: \"Information for Project 0 (entirely optional, but hopefully useful and fun!)\"\ndate: 2022-08-30\ncategories: [project 0, projects]\n---\n\n\n# Background\n\n**Due date: Sept 8 at 1:29pm**\n\nUsing the tools we learned in the first week (e.g. R, RStudio and Github). Let's apply them in a small (but also comprehensive) exercise.\n\n-   **Please note this project is entirely optional** (i.e. it will not be graded), but hopefully it will be helpful to you getting set up for the rest of the course (i.e. set up these tools on your computing environment) and give you an opportunity to introduce yourself to your classmates.\n\n-   **For anyone who completes it, you get a free hex sticker!** If you aren't familiar with the hex stickers, check out [this link](https://github.com/rstudio/hex-stickers). You can add them to your laptop for some character and swag (or [turn them into magnets](https://twitter.com/ClaireMKBowen/status/1294336123414523904)). I have a ton of different ones from the tidyverse or [RLadies Baltimore](https://pbs.twimg.com/profile_images/1236855715018559488/PuYAjTTD_400x400.jpg). You can come pick one up from my office or I can mail it to you if you email me a mailing address after you submit the project.\n\n-   **For those of you who are new to GitHub/R/Rmarkdown**: this project makes you do a lot of things that you might not be familiar with. I know that this might be time-consuming and also might feel a bit intimidating. It's partly unavoidable and partly on purpose. You need to learn how to quickly get up to speed with all kinds of new tools that come your way. So practicing it is a good idea. You are welcome to draw on any sources for help that you want (online, classmates, instructor, etc.). 
I'm confident with a bit of trial and error you'll get it to work.\n\n# Part 1\n\nThis part of the project is to ensure that you have successfully set up your computing environment. Please email (use the Subject line: `140.776 Setup`) the Course Instructor (Dr. Stephanie Hicks) at shicks19\\@jhu.edu the following information:\n\n### Setting up your computing environment\n\n1.  Your name, JHED ID (if applicable).\n\n2.  The type of computer/operating system you are using (Windows, Mac, Unix/Linux, other)\n\n3.  The version of R that you have installed on your computer. To do this, start up R and run the following in the R console and include the output in your email.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(R.version.string)\n```\n:::\n\n\n![Printing the R version string](../../videos/versionstring.gif)\n\n4.  The version of RStudio that you have installed on your computer. To do this start up RStudio and in the R console window, run the following and again include the output in your email:\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(RStudio.Version()$version)\n```\n:::\n\n\n5.  If you have a GitHub username, please include this in your email. If you do not have a GitHub username, read https://happygitwithr.com, sign up for GitHub, and include your new username in your email to me.\n\n6.  To make sure git is installed on your system, use the 'Terminal' (e.g. it's next to the R Console within RStudio) (or whatever you use), run the following and include the output in your email:\n\nFor example, this is mine:\n\n\n::: {.cell}\n\n```{.bash .cell-code}\ngit --version\n```\n\n\n::: {.cell-output .cell-output-stdout}\n```\ngit version 2.32.1 (Apple Git-133)\n```\n:::\n:::\n\n\nIf you have any trouble with any of the steps above, try to first post on the discussion board on CoursePlus. The TAs and I will be checking it frequently, but other students may also be helpful in their replies. 
You can also use other resources to get help (Google, R4DS, colleagues/friends/relatives with R/Markdown experience, etc.). Still, try to do as much as possible yourself. We will use all the bits you are learning here repeatedly during this course.\n\n# Part 2\n\nThis part of the project is to help you introduce yourself (and your interests!) to others in this course. You will create a new GitHub repository and build a small website about yourself.\n\n### 1. Create a GitHub repo for your website\n\nCreate a new GitHub repository titled `biostat776-intro-<firstname>-<lastname>` (where you replace `<firstname>` with your first name and `<lastname>` with your last name) in your own personal GitHub account (e.g. `https://github.com/<yourgithubusername>/biostat776-intro-<firstname>-<lastname>`).\n\nFor example, you can find an example that I created for myself at\n\n-   github repo: <https://github.com/stephaniehicks/biostat776-intro-stephanie-hicks>\n\n### 2. Build a website using R Markdown\n\nUsing one of the many ways we discussed in class (e.g. a [simple R Markdown website](https://bookdown.org/yihui/rmarkdown/rmarkdown-site.html), [blogdown](https://bookdown.org/yihui/blogdown/), [distill](https://rstudio.github.io/distill/website.html), etc), create a new project in RStudio with the appropriate files. For example, you might include the following information:\n\n-   Write a short summary introducing yourself. Structure the webpage with headings, subheadings, etc. Talk a bit about yourself, your background, training, research interests. Let me/us know what kind of statistics, programming, data analysis experience you already have. I am also curious to know what you most hope to learn in this course.\n\n-   Five fun facts about yourself\n\n-   A web page linking to something you think is really cool/interesting/inspiring/etc. You could also describe briefly what it is and why you like it.\n\nIf you want, feel free to get creative and include other things. 
You can play with RMarkdown if you wish to, e.g., you can try to include some table or a video, etc.\n\n### 3. Include a `README.md` file\n\nYour project repository should include a `README.md` file (if it was not included already).\n\nEdit the repository `README.md` file. Typically it will only contain the name of your repository with a `#` sign in front. The `#` represents a level 1 heading in Markdown. Change the headline and call it \"Introducing myself\" (or something like that). Underneath write something like \"This website contains a short introduction of *Your Name*.\"\n\nMake sure the 2 files (README.md and especially `index.Rmd` / `index.html`) look the way you want. Make changes until everything works.\n\n### 4. Deploy your website\n\nDepending on how you want to deploy your website, the following may or may not be relevant to you. In general, you want to make sure you have initialized your project to use `git` (i.e. you can type `git init` to initialize the repository to use git). Add and commit your changes. Push your changes and deploy your website.\n\nFollowing steps 2-4, here is my example website:\n\n-   website: <https://www.stephaniehicks.com/biostat776-intro-stephanie-hicks>\n\n### 5. Share your website\n\n-   Go to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created.\n-   As you read the introductions from other folks in the class, feel free to comment/reply using the Discussion Board.\n\nIn class on Sept 8, I will show as many websites as I can from CoursePlus!\n",
 5 |     "supporting": [
 6 |       "index_files"
 7 |     ],
 8 |     "filters": [
 9 |       "rmarkdown/pagebreak.lua"
10 |     ],
11 |     "includes": {},
12 |     "engineDependencies": {},
13 |     "preserve": {},
14 |     "postProcess": true
15 |   }
16 | }


--------------------------------------------------------------------------------
/_freeze/projects/2022-08-30-project-0/project-0/execute-results/html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "hash": "e115266959b77345b6f990263d8292ca",
 3 |   "result": {
 4 |     "markdown": "---\ntitle: \"Project 0 (optional)\"\nauthor: \n  - name: Stephanie Hicks\n    url: https://stephaniehicks.com\n    affiliation: Department of Biostatistics, Johns Hopkins\n    affiliation_url: https://publichealth.jhu.edu\ndescription: \"Information for Project 0 (entirely optional, but hopefully useful and fun!)\"\ndate: 2022-08-30\ncategories: [project 0, projects]\n---\n\n\n\n# Background\n\n**Due date: Sept 8 at 1:29pm**\n\nUsing the tools we learned in the first week (e.g. R, RStudio and Github). Let's apply them in a small (but also comprehensive) exercise.\n\n-   **Please note this project is entirely optional** (i.e. it will not be graded), but hopefully it will be helpful to you getting set up for the rest of the course (i.e. set up these tools on your computing environment) and give you an opportunity to introduce yourself to your classmates.\n\n-   **For anyone who completes it, you get a free hex sticker!** If you aren't familiar with the hex stickers, check out [this link](https://github.com/rstudio/hex-stickers). You can add them to your laptop for some character and swag (or [turn them into magnets](https://twitter.com/ClaireMKBowen/status/1294336123414523904)). I have a ton of different ones from the tidyverse or [RLadies Baltimore](https://pbs.twimg.com/profile_images/1236855715018559488/PuYAjTTD_400x400.jpg). You can come pick one up from my office or I can mail it to you if you email me a mailing address after you submit the project.\n\n-   **For those of you who are new to GitHub/R/Rmarkdown**: this project makes you do a lot of things that you might not be familiar with. I know that this might be time-consuming and also might feel a bit intimidating. It's partly unavoidable and partly on purpose. You need to learn how to quickly get up to speed with all kinds of new tools that come your way. So practicing it is a good idea. 
You are welcome to draw on any sources for help that you want (online, classmates, instructor, etc.). I'm confident with a bit of trial and error you'll get it to work.\n\n# Part 1\n\nThis part of the project is to ensure that you have successfully set up your computing environment. Please email (use the Subject line: `140.776 Setup`) the Course Instructor (Dr. Stephanie Hicks) at shicks19\\@jhu.edu the following information:\n\n### Setting up your computing environment\n\n1.  Your name, JHED ID (if applicable).\n\n2.  The type of computer/operating system you are using (Windows, Mac, Unix/Linux, other)\n\n3.  The version of R that you have installed on your computer. To do this, start up R and run the following in the R console and include the output in your email.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(R.version.string)\n```\n:::\n\n\n![Printing the R version string](../../videos/versionstring.gif)\n\n4.  The version of RStudio that you have installed on your computer. To do this start up RStudio and in the R console window, run the following and again include the output in your email:\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(RStudio.Version()$version)\n```\n:::\n\n\n5.  If you have a GitHub username, please include this in your email. If you do not have a GitHub username, read https://happygitwithr.com, sign up for GitHub, and include your new username in your email to me.\n\n6.  To make sure git is installed on your system, use the 'Terminal' (e.g. it's next to the R Console within RStudio) (or whatever you use), run the following and include the output in your email:\n\nFor example, this is mine:\n\n\n::: {.cell}\n\n```{.bash .cell-code}\ngit --version\n```\n\n\n::: {.cell-output .cell-output-stdout}\n```\ngit version 2.32.1 (Apple Git-133)\n```\n:::\n:::\n\n\nIf you have any trouble with any of the steps above, try to first post on the discussion board on CoursePlus. 
The TAs and I will be checking it frequently, but other students may also be helpful in their replies. You can also use other resources to get help (Google, R4DS, colleagues/friends/relatives with R/Markdown experience, etc.). Still, try to do as much as possible yourself. We will use all the bits you are learning here repeatedly during this course.\n\n# Part 2\n\nThis part of the project is to help you introduce yourself (and your interests!) to others in this course. You will create a new GitHub repository and build a small website about yourself.\n\n### 1. Create a GitHub repo for your website\n\nCreate a new GitHub repository titled `biostat776-intro-<firstname>-<lastname>` (where you replace `<firstname>` with your first name and `<lastname>` with your last name) in your own personal GitHub account (e.g. `https://github.com/<yourgithubusername>/biostat776-intro-<firstname>-<lastname>`).\n\nFor example, you can find an example that I created for myself at\n\n-   github repo: <https://github.com/stephaniehicks/biostat776-intro-stephanie-hicks>\n\n### 2. Build a website using R Markdown\n\nUsing one of the many ways we discussed in class (e.g. a [simple R Markdown website](https://bookdown.org/yihui/rmarkdown/rmarkdown-site.html), [blogdown](https://bookdown.org/yihui/blogdown/), [distill](https://rstudio.github.io/distill/website.html), etc), create a new project in RStudio with the appropriate files. For example, you might include the following information:\n\n-   Write a short summary introducing yourself. Structure the webpage with headings, subheadings, etc. Talk a bit about yourself, your background, training, research interests. Let me/us know what kind of statistics, programming, data analysis experience you already have. I am also curious to know what you most hope to learn in this course.\n\n-   Five fun facts about yourself\n\n-   A web page linking to something you think is really cool/interesting/inspiring/etc. 
You could also describe briefly what it is and why you like it.\n\nIf you want, feel free to get creative and include other things. You can play with RMarkdown if you wish to, e.g., you can try to include some table or a video, etc.\n\n### 3. Include a `README.md` file\n\nYour project repository should include a `README.md` file (if it was not included already).\n\nEdit the repository `README.md` file. Typically it will only contain the name of your repository with a `#` sign in front. The `#` represents a level 1 heading in Markdown. Change the headline and call it \"Introducing myself\" (or something like that). Underneath write something like \"This website contains a short introduction of *Your Name*.\"\n\nMake sure the 2 files (README.md and especially `index.Rmd` / `index.html`) look the way you want. Make changes until everything works.\n\n### 4. Deploy your website\n\nDepending on how you want to deploy your website, the following may or may not be relevant to you. In general, you want to make sure you have initialized your project to use `git` (i.e. you can type `git init` to initialize the repository to use git). Add and commit your changes. Push your changes and deploy your website.\n\nFollowing steps 2-4, here is my example website:\n\n-   website: <https://www.stephaniehicks.com/biostat776-intro-stephanie-hicks>\n\n### 5. Share your website\n\n-   Go to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created.\n-   As you read the introductions from other folks in the class, feel free to comment/reply using the Discussion Board.\n\nIn class on Sept 8, I will show as many websites as I can from CoursePlus!\n",
 5 |     "supporting": [],
 6 |     "filters": [
 7 |       "rmarkdown/pagebreak.lua"
 8 |     ],
 9 |     "includes": {},
10 |     "engineDependencies": {},
11 |     "preserve": {},
12 |     "postProcess": true
13 |   }
14 | }


--------------------------------------------------------------------------------
/_freeze/projects/2022-09-06-project-1/index/figure-html/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/projects/2022-09-06-project-1/index/figure-html/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/_freeze/projects/2022-10-04-project-3/index/execute-results/html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "hash": "4b11821c12993fd2bb8b928708ff148d",
 3 |   "result": {
 4 |     "markdown": "---\ntitle: \"Project 3\"\nauthor: \n  - name: Stephanie Hicks\n    url: https://stephaniehicks.com\n    affiliation: Department of Biostatistics, Johns Hopkins\n    affiliation_url: https://publichealth.jhu.edu\ndescription: \"Exploring album sales and sentiment of lyrics from Beyoncé and Taylor Swift\"\ndate: 2022-10-04\ncategories: [project 3, projects]\n---\n\n\n# Background\n\n**Due date: October 21 at 11:59pm**\n\nThe goal of this assignment is to practice wrangling special data types (including dates, character strings, and factors) and visualizing results while practicing our tidyverse skills.\n\n### To submit your project\n\nPlease write up your project using R Markdown and process it with `knitr`. Compile your document as an **HTML file** and submit your HTML file to the dropbox on Courseplus. Please **show all your code** (i.e. make sure to set `echo = TRUE`) for each of the answers to each part.\n\n# Load data\n\nThe datasets for this part of the assignment come from [TidyTuesday](https://www.tidytuesday.com).\n\nData dictionary available here:\n\n-   <https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-09-29>\n\n![Beyoncé (left) and Taylor Swift (right)](https://akns-images.eonline.com/eol_images/Entire_Site/2019721/rs_1024x759-190821125112-1024.taylor-swift-beyonce-2009-mtv-vmas.ct.082119.jpg){preview=\"TRUE\"}\n\nSpecifically, we will explore album sales and lyrics from two artists (Beyoncé and Taylor Swift). The data are available from TidyTuesday from September 2020, which I have provided for you below:\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nb_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv')\nts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv')\nsales <- 
readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv')\n```\n:::\n\n\nHowever, to avoid re-downloading data, we will check to see if those files already exist using an `if()` statement:\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(here)\nif(!file.exists(here(\"data\",\"b_lyrics.RDS\"))){\n  b_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv')\n  ts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv')\n  sales <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv')\n  \n  # save the files to RDS objects\n  saveRDS(b_lyrics, file = here(\"data\",\"b_lyrics.RDS\"))\n  saveRDS(ts_lyrics, file = here(\"data\",\"ts_lyrics.RDS\"))\n  saveRDS(sales, file = here(\"data\",\"sales.RDS\"))\n}\n```\n:::\n\n\n::: callout-note\nThe above code will only run if it cannot find the path to the `b_lyrics.RDS` on your computer. Then, we can just read in these files every time we knit the R Markdown, instead of re-downloading them every time.\n:::\n\nLet's load the datasets\n\n\n::: {.cell}\n\n```{.r .cell-code}\nb_lyrics <- readRDS(here(\"data\",\"b_lyrics.RDS\"))\nts_lyrics <- readRDS(here(\"data\",\"ts_lyrics.RDS\"))\nsales <- readRDS(here(\"data\",\"sales.RDS\"))\n```\n:::\n\n\n# Part 1: Explore album sales\n\nIn this section, the goal is to explore the sales of studio albums from Beyoncé and Taylor Swift.\n\n**Notes**\n\n-   In each of the subsections below that ask you to create a plot, you must create a title, subtitle, x-axis label, and y-axis label with units where applicable. For example, if your axis says \"sales\" as an axis label, change it to \"sales (in millions)\".\n\n## Part 1A\n\nIn this section, we will do some data wrangling.\n\n1.  
Use `lubridate` to create a column called `released` that is a `Date` class. However, to be able to do this, you first need to use `stringr` to search for a pattern that matches things like this \"(US)\\[51\\]\" in a string like this \"September 1, 2006 (US)\\[51\\]\" and remove them. (**Note**: to get full credit, you must create the regular expression).\n2.  Use `forcats` to create a factor called `country` (**Note**: you may need to collapse some factor levels).\n3.  Transform the `sales` into a unit that is album sales in millions of dollars.\n4.  Keep only album sales from the UK, the US or the World.\n5.  Auto print your final wrangled tibble data frame.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1B\n\nIn this section, we will do some more data wrangling followed by summarization using wrangled data from Part 1A.\n\n1.  Keep only album sales from the US.\n2.  Create a new column called `years_since_release` corresponding to the number of years since the release of each album from Beyoncé and Taylor Swift. This should be a whole number and you should round down to \"14\" if you get a non-whole number like \"14.12\" years. (**Hint**: you may find the `interval()` function from `lubridate` helpful here, but this is not the only way to do this.)\n3.  Calculate the most recent, oldest, and the median years since albums were released for both Beyoncé and Taylor Swift.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1C\n\nUsing the wrangled data from Part 1A:\n\n1.  Calculate the total album sales for each artist and for each `country` (only sales from the UK, US, and World).\n2.  
Using the total album sales, create a [percent stacked barchart](https://r-graph-gallery.com/48-grouped-barplot-with-ggplot2) using `ggplot2` of the percentage of sales of studio albums (in millions) along the y-axis for the two artists along the x-axis colored by the `country`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1D\n\nUsing the wrangled data from Part 1A, use `ggplot2` to create a bar plot for the sales of studio albums (in millions) along the x-axis for each of the album titles along the y-axis.\n\n**Note**:\n\n-   You only need to consider the global World sales (you can ignore US and UK sales for this part).\n-   The title of the album must be clearly readable along the y-axis.\n-   Each bar should be colored by which artist made that album.\n-   The bars should be ordered from albums with the most sales (top) to the least sales (bottom) (**Note**: you must use functions from `forcats` for this step).\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1E\n\nUsing the wrangled data from Part 1A, use `ggplot2` to create a scatter plot of sales of studio albums (in millions) along the y-axis by the released date for each album along the x-axis.\n\n**Note**:\n\n-   The points should be colored by the artist.\n-   There should be three scatter plots (one for UK, US and world sales) faceted by rows.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n# Part 2: Exploring sentiment of lyrics\n\nIn Part 2, we will explore the lyrics in the `b_lyrics` and `ts_lyrics` datasets.\n\n## Part 2A\n\nUsing `ts_lyrics`, create a new column called `line` with one line containing the character string for each line of Taylor Swift's songs.\n\n-   How many lines in Taylor Swift's lyrics contain the word \"hello\"? 
For full credit, show all the rows in `ts_lyrics` that have \"hello\" in the `line` column and report how many rows there are in total.\n-   How many lines in Taylor Swift's lyrics contain the word \"goodbye\"? For full credit, show all the rows in `ts_lyrics` that have \"goodbye\" in the `line` column and report how many rows there are in total.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2B\n\nRepeat the same analysis for `b_lyrics` as described in Part 2A.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2C\n\nUsing the `b_lyrics` dataset,\n\n1.  Tokenize each lyrical line by words.\n2.  Remove the \"stopwords\".\n3.  Calculate the total number for each word in the lyrics.\n4.  Using the \"bing\" sentiment lexicon, add a column to the summarized data frame adding the \"bing\" sentiment lexicon.\n5.  Sort the rows from most frequent to least frequent words.\n6.  Only keep the top 25 most frequent words.\n7.  Auto print the wrangled tibble data frame.\n8.  Use `ggplot2` to create a bar plot with the top words on the y-axis and the frequency of each word on the x-axis. Color each bar by the sentiment of each word from the \"bing\" sentiment lexicon. Bars should be ordered from most frequent on the top to least frequent on the bottom of the plot.\n9.  Create a word cloud of the top 25 most frequent words.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2D\n\nRepeat the same analysis as above in Part 2C, but for `ts_lyrics`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2E\n\nUsing the `ts_lyrics` dataset,\n\n1.  Tokenize each lyrical line by words.\n2.  Remove the \"stopwords\".\n3.  Calculate the total number for each word in the lyrics **for each Album**.\n4.  Using the \"afinn\" sentiment lexicon, add a column to the summarized data frame adding the \"afinn\" sentiment lexicon.\n5.  
Calculate the average sentiment score **for each Album**.\n6.  Auto print the wrangled tibble data frame.\n7.  Join the wrangled data frame from Part 1A (album sales in millions) with the wrangled data frame from #6 above (average sentiment score for each album).\n8.  Using `ggplot2`, create a scatter plot of the average sentiment score for each album (y-axis) and the album release date along the x-axis. Make the size of each point the album sales in millions.\n9.  Add a horizontal line at y-intercept=0.\n10. Write 2-3 sentences interpreting the plot answering the question \"How has the sentiment of Taylor Swift's albums changed over time?\". Add a title, subtitle, and useful axis labels.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n",
 5 |     "supporting": [],
 6 |     "filters": [
 7 |       "rmarkdown/pagebreak.lua"
 8 |     ],
 9 |     "includes": {},
10 |     "engineDependencies": {},
11 |     "preserve": {},
12 |     "postProcess": true
13 |   }
14 | }


--------------------------------------------------------------------------------
/_freeze/site_libs/clipboard/clipboard.min.js:
--------------------------------------------------------------------------------
1 | /*!
2 |  * clipboard.js v2.0.10
3 |  * https://clipboardjs.com/
4 |  *
5 |  * Licensed MIT © Zeno Rocha
6 |  */
7 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return o}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),c=n.n(e);function a(t){try{return document.execCommand(t)}catch(t){return}}var f=function(t){t=c()(t);return a("cut"),t};var l=function(t){var e,n,o,r=1<arguments.length&&void 0!==arguments[1]?arguments[1]:{container:document.body},i="";return"string"==typeof t?(e=t,n="rtl"===document.documentElement.getAttribute("dir"),(o=document.createElement("textarea")).style.fontSize="12pt",o.style.border="0",o.style.padding="0",o.style.margin="0",o.style.position="absolute",o.style[n?"right":"left"]="-9999px",n=window.pageYOffset||document.documentElement.scrollTop,o.style.top="".concat(n,"px"),o.setAttribute("readonly",""),o.value=e,o=o,r.container.appendChild(o),i=c()(o),a("copy"),o.remove()):(i=c()(t),a("copy")),i};function r(t){return(r="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}var s=function(){var t=0<arguments.length&&void 0!==arguments[0]?arguments[0]:{},e=t.action,n=void 0===e?"copy":e,o=t.container,e=t.target,t=t.text;if("copy"!==n&&"cut"!==n)throw new Error('Invalid "action" value, use either "copy" or "cut"');if(void 0!==e){if(!e||"object"!==r(e)||1!==e.nodeType)throw new Error('Invalid "target" value, use a valid Element');if("copy"===n&&e.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if("cut"===n&&(e.hasAttribute("readonly")||e.hasAttribute("disabled")))throw new Error('Invalid "target" attribute. 
You can\'t cut text from elements with "readonly" or "disabled" attributes')}return t?l(t,{container:o}):e?"cut"===n?f(e):l(e,{container:o}):void 0};function p(t){return(p="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(t){return typeof t}:function(t){return t&&"function"==typeof Symbol&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t})(t)}function d(t,e){for(var n=0;n<e.length;n++){var o=e[n];o.enumerable=o.enumerable||!1,o.configurable=!0,"value"in o&&(o.writable=!0),Object.defineProperty(t,o.key,o)}}function y(t,e){return(y=Object.setPrototypeOf||function(t,e){return t.__proto__=e,t})(t,e)}function h(n){var o=function(){if("undefined"==typeof Reflect||!Reflect.construct)return!1;if(Reflect.construct.sham)return!1;if("function"==typeof Proxy)return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(t){return!1}}();return function(){var t,e=m(n);return t=o?(t=m(this).constructor,Reflect.construct(e,arguments,t)):e.apply(this,arguments),e=this,!(t=t)||"object"!==p(t)&&"function"!=typeof t?function(t){if(void 0!==t)return t;throw new ReferenceError("this hasn't been initialised - super() hasn't been called")}(e):t}}function m(t){return(m=Object.setPrototypeOf?Object.getPrototypeOf:function(t){return t.__proto__||Object.getPrototypeOf(t)})(t)}function v(t,e){t="data-clipboard-".concat(t);if(e.hasAttribute(t))return e.getAttribute(t)}var o=function(){!function(t,e){if("function"!=typeof e&&null!==e)throw new TypeError("Super expression must either be null or a function");t.prototype=Object.create(e&&e.prototype,{constructor:{value:t,writable:!0,configurable:!0}}),e&&y(t,e)}(r,i());var t,e,n,o=h(r);function r(t,e){var n;return function(t){if(!(t instanceof r))throw new TypeError("Cannot call a class as a function")}(this),(n=o.call(this)).resolveOptions(e),n.listenClick(t),n}return t=r,n=[{key:"copy",value:function(t){var e=1<arguments.length&&void 
0!==arguments[1]?arguments[1]:{container:document.body};return l(t,e)}},{key:"cut",value:function(t){return f(t)}},{key:"isSupported",value:function(){var t=0<arguments.length&&void 0!==arguments[0]?arguments[0]:["copy","cut"],t="string"==typeof t?[t]:t,e=!!document.queryCommandSupported;return t.forEach(function(t){e=e&&!!document.queryCommandSupported(t)}),e}}],(e=[{key:"resolveOptions",value:function(){var t=0<arguments.length&&void 0!==arguments[0]?arguments[0]:{};this.action="function"==typeof t.action?t.action:this.defaultAction,this.target="function"==typeof t.target?t.target:this.defaultTarget,this.text="function"==typeof t.text?t.text:this.defaultText,this.container="object"===p(t.container)?t.container:document.body}},{key:"listenClick",value:function(t){var e=this;this.listener=u()(t,"click",function(t){return e.onClick(t)})}},{key:"onClick",value:function(t){var e=t.delegateTarget||t.currentTarget,n=this.action(e)||"copy",t=s({action:n,container:this.container,target:this.target(e),text:this.text(e)});this.emit(t?"success":"error",{action:n,text:t,trigger:e,clearSelection:function(){e&&e.focus(),document.activeElement.blur(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(t){return v("action",t)}},{key:"defaultTarget",value:function(t){t=v("target",t);if(t)return document.querySelector(t)}},{key:"defaultText",value:function(t){return v("text",t)}},{key:"destroy",value:function(){this.listener.destroy()}}])&&d(t.prototype,e),n&&d(t,n),r}()},828:function(t){var e;"undefined"==typeof Element||Element.prototype.matches||((e=Element.prototype).matches=e.matchesSelector||e.mozMatchesSelector||e.msMatchesSelector||e.oMatchesSelector||e.webkitMatchesSelector),t.exports=function(t,e){for(;t&&9!==t.nodeType;){if("function"==typeof t.matches&&t.matches(e))return t;t=t.parentNode}}},438:function(t,e,n){var u=n(828);function i(t,e,n,o,r){var i=function(e,n,t,o){return 
function(t){t.delegateTarget=u(t.target,n),t.delegateTarget&&o.call(e,t)}}.apply(this,arguments);return t.addEventListener(n,i,r),{destroy:function(){t.removeEventListener(n,i,r)}}}t.exports=function(t,e,n,o,r){return"function"==typeof t.addEventListener?i.apply(null,arguments):"function"==typeof n?i.bind(null,document).apply(null,arguments):("string"==typeof t&&(t=document.querySelectorAll(t)),Array.prototype.map.call(t,function(t){return i(t,e,n,o,r)}))}},879:function(t,n){n.node=function(t){return void 0!==t&&t instanceof HTMLElement&&1===t.nodeType},n.nodeList=function(t){var e=Object.prototype.toString.call(t);return void 0!==t&&("[object NodeList]"===e||"[object HTMLCollection]"===e)&&"length"in t&&(0===t.length||n.node(t[0]))},n.string=function(t){return"string"==typeof t||t instanceof String},n.fn=function(t){return"[object Function]"===Object.prototype.toString.call(t)}},370:function(t,e,n){var f=n(879),l=n(438);t.exports=function(t,e,n){if(!t&&!e&&!n)throw new Error("Missing required arguments");if(!f.string(e))throw new TypeError("Second argument must be a String");if(!f.fn(n))throw new TypeError("Third argument must be a Function");if(f.node(t))return c=e,a=n,(u=t).addEventListener(c,a),{destroy:function(){u.removeEventListener(c,a)}};if(f.nodeList(t))return o=t,r=e,i=n,Array.prototype.forEach.call(o,function(t){t.addEventListener(r,i)}),{destroy:function(){Array.prototype.forEach.call(o,function(t){t.removeEventListener(r,i)})}};if(f.string(t))return t=t,e=e,n=n,l(document.body,t,e,n);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList");var o,r,i,u,c,a}},817:function(t){t.exports=function(t){var 
e,n="SELECT"===t.nodeName?(t.focus(),t.value):"INPUT"===t.nodeName||"TEXTAREA"===t.nodeName?((e=t.hasAttribute("readonly"))||t.setAttribute("readonly",""),t.select(),t.setSelectionRange(0,t.value.length),e||t.removeAttribute("readonly"),t.value):(t.hasAttribute("contenteditable")&&t.focus(),n=window.getSelection(),(e=document.createRange()).selectNodeContents(t),n.removeAllRanges(),n.addRange(e),n.toString());return n}},279:function(t){function e(){}e.prototype={on:function(t,e,n){var o=this.e||(this.e={});return(o[t]||(o[t]=[])).push({fn:e,ctx:n}),this},once:function(t,e,n){var o=this;function r(){o.off(t,r),e.apply(n,arguments)}return r._=e,this.on(t,r,n)},emit:function(t){for(var e=[].slice.call(arguments,1),n=((this.e||(this.e={}))[t]||[]).slice(),o=0,r=n.length;o<r;o++)n[o].fn.apply(n[o].ctx,e);return this},off:function(t,e){var n=this.e||(this.e={}),o=n[t],r=[];if(o&&e)for(var i=0,u=o.length;i<u;i++)o[i].fn!==e&&o[i].fn._!==e&&r.push(o[i]);return r.length?n[t]=r:delete n[t],this}},t.exports=e,t.exports.TinyEmitter=e}},r={},o.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return o.d(e,{a:e}),e},o.d=function(t,e){for(var n in e)o.o(e,n)&&!o.o(t,n)&&Object.defineProperty(t,n,{enumerable:!0,get:e[n]})},o.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},o(686).default;function o(t){if(r[t])return r[t].exports;var e=r[t]={exports:{}};return n[t](e,e.exports,o),e.exports}var n,r});


--------------------------------------------------------------------------------
/_freeze/site_libs/quarto-listing/quarto-listing.js:
--------------------------------------------------------------------------------
  1 | const kProgressiveAttr = "data-src";
  2 | let categoriesLoaded = false;
  3 | 
  4 | window.quartoListingCategory = (category) => {
  5 |   if (categoriesLoaded) {
  6 |     activateCategory(category);
  7 |     setCategoryHash(category);
  8 |   }
  9 | };
 10 | 
 11 | window["quarto-listing-loaded"] = () => {
 12 |   // Process any existing hash
 13 |   const hash = getHash();
 14 | 
 15 |   if (hash) {
 16 |     // If there is a category, switch to that
 17 |     if (hash.category) {
 18 |       activateCategory(hash.category);
 19 |     }
 20 |     // Paginate a specific listing
 21 |     const listingIds = Object.keys(window["quarto-listings"]);
 22 |     for (const listingId of listingIds) {
 23 |       const page = hash[getListingPageKey(listingId)];
 24 |       if (page) {
 25 |         showPage(listingId, page);
 26 |       }
 27 |     }
 28 |   }
 29 | 
 30 |   const listingIds = Object.keys(window["quarto-listings"]);
 31 |   for (const listingId of listingIds) {
 32 |     // The actual list
 33 |     const list = window["quarto-listings"][listingId];
 34 | 
 35 |     // Update the handlers for pagination events
 36 |     refreshPaginationHandlers(listingId);
 37 | 
 38 |     // Render any visible items that need it
 39 |     renderVisibleProgressiveImages(list);
 40 | 
 41 |     // Whenever the list is updated, we also need to
 42 |     // attach handlers to the new pagination elements
 43 |     // and refresh any newly visible items.
 44 |     list.on("updated", function () {
 45 |       renderVisibleProgressiveImages(list);
 46 |       setTimeout(() => refreshPaginationHandlers(listingId));
 47 | 
 48 |       // Show or hide the no matching message
 49 |       toggleNoMatchingMessage(list);
 50 |     });
 51 |   }
 52 | };
 53 | 
 54 | window.document.addEventListener("DOMContentLoaded", function (_event) {
 55 |   // Attach click handlers to categories
 56 |   const categoryEls = window.document.querySelectorAll(
 57 |     ".quarto-listing-category .category"
 58 |   );
 59 | 
 60 |   for (const categoryEl of categoryEls) {
 61 |     const category = categoryEl.getAttribute("data-category");
 62 |     categoryEl.onclick = () => {
 63 |       activateCategory(category);
 64 |       setCategoryHash(category);
 65 |     };
 66 |   }
 67 | 
 68 |   // Attach a click handler to the category title
 69 |   // (there should be only one, but since it is a class name, handle N)
 70 |   const categoryTitleEls = window.document.querySelectorAll(
 71 |     ".quarto-listing-category-title"
 72 |   );
 73 |   for (const categoryTitleEl of categoryTitleEls) {
 74 |     categoryTitleEl.onclick = () => {
 75 |       activateCategory("");
 76 |       setCategoryHash("");
 77 |     };
 78 |   }
 79 | 
 80 |   categoriesLoaded = true;
 81 | });
 82 | 
 83 | function toggleNoMatchingMessage(list) {
 84 |   const selector = `#${list.listContainer.id} .listing-no-matching`;
 85 |   const noMatchingEl = window.document.querySelector(selector);
 86 |   if (noMatchingEl) {
 87 |     if (list.visibleItems.length === 0) {
 88 |       noMatchingEl.classList.remove("d-none");
 89 |     } else {
 90 |       if (!noMatchingEl.classList.contains("d-none")) {
 91 |         noMatchingEl.classList.add("d-none");
 92 |       }
 93 |     }
 94 |   }
 95 | }
 96 | 
 97 | function setCategoryHash(category) {
 98 |   setHash({ category });
 99 | }
100 | 
101 | function setPageHash(listingId, page) {
102 |   const currentHash = getHash() || {};
103 |   currentHash[getListingPageKey(listingId)] = page;
104 |   setHash(currentHash);
105 | }
106 | 
107 | function getListingPageKey(listingId) {
108 |   return `${listingId}-page`; // hash entry name that stores this listing's page
109 | }
110 | 
111 | function refreshPaginationHandlers(listingId) {
112 |   const listingEl = window.document.getElementById(listingId);
113 |   const paginationEls = listingEl.querySelectorAll(
114 |     ".pagination li.page-item:not(.disabled) .page.page-link"
115 |   );
116 |   for (const paginationEl of paginationEls) {
117 |     paginationEl.onclick = (sender) => {
118 |       setPageHash(listingId, sender.target.getAttribute("data-i"));
119 |       showPage(listingId, sender.target.getAttribute("data-i"));
120 |       return false;
121 |     };
122 |   }
123 | }
124 | 
125 | function renderVisibleProgressiveImages(list) {
126 |   // Run through the visible items and render any progressive images
127 |   for (const item of list.visibleItems) {
128 |     const itemEl = item.elm;
129 |     if (itemEl) {
130 |       const progressiveImgs = itemEl.querySelectorAll(
131 |         `img[${kProgressiveAttr}]`
132 |       );
133 |       for (const progressiveImg of progressiveImgs) {
134 |         const srcValue = progressiveImg.getAttribute(kProgressiveAttr);
135 |         if (srcValue) {
136 |           progressiveImg.setAttribute("src", srcValue);
137 |         }
138 |         progressiveImg.removeAttribute(kProgressiveAttr);
139 |       }
140 |     }
141 |   }
142 | }
143 | 
144 | function getHash() {
145 |   // Hashes are of the form
146 |   // #name:value|name1:value1|name2:value2
147 |   const currentUrl = new URL(window.location);
148 |   const hashRaw = currentUrl.hash ? currentUrl.hash.slice(1) : undefined;
149 |   return parseHash(hashRaw);
150 | }
151 | 
152 | const kAnd = "&"; // separates the name=value entries in the hash
153 | const kEquals = "="; // separates a name from its value
154 | 
155 | function parseHash(hash) {
156 |   if (!hash) {
157 |     return undefined;
158 |   }
159 |   const hasValuesStrs = hash.split(kAnd);
160 |   const hashValues = hasValuesStrs
161 |     .map((hashValueStr) => {
162 |       const vals = hashValueStr.split(kEquals);
163 |       if (vals.length === 2) {
164 |         return { name: vals[0], value: vals[1] };
165 |       } else {
166 |         return undefined;
167 |       }
168 |     })
169 |     .filter((value) => {
170 |       return value !== undefined;
171 |     });
172 | 
173 |   const hashObj = {};
174 |   hashValues.forEach((hashValue) => {
175 |     hashObj[hashValue.name] = decodeURIComponent(hashValue.value);
176 |   });
177 |   return hashObj;
178 | }
179 | 
180 | function makeHash(obj) {
181 |   return Object.keys(obj)
182 |     .map((key) => {
183 |       return `${key}${kEquals}${obj[key]}`;
184 |     })
185 |     .join(kAnd);
186 | }
187 | 
188 | function setHash(obj) {
189 |   const hash = makeHash(obj);
190 |   window.history.pushState(null, null, `#${hash}`);
191 | }
192 | 
193 | function showPage(listingId, page) {
194 |   const list = window["quarto-listings"][listingId];
195 |   if (list) {
196 |     list.show((page - 1) * list.page + 1, list.page);
197 |   }
198 | }
199 | 
200 | function activateCategory(category) {
201 |   // Deactivate existing categories
202 |   const activeEls = window.document.querySelectorAll(
203 |     ".quarto-listing-category .category.active"
204 |   );
205 |   for (const activeEl of activeEls) {
206 |     activeEl.classList.remove("active");
207 |   }
208 | 
209 |   // Activate this category
210 |   const categoryEl = window.document.querySelector(
211 |     `.quarto-listing-category .category[data-category='${category}'`
212 |   );
213 |   if (categoryEl) {
214 |     categoryEl.classList.add("active");
215 |   }
216 | 
217 |   // Filter the listings to this category
218 |   filterListingCategory(category);
219 | }
220 | 
221 | function filterListingCategory(category) {
222 |   const listingIds = Object.keys(window["quarto-listings"]);
223 |   for (const listingId of listingIds) {
224 |     const list = window["quarto-listings"][listingId];
225 |     if (list) {
226 |       if (category === "") {
227 |         // resets the filter
228 |         list.filter();
229 |       } else {
230 |         // filter to this category
231 |         list.filter(function (item) {
232 |           const itemValues = item.values();
233 |           if (itemValues.categories !== null) {
234 |             const categories = itemValues.categories.split(",");
235 |             return categories.includes(category);
236 |           } else {
237 |             return false;
238 |           }
239 |         });
240 |       }
241 |     }
242 |   }
243 | }
244 | 


--------------------------------------------------------------------------------
/_post_template.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Add title"
 3 | author: 
 4 |   - name: Stephanie Hicks
 5 |     url: https://stephaniehicks.com
 6 |     affiliation: Department of Biostatistics, Johns Hopkins
 7 |     affiliation_url: https://publichealth.jhu.edu
 8 | description: "Add description"
 9 | date: 2022-08-30
10 | categories: [add here]
11 | ---
12 | 
13 | <!-- Add interesting quote -->
14 | 
15 | # Pre-lecture materials
16 | 
17 | ### Read ahead
18 | 
19 | ::: callout-note
20 | ## Read ahead
21 | 
22 | **Before class, you can prepare by reading the following materials:**
23 | 
24 | 1.  Add here.
25 | 2.  Add here.
26 | :::
27 | 
28 | ### Acknowledgements
29 | 
30 | Material for this lecture was borrowed and adapted from
31 | 
32 | -   Add here.
33 | 
34 | # Learning objectives
35 | 
36 | ::: callout-note
37 | # Learning objectives
38 | 
39 | **At the end of this lesson you will:**
40 | 
41 | -   Add here.
42 | :::
43 | 
44 | # Add lecture here
45 | 
46 | # Post-lecture materials
47 | 
48 | ### Final Questions
49 | 
50 | Here are some post-lecture questions to help you think about the material discussed.
51 | 
52 | ::: callout-note
53 | ### Questions
54 | 
55 | 1.  Add here.
56 | :::
57 | 
58 | ### Additional Resources
59 | 
60 | ::: callout-tip
61 | -   Add here.
62 | :::
63 | 
64 | ## rtistry
65 | 
66 | ```{r}
67 | #| label: flametree
68 | #| echo: false
69 | #| fig-cap-location: "top"
70 | 
71 | ```
72 | 
73 | \[Add here.\]
74 | 


--------------------------------------------------------------------------------
/_quarto.yml:
--------------------------------------------------------------------------------
 1 | project:
 2 |   type: website
 3 | 
 4 | website:
 5 |   title: "Statistical Computing (BSPH 140.776)"
 6 |   site-url: https://www.stephaniehicks.com/jhustatcomputing2022
 7 |   description: "Course website for Statistical Computing (BSPH 140.776) in Fall 2022"
 8 |   navbar:
 9 |     right:
10 |       - text: "Home"
11 |         file: index.qmd
12 |       - text: "General Information"
13 |         menu:
14 |         - text: "Syllabus"
15 |           href: syllabus.qmd
16 |         - text: "Schedule"
17 |           href: schedule.qmd
18 |       - text: "Course Materials"
19 |         menu: 
20 |         - text: "Lectures"
21 |           href: lectures.qmd
22 |         - text: "Projects"
23 |           href: projects.qmd
24 |         - text: "Resources"
25 |           href: resources.qmd
26 |       - icon: github
27 |         href: https://github.com/
28 |       - icon: twitter
29 |         href: https://twitter.com
30 |       - icon: rss
31 |         href: index.xml
32 | format:
33 |   html:
34 |     theme: simplex
35 |     toc: true
36 |     css: styles.css
37 | 
38 | editor: visual
39 | 
40 | execute:
41 |   freeze: auto
42 | 


--------------------------------------------------------------------------------
/data/2016-07-19.csv.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/2016-07-19.csv.bz2


--------------------------------------------------------------------------------
/data/b_lyrics.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/b_lyrics.RDS


--------------------------------------------------------------------------------
/data/chicago.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/chicago.rds


--------------------------------------------------------------------------------
/data/chocolate.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/chocolate.RDS


--------------------------------------------------------------------------------
/data/sales.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/sales.RDS


--------------------------------------------------------------------------------
/data/storms_2004.csv.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/storms_2004.csv.gz


--------------------------------------------------------------------------------
/data/team_standings.csv:
--------------------------------------------------------------------------------
 1 | "Standing","Team"
 2 | 1,"Spain"
 3 | 2,"Netherlands"
 4 | 3,"Germany"
 5 | 4,"Uruguay"
 6 | 5,"Argentina"
 7 | 6,"Brazil"
 8 | 7,"Ghana"
 9 | 8,"Paraguay"
10 | 9,"Japan"
11 | 10,"Chile"
12 | 11,"Portugal"
13 | 12,"USA"
14 | 13,"England"
15 | 14,"Mexico"
16 | 15,"South Korea"
17 | 16,"Slovakia"
18 | 17,"Ivory Coast"
19 | 18,"Slovenia"
20 | 19,"Switzerland"
21 | 20,"South Africa"
22 | 21,"Australia"
23 | 22,"New Zealand"
24 | 23,"Serbia"
25 | 24,"Denmark"
26 | 25,"Greece"
27 | 26,"Italy"
28 | 27,"Nigeria"
29 | 28,"Algeria"
30 | 29,"France"
31 | 30,"Honduras"
32 | 31,"Cameroon"
33 | 32,"North Korea"
34 | 


--------------------------------------------------------------------------------
/data/ts_lyrics.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/ts_lyrics.RDS


--------------------------------------------------------------------------------
/data/tuesdata_rainfall.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/tuesdata_rainfall.RDS


--------------------------------------------------------------------------------
/data/tuesdata_temperature.RDS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/tuesdata_temperature.RDS


--------------------------------------------------------------------------------
/images/cool_icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/cool_icon.png


--------------------------------------------------------------------------------
/images/dominici_ehp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/dominici_ehp.png


--------------------------------------------------------------------------------
/images/dspipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/dspipeline.png


--------------------------------------------------------------------------------
/images/happygitwithr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/happygitwithr.png


--------------------------------------------------------------------------------
/images/lippman.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/lippman.png


--------------------------------------------------------------------------------
/images/peng_preface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/peng_preface.png


--------------------------------------------------------------------------------
/images/phdversioncontrol.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/phdversioncontrol.gif


--------------------------------------------------------------------------------
/index.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Welcome to Statistical Computing!"
 3 | image: images/cool_icon.png
 4 | about:
 5 |   template: jolla
 6 |   links:
 7 |     - icon: twitter
 8 |       text: Twitter
 9 |       href: https://twitter.com/stephaniehicks
10 |     - icon: github
11 |       text: Github
12 |       href: https://github.com/stephaniehicks
13 | ---
14 | 
15 | Welcome to Statistical Computing at Johns Hopkins Bloomberg School of Public Health! 
16 | 
17 | ## What is this course? 
18 | 
19 | This course covers the basics of practical issues in programming and other computer skills required for the research and application of statistical methods. Includes programming in R and the tidyverse, data ethics, best practices for coding and reproducible research, introduction to data visualizations, best practices for working with special data types (dates/times, text data, etc), best practices for storing data, basics of debugging, organizing and commenting code, basics of leveraging Python from R. Topics in statistical data analysis provide working examples.
20 | 
21 | 
22 | ## Getting started
23 | 
24 | I suggest that you start by looking over the [Syllabus](syllabus.qmd) and [Schedule](schedule.qmd) under **General Information**. After that, start with the Lectures content in the given order.
25 | 
26 | ## Acknowledgements
27 | 
28 | This course was developed and is maintained by [Stephanie Hicks](https://www.stephaniehicks.com).
29 | 
30 | The following individuals have either contributed to improving the course or had materials from their courses adapted here: [Roger D. Peng](https://github.com/rdpeng), [Andreas Handel](https://www.andreashandel.com), [Naim Rashid](https://naimurashid.github.io), [Michael Love](https://github.com/mikelove).
31 | 
32 | The course materials are licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/). Linked and embedded materials are governed by their own licenses. I assume that all external materials used or embedded here are covered under the educational fair use policy. If this is not the case and any material displayed here violates copyright, please let me know and I will remove it.
33 | 


--------------------------------------------------------------------------------
/jhustatcomputing2022.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 


--------------------------------------------------------------------------------
/lectures.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Lectures"
 3 | listing:
 4 |   - id: lectures
 5 |     contents: "posts"
 6 |     sort: "date desc"
 7 |     type: default
 8 |     categories: true
 9 |     sort-ui: false
10 |     filter-ui: false
11 |     feed: true
12 | page-layout: full
13 | title-block-banner: false
14 | ---
15 | 


--------------------------------------------------------------------------------
/posts/2022-08-30-introduction-to-gitgithub/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Introduction to git/GitHub"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "Version control is a game changer; or how I learned to love git/GitHub"
  9 | date: 2022-08-30
 10 | image: "../../images/happygitwithr.png"
 11 | categories: [module 1, week 1, programming, version control, git, GitHub]
 12 | ---
 13 | 
 14 | <!-- Add interesting quote -->
 15 | 
 16 | # Pre-lecture materials
 17 | 
 18 | ### Read ahead
 19 | 
 20 | ::: callout-note
 21 | ## Read ahead
 22 | 
 23 | **Before class, you can prepare by reading the following materials:**
 24 | 
 25 | 1.  [Happy Git with R](https://happygitwithr.com) from Jenny Bryan
 26 | 2.  [Chapter on git and GitHub in `dsbook`](https://rafalab.github.io/dsbook/git.html) from Rafael Irizarry
 27 | :::
 28 | 
 29 | ### Acknowledgements
 30 | 
 31 | Material for this lecture was borrowed and adapted from
 32 | 
 33 | -   <https://andreashandel.github.io/MADAcourse>
 34 | 
 35 | # Learning objectives
 36 | 
 37 | ::: callout-note
 38 | # Learning objectives
 39 | 
 40 | **At the end of this lesson you will:**
 41 | 
 42 | -   Know what Git and GitHub are.
 43 | -   Know why one might want to use them.
 44 | -   Have created and set up a GitHub account.
 45 | :::
 46 | 
 47 | # Introduction to git/GitHub
 48 | 
 49 | This document gives a brief explanation of GitHub and how we will use it for this course.
 50 | 
 51 | ### git
 52 | 
 53 | *Git* is what is called a *version control system* for file management. The main idea is that as you (and your collaborators) work on a project, the software tracks and records any changes made by anyone.
 54 | 
 55 | -   Similar to the "track changes" features in Microsoft Word, but more rigorous, powerful, and scaled up to multiple files
 56 | -   Great for solo or collaborative work
 57 | 
 58 | ### GitHub
 59 | 
 60 | *GitHub* is a hosting service on the internet for git-aware folders and projects.
 61 | 
 62 | -   Similar to Dropbox or Google Drive, but more structured, powerful, and programmatic
 63 | -   Great for solo or collaborative work!
 64 | -   Technically *GitHub* is distinct from *Git*. However, *GitHub* is in some sense the interface and *Git* the underlying engine (a bit like *RStudio* and *R*).
 65 | 
 66 | Since we will only be using *Git* through *GitHub*, I tend to not distinguish between the two. In the following, I refer to all of it as just *GitHub*. Note that other interfaces to *Git* exist, e.g., *Bitbucket*, but *GitHub* is the most widely used one.
 67 | 
 68 | ### Why use git/GitHub?
 69 | 
 70 | You want to use *GitHub* to avoid this:
 71 | 
 72 | ```{r}
 73 | #| label: versioncontrol
 74 | #| echo: false
 75 | #| fig-cap: 'How not to use GitHub [image from PhD Comics]'
 76 | #| out-width: '80%'
 77 | knitr::include_graphics("../../images/phdversioncontrol.gif")
 78 | ```
 79 | 
 80 | \[[Source: PhD Comics](http://phdcomics.com/comics/archive_print.php?comicid=1531)\]
 81 | 
 82 | *GitHub* gives you a clean way to track your projects. It is also very well suited to collaborative work. Historically, version control was used for software development. However, it has become broader and is now used for many types of projects, including data science projects.
 83 | 
 84 | To learn a bit more about Git/GitHub and why you might want to use it, [read this article by Jenny Bryan](https://peerj.com/preprints/3159/).
 85 | 
 86 | **Note her explanation of what's special with the `README.md` file on GitHub.**
 87 | 
 88 | ### What to (not) do
 89 | 
 90 | **GitHub is ideal if** you have a project with a fair number of files, most of those files are text files (such as code, LaTeX, (R)markdown, etc.) and different people work on different parts of the project.
 91 | 
 92 | **GitHub is less useful if** you have a lot of non-text files (e.g. Word or PowerPoint) and different team members might want to edit the same document at the same time. In that instance, a solution like Google Docs, Word+Dropbox, Word+OneDrive, etc. might be better.
 93 | 
 94 | ### How to use Git/GitHub
 95 | 
 96 | Git and GitHub are fundamentally **based on commands you type into the command line**. Lots of online resources show you how to use the command line. This is the most powerful approach, and the way I almost always interact with git/GitHub. However, many folks find this the most confusing way to use git/GitHub. Alternatively, there are graphical interfaces.
 97 | 
 98 | -   [GitHub itself](https://desktop.github.com/) provides a graphical interface with basic functionality.
 99 | -   RStudio also has [Git/GitHub integration](https://happygitwithr.com/rstudio-git-github.html). Of course this only works for R project GitHub integration.
100 | -   There are also third party GitHub clients with many advanced features, most of which you won't need initially, but might eventually.
101 | 
102 | **Note**: As a student, you can (and should) upgrade to the Pro version of GitHub for free (access to unlimited private repositories is one benefit); see the [GitHub student developer pack](https://education.github.com/pack) on how to do this.
103 | 
104 | # Getting Started
105 | 
106 | One of my favorite resources for getting started with git/GitHub is the Happy Git with R from Jenny Bryan:
107 | 
108 | -   <https://happygitwithr.com>
109 | 
110 | ```{r}
111 | #| label: happygitwithr
112 | #| echo: false
113 | #| fig-cap: 'A screenshot of the Happy Git with R online book from Jenny Bryan'
114 | #| out-width: '80%'
115 | knitr::include_graphics("../../images/happygitwithr.png")
116 | ```
117 | 
118 | It truly is one of the **best resources** out there for getting started with git/GitHub, especially with the integration to RStudio. Therefore, at this point, I will encourage all of you to go read through the online book.
119 | 
120 | Some of you may only need to skim it, others will need to spend some time reading through it. Either way, I will bet that you won't regret the time investment.
121 | 
122 | # Using git/GitHub in our course
123 | 
124 | In this course, you will use git/GitHub in the following ways:
125 | 
126 | 1.  Project 0 (optional) - You will create a website introducing yourself to folks in the course and deploy it on GitHub.
127 | 2.  Projects 1-3 - You can practice using git locally (on your compute environment) to track your changes over time and, if you wish (but highly suggested), you can practice pushing your project solutions to a private GitHub repository on your GitHub account (e.g. `git add`, `git commit`, `git push`, `git pull`, etc.).
128 | 
129 | Learning these skills will be useful down the road if you ever work collaboratively on a project (i.e. writing code as a group). In this scenario, you will use the skills you have been practicing in your projects to work together as a team in a single GitHub repository.
130 | 
131 | # Post-lecture materials
132 | 
133 | ### Final Questions
134 | 
135 | Here are some post-lecture questions to help you think about the material discussed.
136 | 
137 | ::: callout-note
138 | ### Questions
139 | 
140 | 1.  What is version control?
141 | 
142 | 2.  What is the difference between git and GitHub?
143 | 
144 | 3.  What other version control software/tools are available besides git?
145 | :::
146 | 
147 | ### Additional Resources
148 | 
149 | ::: callout-tip
150 | -   [git and GitHub in the `dsbook`](https://rafalab.github.io/dsbook/git.html) by Rafael Irizarry
151 | :::
152 | 
153 | ## rtistry
154 | 
155 | ```{r}
156 | #| label: flametree
157 | #| echo: false
158 | #| fig-cap-location: "top"
159 | knitr::include_graphics("https://github.com/djnavarro/art/raw/master/static/gallery/flametree/extra/001_flametree_20_13.jpg")
160 | ```
161 | 
162 | \['Flametree' from Danielle Navarro <https://art.djnavarro.net>\]
163 | 


--------------------------------------------------------------------------------
/posts/2022-09-01-literate-programming/my-refs.bib:
--------------------------------------------------------------------------------
 1 | @article{knuth1984,
 2 |   author    = {Donald E. Knuth},
 3 |   title     = {Literate Programming},
 4 |   journal   = {Comput. J.},
 5 |   volume    = {27},
 6 |   number    = {2},
 7 |   pages     = {97--111},
 8 |   year      = {1984},
 9 |   url       = {https://doi.org/10.1093/comjnl/27.2.97},
10 |   doi       = {10.1093/comjnl/27.2.97},
11 |   timestamp = {Wed, 14 Nov 2018 00:00:00 +0100},
12 |   biburl    = {https://dblp.org/rec/journals/cj/Knuth84.bib},
13 |   bibsource = {dblp computer science bibliography, https://dblp.org}
14 | }


--------------------------------------------------------------------------------
/posts/2022-09-01-reference-management/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Reference management"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "How to use citations and incorporate references from a bibliography in R Markdown."
  9 | date: 2022-09-01
 10 | categories: [module 1, week 1, R Markdown, programming]
 11 | bibliography: my-refs.bib
 12 | ---
 13 | 
 14 | <!-- Add interesting quote -->
 15 | 
 16 | # Pre-lecture materials
 17 | 
 18 | ### Read ahead
 19 | 
 20 | ::: callout-note
 21 | ## Read ahead
 22 | 
 23 | **Before class, you can prepare by reading the following materials:**
 24 | 
 25 | 1.  Authoring in [R Markdown from RStudio](https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html)
 26 | 2.  Citations from [Reproducible Research in R](https://monashdatafluency.github.io/r-rep-res/citations.html) from the [Monash Data Fluency](https://monashdatafluency.github.io) initiative
 27 | 3.  Bibliography from [R Markdown Cookbook](https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html)
 28 | :::
 29 | 
 30 | ### Acknowledgements
 31 | 
 32 | Material for this lecture was borrowed and adopted from
 33 | 
 34 | -   <https://andreashandel.github.io/MADAcourse>
 35 | -   <https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html>
 36 | -   <https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html>
 37 | -   <https://monashdatafluency.github.io/r-rep-res/citations.html>
 38 | 
 39 | # Learning objectives
 40 | 
 41 | ::: callout-note
 42 | # Learning objectives
 43 | 
 44 | **At the end of this lesson you will:**
 45 | 
 46 | -   Know what types of bibliography file formats can be used in an R Markdown file
 47 | -   Learn how to add citations to an R Markdown file
 48 | -   Know how to change the citation style (e.g. APA, Chicago, etc)
 49 | :::
 50 | 
 51 | # Introduction
 52 | 
 53 | For almost any data analysis, especially if it is meant for publication in the academic literature, you will have to cite other people's work and include the references (bibliographies or citations) in your work. In this class, you are likely to need to include references and cite other people's work like in a regular research paper.
 54 | 
 55 | R provides a nice function, `citation()`, that helps us generate a citation for R packages that we have used. Let's try generating the citation text for the `rmarkdown` package by using the following command:
 56 | 
 57 | ```{r, echo = TRUE}
 58 | citation("rmarkdown")
 59 | ```
 60 | 
 61 | I assume you are familiar with how citing references works, and hopefully, you are already using a reference manager. If not, let me know in the discussion boards.
 62 | 
 63 | To have something that plays well with R Markdown, you need a file format that stores all the references. Follow the link below to learn more about the possible file formats you can use within an R Markdown file:
 64 | 
 65 | -   <https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html>
 66 | 
 67 | ### Citation management software
 68 | 
 69 | As you can see, there are a ton of file formats including `.medline` (MEDLINE), `.bib` (BibTeX), `.ris` (RIS), `.enl` (EndNote).
 70 | 
 71 | I will not discuss the underlying citation management software itself, but I will talk briefly about how you might create one of these file formats.
 72 | 
 73 | If you recall the output from `citation("rmarkdown")` above, we might consider manually copying and pasting the output into citation management software, but instead we can use the `write_bib()` function from the `knitr` package to create a bibliography file ending in `.bib`.
 74 | 
 75 | Let's run the following code in order to generate a `my-refs.bib` file
 76 | 
 77 | ```{r}
 78 | knitr::write_bib("rmarkdown", file = "my-refs.bib")
 79 | ```
 80 | 
 81 | Now we can see we have the file saved locally.
 82 | 
 83 | ```{r}
 84 | list.files()
 85 | ```
 86 | 
 87 | If you open up the `my-refs.bib` file, you will see
 88 | 
 89 |     @Manual{R-rmarkdown,
 90 |       title = {rmarkdown: Dynamic Documents for R},
 91 |       author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},
 92 |       year = {2021},
 93 |       note = {R package version 2.8},
 94 |       url = {https://CRAN.R-project.org/package=rmarkdown},
 95 |     }
 96 | 
 97 |     @Book{rmarkdown2018,
 98 |       title = {R Markdown: The Definitive Guide},
 99 |       author = {Yihui Xie and J.J. Allaire and Garrett Grolemund},
100 |       publisher = {Chapman and Hall/CRC},
101 |       address = {Boca Raton, Florida},
102 |       year = {2018},
103 |       note = {ISBN 9781138359338},
104 |       url = {https://bookdown.org/yihui/rmarkdown},
105 |     }
106 | 
107 |     @Book{rmarkdown2020,
108 |       title = {R Markdown Cookbook},
109 |       author = {Yihui Xie and Christophe Dervieux and Emily Riederer},
110 |       publisher = {Chapman and Hall/CRC},
111 |       address = {Boca Raton, Florida},
112 |       year = {2020},
113 |       note = {ISBN 9780367563837},
114 |       url = {https://bookdown.org/yihui/rmarkdown-cookbook},
115 |     }
116 | 
117 | ::: resources
118 | **Note there are three keys that we will use later on**:
119 | 
120 | -   `R-rmarkdown`
121 | -   `rmarkdown2018`
122 | -   `rmarkdown2020`
123 | :::
124 | 
125 | ### Linking `.bib` file with `.rmd` (and `.qmd`) files
126 | 
127 | In order to use references within an R Markdown file, you will need to specify the name and location of a bibliography file using the `bibliography` metadata field in the YAML metadata section. For example:
128 | 
129 | ``` yaml
130 | ---
131 | title: "My top ten favorite R packages"
132 | output: html_document
133 | bibliography: my-refs.bib
134 | ---
135 | ```
136 | 
137 | You can include multiple reference files using the following syntax. Alternatively, you can concatenate two bib files into one.
138 | 
139 | ``` yaml
140 | ---
141 | bibliography: ["my-refs1.bib", "my-refs2.bib"]
142 | ---
143 | ```
144 | 
145 | ### Inline citation
146 | 
147 | Now we can start using those bib keys that we have learned just before, using the following syntax
148 | 
149 | -   `[@key]` for single citation
150 | -   `[@key1; @key2]` for multiple citations, separated by semicolons
151 | -   `[-@key]` in order to suppress author name, and just display the year
152 | -   `[see @key1 p 12; also this ref @key2]` is also a valid syntax
153 | 
154 | Let's start by citing the `rmarkdown` package using the following code and pressing the `Knit` button:
155 | 
156 | ------------------------------------------------------------------------
157 | 
158 | I have been using the amazing Rmarkdown package [@R-rmarkdown]! I should also go and read [@rmarkdown2018; and @rmarkdown2020] books.
159 | 
160 | ------------------------------------------------------------------------
161 | 
162 | Pretty cool, eh??
163 | 
164 | ### Citation styles
165 | 
166 | By default, Pandoc will use a Chicago author-date format for citations and references.
167 | 
168 | To use another style, you will need to specify a CSL (Citation Style Language) file in the `csl` metadata field, e.g.,
169 | 
170 | ``` yaml
171 | ---
172 | title: "My top ten favorite R packages"
173 | output: html_document
174 | bibliography: my-refs.bib
175 | csl: biomed-central.csl
176 | ---
177 | ```
178 | 
179 | ::: resources
180 | To find your required formats, we recommend using the [Zotero Style Repository](https://www.zotero.org/styles), which makes it easy to search for and download your desired style.
181 | :::
182 | 
183 | CSL files can be tweaked to meet custom formatting requirements. For example, we can change the number of authors required before "et al." is used to abbreviate them. This can be simplified through the use of visual editors such as the one available at https://editor.citationstyles.org.
184 | 
185 | ### Other cool features
186 | 
187 | #### Add an item to a bibliography without using it
188 | 
189 | By default, the bibliography will only display items that are directly referenced in the document. If you want to include items in the bibliography without actually citing them in the body text, you can define a dummy nocite metadata field and put the citations there.
190 | 
191 | ``` yaml
192 | ---
193 | nocite: |
194 |   @item1, @item2
195 | ---
196 | ```
197 | 
198 | #### Add all items to the bibliography
199 | 
200 | If we do not wish to explicitly state all of the items within the bibliography but would still like to show them in our references, we can use the following syntax:
201 | 
202 | ``` yaml
203 | ---
204 | nocite: '@*'
205 | ---
206 | ```
207 | 
208 | This will force all items to be displayed in the bibliography.
209 | 
210 | ::: resources
211 | You can also have an appendix appear after bibliography. For more on this, see:
212 | 
213 | -   <https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html>
214 | :::
215 | 
216 | # Other useful tips
217 | 
218 | We have learned that inside your file that contains all your references (e.g. `my-refs.bib`), typically each reference gets a key, which is a shorthand that is generated by the reference manager or you can create yourself.
219 | 
220 | For instance, I use a format of lower-case first author last name followed by 4 digit year for each reference followed by a keyword (e.g. the name of a software package). Alternatively, you can omit the keyword. But note that if I cite a paper by the same first author that was published in the same year, then a lower case letter is added to the end. For instance, for a paper that I wrote as 1st author in 2022, my bibtex key might be `hicks2022` or `hicks2022a`. You can decide what scheme to use, just pick one and use it *forever*.
221 | 
222 | In your R Markdown document, you can then cite the reference by adding the key, such as `...in the paper by Hicks et al. [@hicks2022]...`.
223 | 
224 | # Post-lecture materials
225 | 
226 | ### Practice
227 | 
228 | Here are some post-lecture tasks to practice some of the material discussed.
229 | 
230 | ::: callout-note
231 | ### Questions
232 | 
233 | **Try out the following:**
234 | 
235 | 1.  What do you notice that's different when you run `citation("tidyverse")` (compared to `citation("rmarkdown")`)?
236 | 
237 | 2.  Install the following packages:
238 | 
239 | ```{r}
240 | #| eval: false
241 | install.packages(c("bibtex", "RefManageR"))  # installs both packages; chunk is shown but not run on render
242 | ```
243 | 
244 | What do they do? How might they be helpful to you in terms of reference management?
245 | 
246 | 3.  Instead of using a `.bib` file, try using a different bibliography file format in an R Markdown document.
247 | 
248 | 4.  Practice using a different CSL file to change the citation style.
249 | :::
250 | 
251 | ### Additional Resources
252 | 
253 | ::: callout-tip
254 | -   Add here.
255 | :::
256 | 
257 | ## rtistry
258 | 
259 | ```{r}
260 | #| label: flametree
261 | #| echo: false
262 | #| fig-cap-location: "top"
263 | 
264 | ```
265 | 
266 | \[Add here.\]
267 | 


--------------------------------------------------------------------------------
/posts/2022-09-01-reference-management/my-refs.bib:
--------------------------------------------------------------------------------
 1 | @Manual{R-rmarkdown,
 2 |   title = {rmarkdown: Dynamic Documents for R},
 3 |   author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},
 4 |   year = {2022},
 5 |   note = {R package version 2.15},
 6 |   url = {https://CRAN.R-project.org/package=rmarkdown},
 7 | }
 8 | 
 9 | @Book{rmarkdown2018,
10 |   title = {R Markdown: The Definitive Guide},
11 |   author = {Yihui Xie and J.J. Allaire and Garrett Grolemund},
12 |   publisher = {Chapman and Hall/CRC},
13 |   address = {Boca Raton, Florida},
14 |   year = {2018},
15 |   note = {ISBN 9781138359338},
16 |   url = {https://bookdown.org/yihui/rmarkdown},
17 | }
18 | 
19 | @Book{rmarkdown2020,
20 |   title = {R Markdown Cookbook},
21 |   author = {Yihui Xie and Christophe Dervieux and Emily Riederer},
22 |   publisher = {Chapman and Hall/CRC},
23 |   address = {Boca Raton, Florida},
24 |   year = {2020},
25 |   note = {ISBN 9780367563837},
26 |   url = {https://bookdown.org/yihui/rmarkdown-cookbook},
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/posts/2022-09-08-joining-data-in-r/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Joining data in R"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "Introduction to relational data and join functions in the dplyr R package"
  9 | date: 2022-09-08
 10 | categories: [module 2, week 2, R, programming, dplyr, here, tidyverse]
 11 | ---
 12 | 
 13 | <!-- Add interesting quote -->
 14 | 
 15 | # Pre-lecture materials
 16 | 
 17 | ### Read ahead
 18 | 
 19 | ::: callout-note
 20 | ## Read ahead
 21 | 
 22 | **Before class, you can prepare by reading the following materials:**
 23 | 
 24 | 1.  <https://r4ds.had.co.nz/relational-data>
 25 | 2.  <https://rafalab.github.io/dsbook/joining-tables>
 26 | :::
 27 | 
 28 | ### Acknowledgements
 29 | 
 30 | Material for this lecture was borrowed and adopted from
 31 | 
 32 | -   <https://rdpeng.github.io/Biostat776/lecture-joining-data-in-r-basics>
 33 | -   <https://r4ds.had.co.nz/relational-data>
 34 | -   <https://rafalab.github.io/dsbook/joining-tables>
 35 | 
 36 | # Learning objectives
 37 | 
 38 | ::: callout-note
 39 | # Learning objectives
 40 | 
 41 | **At the end of this lesson you will:**
 42 | 
 43 | -   Be able to define relational data and keys
 44 | -   Be able to define the three types of join functions for relational data
 45 | -   Be able to implement mutating join functions
 46 | :::
 47 | 
 48 | # Relational data
 49 | 
 50 | Data analyses rarely involve only a single table of data.
 51 | 
 52 | Typically you have many tables of data, and you **must combine the datasets** to answer the questions that you are interested in.
 53 | 
 54 | Collectively, **multiple tables of data are called relational data** because it is the *relations*, not just the individual datasets, that are important.
 55 | 
 56 | Relations are **always defined between a pair of tables**. All other relations are built up from this simple idea: the relations of three or more tables are always a property of the relations between each pair.
 57 | 
 58 | Sometimes both elements of a pair can be the same table! This is needed if, for example, you have a table of people, and each person has a reference to their parents.
 59 | 
 60 | To work with relational data you **need verbs that work with pairs of tables**.
 61 | 
 62 | ::: callout-tip
 63 | ### Three important families of verbs
 64 | 
 65 | There are three families of verbs designed to work with relational data:
 66 | 
 67 | -   [**Mutating joins**](https://r4ds.had.co.nz/relational-data.html#mutating-joins): A mutating join allows you to **combine variables from two tables**. It first matches observations by their keys, then copies across variables from one table to the other on the right side of the table (similar to `mutate()`). We will discuss a few of these below.
 68 |     -   See @sec-mutjoins for Table of mutating joins.
 69 | -   [**Filtering joins**](https://r4ds.had.co.nz/relational-data.html#filtering-joins): Filtering joins **match observations** in the same way as mutating joins, **but affect the observations, not the variables** (i.e. filter observations from one data frame based on whether or not they match an observation in the other).
 70 |     -   Two types: `semi_join(x, y)` and `anti_join(x, y)`.
 71 | -   [**Set operations**](https://r4ds.had.co.nz/relational-data.html#set-operations): Treat **observations as if they were set elements**. Typically used less frequently, but occasionally useful when you want to break a single complex filter into simpler pieces. All these operations work with a complete row, comparing the values of every variable. These expect the x and y inputs to have the same variables, and treat the observations like sets:
 72 |     -   Examples of set operations: `intersect(x, y)`, `union(x, y)`, and `setdiff(x, y)`.
 73 | :::
 74 | 
 75 | ## Keys
 76 | 
 77 | The **variables used to connect each pair of tables** are called **keys**. A key is a variable (or set of variables) that uniquely identifies an observation. In simple cases, a single variable is sufficient to identify an observation.
 78 | 
 79 | ::: callout-tip
 80 | ### Note
 81 | 
 82 | There are two types of keys:
 83 | 
 84 | -   A **primary key** uniquely identifies an observation in its own table.
 85 | -   A **foreign key** uniquely identifies an observation in another table.
 86 | :::
 87 | 
 88 | Let's consider an example to help us understand the difference between a **primary key** and **foreign key**.
 89 | 
 90 | ## Example of keys
 91 | 
 92 | Imagine you are conducting a study and **collecting data on subjects and a health outcome**.
 93 | 
 94 | Often, subjects will **make multiple visits** (a so-called longitudinal study) and so we will record the outcome for each visit. Similarly, we may record other information about them, such as the kind of housing they live in.
 95 | 
 96 | ### The first table
 97 | 
 98 | This code creates a simple table with some made up data about some hypothetical subjects' outcomes.
 99 | 
100 | ```{r,message=FALSE}
101 | library(tidyverse)
102 | 
103 | outcomes <- tibble(
104 |         id = rep(c("a", "b", "c"), each = 3),   # each subject id repeated once per visit
105 |         visit = rep(0:2, times = 3),            # visits 0, 1, 2 cycled for every subject
106 |         outcome = rnorm(n = 3 * 3, mean = 3)    # made-up outcome: 9 normal draws with mean 3
107 | )
108 | 
109 | print(outcomes)
110 | ```
111 | 
112 | Note that subjects are labeled by a unique identifier in the `id` column.
113 | 
114 | ### A second table
115 | 
116 | Here is some code to create a second table (we will be joining the first and second tables shortly). This table contains some data about the hypothetical subjects' housing situation by recording the type of house they live in.
117 | 
118 | ```{r second_table,exercise=TRUE,message=FALSE}
119 | subjects <- tibble(
120 |         id = c("a", "b", "c"),
121 |         house = c("detached", "rowhouse", "rowhouse")
122 | )
123 | 
124 | print(subjects)
125 | ```
126 | 
127 | ::: callout-note
128 | ### Question
129 | 
130 | What is the **primary key** and **foreign key**?
131 | 
132 | -   The `subjects$id` is a **primary key** because it uniquely identifies each subject in the `subjects` table.
133 | -   The `outcomes$id` is a **foreign key** because it appears in the `outcomes` table, where it matches each observation to a unique subject in the `subjects` table.
134 | :::
135 | 
136 | # Mutating joins {#sec-mutjoins}
137 | 
138 | The `dplyr` package provides a set of **functions for joining two data frames** into a single data frame based on a set of key columns.
139 | 
140 | There are several functions in the `*_join()` family.
141 | 
142 | -   These functions all merge together two data frames
143 | -   They differ in how they handle observations that exist in one but not both data frames.
144 | 
145 | Here are the **four functions from this family** that you will likely use the most often:
146 | 
147 | ```{r}
148 | #| echo: false
149 | #| out-width: '60%'
150 | #| fig-align: 'center'
151 | library(knitr)
152 | # One row per join verb: its name (formatted as inline code) and which observations it keeps
153 | join_funcs <- data.frame(
154 |   func = c("`left_join()`", "`right_join()`", "`inner_join()`", "`full_join()`"),
155 |   does = c("Includes all observations in the left data frame, whether or not there is a match in the right data frame",
156 |            "Includes all observations in the right data frame, whether or not there is a match in the left data frame",
157 |            "Includes only observations that are in both data frames",
158 |            "Includes all observations from both data frames")
159 | )
160 | knitr::kable(join_funcs, col.names = c("Function", "What it includes in merged data frame"))
161 | ```
162 | 
163 | ![](https://d33wubrfki0l68.cloudfront.net/aeab386461820b029b7e7606ccff1286f623bae1/ef0d4/diagrams/join-venn.png)
164 | 
165 | \[[Source from R for Data Science](https://r4ds.had.co.nz/relational-data#relational-data)\]
166 | 
167 | ## Left Join
168 | 
169 | Recall the `outcomes` and `subjects` datasets above.
170 | 
171 | ```{r}
172 | outcomes
173 | subjects
174 | ```
175 | 
176 | Suppose we want to create a table that combines the information about houses (`subjects`) with the information about the outcomes (`outcomes`).
177 | 
178 | We can use the `left_join()` function to merge the `outcomes` and `subjects` tables and produce the combined table.
179 | 
180 | ```{r leftjoin}
181 | left_join(x = outcomes, y = subjects, by = "id")
182 | ```
183 | 
184 | ::: callout-tip
185 | ### Note
186 | 
187 | The `by` argument indicates the column (or columns) that the two tables have in common.
188 | :::
189 | 
190 | ### Left Join with Incomplete Data
191 | 
192 | In the previous examples, the `subjects` table didn't have a `visit` column. But suppose it did? Maybe people move around during the study. We could imagine a table like this one.
193 | 
194 | ```{r}
195 | subjects <- tibble(
196 |         id = c("a", "b", "c"),
197 |         visit = c(0, 1, 0),
198 |         house = c("detached", "rowhouse", "rowhouse"),
199 | )
200 | 
201 | print(subjects)
202 | ```
203 | 
204 | When we left join the tables now we get:
205 | 
206 | ```{r}
207 | left_join(outcomes, subjects, by = c("id", "visit"))
208 | ```
209 | 
210 | ::: callout-tip
211 | ### Note
212 | 
213 | Two things to point out here:
214 | 
215 | 1.  If we do not have information about a subject's housing in a given visit, the `left_join()` function automatically inserts an `NA` value to indicate that it is missing.
216 | 
217 | 2.  We can "join" on multiple variables (e.g. here we joined on the `id` and the `visit` columns).
218 | :::
219 | 
220 | We may even have a situation where we are missing housing data for a subject completely. The following table has no information about subject `a`.
221 | 
222 | ```{r}
223 | subjects <- tibble(
224 |         id = c("b", "c"),
225 |         visit = c(1, 0),
226 |         house = c("rowhouse", "rowhouse"),
227 | )
228 | 
229 | subjects
230 | ```
231 | 
232 | But we can still join the tables together and the `house` values for subject `a` will all be `NA`.
233 | 
234 | ```{r}
235 | left_join(x = outcomes, y = subjects, by = c("id", "visit"))
236 | ```
237 | 
238 | ::: callout-tip
239 | ### Important
240 | 
241 | The bottom line for `left_join()` is that it **always retains the values in the "left" argument** (in this case the `outcomes` table).
242 | 
243 | -   If there are no corresponding values in the "right" argument, `NA` values will be filled in.
244 | :::
245 | 
246 | ## Inner Join
247 | 
248 | The `inner_join()` function only **retains the rows of both tables** that have corresponding values. Here we can see the difference.
249 | 
250 | ```{r}
251 | inner_join(x = outcomes, y = subjects, by = c("id", "visit"))
252 | ```
253 | 
254 | ## Right Join
255 | 
256 | The `right_join()` function is like the `left_join()` function except that it **gives priority to the "right" hand argument**.
257 | 
258 | ```{r}
259 | right_join(x = outcomes, y = subjects, by = c("id", "visit"))
260 | ```
261 | 
262 | # Summary
263 | 
264 | -   `left_join()` is useful for merging a "large" data frame with a "smaller" one while retaining all the rows of the "large" data frame
265 | 
266 | -   `inner_join()` gives you the intersection of the rows between two data frames
267 | 
268 | -   `right_join()` is like `left_join()` with the arguments reversed (likely only useful at the end of a pipeline)
269 | 
270 | # Post-lecture materials
271 | 
272 | ### Final Questions
273 | 
274 | Here are some post-lecture questions to help you think about the material discussed.
275 | 
276 | ::: callout-note
277 | ### Questions
278 | 
279 | 1.  If you had three data frames to combine with a shared key, how would you join them using the verbs you now know?
280 | 
281 | 2.  Using `df1` and `df2` below, what is the difference between `inner_join(df1, df2)`, `semi_join(df1, df2)` and `anti_join(df1, df2)`?
282 | 
283 | ```{r}
284 | # Create first example data frame
285 | df1 <- data.frame(ID = 1:3,
286 |                   X1 = c("a1", "a2", "a3"))
287 | # Create second example data frame
288 | df2 <- data.frame(ID = 2:4, 
289 |                   X2 = c("b1", "b2", "b3"))
290 | ```
291 | 
292 | 3.  Try changing the order from the above e.g. `inner_join(df2, df1)`, `semi_join(df2, df1)` and `anti_join(df2, df1)`. What changed? What did not change?
293 | :::
294 | 
295 | ### Additional Resources
296 | 
297 | ::: callout-tip
298 | -   <https://rdpeng.github.io/Biostat776/lecture-joining-data-in-r-basics>
299 | -   <https://r4ds.had.co.nz/relational-data>
300 | -   <https://rafalab.github.io/dsbook/joining-tables>
301 | :::
302 | 


--------------------------------------------------------------------------------
/posts/2022-09-08-tidy-data-and-the-tidyverse/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Tidy data and the Tidyverse"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "Introduction to tidy data and how to convert between wide and long data with the tidyr R package"
  9 | date: 2022-09-08
 10 | categories: [module 2, week 2, R, programming, tidyr, here, tidyverse]
 11 | ---
 12 | 
 13 | <!-- Add interesting quote -->
 14 | 
 15 | > "Happy families are all alike; every unhappy family is unhappy in its own way." ---- Leo Tolstoy
 16 | 
 17 | > "Tidy datasets are all alike, but every messy dataset is messy in its own way." ---- Hadley Wickham
 18 | 
 19 | # Pre-lecture materials
 20 | 
 21 | ### Read ahead
 22 | 
 23 | ::: callout-note
 24 | ## Read ahead
 25 | 
 26 | **Before class, you can prepare by reading the following materials:**
 27 | 
 28 | 1.  [Tidy Data](https://www.jstatsoft.org/article/view/v059i10) paper published in the Journal of Statistical Software
 29 | 2.  <https://r4ds.had.co.nz/tidy-data>
 30 | 3.  [tidyr cheat sheet from RStudio](http://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf)
 31 | :::
 32 | 
 33 | ### Acknowledgements
 34 | 
 35 | Material for this lecture was borrowed and adopted from
 36 | 
 37 | -   <https://rdpeng.github.io/Biostat776/lecture-tidy-data-and-the-tidyverse>
 38 | -   <https://r4ds.had.co.nz/tidy-data>
 39 | 
 40 | # Learning objectives
 41 | 
 42 | ::: callout-note
 43 | # Learning objectives
 44 | 
 45 | **At the end of this lesson you will:**
 46 | 
 47 | -   Define tidy data
 48 | -   Be able to transform non-tidy data into tidy data
 49 | -   Be able to transform wide data into long data
 50 | -   Be able to separate character columns into multiple columns
 51 | -   Be able to unite multiple character columns into one column
 52 | :::
 53 | 
 54 | # Tidy data
 55 | 
 56 | As we learned in the last lesson, one unifying concept of the tidyverse is the notion of **tidy data**.
 57 | 
 58 | As defined by Hadley Wickham in his 2014 paper published in the *Journal of Statistical Software*, a [tidy dataset](https://www.jstatsoft.org/article/view/v059i10) has the following properties:
 59 | 
 60 | 1.  Each variable forms a column.
 61 | 
 62 | 2.  Each observation forms a row.
 63 | 
 64 | 3.  Each type of observational unit forms a table.
 65 | 
 66 | ![Artwork by Allison Horst on tidy data](https://github.com/allisonhorst/stats-illustrations/raw/main/rstats-artwork/tidydata_1.jpg){width="80%"}
 67 | 
 68 | \[**Source**: [Artwork by Allison Horst](https://github.com/allisonhorst/stats-illustrations)\]
 69 | 
 70 | The **purpose of defining tidy data** is to highlight the fact that **most data do not start out life as tidy**.
 71 | 
 72 | In fact, much of the work of data analysis may involve simply making the data tidy (at least this has been our experience).
 73 | 
 74 | -   Once a dataset is tidy, it **can be used as input into a variety of other functions** that may transform, model, or visualize the data.
 75 | 
 76 | ::: callout-tip
 77 | ### Example
 78 | 
 79 | As a quick example, consider the following **religion and income survey data**, in which each income range is a column name and each cell holds the number of respondents.
 80 | 
 81 | This is in a classic table format:
 82 | 
 83 | ```{r}
 84 | library(tidyr)
 85 | relig_income
 86 | ```
 87 | :::
 88 | 
 89 | While this format is canonical and is useful for quickly observing the relationship between multiple variables, it is not tidy.
 90 | 
 91 | **This format violates the tidy form** because the values of a variable (income bracket) are stored in the column names.
 92 | 
 93 | -   In this case the variables are religion, income bracket, and the number of respondents; the third variable, the number of respondents, is presented inside the table.
 94 | 
 95 | Converting this data to tidy format would give us
 96 | 
 97 | ```{r, message=FALSE}
 98 | library(tidyverse)
 99 | 
100 | relig_income %>%
101 |   pivot_longer(-religion, names_to = "income", values_to = "respondents") %>%
102 |   mutate(religion = factor(religion), income = factor(income))
103 | ```
104 | 
105 | Some of these functions you have seen before, others might be new to you. Let's talk about each one in the context of the `tidyverse` R packages.
106 | 
107 | # The "Tidyverse"
108 | 
109 | There are a number of R packages that take advantage of the tidy data form and can be used to do interesting things with data. Many (but not all) of these packages are written by Hadley Wickham and **the collection of packages is often referred to as the "tidyverse"** because of their **dependence on and presumption of tidy data**.
110 | 
111 | ::: callout-tip
112 | ### Note
113 | 
114 | A subset of the "Tidyverse" packages includes:
115 | 
116 | -   [ggplot2](https://cran.r-project.org/package=ggplot2): a plotting system based on the grammar of graphics
117 | 
118 | -   [magrittr](https://cran.r-project.org/package=magrittr): defines the `%>%` operator for chaining functions together in a series of operations on data
119 | 
120 | -   [dplyr](https://cran.r-project.org/package=dplyr): a suite of (fast) functions for working with data frames
121 | 
122 | -   [tidyr](https://cran.r-project.org/package=tidyr): easily tidy data with `pivot_wider()` and `pivot_longer()` functions (also `separate()` and `unite()`)
123 | 
124 | A complete list can be found here (<https://www.tidyverse.org/packages>).
125 | :::
126 | 
127 | We will be using these packages quite a bit.
128 | 
129 | The "tidyverse" package can be used to install all of the packages in the tidyverse at once, and loading it attaches the core packages with a single `library()` call.
130 | 
131 | For example, instead of starting an R script with this:
132 | 
133 | ```{r, eval = FALSE}
134 | library(dplyr)
135 | library(tidyr)
136 | library(readr)
137 | library(ggplot2)
138 | ```
139 | 
140 | You can start with this:
141 | 
142 | ```{r, eval = FALSE}
143 | library(tidyverse)
144 | ```
145 | 
146 | In the example above, let's talk about what we did using the `pivot_longer()` function.
147 | 
148 | We will also talk about `pivot_wider()`.
149 | 
150 | ### `pivot_longer()`
151 | 
152 | The `tidyr` package includes functions to convert a data frame between *long* and *wide* formats.
153 | 
154 | -   **Wide format** data tends to have different attributes or variables describing an observation placed in separate columns.
155 | -   **Long format** data tends to have different attributes encoded as levels of a single variable, followed by another column that contains the values of the observation at those different levels.
156 | 
157 | ::: callout-tip
158 | ### Example
159 | 
160 | In the section above, we showed an example that used `pivot_longer()` to convert data into a tidy format.
161 | 
162 | The **key problem** with the tidiness of the data is that the income variables are not in their own columns, but rather are embedded in the structure of the columns.
163 | 
164 | To **fix this**, you can use the `pivot_longer()` function to **gather values spread across several columns into a single column**, here with the column names gathered into an `income` column.
165 | 
166 | **Note**: when gathering, exclude any columns that you do not want "gathered" (`religion` in this case) by including the column names with a minus sign in the `pivot_longer()` function.
167 | 
168 | For example:
169 | 
170 | ```{r}
171 | # Gather everything EXCEPT religion to tidy data
172 | relig_income %>%
173 |   pivot_longer(-religion, names_to = "income", values_to = "respondents")
174 | ```
175 | :::
176 | 
177 | Even if your data is in a tidy format, `pivot_longer()` is occasionally useful for pulling data together to take advantage of faceting, or plotting separate plots based on a grouping variable. We will talk more about that in a future lecture.
178 | 
179 | ### `pivot_wider()`
180 | 
181 | The `pivot_wider()` function is less commonly needed to tidy data. It can, however, be useful for creating summary tables.
182 | 
183 | ::: callout-tip
184 | ### Example
185 | 
186 | You use the `summarize()` function in `dplyr` to summarize the total number of respondents per income category.
187 | 
188 | ```{r}
189 | relig_income %>%  # wide survey table: one column per income bracket
190 |   pivot_longer(-religion, names_to = "income", values_to = "respondents") %>%  # long: one row per religion/income pair
191 |   mutate(religion = factor(religion), income = factor(income)) %>%  # store both identifiers as factors
192 |   group_by(income) %>%  # one group per income bracket
193 |   summarize(total_respondents = sum(respondents)) %>%  # total respondents across religions
194 |   pivot_wider(names_from = "income",  # each income bracket becomes a column ...
195 |               values_from = "total_respondents") %>%  # ... filled with its total
196 |   knitr::kable()  # format the wide summary as a table for the report
197 | ```
198 | :::
199 | 
200 | Notice in this example how `pivot_wider()` has been used at the **very end of the code sequence** to convert the summarized data into a shape that **offers a better tabular presentation for a report**.
201 | 
202 | ::: callout-tip
203 | ### Note
204 | 
205 | In the `pivot_wider()` call, you first specify the name of the column to use for the new column names (`income` in this example) and then specify the column to use for the cell values (`total_respondents` here).
206 | :::
207 | 
208 | ::: callout-tip
209 | ### Example of `pivot_longer()`
210 | 
211 | Let's try another dataset. It contains an excerpt of the [Gapminder data](https://cran.r-project.org/web/packages/gapminder/README.html#gapminder) on life expectancy, GDP per capita, and population by country.
212 | 
213 | ```{r}
214 | library(gapminder)
215 | gapminder
216 | ```
217 | 
218 | If we wanted to make `lifeExp`, `pop` and `gdpPercap` (all measurements that we observe) go from a wide table into a long table, what would we do?
219 | 
220 | ```{r}
221 | # try it yourself
222 | 
223 | ```
224 | :::
225 | 
226 | ::: callout-tip
227 | ### Example
228 | 
229 | One more! Try using `pivot_longer()` to convert the following data that contains made-up revenues for three companies by quarter for years 2006 to 2009.
230 | 
231 | Afterward, use `group_by()` and `summarize()` to calculate the average revenue for each company across all years and all quarters.
232 | 
233 | **Bonus**: Calculate a mean revenue for each company AND each year (averaged across all 4 quarters).
234 | 
235 | ```{r}
236 | set.seed(123)  # set the seed in the chunk body so the sampled revenues are reproducible
237 | df <- tibble(
238 |   "company" = rep(1:3, each=4),            # company id, four rows per company
239 |   "year"  = rep(2006:2009, 3),             # years 2006-2009, repeated for each company
240 |   "Q1"    = sample(x = 0:100, size = 12),  # made-up quarterly revenues, one column per quarter
241 |   "Q2"    = sample(x = 0:100, size = 12),
242 |   "Q3"    = sample(x = 0:100, size = 12),
243 |   "Q4"    = sample(x = 0:100, size = 12))
244 | df
245 | ```
246 | 
247 | ```{r}
248 | # try it yourself 
249 | 
250 | ```
251 | :::
252 | 
253 | ### `separate()` and `unite()`
254 | 
255 | The same `tidyr` package also contains two useful functions:
256 | 
257 | -   `unite()`: combine contents of two or more columns into a single column
258 | -   `separate()`: separate contents of a column into two or more columns
259 | 
260 | First, we combine the first three columns into one new column using `unite()`.
261 | 
262 | ```{r}
263 | gapminder %>% 
264 |   unite(col="country_continent_year", 
265 |         country:year, 
266 |         sep="_")
267 | ```
268 | 
269 | Next, we show how to split the combined column back into three separate columns with `separate()`, using the `col`, `into` and `sep` arguments.
270 | 
271 | ```{r}
272 | gapminder %>% 
273 |   unite(col="country_continent_year",  # name of the new combined column
274 |         country:year,  # columns country through year are pasted together
275 |         sep="_") %>%  # joined with an underscore
276 |   separate(col="country_continent_year",  # split the combined column again
277 |            into=c("country", "continent", "year"),  # names of the resulting columns
278 |            sep="_")  # split at each underscore; results are character unless convert = TRUE
279 | ```
280 | 
281 | # Post-lecture materials
282 | 
283 | ### Final Questions
284 | 
285 | Here are some post-lecture questions to help you think about the material discussed.
286 | 
287 | ::: callout-note
288 | ### Questions
289 | 
290 | 1.  Using prose, describe how the variables and observations are organised in a tidy dataset versus a non-tidy dataset.
291 | 
292 | 2.  What do the extra and fill arguments do in `separate()`? Experiment with the various options for the following two toy datasets.
293 | 
294 | ```{r, eval=FALSE}
295 | tibble(x = c("a,b,c", "d,e,f,g", "h,i,j")) %>% 
296 |   separate(x, c("one", "two", "three"))
297 | 
298 | tibble(x = c("a,b,c", "d,e", "f,g,i")) %>% 
299 |   separate(x, c("one", "two", "three"))
300 | ```
301 | 
302 | 3.  Both `unite()` and `separate()` have a remove argument. What does it do? Why would you set it to FALSE?
303 | 
304 | 4.  Compare and contrast `separate()` and `extract()`. Why are there three variations of separation (by position, by separator, and with groups), but only one `unite()`?
305 | :::
306 | 
307 | ### Additional Resources
308 | 
309 | ::: callout-tip
310 | -   [Tidy Data](https://www.jstatsoft.org/article/view/v059i10) paper published in the Journal of Statistical Software
311 | -   <https://r4ds.had.co.nz/tidy-data.html>
312 | -   [tidyr cheat sheet from RStudio](http://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf)
313 | :::
314 | 


--------------------------------------------------------------------------------
/posts/2022-09-13-plotting-systems/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Plotting Systems"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "Overview of three plotting systems in R"
  9 | date: 2022-09-13
 10 | categories: [module 3, week 3, R, programming, ggplot2, data viz]
 11 | ---
 12 | 
 13 | > The data may not contain the answer. And, if you torture the data long enough, it will tell you anything. ---*John W. Tukey*
 14 | 
 15 | # Pre-lecture materials
 16 | 
 17 | ### Read ahead
 18 | 
 19 | ::: callout-note
 20 | ## Read ahead
 21 | 
 22 | **Before class, you can prepare by reading the following materials:**
 23 | 
 24 | 1.  <https://r4ds.had.co.nz/data-visualisation>
 25 | 2.  Paul Murrell (2011). *R Graphics*, CRC Press.
 26 | 3.  Hadley Wickham (2009). *ggplot2*, Springer.
 27 | 4.  Deepayan Sarkar (2008). *Lattice: Multivariate Data Visualization with R*, Springer.
 28 | :::
 29 | 
 30 | ### Acknowledgements
 31 | 
 32 | Material for this lecture was borrowed and adopted from
 33 | 
 34 | -   <https://rdpeng.github.io/Biostat776/lecture-plotting-systems>
 35 | 
 36 | # Learning objectives
 37 | 
 38 | ::: callout-note
 39 | # Learning objectives
 40 | 
 41 | **At the end of this lesson you will:**
 42 | 
 43 | -   Be able to identify and describe the three plotting systems in R
 44 | :::
 45 | 
 46 | # Plotting Systems
 47 | 
 48 | There are **three different plotting systems in R** and they each have different characteristics and modes of operation.
 49 | 
 50 | ::: callout-tip
 51 | ### Important
 52 | 
 53 | The three systems are
 54 | 
 55 | 1.  The base plotting system
 56 | 2.  The lattice system
 57 | 3.  The ggplot2 system
 58 | 
 59 | **This course will focus primarily on the ggplot2 plotting system**. The other two systems are presented for context.
 60 | :::
 61 | 
 62 | ## The Base Plotting System
 63 | 
 64 | The **base plotting system** is the original plotting system for R. The basic model is sometimes **referred to as the "artist's palette" model**.
 65 | 
 66 | The idea is you start with a blank canvas and build up from there.
 67 | 
 68 | In more R-specific terms, you **typically start with the `plot()` function** (or a similar plot-creating function) to *initiate* a plot and then *annotate* the plot with various annotation functions (`text`, `lines`, `points`, `axis`).
 69 | 
 70 | The base plotting system is **often the most convenient plotting system** to use because it mirrors how we sometimes think of building plots and analyzing data.
 71 | 
 72 | If we do not have a completely well-formed idea of how we want to look at some data, often we will start by "throwing some data on the page" and then slowly add more information to it as our thought process evolves.
 73 | 
 74 | ::: callout-tip
 75 | ### Example
 76 | 
 77 | We might look at a simple scatterplot and then decide to add a linear regression line or a smoother to it to highlight the trends.
 78 | 
 79 | ```{r}
 80 | #| fig-width: 5
 81 | #| fig-height: 5
 82 | #| fig-cap: "Scatterplot with loess curve"
 83 | data(airquality)
 84 | with(airquality, {
 85 |         plot(Temp, Ozone)
 86 |         lines(loess.smooth(Temp, Ozone))
 87 | })
 88 | ```
 89 | :::
 90 | 
 91 | In the code above:
 92 | 
 93 | -   The `plot()` function creates the initial plot and draws the points (circles) on the canvas.
 94 | -   The `lines()` function is used to annotate or add to the plot (in this case it adds a loess smoother to the scatterplot).
 95 | 
 96 | Next, we use the `plot()` function to draw the points on the scatterplot and then use the `main` argument to add a main title to the plot.
 97 | 
 98 | ```{r}
 99 | #| fig-width: 5
100 | #| fig-height: 5
101 | #| fig-cap: "Scatterplot with loess curve"
102 | data(airquality)
103 | with(airquality, {
104 |         plot(Temp, Ozone, main = "my plot")
105 |         lines(loess.smooth(Temp, Ozone))
106 | })
107 | ```
108 | 
109 | ::: callout-tip
110 | ### Note
111 | 
112 | One downside with constructing base plots is that you **cannot go backwards once the plot has started**.
113 | 
114 | It is possible that you could start down the road of constructing a plot and realize later (when it is too late) that you do not have enough room to add a y-axis label or something like that.
115 | :::
116 | 
117 | If you have a specific plot in mind, there is then a need to **plan in advance** to make sure, for example, that you have set your margins to be the right size to fit all of the annotations that you may want to include.
118 | 
119 | While the base plotting system is nice in that it gives you the flexibility to specify these kinds of details to painstaking accuracy, **sometimes it would be nice if the system could just figure it out for you**.
120 | 
121 | ::: callout-tip
122 | ### Note
123 | 
124 | Another downside of the base plotting system is that it is **difficult to describe or translate a plot to others because there is no clear graphical language or grammar** that can be used to communicate what you have done.
125 | 
126 | The only real way to describe what you have done in a base plot is to just list the series of commands/functions that you have executed, which is not a particularly compact way of communicating things.
127 | 
128 | This is one problem that the `ggplot2` package attempts to address.
129 | :::
130 | 
131 | ::: callout-tip
132 | ### Example
133 | 
134 | Another typical base plot is constructed with the following code.
135 | 
136 | ```{r}
137 | #| fig-width: 5
138 | #| fig-height: 5
139 | #| fig-cap: "Base plot with title"
140 | data(cars)
141 | 
142 | ## Create the plot / draw canvas
143 | with(cars, plot(speed, dist))
144 | 
145 | ## Add annotation
146 | title("Speed vs. Stopping distance")
147 | ```
148 | :::
149 | 
150 | We will go into more detail on what these functions do in later lessons.
151 | 
152 | ## The Lattice System
153 | 
154 | The **lattice plotting system** is implemented in the `lattice` R package which comes with every installation of R (although it is not loaded by default).
155 | 
156 | To **use the lattice plotting functions**, you must first load the `lattice` package with the `library` function.
157 | 
158 | ```{r}
159 | library(lattice)
160 | ```
161 | 
162 | With the lattice system, **plots are created with a single function call**, such as `xyplot()` or `bwplot()`.
163 | 
164 | There is **no real distinction between functions that create or initiate plots** and **functions that annotate plots** because it all happens at once.
165 | 
166 | Lattice plots tend to be **most useful for conditioning types of plots**, i.e. looking at how `y` changes with `x` across levels of `z`.
167 | 
168 | -   e.g. these types of plots are useful for looking at multi-dimensional data and often allow you to squeeze a lot of information into a single window or page.
169 | 
170 | Another aspect of lattice that makes it different from base plotting is that **things like margins and spacing are set automatically**.
171 | 
172 | This is possible because the entire plot is specified at once via a single function call, so all of the available information needed to figure out the spacing and margins is already there.
173 | 
174 | ::: callout-tip
175 | ### Example
176 | 
177 | Here is a lattice plot that looks at the relationship between life expectancy and income and how that relationship varies by region in the United States.
178 | 
179 | ```{r}
180 | #| fig-width: 8
181 | #| fig-height: 4
182 | #| fig-cap: "Lattice plot"
183 | state <- data.frame(state.x77, region = state.region)
184 | xyplot(Life.Exp ~ Income | region, data = state, layout = c(4, 1))
185 | ```
186 | :::
187 | 
188 | You can see that the entire plot was generated by the call to `xyplot()` and all of the data for the plot were stored in the `state` data frame.
189 | 
190 | The **plot itself contains four panels**---one for each region---and **within each panel is a scatterplot** of life expectancy and income.
191 | 
192 | The notion of *panels* comes up a lot with lattice plots because you typically have many panels in a lattice plot (each panel typically represents a *condition*, like "region").
193 | 
194 | ::: callout-tip
195 | ### Note
196 | 
197 | Downsides with the lattice system
198 | 
199 | -   It can sometimes be very **awkward to specify an entire plot** in a single function call (you end up with functions with many many arguments).
200 | -   **Annotation in panels in plots is not especially intuitive** and can be difficult to explain. In particular, the use of custom panel functions and subscripts can be difficult to wield and requires intense preparation.
201 | -   Once a plot is created, **you cannot "add" to the plot** (but of course you can just make it again with modifications).
202 | :::
203 | 
204 | ## The ggplot2 System
205 | 
206 | The **ggplot2 plotting system** attempts to split the difference between base and lattice in a number of ways.
207 | 
208 | ::: callout-tip
209 | ### Note
210 | 
211 | Taking cues from lattice, the ggplot2 system automatically deals with spacings, text, titles but also allows you to annotate by "adding" to a plot.
212 | :::
213 | 
214 | The ggplot2 system is implemented in the `ggplot2` package (part of the `tidyverse` package), which is available from CRAN (it does not come with R).
215 | 
216 | You can install it from CRAN via
217 | 
218 | ```{r}
219 | #| eval: false
220 | install.packages("ggplot2")
221 | ```
222 | 
223 | and then load it into R via the `library()` function.
224 | 
225 | ```{r}
226 | library(ggplot2)
227 | ```
228 | 
229 | Superficially, the `ggplot2` functions are similar to `lattice`, but the system is generally easier and more intuitive to use.
230 | 
231 | The defaults used in `ggplot2` make many choices for you, but you can still customize plots to your heart's desire.
232 | 
233 | ::: callout-tip
234 | ### Example
235 | 
236 | A typical plot with the `ggplot2` package looks as follows.
237 | 
238 | ```{r}
239 | #| message: false
240 | #| fig-width: 6
241 | #| fig-height: 5
242 | #| fig-cap: "ggplot2 plot"
243 | library(tidyverse)
244 | data(mpg)
245 | mpg %>%
246 |   ggplot(aes(displ, hwy)) + 
247 |   geom_point()
248 | ```
249 | :::
250 | 
251 | There are additional functions in `ggplot2` that allow you to make arbitrarily sophisticated plots.
252 | 
253 | We will discuss more about this in the next lecture.
254 | 


--------------------------------------------------------------------------------
/posts/_metadata.yml:
--------------------------------------------------------------------------------
 1 | # options specified here will apply to all posts in this folder
 2 | 
 3 | # freeze computational output
 4 | # (see https://quarto.org/docs/projects/code-execution.html#freeze)
 5 | freeze: true
 6 | 
 7 | # Enable banner style title blocks
 8 | title-block-banner: true
 9 | 
10 | # Default for table of contents
11 | toc: true
12 | toc-title: Table of contents
13 | toc-location: left
14 | 


--------------------------------------------------------------------------------
/profile.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/profile.jpg


--------------------------------------------------------------------------------
/projects.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Projects"
 3 | listing:
 4 |   - id: projects
 5 |     contents: "projects"
 6 |     sort: "date desc"
 7 |     type: default
 8 |     categories: true
 9 |     sort-ui: false
10 |     filter-ui: false
11 | page-layout: full
12 | title-block-banner: false
13 | ---
14 | 


--------------------------------------------------------------------------------
/projects/2022-08-30-project-0/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Project 0 (optional)"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "Information for Project 0 (entirely optional, but hopefully useful and fun!)"
  9 | date: 2022-08-30
 10 | categories: [project 0, projects]
 11 | ---
 12 | 
 13 | # Background
 14 | 
 15 | **Due date: Sept 8 at 1:29pm**
 16 | 
 17 | Using the tools we learned in the first week (e.g. R, RStudio and GitHub), let's apply them in a small (but also comprehensive) exercise.
 18 | 
 19 | -   **Please note this project is entirely optional** (i.e. it will not be graded), but hopefully it will be helpful to you getting set up for the rest of the course (i.e. set up these tools on your computing environment) and give you an opportunity to introduce yourself to your classmates.
 20 | 
 21 | -   **For anyone who completes it, you get a free hex sticker!** If you aren't familiar with the hex stickers, check out [this link](https://github.com/rstudio/hex-stickers). You can add them to your laptop for some character and swag (or [turn them into magnets](https://twitter.com/ClaireMKBowen/status/1294336123414523904)). I have a ton of different ones from the tidyverse or [RLadies Baltimore](https://pbs.twimg.com/profile_images/1236855715018559488/PuYAjTTD_400x400.jpg). You can come pick one up from my office or I can mail it to you if you email me a mailing address after you submit the project.
 22 | 
 23 | -   **For those of you who are new to GitHub/R/Rmarkdown**: this project makes you do a lot of things that you might not be familiar with. I know that this might be time-consuming and also might feel a bit intimidating. It's partly unavoidable and partly on purpose. You need to learn how to quickly get up to speed with all kinds of new tools that come your way. So practicing it is a good idea. You are welcome to draw on any sources for help that you want (online, classmates, instructor, etc.). I'm confident with a bit of trial and error you'll get it to work.
 24 | 
 25 | # Part 1
 26 | 
 27 | This part of the project is to ensure that you have successfully set up your computing environment. Please email (use the Subject line: `140.776 Setup`) the Course Instructor (Dr. Stephanie Hicks) at shicks19\@jhu.edu the following information:
 28 | 
 29 | ### Setting up your computing environment
 30 | 
 31 | 1.  Your name, JHED ID (if applicable).
 32 | 
 33 | 2.  The type of computer/operating system you are using (Windows, Mac, Unix/Linux, other)
 34 | 
 35 | 3.  The version of R that you have installed on your computer. To do this, start up R and run the following in the R console and include the output in your email.
 36 | 
 37 | ```{r}
 38 | #| label: rv
 39 | #| eval: false
 40 | #| echo: true
 41 | print(R.version.string)
 42 | ```
 43 | 
 44 | ![Printing the R version string](../../videos/versionstring.gif)
 45 | 
 46 | 4.  The version of RStudio that you have installed on your computer. To do this start up RStudio and in the R console window, run the following and again include the output in your email:
 47 | 
 48 | ```{r rstudiov, eval = FALSE, echo = TRUE}
 49 | print(RStudio.Version()$version)
 50 | ```
 51 | 
 52 | 5.  If you have a GitHub username, please include this in your email. If you do not have a GitHub username, read <https://happygitwithr.com>, sign up for GitHub, and include your new username in your email to me.
 53 | 
 54 | 6.  To make sure git is installed on your system, use the 'Terminal' (e.g. it's next to the R Console within RStudio) (or whatever you use), run the following and include the output in your email:
 55 | 
 56 | For example, this is mine:
 57 | 
 58 | ```{bash gv, eval = TRUE, echo = TRUE}
 59 | git --version
 60 | ```
 61 | 
 62 | If you have any trouble with any of the steps above, try to first post on the discussion board on CoursePlus. The TAs and I will be checking it frequently, but other students may also be helpful in their replies. You can also use other resources to get help (Google, R4DS, colleagues/friends/relatives with R/Markdown experience, etc.). Still, try to do as much as possible yourself. We will use all the bits you are learning here repeatedly during this course.
 63 | 
 64 | # Part 2
 65 | 
 66 | This part of the project is to help you introduce yourself (and your interests!) to others in this course. You will create a new GitHub repository and build a small website about yourself.
 67 | 
 68 | ### 1. Create a GitHub repo for your website
 69 | 
 70 | Create a new GitHub repository titled `biostat776-intro-<firstname>-<lastname>` (where you replace `<firstname>` with your first name and `<lastname>` with your last name) in your own personal GitHub account (e.g. `https://github.com/<yourgithubusername>/biostat776-intro-<firstname>-<lastname>`).
 71 | 
 72 | For example, you can find an example that I created for myself at
 73 | 
 74 | -   github repo: <https://github.com/stephaniehicks/biostat776-intro-stephanie-hicks>
 75 | 
 76 | ### 2. Build a website using R Markdown
 77 | 
 78 | Using one of the many ways we discussed in class (e.g. a [simple R Markdown website](https://bookdown.org/yihui/rmarkdown/rmarkdown-site.html), [blogdown](https://bookdown.org/yihui/blogdown/), [distill](https://rstudio.github.io/distill/website.html), etc), create a new project in RStudio with the appropriate files. For example, you might include the following information:
 79 | 
 80 | -   Write a short summary introducing yourself. Structure the webpage with headings, subheadings, etc. Talk a bit about yourself, your background, training, research interests. Let me/us know what kind of statistics, programming, data analysis experience you already have. I am also curious to know what you most hope to learn in this course.
 81 | 
 82 | -   Five fun facts about yourself
 83 | 
 84 | -   A web page linking to something you think is really cool/interesting/inspiring/etc. You could also describe briefly what it is and why you like it.
 85 | 
 86 | If you want, feel free to get creative and include other things. You can play with RMarkdown if you wish to, e.g., you can try to include some table or a video, etc.
 87 | 
 88 | ### 3. Include a `README.md` file
 89 | 
 90 | Your project repository should include a `README.md` file (if it was not included already).
 91 | 
 92 | Edit the repository `README.md` file. Typically it will only contain the name of your repository with a `#` sign in front. The `#` represents a level 1 heading in Markdown. Change the headline and call it "Introducing myself" (or something like that). Underneath write something like "This website contains a short introduction of *Your Name*."
 93 | 
 94 | Make sure the 2 files (README.md and especially `index.Rmd` / `index.html`) look the way you want. Make changes until everything works.
 95 | 
 96 | ### 4. Deploy your website
 97 | 
 98 | Depending on how you want to deploy your website, the following may or may not be relevant to you. In general, you want to make sure you have initialized your project to use `git` (i.e. you can type `git init` to initialize the repository to use git). Add and commit your changes. Push your changes and deploy your website.
 99 | 
100 | Following steps 2-4, here is my example website:
101 | 
102 | -   website: <https://www.stephaniehicks.com/biostat776-intro-stephanie-hicks>
103 | 
104 | ### 5. Share your website
105 | 
106 | -   Go to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created.
107 | -   As you read the introductions from other folks in the class, feel free to comment/reply using Discussion board.
108 | 
109 | In class on Sept 8, I will show as many websites as I can from Courseplus!
110 | 


--------------------------------------------------------------------------------
/projects/2022-09-15-project-2/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Project 2"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "Exploring temperature and rainfall in Australia"
  9 | date: 2022-09-15
 10 | categories: [project 2, projects]
 11 | ---
 12 | 
 13 | # Background
 14 | 
 15 | **Due date: Sept 30 at 11:59pm**
 16 | 
 17 | The goal of this assignment is to practice designing and writing functions along with practicing our tidyverse skills that we learned in our previous project. Writing functions involves thinking about how code should be divided up and what the interface/arguments should be. In addition, you need to think about what the function will return as output.
 18 | 
 19 | ### To submit your project
 20 | 
 21 | Please write up your project using R Markdown and process it with `knitr`. Compile your document as an **HTML file** and submit your HTML file to the dropbox on Courseplus. Please **show all your code** (i.e. make sure to set `echo = TRUE`) for each of the answers to each part.
 22 | 
 23 | ### Install packages
 24 | 
 25 | Before attempting this assignment, you should first install the following packages, if they are not already installed:
 26 | 
 27 | ```{r}
 28 | #| eval: false
 29 | install.packages("tidyverse")
 30 | install.packages("tidytuesdayR")
 31 | ```
 32 | 
 33 | # Part 1: Fun with functions
 34 | 
 35 | In this part, we are going to practice creating functions.
 36 | 
 37 | ### Part 1A: Exponential transformation
 38 | 
 39 | The exponential of a number can be written as an infinite series expansion of the form $$
 40 | \exp(x) = 1 + x + \frac{x^2}{2!} + \frac{x^3}{3!} + \cdots
 41 | $$ Of course, we cannot compute an infinite series by the end of this term and so we must truncate it at a certain point in the series. The truncated sum of terms represents an approximation to the true exponential, but the approximation may be usable.
 42 | 
 43 | Write a function that computes the exponential of a number using the truncated series expansion. The function should take two arguments:
 44 | 
 45 | -   `x`: the number to be exponentiated
 46 | 
 47 | -   `k`: the number of terms to be used in the series expansion beyond the constant 1. The value of `k` is always $\geq 1$.
 48 | 
 49 | For example, if $k = 1$, then the `Exp` function should return the number $1 + x$. If $k = 2$, then you should return the number $1 + x + x^2/2!$.
 50 | 
 51 | Include at least one example of output using your function.
 52 | 
 53 | ::: callout-note
 54 | -   You can assume that the input value `x` will always be a *single* number.
 55 | 
 56 | -   You can assume that the value `k` will always be an integer $\geq 1$.
 57 | 
 58 | -   Do not use the `exp()` function in R.
 59 | 
 60 | -   The `factorial()` function can be used to compute factorials.
 61 | :::
 62 | 
 63 | ```{r}
 64 | Exp <- function(x, k) {
 65 |         # Add your solution here
 66 | }
 67 | ```
 68 | 
 69 | ### Part 1B: Sample mean and sample standard deviation
 70 | 
 71 | Next, write two functions called `sample_mean()` and `sample_sd()` that each take as input a vector of data of length $N$ and calculate the sample average and sample standard deviation, respectively, for the set of $N$ observations.
 72 | 
 73 | $$
 74 | \bar{x} = \frac{1}{N} \sum_{i=1}^N x_i
 75 | $$ $$
 76 | s = \sqrt{\frac{1}{N-1} \sum_{i=1}^N (x_i - \overline{x})^2}
 77 | $$ Include at least one example of output using your functions.
 78 | 
 79 | ::: callout-note
 80 | -   You can assume that the input value `x` will always be a *vector* of numbers of length *N*.
 81 | 
 82 | -   Do not use the `mean()` and `sd()` functions in R.
 83 | :::
 84 | 
 85 | ```{r}
 86 | sample_mean <- function(x) {
 87 |         # Add your solution here
 88 | }
 89 | 
 90 | sample_sd <- function(x) {
 91 |         # Add your solution here
 92 | }
 93 | ```
 94 | 
 95 | ### Part 1C: Confidence intervals
 96 | 
 97 | Next, write a function called `calculate_CI()` that:
 98 | 
 99 | 1.  There should be two inputs to the `calculate_CI()`. First, it should take as input a vector of data of length $N$. Second, the function should also have a `conf` ($=1-\alpha$) argument that allows the confidence interval to be adapted for different $\alpha$.
100 | 
101 | 2.  Calculates a confidence interval (CI) (e.g. a 95% CI) for the estimate of the mean in the population. If you are not familiar with confidence intervals, it is an interval that contains the population parameter with probability $1-\alpha$ taking on this form
102 | 
103 | $$
104 | \bar{x} \pm t_{\alpha/2, N-1} s_{\bar{x}}
105 | $$
106 | 
107 | where $t_{\alpha/2, N-1}$ is the value needed to generate an area of $\alpha / 2$ in each tail of the $t$-distribution with $N-1$ degrees of freedom and $s_{\bar{x}} = \frac{s}{\sqrt{N}}$ is the standard error of the mean. For example, if we pick a 95% confidence interval and $N$=50, then you can calculate $t_{\alpha/2, N-1}$ as
108 | 
109 | ```{r}
110 | alpha <- 1 - 0.95
111 | degrees_freedom <- 50 - 1
112 | t_score <- qt(p = alpha / 2, df = degrees_freedom, lower.tail = FALSE)
113 | ```
114 | 
115 | 3.  Returns a named vector of length 2, where the first value is the `lower_bound`, the second value is the `upper_bound`.
116 | 
117 | ```{r}
118 | calculate_CI <- function(x, conf = 0.95) {
119 |         # Add your solution here
120 | }
121 | 
122 | ```
123 | 
124 | Include an example of output from your function showing the output when using two different levels of `conf`.
125 | 
126 | ::: callout-note
127 | If you want to check if your function output matches an existing function in R, consider a vector $x$ of length $N$ and see if the following two code chunks match.
128 | 
129 | ```{r, eval=FALSE}
130 | calculate_CI(x, conf = 0.95)
131 | ```
132 | 
133 | ```{r,eval=FALSE}
134 | dat = data.frame(x=x)
135 | fit <- lm(x ~ 1, dat)
136 | 
137 | # Calculate a 95% confidence interval
138 | confint(fit, level=0.95)
139 | ```
140 | :::
141 | 
142 | # Part 2: Wrangling data
143 | 
144 | In this part, we will practice our wrangling skills with the tidyverse that we learned about in module 1.
145 | 
146 | ### Data
147 | 
148 | The two datasets for this part of the assignment come from [TidyTuesday](https://www.tidytuesday.com). Specifically, we will use the following data from January 2020, which I have provided for you below:
149 | 
150 | ```{r, eval=FALSE}
151 | tuesdata <- tidytuesdayR::tt_load('2020-01-07')
152 | rainfall <- tuesdata$rainfall
153 | temperature <- tuesdata$temperature
154 | ```
155 | 
156 | However, to avoid re-downloading data, we will check to see if those files already exist using an `if()` statement:
157 | 
158 | ```{r, message=FALSE}
159 | library(here)
160 | if(!file.exists(here("data","tuesdata_rainfall.RDS"))){
161 |   tuesdata <- tidytuesdayR::tt_load('2020-01-07')
162 |   rainfall <- tuesdata$rainfall
163 |   temperature <- tuesdata$temperature
164 |   # save the files to RDS objects (create data/ first; saveRDS() will not create a missing folder)
165 |   dir.create(here("data"), showWarnings = FALSE, recursive = TRUE)
166 |   saveRDS(tuesdata$rainfall, file = here("data","tuesdata_rainfall.RDS"))
167 |   saveRDS(tuesdata$temperature, file = here("data","tuesdata_temperature.RDS"))
168 | }
169 | ```
170 | 
171 | ::: callout-note
172 | The above code will only run if it cannot find the path to the `tuesdata_rainfall.RDS` on your computer. Then, we can just read in these files every time we knit the R Markdown, instead of re-downloading them every time.
173 | :::
174 | 
175 | Let's load the datasets
176 | 
177 | ```{r, eval=TRUE, message=FALSE}
178 | rainfall <- readRDS(here("data","tuesdata_rainfall.RDS"))
179 | temperature <- readRDS(here("data","tuesdata_temperature.RDS"))
180 | ```
181 | 
182 | Now we can look at the data with `glimpse()`
183 | 
184 | ```{r,message=FALSE}
185 | library(tidyverse)
186 | 
187 | glimpse(rainfall)
188 | glimpse(temperature)
189 | ```
190 | 
191 | If we look at the [TidyTuesday github repo](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020#2020-data) from 2020, we see this dataset contains temperature and rainfall data from Australia.
192 | 
193 | ![](https://www.ga.gov.au/__data/assets/image/0005/12569/GA14206.jpg){.preview-image}
194 | 
195 | \[**Source**: [Geoscience Australia](https://www.ga.gov.au/scientific-topics/national-location-information/dimensions/climatic-extremes)\]
196 | 
197 | Here is a data dictionary for what all the column names mean:
198 | 
199 | -   <https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-01-07/readme.md#data-dictionary>
200 | 
201 | ### Tasks
202 | 
203 | Using the `rainfall` and `temperature` data, perform the following steps and create a new data frame called `df`:
204 | 
205 | 1.  Start with `rainfall` dataset and drop any rows with NAs.
206 | 2.  Create a new column titled `date` that combines the columns `year`, `month`, `day` into one column separated by "-". (e.g. "2020-01-01"). This column should not be a character, but should be recognized as a date. (**Hint**: check out the `ymd()` function in `lubridate` R package). You will also want to add a column that just keeps the `year`.
207 | 3.  Using the `city_name` column, convert the city names (character strings) to all upper case.
208 | 4.  Join this wrangled rainfall dataset with the `temperature` dataset such that it includes only observations that are in both data frames. (**Hint**: there are two keys that you will need to join the two datasets together). (**Hint**: If all has gone well thus far, you should have a dataset with 83,964 rows and 13 columns).
209 | 
210 | ::: callout-note
211 | -   You may need to use functions outside these packages to obtain this result; in particular, you may find the `drop_na()` function from `tidyr` and the `str_to_upper()` function from `stringr` useful.
212 | :::
213 | 
214 | ```{r}
215 | # Add your solution here
216 | 
217 | ```
218 | 
219 | # Part 3: Data visualization
220 | 
221 | In this part, we will practice our `ggplot2` plotting skills within the tidyverse starting with our wrangled `df` data from Part 2. For full credit in this part (and for all plots that you make), your plots should include:
222 | 
223 | 1.  An overall title for the plot and a subtitle summarizing key trends that you found. Also include a caption in the figure.
224 | 2.  There should be an informative x-axis and y-axis label.
225 | 
226 | Consider playing around with the `theme()` function to make the figure shine, including playing with background colors, font, etc.
227 | 
228 | ### Part 3A: Plotting temperature data over time
229 | 
230 | Use the functions in `ggplot2` package to make a line plot of the max and min temperature (y-axis) over time (x-axis) for each city in our wrangled data from Part 2. You should only consider years 2014 and onwards. For full credit, your plot should include:
231 | 
232 | 1.  For a given city, the min and max temperature should both appear on the plot, but they should be two different colors.
233 | 2.  Use a facet function to facet by `city_name` to show all cities in one figure.
234 | 
235 | ```{r}
236 | # Add your solution here
237 | 
238 | ```
239 | 
240 | ### Part 3B: Plotting rainfall over time
241 | 
242 | Here we want to explore the distribution of rainfall (log scale) with histograms for a given city (indicated by the `city_name` column) for a given year (indicated by the `year` column) so we can make some exploratory plots of the data.
243 | 
244 | ::: callout-note
245 | You are again using the wrangled data from Part 2.
246 | :::
247 | 
248 | The following code plots the data from one city (`city_name == "PERTH"`) in a given year (`year == 2000`).
249 | 
250 | ```{r, eval=FALSE}
251 | df %>% 
252 |   filter(city_name == "PERTH", year == 2000) %>% 
253 |   ggplot(aes(log(rainfall))) + 
254 |     geom_histogram()
255 | ```
256 | 
257 | While this code is useful, it only provides us information on one city in one year. We could cut and paste this code to look at other cities/years, but that can be error prone and just plain messy.
258 | 
259 | The aim here is to **design** and **implement** a function that can be re-used to visualize all of the data in this dataset.
260 | 
261 | 1.  There are 2 aspects that may vary in the dataset: The **city_name** and the **year**. Note that not all combinations of `city_name` and `year` have measurements.
262 | 
263 | 2.  Your function should take as input two arguments **city_name** and **year**.
264 | 
265 | 3.  Given the input from the user, your function should return a **single** histogram for that input. Furthermore, the data should be **readable** on that plot so that it is in fact useful. It should be possible to visualize the entire dataset with your function (through repeated calls to your function).
266 | 
267 | 4.  If the user enters an input that does not exist in the dataset, your function should catch that and report an error (via the `stop()` function).
268 | 
269 | For this section,
270 | 
271 | 1.  Write a short description of how you chose to design your function and why.
272 | 
273 | 2.  Present the code for your function in the R markdown document.
274 | 
275 | 3.  Include at least one example of output from your function.
276 | 
277 | ```{r}
278 | # Add your solution here
279 | 
280 | ```
281 | 
282 | # Part 4: Apply functions and plot
283 | 
284 | ### Part 4A: Tasks
285 | 
286 | In this part, we will apply the functions we wrote in Part 1 to our rainfall data starting with our wrangled `df` data from Part 2.
287 | 
288 | 1.  First, filter for only years including 2014 and onwards.
289 | 2.  For a given city and for a given year, calculate the sample mean (using your function `sample_mean()`), the sample standard deviation (using your function `sample_sd()`), and a 95% confidence interval for the average rainfall (using your function `calculate_CI()`). Specifically, you should add two columns in this summarized dataset: a column titled `lower_bound` and a column titled `upper_bound` containing the lower and upper bounds for your CI that you calculated (using your function `calculate_CI()`).
290 | 3.  Call this summarized dataset `rain_df`.
291 | 
292 | ```{r}
293 | # Add your solution here
294 | 
295 | ```
296 | 
297 | ### Part 4B: Tasks
298 | 
299 | Using the `rain_df`, plot the estimates of mean rainfall and the 95% confidence intervals on the same plot. There should be a separate faceted plot for each city. Think about using `ggplot()` with both `geom_point()` (and `geom_line()` to connect the points) for the means and `geom_errorbar()` for the lower and upper bounds of the confidence interval.
300 | 
301 | ```{r}
302 | # Add your solution here
303 | 
304 | ```
305 | 


--------------------------------------------------------------------------------
/projects/2022-10-04-project-3/index.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Project 3"
  3 | author: 
  4 |   - name: Stephanie Hicks
  5 |     url: https://stephaniehicks.com
  6 |     affiliation: Department of Biostatistics, Johns Hopkins
  7 |     affiliation_url: https://publichealth.jhu.edu
  8 | description: "Exploring album sales and sentiment of lyrics from Beyoncé and Taylor Swift"
  9 | date: 2022-10-04
 10 | categories: [project 3, projects]
 11 | ---
 12 | 
 13 | # Background
 14 | 
 15 | **Due date: October 21 at 11:59pm**
 16 | 
 17 | The goal of this assignment is to practice wrangling special data types (including dates, character strings, and factors) and visualizing results while practicing our tidyverse skills.
 18 | 
 19 | ### To submit your project
 20 | 
 21 | Please write up your project using R Markdown and process it with `knitr`. Compile your document as an **HTML file** and submit your HTML file to the dropbox on Courseplus. Please **show all your code** (i.e. make sure to set `echo = TRUE`) for each of the answers to each part.
 22 | 
 23 | # Load data
 24 | 
 25 | The datasets for this part of the assignment come from [TidyTuesday](https://www.tidytuesday.com).
 26 | 
 27 | Data dictionary available here:
 28 | 
 29 | -   <https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-09-29>
 30 | 
 31 | ![Beyoncé (left) and Taylor Swift (right)](https://akns-images.eonline.com/eol_images/Entire_Site/2019721/rs_1024x759-190821125112-1024.taylor-swift-beyonce-2009-mtv-vmas.ct.082119.jpg){preview="TRUE"}
 32 | 
 33 | Specifically, we will explore album sales and lyrics from two artists (Beyoncé and Taylor Swift). The data are available from TidyTuesday from September 2020, which I have provided for you below:
 34 | 
 35 | 
 36 | ```{r, eval=FALSE}
 37 | b_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv')
 38 | ts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv')
 39 | sales <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv')
 40 | ```
 41 | 
 42 | However, to avoid re-downloading data, we will check to see if those files already exist using an `if()` statement:
 43 | 
 44 | ```{r, message=FALSE}
 45 | library(here)
 46 | if(!file.exists(here("data","b_lyrics.RDS"))){
 47 |   b_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv')
 48 |   ts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv')
 49 |   sales <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv')
 50 |   # save the files to RDS objects (create data/ first; saveRDS() will not create a missing folder)
 51 |   dir.create(here("data"), showWarnings = FALSE, recursive = TRUE)
 52 |   saveRDS(b_lyrics, file = here("data","b_lyrics.RDS"))
 53 |   saveRDS(ts_lyrics, file = here("data","ts_lyrics.RDS"))
 54 |   saveRDS(sales, file = here("data","sales.RDS"))
 55 | }
 56 | ```
 57 | 
 58 | ::: callout-note
 59 | The above code will only run if it cannot find the path to the `b_lyrics.RDS` on your computer. Then, we can just read in these files every time we knit the R Markdown, instead of re-downloading them every time.
 60 | :::
 61 | 
 62 | Let's load the datasets
 63 | 
 64 | ```{r, eval=TRUE, message=FALSE}
 65 | b_lyrics <- readRDS(here("data","b_lyrics.RDS"))
 66 | ts_lyrics <- readRDS(here("data","ts_lyrics.RDS"))
 67 | sales <- readRDS(here("data","sales.RDS"))
 68 | ```
 69 | 
 70 | # Part 1: Explore album sales
 71 | 
 72 | In this section, the goal is to explore the sales of studio albums from Beyoncé and Taylor Swift.
 73 | 
 74 | **Notes**
 75 | 
 76 | -   In each of the subsections below that ask you to create a plot, you must create a title, subtitle, x-axis label, and y-axis label with units where applicable. For example, if your axis says "sales" as an axis label, change it to "sales (in millions)".
 77 | 
 78 | ## Part 1A
 79 | 
 80 | In this section, we will do some data wrangling.
 81 | 
 82 | 1.  Use `lubridate` to create a column called `released` that is a `Date` class. However, to be able to do this, you first need to use `stringr` to search for a pattern that matches things like this "(US)\[51\]" in a string like this "September 1, 2006 (US)\[51\]" and remove them. (**Note**: to get full credit, you must create the regular expression).
 83 | 2.  Use `forcats` to create a factor called `country` (**Note**: you may need to collapse some factor levels).
 84 | 3.  Transform the `sales` into a unit that is album sales in millions of dollars.
 85 | 4.  Keep only album sales from the UK, the US or the World.
 86 | 5.  Auto print your final wrangled tibble data frame.
 87 | 
 88 | ```{r}
 89 | # Add your solution here
 90 | 
 91 | ```
 92 | 
 93 | ## Part 1B
 94 | 
 95 | In this section, we will do some more data wrangling followed by summarization using wrangled data from Part 1A.
 96 | 
 97 | 1.  Keep only album sales from the US.
 98 | 2.  Create a new column called `years_since_release` corresponding to the number of years since the release of each album from Beyoncé and Taylor Swift. This should be a whole number and you should round down to "14" if you get a non-whole number like "14.12" years. (**Hint**: you may find the `interval()` function from `lubridate` helpful here, but this is not the only way to do this.)
 99 | 3.  Calculate the most recent, oldest, and the median years since albums were released for both Beyoncé and Taylor Swift.
100 | 
101 | ```{r}
102 | # Add your solution here
103 | 
104 | ```
105 | 
106 | ## Part 1C
107 | 
108 | Using the wrangled data from Part 1A:
109 | 
110 | 1.  Calculate the total album sales for each artist and for each `country` (only sales from the UK, US, and World).
111 | 2.  Using the total album sales, create a [percent stacked barchart](https://r-graph-gallery.com/48-grouped-barplot-with-ggplot2) using `ggplot2` of the percentage of sales of studio albums (in millions) along the y-axis for the two artists along the x-axis colored by the `country`.
112 | 
113 | ```{r}
114 | # Add your solution here
115 | 
116 | ```
117 | 
118 | ## Part 1D
119 | 
120 | Using the wrangled data from Part 1A, use `ggplot2` to create a bar plot for the sales of studio albums (in millions) along the x-axis for each of the album titles along the y-axis.
121 | 
122 | **Note**:
123 | 
124 | -   You only need to consider the global World sales (you can ignore US and UK sales for this part).
125 | -   The title of the album must be clearly readable along the y-axis.
126 | -   Each bar should be colored by which artist made that album.
127 | -   The bars should be ordered from albums with the most sales (top) to the least sales (bottom) (**Note**: you must use functions from `forcats` for this step).
128 | 
129 | ```{r}
130 | # Add your solution here
131 | 
132 | ```
133 | 
134 | ## Part 1E
135 | 
136 | Using the wrangled data from Part 1A, use `ggplot2` to create a scatter plot of sales of studio albums (in millions) along the y-axis by the released date for each album along the x-axis.
137 | 
138 | **Note**:
139 | 
140 | -   The points should be colored by the artist.
141 | -   There should be three scatter plots (one for UK, US and world sales) faceted by rows.
142 | 
143 | ```{r}
144 | # Add your solution here
145 | 
146 | ```
147 | 
148 | # Part 2: Exploring sentiment of lyrics
149 | 
150 | In Part 2, we will explore the lyrics in the `b_lyrics` and `ts_lyrics` datasets.
151 | 
152 | ## Part 2A
153 | 
154 | Using `ts_lyrics`, create a new column called `line` with one line containing the character string for each line of Taylor Swift's songs.
155 | 
156 | -   How many lines in Taylor Swift's lyrics contain the word "hello"? For full credit, show all the rows in `ts_lyrics` that have "hello" in the `line` column and report how many rows there are in total.
157 | -   How many lines in Taylor Swift's lyrics contain the word "goodbye"? For full credit, show all the rows in `ts_lyrics` that have "goodbye" in the `line` column and report how many rows there are in total.
158 | 
159 | ```{r}
160 | # Add your solution here
161 | 
162 | ```
163 | 
164 | ## Part 2B
165 | 
166 | Repeat the same analysis for `b_lyrics` as described in Part 2A.
167 | 
168 | ```{r}
169 | # Add your solution here
170 | 
171 | ```
172 | 
173 | ## Part 2C
174 | 
175 | Using the `b_lyrics` dataset,
176 | 
177 | 1.  Tokenize each lyrical line by words.
178 | 2.  Remove the "stopwords".
179 | 3.  Calculate the total number for each word in the lyrics.
180 | 4.  Using the "bing" sentiment lexicon, add a column to the summarized data frame adding the "bing" sentiment lexicon.
181 | 5.  Sort the rows from most frequent to least frequent words.
182 | 6.  Only keep the top 25 most frequent words.
183 | 7.  Auto print the wrangled tibble data frame.
184 | 8.  Use `ggplot2` to create a bar plot with the top words on the y-axis and the frequency of each word on the x-axis. Color each bar by the sentiment of each word from the "bing" sentiment lexicon. Bars should be ordered from most frequent on the top to least frequent on the bottom of the plot.
185 | 9.  Create a word cloud of the top 25 most frequent words.
186 | 
187 | ```{r}
188 | # Add your solution here
189 | 
190 | ```
191 | 
192 | ## Part 2D
193 | 
194 | Repeat the same analysis as above in Part 2C, but for `ts_lyrics`.
195 | 
196 | ```{r}
197 | # Add your solution here
198 | 
199 | ```
200 | 
201 | ## Part 2E
202 | 
203 | Using the `ts_lyrics` dataset,
204 | 
205 | 1.  Tokenize each lyrical line by words.
206 | 2.  Remove the "stopwords".
207 | 3.  Calculate the total number for each word in the lyrics **for each Album**.
208 | 4.  Using the "afinn" sentiment lexicon, add a column to the summarized data frame adding the "afinn" sentiment lexicon.
209 | 5.  Calculate the average sentiment score **for each Album**.
210 | 6.  Auto print the wrangled tibble data frame.
211 | 7.  Join the wrangled data frame from Part 1A (album sales in millions) with the wrangled data frame from #6 above (average sentiment score for each album).
212 | 8.  Using `ggplot2`, create a scatter plot of the average sentiment score for each album (y-axis) and the album release date along the x-axis. Make the size of each point the album sales in millions.
213 | 9.  Add a horizontal line at y-intercept=0.
214 | 10. Write 2-3 sentences interpreting the plot answering the question "How has the sentiment of Taylor Swift's albums changed over time?". Add a title, subtitle, and useful axis labels.
215 | 
216 | ```{r}
217 | # Add your solution here
218 | 
219 | ```
220 | 


--------------------------------------------------------------------------------
/resources.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Resources"
 3 | ---
 4 | 
 5 | 
 6 | # Learning R 
 7 | 
 8 | - Big Book of R: https://www.bigbookofr.com
 9 | - List of resources to learn R (but also Python, SQL, Javascript): https://github.com/delabj/datacamp_alternatives/blob/master/index.md 
10 | - learnr4free. Resources (books, videos, interactive websites, papers) to learn R. Some of the resources are beginner-friendly and start with the installation process: https://www.learnr4free.com/en
11 | - Data Science with R by Danielle Navarro: https://robust-tools.djnavarro.net


--------------------------------------------------------------------------------
/scripts/make_flametree_icon.R:
--------------------------------------------------------------------------------
 1 | # Draw a flametree and save it as images/cool_icon.png (path built with here()).
 2 | # color palette from https://github.com/dill/beyonce
 3 | library(here)
 4 | library(flametree)
 5 | 
 6 | # shades <- c("#A06AB4", "#FFD743", "#07BB9C", "#D773A2")
 7 | # The beyonce package is never attached in this script, so call
 8 | # beyonce_palette() through its namespace rather than relying on the search path.
 9 | shades <- beyonce::beyonce_palette(101)
10 | flametree_grow(time = 12, trees = 2) %>% 
11 |   flametree_plot(
12 |     background = "black",
13 |     palette = shades, 
14 |     style = "plain") %>%
15 |   flametree_save(
16 |     filename = here("images", "cool_icon.png"),
17 |     height = 4,
18 |     width = 4
19 |   )
20 | 


--------------------------------------------------------------------------------
/styles.css:
--------------------------------------------------------------------------------
1 | /* css styles */
2 | 


--------------------------------------------------------------------------------
/videos/downloadRMac.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/downloadRMac.gif


--------------------------------------------------------------------------------
/videos/downloadRStudio.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/downloadRStudio.gif


--------------------------------------------------------------------------------
/videos/downloadRWindows.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/downloadRWindows.gif


--------------------------------------------------------------------------------
/videos/versionstring.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/versionstring.gif


--------------------------------------------------------------------------------