├── .github └── workflows │ └── publish.yml ├── .gitignore ├── README.md ├── _freeze ├── posts │ ├── 2022-08-30-introduction-to-gitgithub │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-08-30-introduction-to-r-and-rstudio │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-01-literate-programming │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ └── plot2-1.png │ ├── 2022-09-01-reference-management │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-01-reproducible-research │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-01-reproducible-reserach │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-06-managing-data-frames-with-tidyverse │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-06-reading-and-writing-data │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-08-joining-data-in-r │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-08-tidy-data-and-the-tidyverse │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-13-ggplot2-plotting-system-part-1 │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ ├── unnamed-chunk-15-1.png │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-19-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-20-1.png │ │ │ ├── unnamed-chunk-21-1.png │ │ │ ├── unnamed-chunk-22-1.png │ │ │ ├── unnamed-chunk-23-1.png │ │ │ ├── unnamed-chunk-24-1.png │ │ │ ├── unnamed-chunk-26-1.png │ │ │ ├── unnamed-chunk-27-1.png │ │ │ ├── unnamed-chunk-28-1.png │ │ │ ├── unnamed-chunk-29-1.png │ │ │ ├── unnamed-chunk-30-1.png │ │ │ ├── unnamed-chunk-31-1.png │ │ │ ├── unnamed-chunk-32-1.png │ │ │ ├── unnamed-chunk-33-1.png │ │ │ ├── unnamed-chunk-34-1.png │ │ │ ├── 
unnamed-chunk-35-1.png │ │ │ ├── unnamed-chunk-36-1.png │ │ │ ├── unnamed-chunk-37-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ └── unnamed-chunk-5-1.png │ ├── 2022-09-13-plotting-systems │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-1-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ └── unnamed-chunk-8-1.png │ ├── 2022-09-15-ggplot2-plotting-system-part-2 │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-12-1.png │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ ├── unnamed-chunk-15-1.png │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-18-1.png │ │ │ ├── unnamed-chunk-19-1.png │ │ │ ├── unnamed-chunk-20-1.png │ │ │ ├── unnamed-chunk-21-1.png │ │ │ ├── unnamed-chunk-22-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ └── unnamed-chunk-9-1.png │ ├── 2022-09-20-r-nuts-and-bolts │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-22-control-structures │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-22-functions │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-27-loop-functions │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-29-debugging-r-code │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-29-error-handling-and-generation │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-10-04-working-with-dates-and-times │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-34-1.png │ │ │ ├── unnamed-chunk-35-1.png │ │ │ ├── unnamed-chunk-36-1.png │ │ │ ├── 
unnamed-chunk-37-1.png │ │ │ ├── unnamed-chunk-38-1.png │ │ │ ├── unnamed-chunk-39-1.png │ │ │ ├── unnamed-chunk-40-1.png │ │ │ └── unnamed-chunk-41-1.png │ ├── 2022-10-06-regular-expressions │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-10-11-working-with-factors │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-18-1.png │ │ │ ├── unnamed-chunk-19-1.png │ │ │ ├── unnamed-chunk-20-1.png │ │ │ ├── unnamed-chunk-21-1.png │ │ │ ├── unnamed-chunk-22-1.png │ │ │ ├── unnamed-chunk-23-1.png │ │ │ ├── unnamed-chunk-24-1.png │ │ │ ├── unnamed-chunk-24-2.png │ │ │ ├── unnamed-chunk-25-1.png │ │ │ ├── unnamed-chunk-26-1.png │ │ │ ├── unnamed-chunk-26-2.png │ │ │ ├── unnamed-chunk-27-1.png │ │ │ ├── unnamed-chunk-27-2.png │ │ │ ├── unnamed-chunk-28-1.png │ │ │ ├── unnamed-chunk-28-2.png │ │ │ ├── unnamed-chunk-29-1.png │ │ │ ├── unnamed-chunk-29-2.png │ │ │ ├── unnamed-chunk-30-1.png │ │ │ ├── unnamed-chunk-30-2.png │ │ │ └── unnamed-chunk-31-1.png │ ├── 2022-10-13-working-with-text-sentiment-analysis │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-22-1.png │ │ │ ├── unnamed-chunk-23-1.png │ │ │ ├── unnamed-chunk-24-1.png │ │ │ ├── unnamed-chunk-26-1.png │ │ │ ├── unnamed-chunk-27-1.png │ │ │ ├── unnamed-chunk-28-1.png │ │ │ ├── unnamed-chunk-35-1.png │ │ │ ├── unnamed-chunk-36-1.png │ │ │ ├── unnamed-chunk-37-1.png │ │ │ └── unnamed-chunk-38-1.png │ ├── 2022-10-18-best-practices-data-analyses │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-12-1.png │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ ├── unnamed-chunk-15-1.png │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-18-1.png │ │ │ ├── unnamed-chunk-19-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── 
unnamed-chunk-20-1.png │ │ │ ├── unnamed-chunk-21-1.png │ │ │ ├── unnamed-chunk-22-1.png │ │ │ ├── unnamed-chunk-23-1.png │ │ │ ├── unnamed-chunk-24-1.png │ │ │ ├── unnamed-chunk-25-1.png │ │ │ ├── unnamed-chunk-26-1.png │ │ │ ├── unnamed-chunk-28-1.png │ │ │ ├── unnamed-chunk-29-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ ├── unnamed-chunk-7-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ └── unnamed-chunk-9-1.png │ ├── 2022-10-20-python-for-r-users │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ ├── unnamed-chunk-25-1.png │ │ │ └── unnamed-chunk-26-1.png │ ├── post-with-code │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ └── welcome │ │ └── index │ │ └── execute-results │ │ └── html.json ├── projects │ ├── 2022-08-30-project-0 │ │ ├── index │ │ │ └── execute-results │ │ │ │ └── html.json │ │ └── project-0 │ │ │ └── execute-results │ │ │ └── html.json │ ├── 2022-09-06-project-1 │ │ └── index │ │ │ ├── execute-results │ │ │ └── html.json │ │ │ └── figure-html │ │ │ └── unnamed-chunk-20-1.png │ ├── 2022-09-15-project-2 │ │ └── index │ │ │ └── execute-results │ │ │ └── html.json │ └── 2022-10-04-project-3 │ │ └── index │ │ └── execute-results │ │ └── html.json ├── schedule │ └── execute-results │ │ └── html.json ├── site_libs │ ├── clipboard │ │ └── clipboard.min.js │ └── quarto-listing │ │ ├── list.min.js │ │ └── quarto-listing.js └── syllabus │ └── execute-results │ └── html.json ├── _post_template.qmd ├── _quarto.yml ├── data ├── 2016-07-19.csv.bz2 ├── b_lyrics.RDS ├── bmi_pm25_no2_sim.csv ├── chicago.rds ├── chocolate.RDS ├── flights.csv ├── maacs_sim.csv ├── sales.RDS ├── storms_2004.csv.gz ├── team_standings.csv ├── ts_lyrics.RDS ├── tuesdata_rainfall.RDS └── tuesdata_temperature.RDS ├── images ├── cool_icon.png ├── dominici_ehp.png ├── dspipeline.png ├── happygitwithr.png ├── lippman.png ├── peng_preface.png └── 
phdversioncontrol.gif ├── index.qmd ├── jhustatcomputing2022.Rproj ├── lectures.qmd ├── posts ├── 2022-08-30-introduction-to-gitgithub │ └── index.qmd ├── 2022-08-30-introduction-to-r-and-rstudio │ └── index.qmd ├── 2022-09-01-literate-programming │ ├── index.qmd │ └── my-refs.bib ├── 2022-09-01-reference-management │ ├── index.qmd │ └── my-refs.bib ├── 2022-09-01-reproducible-research │ └── index.qmd ├── 2022-09-06-managing-data-frames-with-tidyverse │ └── index.qmd ├── 2022-09-06-reading-and-writing-data │ └── index.qmd ├── 2022-09-08-joining-data-in-r │ └── index.qmd ├── 2022-09-08-tidy-data-and-the-tidyverse │ └── index.qmd ├── 2022-09-13-ggplot2-plotting-system-part-1 │ └── index.qmd ├── 2022-09-13-plotting-systems │ └── index.qmd ├── 2022-09-15-ggplot2-plotting-system-part-2 │ └── index.qmd ├── 2022-09-20-r-nuts-and-bolts │ └── index.qmd ├── 2022-09-22-control-structures │ └── index.qmd ├── 2022-09-22-functions │ └── index.qmd ├── 2022-09-27-loop-functions │ └── index.qmd ├── 2022-09-29-debugging-r-code │ └── index.qmd ├── 2022-09-29-error-handling-and-generation │ └── index.qmd ├── 2022-10-04-working-with-dates-and-times │ └── index.qmd ├── 2022-10-06-regular-expressions │ └── index.qmd ├── 2022-10-11-working-with-factors │ └── index.qmd ├── 2022-10-13-working-with-text-sentiment-analysis │ └── index.qmd ├── 2022-10-18-best-practices-data-analyses │ └── index.qmd ├── 2022-10-20-python-for-r-users │ └── index.qmd ├── _metadata.yml └── welcome │ └── index.qmd ├── profile.jpg ├── projects.qmd ├── projects ├── 2022-08-30-project-0 │ └── index.qmd ├── 2022-09-06-project-1 │ └── index.qmd ├── 2022-09-15-project-2 │ └── index.qmd └── 2022-10-04-project-3 │ └── index.qmd ├── resources.qmd ├── schedule.qmd ├── scripts └── make_flametree_icon.R ├── styles.css ├── syllabus.qmd └── videos ├── downloadRMac.gif ├── downloadRStudio.gif ├── downloadRWindows.gif └── versionstring.gif /.github/workflows/publish.yml: 
-------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | push: 4 | branches: main 5 | 6 | name: Quarto Publish 7 | 8 | jobs: 9 | build-deploy: 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | steps: 14 | - name: Check out repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Set up Quarto 18 | uses: quarto-dev/quarto-actions/setup@v2 19 | 20 | - name: Render and Publish 21 | uses: quarto-dev/quarto-actions/publish@v2 22 | with: 23 | target: gh-pages 24 | env: 25 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | 6 | /.quarto/ 7 | /_site/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Statistical Computing 2 | 3 | This repo is for the Johns Hopkins BSPH course titled 'Statistical Computing' (BSPH 140.776.01) in Fall 2022. 4 | 5 | ## Instructor 6 | 7 | - Stephanie C. 
Hicks () 8 | 9 | ## Teaching Assistants 10 | 11 | - Phyllis Wei 12 | - Joe Sartini 13 | 14 | # Bugs 15 | 16 | Github pull requests are welcome for bugs 17 | -------------------------------------------------------------------------------- /_freeze/posts/2022-08-30-introduction-to-gitgithub/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "82133b65958f2f7d2f5fb41a48634859", 3 | "result": { 4 | "markdown": "---\ntitle: \"Introduction to git/GitHub\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Version control is a game changer; or how I learned to love git/GitHub\"\ndate: 2022-08-30\nimage: \"../../images/happygitwithr.png\"\ncategories: [module 1, week 1, programming, version control, git, GitHub]\n---\n\n\n\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following materials:**\n\n1. [Happy Git with R](https://happygitwithr.com) from Jenny Bryan\n2. [Chapter on git and GitHub in `dsbook`](https://rafalab.github.io/dsbook/git.html) from Rafael Irizarry\n:::\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n- \n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n- Know what Git and GitHub are.\n- Know why one might want to use them.\n- Have created and set up a GitHub account.\n:::\n\n# Introduction to git/GitHub\n\nThis document gives a brief explanation of GitHub and how we will use it for this course.\n\n### git\n\n*Git* is what is called a *version control system* for file management. 
The main idea is that as you (and your collaborators) work on a project, the software tracks, and records any changes made by anyone.\n\n- Similar to the \"track changes\" features in Microsoft Word, but more rigorous, powerful, and scaled up to multiple files\n- Great for solo or collaborative work\n\n### GitHub\n\n*GitHub* is a hosting service on internet for git-aware folders and projects\n\n- Similar to the DropBox or Google, but more structured, powerful, and programmatic\n- Great for solo or collaborative work!\n- Technically *GitHub* is distinct from *Git*. However, *GitHub* is in some sense the interface and *Git* the underlying engine (a bit like *RStudio* and *R*).\n\nSince we will only be using *Git* through *GitHub*, I tend to not distinguish between the two. In the following, I refer to all of it as just *GitHub*. Note that other interfaces to *Git* exist, e.g., *Bitbucket*, but *GitHub* is the most widely used one.\n\n### Why use git/GitHub?\n\nYou want to use *GitHub* to avoid this:\n\n\n::: {.cell}\n::: {.cell-output-display}\n![How not to use GitHub [image from PhD Comics]](../../images/phdversioncontrol.gif){width=80%}\n:::\n:::\n\n\n\\[[Source: PhD Comics](http://phdcomics.com/comics/archive_print.php?comicid=1531)\\]\n\n*GitHub* gives you a clean way to track your projects. It is also very well suited to collaborative work. Historically, version control was used for software development. However, it has become broader and is now used for many types of projects, including data science projects.\n\nTo learn a bit more about Git/GitHub and why you might want to use it, [read this article by Jenny Bryan](https://peerj.com/preprints/3159/).\n\n**Note her explanation of what's special with the `README.md` file on GitHub.**\n\n### What to (not) do\n\n**GitHub is ideal if** you have a project with a fair number of files, most of those files are text files (such as code, LaTeX, (R)markdown, etc.) 
and different people work on different parts of the project.\n\n**GitHub is less useful if** you have a lot of non-text files (e.g. Word or Powerpoint) and different team members might want to edit the same document at the same time. In that instance, a solution like Google Docs, Word+Dropbox, Word+Onedrive, etc. might be better.\n\n### How to use Git/GitHub\n\nGit and GitHub are fundamentally **based on commands you type into the command line**. Lots of online resources show you how to use the command line. This is the most powerful, and the way I almost always interact with git/GitHub. However, many folks find this the most confusing way to use git/GitHub. Alternatively, there are graphical interfaces.\n\n- [GitHub itself](https://desktop.github.com/) provides a graphical interface with basic functionality.\n- RStudio also has [Git/GitHub integration](https://happygitwithr.com/rstudio-git-github.html). Of course this only works for R project GitHub integration.\n- There are also third party GitHub clients with many advanced features, most of which you won't need initially, but might eventually.\n\n**Note**: As a student, you can (and should) upgrade to the Pro version of GitHub for free (i.e. access to unlimited private repositories is one benefit), see the [GitHub student developer pack](https://education.github.com/pack) on how to do this.\n\n# Getting Started\n\nOne of my favorite resources for getting started with git/GitHub is the Happy Git with R from Jenny Bryan:\n\n- \n\n\n::: {.cell}\n::: {.cell-output-display}\n![A screenshot of the Happy Git with R online book from Jenny Bryan](../../images/happygitwithr.png){width=80%}\n:::\n:::\n\n\nIt truly is one of the **best resources** out there for getting started with git/GitHub, especially with the integration to RStudio. Therefore, at this point, I will encourage all of you to go read through the online book.\n\nSome of you may only need to skim it, others will need to spend some time reading through it. 
Either way, I will bet that you won't regret the time investment.\n\n# Using git/GitHub in our course\n\nIn this course, you will use git/GitHub in the following ways:\n\n1. Project 0 (optional) - You will create a website introducing yourself to folks in the course and deploy it on GitHub.\n2. Projects 1-3 - You can practice using git locally (on your compute environment) to track your changes over time and, if you wish (but highly suggested), you can practice pushing your project solutions to a private GitHub repository on your GitHub account (i.e. `git add`, `git commit`, `git push`, `git pull`, etc) .\n\nLearning these skills will be useful down the road if you ever work collaboratively on a project (i.e. writing code as a group). In this scenario, you will use the skills you have been practicing in your projects to work together as a team in a single GitHub repository.\n\n# Post-lecture materials\n\n### Final Questions\n\nHere are some post-lecture questions to help you think about the material discussed.\n\n::: callout-note\n### Questions\n\n1. What is version control?\n\n2. What is the difference between git and GitHub?\n\n3. 
What are other version controls software/tools that are available besides git?\n:::\n\n### Additional Resources\n\n::: callout-tip\n- [git and GitHub in the `dsbook`](https://rafalab.github.io/dsbook/git.html) by Rafael Irizarry\n:::\n\n## rtistry\n\n\n::: {.cell .fig-cap-location-top}\n::: {.cell-output-display}\n![](https://github.com/djnavarro/art/raw/master/static/gallery/flametree/extra/001_flametree_20_13.jpg)\n:::\n:::\n\n\n\\['Flametree' from Danielle Navarro \\]\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/posts/2022-09-01-literate-programming/index/figure-html/plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-01-literate-programming/index/figure-html/plot2-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-01-reference-management/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "5e45cd6390aa1160c6f9121529e1dd49", 3 | "result": { 4 | "markdown": "---\ntitle: \"Reference management\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"How to use citations and incorporate references from a bibliography in R Markdown.\"\ndate: 2022-09-01\ncategories: [module 1, week 1, R Markdown, programming]\nbibliography: my-refs.bib\n---\n\n\n\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following 
materials:**\n\n1. Authoring in [R Markdown from RStudio](https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html)\n2. Citations from [Reproducible Research in R](https://monashdatafluency.github.io/r-rep-res/citations.html) from the [Monash Data Fluency](https://monashdatafluency.github.io) initiative\n3. Bibliography from [R Markdown Cookbook](https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html)\n:::\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n- \n- \n- \n- \n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n- Know what types of bibliography file formats can be used in an R Markdown file\n- Learn how to add citations to an R Markdown file\n- Know how to change the citation style (e.g. APA, Chicago, etc)\n:::\n\n# Introduction\n\nFor almost any data analysis, especially if it is meant for publication in the academic literature, you will have to cite other people's work and include the references (bibliographies or citations) in your work. In this class, you are likely to need to include references and cite other people's work like in a regular research paper.\n\nR provides a nice function, `citation()`, that helps us generate a citation blob for the R packages that we have used. Let's try generating citation text for the rmarkdown package by using the following command\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncitation(\"rmarkdown\")\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n\nTo cite the 'rmarkdown' package in publications, please use:\n\n JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi\n and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and\n Winston Chang and Richard Iannone (2022). rmarkdown: Dynamic\n Documents for R. R package version 2.15. URL\n https://rmarkdown.rstudio.com.\n\n Yihui Xie and J.J. Allaire and Garrett Grolemund (2018). R Markdown:\n The Definitive Guide. Chapman and Hall/CRC. 
ISBN 9781138359338. URL\n https://bookdown.org/yihui/rmarkdown.\n\n Yihui Xie and Christophe Dervieux and Emily Riederer (2020). R\n Markdown Cookbook. Chapman and Hall/CRC. ISBN 9780367563837. URL\n https://bookdown.org/yihui/rmarkdown-cookbook.\n\nTo see these entries in BibTeX format, use 'print(,\nbibtex=TRUE)', 'toBibtex(.)', or set\n'options(citation.bibtex.max=999)'.\n```\n:::\n:::\n\n\nI assume you are familiar with how citing references works, and hopefully, you are already using a reference manager. If not, let me know in the discussion boards.\n\nTo have something that plays well with R Markdown, you need file format that stores all the references. Click here to learn more other possible file formats available to you to use within a R Markdown file:\n\n- \n\n### Citation management software\n\nAs you can see, there are ton of file formats including `.medline` (MEDLINE), `.bib` (BibTeX), `.ris` (RIS), `.enl` (EndNote).\n\nI will not discuss underlying citational management software itself, but I will talk briefly how you might create one of these file formats.\n\nIf you recall the output from `citation(\"rmarkdown\")` above, we might consider manually copying and pasting the output into a citation management software, but instead we can use `write_bib()` function from `knitr` package to create a bibliography file ending in `.bib`.\n\nLet's run the following code in order to generate a `my-refs.bib` file\n\n\n::: {.cell}\n\n```{.r .cell-code}\nknitr::write_bib(\"rmarkdown\", file = \"my-refs.bib\")\n```\n:::\n\n\nNow we can see we have the file saved locally.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlist.files()\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] \"index.qmd\" \"index.rmarkdown\" \"my-refs.bib\" \n```\n:::\n:::\n\n\nIf you open up the `my-refs.bib` file, you will see\n\n @Manual{R-rmarkdown,\n title = {rmarkdown: Dynamic Documents for R},\n author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey 
and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone},\n year = {2021},\n note = {R package version 2.8},\n url = {https://CRAN.R-project.org/package=rmarkdown},\n }\n\n @Book{rmarkdown2018,\n title = {R Markdown: The Definitive Guide},\n author = {Yihui Xie and J.J. Allaire and Garrett Grolemund},\n publisher = {Chapman and Hall/CRC},\n address = {Boca Raton, Florida},\n year = {2018},\n note = {ISBN 9781138359338},\n url = {https://bookdown.org/yihui/rmarkdown},\n }\n\n @Book{rmarkdown2020,\n title = {R Markdown Cookbook},\n author = {Yihui Xie and Christophe Dervieux and Emily Riederer},\n publisher = {Chapman and Hall/CRC},\n address = {Boca Raton, Florida},\n year = {2020},\n note = {ISBN 9780367563837},\n url = {https://bookdown.org/yihui/rmarkdown-cookbook},\n }\n\n::: resources\n**Note there are three keys that we will use later on**:\n\n- `R-rmarkdown`\n- `rmarkdown2018`\n- `rmarkdown2020`\n:::\n\n### Linking `.bib` file with `.rmd` (and `.qmd`) files\n\nIn order to use references within a R Markdown file, you will need to specify the name and a location of a bibliography file using the bibliography metadata field in a YAML metadata section. 
For example:\n\n``` yaml\n---\ntitle: \"My top ten favorite R packages\"\noutput: html_document\nbibliography: my-refs.bib\n---\n```\n\nYou can include multiple reference files using the following syntax, alternatively you can concatenate two bib files into one.\n\n``` yaml\n---\nbibliography: [\"my-refs1.bib\", \"my-refs2.bib\"]\n---\n```\n\n### Inline citation\n\nNow we can start using those bib keys that we have learned just before, using the following syntax\n\n- `[@key]` for single citation\n- `[@key1; @key2]` multiple citation can be separated by semi-colon\n- `[-@key]` in order to suppress author name, and just display the year\n- `[see @key1 p 12; also this ref @key2]` is also a valid syntax\n\nLet's start by citing the `rmarkdown` package using the following code and press `Knit` button:\n\n------------------------------------------------------------------------\n\nI have been using the amazing Rmarkdown package [@R-rmarkdown]! I should also go and read [@rmarkdown2018; and @rmarkdown2020] books.\n\n------------------------------------------------------------------------\n\nPretty cool, eh??\n\n### Citation styles\n\nBy default, Pandoc will use a Chicago author-date format for citations and references.\n\nTo use another style, you will need to specify a CSL (Citation Style Language) file in the `csl` metadata field, e.g.,\n\n``` yaml\n---\ntitle: \"My top ten favorite R packages\"\noutput: html_document\nbibliography: my-refs.bib\ncsl: biomed-central.csl\n---\n```\n\n::: resources\nTo find your required formats, we recommend using the [Zotero Style Repository](https://www.zotero.org/styles), which makes it easy to search for and download your desired style.\n:::\n\nCSL files can be tweaked to meet custom formatting requirements. For example, we can change the number of authors required before \"et al.\" is used to abbreviate them. 
This can be simplified through the use of visual editors such as the one available at https://editor.citationstyles.org.\n\n### Other cool features\n\n#### Add an item to a bibliography without using it\n\nBy default, the bibliography will only display items that are directly referenced in the document. If you want to include items in the bibliography without actually citing them in the body text, you can define a dummy nocite metadata field and put the citations there.\n\n``` yaml\n---\nnocite: |\n @item1, @item2\n---\n```\n\n#### Add all items to the bibliography\n\nIf we do not wish to explicitly state all of the items within the bibliography but would still like to show them in our references, we can use the following syntax:\n\n``` yaml\n---\nnocite: '@*'\n---\n```\n\nThis will force all items to be displayed in the bibliography.\n\n::: resources\nYou can also have an appendix appear after the bibliography. For more on this, see:\n\n- \n:::\n\n# Other useful tips\n\nWe have learned that inside your file that contains all your references (e.g. `my-refs.bib`), typically each reference gets a key, which is a shorthand that is generated by the reference manager or you can create yourself.\n\nFor instance, I use a format of lower-case first author last name followed by 4 digit year for each reference followed by a keyword (e.g., the name of a software package). Alternatively, you can omit the keyword. But note that if I cite a paper by the same first author that was published in the same year, then a lower case letter is added to the end. For instance, for a paper that I wrote as 1st author in 2022, my bibtex key might be `hicks2022` or `hicks2022a`. You can decide what scheme to use, just pick one and use it *forever*.\n\nIn your R Markdown document, you can then cite the reference by adding the key, such as `...in the paper by Hicks et al. 
[@hicks2022]...`.\n\n# Post-lecture materials\n\n### Practice\n\nHere are some post-lecture tasks to practice some of the material discussed.\n\n::: callout-note\n### Questions\n\n**Try out the following:**\n\n1. What do you notice that's different when you run `citation(\"tidyverse\")` (compared to `citation(\"rmarkdown\")`)?\n\n2. Install the following packages:\n\n\n::: {.cell}\n\n```{.r .cell-code}\ninstall.packages(c(\"bibtex\", \"RefManageR\"))\n```\n:::\n\n\nWhat do they do? How might they be helpful to you in terms of reference management?\n\n3. Instead of using a `.bib` file, try using a different bibliography file format in an R Markdown document.\n\n4. Practice using a different CSL file to change the citation style.\n:::\n\n### Additional Resources\n\n::: callout-tip\n- Add here.\n:::\n\n## rtistry\n\n\n::: {.cell .fig-cap-location-top}\n\n:::\n\n\n\\[Add here.\\]\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-29-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-29-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-30-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-30-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-31-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-31-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-32-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-32-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-33-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-33-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-34-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-34-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-35-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-35-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-36-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-36-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-37-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-37-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-ggplot2-plotting-system-part-1/index/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-plotting-systems/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "98d9a4042479184cca076761ad82ddfe", 3 | "result": { 4 | "markdown": "---\ntitle: \"Plotting Systems\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Overview of three plotting systems in R\"\ndate: 2022-09-13\ndraft: true\ncategories: [module 1, week 3, R, programming, ggplot2, data viz]\n---\n\n\n> The data may not contain the answer. \n> And, if you torture the data long enough,\n> it will tell you anything. ---*John W. Tukey*\n\n\n# Pre-lecture materials\n\n### Read ahead\n\n::: callout-note\n## Read ahead\n\n**Before class, you can prepare by reading the following materials:**\n\n1. \n2. Paul Murrell (2011). *R Graphics*, CRC Press.\n3. Hadley Wickham (2009). *ggplot2*, Springer.\n4. Deepayan Sarkar (2008). *Lattice: Multivariate Data Visualization with R*, Springer.\n\n:::\n\n### Acknowledgements\n\nMaterial for this lecture was borrowed and adopted from\n\n- \n\n# Learning objectives\n\n::: callout-note\n# Learning objectives\n\n**At the end of this lesson you will:**\n\n- Be able to identify and describe the three plotting systems in R\n:::\n\n# Plotting Systems\n\nThere are **three different plotting systems in R** and they each have different characteristics and modes of operation. \n\n:::{.callout-tip}\n\n### Important \n\nThe three systems are \n\n1. The base plotting system\n2. The lattice system\n3. 
The ggplot2 system \n\n**This course will focus primarily on the ggplot2 plotting system**. The other two systems are presented for context.\n\n:::\n\n\n## The Base Plotting System\n\nThe **base plotting system** is the original plotting system for R. The basic model is sometimes **referred to as the \"artist's palette\" model**. \n\nThe idea is you start with a blank canvas and build up from there. \n\nIn more R-specific terms, you **typically start with the `plot()` function** (or a similar plot-creating function) to *initiate* a plot and then *annotate* the plot with various annotation functions (`text`, `lines`, `points`, `axis`).\n\nThe base plotting system is **often the most convenient plotting system** to use because it mirrors how we sometimes think of building plots and analyzing data. \n\nIf we do not have a completely well-formed idea of how we want to look at some data, often we will start by \"throwing some data on the page\" and then slowly add more information to it as our thought process evolves. \n\n:::{.callout-tip}\n\n### Example\n\nWe might look at a simple scatterplot and then decide to add a linear regression line or a smoother to it to highlight the trends.\n\n\n::: {.cell}\n\n```{.r .cell-code}\ndata(airquality)\nwith(airquality, {\n plot(Temp, Ozone)\n lines(loess.smooth(Temp, Ozone))\n})\n```\n\n::: {.cell-output-display}\n![Scatterplot with loess curve](index_files/figure-html/unnamed-chunk-1-1.png){width=480}\n:::\n:::\n\n\n:::\n\nIn the code above: \n\n- The `plot()` function creates the initial plot and draws the points (circles) on the canvas.\n- The `lines` function is used to annotate or add to the plot (in this case it adds a loess smoother to the scatterplot).\n\nNext, we use the `plot()` function to draw the points on the scatterplot and then use the `main` argument to add a main title to the plot. 
\n\n\n::: {.cell}\n\n```{.r .cell-code}\ndata(airquality)\nwith(airquality, {\n plot(Temp, Ozone, main = \"my plot\")\n lines(loess.smooth(Temp, Ozone))\n})\n```\n\n::: {.cell-output-display}\n![Scatterplot with loess curve](index_files/figure-html/unnamed-chunk-2-1.png){width=480}\n:::\n:::\n\n\n:::{.callout-tip}\n\n### Note\n\nOne downside with constructing base plots is that you **cannot go backwards once the plot has started**. \n\nIt is possible that you could start down the road of constructing a plot and realize later (when it is too late) that you do not have enough room to add a y-axis label or something like that.\n\n:::\n\nIf you have a specific plot in mind, there is then a need to **plan in advance** to make sure, for example, that you have set your margins to be the right size to fit all of the annotations that you may want to include. \n\nWhile the base plotting system is nice in that it gives you the flexibility to specify these kinds of details to painstaking accuracy, **sometimes it would be nice if the system could just figure it out for you**.\n\n:::{.callout-tip}\n\n### Note\n\nAnother downside of the base plotting system is that it is **difficult to describe or translate a plot to others because there is no clear graphical language or grammar** that can be used to communicate what you have done. \n\nThe only real way to describe what you have done in a base plot is to just list the series of commands/functions that you have executed, which is not a particularly compact way of communicating things. \n\nThis is one problem that the `ggplot2` package attempts to address.\n\n:::\n\n\n:::{.callout-tip}\n\n### Example\n\nAnother typical base plot is constructed with the following code.\n\n\n::: {.cell}\n\n```{.r .cell-code}\ndata(cars)\n\n## Create the plot / draw canvas\nwith(cars, plot(speed, dist))\n\n## Add annotation\ntitle(\"Speed vs. 
Stopping distance\")\n```\n\n::: {.cell-output-display}\n![Base plot with title](index_files/figure-html/unnamed-chunk-3-1.png){width=480}\n:::\n:::\n\n\n:::\n\nWe will go into more detail on what these functions do in later lessons.\n\n\n## The Lattice System\n\nThe **lattice plotting system** is implemented in the `lattice` R package, which comes with every installation of R (although it is not loaded by default).\n\nTo **use the lattice plotting functions**, you must first load the `lattice` package with the `library` function.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(lattice)\n```\n:::\n\n\nWith the lattice system, **plots are created with a single function call**, such as `xyplot()` or `bwplot()`. \n\nThere is **no real distinction between functions that create or initiate plots** and **functions that annotate plots** because it all happens at once.\n\nLattice plots tend to be **most useful for conditioning types of plots**, i.e. looking at how `y` changes with `x` across levels of `z`. \n\n- e.g. these types of plots are useful for looking at multi-dimensional data and often allow you to squeeze a lot of information into a single window or page.\n\nAnother aspect of lattice that makes it different from base plotting is that **things like margins and spacing are set automatically**. \n\nThis is possible because the entire plot is specified at once via a single function call, so all of the available information needed to figure out the spacing and margins is already there.\n\n\n:::{.callout-tip}\n\n### Example\n\nHere is a lattice plot that looks at the relationship between life expectancy and income and how that relationship varies by region in the United States. 
\n\n\n::: {.cell}\n\n```{.r .cell-code}\nstate <- data.frame(state.x77, region = state.region)\nxyplot(Life.Exp ~ Income | region, data = state, layout = c(4, 1))\n```\n\n::: {.cell-output-display}\n![Lattice plot](index_files/figure-html/unnamed-chunk-5-1.png){width=768}\n:::\n:::\n\n\n:::\n\nYou can see that the entire plot was generated by the call to `xyplot()` and all of the data for the plot were stored in the `state` data frame. \n\nThe **plot itself contains four panels**---one for each region---and **within each panel is a scatterplot** of life expectancy and income. \n\nThe notion of *panels* comes up a lot with lattice plots because you typically have many panels in a lattice plot (each panel typically represents a *condition*, like \"region\").\n\n:::{.callout-tip}\n\n### Note\n\nDownsides with the lattice system \n\n- It can sometimes be very **awkward to specify an entire plot** in a single function call (you end up with functions with many many arguments). \n- **Annotation in panels in plots is not especially intuitive** and can be difficult to explain. In particular, the use of custom panel functions and subscripts can be difficult to wield and requires intense preparation. \n- Once a plot is created, **you cannot \"add\" to the plot** (but of course you can just make it again with modifications).\n\n:::\n\n\n## The ggplot2 System\n\nThe **ggplot2 plotting system** attempts to split the difference between base and lattice in a number of ways. \n\n:::{.callout-tip}\n\n### Note\n\nTaking cues from lattice, the ggplot2 system automatically deals with spacings, text, titles but also allows you to annotate by \"adding\" to a plot.\n\n:::\n\nThe ggplot2 system is implemented in the `ggplot2` package (part of the `tidyverse` package), which is available from CRAN (it does not come with R). 
\n\nYou can install it from CRAN via\n\n\n::: {.cell}\n\n```{.r .cell-code}\ninstall.packages(\"ggplot2\")\n```\n:::\n\n\nand then load it into R via the `library()` function.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(ggplot2)\n```\n:::\n\n\nSuperficially, the `ggplot2` functions are similar to `lattice`, but the system is generally easier and more intuitive to use. \n\nThe defaults used in `ggplot2` make many choices for you, but you can still customize plots to your heart's desire.\n\n:::{.callout-tip}\n\n### Example\n\nA typical plot with the `ggplot2` package looks as follows.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(tidyverse)\ndata(mpg)\nmpg %>%\n ggplot(aes(displ, hwy)) + \n geom_point()\n```\n\n::: {.cell-output-display}\n![ggplot2 plot](index_files/figure-html/unnamed-chunk-8-1.png){width=576}\n:::\n:::\n\n\n:::\n\nThere are additional functions in `ggplot2` that allow you to make arbitrarily sophisticated plots.\n\nWe will discuss more about this in the next lecture. 
\n\n", 5 | "supporting": [ 6 | "index_files" 7 | ], 8 | "filters": [ 9 | "rmarkdown/pagebreak.lua" 10 | ], 11 | "includes": {}, 12 | "engineDependencies": {}, 13 | "preserve": {}, 14 | "postProcess": true 15 | } 16 | } -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-1-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-5-1.png 
-------------------------------------------------------------------------------- /_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-13-plotting-systems/index/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- 
/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-17-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-09-15-ggplot2-plotting-system-part-2/index/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-34-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-34-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-35-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-35-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-36-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-36-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-37-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-37-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-38-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-38-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-39-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-39-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-40-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-40-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-41-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-04-working-with-dates-and-times/index/figure-html/unnamed-chunk-41-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-22-1.png 
-------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-24-2.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-26-2.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-27-2.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-28-2.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-29-2.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-1.png 
-------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-30-2.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-31-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-11-working-with-factors/index/figure-html/unnamed-chunk-31-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- 
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- 
/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-35-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-35-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-36-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-36-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-37-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-37-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-38-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-13-working-with-text-sentiment-analysis/index/figure-html/unnamed-chunk-38-1.png -------------------------------------------------------------------------------- 
/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-14-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-29-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-29-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-18-best-practices-data-analyses/index/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/posts/2022-10-20-python-for-r-users/index/figure-html/unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /_freeze/posts/post-with-code/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "60b3d422ef5002b26ad42abeb118098e", 3 | "result": { 4 | "markdown": "---\ntitle: \"Post With Code\"\nauthor: \"Harlow Malloc\"\ndate: \"2022-08-09\"\ndraft: TRUE\ncategories: [news, code, analysis]\nimage: \"image.jpg\"\n---\n\n\nThis is a post with executable code.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n1 + 1\n```\n\n::: {.cell-output .cell-output-stdout}\n```\n[1] 2\n```\n:::\n:::\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/projects/2022-08-30-project-0/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "7ab882c06a83c8de87a2b2e2b92c17a9", 3 | "result": { 4 | "markdown": "---\ntitle: \"Project 0 (optional)\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Information for Project 0 (entirely optional, but hopefully useful and fun!)\"\ndate: 2022-08-30\ncategories: [project 0, projects]\n---\n\n\n# Background\n\n**Due date: Sept 8 at 1:29pm**\n\nUsing the tools we learned in the first week (e.g. R, RStudio and Github). Let's apply them in a small (but also comprehensive) exercise.\n\n- **Please note this project is entirely optional** (i.e. 
it will not be graded), but hopefully it will be helpful to you getting set up for the rest of the course (i.e. set up these tools on your computing environment) and give you an opportunity to introduce yourself to your classmates.\n\n- **For anyone who completes it, you get a free hex sticker!** If you aren't familiar with the hex stickers, check out [this link](https://github.com/rstudio/hex-stickers). You can add them to your laptop for some character and swag (or [turn them into magnets](https://twitter.com/ClaireMKBowen/status/1294336123414523904)). I have a ton of different ones from the tidyverse or [RLadies Baltimore](https://pbs.twimg.com/profile_images/1236855715018559488/PuYAjTTD_400x400.jpg). You can come pick one up from my office or I can mail it to you if you email me a mailing address after you submit the project.\n\n- **For those of you who are new to GitHub/R/Rmarkdown**: this project makes you do a lot of things that you might not be familiar with. I know that this might be time-consuming and also might feel a bit intimidating. It's partly unavoidable and partly on purpose. You need to learn how to quickly get up to speed with all kinds of new tools that come your way. So practicing it is a good idea. You are welcome to draw on any sources for help that you want (online, classmates, instructor, etc.). I'm confident with a bit of trial and error you'll get it to work.\n\n# Part 1\n\nThis part of the project is to ensure that you have successfully set up your computing environment. Please email (use the Subject line: `140.776 Setup`) the Course Instructor (Dr. Stephanie Hicks) at shicks19\\@jhu.edu the following information:\n\n### Setting up your computing environment\n\n1. Your name, JHED ID (if applicable).\n\n2. The type of computer/operating system you are using (Windows, Mac, Unix/Linux, other)\n\n3. The version of R that you have installed on your computer. 
To do this, start up R and run the following in the R console and include the output in your email.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(R.version.string)\n```\n:::\n\n\n![Printing the R version string](../../videos/versionstring.gif)\n\n4. The version of RStudio that you have installed on your computer. To do this start up RStudio and in the R console window, run the following and again include the output in your email:\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(RStudio.Version()$version)\n```\n:::\n\n\n5. If you have a GitHub username, please include this in your email. If you do not have a GitHub username, read https://happygitwithr.com, sign up for GitHub, and include your new username in your email to me.\n\n6. To make sure git is installed on your system, use the 'Terminal' (e.g. it's next to the R Console within RStudio) (or whatever you use), run the following and include the output in your email:\n\nFor example, this is mine:\n\n\n::: {.cell}\n\n```{.bash .cell-code}\ngit --version\n```\n\n\n::: {.cell-output .cell-output-stdout}\n```\ngit version 2.32.1 (Apple Git-133)\n```\n:::\n:::\n\n\nIf you have any trouble with any of the steps above, try to first post on the discussion board on CoursePlus. The TAs and I will be checking it frequently, but other students may also be helpful in their replies. You can also use other resources to get help (Google, R4DS, colleagues/friends/relatives with R/Markdown experience, etc.). Still, try to do as much as possible yourself. We will use all the bits you are learning here repeatedly during this course.\n\n# Part 2\n\nThis part of the project is to help you introduce yourself (and your interests!) to others in this course. You will create a new GitHub repository and build a small website about yourself.\n\n### 1. 
Create a GitHub repo for your website\n\nCreate a new GitHub repository titled `biostat776-intro--` (where you replace `` with your first name and `` with your last name) in your own personal GitHub account (e.g. `https://github.com//biostat776-intro--`).\n\nFor example, you can find an example that I created for myself at\n\n- github repo: \n\n### 2. Build a website using R Markdown\n\nUsing one of the many ways we discussed in class (e.g. a [simple R Markdown website](https://bookdown.org/yihui/rmarkdown/rmarkdown-site.html), [blogdown](https://bookdown.org/yihui/blogdown/), [distill](https://rstudio.github.io/distill/website.html), etc), create a new project in RStudio with the appropriate files. For example, you might include the following information:\n\n- Write a short summary introducing yourself. Structure the webpage with headings, subheadings, etc. Talk a bit about yourself, your background, training, research interests. Let me/us know what kind of statistics, programming, data analysis experience you already have. I am also curious to know what you most hope to learn in this course.\n\n- Five fun facts about yourself\n\n- A web page linking to something you think is really cool/interesting/inspiring/etc. You could also describe briefly what it is and why you like it.\n\nIf you want, feel free to get creative and include other things. You can play with RMarkdown if you wish to, e.g., you can try to include some table or a video, etc.\n\n### 3. Include a `README.md` file\n\nYour project repository should include a `README.md` file (if it was not included already).\n\nEdit the repository `README.md` file. Typically it will only contain the name of your repository with a `#` sign in front. The `#` represents a level 1 heading in Markdown. Change the headline and call it \"Introducing myself\" (or something like that). 
Underneath write something like \"This website contains a short introduction of *Your Name*.\"\n\nMake sure the 2 files (README.md and especially `index.Rmd` / `index.html`) look the way you want. Make changes until everything works.\n\n### 4. Deploy your website\n\nDepending on how you want to deploy your website, the following may or may not be relevant to you. In general, you want to make sure you have initialized your project to use `git` (i.e. you can type `git init` to initialize the repository to use git). Add and commit your changes. Push your changes and deploy your website.\n\nFollowing steps 2-4, here is my example website:\n\n- website: \n\n### 5. Share your website\n\n- Go to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created.\n- As you read the introductions from other folks in the class, feel free to comment/reply using Discussion board.\n\nIn class on Sept 8, I will show as many websites as I can from Courseplus!\n", 5 | "supporting": [ 6 | "index_files" 7 | ], 8 | "filters": [ 9 | "rmarkdown/pagebreak.lua" 10 | ], 11 | "includes": {}, 12 | "engineDependencies": {}, 13 | "preserve": {}, 14 | "postProcess": true 15 | } 16 | } -------------------------------------------------------------------------------- /_freeze/projects/2022-08-30-project-0/project-0/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "e115266959b77345b6f990263d8292ca", 3 | "result": { 4 | "markdown": "---\ntitle: \"Project 0 (optional)\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Information for Project 0 (entirely optional, but hopefully useful and fun!)\"\ndate: 2022-08-30\ncategories: [project 0, projects]\n---\n\n\n\n# Background\n\n**Due date: Sept 8 
at 1:29pm**\n\nUsing the tools we learned in the first week (e.g. R, RStudio and Github). Let's apply them in a small (but also comprehensive) exercise.\n\n- **Please note this project is entirely optional** (i.e. it will not be graded), but hopefully it will be helpful to you getting set up for the rest of the course (i.e. set up these tools on your computing environment) and give you an opportunity to introduce yourself to your classmates.\n\n- **For anyone who completes it, you get a free hex sticker!** If you aren't familiar with the hex stickers, check out [this link](https://github.com/rstudio/hex-stickers). You can add them to your laptop for some character and swag (or [turn them into magnets](https://twitter.com/ClaireMKBowen/status/1294336123414523904)). I have a ton of different ones from the tidyverse or [RLadies Baltimore](https://pbs.twimg.com/profile_images/1236855715018559488/PuYAjTTD_400x400.jpg). You can come pick one up from my office or I can mail it to you if you email me a mailing address after you submit the project.\n\n- **For those of you who are new to GitHub/R/Rmarkdown**: this project makes you do a lot of things that you might not be familiar with. I know that this might be time-consuming and also might feel a bit intimidating. It's partly unavoidable and partly on purpose. You need to learn how to quickly get up to speed with all kinds of new tools that come your way. So practicing it is a good idea. You are welcome to draw on any sources for help that you want (online, classmates, instructor, etc.). I'm confident with a bit of trial and error you'll get it to work.\n\n# Part 1\n\nThis part of the project is to ensure that you have successfully set up your computing environment. Please email (use the Subject line: `140.776 Setup`) the Course Instructor (Dr. Stephanie Hicks) at shicks19\\@jhu.edu the following information:\n\n### Setting up your computing environment\n\n1. Your name, JHED ID (if applicable).\n\n2. 
The type of computer/operating system you are using (Windows, Mac, Unix/Linux, other)\n\n3. The version of R that you have installed on your computer. To do this, start up R and run the following in the R console and include the output in your email.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(R.version.string)\n```\n:::\n\n\n![Printing the R version string](../../videos/versionstring.gif)\n\n4. The version of RStudio that you have installed on your computer. To do this start up RStudio and in the R console window, run the following and again include the output in your email:\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprint(RStudio.Version()$version)\n```\n:::\n\n\n5. If you have a GitHub username, please include this in your email. If you do not have a GitHub username, read https://happygitwithr.com, sign up for GitHub, and include your new username in your email to me.\n\n6. To make sure git is installed on your system, use the 'Terminal' (e.g. it's next to the R Console within RStudio) (or whatever you use), run the following and include the output in your email:\n\nFor example, this is mine:\n\n\n::: {.cell}\n\n```{.bash .cell-code}\ngit --version\n```\n\n\n::: {.cell-output .cell-output-stdout}\n```\ngit version 2.32.1 (Apple Git-133)\n```\n:::\n:::\n\n\nIf you have any trouble with any of the steps above, try to first post on the discussion board on CoursePlus. The TAs and I will be checking it frequently, but other students may also be helpful in their replies. You can also use other resources to get help (Google, R4DS, colleagues/friends/relatives with R/Markdown experience, etc.). Still, try to do as much as possible yourself. We will use all the bits you are learning here repeatedly during this course.\n\n# Part 2\n\nThis part of the project is to help you introduce yourself (and your interests!) to others in this course. You will create a new GitHub repository and build a small website about yourself.\n\n### 1. 
Create a GitHub repo for your website\n\nCreate a new GitHub repository titled `biostat776-intro--` (where you replace `` with your first name and `` with your last name) in your own personal GitHub account (e.g. `https://github.com//biostat776-intro--`).\n\nFor example, you can find an example that I created for myself at\n\n- github repo: \n\n### 2. Build a website using R Markdown\n\nUsing one of the many ways we discussed in class (e.g. a [simple R Markdown website](https://bookdown.org/yihui/rmarkdown/rmarkdown-site.html), [blogdown](https://bookdown.org/yihui/blogdown/), [distill](https://rstudio.github.io/distill/website.html), etc), create a new project in RStudio with the appropriate files. For example, you might include the following information:\n\n- Write a short summary introducing yourself. Structure the webpage with headings, subheadings, etc. Talk a bit about yourself, your background, training, research interests. Let me/us know what kind of statistics, programming, data analysis experience you already have. I am also curious to know what you most hope to learn in this course.\n\n- Five fun facts about yourself\n\n- A web page linking to something you think is really cool/interesting/inspiring/etc. You could also describe briefly what it is and why you like it.\n\nIf you want, feel free to get creative and include other things. You can play with RMarkdown if you wish to, e.g., you can try to include some table or a video, etc.\n\n### 3. Include a `README.md` file\n\nYour project repository should include a `README.md` file (if it was not included already).\n\nEdit the repository `README.md` file. Typically it will only contain the name of your repository with a `#` sign in front. The `#` represents a level 1 heading in Markdown. Change the headline and call it \"Introducing myself\" (or something like that). 
Underneath write something like \"This website contains a short introduction of *Your Name*.\"\n\nMake sure the 2 files (README.md and especially `index.Rmd` / `index.html`) look the way you want. Make changes until everything works.\n\n### 4. Deploy your website\n\nDepending on how you want to deploy your website, the following may or may not be relevant to you. In general, you want to make sure you have initialized your project to use `git` (i.e. you can type `git init` to initialize the repository to use git). Add and commit your changes. Push your changes and deploy your website.\n\nFollowing steps 2-4, here is my example website:\n\n- website: \n\n### 5. Share your website\n\n- Go to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created.\n- As you read the introductions from other folks in the class, feel free to comment/reply using Discussion board.\n\nIn class on Sept 8, I will show as many websites as I can from Courseplus!\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/projects/2022-09-06-project-1/index/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/_freeze/projects/2022-09-06-project-1/index/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /_freeze/projects/2022-10-04-project-3/index/execute-results/html.json: -------------------------------------------------------------------------------- 1 | { 2 | "hash": "4b11821c12993fd2bb8b928708ff148d", 3 | "result": { 4 | "markdown": 
"---\ntitle: \"Project 3\"\nauthor: \n - name: Stephanie Hicks\n url: https://stephaniehicks.com\n affiliation: Department of Biostatistics, Johns Hopkins\n affiliation_url: https://publichealth.jhu.edu\ndescription: \"Exploring album sales and sentiment of lyrics from Beyoncé and Taylor Swift\"\ndate: 2022-10-04\ncategories: [project 3, projects]\n---\n\n\n# Background\n\n**Due date: October 21 at 11:59pm**\n\nThe goal of this assignment is to practice wrangling special data types (including dates, character strings, and factors) and visualizing results while practicing our tidyverse skills.\n\n### To submit your project\n\nPlease write up your project using R Markdown and process it with `knitr`. Compile your document as an **HTML file** and submit your HTML file to the dropbox on Courseplus. Please **show all your code** (i.e. make sure to set `echo = TRUE`) for each of the answers to each part.\n\n# Load data\n\nThe datasets for this part of the assignment come from [TidyTuesday](https://www.tidytuesday.com).\n\nData dictionary available here:\n\n- \n\n![Beyoncé (left) and Taylor Swift (right)](https://akns-images.eonline.com/eol_images/Entire_Site/2019721/rs_1024x759-190821125112-1024.taylor-swift-beyonce-2009-mtv-vmas.ct.082119.jpg){preview=\"TRUE\"}\n\nSpecifically, we will explore album sales and lyrics from two artists (Beyoncé and Taylor Swift). The data are available from TidyTuesday from September 2020, which I have provided for you below:\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nb_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv')\nts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv')\nsales <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv')\n```\n:::\n\n\nHowever, to avoid re-downloading data, we will 
check to see if those files already exist using an `if()` statement:\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(here)\nif(!file.exists(here(\"data\",\"b_lyrics.RDS\"))){\n b_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv')\n ts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv')\n sales <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv')\n \n # save the files to RDS objects\n saveRDS(b_lyrics, file = here(\"data\",\"b_lyrics.RDS\"))\n saveRDS(ts_lyrics, file = here(\"data\",\"ts_lyrics.RDS\"))\n saveRDS(sales, file = here(\"data\",\"sales.RDS\"))\n}\n```\n:::\n\n\n::: callout-note\nThe above code will only run if it cannot find the path to the `b_lyrics.RDS` on your computer. Then, we can just read in these files every time we knit the R Markdown, instead of re-downloading them every time.\n:::\n\nLet's load the datasets\n\n\n::: {.cell}\n\n```{.r .cell-code}\nb_lyrics <- readRDS(here(\"data\",\"b_lyrics.RDS\"))\nts_lyrics <- readRDS(here(\"data\",\"ts_lyrics.RDS\"))\nsales <- readRDS(here(\"data\",\"sales.RDS\"))\n```\n:::\n\n\n# Part 1: Explore album sales\n\nIn this section, the goal is to explore the sales of studio albums from Beyoncé and Taylor Swift.\n\n**Notes**\n\n- In each of the subsections below that ask you to create a plot, you must create a title, subtitle, x-axis label, and y-axis label with units where applicable. For example, if your axis says \"sales\" as an axis label, change it to \"sales (in millions)\".\n\n## Part 1A\n\nIn this section, we will do some data wrangling.\n\n1. Use `lubridate` to create a column called `released` that is a `Date` class. 
However, to be able to do this, you first need to use `stringr` to search for a pattern that matches things like this \"(US)\\[51\\]\" in a string like this \"September 1, 2006 (US)\\[51\\]\" and remove them. (**Note**: to get full credit, you must create the regular expression).\n2. Use `forcats` to create a factor called `country` (**Note**: you may need to collapse some factor levels).\n3. Transform the `sales` into a unit that is album sales in millions of dollars.\n4. Keep only album sales from the UK, the US or the World.\n5. Auto print your final wrangled tibble data frame.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1B\n\nIn this section, we will do some more data wrangling followed by summarization using wrangled data from Part 1A.\n\n1. Keep only album sales from the US.\n2. Create a new column called `years_since_release` corresponding to the number of years since the release of each album from Beyoncé and Taylor Swift. This should be a whole number and you should round down to \"14\" if you get a non-whole number like \"14.12\" years. (**Hint**: you may find the `interval()` function from `lubridate` helpful here, but this is not the only way to do this.)\n3. Calculate the most recent, oldest, and the median years since albums were released for both Beyoncé and Taylor Swift.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1C\n\nUsing the wrangled data from Part 1A:\n\n1. Calculate the total album sales for each artist and for each `country` (only sales from the UK, US, and World).\n2. 
Using the total album sales, create a [percent stacked barchart](https://r-graph-gallery.com/48-grouped-barplot-with-ggplot2) using `ggplot2` of the percentage of sales of studio albums (in millions) along the y-axis for the two artists along the x-axis colored by the `country`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1D\n\nUsing the wrangled data from Part 1A, use `ggplot2` to create a bar plot for the sales of studio albums (in millions) along the x-axis for each of the album titles along the y-axis.\n\n**Note**:\n\n- You only need to consider the global World sales (you can ignore US and UK sales for this part).\n- The title of the album must be clearly readable along the y-axis.\n- Each bar should be colored by which artist made that album.\n- The bars should be ordered from albums with the most sales (top) to the least sales (bottom) (**Note**: you must use functions from `forcats` for this step).\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 1E\n\nUsing the wrangled data from Part 1A, use `ggplot2` to create a scatter plot of sales of studio albums (in millions) along the y-axis by the released date for each album along the x-axis.\n\n**Note**:\n\n- The points should be colored by the artist.\n- There should be three scatter plots (one for UK, US and world sales) faceted by rows.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n# Part 2: Exploring sentiment of lyrics\n\nIn Part 2, we will explore the lyrics in the `b_lyrics` and `ts_lyrics` datasets.\n\n## Part 2A\n\nUsing `ts_lyrics`, create a new column called `line` with one line containing the character string for each line of Taylor Swift's songs.\n\n- How many lines in Taylor Swift's lyrics contain the word \"hello\"? 
For full credit, show all the rows in `ts_lyrics` that have \"hello\" in the `line` column and report how many rows there are in total.\n- How many lines in Taylor Swift's lyrics contain the word \"goodbye\"? For full credit, show all the rows in `ts_lyrics` that have \"goodbye\" in the `line` column and report how many rows there are in total.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2B\n\nRepeat the same analysis for `b_lyrics` as described in Part 2A.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2C\n\nUsing the `b_lyrics` dataset,\n\n1. Tokenize each lyrical line by words.\n2. Remove the \"stopwords\".\n3. Calculate the total number for each word in the lyrics.\n4. Using the \"bing\" sentiment lexicon, add a column to the summarized data frame adding the \"bing\" sentiment lexicon.\n5. Sort the rows from most frequent to least frequent words.\n6. Only keep the top 25 most frequent words.\n7. Auto print the wrangled tibble data frame.\n8. Use `ggplot2` to create a bar plot with the top words on the y-axis and the frequency of each word on the x-axis. Color each bar by the sentiment of each word from the \"bing\" sentiment lexicon. Bars should be ordered from most frequent on the top to least frequent on the bottom of the plot.\n9. Create a word cloud of the top 25 most frequent words.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2D\n\nRepeat the same analysis as above in Part 2C, but for `ts_lyrics`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n\n\n## Part 2E\n\nUsing the `ts_lyrics` dataset,\n\n1. Tokenize each lyrical line by words.\n2. Remove the \"stopwords\".\n3. Calculate the total number for each word in the lyrics **for each Album**.\n4. Using the \"afinn\" sentiment lexicon, add a column to the summarized data frame adding the \"afinn\" sentiment lexicon.\n5. 
Calculate the average sentiment score **for each Album**.\n6. Auto print the wrangled tibble data frame.\n7. Join the wrangled data frame from Part 1A (album sales in millions) with the wrangled data frame from #6 above (average sentiment score for each album).\n8. Using `ggplot2`, create a scatter plot of the average sentiment score for each album (y-axis) and the album release date along the x-axis. Make the size of each point the album sales in millions.\n9. Add a horizontal line at y-intercept=0.\n10. Write 2-3 sentences interpreting the plot answering the question \"How has the sentiment of Taylor Swift's albums changed over time?\". Add a title, subtitle, and useful axis labels.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n# Add your solution here\n```\n:::\n", 5 | "supporting": [], 6 | "filters": [ 7 | "rmarkdown/pagebreak.lua" 8 | ], 9 | "includes": {}, 10 | "engineDependencies": {}, 11 | "preserve": {}, 12 | "postProcess": true 13 | } 14 | } -------------------------------------------------------------------------------- /_freeze/site_libs/clipboard/clipboard.min.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * clipboard.js v2.0.10 3 | * https://clipboardjs.com/ 4 | * 5 | * Licensed MIT © Zeno Rocha 6 | */ 7 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return o}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),c=n.n(e);function a(t){try{return document.execCommand(t)}catch(t){return}}var f=function(t){t=c()(t);return a("cut"),t};var l=function(t){var e,n,o,r=1 { 5 | if (categoriesLoaded) { 6 | activateCategory(category); 7 | setCategoryHash(category); 8 | } 9 | }; 10 | 11 | window["quarto-listing-loaded"] = () => { 12 | // Process any existing hash 13 | const hash = getHash(); 14 | 15 | if (hash) { 16 | // If there is a category, switch to that 17 | if (hash.category) { 18 | activateCategory(hash.category); 19 | } 20 | // Paginate a specific listing 21 | const listingIds = Object.keys(window["quarto-listings"]); 22 | for (const listingId of listingIds) { 23 | const page = hash[getListingPageKey(listingId)]; 24 | if (page) { 25 | showPage(listingId, page); 26 | } 27 | } 28 | } 29 | 30 | const listingIds = Object.keys(window["quarto-listings"]); 31 | for (const listingId of listingIds) { 32 | // The actual list 33 | const list = window["quarto-listings"][listingId]; 34 | 35 | // Update the handlers for pagination events 36 | refreshPaginationHandlers(listingId); 37 | 38 | // Render any visible items that need it 39 | renderVisibleProgressiveImages(list); 40 | 41 | // Whenever the list is updated, we also need to 42 | // attach handlers to the new pagination elements 43 | // and refresh any newly visible items. 
44 | list.on("updated", function () { 45 | renderVisibleProgressiveImages(list); 46 | setTimeout(() => refreshPaginationHandlers(listingId)); 47 | 48 | // Show or hide the no matching message 49 | toggleNoMatchingMessage(list); 50 | }); 51 | } 52 | }; 53 | 54 | window.document.addEventListener("DOMContentLoaded", function (_event) { 55 | // Attach click handlers to categories 56 | const categoryEls = window.document.querySelectorAll( 57 | ".quarto-listing-category .category" 58 | ); 59 | 60 | for (const categoryEl of categoryEls) { 61 | const category = categoryEl.getAttribute("data-category"); 62 | categoryEl.onclick = () => { 63 | activateCategory(category); 64 | setCategoryHash(category); 65 | }; 66 | } 67 | 68 | // Attach a click handler to the category title 69 | // (there should be only one, but since it is a class name, handle N) 70 | const categoryTitleEls = window.document.querySelectorAll( 71 | ".quarto-listing-category-title" 72 | ); 73 | for (const categoryTitleEl of categoryTitleEls) { 74 | categoryTitleEl.onclick = () => { 75 | activateCategory(""); 76 | setCategoryHash(""); 77 | }; 78 | } 79 | 80 | categoriesLoaded = true; 81 | }); 82 | 83 | function toggleNoMatchingMessage(list) { 84 | const selector = `#${list.listContainer.id} .listing-no-matching`; 85 | const noMatchingEl = window.document.querySelector(selector); 86 | if (noMatchingEl) { 87 | if (list.visibleItems.length === 0) { 88 | noMatchingEl.classList.remove("d-none"); 89 | } else { 90 | if (!noMatchingEl.classList.contains("d-none")) { 91 | noMatchingEl.classList.add("d-none"); 92 | } 93 | } 94 | } 95 | } 96 | 97 | function setCategoryHash(category) { 98 | setHash({ category }); 99 | } 100 | 101 | function setPageHash(listingId, page) { 102 | const currentHash = getHash() || {}; 103 | currentHash[getListingPageKey(listingId)] = page; 104 | setHash(currentHash); 105 | } 106 | 107 | function getListingPageKey(listingId) { 108 | return `${listingId}-page`; 109 | } 110 | 111 | function 
refreshPaginationHandlers(listingId) { 112 | const listingEl = window.document.getElementById(listingId); 113 | const paginationEls = listingEl.querySelectorAll( 114 | ".pagination li.page-item:not(.disabled) .page.page-link" 115 | ); 116 | for (const paginationEl of paginationEls) { 117 | paginationEl.onclick = (sender) => { 118 | setPageHash(listingId, sender.target.getAttribute("data-i")); 119 | showPage(listingId, sender.target.getAttribute("data-i")); 120 | return false; 121 | }; 122 | } 123 | } 124 | 125 | function renderVisibleProgressiveImages(list) { 126 | // Run through the visible items and render any progressive images 127 | for (const item of list.visibleItems) { 128 | const itemEl = item.elm; 129 | if (itemEl) { 130 | const progressiveImgs = itemEl.querySelectorAll( 131 | `img[${kProgressiveAttr}]` 132 | ); 133 | for (const progressiveImg of progressiveImgs) { 134 | const srcValue = progressiveImg.getAttribute(kProgressiveAttr); 135 | if (srcValue) { 136 | progressiveImg.setAttribute("src", srcValue); 137 | } 138 | progressiveImg.removeAttribute(kProgressiveAttr); 139 | } 140 | } 141 | } 142 | } 143 | 144 | function getHash() { 145 | // Hashes are of the form 146 | // #name:value|name1:value1|name2:value2 147 | const currentUrl = new URL(window.location); 148 | const hashRaw = currentUrl.hash ? 
currentUrl.hash.slice(1) : undefined; 149 | return parseHash(hashRaw); 150 | } 151 | 152 | const kAnd = "&"; 153 | const kEquals = "="; 154 | 155 | function parseHash(hash) { 156 | if (!hash) { 157 | return undefined; 158 | } 159 | const hasValuesStrs = hash.split(kAnd); 160 | const hashValues = hasValuesStrs 161 | .map((hashValueStr) => { 162 | const vals = hashValueStr.split(kEquals); 163 | if (vals.length === 2) { 164 | return { name: vals[0], value: vals[1] }; 165 | } else { 166 | return undefined; 167 | } 168 | }) 169 | .filter((value) => { 170 | return value !== undefined; 171 | }); 172 | 173 | const hashObj = {}; 174 | hashValues.forEach((hashValue) => { 175 | hashObj[hashValue.name] = decodeURIComponent(hashValue.value); 176 | }); 177 | return hashObj; 178 | } 179 | 180 | function makeHash(obj) { 181 | return Object.keys(obj) 182 | .map((key) => { 183 | return `${key}${kEquals}${obj[key]}`; 184 | }) 185 | .join(kAnd); 186 | } 187 | 188 | function setHash(obj) { 189 | const hash = makeHash(obj); 190 | window.history.pushState(null, null, `#${hash}`); 191 | } 192 | 193 | function showPage(listingId, page) { 194 | const list = window["quarto-listings"][listingId]; 195 | if (list) { 196 | list.show((page - 1) * list.page + 1, list.page); 197 | } 198 | } 199 | 200 | function activateCategory(category) { 201 | // Deactivate existing categories 202 | const activeEls = window.document.querySelectorAll( 203 | ".quarto-listing-category .category.active" 204 | ); 205 | for (const activeEl of activeEls) { 206 | activeEl.classList.remove("active"); 207 | } 208 | 209 | // Activate this category 210 | const categoryEl = window.document.querySelector( 211 | `.quarto-listing-category .category[data-category='${category}'` 212 | ); 213 | if (categoryEl) { 214 | categoryEl.classList.add("active"); 215 | } 216 | 217 | // Filter the listings to this category 218 | filterListingCategory(category); 219 | } 220 | 221 | function filterListingCategory(category) { 222 | const 
listingIds = Object.keys(window["quarto-listings"]); 223 | for (const listingId of listingIds) { 224 | const list = window["quarto-listings"][listingId]; 225 | if (list) { 226 | if (category === "") { 227 | // resets the filter 228 | list.filter(); 229 | } else { 230 | // filter to this category 231 | list.filter(function (item) { 232 | const itemValues = item.values(); 233 | if (itemValues.categories !== null) { 234 | const categories = itemValues.categories.split(","); 235 | return categories.includes(category); 236 | } else { 237 | return false; 238 | } 239 | }); 240 | } 241 | } 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /_post_template.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Add title" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Add title" 9 | date: 2022-08-30 10 | categories: [add here] 11 | --- 12 | 13 | 14 | 15 | # Pre-lecture materials 16 | 17 | ### Read ahead 18 | 19 | ::: callout-note 20 | ## Read ahead 21 | 22 | **Before class, you can prepare by reading the following materials:** 23 | 24 | 1. Add here. 25 | 2. Add here. 26 | ::: 27 | 28 | ### Acknowledgements 29 | 30 | Material for this lecture was borrowed and adapted from 31 | 32 | - Add here. 33 | 34 | # Learning objectives 35 | 36 | ::: callout-note 37 | # Learning objectives 38 | 39 | **At the end of this lesson you will:** 40 | 41 | - Add here. 42 | ::: 43 | 44 | # Add lecture here 45 | 46 | # Post-lecture materials 47 | 48 | ### Final Questions 49 | 50 | Here are some post-lecture questions to help you think about the material discussed. 51 | 52 | ::: callout-note 53 | ### Questions 54 | 55 | 1. Add here. 56 | ::: 57 | 58 | ### Additional Resources 59 | 60 | ::: callout-tip 61 | - Add here. 
62 | ::: 63 | 64 | ## rtistry 65 | 66 | ```{r} 67 | #| label: flametree 68 | #| echo: false 69 | #| fig-cap-location: "top" 70 | 71 | ``` 72 | 73 | \[Add here.\] 74 | -------------------------------------------------------------------------------- /_quarto.yml: -------------------------------------------------------------------------------- 1 | project: 2 | type: website 3 | 4 | website: 5 | title: "Statistical Computing (BSPH 140.776)" 6 | site-url: https://www.stephaniehicks.com/jhustatcomputing2022 7 | description: "Course website for Statistical Computing (BSPH 140.776) in Fall 2022" 8 | navbar: 9 | right: 10 | - text: "Home" 11 | file: index.qmd 12 | - text: "General Information" 13 | menu: 14 | - text: "Syllabus" 15 | href: syllabus.qmd 16 | - text: "Schedule" 17 | href: schedule.qmd 18 | - text: "Course Materials" 19 | menu: 20 | - text: "Lectures" 21 | href: lectures.qmd 22 | - text: "Projects" 23 | href: projects.qmd 24 | - text: "Resources" 25 | href: resources.qmd 26 | - icon: github 27 | href: https://github.com/ 28 | - icon: twitter 29 | href: https://twitter.com 30 | - icon: rss 31 | href: index.xml 32 | format: 33 | html: 34 | theme: simplex 35 | toc: true 36 | css: styles.css 37 | 38 | editor: visual 39 | 40 | execute: 41 | freeze: auto 42 | -------------------------------------------------------------------------------- /data/2016-07-19.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/2016-07-19.csv.bz2 -------------------------------------------------------------------------------- /data/b_lyrics.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/b_lyrics.RDS -------------------------------------------------------------------------------- 
/data/chicago.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/chicago.rds -------------------------------------------------------------------------------- /data/chocolate.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/chocolate.RDS -------------------------------------------------------------------------------- /data/sales.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/sales.RDS -------------------------------------------------------------------------------- /data/storms_2004.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/storms_2004.csv.gz -------------------------------------------------------------------------------- /data/team_standings.csv: -------------------------------------------------------------------------------- 1 | "Standing","Team" 2 | 1,"Spain" 3 | 2,"Netherlands" 4 | 3,"Germany" 5 | 4,"Uruguay" 6 | 5,"Argentina" 7 | 6,"Brazil" 8 | 7,"Ghana" 9 | 8,"Paraguay" 10 | 9,"Japan" 11 | 10,"Chile" 12 | 11,"Portugal" 13 | 12,"USA" 14 | 13,"England" 15 | 14,"Mexico" 16 | 15,"South Korea" 17 | 16,"Slovakia" 18 | 17,"Ivory Coast" 19 | 18,"Slovenia" 20 | 19,"Switzerland" 21 | 20,"South Africa" 22 | 21,"Australia" 23 | 22,"New Zealand" 24 | 23,"Serbia" 25 | 24,"Denmark" 26 | 25,"Greece" 27 | 26,"Italy" 28 | 27,"Nigeria" 29 | 28,"Algeria" 30 | 29,"France" 31 | 30,"Honduras" 32 | 31,"Cameroon" 33 | 32,"North Korea" 34 | 
-------------------------------------------------------------------------------- /data/ts_lyrics.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/ts_lyrics.RDS -------------------------------------------------------------------------------- /data/tuesdata_rainfall.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/tuesdata_rainfall.RDS -------------------------------------------------------------------------------- /data/tuesdata_temperature.RDS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/data/tuesdata_temperature.RDS -------------------------------------------------------------------------------- /images/cool_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/cool_icon.png -------------------------------------------------------------------------------- /images/dominici_ehp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/dominici_ehp.png -------------------------------------------------------------------------------- /images/dspipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/dspipeline.png 
-------------------------------------------------------------------------------- /images/happygitwithr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/happygitwithr.png -------------------------------------------------------------------------------- /images/lippman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/lippman.png -------------------------------------------------------------------------------- /images/peng_preface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/peng_preface.png -------------------------------------------------------------------------------- /images/phdversioncontrol.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/images/phdversioncontrol.gif -------------------------------------------------------------------------------- /index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Welcome to Statistical Computing!" 3 | image: images/cool_icon.png 4 | about: 5 | template: jolla 6 | links: 7 | - icon: twitter 8 | text: Twitter 9 | href: https://twitter.com/stephaniehicks 10 | - icon: github 11 | text: Github 12 | href: https://github.com/stephaniehicks 13 | --- 14 | 15 | Welcome to Statistical Computing at Johns Hopkins Bloomberg School of Public Health! 16 | 17 | ## What is this course? 
18 | 19 | This course covers the basics of practical issues in programming and other computer skills required for the research and application of statistical methods. Includes programming in R and the tidyverse, data ethics, best practices for coding and reproducible research, introduction to data visualizations, best practices for working with special data types (dates/times, text data, etc), best practices for storing data, basics of debugging, organizing and commenting code, basics of leveraging Python from R. Topics in statistical data analysis provide working examples. 20 | 21 | 22 | ## Getting started 23 | 24 | I suggest that you start by looking over the [Syllabus](syllabus.qmd) and [Schedule](schedule.qmd) under **General Information**. After that, start with the Lectures content in the given order. 25 | 26 | ## Acknowledgements 27 | 28 | This course was developed and is maintained by [Stephanie Hicks](https://www.stephaniehicks.com). 29 | 30 | The following individuals have contributed to improving the course or materials have been adapted from their courses: [Roger D. Peng](https://github.com/rdpeng), [Andreas Handel](https://www.andreashandel.com), [Naim Rashid](https://naimurashid.github.io), [Michael Love](https://github.com/mikelove). 31 | 32 | The course materials are licensed under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/). Linked and embedded materials are governed by their own licenses. I assume that all external materials used or embedded here are covered under the educational fair use policy. If this is not the case and any material displayed here violates copyright, please let me know and I will remove it. 
33 | -------------------------------------------------------------------------------- /jhustatcomputing2022.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | -------------------------------------------------------------------------------- /lectures.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Lectures" 3 | listing: 4 | - id: lectures 5 | contents: "posts" 6 | sort: "date desc" 7 | type: default 8 | categories: true 9 | sort-ui: false 10 | filter-ui: false 11 | feed: true 12 | page-layout: full 13 | title-block-banner: false 14 | --- 15 | -------------------------------------------------------------------------------- /posts/2022-08-30-introduction-to-gitgithub/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to git/GitHub" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Version control is a game changer; or how I learned to love git/GitHub" 9 | date: 2022-08-30 10 | image: "../../images/happygitwithr.png" 11 | categories: [module 1, week 1, programming, version control, git, GitHub] 12 | --- 13 | 14 | 15 | 16 | # Pre-lecture materials 17 | 18 | ### Read ahead 19 | 20 | ::: callout-note 21 | ## Read ahead 22 | 23 | **Before class, you can prepare by reading the following materials:** 24 | 25 | 1. [Happy Git with R](https://happygitwithr.com) from Jenny Bryan 26 | 2. 
[Chapter on git and GitHub in `dsbook`](https://rafalab.github.io/dsbook/git.html) from Rafael Irizarry 27 | ::: 28 | 29 | ### Acknowledgements 30 | 31 | Material for this lecture was borrowed and adopted from 32 | 33 | - 34 | 35 | # Learning objectives 36 | 37 | ::: callout-note 38 | # Learning objectives 39 | 40 | **At the end of this lesson you will:** 41 | 42 | - Know what Git and GitHub are. 43 | - Know why one might want to use them. 44 | - Have created and set up a GitHub account. 45 | ::: 46 | 47 | # Introduction to git/GitHub 48 | 49 | This document gives a brief explanation of GitHub and how we will use it for this course. 50 | 51 | ### git 52 | 53 | *Git* is what is called a *version control system* for file management. The main idea is that as you (and your collaborators) work on a project, the software tracks and records any changes made by anyone. 54 | 55 | - Similar to the "track changes" features in Microsoft Word, but more rigorous, powerful, and scaled up to multiple files 56 | - Great for solo or collaborative work 57 | 58 | ### GitHub 59 | 60 | *GitHub* is a hosting service on the internet for git-aware folders and projects 61 | 62 | - Similar to Dropbox or Google Drive, but more structured, powerful, and programmatic 63 | - Great for solo or collaborative work! 64 | - Technically *GitHub* is distinct from *Git*. However, *GitHub* is in some sense the interface and *Git* the underlying engine (a bit like *RStudio* and *R*). 65 | 66 | Since we will only be using *Git* through *GitHub*, I tend to not distinguish between the two. In the following, I refer to all of it as just *GitHub*. Note that other interfaces to *Git* exist, e.g., *Bitbucket*, but *GitHub* is the most widely used one. 67 | 68 | ### Why use git/GitHub? 
69 | 70 | You want to use *GitHub* to avoid this: 71 | 72 | ```{r} 73 | #| label: versioncontrol 74 | #| echo: false 75 | #| fig-cap: 'How not to use GitHub [image from PhD Comics]' 76 | #| out-width: '80%' 77 | knitr::include_graphics("../../images/phdversioncontrol.gif") 78 | ``` 79 | 80 | \[[Source: PhD Comics](http://phdcomics.com/comics/archive_print.php?comicid=1531)\] 81 | 82 | *GitHub* gives you a clean way to track your projects. It is also very well suited to collaborative work. Historically, version control was used for software development. However, it has become broader and is now used for many types of projects, including data science projects. 83 | 84 | To learn a bit more about Git/GitHub and why you might want to use it, [read this article by Jenny Bryan](https://peerj.com/preprints/3159/). 85 | 86 | **Note her explanation of what's special with the `README.md` file on GitHub.** 87 | 88 | ### What to (not) do 89 | 90 | **GitHub is ideal if** you have a project with a fair number of files, most of those files are text files (such as code, LaTeX, (R)markdown, etc.) and different people work on different parts of the project. 91 | 92 | **GitHub is less useful if** you have a lot of non-text files (e.g. Word or Powerpoint) and different team members might want to edit the same document at the same time. In that instance, a solution like Google Docs, Word+Dropbox, Word+Onedrive, etc. might be better. 93 | 94 | ### How to use Git/GitHub 95 | 96 | Git and GitHub are fundamentally **based on commands you type into the command line**. Lots of online resources show you how to use the command line. This is the most powerful approach, and the way I almost always interact with git/GitHub. However, many folks find this the most confusing way to use git/GitHub. Alternatively, there are graphical interfaces. 97 | 98 | - [GitHub itself](https://desktop.github.com/) provides a graphical interface with basic functionality. 
99 | - RStudio also has [Git/GitHub integration](https://happygitwithr.com/rstudio-git-github.html). Of course this only works for R project GitHub integration. 100 | - There are also third party GitHub clients with many advanced features, most of which you won't need initially, but might eventually. 101 | 102 | **Note**: As a student, you can (and should) upgrade to the Pro version of GitHub for free (i.e. access to unlimited private repositories is one benefit), see the [GitHub student developer pack](https://education.github.com/pack) on how to do this. 103 | 104 | # Getting Started 105 | 106 | One of my favorite resources for getting started with git/GitHub is the Happy Git with R book from Jenny Bryan: 107 | 108 | - 109 | 110 | ```{r} 111 | #| label: happygitwithr 112 | #| echo: false 113 | #| fig-cap: 'A screenshot of the Happy Git with R online book from Jenny Bryan' 114 | #| out-width: '80%' 115 | knitr::include_graphics("../../images/happygitwithr.png") 116 | ``` 117 | 118 | It truly is one of the **best resources** out there for getting started with git/GitHub, especially with the integration to RStudio. Therefore, at this point, I will encourage all of you to go read through the online book. 119 | 120 | Some of you may only need to skim it, others will need to spend some time reading through it. Either way, I will bet that you won't regret the time investment. 121 | 122 | # Using git/GitHub in our course 123 | 124 | In this course, you will use git/GitHub in the following ways: 125 | 126 | 1. Project 0 (optional) - You will create a website introducing yourself to folks in the course and deploy it on GitHub. 127 | 2. Projects 1-3 - You can practice using git locally (on your compute environment) to track your changes over time and, if you wish (but highly suggested), you can practice pushing your project solutions to a private GitHub repository on your GitHub account (i.e. `git add`, `git commit`, `git push`, `git pull`, etc.). 
128 | 129 | Learning these skills will be useful down the road if you ever work collaboratively on a project (i.e. writing code as a group). In this scenario, you will use the skills you have been practicing in your projects to work together as a team in a single GitHub repository. 130 | 131 | # Post-lecture materials 132 | 133 | ### Final Questions 134 | 135 | Here are some post-lecture questions to help you think about the material discussed. 136 | 137 | ::: callout-note 138 | ### Questions 139 | 140 | 1. What is version control? 141 | 142 | 2. What is the difference between git and GitHub? 143 | 144 | 3. What are other version controls software/tools that are available besides git? 145 | ::: 146 | 147 | ### Additional Resources 148 | 149 | ::: callout-tip 150 | - [git and GitHub in the `dsbook`](https://rafalab.github.io/dsbook/git.html) by Rafael Irizarry 151 | ::: 152 | 153 | ## rtistry 154 | 155 | ```{r} 156 | #| label: flametree 157 | #| echo: false 158 | #| fig-cap-location: "top" 159 | knitr::include_graphics("https://github.com/djnavarro/art/raw/master/static/gallery/flametree/extra/001_flametree_20_13.jpg") 160 | ``` 161 | 162 | \['Flametree' from Danielle Navarro \] 163 | -------------------------------------------------------------------------------- /posts/2022-09-01-literate-programming/my-refs.bib: -------------------------------------------------------------------------------- 1 | @article{knuth1984, 2 | author = {Donald E. Knuth}, 3 | title = {Literate Programming}, 4 | journal = {Comput. 
J.}, 5 | volume = {27}, 6 | number = {2}, 7 | pages = {97--111}, 8 | year = {1984}, 9 | url = {https://doi.org/10.1093/comjnl/27.2.97}, 10 | doi = {10.1093/comjnl/27.2.97}, 11 | timestamp = {Wed, 14 Nov 2018 00:00:00 +0100}, 12 | biburl = {https://dblp.org/rec/journals/cj/Knuth84.bib}, 13 | bibsource = {dblp computer science bibliography, https://dblp.org} 14 | } -------------------------------------------------------------------------------- /posts/2022-09-01-reference-management/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Reference management" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "How to use citations and incorporate references from a bibliography in R Markdown." 9 | date: 2022-09-01 10 | categories: [module 1, week 1, R Markdown, programming] 11 | bibliography: my-refs.bib 12 | --- 13 | 14 | 15 | 16 | # Pre-lecture materials 17 | 18 | ### Read ahead 19 | 20 | ::: callout-note 21 | ## Read ahead 22 | 23 | **Before class, you can prepare by reading the following materials:** 24 | 25 | 1. Authoring in [R Markdown from RStudio](https://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) 26 | 2. Citations from [Reproducible Research in R](https://monashdatafluency.github.io/r-rep-res/citations.html) from the [Monash Data Fluency](https://monashdatafluency.github.io) initiative 27 | 3. 
Bibliography from [R Markdown Cookbook](https://bookdown.org/yihui/rmarkdown-cookbook/bibliography.html) 28 | ::: 29 | 30 | ### Acknowledgements 31 | 32 | Material for this lecture was borrowed and adopted from 33 | 34 | - 35 | - 36 | - 37 | - 38 | 39 | # Learning objectives 40 | 41 | ::: callout-note 42 | # Learning objectives 43 | 44 | **At the end of this lesson you will:** 45 | 46 | - Know what types of bibliography file formats can be used in an R Markdown file 47 | - Learn how to add citations to an R Markdown file 48 | - Know how to change the citation style (e.g. APA, Chicago, etc) 49 | ::: 50 | 51 | # Introduction 52 | 53 | For almost any data analysis, especially if it is meant for publication in the academic literature, you will have to cite other people's work and include the references (bibliographies or citations) in your work. In this class, you are likely to need to include references and cite other people's work like in a regular research paper. 54 | 55 | R provides a nice function, `citation()`, that helps us generate a citation entry for the R packages that we have used. Let's try generating the citation text for the `rmarkdown` package using the following command: 56 | 57 | ```{r, echo = TRUE} 58 | citation("rmarkdown") 59 | ``` 60 | 61 | I assume you are familiar with how citing references works, and hopefully, you are already using a reference manager. If not, let me know in the discussion boards. 62 | 63 | To have something that plays well with R Markdown, you need a file format that stores all the references. See the following to learn more about the other file formats available to you within an R Markdown file: 64 | 65 | - 66 | 67 | ### Citation management software 68 | 69 | As you can see, there are a ton of file formats including `.medline` (MEDLINE), `.bib` (BibTeX), `.ris` (RIS), `.enl` (EndNote). 70 | 71 | I will not discuss the underlying citation management software itself, but I will talk briefly about how you might create one of these file formats. 
72 | 73 | If you recall the output from `citation("rmarkdown")` above, we might consider manually copying and pasting the output into a citation management software, but instead we can use `write_bib()` function from `knitr` package to create a bibliography file ending in `.bib`. 74 | 75 | Let's run the following code in order to generate a `my-refs.bib` file 76 | 77 | ```{r} 78 | knitr::write_bib("rmarkdown", file = "my-refs.bib") 79 | ``` 80 | 81 | Now we can see we have the file saved locally. 82 | 83 | ```{r} 84 | list.files() 85 | ``` 86 | 87 | If you open up the `my-refs.bib` file, you will see 88 | 89 | @Manual{R-rmarkdown, 90 | title = {rmarkdown: Dynamic Documents for R}, 91 | author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone}, 92 | year = {2021}, 93 | note = {R package version 2.8}, 94 | url = {https://CRAN.R-project.org/package=rmarkdown}, 95 | } 96 | 97 | @Book{rmarkdown2018, 98 | title = {R Markdown: The Definitive Guide}, 99 | author = {Yihui Xie and J.J. 
Allaire and Garrett Grolemund}, 100 | publisher = {Chapman and Hall/CRC}, 101 | address = {Boca Raton, Florida}, 102 | year = {2018}, 103 | note = {ISBN 9781138359338}, 104 | url = {https://bookdown.org/yihui/rmarkdown}, 105 | } 106 | 107 | @Book{rmarkdown2020, 108 | title = {R Markdown Cookbook}, 109 | author = {Yihui Xie and Christophe Dervieux and Emily Riederer}, 110 | publisher = {Chapman and Hall/CRC}, 111 | address = {Boca Raton, Florida}, 112 | year = {2020}, 113 | note = {ISBN 9780367563837}, 114 | url = {https://bookdown.org/yihui/rmarkdown-cookbook}, 115 | } 116 | 117 | ::: resources 118 | **Note there are three keys that we will use later on**: 119 | 120 | - `R-rmarkdown` 121 | - `rmarkdown2018` 122 | - `rmarkdown2020` 123 | ::: 124 | 125 | ### Linking `.bib` file with `.rmd` (and `.qmd`) files 126 | 127 | In order to use references within a R Markdown file, you will need to specify the name and a location of a bibliography file using the bibliography metadata field in a YAML metadata section. For example: 128 | 129 | ``` yaml 130 | --- 131 | title: "My top ten favorite R packages" 132 | output: html_document 133 | bibliography: my-refs.bib 134 | --- 135 | ``` 136 | 137 | You can include multiple reference files using the following syntax, alternatively you can concatenate two bib files into one. 
138 | 139 | ``` yaml 140 | --- 141 | bibliography: ["my-refs1.bib", "my-refs2.bib"] 142 | --- 143 | ``` 144 | 145 | ### Inline citation 146 | 147 | Now we can start using those bib keys that we have learned just before, using the following syntax 148 | 149 | - `[@key]` for single citation 150 | - `[@key1; @key2]` multiple citation can be separated by semi-colon 151 | - `[-@key]` in order to suppress author name, and just display the year 152 | - `[see @key1 p 12; also this ref @key2]` is also a valid syntax 153 | 154 | Let's start by citing the `rmarkdown` package using the following code and press `Knit` button: 155 | 156 | ------------------------------------------------------------------------ 157 | 158 | I have been using the amazing Rmarkdown package [@R-rmarkdown]! I should also go and read [@rmarkdown2018; and @rmarkdown2020] books. 159 | 160 | ------------------------------------------------------------------------ 161 | 162 | Pretty cool, eh?? 163 | 164 | ### Citation styles 165 | 166 | By default, Pandoc will use a Chicago author-date format for citations and references. 167 | 168 | To use another style, you will need to specify a CSL (Citation Style Language) file in the `csl` metadata field, e.g., 169 | 170 | ``` yaml 171 | --- 172 | title: "My top ten favorite R packages" 173 | output: html_document 174 | bibliography: my-refs.bib 175 | csl: biomed-central.csl 176 | --- 177 | ``` 178 | 179 | ::: resources 180 | To find your required formats, we recommend using the [Zotero Style Repository](https://www.zotero.org/styles), which makes it easy to search for and download your desired style. 181 | ::: 182 | 183 | CSL files can be tweaked to meet custom formatting requirements. For example, we can change the number of authors required before "et al." is used to abbreviate them. This can be simplified through the use of visual editors such as the one available at https://editor.citationstyles.org. 
184 | 185 | ### Other cool features 186 | 187 | #### Add an item to a bibliography without using it 188 | 189 | By default, the bibliography will only display items that are directly referenced in the document. If you want to include items in the bibliography without actually citing them in the body text, you can define a dummy nocite metadata field and put the citations there. 190 | 191 | ``` yaml 192 | --- 193 | nocite: | 194 | @item1, @item2 195 | --- 196 | ``` 197 | 198 | #### Add all items to the bibliography 199 | 200 | If we do not wish to explicitly state all of the items within the bibliography but would still like to show them in our references, we can use the following syntax: 201 | 202 | ``` yaml 203 | --- 204 | nocite: '@*' 205 | --- 206 | ``` 207 | 208 | This will force all items to be displayed in the bibliography. 209 | 210 | ::: resources 211 | You can also have an appendix appear after the bibliography. For more on this, see: 212 | 213 | - 214 | ::: 215 | 216 | # Other useful tips 217 | 218 | We have learned that inside your file that contains all your references (e.g. `my-refs.bib`), typically each reference gets a key, which is a shorthand that is generated by the reference manager or you can create yourself. 219 | 220 | For instance, I use a format of lower-case first author last name followed by 4 digit year for each reference followed by a keyword (e.g., the name of a software package). Alternatively, you can omit the keyword. But note that if I cite a paper by the same first author that was published in the same year, then a lower case letter is added to the end. For instance, for a paper that I wrote as 1st author in 2022, my bibtex key might be `hicks2022` or `hicks2022a`. You can decide what scheme to use, just pick one and use it *forever*. 221 | 222 | In your R Markdown document, you can then cite the reference by adding the key, such as `...in the paper by Hicks et al. [@hicks2022]...`. 
223 | 224 | # Post-lecture materials 225 | 226 | ### Practice 227 | 228 | Here are some post-lecture tasks to practice some of the material discussed. 229 | 230 | ::: callout-note 231 | ### Questions 232 | 233 | **Try out the following:** 234 | 235 | 1. What do you notice that's different when you run `citation("tidyverse")` (compared to `citation("rmarkdown")`)? 236 | 237 | 2. Install the following packages: 238 | 239 | ```{r} 240 | #| eval: false 241 | install.packages(c("bibtex", "RefManageR")) 242 | ``` 243 | 244 | What do they do? How might they be helpful to you in terms of reference management? 245 | 246 | 3. Instead of using a `.bib` file, try using a different bibliography file format in an R Markdown document. 247 | 248 | 4. Practice using a different CSL file to change the citation style. 249 | ::: 250 | 251 | ### Additional Resources 252 | 253 | ::: callout-tip 254 | - Add here. 255 | ::: 256 | 257 | ## rtistry 258 | 259 | ```{r} 260 | #| label: flametree 261 | #| echo: false 262 | #| fig-cap-location: "top" 263 | 264 | ``` 265 | 266 | \[Add here.\] 267 | -------------------------------------------------------------------------------- /posts/2022-09-01-reference-management/my-refs.bib: -------------------------------------------------------------------------------- 1 | @Manual{R-rmarkdown, 2 | title = {rmarkdown: Dynamic Documents for R}, 3 | author = {JJ Allaire and Yihui Xie and Jonathan McPherson and Javier Luraschi and Kevin Ushey and Aron Atkins and Hadley Wickham and Joe Cheng and Winston Chang and Richard Iannone}, 4 | year = {2022}, 5 | note = {R package version 2.15}, 6 | url = {https://CRAN.R-project.org/package=rmarkdown}, 7 | } 8 | 9 | @Book{rmarkdown2018, 10 | title = {R Markdown: The Definitive Guide}, 11 | author = {Yihui Xie and J.J. 
Allaire and Garrett Grolemund}, 12 | publisher = {Chapman and Hall/CRC}, 13 | address = {Boca Raton, Florida}, 14 | year = {2018}, 15 | note = {ISBN 9781138359338}, 16 | url = {https://bookdown.org/yihui/rmarkdown}, 17 | } 18 | 19 | @Book{rmarkdown2020, 20 | title = {R Markdown Cookbook}, 21 | author = {Yihui Xie and Christophe Dervieux and Emily Riederer}, 22 | publisher = {Chapman and Hall/CRC}, 23 | address = {Boca Raton, Florida}, 24 | year = {2020}, 25 | note = {ISBN 9780367563837}, 26 | url = {https://bookdown.org/yihui/rmarkdown-cookbook}, 27 | } 28 | 29 | -------------------------------------------------------------------------------- /posts/2022-09-08-joining-data-in-r/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Joining data in R" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Introduction to relational data and join functions in the dplyr R package" 9 | date: 2022-09-08 10 | categories: [module 2, week 2, R, programming, dplyr, here, tidyverse] 11 | --- 12 | 13 | 14 | 15 | # Pre-lecture materials 16 | 17 | ### Read ahead 18 | 19 | ::: callout-note 20 | ## Read ahead 21 | 22 | **Before class, you can prepare by reading the following materials:** 23 | 24 | 1. 25 | 2. 26 | ::: 27 | 28 | ### Acknowledgements 29 | 30 | Material for this lecture was borrowed and adopted from 31 | 32 | - 33 | - 34 | - 35 | 36 | # Learning objectives 37 | 38 | ::: callout-note 39 | # Learning objectives 40 | 41 | **At the end of this lesson you will:** 42 | 43 | - Be able to define relational data and keys 44 | - Be able to define the three types of join functions for relational data 45 | - Be able to implement mutating join functions 46 | ::: 47 | 48 | # Relational data 49 | 50 | Data analyses rarely involve only a single table of data. 
51 | 52 | Typically you have many tables of data, and you **must combine the datasets** to answer the questions that you are interested in. 53 | 54 | Collectively, **multiple tables of data are called relational data** because it is the *relations*, not just the individual datasets, that are important. 55 | 56 | Relations are **always defined between a pair of tables**. All other relations are built up from this simple idea: the relations of three or more tables are always a property of the relations between each pair. 57 | 58 | Sometimes both elements of a pair can be the same table! This is needed if, for example, you have a table of people, and each person has a reference to their parents. 59 | 60 | To work with relational data you **need verbs that work with pairs of tables**. 61 | 62 | ::: callout-tip 63 | ### Three important families of verbs 64 | 65 | There are three families of verbs designed to work with relational data: 66 | 67 | - [**Mutating joins**](https://r4ds.had.co.nz/relational-data.html#mutating-joins): A mutating join allows you to **combine variables from two tables**. It first matches observations by their keys, then copies across variables from one table to the other on the right side of the table (similar to `mutate()`). We will discuss a few of these below. 68 | - See @sec-mutjoins for Table of mutating joins. 69 | - [**Filtering joins**](https://r4ds.had.co.nz/relational-data.html#filtering-joins): Filtering joins **match observations** in the same way as mutating joins, **but affect the observations, not the variables** (i.e. filter observations from one data frame based on whether or not they match an observation in the other). 70 | - Two types: `semi_join(x, y)` and `anti_join(x, y)`. 71 | - [**Set operations**](https://r4ds.had.co.nz/relational-data.html#set-operations): Treat **observations as if they were set elements**. 
Typically used less frequently, but occasionally useful when you want to break a single complex filter into simpler pieces. All these operations work with a complete row, comparing the values of every variable. These expect the x and y inputs to have the same variables, and treat the observations like sets: 72 | - Examples of set operations: `intersect(x, y)`, `union(x, y)`, and `setdiff(x, y)`. 73 | ::: 74 | 75 | ## Keys 76 | 77 | The **variables used to connect each pair of tables** are called **keys**. A key is a variable (or set of variables) that uniquely identifies an observation. In simple cases, a single variable is sufficient to identify an observation. 78 | 79 | ::: callout-tip 80 | ### Note 81 | 82 | There are two types of keys: 83 | 84 | - A **primary key** uniquely identifies an observation in its own table. 85 | - A **foreign key** uniquely identifies an observation in another table. 86 | ::: 87 | 88 | Let's consider an example to help us understand the difference between a **primary key** and **foreign key**. 89 | 90 | ## Example of keys 91 | 92 | Imagine you are conducting a study and **collecting data on subjects and a health outcome**. 93 | 94 | Often, subjects will **make multiple visits** (a so-called longitudinal study) and so we will record the outcome for each visit. Similarly, we may record other information about them, such as the kind of housing they live in. 95 | 96 | ### The first table 97 | 98 | This code creates a simple table with some made up data about some hypothetical subjects' outcomes. 99 | 100 | ```{r,message=FALSE} 101 | library(tidyverse) 102 | 103 | outcomes <- tibble( 104 | id = rep(c("a", "b", "c"), each = 3), 105 | visit = rep(0:2, 3), 106 | outcome = rnorm(3 * 3, 3) 107 | ) 108 | 109 | print(outcomes) 110 | ``` 111 | 112 | Note that subjects are labeled by a unique identifier in the `id` column. 
113 | 114 | ### A second table 115 | 116 | Here is some code to create a second table (we will be joining the first and second tables shortly). This table contains some data about the hypothetical subjects' housing situation by recording the type of house they live in. 117 | 118 | ```{r second_table,exercise=TRUE,message=FALSE} 119 | subjects <- tibble( 120 | id = c("a", "b", "c"), 121 | house = c("detached", "rowhouse", "rowhouse") 122 | ) 123 | 124 | print(subjects) 125 | ``` 126 | 127 | ::: callout-note 128 | ### Question 129 | 130 | What is the **primary key** and **foreign key**? 131 | 132 | - The `subjects$id` is a **primary key** because it uniquely identifies each subject (each row) in the `subjects` table. 133 | - The `outcomes$id` is a **foreign key** because it appears in the `outcomes` table, where it matches each observation to a unique `id` in the `subjects` table. 134 | ::: 135 | 136 | # Mutating joins {#sec-mutjoins} 137 | 138 | The `dplyr` package provides a set of **functions for joining two data frames** into a single data frame based on a set of key columns. 139 | 140 | There are several functions in the `*_join()` family. 141 | 142 | - These functions all merge together two data frames 143 | - They differ in how they handle observations that exist in one but not both data frames. 
144 | 145 | Here are the **four functions from this family** that you will likely use the most often: 146 | 147 | ```{r} 148 | #| echo: false 149 | #| out-width: '60%' 150 | #| fig-align: 'center' 151 | library(knitr) 152 | join_funcs <- data.frame(func = c("`left_join()`", 153 | "`right_join()`", 154 | "`inner_join()`", 155 | "`full_join()`"), 156 | does = c("Includes all observations in the left data frame, whether or not there is a match in the right data frame", 157 | "Includes all observations in the right data frame, whether or not there is a match in the left data frame", 158 | "Includes only observations that are in both data frames", 159 | "Includes all observations from both data frames")) 160 | knitr::kable(join_funcs, col.names = c("Function", "What it includes in merged data frame")) 161 | ``` 162 | 163 | ![](https://d33wubrfki0l68.cloudfront.net/aeab386461820b029b7e7606ccff1286f623bae1/ef0d4/diagrams/join-venn.png) 164 | 165 | \[[Source from R for Data Science](https://r4ds.had.co.nz/relational-data#relational-data)\] 166 | 167 | ## Left Join 168 | 169 | Recall the `outcomes` and `subjects` datasets above. 170 | 171 | ```{r} 172 | outcomes 173 | subjects 174 | ``` 175 | 176 | Suppose we want to create a table that combines the information about houses (`subjects`) with the information about the outcomes (`outcomes`). 177 | 178 | We can use the `left_join()` function to merge the `outcomes` and `subjects` tables and produce the combined table. 179 | 180 | ```{r leftjoin} 181 | left_join(x = outcomes, y = subjects, by = "id") 182 | ``` 183 | 184 | ::: callout-tip 185 | ### Note 186 | 187 | The `by` argument indicates the column (or columns) that the two tables have in common. 188 | ::: 189 | 190 | ### Left Join with Incomplete Data 191 | 192 | In the previous examples, the `subjects` table didn't have a `visit` column. But suppose it did? Maybe people move around during the study. We could imagine a table like this one. 
193 | 194 | ```{r} 195 | subjects <- tibble( 196 | id = c("a", "b", "c"), 197 | visit = c(0, 1, 0), 198 | house = c("detached", "rowhouse", "rowhouse"), 199 | ) 200 | 201 | print(subjects) 202 | ``` 203 | 204 | When we left join the tables now, we get: 205 | 206 | ```{r} 207 | left_join(outcomes, subjects, by = c("id", "visit")) 208 | ``` 209 | 210 | ::: callout-tip 211 | ### Note 212 | 213 | Two things to point out here: 214 | 215 | 1. If we do not have information about a subject's housing in a given visit, the `left_join()` function automatically inserts an `NA` value to indicate that it is missing. 216 | 217 | 2. We can "join" on multiple variables (e.g. here we joined on the `id` and the `visit` columns). 218 | ::: 219 | 220 | We may even have a situation where we are missing housing data for a subject completely. The following table has no information about subject `a`. 221 | 222 | ```{r} 223 | subjects <- tibble( 224 | id = c("b", "c"), 225 | visit = c(1, 0), 226 | house = c("rowhouse", "rowhouse"), 227 | ) 228 | 229 | subjects 230 | ``` 231 | 232 | But we can still join the tables together and the `house` values for subject `a` will all be `NA`. 233 | 234 | ```{r} 235 | left_join(x = outcomes, y = subjects, by = c("id", "visit")) 236 | ``` 237 | 238 | ::: callout-tip 239 | ### Important 240 | 241 | The bottom line for `left_join()` is that it **always retains the values in the "left" argument** (in this case the `outcomes` table). 242 | 243 | - If there are no corresponding values in the "right" argument, `NA` values will be filled in. 244 | ::: 245 | 246 | ## Inner Join 247 | 248 | The `inner_join()` function only **retains the rows of both tables** that have corresponding values. Here we can see the difference. 
249 | 250 | ```{r} 251 | inner_join(x = outcomes, y = subjects, by = c("id", "visit")) 252 | ``` 253 | 254 | ## Right Join 255 | 256 | The `right_join()` function is like the `left_join()` function except that it **gives priority to the "right" hand argument**. 257 | 258 | ```{r} 259 | right_join(x = outcomes, y = subjects, by = c("id", "visit")) 260 | ``` 261 | 262 | # Summary 263 | 264 | - `left_join()` is useful for merging a "large" data frame with a "smaller" one while retaining all the rows of the "large" data frame 265 | 266 | - `inner_join()` gives you the intersection of the rows between two data frames 267 | 268 | - `right_join()` is like `left_join()` with the arguments reversed (likely only useful at the end of a pipeline) 269 | 270 | # Post-lecture materials 271 | 272 | ### Final Questions 273 | 274 | Here are some post-lecture questions to help you think about the material discussed. 275 | 276 | ::: callout-note 277 | ### Questions 278 | 279 | 1. If you had three data frames to combine with a shared key, how would you join them using the verbs you now know? 280 | 281 | 2. Using `df1` and `df2` below, what is the difference between `inner_join(df1, df2)`, `semi_join(df1, df2)` and `anti_join(df1, df2)`? 282 | 283 | ```{r} 284 | # Create first example data frame 285 | df1 <- data.frame(ID = 1:3, 286 | X1 = c("a1", "a2", "a3")) 287 | # Create second example data frame 288 | df2 <- data.frame(ID = 2:4, 289 | X2 = c("b1", "b2", "b3")) 290 | ``` 291 | 292 | 3. Try changing the order from the above e.g. `inner_join(df2, df1)`, `semi_join(df2, df1)` and `anti_join(df2, df1)`. What changed? What did not change? 
293 | ::: 294 | 295 | ### Additional Resources 296 | 297 | ::: callout-tip 298 | - 299 | - 300 | - 301 | ::: 302 | -------------------------------------------------------------------------------- /posts/2022-09-08-tidy-data-and-the-tidyverse/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Tidy data and the Tidyverse" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Introduction to tidy data and how to convert between wide and long data with the tidyr R package" 9 | date: 2022-09-08 10 | categories: [module 2, week 2, R, programming, tidyr, here, tidyverse] 11 | --- 12 | 13 | 14 | 15 | > "Happy families are all alike; every unhappy family is unhappy in its own way." ---- Leo Tolstoy 16 | 17 | > "Tidy datasets are all alike, but every messy dataset is messy in its own way." ---- Hadley Wickham 18 | 19 | # Pre-lecture materials 20 | 21 | ### Read ahead 22 | 23 | ::: callout-note 24 | ## Read ahead 25 | 26 | **Before class, you can prepare by reading the following materials:** 27 | 28 | 1. [Tidy Data](https://www.jstatsoft.org/article/view/v059i10) paper published in the Journal of Statistical Software 29 | 2. 30 | 3. 
[tidyr cheat sheet from RStudio](http://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf) 31 | ::: 32 | 33 | ### Acknowledgements 34 | 35 | Material for this lecture was borrowed and adopted from 36 | 37 | - 38 | - 39 | 40 | # Learning objectives 41 | 42 | ::: callout-note 43 | # Learning objectives 44 | 45 | **At the end of this lesson you will:** 46 | 47 | - Define tidy data 48 | - Be able to transform non-tidy data into tidy data 49 | - Be able to transform wide data into long data 50 | - Be able to separate character columns into multiple columns 51 | - Be able to unite multiple character columns into one column 52 | ::: 53 | 54 | # Tidy data 55 | 56 | As we learned in the last lesson, one unifying concept of the tidyverse is the notion of **tidy data**. 57 | 58 | As defined by Hadley Wickham in his 2014 paper published in the *Journal of Statistical Software*, a [tidy dataset](https://www.jstatsoft.org/article/view/v059i10) has the following properties: 59 | 60 | 1. Each variable forms a column. 61 | 62 | 2. Each observation forms a row. 63 | 64 | 3. Each type of observational unit forms a table. 65 | 66 | ![Artwork by Allison Horst on tidy data](https://github.com/allisonhorst/stats-illustrations/raw/main/rstats-artwork/tidydata_1.jpg){width="80%"} 67 | 68 | \[**Source**: [Artwork by Allison Horst](https://github.com/allisonhorst/stats-illustrations)\] 69 | 70 | The **purpose of defining tidy data** is to highlight the fact that **most data do not start out life as tidy**. 71 | 72 | In fact, much of the work of data analysis may involve simply making the data tidy (at least this has been our experience). 73 | 74 | - Once a dataset is tidy, it **can be used as input into a variety of other functions** that may transform, model, or visualize the data. 
75 | 76 | ::: callout-tip 77 | ### Example 78 | 79 | As a quick example, consider the following data illustrating **religion and income survey data** with the number of respondents with income range in column name. 80 | 81 | This is in a classic table format: 82 | 83 | ```{r} 84 | library(tidyr) 85 | relig_income 86 | ``` 87 | ::: 88 | 89 | While this format is canonical and is useful for quickly observing the relationship between multiple variables, it is not tidy. 90 | 91 | **This format violates the tidy form** because there are variables in the columns. 92 | 93 | - In this case the variables are religion, income bracket, and the number of respondents, which is the third variable, is presented inside the table. 94 | 95 | Converting this data to tidy format would give us 96 | 97 | ```{r, message=FALSE} 98 | library(tidyverse) 99 | 100 | relig_income %>% 101 | pivot_longer(-religion, names_to = "income", values_to = "respondents") %>% 102 | mutate(religion = factor(religion), income = factor(income)) 103 | ``` 104 | 105 | Some of these functions you have seen before, others might be new to you. Let's talk about each one in the context of the `tidyverse` R packages. 106 | 107 | # The "Tidyverse" 108 | 109 | There are a number of R packages that take advantage of the tidy data form and can be used to do interesting things with data. Many (but not all) of these packages are written by Hadley Wickham and **the collection of packages is often referred to as the "tidyverse"** because of their **dependence on and presumption of tidy data**. 
110 | 111 | ::: callout-tip 112 | ### Note 113 | 114 | A subset of the "Tidyverse" packages includes: 115 | 116 | - [ggplot2](https://cran.r-project.org/package=ggplot2): a plotting system based on the grammar of graphics 117 | 118 | - [magrittr](https://cran.r-project.org/package=magrittr): defines the `%>%` operator for chaining functions together in a series of operations on data 119 | 120 | - [dplyr](https://cran.r-project.org/package=dplyr): a suite of (fast) functions for working with data frames 121 | 122 | - [tidyr](https://cran.r-project.org/package=tidyr): easily tidy data with `pivot_wider()` and `pivot_longer()` functions (also `separate()` and `unite()`) 123 | 124 | A complete list can be found here (<https://www.tidyverse.org/packages>). 125 | ::: 126 | 127 | We will be using these packages quite a bit. 128 | 129 | The "tidyverse" package can be used to install and load all of the packages in the tidyverse at once. 130 | 131 | For example, instead of starting an R script with this: 132 | 133 | ```{r, eval = FALSE} 134 | library(dplyr) 135 | library(tidyr) 136 | library(readr) 137 | library(ggplot2) 138 | ``` 139 | 140 | You can start with this: 141 | 142 | ```{r, eval = FALSE} 143 | library(tidyverse) 144 | ``` 145 | 146 | In the example above, let's talk about what we did using the `pivot_longer()` function. 147 | 148 | We will also talk about `pivot_wider()`. 149 | 150 | ### `pivot_longer()` 151 | 152 | The `tidyr` package includes functions to transfer a data frame between *long* and *wide*. 153 | 154 | - **Wide format** data tends to have different attributes or variables describing an observation placed in separate columns. 155 | - **Long format** data tends to have different attributes encoded as levels of a single variable, followed by another column that contains the values of the observation at those different levels. 
161 | 162 | The **key problem** with the tidiness of the data is that the income variables are not in their own columns, but rather are embedded in the structure of the columns. 163 | 164 | To **fix this**, you can use the `pivot_longer()` function to **gather values spread across several columns into a single column**, here with the column names gathered into an `income` column. 165 | 166 | **Note**: when gathering, exclude any columns that you do not want "gathered" (`religion` in this case) by including the column names with a minus sign in the `pivot_longer()` function. 167 | 168 | For example: 169 | 170 | ```{r} 171 | # Gather everything EXCEPT religion to tidy data 172 | relig_income %>% 173 | pivot_longer(-religion, names_to = "income", values_to = "respondents") 174 | ``` 175 | ::: 176 | 177 | Even if your data is in a tidy format, `pivot_longer()` is occasionally useful for pulling data together to take advantage of faceting, or plotting separate plots based on a grouping variable. We will talk more about that in a future lecture. 178 | 179 | ### `pivot_wider()` 180 | 181 | The `pivot_wider()` function is less commonly needed to tidy data. It can, however, be useful for creating summary tables. 182 | 183 | ::: callout-tip 184 | ### Example 185 | 186 | You can use the `summarize()` function in `dplyr` to summarize the total number of respondents per income category. 
187 | 188 | ```{r} 189 | relig_income %>% 190 | pivot_longer(-religion, names_to = "income", values_to = "respondents") %>% 191 | mutate(religion = factor(religion), income = factor(income)) %>% 192 | group_by(income) %>% 193 | summarize(total_respondents = sum(respondents)) %>% 194 | pivot_wider(names_from = "income", 195 | values_from = "total_respondents") %>% 196 | knitr::kable() 197 | ``` 198 | ::: 199 | 200 | Notice in this example how `pivot_wider()` has been used at the **very end of the code sequence** to convert the summarized data into a shape that **offers a better tabular presentation for a report**. 201 | 202 | ::: callout-tip 203 | ### Note 204 | 205 | In the `pivot_wider()` call, you first specify the name of the column to use for the new column names (`income` in this example) and then specify the column to use for the cell values (`total_respondents` here). 206 | ::: 207 | 208 | ::: callout-tip 209 | ### Example of `pivot_longer()` 210 | 211 | Let's try another dataset. This dataset contains an excerpt of the [Gapminder data](https://cran.r-project.org/web/packages/gapminder/README.html#gapminder) on life expectancy, GDP per capita, and population by country. 212 | 213 | ```{r} 214 | library(gapminder) 215 | gapminder 216 | ``` 217 | 218 | If we wanted to make `lifeExp`, `pop` and `gdpPercap` (all measurements that we observe) go from a wide table into a long table, what would we do? 219 | 220 | ```{r} 221 | # try it yourself 222 | 223 | ``` 224 | ::: 225 | 226 | ::: callout-tip 227 | ### Example 228 | 229 | One more! Try using `pivot_longer()` to convert the following data that contains made-up revenues for three companies by quarter for years 2006 to 2009. 230 | 231 | Afterward, use `group_by()` and `summarize()` to calculate the average revenue for each company across all years and all quarters. 232 | 233 | **Bonus**: Calculate a mean revenue for each company AND each year (averaged across all 4 quarters). 
234 | 235 | ```{r, set.seed(123)} 236 | df <- tibble( 237 | "company" = rep(1:3, each=4), 238 | "year" = rep(2006:2009, 3), 239 | "Q1" = sample(x = 0:100, size = 12), 240 | "Q2" = sample(x = 0:100, size = 12), 241 | "Q3" = sample(x = 0:100, size = 12), 242 | "Q4" = sample(x = 0:100, size = 12), 243 | ) 244 | df 245 | ``` 246 | 247 | ```{r} 248 | # try it yourself 249 | 250 | ``` 251 | ::: 252 | 253 | ### `separate()` and `unite()` 254 | 255 | The same `tidyr` package also contains two useful functions: 256 | 257 | - `unite()`: combine contents of two or more columns into a single column 258 | - `separate()`: separate contents of a column into two or more columns 259 | 260 | First, we combine the first three columns into one new column using `unite()`. 261 | 262 | ```{r} 263 | gapminder %>% 264 | unite(col="country_continent_year", 265 | country:year, 266 | sep="_") 267 | ``` 268 | 269 | Next, we show how to split the united column back into three columns with `separate()`, using the `col`, `into` and `sep` arguments. 270 | 271 | ```{r} 272 | gapminder %>% 273 | unite(col="country_continent_year", 274 | country:year, 275 | sep="_") %>% 276 | separate(col="country_continent_year", 277 | into=c("country", "continent", "year"), 278 | sep="_") 279 | ``` 280 | 281 | # Post-lecture materials 282 | 283 | ### Final Questions 284 | 285 | Here are some post-lecture questions to help you think about the material discussed. 286 | 287 | ::: callout-note 288 | ### Questions 289 | 290 | 1. Using prose, describe how the variables and observations are organised in a tidy dataset versus a non-tidy dataset. 291 | 292 | 2. What do the `extra` and `fill` arguments do in `separate()`? Experiment with the various options for the following two toy datasets. 
293 | 294 | ```{r, eval=FALSE} 295 | tibble(x = c("a,b,c", "d,e,f,g", "h,i,j")) %>% 296 | separate(x, c("one", "two", "three")) 297 | 298 | tibble(x = c("a,b,c", "d,e", "f,g,i")) %>% 299 | separate(x, c("one", "two", "three")) 300 | ``` 301 | 302 | 3. Both `unite()` and `separate()` have a remove argument. What does it do? Why would you set it to FALSE? 303 | 304 | 4. Compare and contrast `separate()` and `extract()`. Why are there three variations of separation (by position, by separator, and with groups), but only one `unite()`? 305 | ::: 306 | 307 | ### Additional Resources 308 | 309 | ::: callout-tip 310 | - [Tidy Data](https://www.jstatsoft.org/article/view/v059i10) paper published in the Journal of Statistical Software 311 | - https://r4ds.had.co.nz/tidy-data.html 312 | - [tidyr cheat sheet from RStudio](http://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf) 313 | ::: 314 | -------------------------------------------------------------------------------- /posts/2022-09-13-plotting-systems/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Plotting Systems" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Overview of three plotting systems in R" 9 | date: 2022-09-13 10 | categories: [module 3, week 3, R, programming, ggplot2, data viz] 11 | --- 12 | 13 | > The data may not contain the answer. And, if you torture the data long enough, it will tell you anything. ---*John W. Tukey* 14 | 15 | # Pre-lecture materials 16 | 17 | ### Read ahead 18 | 19 | ::: callout-note 20 | ## Read ahead 21 | 22 | **Before class, you can prepare by reading the following materials:** 23 | 24 | 1. 25 | 2. Paul Murrell (2011). *R Graphics*, CRC Press. 26 | 3. Hadley Wickham (2009). *ggplot2*, Springer. 27 | 4. Deepayan Sarkar (2008). 
*Lattice: Multivariate Data Visualization with R*, Springer. 28 | ::: 29 | 30 | ### Acknowledgements 31 | 32 | Material for this lecture was borrowed and adopted from 33 | 34 | - 35 | 36 | # Learning objectives 37 | 38 | ::: callout-note 39 | # Learning objectives 40 | 41 | **At the end of this lesson you will:** 42 | 43 | - Be able to identify and describe the three plotting systems in R 44 | ::: 45 | 46 | # Plotting Systems 47 | 48 | There are **three different plotting systems in R** and they each have different characteristics and modes of operation. 49 | 50 | ::: callout-tip 51 | ### Important 52 | 53 | The three systems are 54 | 55 | 1. The base plotting system 56 | 2. The lattice system 57 | 3. The ggplot2 system 58 | 59 | **This course will focus primarily on the ggplot2 plotting system**. The other two systems are presented for context. 60 | ::: 61 | 62 | ## The Base Plotting System 63 | 64 | The **base plotting system** is the original plotting system for R. The basic model is sometimes **referred to as the "artist's palette" model**. 65 | 66 | The idea is you start with a blank canvas and build up from there. 67 | 68 | In more R-specific terms, you **typically start with the `plot()` function** (or a similar plot-creating function) to *initiate* a plot and then *annotate* the plot with various annotation functions (`text`, `lines`, `points`, `axis`). 69 | 70 | The base plotting system is **often the most convenient plotting system** to use because it mirrors how we sometimes think of building plots and analyzing data. 71 | 72 | If we do not have a completely well-formed idea of how we want to look at some data, often we will start by "throwing some data on the page" and then slowly add more information to it as our thought process evolves. 73 | 74 | ::: callout-tip 75 | ### Example 76 | 77 | We might look at a simple scatterplot and then decide to add a linear regression line or a smoother to it to highlight the trends. 
78 | 79 | ```{r} 80 | #| fig-width: 5 81 | #| fig-height: 5 82 | #| fig-cap: "Scatterplot with loess curve" 83 | data(airquality) 84 | with(airquality, { 85 | plot(Temp, Ozone) 86 | lines(loess.smooth(Temp, Ozone)) 87 | }) 88 | ``` 89 | ::: 90 | 91 | In the code above: 92 | 93 | - The `plot()` function creates the initial plot and draws the points (circles) on the canvas. 94 | - The `lines` function is used to annotate or add to the plot (in this case it adds a loess smoother to the scatterplot). 95 | 96 | Next, we use the `plot()` function to draw the points on the scatterplot and then use the `main` argument to add a main title to the plot. 97 | 98 | ```{r} 99 | #| fig-width: 5 100 | #| fig-height: 5 101 | #| fig-cap: "Scatterplot with loess curve" 102 | data(airquality) 103 | with(airquality, { 104 | plot(Temp, Ozone, main = "my plot") 105 | lines(loess.smooth(Temp, Ozone)) 106 | }) 107 | ``` 108 | 109 | ::: callout-tip 110 | ### Note 111 | 112 | One downside with constructing base plots is that you **cannot go backwards once the plot has started**. 113 | 114 | It is possible that you could start down the road of constructing a plot and realize later (when it is too late) that you do not have enough room to add a y-axis label or something like that. 115 | ::: 116 | 117 | If you have a specific plot in mind, there is then a need to **plan in advance** to make sure, for example, that you have set your margins to be the right size to fit all of the annotations that you may want to include. 118 | 119 | While the base plotting system is nice in that it gives you the flexibility to specify these kinds of details to painstaking accuracy, **sometimes it would be nice if the system could just figure it out for you**. 
120 | 121 | ::: callout-tip 122 | ### Note 123 | 124 | Another downside of the base plotting system is that it is **difficult to describe or translate a plot to others because there is no clear graphical language or grammar** that can be used to communicate what you have done. 125 | 126 | The only real way to describe what you have done in a base plot is to just list the series of commands/functions that you have executed, which is not a particularly compact way of communicating things. 127 | 128 | This is one problem that the `ggplot2` package attempts to address. 129 | ::: 130 | 131 | ::: callout-tip 132 | ### Example 133 | 134 | Another typical base plot is constructed with the following code. 135 | 136 | ```{r} 137 | #| fig-width: 5 138 | #| fig-height: 5 139 | #| fig-cap: "Base plot with title" 140 | data(cars) 141 | 142 | ## Create the plot / draw canvas 143 | with(cars, plot(speed, dist)) 144 | 145 | ## Add annotation 146 | title("Speed vs. Stopping distance") 147 | ``` 148 | ::: 149 | 150 | We will go into more detail on what these functions do in later lessons. 151 | 152 | ## The Lattice System 153 | 154 | The **lattice plotting system** is implemented in the `lattice` R package which comes with every installation of R (although it is not loaded by default). 155 | 156 | To **use the lattice plotting functions**, you must first load the `lattice` package with the `library` function. 157 | 158 | ```{r} 159 | library(lattice) 160 | ``` 161 | 162 | With the lattice system, **plots are created with a single function call**, such as `xyplot()` or `bwplot()`. 163 | 164 | There is **no real distinction between functions that create or initiate plots** and **functions that annotate plots** because it all happens at once. 165 | 166 | Lattice plots tend to be **most useful for conditioning types of plots**, i.e. looking at how `y` changes with `x` across levels of `z`. 167 | 168 | - e.g. 
these types of plots are useful for looking at multi-dimensional data and often allow you to squeeze a lot of information into a single window or page. 169 | 170 | Another aspect of lattice that makes it different from base plotting is that **things like margins and spacing are set automatically**. 171 | 172 | This is possible because the entire plot is specified at once via a single function call, so all of the available information needed to figure out the spacing and margins is already there. 173 | 174 | ::: callout-tip 175 | ### Example 176 | 177 | Here is a lattice plot that looks at the relationship between life expectancy and income and how that relationship varies by region in the United States. 178 | 179 | ```{r} 180 | #| fig-width: 8 181 | #| fig-height: 4 182 | #| fig-cap: "Lattice plot" 183 | state <- data.frame(state.x77, region = state.region) 184 | xyplot(Life.Exp ~ Income | region, data = state, layout = c(4, 1)) 185 | ``` 186 | ::: 187 | 188 | You can see that the entire plot was generated by the call to `xyplot()` and all of the data for the plot were stored in the `state` data frame. 189 | 190 | The **plot itself contains four panels**---one for each region---and **within each panel is a scatterplot** of life expectancy and income. 191 | 192 | The notion of *panels* comes up a lot with lattice plots because you typically have many panels in a lattice plot (each panel typically represents a *condition*, like "region"). 193 | 194 | ::: callout-tip 195 | ### Note 196 | 197 | Downsides with the lattice system 198 | 199 | - It can sometimes be very **awkward to specify an entire plot** in a single function call (you end up with functions with many many arguments). 200 | - **Annotation in panels in plots is not especially intuitive** and can be difficult to explain. In particular, the use of custom panel functions and subscripts can be difficult to wield and requires intense preparation. 
201 | - Once a plot is created, **you cannot "add" to the plot** (but of course you can just make it again with modifications). 202 | ::: 203 | 204 | ## The ggplot2 System 205 | 206 | The **ggplot2 plotting system** attempts to split the difference between base and lattice in a number of ways. 207 | 208 | ::: callout-tip 209 | ### Note 210 | 211 | Taking cues from lattice, the ggplot2 system automatically deals with spacings, text, titles but also allows you to annotate by "adding" to a plot. 212 | ::: 213 | 214 | The ggplot2 system is implemented in the `ggplot2` package (part of the `tidyverse` package), which is available from CRAN (it does not come with R). 215 | 216 | You can install it from CRAN via 217 | 218 | ```{r} 219 | #| eval: false 220 | install.packages("ggplot2") 221 | ``` 222 | 223 | and then load it into R via the `library()` function. 224 | 225 | ```{r} 226 | library(ggplot2) 227 | ``` 228 | 229 | Superficially, the `ggplot2` functions are similar to `lattice`, but the system is generally easier and more intuitive to use. 230 | 231 | The defaults used in `ggplot2` make many choices for you, but you can still customize plots to your heart's desire. 232 | 233 | ::: callout-tip 234 | ### Example 235 | 236 | A typical plot with the `ggplot2` package looks as follows. 237 | 238 | ```{r} 239 | #| message: false 240 | #| fig-width: 6 241 | #| fig-height: 5 242 | #| fig-cap: "ggplot2 plot" 243 | library(tidyverse) 244 | data(mpg) 245 | mpg %>% 246 | ggplot(aes(displ, hwy)) + 247 | geom_point() 248 | ``` 249 | ::: 250 | 251 | There are additional functions in `ggplot2` that allow you to make arbitrarily sophisticated plots. 252 | 253 | We will discuss more about this in the next lecture. 
254 | -------------------------------------------------------------------------------- /posts/_metadata.yml: -------------------------------------------------------------------------------- 1 | # options specified here will apply to all posts in this folder 2 | 3 | # freeze computational output 4 | # (see https://quarto.org/docs/projects/code-execution.html#freeze) 5 | freeze: true 6 | 7 | # Enable banner style title blocks 8 | title-block-banner: true 9 | 10 | # Default for table of contents 11 | toc: true 12 | toc-title: Table of contents 13 | toc-location: left 14 | -------------------------------------------------------------------------------- /profile.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/profile.jpg -------------------------------------------------------------------------------- /projects.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Projects" 3 | listing: 4 | - id: projects 5 | contents: "projects" 6 | sort: "date desc" 7 | type: default 8 | categories: true 9 | sort-ui: false 10 | filter-ui: false 11 | page-layout: full 12 | title-block-banner: false 13 | --- 14 | -------------------------------------------------------------------------------- /projects/2022-08-30-project-0/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Project 0 (optional)" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Information for Project 0 (entirely optional, but hopefully useful and fun!)" 9 | date: 2022-08-30 10 | categories: [project 0, projects] 11 | --- 12 | 13 | # Background 14 | 15 | **Due date: Sept 8 at 1:29pm** 16 | 17 | Using the tools we 
learned in the first week (e.g. R, RStudio and Github). Let's apply them in a small (but also comprehensive) exercise. 18 | 19 | - **Please note this project is entirely optional** (i.e. it will not be graded), but hopefully it will be helpful to you getting set up for the rest of the course (i.e. set up these tools on your computing environment) and give you an opportunity to introduce yourself to your classmates. 20 | 21 | - **For anyone who completes it, you get a free hex sticker!** If you aren't familiar with the hex stickers, check out [this link](https://github.com/rstudio/hex-stickers). You can add them to your laptop for some character and swag (or [turn them into magnets](https://twitter.com/ClaireMKBowen/status/1294336123414523904)). I have a ton of different ones from the tidyverse or [RLadies Baltimore](https://pbs.twimg.com/profile_images/1236855715018559488/PuYAjTTD_400x400.jpg). You can come pick one up from my office or I can mail it to you if you email me a mailing address after you submit the project. 22 | 23 | - **For those of you who are new to GitHub/R/Rmarkdown**: this project makes you do a lot of things that you might not be familiar with. I know that this might be time-consuming and also might feel a bit intimidating. It's partly unavoidable and partly on purpose. You need to learn how to quickly get up to speed with all kinds of new tools that come your way. So practicing it is a good idea. You are welcome to draw on any sources for help that you want (online, classmates, instructor, etc.). I'm confident with a bit of trial and error you'll get it to work. 24 | 25 | # Part 1 26 | 27 | This part of the project is to ensure that you have successfully set up your computing environment. Please email (use the Subject line: `140.776 Setup`) the Course Instructor (Dr. Stephanie Hicks) at shicks19\@jhu.edu the following information: 28 | 29 | ### Setting up your computing environment 30 | 31 | 1. Your name, JHED ID (if applicable). 32 | 33 | 2. 
The type of computer/operating system you are using (Windows, Mac, Unix/Linux, other) 34 | 35 | 3. The version of R that you have installed on your computer. To do this, start up R and run the following in the R console and include the output in your email. 36 | 37 | ```{r} 38 | #| label: rv 39 | #| eval: false 40 | #| echo: true 41 | print(R.version.string) 42 | ``` 43 | 44 | ![Printing the R version string](../../videos/versionstring.gif) 45 | 46 | 4. The version of RStudio that you have installed on your computer. To do this start up RStudio and in the R console window, run the following and again include the output in your email: 47 | 48 | ```{r rstudiov, eval = FALSE, echo = TRUE} 49 | print(RStudio.Version()$version) 50 | ``` 51 | 52 | 5. If you have a GitHub username, please include this in your email. If you do not have a GitHub username, read https://happygitwithr.com, sign up for GitHub, and include your new username in your email to me. 53 | 54 | 6. To make sure git is installed on your system, use the 'Terminal' (e.g. it's next to the R Console within RStudio) (or whatever you use), run the following and include the output in your email: 55 | 56 | For example, this is mine: 57 | 58 | ```{bash gv, eval = TRUE, echo = TRUE} 59 | git --version 60 | ``` 61 | 62 | If you have any trouble with any of the steps above, try to first post on the discussion board on CoursePlus. The TAs and I will be checking it frequently, but other students may also be helpful in their replies. You can also use other resources to get help (Google, R4DS, colleagues/friends/relatives with R/Markdown experience, etc.). Still, try to do as much as possible yourself. We will use all the bits you are learning here repeatedly during this course. 63 | 64 | # Part 2 65 | 66 | This part of the project is to help you introduce yourself (and your interests!) to others in this course. You will create a new GitHub repository and build a small website about yourself. 67 | 68 | ### 1. 
Create a GitHub repo for your website 69 | 70 | Create a new GitHub repository titled `biostat776-intro-<firstname>-<lastname>` (where you replace `<firstname>` with your first name and `<lastname>` with your last name) in your own personal GitHub account (e.g. `https://github.com/<username>/biostat776-intro-<firstname>-<lastname>`). 71 | 72 | For example, you can find an example that I created for myself at 73 | 74 | - github repo: 75 | 76 | ### 2. Build a website using R Markdown 77 | 78 | Using one of the many ways we discussed in class (e.g. a [simple R Markdown website](https://bookdown.org/yihui/rmarkdown/rmarkdown-site.html), [blogdown](https://bookdown.org/yihui/blogdown/), [distill](https://rstudio.github.io/distill/website.html), etc), create a new project in RStudio with the appropriate files. For example, you might include the following information: 79 | 80 | - Write a short summary introducing yourself. Structure the webpage with headings, subheadings, etc. Talk a bit about yourself, your background, training, research interests. Let me/us know what kind of statistics, programming, data analysis experience you already have. I am also curious to know what you most hope to learn in this course. 81 | 82 | - Five fun facts about yourself 83 | 84 | - A web page linking to something you think is really cool/interesting/inspiring/etc. You could also describe briefly what it is and why you like it. 85 | 86 | If you want, feel free to get creative and include other things. You can play with RMarkdown if you wish to, e.g., you can try to include some table or a video, etc. 87 | 88 | ### 3. Include a `README.md` file 89 | 90 | Your project repository should include a `README.md` file (if it was not included already). 91 | 92 | Edit the repository `README.md` file. Typically it will only contain the name of your repository with a `#` sign in front. The `#` represents a level 1 heading in Markdown. Change the headline and call it "Introducing myself" (or something like that).
Underneath write something like "This website contains a short introduction of *Your Name*." 93 | 94 | Make sure the 2 files (README.md and especially `index.Rmd` / `index.html`) look the way you want. Make changes until everything works. 95 | 96 | ### 4. Deploy your website 97 | 98 | Depending on how you want to deploy your website, the following may or may not be relevant to you. In general, you want to make sure you have initialized your project to use `git` (i.e. you can type `git init` to initialize the repository to use git). Add and commit your changes. Push your changes and deploy your website. 99 | 100 | Following steps 2-4, here is my example website: 101 | 102 | - website: 103 | 104 | ### 5. Share your website 105 | 106 | - Go to the Discussion Board in CoursePlus and write a short post with a link (URL) to your website (and URL to the corresponding GitHub repository) that you created. 107 | - As you read the introductions from other folks in the class, feel free to comment/reply using the Discussion Board. 108 | 109 | In class on Sept 8, I will show as many websites as I can from CoursePlus! 110 | -------------------------------------------------------------------------------- /projects/2022-09-15-project-2/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Project 2" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Exploring temperature and rainfall in Australia" 9 | date: 2022-09-15 10 | categories: [project 2, projects] 11 | --- 12 | 13 | # Background 14 | 15 | **Due date: Sept 30 at 11:59pm** 16 | 17 | The goal of this assignment is to practice designing and writing functions along with practicing our tidyverse skills that we learned in our previous project.
Writing functions involves thinking about how code should be divided up and what the interface/arguments should be. In addition, you need to think about what the function will return as output. 18 | 19 | ### To submit your project 20 | 21 | Please write up your project using R Markdown and processed with `knitr`. Compile your document as an **HTML file** and submit your HTML file to the dropbox on Courseplus. Please **show all your code** (i.e. make sure to set `echo = TRUE`) for each of the answers to each part. 22 | 23 | ### Install packages 24 | 25 | Before attempting this assignment, you should first install the following packages, if they are not already installed: 26 | 27 | ```{r} 28 | #| eval: false 29 | install.packages("tidyverse") 30 | install.packages("tidytuesdayR") 31 | ``` 32 | 33 | # Part 1: Fun with functions 34 | 35 | In this part, we are going to practice creating functions. 36 | 37 | ### Part 1A: Exponential transformation 38 | 39 | The exponential of a number can be written as an infinite series expansion of the form $$ 40 | \exp(x) = 1 + x + \frac{x^2}{2!} + \frac{x^3}{3!} + \cdots 41 | $$ Of course, we cannot compute an infinite series by the end of this term and so we must truncate it at a certain point in the series. The truncated sum of terms represents an approximation to the true exponential, but the approximation may be usable. 42 | 43 | Write a function that computes the exponential of a number using the truncated series expansion. The function should take two arguments: 44 | 45 | - `x`: the number to be exponentiated 46 | 47 | - `k`: the number of terms to be used in the series expansion beyond the constant 1. The value of `k` is always $\geq 1$. 48 | 49 | For example, if $k = 1$, then the `Exp` function should return the number $1 + x$. If $k = 2$, then you should return the number $1 + x + x^2/2!$. 50 | 51 | Include at least one example of output using your function. 
52 | 53 | ::: callout-note 54 | - You can assume that the input value `x` will always be a *single* number. 55 | 56 | - You can assume that the value `k` will always be an integer $\geq 1$. 57 | 58 | - Do not use the `exp()` function in R. 59 | 60 | - The `factorial()` function can be used to compute factorials. 61 | ::: 62 | 63 | ```{r} 64 | Exp <- function(x, k) { 65 | # Add your solution here 66 | } 67 | ``` 68 | 69 | ### Part 1B: Sample mean and sample standard deviation 70 | 71 | Next, write two functions called `sample_mean()` and `sample_sd()` that take as input a vector of data of length $N$ and calculate the sample average and sample standard deviation for the set of $N$ observations. 72 | 73 | $$ 74 | \bar{x} = \frac{1}{N} \sum_{i=1}^N x_i 75 | $$ $$ 76 | s = \sqrt{\frac{1}{N-1} \sum_{i=1}^N (x_i - \bar{x})^2} 77 | $$ Include at least one example of output using your functions. 78 | 79 | ::: callout-note 80 | - You can assume that the input value `x` will always be a *vector* of numbers of length *N*. 81 | 82 | - Do not use the `mean()` and `sd()` functions in R. 83 | ::: 84 | 85 | ```{r} 86 | sample_mean <- function(x) { 87 | # Add your solution here 88 | } 89 | 90 | sample_sd <- function(x) { 91 | # Add your solution here 92 | } 93 | ``` 94 | 95 | ### Part 1C: Confidence intervals 96 | 97 | Next, write a function called `calculate_CI()` that: 98 | 99 | 1. There should be two inputs to the `calculate_CI()`. First, it should take as input a vector of data of length $N$. Second, the function should also have a `conf` ($=1-\alpha$) argument that allows the confidence interval to be adapted for different $\alpha$. 100 | 101 | 2. Calculates a confidence interval (CI) (e.g. a 95% CI) for the estimate of the mean in the population.
If you are not familiar with confidence intervals, it is an interval that contains the population parameter with probability $1-\alpha$ taking on this form 102 | 103 | $$ 104 | \bar{x} \pm t_{\alpha/2, N-1} s_{\bar{x}} 105 | $$ 106 | 107 | where $t_{\alpha/2, N-1}$ is the value needed to generate an area of $\alpha / 2$ in each tail of the $t$-distribution with $N-1$ degrees of freedom and $s_{\bar{x}} = \frac{s}{\sqrt{N}}$ is the standard error of the mean. For example, if we pick a 95% confidence interval and $N$=50, then you can calculate $t_{\alpha/2, N-1}$ as 108 | 109 | ```{r} 110 | alpha <- 1 - 0.95 111 | degrees_freedom = 50 - 1 112 | t_score = qt(p=alpha/2, df=degrees_freedom, lower.tail=FALSE) 113 | ``` 114 | 115 | 3. Returns a named vector of length 2, where the first value is the `lower_bound`, the second value is the `upper_bound`. 116 | 117 | ```{r} 118 | calculate_CI <- function(x, conf = 0.95) { 119 | # Add your solution here 120 | } 121 | 122 | ``` 123 | 124 | Include example of output from your function showing the output when using two different levels of `conf`. 125 | 126 | ::: callout-note 127 | If you want to check if your function output matches an existing function in R, consider a vector $x$ of length $N$ and see if the following two code chunks match. 128 | 129 | ```{r, eval=FALSE} 130 | calculate_CI(x, conf = 0.95) 131 | ``` 132 | 133 | ```{r,eval=FALSE} 134 | dat = data.frame(x=x) 135 | fit <- lm(x ~ 1, dat) 136 | 137 | # Calculate a 95% confidence interval 138 | confint(fit, level=0.95) 139 | ``` 140 | ::: 141 | 142 | # Part 2: Wrangling data 143 | 144 | In this part, we will practice our wrangling skills with the tidyverse that we learned about in module 1. 145 | 146 | ### Data 147 | 148 | The two datasets for this part of the assignment comes from [TidyTuesday](https://www.tidytuesday.com). 
Specifically, we will use the following data from January 2020, which I have provided for you below: 149 | 150 | ```{r, eval=FALSE} 151 | tuesdata <- tidytuesdayR::tt_load('2020-01-07') 152 | rainfall <- tuesdata$rainfall 153 | temperature <- tuesdata$temperature 154 | ``` 155 | 156 | However, to avoid re-downloading data, we will check to see if those files already exist using an `if()` statement: 157 | 158 | ```{r, message=FALSE} 159 | library(here) 160 | if(!file.exists(here("data","tuesdata_rainfall.RDS"))){ 161 | tuesdata <- tidytuesdayR::tt_load('2020-01-07') 162 | rainfall <- tuesdata$rainfall 163 | temperature <- tuesdata$temperature 164 | 165 | # save the files to RDS objects 166 | saveRDS(tuesdata$rainfall, file = here("data","tuesdata_rainfall.RDS")) 167 | saveRDS(tuesdata$temperature, file = here("data","tuesdata_temperature.RDS")) 168 | } 169 | ``` 170 | 171 | ::: callout-note 172 | The above code will only run if it cannot find the path to the `tuesdata_rainfall.RDS` on your computer. Then, we can just read in these files every time we knit the R Markdown, instead of re-downloading them every time. 173 | ::: 174 | 175 | Let's load the datasets 176 | 177 | ```{r, eval=TRUE, message=FALSE} 178 | rainfall <- readRDS(here("data","tuesdata_rainfall.RDS")) 179 | temperature <- readRDS(here("data","tuesdata_temperature.RDS")) 180 | ``` 181 | 182 | Now we can look at the data with `glimpse()` 183 | 184 | ```{r,message=FALSE} 185 | library(tidyverse) 186 | 187 | glimpse(rainfall) 188 | glimpse(temperature) 189 | ``` 190 | 191 | If we look at the [TidyTuesday github repo](https://github.com/rfordatascience/tidytuesday/tree/master/data/2020#2020-data) from 2020, we see this dataset contains temperature and rainfall data from Australia. 
192 | 193 | ![](https://www.ga.gov.au/__data/assets/image/0005/12569/GA14206.jpg){.preview-image} 194 | 195 | \[**Source**: [Geoscience Australia](https://www.ga.gov.au/scientific-topics/national-location-information/dimensions/climatic-extremes)\] 196 | 197 | Here is a data dictionary for what all the column names mean: 198 | 199 | - 200 | 201 | ### Tasks 202 | 203 | Using the `rainfall` and `temperature` data, perform the following steps and create a new data frame called `df`: 204 | 205 | 1. Start with `rainfall` dataset and drop any rows with NAs. 206 | 2. Create a new column titled `date` that combines the columns `year`, `month`, `day` into one column separated by "-". (e.g. "2020-01-01"). This column should not be a character, but should be recognized as a date. (**Hint**: check out the `ymd()` function in `lubridate` R package). You will also want to add a column that just keeps the `year`. 207 | 3. Using the `city_name` column, convert the city names (character strings) to all upper case. 208 | 4. Join this wrangled rainfall dataset with the `temperature` dataset such that it includes only observations that are in both data frames. (**Hint**: there are two keys that you will need to join the two datasets together). (**Hint**: If all has gone well thus far, you should have a dataset with 83,964 rows and 13 columns). 209 | 210 | ::: callout-note 211 | - You may need to use functions outside these packages to obtain this result, in particular you may find the functions `drop_na()` from `tidyr` and `str_to_upper()` function from `stringr` useful. 212 | ::: 213 | 214 | ```{r} 215 | # Add your solution here 216 | 217 | ``` 218 | 219 | # Part 3: Data visualization 220 | 221 | In this part, we will practice our `ggplot2` plotting skills within the tidyverse starting with our wrangled `df` data from Part 2. For full credit in this part (and for all plots that you make), your plots should include: 222 | 223 | 1. 
An overall title for the plot and a subtitle summarizing key trends that you found. Also include a caption in the figure. 224 | 2. There should be an informative x-axis and y-axis label. 225 | 226 | Consider playing around with the `theme()` function to make the figure shine, including playing with background colors, font, etc. 227 | 228 | ### Part 3A: Plotting temperature data over time 229 | 230 | Use the functions in `ggplot2` package to make a line plot of the max and min temperature (y-axis) over time (x-axis) for each city in our wrangled data from Part 2. You should only consider years 2014 and onwards. For full credit, your plot should include: 231 | 232 | 1. For a given city, the min and max temperature should both appear on the plot, but they should be two different colors. 233 | 2. Use a facet function to facet by `city_name` to show all cities in one figure. 234 | 235 | ```{r} 236 | # Add your solution here 237 | 238 | ``` 239 | 240 | ### Part 3B: Plotting rainfall over time 241 | 242 | Here we want to explore the distribution of rainfall (log scale) with histograms for a given city (indicated by the `city_name` column) for a given year (indicated by the `year` column) so we can make some exploratory plots of the data. 243 | 244 | ::: callout-note 245 | You are again using the wrangled data from Part 2. 246 | ::: 247 | 248 | The following code plots the data from one city (`city_name == "PERTH"`) in a given year (`year == 2000`). 249 | 250 | ```{r, eval=FALSE} 251 | df %>% 252 | filter(city_name == "PERTH", year == 2000) %>% 253 | ggplot(aes(log(rainfall))) + 254 | geom_histogram() 255 | ``` 256 | 257 | While this code is useful, it only provides us information on one city in one year. We could cut and paste this code to look at other cities/years, but that can be error prone and just plain messy. 258 | 259 | The aim here is to **design** and **implement** a function that can be re-used to visualize all of the data in this dataset. 260 | 261 | 1. 
There are 2 aspects that may vary in the dataset: The **city_name** and the **year**. Note that not all combinations of `city_name` and `year` have measurements. 262 | 263 | 2. Your function should take as input two arguments **city_name** and **year**. 264 | 265 | 3. Given the input from the user, your function should return a **single** histogram for that input. Furthermore, the data should be **readable** on that plot so that it is in fact useful. It should be possible to visualize the entire dataset with your function (through repeated calls to your function). 266 | 267 | 4. If the user enters an input that does not exist in the dataset, your function should catch that and report an error (via the `stop()` function). 268 | 269 | For this section, 270 | 271 | 1. Write a short description of how you chose to design your function and why. 272 | 273 | 2. Present the code for your function in the R markdown document. 274 | 275 | 3. Include at least one example of output from your function. 276 | 277 | ```{r} 278 | # Add your solution here 279 | 280 | ``` 281 | 282 | # Part 4: Apply functions and plot 283 | 284 | ### Part 4A: Tasks 285 | 286 | In this part, we will apply the functions we wrote in Part 1 to our rainfall data starting with our wrangled `df` data from Part 2. 287 | 288 | 1. First, filter for only years including 2014 and onwards. 289 | 2. For a given city and for a given year, calculate the sample mean (using your function `sample_mean()`), the sample standard deviation (using your function `sample_sd()`), and a 95% confidence interval for the average rainfall (using your function `calculate_CI()`). Specifically, you should add two columns in this summarized dataset: a column titled `lower_bound` and a column titled `upper_bound` containing the lower and upper bounds for your CI that you calculated (using your function `calculate_CI()`). 290 | 3. Call this summarized dataset `rain_df`.
291 | 292 | ```{r} 293 | # Add your solution here 294 | 295 | ``` 296 | 297 | ### Part 4B: Tasks 298 | 299 | Using `rain_df`, plot the estimates of mean rainfall and the 95% confidence intervals on the same plot. There should be a separate faceted plot for each city. Think about using `ggplot()` with both `geom_point()` (and `geom_line()` to connect the points) for the means and `geom_errorbar()` for the lower and upper bounds of the confidence interval. 300 | 301 | ```{r} 302 | # Add your solution here 303 | 304 | ``` 305 | -------------------------------------------------------------------------------- /projects/2022-10-04-project-3/index.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Project 3" 3 | author: 4 | - name: Stephanie Hicks 5 | url: https://stephaniehicks.com 6 | affiliation: Department of Biostatistics, Johns Hopkins 7 | affiliation_url: https://publichealth.jhu.edu 8 | description: "Exploring album sales and sentiment of lyrics from Beyoncé and Taylor Swift" 9 | date: 2022-10-04 10 | categories: [project 3, projects] 11 | --- 12 | 13 | # Background 14 | 15 | **Due date: October 21 at 11:59pm** 16 | 17 | The goal of this assignment is to practice wrangling special data types (including dates, character strings, and factors) and visualizing results while practicing our tidyverse skills. 18 | 19 | ### To submit your project 20 | 21 | Please write up your project using R Markdown and processed with `knitr`. Compile your document as an **HTML file** and submit your HTML file to the dropbox on Courseplus. Please **show all your code** (i.e. make sure to set `echo = TRUE`) for each of the answers to each part. 22 | 23 | # Load data 24 | 25 | The datasets for this part of the assignment come from [TidyTuesday](https://www.tidytuesday.com).
26 | 27 | Data dictionary available here: 28 | 29 | - 30 | 31 | ![Beyoncé (left) and Taylor Swift (right)](https://akns-images.eonline.com/eol_images/Entire_Site/2019721/rs_1024x759-190821125112-1024.taylor-swift-beyonce-2009-mtv-vmas.ct.082119.jpg){preview="TRUE"} 32 | 33 | Specifically, we will explore album sales and lyrics from two artists (Beyoncé and Taylor Swift). The data are available from TidyTuesday from September 2020, which I have provided for you below: 34 | 35 | 36 | ```{r, eval=FALSE} 37 | b_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv') 38 | ts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv') 39 | sales <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv') 40 | ``` 41 | 42 | However, to avoid re-downloading data, we will check to see if those files already exist using an `if()` statement: 43 | 44 | ```{r, message=FALSE} 45 | library(here) 46 | if(!file.exists(here("data","b_lyrics.RDS"))){ 47 | b_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/beyonce_lyrics.csv') 48 | ts_lyrics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/taylor_swift_lyrics.csv') 49 | sales <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-29/sales.csv') 50 | 51 | # save the files to RDS objects 52 | saveRDS(b_lyrics, file = here("data","b_lyrics.RDS")) 53 | saveRDS(ts_lyrics, file = here("data","ts_lyrics.RDS")) 54 | saveRDS(sales, file = here("data","sales.RDS")) 55 | } 56 | ``` 57 | 58 | ::: callout-note 59 | The above code will only run if it cannot find the path to the `b_lyrics.RDS` on your computer.
Then, we can just read in these files every time we knit the R Markdown, instead of re-downloading them every time. 60 | ::: 61 | 62 | Let's load the datasets 63 | 64 | ```{r, eval=TRUE, message=FALSE} 65 | b_lyrics <- readRDS(here("data","b_lyrics.RDS")) 66 | ts_lyrics <- readRDS(here("data","ts_lyrics.RDS")) 67 | sales <- readRDS(here("data","sales.RDS")) 68 | ``` 69 | 70 | # Part 1: Explore album sales 71 | 72 | In this section, the goal is to explore the sales of studio albums from Beyoncé and Taylor Swift. 73 | 74 | **Notes** 75 | 76 | - In each of the subsections below that ask you to create a plot, you must create a title, subtitle, x-axis label, and y-axis label with units where applicable. For example, if your axis says "sales" as an axis label, change it to "sales (in millions)". 77 | 78 | ## Part 1A 79 | 80 | In this section, we will do some data wrangling. 81 | 82 | 1. Use `lubridate` to create a column called `released` that is a `Date` class. However, to be able to do this, you first need to use `stringr` to search for pattern that matches things like this "(US)\[51\]" in a string like this "September 1, 2006 (US)\[51\]" and removes them. (**Note**: to get full credit, you must create the regular expression). 83 | 2. Use `forcats` to create a factor called `country` (**Note**: you may need to collapse some factor levels). 84 | 3. Transform the `sales` into a unit that is album sales in millions of dollars. 85 | 4. Keep only album sales from the UK, the US or the World. 86 | 5. Auto print your final wrangled tibble data frame. 87 | 88 | ```{r} 89 | # Add your solution here 90 | 91 | ``` 92 | 93 | ## Part 1B 94 | 95 | In this section, we will do some more data wrangling followed by summarization using wrangled data from Part 1A. 96 | 97 | 1. Keep only album sales from the US. 98 | 2. Create a new column called `years_since_release` corresponding to the number of years since the release of each album from Beyoncé and Taylor Swift. 
This should be a whole number and you should round down to "14" if you get a non-whole number like "14.12" years. (**Hint**: you may find the `interval()` function from `lubridate` helpful here, but this is not the only way to do this.) 99 | 3. Calculate the most recent, oldest, and the median years since albums were released for both Beyoncé and Taylor Swift. 100 | 101 | ```{r} 102 | # Add your solution here 103 | 104 | ``` 105 | 106 | ## Part 1C 107 | 108 | Using the wrangled data from Part 1A: 109 | 110 | 1. Calculate the total album sales for each artist and for each `country` (only sales from the UK, US, and World). 111 | 2. Using the total album sales, create a [percent stacked barchart](https://r-graph-gallery.com/48-grouped-barplot-with-ggplot2) using `ggplot2` of the percentage of sales of studio albums (in millions) along the y-axis for the two artists along the x-axis colored by the `country`. 112 | 113 | ```{r} 114 | # Add your solution here 115 | 116 | ``` 117 | 118 | ## Part 1D 119 | 120 | Using the wrangled data from Part 1A, use `ggplot2` to create a bar plot for the sales of studio albums (in millions) along the x-axis for each of the album titles along the y-axis. 121 | 122 | **Note**: 123 | 124 | - You only need to consider the global World sales (you can ignore US and UK sales for this part). 125 | - The title of the album must be clearly readable along the y-axis. 126 | - Each bar should be colored by which artist made that album. 127 | - The bars should be ordered from albums with the most sales (top) to the least sales (bottom) (**Note**: you must use functions from `forcats` for this step). 128 | 129 | ```{r} 130 | # Add your solution here 131 | 132 | ``` 133 | 134 | ## Part 1E 135 | 136 | Using the wrangled data from Part 1A, use `ggplot2` to create a scatter plot of sales of studio albums (in millions) along the y-axis by the release date for each album along the x-axis.
137 | 138 | **Note**: 139 | 140 | - The points should be colored by the artist. 141 | - There should be three scatter plots (one for UK, US and world sales) faceted by rows. 142 | 143 | ```{r} 144 | # Add your solution here 145 | 146 | ``` 147 | 148 | # Part 2: Exploring sentiment of lyrics 149 | 150 | In Part 2, we will explore the lyrics in the `b_lyrics` and `ts_lyrics` datasets. 151 | 152 | ## Part 2A 153 | 154 | Using `ts_lyrics`, create a new column called `line` with one line containing the character string for each line of Taylor Swift's songs. 155 | 156 | - How many lines in Taylor Swift's lyrics contain the word "hello"? For full credit, show all the rows in `ts_lyrics` that have "hello" in the `line` column and report how many rows there are in total. 157 | - How many lines in Taylor Swift's lyrics contain the word "goodbye"? For full credit, show all the rows in `ts_lyrics` that have "goodbye" in the `line` column and report how many rows there are in total. 158 | 159 | ```{r} 160 | # Add your solution here 161 | 162 | ``` 163 | 164 | ## Part 2B 165 | 166 | Repeat the same analysis for `b_lyrics` as described in Part 2A. 167 | 168 | ```{r} 169 | # Add your solution here 170 | 171 | ``` 172 | 173 | ## Part 2C 174 | 175 | Using the `b_lyrics` dataset, 176 | 177 | 1. Tokenize each lyrical line by words. 178 | 2. Remove the "stopwords". 179 | 3. Calculate the total number for each word in the lyrics. 180 | 4. Using the "bing" sentiment lexicon, add a column to the summarized data frame adding the "bing" sentiment lexicon. 181 | 5. Sort the rows from most frequent to least frequent words. 182 | 6. Only keep the top 25 most frequent words. 183 | 7. Auto print the wrangled tibble data frame. 184 | 8. Use `ggplot2` to create a bar plot with the top words on the y-axis and the frequency of each word on the x-axis. Color each bar by the sentiment of each word from the "bing" sentiment lexicon. 
Bars should be ordered from most frequent on the top to least frequent on the bottom of the plot. 185 | 9. Create a word cloud of the top 25 most frequent words. 186 | 187 | ```{r} 188 | # Add your solution here 189 | 190 | ``` 191 | 192 | ## Part 2D 193 | 194 | Repeat the same analysis as above in Part 2C, but for `ts_lyrics`. 195 | 196 | ```{r} 197 | # Add your solution here 198 | 199 | ``` 200 | 201 | ## Part 2E 202 | 203 | Using the `ts_lyrics` dataset, 204 | 205 | 1. Tokenize each lyrical line by words. 206 | 2. Remove the "stopwords". 207 | 3. Calculate the total number for each word in the lyrics **for each Album**. 208 | 4. Using the "afinn" sentiment lexicon, add a column to the summarized data frame adding the "afinn" sentiment lexicon. 209 | 5. Calculate the average sentiment score **for each Album**. 210 | 6. Auto print the wrangled tibble data frame. 211 | 7. Join the wrangled data frame from Part 1A (album sales in millions) with the wrangled data frame from #6 above (average sentiment score for each album). 212 | 8. Using `ggplot2`, create a scatter plot of the average sentiment score for each album (y-axis) and the album release date along the x-axis. Make the size of each point the album sales in millions. 213 | 9. Add a horizontal line at y-intercept=0. 214 | 10. Write 2-3 sentences interpreting the plot answering the question "How has the sentiment of Taylor Swift's albums changed over time?". Add a title, subtitle, and useful axis labels. 
215 | 216 | ```{r} 217 | # Add your solution here 218 | 219 | ``` 220 | -------------------------------------------------------------------------------- /resources.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Resources" 3 | --- 4 | 5 | 6 | # Learning R 7 | 8 | - Big Book of R: https://www.bigbookofr.com 9 | - List of resources to learn R (but also Python, SQL, Javascript): https://github.com/delabj/datacamp_alternatives/blob/master/index.md 10 | - learnr4free. Resources (books, videos, interactive websites, papers) to learn R. Some of the resources are beginner-friendly and start with the installation process: https://www.learnr4free.com/en 11 | - Data Science with R by Danielle Navarro: https://robust-tools.djnavarro.net -------------------------------------------------------------------------------- /scripts/make_flametree_icon.R: -------------------------------------------------------------------------------- 1 | # color palette from https://github.com/dill/beyonce 2 | library(here) 3 | library(flametree) 4 | 5 | # shades <- c("#A06AB4", "#FFD743", "#07BB9C", "#D773A2") 6 | shades <- beyonce_palette(101) 7 | flametree_grow(time = 12, trees = 2) %>% 8 | flametree_plot( 9 | background = "black", 10 | palette = shades, 11 | style = "plain") %>% 12 | flametree_save( 13 | filename = here("images", "cool_icon.png"), 14 | height = 4, 15 | width = 4 16 | ) 17 | -------------------------------------------------------------------------------- /styles.css: -------------------------------------------------------------------------------- 1 | /* css styles */ 2 | -------------------------------------------------------------------------------- /videos/downloadRMac.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/downloadRMac.gif 
-------------------------------------------------------------------------------- /videos/downloadRStudio.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/downloadRStudio.gif -------------------------------------------------------------------------------- /videos/downloadRWindows.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/downloadRWindows.gif -------------------------------------------------------------------------------- /videos/versionstring.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stephaniehicks/jhustatcomputing2022/c4594b795fd0f078461d7f8390ea72bde80bc7ab/videos/versionstring.gif --------------------------------------------------------------------------------