├── .github
├── CODEOWNERS
└── workflows
│ ├── deploy_main_preview.yml
│ ├── deploy_pr_preview.yml
│ ├── update_book.yml
│ └── update_build_environment.yml
├── .gitignore
├── Dockerfile
├── LICENSE.md
├── README.md
├── _bookdown.yml
├── _output.yml
├── build_html.sh
├── build_pdf.sh
├── data
├── can_lang.csv
├── can_lang.db
├── can_lang.tsv
├── can_lang.xlsx
├── can_lang_meta-data.csv
├── can_lang_no_names.tsv
├── canada_wiki.html
├── create_can_lang_variants.R
├── create_penguins_no_species.R
├── create_state_property_vote_variants.R
├── create_untidy_canlang.R
├── faithful.csv
├── historical_vote.csv
├── historical_vote_messy.csv
├── historical_vote_no_header.tsv
├── historical_vote_wide.csv
├── islands.csv
├── listings.csv
├── listings_original.csv
├── marketing.csv
├── mauna_loa.csv
├── mauna_loa_data.csv
├── michelson.csv
├── mtcars.csv
├── nasa.json
├── penguins.csv
├── penguins_all_vars.csv
├── region_data.csv
├── region_lang.csv
├── region_lang_top5_cities.csv
├── region_lang_top5_cities_long.csv
├── region_lang_top5_cities_messy.csv
├── region_lang_top5_cities_wide.csv
├── region_lang_with_nas.csv
├── retrieve_data.ipynb
├── sacramento.csv
├── state_property_vote.csv
├── state_property_vote.db
├── state_property_vote.tsv
├── state_property_vote.xlsx
├── state_property_vote_meta-data.csv
├── tweets.csv
├── us_vote.csv
├── wdbc.csv
├── wdbc_missing.csv
└── wdbc_unscaled.csv
├── docker-compose.yml
├── img
├── classification1
│ └── plot3d_knn_classification.png
├── classification2
│ ├── ML-paradigm-test.ai
│ ├── ML-paradigm-test.png
│ ├── cv.ai
│ ├── cv.png
│ ├── train-test-overview.ai
│ ├── train-test-overview.png
│ ├── training_test.ai
│ └── training_test.png
├── clustering
│ └── gentoo.jpg
├── frontmatter
│ ├── chapter_overview.ai
│ ├── chapter_overview.png
│ ├── ds-a-first-intro-cover.jpg
│ └── ds-a-first-intro-graphic.jpg
├── inference
│ ├── intro-bootstrap.jpeg
│ ├── population_vs_sample.ai
│ └── population_vs_sample.png
├── intro
│ ├── arrange_function.png
│ ├── canada_map.png
│ ├── filter_function.png
│ ├── ggplot_function.png
│ ├── help-filter.png
│ ├── intro-all.ai
│ ├── read_csv_function.png
│ ├── select_function.png
│ └── spreadsheet_vs_dataframe.png
├── jupyter
│ ├── activate-and-run-button-annotated.png
│ ├── code-cell-not-run.png
│ ├── code-cell-run.png
│ ├── convert-to-markdown-cell.png
│ ├── create-new-code-cell.png
│ ├── jupyter.png
│ ├── launcher-annotated.png
│ ├── markdown-cell-not-run.png
│ ├── markdown-cell-run.png
│ ├── open_data_w_editor_01.png
│ ├── open_data_w_editor_02.png
│ ├── out-of-order-1.png
│ ├── out-of-order-2.png
│ ├── out-of-order-3.png
│ └── restart-kernel-run-all.png
├── key_files
│ ├── chapter_overview.key
│ ├── data_frame_slides_cdn.key
│ ├── dsci-100-slide-images.key
│ ├── filesystem.key
│ ├── ggplot_functions.key
│ ├── git_intro.pptx
│ ├── pivot_functions.key
│ ├── png-vs-svg.pptx
│ ├── ref_vs_tibble.key
│ ├── spreadsheet_vs_dataframe.pptx
│ ├── summarize.key
│ ├── tidy_data.key
│ └── tidydata_bootstrap_train_test_images.key
├── reading
│ ├── NASA-API-Rho-Ophiuchi.png
│ ├── NASA-API-limits.png
│ ├── NASA-API-parameters.png
│ ├── NASA-API-signup.png
│ ├── craigslist_human.png
│ ├── filesystem.ai
│ ├── filesystem.png
│ ├── ref_vs_tibble.001.jpeg
│ ├── sg1.png
│ ├── sg2.png
│ ├── sg3.png
│ ├── sg4.png
│ └── website_source.txt
├── regression1
│ └── plot3d_knn_regression.png
├── regression2
│ └── plot3d_linear_regression.png
├── setup
│ ├── docker-1.png
│ ├── docker-2.png
│ ├── docker-3.png
│ ├── docker-4.png
│ ├── jlab-1.png
│ ├── jlab-2.png
│ └── ubuntu-docker.png
├── unused
│ ├── 1024px-Supervised_machine_learning_in_a_nutshell.svg
│ ├── ML-paradigm.jpeg
│ ├── ML-paradigm.png
│ ├── Page_Under_Construction.png
│ ├── README.md
│ ├── Supervised_machine_learning_in_a_nutshell.svg.png
│ ├── activate-and-run-button.png
│ ├── add_collab_01.png
│ ├── add_collab_02.png
│ ├── add_collab_03.png
│ ├── add_collab_04.png
│ ├── add_collab_05.png
│ ├── add_collab_06.png
│ ├── add_collab_06_new.png
│ ├── chapter_overview.001.jpeg
│ ├── clone_01.png
│ ├── clone_02.png
│ ├── clone_03.png
│ ├── clone_04.png
│ ├── create-new-file_01.png
│ ├── create-new-file_02.png
│ ├── create-new-file_03.png
│ ├── data_frame_slides_cdn.001.jpeg
│ ├── data_frame_slides_cdn.002.jpeg
│ ├── data_frame_slides_cdn.003.jpeg
│ ├── data_frame_slides_cdn.006.jpeg
│ ├── dataframe.jpeg
│ ├── dsci-100-slide-images.001.jpeg
│ ├── dsci-100-slide-images.002.jpeg
│ ├── dsci-100-slide-images.004.jpeg
│ ├── file-system-for-export-to-intro-datascience.svg
│ ├── git_add_01.png
│ ├── git_add_02.png
│ ├── git_add_03.png
│ ├── git_commit_01.png
│ ├── git_commit_02.png
│ ├── git_commit_02_new.png
│ ├── git_commit_03.png
│ ├── git_pull_00.png
│ ├── git_pull_01.png
│ ├── git_pull_02.png
│ ├── git_pull_03.png
│ ├── git_pull_04.png
│ ├── git_push_01.png
│ ├── git_push_02.png
│ ├── git_push_03.png
│ ├── git_push_04.png
│ ├── git_push_05.png
│ ├── git_push_05_new.png
│ ├── issue_01.png
│ ├── issue_02.png
│ ├── issue_03.png
│ ├── issue_04.png
│ ├── issue_05.png
│ ├── issue_05_new.png
│ ├── issue_06.png
│ ├── launcher.png
│ ├── long_to_wide.jpeg
│ ├── malignant_cancer.png
│ ├── merge_conflict_01.png
│ ├── merge_conflict_02.png
│ ├── merge_conflict_02_new.png
│ ├── merge_conflict_03.png
│ ├── merge_conflict_04.png
│ ├── merge_conflict_05.png
│ ├── merge_conflict_06.png
│ ├── new_repository_01.png
│ ├── new_repository_02.png
│ ├── new_repository_03.png
│ ├── obs.jpeg
│ ├── pen-tool_01.png
│ ├── pen-tool_02.png
│ ├── pen-tool_03.png
│ ├── pivot_longer_with_table.jpeg
│ ├── pivot_wider_with_table.jpeg
│ ├── population_vs_sample.svg
│ ├── prop_val_vs_income.png
│ ├── prop_val_vs_income_by_party.png
│ ├── prop_val_vs_income_human_labs.png
│ ├── r.PNG
│ ├── ref_vs_tibble.jpeg
│ ├── sampling.001.jpeg
│ ├── sampling.002.jpeg
│ ├── spreadsheet.PNG
│ ├── testing.png
│ ├── text_cell_formatted.png
│ ├── text_cell_unformatted.png
│ ├── tidy.png
│ ├── tidy_data.jpeg
│ ├── timbits.jpg
│ ├── training_validation.jpeg
│ ├── upload-files_01.png
│ ├── upload_files_02.png
│ ├── vars.jpeg
│ ├── vc-ba1-changes.png
│ ├── vc3-add.png
│ ├── vc4-commit.png
│ ├── vc5-5-nachos-to-cheesecake.png
│ ├── vec_vs_list.jpeg
│ ├── vector.jpeg
│ ├── vectors.jpeg
│ ├── wide_to_long.jpeg
│ └── wikipedia_human.png
├── version-control
│ ├── add_collab_01.png
│ ├── add_collab_02.png
│ ├── add_collab_03.png
│ ├── add_collab_04.png
│ ├── add_collab_05.png
│ ├── clone_01.png
│ ├── clone_02.png
│ ├── clone_03.png
│ ├── clone_04.png
│ ├── create-new-file_01.png
│ ├── create-new-file_02.png
│ ├── create-new-file_03.png
│ ├── generate-pat_01.png
│ ├── generate-pat_02.png
│ ├── generate-pat_03.png
│ ├── git_add_01.png
│ ├── git_add_02.png
│ ├── git_add_03.png
│ ├── git_commit_01.png
│ ├── git_commit_03.png
│ ├── git_pull_00.png
│ ├── git_pull_01.png
│ ├── git_pull_02.png
│ ├── git_pull_03.png
│ ├── git_pull_04.png
│ ├── git_push_01.png
│ ├── git_push_02.png
│ ├── git_push_03.png
│ ├── git_push_04.png
│ ├── issue_01.png
│ ├── issue_02.png
│ ├── issue_03.png
│ ├── issue_04.png
│ ├── issue_06.png
│ ├── merge_conflict_01.png
│ ├── merge_conflict_03.png
│ ├── merge_conflict_04.png
│ ├── merge_conflict_05.png
│ ├── merge_conflict_06.png
│ ├── new_repository_01.png
│ ├── new_repository_02.png
│ ├── new_repository_03.png
│ ├── pen-tool_01.png
│ ├── pen-tool_02.png
│ ├── pen-tool_03.png
│ ├── upload-files_01.png
│ ├── upload-files_02.png
│ ├── vc-ba2-add.png
│ ├── vc-ba3-commit.png
│ ├── vc1-no-changes.png
│ ├── vc2-changes.png
│ ├── vc5-push.png
│ ├── vc6-remote-changes.png
│ ├── vc7-pull.png
│ └── version-control-all.ai
├── viz
│ ├── faithful_plot.bmp
│ ├── faithful_plot.jpg
│ ├── faithful_plot.png
│ ├── faithful_plot.svg
│ ├── faithful_plot.tiff
│ └── png-vs-svg.png
└── wrangling
│ ├── data_frame_slides_cdn.004.ai
│ ├── data_frame_slides_cdn.004.png
│ ├── data_frame_slides_cdn.005.ai
│ ├── data_frame_slides_cdn.005.png
│ ├── data_frame_slides_cdn.007.ai
│ ├── data_frame_slides_cdn.007.png
│ ├── data_frame_slides_cdn.008.ai
│ ├── data_frame_slides_cdn.008.png
│ ├── data_frame_slides_cdn.009.ai
│ ├── data_frame_slides_cdn.009.png
│ ├── mutate_function.png
│ ├── pivot_functions.001.ai
│ ├── pivot_functions.001.png
│ ├── pivot_functions.002.ai
│ ├── pivot_functions.002.png
│ ├── pivot_functions.003.ai
│ ├── pivot_functions.003.png
│ ├── pivot_functions.004.ai
│ ├── pivot_functions.004.png
│ ├── pivot_longer.png
│ ├── pivot_wider.png
│ ├── separate_function.png
│ ├── summarize.001.ai
│ ├── summarize.001.png
│ ├── summarize.002.ai
│ ├── summarize.002.png
│ ├── summarize.003.ai
│ ├── summarize.003.png
│ ├── summarize.004.ai
│ ├── summarize.004.png
│ ├── summarize.005.ai
│ ├── summarize.005.png
│ ├── tidy_data.001.ai
│ ├── tidy_data.001.png
│ └── wrangling-syntax-all.ai
├── index.Rmd
├── krantz.cls
├── scripts
├── intro_bootstrap_image.R
└── population-sample.R
└── source
├── acknowledgments.Rmd
├── after_body.tex
├── analytics.html
├── authors.Rmd
├── before_body.tex
├── classification1.Rmd
├── classification2.Rmd
├── clustering.Rmd
├── foreword.Rmd
├── inference.Rmd
├── intro.Rmd
├── jupyter.Rmd
├── preamble.tex
├── preface.Rmd
├── reading.Rmd
├── references.Rmd
├── references.bib
├── regression1.Rmd
├── regression2.Rmd
├── setup.Rmd
├── style.css
├── version-control.Rmd
├── viz.Rmd
└── wrangling.Rmd
/.github/CODEOWNERS:
--------------------------------------------------------------------------------
1 | source/** @UBC-DSCI/dsci-100-codeowners
2 |
--------------------------------------------------------------------------------
/.github/workflows/deploy_main_preview.yml:
--------------------------------------------------------------------------------
1 | # Rebuilds the HTML book on pushes to `main` that touch book inputs, then
2 | # publishes the contents of docs/ into the dev/ folder of the GitHub Pages branch.
3 | name: Rebuild and deploy dev version of book to gh-pages branch in dev/ folder
4 | on:
5 |   push:
6 |     branches:
7 |       - main
8 |     # Only rebuild when a file that feeds into the rendered book changes.
9 |     paths:
10 |       - 'index.Rmd'
11 |       - '_bookdown.yml'
12 |       - '_output.yml'
13 |       - 'source/*.Rmd'
14 |       - 'source/*.bib'
15 |       - 'source/*.css'
16 |       - 'data/**'
17 |       - 'img/**'
18 |       - 'build_html.sh'
19 |
20 | jobs:
21 |   deploy-main-preview:
22 |     runs-on: ubuntu-latest
23 |     permissions:
24 |       contents: write
25 |       packages: write
26 |
27 |     steps:
28 |       # Record the runner's uid up front; it is used after the build to restore
29 |       # ownership of the workspace.
30 |       - name: Get Actions user id
31 |         id: get_uid
32 |         run: |
33 |           actions_user_id=`id -u $USER`
34 |           echo $actions_user_id
35 |           echo "uid=$actions_user_id" >> "$GITHUB_OUTPUT"
36 |
37 |       - name: checkout
38 |         uses: actions/checkout@v4
39 |         with:
40 |           ref: 'main'
41 |
42 |       - name: Build the book
43 |         run: |
44 |           ./build_html.sh
45 |
46 |       # NOTE(review): presumably the build leaves files in the workspace that are
47 |       # not owned by the runner user, which would break the publish step — confirm.
48 |       - name: Reset ownership of workspace after build
49 |         uses: peter-murray/reset-workspace-ownership-action@v1
50 |         with:
51 |           user_id: ${{ steps.get_uid.outputs.uid }}
52 |
53 |       # Push the book's HTML to github-pages
54 |       - name: GitHub Pages action
55 |         uses: peaceiris/actions-gh-pages@v4
56 |         with:
57 |           github_token: ${{ secrets.GITHUB_TOKEN }}
58 |           publish_dir: docs/
59 |           # keep_files preserves the other folders already on the Pages branch.
60 |           keep_files: true
61 |           destination_dir: dev
62 |           # force_orphan: true # once peaceiris updates to v4, change this to true and keep_files: true for the PR / main branch deploy previews
63 |
--------------------------------------------------------------------------------
/.github/workflows/deploy_pr_preview.yml:
--------------------------------------------------------------------------------
1 | # For each PR into `main` that touches book inputs: builds the book from the PR
2 | # branch, publishes it to pull<PR number>/ on the GitHub Pages branch, builds a
3 | # website_diff against dev/, publishes that to diff<PR number>/, and posts the
4 | # resulting URLs as a PR comment.
5 | name: "Rebuild and deploy PR version of book to gh-pages branch in pull###/ folder"
6 | on:
7 |   pull_request:
8 |     types: [opened, synchronize]
9 |     paths:
10 |       - 'index.Rmd'
11 |       - '_bookdown.yml'
12 |       - '_output.yml'
13 |       - 'source/*.Rmd'
14 |       - 'source/*.bib'
15 |       - 'source/*.css'
16 |       - 'data/**'
17 |       - 'img/**'
18 |       - 'Dockerfile'
19 |     branches:
20 |       - 'main'
21 |
22 | jobs:
23 |   deploy-pr-preview:
24 |     runs-on: ubuntu-latest
25 |     permissions:
26 |       contents: write
27 |       packages: write
28 |       pull-requests: write
29 |
30 |     steps:
31 |       # Block until the "Rebuild docker image" check on this commit has finished,
32 |       # so that the book is built with any updated build environment.
33 |       - name: Wait for potential build environment update
34 |         uses: fountainhead/action-wait-for-check@v1.1.0
35 |         with:
36 |           token: ${{ secrets.GITHUB_TOKEN }}
37 |           checkName: "Rebuild docker image"
38 |           ref: ${{ github.event.pull_request.head.sha }}
39 |           timeoutSeconds: 60000
40 |
41 |       # Record the runner's uid up front; it is used after the build to restore
42 |       # ownership of the workspace.
43 |       - name: Get Actions user id
44 |         id: get_uid
45 |         run: |
46 |           actions_user_id=`id -u $USER`
47 |           echo $actions_user_id
48 |           echo "uid=$actions_user_id" >> "$GITHUB_OUTPUT"
49 |
50 |       - name: Checkout the repo
51 |         uses: actions/checkout@v4
52 |         with:
53 |           fetch-depth: '0'
54 |           ref: ${{ github.head_ref }}
55 |
56 |       - name: Build the book
57 |         run: |
58 |           ./build_html.sh
59 |
60 |       - name: Reset ownership of workspace after build
61 |         uses: peter-murray/reset-workspace-ownership-action@v1
62 |         with:
63 |           user_id: ${{ steps.get_uid.outputs.uid }}
64 |
65 |       # Push the book's HTML to github-pages
66 |       - name: GitHub Pages action
67 |         uses: peaceiris/actions-gh-pages@v4
68 |         with:
69 |           github_token: ${{ secrets.GITHUB_TOKEN }}
70 |           publish_dir: docs/
71 |           keep_files: true
72 |           destination_dir: pull${{ github.event.number }}
73 |           # force_orphan: true # once peaceiris updates to v4, change this to true and keep_files: true for the PR / main branch deploy previews
74 |
75 |       # Switch the workspace to the published site so that dev/ and the freshly
76 |       # pushed pull<PR number>/ folder can be compared.
77 |       - name: Checkout the gh-pages branch
78 |         uses: actions/checkout@v4
79 |         with:
80 |           fetch-depth: '0'
81 |           ref: 'gh-pages'
82 |
83 |       - name: Run website diff
84 |         run: |
85 |           rustup update
86 |           pip install --upgrade pip
87 |           pip install website_diff
88 |           rm -rf diff${{ github.event.number }}
89 |           website_diff --old dev --new pull${{ github.event.number }} --diff diff${{ github.event.number }}
90 |
91 |       - name: GitHub Pages action to push diff
92 |         uses: peaceiris/actions-gh-pages@v4
93 |         with:
94 |           github_token: ${{ secrets.GITHUB_TOKEN }}
95 |           publish_dir: diff${{ github.event.number }}
96 |           keep_files: true
97 |           destination_dir: diff${{ github.event.number }}
98 |           # force_orphan: true # once peaceiris updates to v4, change this to true and keep_files: true for the PR / main branch deploy previews
99 |
100 |       - name: Post URLS to PR thread
101 |         uses: mshick/add-pr-comment@v2.8.1
102 |         with:
103 |           message: |
104 |             Hello! I've built a preview of your PR so that you can compare it to the current `main` branch.
105 |             * PR deploy preview available [here](https://datasciencebook.ca/pull${{ github.event.number }}/index.html)
106 |             * PR diff with `main` available [here](https://datasciencebook.ca/diff${{ github.event.number }}/index.html)
107 |             * Current `main` deploy preview available [here](https://datasciencebook.ca/dev/index.html)
108 |             * Public production build available [here](https://datasciencebook.ca)
109 |
--------------------------------------------------------------------------------
/.github/workflows/update_book.yml:
--------------------------------------------------------------------------------
1 | # Promotes the dev/ preview to the public site: on pushes to `production` that
2 | # touch book inputs, replaces the root of the gh-pages branch with the contents
3 | # of its dev/ folder and republishes it as a single orphan commit.
4 | name: Rebuild and deploy book to gh-pages branch
5 | on:
6 |   push:
7 |     branches:
8 |       - production
9 |     paths:
10 |       - 'index.Rmd'
11 |       - '_bookdown.yml'
12 |       - '_output.yml'
13 |       - 'source/*.Rmd'
14 |       - 'source/*.bib'
15 |       - 'source/*.css'
16 |       - 'data/**'
17 |       - 'img/**'
18 |       - 'build_html.sh'
19 |
20 | jobs:
21 |   deploy-book:
22 |     runs-on: ubuntu-latest
23 |     permissions:
24 |       contents: write
25 |       packages: write
26 |
27 |     steps:
28 |       - name: checkout gh-pages
29 |         uses: actions/checkout@v4
30 |         with:
31 |           ref: 'gh-pages'
32 |
33 |       - name: Clean the site contents except for dev, pull contents of dev/ to main site
34 |         run: |
35 |           # delete everything except the dev and .git folders
36 |           # (-exec ... {} + passes names to rm directly, so paths containing
37 |           # whitespace are handled correctly, unlike piping through xargs)
38 |           find . -maxdepth 1 ! -name ".git" ! -name "dev" ! -name "." -exec rm -rf {} +
39 |           # copy the contents of dev into the root
40 |           cp -rf dev/* .
41 |
42 |       # Push updated website, clean out old commits
43 |       - name: Update website
44 |         uses: peaceiris/actions-gh-pages@v4
45 |         with:
46 |           github_token: ${{ secrets.GITHUB_TOKEN }}
47 |           publish_dir: ./
48 |           force_orphan: true
49 |           cname: datasciencebook.ca
50 |
--------------------------------------------------------------------------------
/.github/workflows/update_build_environment.yml:
--------------------------------------------------------------------------------
1 | # On every PR into `main`, checks whether the Dockerfile changed. If it did,
2 | # rebuilds and publishes the ubcdsci/intro-to-ds image, rewrites the image tag in
3 | # build_html.sh, build_pdf.sh, and docker-compose.yml, and pushes those commits
4 | # back to the PR branch.
5 | name: Rebuild and publish new ubcdsci/intro-to-ds image on DockerHub
6 | on:
7 |   pull_request:
8 |     types: [opened, synchronize]
9 |     branches:
10 |       - 'main'
11 | jobs:
12 |   rebuild-docker:
13 |     name: Rebuild docker image
14 |     runs-on: ubuntu-latest
15 |     permissions:
16 |       contents: write
17 |     steps:
18 |       - name: Checkout PR branch
19 |         uses: actions/checkout@v4
20 |         with:
21 |           fetch-depth: '0'
22 |           ref: ${{ github.head_ref }}
23 |       # Sets the step output `stale_dockerfile` to 'true' when the Dockerfile
24 |       # differs between the two commits being compared, and to 'false' otherwise.
25 |       # For a newly opened PR the comparison is base..head; for a synchronize
26 |       # event it is the previous head..new head.
27 |       - name: Check if Dockerfile needs to be rebuilt
28 |         id: check-stale
29 |         run: |
30 |           echo "Checking if Dockerfile was modified since last commit on this PR"
31 |           echo "GitHub PR action type: ${{ github.event.action }}"
32 |           if [ "${{ github.event.action }}" == "opened" ]; then
33 |             echo "GitHub base ref: ${{ github.event.pull_request.base.sha }}"
34 |             echo "GitHub head ref: ${{ github.event.pull_request.head.sha }}"
35 |             BEFORE=${{ github.event.pull_request.base.sha }}
36 |             AFTER=${{ github.event.pull_request.head.sha }}
37 |           else
38 |             echo "GitHub event before: ${{ github.event.before }}"
39 |             echo "GitHub event after: ${{ github.event.after }}"
40 |             BEFORE=${{ github.event.before }}
41 |             AFTER=${{ github.event.after }}
42 |           fi
43 |           if git diff --quiet $BEFORE $AFTER Dockerfile; then
44 |             echo "PR synchronized, but Dockerfile was not edited. Not rebuilding the image."
45 |             echo "stale_dockerfile=false" >> "$GITHUB_OUTPUT"
46 |           else
47 |             echo "PR synchronized, and Dockerfile was edited, so rebuilding the image."
48 |             echo "stale_dockerfile=true" >> "$GITHUB_OUTPUT"
49 |           fi
50 |       - name: Rebuild and publish image
51 |         if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }}
52 |         id: rebuild
53 |         uses: elgohr/Publish-Docker-Github-Action@v5
54 |         with:
55 |           name: ubcdsci/intro-to-ds
56 |           username: ${{ secrets.DOCKER_USERNAME }}
57 |           password: ${{ secrets.DOCKER_PASSWORD }}
58 |           dockerfile: Dockerfile
59 |           snapshot: true
60 |       # In the three "Update ..." steps below, the PR branch name is passed in via
61 |       # the environment (HEAD_REF) rather than being expanded into the script text.
62 |       # Branch names are chosen by the PR author, so expanding them inline would
63 |       # allow a crafted name to inject shell commands.
64 |       - name: Update build_html.sh script
65 |         if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }}
66 |         env:
67 |           HEAD_REF: ${{ github.head_ref }}
68 |         run: |
69 |           git config --local user.email "action@github.com"
70 |           git config --local user.name "GitHub Action"
71 |           git pull origin "$HEAD_REF"
72 |           sed 's/ubcdsci\/intro-to-ds:[[:alnum:]]\+/ubcdsci\/intro-to-ds:${{ steps.rebuild.outputs.snapshot-tag }}/g' build_html.sh > build_html.tmp && mv build_html.tmp build_html.sh
73 |           chmod u+x build_html.sh
74 |           git add build_html.sh
75 |           git commit -m "update build_html.sh script with new docker image"
76 |       - name: Update build_pdf.sh script
77 |         if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }}
78 |         env:
79 |           HEAD_REF: ${{ github.head_ref }}
80 |         run: |
81 |           git config --local user.email "action@github.com"
82 |           git config --local user.name "GitHub Action"
83 |           git pull origin "$HEAD_REF"
84 |           sed 's/ubcdsci\/intro-to-ds:[[:alnum:]]\+/ubcdsci\/intro-to-ds:${{ steps.rebuild.outputs.snapshot-tag }}/g' build_pdf.sh > build_pdf.tmp && mv build_pdf.tmp build_pdf.sh
85 |           chmod u+x build_pdf.sh
86 |           git add build_pdf.sh
87 |           git commit -m "update build_pdf.sh script with new docker image"
88 |       - name: Update docker-compose.yml script
89 |         if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }}
90 |         env:
91 |           HEAD_REF: ${{ github.head_ref }}
92 |         run: |
93 |           git config --local user.email "action@github.com"
94 |           git config --local user.name "GitHub Action"
95 |           git pull origin "$HEAD_REF"
96 |           sed 's/ubcdsci\/intro-to-ds:[[:alnum:]]\+/ubcdsci\/intro-to-ds:${{ steps.rebuild.outputs.snapshot-tag }}/g' docker-compose.yml > docker-compose.tmp && mv docker-compose.tmp docker-compose.yml
97 |           git add docker-compose.yml
98 |           git commit -m "update docker-compose.yml script with new docker image"
99 |       - name: Push changes to build scripts
100 |         if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }}
101 |         uses: ad-m/github-push-action@master
102 |         with:
103 |           github_token: ${{ secrets.GITHUB_TOKEN }}
104 |           branch: ${{ github.head_ref }}
105 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | **.DS_Store
6 | *.sw*
7 | _bookdown_files
8 | **.ipynb_checkpoints
9 | .rstudio/**
10 | docs/**
11 | .local/**
12 | *.log
13 | _main.Rmd
14 | _main_files/**
15 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright (c) UBC-DSCI Development Team.
2 | # Build environment for the book: R packages, a Python environment for plotly
3 | # image export, raised ImageMagick limits, and the LaTeX packages for the PDF.
4 | FROM rocker/verse:4.3.1
5 |
6 | RUN Rscript -e "update.packages(ask = FALSE)"
7 | RUN install2.r --error magick \
8 |     cowplot \
9 |     kableExtra \
10 |     plotly \
11 |     tidymodels \
12 |     tidyclust \
13 |     kknn \
14 |     ggpubr \
15 |     ggforce \
16 |     themis \
17 |     egg \
18 |     fontawesome \
19 |     xfun \
20 |     tinytex \
21 |     reticulate \
22 |     rsvg
23 |
24 | RUN Rscript -e "devtools::install_github('ttimbers/canlang@0.0.1')"
25 | RUN Rscript -e "reticulate::install_miniconda()"
26 | RUN Rscript -e "reticulate::conda_install('r-reticulate', 'python-kaleido')"
27 | RUN Rscript -e "reticulate::conda_install('r-reticulate', 'plotly', channel = 'plotly')"
28 | # necessary for tuning number of clusters in Kmeans
29 | # see https://github.com/tidymodels/tidyclust/issues/127
30 | # The package index is refreshed in the same layer as the install so that a
31 | # cached, stale index is never used; the lists are removed afterwards because
32 | # nothing later in this file uses apt.
33 | RUN apt-get update --fix-missing \
34 |     && apt-get install -y libgsl27 \
35 |     && rm -rf /var/lib/apt/lists/*
36 |
37 | # increase the ImageMagick resource limits
38 | # this relies on the fact that there is only one place where each of these sizes are used in policy.xml
39 | # (256MiB is for memory, 512MiB is for map, 1GiB is for disk)
40 | RUN sed -i \
41 |     -e 's/256MiB/8GiB/' \
42 |     -e 's/512MiB/8GiB/' \
43 |     -e 's/1GiB/8GiB/' \
44 |     /etc/ImageMagick-6/policy.xml
45 |
46 | ## install LaTeX packages
47 | RUN tlmgr install \
48 |     amsmath \
49 |     latex-amsmath-dev \
50 |     iftex \
51 |     euenc \
52 |     fontspec \
53 |     tipa \
54 |     unicode-math \
55 |     xunicode \
56 |     kvoptions \
57 |     ltxcmds \
58 |     kvsetkeys \
59 |     etoolbox \
60 |     xcolor \
61 |     fancyvrb \
62 |     framed \
63 |     booktabs \
64 |     mdwtools \
65 |     float \
66 |     caption \
67 |     sourcecodepro \
68 |     hyperref \
69 |     amscls \
70 |     multirow \
71 |     wrapfig \
72 |     colortbl \
73 |     pdflscape \
74 |     tabu \
75 |     varwidth \
76 |     threeparttable \
77 |     threeparttablex \
78 |     environ \
79 |     trimspaces \
80 |     ulem \
81 |     makecell \
82 |     natbib \
83 |     pdftexcmds \
84 |     infwarerr \
85 |     fontawesome5
86 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # License
2 |
3 | Copyright (c) 2020 Tiffany A. Timbers, Trevor Campbell, Melissa Lee
4 |
5 | This textbook (source contained in the [`introduction-to-datascience` repository](https://github.com/UBC-DSCI/introduction-to-datascience)) is made available under the **Attribution-NonCommercial-ShareAlike 4.0 International** ([CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)).
6 |
7 | This is a human-readable summary of (and not a substitute for) the [license](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
8 |
9 | ## You are free to:
10 |
11 | - **Share** — copy and redistribute the material in any medium or format
12 | - **Adapt** — remix, transform, and build upon the material
13 |
14 | The licensor cannot revoke these freedoms as long as you follow the license terms.
15 |
16 | ## Under the following terms:
17 |
18 | - **Attribution** — You must give appropriate credit (mentioning that your work is derived from work that is Copyright © Tiffany A. Timbers, Trevor Campbell, Melissa Lee and, where practical, linking to https://datasciencebook.ca/), provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
19 | - **NonCommercial** — You may not use the material for commercial purposes.
20 | - **ShareAlike** — If you remix, transform, or build upon the material, you must distribute your contributions under the same license as the original.
21 |
22 | **No additional restrictions** — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.
23 |
24 | ## Notices:
25 |
26 | You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation.
27 |
28 | No warranties are given. The license may not give you all of the permissions necessary for your intended use. For example, other rights such as publicity, privacy, or moral rights may limit how you use the material.
29 |
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Data Science: A First Introduction
2 | This is the source for the *Data Science: A First Introduction* textbook.
3 |
4 | The book is available online at: https://datasciencebook.ca/
5 |
6 | © 2020 Tiffany A. Timbers, Trevor Campbell, Melissa Lee
7 |
8 | For the python version of the textbook, please visit https://python.datasciencebook.ca or the github repository at https://github.com/ubc-dsci/introduction-to-datascience-python.
9 |
10 | ## License Information
11 |
12 | This textbook is offered under
13 | the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) License](https://creativecommons.org/licenses/by-nc-sa/4.0/).
14 | See [the license file](LICENSE.md) for more information.
15 |
16 | ## Development
17 |
18 | ### Setup
19 |
20 | Building the book requires Docker (instructions here: https://docs.docker.com/get-docker/).
21 |
22 | ### Build locally
23 |
24 | You must have at least 8GB of RAM (and ideally more like 16GB RAM) to build the book.
25 |
26 | You can build the HTML version of the book on your own machine by running
27 | ```
28 | ./build_html.sh
29 | ```
30 | in the root directory of this repository. The book can be viewed in your browser by opening the `docs/index.html` file.
31 |
32 | You can build the PDF version of the book on your own machine by running
33 | ```
34 | ./build_pdf.sh
35 | ```
36 | in the root directory of this repository. The book can be viewed in a PDF reader by opening `docs/_main.pdf`.
37 |
38 | #### Working with RStudio (HTML only)
39 |
40 | If you want to edit the source material and build the book using RStudio, navigate to the repository root and run
41 | ```
42 | docker-compose up -d
43 | ```
44 | to start up the docker container. Then open a web browser and type [http://localhost:8787/](http://localhost:8787/).
45 | For the username enter `rstudio`, and for the password enter `password`.
46 | At any point you can render the book by running the following R code in the R console:
47 | ```
48 | bookdown::render_book('index.Rmd', 'bookdown::gitbook')
49 | ```
50 | When you are done working, make sure to type `docker-compose down` to shut down the container.
51 |
52 | ### Contributing
53 |
54 | Primary development in this repository happens on the `main` branch. If you want to contribute to the book,
55 | please branch off of `main` and make a pull request into `main`. You cannot commit directly to `main`.
56 |
57 | The `production` branch contains the source material corresponding to the current publicly-viewable version of the book website.
58 |
59 | The `gh-pages` branch serves the current book website at https://datasciencebook.ca.
60 |
61 | ### Workflows
62 |
63 | #### Book deployment
64 |
65 | You can update the live, publicly viewable HTML book by making changes to the `source/` folder in the `production` branch (e.g. by merging `main` into `production`).
66 | GitHub will trigger a rebuild of the public HTML site, and store the built book in the root folder of the `gh-pages` branch.
67 |
68 | #### `main` deploy previews
69 |
70 | Any commit to `source/**` on the `main` branch (from a merged PR) will trigger a rebuild of the development preview site served at `https://datasciencebook.ca/dev`.
71 | The built preview book will be stored in the `dev/` folder on the `gh-pages` branch.
72 |
73 | #### PR deploy previews
74 |
75 | Any PR to `source/` will trigger a build of a PR preview site at `https://datasciencebook.ca/pull###`, where `###` is the number of the pull request.
76 | It will also trigger a build of the diff using [website_diff](https://github.com/trevorcampbell/website_diff) to the current `dev/` site at `https://datasciencebook.ca/diff###`.
77 | The built preview and diff books will be stored in the `pull###/` and `diff###/` folders on the `gh-pages` branch.
78 |
79 | #### Build environment updates
80 |
81 | Any PR to `Dockerfile` will trigger a rebuild of the docker image, push it to DockerHub, and update the image tags in the `build_html.sh` and `build_pdf.sh` scripts and in the `docker-compose.yml` file on the PR automatically.
82 | This new build environment will be used for the PR deploy preview mentioned above.
83 |
84 | ## Style Guide
85 |
86 | ### General
87 | - **80 character line limit!** This is necessary to make git diffs useful
88 | - numbers in text should be English words ("four common mistakes" not "4 common mistakes") unless there are units (40km, not forty km)
89 | - use Oxford commas ("a, b, and c" not "a, b and c")
90 | - "subset" should not be used as a verb
91 | - functions in text should not have parentheses (`read_csv` not `read_csv()`)
92 | - remove all references to "course" and "student"; replace with "reader" or "you" where necessary
93 | - make sure we have permission to use all external resources that we use
94 | - remove all references to "clicking on things" in the HTML version of the book (e.g. "click this link to ...")
95 | - When we introduce a new term, use `**bolding**` to typeset it (but only the first introduction of the term)
96 | - for symbols as part of the text, make sure you give them their full name and surround with parentheses so that they
97 | don't "disappear" in the rest of the text. So for example, if I have a `,` in the text, I should do
98 | something like "here is some text about the comma (`,`)". Or for `<-`, we should do "something like this assignment operator (`<-`)".
99 | There are likely exceptions to this rule though.
100 | - Book titles in the text should be typeset in italics (e.g. *R for Data Science*)
101 |
102 | ### Code blocks
103 | - Use the knitr label format `##-[name with only alphanumeric + hyphens]` where
104 | the `##` is the 2-digit chapter number, e.g. `03-test-name` for a label `test-name` in chapter 3
105 | - Make sure to get syntax highlighting by specifying the language in each code block:
106 |
107 | ```r
108 | code
109 | ```
110 |
111 | not
112 |
113 | ```
114 | code
115 | ```
116 | (similar for `html` where needed)
117 | - always use `|>` pipe, not `%>%`
118 | - anywhere we specify a grid of tuning values, don't just do `grid = 10`; actually specify the values using `seq` or `c(...)`
119 | - do not end code blocks with `head(dataframe)`; just use `dataframe` to print
120 | - `set.seed` once at the beginning of each chapter
121 | - use `"double quotes"` for strings, not `'single quotes'`
122 | - make sure all lines of code are at most 80 characters (for LaTeX PDF output typesetting)
123 | - pass code blocks through `styler` (although must obey the 80ch limit)
124 | - use `slice`, `slice_min`, `slice_max` (not `top_n`)
125 | - just `pull(colname)`, don't `select` first
126 |
127 | ### Section headings
128 | - All (sub)section headings should be sentence case ("Loading a tabular data set", not "Loading a Tabular Data Set")
129 | - Make sure that subsections occur in 1-step hierarchies (no subsubsection directly below subsection, for example)
130 | - Make sure that `{-}` is used wherever unnumbered headings are required
131 |
132 | Choose an appropriate table of contents depth via (example has depth 2 below, which is a good default)
133 | ```
134 | bookdown::gitbook:
135 | toc_depth: 2
136 | ```
137 |
138 | ### Learning objectives
139 | - when saying that students will do things in code, always say "in R"
140 | - "you will be able to" (not "students will be able to", "the reader will be able to")
141 |
142 | ### Captions
143 | - captions should be sentence formatted and end with a period
144 | - If you have special characters (particularly underscores, quotation marks, plus signs, other LaTeX math symbols) make sure to separate
145 | the caption out of the code chunk like so
146 | ```
147 | (ref:blah)
148 |
149 | \`\`\`
150 | {r blah, other_options}
151 | code here
152 | \`\`\`
153 | ```
154 |
155 | ### Equations
156 | - make sure all equations get capitalized labels ("Equation \\@ref(blah)", not "equation below" or "equation above")
157 |
158 | ### Figures
159 | - make sure all figures get (capitalized) labels ("Figure \\@ref(blah)", not "figure below" or "figure above")
160 | - make sure all figures get captions
161 | - specify image widths of pngs and jpegs in terms of linewidth percent
162 | (e.g. `out.width="70%"`),
163 | for plots we create in R use `fig.width` and `fig.height`.
164 | - center align all images via `fig.align = "center"`
165 | - make sure we have permission for every figure/logo that we use
166 | - Make sure all figures follow the visualization principles in Chapter 4
167 | - Make sure axes are set appropriately to not inflate/deflate differences artificially *where it does not compromise clarity* (e.g. in the classification
168 | chapter there are a few examples where zoomed-in accuracy axes are better than using the full range 0 to 1)
169 | - Fig size for bar charts should be: `fig.width=5, fig.height=3` (figs 1.7 & 1.8 are exceptions, so that we can read the axis labels)
170 | - cropping width for syntax diagrams is 1625 (done using `image_crop`)
171 |
172 | ### Tables
173 | - make sure all tables get capitalized labels ("Table \\@ref(blah)", not "table below" or "table above")
174 | - make sure all tables get captions
175 | - make sure the row + column spacing is reasonable
176 | - Do not put links in table captions, it breaks pdf rendering
177 | - Do not put underscores in table captions, it breaks pdf rendering
178 |
179 | ### Note boxes
180 | - note boxes should be typeset as quote boxes using `>` and start with **Note:**
181 |
182 | ### Bibliography
183 | - do not put "et al" or "and others"; always use the full list of authors, BibTeX will choose how to abbreviate
184 | - read https://trevorcampbell.me/html/bibtex.html and make sure our bib follows this convention
185 |
186 | ### Naming conventions
187 | - K-means (not $K$-\*, K means, Kmeans)
188 | - K-nearest neighbors (not $K$-\*, K nearest neighbors, K nearest neighbor, use US spelling neighbor not neighbour). Note that "K-nearest neighbor" is not the singular form; "K-nearest neighbors" is
189 | - K-NN (not $K$-\*, KNN, K NN, $K$NN, K-nn)
190 | - local repository (not local computer)
191 | - package (not library, meta package, meta-package)
192 | - data science (not Data Science)
193 | - data frame (not dataframe)
194 | - data set (not dataset)
195 | - scatter plot (not scatterplot)
196 | - bar plot (not bar chart)
197 | - capitalize all initialisms and acronyms (URL not url, API not api, K-NN not k-nn)
198 | - response variable (not target, output, label)
199 | - predictor variable (not explanatory, feature)
200 | - numerical variable (not quantitative variable)
201 | - categorical variable (not class variable)
202 |
203 | ### Punctuation
204 | - em-dashes should have no surrounding spaces. `This kind of typesetting—which is awesome—is correct!` and `Typesetting with spaces around em-dashes — which is bad — is not correct`
205 | - make sure `\index` commands don't break punctuation spacing. E.g. `This is an item \index{item}; it is good` will typeset with an erroneous space after item, i.e. `This is an item ; it is good`
206 |
207 | ### Common typos to check for
208 | - RMPSE: should be RMSPE
209 | - boostrap: should be bootstrap
210 |
211 | ### Use American spelling
212 | Generally the book uses American spelling. Some common British vs American and Canadian vs American gotchas:
213 | - o vs ou: neighbor and color (not neighbour and colour)
214 | - single vs double ell: labeling and labeled (not labelling and labelled)
215 | - z vs s: summarize (not summarise)
216 | - c vs s: defense (not defence)
217 | - er vs re: center (not centre)
218 |
219 | ### Whitespace
220 | We need a line of whitespace before and after code fences (code surrounded by three backticks above and below). This is for readability,
221 | and it is essential for figure captions.
222 |
223 | ### PDF Output
224 | These are absolute last steps when rendering the PDF output:
225 | - Look for and fix bad line breaks (e.g. with only one word on the next line, orphans, and widows)
226 | - Look for and fix bad line wraps in code and text
227 | - Look for and fix bad figure placement (falling off page, going over the side)
228 | - Look for and fix large whitespace sections where LaTeX doesn't want to break the next paragraph (usually `\allowdisplaybreaks` helps)
229 | - Fix incorrect indenting. LaTeX will indent for a new paragraph if there is an extra whitespace line, so these should be deleted if no paragraph break is desired.
230 | - Look for `??` in the PDF (broken refs)
231 | - Look in the index for near-duplicates, and merge if needed
232 | - Look for / fix raw LaTeX code (search for backslash and curly brace in the final PDF)
233 | - Make sure the 3D figures (and the text around them that refers to clicking and dragging) are properly modified for the PDF output
234 | - Make sure all markdown label-replaced URLs (of the form `[blah](url)`) will make
235 | sense in the hardcopy book version (i.e. nothing like "click this"). Many links appear in the additional resources: make sure the
236 | text-replacement of the URL contains enough information for someone to find the resource (without being able to click the link)
237 |
238 | ### HTML Output
239 | - Look for broken references (I *think* these end up as `??`)
240 | - Look for uncentered images
241 |
--------------------------------------------------------------------------------
/_bookdown.yml:
--------------------------------------------------------------------------------
1 | output_dir: "docs"
2 | delete_merged_file: true
3 | language:
4 | ui:
5 | edit: "Edit"
6 | chapter_name: "Chapter "
7 | rmd_files: ["index.Rmd", "source/foreword.Rmd", "source/preface.Rmd", "source/acknowledgments.Rmd", "source/authors.Rmd", "source/intro.Rmd", "source/reading.Rmd", "source/wrangling.Rmd", "source/viz.Rmd", "source/classification1.Rmd", "source/classification2.Rmd", "source/regression1.Rmd", "source/regression2.Rmd", "source/clustering.Rmd", "source/inference.Rmd", "source/jupyter.Rmd", "source/version-control.Rmd", "source/setup.Rmd", "source/references.Rmd"]
8 |
--------------------------------------------------------------------------------
/_output.yml:
--------------------------------------------------------------------------------
1 | bookdown::gitbook:
2 | css: source/style.css
3 | config:
4 | toc:
5 | before: |
6 | <li><a href="./">Data Science: A First Introduction</a></li>
7 | edit: null
8 | download: null
9 | pandoc_args: ["--verbose", "--bibliography=source/references.bib", "--citeproc"]
10 | includes:
11 | in_header: source/analytics.html
12 |
13 | bookdown::epub_book: default
14 | bookdown::pdf_book:
15 | includes:
16 | in_header: source/preamble.tex
17 | before_body: source/before_body.tex
18 | after_body: source/after_body.tex
19 | keep_tex: true
20 | dev: "cairo_pdf"
21 | latex_engine: xelatex
22 | citation_package: natbib
23 | template: null
24 | pandoc_args: ["--top-level-division=chapter", "--verbose", "--bibliography=source/references.bib"]
25 | toc_depth: 3
26 | toc_unnumbered: false
27 | toc_appendix: true
28 | quote_footer: ["\\VA{", "}{}"]
29 | highlight_bw: true
30 |
--------------------------------------------------------------------------------
/build_html.sh:
--------------------------------------------------------------------------------
1 | # Script to generate HTML book
2 | #
3 | # Renders the gitbook (HTML) version of the book inside the pinned
4 | # ubcdsci/intro-to-ds Docker image. The current directory is mounted into the
5 | # container, so run this script from the directory that contains index.Rmd.
6 |
7 | # "$(pwd)" is quoted so that a checkout path containing spaces is still passed
8 | # to docker as a single -v argument
9 | docker run --rm -m 8g -v "$(pwd)":/home/rstudio/introduction-to-datascience ubcdsci/intro-to-ds:202307130106229dd1c2 /bin/bash -c "cd /home/rstudio/introduction-to-datascience; Rscript -e 'bookdown::render_book(\"index.Rmd\", output_format=\"bookdown::gitbook\"); warnings(); problems()'"
10 |
--------------------------------------------------------------------------------
/build_pdf.sh:
--------------------------------------------------------------------------------
1 | # Script to generate PDF book
2 |
3 | # backup original index.Rmd
4 | cp index.Rmd index_backup.Rmd
5 |
6 | # bookdown does weird things with the root .html filename if either (1) index.rmd is not in the root dir or (2) index.rmd does not contain a heading
7 | # we need the root file to be named index.html, so we need it to be in the root dir and contain a heading. But the PDF version doesn't need the welcome
8 | # page, which is the first heading. So we manually extract that text prior to building. This is a bit painful, but it works...
9 | sed -n -i "/# Welcome/q;p" index.Rmd
10 |
11 | # need to also remove the cover image from the PDF version
12 | sed -i "/graphic\.jpg/d" index.Rmd
13 |
14 | ## Build the book with bookdown
15 | docker run --rm -m 8g -v $(pwd):/home/rstudio/introduction-to-datascience ubcdsci/intro-to-ds:202307130106229dd1c2 /bin/bash -c "cd /home/rstudio/introduction-to-datascience; Rscript -e 'bookdown::render_book(\"index.Rmd\", \"bookdown::pdf_book\"); warnings(); problems()'"
16 |
17 | # restore the backed up full index.Rmd
18 | mv index_backup.Rmd index.Rmd
19 |
20 |
21 |
--------------------------------------------------------------------------------
/data/can_lang.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/can_lang.db
--------------------------------------------------------------------------------
/data/can_lang.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/can_lang.xlsx
--------------------------------------------------------------------------------
/data/can_lang_no_names.tsv:
--------------------------------------------------------------------------------
1 | Aboriginal languages Aboriginal languages, n.o.s. 590 235 30 665
2 | Non-Official & Non-Aboriginal languages Afrikaans 10260 4785 85 23415
3 | Non-Official & Non-Aboriginal languages Afro-Asiatic languages, n.i.e. 1150 445 10 2775
4 | Non-Official & Non-Aboriginal languages Akan (Twi) 13460 5985 25 22150
5 | Non-Official & Non-Aboriginal languages Albanian 26895 13135 345 31930
6 | Aboriginal languages Algonquian languages, n.i.e. 45 10 0 120
7 | Aboriginal languages Algonquin 1260 370 40 2480
8 | Non-Official & Non-Aboriginal languages American Sign Language 2685 3020 1145 21930
9 | Non-Official & Non-Aboriginal languages Amharic 22465 12785 200 33670
10 | Non-Official & Non-Aboriginal languages Arabic 419890 223535 5585 629055
11 | Non-Official & Non-Aboriginal languages Armenian 33460 21510 450 41295
12 | Non-Official & Non-Aboriginal languages Assyrian Neo-Aramaic 16070 10510 205 19740
13 | Aboriginal languages Athabaskan languages, n.i.e. 50 10 0 85
14 | Aboriginal languages Atikamekw 6150 5465 1100 6645
15 | Non-Official & Non-Aboriginal languages Austro-Asiatic languages, n.i.e 170 80 0 190
16 | Non-Official & Non-Aboriginal languages Austronesian languages, n.i.e. 4195 1160 35 5585
17 | Non-Official & Non-Aboriginal languages Azerbaijani 3255 1245 25 5455
18 | Aboriginal languages Babine (Wetsuwet'en) 110 20 10 210
19 | Non-Official & Non-Aboriginal languages Bamanankan 1535 345 0 3190
20 | Aboriginal languages Beaver 190 50 0 340
21 | Non-Official & Non-Aboriginal languages Belarusan 810 225 0 2265
22 | Non-Official & Non-Aboriginal languages Bengali 73125 47350 525 91220
23 | Non-Official & Non-Aboriginal languages Berber languages, n.i.e. 8985 2615 15 12510
24 | Non-Official & Non-Aboriginal languages Bikol 1785 290 0 2075
25 | Non-Official & Non-Aboriginal languages Bilen 805 615 15 1085
26 | Aboriginal languages Blackfoot 2815 1110 85 5645
27 | Non-Official & Non-Aboriginal languages Bosnian 12215 6045 155 18265
28 | Non-Official & Non-Aboriginal languages Bulgarian 20020 11985 200 22425
29 | Non-Official & Non-Aboriginal languages Burmese 3585 2245 75 4995
30 | Non-Official & Non-Aboriginal languages Cantonese 565270 400220 58820 699125
31 | Aboriginal languages Carrier 1025 250 15 2100
32 | Non-Official & Non-Aboriginal languages Catalan 870 350 30 2035
33 | Aboriginal languages Cayuga 45 10 10 125
34 | Non-Official & Non-Aboriginal languages Cebuano 19890 7205 70 27040
35 | Non-Official & Non-Aboriginal languages Celtic languages, n.i.e. 525 80 10 3595
36 | Non-Official & Non-Aboriginal languages Chaldean Neo-Aramaic 5545 3445 35 7115
37 | Aboriginal languages Chilcotin 655 255 15 1150
38 | Non-Official & Non-Aboriginal languages Chinese languages, n.i.e. 615 280 0 590
39 | Non-Official & Non-Aboriginal languages Chinese, n.o.s. 38580 23940 2935 41685
40 | Aboriginal languages Comox 85 0 0 185
41 | Aboriginal languages Cree, n.o.s. 64050 37950 7800 86115
42 | Non-Official & Non-Aboriginal languages Creole languages, n.i.e. 4985 2005 15 16635
43 | Non-Official & Non-Aboriginal languages Creole, n.o.s. 64110 24570 310 133045
44 | Non-Official & Non-Aboriginal languages Croatian 48200 16775 220 69835
45 | Non-Official & Non-Aboriginal languages Cushitic languages, n.i.e. 365 180 0 480
46 | Non-Official & Non-Aboriginal languages Czech 22295 6235 70 28725
47 | Aboriginal languages Dakota 1210 255 20 1760
48 | Non-Official & Non-Aboriginal languages Danish 12630 855 85 15750
49 | Aboriginal languages Dene 10700 7710 770 13060
50 | Non-Official & Non-Aboriginal languages Dinka 2120 1130 0 2475
51 | Aboriginal languages Dogrib (Tlicho) 1650 1020 165 2375
52 | Non-Official & Non-Aboriginal languages Dravidian languages, n.i.e. 490 190 0 790
53 | Non-Official & Non-Aboriginal languages Dutch 99015 9565 1165 120870
54 | Non-Official & Non-Aboriginal languages Edo 1670 410 0 3220
55 | Official languages English 19460850 22162865 15265335 29748265
56 | Non-Official & Non-Aboriginal languages Estonian 5445 975 55 6070
57 | Non-Official & Non-Aboriginal languages Ewe 1760 405 10 3000
58 | Non-Official & Non-Aboriginal languages Fijian 745 195 0 1665
59 | Non-Official & Non-Aboriginal languages Finnish 15295 2790 105 17590
60 | Official languages French 7166700 6943800 3825215 10242945
61 | Non-Official & Non-Aboriginal languages Frisian 2100 185 40 2910
62 | Non-Official & Non-Aboriginal languages Fulah (Pular, Pulaar, Fulfulde) 2825 825 0 4725
63 | Non-Official & Non-Aboriginal languages Ga 920 250 0 2250
64 | Non-Official & Non-Aboriginal languages Ganda 1295 345 25 2495
65 | Non-Official & Non-Aboriginal languages Georgian 1710 1040 25 2150
66 | Non-Official & Non-Aboriginal languages German 384040 120335 10065 502735
67 | Non-Official & Non-Aboriginal languages Germanic languages, n.i.e. 525 1630 725 8705
68 | Aboriginal languages Gitxsan (Gitksan) 880 315 10 1305
69 | Non-Official & Non-Aboriginal languages Greek 106525 44550 1020 150965
70 | Non-Official & Non-Aboriginal languages Gujarati 108780 64150 885 149045
71 | Aboriginal languages Gwich'in 255 50 10 360
72 | Aboriginal languages Haida 80 10 0 465
73 | Aboriginal languages Haisla 90 20 0 175
74 | Non-Official & Non-Aboriginal languages Haitian Creole 3030 1280 25 6855
75 | Non-Official & Non-Aboriginal languages Hakka 10910 4085 70 12445
76 | Aboriginal languages Halkomelem 480 50 20 1060
77 | Non-Official & Non-Aboriginal languages Harari 1320 735 0 1715
78 | Non-Official & Non-Aboriginal languages Hebrew 19530 8560 825 75020
79 | Aboriginal languages Heiltsuk 100 5 10 125
80 | Non-Official & Non-Aboriginal languages Hiligaynon 6880 2210 25 7925
81 | Non-Official & Non-Aboriginal languages Hindi 110645 55510 1405 433365
82 | Non-Official & Non-Aboriginal languages Hmong-Mien languages 795 335 10 870
83 | Non-Official & Non-Aboriginal languages Hungarian 61235 19480 440 71285
84 | Non-Official & Non-Aboriginal languages Icelandic 1285 270 0 1780
85 | Non-Official & Non-Aboriginal languages Igbo 4235 1000 10 8855
86 | Non-Official & Non-Aboriginal languages Ilocano 26345 9125 110 34530
87 | Non-Official & Non-Aboriginal languages Indo-Iranian languages, n.i.e. 5185 2380 20 8870
88 | Aboriginal languages Inuinnaqtun (Inuvialuktun) 1020 165 30 1975
89 | Aboriginal languages Inuit languages, n.i.e. 310 90 15 470
90 | Aboriginal languages Inuktitut 35210 29230 8795 40620
91 | Aboriginal languages Iroquoian languages, n.i.e. 35 5 0 115
92 | Non-Official & Non-Aboriginal languages Italian 375635 115415 1705 574725
93 | Non-Official & Non-Aboriginal languages Italic (Romance) languages, n.i.e. 720 175 25 2680
94 | Non-Official & Non-Aboriginal languages Japanese 43640 19785 3255 83095
95 | Non-Official & Non-Aboriginal languages Kabyle 13150 5490 15 17120
96 | Non-Official & Non-Aboriginal languages Kannada 3970 1630 10 8245
97 | Non-Official & Non-Aboriginal languages Karenic languages 4705 3860 135 4895
98 | Non-Official & Non-Aboriginal languages Kashmiri 565 135 0 905
99 | Aboriginal languages Kaska (Nahani) 180 20 10 365
100 | Non-Official & Non-Aboriginal languages Khmer (Cambodian) 20130 10885 475 27035
101 | Non-Official & Non-Aboriginal languages Kinyarwanda (Rwanda) 5250 1530 25 7860
102 | Non-Official & Non-Aboriginal languages Konkani 3330 720 10 6790
103 | Non-Official & Non-Aboriginal languages Korean 153425 109705 12150 172750
104 | Non-Official & Non-Aboriginal languages Kurdish 11705 6580 185 15290
105 | Aboriginal languages Kutenai 110 10 0 170
106 | Aboriginal languages Kwakiutl (Kwak'wala) 325 25 15 605
107 | Non-Official & Non-Aboriginal languages Lao 12670 6175 150 17235
108 | Non-Official & Non-Aboriginal languages Latvian 5450 1255 35 6500
109 | Aboriginal languages Lillooet 315 25 15 790
110 | Non-Official & Non-Aboriginal languages Lingala 3805 1045 10 17010
111 | Non-Official & Non-Aboriginal languages Lithuanian 7075 2015 60 8185
112 | Non-Official & Non-Aboriginal languages Macedonian 16770 6830 95 23075
113 | Non-Official & Non-Aboriginal languages Malagasy 1430 430 0 2340
114 | Non-Official & Non-Aboriginal languages Malay 12275 3625 140 22470
115 | Non-Official & Non-Aboriginal languages Malayalam 28565 15440 95 37810
116 | Aboriginal languages Malecite 300 55 10 760
117 | Non-Official & Non-Aboriginal languages Maltese 5565 1125 25 7625
118 | Non-Official & Non-Aboriginal languages Mandarin 592040 462890 60090 814450
119 | Non-Official & Non-Aboriginal languages Marathi 8295 3780 30 15565
120 | Aboriginal languages Mi'kmaq 6690 3565 915 9025
121 | Aboriginal languages Michif 465 80 10 1210
122 | Non-Official & Non-Aboriginal languages Min Dong 1230 345 30 1045
123 | Non-Official & Non-Aboriginal languages Min Nan (Chaochow, Teochow, Fukien, Taiwanese) 31800 13965 565 42840
124 | Aboriginal languages Mohawk 985 255 30 2415
125 | Non-Official & Non-Aboriginal languages Mongolian 1575 905 10 2095
126 | Aboriginal languages Montagnais (Innu) 10235 8585 2055 11445
127 | Aboriginal languages Moose Cree 105 10 0 195
128 | Aboriginal languages Naskapi 1205 1195 370 1465
129 | Non-Official & Non-Aboriginal languages Nepali 18275 13375 195 21385
130 | Non-Official & Non-Aboriginal languages Niger-Congo languages, n.i.e. 19135 4010 30 40760
131 | Non-Official & Non-Aboriginal languages Nilo-Saharan languages, n.i.e. 3750 1520 0 4550
132 | Aboriginal languages Nisga'a 400 75 10 1055
133 | Aboriginal languages North Slavey (Hare) 765 340 95 1005
134 | Aboriginal languages Northern East Cree 315 110 35 550
135 | Aboriginal languages Northern Tutchone 220 30 0 280
136 | Non-Official & Non-Aboriginal languages Norwegian 4615 350 70 8120
137 | Aboriginal languages Nuu-chah-nulth (Nootka) 280 30 10 560
138 | Aboriginal languages Oji-Cree 12855 7905 1080 15605
139 | Aboriginal languages Ojibway 17885 6175 765 28580
140 | Aboriginal languages Okanagan 275 80 20 820
141 | Aboriginal languages Oneida 60 15 0 185
142 | Non-Official & Non-Aboriginal languages Oriya (Odia) 1055 475 0 1530
143 | Non-Official & Non-Aboriginal languages Oromo 4960 3410 45 6245
144 | Non-Official & Non-Aboriginal languages Other languages, n.i.e. 3685 1110 80 9730
145 | Aboriginal languages Ottawa (Odawa) 150 75 0 205
146 | Non-Official & Non-Aboriginal languages Pampangan (Kapampangan, Pampango) 4045 1200 10 5425
147 | Non-Official & Non-Aboriginal languages Pangasinan 1390 240 0 1800
148 | Non-Official & Non-Aboriginal languages Pashto 16905 10590 50 23180
149 | Non-Official & Non-Aboriginal languages Persian (Farsi) 214200 143025 4580 252325
150 | Aboriginal languages Plains Cree 3065 1345 95 5905
151 | Non-Official & Non-Aboriginal languages Polish 181710 74780 2495 214965
152 | Non-Official & Non-Aboriginal languages Portuguese 221535 98710 7485 295955
153 | Non-Official & Non-Aboriginal languages Punjabi (Panjabi) 501680 349140 27865 668240
154 | Non-Official & Non-Aboriginal languages Quebec Sign Language 695 730 130 4665
155 | Non-Official & Non-Aboriginal languages Romanian 96660 53325 745 115050
156 | Non-Official & Non-Aboriginal languages Rundi (Kirundi) 5850 2110 0 8590
157 | Non-Official & Non-Aboriginal languages Russian 188255 116595 4855 269645
158 | Aboriginal languages Salish languages, n.i.e. 260 25 0 560
159 | Aboriginal languages Sarsi (Sarcee) 80 10 0 145
160 | Non-Official & Non-Aboriginal languages Scottish Gaelic 1090 190 15 3980
161 | Aboriginal languages Sekani 85 15 0 185
162 | Non-Official & Non-Aboriginal languages Semitic languages, n.i.e. 2150 1205 65 3220
163 | Non-Official & Non-Aboriginal languages Serbian 57350 31750 530 73780
164 | Non-Official & Non-Aboriginal languages Serbo-Croatian 9550 3890 30 11275
165 | Non-Official & Non-Aboriginal languages Shona 3185 1035 0 5430
166 | Aboriginal languages Shuswap (Secwepemctsin) 445 50 35 1305
167 | Non-Official & Non-Aboriginal languages Sign languages, n.i.e 4125 6690 645 22280
168 | Non-Official & Non-Aboriginal languages Sindhi 11860 4975 35 20260
169 | Non-Official & Non-Aboriginal languages Sinhala (Sinhalese) 16335 7790 40 27825
170 | Aboriginal languages Siouan languages, n.i.e. 55 20 0 140
171 | Aboriginal languages Slavey, n.o.s. 280 105 10 675
172 | Non-Official & Non-Aboriginal languages Slavic languages, n.i.e. 2420 670 10 2995
173 | Non-Official & Non-Aboriginal languages Slovak 17580 5610 100 21470
174 | Non-Official & Non-Aboriginal languages Slovene (Slovenian) 9785 2055 15 11490
175 | Non-Official & Non-Aboriginal languages Somali 36755 22895 220 49660
176 | Aboriginal languages South Slavey 945 370 35 1365
177 | Aboriginal languages Southern East Cree 45 15 0 40
178 | Aboriginal languages Southern Tutchone 70 5 0 145
179 | Non-Official & Non-Aboriginal languages Spanish 458850 263505 13030 995260
180 | Aboriginal languages Squamish 40 5 10 285
181 | Aboriginal languages Stoney 3025 1950 240 3675
182 | Aboriginal languages Straits 80 25 15 365
183 | Non-Official & Non-Aboriginal languages Swahili 13370 5370 80 38685
184 | Aboriginal languages Swampy Cree 1440 330 10 2350
185 | Non-Official & Non-Aboriginal languages Swedish 6840 1050 125 14140
186 | Non-Official & Non-Aboriginal languages Tagalog (Pilipino, Filipino) 431385 213790 3450 612735
187 | Aboriginal languages Tahltan 95 5 0 265
188 | Non-Official & Non-Aboriginal languages Tai-Kadai languages, n.i.e 85 30 0 115
189 | Non-Official & Non-Aboriginal languages Tamil 140720 96955 2085 189860
190 | Non-Official & Non-Aboriginal languages Telugu 15660 8280 40 23165
191 | Non-Official & Non-Aboriginal languages Thai 9255 3365 525 15395
192 | Aboriginal languages Thompson (Ntlakapamux) 335 20 0 450
193 | Non-Official & Non-Aboriginal languages Tibetan 6160 4590 50 7050
194 | Non-Official & Non-Aboriginal languages Tibeto-Burman languages, n.i.e. 1405 655 15 2380
195 | Non-Official & Non-Aboriginal languages Tigrigna 16645 10205 130 21340
196 | Aboriginal languages Tlingit 95 0 10 260
197 | Aboriginal languages Tsimshian 200 30 10 410
198 | Non-Official & Non-Aboriginal languages Turkic languages, n.i.e. 1315 455 10 1875
199 | Non-Official & Non-Aboriginal languages Turkish 32815 18955 690 50770
200 | Non-Official & Non-Aboriginal languages Ukrainian 102485 28250 1210 132115
201 | Non-Official & Non-Aboriginal languages Uralic languages, n.i.e. 10 5 0 25
202 | Non-Official & Non-Aboriginal languages Urdu 210815 128785 1495 322220
203 | Non-Official & Non-Aboriginal languages Uyghur 1035 610 20 1390
204 | Non-Official & Non-Aboriginal languages Uzbek 1720 995 15 2465
205 | Non-Official & Non-Aboriginal languages Vietnamese 156430 104245 8075 198895
206 | Non-Official & Non-Aboriginal languages Vlaams (Flemish) 3895 355 35 4400
207 | Aboriginal languages Wakashan languages, n.i.e. 10 0 0 25
208 | Non-Official & Non-Aboriginal languages Waray-Waray 1110 310 0 1395
209 | Non-Official & Non-Aboriginal languages Welsh 1075 95 0 1695
210 | Non-Official & Non-Aboriginal languages Wolof 3990 1385 10 8240
211 | Aboriginal languages Woods Cree 1840 800 75 2665
212 | Non-Official & Non-Aboriginal languages Wu (Shanghainese) 12915 7650 105 16530
213 | Non-Official & Non-Aboriginal languages Yiddish 13555 7085 895 20985
214 | Non-Official & Non-Aboriginal languages Yoruba 9080 2615 15 22415
215 |
--------------------------------------------------------------------------------
/data/create_can_lang_variants.R:
--------------------------------------------------------------------------------
1 | ## Script to create variants of a plain vanilla .csv file
2 | ##
3 | ## Exports the `can_lang` data set from the canlang package to can_lang.csv and
4 | ## then derives a meta-data-prefixed .csv, a header-less .tsv, an .xlsx workbook
5 | ## and a SQLite database from it. All files are written to the working directory.
6 | library(tidyverse)
7 | library(openxlsx)
8 | library(RSQLite)
9 |
10 | # install canlang from GitHub (requires devtools and network access)
11 | devtools::install_github("ttimbers/canlang")
12 | library(canlang)
13 | write_csv(can_lang, "can_lang.csv")
14 |
15 | # Read can_lang.csv back in and write each of its variants next to it.
16 | # Takes no arguments; called only for its side effects (files on disk).
17 | main <- function(){
18 |   path <- "can_lang.csv"
19 |   data <- read_csv(path)
20 |   # drop the last 4 characters (".csv") to get the base name for the outputs
21 |   path_no_ext <- sub(".{4}$", "", path)
22 |
23 |   # file with some meta data at the top
24 |   meta_data <- data.frame(metadata = c("Data source: https://ttimbers.github.io/canlang/",
25 |                                        "Data originally published in: Statistics Canada Census of Population 2016.",
26 |                                        "Reproduced and distributed on an as is basis with the permission of Statistics Canada."))
27 |   # the meta data lines go first, unquoted and without a header ...
28 |   write.table(meta_data,
29 |               file = paste0(path_no_ext, "_meta-data.csv"),
30 |               sep = ",",
31 |               col.names = FALSE,
32 |               row.names = FALSE,
33 |               quote = FALSE)
34 |   # ... then the data (with its header row) is appended to the same file.
35 |   # `file =` replaces the deprecated `path =` argument of readr's writers.
36 |   write_csv(data,
37 |             file = paste0(path_no_ext, "_meta-data.csv"),
38 |             append = TRUE,
39 |             col_names = TRUE)
40 |
41 |   # file with no column names and tab delimiters
42 |   write_delim(data,
43 |               file = paste0(path_no_ext, ".tsv"),
44 |               delim = "\t",
45 |               col_names = FALSE)
46 |
47 |   # excel file
48 |   write.xlsx(data, file = paste0(path_no_ext, ".xlsx"))
49 |
50 |   # write to sqlite (table "lang"; an existing table is replaced)
51 |   con <- dbConnect(RSQLite::SQLite(), paste0(path_no_ext, ".db"))
52 |   dbWriteTable(con, "lang", data, overwrite = TRUE)
53 |   dbDisconnect(con)
54 | }
55 |
56 | main()
57 |
--------------------------------------------------------------------------------
/data/create_penguins_no_species.R:
--------------------------------------------------------------------------------
1 |
2 | # Builds two files from the palmerpenguins data: a small sample stratified by
3 | # species (with species recoded as a numeric `cluster` factor) and the full
4 | # data set with the species column removed.
5 | #remotes::install_github("allisonhorst/palmerpenguins")
6 | library(tidyverse)
7 | library(palmerpenguins)
8 | library(tidymodels)
9 | data(package = "palmerpenguins")
10 | set.seed(12345)
11 |
12 | # keep complete rows only
13 | penguins <- na.omit(penguins)
14 |
15 | # full data set without the species column
16 | penguins_no_species <- select(penguins, -species)
17 |
18 | # 5% split of the data, stratified by species
19 | split <- initial_split(penguins, prop = 0.05, strata = species)
20 |
21 | # recode species as a numeric factor called cluster, then drop species
22 | toy_penguins <- select(
23 |   mutate(training(split), cluster = as_factor(as.numeric(species))),
24 |   -species
25 | )
26 |
27 | # quick visual check of the sampled points, coloured by cluster
28 | ggplot(
29 |   toy_penguins,
30 |   aes(x = flipper_length_mm, y = bill_length_mm, colour = cluster)
31 | ) +
32 |   geom_point() +
33 |   labs(x = "Flipper Length (mm)", y = "Bill Length (mm)")
34 |
35 | write_csv(toy_penguins, "data/toy_penguins.csv")
36 | write_csv(penguins_no_species, "data/penguins.csv")
37 |
--------------------------------------------------------------------------------
/data/create_state_property_vote_variants.R:
--------------------------------------------------------------------------------
1 | ## Script to create variants of a plain vanilla .csv file
2 | ##
3 | ## Reads state_property_vote.csv from the working directory and derives a
4 | ## meta-data-prefixed .csv, a header-less .tsv, an .xlsx workbook and a SQLite
5 | ## database from it. All files are written to the working directory.
6 | library(tidyverse)
7 | library(openxlsx)
8 | library(RSQLite)
9 |
10 | # Read state_property_vote.csv and write each of its variants next to it.
11 | # Takes no arguments; called only for its side effects (files on disk).
12 | main <- function(){
13 |   path <- "state_property_vote.csv"
14 |   data <- read_csv(path)
15 |   # drop the last 4 characters (".csv") to get the base name for the outputs
16 |   path_no_ext <- sub(".{4}$", "", path)
17 |
18 |   # file with some meta data at the top
19 |   meta_data <- data.frame(metadata = c("Data source: https://datausa.io/",
20 |                                        "Record of how data was collected: https://github.com/UBC-DSCI/introduction-to-datascience/blob/master/data/retrieve_data.ipynb",
21 |                                        "Date collected: 2020-07-08"))
22 |   # the meta data lines go first, unquoted and without a header ...
23 |   write.table(meta_data,
24 |               file = paste0(path_no_ext, "_meta-data.csv"),
25 |               sep = ",",
26 |               col.names = FALSE,
27 |               row.names = FALSE,
28 |               quote = FALSE)
29 |   # ... then the data is appended to the same file. col.names is left at its
30 |   # default so the header row follows the meta data; R is expected to warn
31 |   # about appending column names to a file here.
32 |   write.table(data,
33 |               file = paste0(path_no_ext, "_meta-data.csv"),
34 |               sep = ",",
35 |               row.names = FALSE,
36 |               quote = FALSE,
37 |               append = TRUE)
38 |
39 |   # file with no column names and tab delimiters
40 |   # (`file =` replaces the deprecated `path =` argument of readr's writers)
41 |   write_delim(data,
42 |               file = paste0(path_no_ext, ".tsv"),
43 |               delim = "\t",
44 |               col_names = FALSE)
45 |
46 |   # excel file
47 |   write.xlsx(data, file = paste0(path_no_ext, ".xlsx"))
48 |
49 |   # write to sqlite (table "state"; an existing table is replaced)
50 |   con <- dbConnect(RSQLite::SQLite(), paste0(path_no_ext, ".db"))
51 |   dbWriteTable(con, "state", data, overwrite = TRUE)
52 |   dbDisconnect(con)
53 | }
54 |
55 | main()
56 |
--------------------------------------------------------------------------------
/data/create_untidy_canlang.R:
--------------------------------------------------------------------------------
1 | ## creating untidy canlang data
2 | ##
3 | ## Starting from `region_lang` (canlang package), writes four .csv files to the
4 | ## working directory: the rows for five cities, a wide version, a "messy"
5 | ## version with two counts packed into one cell, and a long version.
6 | library(canlang)
7 | library(tidyverse)
8 |
9 | # subsetting 5 cities
10 | top5_cities <- region_lang %>%
11 |   filter(region %in% c("Toronto", "Montréal", "Vancouver", "Calgary", "Edmonton"))
12 |
13 | # FALSE is spelled out throughout: `F` is an ordinary variable that can be reassigned
14 | write.csv(top5_cities, "region_lang_top5_cities.csv", row.names = FALSE)
15 |
16 | # making a wide data set (one mother_tongue column per city)
17 | top5_cities_wide <- top5_cities %>%
18 |   select(region, category, language, mother_tongue) %>%
19 |   pivot_wider(names_from = region, values_from = c(mother_tongue)) %>%
20 |   select(category, language, Toronto, Montréal, Vancouver, Calgary, Edmonton)
21 |
22 | write.csv(top5_cities_wide, "region_lang_top5_cities_wide.csv", row.names = FALSE)
23 |
24 | ## making a messy data set with "/" delimiters
25 | ## (each city cell holds "most_at_home/most_at_work")
26 | top5_cities_messy <- top5_cities %>%
27 |   unite(col = "value", most_at_home, most_at_work, sep = "/") %>%
28 |   select(region, category, language, value) %>%
29 |   pivot_wider(names_from = region, values_from = value) %>%
30 |   select(category, language, Toronto, Montréal, Vancouver, Calgary, Edmonton)
31 |
32 | write.csv(top5_cities_messy, "region_lang_top5_cities_messy.csv", row.names = FALSE)
33 |
34 |
35 | # making a narrow data set (one row per region/language/type with its count)
36 | top5_cities_narrow <- top5_cities %>%
37 |   select(region, category, language, most_at_home, most_at_work) %>%
38 |   pivot_longer(!c(region, category, language), names_to = "type", values_to = "count")
39 | write.csv(top5_cities_narrow, "region_lang_top5_cities_long.csv", row.names = FALSE)
40 |
--------------------------------------------------------------------------------
/data/faithful.csv:
--------------------------------------------------------------------------------
1 | eruptions,waiting
2 | 3.6,79.0
3 | 1.8,54.0
4 | 3.333,74.0
5 | 2.283,62.0
6 | 4.533,85.0
7 | 2.883,55.0
8 | 4.7,88.0
9 | 3.6,85.0
10 | 1.95,51.0
11 | 4.35,85.0
12 | 1.833,54.0
13 | 3.917,84.0
14 | 4.2,78.0
15 | 1.75,47.0
16 | 4.7,83.0
17 | 2.167,52.0
18 | 1.75,62.0
19 | 4.8,84.0
20 | 1.6,52.0
21 | 4.25,79.0
22 | 1.8,51.0
23 | 1.75,47.0
24 | 3.45,78.0
25 | 3.067,69.0
26 | 4.533,74.0
27 | 3.6,83.0
28 | 1.967,55.0
29 | 4.083,76.0
30 | 3.85,78.0
31 | 4.433,79.0
32 | 4.3,73.0
33 | 4.467,77.0
34 | 3.367,66.0
35 | 4.033,80.0
36 | 3.833,74.0
37 | 2.017,52.0
38 | 1.867,48.0
39 | 4.833,80.0
40 | 1.833,59.0
41 | 4.783,90.0
42 | 4.35,80.0
43 | 1.883,58.0
44 | 4.567,84.0
45 | 1.75,58.0
46 | 4.533,73.0
47 | 3.317,83.0
48 | 3.833,64.0
49 | 2.1,53.0
50 | 4.633,82.0
51 | 2.0,59.0
52 | 4.8,75.0
53 | 4.716,90.0
54 | 1.833,54.0
55 | 4.833,80.0
56 | 1.733,54.0
57 | 4.883,83.0
58 | 3.717,71.0
59 | 1.667,64.0
60 | 4.567,77.0
61 | 4.317,81.0
62 | 2.233,59.0
63 | 4.5,84.0
64 | 1.75,48.0
65 | 4.8,82.0
66 | 1.817,60.0
67 | 4.4,92.0
68 | 4.167,78.0
69 | 4.7,78.0
70 | 2.067,65.0
71 | 4.7,73.0
72 | 4.033,82.0
73 | 1.967,56.0
74 | 4.5,79.0
75 | 4.0,71.0
76 | 1.983,62.0
77 | 5.067,76.0
78 | 2.017,60.0
79 | 4.567,78.0
80 | 3.883,76.0
81 | 3.6,83.0
82 | 4.133,75.0
83 | 4.333,82.0
84 | 4.1,70.0
85 | 2.633,65.0
86 | 4.067,73.0
87 | 4.933,88.0
88 | 3.95,76.0
89 | 4.517,80.0
90 | 2.167,48.0
91 | 4.0,86.0
92 | 2.2,60.0
93 | 4.333,90.0
94 | 1.867,50.0
95 | 4.817,78.0
96 | 1.833,63.0
97 | 4.3,72.0
98 | 4.667,84.0
99 | 3.75,75.0
100 | 1.867,51.0
101 | 4.9,82.0
102 | 2.483,62.0
103 | 4.367,88.0
104 | 2.1,49.0
105 | 4.5,83.0
106 | 4.05,81.0
107 | 1.867,47.0
108 | 4.7,84.0
109 | 1.783,52.0
110 | 4.85,86.0
111 | 3.683,81.0
112 | 4.733,75.0
113 | 2.3,59.0
114 | 4.9,89.0
115 | 4.417,79.0
116 | 1.7,59.0
117 | 4.633,81.0
118 | 2.317,50.0
119 | 4.6,85.0
120 | 1.817,59.0
121 | 4.417,87.0
122 | 2.617,53.0
123 | 4.067,69.0
124 | 4.25,77.0
125 | 1.967,56.0
126 | 4.6,88.0
127 | 3.767,81.0
128 | 1.917,45.0
129 | 4.5,82.0
130 | 2.267,55.0
131 | 4.65,90.0
132 | 1.867,45.0
133 | 4.167,83.0
134 | 2.8,56.0
135 | 4.333,89.0
136 | 1.833,46.0
137 | 4.383,82.0
138 | 1.883,51.0
139 | 4.933,86.0
140 | 2.033,53.0
141 | 3.733,79.0
142 | 4.233,81.0
143 | 2.233,60.0
144 | 4.533,82.0
145 | 4.817,77.0
146 | 4.333,76.0
147 | 1.983,59.0
148 | 4.633,80.0
149 | 2.017,49.0
150 | 5.1,96.0
151 | 1.8,53.0
152 | 5.033,77.0
153 | 4.0,77.0
154 | 2.4,65.0
155 | 4.6,81.0
156 | 3.567,71.0
157 | 4.0,70.0
158 | 4.5,81.0
159 | 4.083,93.0
160 | 1.8,53.0
161 | 3.967,89.0
162 | 2.2,45.0
163 | 4.15,86.0
164 | 2.0,58.0
165 | 3.833,78.0
166 | 3.5,66.0
167 | 4.583,76.0
168 | 2.367,63.0
169 | 5.0,88.0
170 | 1.933,52.0
171 | 4.617,93.0
172 | 1.917,49.0
173 | 2.083,57.0
174 | 4.583,77.0
175 | 3.333,68.0
176 | 4.167,81.0
177 | 4.333,81.0
178 | 4.5,73.0
179 | 2.417,50.0
180 | 4.0,85.0
181 | 4.167,74.0
182 | 1.883,55.0
183 | 4.583,77.0
184 | 4.25,83.0
185 | 3.767,83.0
186 | 2.033,51.0
187 | 4.433,78.0
188 | 4.083,84.0
189 | 1.833,46.0
190 | 4.417,83.0
191 | 2.183,55.0
192 | 4.8,81.0
193 | 1.833,57.0
194 | 4.8,76.0
195 | 4.1,84.0
196 | 3.966,77.0
197 | 4.233,81.0
198 | 3.5,87.0
199 | 4.366,77.0
200 | 2.25,51.0
201 | 4.667,78.0
202 | 2.1,60.0
203 | 4.35,82.0
204 | 4.133,91.0
205 | 1.867,53.0
206 | 4.6,78.0
207 | 1.783,46.0
208 | 4.367,77.0
209 | 3.85,84.0
210 | 1.933,49.0
211 | 4.5,83.0
212 | 2.383,71.0
213 | 4.7,80.0
214 | 1.867,49.0
215 | 3.833,75.0
216 | 3.417,64.0
217 | 4.233,76.0
218 | 2.4,53.0
219 | 4.8,94.0
220 | 2.0,55.0
221 | 4.15,76.0
222 | 1.867,50.0
223 | 4.267,82.0
224 | 1.75,54.0
225 | 4.483,75.0
226 | 4.0,78.0
227 | 4.117,79.0
228 | 4.083,78.0
229 | 4.267,78.0
230 | 3.917,70.0
231 | 4.55,79.0
232 | 4.083,70.0
233 | 2.417,54.0
234 | 4.183,86.0
235 | 2.217,50.0
236 | 4.45,90.0
237 | 1.883,54.0
238 | 1.85,54.0
239 | 4.283,77.0
240 | 3.95,79.0
241 | 2.333,64.0
242 | 4.15,75.0
243 | 2.35,47.0
244 | 4.933,86.0
245 | 2.9,63.0
246 | 4.583,85.0
247 | 3.833,82.0
248 | 2.083,57.0
249 | 4.367,82.0
250 | 2.133,67.0
251 | 4.35,74.0
252 | 2.2,54.0
253 | 4.45,83.0
254 | 3.567,73.0
255 | 4.5,73.0
256 | 4.15,88.0
257 | 3.817,80.0
258 | 3.917,71.0
259 | 4.45,83.0
260 | 2.0,56.0
261 | 4.283,79.0
262 | 4.767,78.0
263 | 4.533,84.0
264 | 1.85,58.0
265 | 4.25,83.0
266 | 1.983,43.0
267 | 2.25,60.0
268 | 4.75,75.0
269 | 4.117,81.0
270 | 2.15,46.0
271 | 4.417,90.0
272 | 1.817,46.0
273 | 4.467,74.0
274 |
--------------------------------------------------------------------------------
/data/historical_vote.csv:
--------------------------------------------------------------------------------
1 | election_num,election_year,winner,winner_party,elec_coll_votes_count,elec_coll_votes_perc,pop_votes_perc,pop_votes_perc_marg,pop_votes_count,pop_votes_count_marg,runner-up,runner-up_party,turnout
2 | 10,1824,John Quincy Adams,D.-R.,84/261,32.18%,30.92%,10.44%,"113,142","38,221",Andrew Jackson,D.-R.,26.90%
3 | 23,1876,Rutherford Hayes,Rep.,185/369,50.14%,47.92%,3.00%,"4,034,142","252,666",Samuel Tilden,Dem.,82.60%
4 | 58,2016,Donald Trump,Rep.,304/538,56.50%,45.98%,2.10%,"62,979,636","2,864,974",Hillary Rodham Clinton,Dem.,60.20%
5 | 26,1888,Benjamin Harrison,Rep.,233/401,58.10%,47.80%,0.83%,"5,443,633","94,530",Grover Cleveland,Dem.,80.50%
6 | 54,2000,George W. Bush,Rep.,271/538,50.37%,47.87%,0.51%,"50,460,110","543,816",Al Gore,Dem.,54.20%
7 | 24,1880,James Garfield,Rep.,214/369,57.99%,48.31%,0.09%,"4,453,337","1,898",Winfield Scott Hancock,Dem.,80.50%
8 | 44,1960,John Kennedy,Dem.,303/537,56.42%,49.72%,0.17%,"34,220,984","112,827",Richard Nixon,Rep.,63.80%
9 | 25,1884,Grover Cleveland,Dem.,219/401,54.61%,48.85%,0.57%,"4,914,482","57,579",James Blaine,Rep.,78.20%
10 | 46,1968,Richard Nixon,Rep.,301/538,55.95%,43.42%,0.70%,"31,783,783","511,944",Hubert Humphrey,Dem.,62.50%
11 | 15,1844,James Polk,Dem.,170/275,61.82%,49.54%,1.45%,"1,339,570","39,413",Henry Clay,Whig,79.20%
12 | 48,1976,Jimmy Carter,Dem.,297/538,55.20%,50.08%,2.06%,"40,831,881","1,683,247",Gerald Ford,Rep.,54.80%
13 | 55,2004,George W. Bush,Rep.,286/538,53.16%,50.73%,2.46%,"62,040,610","3,012,171",John Kerry,Dem.,60.10%
14 | 27,1892,Grover Cleveland,Dem.,277/444,62.39%,46.02%,3.01%,"5,553,898","363,099",Benjamin Harrison,Rep.,75.80%
15 | 33,1916,Woodrow Wilson,Dem.,277/531,52.17%,49.24%,3.12%,"9,126,868","578,140",Charles Evans Hughes,Rep.,61.80%
16 | 57,2012,Barack Obama,Dem.,332/538,61.71%,51.06%,3.86%,"65,915,795","4,982,291",Mitt Romney,Rep.,58.60%
17 | 28,1896,William McKinley,Rep.,271/447,60.63%,51.02%,4.31%,"7,112,138","601,331",William Jennings Bryan,Dem.,79.60%
18 | 41,1948,Harry Truman,Dem.,303/531,57.06%,49.55%,4.48%,"24,179,347","2,188,055",Thomas Dewey,Rep.,52.20%
19 | 16,1848,Zachary Taylor,Whig,163/290,56.21%,47.28%,4.79%,"1,360,235","137,882",Lewis Cass,Dem.,72.80%
20 | 21,1868,Ulysses Grant,Rep.,214/294,72.79%,52.66%,5.32%,"3,013,790","304,810",Horatio Seymour,Dem.,80.90%
21 | 52,1992,Bill Clinton,Dem.,370/538,68.77%,43.01%,5.56%,"44,909,806","5,805,256",George H. W. Bush,Rep.,58.10%
22 | 14,1840,William Henry Harrison,Whig,234/294,79.59%,52.87%,6.05%,"1,275,583","145,938",Martin Van Buren,Dem.,80.30%
23 | 29,1900,William McKinley,Rep.,292/447,65.23%,51.64%,6.12%,"7,228,864","857,932",William Jennings Bryan,Dem.,73.70%
24 | 17,1852,Franklin Pierce,Dem.,254/296,85.81%,50.83%,6.95%,"1,605,943","219,525",Winfield Scott,Whig,69.50%
25 | 56,2008,Barack Obama,Dem.,365/538,67.84%,52.93%,7.27%,"69,498,516","9,550,193",John McCain,Rep.,61.60%
26 | 40,1944,Franklin Roosevelt,Dem.,432/531,81.36%,53.39%,7.50%,"25,612,916","3,594,987",Thomas Dewey,Rep.,55.90%
27 | 51,1988,George H. W. Bush,Rep.,426/538,79.18%,53.37%,7.72%,"48,886,597","7,077,121",Michael Dukakis,Dem.,52.80%
28 | 53,1996,Bill Clinton,Dem.,379/538,70.45%,49.23%,8.51%,"47,400,125","8,201,370",Bob Dole,Rep.,51.70%
29 | 31,1908,William Taft,Rep.,321/483,66.46%,51.57%,8.53%,"7,678,335","1,269,356",William Jennings Bryan,Dem.,65.70%
30 | 49,1980,Ronald Reagan,Rep.,489/538,90.89%,50.75%,9.74%,"43,903,230","8,423,115",Jimmy Carter,Dem.,54.20%
31 | 39,1940,Franklin Roosevelt,Dem.,449/531,84.56%,54.74%,9.96%,"27,313,945","4,966,201",Wendell Willkie,Rep.,62.40%
32 | 20,1864,Abraham Lincoln,Rep.,212/233,90.99%,55.03%,10.08%,"2,211,317","405,090",George McClellan,Dem.,76.30%
33 | 19,1860,Abraham Lincoln,Rep.,180/303,59.41%,39.65%,10.13%,"1,855,993","474,049",John Breckinridge,Dem.,81.80%
34 | 42,1952,Dwight Eisenhower,Rep.,442/531,83.24%,55.18%,10.85%,"34,075,529","6,700,439",Adlai Stevenson,Dem.,62.30%
35 | 22,1872,Ulysses Grant,Rep.,286/352,81.25%,55.58%,11.80%,"3,597,439","763,729",Horace Greeley,L. R.,72.10%
36 | 18,1856,James Buchanan,Dem.,174/296,58.78%,45.29%,12.20%,"1,835,140","494,472",John Frémont,Rep.,79.40%
37 | 11,1828,Andrew Jackson,Dem.,178/261,68.20%,55.93%,12.25%,"642,806","140,839",John Quincy Adams,N. R.,57.30%
38 | 13,1836,Martin Van Buren,Dem.,170/294,57.82%,50.79%,14.20%,"763,291","213,384",William Henry Harrison,Whig,56.50%
39 | 32,1912,Woodrow Wilson,Dem.,435/531,81.92%,41.84%,14.44%,"6,296,284","2,173,563",Theodore Roosevelt,Prog.,59.00%
40 | 43,1956,Dwight Eisenhower,Rep.,457/531,86.06%,57.37%,15.40%,"35,579,180","9,551,152",Adlai Stevenson,Dem.,60.20%
41 | 36,1928,Herbert Hoover,Rep.,444/531,83.62%,58.21%,17.41%,"21,427,123","6,411,659",Al Smith,Dem.,56.90%
42 | 37,1932,Franklin Roosevelt,Dem.,472/531,88.89%,57.41%,17.76%,"22,821,277","7,060,023",Herbert Hoover,Rep.,56.90%
43 | 12,1832,Andrew Jackson,Dem.,219/286,76.57%,54.74%,17.81%,"702,735","228,628",Henry Clay,N. R.,57.00%
44 | 50,1984,Ronald Reagan,Rep.,525/538,97.58%,58.77%,18.21%,"54,455,472","16,878,120",Walter Mondale,Dem.,55.20%
45 | 30,1904,Theodore Roosevelt,Rep.,336/476,70.59%,56.42%,18.83%,"7,630,557","2,546,677",Alton Brooks Parker,Dem.,65.50%
46 | 45,1964,Lyndon Johnson,Dem.,486/538,90.33%,61.05%,22.58%,"43,127,041","15,951,287",Barry Goldwater,Rep.,62.80%
47 | 47,1972,Richard Nixon,Rep.,520/538,96.65%,60.67%,23.15%,"47,168,710","17,995,488",George McGovern,Dem.,56.20%
48 | 38,1936,Franklin Roosevelt,Dem.,523/531,98.49%,60.80%,24.26%,"27,752,648","11,070,786",Alf Landon,Rep.,61.00%
49 | 35,1924,Calvin Coolidge,Rep.,382/531,71.94%,54.04%,25.22%,"15,723,789","7,337,547",John Davis,Dem.,48.90%
50 | 34,1920,Warren Harding,Rep.,404/531,76.08%,60.32%,26.17%,"16,144,093","7,004,432",James Cox,Dem.,49.20%
--------------------------------------------------------------------------------
/data/historical_vote_messy.csv:
--------------------------------------------------------------------------------
1 | election_year,winner,runnerup
2 | 2016,Donald Trump/Rep,Hillary Clinton/Dem
3 | 2012,Barack Obama/Dem,Mitt Romney/Rep
4 | 2008,Barack Obama/Dem,John McCain/Rep
5 | 2004,George W Bush/Rep,John Kerry/Dem
6 | 2000,George W Bush/Rep,Al Gore/Dem
7 | 1996,Bill Clinton/Dem,Bob Dole/Rep
8 | 1992,Bill Clinton/Dem,George HW Bush/Rep
9 | 1988,George HW Bush/Rep,Michael Dukakis/Dem
10 | 1984,Ronald Reagan/Rep,Walter Mondale/Dem
11 | 1980,Ronald Reagan/Rep,Jimmy Carter/Dem
--------------------------------------------------------------------------------
/data/historical_vote_no_header.tsv:
--------------------------------------------------------------------------------
1 | 10 1824 John Quincy Adams D.-R. 84/261 32.18% 30.92% −10.44% 113,142 −38,221 Andrew Jackson D.-R. 26.9%
2 | 23 1876 Rutherford Hayes Rep. 185/369 50.14% 47.92% −3.00% 4,034,142 −252,666 Samuel Tilden Dem. 82.6%
3 | 58 2016 Donald Trump Rep. 304/538 56.50% 45.98% −2.10% 62,979,636 −2,864,974 Hillary Rodham Clinton Dem. 60.2%
4 | 26 1888 Benjamin Harrison Rep. 233/401 58.10% 47.80% −0.83% 5,443,633 −94,530 Grover Cleveland Dem. 80.5%
5 | 54 2000 George W. Bush Rep. 271/538 50.37% 47.87% −0.51% 50,460,110 −543,816 Al Gore Dem. 54.2%
6 | 24 1880 James Garfield Rep. 214/369 57.99% 48.31% 0.09% 4,453,337 1,898 Winfield Scott Hancock Dem. 80.5%
7 | 44 1960 John Kennedy Dem. 303/537 56.42% 49.72% 0.17% 34,220,984 112,827 Richard Nixon Rep. 63.8%
8 | 25 1884 Grover Cleveland Dem. 219/401 54.61% 48.85% 0.57% 4,914,482 57,579 James Blaine Rep. 78.2%
9 | 46 1968 Richard Nixon Rep. 301/538 55.95% 43.42% 0.70% 31,783,783 511,944 Hubert Humphrey Dem. 62.5%
10 | 15 1844 James Polk Dem. 170/275 61.82% 49.54% 1.45% 1,339,570 39,413 Henry Clay Whig 79.2%
11 | 48 1976 Jimmy Carter Dem. 297/538 55.20% 50.08% 2.06% 40,831,881 1,683,247 Gerald Ford Rep. 54.8%
12 | 55 2004 George W. Bush Rep. 286/538 53.16% 50.73% 2.46% 62,040,610 3,012,171 John Kerry Dem. 60.1%
13 | 27 1892 Grover Cleveland Dem. 277/444 62.39% 46.02% 3.01% 5,553,898 363,099 Benjamin Harrison Rep. 75.8%
14 | 33 1916 Woodrow Wilson Dem. 277/531 52.17% 49.24% 3.12% 9,126,868 578,140 Charles Evans Hughes Rep. 61.8%
15 | 57 2012 Barack Obama Dem. 332/538 61.71% 51.06% 3.86% 65,915,795 4,982,291 Mitt Romney Rep. 58.6%
16 | 28 1896 William McKinley Rep. 271/447 60.63% 51.02% 4.31% 7,112,138 601,331 William Jennings Bryan Dem. 79.6%
17 | 41 1948 Harry Truman Dem. 303/531 57.06% 49.55% 4.48% 24,179,347 2,188,055 Thomas Dewey Rep. 52.2%
18 | 16 1848 Zachary Taylor Whig 163/290 56.21% 47.28% 4.79% 1,360,235 137,882 Lewis Cass Dem. 72.8%
19 | 21 1868 Ulysses Grant Rep. 214/294 72.79% 52.66% 5.32% 3,013,790 304,810 Horatio Seymour Dem. 80.9%
20 | 52 1992 Bill Clinton Dem. 370/538 68.77% 43.01% 5.56% 44,909,806 5,805,256 George H. W. Bush Rep. 58.1%
21 | 14 1840 William Henry Harrison Whig 234/294 79.59% 52.87% 6.05% 1,275,583 145,938 Martin Van Buren Dem. 80.3%
22 | 29 1900 William McKinley Rep. 292/447 65.23% 51.64% 6.12% 7,228,864 857,932 William Jennings Bryan Dem. 73.7%
23 | 17 1852 Franklin Pierce Dem. 254/296 85.81% 50.83% 6.95% 1,605,943 219,525 Winfield Scott Whig 69.5%
24 | 56 2008 Barack Obama Dem. 365/538 67.84% 52.93% 7.27% 69,498,516 9,550,193 John McCain Rep. 61.6%
25 | 40 1944 Franklin Roosevelt Dem. 432/531 81.36% 53.39% 7.50% 25,612,916 3,594,987 Thomas Dewey Rep. 55.9%
26 | 51 1988 George H. W. Bush Rep. 426/538 79.18% 53.37% 7.72% 48,886,597 7,077,121 Michael Dukakis Dem. 52.8%
27 | 53 1996 Bill Clinton Dem. 379/538 70.45% 49.23% 8.51% 47,400,125 8,201,370 Bob Dole Rep. 51.7%
28 | 31 1908 William Taft Rep. 321/483 66.46% 51.57% 8.53% 7,678,335 1,269,356 William Jennings Bryan Dem. 65.7%
29 | 49 1980 Ronald Reagan Rep. 489/538 90.89% 50.75% 9.74% 43,903,230 8,423,115 Jimmy Carter Dem. 54.2%
30 | 39 1940 Franklin Roosevelt Dem. 449/531 84.56% 54.74% 9.96% 27,313,945 4,966,201 Wendell Willkie Rep. 62.4%
31 | 20 1864 Abraham Lincoln Rep. 212/233 90.99% 55.03% 10.08% 2,211,317 405,090 George McClellan Dem. 76.3%
32 | 19 1860 Abraham Lincoln Rep. 180/303 59.41% 39.65% 10.13% 1,855,993 474,049 John Breckinridge Dem. 81.8%
33 | 42 1952 Dwight Eisenhower Rep. 442/531 83.24% 55.18% 10.85% 34,075,529 6,700,439 Adlai Stevenson Dem. 62.3%
34 | 22 1872 Ulysses Grant Rep. 286/352 81.25% 55.58% 11.80% 3,597,439 763,729 Horace Greeley L. R. 72.1%
35 | 18 1856 James Buchanan Dem. 174/296 58.78% 45.29% 12.20% 1,835,140 494,472 John Frémont Rep. 79.4%
36 | 11 1828 Andrew Jackson Dem. 178/261 68.20% 55.93% 12.25% 642,806 140,839 John Quincy Adams N. R. 57.3%
37 | 13 1836 Martin Van Buren Dem. 170/294 57.82% 50.79% 14.20% 763,291 213,384 William Henry Harrison Whig 56.5%
38 | 32 1912 Woodrow Wilson Dem. 435/531 81.92% 41.84% 14.44% 6,296,284 2,173,563 Theodore Roosevelt Prog. 59.0%
39 | 43 1956 Dwight Eisenhower Rep. 457/531 86.06% 57.37% 15.40% 35,579,180 9,551,152 Adlai Stevenson Dem. 60.2%
40 | 36 1928 Herbert Hoover Rep. 444/531 83.62% 58.21% 17.41% 21,427,123 6,411,659 Al Smith Dem. 56.9%
41 | 37 1932 Franklin Roosevelt Dem. 472/531 88.89% 57.41% 17.76% 22,821,277 7,060,023 Herbert Hoover Rep. 56.9%
42 | 12 1832 Andrew Jackson Dem. 219/286 76.57% 54.74% 17.81% 702,735 228,628 Henry Clay N. R. 57.0%
43 | 50 1984 Ronald Reagan Rep. 525/538 97.58% 58.77% 18.21% 54,455,472 16,878,120 Walter Mondale Dem. 55.2%
44 | 30 1904 Theodore Roosevelt Rep. 336/476 70.59% 56.42% 18.83% 7,630,557 2,546,677 Alton Brooks Parker Dem. 65.5%
45 | 45 1964 Lyndon Johnson Dem. 486/538 90.33% 61.05% 22.58% 43,127,041 15,951,287 Barry Goldwater Rep. 62.8%
46 | 47 1972 Richard Nixon Rep. 520/538 96.65% 60.67% 23.15% 47,168,710 17,995,488 George McGovern Dem. 56.2%
47 | 38 1936 Franklin Roosevelt Dem. 523/531 98.49% 60.80% 24.26% 27,752,648 11,070,786 Alf Landon Rep. 61.0%
48 | 35 1924 Calvin Coolidge Rep. 382/531 71.94% 54.04% 25.22% 15,723,789 7,337,547 John Davis Dem. 48.9%
49 | 34 1920 Warren Harding Rep. 404/531 76.08% 60.32% 26.17% 16,144,093 7,004,432 James Cox Dem. 49.2%
50 |
--------------------------------------------------------------------------------
/data/historical_vote_wide.csv:
--------------------------------------------------------------------------------
1 | election_year,winner,runnerup
2 | 2016,Donald Trump,Hillary Clinton
3 | 2012,Barack Obama,Mitt Romney
4 | 2008,Barack Obama,John McCain
5 | 2004,George Bush,John Kerry
6 | 2000,George Bush,Al Gore
7 | 1996,Bill Clinton,Bob Dole
8 | 1992,Bill Clinton,George Bush
9 | 1988,George Bush,Michael Dukakis
10 | 1984,Ronald Reagan,Walter Mondale
11 | 1980,Ronald Reagan,Jimmy Carter
--------------------------------------------------------------------------------
/data/islands.csv:
--------------------------------------------------------------------------------
1 | landmass,size,landmass_type
2 | Africa,11506,Continent
3 | Antarctica,5500,Continent
4 | Asia,16988,Continent
5 | Australia,2968,Continent
6 | Axel Heiberg,16,Other
7 | Baffin,184,Other
8 | Banks,23,Other
9 | Borneo,280,Other
10 | Britain,84,Other
11 | Celebes,73,Other
12 | Celon,25,Other
13 | Cuba,43,Other
14 | Devon,21,Other
15 | Ellesmere,82,Other
16 | Europe,3745,Continent
17 | Greenland,840,Other
18 | Hainan,13,Other
19 | Hispaniola,30,Other
20 | Hokkaido,30,Other
21 | Honshu,89,Other
22 | Iceland,40,Other
23 | Ireland,33,Other
24 | Java,49,Other
25 | Kyushu,14,Other
26 | Luzon,42,Other
27 | Madagascar,227,Other
28 | Melville,16,Other
29 | Mindanao,36,Other
30 | Moluccas,29,Other
31 | New Britain,15,Other
32 | New Guinea,306,Other
33 | New Zealand (N),44,Other
34 | New Zealand (S),58,Other
35 | Newfoundland,43,Other
36 | North America,9390,Continent
37 | Novaya Zemlya,32,Other
38 | Prince of Wales,13,Other
39 | Sakhalin,29,Other
40 | South America,6795,Continent
41 | Southampton,16,Other
42 | Spitsbergen,15,Other
43 | Sumatra,183,Other
44 | Taiwan,14,Other
45 | Tasmania,26,Other
46 | Tierra del Fuego,19,Other
47 | Timor,13,Other
48 | Vancouver,12,Other
49 | Victoria,82,Other
50 |
--------------------------------------------------------------------------------
/data/marketing.csv:
--------------------------------------------------------------------------------
1 | loyalty,csat,cluster
2 | 7,1,1
3 | 7.5,1,1
4 | 8,2,1
5 | 7,2,1
6 | 8,3,1
7 | 1.5,1.75,3
8 | 1,3,3
9 | 0.5,4,3
10 | 2,4,3
11 | 7,6,2
12 | 6,6,2
13 | 7,7,2
14 | 6,7,2
15 | 5,7,2
16 | 9.5,8,2
17 | 7,8,2
18 | 8.3,9,2
19 | 4,8,2
20 | 2,3,3
21 |
--------------------------------------------------------------------------------
/data/mauna_loa_data.csv:
--------------------------------------------------------------------------------
1 | date_measured,ppm
2 | 1980-02-01,338.34
3 | 1980-03-01,340.01
4 | 1980-04-01,340.93
5 | 1980-05-01,341.48
6 | 1980-06-01,341.33
7 | 1980-07-01,339.4
8 | 1980-08-01,337.7
9 | 1980-09-01,336.19
10 | 1980-10-01,336.15
11 | 1980-11-01,337.27
12 | 1980-12-01,338.32
13 | 1981-01-01,339.29
14 | 1981-02-01,340.55
15 | 1981-03-01,341.61
16 | 1981-04-01,342.53
17 | 1981-05-01,343.03
18 | 1981-06-01,342.54
19 | 1981-07-01,340.78
20 | 1981-08-01,338.44
21 | 1981-09-01,336.95
22 | 1981-10-01,337.08
23 | 1981-11-01,338.58
24 | 1981-12-01,339.88
25 | 1982-01-01,340.96
26 | 1982-02-01,341.73
27 | 1982-03-01,342.81
28 | 1982-04-01,343.97
29 | 1982-05-01,344.63
30 | 1982-06-01,343.79
31 | 1982-07-01,342.32
32 | 1982-08-01,340.09
33 | 1982-09-01,338.28
34 | 1982-10-01,338.29
35 | 1982-11-01,339.6
36 | 1982-12-01,340.9
37 | 1983-01-01,341.68
38 | 1983-02-01,342.9
39 | 1983-03-01,343.33
40 | 1983-04-01,345.25
41 | 1983-05-01,346.03
42 | 1983-06-01,345.63
43 | 1983-07-01,344.19
44 | 1983-08-01,342.27
45 | 1983-09-01,340.35
46 | 1983-10-01,340.38
47 | 1983-11-01,341.59
48 | 1983-12-01,343.05
49 | 1984-01-01,344.1
50 | 1984-02-01,344.79
51 | 1984-03-01,345.52
52 | 1984-05-01,347.63
53 | 1984-06-01,346.98
54 | 1984-07-01,345.53
55 | 1984-08-01,343.55
56 | 1984-09-01,341.4
57 | 1984-10-01,341.67
58 | 1984-11-01,343.1
59 | 1984-12-01,344.7
60 | 1985-01-01,345.21
61 | 1985-02-01,346.16
62 | 1985-03-01,347.74
63 | 1985-04-01,348.34
64 | 1985-05-01,349.06
65 | 1985-06-01,348.38
66 | 1985-07-01,346.71
67 | 1985-08-01,345.02
68 | 1985-09-01,343.27
69 | 1985-10-01,343.13
70 | 1985-11-01,344.49
71 | 1985-12-01,345.88
72 | 1986-01-01,346.56
73 | 1986-02-01,347.28
74 | 1986-03-01,348.01
75 | 1986-04-01,349.77
76 | 1986-05-01,350.38
77 | 1986-06-01,349.93
78 | 1986-07-01,348.16
79 | 1986-08-01,346.08
80 | 1986-09-01,345.22
81 | 1986-10-01,344.51
82 | 1986-11-01,345.93
83 | 1986-12-01,347.22
84 | 1987-01-01,348.52
85 | 1987-02-01,348.73
86 | 1987-03-01,349.73
87 | 1987-04-01,351.31
88 | 1987-05-01,352.09
89 | 1987-06-01,351.53
90 | 1987-07-01,350.11
91 | 1987-08-01,348.08
92 | 1987-09-01,346.52
93 | 1987-10-01,346.59
94 | 1987-11-01,347.96
95 | 1987-12-01,349.16
96 | 1988-01-01,350.39
97 | 1988-02-01,351.64
98 | 1988-03-01,352.4
99 | 1988-04-01,353.69
100 | 1988-05-01,354.21
101 | 1988-06-01,353.72
102 | 1988-07-01,352.69
103 | 1988-08-01,350.4
104 | 1988-09-01,348.92
105 | 1988-10-01,349.13
106 | 1988-11-01,350.2
107 | 1988-12-01,351.41
108 | 1989-01-01,352.91
109 | 1989-02-01,353.27
110 | 1989-03-01,353.96
111 | 1989-04-01,355.64
112 | 1989-05-01,355.86
113 | 1989-06-01,355.37
114 | 1989-07-01,353.99
115 | 1989-08-01,351.81
116 | 1989-09-01,350.05
117 | 1989-10-01,350.25
118 | 1989-11-01,351.49
119 | 1989-12-01,352.85
120 | 1990-01-01,353.8
121 | 1990-02-01,355.04
122 | 1990-03-01,355.73
123 | 1990-04-01,356.32
124 | 1990-05-01,357.32
125 | 1990-06-01,356.34
126 | 1990-07-01,354.84
127 | 1990-08-01,353.01
128 | 1990-09-01,351.31
129 | 1990-10-01,351.62
130 | 1990-11-01,353.07
131 | 1990-12-01,354.33
132 | 1991-01-01,354.84
133 | 1991-02-01,355.73
134 | 1991-03-01,357.23
135 | 1991-04-01,358.66
136 | 1991-05-01,359.13
137 | 1991-06-01,358.13
138 | 1991-07-01,356.19
139 | 1991-08-01,353.85
140 | 1991-09-01,352.25
141 | 1991-10-01,352.35
142 | 1991-11-01,353.81
143 | 1991-12-01,355.12
144 | 1992-01-01,356.25
145 | 1992-02-01,357.11
146 | 1992-03-01,357.86
147 | 1992-04-01,359.09
148 | 1992-05-01,359.59
149 | 1992-06-01,359.33
150 | 1992-07-01,357.01
151 | 1992-08-01,354.94
152 | 1992-09-01,352.95
153 | 1992-10-01,353.32
154 | 1992-11-01,354.32
155 | 1992-12-01,355.57
156 | 1993-01-01,357
157 | 1993-02-01,357.31
158 | 1993-03-01,358.47
159 | 1993-04-01,359.27
160 | 1993-05-01,360.19
161 | 1993-06-01,359.52
162 | 1993-07-01,357.33
163 | 1993-08-01,355.64
164 | 1993-09-01,354.03
165 | 1993-10-01,354.12
166 | 1993-11-01,355.41
167 | 1993-12-01,356.91
168 | 1994-01-01,358.24
169 | 1994-02-01,358.92
170 | 1994-03-01,359.99
171 | 1994-04-01,361.23
172 | 1994-05-01,361.65
173 | 1994-06-01,360.81
174 | 1994-07-01,359.38
175 | 1994-08-01,357.46
176 | 1994-09-01,355.73
177 | 1994-10-01,356.08
178 | 1994-11-01,357.53
179 | 1994-12-01,358.98
180 | 1995-01-01,359.92
181 | 1995-02-01,360.86
182 | 1995-03-01,361.83
183 | 1995-04-01,363.3
184 | 1995-05-01,363.69
185 | 1995-06-01,363.19
186 | 1995-07-01,361.64
187 | 1995-08-01,359.12
188 | 1995-09-01,358.17
189 | 1995-10-01,357.99
190 | 1995-11-01,359.45
191 | 1995-12-01,360.68
192 | 1996-01-01,362.07
193 | 1996-02-01,363.24
194 | 1996-03-01,364.17
195 | 1996-04-01,364.57
196 | 1996-05-01,365.13
197 | 1996-06-01,364.92
198 | 1996-07-01,363.55
199 | 1996-08-01,361.38
200 | 1996-09-01,359.54
201 | 1996-10-01,359.58
202 | 1996-11-01,360.89
203 | 1996-12-01,362.24
204 | 1997-01-01,363.09
205 | 1997-02-01,364.03
206 | 1997-03-01,364.51
207 | 1997-04-01,366.35
208 | 1997-05-01,366.64
209 | 1997-06-01,365.59
210 | 1997-07-01,364.31
211 | 1997-08-01,362.25
212 | 1997-09-01,360.29
213 | 1997-10-01,360.82
214 | 1997-11-01,362.49
215 | 1997-12-01,364.38
216 | 1998-01-01,365.27
217 | 1998-02-01,365.98
218 | 1998-03-01,367.24
219 | 1998-04-01,368.66
220 | 1998-05-01,369.42
221 | 1998-06-01,368.99
222 | 1998-07-01,367.82
223 | 1998-08-01,365.95
224 | 1998-09-01,364.02
225 | 1998-10-01,364.4
226 | 1998-11-01,365.52
227 | 1998-12-01,367.13
228 | 1999-01-01,368.18
229 | 1999-02-01,369.07
230 | 1999-03-01,369.68
231 | 1999-04-01,370.99
232 | 1999-05-01,370.96
233 | 1999-06-01,370.3
234 | 1999-07-01,369.45
235 | 1999-08-01,366.9
236 | 1999-09-01,364.81
237 | 1999-10-01,365.37
238 | 1999-11-01,366.72
239 | 1999-12-01,368.1
240 | 2000-01-01,369.29
241 | 2000-02-01,369.55
242 | 2000-03-01,370.6
243 | 2000-04-01,371.82
244 | 2000-05-01,371.58
245 | 2000-06-01,371.7
246 | 2000-07-01,369.86
247 | 2000-08-01,368.13
248 | 2000-09-01,367
249 | 2000-10-01,367.03
250 | 2000-11-01,368.37
251 | 2000-12-01,369.67
252 | 2001-01-01,370.59
253 | 2001-02-01,371.51
254 | 2001-03-01,372.43
255 | 2001-04-01,373.37
256 | 2001-05-01,373.85
257 | 2001-06-01,373.22
258 | 2001-07-01,371.5
259 | 2001-08-01,369.61
260 | 2001-09-01,368.18
261 | 2001-10-01,368.45
262 | 2001-11-01,369.76
263 | 2001-12-01,371.24
264 | 2002-01-01,372.53
265 | 2002-02-01,373.2
266 | 2002-03-01,374.12
267 | 2002-04-01,375.02
268 | 2002-05-01,375.76
269 | 2002-06-01,375.52
270 | 2002-07-01,374.01
271 | 2002-08-01,371.85
272 | 2002-09-01,370.75
273 | 2002-10-01,370.55
274 | 2002-11-01,372.25
275 | 2002-12-01,373.79
276 | 2003-01-01,374.88
277 | 2003-02-01,375.64
278 | 2003-03-01,376.45
279 | 2003-04-01,377.73
280 | 2003-05-01,378.6
281 | 2003-06-01,378.28
282 | 2003-07-01,376.7
283 | 2003-08-01,374.38
284 | 2003-09-01,373.17
285 | 2003-10-01,373.15
286 | 2003-11-01,374.66
287 | 2003-12-01,375.99
288 | 2004-01-01,377
289 | 2004-02-01,377.87
290 | 2004-03-01,378.88
291 | 2004-04-01,380.35
292 | 2004-05-01,380.62
293 | 2004-06-01,379.69
294 | 2004-07-01,377.47
295 | 2004-08-01,376.01
296 | 2004-09-01,374.25
297 | 2004-10-01,374.46
298 | 2004-11-01,376.16
299 | 2004-12-01,377.51
300 | 2005-01-01,378.46
301 | 2005-02-01,379.73
302 | 2005-03-01,380.77
303 | 2005-04-01,382.29
304 | 2005-05-01,382.45
305 | 2005-06-01,382.21
306 | 2005-07-01,380.74
307 | 2005-08-01,378.74
308 | 2005-09-01,376.7
309 | 2005-10-01,377
310 | 2005-11-01,378.35
311 | 2005-12-01,380.11
312 | 2006-01-01,381.38
313 | 2006-02-01,382.2
314 | 2006-03-01,382.67
315 | 2006-04-01,384.61
316 | 2006-05-01,385.03
317 | 2006-06-01,384.05
318 | 2006-07-01,382.46
319 | 2006-08-01,380.41
320 | 2006-09-01,378.85
321 | 2006-10-01,379.13
322 | 2006-11-01,380.15
323 | 2006-12-01,381.82
324 | 2007-01-01,382.89
325 | 2007-02-01,383.9
326 | 2007-03-01,384.58
327 | 2007-04-01,386.5
328 | 2007-05-01,386.56
329 | 2007-06-01,386.1
330 | 2007-07-01,384.5
331 | 2007-08-01,381.99
332 | 2007-09-01,380.96
333 | 2007-10-01,381.12
334 | 2007-11-01,382.45
335 | 2007-12-01,383.94
336 | 2008-01-01,385.52
337 | 2008-02-01,385.82
338 | 2008-03-01,386.03
339 | 2008-04-01,387.21
340 | 2008-05-01,388.54
341 | 2008-06-01,387.76
342 | 2008-07-01,386.37
343 | 2008-08-01,384.09
344 | 2008-09-01,383.18
345 | 2008-10-01,382.99
346 | 2008-11-01,384.19
347 | 2008-12-01,385.56
348 | 2009-01-01,386.94
349 | 2009-02-01,387.48
350 | 2009-03-01,388.82
351 | 2009-04-01,389.55
352 | 2009-05-01,390.14
353 | 2009-06-01,389.48
354 | 2009-07-01,388.03
355 | 2009-08-01,386.11
356 | 2009-09-01,384.74
357 | 2009-10-01,384.43
358 | 2009-11-01,386.02
359 | 2009-12-01,387.42
360 | 2010-01-01,388.71
361 | 2010-02-01,390.2
362 | 2010-03-01,391.17
363 | 2010-04-01,392.46
364 | 2010-05-01,393
365 | 2010-06-01,392.15
366 | 2010-07-01,390.2
367 | 2010-08-01,388.35
368 | 2010-09-01,386.85
369 | 2010-10-01,387.24
370 | 2010-11-01,388.67
371 | 2010-12-01,389.79
372 | 2011-01-01,391.33
373 | 2011-02-01,391.86
374 | 2011-03-01,392.6
375 | 2011-04-01,393.25
376 | 2011-05-01,394.19
377 | 2011-06-01,393.74
378 | 2011-07-01,392.51
379 | 2011-08-01,390.13
380 | 2011-09-01,389.08
381 | 2011-10-01,389
382 | 2011-11-01,390.28
383 | 2011-12-01,391.86
384 | 2012-01-01,393.12
385 | 2012-02-01,393.86
386 | 2012-03-01,394.4
387 | 2012-04-01,396.18
388 | 2012-05-01,396.74
389 | 2012-06-01,395.71
390 | 2012-07-01,394.36
391 | 2012-08-01,392.39
392 | 2012-09-01,391.11
393 | 2012-10-01,391.05
394 | 2012-11-01,392.98
395 | 2012-12-01,394.34
396 | 2013-01-01,395.55
397 | 2013-02-01,396.8
398 | 2013-03-01,397.43
399 | 2013-04-01,398.41
400 | 2013-05-01,399.78
401 | 2013-06-01,398.6
402 | 2013-07-01,397.32
403 | 2013-08-01,395.2
404 | 2013-09-01,393.45
405 | 2013-10-01,393.7
406 | 2013-11-01,395.16
407 | 2013-12-01,396.84
408 | 2014-01-01,397.85
409 | 2014-02-01,398.01
410 | 2014-03-01,399.77
411 | 2014-04-01,401.38
412 | 2014-05-01,401.78
413 | 2014-06-01,401.25
414 | 2014-07-01,399.1
415 | 2014-08-01,397.03
416 | 2014-09-01,395.38
417 | 2014-10-01,396.03
418 | 2014-11-01,397.28
419 | 2014-12-01,398.91
420 | 2015-01-01,399.98
421 | 2015-02-01,400.28
422 | 2015-03-01,401.54
423 | 2015-04-01,403.28
424 | 2015-05-01,403.96
425 | 2015-06-01,402.8
426 | 2015-07-01,401.31
427 | 2015-08-01,398.93
428 | 2015-09-01,397.63
429 | 2015-10-01,398.29
430 | 2015-11-01,400.16
431 | 2015-12-01,401.85
432 | 2016-01-01,402.56
433 | 2016-02-01,404.12
434 | 2016-03-01,404.87
435 | 2016-04-01,407.45
436 | 2016-05-01,407.72
437 | 2016-06-01,406.83
438 | 2016-07-01,404.41
439 | 2016-08-01,402.27
440 | 2016-09-01,401.05
441 | 2016-10-01,401.59
442 | 2016-11-01,403.55
443 | 2016-12-01,404.45
444 | 2017-01-01,406.17
445 | 2017-02-01,406.46
446 | 2017-03-01,407.22
447 | 2017-04-01,409.04
448 | 2017-05-01,409.69
449 | 2017-06-01,408.88
450 | 2017-07-01,407.12
451 | 2017-08-01,405.13
452 | 2017-09-01,403.37
453 | 2017-10-01,403.63
454 | 2017-11-01,405.12
455 | 2017-12-01,406.81
456 | 2018-01-01,407.96
457 | 2018-02-01,408.32
458 | 2018-03-01,409.41
459 | 2018-04-01,410.24
460 | 2018-05-01,411.24
461 | 2018-06-01,410.79
462 | 2018-07-01,408.71
463 | 2018-08-01,406.99
464 | 2018-09-01,405.51
465 | 2018-10-01,406
466 | 2018-11-01,408.02
467 | 2018-12-01,409.07
468 | 2019-01-01,410.83
469 | 2019-02-01,411.75
470 | 2019-03-01,411.97
471 | 2019-04-01,413.33
472 | 2019-05-01,414.64
473 | 2019-06-01,413.93
474 | 2019-07-01,411.74
475 | 2019-08-01,409.95
476 | 2019-09-01,408.54
477 | 2019-10-01,408.52
478 | 2019-11-01,410.25
479 | 2019-12-01,411.76
480 | 2020-01-01,413.39
481 | 2020-02-01,414.11
482 | 2020-03-01,414.51
483 | 2020-04-01,416.21
484 | 2020-05-01,417.07
485 | 2020-06-01,416.39
486 |
--------------------------------------------------------------------------------
/data/michelson.csv:
--------------------------------------------------------------------------------
1 | Expt,Run,Speed
2 | 1,1,850
3 | 1,2,740
4 | 1,3,900
5 | 1,4,1070
6 | 1,5,930
7 | 1,6,850
8 | 1,7,950
9 | 1,8,980
10 | 1,9,980
11 | 1,10,880
12 | 1,11,1000
13 | 1,12,980
14 | 1,13,930
15 | 1,14,650
16 | 1,15,760
17 | 1,16,810
18 | 1,17,1000
19 | 1,18,1000
20 | 1,19,960
21 | 1,20,960
22 | 2,1,960
23 | 2,2,940
24 | 2,3,960
25 | 2,4,940
26 | 2,5,880
27 | 2,6,800
28 | 2,7,850
29 | 2,8,880
30 | 2,9,900
31 | 2,10,840
32 | 2,11,830
33 | 2,12,790
34 | 2,13,810
35 | 2,14,880
36 | 2,15,880
37 | 2,16,830
38 | 2,17,800
39 | 2,18,790
40 | 2,19,760
41 | 2,20,800
42 | 3,1,880
43 | 3,2,880
44 | 3,3,880
45 | 3,4,860
46 | 3,5,720
47 | 3,6,720
48 | 3,7,620
49 | 3,8,860
50 | 3,9,970
51 | 3,10,950
52 | 3,11,880
53 | 3,12,910
54 | 3,13,850
55 | 3,14,870
56 | 3,15,840
57 | 3,16,840
58 | 3,17,850
59 | 3,18,840
60 | 3,19,840
61 | 3,20,840
62 | 4,1,890
63 | 4,2,810
64 | 4,3,810
65 | 4,4,820
66 | 4,5,800
67 | 4,6,770
68 | 4,7,760
69 | 4,8,740
70 | 4,9,750
71 | 4,10,760
72 | 4,11,910
73 | 4,12,920
74 | 4,13,890
75 | 4,14,860
76 | 4,15,880
77 | 4,16,720
78 | 4,17,840
79 | 4,18,850
80 | 4,19,850
81 | 4,20,780
82 | 5,1,890
83 | 5,2,840
84 | 5,3,780
85 | 5,4,810
86 | 5,5,760
87 | 5,6,810
88 | 5,7,790
89 | 5,8,810
90 | 5,9,820
91 | 5,10,850
92 | 5,11,870
93 | 5,12,870
94 | 5,13,810
95 | 5,14,740
96 | 5,15,810
97 | 5,16,940
98 | 5,17,950
99 | 5,18,800
100 | 5,19,810
101 | 5,20,870
102 |
--------------------------------------------------------------------------------
/data/mtcars.csv:
--------------------------------------------------------------------------------
1 | mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
2 | 21.0,6.0,160.0,110.0,3.9,2.62,16.46,0.0,1.0,4.0,4.0
3 | 21.0,6.0,160.0,110.0,3.9,2.875,17.02,0.0,1.0,4.0,4.0
4 | 22.8,4.0,108.0,93.0,3.85,2.32,18.61,1.0,1.0,4.0,1.0
5 | 21.4,6.0,258.0,110.0,3.08,3.215,19.44,1.0,0.0,3.0,1.0
6 | 18.7,8.0,360.0,175.0,3.15,3.44,17.02,0.0,0.0,3.0,2.0
7 | 18.1,6.0,225.0,105.0,2.76,3.46,20.22,1.0,0.0,3.0,1.0
8 | 14.3,8.0,360.0,245.0,3.21,3.57,15.84,0.0,0.0,3.0,4.0
9 | 24.4,4.0,146.7,62.0,3.69,3.19,20.0,1.0,0.0,4.0,2.0
10 | 22.8,4.0,140.8,95.0,3.92,3.15,22.9,1.0,0.0,4.0,2.0
11 | 19.2,6.0,167.6,123.0,3.92,3.44,18.3,1.0,0.0,4.0,4.0
12 | 17.8,6.0,167.6,123.0,3.92,3.44,18.9,1.0,0.0,4.0,4.0
13 | 16.4,8.0,275.8,180.0,3.07,4.07,17.4,0.0,0.0,3.0,3.0
14 | 17.3,8.0,275.8,180.0,3.07,3.73,17.6,0.0,0.0,3.0,3.0
15 | 15.2,8.0,275.8,180.0,3.07,3.78,18.0,0.0,0.0,3.0,3.0
16 | 10.4,8.0,472.0,205.0,2.93,5.25,17.98,0.0,0.0,3.0,4.0
17 | 10.4,8.0,460.0,215.0,3.0,5.424,17.82,0.0,0.0,3.0,4.0
18 | 14.7,8.0,440.0,230.0,3.23,5.345,17.42,0.0,0.0,3.0,4.0
19 | 32.4,4.0,78.7,66.0,4.08,2.2,19.47,1.0,1.0,4.0,1.0
20 | 30.4,4.0,75.7,52.0,4.93,1.615,18.52,1.0,1.0,4.0,2.0
21 | 33.9,4.0,71.1,65.0,4.22,1.835,19.9,1.0,1.0,4.0,1.0
22 | 21.5,4.0,120.1,97.0,3.7,2.465,20.01,1.0,0.0,3.0,1.0
23 | 15.5,8.0,318.0,150.0,2.76,3.52,16.87,0.0,0.0,3.0,2.0
24 | 15.2,8.0,304.0,150.0,3.15,3.435,17.3,0.0,0.0,3.0,2.0
25 | 13.3,8.0,350.0,245.0,3.73,3.84,15.41,0.0,0.0,3.0,4.0
26 | 19.2,8.0,400.0,175.0,3.08,3.845,17.05,0.0,0.0,3.0,2.0
27 | 27.3,4.0,79.0,66.0,4.08,1.935,18.9,1.0,1.0,4.0,1.0
28 | 26.0,4.0,120.3,91.0,4.43,2.14,16.7,0.0,1.0,5.0,2.0
29 | 30.4,4.0,95.1,113.0,3.77,1.513,16.9,1.0,1.0,5.0,2.0
30 | 15.8,8.0,351.0,264.0,4.22,3.17,14.5,0.0,1.0,5.0,4.0
31 | 19.7,6.0,145.0,175.0,3.62,2.77,15.5,0.0,1.0,5.0,6.0
32 | 15.0,8.0,301.0,335.0,3.54,3.57,14.6,0.0,1.0,5.0,8.0
33 | 21.4,4.0,121.0,109.0,4.11,2.78,18.6,1.0,1.0,4.0,2.0
34 |
--------------------------------------------------------------------------------
/data/penguins.csv:
--------------------------------------------------------------------------------
1 | bill_length_mm,flipper_length_mm
2 | 39.2,196
3 | 36.5,182
4 | 34.5,187
5 | 36.7,187
6 | 38.1,181
7 | 39.2,190
8 | 36,195
9 | 37.8,193
10 | 46.5,213
11 | 46.1,215
12 | 47.8,215
13 | 45,220
14 | 49.1,212
15 | 43.3,208
16 | 46,195
17 | 46.7,195
18 | 52.2,197
19 | 46.8,189
20 |
--------------------------------------------------------------------------------
/data/penguins_all_vars.csv:
--------------------------------------------------------------------------------
1 | island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year,cluster
2 | Dream,39.2,21.1,196,4150,male,2007,1
3 | Dream,36.5,18,182,3150,female,2007,1
4 | Biscoe,34.5,18.1,187,2900,female,2008,1
5 | Torgersen,36.7,18.8,187,3800,female,2008,1
6 | Biscoe,38.1,17,181,3175,female,2009,1
7 | Dream,39.2,18.6,190,4250,male,2009,1
8 | Dream,36,17.8,195,3450,female,2009,1
9 | Dream,37.8,18.1,193,3750,male,2009,1
10 | Biscoe,46.5,14.5,213,4400,female,2007,3
11 | Biscoe,46.1,15.1,215,5100,male,2007,3
12 | Biscoe,47.8,15,215,5650,male,2007,3
13 | Biscoe,45,15.4,220,5050,male,2008,3
14 | Biscoe,49.1,14.5,212,4625,female,2009,3
15 | Biscoe,43.3,14,208,4575,female,2009,3
16 | Dream,46,18.9,195,4150,female,2007,2
17 | Dream,46.7,17.9,195,3300,female,2007,2
18 | Dream,52.2,18.8,197,3450,male,2009,2
19 | Dream,46.8,16.5,189,3650,female,2009,2
20 |
--------------------------------------------------------------------------------
/data/region_data.csv:
--------------------------------------------------------------------------------
1 | region,households,area,population,dwellings
2 | Belleville,43002,1354.65121,103472,45050
3 | Lethbridge,45696,3046.69699,117394,48317
4 | Thunder Bay,52545,2618.26318,121621,57146
5 | Peterborough,50533,1636.98336,121721,55662
6 | Saint John,52872,3793.42158,126202,58398
7 | Brantford,52530,1086.27106,134203,54419
8 | Moncton,61769,2625.1211,144810,66699
9 | Guelph,59280,604.00365,151984,63324
10 | Trois-Rivières,72502,1052.80206,156042,77734
11 | Saguenay,72479,3078.79919,160980,77968
12 | Kingston,67915,2142.32855,161175,77173
13 | Greater Sudbury,70445,4372.1229,164689,76619
14 | Abbotsford - Mission,62631,651.99511,180518,65967
15 | Kelowna,81383,3144.90019,194882,88374
16 | Barrie,72534,967.67675,197059,76336
17 | St. John's,85015,850.46041,205955,92353
18 | Sherbrooke,95577,1506.36002,212105,106082
19 | Regina,94955,4408.86418,236481,101719
20 | Saskatoon,115283,6218.50503,295095,124766
21 | Windsor,132912,1032.38176,329144,140408
22 | Victoria,162716,704.4339,367770,172559
23 | Oshawa,138962,908.06142,379848,142462
24 | Halifax,173459,5963.13705,403390,187478
25 | St. Catharines - Niagara,168485,1425.34399,406074,180606
26 | London,206448,2677.86088,494069,220452
27 | Kitchener - Cambridge - Waterloo,200495,1106.65072,523894,210896
28 | Hamilton,293345,1404.6567,747545,306034
29 | Winnipeg,306550,5410.82907,778489,321484
30 | Québec,361891,3475.38576,800296,382308
31 | Edmonton,502143,9857.77908,1321426,537634
32 | Ottawa - Gatineau,535499,7168.96442,1323783,571146
33 | Calgary,519693,5241.70103,1392609,544870
34 | Vancouver,960894,3040.41532,2463431,1027613
35 | Montréal,1727310,4638.24059,4098927,1823281
36 | Toronto,2135909,6269.93132,5928040,2235145
37 |
--------------------------------------------------------------------------------
/data/state_property_vote.csv:
--------------------------------------------------------------------------------
1 | state,pop,med_prop_val,med_income,avg_commute,party
2 | Montana,1042520,217200,46608,16.35,Republican
3 | Alabama,4863300,136200,42917,23.78,Republican
4 | Arizona,6931071,205900,50036,23.69,Republican
5 | Arkansas,2988248,123300,41335,20.49,Republican
6 | California,39250017,477500,61927,27.67,Democratic
7 | Colorado,5540545,314200,61324,23.02,Democratic
8 | Connecticut,3576452,274600,70007,24.92,Democratic
9 | Delaware,952065,243400,59853,24.97,Democratic
10 | District of Columbia,681170,576100,75506,28.96,Democratic
11 | Florida,20612439,197700,47439,25.8,Republican
12 | Georgia,10310371,166800,49240,26.91,Republican
13 | Hawaii,1428557,592000,69549,26.03,Democratic
14 | Idaho,1683140,189400,47572,19.71,Republican
15 | Illinois,12801539,186500,57458,27.49,Democratic
16 | Indiana,6633053,134800,49384,22.66,Republican
17 | Iowa,3134693,142300,53816,18.11,Republican
18 | Kansas,2907289,144900,52392,18.52,Republican
19 | Kentucky,4436974,135600,42914,22.4,Republican
20 | Louisiana,4681666,158000,44680,24.24,Republican
21 | Maine,1331479,184700,49381,22.52,Democratic
22 | Maryland,6016447,306900,73851,31.26,Democratic
23 | Massachusetts,6811779,366900,69200,28.05,Democratic
24 | Michigan,9928300,147100,49755,23.49,Republican
25 | Minnesota,5519952,211800,61473,22.08,Democratic
26 | Mississippi,2988726,113900,39738,23.77,Republican
27 | Missouri,6093000,151400,48288,22.45,Republican
28 | Alaska,741894,267800,70898,17.03,Republican
29 | Nebraska,1907116,148100,52803,17.78,Republican
30 | Nevada,2940058,239500,51487,23.07,Democratic
31 | New Hampshire,1334795,251100,66469,25.25,Democratic
32 | New Jersey,8944469,328200,71968,30.28,Democratic
33 | New Mexico,2081015,167500,44905,20.81,Democratic
34 | New York,19745289,302400,58771,31.95,Democratic
35 | North Carolina,10146788,165400,46596,23.07,Republican
36 | North Dakota,757953,184100,60227,16.5,Republican
37 | Ohio,11614373,140100,49349,22.43,Republican
38 | Oklahoma,3923561,132200,47524,20.47,Republican
39 | Oregon,4093465,287100,51088,22.29,Democratic
40 | Pennsylvania,12784227,174100,53224,25.61,Republican
41 | Rhode Island,1056426,247700,54797,23.78,Democratic
42 | South Carolina,4961119,153900,45337,23.67,Republican
43 | South Dakota,865454,160700,51059,15.73,Republican
44 | Tennessee,6651194,157700,44357,23.9,Republican
45 | Texas,27862596,161500,53067,25.2,Republican
46 | Utah,3051217,250300,60943,20.31,Republican
47 | Vermont,624594,223700,54153,21.49,Democratic
48 | Virginia,8411808,264000,64923,27.03,Democratic
49 | Washington,7288000,306400,61358,26.21,Democratic
50 | West Virginia,1831102,117900,41030,24.36,Republican
51 | Wisconsin,5778709,173200,52632,20.89,Republican
52 | Wyoming,585501,209500,58291,15.94,Republican
53 | Puerto Rico,3411307,111900,20078,28.36,Not Applicable
--------------------------------------------------------------------------------
/data/state_property_vote.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/state_property_vote.db
--------------------------------------------------------------------------------
/data/state_property_vote.tsv:
--------------------------------------------------------------------------------
1 | Montana 1042520 217200 46608 16.35 Republican
2 | Alabama 4863300 136200 42917 23.78 Republican
3 | Arizona 6931071 205900 50036 23.69 Republican
4 | Arkansas 2988248 123300 41335 20.49 Republican
5 | California 39250017 477500 61927 27.67 Democratic
6 | Colorado 5540545 314200 61324 23.02 Democratic
7 | Connecticut 3576452 274600 70007 24.92 Democratic
8 | Delaware 952065 243400 59853 24.97 Democratic
9 | District of Columbia 681170 576100 75506 28.96 Democratic
10 | Florida 20612439 197700 47439 25.8 Republican
11 | Georgia 10310371 166800 49240 26.91 Republican
12 | Hawaii 1428557 592000 69549 26.03 Democratic
13 | Idaho 1683140 189400 47572 19.71 Republican
14 | Illinois 12801539 186500 57458 27.49 Democratic
15 | Indiana 6633053 134800 49384 22.66 Republican
16 | Iowa 3134693 142300 53816 18.11 Republican
17 | Kansas 2907289 144900 52392 18.52 Republican
18 | Kentucky 4436974 135600 42914 22.4 Republican
19 | Louisiana 4681666 158000 44680 24.24 Republican
20 | Maine 1331479 184700 49381 22.52 Democratic
21 | Maryland 6016447 306900 73851 31.26 Democratic
22 | Massachusetts 6811779 366900 69200 28.05 Democratic
23 | Michigan 9928300 147100 49755 23.49 Republican
24 | Minnesota 5519952 211800 61473 22.08 Democratic
25 | Mississippi 2988726 113900 39738 23.77 Republican
26 | Missouri 6093000 151400 48288 22.45 Republican
27 | Alaska 741894 267800 70898 17.03 Republican
28 | Nebraska 1907116 148100 52803 17.78 Republican
29 | Nevada 2940058 239500 51487 23.07 Democratic
30 | New Hampshire 1334795 251100 66469 25.25 Democratic
31 | New Jersey 8944469 328200 71968 30.28 Democratic
32 | New Mexico 2081015 167500 44905 20.81 Democratic
33 | New York 19745289 302400 58771 31.95 Democratic
34 | North Carolina 10146788 165400 46596 23.07 Republican
35 | North Dakota 757953 184100 60227 16.5 Republican
36 | Ohio 11614373 140100 49349 22.43 Republican
37 | Oklahoma 3923561 132200 47524 20.47 Republican
38 | Oregon 4093465 287100 51088 22.29 Democratic
39 | Pennsylvania 12784227 174100 53224 25.61 Republican
40 | Rhode Island 1056426 247700 54797 23.78 Democratic
41 | South Carolina 4961119 153900 45337 23.67 Republican
42 | South Dakota 865454 160700 51059 15.73 Republican
43 | Tennessee 6651194 157700 44357 23.9 Republican
44 | Texas 27862596 161500 53067 25.2 Republican
45 | Utah 3051217 250300 60943 20.31 Republican
46 | Vermont 624594 223700 54153 21.49 Democratic
47 | Virginia 8411808 264000 64923 27.03 Democratic
48 | Washington 7288000 306400 61358 26.21 Democratic
49 | West Virginia 1831102 117900 41030 24.36 Republican
50 | Wisconsin 5778709 173200 52632 20.89 Republican
51 | Wyoming 585501 209500 58291 15.94 Republican
52 | Puerto Rico 3411307 111900 20078 28.36 Not Applicable
53 |
--------------------------------------------------------------------------------
/data/state_property_vote.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/state_property_vote.xlsx
--------------------------------------------------------------------------------
/data/state_property_vote_meta-data.csv:
--------------------------------------------------------------------------------
1 | Data source: https://datausa.io/
2 | Record of how data was collected: https://github.com/UBC-DSCI/introduction-to-datascience/blob/master/data/retrieve_data.ipynb
3 | Date collected: 2020-07-08
4 | state,pop,med_prop_val,med_income,avg_commute,party
5 | Montana,1042520,217200,46608,16.35,Republican
6 | Alabama,4863300,136200,42917,23.78,Republican
7 | Arizona,6931071,205900,50036,23.69,Republican
8 | Arkansas,2988248,123300,41335,20.49,Republican
9 | California,39250017,477500,61927,27.67,Democratic
10 | Colorado,5540545,314200,61324,23.02,Democratic
11 | Connecticut,3576452,274600,70007,24.92,Democratic
12 | Delaware,952065,243400,59853,24.97,Democratic
13 | District of Columbia,681170,576100,75506,28.96,Democratic
14 | Florida,20612439,197700,47439,25.8,Republican
15 | Georgia,10310371,166800,49240,26.91,Republican
16 | Hawaii,1428557,592000,69549,26.03,Democratic
17 | Idaho,1683140,189400,47572,19.71,Republican
18 | Illinois,12801539,186500,57458,27.49,Democratic
19 | Indiana,6633053,134800,49384,22.66,Republican
20 | Iowa,3134693,142300,53816,18.11,Republican
21 | Kansas,2907289,144900,52392,18.52,Republican
22 | Kentucky,4436974,135600,42914,22.4,Republican
23 | Louisiana,4681666,158000,44680,24.24,Republican
24 | Maine,1331479,184700,49381,22.52,Democratic
25 | Maryland,6016447,306900,73851,31.26,Democratic
26 | Massachusetts,6811779,366900,69200,28.05,Democratic
27 | Michigan,9928300,147100,49755,23.49,Republican
28 | Minnesota,5519952,211800,61473,22.08,Democratic
29 | Mississippi,2988726,113900,39738,23.77,Republican
30 | Missouri,6093000,151400,48288,22.45,Republican
31 | Alaska,741894,267800,70898,17.03,Republican
32 | Nebraska,1907116,148100,52803,17.78,Republican
33 | Nevada,2940058,239500,51487,23.07,Democratic
34 | New Hampshire,1334795,251100,66469,25.25,Democratic
35 | New Jersey,8944469,328200,71968,30.28,Democratic
36 | New Mexico,2081015,167500,44905,20.81,Democratic
37 | New York,19745289,302400,58771,31.95,Democratic
38 | North Carolina,10146788,165400,46596,23.07,Republican
39 | North Dakota,757953,184100,60227,16.5,Republican
40 | Ohio,11614373,140100,49349,22.43,Republican
41 | Oklahoma,3923561,132200,47524,20.47,Republican
42 | Oregon,4093465,287100,51088,22.29,Democratic
43 | Pennsylvania,12784227,174100,53224,25.61,Republican
44 | Rhode Island,1056426,247700,54797,23.78,Democratic
45 | South Carolina,4961119,153900,45337,23.67,Republican
46 | South Dakota,865454,160700,51059,15.73,Republican
47 | Tennessee,6651194,157700,44357,23.9,Republican
48 | Texas,27862596,161500,53067,25.2,Republican
49 | Utah,3051217,250300,60943,20.31,Republican
50 | Vermont,624594,223700,54153,21.49,Democratic
51 | Virginia,8411808,264000,64923,27.03,Democratic
52 | Washington,7288000,306400,61358,26.21,Democratic
53 | West Virginia,1831102,117900,41030,24.36,Republican
54 | Wisconsin,5778709,173200,52632,20.89,Republican
55 | Wyoming,585501,209500,58291,15.94,Republican
56 | Puerto Rico,3411307,111900,20078,28.36,Not Applicable
57 |
--------------------------------------------------------------------------------
/data/us_vote.csv:
--------------------------------------------------------------------------------
1 | election_num,election_year,winner,winner_party,elec_coll_votes_count,elec_coll_votes_tot,elec_coll_votes_perc,pop_votes_perc,pop_votes_perc_marg,pop_votes_count,pop_votes_count_marg,runnerup,runnerup_party,turnout
2 | 1,1788,George Washington,Ind.,69,69,100.00,100.00,100.00,43782,43782,No candidate,None,11.6
3 | 2,1792,George Washington,Ind.,132,132,100.00,100.00,100.00,28579,28579,No candidate,None,6.3
4 | 3,1796,John Adams,Fed.,71,138,51.45,53.45,6.90,35726,4611,Thomas Jefferson,D.-R.,20.1
5 | 4,1800,Thomas Jefferson,D.-R.,73,138,52.90,61.43,22.86,41330,15378,John Adams,Fed.,32.3
6 | 5,1804,Thomas Jefferson,D.-R.,162,176,92.05,72.79,45.58,104110,65191,Charles C. Pinckney,Fed.,23.8
7 | 6,1808,James Madison,D.-R.,122,175,69.72,64.73,32.33,124732,62301,Charles C. Pinckney,Fed.,36.8
8 | 7,1812,James Madison,D.-R.,128,217,58.99,50.37,2.74,140431,7650,DeWitt Clinton,D.-R.,40.4
9 | 8,1816,James Monroe,D.-R.,183,217,84.33,68.16,37.24,76592,41852,Rufus King,Fed.,23.5
10 | 9,1820,James Monroe,D.-R.,231,232,99.57,80.61,64.69,87343,69878,No candidate,Fed.,10.1
11 | 10,1824,John Quincy Adams,D.-R.,84,261,32.18,30.92,−10.44,113142,−38221,Andrew Jackson,D.-R.,26.9
12 | 11,1828,Andrew Jackson,Dem.,178,261,68.20,55.93,12.25,642806,140839,John Quincy Adams,N. R.,57.3
13 | 12,1832,Andrew Jackson,Dem.,219,286,76.57,54.74,17.81,702735,228628,Henry Clay,N. R.,57.0
14 | 13,1836,Martin Van Buren,Dem.,170,294,57.82,50.79,14.20,763291,213384,William Henry Harrison,Whig,56.5
15 | 14,1840,William Henry Harrison,Whig,234,294,79.59,52.87,6.05,1275583,145938,Martin Van Buren,Dem.,80.3
16 | 15,1844,James Polk,Dem.,170,275,61.82,49.54,1.45,1339570,39413,Henry Clay,Whig,79.2
17 | 16,1848,Zachary Taylor,Whig,163,290,56.21,47.28,4.79,1360235,137882,Lewis Cass,Dem.,72.8
18 | 17,1852,Franklin Pierce,Dem.,254,296,85.81,50.83,6.95,1605943,219525,Winfield Scott,Whig,69.5
19 | 18,1856,James Buchanan,Dem.,174,296,58.78,45.29,12.20,1835140,494472,John Frémont,Rep.,79.4
20 | 19,1860,Abraham Lincoln,Rep.,180,303,59.41,39.65,10.13,1855993,474049,John Breckinridge,Dem.,81.8
21 | 20,1864,Abraham Lincoln,Rep.,212,233,90.99,55.03,10.08,2211317,405090,George McClellan,Dem.,76.3
22 | 21,1868,Ulysses Grant,Rep.,214,294,72.79,52.66,5.32,3013790,304810,Horatio Seymour,Dem.,80.9
23 | 22,1872,Ulysses Grant,Rep.,286,352,81.25,55.58,11.80,3597439,763729,Horace Greeley,L. R.,72.1
24 | 23,1876,Rutherford Hayes,Rep.,185,369,50.14,47.92,−3.00,4034142,−252666,Samuel Tilden,Dem.,82.6
25 | 24,1880,James Garfield,Rep.,214,369,57.99,48.31,0.09,4453337,1898,Winfield Scott Hancock,Dem.,80.5
26 | 25,1884,Grover Cleveland,Dem.,219,401,54.61,48.85,0.57,4914482,57579,James Blaine,Rep.,78.2
27 | 26,1888,Benjamin Harrison,Rep.,233,401,58.10,47.80,−0.83,5443892,−90596,Grover Cleveland,Dem.,80.5
28 | 27,1892,Grover Cleveland,Dem.,277,444,62.39,46.02,3.01,5553898,363099,Benjamin Harrison,Rep.,75.8
29 | 28,1896,William McKinley,Rep.,271,447,60.63,51.02,4.31,7112138,601331,William Jennings Bryan,Dem.,79.6
30 | 29,1900,William McKinley,Rep.,292,447,65.23,51.64,6.12,7228864,857932,William Jennings Bryan,Dem.,73.7
31 | 30,1904,Theodore Roosevelt,Rep.,336,476,70.59,56.42,18.83,7630557,2546677,Alton Brooks Parker,Dem.,65.5
32 | 31,1908,William Taft,Rep.,321,483,66.46,51.57,8.53,7678335,1269356,William Jennings Bryan,Dem.,65.7
33 | 32,1912,Woodrow Wilson,Dem.,435,531,81.92,41.84,14.44,6296284,2173563,Theodore Roosevelt,Prog.,59.0
34 | 33,1916,Woodrow Wilson,Dem.,277,531,52.17,49.24,3.12,9126868,578140,Charles Evans Hughes,Rep.,61.8
35 | 34,1920,Warren Harding,Rep.,404,531,76.08,60.32,26.17,16144093,7004432,James Cox,Dem.,49.2
36 | 35,1924,Calvin Coolidge,Rep.,382,531,71.94,54.04,25.22,15723789,7337547,John Davis,Dem.,48.9
37 | 36,1928,Herbert Hoover,Rep.,444,531,83.62,58.21,17.41,21427123,6411659,Al Smith,Dem.,56.9
38 | 37,1932,Franklin Roosevelt,Dem.,472,531,88.89,57.41,17.76,22821277,7060023,Herbert Hoover,Rep.,56.9
39 | 38,1936,Franklin Roosevelt,Dem.,523,531,98.49,60.80,24.26,27752648,11070786,Alf Landon,Rep.,61.0
40 | 39,1940,Franklin Roosevelt,Dem.,449,531,84.56,54.74,9.96,27313945,4966201,Wendell Willkie,Rep.,62.4
41 | 40,1944,Franklin Roosevelt,Dem.,432,531,81.36,53.39,7.50,25612916,3594987,Thomas Dewey,Rep.,55.9
42 | 41,1948,Harry Truman,Dem.,303,531,57.06,49.55,4.48,24179347,2188055,Thomas Dewey,Rep.,52.2
43 | 42,1952,Dwight Eisenhower,Rep.,442,531,83.24,55.18,10.85,34075529,6700439,Adlai Stevenson,Dem.,62.3
44 | 43,1956,Dwight Eisenhower,Rep.,457,531,86.06,57.37,15.40,35579180,9551152,Adlai Stevenson,Dem.,60.2
45 | 44,1960,John F. Kennedy,Dem.,303,537,56.42,49.72,0.17,34220984,112827,Richard Nixon,Rep.,63.8
46 | 45,1964,Lyndon Johnson,Dem.,486,538,90.33,61.05,22.58,43127041,15951287,Barry Goldwater,Rep.,62.8
47 | 46,1968,Richard Nixon,Rep.,301,538,55.95,43.42,0.70,31783783,511944,Hubert Humphrey,Dem.,62.5
48 | 47,1972,Richard Nixon,Rep.,520,538,96.65,60.67,23.15,47168710,17995488,George McGovern,Dem.,56.2
49 | 48,1976,Jimmy Carter,Dem.,297,538,55.20,50.08,2.06,40831881,1683247,Gerald Ford,Rep.,54.8
50 | 49,1980,Ronald Reagan,Rep.,489,538,90.89,50.75,9.74,43903230,8423115,Jimmy Carter,Dem.,54.2
51 | 50,1984,Ronald Reagan,Rep.,525,538,97.58,58.77,18.21,54455472,16878120,Walter Mondale,Dem.,55.2
52 | 51,1988,George H. W. Bush,Rep.,426,538,79.18,53.37,7.72,48886597,7077121,Michael Dukakis,Dem.,52.8
53 | 52,1992,Bill Clinton,Dem.,370,538,68.77,43.01,5.56,44909806,5805256,George H. W. Bush,Rep.,58.1
54 | 53,1996,Bill Clinton,Dem.,379,538,70.45,49.23,8.51,47400125,8201370,Bob Dole,Rep.,51.7
55 | 54,2000,George W. Bush,Rep.,271,538,50.37,47.87,−0.51,50460110,−543816,Al Gore,Dem.,54.2
56 | 55,2004,George W. Bush,Rep.,286,538,53.16,50.73,2.46,62040610,3012171,John Kerry,Dem.,60.1
57 | 56,2008,Barack Obama,Dem.,365,538,67.84,52.93,7.27,69498516,9550193,John McCain,Rep.,61.6
58 | 57,2012,Barack Obama,Dem.,332,538,61.71,51.06,3.86,65915795,4982291,Mitt Romney,Rep.,58.6
59 | 58,2016,Donald Trump,Rep.,304,538,56.50,46.09,−2.09,62984828,−2868686,Hillary Clinton,Dem.,60.2
60 |
--------------------------------------------------------------------------------
/data/wdbc_missing.csv:
--------------------------------------------------------------------------------
1 | ID,Class,Radius,Texture,Perimeter,Area,Smoothness,Compactness,Concavity,Concave_Points,Symmetry,Fractal_Dimension
2 | 842302,M,,,1.2688172627037921,0.983509520104142,1.5670874574786582,3.2806280641246857,2.650541786383573,2.530248864134298,2.215565541846305,2.25376381072807
3 | 842517,M,1.8282119737343598,-0.3533215225500966,1.684472552277101,1.9070302686337925,-0.826235446757039,-0.486643477616135,-0.023824891805531347,0.5476622708254778,0.001391139243576388,-0.8678888068037953
4 | 84300903,M,1.5784992020342323,,1.5651259839837746,1.5575131853441093,0.941382123037953,1.051999895332493,1.362279788963212,2.0354397832616953,0.9388587199172193,-0.39765801323729066
5 | 84348301,M,-0.7682333229203782,0.25350905052192196,-0.5921661228907633,-0.7637917361139566,3.280666839299224,3.3999174223523045,1.9142128745181868,1.4504311303550237,2.864862154141668,4.906601992505377
6 | 84358402,M,1.7487579100115918,-1.1508038465489563,1.7750113282237618,1.8246238018419159,0.2801253491403896,0.5388663067660666,1.3698061492207798,1.4272369546891206,-0.009552062087244153,-0.5619555194231786
7 | 843786,M,-0.4759558742259106,-0.8346009425727322,-0.3868077174481091,-0.5052059265256544,2.2354545192675923,1.2432415648720105,0.8655400119637346,0.8239306743126811,1.0045179279021434,1.888343495245663
8 | 844359,M,1.1698783028885684,0.16050819641126807,1.1371244976904666,1.0943320099277,-0.12302797430038338,0.08821762012839307,0.2998085992698855,0.646366373937044,-0.06426806874134787,-0.7616619709077471
9 |
10 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:  # Compose service definitions for this repository
2 | book-env:  # the only service declared in this file
3 | image: ubcdsci/intro-to-ds:202307130106229dd1c2  # image pinned to an explicit tag rather than a floating one
4 | ports:
5 | - "8787:8787"  # publish container port 8787 on host port 8787
6 | volumes:
7 | - .:/home/rstudio/introduction-to-datascience  # bind-mount the project directory (.) into the container at this path
8 | environment:
9 | PASSWORD: password  # NOTE(review): hard-coded, trivially guessable value; presumably the RStudio login password - confirm, and override it for anything but local use
10 | deploy:
11 | resources:
12 | limits:
13 | memory: 5G  # upper bound on memory available to the container
14 |
15 |
--------------------------------------------------------------------------------
/img/classification1/plot3d_knn_classification.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification1/plot3d_knn_classification.png
--------------------------------------------------------------------------------
/img/classification2/ML-paradigm-test.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/ML-paradigm-test.ai
--------------------------------------------------------------------------------
/img/classification2/ML-paradigm-test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/ML-paradigm-test.png
--------------------------------------------------------------------------------
/img/classification2/cv.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/cv.ai
--------------------------------------------------------------------------------
/img/classification2/cv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/cv.png
--------------------------------------------------------------------------------
/img/classification2/train-test-overview.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/train-test-overview.ai
--------------------------------------------------------------------------------
/img/classification2/train-test-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/train-test-overview.png
--------------------------------------------------------------------------------
/img/classification2/training_test.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/training_test.ai
--------------------------------------------------------------------------------
/img/classification2/training_test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/training_test.png
--------------------------------------------------------------------------------
/img/clustering/gentoo.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/clustering/gentoo.jpg
--------------------------------------------------------------------------------
/img/frontmatter/chapter_overview.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/chapter_overview.ai
--------------------------------------------------------------------------------
/img/frontmatter/chapter_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/chapter_overview.png
--------------------------------------------------------------------------------
/img/frontmatter/ds-a-first-intro-cover.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/ds-a-first-intro-cover.jpg
--------------------------------------------------------------------------------
/img/frontmatter/ds-a-first-intro-graphic.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/ds-a-first-intro-graphic.jpg
--------------------------------------------------------------------------------
/img/inference/intro-bootstrap.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/inference/intro-bootstrap.jpeg
--------------------------------------------------------------------------------
/img/inference/population_vs_sample.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/inference/population_vs_sample.ai
--------------------------------------------------------------------------------
/img/inference/population_vs_sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/inference/population_vs_sample.png
--------------------------------------------------------------------------------
/img/intro/arrange_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/arrange_function.png
--------------------------------------------------------------------------------
/img/intro/canada_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/canada_map.png
--------------------------------------------------------------------------------
/img/intro/filter_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/filter_function.png
--------------------------------------------------------------------------------
/img/intro/ggplot_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/ggplot_function.png
--------------------------------------------------------------------------------
/img/intro/help-filter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/help-filter.png
--------------------------------------------------------------------------------
/img/intro/intro-all.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/intro-all.ai
--------------------------------------------------------------------------------
/img/intro/read_csv_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/read_csv_function.png
--------------------------------------------------------------------------------
/img/intro/select_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/select_function.png
--------------------------------------------------------------------------------
/img/intro/spreadsheet_vs_dataframe.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/spreadsheet_vs_dataframe.png
--------------------------------------------------------------------------------
/img/jupyter/activate-and-run-button-annotated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/activate-and-run-button-annotated.png
--------------------------------------------------------------------------------
/img/jupyter/code-cell-not-run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/code-cell-not-run.png
--------------------------------------------------------------------------------
/img/jupyter/code-cell-run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/code-cell-run.png
--------------------------------------------------------------------------------
/img/jupyter/convert-to-markdown-cell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/convert-to-markdown-cell.png
--------------------------------------------------------------------------------
/img/jupyter/create-new-code-cell.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/create-new-code-cell.png
--------------------------------------------------------------------------------
/img/jupyter/jupyter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/jupyter.png
--------------------------------------------------------------------------------
/img/jupyter/launcher-annotated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/launcher-annotated.png
--------------------------------------------------------------------------------
/img/jupyter/markdown-cell-not-run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/markdown-cell-not-run.png
--------------------------------------------------------------------------------
/img/jupyter/markdown-cell-run.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/markdown-cell-run.png
--------------------------------------------------------------------------------
/img/jupyter/open_data_w_editor_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/open_data_w_editor_01.png
--------------------------------------------------------------------------------
/img/jupyter/open_data_w_editor_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/open_data_w_editor_02.png
--------------------------------------------------------------------------------
/img/jupyter/out-of-order-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/out-of-order-1.png
--------------------------------------------------------------------------------
/img/jupyter/out-of-order-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/out-of-order-2.png
--------------------------------------------------------------------------------
/img/jupyter/out-of-order-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/out-of-order-3.png
--------------------------------------------------------------------------------
/img/jupyter/restart-kernel-run-all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/restart-kernel-run-all.png
--------------------------------------------------------------------------------
/img/key_files/chapter_overview.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/chapter_overview.key
--------------------------------------------------------------------------------
/img/key_files/data_frame_slides_cdn.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/data_frame_slides_cdn.key
--------------------------------------------------------------------------------
/img/key_files/dsci-100-slide-images.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/dsci-100-slide-images.key
--------------------------------------------------------------------------------
/img/key_files/filesystem.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/filesystem.key
--------------------------------------------------------------------------------
/img/key_files/ggplot_functions.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/ggplot_functions.key
--------------------------------------------------------------------------------
/img/key_files/git_intro.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/git_intro.pptx
--------------------------------------------------------------------------------
/img/key_files/pivot_functions.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/pivot_functions.key
--------------------------------------------------------------------------------
/img/key_files/png-vs-svg.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/png-vs-svg.pptx
--------------------------------------------------------------------------------
/img/key_files/ref_vs_tibble.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/ref_vs_tibble.key
--------------------------------------------------------------------------------
/img/key_files/spreadsheet_vs_dataframe.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/spreadsheet_vs_dataframe.pptx
--------------------------------------------------------------------------------
/img/key_files/summarize.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/summarize.key
--------------------------------------------------------------------------------
/img/key_files/tidy_data.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/tidy_data.key
--------------------------------------------------------------------------------
/img/key_files/tidydata_bootstrap_train_test_images.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/tidydata_bootstrap_train_test_images.key
--------------------------------------------------------------------------------
/img/reading/NASA-API-Rho-Ophiuchi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-Rho-Ophiuchi.png
--------------------------------------------------------------------------------
/img/reading/NASA-API-limits.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-limits.png
--------------------------------------------------------------------------------
/img/reading/NASA-API-parameters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-parameters.png
--------------------------------------------------------------------------------
/img/reading/NASA-API-signup.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-signup.png
--------------------------------------------------------------------------------
/img/reading/craigslist_human.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/craigslist_human.png
--------------------------------------------------------------------------------
/img/reading/filesystem.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/filesystem.ai
--------------------------------------------------------------------------------
/img/reading/filesystem.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/filesystem.png
--------------------------------------------------------------------------------
/img/reading/ref_vs_tibble.001.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/ref_vs_tibble.001.jpeg
--------------------------------------------------------------------------------
/img/reading/sg1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg1.png
--------------------------------------------------------------------------------
/img/reading/sg2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg2.png
--------------------------------------------------------------------------------
/img/reading/sg3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg3.png
--------------------------------------------------------------------------------
/img/reading/sg4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg4.png
--------------------------------------------------------------------------------
/img/regression1/plot3d_knn_regression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/regression1/plot3d_knn_regression.png
--------------------------------------------------------------------------------
/img/regression2/plot3d_linear_regression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/regression2/plot3d_linear_regression.png
--------------------------------------------------------------------------------
/img/setup/docker-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-1.png
--------------------------------------------------------------------------------
/img/setup/docker-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-2.png
--------------------------------------------------------------------------------
/img/setup/docker-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-3.png
--------------------------------------------------------------------------------
/img/setup/docker-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-4.png
--------------------------------------------------------------------------------
/img/setup/jlab-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/jlab-1.png
--------------------------------------------------------------------------------
/img/setup/jlab-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/jlab-2.png
--------------------------------------------------------------------------------
/img/setup/ubuntu-docker.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/ubuntu-docker.png
--------------------------------------------------------------------------------
/img/unused/1024px-Supervised_machine_learning_in_a_nutshell.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/1024px-Supervised_machine_learning_in_a_nutshell.svg
--------------------------------------------------------------------------------
/img/unused/ML-paradigm.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/ML-paradigm.jpeg
--------------------------------------------------------------------------------
/img/unused/ML-paradigm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/ML-paradigm.png
--------------------------------------------------------------------------------
/img/unused/Page_Under_Construction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/Page_Under_Construction.png
--------------------------------------------------------------------------------
/img/unused/README.md:
--------------------------------------------------------------------------------
1 | ## Images
2 | This is the README.md for the Introduction to Data Science textbook images.
3 |
4 | - To make the ggplot function images:
5 |   - Open `ggplot_functions.key` (located in `img/key_files/`).
6 |   - Take a screenshot of the code.
7 |   - Paste the image into the `.key` file and update the relevant text/arrows.
--------------------------------------------------------------------------------
/img/unused/Supervised_machine_learning_in_a_nutshell.svg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/Supervised_machine_learning_in_a_nutshell.svg.png
--------------------------------------------------------------------------------
/img/unused/activate-and-run-button.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/activate-and-run-button.png
--------------------------------------------------------------------------------
/img/unused/add_collab_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_01.png
--------------------------------------------------------------------------------
/img/unused/add_collab_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_02.png
--------------------------------------------------------------------------------
/img/unused/add_collab_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_03.png
--------------------------------------------------------------------------------
/img/unused/add_collab_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_04.png
--------------------------------------------------------------------------------
/img/unused/add_collab_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_05.png
--------------------------------------------------------------------------------
/img/unused/add_collab_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_06.png
--------------------------------------------------------------------------------
/img/unused/add_collab_06_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_06_new.png
--------------------------------------------------------------------------------
/img/unused/chapter_overview.001.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/chapter_overview.001.jpeg
--------------------------------------------------------------------------------
/img/unused/clone_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_01.png
--------------------------------------------------------------------------------
/img/unused/clone_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_02.png
--------------------------------------------------------------------------------
/img/unused/clone_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_03.png
--------------------------------------------------------------------------------
/img/unused/clone_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_04.png
--------------------------------------------------------------------------------
/img/unused/create-new-file_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/create-new-file_01.png
--------------------------------------------------------------------------------
/img/unused/create-new-file_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/create-new-file_02.png
--------------------------------------------------------------------------------
/img/unused/create-new-file_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/create-new-file_03.png
--------------------------------------------------------------------------------
/img/unused/data_frame_slides_cdn.001.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.001.jpeg
--------------------------------------------------------------------------------
/img/unused/data_frame_slides_cdn.002.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.002.jpeg
--------------------------------------------------------------------------------
/img/unused/data_frame_slides_cdn.003.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.003.jpeg
--------------------------------------------------------------------------------
/img/unused/data_frame_slides_cdn.006.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.006.jpeg
--------------------------------------------------------------------------------
/img/unused/dataframe.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dataframe.jpeg
--------------------------------------------------------------------------------
/img/unused/dsci-100-slide-images.001.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dsci-100-slide-images.001.jpeg
--------------------------------------------------------------------------------
/img/unused/dsci-100-slide-images.002.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dsci-100-slide-images.002.jpeg
--------------------------------------------------------------------------------
/img/unused/dsci-100-slide-images.004.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dsci-100-slide-images.004.jpeg
--------------------------------------------------------------------------------
/img/unused/git_add_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_add_01.png
--------------------------------------------------------------------------------
/img/unused/git_add_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_add_02.png
--------------------------------------------------------------------------------
/img/unused/git_add_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_add_03.png
--------------------------------------------------------------------------------
/img/unused/git_commit_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_01.png
--------------------------------------------------------------------------------
/img/unused/git_commit_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_02.png
--------------------------------------------------------------------------------
/img/unused/git_commit_02_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_02_new.png
--------------------------------------------------------------------------------
/img/unused/git_commit_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_03.png
--------------------------------------------------------------------------------
/img/unused/git_pull_00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_00.png
--------------------------------------------------------------------------------
/img/unused/git_pull_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_01.png
--------------------------------------------------------------------------------
/img/unused/git_pull_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_02.png
--------------------------------------------------------------------------------
/img/unused/git_pull_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_03.png
--------------------------------------------------------------------------------
/img/unused/git_pull_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_04.png
--------------------------------------------------------------------------------
/img/unused/git_push_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_01.png
--------------------------------------------------------------------------------
/img/unused/git_push_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_02.png
--------------------------------------------------------------------------------
/img/unused/git_push_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_03.png
--------------------------------------------------------------------------------
/img/unused/git_push_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_04.png
--------------------------------------------------------------------------------
/img/unused/git_push_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_05.png
--------------------------------------------------------------------------------
/img/unused/git_push_05_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_05_new.png
--------------------------------------------------------------------------------
/img/unused/issue_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_01.png
--------------------------------------------------------------------------------
/img/unused/issue_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_02.png
--------------------------------------------------------------------------------
/img/unused/issue_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_03.png
--------------------------------------------------------------------------------
/img/unused/issue_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_04.png
--------------------------------------------------------------------------------
/img/unused/issue_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_05.png
--------------------------------------------------------------------------------
/img/unused/issue_05_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_05_new.png
--------------------------------------------------------------------------------
/img/unused/issue_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_06.png
--------------------------------------------------------------------------------
/img/unused/launcher.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/launcher.png
--------------------------------------------------------------------------------
/img/unused/long_to_wide.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/long_to_wide.jpeg
--------------------------------------------------------------------------------
/img/unused/malignant_cancer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/malignant_cancer.png
--------------------------------------------------------------------------------
/img/unused/merge_conflict_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_01.png
--------------------------------------------------------------------------------
/img/unused/merge_conflict_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_02.png
--------------------------------------------------------------------------------
/img/unused/merge_conflict_02_new.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_02_new.png
--------------------------------------------------------------------------------
/img/unused/merge_conflict_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_03.png
--------------------------------------------------------------------------------
/img/unused/merge_conflict_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_04.png
--------------------------------------------------------------------------------
/img/unused/merge_conflict_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_05.png
--------------------------------------------------------------------------------
/img/unused/merge_conflict_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_06.png
--------------------------------------------------------------------------------
/img/unused/new_repository_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/new_repository_01.png
--------------------------------------------------------------------------------
/img/unused/new_repository_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/new_repository_02.png
--------------------------------------------------------------------------------
/img/unused/new_repository_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/new_repository_03.png
--------------------------------------------------------------------------------
/img/unused/obs.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/obs.jpeg
--------------------------------------------------------------------------------
/img/unused/pen-tool_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pen-tool_01.png
--------------------------------------------------------------------------------
/img/unused/pen-tool_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pen-tool_02.png
--------------------------------------------------------------------------------
/img/unused/pen-tool_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pen-tool_03.png
--------------------------------------------------------------------------------
/img/unused/pivot_longer_with_table.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pivot_longer_with_table.jpeg
--------------------------------------------------------------------------------
/img/unused/pivot_wider_with_table.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pivot_wider_with_table.jpeg
--------------------------------------------------------------------------------
/img/unused/prop_val_vs_income.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/prop_val_vs_income.png
--------------------------------------------------------------------------------
/img/unused/prop_val_vs_income_by_party.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/prop_val_vs_income_by_party.png
--------------------------------------------------------------------------------
/img/unused/prop_val_vs_income_human_labs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/prop_val_vs_income_human_labs.png
--------------------------------------------------------------------------------
/img/unused/r.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/r.PNG
--------------------------------------------------------------------------------
/img/unused/ref_vs_tibble.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/ref_vs_tibble.jpeg
--------------------------------------------------------------------------------
/img/unused/sampling.001.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/sampling.001.jpeg
--------------------------------------------------------------------------------
/img/unused/sampling.002.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/sampling.002.jpeg
--------------------------------------------------------------------------------
/img/unused/spreadsheet.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/spreadsheet.PNG
--------------------------------------------------------------------------------
/img/unused/testing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/testing.png
--------------------------------------------------------------------------------
/img/unused/text_cell_formatted.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/text_cell_formatted.png
--------------------------------------------------------------------------------
/img/unused/text_cell_unformatted.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/text_cell_unformatted.png
--------------------------------------------------------------------------------
/img/unused/tidy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/tidy.png
--------------------------------------------------------------------------------
/img/unused/tidy_data.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/tidy_data.jpeg
--------------------------------------------------------------------------------
/img/unused/timbits.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/timbits.jpg
--------------------------------------------------------------------------------
/img/unused/training_validation.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/training_validation.jpeg
--------------------------------------------------------------------------------
/img/unused/upload-files_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/upload-files_01.png
--------------------------------------------------------------------------------
/img/unused/upload_files_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/upload_files_02.png
--------------------------------------------------------------------------------
/img/unused/vars.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vars.jpeg
--------------------------------------------------------------------------------
/img/unused/vc-ba1-changes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc-ba1-changes.png
--------------------------------------------------------------------------------
/img/unused/vc3-add.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc3-add.png
--------------------------------------------------------------------------------
/img/unused/vc4-commit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc4-commit.png
--------------------------------------------------------------------------------
/img/unused/vc5-5-nachos-to-cheesecake.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc5-5-nachos-to-cheesecake.png
--------------------------------------------------------------------------------
/img/unused/vec_vs_list.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vec_vs_list.jpeg
--------------------------------------------------------------------------------
/img/unused/vector.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vector.jpeg
--------------------------------------------------------------------------------
/img/unused/vectors.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vectors.jpeg
--------------------------------------------------------------------------------
/img/unused/wide_to_long.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/wide_to_long.jpeg
--------------------------------------------------------------------------------
/img/unused/wikipedia_human.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/wikipedia_human.png
--------------------------------------------------------------------------------
/img/version-control/add_collab_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_01.png
--------------------------------------------------------------------------------
/img/version-control/add_collab_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_02.png
--------------------------------------------------------------------------------
/img/version-control/add_collab_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_03.png
--------------------------------------------------------------------------------
/img/version-control/add_collab_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_04.png
--------------------------------------------------------------------------------
/img/version-control/add_collab_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_05.png
--------------------------------------------------------------------------------
/img/version-control/clone_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_01.png
--------------------------------------------------------------------------------
/img/version-control/clone_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_02.png
--------------------------------------------------------------------------------
/img/version-control/clone_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_03.png
--------------------------------------------------------------------------------
/img/version-control/clone_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_04.png
--------------------------------------------------------------------------------
/img/version-control/create-new-file_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/create-new-file_01.png
--------------------------------------------------------------------------------
/img/version-control/create-new-file_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/create-new-file_02.png
--------------------------------------------------------------------------------
/img/version-control/create-new-file_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/create-new-file_03.png
--------------------------------------------------------------------------------
/img/version-control/generate-pat_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/generate-pat_01.png
--------------------------------------------------------------------------------
/img/version-control/generate-pat_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/generate-pat_02.png
--------------------------------------------------------------------------------
/img/version-control/generate-pat_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/generate-pat_03.png
--------------------------------------------------------------------------------
/img/version-control/git_add_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_add_01.png
--------------------------------------------------------------------------------
/img/version-control/git_add_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_add_02.png
--------------------------------------------------------------------------------
/img/version-control/git_add_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_add_03.png
--------------------------------------------------------------------------------
/img/version-control/git_commit_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_commit_01.png
--------------------------------------------------------------------------------
/img/version-control/git_commit_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_commit_03.png
--------------------------------------------------------------------------------
/img/version-control/git_pull_00.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_00.png
--------------------------------------------------------------------------------
/img/version-control/git_pull_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_01.png
--------------------------------------------------------------------------------
/img/version-control/git_pull_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_02.png
--------------------------------------------------------------------------------
/img/version-control/git_pull_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_03.png
--------------------------------------------------------------------------------
/img/version-control/git_pull_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_04.png
--------------------------------------------------------------------------------
/img/version-control/git_push_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_01.png
--------------------------------------------------------------------------------
/img/version-control/git_push_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_02.png
--------------------------------------------------------------------------------
/img/version-control/git_push_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_03.png
--------------------------------------------------------------------------------
/img/version-control/git_push_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_04.png
--------------------------------------------------------------------------------
/img/version-control/issue_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_01.png
--------------------------------------------------------------------------------
/img/version-control/issue_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_02.png
--------------------------------------------------------------------------------
/img/version-control/issue_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_03.png
--------------------------------------------------------------------------------
/img/version-control/issue_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_04.png
--------------------------------------------------------------------------------
/img/version-control/issue_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_06.png
--------------------------------------------------------------------------------
/img/version-control/merge_conflict_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_01.png
--------------------------------------------------------------------------------
/img/version-control/merge_conflict_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_03.png
--------------------------------------------------------------------------------
/img/version-control/merge_conflict_04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_04.png
--------------------------------------------------------------------------------
/img/version-control/merge_conflict_05.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_05.png
--------------------------------------------------------------------------------
/img/version-control/merge_conflict_06.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_06.png
--------------------------------------------------------------------------------
/img/version-control/new_repository_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/new_repository_01.png
--------------------------------------------------------------------------------
/img/version-control/new_repository_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/new_repository_02.png
--------------------------------------------------------------------------------
/img/version-control/new_repository_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/new_repository_03.png
--------------------------------------------------------------------------------
/img/version-control/pen-tool_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/pen-tool_01.png
--------------------------------------------------------------------------------
/img/version-control/pen-tool_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/pen-tool_02.png
--------------------------------------------------------------------------------
/img/version-control/pen-tool_03.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/pen-tool_03.png
--------------------------------------------------------------------------------
/img/version-control/upload-files_01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/upload-files_01.png
--------------------------------------------------------------------------------
/img/version-control/upload-files_02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/upload-files_02.png
--------------------------------------------------------------------------------
/img/version-control/vc-ba2-add.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc-ba2-add.png
--------------------------------------------------------------------------------
/img/version-control/vc-ba3-commit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc-ba3-commit.png
--------------------------------------------------------------------------------
/img/version-control/vc1-no-changes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc1-no-changes.png
--------------------------------------------------------------------------------
/img/version-control/vc2-changes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc2-changes.png
--------------------------------------------------------------------------------
/img/version-control/vc5-push.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc5-push.png
--------------------------------------------------------------------------------
/img/version-control/vc6-remote-changes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc6-remote-changes.png
--------------------------------------------------------------------------------
/img/version-control/vc7-pull.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc7-pull.png
--------------------------------------------------------------------------------
/img/version-control/version-control-all.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/version-control-all.ai
--------------------------------------------------------------------------------
/img/viz/faithful_plot.bmp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.bmp
--------------------------------------------------------------------------------
/img/viz/faithful_plot.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.jpg
--------------------------------------------------------------------------------
/img/viz/faithful_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.png
--------------------------------------------------------------------------------
/img/viz/faithful_plot.tiff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.tiff
--------------------------------------------------------------------------------
/img/viz/png-vs-svg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/png-vs-svg.png
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.004.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.004.ai
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.004.png
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.005.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.005.ai
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.005.png
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.007.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.007.ai
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.007.png
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.008.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.008.ai
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.008.png
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.009.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.009.ai
--------------------------------------------------------------------------------
/img/wrangling/data_frame_slides_cdn.009.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.009.png
--------------------------------------------------------------------------------
/img/wrangling/mutate_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/mutate_function.png
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.001.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.001.ai
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.001.png
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.002.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.002.ai
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.002.png
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.003.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.003.ai
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.003.png
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.004.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.004.ai
--------------------------------------------------------------------------------
/img/wrangling/pivot_functions.004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.004.png
--------------------------------------------------------------------------------
/img/wrangling/pivot_longer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_longer.png
--------------------------------------------------------------------------------
/img/wrangling/pivot_wider.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_wider.png
--------------------------------------------------------------------------------
/img/wrangling/separate_function.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/separate_function.png
--------------------------------------------------------------------------------
/img/wrangling/summarize.001.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.001.ai
--------------------------------------------------------------------------------
/img/wrangling/summarize.001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.001.png
--------------------------------------------------------------------------------
/img/wrangling/summarize.002.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.002.ai
--------------------------------------------------------------------------------
/img/wrangling/summarize.002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.002.png
--------------------------------------------------------------------------------
/img/wrangling/summarize.003.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.003.ai
--------------------------------------------------------------------------------
/img/wrangling/summarize.003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.003.png
--------------------------------------------------------------------------------
/img/wrangling/summarize.004.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.004.ai
--------------------------------------------------------------------------------
/img/wrangling/summarize.004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.004.png
--------------------------------------------------------------------------------
/img/wrangling/summarize.005.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.005.ai
--------------------------------------------------------------------------------
/img/wrangling/summarize.005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.005.png
--------------------------------------------------------------------------------
/img/wrangling/tidy_data.001.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/tidy_data.001.ai
--------------------------------------------------------------------------------
/img/wrangling/tidy_data.001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/tidy_data.001.png
--------------------------------------------------------------------------------
/img/wrangling/wrangling-syntax-all.ai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/wrangling-syntax-all.ai
--------------------------------------------------------------------------------
/index.Rmd:
--------------------------------------------------------------------------------
1 |
6 | ---
7 | title: |
8 | 
9 | Data Science
10 | subtitle: "A First Introduction"
11 | knit: "bookdown::render_book"
12 | author: "Tiffany Timbers, Trevor Campbell, and Melissa Lee"
13 | date: "`r Sys.Date()`"
14 | site: bookdown::bookdown_site
15 | link-citations: yes
16 | colorlinks: yes
17 | documentclass: krantz
18 | classoption:
19 | - krantz2
20 | biblio-style: plainnat
21 | lot: yes
22 | lof: yes
23 | fontsize: 12pt
24 | description: "This is a textbook for teaching a first introduction to data science."
25 | always_allow_html: true
26 | graphics: yes
27 | url: https://datasciencebook.ca
28 | github-repo: UBC-DSCI/introduction-to-datascience
29 | ---
30 |
31 | # Welcome! {-}
32 |
33 | This is the [website](https://datasciencebook.ca/) for *Data Science: A First Introduction*.
34 | You can read the web version of the book on this site. Click a section in the table of contents
35 | on the left side of the page to navigate to it. If you are on a mobile device,
36 | you may need to open the table of contents first by clicking the menu button on
37 | the top left of the page. You can purchase a PDF or print copy of the book
38 | on the [CRC Press website](https://www.routledge.com/Data-Science-A-First-Introduction/Timbers-Campbell-Lee/p/book/9780367524685) or on [Amazon](https://www.amazon.com/Data-Science-First-Introduction-Chapman/dp/0367532174).
39 |
40 | For the Python version of the textbook, visit [https://python.datasciencebook.ca](https://python.datasciencebook.ca).
41 |
42 | This book is listed in a number of open educational resource (OER) collections:
43 |
44 | - [The University of British Columbia OER collection](https://oer.open.ubc.ca/data-science-a-first-introduction/)
45 | - [The OER Commons](https://oercommons.org/courses/data-science-a-first-introduction-with-r)
46 | - [MERLOT](https://merlot.org/merlot/viewMaterial.htm?id=773420156)
47 |
48 | ```{r bookcover, echo = FALSE, fig.retina = 2, out.width = "45%"}
49 | knitr::include_graphics("img/frontmatter/ds-a-first-intro-cover.jpg")
50 | ```
51 |
52 |
53 |
54 | This work by [Tiffany Timbers](https://www.tiffanytimbers.com/), [Trevor Campbell](https://trevorcampbell.me/),
55 | and [Melissa Lee](https://www.stat.ubc.ca/users/melissa-lee) is licensed under
56 | a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/).
57 |
58 |
--------------------------------------------------------------------------------
/scripts/intro_bootstrap_image.R:
--------------------------------------------------------------------------------
1 | library(plotrix) #draw.circle()
2 | library(RColorBrewer)
3 | library(diagram) #curvedarrow()
4 | library(infer)
5 | library(tidyverse)
6 | library(magick)
7 |
8 | # Builds the schematic that introduces bootstrapping: an original sample, three
9 | # bootstrap samples drawn from it with replacement (each labelled with its
10 | # rounded mean), and a histogram of 1000 bootstrap means. Two files are written
11 | # to the working directory: bootstrap.svg (the histogram alone) and
12 | # intro-bootstrap.svg (the full diagram).
13 |
14 | # Original sample. Named `sample_values` rather than `sample` so that it does
15 | # not mask base::sample(), which is used below to draw the bootstrap samples.
16 | sample_values <- c(1, 2, 3, 5, 8, 9)
17 | df <- data.frame(value = sample_values)
18 |
19 | # Bootstrap distribution: 1000 resamples of size 6 with replacement, reduced to
20 | # their means (rep_sample_n() returns its output grouped by replicate).
21 | set.seed(10)
22 | estimates <- rep_sample_n(df, size = 6, replace = TRUE, reps = 1000) %>%
23 |   summarise(mean = mean(value))
24 |
25 | # Render the histogram to its own SVG so it can be embedded as a raster below.
26 | svg("bootstrap.svg")
27 | hist(estimates$mean,
28 |      col = "dodgerblue3",
29 |      yaxt = "n",
30 |      xlab = "means", cex.lab = 5, ylab = "", main = "", xaxt = "n")
31 | #ggplot(estimates, aes(mean)) +
32 | # geom_histogram(binwidth = 0.5, fill = "dodgerblue3", col = "lightgrey") +
33 | # xlab("means")
34 | dev.off()
35 |
36 | bootstrap <- image_read("bootstrap.svg")
37 |
38 | svg("intro-bootstrap.svg")
39 | # blank 10 x 10 canvas with no margins, axes, or frame
40 | par(mar = c(0, 0, 0, 0))
41 | plot(1:10, 1:10, type = "n", xlab = "", ylab = "",
42 |      bty = "n",
43 |      yaxt = "n",
44 |      xaxt = "n")
45 |
46 | # layout of the original sample
47 | circle_size <- 0.85
48 | circle_x <- 2
49 | circle_y <- 5
50 | xs <- c(1.4, 1.5, 2, 2.3, 2.5, 2.7) # x position of each value
51 | ys <- c(4.7, 5.2, 4.6, 5.4, 4.8, 5) # y position of each value
52 |
53 | # Draw the outline that encloses one sample, centred at (x, y).
54 | draw_sample_circle <- function(x, y) {
55 |   draw.circle(x, y, circle_size, nv = 100, border = "black", col = NA, lty = 1,
56 |               density = NULL, angle = 45, lwd = 1)
57 | }
58 |
59 | # Draw one bootstrap sample, shifted by (dx, dy) from the original sample:
60 | # its circle, its `label` above the circle, the values resampled with
61 | # replacement after set.seed(seed), and "mean = ..." to the right.
62 | # Returns the resampled values invisibly.
63 | draw_bootstrap_sample <- function(seed, dx, dy, label) {
64 |   draw_sample_circle(circle_x + dx, circle_y + dy)
65 |   text(circle_x + dx, circle_y + dy + 1.2, label, cex = 1)
66 |
67 |   set.seed(seed)
68 |   boot <- sample(sample_values, replace = TRUE)
69 |   text(xs + dx, ys + dy, labels = paste(boot))
70 |
71 |   # round() with its default digits = 0, so the label shows a whole number
72 |   text(circle_x + dx + 2.5, circle_y + dy, paste("mean =", round(mean(boot))))
73 |   invisible(boot)
74 | }
75 |
76 | # original sample
77 | text(xs, ys, labels = paste(sample_values))
78 | text(circle_x, circle_y + 1., "Sample")
79 | draw_sample_circle(circle_x, circle_y)
80 |
81 | # arrow from the sample to bootstrap sample #1, with its caption
82 | arrow_dx <- 2
83 | arrow_dy <- 2
84 | arrows(circle_x + 0.7, circle_y + 0.6, circle_x + arrow_dx, circle_y + arrow_dy + 0.1, length = 0.1, lwd = 1)
85 | text(circle_x + 0.8, circle_y + arrow_dy, "sample with \n replacement", cex = 0.75)
86 |
87 | # all three bootstrap samples share the same horizontal offset
88 | boot_dx <- 0.5 + arrow_dx
89 |
90 | # Bootstrap sample #1
91 | draw_bootstrap_sample(seed = 1, dx = boot_dx, dy = 1 + arrow_dy,
92 |                       label = "Bootstrap \n Sample #1")
93 |
94 | # Bootstrap sample #2
95 | arrows(circle_x + 0.85, circle_y, circle_x + 1.5, circle_y, length = 0.1, lwd = 1)
96 | draw_bootstrap_sample(seed = 2, dx = boot_dx, dy = 0.2,
97 |                       label = "Bootstrap \n Sample #2")
98 |
99 | # Bootstrap sample #3
100 | arrows(circle_x + 0.7, circle_y - 0.7, circle_x + 1.6, circle_y - 2, length = 0.1, lwd = 1)
101 | draw_bootstrap_sample(seed = 3, dx = boot_dx, dy = -2.6,
102 |                       label = "Bootstrap \n Sample #3")
103 |
104 | # last line
105 | arrows(circle_x + 0.5, circle_y - 1, circle_x + 1.4, circle_y - 3.8, length = 0.1, lwd = 1)
106 | text(circle_x + boot_dx, circle_y - 4, "keep sampling with \n replacement...", cex = 0.75)
107 |
108 | # arrows to means
109 | arrows(circle_x + 3.5, circle_y + 3, circle_x + 4.2, circle_y + 3, length = 0.1, lwd = 1)
110 | arrows(circle_x + 3.5, circle_y + 0.1, circle_x + 4.2, circle_y + 0.1, length = 0.1, lwd = 1)
111 | arrows(circle_x + 3.5, circle_y - 2.7, circle_x + 4.2, circle_y - 2.7, length = 0.1, lwd = 1)
112 |
113 | # bootstrap distribution: the histogram rendered above, placed at the right
114 | rasterImage(as.raster(bootstrap), 8, 4, 10, 8)
115 | text(circle_x + 7, circle_y + 3, "Bootstrap \n distribution")
116 |
117 | # arrows to bootstrap
118 | arrows(circle_x + 5.5, circle_y + 2.7, circle_x + 6.5, circle_y + 0.5, length = 0.1, lwd = 1)
119 | arrows(circle_x + 5.6, circle_y + 0.1, circle_x + 6.4, circle_y + 0.1, length = 0.1, lwd = 1)
120 | arrows(circle_x + 5.5, circle_y - 2.4, circle_x + 6.5, circle_y - 0.5, length = 0.1, lwd = 1)
121 |
122 | dev.off()
123 |
--------------------------------------------------------------------------------
/scripts/population-sample.R:
--------------------------------------------------------------------------------
1 | library(plotrix) #draw.circle()
2 | library(RColorBrewer)
3 | library(diagram) #curvedarrow()
4 | #library(shape)
5 | #library(igraph)
6 | #display.brewer.all(colorblindFriendly = TRUE)
7 | # Draws the population-vs-sample schematic and writes it to
8 | # img/population_vs_sample.svg: a cloud of population points on the left, a
9 | # box of 10 sampled points on the right, and the parameter / point-estimate
10 | # annotations underneath. In both point sets, colour index 2 of the "Paired"
11 | # palette is the "Has iPhone" group and index 6 is "No iPhone" (see the legend).
12 | svg("img/population_vs_sample.svg")
13 | mycolours <- brewer.pal(12, "Paired")
14 | blues <- brewer.pal(9, "Blues") # only referenced by the commented-out fills below
15 |
16 | # blank plot
17 | par(mar = rep(0, 4))
18 | plot(-50:150, seq(-100, 100, length.out = 201), type = "n", xlab = "", ylab = "",
19 |      bty = "n",
20 |      yaxt = "n",
21 |      xaxt = "n")
22 |
23 | # generating points: 50 radii and 50 angles, drawn without replacement
24 | set.seed(1)
25 | r <- sample(seq(0, 50, by = 1), size = 50, replace = FALSE)
26 | degs <- 360 * sample(seq(0, 50, by = 0.02), size = 50, replace = FALSE)
27 |
28 | # convert the degrees to radians
29 | theta <- 2 * pi * degs / 360
30 |
31 | # Add a circle around the points
32 | #draw.circle(0, 0, 50, nv=100, border=NULL, col=blues[1], lty=1, density=NULL, angle=45,lwd=1)
33 |
34 | # Plot the population points by converting polar to cartesian coordinates.
35 | # The two colours are recycled alternately across the 50 points. xlim/ylim are
36 | # deliberately not passed: they are not graphical parameters for points() and
37 | # only produced warnings.
38 | points(r * sin(theta), r * cos(theta),
39 |        col = mycolours[c(2, 6)], pch = 16, cex = 1)
40 |
41 | # Circles around the sampled points.
42 | # NOTE(review): these centres look hand-placed to ring ten of the population
43 | # points generated above under set.seed(1); confirm they still line up on the
44 | # R version used to build the figure.
45 | sampled_centres <- rbind(
46 |   c(-41, 8),
47 |   c(-19, -34),
48 |   c(-2, -14),
49 |   c(0, 27),
50 |   c(11, 42),
51 |   c(34.3, 36),
52 |   c(37, -31),
53 |   c(-9, 9.5),
54 |   c(-2, 16),
55 |   c(44.3, 20.5)
56 | )
57 | for (i in seq_len(nrow(sampled_centres))) {
58 |   draw.circle(sampled_centres[i, 1], sampled_centres[i, 2], 3, nv = 100,
59 |               border = "black", col = NA, lty = 1, density = NULL, angle = 45, lwd = 1)
60 | }
61 |
62 | #draw.circle(85, -20, 20, nv=100, border="black", col=Blues[1], lty=1, density=NULL, angle=45,lwd=1)
63 | # Sampled points: exactly 10 coordinates with 10 matching colour indices
64 | # (six of index 2, four of index 6, agreeing with the 6/10 estimate below).
65 | # The earlier version listed (100, -30) twice and supplied only 10 colours for
66 | # 11 points, so one point was overdrawn and the last colour was recycled.
67 | points(c(125, 99, 91, 92, 95, 120, 110, 100, 110, 105),
68 |        c(0, -10, 2, 8, -50, -45, -15, -30, -30, 0),
69 |        col = mycolours[c(2, 6, 2, 2, 2, 6, 6, 6, 2, 2)], pch = 16, cex = 1)
70 |
71 | # population box
72 | rect(-50, -55, 55, 70)
73 |
74 | # sample box
75 | rect(75, -55, 145, 29)
76 |
77 | text(-20, 80, "Population", font = 2)
78 | text(0, 60, "All undergraduate \n students in North America")
79 | text(110, 20, "10 undergraduate \n students in North America")
80 | text(95, 35, "Sample", font = 2)
81 |
82 | #iArrows <- igraph:::igraph.Arrows
83 | #iArrows(0, 46, 90, 12,
84 | # h.lwd=2, sh.lwd=2, sh.col="black",
85 | # curve=1.1 , width=1, size=1)
86 |
87 | # big arrow from the population box to the sample box
88 | curvedarrow(c(0, 70), c(110, 32), lwd = 2, lty = 1, lcol = "black",
89 |             arr.col = "black", arr.pos = 1, curve = -0.3, dr = 0.1,
90 |             endhead = TRUE)
91 | text(0, -80, "Parameter", font = 2)
92 | text(0, -92,
93 |      "unknown p \n (proportion of population who own an iPhone)")
94 |
95 | text(110, -80, "Point Estimate", font = 2)
96 |
97 | text(110, -90, expression(italic(hat(p)) * "= 6/10 = 0.60" ))
98 | text(110, -98, "(proportion of sample who own an iPhone)")
99 | # horizontal arrow pointing from the point estimate back to the parameter
100 | arrows(90, -81, 35, -81, length = 0.1, lwd = 2)
101 | text(60, -72, "Point Estimation", font = 2)
102 |
103 | # vertical arrows from each box down to its annotation
104 | arrows(0, -50, 0, -75, length = 0.1, lwd = 2)
105 | arrows(110, -50, 110, -75, length = 0.1, lwd = 2)
106 | #curvedarrow(c(0, -65), c(0,-75), lwd = 2, lty = 1, lcol = "black",
107 | # arr.col = "black", arr.pos = 4, curve = 0, dr = 1,
108 | # endhead = T)
109 |
110 | legend(120, 80, legend = c("Has iPhone", "No iPhone"),
111 |        col = mycolours[c(2, 6)], pch = 16, cex = 0.8)
112 | dev.off()
113 |
--------------------------------------------------------------------------------
/source/acknowledgments.Rmd:
--------------------------------------------------------------------------------
1 | # Acknowledgments {-}
2 |
3 | We'd like to thank everyone who has contributed to the development of
4 | [*Data Science: A First Introduction*](https://datasciencebook.ca).
5 | This is an open source textbook that began as a collection of course readings
6 | for DSCI 100, a new introductory data science course
7 | at the University of British Columbia (UBC).
8 | Several faculty members in the UBC Department of Statistics
9 | were pivotal in shaping the direction of that course,
10 | and as such, contributed greatly to the broad structure and
11 | list of topics in this book. We would especially like to thank Matías
12 | Salibián-Barrera for his mentorship during the initial development and roll-out
13 | of both DSCI 100 and this book. His door was always open when
14 | we needed to chat about how to best introduce and teach data science to our first-year students.
15 | We would also like to thank Gabriela Cohen Freue for her DSCI 561 (Regression I) teaching materials
16 | from the UBC Master of Data Science program, as some of our linear regression figures were inspired by these.
17 |
18 | We would also like to thank all those who contributed to the process of
19 | publishing this book. In particular, we would like to thank all of our reviewers for their feedback and suggestions:
20 | Rohan Alexander, Isabella Ghement, Virgilio Gómez Rubio, Albert Kim, Adam Loy, Maria Prokofieva, Emily Riederer, and Greg Wilson.
21 | The book was improved substantially by their insights.
22 | We would like to give special thanks to Jim Zidek
23 | for his support and encouragement throughout the process, and to
24 | Roger Peng for graciously offering to write the Foreword.
25 |
26 | Finally, we owe a debt of gratitude to all of the students of DSCI 100 over the past
27 | few years. They provided invaluable feedback on the book and worksheets;
28 | they found bugs for us (and stood by very patiently in class while
29 | we frantically fixed those bugs); and they brought a level of enthusiasm to the class
30 | that sustained us during the hard work of creating a new course and writing a textbook.
31 | Our interactions with them taught us how to teach data science, and that learning
32 | is reflected in the content of this book.
33 |
--------------------------------------------------------------------------------
/source/after_body.tex:
--------------------------------------------------------------------------------
1 | \backmatter % switch to back-matter mode for everything after the main chapters
2 | \printindex % typeset the index here (makeidx is loaded and \makeindex is called in preamble.tex)
3 |
--------------------------------------------------------------------------------
/source/analytics.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
10 |
--------------------------------------------------------------------------------
/source/authors.Rmd:
--------------------------------------------------------------------------------
1 | # About the authors {-}
2 |
3 | **[Tiffany Timbers](https://tiffanytimbers.com/)** is an Associate Professor of Teaching in the Department of
4 | Statistics and Co-Director for the Master of Data Science program (Vancouver
5 | Option) at the University of British Columbia. In these roles she teaches and
6 | develops curriculum around the responsible application of Data Science to solve
7 | real-world problems. One of her favorite courses to teach is a graduate
8 | course on collaborative software development, which focuses on teaching how to
9 | create R and Python packages using modern tools and workflows.
10 |
11 |
12 | **[Trevor Campbell](https://trevorcampbell.me)** is an Associate Professor in the Department of Statistics at
13 | the University of British Columbia. His research focuses on automated, scalable
14 | Bayesian inference algorithms, Bayesian nonparametrics, streaming data, and
15 | Bayesian theory. He was previously a postdoctoral associate advised by Tamara
16 | Broderick in the Computer Science and Artificial Intelligence Laboratory
17 | (CSAIL) and Institute for Data, Systems, and Society (IDSS) at MIT, a Ph.D.
18 | candidate under Jonathan How in the Laboratory for Information and Decision
19 | Systems (LIDS) at MIT, and before that he was in the Engineering Science
20 | program at the University of Toronto.
21 |
22 |
23 | **[Melissa Lee](https://www.stat.ubc.ca/users/melissa-lee)** is an Assistant Professor of Teaching in the Department of
24 | Statistics at the University of British Columbia. She teaches and develops
25 | curriculum for undergraduate statistics and data science courses. Her work
26 | focuses on student-centered approaches to teaching, developing and assessing
27 | open educational resources, and promoting equity, diversity, and inclusion
28 | initiatives.
29 |
--------------------------------------------------------------------------------
/source/before_body.tex:
--------------------------------------------------------------------------------
1 | % you may need to leave a few empty pages before the dedication page
2 |
3 | %\cleardoublepage\newpage\thispagestyle{empty}\null
4 | %\cleardoublepage\newpage\thispagestyle{empty}\null
5 | %\cleardoublepage\newpage
6 | \thispagestyle{empty}
7 |
8 | \begin{center}
9 | For my husband Curtis and daughter Rowan. Thank you for your love
10 | \linebreak(and patience with my late night writing).
11 | \linebreak-- Tiffany
12 |
13 | To mom and dad: here's a book. Pretty neat, eh? Love you guys.
14 | \linebreak-- Trevor
15 |
16 | To mom and dad, thank you for all your love and support.
17 | \linebreak-- Melissa
18 | %\includegraphics{images/dedication.pdf}
19 | \end{center}
20 |
21 | \cleardoublepage\newpage\thispagestyle{empty}\null % emit one more page with the empty page style (no header/footer) after the dedication
22 |
23 | \setlength{\abovedisplayskip}{-5pt} % negative skip: pull displayed equations closer to the preceding text
24 | \setlength{\abovedisplayshortskip}{-5pt} % same adjustment for the "short" case (preceding line is short)
25 |
--------------------------------------------------------------------------------
/source/foreword.Rmd:
--------------------------------------------------------------------------------
1 | # Foreword {-}
2 |
3 | *Roger D. Peng*
4 |
5 | *Johns Hopkins Bloomberg School of Public Health*
6 |
7 | *2022-01-04*
8 |
9 | The field of data science has expanded and grown significantly in recent years,
10 | attracting excitement and interest from many different directions. The demand for introductory
11 | educational materials has grown concurrently with the growth of the field itself, leading to
12 | a proliferation of textbooks, courses, blog posts, and tutorials. This book is an important
13 | contribution to this fast-growing literature, but given the wide availability of materials, a
14 | reader should be inclined to ask, "What is the unique contribution of *this* book?" In order
15 | to answer that question it is useful to step back for a moment and consider the development
16 | of the field of data science over the past few years.
17 |
18 | When thinking about data science, it is important to consider two questions: "What is
19 | data science?" and "How should one do data science?" The former question is under active
20 | discussion amongst a broad community of researchers and practitioners and there does
21 | not appear to be much consensus to date. However, there seems a general understanding
22 | that data science focuses on the more "active" elements—data wrangling, cleaning, and
23 | analysis—of answering questions with data. These elements are often highly
24 | problem-specific and may seem difficult to generalize across applications. Nevertheless, over time we
25 | have seen some core elements emerge that appear to repeat themselves as useful concepts
26 | across different problems. Given the lack of clear agreement over the definition of data
27 | science, there is a strong need for a book like this one to propose a vision for what the field
28 | is and what the implications are for the activities in which members of the field engage.
29 |
30 | The first important concept addressed by this book is tidy data, which is a format for
31 | tabular data formally introduced to the statistical community in a 2014 paper by Hadley
32 | Wickham. The tidy data organization strategy has proven a powerful abstract concept for
33 | conducting data analysis, in large part because of the vast toolchain implemented in the
34 | Tidyverse collection of R packages. The second key concept is the development of workflows
35 | for reproducible and auditable data analyses. Modern data analyses have only grown in
36 | complexity due to the availability of data and the ease with which we can implement complex
37 | data analysis procedures. Furthermore, these data analyses are often part of
38 | decision-making processes that may have significant impacts on people and communities. Therefore,
39 | there is a critical need to build reproducible analyses that can be studied and repeated by
40 | others in a reliable manner. Statistical methods clearly represent an important element
41 | of data science for building prediction and classification models and for making inferences
42 | about unobserved populations. Finally, because a field can succeed only if it fosters an
43 | active and collaborative community, it has become clear that being fluent in the tools of
44 | collaboration is a core element of data science.
45 |
46 | This book takes these core concepts and focuses on how one can apply them to *do* data
47 | science in a rigorous manner. Students who learn from this book will be well-versed in
48 | the techniques and principles behind producing reliable evidence from data. This book is
49 | centered around the use of the R programming language within the tidy data framework,
50 | and as such employs the most recent advances in data analysis coding. The use of Jupyter
51 | notebooks for exercises immediately places the student in an environment that encourages
52 | auditability and reproducibility of analyses. The integration of git and GitHub into the
53 | course is a key tool for teaching about collaboration and community, key concepts that are
54 | critical to data science.
55 |
56 | The demand for training in data science continues to increase. The availability of large
57 | quantities of data to answer a variety of questions, the computational power available to
58 | many more people than ever before, and the public awareness of the importance of data for
59 | decision-making have all contributed to the need for high-quality data science work. This
60 | book provides a sophisticated first introduction to the field of data science and provides
61 | a balanced mix of practical skills along with generalizable principles. As we continue to
62 | introduce students to data science and train them to confront an expanding array of data
63 | science problems, they will be well-served by the ideas presented here.
64 |
--------------------------------------------------------------------------------
/source/preamble.tex:
--------------------------------------------------------------------------------
1 | \usepackage{booktabs} % table rules (\toprule, \midrule, \bottomrule)
2 | \usepackage{longtable} % tables that can break across pages
3 | \usepackage{float} % extra float placement control (e.g. the H specifier)
4 | \usepackage[bf,singlelinecheck=off]{caption} % singlelinecheck=off: do not centre short one-line captions; bf: presumably bold caption labels (legacy option name -- confirm against caption docs)
5 | \usepackage[scale=.7]{sourcecodepro} % Source Code Pro as the monospace font, scaled to 70%
6 | \usepackage{url} % \url{...} for typesetting URLs (used by the \href override below)
7 | \usepackage{fontawesome5} % Font Awesome 5 icon commands
8 |
9 | \usepackage{framed,color} % framed supplies \MakeFramed (used by kframe below); color supplies \definecolor/\colorbox
10 | \definecolor{shadecolor}{RGB}{248,248,248} % very light grey background used by the kframe environment
11 |
12 | \renewcommand{\textfraction}{0.05} % minimum fraction of a page that must remain text (allows float-heavy pages)
13 | \renewcommand{\topfraction}{0.8} % maximum fraction of a page that top floats may occupy
14 | \renewcommand{\bottomfraction}{0.8} % maximum fraction of a page that bottom floats may occupy
15 | \renewcommand{\floatpagefraction}{0.75} % minimum fraction of a float-only page that floats must fill
16 |
17 | \renewenvironment{quote}{\begin{VF}}{\end{VF}} % route quote blocks through the VF environment; VF is not defined in this file -- presumably provided by the document class (TODO confirm)
18 |
19 |
20 | \IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}} % prefer bookmark when installed, otherwise fall back to plain hyperref
21 | \let\oldhref\href % keep the original \href available as \oldhref
22 | \renewcommand{\href}[2]{#2\footnote{\url{#1}}} % print the link text (#2) and put the target URL (#1) in a footnote, so links stay readable in print
23 |
24 | \makeatletter
25 | \newenvironment{kframe}{% shaded box built on framed's \MakeFramed and filled with shadecolor; used below as the body of Shaded
26 | \medskip{}
27 | \setlength{\fboxsep}{.8em}
28 | \def\at@end@of@kframe{}%
29 | \ifinner\ifhmode% in inner horizontal mode, wrap the frame in a minipage and close it at the end of the environment
30 | \def\at@end@of@kframe{\end{minipage}}%
31 | \begin{minipage}{\columnwidth}%
32 | \fi\fi%
33 | \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep
34 | \colorbox{shadecolor}{##1}\hskip-\fboxsep
35 | % There is no \\@totalrightmargin, so:
36 | \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}%
37 | \MakeFramed {\advance\hsize-\width
38 | \@totalleftmargin\z@ \linewidth\hsize
39 | \@setminipage}}%
40 | {\par\unskip\endMakeFramed% end-of-environment code: finish the frame, then run whatever \at@end@of@kframe was set to
41 | \at@end@of@kframe}
42 | \makeatother
43 |
44 | \renewenvironment{Shaded}{\begin{kframe}}{\end{kframe}} % render Shaded blocks inside kframe; Shaded is presumably Pandoc's wrapper for highlighted code (TODO confirm)
45 |
46 | \usepackage{makeidx} % index support: \index{...} entries in the text, \printindex in after_body.tex
47 | \makeindex % enable writing of index entries during the LaTeX run
48 |
49 | \urlstyle{tt} % typeset \url{...} text in the typewriter (monospace) font
50 |
51 | \usepackage{amsthm}
52 | \makeatletter
53 | \def\thm@space@setup{% override the theorem spacing hook: same stretchable 8pt skip before and after
54 | \thm@preskip=8pt plus 2pt minus 4pt
55 | \thm@postskip=\thm@preskip
56 | }
57 | \makeatother
58 |
59 | \frontmatter % begin front-matter mode; after_body.tex later switches to \backmatter
60 |
--------------------------------------------------------------------------------
/source/preface.Rmd:
--------------------------------------------------------------------------------
1 | # Preface {-}
2 |
3 | This textbook aims to be an approachable introduction to the world of data science.
4 | In this book, we define **data science** \index{data science!definition} as the process of generating
5 | insight from data through **reproducible** \index{reproducible} and **auditable** \index{auditable} processes.
6 | If you analyze some data and give your analysis to a friend or colleague, they should
7 | be able to re-run the analysis from start to finish and get the same result you did (*reproducibility*).
8 | They should also be able to see and understand all the steps in the analysis, as well as the history of how
9 | the analysis developed (*auditability*). Creating reproducible and auditable
10 | analyses allows both you and others to easily double-check and validate your work.
11 |
12 | At a high level, in this book, you will learn how to
13 |
14 | 1. identify common problems in data science, and
15 | 2. solve those problems with reproducible and auditable workflows.
16 |
17 | Figure \@ref(fig:img-chapter-overview) summarizes what you will learn in each chapter
18 | of this book.
19 | Throughout, you will learn how to use the R programming language [@Rlanguage] to perform
20 | all the tasks associated with data analysis. You will
21 | spend the first four chapters learning how to use R to load, clean, wrangle
22 | (i.e., restructure the data into a usable format) and visualize data
23 | while answering descriptive and exploratory data analysis questions. In the next
24 | six chapters, you will learn how to answer predictive, exploratory, and inferential
25 | data analysis questions with common methods in data science, including
26 | classification, regression, clustering, and estimation.
27 | In the final chapters
28 | (\@ref(jupyter)–\@ref(setup)),
29 | you will learn how to combine R code, formatted text, and images
30 | in a single coherent document with Jupyter, use version control for
31 | collaboration, and install and configure the software needed for data science
32 | on your own computer. If you are reading this book as part of a course that you are
33 | taking, the instructor may have set up all of these tools already for you; in this
34 | case, you can continue on through the book reading the chapters in order.
35 | But if you are reading this independently, you may want to jump to these last three chapters
36 | early, before going on, to make sure your computer is set up in such a way that you can
37 | try out the example code that we include throughout the book.
38 |
39 | ```{r img-chapter-overview, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Where are we going?", out.width="100%", fig.retina = 2, fig.align = "center"}
40 | knitr::include_graphics("img/frontmatter/chapter_overview.png") # embed the pre-rendered chapter overview image; echo = FALSE keeps this code out of the rendered book
41 | ```
42 |
43 | Each chapter in the book has an accompanying worksheet that provides exercises
44 | to help you practice the concepts you will learn. We strongly recommend that you
45 | work through the worksheet when you finish reading each chapter
46 | before moving on to the next chapter. All of the worksheets
47 | are available at
48 | [https://worksheets.datasciencebook.ca](https://worksheets.datasciencebook.ca);
49 | the "Exercises" section at the end of each chapter points you to the right worksheet for that chapter.
50 | For each worksheet, you can either launch an interactive version of the worksheet in your browser by clicking the "launch binder" button,
51 | or preview a non-interactive version of the worksheet by clicking "view worksheet."
52 | If you instead decide to download the worksheet and run it on your own machine,
53 | make sure to follow the instructions for computer setup
54 | found in Chapter \@ref(setup). This will ensure that the automated feedback
55 | and guidance that the worksheets provide will function as intended.
56 |
--------------------------------------------------------------------------------
/source/references.Rmd:
--------------------------------------------------------------------------------
1 | `r if (knitr::is_html_output()) '
2 | # References {-}
3 | '`
4 |
--------------------------------------------------------------------------------
/source/style.css:
--------------------------------------------------------------------------------
1 | p.caption { /* caption paragraphs: grey text with a small gap above */
2 | color: #777;
3 | margin-top: 10px;
4 | }
5 | p code { /* inline code inside paragraphs takes its white-space handling from the paragraph */
6 | white-space: inherit;
7 | }
8 | pre { /* reset word breaking/wrapping inside preformatted blocks to the defaults */
9 | word-break: normal;
10 | word-wrap: normal; /* legacy name of overflow-wrap */
11 | }
12 | pre code { /* code inside <pre> takes its white-space handling from the <pre> */
13 | white-space: inherit;
14 | }
15 |
16 | .book-header h1 { /* hide the h1 heading inside the .book-header element */
17 | display: none;
18 | }
19 |
--------------------------------------------------------------------------------