├── .github ├── CODEOWNERS └── workflows │ ├── deploy_main_preview.yml │ ├── deploy_pr_preview.yml │ ├── update_book.yml │ └── update_build_environment.yml ├── .gitignore ├── Dockerfile ├── LICENSE.md ├── README.md ├── _bookdown.yml ├── _output.yml ├── build_html.sh ├── build_pdf.sh ├── data ├── can_lang.csv ├── can_lang.db ├── can_lang.tsv ├── can_lang.xlsx ├── can_lang_meta-data.csv ├── can_lang_no_names.tsv ├── canada_wiki.html ├── create_can_lang_variants.R ├── create_penguins_no_species.R ├── create_state_property_vote_variants.R ├── create_untidy_canlang.R ├── faithful.csv ├── historical_vote.csv ├── historical_vote_messy.csv ├── historical_vote_no_header.tsv ├── historical_vote_wide.csv ├── islands.csv ├── listings.csv ├── listings_original.csv ├── marketing.csv ├── mauna_loa.csv ├── mauna_loa_data.csv ├── michelson.csv ├── mtcars.csv ├── nasa.json ├── penguins.csv ├── penguins_all_vars.csv ├── region_data.csv ├── region_lang.csv ├── region_lang_top5_cities.csv ├── region_lang_top5_cities_long.csv ├── region_lang_top5_cities_messy.csv ├── region_lang_top5_cities_wide.csv ├── region_lang_with_nas.csv ├── retrieve_data.ipynb ├── sacramento.csv ├── state_property_vote.csv ├── state_property_vote.db ├── state_property_vote.tsv ├── state_property_vote.xlsx ├── state_property_vote_meta-data.csv ├── tweets.csv ├── us_vote.csv ├── wdbc.csv ├── wdbc_missing.csv └── wdbc_unscaled.csv ├── docker-compose.yml ├── img ├── classification1 │ └── plot3d_knn_classification.png ├── classification2 │ ├── ML-paradigm-test.ai │ ├── ML-paradigm-test.png │ ├── cv.ai │ ├── cv.png │ ├── train-test-overview.ai │ ├── train-test-overview.png │ ├── training_test.ai │ └── training_test.png ├── clustering │ └── gentoo.jpg ├── frontmatter │ ├── chapter_overview.ai │ ├── chapter_overview.png │ ├── ds-a-first-intro-cover.jpg │ └── ds-a-first-intro-graphic.jpg ├── inference │ ├── intro-bootstrap.jpeg │ ├── population_vs_sample.ai │ └── population_vs_sample.png ├── intro │ ├── 
arrange_function.png │ ├── canada_map.png │ ├── filter_function.png │ ├── ggplot_function.png │ ├── help-filter.png │ ├── intro-all.ai │ ├── read_csv_function.png │ ├── select_function.png │ └── spreadsheet_vs_dataframe.png ├── jupyter │ ├── activate-and-run-button-annotated.png │ ├── code-cell-not-run.png │ ├── code-cell-run.png │ ├── convert-to-markdown-cell.png │ ├── create-new-code-cell.png │ ├── jupyter.png │ ├── launcher-annotated.png │ ├── markdown-cell-not-run.png │ ├── markdown-cell-run.png │ ├── open_data_w_editor_01.png │ ├── open_data_w_editor_02.png │ ├── out-of-order-1.png │ ├── out-of-order-2.png │ ├── out-of-order-3.png │ └── restart-kernel-run-all.png ├── key_files │ ├── chapter_overview.key │ ├── data_frame_slides_cdn.key │ ├── dsci-100-slide-images.key │ ├── filesystem.key │ ├── ggplot_functions.key │ ├── git_intro.pptx │ ├── pivot_functions.key │ ├── png-vs-svg.pptx │ ├── ref_vs_tibble.key │ ├── spreadsheet_vs_dataframe.pptx │ ├── summarize.key │ ├── tidy_data.key │ └── tidydata_bootstrap_train_test_images.key ├── reading │ ├── NASA-API-Rho-Ophiuchi.png │ ├── NASA-API-limits.png │ ├── NASA-API-parameters.png │ ├── NASA-API-signup.png │ ├── craigslist_human.png │ ├── filesystem.ai │ ├── filesystem.png │ ├── ref_vs_tibble.001.jpeg │ ├── sg1.png │ ├── sg2.png │ ├── sg3.png │ ├── sg4.png │ └── website_source.txt ├── regression1 │ └── plot3d_knn_regression.png ├── regression2 │ └── plot3d_linear_regression.png ├── setup │ ├── docker-1.png │ ├── docker-2.png │ ├── docker-3.png │ ├── docker-4.png │ ├── jlab-1.png │ ├── jlab-2.png │ └── ubuntu-docker.png ├── unused │ ├── 1024px-Supervised_machine_learning_in_a_nutshell.svg │ ├── ML-paradigm.jpeg │ ├── ML-paradigm.png │ ├── Page_Under_Construction.png │ ├── README.md │ ├── Supervised_machine_learning_in_a_nutshell.svg.png │ ├── activate-and-run-button.png │ ├── add_collab_01.png │ ├── add_collab_02.png │ ├── add_collab_03.png │ ├── add_collab_04.png │ ├── add_collab_05.png │ ├── add_collab_06.png │ ├── 
add_collab_06_new.png │ ├── chapter_overview.001.jpeg │ ├── clone_01.png │ ├── clone_02.png │ ├── clone_03.png │ ├── clone_04.png │ ├── create-new-file_01.png │ ├── create-new-file_02.png │ ├── create-new-file_03.png │ ├── data_frame_slides_cdn.001.jpeg │ ├── data_frame_slides_cdn.002.jpeg │ ├── data_frame_slides_cdn.003.jpeg │ ├── data_frame_slides_cdn.006.jpeg │ ├── dataframe.jpeg │ ├── dsci-100-slide-images.001.jpeg │ ├── dsci-100-slide-images.002.jpeg │ ├── dsci-100-slide-images.004.jpeg │ ├── file-system-for-export-to-intro-datascience.svg │ ├── git_add_01.png │ ├── git_add_02.png │ ├── git_add_03.png │ ├── git_commit_01.png │ ├── git_commit_02.png │ ├── git_commit_02_new.png │ ├── git_commit_03.png │ ├── git_pull_00.png │ ├── git_pull_01.png │ ├── git_pull_02.png │ ├── git_pull_03.png │ ├── git_pull_04.png │ ├── git_push_01.png │ ├── git_push_02.png │ ├── git_push_03.png │ ├── git_push_04.png │ ├── git_push_05.png │ ├── git_push_05_new.png │ ├── issue_01.png │ ├── issue_02.png │ ├── issue_03.png │ ├── issue_04.png │ ├── issue_05.png │ ├── issue_05_new.png │ ├── issue_06.png │ ├── launcher.png │ ├── long_to_wide.jpeg │ ├── malignant_cancer.png │ ├── merge_conflict_01.png │ ├── merge_conflict_02.png │ ├── merge_conflict_02_new.png │ ├── merge_conflict_03.png │ ├── merge_conflict_04.png │ ├── merge_conflict_05.png │ ├── merge_conflict_06.png │ ├── new_repository_01.png │ ├── new_repository_02.png │ ├── new_repository_03.png │ ├── obs.jpeg │ ├── pen-tool_01.png │ ├── pen-tool_02.png │ ├── pen-tool_03.png │ ├── pivot_longer_with_table.jpeg │ ├── pivot_wider_with_table.jpeg │ ├── population_vs_sample.svg │ ├── prop_val_vs_income.png │ ├── prop_val_vs_income_by_party.png │ ├── prop_val_vs_income_human_labs.png │ ├── r.PNG │ ├── ref_vs_tibble.jpeg │ ├── sampling.001.jpeg │ ├── sampling.002.jpeg │ ├── spreadsheet.PNG │ ├── testing.png │ ├── text_cell_formatted.png │ ├── text_cell_unformatted.png │ ├── tidy.png │ ├── tidy_data.jpeg │ ├── timbits.jpg │ ├── 
training_validation.jpeg │ ├── upload-files_01.png │ ├── upload_files_02.png │ ├── vars.jpeg │ ├── vc-ba1-changes.png │ ├── vc3-add.png │ ├── vc4-commit.png │ ├── vc5-5-nachos-to-cheesecake.png │ ├── vec_vs_list.jpeg │ ├── vector.jpeg │ ├── vectors.jpeg │ ├── wide_to_long.jpeg │ └── wikipedia_human.png ├── version-control │ ├── add_collab_01.png │ ├── add_collab_02.png │ ├── add_collab_03.png │ ├── add_collab_04.png │ ├── add_collab_05.png │ ├── clone_01.png │ ├── clone_02.png │ ├── clone_03.png │ ├── clone_04.png │ ├── create-new-file_01.png │ ├── create-new-file_02.png │ ├── create-new-file_03.png │ ├── generate-pat_01.png │ ├── generate-pat_02.png │ ├── generate-pat_03.png │ ├── git_add_01.png │ ├── git_add_02.png │ ├── git_add_03.png │ ├── git_commit_01.png │ ├── git_commit_03.png │ ├── git_pull_00.png │ ├── git_pull_01.png │ ├── git_pull_02.png │ ├── git_pull_03.png │ ├── git_pull_04.png │ ├── git_push_01.png │ ├── git_push_02.png │ ├── git_push_03.png │ ├── git_push_04.png │ ├── issue_01.png │ ├── issue_02.png │ ├── issue_03.png │ ├── issue_04.png │ ├── issue_06.png │ ├── merge_conflict_01.png │ ├── merge_conflict_03.png │ ├── merge_conflict_04.png │ ├── merge_conflict_05.png │ ├── merge_conflict_06.png │ ├── new_repository_01.png │ ├── new_repository_02.png │ ├── new_repository_03.png │ ├── pen-tool_01.png │ ├── pen-tool_02.png │ ├── pen-tool_03.png │ ├── upload-files_01.png │ ├── upload-files_02.png │ ├── vc-ba2-add.png │ ├── vc-ba3-commit.png │ ├── vc1-no-changes.png │ ├── vc2-changes.png │ ├── vc5-push.png │ ├── vc6-remote-changes.png │ ├── vc7-pull.png │ └── version-control-all.ai ├── viz │ ├── faithful_plot.bmp │ ├── faithful_plot.jpg │ ├── faithful_plot.png │ ├── faithful_plot.svg │ ├── faithful_plot.tiff │ └── png-vs-svg.png └── wrangling │ ├── data_frame_slides_cdn.004.ai │ ├── data_frame_slides_cdn.004.png │ ├── data_frame_slides_cdn.005.ai │ ├── data_frame_slides_cdn.005.png │ ├── data_frame_slides_cdn.007.ai │ ├── data_frame_slides_cdn.007.png │ 
├── data_frame_slides_cdn.008.ai │ ├── data_frame_slides_cdn.008.png │ ├── data_frame_slides_cdn.009.ai │ ├── data_frame_slides_cdn.009.png │ ├── mutate_function.png │ ├── pivot_functions.001.ai │ ├── pivot_functions.001.png │ ├── pivot_functions.002.ai │ ├── pivot_functions.002.png │ ├── pivot_functions.003.ai │ ├── pivot_functions.003.png │ ├── pivot_functions.004.ai │ ├── pivot_functions.004.png │ ├── pivot_longer.png │ ├── pivot_wider.png │ ├── separate_function.png │ ├── summarize.001.ai │ ├── summarize.001.png │ ├── summarize.002.ai │ ├── summarize.002.png │ ├── summarize.003.ai │ ├── summarize.003.png │ ├── summarize.004.ai │ ├── summarize.004.png │ ├── summarize.005.ai │ ├── summarize.005.png │ ├── tidy_data.001.ai │ ├── tidy_data.001.png │ └── wrangling-syntax-all.ai ├── index.Rmd ├── krantz.cls ├── scripts ├── intro_bootstrap_image.R └── population-sample.R └── source ├── acknowledgments.Rmd ├── after_body.tex ├── analytics.html ├── authors.Rmd ├── before_body.tex ├── classification1.Rmd ├── classification2.Rmd ├── clustering.Rmd ├── foreword.Rmd ├── inference.Rmd ├── intro.Rmd ├── jupyter.Rmd ├── preamble.tex ├── preface.Rmd ├── reading.Rmd ├── references.Rmd ├── references.bib ├── regression1.Rmd ├── regression2.Rmd ├── setup.Rmd ├── style.css ├── version-control.Rmd ├── viz.Rmd └── wrangling.Rmd /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | source/** @UBC-DSCI/dsci-100-codeowners 2 | -------------------------------------------------------------------------------- /.github/workflows/deploy_main_preview.yml: -------------------------------------------------------------------------------- 1 | name: Rebuild and deploy dev version of book to gh-pages branch in dev/ folder 2 | on: 3 | push: 4 | branches: 5 | - main 6 | paths: 7 | - 'index.Rmd' 8 | - '_bookdown.yml' 9 | - '_output.yml' 10 | - 'source/*.Rmd' 11 | - 'source/*.bib' 12 | - 'source/*.css' 13 | - 'data/**' 14 | - 'img/**' 15 | - 
'build_html.sh' 16 | 17 | jobs: 18 | deploy-main-preview: 19 | runs-on: ubuntu-latest 20 | permissions: 21 | contents: write 22 | packages: write 23 | 24 | steps: 25 | - name: Get Actions user id 26 | id: get_uid 27 | run: | 28 | actions_user_id=`id -u $USER` 29 | echo $actions_user_id 30 | echo "uid=$actions_user_id" >> $GITHUB_OUTPUT 31 | 32 | - name: checkout 33 | uses: actions/checkout@v2 34 | with: 35 | ref: 'main' 36 | 37 | - name: Build the book 38 | run: | 39 | ./build_html.sh 40 | 41 | - name: Reset ownership of workspace after build 42 | uses: peter-murray/reset-workspace-ownership-action@v1 43 | with: 44 | user_id: ${{ steps.get_uid.outputs.uid }} 45 | 46 | # Push the book's HTML to github-pages 47 | - name: GitHub Pages action 48 | uses: peaceiris/actions-gh-pages@v4 49 | with: 50 | github_token: ${{ secrets.GITHUB_TOKEN }} 51 | publish_dir: docs/ 52 | keep_files: true 53 | destination_dir: dev 54 | # force_orphan: true # once peaceiris updates to v4, change this to true and keep_files: true for the PR / main branch deploy previews 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /.github/workflows/deploy_pr_preview.yml: -------------------------------------------------------------------------------- 1 | name: "Rebuild and deploy PR version of book to gh-pages branch in pull###/ folder" 2 | on: 3 | pull_request: 4 | types: [opened, synchronize] 5 | paths: 6 | - 'index.Rmd' 7 | - '_bookdown.yml' 8 | - '_output.yml' 9 | - 'source/*.Rmd' 10 | - 'source/*.bib' 11 | - 'source/*.css' 12 | - 'data/**' 13 | - 'img/**' 14 | - 'Dockerfile' 15 | branches: 16 | - 'main' 17 | 18 | jobs: 19 | deploy-pr-preview: 20 | runs-on: ubuntu-latest 21 | permissions: 22 | contents: write 23 | packages: write 24 | pull-requests: write 25 | 26 | steps: 27 | - name: Wait for potential build environment update 28 | uses: fountainhead/action-wait-for-check@v1.1.0 29 | with: 30 | token: ${{ secrets.GITHUB_TOKEN }} 31 | checkName: 
"Rebuild docker image" 32 | ref: ${{ github.event.pull_request.head.sha }} 33 | timeoutSeconds: 60000 34 | 35 | - name: Get Actions user id 36 | id: get_uid 37 | run: | 38 | actions_user_id=`id -u $USER` 39 | echo $actions_user_id 40 | echo "uid=$actions_user_id" >> $GITHUB_OUTPUT 41 | 42 | - name: Checkout the repo 43 | uses: actions/checkout@v2 44 | with: 45 | fetch-depth: '0' 46 | ref: ${{ github.head_ref }} 47 | 48 | - name: Build the book 49 | run: | 50 | ./build_html.sh 51 | 52 | - name: Reset ownership of workspace after build 53 | uses: peter-murray/reset-workspace-ownership-action@v1 54 | with: 55 | user_id: ${{ steps.get_uid.outputs.uid }} 56 | 57 | # Push the book's HTML to github-pages 58 | - name: GitHub Pages action 59 | uses: peaceiris/actions-gh-pages@v4 60 | with: 61 | github_token: ${{ secrets.GITHUB_TOKEN }} 62 | publish_dir: docs/ 63 | keep_files: true 64 | destination_dir: pull${{ github.event.number }} 65 | # force_orphan: true # once peaceiris updates to v4, change this to true and keep_files: true for the PR / main branch deploy previews 66 | 67 | - name: Checkout the gh-pages branch 68 | uses: actions/checkout@v2 69 | with: 70 | fetch-depth: '0' 71 | ref: 'gh-pages' 72 | 73 | - name: Run website diff 74 | run: | 75 | rustup update 76 | pip install --upgrade pip 77 | pip install website_diff 78 | rm -rf diff${{ github.event.number }} 79 | website_diff --old dev --new pull${{ github.event.number }} --diff diff${{ github.event.number }} 80 | 81 | - name: GitHub Pages action to push diff 82 | uses: peaceiris/actions-gh-pages@v4 83 | with: 84 | github_token: ${{ secrets.GITHUB_TOKEN }} 85 | publish_dir: diff${{ github.event.number }} 86 | keep_files: true 87 | destination_dir: diff${{ github.event.number }} 88 | # force_orphan: true # once peaceiris updates to v4, change this to true and keep_files: true for the PR / main branch deploy previews 89 | 90 | - name: Post URLS to PR thread 91 | uses: mshick/add-pr-comment@v2.8.1 92 | with: 93 | 
message: | 94 | Hello! I've built a preview of your PR so that you can compare it to the current `main` branch. 95 | * PR deploy preview available [here](https://datasciencebook.ca/pull${{ github.event.number }}/index.html) 96 | * PR diff with `main` available [here](https://datasciencebook.ca/diff${{ github.event.number }}/index.html) 97 | * Current `main` deploy preview available [here](https://datasciencebook.ca/dev/index.html) 98 | * Public production build available [here](https://datasciencebook.ca) 99 | -------------------------------------------------------------------------------- /.github/workflows/update_book.yml: -------------------------------------------------------------------------------- 1 | name: Rebuild and deploy book to gh-pages branch 2 | on: 3 | push: 4 | branches: 5 | - production 6 | paths: 7 | - 'index.Rmd' 8 | - '_bookdown.yml' 9 | - '_output.yml' 10 | - 'source/*.Rmd' 11 | - 'source/*.bib' 12 | - 'source/*.css' 13 | - 'data/**' 14 | - 'img/**' 15 | - 'build_html.sh' 16 | 17 | jobs: 18 | deploy-book: 19 | runs-on: ubuntu-latest 20 | permissions: 21 | contents: write 22 | packages: write 23 | 24 | steps: 25 | - name: checkout gh-pages 26 | uses: actions/checkout@v2 27 | with: 28 | ref: 'gh-pages' 29 | 30 | - name: Clean the site contents except for dev, pull contents of dev/ to main site 31 | run: | 32 | # delete everything except the dev and .git folders 33 | find . -maxdepth 1 ! -name ".git" ! -name "dev" ! -name "." | xargs rm -rf 34 | # copy the contents of dev into the root 35 | cp -rf dev/* . 
36 | 37 | # Push updated website, clean out old commits 38 | - name: Update website 39 | uses: peaceiris/actions-gh-pages@v4 40 | with: 41 | github_token: ${{ secrets.GITHUB_TOKEN }} 42 | publish_dir: ./ 43 | force_orphan: true 44 | cname: datasciencebook.ca 45 | -------------------------------------------------------------------------------- /.github/workflows/update_build_environment.yml: -------------------------------------------------------------------------------- 1 | name: Rebuild and publish new ubcdsci/intro-to-ds image on DockerHub 2 | on: 3 | pull_request: 4 | types: [opened, synchronize] 5 | branches: 6 | - 'main' 7 | jobs: 8 | rebuild-docker: 9 | name: Rebuild docker image 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | steps: 14 | - name: Checkout PR branch 15 | uses: actions/checkout@v3 16 | with: 17 | fetch-depth: '0' 18 | ref: ${{ github.head_ref }} 19 | - name: Check if Dockerfile needs to be rebuilt 20 | id: check-stale 21 | run: | 22 | echo "Checking if Dockerfile was modified since last commit on this PR" 23 | echo "GitHub PR action type: ${{ github.event.action }}" 24 | if [ "${{ github.event.action }}" == "opened" ]; then 25 | echo "GitHub base ref: ${{ github.event.pull_request.base.sha }}" 26 | echo "GitHub head ref: ${{ github.event.pull_request.head.sha }}" 27 | BEFORE=${{ github.event.pull_request.base.sha }} 28 | AFTER=${{ github.event.pull_request.head.sha }} 29 | else 30 | echo "GitHub event before: ${{ github.event.before }}" 31 | echo "GitHub event after: ${{ github.event.after }}" 32 | BEFORE=${{ github.event.before }} 33 | AFTER=${{ github.event.after }} 34 | fi 35 | if git diff --quiet $BEFORE $AFTER Dockerfile; then 36 | echo "PR synchronized, but Dockerfile was not edited. Not rebuilding the image." 37 | echo "stale_dockerfile=false" >> "$GITHUB_OUTPUT" 38 | else 39 | echo "PR synchronized, and Dockerfile was edited, so rebuilding the image." 
40 | echo "stale_dockerfile=true" >> "$GITHUB_OUTPUT" 41 | fi 42 | - name: Rebuild and publish image 43 | if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }} 44 | id: rebuild 45 | uses: elgohr/Publish-Docker-Github-Action@v5 46 | with: 47 | name: ubcdsci/intro-to-ds 48 | username: ${{ secrets.DOCKER_USERNAME }} 49 | password: ${{ secrets.DOCKER_PASSWORD }} 50 | dockerfile: Dockerfile 51 | snapshot: true 52 | - name: Update build_html.sh script 53 | if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }} 54 | run: | 55 | git config --local user.email "action@github.com" 56 | git config --local user.name "GitHub Action" 57 | git pull origin ${{ github.head_ref }} 58 | sed 's/ubcdsci\/intro-to-ds:[[:alnum:]]\+/ubcdsci\/intro-to-ds:${{ steps.rebuild.outputs.snapshot-tag }}/g' build_html.sh > build_html.tmp && mv build_html.tmp build_html.sh 59 | chmod u+x build_html.sh 60 | git add build_html.sh 61 | git commit -m "update build_html.sh script with new docker image" 62 | - name: Update build_pdf.sh script 63 | if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }} 64 | run: | 65 | git config --local user.email "action@github.com" 66 | git config --local user.name "GitHub Action" 67 | git pull origin ${{ github.head_ref }} 68 | sed 's/ubcdsci\/intro-to-ds:[[:alnum:]]\+/ubcdsci\/intro-to-ds:${{ steps.rebuild.outputs.snapshot-tag }}/g' build_pdf.sh > build_pdf.tmp && mv build_pdf.tmp build_pdf.sh 69 | chmod u+x build_pdf.sh 70 | git add build_pdf.sh 71 | git commit -m "update build_pdf.sh script with new docker image" 72 | - name: Update docker-compose.yml script 73 | if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }} 74 | run: | 75 | git config --local user.email "action@github.com" 76 | git config --local user.name "GitHub Action" 77 | git pull origin ${{ github.head_ref }} 78 | sed 's/ubcdsci\/intro-to-ds:[[:alnum:]]\+/ubcdsci\/intro-to-ds:${{ steps.rebuild.outputs.snapshot-tag }}/g' docker-compose.yml > docker-compose.tmp && mv 
docker-compose.tmp docker-compose.yml 79 | git add docker-compose.yml 80 | git commit -m "update docker-compose.yml script with new docker image" 81 | - name: Push changes to build scripts 82 | if: ${{ steps.check-stale.outputs.stale_dockerfile == 'true' }} 83 | uses: ad-m/github-push-action@master 84 | with: 85 | github_token: ${{ secrets.GITHUB_TOKEN }} 86 | branch: ${{ github.head_ref }} 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | **.DS_Store 6 | *.sw* 7 | _bookdown_files 8 | **.ipynb_checkpoints 9 | .rstudio/** 10 | docs/** 11 | .local/** 12 | *.log 13 | _main.Rmd 14 | _main_files/** 15 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) UBC-DSCI Development Team. 
2 | FROM rocker/verse:4.3.1 3 | 4 | RUN apt-get update --fix-missing 5 | RUN Rscript -e "update.packages(ask = FALSE)" 6 | RUN install2.r --error magick \ 7 | cowplot \ 8 | kableExtra \ 9 | plotly \ 10 | tidymodels \ 11 | tidyclust \ 12 | kknn \ 13 | ggpubr \ 14 | ggforce \ 15 | themis \ 16 | egg \ 17 | fontawesome \ 18 | xfun \ 19 | tinytex \ 20 | reticulate \ 21 | rsvg 22 | 23 | RUN Rscript -e "devtools::install_github('ttimbers/canlang@0.0.1')" 24 | RUN Rscript -e "reticulate::install_miniconda()" 25 | RUN Rscript -e "reticulate::conda_install('r-reticulate', 'python-kaleido')" 26 | RUN Rscript -e "reticulate::conda_install('r-reticulate', 'plotly', channel = 'plotly')" 27 | # necessary for tuning number of clusters in Kmeans 28 | # see https://github.com/tidymodels/tidyclust/issues/127 29 | RUN apt install -y libgsl27 30 | 31 | # increase the ImageMagick resource limits 32 | # this relies on the fact that there is only one place where each of these sizes are used in policy.xml 33 | # (256MiB is for memory, 512MiB is for map, 1GiB is for disk) 34 | RUN sed -i 's/256MiB/8GiB/' /etc/ImageMagick-6/policy.xml 35 | RUN sed -i 's/512MiB/8GiB/' /etc/ImageMagick-6/policy.xml 36 | RUN sed -i 's/1GiB/8GiB/' /etc/ImageMagick-6/policy.xml 37 | 38 | ## install LaTeX packages 39 | RUN tlmgr install \ 40 | amsmath \ 41 | latex-amsmath-dev \ 42 | iftex \ 43 | euenc \ 44 | fontspec \ 45 | tipa \ 46 | unicode-math \ 47 | xunicode \ 48 | kvoptions \ 49 | ltxcmds \ 50 | kvsetkeys \ 51 | etoolbox \ 52 | xcolor \ 53 | fancyvrb \ 54 | framed \ 55 | booktabs \ 56 | mdwtools \ 57 | float \ 58 | caption \ 59 | sourcecodepro \ 60 | hyperref \ 61 | amscls \ 62 | multirow \ 63 | wrapfig \ 64 | colortbl \ 65 | pdflscape \ 66 | tabu \ 67 | varwidth \ 68 | threeparttable \ 69 | threeparttablex \ 70 | environ \ 71 | trimspaces \ 72 | ulem \ 73 | makecell \ 74 | natbib \ 75 | pdftexcmds \ 76 | infwarerr \ 77 | fontawesome5 78 | 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | Copyright (c) 2020 Tiffany A. Timbers, Trevor Campbell, Melissa Lee 4 | 5 | This textbook (source contained in the [`introduction-to-datascience` repository](https://github.com/UBC-DSCI/introduction-to-datascience)) is made available under the **Attribution-NonCommercial-ShareAlike 4.0 International** ([CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/)). 6 | 7 | This is a human-readable summary of (and not a substitute for) the [license](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). 8 | 9 | ## You are free to: 10 | 11 | - **Share** — copy and redistribute the material in any medium or format 12 | - **Adapt** — remix, transform, and build upon the material 13 | 14 | The licensor cannot revoke these freedoms as long as you follow the license terms. 15 | 16 | ## Under the following terms: 17 | 18 | - **Attribution** — You must give appropriate credit (mentioning that your work is derived from work that is Copyright © Tiffany A. Timbers, Trevor Campbell, Melissa Lee and, where practical, linking to https://datasciencebook.ca/), provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use. 19 | - **NonCommercial** — You may not use the material for commercial purposes. 20 | - **ShareAlike** — If you remix, transform, or build upon the material, you must distribute your contributions under the same license as the original. 21 | 22 | **No additional restrictions** — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits. 
23 | 24 | ## Notices: 25 | 26 | You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation. 27 | 28 | No warranties are given. The license may not give you all of the permissions necessary for your intended use. For example, other rights such as publicity, privacy, or moral rights may limit how you use the material. 29 | 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Data Science: A First Introduction 2 | This is the source for the *Data Science: A First Introduction* textbook. 3 | 4 | The book is available online at: https://datasciencebook.ca/ 5 | 6 | © 2020 Tiffany A. Timbers, Trevor Campbell, Melissa Lee 7 | 8 | For the Python version of the textbook, please visit https://python.datasciencebook.ca or the GitHub repository at https://github.com/ubc-dsci/introduction-to-datascience-python. 9 | 10 | ## License Information 11 | 12 | This textbook is offered under 13 | the [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) License](https://creativecommons.org/licenses/by-nc-sa/4.0/). 14 | See [the license file](LICENSE.md) for more information. 15 | 16 | ## Development 17 | 18 | ### Setup 19 | 20 | Building the book requires Docker (instructions here: https://docs.docker.com/get-docker/) 21 | 22 | ### Build locally 23 | 24 | You must have at least 8GB of RAM (and ideally more like 16GB RAM) to build the book. 25 | 26 | You can build the HTML version of the book on your own machine by running 27 | ``` 28 | ./build_html.sh 29 | ``` 30 | in the root directory of this repository. The book can be viewed in your browser by opening the `docs/index.html` file. 
31 | 32 | You can build the PDF version of the book on your own machine by running 33 | ``` 34 | ./build_pdf.sh 35 | ``` 36 | in the root directory of this repository. The book can be viewed in a PDF reader by opening `docs/_main.pdf`. 37 | 38 | #### Working with RStudio (HTML only) 39 | 40 | If you want to edit the source material and build the book using RStudio, navigate to the repository root and run 41 | ``` 42 | docker-compose up -d 43 | ``` 44 | to start up the docker container. Then open a web browser and type [http://localhost:8787/](http://localhost:8787/). 45 | For the username enter `rstudio`, and for the password enter `password`. 46 | At any point you can render the book by running the following R code in the R console: 47 | ``` 48 | bookdown::render_book('index.Rmd', 'bookdown::gitbook') 49 | ``` 50 | When you are done working, make sure to type `docker-compose down` to shut down the container. 51 | 52 | ### Contributing 53 | 54 | Primary development in this repository happens on the `main` branch. If you want to contribute to the book, 55 | please branch off of `main` and make a pull request into `main`. You cannot commit directly to `main`. 56 | 57 | The `production` branch contains the source material corresponding to the current publicly-viewable version of the book website. 58 | 59 | The `gh-pages` branch serves the current book website at https://datasciencebook.ca. 60 | 61 | ### Workflows 62 | 63 | #### Book deployment 64 | 65 | You can update the live, publicly viewable HTML book by making changes to the `source/` folder in the `production` branch (e.g. by merging `main` into `production`). 66 | GitHub will trigger a rebuild of the public HTML site, and store the built book in the root folder of the `gh-pages` branch. 67 | 68 | #### `main` deploy previews 69 | 70 | Any commit to `source/**` on the `main` branch (from a merged PR) will trigger a rebuild of the development preview site served at `https://datasciencebook.ca/dev`. 
71 | The built preview book will be stored in the `dev/` folder on the `gh-pages` branch. 72 | 73 | #### PR deploy previews 74 | 75 | Any PR to `source/` will trigger a build of a PR preview site at `https://datasciencebook.ca/pull###`, where `###` is the number of the pull request. 76 | It will also trigger a build of the diff using [website_diff](https://github.com/trevorcampbell/website_diff) to the current `dev/` site at `https://datasciencebook.ca/diff###`. 77 | The built preview and diff books will be stored in the `pull###/` and `diff###/` folders on the `gh-pages` branch. 78 | 79 | #### Build environment updates 80 | 81 | Any PR to `Dockerfile` will trigger a rebuild of the docker image, push it to DockerHub, and update the image tags in the `build_html.sh` and `build_pdf.sh` scripts and in `docker-compose.yml` on the PR automatically. 82 | This new build environment will be used for the PR deploy preview mentioned above. 83 | 84 | ## Style Guide 85 | 86 | ### General 87 | - **80 character line limit!** This is necessary to make git diffs useful 88 | - numbers in text should be English words ("four common mistakes" not "4 common mistakes") unless there are units (40km, not forty km) 89 | - use Oxford commas ("a, b, and c" not "a, b and c") 90 | - "subset" should not be used as a verb 91 | - functions in text should not have parentheses (`read_csv` not `read_csv()`) 92 | - remove all references to "course" and "student"; replace with "reader" or "you" where necessary 93 | - make sure we have permission to use all external resources that we use 94 | - remove all references to "clicking on things" in the HTML version of the book (e.g. "click this link to ...") 95 | - When we introduce a new term, use `**bolding**` to typeset it (but only the first introduction of the term) 96 | - for symbols as part of the text, make sure you give them their full name and surround with parentheses so that they 97 | don't "disappear" in the rest of the text. 
So for example, if I have a `,` in the text, I should do 98 | something like "here is some text about the comma (`,`)". Or for `<-`, we should do "something like this assignment operator (`<-`)". 99 | There are likely exceptions to this rule though. 100 | - Book titles in the text should be typeset in italics (e.g. *R for Data Science*) 101 | 102 | ### Code blocks 103 | - Use the knitr label format `##-[name with only alphanumeric + hyphens]` where 104 | the `##` is the 2-digit chapter number, e.g. `03-test-name` for a label `test-name` in chapter 3 105 | - Make sure to get syntax highlighting by specifying the language in each code block: 106 |
107 |   ```r
108 |      code
109 |   ```
110 |   
111 | not 112 |
113 |   ```
114 |     code
115 |   ```
116 |   (similar for `html` where needed)
117 | - always use `|>` pipe, not `%>%`
118 | - anywhere we specify a grid of tuning values, don't just do `grid = 10`; actually specify the values using `seq` or `c(...)`
119 | - do not end code blocks with `head(dataframe)`; just use `dataframe` to print
120 | - `set.seed` once at the beginning of each chapter
121 | - use `"double quotes"` for strings, not `'single quotes'`
122 | - make sure all lines of code are at most 80 characters (for LaTeX PDF output typesetting)
123 | - pass code blocks through `styler` (although the result must still obey the 80-character limit)
124 | - use `slice`, `slice_min`, `slice_max` (not `top_n`)
125 | - just `pull(colname)`, don't `select` first
126 | 
127 | ### Section headings
128 | - All (sub)section headings should be sentence case ("Loading a tabular data set", not "Loading a Tabular Data Set")
129 | - Make sure that subsections occur in 1-step hierarchies (no subsubsection directly below subsection, for example)
130 | - Make sure that `{-}` is used wherever unnumbered headings are required
131 | 
132 | Choose an appropriate table of contents depth via the `toc_depth` option (the example below uses depth 2, which is a good default):
133 | ```
134 | bookdown::gitbook:
135 |     toc_depth: 2
136 | ```
137 | 
138 | ### Learning objectives
139 | - when saying that the reader will do things in code, always say "in R"
140 | - "you will be able to" (not "students will be able to", "the reader will be able to")
141 | 
142 | ### Captions
143 | - captions should be sentence formatted and end with a period
144 | - If you have special characters (particularly underscores, quotation marks, plus signs, other LaTeX math symbols) make sure to separate
145 |   the caption out of the code chunk like so
146 |   ```
147 |   (ref:blah)
148 |   
149 |   \`\`\`
150 |   {r blah, other_options}
151 |   code here
152 |   \`\`\`
153 |   ```
154 | 
155 | ### Equations
156 | - make sure all equations get capitalized labels ("Equation \\@ref(blah)", not "equation below" or "equation above")
157 | 
158 | ### Figures
159 | - make sure all figures get (capitalized) labels ("Figure \\@ref(blah)", not "figure below" or "figure above")
160 | - make sure all figures get captions
161 | - specify image widths of PNGs and JPEGs in terms of linewidth percent
162 |   (e.g. `out.width="70%"`);
163 |   for plots we create in R, use `fig.width` and `fig.height` instead
164 | - center align all images via `fig.align = "center"`
165 | - make sure we have permission for every figure/logo that we use
166 | - Make sure all figures follow the visualization principles in Chapter 4
167 | - Make sure axes are set appropriately to not inflate/deflate differences artificially *where it does not compromise clarity* (e.g. in the classification
168 |   chapter there are a few examples where zoomed-in accuracy axes are better than using the full range 0 to 1)
169 | - Figure size for bar charts should be `fig.width=5, fig.height=3` (the exceptions are Figures 1.7 and 1.8, so that we can read the axis labels)
170 | - cropping width for syntax diagrams is 1625 (done using `image_crop`)
171 | 
172 | ### Tables
173 | - make sure all tables get capitalized labels ("Table \\@ref(blah)", not "table below" or "table above")
174 | - make sure all tables get captions
175 | - make sure the row + column spacing is reasonable
176 | - Do not put links in table captions; they break PDF rendering
177 | - Do not put underscores in table captions; they break PDF rendering
178 | 
179 | ### Note boxes
180 | - note boxes should be typeset as quote boxes using `>` and start with **Note:**
181 | 
182 | ### Bibliography
183 | - do not put "et al" or "and others"; always use the full list of authors, BibTeX will choose how to abbreviate
184 | - read https://trevorcampbell.me/html/bibtex.html and make sure our bib follows this convention
185 | 
186 | ### Naming conventions
187 | - K-means (not $K$-\*, K means, Kmeans)
188 | - K-nearest neighbors (not $K$-\*, K nearest neighbors, K nearest neighbor, use US spelling neighbor not neighbour). Note that "K-nearest neighbor" is not the singular form; "K-nearest neighbors" is
189 | - K-NN (not $K$-\*, KNN, K NN, $K$NN, K-nn)
190 | - local repository (not local computer)
191 | - package (not library, meta package, meta-package)
192 | - data science (not Data Science)
193 | - data frame (not dataframe)
194 | - data set (not dataset)
195 | - scatter plot (not scatterplot)
196 | - bar plot (not bar chart)
197 | - capitalize all initialisms and acronyms (URL not url, API not api, K-NN not k-nn)
198 | - response variable (not target, output, label)
199 | - predictor variable (not explanatory, feature)
200 | - numerical variable (not quantitative variable)
201 | - categorical variable (not class variable)
202 | 
203 | ### Punctuation
204 | - em dashes should have no surrounding spaces: write `This kind of typesetting—which is awesome—is correct!`, not `Typesetting with spaces around em-dashes — which is bad — is not correct`
205 | - make sure `\index` commands don't break punctuation spacing. E.g. `This is an item \index{item}; it is good` will typeset with an erroneous space after item, i.e. `This is an item ; it is good`
206 | 
207 | ### Common typos to check for
208 | - RMPSE: should be RMSPE
209 | - boostrap: should be bootstrap
210 | 
211 | ### Use American spelling
212 | Generally the book uses American spelling. Some common British vs American and Canadian vs American gotchas:
213 | - o vs ou: neighbor and color (not neighbour and colour)
214 | - single vs double ell: labeling and labeled (not labelling and labelled)
215 | - z vs s: summarize (not summarise)
216 | - c vs s: defense (not defence)
217 | - er vs re: center (not centre)
218 | 
219 | ### Whitespace
220 | We need a line of whitespace before and after code fences (code surrounded by three backticks above and below). This is for readability, 
221 | and it is essential for figure captions.
222 | 
223 | ### PDF Output
224 | These are the absolute last steps when rendering the PDF output:
225 | - Look for and fix bad line breaks (e.g. with only one word on the next line, orphans, and widows)
226 | - Look for and fix bad line wraps in code and text
227 | - Look for and fix bad figure placement (falling off page, going over the side)
228 | - Look for and fix large whitespace sections where LaTeX doesn't want to break the next paragraph (usually `\allowdisplaybreaks` helps)
229 | - Fix incorrect indenting. LaTeX will indent for a new paragraph if there is an extra whitespace line, so these should be deleted if no paragraph break is desired.
230 | - Look for `??` in the PDF (broken refs)
231 | - Look in the index for near-duplicates, and merge if needed
232 | - Look for / fix raw LaTeX code (search for backslash and curly brace in the final PDF)
233 | - Make sure the 3D figures (and the text around them that refers to clicking and dragging) are properly modified for the PDF output
234 | - Make sure all markdown label-replaced URLs (of the form `[blah](url)`) will make 
235 |   sense in the hardcopy book version (i.e. nothing like "click this"). Many links appear in the additional resources: make sure the 
236 |   text-replacement of the URL contains enough information for someone to find the resource (without being able to click the link)
237 | 
238 | ### HTML Output
239 | - Look for broken references (I *think* these end up as `??`)
240 | - Look for uncentered images
241 | 


--------------------------------------------------------------------------------
/_bookdown.yml:
--------------------------------------------------------------------------------
1 | output_dir: "docs"
2 | delete_merged_file: true
3 | language:
4 |   ui:
5 |     edit: "Edit"
6 |     chapter_name: "Chapter "
7 | rmd_files: ["index.Rmd", "source/foreword.Rmd", "source/preface.Rmd", "source/acknowledgments.Rmd", "source/authors.Rmd", "source/intro.Rmd", "source/reading.Rmd", "source/wrangling.Rmd", "source/viz.Rmd", "source/classification1.Rmd", "source/classification2.Rmd", "source/regression1.Rmd", "source/regression2.Rmd", "source/clustering.Rmd", "source/inference.Rmd", "source/jupyter.Rmd", "source/version-control.Rmd", "source/setup.Rmd", "source/references.Rmd"]
8 | 


--------------------------------------------------------------------------------
/_output.yml:
--------------------------------------------------------------------------------
 1 | bookdown::gitbook:
 2 |   css: source/style.css
 3 |   config:
 4 |     toc:
 5 |       before: |
 6 |         
  • Data Science: A First Introduction
  • 7 | edit: null 8 | download: null 9 | pandoc_args: ["--verbose", "--bibliography=source/references.bib", "--citeproc"] 10 | includes: 11 | in_header: source/analytics.html 12 | 13 | bookdown::epub_book: default 14 | bookdown::pdf_book: 15 | includes: 16 | in_header: source/preamble.tex 17 | before_body: source/before_body.tex 18 | after_body: source/after_body.tex 19 | keep_tex: true 20 | dev: "cairo_pdf" 21 | latex_engine: xelatex 22 | citation_package: natbib 23 | template: null 24 | pandoc_args: ["--top-level-division=chapter", "--verbose", "--bibliography=source/references.bib"] 25 | toc_depth: 3 26 | toc_unnumbered: false 27 | toc_appendix: true 28 | quote_footer: ["\\VA{", "}{}"] 29 | highlight_bw: true 30 | -------------------------------------------------------------------------------- /build_html.sh: -------------------------------------------------------------------------------- 1 | # Script to generate HTML book 2 | docker run --rm -m 8g -v $(pwd):/home/rstudio/introduction-to-datascience ubcdsci/intro-to-ds:202307130106229dd1c2 /bin/bash -c "cd /home/rstudio/introduction-to-datascience; Rscript -e 'bookdown::render_book(\"index.Rmd\", output_format=\"bookdown::gitbook\"); warnings(); problems()'" 3 | 4 | -------------------------------------------------------------------------------- /build_pdf.sh: -------------------------------------------------------------------------------- 1 | # Script to generate PDF book 2 | 3 | # backup original index.Rmd 4 | cp index.Rmd index_backup.Rmd 5 | 6 | # bookdown does weird things with the root .html filename if either (1) index.rmd is not in the root dir or (2) index.rmd does not contain a heading 7 | # we need the root file to be named index.html, so we need it to be in the root dir and contain a heading. But the PDF version doesn't need the welcome 8 | # page, which is the first heading. So we manually extract that text prior to building. This is a bit painful, but it works... 
9 | sed -n -i "/# Welcome/q;p" index.Rmd 10 | 11 | # need to also remove the cover image from the PDF version 12 | sed -i "/graphic\.jpg/d" index.Rmd 13 | 14 | ## Build the book with bookdown 15 | docker run --rm -m 8g -v $(pwd):/home/rstudio/introduction-to-datascience ubcdsci/intro-to-ds:202307130106229dd1c2 /bin/bash -c "cd /home/rstudio/introduction-to-datascience; Rscript -e 'bookdown::render_book(\"index.Rmd\", \"bookdown::pdf_book\"); warnings(); problems()'" 16 | 17 | # restore the backed up full index.Rmd 18 | mv index_backup.Rmd index.Rmd 19 | 20 | 21 | -------------------------------------------------------------------------------- /data/can_lang.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/can_lang.db -------------------------------------------------------------------------------- /data/can_lang.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/can_lang.xlsx -------------------------------------------------------------------------------- /data/can_lang_no_names.tsv: -------------------------------------------------------------------------------- 1 | Aboriginal languages Aboriginal languages, n.o.s. 590 235 30 665 2 | Non-Official & Non-Aboriginal languages Afrikaans 10260 4785 85 23415 3 | Non-Official & Non-Aboriginal languages Afro-Asiatic languages, n.i.e. 1150 445 10 2775 4 | Non-Official & Non-Aboriginal languages Akan (Twi) 13460 5985 25 22150 5 | Non-Official & Non-Aboriginal languages Albanian 26895 13135 345 31930 6 | Aboriginal languages Algonquian languages, n.i.e. 
45 10 0 120 7 | Aboriginal languages Algonquin 1260 370 40 2480 8 | Non-Official & Non-Aboriginal languages American Sign Language 2685 3020 1145 21930 9 | Non-Official & Non-Aboriginal languages Amharic 22465 12785 200 33670 10 | Non-Official & Non-Aboriginal languages Arabic 419890 223535 5585 629055 11 | Non-Official & Non-Aboriginal languages Armenian 33460 21510 450 41295 12 | Non-Official & Non-Aboriginal languages Assyrian Neo-Aramaic 16070 10510 205 19740 13 | Aboriginal languages Athabaskan languages, n.i.e. 50 10 0 85 14 | Aboriginal languages Atikamekw 6150 5465 1100 6645 15 | Non-Official & Non-Aboriginal languages Austro-Asiatic languages, n.i.e 170 80 0 190 16 | Non-Official & Non-Aboriginal languages Austronesian languages, n.i.e. 4195 1160 35 5585 17 | Non-Official & Non-Aboriginal languages Azerbaijani 3255 1245 25 5455 18 | Aboriginal languages Babine (Wetsuwet'en) 110 20 10 210 19 | Non-Official & Non-Aboriginal languages Bamanankan 1535 345 0 3190 20 | Aboriginal languages Beaver 190 50 0 340 21 | Non-Official & Non-Aboriginal languages Belarusan 810 225 0 2265 22 | Non-Official & Non-Aboriginal languages Bengali 73125 47350 525 91220 23 | Non-Official & Non-Aboriginal languages Berber languages, n.i.e. 
8985 2615 15 12510 24 | Non-Official & Non-Aboriginal languages Bikol 1785 290 0 2075 25 | Non-Official & Non-Aboriginal languages Bilen 805 615 15 1085 26 | Aboriginal languages Blackfoot 2815 1110 85 5645 27 | Non-Official & Non-Aboriginal languages Bosnian 12215 6045 155 18265 28 | Non-Official & Non-Aboriginal languages Bulgarian 20020 11985 200 22425 29 | Non-Official & Non-Aboriginal languages Burmese 3585 2245 75 4995 30 | Non-Official & Non-Aboriginal languages Cantonese 565270 400220 58820 699125 31 | Aboriginal languages Carrier 1025 250 15 2100 32 | Non-Official & Non-Aboriginal languages Catalan 870 350 30 2035 33 | Aboriginal languages Cayuga 45 10 10 125 34 | Non-Official & Non-Aboriginal languages Cebuano 19890 7205 70 27040 35 | Non-Official & Non-Aboriginal languages Celtic languages, n.i.e. 525 80 10 3595 36 | Non-Official & Non-Aboriginal languages Chaldean Neo-Aramaic 5545 3445 35 7115 37 | Aboriginal languages Chilcotin 655 255 15 1150 38 | Non-Official & Non-Aboriginal languages Chinese languages, n.i.e. 615 280 0 590 39 | Non-Official & Non-Aboriginal languages Chinese, n.o.s. 38580 23940 2935 41685 40 | Aboriginal languages Comox 85 0 0 185 41 | Aboriginal languages Cree, n.o.s. 64050 37950 7800 86115 42 | Non-Official & Non-Aboriginal languages Creole languages, n.i.e. 4985 2005 15 16635 43 | Non-Official & Non-Aboriginal languages Creole, n.o.s. 64110 24570 310 133045 44 | Non-Official & Non-Aboriginal languages Croatian 48200 16775 220 69835 45 | Non-Official & Non-Aboriginal languages Cushitic languages, n.i.e. 
365 180 0 480 46 | Non-Official & Non-Aboriginal languages Czech 22295 6235 70 28725 47 | Aboriginal languages Dakota 1210 255 20 1760 48 | Non-Official & Non-Aboriginal languages Danish 12630 855 85 15750 49 | Aboriginal languages Dene 10700 7710 770 13060 50 | Non-Official & Non-Aboriginal languages Dinka 2120 1130 0 2475 51 | Aboriginal languages Dogrib (Tlicho) 1650 1020 165 2375 52 | Non-Official & Non-Aboriginal languages Dravidian languages, n.i.e. 490 190 0 790 53 | Non-Official & Non-Aboriginal languages Dutch 99015 9565 1165 120870 54 | Non-Official & Non-Aboriginal languages Edo 1670 410 0 3220 55 | Official languages English 19460850 22162865 15265335 29748265 56 | Non-Official & Non-Aboriginal languages Estonian 5445 975 55 6070 57 | Non-Official & Non-Aboriginal languages Ewe 1760 405 10 3000 58 | Non-Official & Non-Aboriginal languages Fijian 745 195 0 1665 59 | Non-Official & Non-Aboriginal languages Finnish 15295 2790 105 17590 60 | Official languages French 7166700 6943800 3825215 10242945 61 | Non-Official & Non-Aboriginal languages Frisian 2100 185 40 2910 62 | Non-Official & Non-Aboriginal languages Fulah (Pular, Pulaar, Fulfulde) 2825 825 0 4725 63 | Non-Official & Non-Aboriginal languages Ga 920 250 0 2250 64 | Non-Official & Non-Aboriginal languages Ganda 1295 345 25 2495 65 | Non-Official & Non-Aboriginal languages Georgian 1710 1040 25 2150 66 | Non-Official & Non-Aboriginal languages German 384040 120335 10065 502735 67 | Non-Official & Non-Aboriginal languages Germanic languages, n.i.e. 
525 1630 725 8705 68 | Aboriginal languages Gitxsan (Gitksan) 880 315 10 1305 69 | Non-Official & Non-Aboriginal languages Greek 106525 44550 1020 150965 70 | Non-Official & Non-Aboriginal languages Gujarati 108780 64150 885 149045 71 | Aboriginal languages Gwich'in 255 50 10 360 72 | Aboriginal languages Haida 80 10 0 465 73 | Aboriginal languages Haisla 90 20 0 175 74 | Non-Official & Non-Aboriginal languages Haitian Creole 3030 1280 25 6855 75 | Non-Official & Non-Aboriginal languages Hakka 10910 4085 70 12445 76 | Aboriginal languages Halkomelem 480 50 20 1060 77 | Non-Official & Non-Aboriginal languages Harari 1320 735 0 1715 78 | Non-Official & Non-Aboriginal languages Hebrew 19530 8560 825 75020 79 | Aboriginal languages Heiltsuk 100 5 10 125 80 | Non-Official & Non-Aboriginal languages Hiligaynon 6880 2210 25 7925 81 | Non-Official & Non-Aboriginal languages Hindi 110645 55510 1405 433365 82 | Non-Official & Non-Aboriginal languages Hmong-Mien languages 795 335 10 870 83 | Non-Official & Non-Aboriginal languages Hungarian 61235 19480 440 71285 84 | Non-Official & Non-Aboriginal languages Icelandic 1285 270 0 1780 85 | Non-Official & Non-Aboriginal languages Igbo 4235 1000 10 8855 86 | Non-Official & Non-Aboriginal languages Ilocano 26345 9125 110 34530 87 | Non-Official & Non-Aboriginal languages Indo-Iranian languages, n.i.e. 5185 2380 20 8870 88 | Aboriginal languages Inuinnaqtun (Inuvialuktun) 1020 165 30 1975 89 | Aboriginal languages Inuit languages, n.i.e. 310 90 15 470 90 | Aboriginal languages Inuktitut 35210 29230 8795 40620 91 | Aboriginal languages Iroquoian languages, n.i.e. 35 5 0 115 92 | Non-Official & Non-Aboriginal languages Italian 375635 115415 1705 574725 93 | Non-Official & Non-Aboriginal languages Italic (Romance) languages, n.i.e. 
720 175 25 2680 94 | Non-Official & Non-Aboriginal languages Japanese 43640 19785 3255 83095 95 | Non-Official & Non-Aboriginal languages Kabyle 13150 5490 15 17120 96 | Non-Official & Non-Aboriginal languages Kannada 3970 1630 10 8245 97 | Non-Official & Non-Aboriginal languages Karenic languages 4705 3860 135 4895 98 | Non-Official & Non-Aboriginal languages Kashmiri 565 135 0 905 99 | Aboriginal languages Kaska (Nahani) 180 20 10 365 100 | Non-Official & Non-Aboriginal languages Khmer (Cambodian) 20130 10885 475 27035 101 | Non-Official & Non-Aboriginal languages Kinyarwanda (Rwanda) 5250 1530 25 7860 102 | Non-Official & Non-Aboriginal languages Konkani 3330 720 10 6790 103 | Non-Official & Non-Aboriginal languages Korean 153425 109705 12150 172750 104 | Non-Official & Non-Aboriginal languages Kurdish 11705 6580 185 15290 105 | Aboriginal languages Kutenai 110 10 0 170 106 | Aboriginal languages Kwakiutl (Kwak'wala) 325 25 15 605 107 | Non-Official & Non-Aboriginal languages Lao 12670 6175 150 17235 108 | Non-Official & Non-Aboriginal languages Latvian 5450 1255 35 6500 109 | Aboriginal languages Lillooet 315 25 15 790 110 | Non-Official & Non-Aboriginal languages Lingala 3805 1045 10 17010 111 | Non-Official & Non-Aboriginal languages Lithuanian 7075 2015 60 8185 112 | Non-Official & Non-Aboriginal languages Macedonian 16770 6830 95 23075 113 | Non-Official & Non-Aboriginal languages Malagasy 1430 430 0 2340 114 | Non-Official & Non-Aboriginal languages Malay 12275 3625 140 22470 115 | Non-Official & Non-Aboriginal languages Malayalam 28565 15440 95 37810 116 | Aboriginal languages Malecite 300 55 10 760 117 | Non-Official & Non-Aboriginal languages Maltese 5565 1125 25 7625 118 | Non-Official & Non-Aboriginal languages Mandarin 592040 462890 60090 814450 119 | Non-Official & Non-Aboriginal languages Marathi 8295 3780 30 15565 120 | Aboriginal languages Mi'kmaq 6690 3565 915 9025 121 | Aboriginal languages Michif 465 80 10 1210 122 | Non-Official & 
Non-Aboriginal languages Min Dong 1230 345 30 1045 123 | Non-Official & Non-Aboriginal languages Min Nan (Chaochow, Teochow, Fukien, Taiwanese) 31800 13965 565 42840 124 | Aboriginal languages Mohawk 985 255 30 2415 125 | Non-Official & Non-Aboriginal languages Mongolian 1575 905 10 2095 126 | Aboriginal languages Montagnais (Innu) 10235 8585 2055 11445 127 | Aboriginal languages Moose Cree 105 10 0 195 128 | Aboriginal languages Naskapi 1205 1195 370 1465 129 | Non-Official & Non-Aboriginal languages Nepali 18275 13375 195 21385 130 | Non-Official & Non-Aboriginal languages Niger-Congo languages, n.i.e. 19135 4010 30 40760 131 | Non-Official & Non-Aboriginal languages Nilo-Saharan languages, n.i.e. 3750 1520 0 4550 132 | Aboriginal languages Nisga'a 400 75 10 1055 133 | Aboriginal languages North Slavey (Hare) 765 340 95 1005 134 | Aboriginal languages Northern East Cree 315 110 35 550 135 | Aboriginal languages Northern Tutchone 220 30 0 280 136 | Non-Official & Non-Aboriginal languages Norwegian 4615 350 70 8120 137 | Aboriginal languages Nuu-chah-nulth (Nootka) 280 30 10 560 138 | Aboriginal languages Oji-Cree 12855 7905 1080 15605 139 | Aboriginal languages Ojibway 17885 6175 765 28580 140 | Aboriginal languages Okanagan 275 80 20 820 141 | Aboriginal languages Oneida 60 15 0 185 142 | Non-Official & Non-Aboriginal languages Oriya (Odia) 1055 475 0 1530 143 | Non-Official & Non-Aboriginal languages Oromo 4960 3410 45 6245 144 | Non-Official & Non-Aboriginal languages Other languages, n.i.e. 
3685 1110 80 9730 145 | Aboriginal languages Ottawa (Odawa) 150 75 0 205 146 | Non-Official & Non-Aboriginal languages Pampangan (Kapampangan, Pampango) 4045 1200 10 5425 147 | Non-Official & Non-Aboriginal languages Pangasinan 1390 240 0 1800 148 | Non-Official & Non-Aboriginal languages Pashto 16905 10590 50 23180 149 | Non-Official & Non-Aboriginal languages Persian (Farsi) 214200 143025 4580 252325 150 | Aboriginal languages Plains Cree 3065 1345 95 5905 151 | Non-Official & Non-Aboriginal languages Polish 181710 74780 2495 214965 152 | Non-Official & Non-Aboriginal languages Portuguese 221535 98710 7485 295955 153 | Non-Official & Non-Aboriginal languages Punjabi (Panjabi) 501680 349140 27865 668240 154 | Non-Official & Non-Aboriginal languages Quebec Sign Language 695 730 130 4665 155 | Non-Official & Non-Aboriginal languages Romanian 96660 53325 745 115050 156 | Non-Official & Non-Aboriginal languages Rundi (Kirundi) 5850 2110 0 8590 157 | Non-Official & Non-Aboriginal languages Russian 188255 116595 4855 269645 158 | Aboriginal languages Salish languages, n.i.e. 260 25 0 560 159 | Aboriginal languages Sarsi (Sarcee) 80 10 0 145 160 | Non-Official & Non-Aboriginal languages Scottish Gaelic 1090 190 15 3980 161 | Aboriginal languages Sekani 85 15 0 185 162 | Non-Official & Non-Aboriginal languages Semitic languages, n.i.e. 2150 1205 65 3220 163 | Non-Official & Non-Aboriginal languages Serbian 57350 31750 530 73780 164 | Non-Official & Non-Aboriginal languages Serbo-Croatian 9550 3890 30 11275 165 | Non-Official & Non-Aboriginal languages Shona 3185 1035 0 5430 166 | Aboriginal languages Shuswap (Secwepemctsin) 445 50 35 1305 167 | Non-Official & Non-Aboriginal languages Sign languages, n.i.e 4125 6690 645 22280 168 | Non-Official & Non-Aboriginal languages Sindhi 11860 4975 35 20260 169 | Non-Official & Non-Aboriginal languages Sinhala (Sinhalese) 16335 7790 40 27825 170 | Aboriginal languages Siouan languages, n.i.e. 
55 20 0 140 171 | Aboriginal languages Slavey, n.o.s. 280 105 10 675 172 | Non-Official & Non-Aboriginal languages Slavic languages, n.i.e. 2420 670 10 2995 173 | Non-Official & Non-Aboriginal languages Slovak 17580 5610 100 21470 174 | Non-Official & Non-Aboriginal languages Slovene (Slovenian) 9785 2055 15 11490 175 | Non-Official & Non-Aboriginal languages Somali 36755 22895 220 49660 176 | Aboriginal languages South Slavey 945 370 35 1365 177 | Aboriginal languages Southern East Cree 45 15 0 40 178 | Aboriginal languages Southern Tutchone 70 5 0 145 179 | Non-Official & Non-Aboriginal languages Spanish 458850 263505 13030 995260 180 | Aboriginal languages Squamish 40 5 10 285 181 | Aboriginal languages Stoney 3025 1950 240 3675 182 | Aboriginal languages Straits 80 25 15 365 183 | Non-Official & Non-Aboriginal languages Swahili 13370 5370 80 38685 184 | Aboriginal languages Swampy Cree 1440 330 10 2350 185 | Non-Official & Non-Aboriginal languages Swedish 6840 1050 125 14140 186 | Non-Official & Non-Aboriginal languages Tagalog (Pilipino, Filipino) 431385 213790 3450 612735 187 | Aboriginal languages Tahltan 95 5 0 265 188 | Non-Official & Non-Aboriginal languages Tai-Kadai languages, n.i.e 85 30 0 115 189 | Non-Official & Non-Aboriginal languages Tamil 140720 96955 2085 189860 190 | Non-Official & Non-Aboriginal languages Telugu 15660 8280 40 23165 191 | Non-Official & Non-Aboriginal languages Thai 9255 3365 525 15395 192 | Aboriginal languages Thompson (Ntlakapamux) 335 20 0 450 193 | Non-Official & Non-Aboriginal languages Tibetan 6160 4590 50 7050 194 | Non-Official & Non-Aboriginal languages Tibeto-Burman languages, n.i.e. 1405 655 15 2380 195 | Non-Official & Non-Aboriginal languages Tigrigna 16645 10205 130 21340 196 | Aboriginal languages Tlingit 95 0 10 260 197 | Aboriginal languages Tsimshian 200 30 10 410 198 | Non-Official & Non-Aboriginal languages Turkic languages, n.i.e. 
1315 455 10 1875 199 | Non-Official & Non-Aboriginal languages Turkish 32815 18955 690 50770 200 | Non-Official & Non-Aboriginal languages Ukrainian 102485 28250 1210 132115 201 | Non-Official & Non-Aboriginal languages Uralic languages, n.i.e. 10 5 0 25 202 | Non-Official & Non-Aboriginal languages Urdu 210815 128785 1495 322220 203 | Non-Official & Non-Aboriginal languages Uyghur 1035 610 20 1390 204 | Non-Official & Non-Aboriginal languages Uzbek 1720 995 15 2465 205 | Non-Official & Non-Aboriginal languages Vietnamese 156430 104245 8075 198895 206 | Non-Official & Non-Aboriginal languages Vlaams (Flemish) 3895 355 35 4400 207 | Aboriginal languages Wakashan languages, n.i.e. 10 0 0 25 208 | Non-Official & Non-Aboriginal languages Waray-Waray 1110 310 0 1395 209 | Non-Official & Non-Aboriginal languages Welsh 1075 95 0 1695 210 | Non-Official & Non-Aboriginal languages Wolof 3990 1385 10 8240 211 | Aboriginal languages Woods Cree 1840 800 75 2665 212 | Non-Official & Non-Aboriginal languages Wu (Shanghainese) 12915 7650 105 16530 213 | Non-Official & Non-Aboriginal languages Yiddish 13555 7085 895 20985 214 | Non-Official & Non-Aboriginal languages Yoruba 9080 2615 15 22415 215 | -------------------------------------------------------------------------------- /data/create_can_lang_variants.R: -------------------------------------------------------------------------------- 1 | ## Script to create variants of a plain vanilla .csv file 2 | library(tidyverse) 3 | library(openxlsx) 4 | library(RSQLite) 5 | 6 | devtools::install_github("ttimbers/canlang") 7 | library(canlang) 8 | write_csv(can_lang, "can_lang.csv") 9 | 10 | main <- function(){ 11 | path <- "can_lang.csv" 12 | data <- read_csv(path) 13 | path_no_prefix <- sub(".{4}$", "", path) 14 | 15 | # file with some meta data at the top 16 | meta_data <- data.frame(metadata = c("Data source: https://ttimbers.github.io/canlang/", 17 | "Data originally published in: Statistics Canada Census of Population 2016.", 18 
| "Reproduced and distributed on an as is basis with the permission of Statistics Canada.")) 19 | write.table(meta_data, 20 | file = paste0(path_no_prefix, "_meta-data.csv"), 21 | sep = ",", 22 | col.names = FALSE, 23 | row.names = FALSE, 24 | quote = FALSE) 25 | write_csv(data, 26 | path = paste0(path_no_prefix, "_meta-data.csv"), 27 | append = TRUE, 28 | col_names = TRUE) 29 | 30 | # file with no column names and tab delimiters 31 | write_delim(data, 32 | path = paste0(path_no_prefix, ".tsv"), 33 | delim = "\t", 34 | col_names = FALSE) 35 | 36 | # excel file 37 | write.xlsx(data, file = paste0(path_no_prefix, ".xlsx")) 38 | 39 | # write to sqlite 40 | con <- dbConnect(RSQLite::SQLite(), paste0(path_no_prefix, ".db")) 41 | dbWriteTable(con, "lang", data, overwrite = TRUE) 42 | dbDisconnect(con) 43 | } 44 | 45 | main() 46 | -------------------------------------------------------------------------------- /data/create_penguins_no_species.R: -------------------------------------------------------------------------------- 1 | 2 | #remotes::install_github("allisonhorst/palmerpenguins") 3 | library(tidyverse) 4 | library(palmerpenguins) 5 | library(tidymodels) 6 | data(package = 'palmerpenguins') 7 | set.seed(12345) 8 | 9 | penguins <- na.omit(penguins) 10 | 11 | penguins_no_species <- penguins %>% 12 | select(-species) 13 | 14 | split <- initial_split(penguins, prop = 0.05, strata = species) 15 | 16 | toy_penguins <- training(split) %>% 17 | mutate(cluster=as_factor(as.numeric(species))) %>% 18 | select(-species) 19 | 20 | ggplot(toy_penguins, aes(y = bill_length_mm, x = flipper_length_mm, colour = cluster)) + 21 | geom_point() + 22 | xlab("Flipper Length (mm)") + 23 | ylab("Bill Length (mm)") 24 | 25 | write_csv(toy_penguins, "data/toy_penguins.csv") 26 | write_csv(penguins_no_species, "data/penguins.csv") 27 | -------------------------------------------------------------------------------- /data/create_state_property_vote_variants.R: 
-------------------------------------------------------------------------------- 1 | ## Script to create variants of a plain vanilla .csv file 2 | library(tidyverse) 3 | library(openxlsx) 4 | library(RSQLite) 5 | 6 | main <- function(){ 7 | path <- "state_property_vote.csv" 8 | data <- read_csv(path) 9 | path_no_prefix <- sub(".{4}$", "", path) 10 | 11 | # file with some meta data at the top 12 | meta_data <- data.frame(metadata = c("Data source: https://datausa.io/", 13 | "Record of how data was collected: https://github.com/UBC-DSCI/introduction-to-datascience/blob/master/data/retrieve_data.ipynb", 14 | "Date collected: 2020-07-08")) 15 | write.table(meta_data, 16 | file = paste0(path_no_prefix, "_meta-data.csv"), 17 | sep = ",", 18 | col.names = FALSE, 19 | row.names = FALSE, 20 | quote = FALSE) 21 | write.table(data, 22 | file = paste0(path_no_prefix, "_meta-data.csv"), 23 | sep = ",", 24 | row.names = FALSE, 25 | quote = FALSE, 26 | append = TRUE) 27 | 28 | # file with no column names and tab delimiters 29 | write_delim(data, 30 | path = paste0(path_no_prefix, ".tsv"), 31 | delim = "\t", 32 | col_names = FALSE) 33 | 34 | # excel file 35 | write.xlsx(data, file = paste0(path_no_prefix, ".xlsx")) 36 | 37 | # write to sqlite 38 | con <- dbConnect(RSQLite::SQLite(), paste0(path_no_prefix, ".db")) 39 | dbWriteTable(con, "state", data, overwrite = TRUE) 40 | dbDisconnect(con) 41 | } 42 | 43 | main() 44 | -------------------------------------------------------------------------------- /data/create_untidy_canlang.R: -------------------------------------------------------------------------------- 1 | ## creating untidy canlang data 2 | library(canlang) 3 | library(tidyverse) 4 | 5 | # subsetting 5 cities 6 | top5_cities <- region_lang %>% 7 | filter(region %in% c("Toronto", "Montréal", "Vancouver", "Calgary", "Edmonton")) 8 | 9 | write.csv(top5_cities, "region_lang_top5_cities.csv", row.names = F) 10 | 11 | # making a wide data set 12 | top5_cities_wide <- top5_cities 
%>% 13 | select(region, category, language, mother_tongue) %>% 14 | pivot_wider(names_from = region, values_from = c(mother_tongue)) %>% 15 | select(category, language, Toronto, Montréal, Vancouver, Calgary, Edmonton) 16 | 17 | write.csv(top5_cities_wide, "region_lang_top5_cities_wide.csv", row.names = F) 18 | 19 | ## making a messy data set with "/" delimiters 20 | top5_cities_messy <- top5_cities %>% 21 | unite(col = "value", most_at_home, most_at_work, sep = "/") %>% 22 | select(region, category, language, value) %>% 23 | pivot_wider(names_from = region, values_from = value) %>% 24 | select(category, language, Toronto, Montréal, Vancouver, Calgary, Edmonton) 25 | 26 | write.csv(top5_cities_messy, "region_lang_top5_cities_messy.csv", row.names = F) 27 | 28 | 29 | # making a narrow data set 30 | top5_cities_narrow <- top5_cities %>% 31 | select(region, category, language, most_at_home, most_at_work) %>% 32 | pivot_longer(!c(region, category, language), names_to = "type", values_to = "count") 33 | write.csv(top5_cities_narrow, "region_lang_top5_cities_long.csv", row.names = F) 34 | -------------------------------------------------------------------------------- /data/faithful.csv: -------------------------------------------------------------------------------- 1 | eruptions,waiting 2 | 3.6,79.0 3 | 1.8,54.0 4 | 3.333,74.0 5 | 2.283,62.0 6 | 4.533,85.0 7 | 2.883,55.0 8 | 4.7,88.0 9 | 3.6,85.0 10 | 1.95,51.0 11 | 4.35,85.0 12 | 1.833,54.0 13 | 3.917,84.0 14 | 4.2,78.0 15 | 1.75,47.0 16 | 4.7,83.0 17 | 2.167,52.0 18 | 1.75,62.0 19 | 4.8,84.0 20 | 1.6,52.0 21 | 4.25,79.0 22 | 1.8,51.0 23 | 1.75,47.0 24 | 3.45,78.0 25 | 3.067,69.0 26 | 4.533,74.0 27 | 3.6,83.0 28 | 1.967,55.0 29 | 4.083,76.0 30 | 3.85,78.0 31 | 4.433,79.0 32 | 4.3,73.0 33 | 4.467,77.0 34 | 3.367,66.0 35 | 4.033,80.0 36 | 3.833,74.0 37 | 2.017,52.0 38 | 1.867,48.0 39 | 4.833,80.0 40 | 1.833,59.0 41 | 4.783,90.0 42 | 4.35,80.0 43 | 1.883,58.0 44 | 4.567,84.0 45 | 1.75,58.0 46 | 4.533,73.0 47 | 3.317,83.0 
48 | 3.833,64.0 49 | 2.1,53.0 50 | 4.633,82.0 51 | 2.0,59.0 52 | 4.8,75.0 53 | 4.716,90.0 54 | 1.833,54.0 55 | 4.833,80.0 56 | 1.733,54.0 57 | 4.883,83.0 58 | 3.717,71.0 59 | 1.667,64.0 60 | 4.567,77.0 61 | 4.317,81.0 62 | 2.233,59.0 63 | 4.5,84.0 64 | 1.75,48.0 65 | 4.8,82.0 66 | 1.817,60.0 67 | 4.4,92.0 68 | 4.167,78.0 69 | 4.7,78.0 70 | 2.067,65.0 71 | 4.7,73.0 72 | 4.033,82.0 73 | 1.967,56.0 74 | 4.5,79.0 75 | 4.0,71.0 76 | 1.983,62.0 77 | 5.067,76.0 78 | 2.017,60.0 79 | 4.567,78.0 80 | 3.883,76.0 81 | 3.6,83.0 82 | 4.133,75.0 83 | 4.333,82.0 84 | 4.1,70.0 85 | 2.633,65.0 86 | 4.067,73.0 87 | 4.933,88.0 88 | 3.95,76.0 89 | 4.517,80.0 90 | 2.167,48.0 91 | 4.0,86.0 92 | 2.2,60.0 93 | 4.333,90.0 94 | 1.867,50.0 95 | 4.817,78.0 96 | 1.833,63.0 97 | 4.3,72.0 98 | 4.667,84.0 99 | 3.75,75.0 100 | 1.867,51.0 101 | 4.9,82.0 102 | 2.483,62.0 103 | 4.367,88.0 104 | 2.1,49.0 105 | 4.5,83.0 106 | 4.05,81.0 107 | 1.867,47.0 108 | 4.7,84.0 109 | 1.783,52.0 110 | 4.85,86.0 111 | 3.683,81.0 112 | 4.733,75.0 113 | 2.3,59.0 114 | 4.9,89.0 115 | 4.417,79.0 116 | 1.7,59.0 117 | 4.633,81.0 118 | 2.317,50.0 119 | 4.6,85.0 120 | 1.817,59.0 121 | 4.417,87.0 122 | 2.617,53.0 123 | 4.067,69.0 124 | 4.25,77.0 125 | 1.967,56.0 126 | 4.6,88.0 127 | 3.767,81.0 128 | 1.917,45.0 129 | 4.5,82.0 130 | 2.267,55.0 131 | 4.65,90.0 132 | 1.867,45.0 133 | 4.167,83.0 134 | 2.8,56.0 135 | 4.333,89.0 136 | 1.833,46.0 137 | 4.383,82.0 138 | 1.883,51.0 139 | 4.933,86.0 140 | 2.033,53.0 141 | 3.733,79.0 142 | 4.233,81.0 143 | 2.233,60.0 144 | 4.533,82.0 145 | 4.817,77.0 146 | 4.333,76.0 147 | 1.983,59.0 148 | 4.633,80.0 149 | 2.017,49.0 150 | 5.1,96.0 151 | 1.8,53.0 152 | 5.033,77.0 153 | 4.0,77.0 154 | 2.4,65.0 155 | 4.6,81.0 156 | 3.567,71.0 157 | 4.0,70.0 158 | 4.5,81.0 159 | 4.083,93.0 160 | 1.8,53.0 161 | 3.967,89.0 162 | 2.2,45.0 163 | 4.15,86.0 164 | 2.0,58.0 165 | 3.833,78.0 166 | 3.5,66.0 167 | 4.583,76.0 168 | 2.367,63.0 169 | 5.0,88.0 170 | 1.933,52.0 171 | 4.617,93.0 172 | 1.917,49.0 173 | 
2.083,57.0 174 | 4.583,77.0 175 | 3.333,68.0 176 | 4.167,81.0 177 | 4.333,81.0 178 | 4.5,73.0 179 | 2.417,50.0 180 | 4.0,85.0 181 | 4.167,74.0 182 | 1.883,55.0 183 | 4.583,77.0 184 | 4.25,83.0 185 | 3.767,83.0 186 | 2.033,51.0 187 | 4.433,78.0 188 | 4.083,84.0 189 | 1.833,46.0 190 | 4.417,83.0 191 | 2.183,55.0 192 | 4.8,81.0 193 | 1.833,57.0 194 | 4.8,76.0 195 | 4.1,84.0 196 | 3.966,77.0 197 | 4.233,81.0 198 | 3.5,87.0 199 | 4.366,77.0 200 | 2.25,51.0 201 | 4.667,78.0 202 | 2.1,60.0 203 | 4.35,82.0 204 | 4.133,91.0 205 | 1.867,53.0 206 | 4.6,78.0 207 | 1.783,46.0 208 | 4.367,77.0 209 | 3.85,84.0 210 | 1.933,49.0 211 | 4.5,83.0 212 | 2.383,71.0 213 | 4.7,80.0 214 | 1.867,49.0 215 | 3.833,75.0 216 | 3.417,64.0 217 | 4.233,76.0 218 | 2.4,53.0 219 | 4.8,94.0 220 | 2.0,55.0 221 | 4.15,76.0 222 | 1.867,50.0 223 | 4.267,82.0 224 | 1.75,54.0 225 | 4.483,75.0 226 | 4.0,78.0 227 | 4.117,79.0 228 | 4.083,78.0 229 | 4.267,78.0 230 | 3.917,70.0 231 | 4.55,79.0 232 | 4.083,70.0 233 | 2.417,54.0 234 | 4.183,86.0 235 | 2.217,50.0 236 | 4.45,90.0 237 | 1.883,54.0 238 | 1.85,54.0 239 | 4.283,77.0 240 | 3.95,79.0 241 | 2.333,64.0 242 | 4.15,75.0 243 | 2.35,47.0 244 | 4.933,86.0 245 | 2.9,63.0 246 | 4.583,85.0 247 | 3.833,82.0 248 | 2.083,57.0 249 | 4.367,82.0 250 | 2.133,67.0 251 | 4.35,74.0 252 | 2.2,54.0 253 | 4.45,83.0 254 | 3.567,73.0 255 | 4.5,73.0 256 | 4.15,88.0 257 | 3.817,80.0 258 | 3.917,71.0 259 | 4.45,83.0 260 | 2.0,56.0 261 | 4.283,79.0 262 | 4.767,78.0 263 | 4.533,84.0 264 | 1.85,58.0 265 | 4.25,83.0 266 | 1.983,43.0 267 | 2.25,60.0 268 | 4.75,75.0 269 | 4.117,81.0 270 | 2.15,46.0 271 | 4.417,90.0 272 | 1.817,46.0 273 | 4.467,74.0 274 | -------------------------------------------------------------------------------- /data/historical_vote.csv: -------------------------------------------------------------------------------- 1 | 
election_num,election_year,winner,winner_party,elec_coll_votes_count,elec_coll_votes_perc,pop_votes_perc,pop_votes_perc_marg,pop_votes_count,pop_votes_count_marg,runner-up,runner-up_party,turnout 2 | 10,1824,John Quincy Adams,D.-R.,84/261,32.18%,30.92%,10.44%,"113,142","38,221",Andrew Jackson,D.-R.,26.90% 3 | 23,1876,Rutherford Hayes,Rep.,185/369,50.14%,47.92%,3.00%,"4,034,142","252,666",Samuel Tilden,Dem.,82.60% 4 | 58,2016,Donald Trump,Rep.,304/538,56.50%,45.98%,2.10%,"62,979,636","2,864,974",Hillary Rodham Clinton,Dem.,60.20% 5 | 26,1888,Benjamin Harrison,Rep.,233/401,58.10%,47.80%,0.83%,"5,443,633","94,530",Grover Cleveland,Dem.,80.50% 6 | 54,2000,George W. Bush,Rep.,271/538,50.37%,47.87%,0.51%,"50,460,110","543,816",Al Gore,Dem.,54.20% 7 | 24,1880,James Garfield,Rep.,214/369,57.99%,48.31%,0.09%,"4,453,337","1,898",Winfield Scott Hancock,Dem.,80.50% 8 | 44,1960,John Kennedy,Dem.,303/537,56.42%,49.72%,0.17%,"34,220,984","112,827",Richard Nixon,Rep.,63.80% 9 | 25,1884,Grover Cleveland,Dem.,219/401,54.61%,48.85%,0.57%,"4,914,482","57,579",James Blaine,Rep.,78.20% 10 | 46,1968,Richard Nixon,Rep.,301/538,55.95%,43.42%,0.70%,"31,783,783","511,944",Hubert Humphrey,Dem.,62.50% 11 | 15,1844,James Polk,Dem.,170/275,61.82%,49.54%,1.45%,"1,339,570","39,413",Henry Clay,Whig,79.20% 12 | 48,1976,Jimmy Carter,Dem.,297/538,55.20%,50.08%,2.06%,"40,831,881","1,683,247",Gerald Ford,Rep.,54.80% 13 | 55,2004,George W. 
Bush,Rep.,286/538,53.16%,50.73%,2.46%,"62,040,610","3,012,171",John Kerry,Dem.,60.10% 14 | 27,1892,Grover Cleveland,Dem.,277/444,62.39%,46.02%,3.01%,"5,553,898","363,099",Benjamin Harrison,Rep.,75.80% 15 | 33,1916,Woodrow Wilson,Dem.,277/531,52.17%,49.24%,3.12%,"9,126,868","578,140",Charles Evans Hughes,Rep.,61.80% 16 | 57,2012,Barack Obama,Dem.,332/538,61.71%,51.06%,3.86%,"65,915,795","4,982,291",Mitt Romney,Rep.,58.60% 17 | 28,1896,William McKinley,Rep.,271/447,60.63%,51.02%,4.31%,"7,112,138","601,331",William Jennings Bryan,Dem.,79.60% 18 | 41,1948,Harry Truman,Dem.,303/531,57.06%,49.55%,4.48%,"24,179,347","2,188,055",Thomas Dewey,Rep.,52.20% 19 | 16,1848,Zachary Taylor,Whig,163/290,56.21%,47.28%,4.79%,"1,360,235","137,882",Lewis Cass,Dem.,72.80% 20 | 21,1868,Ulysses Grant,Rep.,214/294,72.79%,52.66%,5.32%,"3,013,790","304,810",Horatio Seymour,Dem.,80.90% 21 | 52,1992,Bill Clinton,Dem.,370/538,68.77%,43.01%,5.56%,"44,909,806","5,805,256",George H. W. Bush,Rep.,58.10% 22 | 14,1840,William Henry Harrison,Whig,234/294,79.59%,52.87%,6.05%,"1,275,583","145,938",Martin Van Buren,Dem.,80.30% 23 | 29,1900,William McKinley,Rep.,292/447,65.23%,51.64%,6.12%,"7,228,864","857,932",William Jennings Bryan,Dem.,73.70% 24 | 17,1852,Franklin Pierce,Dem.,254/296,85.81%,50.83%,6.95%,"1,605,943","219,525",Winfield Scott,Whig,69.50% 25 | 56,2008,Barack Obama,Dem.,365/538,67.84%,52.93%,7.27%,"69,498,516","9,550,193",John McCain,Rep.,61.60% 26 | 40,1944,Franklin Roosevelt,Dem.,432/531,81.36%,53.39%,7.50%,"25,612,916","3,594,987",Thomas Dewey,Rep.,55.90% 27 | 51,1988,George H. W. 
Bush,Rep.,426/538,79.18%,53.37%,7.72%,"48,886,597","7,077,121",Michael Dukakis,Dem.,52.80% 28 | 53,1996,Bill Clinton,Dem.,379/538,70.45%,49.23%,8.51%,"47,400,125","8,201,370",Bob Dole,Rep.,51.70% 29 | 31,1908,William Taft,Rep.,321/483,66.46%,51.57%,8.53%,"7,678,335","1,269,356",William Jennings Bryan,Dem.,65.70% 30 | 49,1980,Ronald Reagan,Rep.,489/538,90.89%,50.75%,9.74%,"43,903,230","8,423,115",Jimmy Carter,Dem.,54.20% 31 | 39,1940,Franklin Roosevelt,Dem.,449/531,84.56%,54.74%,9.96%,"27,313,945","4,966,201",Wendell Willkie,Rep.,62.40% 32 | 20,1864,Abraham Lincoln,Rep.,212/233,90.99%,55.03%,10.08%,"2,211,317","405,090",George McClellan,Dem.,76.30% 33 | 19,1860,Abraham Lincoln,Rep.,180/303,59.41%,39.65%,10.13%,"1,855,993","474,049",John Breckinridge,Dem.,81.80% 34 | 42,1952,Dwight Eisenhower,Rep.,442/531,83.24%,55.18%,10.85%,"34,075,529","6,700,439",Adlai Stevenson,Dem.,62.30% 35 | 22,1872,Ulysses Grant,Rep.,286/352,81.25%,55.58%,11.80%,"3,597,439","763,729",Horace Greeley,L. R.,72.10% 36 | 18,1856,James Buchanan,Dem.,174/296,58.78%,45.29%,12.20%,"1,835,140","494,472",John Fr̩mont,Rep.,79.40% 37 | 11,1828,Andrew Jackson,Dem.,178/261,68.20%,55.93%,12.25%,"642,806","140,839",John Quincy Adams,N. R.,57.30% 38 | 13,1836,Martin Van Buren,Dem.,170/294,57.82%,50.79%,14.20%,"763,291","213,384",William Henry Harrison,Whig,56.50% 39 | 32,1912,Woodrow Wilson,Dem.,435/531,81.92%,41.84%,14.44%,"6,296,284","2,173,563",Theodore Roosevelt,Prog.,59.00% 40 | 43,1956,Dwight Eisenhower,Rep.,457/531,86.06%,57.37%,15.40%,"35,579,180","9,551,152",Adlai Stevenson,Dem.,60.20% 41 | 36,1928,Herbert Hoover,Rep.,444/531,83.62%,58.21%,17.41%,"21,427,123","6,411,659",Al Smith,Dem.,56.90% 42 | 37,1932,Franklin Roosevelt,Dem.,472/531,88.89%,57.41%,17.76%,"22,821,277","7,060,023",Herbert Hoover,Rep.,56.90% 43 | 12,1832,Andrew Jackson,Dem.,219/286,76.57%,54.74%,17.81%,"702,735","228,628",Henry Clay,N. 
R.,57.00% 44 | 50,1984,Ronald Reagan,Rep.,525/538,97.58%,58.77%,18.21%,"54,455,472","16,878,120",Walter Mondale,Dem.,55.20% 45 | 30,1904,Theodore Roosevelt,Rep.,336/476,70.59%,56.42%,18.83%,"7,630,557","2,546,677",Alton Brooks Parker,Dem.,65.50% 46 | 45,1964,Lyndon Johnson,Dem.,486/538,90.33%,61.05%,22.58%,"43,127,041","15,951,287",Barry Goldwater,Rep.,62.80% 47 | 47,1972,Richard Nixon,Rep.,520/538,96.65%,60.67%,23.15%,"47,168,710","17,995,488",George McGovern,Dem.,56.20% 48 | 38,1936,Franklin Roosevelt,Dem.,523/531,98.49%,60.80%,24.26%,"27,752,648","11,070,786",Alf Landon,Rep.,61.00% 49 | 35,1924,Calvin Coolidge,Rep.,382/531,71.94%,54.04%,25.22%,"15,723,789","7,337,547",John Davis,Dem.,48.90% 50 | 34,1920,Warren Harding,Rep.,404/531,76.08%,60.32%,26.17%,"16,144,093","7,004,432",James Cox,Dem.,49.20% -------------------------------------------------------------------------------- /data/historical_vote_messy.csv: -------------------------------------------------------------------------------- 1 | election_year,winner,runnerup 2 | 2016,Donald Trump/Rep,Hillary Clinton/Dem 3 | 2012,Barack Obama/Dem,Mitt Romney/Rep 4 | 2008,Barack Obama/Dem,John McCain/Rep 5 | 2004,George W Bush/Rep,John Kerry/Dem 6 | 2000,George W Bush/Rep,Al Gore/Dem 7 | 1996,Bill Clinton/Dem,Bob Dole/Rep 8 | 1992,Bill Clinton/Dem,George HW Bush/Rep 9 | 1988,George HW Bush/Rep,Michael Dukakis/Dem 10 | 1984,Ronald Reagan/Rep,Walter Mondale/Dem 11 | 1980,Ronald Reagan/Rep,Jimmy Carter/Dem -------------------------------------------------------------------------------- /data/historical_vote_no_header.tsv: -------------------------------------------------------------------------------- 1 | 10 1824 John Quincy Adams D.-R. 84/261 32.18% 30.92% −10.44% 113,142 −38,221 Andrew Jackson D.-R. 26.9% 2 | 23 1876 Rutherford Hayes Rep. 185/369 50.14% 47.92% −3.00% 4,034,142 −252,666 Samuel Tilden Dem. 82.6% 3 | 58 2016 Donald Trump Rep. 304/538 56.50% 45.98% −2.10% 62,979,636 −2,864,974 Hillary Rodham Clinton Dem. 
60.2% 4 | 26 1888 Benjamin Harrison Rep. 233/401 58.10% 47.80% −0.83% 5,443,633 −94,530 Grover Cleveland Dem. 80.5% 5 | 54 2000 George W. Bush Rep. 271/538 50.37% 47.87% −0.51% 50,460,110 −543,816 Al Gore Dem. 54.2% 6 | 24 1880 James Garfield Rep. 214/369 57.99% 48.31% 0.09% 4,453,337 1,898 Winfield Scott Hancock Dem. 80.5% 7 | 44 1960 John Kennedy Dem. 303/537 56.42% 49.72% 0.17% 34,220,984 112,827 Richard Nixon Rep. 63.8% 8 | 25 1884 Grover Cleveland Dem. 219/401 54.61% 48.85% 0.57% 4,914,482 57,579 James Blaine Rep. 78.2% 9 | 46 1968 Richard Nixon Rep. 301/538 55.95% 43.42% 0.70% 31,783,783 511,944 Hubert Humphrey Dem. 62.5% 10 | 15 1844 James Polk Dem. 170/275 61.82% 49.54% 1.45% 1,339,570 39,413 Henry Clay Whig 79.2% 11 | 48 1976 Jimmy Carter Dem. 297/538 55.20% 50.08% 2.06% 40,831,881 1,683,247 Gerald Ford Rep. 54.8% 12 | 55 2004 George W. Bush Rep. 286/538 53.16% 50.73% 2.46% 62,040,610 3,012,171 John Kerry Dem. 60.1% 13 | 27 1892 Grover Cleveland Dem. 277/444 62.39% 46.02% 3.01% 5,553,898 363,099 Benjamin Harrison Rep. 75.8% 14 | 33 1916 Woodrow Wilson Dem. 277/531 52.17% 49.24% 3.12% 9,126,868 578,140 Charles Evans Hughes Rep. 61.8% 15 | 57 2012 Barack Obama Dem. 332/538 61.71% 51.06% 3.86% 65,915,795 4,982,291 Mitt Romney Rep. 58.6% 16 | 28 1896 William McKinley Rep. 271/447 60.63% 51.02% 4.31% 7,112,138 601,331 William Jennings Bryan Dem. 79.6% 17 | 41 1948 Harry Truman Dem. 303/531 57.06% 49.55% 4.48% 24,179,347 2,188,055 Thomas Dewey Rep. 52.2% 18 | 16 1848 Zachary Taylor Whig 163/290 56.21% 47.28% 4.79% 1,360,235 137,882 Lewis Cass Dem. 72.8% 19 | 21 1868 Ulysses Grant Rep. 214/294 72.79% 52.66% 5.32% 3,013,790 304,810 Horatio Seymour Dem. 80.9% 20 | 52 1992 Bill Clinton Dem. 370/538 68.77% 43.01% 5.56% 44,909,806 5,805,256 George H. W. Bush Rep. 58.1% 21 | 14 1840 William Henry Harrison Whig 234/294 79.59% 52.87% 6.05% 1,275,583 145,938 Martin Van Buren Dem. 80.3% 22 | 29 1900 William McKinley Rep. 
292/447 65.23% 51.64% 6.12% 7,228,864 857,932 William Jennings Bryan Dem. 73.7% 23 | 17 1852 Franklin Pierce Dem. 254/296 85.81% 50.83% 6.95% 1,605,943 219,525 Winfield Scott Whig 69.5% 24 | 56 2008 Barack Obama Dem. 365/538 67.84% 52.93% 7.27% 69,498,516 9,550,193 John McCain Rep. 61.6% 25 | 40 1944 Franklin Roosevelt Dem. 432/531 81.36% 53.39% 7.50% 25,612,916 3,594,987 Thomas Dewey Rep. 55.9% 26 | 51 1988 George H. W. Bush Rep. 426/538 79.18% 53.37% 7.72% 48,886,597 7,077,121 Michael Dukakis Dem. 52.8% 27 | 53 1996 Bill Clinton Dem. 379/538 70.45% 49.23% 8.51% 47,400,125 8,201,370 Bob Dole Rep. 51.7% 28 | 31 1908 William Taft Rep. 321/483 66.46% 51.57% 8.53% 7,678,335 1,269,356 William Jennings Bryan Dem. 65.7% 29 | 49 1980 Ronald Reagan Rep. 489/538 90.89% 50.75% 9.74% 43,903,230 8,423,115 Jimmy Carter Dem. 54.2% 30 | 39 1940 Franklin Roosevelt Dem. 449/531 84.56% 54.74% 9.96% 27,313,945 4,966,201 Wendell Willkie Rep. 62.4% 31 | 20 1864 Abraham Lincoln Rep. 212/233 90.99% 55.03% 10.08% 2,211,317 405,090 George McClellan Dem. 76.3% 32 | 19 1860 Abraham Lincoln Rep. 180/303 59.41% 39.65% 10.13% 1,855,993 474,049 John Breckinridge Dem. 81.8% 33 | 42 1952 Dwight Eisenhower Rep. 442/531 83.24% 55.18% 10.85% 34,075,529 6,700,439 Adlai Stevenson Dem. 62.3% 34 | 22 1872 Ulysses Grant Rep. 286/352 81.25% 55.58% 11.80% 3,597,439 763,729 Horace Greeley L. R. 72.1% 35 | 18 1856 James Buchanan Dem. 174/296 58.78% 45.29% 12.20% 1,835,140 494,472 John Frémont Rep. 79.4% 36 | 11 1828 Andrew Jackson Dem. 178/261 68.20% 55.93% 12.25% 642,806 140,839 John Quincy Adams N. R. 57.3% 37 | 13 1836 Martin Van Buren Dem. 170/294 57.82% 50.79% 14.20% 763,291 213,384 William Henry Harrison Whig 56.5% 38 | 32 1912 Woodrow Wilson Dem. 435/531 81.92% 41.84% 14.44% 6,296,284 2,173,563 Theodore Roosevelt Prog. 59.0% 39 | 43 1956 Dwight Eisenhower Rep. 457/531 86.06% 57.37% 15.40% 35,579,180 9,551,152 Adlai Stevenson Dem. 60.2% 40 | 36 1928 Herbert Hoover Rep. 
444/531 83.62% 58.21% 17.41% 21,427,123 6,411,659 Al Smith Dem. 56.9% 41 | 37 1932 Franklin Roosevelt Dem. 472/531 88.89% 57.41% 17.76% 22,821,277 7,060,023 Herbert Hoover Rep. 56.9% 42 | 12 1832 Andrew Jackson Dem. 219/286 76.57% 54.74% 17.81% 702,735 228,628 Henry Clay N. R. 57.0% 43 | 50 1984 Ronald Reagan Rep. 525/538 97.58% 58.77% 18.21% 54,455,472 16,878,120 Walter Mondale Dem. 55.2% 44 | 30 1904 Theodore Roosevelt Rep. 336/476 70.59% 56.42% 18.83% 7,630,557 2,546,677 Alton Brooks Parker Dem. 65.5% 45 | 45 1964 Lyndon Johnson Dem. 486/538 90.33% 61.05% 22.58% 43,127,041 15,951,287 Barry Goldwater Rep. 62.8% 46 | 47 1972 Richard Nixon Rep. 520/538 96.65% 60.67% 23.15% 47,168,710 17,995,488 George McGovern Dem. 56.2% 47 | 38 1936 Franklin Roosevelt Dem. 523/531 98.49% 60.80% 24.26% 27,752,648 11,070,786 Alf Landon Rep. 61.0% 48 | 35 1924 Calvin Coolidge Rep. 382/531 71.94% 54.04% 25.22% 15,723,789 7,337,547 John Davis Dem. 48.9% 49 | 34 1920 Warren Harding Rep. 404/531 76.08% 60.32% 26.17% 16,144,093 7,004,432 James Cox Dem. 
49.2% 50 | -------------------------------------------------------------------------------- /data/historical_vote_wide.csv: -------------------------------------------------------------------------------- 1 | election_year,winner,runnerup 2 | 2016,Donald Trump,Hillary Clinton 3 | 2012,Barack Obama,Mitt Romney 4 | 2008,Barack Obama,John McCain 5 | 2004,George Bush,John Kerry 6 | 2000,George Bush,Al Gore 7 | 1996,Bill Clinton,Bob Dole 8 | 1992,Bill Clinton,George Bush 9 | 1988,George Bush,Michael Dukakis 10 | 1984,Ronald Reagan,Walter Mondale 11 | 1980,Ronald Reagan,Jimmy Carter -------------------------------------------------------------------------------- /data/islands.csv: -------------------------------------------------------------------------------- 1 | landmass,size,landmass_type 2 | Africa,11506,Continent 3 | Antarctica,5500,Continent 4 | Asia,16988,Continent 5 | Australia,2968,Continent 6 | Axel Heiberg,16,Other 7 | Baffin,184,Other 8 | Banks,23,Other 9 | Borneo,280,Other 10 | Britain,84,Other 11 | Celebes,73,Other 12 | Celon,25,Other 13 | Cuba,43,Other 14 | Devon,21,Other 15 | Ellesmere,82,Other 16 | Europe,3745,Continent 17 | Greenland,840,Other 18 | Hainan,13,Other 19 | Hispaniola,30,Other 20 | Hokkaido,30,Other 21 | Honshu,89,Other 22 | Iceland,40,Other 23 | Ireland,33,Other 24 | Java,49,Other 25 | Kyushu,14,Other 26 | Luzon,42,Other 27 | Madagascar,227,Other 28 | Melville,16,Other 29 | Mindanao,36,Other 30 | Moluccas,29,Other 31 | New Britain,15,Other 32 | New Guinea,306,Other 33 | New Zealand (N),44,Other 34 | New Zealand (S),58,Other 35 | Newfoundland,43,Other 36 | North America,9390,Continent 37 | Novaya Zemlya,32,Other 38 | Prince of Wales,13,Other 39 | Sakhalin,29,Other 40 | South America,6795,Continent 41 | Southampton,16,Other 42 | Spitsbergen,15,Other 43 | Sumatra,183,Other 44 | Taiwan,14,Other 45 | Tasmania,26,Other 46 | Tierra del Fuego,19,Other 47 | Timor,13,Other 48 | Vancouver,12,Other 49 | Victoria,82,Other 50 | 
-------------------------------------------------------------------------------- /data/marketing.csv: -------------------------------------------------------------------------------- 1 | loyalty,csat,cluster 2 | 7,1,1 3 | 7.5,1,1 4 | 8,2,1 5 | 7,2,1 6 | 8,3,1 7 | 1.5,1.75,3 8 | 1,3,3 9 | 0.5,4,3 10 | 2,4,3 11 | 7,6,2 12 | 6,6,2 13 | 7,7,2 14 | 6,7,2 15 | 5,7,2 16 | 9.5,8,2 17 | 7,8,2 18 | 8.3,9,2 19 | 4,8,2 20 | 2,3,3 21 | -------------------------------------------------------------------------------- /data/mauna_loa_data.csv: -------------------------------------------------------------------------------- 1 | date_measured,ppm 2 | 1980-02-01,338.34 3 | 1980-03-01,340.01 4 | 1980-04-01,340.93 5 | 1980-05-01,341.48 6 | 1980-06-01,341.33 7 | 1980-07-01,339.4 8 | 1980-08-01,337.7 9 | 1980-09-01,336.19 10 | 1980-10-01,336.15 11 | 1980-11-01,337.27 12 | 1980-12-01,338.32 13 | 1981-01-01,339.29 14 | 1981-02-01,340.55 15 | 1981-03-01,341.61 16 | 1981-04-01,342.53 17 | 1981-05-01,343.03 18 | 1981-06-01,342.54 19 | 1981-07-01,340.78 20 | 1981-08-01,338.44 21 | 1981-09-01,336.95 22 | 1981-10-01,337.08 23 | 1981-11-01,338.58 24 | 1981-12-01,339.88 25 | 1982-01-01,340.96 26 | 1982-02-01,341.73 27 | 1982-03-01,342.81 28 | 1982-04-01,343.97 29 | 1982-05-01,344.63 30 | 1982-06-01,343.79 31 | 1982-07-01,342.32 32 | 1982-08-01,340.09 33 | 1982-09-01,338.28 34 | 1982-10-01,338.29 35 | 1982-11-01,339.6 36 | 1982-12-01,340.9 37 | 1983-01-01,341.68 38 | 1983-02-01,342.9 39 | 1983-03-01,343.33 40 | 1983-04-01,345.25 41 | 1983-05-01,346.03 42 | 1983-06-01,345.63 43 | 1983-07-01,344.19 44 | 1983-08-01,342.27 45 | 1983-09-01,340.35 46 | 1983-10-01,340.38 47 | 1983-11-01,341.59 48 | 1983-12-01,343.05 49 | 1984-01-01,344.1 50 | 1984-02-01,344.79 51 | 1984-03-01,345.52 52 | 1984-05-01,347.63 53 | 1984-06-01,346.98 54 | 1984-07-01,345.53 55 | 1984-08-01,343.55 56 | 1984-09-01,341.4 57 | 1984-10-01,341.67 58 | 1984-11-01,343.1 59 | 1984-12-01,344.7 60 | 1985-01-01,345.21 61 | 1985-02-01,346.16 
62 | 1985-03-01,347.74 63 | 1985-04-01,348.34 64 | 1985-05-01,349.06 65 | 1985-06-01,348.38 66 | 1985-07-01,346.71 67 | 1985-08-01,345.02 68 | 1985-09-01,343.27 69 | 1985-10-01,343.13 70 | 1985-11-01,344.49 71 | 1985-12-01,345.88 72 | 1986-01-01,346.56 73 | 1986-02-01,347.28 74 | 1986-03-01,348.01 75 | 1986-04-01,349.77 76 | 1986-05-01,350.38 77 | 1986-06-01,349.93 78 | 1986-07-01,348.16 79 | 1986-08-01,346.08 80 | 1986-09-01,345.22 81 | 1986-10-01,344.51 82 | 1986-11-01,345.93 83 | 1986-12-01,347.22 84 | 1987-01-01,348.52 85 | 1987-02-01,348.73 86 | 1987-03-01,349.73 87 | 1987-04-01,351.31 88 | 1987-05-01,352.09 89 | 1987-06-01,351.53 90 | 1987-07-01,350.11 91 | 1987-08-01,348.08 92 | 1987-09-01,346.52 93 | 1987-10-01,346.59 94 | 1987-11-01,347.96 95 | 1987-12-01,349.16 96 | 1988-01-01,350.39 97 | 1988-02-01,351.64 98 | 1988-03-01,352.4 99 | 1988-04-01,353.69 100 | 1988-05-01,354.21 101 | 1988-06-01,353.72 102 | 1988-07-01,352.69 103 | 1988-08-01,350.4 104 | 1988-09-01,348.92 105 | 1988-10-01,349.13 106 | 1988-11-01,350.2 107 | 1988-12-01,351.41 108 | 1989-01-01,352.91 109 | 1989-02-01,353.27 110 | 1989-03-01,353.96 111 | 1989-04-01,355.64 112 | 1989-05-01,355.86 113 | 1989-06-01,355.37 114 | 1989-07-01,353.99 115 | 1989-08-01,351.81 116 | 1989-09-01,350.05 117 | 1989-10-01,350.25 118 | 1989-11-01,351.49 119 | 1989-12-01,352.85 120 | 1990-01-01,353.8 121 | 1990-02-01,355.04 122 | 1990-03-01,355.73 123 | 1990-04-01,356.32 124 | 1990-05-01,357.32 125 | 1990-06-01,356.34 126 | 1990-07-01,354.84 127 | 1990-08-01,353.01 128 | 1990-09-01,351.31 129 | 1990-10-01,351.62 130 | 1990-11-01,353.07 131 | 1990-12-01,354.33 132 | 1991-01-01,354.84 133 | 1991-02-01,355.73 134 | 1991-03-01,357.23 135 | 1991-04-01,358.66 136 | 1991-05-01,359.13 137 | 1991-06-01,358.13 138 | 1991-07-01,356.19 139 | 1991-08-01,353.85 140 | 1991-09-01,352.25 141 | 1991-10-01,352.35 142 | 1991-11-01,353.81 143 | 1991-12-01,355.12 144 | 1992-01-01,356.25 145 | 1992-02-01,357.11 146 | 1992-03-01,357.86 
147 | 1992-04-01,359.09 148 | 1992-05-01,359.59 149 | 1992-06-01,359.33 150 | 1992-07-01,357.01 151 | 1992-08-01,354.94 152 | 1992-09-01,352.95 153 | 1992-10-01,353.32 154 | 1992-11-01,354.32 155 | 1992-12-01,355.57 156 | 1993-01-01,357 157 | 1993-02-01,357.31 158 | 1993-03-01,358.47 159 | 1993-04-01,359.27 160 | 1993-05-01,360.19 161 | 1993-06-01,359.52 162 | 1993-07-01,357.33 163 | 1993-08-01,355.64 164 | 1993-09-01,354.03 165 | 1993-10-01,354.12 166 | 1993-11-01,355.41 167 | 1993-12-01,356.91 168 | 1994-01-01,358.24 169 | 1994-02-01,358.92 170 | 1994-03-01,359.99 171 | 1994-04-01,361.23 172 | 1994-05-01,361.65 173 | 1994-06-01,360.81 174 | 1994-07-01,359.38 175 | 1994-08-01,357.46 176 | 1994-09-01,355.73 177 | 1994-10-01,356.08 178 | 1994-11-01,357.53 179 | 1994-12-01,358.98 180 | 1995-01-01,359.92 181 | 1995-02-01,360.86 182 | 1995-03-01,361.83 183 | 1995-04-01,363.3 184 | 1995-05-01,363.69 185 | 1995-06-01,363.19 186 | 1995-07-01,361.64 187 | 1995-08-01,359.12 188 | 1995-09-01,358.17 189 | 1995-10-01,357.99 190 | 1995-11-01,359.45 191 | 1995-12-01,360.68 192 | 1996-01-01,362.07 193 | 1996-02-01,363.24 194 | 1996-03-01,364.17 195 | 1996-04-01,364.57 196 | 1996-05-01,365.13 197 | 1996-06-01,364.92 198 | 1996-07-01,363.55 199 | 1996-08-01,361.38 200 | 1996-09-01,359.54 201 | 1996-10-01,359.58 202 | 1996-11-01,360.89 203 | 1996-12-01,362.24 204 | 1997-01-01,363.09 205 | 1997-02-01,364.03 206 | 1997-03-01,364.51 207 | 1997-04-01,366.35 208 | 1997-05-01,366.64 209 | 1997-06-01,365.59 210 | 1997-07-01,364.31 211 | 1997-08-01,362.25 212 | 1997-09-01,360.29 213 | 1997-10-01,360.82 214 | 1997-11-01,362.49 215 | 1997-12-01,364.38 216 | 1998-01-01,365.27 217 | 1998-02-01,365.98 218 | 1998-03-01,367.24 219 | 1998-04-01,368.66 220 | 1998-05-01,369.42 221 | 1998-06-01,368.99 222 | 1998-07-01,367.82 223 | 1998-08-01,365.95 224 | 1998-09-01,364.02 225 | 1998-10-01,364.4 226 | 1998-11-01,365.52 227 | 1998-12-01,367.13 228 | 1999-01-01,368.18 229 | 1999-02-01,369.07 230 | 
1999-03-01,369.68 231 | 1999-04-01,370.99 232 | 1999-05-01,370.96 233 | 1999-06-01,370.3 234 | 1999-07-01,369.45 235 | 1999-08-01,366.9 236 | 1999-09-01,364.81 237 | 1999-10-01,365.37 238 | 1999-11-01,366.72 239 | 1999-12-01,368.1 240 | 2000-01-01,369.29 241 | 2000-02-01,369.55 242 | 2000-03-01,370.6 243 | 2000-04-01,371.82 244 | 2000-05-01,371.58 245 | 2000-06-01,371.7 246 | 2000-07-01,369.86 247 | 2000-08-01,368.13 248 | 2000-09-01,367 249 | 2000-10-01,367.03 250 | 2000-11-01,368.37 251 | 2000-12-01,369.67 252 | 2001-01-01,370.59 253 | 2001-02-01,371.51 254 | 2001-03-01,372.43 255 | 2001-04-01,373.37 256 | 2001-05-01,373.85 257 | 2001-06-01,373.22 258 | 2001-07-01,371.5 259 | 2001-08-01,369.61 260 | 2001-09-01,368.18 261 | 2001-10-01,368.45 262 | 2001-11-01,369.76 263 | 2001-12-01,371.24 264 | 2002-01-01,372.53 265 | 2002-02-01,373.2 266 | 2002-03-01,374.12 267 | 2002-04-01,375.02 268 | 2002-05-01,375.76 269 | 2002-06-01,375.52 270 | 2002-07-01,374.01 271 | 2002-08-01,371.85 272 | 2002-09-01,370.75 273 | 2002-10-01,370.55 274 | 2002-11-01,372.25 275 | 2002-12-01,373.79 276 | 2003-01-01,374.88 277 | 2003-02-01,375.64 278 | 2003-03-01,376.45 279 | 2003-04-01,377.73 280 | 2003-05-01,378.6 281 | 2003-06-01,378.28 282 | 2003-07-01,376.7 283 | 2003-08-01,374.38 284 | 2003-09-01,373.17 285 | 2003-10-01,373.15 286 | 2003-11-01,374.66 287 | 2003-12-01,375.99 288 | 2004-01-01,377 289 | 2004-02-01,377.87 290 | 2004-03-01,378.88 291 | 2004-04-01,380.35 292 | 2004-05-01,380.62 293 | 2004-06-01,379.69 294 | 2004-07-01,377.47 295 | 2004-08-01,376.01 296 | 2004-09-01,374.25 297 | 2004-10-01,374.46 298 | 2004-11-01,376.16 299 | 2004-12-01,377.51 300 | 2005-01-01,378.46 301 | 2005-02-01,379.73 302 | 2005-03-01,380.77 303 | 2005-04-01,382.29 304 | 2005-05-01,382.45 305 | 2005-06-01,382.21 306 | 2005-07-01,380.74 307 | 2005-08-01,378.74 308 | 2005-09-01,376.7 309 | 2005-10-01,377 310 | 2005-11-01,378.35 311 | 2005-12-01,380.11 312 | 2006-01-01,381.38 313 | 2006-02-01,382.2 314 | 
2006-03-01,382.67 315 | 2006-04-01,384.61 316 | 2006-05-01,385.03 317 | 2006-06-01,384.05 318 | 2006-07-01,382.46 319 | 2006-08-01,380.41 320 | 2006-09-01,378.85 321 | 2006-10-01,379.13 322 | 2006-11-01,380.15 323 | 2006-12-01,381.82 324 | 2007-01-01,382.89 325 | 2007-02-01,383.9 326 | 2007-03-01,384.58 327 | 2007-04-01,386.5 328 | 2007-05-01,386.56 329 | 2007-06-01,386.1 330 | 2007-07-01,384.5 331 | 2007-08-01,381.99 332 | 2007-09-01,380.96 333 | 2007-10-01,381.12 334 | 2007-11-01,382.45 335 | 2007-12-01,383.94 336 | 2008-01-01,385.52 337 | 2008-02-01,385.82 338 | 2008-03-01,386.03 339 | 2008-04-01,387.21 340 | 2008-05-01,388.54 341 | 2008-06-01,387.76 342 | 2008-07-01,386.37 343 | 2008-08-01,384.09 344 | 2008-09-01,383.18 345 | 2008-10-01,382.99 346 | 2008-11-01,384.19 347 | 2008-12-01,385.56 348 | 2009-01-01,386.94 349 | 2009-02-01,387.48 350 | 2009-03-01,388.82 351 | 2009-04-01,389.55 352 | 2009-05-01,390.14 353 | 2009-06-01,389.48 354 | 2009-07-01,388.03 355 | 2009-08-01,386.11 356 | 2009-09-01,384.74 357 | 2009-10-01,384.43 358 | 2009-11-01,386.02 359 | 2009-12-01,387.42 360 | 2010-01-01,388.71 361 | 2010-02-01,390.2 362 | 2010-03-01,391.17 363 | 2010-04-01,392.46 364 | 2010-05-01,393 365 | 2010-06-01,392.15 366 | 2010-07-01,390.2 367 | 2010-08-01,388.35 368 | 2010-09-01,386.85 369 | 2010-10-01,387.24 370 | 2010-11-01,388.67 371 | 2010-12-01,389.79 372 | 2011-01-01,391.33 373 | 2011-02-01,391.86 374 | 2011-03-01,392.6 375 | 2011-04-01,393.25 376 | 2011-05-01,394.19 377 | 2011-06-01,393.74 378 | 2011-07-01,392.51 379 | 2011-08-01,390.13 380 | 2011-09-01,389.08 381 | 2011-10-01,389 382 | 2011-11-01,390.28 383 | 2011-12-01,391.86 384 | 2012-01-01,393.12 385 | 2012-02-01,393.86 386 | 2012-03-01,394.4 387 | 2012-04-01,396.18 388 | 2012-05-01,396.74 389 | 2012-06-01,395.71 390 | 2012-07-01,394.36 391 | 2012-08-01,392.39 392 | 2012-09-01,391.11 393 | 2012-10-01,391.05 394 | 2012-11-01,392.98 395 | 2012-12-01,394.34 396 | 2013-01-01,395.55 397 | 2013-02-01,396.8 398 
| 2013-03-01,397.43 399 | 2013-04-01,398.41 400 | 2013-05-01,399.78 401 | 2013-06-01,398.6 402 | 2013-07-01,397.32 403 | 2013-08-01,395.2 404 | 2013-09-01,393.45 405 | 2013-10-01,393.7 406 | 2013-11-01,395.16 407 | 2013-12-01,396.84 408 | 2014-01-01,397.85 409 | 2014-02-01,398.01 410 | 2014-03-01,399.77 411 | 2014-04-01,401.38 412 | 2014-05-01,401.78 413 | 2014-06-01,401.25 414 | 2014-07-01,399.1 415 | 2014-08-01,397.03 416 | 2014-09-01,395.38 417 | 2014-10-01,396.03 418 | 2014-11-01,397.28 419 | 2014-12-01,398.91 420 | 2015-01-01,399.98 421 | 2015-02-01,400.28 422 | 2015-03-01,401.54 423 | 2015-04-01,403.28 424 | 2015-05-01,403.96 425 | 2015-06-01,402.8 426 | 2015-07-01,401.31 427 | 2015-08-01,398.93 428 | 2015-09-01,397.63 429 | 2015-10-01,398.29 430 | 2015-11-01,400.16 431 | 2015-12-01,401.85 432 | 2016-01-01,402.56 433 | 2016-02-01,404.12 434 | 2016-03-01,404.87 435 | 2016-04-01,407.45 436 | 2016-05-01,407.72 437 | 2016-06-01,406.83 438 | 2016-07-01,404.41 439 | 2016-08-01,402.27 440 | 2016-09-01,401.05 441 | 2016-10-01,401.59 442 | 2016-11-01,403.55 443 | 2016-12-01,404.45 444 | 2017-01-01,406.17 445 | 2017-02-01,406.46 446 | 2017-03-01,407.22 447 | 2017-04-01,409.04 448 | 2017-05-01,409.69 449 | 2017-06-01,408.88 450 | 2017-07-01,407.12 451 | 2017-08-01,405.13 452 | 2017-09-01,403.37 453 | 2017-10-01,403.63 454 | 2017-11-01,405.12 455 | 2017-12-01,406.81 456 | 2018-01-01,407.96 457 | 2018-02-01,408.32 458 | 2018-03-01,409.41 459 | 2018-04-01,410.24 460 | 2018-05-01,411.24 461 | 2018-06-01,410.79 462 | 2018-07-01,408.71 463 | 2018-08-01,406.99 464 | 2018-09-01,405.51 465 | 2018-10-01,406 466 | 2018-11-01,408.02 467 | 2018-12-01,409.07 468 | 2019-01-01,410.83 469 | 2019-02-01,411.75 470 | 2019-03-01,411.97 471 | 2019-04-01,413.33 472 | 2019-05-01,414.64 473 | 2019-06-01,413.93 474 | 2019-07-01,411.74 475 | 2019-08-01,409.95 476 | 2019-09-01,408.54 477 | 2019-10-01,408.52 478 | 2019-11-01,410.25 479 | 2019-12-01,411.76 480 | 2020-01-01,413.39 481 | 
2020-02-01,414.11 482 | 2020-03-01,414.51 483 | 2020-04-01,416.21 484 | 2020-05-01,417.07 485 | 2020-06-01,416.39 486 | -------------------------------------------------------------------------------- /data/michelson.csv: -------------------------------------------------------------------------------- 1 | Expt,Run,Speed 2 | 1,1,850 3 | 1,2,740 4 | 1,3,900 5 | 1,4,1070 6 | 1,5,930 7 | 1,6,850 8 | 1,7,950 9 | 1,8,980 10 | 1,9,980 11 | 1,10,880 12 | 1,11,1000 13 | 1,12,980 14 | 1,13,930 15 | 1,14,650 16 | 1,15,760 17 | 1,16,810 18 | 1,17,1000 19 | 1,18,1000 20 | 1,19,960 21 | 1,20,960 22 | 2,1,960 23 | 2,2,940 24 | 2,3,960 25 | 2,4,940 26 | 2,5,880 27 | 2,6,800 28 | 2,7,850 29 | 2,8,880 30 | 2,9,900 31 | 2,10,840 32 | 2,11,830 33 | 2,12,790 34 | 2,13,810 35 | 2,14,880 36 | 2,15,880 37 | 2,16,830 38 | 2,17,800 39 | 2,18,790 40 | 2,19,760 41 | 2,20,800 42 | 3,1,880 43 | 3,2,880 44 | 3,3,880 45 | 3,4,860 46 | 3,5,720 47 | 3,6,720 48 | 3,7,620 49 | 3,8,860 50 | 3,9,970 51 | 3,10,950 52 | 3,11,880 53 | 3,12,910 54 | 3,13,850 55 | 3,14,870 56 | 3,15,840 57 | 3,16,840 58 | 3,17,850 59 | 3,18,840 60 | 3,19,840 61 | 3,20,840 62 | 4,1,890 63 | 4,2,810 64 | 4,3,810 65 | 4,4,820 66 | 4,5,800 67 | 4,6,770 68 | 4,7,760 69 | 4,8,740 70 | 4,9,750 71 | 4,10,760 72 | 4,11,910 73 | 4,12,920 74 | 4,13,890 75 | 4,14,860 76 | 4,15,880 77 | 4,16,720 78 | 4,17,840 79 | 4,18,850 80 | 4,19,850 81 | 4,20,780 82 | 5,1,890 83 | 5,2,840 84 | 5,3,780 85 | 5,4,810 86 | 5,5,760 87 | 5,6,810 88 | 5,7,790 89 | 5,8,810 90 | 5,9,820 91 | 5,10,850 92 | 5,11,870 93 | 5,12,870 94 | 5,13,810 95 | 5,14,740 96 | 5,15,810 97 | 5,16,940 98 | 5,17,950 99 | 5,18,800 100 | 5,19,810 101 | 5,20,870 102 | -------------------------------------------------------------------------------- /data/mtcars.csv: -------------------------------------------------------------------------------- 1 | mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb 2 | 21.0,6.0,160.0,110.0,3.9,2.62,16.46,0.0,1.0,4.0,4.0 3 | 
21.0,6.0,160.0,110.0,3.9,2.875,17.02,0.0,1.0,4.0,4.0 4 | 22.8,4.0,108.0,93.0,3.85,2.32,18.61,1.0,1.0,4.0,1.0 5 | 21.4,6.0,258.0,110.0,3.08,3.215,19.44,1.0,0.0,3.0,1.0 6 | 18.7,8.0,360.0,175.0,3.15,3.44,17.02,0.0,0.0,3.0,2.0 7 | 18.1,6.0,225.0,105.0,2.76,3.46,20.22,1.0,0.0,3.0,1.0 8 | 14.3,8.0,360.0,245.0,3.21,3.57,15.84,0.0,0.0,3.0,4.0 9 | 24.4,4.0,146.7,62.0,3.69,3.19,20.0,1.0,0.0,4.0,2.0 10 | 22.8,4.0,140.8,95.0,3.92,3.15,22.9,1.0,0.0,4.0,2.0 11 | 19.2,6.0,167.6,123.0,3.92,3.44,18.3,1.0,0.0,4.0,4.0 12 | 17.8,6.0,167.6,123.0,3.92,3.44,18.9,1.0,0.0,4.0,4.0 13 | 16.4,8.0,275.8,180.0,3.07,4.07,17.4,0.0,0.0,3.0,3.0 14 | 17.3,8.0,275.8,180.0,3.07,3.73,17.6,0.0,0.0,3.0,3.0 15 | 15.2,8.0,275.8,180.0,3.07,3.78,18.0,0.0,0.0,3.0,3.0 16 | 10.4,8.0,472.0,205.0,2.93,5.25,17.98,0.0,0.0,3.0,4.0 17 | 10.4,8.0,460.0,215.0,3.0,5.424,17.82,0.0,0.0,3.0,4.0 18 | 14.7,8.0,440.0,230.0,3.23,5.345,17.42,0.0,0.0,3.0,4.0 19 | 32.4,4.0,78.7,66.0,4.08,2.2,19.47,1.0,1.0,4.0,1.0 20 | 30.4,4.0,75.7,52.0,4.93,1.615,18.52,1.0,1.0,4.0,2.0 21 | 33.9,4.0,71.1,65.0,4.22,1.835,19.9,1.0,1.0,4.0,1.0 22 | 21.5,4.0,120.1,97.0,3.7,2.465,20.01,1.0,0.0,3.0,1.0 23 | 15.5,8.0,318.0,150.0,2.76,3.52,16.87,0.0,0.0,3.0,2.0 24 | 15.2,8.0,304.0,150.0,3.15,3.435,17.3,0.0,0.0,3.0,2.0 25 | 13.3,8.0,350.0,245.0,3.73,3.84,15.41,0.0,0.0,3.0,4.0 26 | 19.2,8.0,400.0,175.0,3.08,3.845,17.05,0.0,0.0,3.0,2.0 27 | 27.3,4.0,79.0,66.0,4.08,1.935,18.9,1.0,1.0,4.0,1.0 28 | 26.0,4.0,120.3,91.0,4.43,2.14,16.7,0.0,1.0,5.0,2.0 29 | 30.4,4.0,95.1,113.0,3.77,1.513,16.9,1.0,1.0,5.0,2.0 30 | 15.8,8.0,351.0,264.0,4.22,3.17,14.5,0.0,1.0,5.0,4.0 31 | 19.7,6.0,145.0,175.0,3.62,2.77,15.5,0.0,1.0,5.0,6.0 32 | 15.0,8.0,301.0,335.0,3.54,3.57,14.6,0.0,1.0,5.0,8.0 33 | 21.4,4.0,121.0,109.0,4.11,2.78,18.6,1.0,1.0,4.0,2.0 34 | -------------------------------------------------------------------------------- /data/penguins.csv: -------------------------------------------------------------------------------- 1 | bill_length_mm,flipper_length_mm 2 | 
39.2,196 3 | 36.5,182 4 | 34.5,187 5 | 36.7,187 6 | 38.1,181 7 | 39.2,190 8 | 36,195 9 | 37.8,193 10 | 46.5,213 11 | 46.1,215 12 | 47.8,215 13 | 45,220 14 | 49.1,212 15 | 43.3,208 16 | 46,195 17 | 46.7,195 18 | 52.2,197 19 | 46.8,189 20 | -------------------------------------------------------------------------------- /data/penguins_all_vars.csv: -------------------------------------------------------------------------------- 1 | island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,year,cluster 2 | Dream,39.2,21.1,196,4150,male,2007,1 3 | Dream,36.5,18,182,3150,female,2007,1 4 | Biscoe,34.5,18.1,187,2900,female,2008,1 5 | Torgersen,36.7,18.8,187,3800,female,2008,1 6 | Biscoe,38.1,17,181,3175,female,2009,1 7 | Dream,39.2,18.6,190,4250,male,2009,1 8 | Dream,36,17.8,195,3450,female,2009,1 9 | Dream,37.8,18.1,193,3750,male,2009,1 10 | Biscoe,46.5,14.5,213,4400,female,2007,3 11 | Biscoe,46.1,15.1,215,5100,male,2007,3 12 | Biscoe,47.8,15,215,5650,male,2007,3 13 | Biscoe,45,15.4,220,5050,male,2008,3 14 | Biscoe,49.1,14.5,212,4625,female,2009,3 15 | Biscoe,43.3,14,208,4575,female,2009,3 16 | Dream,46,18.9,195,4150,female,2007,2 17 | Dream,46.7,17.9,195,3300,female,2007,2 18 | Dream,52.2,18.8,197,3450,male,2009,2 19 | Dream,46.8,16.5,189,3650,female,2009,2 20 | -------------------------------------------------------------------------------- /data/region_data.csv: -------------------------------------------------------------------------------- 1 | region,households,area,population,dwellings 2 | Belleville,43002,1354.65121,103472,45050 3 | Lethbridge,45696,3046.69699,117394,48317 4 | Thunder Bay,52545,2618.26318,121621,57146 5 | Peterborough,50533,1636.98336,121721,55662 6 | Saint John,52872,3793.42158,126202,58398 7 | Brantford,52530,1086.27106,134203,54419 8 | Moncton,61769,2625.1211,144810,66699 9 | Guelph,59280,604.00365,151984,63324 10 | Trois-Rivières,72502,1052.80206,156042,77734 11 | Saguenay,72479,3078.79919,160980,77968 12 | 
Kingston,67915,2142.32855,161175,77173 13 | Greater Sudbury,70445,4372.1229,164689,76619 14 | Abbotsford - Mission,62631,651.99511,180518,65967 15 | Kelowna,81383,3144.90019,194882,88374 16 | Barrie,72534,967.67675,197059,76336 17 | St. John's,85015,850.46041,205955,92353 18 | Sherbrooke,95577,1506.36002,212105,106082 19 | Regina,94955,4408.86418,236481,101719 20 | Saskatoon,115283,6218.50503,295095,124766 21 | Windsor,132912,1032.38176,329144,140408 22 | Victoria,162716,704.4339,367770,172559 23 | Oshawa,138962,908.06142,379848,142462 24 | Halifax,173459,5963.13705,403390,187478 25 | St. Catharines - Niagara,168485,1425.34399,406074,180606 26 | London,206448,2677.86088,494069,220452 27 | Kitchener - Cambridge - Waterloo,200495,1106.65072,523894,210896 28 | Hamilton,293345,1404.6567,747545,306034 29 | Winnipeg,306550,5410.82907,778489,321484 30 | Québec,361891,3475.38576,800296,382308 31 | Edmonton,502143,9857.77908,1321426,537634 32 | Ottawa - Gatineau,535499,7168.96442,1323783,571146 33 | Calgary,519693,5241.70103,1392609,544870 34 | Vancouver,960894,3040.41532,2463431,1027613 35 | Montréal,1727310,4638.24059,4098927,1823281 36 | Toronto,2135909,6269.93132,5928040,2235145 37 | -------------------------------------------------------------------------------- /data/state_property_vote.csv: -------------------------------------------------------------------------------- 1 | state,pop,med_prop_val,med_income,avg_commute,party 2 | Montana,1042520,217200,46608,16.35,Republican 3 | Alabama,4863300,136200,42917,23.78,Republican 4 | Arizona,6931071,205900,50036,23.69,Republican 5 | Arkansas,2988248,123300,41335,20.49,Republican 6 | California,39250017,477500,61927,27.67,Democratic 7 | Colorado,5540545,314200,61324,23.02,Democratic 8 | Connecticut,3576452,274600,70007,24.92,Democratic 9 | Delaware,952065,243400,59853,24.97,Democratic 10 | District of Columbia,681170,576100,75506,28.96,Democratic 11 | Florida,20612439,197700,47439,25.8,Republican 12 | 
Georgia,10310371,166800,49240,26.91,Republican 13 | Hawaii,1428557,592000,69549,26.03,Democratic 14 | Idaho,1683140,189400,47572,19.71,Republican 15 | Illinois,12801539,186500,57458,27.49,Democratic 16 | Indiana,6633053,134800,49384,22.66,Republican 17 | Iowa,3134693,142300,53816,18.11,Republican 18 | Kansas,2907289,144900,52392,18.52,Republican 19 | Kentucky,4436974,135600,42914,22.4,Republican 20 | Louisiana,4681666,158000,44680,24.24,Republican 21 | Maine,1331479,184700,49381,22.52,Democratic 22 | Maryland,6016447,306900,73851,31.26,Democratic 23 | Massachusetts,6811779,366900,69200,28.05,Democratic 24 | Michigan,9928300,147100,49755,23.49,Republican 25 | Minnesota,5519952,211800,61473,22.08,Democratic 26 | Mississippi,2988726,113900,39738,23.77,Republican 27 | Missouri,6093000,151400,48288,22.45,Republican 28 | Alaska,741894,267800,70898,17.03,Republican 29 | Nebraska,1907116,148100,52803,17.78,Republican 30 | Nevada,2940058,239500,51487,23.07,Democratic 31 | New Hampshire,1334795,251100,66469,25.25,Democratic 32 | New Jersey,8944469,328200,71968,30.28,Democratic 33 | New Mexico,2081015,167500,44905,20.81,Democratic 34 | New York,19745289,302400,58771,31.95,Democratic 35 | North Carolina,10146788,165400,46596,23.07,Republican 36 | North Dakota,757953,184100,60227,16.5,Republican 37 | Ohio,11614373,140100,49349,22.43,Republican 38 | Oklahoma,3923561,132200,47524,20.47,Republican 39 | Oregon,4093465,287100,51088,22.29,Democratic 40 | Pennsylvania,12784227,174100,53224,25.61,Republican 41 | Rhode Island,1056426,247700,54797,23.78,Democratic 42 | South Carolina,4961119,153900,45337,23.67,Republican 43 | South Dakota,865454,160700,51059,15.73,Republican 44 | Tennessee,6651194,157700,44357,23.9,Republican 45 | Texas,27862596,161500,53067,25.2,Republican 46 | Utah,3051217,250300,60943,20.31,Republican 47 | Vermont,624594,223700,54153,21.49,Democratic 48 | Virginia,8411808,264000,64923,27.03,Democratic 49 | Washington,7288000,306400,61358,26.21,Democratic 50 | West 
Virginia,1831102,117900,41030,24.36,Republican 51 | Wisconsin,5778709,173200,52632,20.89,Republican 52 | Wyoming,585501,209500,58291,15.94,Republican 53 | Puerto Rico,3411307,111900,20078,28.36,Not Applicable -------------------------------------------------------------------------------- /data/state_property_vote.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/state_property_vote.db -------------------------------------------------------------------------------- /data/state_property_vote.tsv: -------------------------------------------------------------------------------- 1 | Montana 1042520 217200 46608 16.35 Republican 2 | Alabama 4863300 136200 42917 23.78 Republican 3 | Arizona 6931071 205900 50036 23.69 Republican 4 | Arkansas 2988248 123300 41335 20.49 Republican 5 | California 39250017 477500 61927 27.67 Democratic 6 | Colorado 5540545 314200 61324 23.02 Democratic 7 | Connecticut 3576452 274600 70007 24.92 Democratic 8 | Delaware 952065 243400 59853 24.97 Democratic 9 | District of Columbia 681170 576100 75506 28.96 Democratic 10 | Florida 20612439 197700 47439 25.8 Republican 11 | Georgia 10310371 166800 49240 26.91 Republican 12 | Hawaii 1428557 592000 69549 26.03 Democratic 13 | Idaho 1683140 189400 47572 19.71 Republican 14 | Illinois 12801539 186500 57458 27.49 Democratic 15 | Indiana 6633053 134800 49384 22.66 Republican 16 | Iowa 3134693 142300 53816 18.11 Republican 17 | Kansas 2907289 144900 52392 18.52 Republican 18 | Kentucky 4436974 135600 42914 22.4 Republican 19 | Louisiana 4681666 158000 44680 24.24 Republican 20 | Maine 1331479 184700 49381 22.52 Democratic 21 | Maryland 6016447 306900 73851 31.26 Democratic 22 | Massachusetts 6811779 366900 69200 28.05 Democratic 23 | Michigan 9928300 147100 49755 23.49 Republican 24 | Minnesota 5519952 211800 61473 22.08 Democratic 25 | Mississippi 
2988726 113900 39738 23.77 Republican 26 | Missouri 6093000 151400 48288 22.45 Republican 27 | Alaska 741894 267800 70898 17.03 Republican 28 | Nebraska 1907116 148100 52803 17.78 Republican 29 | Nevada 2940058 239500 51487 23.07 Democratic 30 | New Hampshire 1334795 251100 66469 25.25 Democratic 31 | New Jersey 8944469 328200 71968 30.28 Democratic 32 | New Mexico 2081015 167500 44905 20.81 Democratic 33 | New York 19745289 302400 58771 31.95 Democratic 34 | North Carolina 10146788 165400 46596 23.07 Republican 35 | North Dakota 757953 184100 60227 16.5 Republican 36 | Ohio 11614373 140100 49349 22.43 Republican 37 | Oklahoma 3923561 132200 47524 20.47 Republican 38 | Oregon 4093465 287100 51088 22.29 Democratic 39 | Pennsylvania 12784227 174100 53224 25.61 Republican 40 | Rhode Island 1056426 247700 54797 23.78 Democratic 41 | South Carolina 4961119 153900 45337 23.67 Republican 42 | South Dakota 865454 160700 51059 15.73 Republican 43 | Tennessee 6651194 157700 44357 23.9 Republican 44 | Texas 27862596 161500 53067 25.2 Republican 45 | Utah 3051217 250300 60943 20.31 Republican 46 | Vermont 624594 223700 54153 21.49 Democratic 47 | Virginia 8411808 264000 64923 27.03 Democratic 48 | Washington 7288000 306400 61358 26.21 Democratic 49 | West Virginia 1831102 117900 41030 24.36 Republican 50 | Wisconsin 5778709 173200 52632 20.89 Republican 51 | Wyoming 585501 209500 58291 15.94 Republican 52 | Puerto Rico 3411307 111900 20078 28.36 Not Applicable 53 | -------------------------------------------------------------------------------- /data/state_property_vote.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/data/state_property_vote.xlsx -------------------------------------------------------------------------------- /data/state_property_vote_meta-data.csv: 
-------------------------------------------------------------------------------- 1 | Data source: https://datausa.io/ 2 | Record of how data was collected: https://github.com/UBC-DSCI/introduction-to-datascience/blob/master/data/retrieve_data.ipynb 3 | Date collected: 2020-07-08 4 | state,pop,med_prop_val,med_income,avg_commute,party 5 | Montana,1042520,217200,46608,16.35,Republican 6 | Alabama,4863300,136200,42917,23.78,Republican 7 | Arizona,6931071,205900,50036,23.69,Republican 8 | Arkansas,2988248,123300,41335,20.49,Republican 9 | California,39250017,477500,61927,27.67,Democratic 10 | Colorado,5540545,314200,61324,23.02,Democratic 11 | Connecticut,3576452,274600,70007,24.92,Democratic 12 | Delaware,952065,243400,59853,24.97,Democratic 13 | District of Columbia,681170,576100,75506,28.96,Democratic 14 | Florida,20612439,197700,47439,25.8,Republican 15 | Georgia,10310371,166800,49240,26.91,Republican 16 | Hawaii,1428557,592000,69549,26.03,Democratic 17 | Idaho,1683140,189400,47572,19.71,Republican 18 | Illinois,12801539,186500,57458,27.49,Democratic 19 | Indiana,6633053,134800,49384,22.66,Republican 20 | Iowa,3134693,142300,53816,18.11,Republican 21 | Kansas,2907289,144900,52392,18.52,Republican 22 | Kentucky,4436974,135600,42914,22.4,Republican 23 | Louisiana,4681666,158000,44680,24.24,Republican 24 | Maine,1331479,184700,49381,22.52,Democratic 25 | Maryland,6016447,306900,73851,31.26,Democratic 26 | Massachusetts,6811779,366900,69200,28.05,Democratic 27 | Michigan,9928300,147100,49755,23.49,Republican 28 | Minnesota,5519952,211800,61473,22.08,Democratic 29 | Mississippi,2988726,113900,39738,23.77,Republican 30 | Missouri,6093000,151400,48288,22.45,Republican 31 | Alaska,741894,267800,70898,17.03,Republican 32 | Nebraska,1907116,148100,52803,17.78,Republican 33 | Nevada,2940058,239500,51487,23.07,Democratic 34 | New Hampshire,1334795,251100,66469,25.25,Democratic 35 | New Jersey,8944469,328200,71968,30.28,Democratic 36 | New 
Mexico,2081015,167500,44905,20.81,Democratic 37 | New York,19745289,302400,58771,31.95,Democratic 38 | North Carolina,10146788,165400,46596,23.07,Republican 39 | North Dakota,757953,184100,60227,16.5,Republican 40 | Ohio,11614373,140100,49349,22.43,Republican 41 | Oklahoma,3923561,132200,47524,20.47,Republican 42 | Oregon,4093465,287100,51088,22.29,Democratic 43 | Pennsylvania,12784227,174100,53224,25.61,Republican 44 | Rhode Island,1056426,247700,54797,23.78,Democratic 45 | South Carolina,4961119,153900,45337,23.67,Republican 46 | South Dakota,865454,160700,51059,15.73,Republican 47 | Tennessee,6651194,157700,44357,23.9,Republican 48 | Texas,27862596,161500,53067,25.2,Republican 49 | Utah,3051217,250300,60943,20.31,Republican 50 | Vermont,624594,223700,54153,21.49,Democratic 51 | Virginia,8411808,264000,64923,27.03,Democratic 52 | Washington,7288000,306400,61358,26.21,Democratic 53 | West Virginia,1831102,117900,41030,24.36,Republican 54 | Wisconsin,5778709,173200,52632,20.89,Republican 55 | Wyoming,585501,209500,58291,15.94,Republican 56 | Puerto Rico,3411307,111900,20078,28.36,Not Applicable 57 | -------------------------------------------------------------------------------- /data/us_vote.csv: -------------------------------------------------------------------------------- 1 | election_num,election_year,winner,winner_party,elec_coll_votes_count,elec_coll_votes_tot,elec_coll_votes_perc,pop_votes_perc,pop_votes_perc_marg,pop_votes_count,pop_votes_count_marg,runnerup,runnerup_party,turnout 2 | 1,1788,George Washington,Ind.,69,69,100.00,100.00,100.00,43782,43782,No candidate,None,11.6 3 | 2,1792,George Washington,Ind.,132,132,100.00,100.00,100.00,28579,28579,No candidate,None,6.3 4 | 3,1796,John Adams,Fed.,71,138,51.45,53.45,6.90,35726,4611,Thomas Jefferson,D.-R.,20.1 5 | 4,1800,Thomas Jefferson,D.-R.,73,138,52.90,61.43,22.86,41330,15378,John Adams,Fed.,32.3 6 | 5,1804,Thomas Jefferson,D.-R.,162,176,92.05,72.79,45.58,104110,65191,Charles C. 
Pinckney,Fed.,23.8 7 | 6,1808,James Madison,D.-R.,122,175,69.72,64.73,32.33,124732,62301,Charles C. Pinckney,Fed.,36.8 8 | 7,1812,James Madison,D.-R.,128,217,58.99,50.37,2.74,140431,7650,DeWitt Clinton,D.-R.,40.4 9 | 8,1816,James Monroe,D.-R.,183,217,84.33,68.16,37.24,76592,41852,Rufus King,Fed.,23.5 10 | 9,1820,James Monroe,D.-R.,231,232,99.57,80.61,64.69,87343,69878,No candidate,Fed.,10.1 11 | 10,1824,John Quincy Adams,D.-R.,84,261,32.18,30.92,−10.44,113142,−38221,Andrew Jackson,D.-R.,26.9 12 | 11,1828,Andrew Jackson,Dem.,178,261,68.20,55.93,12.25,642806,140839,John Quincy Adams,N. R.,57.3 13 | 12,1832,Andrew Jackson,Dem.,219,286,76.57,54.74,17.81,702735,228628,Henry Clay,N. R.,57.0 14 | 13,1836,Martin Van Buren,Dem.,170,294,57.82,50.79,14.20,763291,213384,William Henry Harrison,Whig,56.5 15 | 14,1840,William Henry Harrison,Whig,234,294,79.59,52.87,6.05,1275583,145938,Martin Van Buren,Dem.,80.3 16 | 15,1844,James Polk,Dem.,170,275,61.82,49.54,1.45,1339570,39413,Henry Clay,Whig,79.2 17 | 16,1848,Zachary Taylor,Whig,163,290,56.21,47.28,4.79,1360235,137882,Lewis Cass,Dem.,72.8 18 | 17,1852,Franklin Pierce,Dem.,254,296,85.81,50.83,6.95,1605943,219525,Winfield Scott,Whig,69.5 19 | 18,1856,James Buchanan,Dem.,174,296,58.78,45.29,12.20,1835140,494472,John Frémont,Rep.,79.4 20 | 19,1860,Abraham Lincoln,Rep.,180,303,59.41,39.65,10.13,1855993,474049,John Breckinridge,Dem.,81.8 21 | 20,1864,Abraham Lincoln,Rep.,212,233,90.99,55.03,10.08,2211317,405090,George McClellan,Dem.,76.3 22 | 21,1868,Ulysses Grant,Rep.,214,294,72.79,52.66,5.32,3013790,304810,Horatio Seymour,Dem.,80.9 23 | 22,1872,Ulysses Grant,Rep.,286,352,81.25,55.58,11.80,3597439,763729,Horace Greeley,L. 
R.,72.1 24 | 23,1876,Rutherford Hayes,Rep.,185,369,50.14,47.92,−3.00,4034142,−252666,Samuel Tilden,Dem.,82.6 25 | 24,1880,James Garfield,Rep.,214,369,57.99,48.31,0.09,4453337,1898,Winfield Scott Hancock,Dem.,80.5 26 | 25,1884,Grover Cleveland,Dem.,219,401,54.61,48.85,0.57,4914482,57579,James Blaine,Rep.,78.2 27 | 26,1888,Benjamin Harrison,Rep.,233,401,58.10,47.80,−0.83,5443892,−90596,Grover Cleveland,Dem.,80.5 28 | 27,1892,Grover Cleveland,Dem.,277,444,62.39,46.02,3.01,5553898,363099,Benjamin Harrison,Rep.,75.8 29 | 28,1896,William McKinley,Rep.,271,447,60.63,51.02,4.31,7112138,601331,William Jennings Bryan,Dem.,79.6 30 | 29,1900,William McKinley,Rep.,292,447,65.23,51.64,6.12,7228864,857932,William Jennings Bryan,Dem.,73.7 31 | 30,1904,Theodore Roosevelt,Rep.,336,476,70.59,56.42,18.83,7630557,2546677,Alton Brooks Parker,Dem.,65.5 32 | 31,1908,William Taft,Rep.,321,483,66.46,51.57,8.53,7678335,1269356,William Jennings Bryan,Dem.,65.7 33 | 32,1912,Woodrow Wilson,Dem.,435,531,81.92,41.84,14.44,6296284,2173563,Theodore Roosevelt,Prog.,59.0 34 | 33,1916,Woodrow Wilson,Dem.,277,531,52.17,49.24,3.12,9126868,578140,Charles Evans Hughes,Rep.,61.8 35 | 34,1920,Warren Harding,Rep.,404,531,76.08,60.32,26.17,16144093,7004432,James Cox,Dem.,49.2 36 | 35,1924,Calvin Coolidge,Rep.,382,531,71.94,54.04,25.22,15723789,7337547,John Davis,Dem.,48.9 37 | 36,1928,Herbert Hoover,Rep.,444,531,83.62,58.21,17.41,21427123,6411659,Al Smith,Dem.,56.9 38 | 37,1932,Franklin Roosevelt,Dem.,472,531,88.89,57.41,17.76,22821277,7060023,Herbert Hoover,Rep.,56.9 39 | 38,1936,Franklin Roosevelt,Dem.,523,531,98.49,60.80,24.26,27752648,11070786,Alf Landon,Rep.,61.0 40 | 39,1940,Franklin Roosevelt,Dem.,449,531,84.56,54.74,9.96,27313945,4966201,Wendell Willkie,Rep.,62.4 41 | 40,1944,Franklin Roosevelt,Dem.,432,531,81.36,53.39,7.50,25612916,3594987,Thomas Dewey,Rep.,55.9 42 | 41,1948,Harry Truman,Dem.,303,531,57.06,49.55,4.48,24179347,2188055,Thomas Dewey,Rep.,52.2 43 | 42,1952,Dwight 
Eisenhower,Rep.,442,531,83.24,55.18,10.85,34075529,6700439,Adlai Stevenson,Dem.,62.3 44 | 43,1956,Dwight Eisenhower,Rep.,457,531,86.06,57.37,15.40,35579180,9551152,Adlai Stevenson,Dem.,60.2 45 | 44,1960,John F. Kennedy,Dem.,303,537,56.42,49.72,0.17,34220984,112827,Richard Nixon,Rep.,63.8 46 | 45,1964,Lyndon Johnson,Dem.,486,538,90.33,61.05,22.58,43127041,15951287,Barry Goldwater,Rep.,62.8 47 | 46,1968,Richard Nixon,Rep.,301,538,55.95,43.42,0.70,31783783,511944,Hubert Humphrey,Dem.,62.5 48 | 47,1972,Richard Nixon,Rep.,520,538,96.65,60.67,23.15,47168710,17995488,George McGovern,Dem.,56.2 49 | 48,1976,Jimmy Carter,Dem.,297,538,55.20,50.08,2.06,40831881,1683247,Gerald Ford,Rep.,54.8 50 | 49,1980,Ronald Reagan,Rep.,489,538,90.89,50.75,9.74,43903230,8423115,Jimmy Carter,Dem.,54.2 51 | 50,1984,Ronald Reagan,Rep.,525,538,97.58,58.77,18.21,54455472,16878120,Walter Mondale,Dem.,55.2 52 | 51,1988,George H. W. Bush,Rep.,426,538,79.18,53.37,7.72,48886597,7077121,Michael Dukakis,Dem.,52.8 53 | 52,1992,Bill Clinton,Dem.,370,538,68.77,43.01,5.56,44909806,5805256,George H. W. Bush,Rep.,58.1 54 | 53,1996,Bill Clinton,Dem.,379,538,70.45,49.23,8.51,47400125,8201370,Bob Dole,Rep.,51.7 55 | 54,2000,George W. Bush,Rep.,271,538,50.37,47.87,−0.51,50460110,−543816,Al Gore,Dem.,54.2 56 | 55,2004,George W. 
Bush,Rep.,286,538,53.16,50.73,2.46,62040610,3012171,John Kerry,Dem.,60.1 57 | 56,2008,Barack Obama,Dem.,365,538,67.84,52.93,7.27,69498516,9550193,John McCain,Rep.,61.6 58 | 57,2012,Barack Obama,Dem.,332,538,61.71,51.06,3.86,65915795,4982291,Mitt Romney,Rep.,58.6 59 | 58,2016,Donald Trump,Rep.,304,538,56.50,46.09,−2.09,62984828,−2868686,Hillary Clinton,Dem.,60.2 60 | -------------------------------------------------------------------------------- /data/wdbc_missing.csv: -------------------------------------------------------------------------------- 1 | ID,Class,Radius,Texture,Perimeter,Area,Smoothness,Compactness,Concavity,Concave_Points,Symmetry,Fractal_Dimension 2 | 842302,M,,,1.2688172627037921,0.983509520104142,1.5670874574786582,3.2806280641246857,2.650541786383573,2.530248864134298,2.215565541846305,2.25376381072807 3 | 842517,M,1.8282119737343598,-0.3533215225500966,1.684472552277101,1.9070302686337925,-0.826235446757039,-0.486643477616135,-0.023824891805531347,0.5476622708254778,0.001391139243576388,-0.8678888068037953 4 | 84300903,M,1.5784992020342323,,1.5651259839837746,1.5575131853441093,0.941382123037953,1.051999895332493,1.362279788963212,2.0354397832616953,0.9388587199172193,-0.39765801323729066 5 | 84348301,M,-0.7682333229203782,0.25350905052192196,-0.5921661228907633,-0.7637917361139566,3.280666839299224,3.3999174223523045,1.9142128745181868,1.4504311303550237,2.864862154141668,4.906601992505377 6 | 84358402,M,1.7487579100115918,-1.1508038465489563,1.7750113282237618,1.8246238018419159,0.2801253491403896,0.5388663067660666,1.3698061492207798,1.4272369546891206,-0.009552062087244153,-0.5619555194231786 7 | 843786,M,-0.4759558742259106,-0.8346009425727322,-0.3868077174481091,-0.5052059265256544,2.2354545192675923,1.2432415648720105,0.8655400119637346,0.8239306743126811,1.0045179279021434,1.888343495245663 8 | 
844359,M,1.1698783028885684,0.16050819641126807,1.1371244976904666,1.0943320099277,-0.12302797430038338,0.08821762012839307,0.2998085992698855,0.646366373937044,-0.06426806874134787,-0.7616619709077471 9 | 10 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | book-env: 3 | image: ubcdsci/intro-to-ds:202307130106229dd1c2 4 | ports: 5 | - "8787:8787" 6 | volumes: 7 | - .:/home/rstudio/introduction-to-datascience 8 | environment: 9 | PASSWORD: password 10 | deploy: 11 | resources: 12 | limits: 13 | memory: 5G 14 | 15 | -------------------------------------------------------------------------------- /img/classification1/plot3d_knn_classification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification1/plot3d_knn_classification.png -------------------------------------------------------------------------------- /img/classification2/ML-paradigm-test.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/ML-paradigm-test.ai -------------------------------------------------------------------------------- /img/classification2/ML-paradigm-test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/ML-paradigm-test.png -------------------------------------------------------------------------------- /img/classification2/cv.ai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/cv.ai -------------------------------------------------------------------------------- /img/classification2/cv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/cv.png -------------------------------------------------------------------------------- /img/classification2/train-test-overview.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/train-test-overview.ai -------------------------------------------------------------------------------- /img/classification2/train-test-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/train-test-overview.png -------------------------------------------------------------------------------- /img/classification2/training_test.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/training_test.ai -------------------------------------------------------------------------------- /img/classification2/training_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/classification2/training_test.png -------------------------------------------------------------------------------- 
/img/clustering/gentoo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/clustering/gentoo.jpg -------------------------------------------------------------------------------- /img/frontmatter/chapter_overview.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/chapter_overview.ai -------------------------------------------------------------------------------- /img/frontmatter/chapter_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/chapter_overview.png -------------------------------------------------------------------------------- /img/frontmatter/ds-a-first-intro-cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/ds-a-first-intro-cover.jpg -------------------------------------------------------------------------------- /img/frontmatter/ds-a-first-intro-graphic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/frontmatter/ds-a-first-intro-graphic.jpg -------------------------------------------------------------------------------- /img/inference/intro-bootstrap.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/inference/intro-bootstrap.jpeg -------------------------------------------------------------------------------- /img/inference/population_vs_sample.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/inference/population_vs_sample.ai -------------------------------------------------------------------------------- /img/inference/population_vs_sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/inference/population_vs_sample.png -------------------------------------------------------------------------------- /img/intro/arrange_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/arrange_function.png -------------------------------------------------------------------------------- /img/intro/canada_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/canada_map.png -------------------------------------------------------------------------------- /img/intro/filter_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/filter_function.png -------------------------------------------------------------------------------- /img/intro/ggplot_function.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/ggplot_function.png -------------------------------------------------------------------------------- /img/intro/help-filter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/help-filter.png -------------------------------------------------------------------------------- /img/intro/intro-all.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/intro-all.ai -------------------------------------------------------------------------------- /img/intro/read_csv_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/read_csv_function.png -------------------------------------------------------------------------------- /img/intro/select_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/select_function.png -------------------------------------------------------------------------------- /img/intro/spreadsheet_vs_dataframe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/intro/spreadsheet_vs_dataframe.png -------------------------------------------------------------------------------- 
/img/jupyter/activate-and-run-button-annotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/activate-and-run-button-annotated.png -------------------------------------------------------------------------------- /img/jupyter/code-cell-not-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/code-cell-not-run.png -------------------------------------------------------------------------------- /img/jupyter/code-cell-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/code-cell-run.png -------------------------------------------------------------------------------- /img/jupyter/convert-to-markdown-cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/convert-to-markdown-cell.png -------------------------------------------------------------------------------- /img/jupyter/create-new-code-cell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/create-new-code-cell.png -------------------------------------------------------------------------------- /img/jupyter/jupyter.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/jupyter.png -------------------------------------------------------------------------------- /img/jupyter/launcher-annotated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/launcher-annotated.png -------------------------------------------------------------------------------- /img/jupyter/markdown-cell-not-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/markdown-cell-not-run.png -------------------------------------------------------------------------------- /img/jupyter/markdown-cell-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/markdown-cell-run.png -------------------------------------------------------------------------------- /img/jupyter/open_data_w_editor_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/open_data_w_editor_01.png -------------------------------------------------------------------------------- /img/jupyter/open_data_w_editor_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/open_data_w_editor_02.png -------------------------------------------------------------------------------- /img/jupyter/out-of-order-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/out-of-order-1.png -------------------------------------------------------------------------------- /img/jupyter/out-of-order-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/out-of-order-2.png -------------------------------------------------------------------------------- /img/jupyter/out-of-order-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/out-of-order-3.png -------------------------------------------------------------------------------- /img/jupyter/restart-kernel-run-all.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/jupyter/restart-kernel-run-all.png -------------------------------------------------------------------------------- /img/key_files/chapter_overview.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/chapter_overview.key -------------------------------------------------------------------------------- /img/key_files/data_frame_slides_cdn.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/data_frame_slides_cdn.key 
-------------------------------------------------------------------------------- /img/key_files/dsci-100-slide-images.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/dsci-100-slide-images.key -------------------------------------------------------------------------------- /img/key_files/filesystem.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/filesystem.key -------------------------------------------------------------------------------- /img/key_files/ggplot_functions.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/ggplot_functions.key -------------------------------------------------------------------------------- /img/key_files/git_intro.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/git_intro.pptx -------------------------------------------------------------------------------- /img/key_files/pivot_functions.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/pivot_functions.key -------------------------------------------------------------------------------- /img/key_files/png-vs-svg.pptx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/png-vs-svg.pptx -------------------------------------------------------------------------------- /img/key_files/ref_vs_tibble.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/ref_vs_tibble.key -------------------------------------------------------------------------------- /img/key_files/spreadsheet_vs_dataframe.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/spreadsheet_vs_dataframe.pptx -------------------------------------------------------------------------------- /img/key_files/summarize.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/summarize.key -------------------------------------------------------------------------------- /img/key_files/tidy_data.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/tidy_data.key -------------------------------------------------------------------------------- /img/key_files/tidydata_bootstrap_train_test_images.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/key_files/tidydata_bootstrap_train_test_images.key -------------------------------------------------------------------------------- 
/img/reading/NASA-API-Rho-Ophiuchi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-Rho-Ophiuchi.png -------------------------------------------------------------------------------- /img/reading/NASA-API-limits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-limits.png -------------------------------------------------------------------------------- /img/reading/NASA-API-parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-parameters.png -------------------------------------------------------------------------------- /img/reading/NASA-API-signup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/NASA-API-signup.png -------------------------------------------------------------------------------- /img/reading/craigslist_human.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/craigslist_human.png -------------------------------------------------------------------------------- /img/reading/filesystem.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/filesystem.ai 
-------------------------------------------------------------------------------- /img/reading/filesystem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/filesystem.png -------------------------------------------------------------------------------- /img/reading/ref_vs_tibble.001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/ref_vs_tibble.001.jpeg -------------------------------------------------------------------------------- /img/reading/sg1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg1.png -------------------------------------------------------------------------------- /img/reading/sg2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg2.png -------------------------------------------------------------------------------- /img/reading/sg3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg3.png -------------------------------------------------------------------------------- /img/reading/sg4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/reading/sg4.png 
-------------------------------------------------------------------------------- /img/regression1/plot3d_knn_regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/regression1/plot3d_knn_regression.png -------------------------------------------------------------------------------- /img/regression2/plot3d_linear_regression.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/regression2/plot3d_linear_regression.png -------------------------------------------------------------------------------- /img/setup/docker-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-1.png -------------------------------------------------------------------------------- /img/setup/docker-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-2.png -------------------------------------------------------------------------------- /img/setup/docker-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-3.png -------------------------------------------------------------------------------- /img/setup/docker-4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/docker-4.png -------------------------------------------------------------------------------- /img/setup/jlab-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/jlab-1.png -------------------------------------------------------------------------------- /img/setup/jlab-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/jlab-2.png -------------------------------------------------------------------------------- /img/setup/ubuntu-docker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/setup/ubuntu-docker.png -------------------------------------------------------------------------------- /img/unused/1024px-Supervised_machine_learning_in_a_nutshell.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/1024px-Supervised_machine_learning_in_a_nutshell.svg -------------------------------------------------------------------------------- /img/unused/ML-paradigm.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/ML-paradigm.jpeg -------------------------------------------------------------------------------- /img/unused/ML-paradigm.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/ML-paradigm.png -------------------------------------------------------------------------------- /img/unused/Page_Under_Construction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/Page_Under_Construction.png -------------------------------------------------------------------------------- /img/unused/README.md: -------------------------------------------------------------------------------- 1 | ## Images 2 | This is the README.md for the Introduction to Data Science textbook images. 3 | 4 | - to make the ggplot function images 5 | - go to ggplot_functions.key 6 | - take a screenshot of code 7 | - paste image in the key and update relevant text/arrows -------------------------------------------------------------------------------- /img/unused/Supervised_machine_learning_in_a_nutshell.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/Supervised_machine_learning_in_a_nutshell.svg.png -------------------------------------------------------------------------------- /img/unused/activate-and-run-button.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/activate-and-run-button.png -------------------------------------------------------------------------------- /img/unused/add_collab_01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_01.png -------------------------------------------------------------------------------- /img/unused/add_collab_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_02.png -------------------------------------------------------------------------------- /img/unused/add_collab_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_03.png -------------------------------------------------------------------------------- /img/unused/add_collab_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_04.png -------------------------------------------------------------------------------- /img/unused/add_collab_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_05.png -------------------------------------------------------------------------------- /img/unused/add_collab_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_06.png -------------------------------------------------------------------------------- /img/unused/add_collab_06_new.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/add_collab_06_new.png -------------------------------------------------------------------------------- /img/unused/chapter_overview.001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/chapter_overview.001.jpeg -------------------------------------------------------------------------------- /img/unused/clone_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_01.png -------------------------------------------------------------------------------- /img/unused/clone_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_02.png -------------------------------------------------------------------------------- /img/unused/clone_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_03.png -------------------------------------------------------------------------------- /img/unused/clone_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/clone_04.png -------------------------------------------------------------------------------- /img/unused/create-new-file_01.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/create-new-file_01.png -------------------------------------------------------------------------------- /img/unused/create-new-file_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/create-new-file_02.png -------------------------------------------------------------------------------- /img/unused/create-new-file_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/create-new-file_03.png -------------------------------------------------------------------------------- /img/unused/data_frame_slides_cdn.001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.001.jpeg -------------------------------------------------------------------------------- /img/unused/data_frame_slides_cdn.002.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.002.jpeg -------------------------------------------------------------------------------- /img/unused/data_frame_slides_cdn.003.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.003.jpeg 
-------------------------------------------------------------------------------- /img/unused/data_frame_slides_cdn.006.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/data_frame_slides_cdn.006.jpeg -------------------------------------------------------------------------------- /img/unused/dataframe.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dataframe.jpeg -------------------------------------------------------------------------------- /img/unused/dsci-100-slide-images.001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dsci-100-slide-images.001.jpeg -------------------------------------------------------------------------------- /img/unused/dsci-100-slide-images.002.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dsci-100-slide-images.002.jpeg -------------------------------------------------------------------------------- /img/unused/dsci-100-slide-images.004.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/dsci-100-slide-images.004.jpeg -------------------------------------------------------------------------------- /img/unused/git_add_01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_add_01.png -------------------------------------------------------------------------------- /img/unused/git_add_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_add_02.png -------------------------------------------------------------------------------- /img/unused/git_add_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_add_03.png -------------------------------------------------------------------------------- /img/unused/git_commit_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_01.png -------------------------------------------------------------------------------- /img/unused/git_commit_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_02.png -------------------------------------------------------------------------------- /img/unused/git_commit_02_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_02_new.png -------------------------------------------------------------------------------- /img/unused/git_commit_03.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_commit_03.png -------------------------------------------------------------------------------- /img/unused/git_pull_00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_00.png -------------------------------------------------------------------------------- /img/unused/git_pull_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_01.png -------------------------------------------------------------------------------- /img/unused/git_pull_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_02.png -------------------------------------------------------------------------------- /img/unused/git_pull_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_03.png -------------------------------------------------------------------------------- /img/unused/git_pull_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_pull_04.png -------------------------------------------------------------------------------- /img/unused/git_push_01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_01.png -------------------------------------------------------------------------------- /img/unused/git_push_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_02.png -------------------------------------------------------------------------------- /img/unused/git_push_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_03.png -------------------------------------------------------------------------------- /img/unused/git_push_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_04.png -------------------------------------------------------------------------------- /img/unused/git_push_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_05.png -------------------------------------------------------------------------------- /img/unused/git_push_05_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/git_push_05_new.png -------------------------------------------------------------------------------- /img/unused/issue_01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_01.png -------------------------------------------------------------------------------- /img/unused/issue_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_02.png -------------------------------------------------------------------------------- /img/unused/issue_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_03.png -------------------------------------------------------------------------------- /img/unused/issue_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_04.png -------------------------------------------------------------------------------- /img/unused/issue_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_05.png -------------------------------------------------------------------------------- /img/unused/issue_05_new.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_05_new.png -------------------------------------------------------------------------------- /img/unused/issue_06.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/issue_06.png -------------------------------------------------------------------------------- /img/unused/launcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/launcher.png -------------------------------------------------------------------------------- /img/unused/long_to_wide.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/long_to_wide.jpeg -------------------------------------------------------------------------------- /img/unused/malignant_cancer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/malignant_cancer.png -------------------------------------------------------------------------------- /img/unused/merge_conflict_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_01.png -------------------------------------------------------------------------------- /img/unused/merge_conflict_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_02.png -------------------------------------------------------------------------------- /img/unused/merge_conflict_02_new.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_02_new.png -------------------------------------------------------------------------------- /img/unused/merge_conflict_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_03.png -------------------------------------------------------------------------------- /img/unused/merge_conflict_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_04.png -------------------------------------------------------------------------------- /img/unused/merge_conflict_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_05.png -------------------------------------------------------------------------------- /img/unused/merge_conflict_06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/merge_conflict_06.png -------------------------------------------------------------------------------- /img/unused/new_repository_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/new_repository_01.png 
-------------------------------------------------------------------------------- /img/unused/new_repository_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/new_repository_02.png -------------------------------------------------------------------------------- /img/unused/new_repository_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/new_repository_03.png -------------------------------------------------------------------------------- /img/unused/obs.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/obs.jpeg -------------------------------------------------------------------------------- /img/unused/pen-tool_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pen-tool_01.png -------------------------------------------------------------------------------- /img/unused/pen-tool_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pen-tool_02.png -------------------------------------------------------------------------------- /img/unused/pen-tool_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pen-tool_03.png 
-------------------------------------------------------------------------------- /img/unused/pivot_longer_with_table.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pivot_longer_with_table.jpeg -------------------------------------------------------------------------------- /img/unused/pivot_wider_with_table.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/pivot_wider_with_table.jpeg -------------------------------------------------------------------------------- /img/unused/prop_val_vs_income.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/prop_val_vs_income.png -------------------------------------------------------------------------------- /img/unused/prop_val_vs_income_by_party.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/prop_val_vs_income_by_party.png -------------------------------------------------------------------------------- /img/unused/prop_val_vs_income_human_labs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/prop_val_vs_income_human_labs.png -------------------------------------------------------------------------------- /img/unused/r.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/r.PNG -------------------------------------------------------------------------------- /img/unused/ref_vs_tibble.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/ref_vs_tibble.jpeg -------------------------------------------------------------------------------- /img/unused/sampling.001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/sampling.001.jpeg -------------------------------------------------------------------------------- /img/unused/sampling.002.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/sampling.002.jpeg -------------------------------------------------------------------------------- /img/unused/spreadsheet.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/spreadsheet.PNG -------------------------------------------------------------------------------- /img/unused/testing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/testing.png -------------------------------------------------------------------------------- /img/unused/text_cell_formatted.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/text_cell_formatted.png -------------------------------------------------------------------------------- /img/unused/text_cell_unformatted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/text_cell_unformatted.png -------------------------------------------------------------------------------- /img/unused/tidy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/tidy.png -------------------------------------------------------------------------------- /img/unused/tidy_data.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/tidy_data.jpeg -------------------------------------------------------------------------------- /img/unused/timbits.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/timbits.jpg -------------------------------------------------------------------------------- /img/unused/training_validation.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/training_validation.jpeg -------------------------------------------------------------------------------- /img/unused/upload-files_01.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/upload-files_01.png -------------------------------------------------------------------------------- /img/unused/upload_files_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/upload_files_02.png -------------------------------------------------------------------------------- /img/unused/vars.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vars.jpeg -------------------------------------------------------------------------------- /img/unused/vc-ba1-changes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc-ba1-changes.png -------------------------------------------------------------------------------- /img/unused/vc3-add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc3-add.png -------------------------------------------------------------------------------- /img/unused/vc4-commit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc4-commit.png -------------------------------------------------------------------------------- /img/unused/vc5-5-nachos-to-cheesecake.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vc5-5-nachos-to-cheesecake.png -------------------------------------------------------------------------------- /img/unused/vec_vs_list.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vec_vs_list.jpeg -------------------------------------------------------------------------------- /img/unused/vector.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vector.jpeg -------------------------------------------------------------------------------- /img/unused/vectors.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/vectors.jpeg -------------------------------------------------------------------------------- /img/unused/wide_to_long.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/wide_to_long.jpeg -------------------------------------------------------------------------------- /img/unused/wikipedia_human.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/unused/wikipedia_human.png -------------------------------------------------------------------------------- 
/img/version-control/add_collab_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_01.png -------------------------------------------------------------------------------- /img/version-control/add_collab_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_02.png -------------------------------------------------------------------------------- /img/version-control/add_collab_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_03.png -------------------------------------------------------------------------------- /img/version-control/add_collab_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_04.png -------------------------------------------------------------------------------- /img/version-control/add_collab_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/add_collab_05.png -------------------------------------------------------------------------------- /img/version-control/clone_01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_01.png -------------------------------------------------------------------------------- /img/version-control/clone_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_02.png -------------------------------------------------------------------------------- /img/version-control/clone_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_03.png -------------------------------------------------------------------------------- /img/version-control/clone_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/clone_04.png -------------------------------------------------------------------------------- /img/version-control/create-new-file_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/create-new-file_01.png -------------------------------------------------------------------------------- /img/version-control/create-new-file_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/create-new-file_02.png -------------------------------------------------------------------------------- 
/img/version-control/create-new-file_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/create-new-file_03.png -------------------------------------------------------------------------------- /img/version-control/generate-pat_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/generate-pat_01.png -------------------------------------------------------------------------------- /img/version-control/generate-pat_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/generate-pat_02.png -------------------------------------------------------------------------------- /img/version-control/generate-pat_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/generate-pat_03.png -------------------------------------------------------------------------------- /img/version-control/git_add_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_add_01.png -------------------------------------------------------------------------------- /img/version-control/git_add_02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_add_02.png -------------------------------------------------------------------------------- /img/version-control/git_add_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_add_03.png -------------------------------------------------------------------------------- /img/version-control/git_commit_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_commit_01.png -------------------------------------------------------------------------------- /img/version-control/git_commit_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_commit_03.png -------------------------------------------------------------------------------- /img/version-control/git_pull_00.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_00.png -------------------------------------------------------------------------------- /img/version-control/git_pull_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_01.png -------------------------------------------------------------------------------- 
/img/version-control/git_pull_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_02.png -------------------------------------------------------------------------------- /img/version-control/git_pull_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_03.png -------------------------------------------------------------------------------- /img/version-control/git_pull_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_pull_04.png -------------------------------------------------------------------------------- /img/version-control/git_push_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_01.png -------------------------------------------------------------------------------- /img/version-control/git_push_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_02.png -------------------------------------------------------------------------------- /img/version-control/git_push_03.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_03.png -------------------------------------------------------------------------------- /img/version-control/git_push_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/git_push_04.png -------------------------------------------------------------------------------- /img/version-control/issue_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_01.png -------------------------------------------------------------------------------- /img/version-control/issue_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_02.png -------------------------------------------------------------------------------- /img/version-control/issue_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_03.png -------------------------------------------------------------------------------- /img/version-control/issue_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_04.png -------------------------------------------------------------------------------- /img/version-control/issue_06.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/issue_06.png -------------------------------------------------------------------------------- /img/version-control/merge_conflict_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_01.png -------------------------------------------------------------------------------- /img/version-control/merge_conflict_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_03.png -------------------------------------------------------------------------------- /img/version-control/merge_conflict_04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_04.png -------------------------------------------------------------------------------- /img/version-control/merge_conflict_05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_05.png -------------------------------------------------------------------------------- /img/version-control/merge_conflict_06.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/merge_conflict_06.png -------------------------------------------------------------------------------- /img/version-control/new_repository_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/new_repository_01.png -------------------------------------------------------------------------------- /img/version-control/new_repository_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/new_repository_02.png -------------------------------------------------------------------------------- /img/version-control/new_repository_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/new_repository_03.png -------------------------------------------------------------------------------- /img/version-control/pen-tool_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/pen-tool_01.png -------------------------------------------------------------------------------- /img/version-control/pen-tool_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/pen-tool_02.png 
-------------------------------------------------------------------------------- /img/version-control/pen-tool_03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/pen-tool_03.png -------------------------------------------------------------------------------- /img/version-control/upload-files_01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/upload-files_01.png -------------------------------------------------------------------------------- /img/version-control/upload-files_02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/upload-files_02.png -------------------------------------------------------------------------------- /img/version-control/vc-ba2-add.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc-ba2-add.png -------------------------------------------------------------------------------- /img/version-control/vc-ba3-commit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc-ba3-commit.png -------------------------------------------------------------------------------- /img/version-control/vc1-no-changes.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc1-no-changes.png -------------------------------------------------------------------------------- /img/version-control/vc2-changes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc2-changes.png -------------------------------------------------------------------------------- /img/version-control/vc5-push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc5-push.png -------------------------------------------------------------------------------- /img/version-control/vc6-remote-changes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc6-remote-changes.png -------------------------------------------------------------------------------- /img/version-control/vc7-pull.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/vc7-pull.png -------------------------------------------------------------------------------- /img/version-control/version-control-all.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/version-control/version-control-all.ai -------------------------------------------------------------------------------- 
/img/viz/faithful_plot.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.bmp -------------------------------------------------------------------------------- /img/viz/faithful_plot.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.jpg -------------------------------------------------------------------------------- /img/viz/faithful_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.png -------------------------------------------------------------------------------- /img/viz/faithful_plot.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/faithful_plot.tiff -------------------------------------------------------------------------------- /img/viz/png-vs-svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/viz/png-vs-svg.png -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.004.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.004.ai -------------------------------------------------------------------------------- 
/img/wrangling/data_frame_slides_cdn.004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.004.png -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.005.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.005.ai -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.005.png -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.007.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.007.ai -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.007.png -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.008.ai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.008.ai -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.008.png -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.009.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.009.ai -------------------------------------------------------------------------------- /img/wrangling/data_frame_slides_cdn.009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/data_frame_slides_cdn.009.png -------------------------------------------------------------------------------- /img/wrangling/mutate_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/mutate_function.png -------------------------------------------------------------------------------- /img/wrangling/pivot_functions.001.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.001.ai 
-------------------------------------------------------------------------------- /img/wrangling/pivot_functions.001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.001.png -------------------------------------------------------------------------------- /img/wrangling/pivot_functions.002.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.002.ai -------------------------------------------------------------------------------- /img/wrangling/pivot_functions.002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.002.png -------------------------------------------------------------------------------- /img/wrangling/pivot_functions.003.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.003.ai -------------------------------------------------------------------------------- /img/wrangling/pivot_functions.003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.003.png -------------------------------------------------------------------------------- /img/wrangling/pivot_functions.004.ai: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.004.ai -------------------------------------------------------------------------------- /img/wrangling/pivot_functions.004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_functions.004.png -------------------------------------------------------------------------------- /img/wrangling/pivot_longer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_longer.png -------------------------------------------------------------------------------- /img/wrangling/pivot_wider.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/pivot_wider.png -------------------------------------------------------------------------------- /img/wrangling/separate_function.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/separate_function.png -------------------------------------------------------------------------------- /img/wrangling/summarize.001.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.001.ai -------------------------------------------------------------------------------- /img/wrangling/summarize.001.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.001.png -------------------------------------------------------------------------------- /img/wrangling/summarize.002.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.002.ai -------------------------------------------------------------------------------- /img/wrangling/summarize.002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.002.png -------------------------------------------------------------------------------- /img/wrangling/summarize.003.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.003.ai -------------------------------------------------------------------------------- /img/wrangling/summarize.003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.003.png -------------------------------------------------------------------------------- /img/wrangling/summarize.004.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.004.ai -------------------------------------------------------------------------------- 
/img/wrangling/summarize.004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.004.png -------------------------------------------------------------------------------- /img/wrangling/summarize.005.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.005.ai -------------------------------------------------------------------------------- /img/wrangling/summarize.005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/summarize.005.png -------------------------------------------------------------------------------- /img/wrangling/tidy_data.001.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/tidy_data.001.ai -------------------------------------------------------------------------------- /img/wrangling/tidy_data.001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/tidy_data.001.png -------------------------------------------------------------------------------- /img/wrangling/wrangling-syntax-all.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/UBC-DSCI/introduction-to-datascience/e4222637aee185fd4691c59e4fdba4b26867fa53/img/wrangling/wrangling-syntax-all.ai 
-------------------------------------------------------------------------------- /index.Rmd: -------------------------------------------------------------------------------- 1 | 6 | --- 7 | title: | 8 | ![](img/frontmatter/ds-a-first-intro-graphic.jpg) 9 | Data Science 10 | subtitle: "A First Introduction" 11 | knit: "bookdown::render_book" 12 | author: "Tiffany Timbers, Trevor Campbell, and Melissa Lee" 13 | date: "`r Sys.Date()`" 14 | site: bookdown::bookdown_site 15 | link-citations: yes 16 | colorlinks: yes 17 | documentclass: krantz 18 | classoption: 19 | - krantz2 20 | biblio-style: plainnat 21 | lot: yes 22 | lof: yes 23 | fontsize: 12pt 24 | description: "This is a textbook for teaching a first introduction to data science." 25 | always_allow_html: true 26 | graphics: yes 27 | url: https://datasciencebook.ca 28 | github-repo: UBC-DSCI/introduction-to-datascience 29 | --- 30 | 31 | # Welcome! {-} 32 | 33 | This is the [website](https://datasciencebook.ca/) for *Data Science: A First Introduction*. 34 | You can read the web version of the book on this site. Click a section in the table of contents 35 | on the left side of the page to navigate to it. If you are on a mobile device, 36 | you may need to open the table of contents first by clicking the menu button on 37 | the top left of the page. You can purchase a PDF or print copy of the book 38 | on the [CRC Press website](https://www.routledge.com/Data-Science-A-First-Introduction/Timbers-Campbell-Lee/p/book/9780367524685) or on [Amazon](https://www.amazon.com/Data-Science-First-Introduction-Chapman/dp/0367532174). 39 | 40 | For the Python version of the textbook, visit [https://python.datasciencebook.ca](https://python.datasciencebook.ca).
41 | 42 | This book is listed in a number of open educational resource (OER) collections: 43 | 44 | - [The University of British Columbia OER collection](https://oer.open.ubc.ca/data-science-a-first-introduction/) 45 | - [The OER Commons](https://oercommons.org/courses/data-science-a-first-introduction-with-r) 46 | - [MERLOT](https://merlot.org/merlot/viewMaterial.htm?id=773420156) 47 | 48 | ```{r bookcover, echo = FALSE, fig.retina = 2, out.width = "45%"} 49 | knitr::include_graphics("img/frontmatter/ds-a-first-intro-cover.jpg") 50 | ``` 51 | 52 | 53 | 54 | This work by [Tiffany Timbers](https://www.tiffanytimbers.com/), [Trevor Campbell](https://trevorcampbell.me/), 55 | and [Melissa Lee](https://www.stat.ubc.ca/users/melissa-lee) is licensed under 56 | a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/). 57 | 58 | -------------------------------------------------------------------------------- /scripts/intro_bootstrap_image.R: -------------------------------------------------------------------------------- 1 | library(plotrix) #draw.circle() 2 | library(RColorBrewer) # NOTE(review): no RColorBrewer function is called in this script 3 | library(diagram) #curvedarrow() -- NOTE(review): curvedarrow() is not called in this script; only base arrows() is used 4 | library(infer) #rep_sample_n() 5 | library(tidyverse) #%>%, summarise() 6 | library(magick) #image_read() 7 | 8 | # Sample values: the six observations that are resampled below. This vector shadows base::sample by name; the sample(sample, ...) calls further down still resolve to the function because R skips non-function bindings when looking up a call. 9 | sample <- c(1, 2, 3, 5, 8, 9) 10 | df <- data.frame(value = sample) 11 | set.seed(10) 12 | estimates <- rep_sample_n(df, size = 6, replace = T, reps = 1000) %>% 13 | summarise(mean = mean(value)) 14 | svg("bootstrap.svg") 15 | hist(estimates$mean, 16 | col = "dodgerblue3", 17 | yaxt = "n", 18 | xlab = "means", cex.lab = 5, ylab = "", main = "", xaxt = "n") 19 | #ggplot(estimates, aes(mean)) + 20 | # geom_histogram(binwidth = 0.5, fill = "dodgerblue3", col = "lightgrey") + 21 | # xlab("means") 22 | dev.off() 23 | 24 | bootstrap <- image_read("bootstrap.svg") 25 | 26 | svg("intro-bootstrap.svg") 27 | # blank plot: empty 1..10 by 1..10 canvas with zero margins, no axes and no box 28 | par(mar=c(0, 0, 0, 0)) 29 | plot(1:10, 1:10, type="n",xlab="",ylab="", 30 |
bty='n', 31 | yaxt="n", 32 | xaxt = "n") 33 | 34 | # set up: radius and centre of the original-sample circle, then the label coordinates inside it 35 | circle_size <- 0.85 36 | circle_x <- 2 37 | circle_y <- 5 38 | xs <- c(1.4, 1.5, 2, 2.3, 2.5, 2.7) # position of points 39 | ys <- c(4.7, 5.2, 4.6, 5.4, 4.8, 5) # position of points 40 | 41 | #sample: the six values, the "Sample" title, and the enclosing circle 42 | text(xs, ys, labels = c(paste(sample))) 43 | text(circle_x, circle_y + 1., "Sample") 44 | draw.circle(circle_x, circle_y, circle_size, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 45 | 46 | # arrows: from the sample up towards bootstrap sample #1, plus the arrow's caption 47 | added_x <- 2 48 | added_y <- 2 49 | arrows(circle_x + 0.7, circle_y + 0.6, circle_x + added_x , circle_y + added_y + 0.1, length = 0.1, lwd = 1) 50 | text(circle_x + 0.8, circle_y + added_y, "sample with \n replacement", cex = 0.75) 51 | 52 | # Bootstrap sample #1 (added_x becomes 2.5 and is reused unchanged for samples #2 and #3; added_y becomes 3) 53 | added_x <- 0.5 + added_x 54 | added_y <-1 + added_y 55 | draw.circle(circle_x + added_x, circle_y + added_y, circle_size, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 56 | text(circle_x + added_x, circle_y + added_y + 1.2, "Bootstrap \n Sample #1", cex = 1) 57 | 58 | set.seed(1) 59 | boot1 <- sample(sample, replace = T) 60 | xs1 <- xs + added_x # position of points 61 | ys1 <- ys + added_y 62 | text(xs1, ys1, labels = c(paste(boot1))) 63 | 64 | # means (round() with default digits, so the label shows a whole number) 65 | text(circle_x + added_x + 2.5, circle_y + added_y, paste("mean =",round(mean(boot1)))) 66 | 67 | # Bootstrap sample #2 (added_y is reassigned here, not incremented) 68 | added_y <- 0.2 69 | draw.circle(circle_x + added_x, circle_y + added_y, circle_size, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 70 | text(circle_x + added_x, circle_y + added_y + 1.2, "Bootstrap \n Sample #2", cex = 1) 71 | arrows(circle_x + 0.85, circle_y, circle_x + 1.5, circle_y, length = 0.1, lwd = 1) 72 | 73 | set.seed(2) 74 | boot2 <- sample(sample, replace = T) 75 | xs2 <- xs + added_x # position of points 76 | ys2 <- ys + added_y 77 | text(xs2, ys2, labels = c(paste(boot2))) 78 | 79 | # means (round() with default digits, so the label shows a whole number) 80 | text(circle_x + added_x + 2.5, circle_y + added_y, paste("mean =",
round(mean(boot2)))) 81 | 82 | 83 | # Bootstrap sample #3 (added_y is reassigned again; added_x is still 2.5) 84 | added_y <- -2.6 85 | draw.circle(circle_x + added_x, circle_y + added_y, circle_size, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 86 | text(circle_x + added_x, circle_y + 1.2 + added_y, "Bootstrap \n Sample #3", cex = 1) 87 | arrows(circle_x + 0.7, circle_y - 0.7, circle_x + 1.6, circle_y - 2, length = 0.1, lwd = 1) 88 | 89 | set.seed(3) 90 | boot3 <- sample(sample, replace = T) 91 | xs3 <- xs + added_x # position of points 92 | ys3 <- ys + added_y 93 | text(xs3, ys3, labels = c(paste(boot3))) 94 | 95 | # means (round() with default digits, so the label shows a whole number) 96 | text(circle_x + added_x + 2.5, circle_y + added_y, paste("mean =", round(mean(boot3)))) 97 | 98 | # last line: arrow and caption indicating that resampling continues 99 | arrows(circle_x + 0.5, circle_y - 1, circle_x + 1.4, circle_y -3.8 , length = 0.1, lwd = 1) 100 | text(circle_x + added_x, circle_y - 4, "keep sampling with \n replacement...", cex = 0.75) 101 | 102 | 103 | # arrows to means: one short horizontal arrow from each bootstrap circle to its mean label 104 | arrows(circle_x + 3.5, circle_y + 3, circle_x + 4.2, circle_y +3, length = 0.1, lwd = 1) 105 | arrows(circle_x + 3.5, circle_y + 0.1, circle_x + 4.2, circle_y +0.1, length = 0.1, lwd = 1) 106 | arrows(circle_x + 3.5, circle_y - 2.7, circle_x + 4.2, circle_y - 2.7, length = 0.1, lwd = 1) 107 | 108 | # bootstrap distribution: the histogram image read back from bootstrap.svg, placed at x 8-10, y 4-8 109 | rasterImage(as.raster(bootstrap), 8,4,10,8) 110 | text(circle_x + 7, circle_y +3, "Bootstrap \n distribution") 111 | 112 | # arrows to bootstrap: from each mean label towards the histogram 113 | arrows(circle_x + 5.5, circle_y + 2.7, circle_x + 6.5, circle_y +0.5, length = 0.1, lwd = 1) 114 | arrows(circle_x + 5.6, circle_y + 0.1, circle_x +6.4, circle_y +0.1, length = 0.1, lwd = 1) 115 | arrows(circle_x + 5.5, circle_y - 2.4, circle_x + 6.5, circle_y -0.5, length = 0.1, lwd = 1) 116 | 117 | dev.off() 118 | 119 | -------------------------------------------------------------------------------- /scripts/population-sample.R: -------------------------------------------------------------------------------- 1 | library(plotrix) #draw.circle() 2 |
library(RColorBrewer) #brewer.pal() 3 | library(diagram) #curvedarrow() 4 | #library(shape) 5 | #library(igraph) 6 | #display.brewer.all(colorblindFriendly = TRUE) 7 | svg("img/population_vs_sample.svg") 8 | mycolours <- brewer.pal(12, "Paired") 9 | blues <- brewer.pal(9, "Blues") # only referenced by the commented-out draw.circle(0, 0, 50, ...) below 10 | 11 | # blank plot: empty canvas spanning x -50..150 and y -100..100, with no margins, axes or box 12 | par(mar=rep(0,4)) 13 | plot(-50:150, seq(-100,100,length=201),type="n",xlab="",ylab="", 14 | bty='n', 15 | yaxt="n", 16 | xaxt = "n") 17 | 18 | # generating points: 50 radii drawn without replacement from 0..50, and 50 angles (in degrees) 19 | set.seed(1) 20 | r <- sample(seq(0, 50, by = 1), size = 50, replace = F) 21 | degs <- 360*sample(seq(0, 50, by = 0.02), size = 50, replace = F) 22 | 23 | # convert the degrees to radians 24 | theta <- 2*pi*degs/360 25 | 26 | # Add a circle around the points 27 | #draw.circle(0, 0, 50, nv=100, border=NULL, col=blues[1], lty=1, density=NULL, angle=45,lwd=1) 28 | 29 | # Plot your points by converting to cartesian; the two colours are recycled alternately over the 50 points. NOTE(review): xlim/ylim do not rescale anything here, the coordinate system was fixed by plot() above 30 | points(1*r*sin(theta),1*r*cos(theta), xlim=c(-max(r),max(r)),ylim=c(-max(r),max(r)), 31 | col= mycolours[c(2,6)], pch = 16, cex = 1) 32 | 33 | # Circles around the sampled points (ten hand-placed circles of radius 3) 34 | draw.circle(-41, 8, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 35 | draw.circle(-19, -34, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 36 | draw.circle(-2, -14, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 37 | draw.circle(0, 27, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 38 | draw.circle(11, 42, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 39 | draw.circle(34.3, 36, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 40 | draw.circle(37, -31, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 41 | draw.circle(-9, 9.5, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 42 | draw.circle(-2, 16, 3, nv=100, border="black", col=NA, lty=1, density=NULL, angle=45,lwd=1) 43 | draw.circle(44.3, 20.5, 3, nv=100, border="black",
col=NA, lty=1, density=NULL, angle=45,lwd=1) 44 | 45 | #draw.circle(85, -20, 20, nv=100, border="black", col=Blues[1], lty=1, density=NULL, angle=45,lwd=1) 46 | # Sampled points: 10 distinct locations with one colour index each (6 x mycolours[2] = "Has iPhone", 4 x mycolours[6] = "No iPhone", matching the 6/10 estimate printed below). This replaces an 11-coordinate / 10-colour call that only looked right because (100, -30) was drawn twice and the 11th colour was recycled. 47 | points(c(125, 99, 91, 92, 95, 120, 110, 100, 110, 105), c(0, -10, 2, 8, -50, -45, -15, -30, -30, 0), 48 | col= mycolours[c(2, 6, 2, 2, 2, 6, 6, 6, 2, 2)], pch = 16, cex = 1) 49 | 50 | # population box 51 | rect(-50, -55, 55, 70) 52 | 53 | # sample box 54 | rect(75, -55, 145, 29) 55 | 56 | text(-20, 80, "Population", font= 2) 57 | text(0, 60, "All undergraduate \n students in North America") 58 | text(110, 20, "10 undergraduate \n students in North America") 59 | text(95, 35, "Sample", font = 2) 60 | 61 | #iArrows <- igraph:::igraph.Arrows 62 | #iArrows(0, 46, 90, 12, 63 | # h.lwd=2, sh.lwd=2, sh.col="black", 64 | # curve=1.1 , width=1, size=1) 65 | 66 | #big arrow: curved arrow from the top of the population box to the sample box 67 | curvedarrow(c(0,70), c(110,32), lwd = 2, lty = 1, lcol = "black", 68 | arr.col = "black", arr.pos = 1, curve = -0.3, dr = 0.1, 69 | endhead = T) 70 | text(0, -80, "Parameter", font = 2) 71 | text(0, -92, 72 | "unknown p \n (proportion of population who own an iPhone)") 73 | 74 | text(110, -80, "Point Estimate", font = 2) 75 | 76 | text(110, -90, expression(italic(hat(p)) * "= 6/10 = 0.60" )) 77 | text(110, -98, "(proportion of sample who own an iPhone)") 78 | arrows(90, -81, 35, -81, length = 0.1, lwd = 2) 79 | text(60, -72, "Point Estimation", font = 2) 80 | 81 | arrows(0, -50, 0, -75, length = 0.1, lwd = 2) 82 | arrows(110, -50, 110, -75, length = 0.1, lwd = 2) 83 | #curvedarrow(c(0, -65), c(0,-75), lwd = 2, lty = 1, lcol = "black", 84 | # arr.col = "black", arr.pos = 4, curve = 0, dr = 1, 85 | # endhead = T) 86 | 87 | legend(120, 80, legend=c("Has iPhone", "No iPhone"), 88 | col=mycolours[c(2, 6)], pch = 16, cex=0.8) 89 | dev.off() 90 | 91 | -------------------------------------------------------------------------------- /source/acknowledgments.Rmd: 
-------------------------------------------------------------------------------- 1 | # Acknowledgments {-} 2 | 3 | We'd like to thank everyone who has contributed to the development of 4 | [*Data Science: A First Introduction*](https://datasciencebook.ca). 5 | This is an open source textbook that began as a collection of course readings 6 | for DSCI 100, a new introductory data science course 7 | at the University of British Columbia (UBC). 8 | Several faculty members in the UBC Department of Statistics 9 | were pivotal in shaping the direction of that course, 10 | and as such, contributed greatly to the broad structure and 11 | list of topics in this book. We would especially like to thank Matías 12 | Salibián-Barrera for his mentorship during the initial development and roll-out 13 | of both DSCI 100 and this book. His door was always open when 14 | we needed to chat about how to best introduce and teach data science to our first-year students. 15 | We would also like to thank Gabriela Cohen Freue for her DSCI 561 (Regression I) teaching materials 16 | from the UBC Master of Data Science program, as some of our linear regression figures were inspired by these. 17 | 18 | We would also like to thank all those who contributed to the process of 19 | publishing this book. In particular, we would like to thank all of our reviewers for their feedback and suggestions: 20 | Rohan Alexander, Isabella Ghement, Virgilio Gómez Rubio, Albert Kim, Adam Loy, Maria Prokofieva, Emily Riederer, and Greg Wilson. 21 | The book was improved substantially by their insights. 22 | We would like to give special thanks to Jim Zidek 23 | for his support and encouragement throughout the process, and to 24 | Roger Peng for graciously offering to write the Foreword. 25 | 26 | Finally, we owe a debt of gratitude to all of the students of DSCI 100 over the past 27 | few years.
They provided invaluable feedback on the book and worksheets; 28 | they found bugs for us (and stood by very patiently in class while 29 | we frantically fixed those bugs); and they brought a level of enthusiasm to the class 30 | that sustained us during the hard work of creating a new course and writing a textbook. 31 | Our interactions with them taught us how to teach data science, and that learning 32 | is reflected in the content of this book. 33 | -------------------------------------------------------------------------------- /source/after_body.tex: -------------------------------------------------------------------------------- 1 | \backmatter 2 | \printindex 3 | -------------------------------------------------------------------------------- /source/analytics.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 10 | -------------------------------------------------------------------------------- /source/authors.Rmd: -------------------------------------------------------------------------------- 1 | # About the authors {-} 2 | 3 | **[Tiffany Timbers](https://tiffanytimbers.com/)** is an Associate Professor of Teaching in the Department of 4 | Statistics and Co-Director for the Master of Data Science program (Vancouver 5 | Option) at the University of British Columbia. In these roles she teaches and 6 | develops curriculum around the responsible application of Data Science to solve 7 | real-world problems. One of her favorite courses she teaches is a graduate 8 | course on collaborative software development, which focuses on teaching how to 9 | create R and Python packages using modern tools and workflows. 10 | 11 | 12 | **[Trevor Campbell](https://trevorcampbell.me)** is an Associate Professor in the Department of Statistics at 13 | the University of British Columbia. His research focuses on automated, scalable 14 | Bayesian inference algorithms, Bayesian nonparametrics, streaming data, and 15 | Bayesian theory. 
He was previously a postdoctoral associate advised by Tamara 16 | Broderick in the Computer Science and Artificial Intelligence Laboratory 17 | (CSAIL) and Institute for Data, Systems, and Society (IDSS) at MIT, a Ph.D. 18 | candidate under Jonathan How in the Laboratory for Information and Decision 19 | Systems (LIDS) at MIT, and before that he was in the Engineering Science 20 | program at the University of Toronto. 21 | 22 | 23 | **[Melissa Lee](https://www.stat.ubc.ca/users/melissa-lee)** is an Assistant Professor of Teaching in the Department of 24 | Statistics at the University of British Columbia. She teaches and develops 25 | curriculum for undergraduate statistics and data science courses. Her work 26 | focuses on student-centered approaches to teaching, developing and assessing 27 | open educational resources, and promoting equity, diversity, and inclusion 28 | initiatives. 29 | -------------------------------------------------------------------------------- /source/before_body.tex: -------------------------------------------------------------------------------- 1 | % you may need to leave a few empty pages before the dedication page 2 | 3 | %\cleardoublepage\newpage\thispagestyle{empty}\null 4 | %\cleardoublepage\newpage\thispagestyle{empty}\null 5 | %\cleardoublepage\newpage 6 | \thispagestyle{empty} 7 | 8 | \begin{center} 9 | For my husband Curtis and daughter Rowan. Thank-you for your love 10 | \linebreak(and patience with my late night writing). 11 | \linebreak-- Tiffany 12 | 13 | To mom and dad: here's a book. Pretty neat, eh? Love you guys. 14 | \linebreak-- Trevor 15 | 16 | To mom and dad, thank you for all your love and support. 
17 | \linebreak-- Melissa 18 | %\includegraphics{images/dedication.pdf} 19 | \end{center} 20 | 21 | \cleardoublepage\newpage\thispagestyle{empty}\null 22 | 23 | \setlength{\abovedisplayskip}{-5pt} 24 | \setlength{\abovedisplayshortskip}{-5pt} 25 | -------------------------------------------------------------------------------- /source/foreword.Rmd: -------------------------------------------------------------------------------- 1 | # Foreword {-} 2 | 3 | *Roger D. Peng* 4 | 5 | *Johns Hopkins Bloomberg School of Public Health* 6 | 7 | *2022-01-04* 8 | 9 | The field of data science has expanded and grown significantly in recent years, 10 | attracting excitement and interest from many different directions. The demand for introductory 11 | educational materials has grown concurrently with the growth of the field itself, leading to 12 | a proliferation of textbooks, courses, blog posts, and tutorials. This book is an important 13 | contribution to this fast-growing literature, but given the wide availability of materials, a 14 | reader should be inclined to ask, "What is the unique contribution of *this* book?" In order 15 | to answer that question it is useful to step back for a moment and consider the development 16 | of the field of data science over the past few years. 17 | 18 | When thinking about data science, it is important to consider two questions: "What is 19 | data science?" and "How should one do data science?" The former question is under active 20 | discussion amongst a broad community of researchers and practitioners and there does 21 | not appear to be much consensus to date. However, there seems to be a general understanding 22 | that data science focuses on the more "active" elements—data wrangling, cleaning, and 23 | analysis—of answering questions with data. These elements are often highly 24 | problem-specific and may seem difficult to generalize across applications.
Nevertheless, over time we 25 | have seen some core elements emerge that appear to repeat themselves as useful concepts 26 | across different problems. Given the lack of clear agreement over the definition of data 27 | science, there is a strong need for a book like this one to propose a vision for what the field 28 | is and what the implications are for the activities in which members of the field engage. 29 | 30 | The first important concept addressed by this book is tidy data, which is a format for 31 | tabular data formally introduced to the statistical community in a 2014 paper by Hadley 32 | Wickham. The tidy data organization strategy has proven a powerful abstract concept for 33 | conducting data analysis, in large part because of the vast toolchain implemented in the 34 | Tidyverse collection of R packages. The second key concept is the development of workflows 35 | for reproducible and auditable data analyses. Modern data analyses have only grown in 36 | complexity due to the availability of data and the ease with which we can implement complex 37 | data analysis procedures. Furthermore, these data analyses are often part of 38 | decision-making processes that may have significant impacts on people and communities. Therefore, 39 | there is a critical need to build reproducible analyses that can be studied and repeated by 40 | others in a reliable manner. Statistical methods clearly represent an important element 41 | of data science for building prediction and classification models and for making inferences 42 | about unobserved populations. Finally, because a field can succeed only if it fosters an 43 | active and collaborative community, it has become clear that being fluent in the tools of 44 | collaboration is a core element of data science. 45 | 46 | This book takes these core concepts and focuses on how one can apply them to *do* data 47 | science in a rigorous manner. 
Students who learn from this book will be well-versed in 48 | the techniques and principles behind producing reliable evidence from data. This book is 49 | centered around the use of the R programming language within the tidy data framework, 50 | and as such employs the most recent advances in data analysis coding. The use of Jupyter 51 | notebooks for exercises immediately places the student in an environment that encourages 52 | auditability and reproducibility of analyses. The integration of git and GitHub into the 53 | course is a key tool for teaching about collaboration and community, key concepts that are 54 | critical to data science. 55 | 56 | The demand for training in data science continues to increase. The availability of large 57 | quantities of data to answer a variety of questions, the computational power available to 58 | many more people than ever before, and the public awareness of the importance of data for 59 | decision-making have all contributed to the need for high-quality data science work. This 60 | book provides a sophisticated first introduction to the field of data science and provides 61 | a balanced mix of practical skills along with generalizable principles. As we continue to 62 | introduce students to data science and train them to confront an expanding array of data 63 | science problems, they will be well-served by the ideas presented here. 
64 | -------------------------------------------------------------------------------- /source/preamble.tex: -------------------------------------------------------------------------------- 1 | \usepackage{booktabs} 2 | \usepackage{longtable} 3 | \usepackage{float} 4 | \usepackage[bf,singlelinecheck=off]{caption} 5 | \usepackage[scale=.7]{sourcecodepro} 6 | \usepackage{url} 7 | \usepackage{fontawesome5} 8 | 9 | \usepackage{framed,color} 10 | \definecolor{shadecolor}{RGB}{248,248,248} 11 | 12 | \renewcommand{\textfraction}{0.05} 13 | \renewcommand{\topfraction}{0.8} 14 | \renewcommand{\bottomfraction}{0.8} 15 | \renewcommand{\floatpagefraction}{0.75} 16 | 17 | \renewenvironment{quote}{\begin{VF}}{\end{VF}} 18 | 19 | 20 | \IfFileExists{bookmark.sty}{\usepackage{bookmark}}{\usepackage{hyperref}} 21 | \let\oldhref\href 22 | \renewcommand{\href}[2]{#2\footnote{\url{#1}}} 23 | 24 | \makeatletter 25 | \newenvironment{kframe}{% 26 | \medskip{} 27 | \setlength{\fboxsep}{.8em} 28 | \def\at@end@of@kframe{}% 29 | \ifinner\ifhmode% 30 | \def\at@end@of@kframe{\end{minipage}}% 31 | \begin{minipage}{\columnwidth}% 32 | \fi\fi% 33 | \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep 34 | \colorbox{shadecolor}{##1}\hskip-\fboxsep 35 | % There is no \\@totalrightmargin, so: 36 | \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% 37 | \MakeFramed {\advance\hsize-\width 38 | \@totalleftmargin\z@ \linewidth\hsize 39 | \@setminipage}}% 40 | {\par\unskip\endMakeFramed% 41 | \at@end@of@kframe} 42 | \makeatother 43 | 44 | \renewenvironment{Shaded}{\begin{kframe}}{\end{kframe}} 45 | 46 | \usepackage{makeidx} 47 | \makeindex 48 | 49 | \urlstyle{tt} 50 | 51 | \usepackage{amsthm} 52 | \makeatletter 53 | \def\thm@space@setup{% 54 | \thm@preskip=8pt plus 2pt minus 4pt 55 | \thm@postskip=\thm@preskip 56 | } 57 | \makeatother 58 | 59 | \frontmatter 60 | -------------------------------------------------------------------------------- /source/preface.Rmd: 
-------------------------------------------------------------------------------- 1 | # Preface {-} 2 | 3 | This textbook aims to be an approachable introduction to the world of data science. 4 | In this book, we define **data science** \index{data science!definition} as the process of generating 5 | insight from data through **reproducible** \index{reproducible} and **auditable** \index{auditable} processes. 6 | If you analyze some data and give your analysis to a friend or colleague, they should 7 | be able to re-run the analysis from start to finish and get the same result you did (*reproducibility*). 8 | They should also be able to see and understand all the steps in the analysis, as well as the history of how 9 | the analysis developed (*auditability*). Creating reproducible and auditable 10 | analyses allows both you and others to easily double-check and validate your work. 11 | 12 | At a high level, in this book, you will learn how to 13 | 14 | 1. identify common problems in data science, and 15 | 2. solve those problems with reproducible and auditable workflows. 16 | 17 | Figure \@ref(fig:img-chapter-overview) summarizes what you will learn in each chapter 18 | of this book. 19 | Throughout, you will learn how to use the R programming language [@Rlanguage] to perform 20 | all the tasks associated with data analysis. You will 21 | spend the first four chapters learning how to use R to load, clean, wrangle 22 | (i.e., restructure the data into a usable format) and visualize data 23 | while answering descriptive and exploratory data analysis questions. In the next 24 | six chapters, you will learn how to answer predictive, exploratory, and inferential 25 | data analysis questions with common methods in data science, including 26 | classification, regression, clustering, and estimation. 
27 | In the final chapters 28 | (\@ref(jupyter)–\@ref(setup)), 29 | you will learn how to combine R code, formatted text, and images 30 | in a single coherent document with Jupyter, use version control for 31 | collaboration, and install and configure the software needed for data science 32 | on your own computer. If you are reading this book as part of a course that you are 33 | taking, the instructor may have set up all of these tools already for you; in this 34 | case, you can continue on through the book reading the chapters in order. 35 | But if you are reading this independently, you may want to jump to these last three chapters 36 | early, before going on, to make sure your computer is set up in such a way that you can 37 | try out the example code that we include throughout the book. 38 | 39 | ```{r img-chapter-overview, echo = FALSE, message = FALSE, warning = FALSE, fig.cap = "Where are we going?", out.width="100%", fig.retina = 2, fig.align = "center"} 40 | knitr::include_graphics("img/frontmatter/chapter_overview.png") 41 | ``` 42 | 43 | Each chapter in the book has an accompanying worksheet that provides exercises 44 | to help you practice the concepts you will learn. We strongly recommend that you 45 | work through the worksheet when you finish reading each chapter 46 | before moving on to the next chapter. All of the worksheets 47 | are available at 48 | [https://worksheets.datasciencebook.ca](https://worksheets.datasciencebook.ca); 49 | the "Exercises" section at the end of each chapter points you to the right worksheet for that chapter. 50 | For each worksheet, you can either launch an interactive version of the worksheet in your browser by clicking the "launch binder" button, 51 | or preview a non-interactive version of the worksheet by clicking "view worksheet." 52 | If you instead decide to download the worksheet and run it on your own machine, 53 | make sure to follow the instructions for computer setup 54 | found in Chapter \@ref(setup).
This will ensure that the automated feedback 55 | and guidance that the worksheets provide will function as intended. 56 | -------------------------------------------------------------------------------- /source/references.Rmd: -------------------------------------------------------------------------------- 1 | `r if (knitr:::is_html_output()) ' 2 | # References {-} 3 | '` 4 | -------------------------------------------------------------------------------- /source/style.css: -------------------------------------------------------------------------------- 1 | p.caption { 2 | color: #777; 3 | margin-top: 10px; 4 | } 5 | p code { 6 | white-space: inherit; 7 | } 8 | pre { 9 | word-break: normal; 10 | word-wrap: normal; 11 | } 12 | pre code { 13 | white-space: inherit; 14 | } 15 | 16 | .book-header h1 { 17 | display: none; 18 | } 19 | --------------------------------------------------------------------------------