├── pytorch
│   ├── DNN.png
│   ├── L2reg.png
│   ├── mtn.png
│   ├── conv2d.gif
│   ├── maxPool.gif
│   ├── mnist_0-9.png
│   ├── playground.png
│   ├── Workshop_DL.pdf
│   ├── overfitting.png
│   ├── overfitting2.png
│   ├── underfitting.png
│   ├── DNN_activations.png
│   ├── goodRegression.png
│   ├── iris_versicolor.jpg
│   ├── DNNRegressor_data.png
│   ├── DNNRegressor_fit.png
│   ├── README.md
│   ├── workshop_neural_net.md
│   ├── Workshop_CNN.ipynb
│   ├── Workshop_Classification.ipynb
│   └── Workshop_Regression_Class.ipynb
├── D3
│   ├── img
│   │   ├── end_code.png
│   │   ├── create_rect.png
│   │   ├── exercise1.png
│   │   ├── g_element.png
│   │   ├── numerically.png
│   │   ├── start_code.png
│   │   ├── create_x_axis.png
│   │   ├── data_variable.png
│   │   ├── exercise1_sol.png
│   │   ├── alphabetically.png
│   │   ├── create_bar_element.png
│   │   └── team_logo_games_labels.png
│   ├── exercise_1
│   │   ├── teams.csv
│   │   ├── exercise_1.css
│   │   ├── solution
│   │   │   ├── teams.csv
│   │   │   ├── solution_1.css
│   │   │   ├── index.html
│   │   │   └── solution_1.js
│   │   ├── index.html
│   │   └── exercise_1.js
│   ├── exercise_2
│   │   ├── teams.csv
│   │   ├── exercise_2.css
│   │   ├── solution
│   │   │   ├── teams.csv
│   │   │   ├── solution_2.css
│   │   │   ├── index.html
│   │   │   └── solution_2.js
│   │   ├── index.html
│   │   └── exercise_2.js
│   ├── exercise_3
│   │   ├── teams.csv
│   │   ├── exercise_3.css
│   │   ├── solution
│   │   │   ├── teams.csv
│   │   │   ├── solution_3.css
│   │   │   ├── index.html
│   │   │   ├── solution_3.html
│   │   │   └── solution_3.js
│   │   ├── index.html
│   │   └── exercise_3.js
│   ├── index.html
│   ├── preprocessing
│   │   └── preprocessing.py
│   ├── urls.js
│   ├── bar.css
│   └── sortable.js
├── NLP
│   ├── img
│   │   ├── nltk_spacy.png
│   │   ├── spacy_comp.PNG
│   │   └── pipeline.svg
│   └── README.md
├── regex
│   ├── img
│   │   ├── webpage.png
│   │   └── pagesource.png
│   ├── README.md
│   └── data
│       └── vins.txt
├── sql
│   ├── img
│   │   ├── screenshot.png
│   │   ├── foodforthought2.png
│   │   └── Visual_SQL_JOINS_orig.jpg
│   └── README.md
├── sqlite
│   ├── photos
│   │   ├── Odie.jpg
│   │   ├── lassie.jpg
│   │   ├── scooby.jpg
│   │   ├── snoopy.jpg
│   │   └── wallace.jpg
│   └── README.md
├── .gitattributes
├── fusion-tables
│   ├── README.md
│   └── Seattle_Parks.csv
├── thematic-maps
│   ├── img
│   │   ├── contour.png
│   │   ├── isopleth.png
│   │   ├── mi_choropleth.png
│   │   ├── small
│   │   │   ├── isopleth.png
│   │   │   ├── mi_choropleth.png
│   │   │   ├── top_20_crashes.png
│   │   │   └── snowmobile_crashes.png
│   │   ├── top_20_crashes.png
│   │   └── snowmobile_crashes.png
│   ├── README.md
│   ├── snow_crashes.csv
│   ├── snowmobile_crashes.txt
│   └── deer_in_the_city.txt
├── pdf-data-extraction
│   ├── 2013-02-005-v1.pdf
│   ├── summary_of_fees_collected.pdf
│   ├── MDOT_fastfacts02-2011_345554_7.pdf
│   ├── README.md
│   └── pdfminer_workshop.ipynb
├── matlab
│   ├── README.md
│   ├── gen_more_pts.m
│   ├── letters.m
│   └── workshop_plotname.m
├── dotmap
│   └── README.md
├── flask
│   └── README.md
├── geospatial-analysis
│   └── README.md
├── datashader
│   └── README.md
├── pandas
│   └── README.md
├── network-analysis
│   └── README.md
├── geopandas
│   └── README.md
├── webscraping
│   └── README.md
├── pyspark
│   ├── README.md
│   └── sample.csv
├── python-intro
│   └── README.md
├── README.md
├── .gitignore
└── sql-intermediate
    └── README.md
/pytorch/DNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNN.png
--------------------------------------------------------------------------------
/pytorch/L2reg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/L2reg.png
--------------------------------------------------------------------------------
/pytorch/mtn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/mtn.png
--------------------------------------------------------------------------------
/D3/img/end_code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/end_code.png
--------------------------------------------------------------------------------
/pytorch/conv2d.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/conv2d.gif
--------------------------------------------------------------------------------
/pytorch/maxPool.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/maxPool.gif
--------------------------------------------------------------------------------
/D3/img/create_rect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_rect.png
--------------------------------------------------------------------------------
/D3/img/exercise1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/exercise1.png
--------------------------------------------------------------------------------
/D3/img/g_element.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/g_element.png
--------------------------------------------------------------------------------
/D3/img/numerically.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/numerically.png
--------------------------------------------------------------------------------
/D3/img/start_code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/start_code.png
--------------------------------------------------------------------------------
/NLP/img/nltk_spacy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/NLP/img/nltk_spacy.png
--------------------------------------------------------------------------------
/NLP/img/spacy_comp.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/NLP/img/spacy_comp.PNG
--------------------------------------------------------------------------------
/pytorch/mnist_0-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/mnist_0-9.png
--------------------------------------------------------------------------------
/pytorch/playground.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/playground.png
--------------------------------------------------------------------------------
/regex/img/webpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/regex/img/webpage.png
--------------------------------------------------------------------------------
/sql/img/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/screenshot.png
--------------------------------------------------------------------------------
/sqlite/photos/Odie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/Odie.jpg
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set the default behavior, in case people don't have core.autocrlf set.
2 | * text=auto
--------------------------------------------------------------------------------
/D3/img/create_x_axis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_x_axis.png
--------------------------------------------------------------------------------
/D3/img/data_variable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/data_variable.png
--------------------------------------------------------------------------------
/D3/img/exercise1_sol.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/exercise1_sol.png
--------------------------------------------------------------------------------
/fusion-tables/README.md:
--------------------------------------------------------------------------------
1 | # Fusion Tables
2 | The slide deck is available at https://goo.gl/VDtjgn
3 |
--------------------------------------------------------------------------------
/pytorch/Workshop_DL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/Workshop_DL.pdf
--------------------------------------------------------------------------------
/pytorch/overfitting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/overfitting.png
--------------------------------------------------------------------------------
/pytorch/overfitting2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/overfitting2.png
--------------------------------------------------------------------------------
/pytorch/underfitting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/underfitting.png
--------------------------------------------------------------------------------
/regex/img/pagesource.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/regex/img/pagesource.png
--------------------------------------------------------------------------------
/sqlite/photos/lassie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/lassie.jpg
--------------------------------------------------------------------------------
/sqlite/photos/scooby.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/scooby.jpg
--------------------------------------------------------------------------------
/sqlite/photos/snoopy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/snoopy.jpg
--------------------------------------------------------------------------------
/D3/img/alphabetically.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/alphabetically.png
--------------------------------------------------------------------------------
/pytorch/DNN_activations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNN_activations.png
--------------------------------------------------------------------------------
/pytorch/goodRegression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/goodRegression.png
--------------------------------------------------------------------------------
/pytorch/iris_versicolor.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/iris_versicolor.jpg
--------------------------------------------------------------------------------
/sql/img/foodforthought2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/foodforthought2.png
--------------------------------------------------------------------------------
/sqlite/photos/wallace.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/wallace.jpg
--------------------------------------------------------------------------------
/D3/img/create_bar_element.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_bar_element.png
--------------------------------------------------------------------------------
/pytorch/DNNRegressor_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNNRegressor_data.png
--------------------------------------------------------------------------------
/pytorch/DNNRegressor_fit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNNRegressor_fit.png
--------------------------------------------------------------------------------
/thematic-maps/img/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/contour.png
--------------------------------------------------------------------------------
/D3/exercise_1/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 |
--------------------------------------------------------------------------------
/D3/exercise_2/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 |
--------------------------------------------------------------------------------
/D3/exercise_3/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 |
--------------------------------------------------------------------------------
/thematic-maps/img/isopleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/isopleth.png
--------------------------------------------------------------------------------
/D3/exercise_1/exercise_1.css:
--------------------------------------------------------------------------------
1 | .label{
2 | text-anchor: middle;
3 | }
4 |
5 | .barlabel {
6 | text-anchor: middle;
7 | }
--------------------------------------------------------------------------------
/D3/exercise_1/solution/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 |
--------------------------------------------------------------------------------
/D3/exercise_2/exercise_2.css:
--------------------------------------------------------------------------------
1 | .label{
2 | text-anchor: middle;
3 | }
4 |
5 | .barlabel {
6 | text-anchor: middle;
7 | }
--------------------------------------------------------------------------------
/D3/exercise_2/solution/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 |
--------------------------------------------------------------------------------
/D3/exercise_3/exercise_3.css:
--------------------------------------------------------------------------------
1 | .label{
2 | text-anchor: middle;
3 | }
4 |
5 | .barlabel {
6 | text-anchor: middle;
7 | }
--------------------------------------------------------------------------------
/D3/exercise_3/solution/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 |
--------------------------------------------------------------------------------
/D3/img/team_logo_games_labels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/team_logo_games_labels.png
--------------------------------------------------------------------------------
/sql/img/Visual_SQL_JOINS_orig.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/Visual_SQL_JOINS_orig.jpg
--------------------------------------------------------------------------------
/thematic-maps/img/mi_choropleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/mi_choropleth.png
--------------------------------------------------------------------------------
/thematic-maps/img/small/isopleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/isopleth.png
--------------------------------------------------------------------------------
/thematic-maps/img/top_20_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/top_20_crashes.png
--------------------------------------------------------------------------------
/D3/exercise_1/solution/solution_1.css:
--------------------------------------------------------------------------------
1 | .label{
2 | text-anchor: middle;
3 | }
4 |
5 | .barlabel {
6 | text-anchor: middle;
7 | }
--------------------------------------------------------------------------------
/D3/exercise_2/solution/solution_2.css:
--------------------------------------------------------------------------------
1 | .label{
2 | text-anchor: middle;
3 | }
4 |
5 | .barlabel {
6 | text-anchor: middle;
7 | }
--------------------------------------------------------------------------------
/pdf-data-extraction/2013-02-005-v1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/2013-02-005-v1.pdf
--------------------------------------------------------------------------------
/thematic-maps/img/small/mi_choropleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/mi_choropleth.png
--------------------------------------------------------------------------------
/thematic-maps/img/small/top_20_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/top_20_crashes.png
--------------------------------------------------------------------------------
/thematic-maps/img/snowmobile_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/snowmobile_crashes.png
--------------------------------------------------------------------------------
/D3/exercise_3/solution/solution_3.css:
--------------------------------------------------------------------------------
1 | .label{
2 | text-anchor: middle;
3 | }
4 |
5 | .barlabel {
6 | text-anchor: middle;
7 | }
--------------------------------------------------------------------------------
/thematic-maps/img/small/snowmobile_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/snowmobile_crashes.png
--------------------------------------------------------------------------------
/pdf-data-extraction/summary_of_fees_collected.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/summary_of_fees_collected.pdf
--------------------------------------------------------------------------------
/matlab/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to Matlab
2 |
3 | This workshop was created for the UROP program as a brief (2 hour) intro to Matlab and its capabilities.
4 |
--------------------------------------------------------------------------------
/pdf-data-extraction/MDOT_fastfacts02-2011_345554_7.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/MDOT_fastfacts02-2011_345554_7.pdf
--------------------------------------------------------------------------------
/sqlite/README.md:
--------------------------------------------------------------------------------
1 | # SQLite
2 | The Jupyter notebook can be found here:
3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/sqlite/sqlite3.ipynb
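4 | 
5 | A minimal sketch of the pattern the notebook presumably covers (the notebook name suggests Python's built-in `sqlite3` module; the dogs table is illustrative, riffing on the photos folder):
6 | 
7 | ```python
8 | import sqlite3
9 | 
10 | conn = sqlite3.connect(':memory:')  # or a .db file on disk
11 | cur = conn.cursor()
12 | cur.execute('CREATE TABLE dogs (name TEXT, breed TEXT)')
13 | cur.execute('INSERT INTO dogs VALUES (?, ?)', ('Lassie', 'Collie'))
14 | conn.commit()
15 | print(cur.execute('SELECT * FROM dogs').fetchall())
16 | ```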
--------------------------------------------------------------------------------
/dotmap/README.md:
--------------------------------------------------------------------------------
1 | # Working with Geographical Data and Parallel Computing on Flux
2 |
3 | The workshop code is in another repository at
4 | https://github.com/clarkdatalabs/dotmap_workshop
5 |
6 |
--------------------------------------------------------------------------------
/flask/README.md:
--------------------------------------------------------------------------------
1 | # Flask
2 |
3 | This 2 hr workshop introduces Flask for deploying web applications.
4 |
5 | My student, Ellen Paquet, prepared the workshop materials, which are located at https://github.com/epmarie/flask_example_app
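6 | 
7 | For a flavor of what gets deployed, here is a minimal sketch (not taken from the example repo; the file name `app.py` is an assumption):
8 | 
9 | ```python
10 | # app.py -- smallest possible Flask application
11 | from flask import Flask
12 | 
13 | app = Flask(__name__)
14 | 
15 | @app.route('/')
16 | def index():
17 |     return 'Hello, workshop!'
18 | 
19 | if __name__ == '__main__':
20 |     app.run(debug=True)  # development server only
21 | ```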
--------------------------------------------------------------------------------
/geospatial-analysis/README.md:
--------------------------------------------------------------------------------
1 | You can preview the HTML material at this Github HTML previewer:
2 | https://htmlpreview.github.io/?https://github.com/caocscar/workshops/blob/master/geospatial%20analysis/Geospatial%2BAnalysis%2BWorkshop.html
3 |
--------------------------------------------------------------------------------
/datashader/README.md:
--------------------------------------------------------------------------------
1 | # Datashader
2 |
3 | This 1 hr workshop introduces the datashader visualization tool for large datasets.
4 | https://github.com/caocscar/workshops/blob/master/datashader/datashader.ipynb
5 |
6 | ## External Files
7 | Files are too large to be included.
8 |
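9 | The gist of the API, as a sketch on synthetic data (standing in for the excluded files):
10 | 
11 | ```python
12 | import numpy as np
13 | import pandas as pd
14 | import datashader as ds
15 | import datashader.transfer_functions as tf
16 | 
17 | # one million random points as a stand-in dataset
18 | df = pd.DataFrame(np.random.randn(1_000_000, 2), columns=['x', 'y'])
19 | 
20 | canvas = ds.Canvas(plot_width=400, plot_height=400)  # raster size in pixels
21 | agg = canvas.points(df, 'x', 'y')  # count points falling in each pixel
22 | img = tf.shade(agg)  # map counts to colors
23 | ```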
--------------------------------------------------------------------------------
/D3/exercise_1/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/exercise_1/index.html
--------------------------------------------------------------------------------
/D3/exercise_2/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/exercise_2/index.html
--------------------------------------------------------------------------------
/D3/exercise_1/solution/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/exercise_1/solution/index.html
--------------------------------------------------------------------------------
/regex/README.md:
--------------------------------------------------------------------------------
1 | # Jupyter Notebook Viewer
2 | Regular Expression Part I
3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/regex/Regex%20Tutorial%20P1.ipynb
4 |
5 | Regular Expression Part II
6 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/regex/Regex%20Tutorial%20P2.ipynb
7 |
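8 | As a one-line preview of the topic (the `data/vins.txt` file suggests VIN matching; the pattern below is illustrative, not from the tutorials):
9 | 
10 | ```python
11 | import re
12 | 
13 | # a VIN is 17 alphanumeric characters, excluding I, O, and Q
14 | print(re.findall(r'\b[A-HJ-NPR-Z0-9]{17}\b', 'order 1FTFW1ET5DFC10312 shipped'))
15 | ```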
--------------------------------------------------------------------------------
/pandas/README.md:
--------------------------------------------------------------------------------
1 | # Intro to Pandas Workshop
2 |
3 | This workshop introduces the user to the world of `pandas` and includes common data wrangling methods.
4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pandas/pandas.ipynb
5 |
6 | ## External Files
7 |
8 | I've excluded the data files because they are large. [TODO] Include smaller versions of the files.
9 |
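10 | In the meantime, a sketch of the wrangling style covered, on inline data:
11 | 
12 | ```python
13 | import pandas as pd
14 | 
15 | df = pd.DataFrame({'team': ['Boston', 'Detroit', 'Boston'],
16 |                    'wins': [100, 85, 98]})
17 | # group, aggregate, sort -- the bread and butter of data wrangling
18 | print(df.groupby('team')['wins'].mean().sort_values(ascending=False))
19 | ```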
--------------------------------------------------------------------------------
/network-analysis/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to Network Analysis using igraph
2 |
3 | This 2 hr workshop introduces igraph for network analysis.
4 |
5 | https://nbviewer.jupyter.org/github/epmarie/network_workshop/blob/master/IntroNetworkAnalysis.ipynb
6 |
7 | My student, Ellen Paquet, prepared the workshop materials, which are located at https://github.com/epmarie/network_workshop
8 |
9 |
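10 | The flavor of the igraph API, as a sketch on a built-in example graph:
11 | 
12 | ```python
13 | import igraph as ig
14 | 
15 | g = ig.Graph.Famous('Zachary')  # the classic karate-club network
16 | print(g.vcount(), g.ecount())  # 34 nodes, 78 edges
17 | print(g.community_multilevel().membership)  # Louvain community detection
18 | ```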
--------------------------------------------------------------------------------
/D3/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/index.html
--------------------------------------------------------------------------------
/geopandas/README.md:
--------------------------------------------------------------------------------
1 | # GeoPandas
2 | This 2 hr workshop introduces `geopandas` and maybe some `fiona`, `shapely`, `rtree`, `pysal`, and `folium`.
3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/geopandas/Geopandas.ipynb
4 |
5 | My student, Yiming Cai, prepared the workshop materials.
6 |
7 | ## External Files
8 | I've excluded the shapefiles because they are large. [TODO] Include smaller versions of the files.
9 |
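10 | Until then, the core pattern as a sketch (`naturalearth_lowres` ships with geopandas releases before 1.0):
11 | 
12 | ```python
13 | import geopandas as gpd
14 | 
15 | # read a shapefile/GeoJSON into a GeoDataFrame
16 | world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
17 | world.plot(column='pop_est', legend=True)  # one-line choropleth
18 | ```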
--------------------------------------------------------------------------------
/webscraping/README.md:
--------------------------------------------------------------------------------
1 | Web Scraping in Python Notebook
2 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/webscraping_in_python.ipynb
3 |
4 | Google API Notebook
5 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Google.ipynb
6 |
7 | Twitter API Notebook
8 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Twitter.ipynb
9 |
10 |
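11 | A minimal sketch in the spirit of the first notebook (the URL is a placeholder):
12 | 
13 | ```python
14 | import requests
15 | from bs4 import BeautifulSoup
16 | 
17 | html = requests.get('https://example.com').text
18 | soup = BeautifulSoup(html, 'html.parser')
19 | print(soup.title.string)  # the page title
20 | print([a.get('href') for a in soup.find_all('a')])  # every link target
21 | ```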
--------------------------------------------------------------------------------
/matlab/gen_more_pts.m:
--------------------------------------------------------------------------------
1 | function M2 = gen_more_pts(M)
2 | % GEN_MORE_PTS Densify a polyline M (n-by-2 matrix of x,y points) by
3 | % interpolating 100 points along each segment between consecutive rows.
4 | 
5 | if size(M,1) > 20
6 |     disp('You have too many points submitted. This will take forever!!!')
7 |     M2 = M;
8 |     return
9 | end
10 | M2 = [];
11 | for i = 1:size(M,1)-1
12 |     x1 = M(i,1);
13 |     x2 = M(i+1,1);
14 |     y1 = M(i,2);
15 |     y2 = M(i+1,2);
16 |     x = linspace(x1,x2,100)';
17 |     y = linspace(y1,y2,100)';
18 |     M2 = [M2; x y];  % append segment points (shared endpoints repeat)
19 | end
20 | 
--------------------------------------------------------------------------------
/pytorch/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch Workshop
2 |
3 | [**Regression Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Regression_Class.ipynb)
4 |
5 | [**Classification Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Classification.ipynb)
6 |
7 | [**Image Classification Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_CNN.ipynb)
8 |
--------------------------------------------------------------------------------
/pdf-data-extraction/README.md:
--------------------------------------------------------------------------------
1 | # Extracting Data from PDF
2 |
3 | There are 2 Jupyter Notebooks for this workshop (preferably done in this order):
4 | Tabula
5 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pdf%20data%20extraction/tabula_workshop.ipynb
6 |
7 | PDF Miner
8 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pdf%20data%20extraction/pdfminer_workshop.ipynb
9 |
10 | ## External Files
11 | The `workshop_registration.pdf` is missing for privacy reasons. [TODO] Find a new pdf to use
12 |
13 |
14 |
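15 | A sketch of the pattern the Tabula notebook covers (assuming `tabula-py`, which requires a Java runtime):
16 | 
17 | ```python
18 | import tabula
19 | 
20 | # returns a list of DataFrames, one per table detected in the PDF
21 | tables = tabula.read_pdf('summary_of_fees_collected.pdf', pages='all')
22 | print(tables[0].head())
23 | ```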
--------------------------------------------------------------------------------
/thematic-maps/README.md:
--------------------------------------------------------------------------------
1 | # Create Thematic Maps with Python
2 |
3 | This 2 hr workshop demonstrates how to create thematic maps using Matplotlib.
4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/thematic%20maps/Thematic%20Maps%20with%20Matplotlib.ipynb
5 |
6 | Thematic Type|Image
7 | :---:|---
8 | Choropleth Map|
9 | Dot Map|
10 | Proportional Dot Map|
11 | Isopleth|
12 |
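13 | The proportional dot map idea boils down to a scatter plot whose marker area encodes magnitude; a sketch on made-up coordinates:
14 | 
15 | ```python
16 | import matplotlib.pyplot as plt
17 | 
18 | lon, lat = [-83.7, -84.5, -85.6], [42.3, 42.7, 44.8]  # made-up locations
19 | crashes = [120, 45, 300]  # made-up counts
20 | plt.scatter(lon, lat, s=crashes)  # marker area scales with the count
21 | plt.show()
22 | ```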
--------------------------------------------------------------------------------
/NLP/README.md:
--------------------------------------------------------------------------------
1 | # Intro to Natural Language Processing
2 |
3 | #### Jupyter Notebook Viewer Version
4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/NLP/IntroNLP.ipynb
5 |
6 | This 2.5 hr workshop covers the following Python packages:
7 | - `spaCy` (tagger, parser, named-entity recognition)
8 | - `textacy` (n-grams)
9 | - `gensim` (topic modelling)
10 | - `pyLDAvis` (visualization)
11 | - `textblob` (sentiment analysis)
12 |
13 | My student, Ellen Paquet, prepared the workshop materials. Her original repo is located at https://github.com/epmarie/IntroNLP
14 |
15 |
16 |
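17 | The spaCy portion in a nutshell (sketch; assumes the small English model is installed via `python -m spacy download en_core_web_sm`):
18 | 
19 | ```python
20 | import spacy
21 | 
22 | nlp = spacy.load('en_core_web_sm')  # tagger, parser, and NER in one pipeline
23 | doc = nlp('Ann Arbor is home to the University of Michigan.')
24 | for ent in doc.ents:  # named entities found by the pipeline
25 |     print(ent.text, ent.label_)
26 | ```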
--------------------------------------------------------------------------------
/D3/exercise_3/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/exercise_3/index.html
--------------------------------------------------------------------------------
/D3/exercise_2/solution/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/exercise_2/solution/index.html
--------------------------------------------------------------------------------
/D3/exercise_3/solution/index.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/exercise_3/solution/index.html
--------------------------------------------------------------------------------
/D3/exercise_3/solution/solution_3.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/exercise_3/solution/solution_3.html
--------------------------------------------------------------------------------
/D3/preprocessing/preprocessing.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Oct 18 16:17:58 2019
4 |
5 | @author: caoa
6 | """
7 | import pandas as pd
8 | 
9 | pd.options.display.max_rows = 20
10 | pd.options.display.max_columns = 20
11 | 
12 | df = pd.read_csv('GL2018.TXT', header=None, usecols=[0,3,6,9,10])
13 | df.columns = ['date','away','home','aRuns','hRuns']
14 | 
15 | #%% determine the winner of each game
16 | df['team'] = df.apply(lambda x: x['away'] if x['aRuns'] > x['hRuns'] else x['home'], axis=1)
17 | data = df[['date','team']].copy()  # copy so the datetime conversion below avoids SettingWithCopyWarning
18 | data.to_csv('daily_snapshot.csv', index=False)
19 | 
20 | #%% Find first day where all teams have won at least one game
21 | data['date'] = pd.to_datetime(data['date'], format='%Y%m%d')
22 | daterange = pd.date_range('2018-03-29','2018-10-01',freq='D')
23 | for day in daterange:
24 |     wins_to_date = data[data['date'] <= day]
25 |     wins_by_team = wins_to_date.team.value_counts()
26 |     if wins_by_team.shape[0] >= 30:  # all 30 MLB teams have at least one win
27 |         print(day)
28 |         break
29 | 
--------------------------------------------------------------------------------
/D3/urls.js:
--------------------------------------------------------------------------------
1 | const urls = {
2 | 'ARI':'Arizona_Diamondbacks',
3 | 'ATL':'Atlanta_Braves',
4 | 'SFN':'SanFrancisco_Giants',
5 | 'CHN':'Chicago_Cubs',
6 | 'NYN':'NewYork_Mets',
7 | 'MIL':'Milwaukee_Brewers',
8 | 'BAL':'Baltimore_Orioles',
9 | 'CHA':'Chicago_White_Sox',
10 | 'OAK':'Oakland_Athletics',
11 | 'SEA':'Seattle_Mariners',
12 | 'TBA':'TampaBay_Rays',
13 | 'HOU':'Houston_Astros',
14 | 'NYA':'NewYork_Yankees',
15 | 'PHI':'Philadelphia_Phillies',
16 | 'WAS':'Washington_Nationals',
17 | 'MIA':'Miami_Marlins',
18 | 'PIT':'Pittsburgh_Pirates',
19 | 'ANA':'LosAngeles_Angels',
20 | 'BOS':'Boston_Redsox',
21 | 'TEX':'Texas_Rangers',
22 | 'COL':'Colorado_Rockies',
23 | 'LAN':'LosAngeles_Dodgers',
24 | 'MIN':'Minnesota_Twins',
25 | 'CLE':'Cleveland_Indians',
26 | 'TOR':'Toronto_Blue_Jays',
27 | 'SLN':'StLouis_Cardinals',
28 | 'CIN':'Cincinnati_Reds',
29 | 'DET':'Detroit_Tigers',
30 | 'SDN':'SanDiego_Padres',
31 | 'KCA':'KansasCity_Royals',
32 | }
--------------------------------------------------------------------------------
/D3/bar.css:
--------------------------------------------------------------------------------
1 | /* .chart {
2 | clip-path: url(#clip);
3 | } */
4 |
5 | .bar {
6 | fill: orange;
7 | }
8 |
9 | .x.axis text {
10 | font: 15px sans-serif;
11 | }
12 |
13 | .axis path, .axis line {
14 | fill: none;
15 | stroke: #000;
16 | shape-rendering: crispEdges;
17 | }
18 |
19 | .label {
20 | text-anchor: middle;
21 | font: 20px helvetica;
22 | }
23 |
24 | #date {
25 | text-anchor: start;
26 | font: 20px helvetica;
27 | }
28 |
29 | .grid line {
30 | stroke: lightgrey;
31 | stroke-opacity: 0.7;
32 | shape-rendering: crispEdges;
33 | }
34 |
35 | .grid path {
36 | stroke-width: 0;
37 | }
38 |
39 | .team {
40 | fill: black;
41 | font: 14px sans-serif;
42 | text-anchor: end;
43 | font-weight: 600;
44 | }
45 |
46 | .barlabel{
47 | fill: black;
48 | font: 14px sans-serif;
49 | text-anchor: start;
50 | font-weight: 600;
51 | }
52 |
53 | .logo {
54 | fill: black;
55 | font: 14px sans-serif;
56 | text-anchor: middle;
57 | }
58 |
59 | .divisions {
60 | stroke: black;
61 | stroke-width: 2;
62 | stroke-dasharray: 12;
63 | }
--------------------------------------------------------------------------------
/pyspark/README.md:
--------------------------------------------------------------------------------
1 | # PySpark: DataFrames, Datasets, and SparkSQL
2 | [pyspark.md](pyspark.md) contains the markdown material for the PySpark workshop.
3 |
4 | # Scala: DataFrames, Datasets, and SparkSQL
5 | [scala.md](scala.md) contains the markdown material for the Scala workshop.
6 |
7 | # PySpark vs. Scala
8 | Here's an [article](https://www.pluralsight.com/blog/software-development/scala-vs-python) comparing the two of them.
9 |
10 | ## Using PySpark with the Twitter Decahose dataset on Cavium
11 | The github repo is located at https://github.com/caocscar/twitter-decahose-pyspark
12 |
13 | **Note**: You need to get permission to access the dataset first. More information available at: https://midas.umich.edu/research-datasets/
14 |
15 | ## Cheat Sheets
16 | My github Hadoop cheat sheet
17 | https://github.com/caocscar/hadoopcheatsheet
18 |
19 | DataCamp's cheat sheet for PySpark DataFrames
20 | https://s3.amazonaws.com/assets.datacamp.com/blog_assets/PySpark_SQL_Cheat_Sheet_Python.pdf
21 |
22 | Edureka's cheat sheet for PySpark RDDs
23 | https://www.edureka.co/blog/cheatsheets/pyspark-cheat-sheet-python/
24 |
25 |
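26 | ## Minimal Example
27 | A sketch of the DataFrame + SparkSQL pattern using the included `sample.csv` (the view name `bsm` is arbitrary):
28 | 
29 | ```python
30 | from pyspark.sql import SparkSession
31 | 
32 | spark = SparkSession.builder.getOrCreate()
33 | df = spark.read.csv('sample.csv', header=True, inferSchema=True)
34 | df.createOrReplaceTempView('bsm')  # expose the DataFrame to SQL
35 | spark.sql('SELECT RxDevice, MAX(Speed) AS top_speed FROM bsm GROUP BY RxDevice').show()
36 | ```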
--------------------------------------------------------------------------------
/python-intro/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to Python
2 |
3 | There are no notes for this 6 hour workshop. I do it freestyle using a project-based learning approach and provide attendees with a beginner's cheat sheet.
4 |
5 | Exercises
6 | https://goo.gl/bw1J9L
7 |
8 | Mastermind Game Online
9 | http://www.webgamesonline.com/mastermind/
10 |
11 | A good cheat sheet for beginners is located at
12 | http://ehmatthes.github.io/pcc/cheatsheets/README.html
13 |
14 | Here is a link for learning Python for programmers
15 | https://wiki.python.org/moin/BeginnersGuide/Programmers
16 |
17 | Here is a link for learning Python for non-programmers
18 | https://wiki.python.org/moin/BeginnersGuide/NonProgrammers
19 |
20 | ## Python Topics Covered
21 | ### Python Functions
22 | ```
23 | input
24 | type
25 | from import
26 | random
27 | range
28 | print
29 | len
30 | zip
31 | id
32 | time
33 | ```
34 |
35 | ### Data Types
36 | ```
37 | int
38 | float
39 | string
40 | list
41 | tuple
42 | dictionary
43 | set
44 | ```
45 |
46 | ### Control Flow
47 | ```
48 | if elif else
49 | for
50 | while
51 | continue
52 | break
53 | pass
54 | ```
55 |
56 | ### File I/O
57 | ```
58 | with
59 | open
60 | write
61 | read
62 | readlines
63 | ```
64 |
65 | ### Miscellaneous
66 | comments
67 | list comprehension
68 | casting variables
69 | how to write a function
70 | integer division
71 | reference vs. copying variables
72 | banker's rounding (see the example below)
73 |
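74 | ### Example: Banker's Rounding
75 | The last gotcha above, shown concretely: Python 3's `round` rounds halves to the nearest even integer.
76 | 
77 | ```python
78 | print(round(0.5))  # 0, not 1
79 | print(round(1.5))  # 2
80 | print(round(2.5))  # 2, not 3
81 | ```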
--------------------------------------------------------------------------------
/sql/README.md:
--------------------------------------------------------------------------------
1 | # Intro to SQL
2 | Here is the [WORKSHOP SLIDE DECK](http://nbviewer.jupyter.org/format/slides/github/caocscar/workshops/blob/master/sql/SQLslides.ipynb#/).
3 | We'll be using the [w3schools website](https://www.w3schools.com/sql/) to write queries. It is also a good reference for SQL.
4 |
5 | ---
6 | # Miscellaneous Stuff
7 |
8 | ## Converting Jupyter Notebook into Slide Deck
9 | The following command will render your Jupyter Notebook into a **reveal.js** slide deck.
10 |
11 | `jupyter nbconvert SQLslides.ipynb --to slides --post serve`
12 |
13 | The `--post serve` option starts up a local server to host it.
14 |
15 | **Tip**: Make sure your Jupyter notebook is closed before running the command.
16 |
17 | ### Configuration Options
18 | More options available at https://nbconvert.readthedocs.io/en/latest/config_options.html
19 |
20 | ## How to Post Slide Deck Online
21 | 1. Go to http://nbviewer.jupyter.org
22 | 2. Enter url where the Jupyter Notebook file can be located.
23 | 3. Make sure **nbviewer** is in *slide mode* and not *notebook mode* among the icons in the top right.
24 |
25 | ## Contributors
26 | The slide deck was created originally by my student [Maggie Orton](https://github.com/margamo/intro-to-SQL) on March 14, 2017.
27 |
28 | And modified by my student [Kaitlin Cornwell](https://github.com/kaitcorn/intro-to-SQL) on March 16, 2018.
29 |
30 | And further modified by my student [Jessica Zhang](https://github.com/jezzhang/sqlworkshop) on January 31, 2020.
31 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CSCAR Workshops
2 | This is a list of some (but not all) CSCAR Workshops I've done (in no particular order).
3 | - [Introduction to Matlab](matlab)
4 | - [Introduction to Python](python-intro)
5 | - [Pandas](pandas)
6 | - [Introduction to SQL](sql)
7 | - [Intermediate SQL](sql-intermediate)
8 | - [SQLite](sqlite)
9 | - [Regular Expressions](regex)
10 | - [Natural Language Processing with Python](NLP)
11 | - [Network Analysis with igraph](network-analysis)
12 | - [SparkSQL and DataFrames with PySpark](pyspark) (Using PySpark with the [Twitter Decahose dataset on Cavium](https://github.com/caocscar/twitter-decahose-pyspark))
13 | - [GeoPandas](geopandas)
14 | - [Geospatial Analysis with Python](geospatial-analysis)
15 | - [Working with Geographical Data and Parallel Computing on Flux](dotmap)
16 | - [Thematic Maps with Python](thematic-maps)
17 | - [Datashader](datashader)
18 | - [Google Fusion Tables](fusion-tables) (this product is no longer available)
19 | - [Web Scraping with Python](webscraping)
20 | - [Scraping HTML](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/webscraping_in_python.ipynb)
21 | - [Google and YouTube APIs](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Google.ipynb)
22 | - [Twitter APIs](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Twitter.ipynb) (instructions on how to set up a [developer account](https://github.com/caocscar/twitter-create-developer-account))
23 | - [Extracting Data from PDFs](pdf-data-extraction)
24 | - [Flask](flask)
25 | - [Introduction to PyTorch](pytorch)
26 | - [Introduction to D3.js](D3)
27 | - explains the code used to generate the data viz at https://d3-examples-caocscar.onrender.com/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Jupyter Notebooks
2 | webscraping/.ipynb_checkpoints/
3 | .ipynb_checkpoints/
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | env/
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | .hypothesis/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # dotenv
87 | .env
88 |
89 | # virtualenv
90 | .venv
91 | venv/
92 | ENV/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 |
--------------------------------------------------------------------------------
/matlab/letters.m:
--------------------------------------------------------------------------------
1 | A = [0 0;
2 | 1 4;
3 | 2 0;
4 | 1.5 2;
5 | 0.5 2];
6 | B = [0 0;
7 | 0 4;
8 | 1.5 3.75;
9 | 1.5 2.25;
10 | 0.1 2;
11 | 1.5 1.75
12 | 1.5 0.25;
13 | 0 0];
14 | C = [1.5 0;
15 | 0 0;
16 | 0 4;
17 | 1.5 4];
18 | D = [0 0;
19 | 0 4;
20 | 1.5 3.75;
21 | 1.5 0.25;
22 | 0 0];
23 | E = [2 0;
24 | 0 0;
25 | 0 2;
26 | 1 2;
27 | 0 2;
28 | 0 4;
29 | 2 4];
30 | F = [0 0;
31 | 0 2;
32 | 1 2;
33 | 0 2;
34 | 0 4;
35 | 2 4];
36 | G = [1.5 4;
37 | 0 4;
38 | 0 0;
39 | 1.5 0;
40 | 1.5 2;
41 | 1 2;
42 | 2 2];
43 | H = [0 0;
44 | 0 4;
45 | 0 2;
46 | 2 2;
47 | 2 0;
48 | 2 4];
49 | I = [0 0;
50 | 2 0;
51 | 1 0;
52 | 1 4;
53 | 0 4;
54 | 2 4];
55 | J = [0 0.75;
56 | 0 0;
57 | 1.25 0;
58 | 1.25 4;
59 | 0.5 4;
60 | 2 4];
61 | K = [0 0;
62 | 0 4;
63 | 0 2;
64 | 1.5 4;
65 | 0 2;
66 | 1.5 0];
67 | L = [0 4;
68 | 0 0;
69 | 2 0];
70 | M = [0 0;
71 | 0.25 4;
72 | 1 2;
73 | 1.75 4;
74 | 2 0];
75 | N = [0 0;
76 | 0 4;
77 | 2 0;
78 | 2 4];
79 | O = [0 0;
80 | 0 4;
81 | 1.5 4;
82 | 1.5 0;
83 | 0 0];
84 | P = [0 0;
85 | 0 4;
86 | 1.5 4;
87 | 1.5 2;
88 | 0 2];
89 | Q = [1.5 0.25;
90 | 0 0.25;
91 | 0 4;
92 | 1.5 4;
93 | 1.5 0.25;
94 | 1.75 0;
95 | 1.25 0.5];
96 | R = [0 0;
97 | 0 4;
98 | 1.5 4;
99 | 1.5 2;
100 | 0 2;
101 | 1.5 0];
102 | S = [0 0;
103 | 1.5 0;
104 | 1.5 2;
105 | 0 2;
106 | 0 4;
107 | 1.5 4];
108 | T = [1 0;
109 | 1 4;
110 | 0 4;
111 | 2 4];
112 | U = [0 4;
113 | 0 0;
114 | 2 0;
115 | 2 4];
116 | V = [0 4;
117 | 1 0;
118 | 2 4];
119 | W = [0 4;
120 | 0.25 0;
121 | 1 2;
122 | 1.75 0;
123 | 2 4];
124 | X = [0 4;
125 | 2 0;
126 | 1 2;
127 | 2 4;
128 | 0 0];
129 | Y = [0 0;
130 | 2 4;
131 | 1 2;
132 | 0 4];
133 | Z = [2 0;
134 | 0 0;
135 | 2 4;
136 | 0 4];
137 | alphabet = {A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z};
138 | clear A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
--------------------------------------------------------------------------------
/D3/exercise_1/exercise_1.js:
--------------------------------------------------------------------------------
1 | // set the dimensions and margins of the graph
2 | var outerWidth = 650;
3 | var outerHeight = 300;
4 |
5 | var margin = {top: 20, right: 20, bottom: 70, left: 100},
6 | width = outerWidth - margin.left - margin.right - 20,
7 | height = outerHeight - margin.top - margin.bottom;
8 |
9 | // set the ranges
10 | var x = d3.scaleLinear()
11 | .range([0, width]);
12 |
13 | var y = d3.scaleBand()
14 | .range([height, 0])
15 | .padding(0.33);
16 |
17 | var xAxis = d3.axisTop(x)
18 | .ticks(5)
19 |
20 | var yAxis = d3.axisLeft(y)
21 | .tickFormat('')
22 |
23 | // append the svg object to the body of the page
24 | // append a 'group' element to 'svg'
25 | // moves the 'group' element to the top left margin
26 | var svg = d3.select('body').append('svg')
27 | .attr("class", "chart")
28 | .attr("width", outerWidth)
29 | .attr("height", outerHeight)
30 | .append("g")
31 | .attr("transform", `translate(${margin.left},${margin.top})`);
32 |
33 | // data
34 | var data = [{'team':'Boston','value':100},
35 | {'team':'Detroit','value':85},
36 | {'team':'New York','value':80},
37 | {'team':'Atlanta','value':75},
38 | {'team':'Chicago','value':30}]
39 |
40 | // scale the range of the data in the domains
41 | x.domain([0, d3.max(data, d => d.value)])
42 | y.domain(data.map(d => d.team));
43 |
44 | // append the rectangles for the bar chart
45 | var bar = svg.selectAll(".bar")
46 | .data(data)
47 | .join("g")
48 | .attr("class","bar")
49 |
50 | var rect = bar.append('rect')
51 | .attr("width", d => x(d.value))
52 | .attr("y", d => y(d.team))
53 | .attr("height", y.bandwidth())
54 | .attr("x", 0)
55 | .style('fill', d => d3.interpolatePurples(d.value/100))
56 |
57 | // add the x Axis
58 | svg.append("g")
59 | .attr("transform", `translate(0, ${height})`)
60 | .call(d3.axisBottom(x));
61 |
62 | // add the y Axis
63 | svg.append("g")
64 | .call(d3.axisLeft(y));
65 |
66 | // add chart labels
67 | labels = svg.append('g')
68 | .attr('class', 'label')
69 |
70 | // x label
71 | labels.append('text')
72 | .attr('transform', `translate(${width/2},250)`)
73 | .text('Wins')
74 |
75 | // y label
76 | ylabel = labels.append('text')
77 | .attr('transform', `translate(-65,${height/2}) rotate(-90)`)
78 | .text('Teams')
79 |
80 | barLabels = bar.append('text')
81 | .attr('class', 'barlabel')
82 | .attr('x', d => x(d.value) - 20)
83 | .attr('y', d => y(d.team) + (y.bandwidth()/2) + 4)
84 | .text(d => d.value)
85 | .style('fill', 'black')
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
--------------------------------------------------------------------------------
/D3/exercise_2/exercise_2.js:
--------------------------------------------------------------------------------
1 | // set the dimensions and margins of the graph
2 | var outerWidth = 960;
3 | var outerHeight = 500;
4 |
5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
6 | width = outerWidth - margin.left - margin.right,
7 | height = outerHeight - margin.top - margin.bottom;
8 |
9 | // set the ranges
10 | var x = d3.scaleBand()
11 | .range([0, width])
12 | .padding(0.33);
13 |
14 | var y= d3.scaleLinear()
15 | .range([height, 0]);
16 |
17 |
18 | var xAxis = d3.axisTop(x)
19 | .ticks(5)
20 |
21 | var yAxis = d3.axisLeft(y)
22 | .tickFormat('')
23 |
24 | // append the svg object to the body of the page
25 | // append a 'group' element to 'svg'
26 | // moves the 'group' element to the top left margin
27 | var svg = d3.select('body').append('svg')
28 | .attr("class", "chart")
29 | .attr("width", outerWidth)
30 | .attr("height", outerHeight)
31 | .append("g")
32 | .attr("transform", `translate(${margin.left},${margin.top})`);
33 |
34 | // data
35 | var data = [{'team':'Boston','value':100},
36 | {'team':'Detroit','value':85},
37 | {'team':'New York','value':80},
38 | {'team':'Atlanta','value':75},
39 | {'team':'Chicago','value':30}]
40 |
41 |
42 | // scale the range of the data in the domains
43 | x.domain(data.map(d => d.team));
44 | y.domain([0, d3.max(data, d => d.value)])
45 |
46 |
47 | // append the rectangles for the bar chart
48 | var bar = svg.selectAll(".bar")
49 | .data(data)
50 | .join("g")
51 | .attr("class","bar")
52 |
53 |
54 |
55 | var rect = bar.append('rect')
56 | .attr("height", d => height - y(d.value))
57 | .attr("x", d => x(d.team))
58 | .attr("width", x.bandwidth())
59 | .attr("y", d => y(d.value))
60 | .style('fill', d => d3.interpolatePurples(d.value/100));
61 |
62 | // add the x Axis
63 | svg.append("g")
64 | .attr("transform", `translate(0, ${height})`)
65 | .call(d3.axisBottom(x));
66 |
67 | // add the y Axis
68 | svg.append("g")
69 | .call(d3.axisLeft(y));
70 |
71 | // add chart labels
72 | labels = svg.append('g')
73 | .attr('class', 'label')
74 |
75 | // x label
76 | labels.append('text')
77 | .attr('transform', `translate(${width/2},450)`)
78 | .text('Teams')
79 |
80 | // y label
81 | ylabel = labels.append('text')
82 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`)
83 | .text('Wins')
84 |
85 | barLabels = bar.append('text')
86 | .attr('class', 'barlabel')
87 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
88 | .attr('y', d => y(d.value) - 15)
89 | .text(d => d.value)
90 | .style('fill', 'black')
91 |
--------------------------------------------------------------------------------
/D3/exercise_1/solution/solution_1.js:
--------------------------------------------------------------------------------
1 | // set the dimensions and margins of the graph
2 | var outerWidth = 960;
3 | var outerHeight = 500;
4 |
5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
6 | width = outerWidth - margin.left - margin.right,
7 | height = outerHeight - margin.top - margin.bottom;
8 |
9 | // set the ranges
10 | var x = d3.scaleBand()
11 | .range([0, width])
12 | .padding(0.33);
13 |
14 | var y= d3.scaleLinear()
15 | .range([height, 0]);
16 |
17 |
18 | var xAxis = d3.axisTop(x)
19 | .ticks(5)
20 |
21 | var yAxis = d3.axisLeft(y)
22 | .tickFormat('')
23 |
24 | // append the svg object to the body of the page
25 | // append a 'group' element to 'svg'
26 | // moves the 'group' element to the top left margin
27 | var svg = d3.select('body').append('svg')
28 | .attr("class", "chart")
29 | .attr("width", outerWidth)
30 | .attr("height", outerHeight)
31 | .append("g")
32 | .attr("transform", `translate(${margin.left},${margin.top})`);
33 |
34 | // data
35 | var data = [{'team':'Boston','value':100},
36 | {'team':'Detroit','value':85},
37 | {'team':'New York','value':80},
38 | {'team':'Atlanta','value':75},
39 | {'team':'Chicago','value':30}]
40 |
41 |
42 | // scale the range of the data in the domains
43 | x.domain(data.map(d => d.team));
44 | y.domain([0, d3.max(data, d => d.value)])
45 |
46 |
47 | // append the rectangles for the bar chart
48 | var bar = svg.selectAll(".bar")
49 | .data(data)
50 | .join("g")
51 | .attr("class","bar")
52 |
53 |
54 |
55 | var rect = bar.append('rect')
56 | .attr("height", d => height - y(d.value))
57 | .attr("x", d => x(d.team))
58 | .attr("width", x.bandwidth())
59 | .attr("y", d => y(d.value))
60 | .style('fill', d => d3.interpolatePurples(d.value/100));
61 |
62 | // add the x Axis
63 | svg.append("g")
64 | .attr("transform", `translate(0, ${height})`)
65 | .call(d3.axisBottom(x));
66 |
67 | // add the y Axis
68 | svg.append("g")
69 | .call(d3.axisLeft(y));
70 |
71 | // add chart labels
72 | labels = svg.append('g')
73 | .attr('class', 'label')
74 |
75 | // x label
76 | labels.append('text')
77 | .attr('transform', `translate(${width/2},450)`)
78 | .text('Teams')
79 |
80 | // y label
81 | ylabel = labels.append('text')
82 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`)
83 | .text('Wins')
84 |
85 | barLabels = bar.append('text')
86 | .attr('class', 'barlabel')
87 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
88 | .attr('y', d => y(d.value) - 15)
89 | .text(d => d.value)
90 | .style('fill', 'black')
91 |
--------------------------------------------------------------------------------
/NLP/img/pipeline.svg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/NLP/img/pipeline.svg
--------------------------------------------------------------------------------
/pyspark/sample.csv:
--------------------------------------------------------------------------------
1 | RxDevice,FileId,TxDevice,Gentime,TxRandom,MsgCount,DSecond,Latitude,Longitude,Elevation,Speed,Heading,Ax,Ay,Az,Yawrate,PathCount,RadiusOfCurve,Confidence
2 | 30,950898,30,286304909792863,0,29,3700,42.285103,-83.813293,253.8,0.0,26.799999,0.44999999,0.30000001,-10.0,1.22,6,3276.7,100
3 | 30,950898,30,286304909892863,0,30,3800,42.285103,-83.813293,253.8,0.40000001,26.799999,0.38,0.22,-10.0,1.46,6,3276.7,100
4 | 30,950898,30,286304909992863,0,31,3900,42.285103,-83.813293,253.7,0.5,26.799999,0.38,0.22,-10.0,1.46,6,3276.7,100
5 | 30,950898,30,286304910092861,0,32,4000,42.285103,-83.813293,253.7,0.62,26.799999,0.52999997,0.30000001,-10.0,1.95,6,3276.7,100
6 | 30,950898,30,286304910193010,0,33,4100,42.285107,-83.813293,253.7,0.72000003,26.799999,0.69,0.38,-10.0,2.4400001,6,3276.7,100
7 | 30,950898,30,286304910292864,0,34,4200,42.285107,-83.813293,253.7,0.83999997,26.799999,0.83999997,0.30000001,-10.0,2.4400001,6,3276.7,100
8 | 30,950898,30,286304910392995,0,35,4300,42.285107,-83.813293,253.7,0.98000002,26.799999,1.0,0.38,-10.0,3.1700001,6,3276.7,100
9 | 30,950898,30,286304910492990,0,36,4400,42.285107,-83.813293,253.7,1.14,26.799999,1.23,0.44999999,-10.0,3.9000001,6,16.799999,100
10 | 30,950898,30,286304910593130,0,37,4500,42.285107,-83.813293,253.7,1.28,26.799999,1.3099999,0.44999999,-10.0,4.1500001,6,17.700001,100
11 | 30,950898,30,286304910693004,0,38,4600,42.28511,-83.813293,253.7,1.4400001,29.35,1.39,0.52999997,-10.0,4.8800001,6,18.299999,78
12 | 30,950898,30,286304910792863,0,39,4700,42.28511,-83.813286,253.7,1.6,29.924999,1.23,0.52999997,-10.0,5.6100001,6,18.700001,52
13 | 30,950898,30,286304910892982,0,40,4800,42.28511,-83.813286,253.7,1.78,32.299999,1.39,0.69,-10.0,6.3400002,6,18.9,45
14 | 30,950898,30,286304910992863,0,41,4900,42.285114,-83.813286,253.7,1.98,32.487499,1.39,0.69,-10.0,6.8200002,6,19.0,42
15 | 30,950898,30,286304911092864,0,42,5000,42.285114,-83.813286,253.7,2.1800001,33.637501,1.7,0.75999999,-10.0,7.8000002,6,18.9,39
16 | 30,950898,30,286304911192872,0,43,5100,42.285118,-83.813278,253.7,2.3599999,34.450001,1.7,0.75999999,-10.0,8.3000002,6,18.799999,38
17 | 30,950898,30,286304911292839,0,44,5200,42.285118,-83.813278,253.7,2.5599999,35.150002,1.77,0.92000002,-10.0,9.2700005,6,18.6,37
18 | 30,950898,30,286304911392900,0,45,5300,42.285122,-83.813278,253.7,2.76,35.137501,1.7,1.0,-10.0,10.0,6,18.4,36
19 | 30,950898,30,286304911492863,0,46,5400,42.285122,-83.813278,253.7,2.9400001,36.25,1.54,1.0,-10.0,10.73,6,18.1,36
20 | 30,950898,30,286304911592863,0,47,5500,42.285126,-83.813271,253.7,3.1600001,36.849998,1.46,1.3099999,-10.0,12.19,6,17.9,33
21 | 30,950898,30,286304911692841,0,48,5600,42.285126,-83.813271,253.60001,3.3599999,38.637501,1.3099999,1.54,-10.0,13.67,6,17.5,30
22 | 30,950898,30,286304911792896,0,49,5700,42.28513,-83.813271,253.60001,3.5,40.5,1.3099999,1.77,-10.0,14.64,6,17.200001,30
23 | 30,950898,30,286304911892840,0,50,5800,42.285133,-83.813263,253.60001,3.6800001,41.799999,1.15,1.7,-10.0,16.110001,6,16.799999,28
24 | 30,950898,30,286304911992904,0,51,5900,42.285133,-83.813263,253.60001,3.8800001,44.275002,1.15,1.7,-10.0,17.07,7,16.4,28
25 | 30,950898,30,286304912092843,0,52,6000,42.285137,-83.813255,253.60001,4.04,45.775002,1.23,1.46,-10.0,17.32,7,16.0,30
26 | 30,950898,30,286304912192874,0,53,6100,42.285137,-83.813255,253.60001,4.2199998,47.075001,1.15,2.0799999,-10.0,18.299999,7,15.7,31
27 |
--------------------------------------------------------------------------------
/fusion-tables/Seattle_Parks.csv:
--------------------------------------------------------------------------------
1 | "PMAID","LocID","ZIP.Code","address","icon"
2 | 281,2545,98119,"1200 W Howe St Seattle 98119","ylw_circle"
3 | 4159,2387,98144,"2821 12TH Ave S Seattle 98144","orange_diamond"
4 | 4467,2382,98122,"564 12th Ave Seattle 98122","orange_diamond"
5 | 4010,2546,98107,"4400 14th Ave NW Seattle 98107","ylw_circle"
6 | 296,296,98112,"3001 E Madison St Seattle 98112","grn_stars"
7 | 1000001,0,98199,"32nd Ave W Seattle 98199","donut"
8 | 3158,2378,98117,"606 NW 76th St Seattle 98117","orange_diamond"
9 | 4404,2533,98103,"723 N 35th St Seattle 98103","ylw_circle"
10 | 1000002,0,98118,"Lake Washington Blvd S & S Adams St Seattle 98118","donut"
11 | 244,1886,98125,"12526 27th Ave NE Seattle 98125","orange_diamond"
12 | 445,1888,98116,"1702 Alki Ave SW Seattle 98116","orange_diamond"
13 | 446,1049,98116,"5817 SW Lander St Seattle 98116","ltblu_square"
14 | 3914,1891,98122,"1504 34TH Ave Seattle 98122","orange_diamond"
15 | 426,1892,98144,"2000 Martin Luther King Jr Way S Seattle 98144","orange_diamond"
16 | 2927,1894,98116,"4000 Beach Dr SW Seattle 98116","orange_diamond"
17 | 1556,1898,98199,"3431 Arapahoe Pl W Seattle 98199","orange_diamond"
18 | 485,1907,98146,"4120 Arroyo Dr SW Seattle 98146","orange_diamond"
19 | 4081,1908,98118,"8702 Seward Park Ave S Seattle 98118","orange_diamond"
20 | 4243,2541,98144,"1501 21st Ave S Seattle 98144","ylw_circle"
21 | 241,2552,98103,"4020 Fremont Ave N Seattle 98103","ylw_circle"
22 | 4006,1910,98102,"2548 Delmar Dr E Seattle 98102","orange_diamond"
23 | 2840,1911,98117,"8347 14th Ave NW Seattle 98117","orange_diamond"
24 | 4278,1913,98107,"5701 22nd Ave NW Seattle 98107","orange_diamond"
25 | 4428,1914,98107,"1702 nw 62nd St Seattle 98107","orange_diamond"
26 | 497,1108,98107,"2644 NW 60th St Seattle 98107","ltblu_square"
27 | 4073,2553,98115,"7802 Banner Way NE Seattle 98115","ylw_circle"
28 | 3703,1919,98116,"6425 SW Admiral Way Seattle 98116","orange_diamond"
29 | 303,1110,98199,"2614 24th Ave W Seattle 98199","ltblu_square"
30 | 1000342,0,98119,"3rd Ave W & W Prospect St Seattle 98119","donut"
31 | 400,1074,98144,"1902 13th Ave S Seattle 98144","ltblu_square"
32 | 3119,1923,98104,"1110 S Dearborn St Seattle 98104","orange_diamond"
33 | 4481,1075,98125,"5th Ave NE & NE 103rd St Seattle 98125","ltblu_square"
34 | 4028,0,98105,"5809 15th Ave NE Seattle 98105","donut"
35 | 440,1925,98118,"8650 55th Ave S Seattle 98118","orange_diamond"
36 | 4472,2543,98121,"1st to 5th Ave on Bell St Seattle 98121","ylw_circle"
37 | 4022,1182,98102,"Bellevue Ave E & Bellevue Pl E Seattle 98102","ltblu_square"
38 | 4415,1186,98121,"2512 Elliott Ave Seattle 98121","ltblu_square"
39 | 346,1290,98102,"703 Belmont Pl E Seattle 98102","ltblu_square"
40 | 447,1188,98126,"3600 SW Admiral Way Seattle 98126","ltblu_square"
41 | 475,1189,98105,"3659 42nd Ave NE Seattle 98105","ltblu_square"
42 | 436,1077,98118,"9320 38th Ave S Seattle 98118","ltblu_square"
43 | 4245,1190,98122,"1401 23rd Ave S Seattle 98122","ltblu_square"
44 | 253,1191,98107,"5420 22nd Ave NW Seattle 98107","ltblu_square"
45 | 304,1193,98109,"1215 5th Ave N Seattle 98109","ltblu_square"
46 | 288,1113,98133,"13035 Linden Ave N Seattle 98133","ltblu_square"
47 | 4450,1194,98133,"14201 Linden Ave N Seattle 98133","ltblu_square"
48 | 3907,1196,98119,"513 W Olympic Pl Seattle 98119","ltblu_square"
49 | 4418,1198,98144,"1520 26th Ave S Seattle 98144","ltblu_square"
50 | 238,1199,98117,"1851 NW Blue Ridge Dr Seattle 98117","ltblu_square"
51 | 239,1200,98117,"Radford Ave NW & NW Milford Way Seattle 98117","ltblu_square"
52 |
--------------------------------------------------------------------------------
/D3/exercise_3/exercise_3.js:
--------------------------------------------------------------------------------
1 | // set the dimensions and margins of the graph
2 | var outerWidth = 960;
3 | var outerHeight = 500;
4 |
5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
6 | width = outerWidth - margin.left - margin.right,
7 | height = outerHeight - margin.top - margin.bottom;
8 |
9 | // set the ranges
10 | var y = d3.scaleLinear()
11 | .range([height, 0]);
12 |
13 | var x = d3.scaleBand()
14 | .range([0, width])
15 | .padding(0.33);
16 |
17 | var xAxis = d3.axisTop(x)
18 | .ticks(5)
19 |
20 | var yAxis = d3.axisLeft(y)
21 | .tickFormat('')
22 |
23 | // append the svg object to the body of the page
24 | // append a 'group' element to 'svg'
25 | // moves the 'group' element to the top left margin
26 | var svg = d3.select('body').append('svg')
27 | .attr("class", "chart")
28 | .attr("width", outerWidth)
29 | .attr("height", outerHeight)
30 | .append("g")
31 | .attr("transform", `translate(${margin.left},${margin.top})`);
32 |
33 | // data
34 | var data = [{'team':'Boston','value':100},
35 | {'team':'Detroit','value':85},
36 | {'team':'New York','value':80},
37 | {'team':'Atlanta','value':75},
38 | {'team':'Chicago','value':30}]
39 |
40 |
41 | // scale the range of the data in the domains
42 | y.domain([0, d3.max(data, d => d.value)])
43 | x.domain(data.map(d => d.team));
44 |
45 |
46 |
47 | // append the rectangles for the bar chart
48 | var bar = svg.selectAll(".bar")
49 | .data(data)
50 | .join("g")
51 | .attr("class","bar")
52 |
53 |
54 | var rect = bar.append('rect')
55 | .attr("height", d => height - y(d.value))
56 | .attr("x", d => x(d.team))
57 | .attr("width", x.bandwidth())
58 | .attr("y", d => y(d.value))
59 | .style('fill', d => d3.interpolatePurples(d.value/100));
60 |
61 |
62 | // add the x Axis
63 | svg.append("g")
64 | .attr('class', 'xaxis')
65 | .attr("transform", `translate(0, ${height})`)
66 | .call(d3.axisBottom(x));
67 |
68 | // add the y Axis
69 | svg.append("g")
70 | .call(d3.axisLeft(y));
71 |
72 | // add chart labels
73 | var labels = svg.append('g')
74 | .attr('class', 'label')
75 |
76 | // x label
77 | labels.append('text')
78 | .attr('transform', `translate(${width/2},450)`)
79 | .text('Teams')
80 |
81 | // y label
82 | var ylabel = labels.append('text')
83 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`)
84 | .text('Wins')
85 |
86 | var barLabels = bar.append('text')
87 | .attr('class', 'barlabel')
88 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
89 | .attr('y', d => y(d.value) - 15)
90 | .text(d => d.value)
91 | .style('fill', 'black')
92 |
93 |
94 | function updateAlpha() {
95 |
96 | x.domain((data.map(d => d.team)).sort());
97 |
98 | bar.selectAll('rect')
99 | .attr("x", d => x(d.team))
100 |
101 | svg.select(".xaxis")
102 | .call(d3.axisBottom(x));
103 |
104 |
105 | bar.selectAll('.barlabel')
106 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
107 |
108 | }
109 |
110 |
111 | function updateNum() {
112 |
113 | data.sort((a,b) => d3.ascending(a.value, b.value))
114 |
115 | x.domain(data.map(d => d.team));
116 |
117 | bar.selectAll('rect')
118 | .attr("x", d => x(d.team))
119 |
120 | svg.select(".xaxis")
121 | .call(d3.axisBottom(x));
122 |
123 | bar.selectAll('.barlabel')
124 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
125 |
126 | }
127 |
--------------------------------------------------------------------------------
/D3/exercise_2/solution/solution_2.js:
--------------------------------------------------------------------------------
1 | // set the dimensions and margins of the graph
2 | var outerWidth = 960;
3 | var outerHeight = 500;
4 |
5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
6 | width = outerWidth - margin.left - margin.right,
7 | height = outerHeight - margin.top - margin.bottom;
8 |
9 | // set the ranges
10 | var x = d3.scaleBand()
11 | .range([0, width])
12 | .padding(0.33);
13 |
14 | var y = d3.scaleLinear()
15 | .range([height, 0]);
16 |
17 |
18 | var xAxis = d3.axisTop(x)
19 | .ticks(5)
20 |
21 | var yAxis = d3.axisLeft(y)
22 | .tickFormat('')
23 |
24 | // append the svg object to the body of the page
25 | // append a 'group' element to 'svg'
26 | // moves the 'group' element to the top left margin
27 | var svg = d3.select('body').append('svg')
28 | .attr("class", "chart")
29 | .attr("width", outerWidth)
30 | .attr("height", outerHeight)
31 | .append("g")
32 | .attr("transform", `translate(${margin.left},${margin.top})`);
33 |
34 | // data
35 | var data = [{'team':'Boston','value':100},
36 | {'team':'Detroit','value':85},
37 | {'team':'New York','value':80},
38 | {'team':'Atlanta','value':75},
39 | {'team':'Chicago','value':30}]
40 |
41 |
42 | // scale the range of the data in the domains
43 | x.domain(data.map(d => d.team));
44 | y.domain([0, d3.max(data, d => d.value)])
45 |
46 |
47 | // append the rectangles for the bar chart
48 | var bar = svg.selectAll(".bar")
49 | .data(data)
50 | .join("g")
51 | .attr("class","bar")
52 |
53 |
54 | var rect = bar.append('rect')
55 | .attr("height", d => height - y(d.value))
56 | .attr("x", d => x(d.team))
57 | .attr("width", x.bandwidth())
58 | .attr("y", d => y(d.value))
59 | .style('fill', d => d3.interpolatePurples(d.value/100));
60 |
61 |
62 | // add the x Axis
63 | svg.append("g")
64 | .attr('class', 'xaxis')
65 | .attr("transform", `translate(0, ${height})`)
66 | .call(d3.axisBottom(x));
67 |
68 | // add the y Axis
69 | svg.append("g")
70 | .call(d3.axisLeft(y));
71 |
72 | // add chart labels
73 | var labels = svg.append('g')
74 | .attr('class', 'label')
75 |
76 | // x label
77 | labels.append('text')
78 | .attr('transform', `translate(${width/2},450)`)
79 | .text('Teams')
80 |
81 | // y label
82 | var ylabel = labels.append('text')
83 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`)
84 | .text('Wins')
85 |
86 | var barLabels = bar.append('text')
87 | .attr('class', 'barlabel')
88 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
89 | .attr('y', d => y(d.value) - 15)
90 | .text(d => d.value)
91 | .style('fill', 'black')
92 |
93 |
94 | function updateAlpha() {
95 |
96 | x.domain((data.map(d => d.team)).sort());
97 |
98 | bar.selectAll('rect')
99 | .attr("x", d => x(d.team))
100 |
101 | svg.select(".xaxis")
102 | .call(d3.axisBottom(x));
103 |
104 |
105 | bar.selectAll('.barlabel')
106 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
107 |
108 |
109 |
110 | }
111 |
112 | function updateNum() {
113 |
114 | data.sort((a,b) => d3.ascending(a.value, b.value))
115 |
116 | x.domain(data.map(d => d.team));
117 |
118 | bar.selectAll('rect')
119 | .attr("x", d => x(d.team))
120 |
121 | svg.select(".xaxis")
122 | .call(d3.axisBottom(x));
123 |
124 | bar.selectAll('.barlabel')
125 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
126 |
127 | }
128 |
--------------------------------------------------------------------------------
/thematic-maps/snow_crashes.csv:
--------------------------------------------------------------------------------
1 | County January February March April May June July August September October November December Total
2 | Alcona 7 7 0 1 0 0 0 0 0 0 5 1 21
3 | Alger 8 17 5 0 0 0 0 0 0 2 4 1 37
4 | Allegan 206 169 6 0 0 0 0 0 0 1 30 17 429
5 | Alpena 16 20 1 0 0 0 0 0 0 1 9 5 52
6 | Antrim 30 24 10 0 0 0 0 0 0 3 5 3 75
7 | Arenac 11 5 1 3 0 0 0 0 0 0 7 3 30
8 | Baraga 4 4 3 1 0 0 0 0 0 0 3 7 22
9 | Barry 39 25 2 0 0 0 0 0 0 0 13 13 92
10 | Bay 83 41 24 0 1 0 1 0 0 0 22 16 188
11 | Benzie 11 26 3 0 0 0 0 0 0 0 9 1 50
12 | Berrien 282 432 33 0 0 0 0 0 0 0 62 41 850
13 | Branch 37 46 5 0 0 0 0 0 0 0 26 29 143
14 | Calhoun 195 106 7 0 0 0 0 0 0 0 54 74 436
15 | Cass 39 69 7 0 0 0 0 0 0 0 17 18 150
16 | Charlevoix 15 8 1 0 0 0 0 0 0 0 5 4 33
17 | Cheboygan 11 5 9 0 0 0 0 1 0 4 3 5 38
18 | Chippewa 47 27 21 1 0 0 0 0 0 0 7 6 109
19 | Clare 41 22 4 3 0 0 0 0 0 0 9 12 91
20 | Clinton 52 48 8 0 0 0 0 0 0 0 25 21 154
21 | Crawford 25 14 4 0 0 0 0 0 0 1 17 3 64
22 | Delta 12 18 8 2 0 0 0 0 0 0 6 2 48
23 | Dickinson 17 8 4 7 0 0 0 0 0 0 2 3 41
24 | Eaton 87 74 5 0 0 0 0 0 0 0 37 36 239
25 | Emmet 12 11 5 0 0 0 0 0 0 1 8 8 45
26 | Genesee 326 167 61 1 0 0 0 0 0 2 123 120 800
27 | Gladwin 8 9 1 0 0 0 0 0 0 0 8 1 27
28 | Gogebic 8 16 2 0 0 0 0 0 0 0 11 9 46
29 | Grand Traverse 139 201 18 3 0 0 0 0 0 0 33 39 433
30 | Gratiot 33 25 6 0 0 0 0 0 0 0 13 0 77
31 | Hillsdale 35 39 2 0 0 0 0 0 0 0 34 8 118
32 | Houghton 42 47 19 6 0 0 0 0 0 0 16 37 167
33 | Huron 32 25 9 2 0 0 0 0 0 0 11 1 80
34 | Ingham 241 143 32 2 0 0 0 0 1 0 100 84 603
35 | Ionia 62 54 6 1 0 0 0 0 0 0 12 9 144
36 | Iosco 11 8 2 1 0 0 0 0 0 0 7 10 39
37 | Iron 2 1 0 2 0 0 0 0 0 0 3 2 10
38 | Isabella 86 55 12 2 0 0 0 0 0 1 29 5 190
39 | Jackson 201 81 14 0 0 0 0 0 0 0 91 36 423
40 | Kalamazoo 350 261 12 2 0 0 0 0 0 2 59 131 817
41 | Kalkaska 36 12 1 0 0 0 0 0 0 2 6 3 60
42 | Kent 764 519 68 8 0 0 0 0 0 1 126 226 1,712
43 | Keweenaw 6 2 0 1 0 0 0 0 0 0 1 2 12
44 | Lake 11 8 2 0 0 0 0 0 0 0 5 7 33
45 | Lapeer 78 50 33 3 0 0 0 0 0 0 50 18 232
46 | Leelanau 14 18 4 0 0 0 0 0 0 0 8 6 50
47 | Lenawee 47 47 10 0 0 0 0 0 0 0 29 12 145
48 | Livingston 159 108 19 2 0 1 0 0 0 4 102 47 442
49 | Luce 6 5 6 0 0 0 0 0 0 0 5 5 27
50 | Mackinac 19 15 9 7 0 0 0 0 0 0 1 3 54
51 | Macomb 444 355 112 1 0 0 0 0 0 0 102 60 1,074
52 | Manistee 34 30 5 0 0 0 0 0 0 0 6 12 87
53 | Marquette 51 110 19 15 0 0 0 0 0 4 23 31 253
54 | Mason 58 62 10 0 0 0 0 0 0 0 7 4 141
55 | Mecosta 36 45 5 0 0 0 0 0 0 0 13 12 111
56 | Menominee 6 4 8 1 0 0 0 0 0 0 2 2 23
57 | Midland 56 22 12 2 0 0 0 0 0 1 15 8 116
58 | Missaukee 14 9 0 1 0 0 0 0 0 2 11 6 43
59 | Monroe 109 128 18 0 0 0 0 0 0 0 15 16 286
60 | Montcalm 50 35 3 2 0 0 0 0 0 0 19 8 117
61 | Montmorency 9 1 0 1 0 0 0 0 0 0 4 3 18
62 | Muskegon 249 287 13 0 0 0 0 0 0 0 23 7 579
63 | Newaygo 49 28 5 0 0 0 0 0 0 0 18 4 104
64 | Oakland 863 633 193 10 1 0 0 0 0 0 367 220 2,287
65 | Oceana 36 39 5 0 0 0 0 0 0 0 6 1 87
66 | Ogemaw 9 19 1 3 0 0 0 0 0 0 11 7 50
67 | Ontonagon 7 5 6 0 0 0 0 0 0 0 6 12 36
68 | Osceola 38 16 2 2 0 0 0 0 0 0 10 13 81
69 | Oscoda 8 3 1 0 0 0 0 0 0 0 7 1 20
70 | Otsego 55 12 9 4 0 0 0 0 0 0 25 9 114
71 | Ottawa 399 388 16 1 0 0 0 0 0 0 46 33 883
72 | Presque Isle 17 10 2 1 0 0 0 0 0 2 1 3 36
73 | Roscommon 14 8 4 5 0 0 0 0 0 1 11 7 50
74 | Saginaw 150 85 39 7 0 0 0 0 0 0 49 25 355
75 | St. Clair 93 65 36 3 0 0 0 0 0 1 23 23 244
76 | St. Joseph 39 45 4 0 0 0 0 0 0 0 20 13 121
77 | Sanilac 32 17 14 1 0 0 0 0 0 0 13 5 82
78 | Schoolcraft 13 11 0 2 0 0 0 0 0 1 1 1 29
79 | Shiawassee 40 41 4 0 0 0 0 0 0 0 26 12 123
80 | Tuscola 33 27 16 1 0 0 0 0 0 0 20 4 101
81 | Van Buren 134 155 4 0 0 0 0 0 0 0 33 31 357
82 | Washtenaw 308 233 42 1 0 0 1 0 0 0 123 25 733
83 | Wayne 1,143 877 182 2 1 0 1 0 0 1 217 131 2,555
84 | Wexford 29 55 13 5 0 0 0 0 0 0 12 13 127
--------------------------------------------------------------------------------
/D3/exercise_3/solution/solution_3.js:
--------------------------------------------------------------------------------
1 | // set the dimensions and margins of the graph
2 | var outerWidth = 960;
3 | var outerHeight = 500;
4 |
5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
6 | width = outerWidth - margin.left - margin.right,
7 | height = outerHeight - margin.top - margin.bottom;
8 |
9 | // set the ranges
10 | var y = d3.scaleLinear()
11 | .range([height, 0]);
12 |
13 | var x = d3.scaleBand()
14 | .range([0, width])
15 | .padding(0.33);
16 |
17 | var xAxis = d3.axisTop(x)
18 | .ticks(5)
19 |
20 | var yAxis = d3.axisLeft(y)
21 | .tickFormat('')
22 |
23 | // append the svg object to the body of the page
24 | // append a 'group' element to 'svg'
25 | // moves the 'group' element to the top left margin
26 | var svg = d3.select('body').append('svg')
27 | .attr("class", "chart")
28 | .attr("width", outerWidth)
29 | .attr("height", outerHeight)
30 | .append("g")
31 | .attr("transform", `translate(${margin.left},${margin.top})`);
32 |
33 | // data
34 | var data = [{'team':'Boston','value':100},
35 | {'team':'Detroit','value':85},
36 | {'team':'New York','value':80},
37 | {'team':'Atlanta','value':75},
38 | {'team':'Chicago','value':30}]
39 |
40 |
41 | // scale the range of the data in the domains
42 | y.domain([0, d3.max(data, d => d.value)])
43 | x.domain(data.map(d => d.team));
44 |
45 |
46 | // append the rectangles for the bar chart
47 | var bar = svg.selectAll(".bar")
48 | .data(data)
49 | .join("g")
50 | .attr("class","bar")
51 |
52 |
53 | var rect = bar.append('rect')
54 | .attr("height", d => height - y(d.value))
55 | .attr("x", d => x(d.team))
56 | .attr("width", x.bandwidth())
57 | .attr("y", d => y(d.value))
58 | .style('fill', d => d3.interpolatePurples(d.value/100));
59 |
60 |
61 | // add the x Axis
62 | svg.append("g")
63 | .attr('class', 'xaxis')
64 | .attr("transform", `translate(0, ${height})`)
65 | .call(d3.axisBottom(x));
66 |
67 | // add the y Axis
68 | svg.append("g")
69 | .call(d3.axisLeft(y));
70 |
71 | // add chart labels
72 | var labels = svg.append('g')
73 | .attr('class', 'label')
74 |
75 | // x label
76 | labels.append('text')
77 | .attr('transform', `translate(${width/2},450)`)
78 | .text('Teams')
79 |
80 | // y label
81 | var ylabel = labels.append('text')
82 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`)
83 | .text('Wins')
84 |
85 | var barLabels = bar.append('text')
86 | .attr('class', 'barlabel')
87 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
88 | .attr('y', d => y(d.value) - 15)
89 | .text(d => d.value)
90 | .style('fill', 'black')
91 |
92 |
93 | function updateAlpha() {
94 | const T = 500
95 |
96 | x.domain((data.map(d => d.team)).sort());
97 |
98 | bar.selectAll('rect')
99 | .transition().duration(T)
100 | .attr("x", d => x(d.team))
101 |
102 | svg.select(".xaxis")
103 | .transition().duration(T)
104 | .call(d3.axisBottom(x))
105 |
106 | bar.selectAll('.barlabel')
107 | .transition().duration(T)
108 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
109 |
110 |
111 | }
112 |
113 | function updateNum() {
114 | const T = 500
115 |
116 | data.sort((a,b) => d3.ascending(a.value, b.value));
117 |
118 | x.domain(data.map(d => d.team));
119 |
120 | bar.selectAll('rect')
121 | .transition().duration(T)
122 | .attr("x", d => x(d.team))
123 |
124 | svg.select(".xaxis")
125 | .transition().duration(T)
126 | .call(d3.axisBottom(x))
127 |
128 | bar.selectAll('.barlabel')
129 | .transition().duration(T)
130 | .attr('x', d => x(d.team) + (x.bandwidth()/2))
131 |
132 | }
133 |
134 |
--------------------------------------------------------------------------------
/D3/sortable.js:
--------------------------------------------------------------------------------
1 | async function createChart() {
2 |
3 | // read data
4 | const fileLocation = 'https://gist.githubusercontent.com/caocscar/8cdb75721ea4f6c8a032a00ebc73516c/raw/854bbee2faffb4f6947b6b6c2424b18ca5a8970e/mlb2018.csv'
5 | DATA = await d3.csv(fileLocation, type) // intentionally global: filterData() reads it below
6 | let chartDate = new Date(2018,3,3)
7 | let data = filterData(chartDate)
8 |
9 | // margins
10 | let margin = {top: 80, right: 90, bottom: 30+50, left: 120},
11 | width = 900 - margin.left - margin.right,
12 | height = 1500 - margin.top - margin.bottom; // 760
13 |
14 | // svg setup
15 | let svg = d3.select('body').append('svg')
16 | .attr("class", "chart")
17 | .attr("width", width + margin.left + margin.right)
18 | .attr("height", height + margin.top + margin.bottom)
19 | .append("g")
20 | .attr("transform", `translate(${margin.left},${margin.top})`);
21 |
22 | // set up scales
23 | let y = d3.scaleBand()
24 | .domain(data.map(d => d.team).reverse())
25 | .range([height, 0])
26 | .padding(0.33)
27 |
28 | let x = d3.scaleLinear()
29 | .domain([0, Math.ceil(d3.max(data, d => d.value)/5)*5])
30 | .range([0, width]);
31 |
32 | // add axes
33 | let xAxis = d3.axisTop(x)
34 | .ticks(6)
35 |
36 | svg.append("g")
37 | .attr("class", "x axis")
38 | .call(xAxis);
39 |
40 | let yAxis = d3.axisLeft(y)
41 | .tickFormat('')
42 |
43 | svg.append("g")
44 | .attr("class", "y axis")
45 | .call(yAxis);
46 |
47 | // add the x-axis gridlines
48 | let gridlines = d3.axisTop(x)
49 | .ticks(6)
50 | .tickSize(-height)
51 | .tickFormat("")
52 |
53 | svg.append("g")
54 | .attr("class", "grid")
55 | .call(gridlines)
56 |
57 | // set up bar groups
58 | let bar = svg.selectAll(".bar")
59 | .data(data)
60 | .join("g")
61 | .attr("class", "bar")
62 | .attr("transform", d => `translate(0,${y(d.team)})`)
63 |
64 | // adding bars
65 | let rects = bar.append('rect')
66 | .attr("width", (d,i) => x(d.value))
67 | .attr("height", y.bandwidth())
68 | .style('fill', d => d3.interpolateRdYlBu(d.value/100))
69 |
70 | // team labels
71 | bar.append('text')
72 | .attr('class', 'team')
73 | .attr('x', -10)
74 | .attr('y', y.bandwidth()/2 + 5)
75 | .text(d => d.team)
76 |
77 | // team logos
78 | const imgsize = 40
79 | let imgs = bar.append("svg:image")
80 | .attr('class', 'logo')
81 | .attr('x', d => x(d.value) + 5)
82 | .attr('y', -5)
83 | .attr('width', imgsize)
84 | .attr('height', imgsize)
85 | .attr("xlink:href", d => `http://www.capsinfo.com/images/MLB_Team_Logos/${urls[d.team]}.png`)
86 |
87 | // bar labels
88 | let barLabels = bar.append('text')
89 | .attr('class', 'barlabel')
90 | .attr('x', d => x(d.value) + 10 + imgsize)
91 | .attr('y', y.bandwidth()/2 + 5)
92 | .text(d => d.value)
93 |
94 | // other chart labels
95 | let labels = svg.append('g')
96 | .attr('class', 'label')
97 |
98 | // x label
99 | labels.append('text')
100 | .attr('transform', `translate(${width},-40)`)
101 | .text('Wins')
102 |
103 | // y label
104 | let ylabel = labels.append('text')
105 | .attr('transform', `translate(-80,${height/2}) rotate(-90)`) // order matters
106 | .text('Teams')
107 |
108 | // date label
109 | const formatDate = d3.timeFormat('%b %-d')
110 | let dateLabel = labels.append('text')
111 | .attr('id', 'date')
112 | .attr('transform', 'translate(0,-40)')
113 | .text(formatDate(chartDate))
114 |
115 | labels.append('text')
116 | .attr('id', 'season')
117 | .attr('transform', `translate(${width/2},-40)`)
118 | .text('MLB 2018 Season')
119 |
120 | // clipping rectangle
121 | const z = 0.97*(height / data.length)
122 | d3.select('.chart').append("defs")
123 | .append("clipPath")
124 | .attr("id", "clip")
125 | .append("rect")
126 | .attr('x', 0)
127 | .attr('y', 0)
128 | .attr("width", width + margin.left + margin.right)
129 | .attr("height", 0.4*height)
130 |
131 | // sorting transition
132 | const T = 300
133 | let dailyUpdate = setInterval(function() {
134 |
135 | chartDate = d3.timeDay.offset(chartDate,1)
136 | dateLabel.text(formatDate(chartDate))
137 | data = filterData(chartDate)
138 |
139 | // update x-axis
140 | x.domain([0, Math.ceil(d3.max(data, d => d.value)/5)*5]);
141 | svg.select('.x.axis').transition().duration(T)
142 | .call(xAxis);
143 | svg.select('.grid').transition().duration(T)
144 | .call(gridlines);
145 |
146 | // update bar chart
147 | rects.data(data)
148 | .transition().duration(T)
149 | .attr("width", d => x(d.value))
150 | .style('fill', d => d3.interpolateRdYlBu(d.value/100))
151 | imgs.data(data)
152 | .transition().duration(T)
153 | .attr('x', d => x(d.value) + 5)
154 | barLabels.data(data)
155 | .transition().duration(T)
156 | .attr('x', d => x(d.value) + 10 + imgsize)
157 | .text(d => d.value)
158 |
159 | // sort data
160 | data.sort((a,b) => d3.descending(a.value,b.value));
161 |
162 | // update y-axis
163 | y.domain(data.map(d => d.team).reverse());
164 | bar.transition().duration(T)
165 | .attr("transform", d => `translate(0,${y(d.team)})`)
166 |
167 | // exit function
168 | if (chartDate > new Date(2018,9,1)) {
169 | clearInterval(dailyUpdate)
170 | }
171 |
172 | }, T);
173 |
174 | }
175 |
176 | function type(d) { // d3.csv row converter: turn YYYYMMDD strings into Date objects
177 | const formatDate = d3.timeParse('%Y%m%d')
178 | d.date = formatDate(d.date)
179 | return d
180 | }
181 |
182 | function filterData(chartDate) { // tally wins per team through chartDate
183 | const snapshot = DATA.filter(d => d.date <= chartDate)
184 | const wins = d3.rollup(snapshot, v => v.length, d => d.team) // returns Map object
185 | return Array.from(wins, ([key, value]) => ({'team':key, 'value':value}))
186 | }
--------------------------------------------------------------------------------
/sql-intermediate/README.md:
--------------------------------------------------------------------------------
1 | # Intermediate SQL
2 |
3 | Here is the [Google Slide Deck](https://docs.google.com/presentation/d/1sx7FL58BHbzPWb59Tq1S38QBL1KjNEjse3IyqK4nohY/edit?usp=sharing) for the workshop.
4 |
5 | Link to [db-fiddle](https://www.db-fiddle.com), a web-based database for practicing SQL.
6 |
7 | Link to the [Covid dataset](https://gist.github.com/caocscar/b9a1418e5fd9c2cd69bb6f9d67fbc05a) for the exercises.
8 |
9 |
10 | ## Workshop Material
11 | Query Syntax Covered:
12 | - IF
13 | - CASE
14 | - WHEN
15 | - ROLLUP
16 | - GROUPING
17 | - REPLACE
18 | - OVER (Window Functions)
19 | - RANK
20 | - DENSE_RANK
21 | - WINDOW
22 | - PARTITION BY
23 | - WITH (Common Table Expressions)
24 |
25 | Schema Syntax Covered:
26 | - CREATE TABLE
27 | - INSERT
28 | - DELETE
29 | - DROP
30 | - IF [NOT] EXISTS
31 | - NOT NULL
32 | - PRIMARY KEY
33 | - AUTO_INCREMENT
35 | - INSERT IGNORE INTO
36 | - UNIQUE
37 | - ALTER TABLE
38 | - ADD COLUMN
39 | - DROP COLUMN
40 | - MODIFY COLUMN
41 | - UPDATE
42 | - INDEX
43 |
44 | Miscellaneous Syntax:
45 | - SHOW COLUMNS
46 | - DESCRIBE
47 | - SHOW TABLES
48 | - SHOW INDEX
49 |
50 | ## Appendix
51 |
52 | Solutions Hiding Here
53 |
54 | #### Practice 1
55 | ```SQL
56 | SELECT County, Day, Deaths,
57 | CASE
58 | WHEN Deaths = 0 THEN -1
59 | WHEN Deaths = 1 THEN 0
60 | ELSE LOG(Deaths)
61 | END AS deathIndex
62 | FROM Covid
63 | ORDER BY deathIndex DESC
64 | ```
65 |
66 | #### Practice 2
67 | ```SQL
68 | SELECT IF(GROUPING(County), 'Total', County) as County,
69 | SUM(Deaths) AS Total
70 | FROM Covid
71 | GROUP BY County WITH ROLLUP
72 | ```
73 |
74 | #### Practice 2b
75 | ```SQL
76 | SELECT
77 | IF(GROUPING(County),'Michigan Total', IF(GROUPING(CP), 'County Total', County)) AS COUNTY,
78 | SUM(Deaths) AS DeathTotal,
79 | CP
80 | FROM Covid
81 | GROUP BY County, CP WITH ROLLUP
82 | ```
83 |
84 | #### Practice 3
85 | ```SQL
86 | SELECT REPLACE(County, "St", "Saint") AS County,
87 | Day,
88 | Cases,
89 | RANK() OVER (PARTITION BY Day ORDER BY Cases DESC) AS 'Rank'
90 | FROM Covid
91 | WHERE Day BETWEEN '2020-09-24' AND '2020-09-30'
92 | AND County LIKE 'S%'
93 | AND CP = 'Confirmed'
94 | ```
95 |
96 | #### Practice 3b
97 | ```SQL
98 | SELECT County, Day, Cases,
99 |        LAG(Cases, 7) OVER (ORDER BY Day) AS 'WeekAgo'
100 | FROM Covid
101 | WHERE County = 'Wayne' AND CP = 'Confirmed'
102 | ORDER BY Day DESC
103 | ```
104 |
105 | #### Practice 4
106 | ```SQL
107 | WITH cte AS
108 | (
109 | SELECT Day,
110 | WEEK(Day) AS Week,
111 | CP,
112 | SUM(Cases) as Total
113 | FROM Covid
114 | GROUP BY Day, CP
115 | )
116 |
117 | SELECT Week, MAX(Total)
118 | FROM cte
119 | GROUP BY Week
120 | ```
121 |
122 | #### Practice A
123 | ```SQL
124 | CREATE TABLE Michigan (
125 | Category VARCHAR(6),
126 | Value VARCHAR(7),
127 | `Cases` INTEGER,
128 | `Deaths` INTEGER,
129 | `CaseFatalityRatio` FLOAT
130 | );
131 |
132 | INSERT INTO Michigan
133 | (Category, `Value`, Cases, `Deaths`, `CaseFatalityRatio`)
134 | VALUES
135 | ('Gender', 'Female', '61390', '3212', '0.051'),
136 | ('Gender', 'Male', '57956', '3511', '0.061'),
137 | ('Gender', 'Unknown', '281', null, null);
138 | ```
139 |
140 | #### Practice B
141 | ```SQL
142 | CREATE TABLE MI (
143 | ID INT AUTO_INCREMENT,
144 | `Day` VARCHAR(3),
145 | `Category` VARCHAR(9),
146 | `Value` VARCHAR(19) NOT NULL,
147 | `Pct of Cases` FLOAT,
148 | `Pct of Deaths` FLOAT,
149 | PRIMARY KEY (ID)
150 | );
151 |
152 | INSERT INTO MI
153 | (`Day`, `Category`, `Value`, `Pct of Cases`, `Pct of Deaths`)
154 | VALUES
155 | ('Sat', 'Ethnicity', 'Hispanic/Latino', '0.08', '0.03'),
156 | ('Sat', 'Ethnicity', 'Non-Hispanic Latino', '0.69', '0.85'),
157 | ('Sat', 'Ethnicity', 'Unknown', '0.23', '0.12');
158 | ```
159 |
160 | #### Practice B2
161 | ```SQL
162 | INSERT INTO MI
163 | (Day, Value)
164 | VALUES
165 | ('Sun', null);
166 |
167 | INSERT INTO MI
168 | (ID, Day, Value)
169 | VALUES
170 | (3, 'Sun', 'Unknown');
171 | ```
172 |
173 | #### Practice C
174 | ```SQL
175 | CREATE TABLE mi (
176 | `Category` VARCHAR(3),
177 | `Value` VARCHAR(8) UNIQUE,
178 | `Cases` INTEGER,
179 | `Deaths` INTEGER DEFAULT 0,
180 | `CaseFatalityRatio` FLOAT DEFAULT 0
181 | );
182 |
183 | INSERT INTO mi
184 | (`Category`, `Value`, `Cases`)
185 | VALUES
186 | ('Age', '0 to 19', '13342'),
187 | ('Age', 'Unknown', '109');
188 |
189 | INSERT INTO mi
190 | VALUES
191 | ('Age', '20 to 29', '23038', '29', '0.001'),
192 | ('Age', '30 to 39', '16858', '71', '0.004'),
193 | ('Age', '40 to 49', '17345', '219', '0.013'),
194 | ('Age', '50 to 59', '18393', '541', '0.029'),
195 | ('Age', '60 to 69', '14656', '1188', '0.081'),
196 | ('Age', '70 to 79', '9374', '1808', '0.193'),
197 | ('Age', '80+', '8312', '2864', '0.345');
198 | ```
199 |
200 | #### Practice D
201 | ```SQL
202 | -- Schema SQL window
203 | CREATE TABLE mi (
204 | `Category` VARCHAR(3),
205 | `Value` VARCHAR(8),
206 | `Cases` INTEGER,
207 | `Deaths` INTEGER,
208 | `CaseFatalityRatio` FLOAT
209 | );
210 |
211 | -- Query SQL window
212 | ALTER TABLE mi
213 | ADD COLUMN day VARCHAR(10);
214 |
215 | ALTER TABLE mi
216 | DROP COLUMN Category,
217 | DROP COLUMN CaseFatalityRatio;
218 |
219 | ALTER TABLE mi
220 | MODIFY COLUMN Cases VARCHAR(6);
221 |
222 | DESCRIBE mi;
223 | ```
224 |
225 | #### Practice E
226 | ```SQL
227 | -- Schema SQL window
228 | CREATE TABLE mi (
229 | `Category` VARCHAR(3),
230 | `Value` VARCHAR(8),
231 | `Cases` INTEGER,
232 | `Deaths` INTEGER,
233 | `CaseFatalityRatio` FLOAT,
234 | INDEX(Cases)
235 | );
236 |
237 | INSERT INTO mi
238 | (`Category`, `Value`, `Cases`)
239 | VALUES
240 | ('Age', '0 to 19', '13342'),
241 | ('Age', 'Unknown', '109');
242 |
243 | INSERT INTO mi
244 | VALUES
245 | ('Age', '20 to 29', '23038', '29', '0.001'),
246 | ('Age', '30 to 39', '16858', '71', '0.004'),
247 | ('Age', '40 to 49', '17345', '219', '0.013'),
248 | ('Age', '50 to 59', '18393', '541', '0.029'),
249 | ('Age', '60 to 69', '14656', '1188', '0.081'),
250 | ('Age', '70 to 79', '9374', '1808', '0.193'),
251 | ('Age', '80+', '8312', '2864', '0.345');
252 |
253 | UPDATE mi
254 | SET Cases = 1400
255 | WHERE Deaths IS NULL;
256 |
257 | UPDATE mi
258 | SET Deaths = 5, CaseFatalityRatio = 5
259 | WHERE Deaths IS NULL;
260 |
261 | -- Query SQL window
262 | SELECT * FROM mi;
263 |
264 | DESCRIBE mi;
265 | SHOW INDEX FROM mi; -- Alternatively
266 | ```
267 |
268 |
--------------------------------------------------------------------------------
/thematic-maps/snowmobile_crashes.txt:
--------------------------------------------------------------------------------
1 | Crash Instance Worst Injury in Crash Crash Longitude Crash Latitude Crash Report
2 | 2015100675 B - nonincapacitating injury -85.800058172968 46.18084431116 UD-10
3 | 201510428 B - nonincapacitating injury -84.789822248845 45.440443992449 UD-10
4 | 2015104495 C - possible injury -86.740535569118 46.321992916472 UD-10
5 | 2015104570 No injury -85.860376433662 42.738967409964 UD-10
6 | 2015105193 A - incapacitating injury -85.836188702043 44.772364145356 UD-10
7 | 2015106068 A - incapacitating injury -85.483634043104 41.875491838558 UD-10
8 | 2015106317 C - possible injury -85.703826610636 46.309510725084 UD-10
9 | 201511928 B - nonincapacitating injury -85.72090105647 44.425194108759 UD-10
10 | 201511992 B - nonincapacitating injury -85.403925590975 43.865777605951 UD-10
11 | 201512223 A - incapacitating injury -85.849874553891 43.267780456756 UD-10
12 | 201512836 No injury -84.164175021592 46.363569085717 UD-10
13 | 201513939 C - possible injury -85.918523915019 44.5339711822 UD-10
14 | 201517334 C - possible injury -89.012113811027 46.556852689282 UD-10
15 | 201517688 B - nonincapacitating injury -83.432389693911 43.628838813508 UD-10
16 | 201517877 C - possible injury -85.618124874516 43.58429598679 UD-10
17 | 201518127 A - incapacitating injury -84.551456482753 46.360820025168 UD-10
18 | 201518864 C - possible injury -83.360621330608 42.627193483883 UD-10
19 | 201520780 No injury -85.511744264408 44.67990727913 UD-10
20 | 201522294 B - nonincapacitating injury -84.970398918174 44.767687018835 UD-10
21 | 201523414 No injury -82.621439618102 42.933385579213 UD-10
22 | 201526594 C - possible injury -88.533675091365 46.131239666743 UD-10
23 | 201526708 A - incapacitating injury -83.36768691342 43.505122056952 UD-10
24 | 201526895 B - nonincapacitating injury -85.039215942012 42.770683368895 UD-10
25 | 201527866 A - incapacitating injury -85.493314923755 44.014436135557 UD-10
26 | 201528011 A - incapacitating injury -82.63914987732 42.67631835496 UD-10
27 | 201530093 No injury -85.790072380071 43.293003810025 UD-10
28 | 201532310 A - incapacitating injury -84.484066105388 45.648368506774 UD-10
29 | 201533350 No injury -82.995564714258 42.699455054772 UD-10
30 | 201534571 C - possible injury -84.673468724015 45.139137069932 UD-10
31 | 201534662 A - incapacitating injury -84.827226280307 44.209248111641 UD-10
32 | 201535067 B - nonincapacitating injury -85.692815232846 43.439001714006 UD-10
33 | 201535324 B - nonincapacitating injury -83.661044655626 43.235162164867 UD-10
34 | 201538347 No injury -86.096055097901 44.628892241637 UD-10
35 | 201539281 C - possible injury -88.002457730606 46.250241819486 UD-10
36 | 201539615 No injury -84.374993926826 44.79095528009 UD-10
37 | 201539712 No injury -85.942129992661 42.382238641889 UD-10
38 | 201540144 C - possible injury -85.973182443957 41.983623401445 UD-10
39 | 201541015 No injury -83.525079129041 44.793058607159 UD-10
40 | 201541016 C - possible injury -83.710905733952 44.696924236573 UD-10
41 | 201541104 B - nonincapacitating injury -85.009424739563 44.64090447431 UD-10
42 | 201541854 B - nonincapacitating injury -84.322411066044 45.387324215226 UD-10
43 | 201542293 A - incapacitating injury -85.951591969289 44.238197231663 UD-10
44 | 201545186 Fatal -86.494607186789 41.950675232664 UD-10
45 | 201545288 A - incapacitating injury -85.306164346805 44.109657752931 UD-10
46 | 201545784 A - incapacitating injury -84.620777432775 45.847477649052 UD-10
47 | 201545792 C - possible injury -84.848263534832 45.017716815188 UD-10
48 | 201545795 No injury -84.702912985489 45.018060357971 UD-10
49 | 201546845 A - incapacitating injury -84.759310704179 42.853609729043 UD-10
50 | 20154704 B - nonincapacitating injury -83.481240471033 44.283673517502 UD-10
51 | 201547337 C - possible injury -85.918375795088 44.223028999258 UD-10
52 | 201547361 C - possible injury -85.236701016576 44.374961333914 UD-10
53 | 201547711 No injury -84.922743136885 44.917229501134 UD-10
54 | 201547712 No injury -84.922743136885 44.917229501134 UD-10
55 | 201547817 B - nonincapacitating injury -85.739898404349 44.352626732308 UD-10
56 | 201547819 C - possible injury -85.443656094647 44.223078164062 UD-10
57 | 201547844 B - nonincapacitating injury -84.935026772683 45.045374974985 UD-10
58 | 201548089 A - incapacitating injury -84.381270106224 43.092019957408 UD-10
59 | 201550275 C - possible injury -85.595211453211 46.514654245914 UD-10
60 | 201551026 C - possible injury -87.974715203904 47.389606520347 UD-10
61 | 201551511 No injury -88.50942705195 47.170557499475 UD-10
62 | 201552074 C - possible injury -85.003918064235 45.012957328429 UD-10
63 | 201552075 B - nonincapacitating injury -84.897606029151 45.05936146032 UD-10
64 | 201552150 No injury -84.69118329622 44.99026939293 UD-10
65 | 201552194 A - incapacitating injury -84.874932044759 44.9206778492 UD-10
66 | 201553269 B - nonincapacitating injury -84.770386527415 44.68163592871 UD-10
67 | 201553320 No injury -84.625725808129 45.852029827979 UD-10
68 | 201553435 No injury -84.74444312737 44.684979251624 UD-10
69 | 201554164 A - incapacitating injury -85.4962039299 44.230400028562 UD-10
70 | 201559413 C - possible injury -85.493362139309 43.636153579887 UD-10
71 | 20156263 No injury -84.761345269475 45.51029059808 UD-10
72 | 20156721 No injury -85.852005942417 41.852770907367 UD-10
73 | 201568314 No injury -83.858746163969 42.808999449586 UD-10
74 | 201568330 A - incapacitating injury -84.441878016653 45.149464470839 UD-10
75 | 201569422 No injury -84.947995213508 46.343717610558 UD-10
76 | 20157359 C - possible injury -84.016825650758 43.292010680844 UD-10
77 | 20157453 No injury -85.534166957762 42.575186852924 UD-10
78 | 201574886 Fatal -85.936715440967 44.807284068035 UD-10
79 | 201575072 No injury -82.630236491114 42.976273592823 UD-10
80 | 201576009 C - possible injury -83.931191320393 42.635436115333 UD-10
81 | 2015801 No injury -85.105955942282 46.752426014342 UD-10
82 | 201589723 A - incapacitating injury -85.696619536659 44.12359535407 UD-10
83 | 201589732 No injury -85.842420683732 44.139639179498 UD-10
84 | 201595228 A - incapacitating injury -85.404277569817 46.139802071388 UD-10
85 | 2015106335 A - incapacitating injury -84.947996508111 46.345976028008 UD-10
86 | 2015106339 No injury -84.604978501798 46.425548352807 UD-10
87 | 2015106353 B - nonincapacitating injury -84.32223738124 46.456877270562 UD-10
88 | 2015106490 No injury -84.918080727658 43.928749506565 UD-10
89 | 2015106713 No injury -88.294464019266 46.097688013004 UD-10
90 | 2015107643 A - incapacitating injury -89.908625169185 46.498840237387 UD-10
91 | 2015107658 No injury -89.925542849832 46.481548610279 UD-10
92 | 2015107831 A - incapacitating injury -85.373505567305 44.613005050686 UD-10
93 | 2015108064 C - possible injury -84.735796271247 43.364629423323 UD-10
94 | 2015108242 A - incapacitating injury -88.183440163537 47.37799512391 UD-10
95 | 2015108290 C - possible injury -88.805348469807 46.928776615496 UD-10
96 | 2015109219 B - nonincapacitating injury -85.009024201918 43.83084235905 UD-10
97 | 2015109388 No injury -84.374498680557 42.395729350069 UD-10
98 | 2015110669 No injury -86.144147390131 43.260363792897 UD-10
99 | 2015112610 No injury -83.410101202069 43.157321985017 UD-10
100 | 2015112789 B - nonincapacitating injury -84.286036393895 41.966878333666 UD-10
101 | 2015115235 No injury -86.014850998275 46.657393323565 UD-10
102 | 2015115236 No injury -86.552985349844 46.419702140502 UD-10
103 | 2015115353 A - incapacitating injury -87.682822892849 46.498519598707 UD-10
104 | 2015129758 B - nonincapacitating injury -85.427315822355 44.651533921877 UD-10
105 | 2015150184 No injury -86.737587061959 46.329205017028 UD-10
106 | 2015162719 No injury -83.615266056207 45.177822218761 UD-10
107 | 2015183045 No injury -83.198361994143 42.392714724347 UD-10
108 | 2015183062 No injury -83.202282094243 42.340796094387 UD-10
109 | 2015262225 B - nonincapacitating injury -82.557578993695 42.948125965502 UD-10
110 | 2015264644 B - nonincapacitating injury -85.771788811445 44.647472314438 UD-10
111 | 2015272401 No injury -83.102913294015 42.296970881048 UD-10
112 | 2015300686 C - possible injury -88.78415655252 46.969457635212 UD-10
--------------------------------------------------------------------------------
/regex/data/vins.txt:
--------------------------------------------------------------------------------
1 | 1FDXK84N9FVA40100
2 | 1G1AD5F56A7186931
3 | 2C3CCAAG3EH301682
4 | 2FMDK48C08BA77023
5 | 3GCPCSE08BG284714
6 | 3N1AB7AP8DL787925
7 | 4A3AA46L9XE004326
8 | 4S3BE645527203527
9 | 5J6RM4H38EL039758
10 | 5TFDV58128X062869
11 | JH4CU2F67EC004754
12 | JS2RD62H865350625
13 | KL8CB6S95EC465118
14 | KMHCT5AE1EU146656
15 | SAJGX2747VC015715
16 | SALAK2D40BA576362
17 | WMWZB3C59CWM05007
18 | WP0AB2A76BL061330
19 | YH4K14AA0CA001707
20 | YV4902DZ1D2400099
21 | UNKNOWN
22 | UNK
23 | UNK
24 | UNKNOWN
25 | UU
26 | """UNK"""
27 | (UNKNOWN)
28 | *****UNKNOWN****
29 | 0
30 | 0000
31 | 0000000000
32 | 00000000000
33 | 000000000000
34 | 0000000000000
35 | 00000000000000
36 | 000000000000000
37 | 0000000000000000
38 | 00000000000000000
39 | 000000000000000000
40 | 0000000000000000000
41 | 00000000000000000000
42 | 000000000000000000000
43 | 0000000000000000000000
44 | 00000000000000000000000
45 | 000000000000000000000000
46 | 0000000000000000000000000
47 | 000000000000000000778
48 | 00000000000000001
49 | 00000000000000003
50 | 00000000000000005
51 | 0000000000000000A
52 | 0000000000000000M
53 | 00000000000000012
54 | 000000000000000AA
55 | 000000000000000PP
56 | 000000000000000XX
57 | 00000000000000UNK
58 | 00000000000000VIN
59 | 00000000000001748
60 | 00000000000002172
61 | 00000000000008038
62 | 00000000000008309
63 | 0000000000000JBS2
64 | 0000000000000NONE
65 | 00000000000010979
66 | 00000000000012129
67 | 00000000000013433
68 | 00000000000013537
69 | 0000000000001996F
70 | 00000000000022817
71 | 00000000000023341
72 | 00000000000030026
73 | 00000000000030119
74 | 00000000000030778
75 | 00000000000037060
76 | 00000000000041673
77 | 00000000000042165
78 | 00000000000048043
79 | 00000000000053013
80 | 00000000000054135
81 | 00000000000054495
82 | 00000000000055585
83 | 00000000000060096
84 | 00000000000061301
85 | 00000000000061685
86 | 00000000000067828
87 | 00000000000070704
88 | 00000000000075859
89 | 00000000000080253
90 | 0000000000008267B
91 | 00000000000083889
92 | 00000000000089047
93 | 00000000000089177
94 | 00000000000091442
95 | 000000000000B7117
96 | 00000000000120391
97 | 00000000000131398
98 | 00000000000132795
99 | 00000000000135157
100 | 0000000000014438R
101 | 00000000000145136
102 | 00000000000157643
103 | 00000000000160083
104 | 00000000000161438
105 | 00000000000212146
106 | 00000000000214027
107 | 00000000000215886
108 | 00000000000216263
109 | 00000000000236360
110 | 00000000000240057
111 | 00000000000261261
112 | 00000000000266392
113 | 00000000000266416
114 | 00000000000270141
115 | 00000000000310406
116 | 00000000000326687
117 | 00000000000330392
118 | 00000000000332045
119 | 00000000000336748
120 | 00000000000406812
121 | 00000000000408107
122 | 00000000000441256
123 | 00000000000461011
124 | 0000000000046824B
125 | 00000000000502513
126 | 0000000000069105R
127 | 0000000000078010B
128 | 00000000000808791
129 | 00000000000813695
130 | 00000000000819921
131 | 00000000000824779
132 | 0000000000086309B
133 | 00000000000881673
134 | 0000000000097686B
135 | 00000000000A25840
136 | 00000000000A71011
137 | 00000000000AAAAAA
138 | 00000000000B72851
139 | 00000000000DW4121
140 | 00000000000E06246
141 | 00000000000F46117
142 | 00000000000J27248
143 | 00000000000K40121
144 | 00000000000KLF220
145 | 00000000000NL1G9S
146 | 0000000000108126B
147 | 00000000001091643
148 | 00000000001221553
149 | 00000000001424201
150 | 0000000000172385B
151 | 0000000000175629B
152 | 00000000001828867
153 | 00000000002210345
154 | 00000000002308190
155 | 0000000000231332
156 | 00000000002438360
157 | 00000000002467113
158 | 0000000000253213M
159 | 00000000002625567
160 | 00000000002707600
161 | 00000000002751887
162 | 00000000002767946
163 | 00000000003102348
164 | 0000000000369842M
165 | 00000000003D20077
166 | 000000000054321NK
167 | 00000000006418561
168 | 00000000007043523
169 | 00000000008211412
170 | 00000000008218428
171 | 00000000008218469
172 | 00000000009203322
173 | 00000000009203327
174 | 00000000009M18007
175 | 0000000000B70013M
176 | 0000000000BB35178
177 | 0000000000BB95807
178 | 0000000000BC72673
179 | 0000000000BD48041
180 | 0000000000C672629
181 | 0000000000C694106
182 | 0000000000D8556RX
183 | 0000000000F200238
184 | 0000000000F943733
185 | 0000000000HL11240
186 | 0000000000J503014
187 | 0000000000KY33931
188 | 0000000000M130078
189 | 0000000000N50053M
190 | 0000000000OOOOOOO
191 | 0000000000S323158
192 | 0000000000UNKNOWN
193 | 0000001E161B50184
194 | 00000544A02800430
195 | 0000UNKNOWN
196 | 000UNKOWN00000000
197 | 000XXX00000000000
198 | 01010101010101010
199 | 02112250000000000
200 | 02264130000000000
201 | 02292660000000000
202 | 02380690000000000
203 | 02516
204 | 02733800000000000
205 | 03183380000000000
206 | 05119E1994XXXXXXX
207 | 05181134JIANGDONG
208 | 085544B0000000000
209 | 09999999999999999
210 | 0XXXXXXXXXXXX
211 | 100000000000
212 | 10000000000000000
213 | 1000000000000000000
214 | 100000000000000000000
215 | 10101010101010101
216 | 11110000000000000
217 | 11111110000000000
218 | 1111111111111111
219 | 11111111111111111
220 | 1111111111111111111
221 | 11111111111111234
222 | 12121212121212121
223 | 12250000000000000
224 | 12332112232123454
225 | 12340000000000000
226 | 12345000000000000
227 | 12345678900987654
228 | 12345678901234567
229 | 123456789012345678
230 | 12345678909876543
231 | 12345678910111211
232 | 1234567891013333
233 | 12345678911234567
234 | 12345678912345678
235 | 123456789AAAAAAAA
236 | 123EWQ321QWE321QW
237 | 13686000000000000
238 | 18142700000000000
239 | 1850460000000000
240 | 19000000000000000
241 | 19999999999999999
242 | 1F000000000000000
243 | 1G999999999999997
244 | 1UNKNOWN
245 | 20055370000000000
246 | 20885820000000000
247 | 22510030000000000
248 | 24502600000000000
249 | 25049220000000000
250 | 25067290000000000
251 | 26402850000000000
252 | 26494030000000000
253 | 27239000000000000
254 | 28192690000000000
255 | 2ZK78870000000000
256 | 30236500000000000
257 | 30600000000000000
258 | 31844590000000000
259 | 33115100000000000
260 | 36051500000000000
261 | 372293L0000000000
262 | 42519600000000000
263 | 50232000000000000
264 | 51108000000000000
265 | 51727000000000000
266 | 51800000000005043
267 | 59767000000000000
268 | 5TDZT300000000000
269 | 5UNKNOWN
270 | 61234567890POIUYT
271 | 61M85570000000000
272 | 70561000000000000
273 | 7C390410000000000
274 | 80409100000000000
275 | 80756800000000000
276 | 84379070000000000
277 | 85066700000000000
278 | 86133000000000000
279 | 86567270000000000
280 | 88899000000000000
281 | 89821670000000000
282 | 90847910000000000
283 | 90902600000000000
284 | 99
285 | 99109999999999999
286 | 99139999999999999
287 | 9999999999
288 | 99999999999
289 | 999999999999
290 | 9999999999999
291 | 99999999999990909
292 | 99999999999999
293 | 9999999999999900-
294 | 999999999999999
295 | 9999999999999999
296 | 99999999999999999
297 | 999999999999999999
298 | 9999999999999999999
299 | 99999999999999999999
300 | 999999999999999999999
301 | 9999999999999999999999
302 | 999999999999999999999999
303 | 9999999999999999999999999
304 | AA000000000000000
305 | AAAAAAAAAAAAAAAAA
306 | ALL UNKNOWN
307 | B5268000000000000
308 | BB921150000000000
309 | BD688540000000000
310 | BD934900000000000
311 | BIKE0000000000000
312 | BKJ06980000000000
313 | DJ356710000000000
314 | E434TR4G4RTG4RTGR
315 | ES40DC00000000000
316 | FS3221 UNK VIN
317 | FS6DVR00000000000
318 | G00000000000
319 | G0904600000000000
320 | HD00000000000
321 | HD000000000000000
322 | HR554230000000000
323 | JA3AY26A5VV0416 0
324 | JT420000000000000
325 | KAW00000000000000
326 | KSV700A0000000000
327 | KY118720000000000
328 | KY123820000000000
329 | KZ000000000000000
330 | L0000000000000000
331 | LA5PWR00000000000
332 | MBCN6180000000000
333 | MF286000000000000
334 | MY025V00000000000
335 | NA000000000000000
336 | NKNOWN
337 | NL116F00000000000
338 | NONE
339 | NONE0000000000000
340 | NONE9999999999999
341 | NY628680000000000
342 | NZ0QA400000000000
343 | NoVIN999999999999
344 | P0032700000000000
345 | R210III0000000000
346 | SRP16130000000000
347 | SRR55500000000000
348 | T3TQ34TQ34TR34T
349 | T4756000000000000
350 | TH638500000000000
351 | U
352 | U NKNOWN0000000000
353 | UKN
354 | UKNOWN
355 | UKNOWN0000000000
356 | UKNOWN00000000000
357 | UNK
358 | UNK
359 | UNK H AND R
360 | UNK H-R VEH
361 | UNK HIT AND RUN
362 | UNK NOWN
363 | UNK RENTAL
364 | UNK.
365 | UNK0000000000000
366 | UNK00000000000000
367 | UNK99999999999999
368 | UNKI
369 | UNKKNOWN
370 | UNKMOWN
371 | UNKN
372 | UNKN0000000000000
373 | UNKNIOWN
374 | UNKNIWN
375 | UNKNKOWN
376 | UNKNNOWN0000000000
377 | UNKNON
378 | UNKNOW
379 | UNKNOWEN
380 | UNKNOWN
381 | UNKNOWN
382 | UNKNOWN .
383 | UNKNOWN HIT
384 | UNKNOWN / FLED
385 | UNKNOWN BODY TYPE
386 | UNKNOWN HI
387 | UNKNOWN HIT AND R
388 | UNKNOWN INFORMATI
389 | UNKNOWN M
390 | UNKNOWN VIN
391 | UNKNOWN VIN NUMBE
392 | UNKNOWN VINNUMBE
393 | UNKNOWN!!!!!!!!!!
394 | UNKNOWN##########
395 | UNKNOWN**********
396 | UNKNOWN..........
397 | UNKNOWN/UNKNOWN//
398 | UNKNOWN0000
399 | UNKNOWN0000000000
400 | UNKNOWN00000000000
401 | UNKNOWN999
402 | UNKNOWN9999999999
403 | UNKNOWNHIT
404 | UNKNOWNK
405 | UNKNOWNVIN
406 | UNKNOWN[O
407 | UNKNWN
408 | UNKNWON
409 | UNKOWN
410 | UNKOWN0000000000
411 | UNKOWN00000000000
412 | UNKU
413 | UNKWN
414 | UNKWON0000000000
415 | UNNKNOWN
416 | UNNOWN
417 | UNOWN
418 | UNknown
419 | UTL00000000000000
420 | UU
421 | UU000000000000
422 | UUNKNOWN
423 | UnKnown
424 | Unk
425 | Unknow
426 | Unknown
427 | Unknownn
428 | WL848090000000000
429 | WQ321QWE321QWE321
430 | XG424790000000000
431 | XXX00000000000000
432 | XXXXXX
433 | XXXXXXX
434 | XXXXXXX0000000000
435 | XXXXXXXX
436 | XXXXXXXX65131
437 | XXXXXXXXX
438 | XXXXXXXXXX
439 | XXXXXXXXXX20412
440 | XXXXXXXXXXX
441 | XXXXXXXXXXXX
442 | XXXXXXXXXXXXX
443 | XXXXXXXXXXXXXX
444 | XXXXXXXXXXXXXXX
445 | XXXXXXXXXXXXXXXX
446 | XXXXXXXXXXXXXXXXX
447 | XXXXXXXXXXXXXXXXXXXXXXXXX
448 | _________________
449 | unknown
450 | unknownn
451 |
--------------------------------------------------------------------------------
/thematic-maps/deer_in_the_city.txt:
--------------------------------------------------------------------------------
1 | city,Total,K,ABC,PDO,Lat,Lon
2 | Portage,191,0,5,186,42.201154,-85.580002
3 | Rochester Hills,150,0,2,148,42.658366,-83.149932
4 | Midland,137,0,1,136,43.615583,-84.247212
5 | Battle Creek,116,0,6,110,42.321152,-85.179714
6 | Farmington Hills,95,0,4,91,42.498994,-83.367717
7 | Ann Arbor,90,0,4,86,42.280826,-83.743038
8 | Novi,86,0,8,78,42.480590,-83.475491
9 | Auburn Hills,82,0,2,80,42.687532,-83.234103
10 | Lansing,81,0,0,81,42.732535,-84.555535
11 | Walker,77,0,2,75,43.001413,-85.768091
12 | Grand Rapids,63,0,1,62,42.963360,-85.668086
13 | Troy,58,0,5,53,42.606409,-83.149775
14 | Kalamazoo,57,0,1,56,42.291707,-85.587229
15 | Kentwood,57,0,5,52,42.869473,-85.644749
16 | East Lansing,55,0,3,52,42.736979,-84.483865
17 | Wyoming,46,0,0,46,42.913360,-85.705309
18 | Southfield,44,0,6,38,42.473369,-83.221873
19 | Sterling Heights,44,0,3,41,42.580312,-83.030203
20 | Norton Shores,41,0,2,39,43.168904,-86.263946
21 | Burton,40,0,1,39,42.999472,-83.616342
22 | Livonia,40,0,0,40
23 | Escanaba,32,0,1,31
24 | Charlotte,32,0,0,32
25 | Lapeer,31,0,0,31
26 | Norway,29,0,0,29
27 | Marquette,28,0,0,28
28 | Rockford,28,0,0,28
29 | Holland,27,0,1,26
30 | Gladstone,27,0,0,27
31 | Muskegon,25,0,1,24
32 | Tecumseh,25,0,0,25
33 | Alpena,25,0,1,24
34 | Grandville,25,0,0,25
35 | Fenton,24,0,0,24
36 | Coldwater,24,0,1,23
37 | Ithaca,24,0,0,24
38 | Alma,23,0,0,23
39 | Hillsdale,22,0,0,22
40 | Marshall,22,0,0,22
41 | Traverse City,21,0,1,20
42 | Petoskey,21,0,0,21
43 | Romulus,21,0,0,21
44 | Wixom,21,0,0,21
45 | Iron River,21,0,0,21
46 | Holland,20,0,2,18
47 | Negaunee,19,0,1,18
48 | Iron Mountain,19,0,0,19
49 | Sault Ste. Marie,19,0,0,19
50 | Ludington,18,0,1,17
51 | Swartz Creek,18,0,1,17
52 | Chelsea,18,0,1,17
53 | Jonesville,18,0,0,18
54 | Adrian,17,0,0,17
55 | Manistee,17,0,0,17
56 | Pontiac,16,0,1,15
57 | Warren,16,0,0,16
58 | East Jordan,16,0,0,16
59 | Perry,16,0,0,16
60 | Paw Paw,15,0,0,15
61 | Jackson,15,0,1,14
62 | Litchfield,15,0,0,15
63 | Lowell,14,0,1,13
64 | Bloomfield Hills,14,0,0,14
65 | Boyne City,14,0,0,14
66 | Westland,13,0,1,12
67 | Marysville,13,0,0,13
68 | Hastings,13,0,0,13
69 | Mt. Pleasant,13,0,0,13
70 | Mason,13,0,1,12
71 | Bad Axe,13,0,0,13
72 | Howell,12,0,0,12
73 | Reed City,12,0,0,12
74 | Goodrich,12,0,0,12
75 | Rochester,12,0,0,12
76 | Rogers City,12,0,0,12
77 | St. Louis,12,0,0,12
78 | Big Rapids,12,0,0,12
79 | Corunna,12,0,0,12
80 | Flushing,12,0,0,12
81 | Richmond,11,0,0,11
82 | Monroe,11,0,0,11
83 | Croswell,11,0,0,11
84 | Hudson,11,0,0,11
85 | Gibralter,11,0,0,11
86 | Saline,11,0,0,11
87 | Newaygo,10,0,0,10
88 | Oxford,10,0,0,10
89 | Springfield,10,0,0,10
90 | Flint,10,0,1,9
91 | Hart,10,0,0,10
92 | Holly,10,0,0,10
93 | Greenville,10,0,0,10
94 | Caro,10,0,0,10
95 | Flat Rock,10,0,0,10
96 | Franklin,10,0,0,10
97 | Grand Blanc,10,0,0,10
98 | Portland,9,0,0,9
99 | Clare,9,0,0,9
100 | Cheboygan,9,0,0,9
101 | Potterville,9,0,0,9
102 | Roscommon,9,0,0,9
103 | Madison Heights,9,0,1,8
104 | Lakeview,9,0,0,9
105 | East Lansing,9,0,0,9,42.736979,-84.483865
106 | Ferrysburg,9,0,0,9
107 | Rosebush,9,0,2,7
108 | Sturgis,9,0,0,9
109 | Brown City,9,0,1,8
110 | Niles,9,0,1,8
111 | Wood Haven,9,0,1,8
112 | Owosso,8,0,0,8
113 | Nashville,8,0,0,8
114 | Grand Ledge,8,0,0,8
115 | Montague,8,0,0,8
116 | Grand Haven,8,0,0,8
117 | Whitehall,8,0,0,8
118 | Coopersville,8,0,0,8
119 | Lake Isabella,8,0,0,8
120 | Saginaw,8,0,0,8
121 | Taylor,8,0,1,7
122 | Ishpeming,8,0,0,8
123 | Dundee,8,0,0,8
124 | Ionia,8,0,0,8
125 | Plainwell,8,0,0,8
126 | Colon,8,0,0,8
127 | Tawas City,8,0,0,8
128 | Charlevoix,8,0,0,8
129 | Lincoln,7,0,0,7
130 | Brighton,7,0,0,7
131 | Mattawan,7,0,0,7
132 | Perrinton,7,0,0,7
133 | Orchard Lake,7,0,0,7
134 | Hersey,7,0,0,7
135 | Alanson,7,0,0,7
136 | Wayland,7,0,0,7
137 | Peck,7,0,0,7
138 | Gaylord,7,0,0,7
139 | St. Joseph,7,0,0,7
140 | Rockwood,7,0,0,7
141 | Homer,7,0,0,7
142 | Hudsonville,7,0,0,7
143 | Beverly Hills,7,0,0,7
144 | Imlay City,7,0,0,7
145 | McBride,7,0,0,7
146 | Baraga,7,0,1,6
147 | Dearborn,7,0,0,7
148 | Albion,7,0,0,7
149 | Concord,7,0,0,7
150 | Detroit,7,0,1,6
151 | Gladwin,6,0,0,6
152 | Millington,6,0,0,6
153 | Royal Oak,6,0,1,5
154 | East Tawas,6,0,1,5
155 | Bessemer,6,0,2,4
156 | Standish,6,0,0,6
157 | South Haven,6,0,0,6
158 | Frankfort,6,0,0,6
159 | Harbor Beach,6,0,0,6
160 | Bay City,6,0,0,6
161 | Linden,6,0,0,6
162 | Romeo,6,0,0,6
163 | Bridgman,6,0,0,6
164 | Maple Rapids,6,0,0,6
165 | Applegate,6,0,0,6
166 | St. Clair,6,0,0,6
167 | Harbor Springs,6,0,0,6
168 | Marine City,5,0,0,5
169 | Stanton,5,0,0,5
170 | Trenton,5,0,0,5
171 | Ubly,5,0,0,5
172 | Dewitt,5,0,0,5
173 | Harrison,5,0,0,5
174 | Stockbridge,5,0,0,5
175 | Edmore,5,0,1,4
176 | Sanford,5,0,0,5
177 | Benzonia,5,0,0,5
178 | Cedar Springs,5,0,0,5
179 | Coleman,5,0,0,5
180 | Port Huron,5,0,0,5
181 | Mecosta,5,0,0,5
182 | Empire,5,0,0,5
183 | Lexington,5,0,0,5
184 | Central Lake,5,0,0,5
185 | Vicksburg,5,0,0,5
186 | Fremont,5,0,0,5
187 | Ortonville,5,0,0,5
188 | Wakefield,5,0,0,5
189 | Evart,5,0,0,5
190 | Fowlerville,5,0,0,5
191 | Cadillac,5,0,0,5
192 | Mayville,5,0,0,5
193 | Buchanan,5,0,0,5
194 | Lawton,5,0,0,5
195 | West Branch,5,0,0,5
196 | Beulah,5,0,0,5
197 | Port Austin,4,0,0,4
198 | New Baltimore,4,0,0,4
199 | Ypsilanti,4,0,0,4
200 | Sandusky,4,0,0,4
201 | Centreville,4,0,0,4
202 | Williamston,4,0,1,3
203 | Three Rivers,4,0,0,4
204 | Suttons Bay,4,0,0,4
205 | Scottville,4,0,0,4
206 | Barryton,4,0,0,4
207 | Union City,4,0,0,4
208 | Sterling,4,0,0,4
209 | Crystal Falls,4,0,0,4
210 | McBain,4,0,0,4
211 | Munising,4,0,0,4
212 | Gobles,4,0,1,3
213 | Casnovia,4,0,0,4
214 | Bangor,4,0,0,4
215 | Hillman,4,0,0,4
216 | Ravenna,4,0,0,4
217 | Dexter,4,0,0,4
218 | New Haven,4,0,0,4
219 | Stevensville,4,0,0,4
220 | Port Sanilac,4,0,0,4
221 | Zeeland,4,0,0,4
222 | Kingsford,4,0,0,4
223 | Middleville,4,0,0,4
224 | Farmington,4,0,0,4
225 | Muskegon Heights,4,0,0,4
226 | Elk Rapids,4,0,0,4
227 | New Buffalo,4,0,0,4
228 | Eaton Rapids,4,0,0,4
229 | Galesburg,4,0,0,4
230 | Cass City,3,0,0,3
231 | Caledonia,3,0,0,3
232 | Bellaire,3,0,0,3
233 | Saugatuck,3,0,0,3
234 | South Lyon,3,0,0,3
235 | Springport,3,0,0,3
236 | Walled Lake,3,0,0,3
237 | Au Gres,3,0,0,3
238 | Kingston,3,0,1,2
239 | Kalkaska,3,0,0,3
240 | Carsonville,3,0,0,3
241 | Zilwaukee,3,0,0,3
242 | Almont,3,0,0,3
243 | Menominee,3,0,0,3
244 | North Branch,3,0,0,3
245 | Midland,3,0,0,3
246 | Baldwin,3,0,1,2
247 | Clarkston,3,0,0,3
248 | Birmingham,3,0,1,2
249 | Unknown Community,3,0,0,3
250 | Spring Lake,3,0,0,3
251 | Allegan,3,0,0,3
252 | Mulliken,3,0,0,3
253 | Blissfield,3,0,0,3
254 | Morrice,3,0,0,3
255 | Douglas,3,0,0,3
256 | Milford,3,0,0,3
257 | Berrien Springs,3,0,0,3
258 | Ontonagon,3,0,0,3
259 | Mesick,3,0,0,3
260 | Kent City,3,0,0,3
261 | Kingsley,3,0,0,3
262 | Allen Park,3,0,0,3
263 | South Rockwood,3,0,0,3
264 | Lathrup Village,3,0,0,3
265 | Northport,3,0,0,3
266 | Emmett,3,0,0,3
267 | Southgate,3,0,0,3
268 | Quincy,3,0,1,2
269 | Mt. Clemens,2,0,0,2
270 | Clayton,2,0,0,2
271 | Onsted,2,0,0,2
272 | Barton Hills,2,0,0,2
273 | Plymouth,2,0,0,2
274 | Vassar,2,0,0,2
275 | Deckerville,2,0,0,2
276 | Mendon,2,0,0,2
277 | Laingsburg,2,0,0,2
278 | St. Charles,2,0,0,2
279 | Frankenmuth,2,0,0,2
280 | Birch Run,2,0,0,2
281 | Posen,2,0,0,2
282 | Onaway,2,0,0,2
283 | New Lothrop,2,0,0,2
284 | Vernon,2,0,0,2
285 | LeRoy,2,0,0,2
286 | Rose City,2,0,0,2
287 | Pentwater,2,0,0,2
288 | Sylvan Lake,2,0,0,2
289 | Hartford,2,0,0,2
290 | Utica,2,0,0,2
291 | Manchester,2,0,0,2
292 | Casnovia,2,0,0,2
293 | Pierson,2,0,0,2
294 | Howard City,2,0,0,2
295 | Carson City,2,0,0,2
296 | Carleton,2,0,0,2
297 | Lake City,2,0,0,2
298 | Dearborn Heights,2,0,0,2
299 | Carney,2,0,0,2
300 | Morley,2,0,0,2
301 | Freesoil,2,0,0,2
302 | Fountain,2,0,0,2
303 | Custer,2,0,0,2
304 | Kaleva,2,0,0,2
305 | New Era,2,0,0,2
306 | Harrisville,2,0,0,2
307 | Port Hope,2,0,0,2
308 | Davison,2,0,0,2
309 | Niles,2,0,0,2
310 | Cassopolis,2,0,0,2
311 | Breckenridge,2,0,0,2
312 | Lyons,2,0,0,2
313 | Bronson,2,0,0,2
314 | Omer,2,0,0,2
315 | Twining,2,0,0,2
316 | Houghton,2,0,0,2
317 | L'Anse,2,0,0,2
318 | Shepherd,2,0,0,2
319 | Shoreham,2,0,0,2
320 | Grand Beach,2,0,0,2
321 | Gaastra,2,0,0,2
322 | Auburn,2,0,0,2
323 | Benton Harbor,2,0,0,2
324 | Leslie,2,0,0,2
325 | Belding,2,0,0,2
326 | Elberta,2,0,0,2
327 | Pewamo,2,0,0,2
328 | Otisville,2,0,0,2
329 | Richland,2,0,0,2
330 | Honor,2,0,0,2
331 | Vermontville,2,0,0,2
332 | Olivet,2,0,0,2
333 | Otsego,2,0,0,2
334 | Farwell,2,0,0,2
335 | Lansing,2,0,0,2
336 | Grayling,2,0,0,2
337 | Sparta,2,0,0,2
338 | St. Johns,2,0,0,2
339 | Fennville,2,0,0,2
340 | Eagle,1,0,0,1
341 | Chesaning,1,0,0,1
342 | Lake Linden,1,0,0,1
343 | Bellevue,1,0,0,1
344 | North Muskegon,1,0,0,1
345 | Lakewood Club,1,0,0,1
346 | Vanderbilt,1,0,0,1
347 | Watervliet,1,0,0,1
348 | Kinde,1,0,0,1
349 | Gagetown,1,0,0,1
350 | Galien,1,0,0,1
351 | Elsie,1,0,0,1
352 | Hesperia,1,0,0,1
353 | Sheridan,1,0,0,1
354 | Garden,1,0,0,1
355 | Fowler,1,0,0,1
356 | Webberville,1,0,0,1
357 | Luna Pier,1,0,0,1
358 | Estral Beach,1,0,0,1
359 | Thompsonville,1,0,0,1
360 | Millersburg,1,0,0,1
361 | Merrill,1,0,0,1
362 | Capac,1,0,0,1
363 | Hancock,1,0,1,0
364 | Forestville,1,0,0,1
365 | Richmond,1,0,0,1
366 | Mackinaw City,1,0,0,1
367 | Boyne Falls,1,0,0,1
368 | Constantine,1,0,0,1
369 | Pleasant Ridge,1,0,0,1
370 | Ironwood,1,0,0,1
371 | Edwardsburg,1,0,0,1
372 | Dowagiac,1,0,0,1
373 | Northville,1,0,0,1
374 | Leonard,1,0,0,1
375 | Tekonsha,1,0,0,1
376 | Oakley,1,0,0,1
377 | Marlette,1,0,0,1
378 | Athens,1,0,0,1
379 | Manistique,1,0,0,1
380 | North Adams,1,0,0,1
381 | Reading,1,0,0,1
382 | Bancroft,1,0,0,1
383 | Bingham Farms,1,0,0,1
384 | Marion,1,0,0,1
385 | Lennon,1,0,0,1
386 | Wolverine,1,0,0,1
387 | White Cloud,1,0,0,1
388 | Metamora,1,0,0,1
389 | Parchment,1,0,0,1
390 | Caspian,1,0,0,1
391 | Clare,1,0,0,1
392 | Brooklyn,1,0,0,1
393 | Parma,1,0,0,1
394 | Copemish,1,0,0,1
395 | Bear Lake,1,0,0,1
396 | St. Clair Shores,1,0,0,1
397 | Roseville,1,0,0,1
398 | Inkster,1,0,0,1
399 | East Grand Rapids,1,0,0,1
400 | Sand Lake,1,0,0,1
401 | South Haven,1,0,0,1
402 | Morenci,1,0,0,1
403 | Clinton,1,0,0,1
404 | Wayne,1,0,0,1
405 | Buckley,1,0,0,1
406 | Traverse City,1,0,0,1
407 | Clifford,1,0,0,1
408 | Harrietta,1,0,0,1
409 | Manton,1,0,0,1
410 | Milan,1,0,0,1
411 | Ovid,1,0,0,1
412 | Pinconning,1,0,0,1
413 | Saranac,1,0,0,1
414 | Powers,1,0,0,1
415 | Stanwood,1,0,0,1
416 | Stephenson,1,0,0,1
417 | Essexville,1,0,0,1
418 | Daggett,1,0,0,1
419 |
--------------------------------------------------------------------------------
/pytorch/workshop_neural_net.md:
--------------------------------------------------------------------------------
1 | \titlepage
2 | ## Deep Neural Networks (DNNs)
3 |
4 | - A DNN is a mathematical function inspired by neural networks in the
5 | brain.
6 |
7 | - Input layer (features), hidden layers, output layer (targets).
8 |
9 | - Your data determines the number of features and targets.
10 |
11 | - You choose number of hidden layers and "neurons" (activation units)
12 | in each hidden layer.
13 |
14 | \centering
15 | ![](DNN.png){width="50%"}
16 |
17 | ## Deep Neural Networks (DNNs), cont'd
18 |
19 | - Hidden layers have variables (weights, biases) that are trained.
20 |
21 | - Mathematical structure: Composite of nonlinear activation functions
22 | acting on matrix/vector operations, e.g.
23 | $$f(x) = A_2{\color{red}g(A_1{\color{blue}g(A_0x+b_0)}+b_1)}+b_2$$
24 |
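As a rough illustration (not the workshop code), the composite function above can be written directly in PyTorch, with $g$ as the activation and the shapes of the $A_i$ and $b_i$ up to you:

```python
import torch

def f(x, A0, b0, A1, b1, A2, b2, g=torch.relu):
    # f(x) = A2 g(A1 g(A0 x + b0) + b1) + b2
    return A2 @ g(A1 @ g(A0 @ x + b0) + b1) + b2
```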
25 | \centering
26 | ![](DNN_activations.png){width="\textwidth"}
27 |
28 | ## Training DNNs
29 |
30 | - Training a DNN means optimizing the weights and biases to "fit" the
31 | given data.
32 |
33 | - i.e. minimize the error between the DNN prediction and the given data.
34 |
35 | - Optimization: Think of mountains and valleys. Your location is like
36 | the value of the weights/biases. Your elevation is like the value of
37 | the error. As you "walk down the mountain", you are changing the
38 | values of the weights/biases to decrease the value of the error.
39 |
40 | \centering
41 | ![](mtn.png){width="50%"}
42 |
43 | ## Training DNNs, cont'd
44 |
45 | - Usually a variant of **stochastic gradient descent**:
46 |
47 | - **Gradient**: Points toward steepest slope
48 |
49 | - **Gradient descent** method: Take steps down steepest slope to
50 | get to minimum
51 |
52 | - **Stochastic gradient descent**: Calculate the error based on a
53 | small number of data (a **batch**) instead of the entire data
54 | set
55 |
56 | - You choose: step size (learning rate), batch size
57 |
58 | \centering
59 | ![](mtn.png){width="50%"}
60 |
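A minimal sketch of stochastic gradient descent in PyTorch, assuming a `model`, a `loss_fn`, and a `DataLoader` named `loader` that yields shuffled mini-batches:

```python
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=0.01)  # you choose the learning rate
for xbatch, ybatch in loader:  # you choose the batch size via the DataLoader
    loss = loss_fn(model(xbatch), ybatch)  # error on a small batch, not the full data set
    optimizer.zero_grad()
    loss.backward()   # gradient: points toward the steepest slope
    optimizer.step()  # take a step "down the mountain"
```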
61 | ## playground.tensorflow.org
62 |
63 | \centering
64 | ![](playground.png){width="\textwidth"}
65 |
66 | Note: playground.tensorflow.org is an educational tool. It does not
67 | actually use the TensorFlow library, nor can you use it to train with
68 | your data.
69 |
70 | ## Underfitting (high bias)
71 |
72 | Symptoms:
73 |
74 | - High training and testing error
75 |
76 | Possible treatments:
77 |
78 | - Make the model larger (more layers, more neurons)
79 |
80 | - Increase the number of features, artificially if necessary (e.g.
81 | $x_1x_2$, $\sin(x)$, etc.)
82 |
83 | - More training
84 |
85 | \centering
86 | ![](underfitting.png){width="50%"}
87 |
88 | \vspace{.5cm}
89 | ## Overfitting (high variance)
90 |
91 | Symptoms:
92 |
93 | - Low training error, high testing error
94 |
95 | - (Made worse by noisy data)
96 |
97 | Possible treatments:
98 |
99 | - More data
100 |
101 | - Regularization (L1, L2, dropout)
102 |
103 | - Less training (early stopping)
104 |
105 | - Simplify model (use w/ caution)
106 |
107 | \centering
108 | ![](overfitting.png){width="50%"}
109 |
110 | \vspace{.5cm}
111 | ## Regularization
112 |
113 | - Regularization smooths the model; reduces complexity in the output
114 | ([Wikipedia](https://en.wikipedia.org/wiki/Regularization_(mathematics))).
115 |
116 | - In neural networks, this is done by keeping the weights at a
117 | similar, low magnitude.
118 |
119 | - L1 regularization adds the L1 norm of the weights to the loss.
120 |
121 | - L2 regularization adds the squared L2 norm of the weights to the loss
122 | (more sensitive to outliers).
123 |
124 | - Dropout randomly and temporarily sets a fraction of the activation
125 | units to zero during training.
126 |
127 | \centering
128 | ![](L2reg.png){width="50%"}
129 |
130 | ## playground.tensorflow.org
131 |
132 | \centering
133 | ![](playground.png){width="\textwidth"}
134 |
135 | Note: playground.tensorflow.org is an educational tool. It does not
136 | actually use the TensorFlow library, nor can you use it to train with
137 | your data.
138 |
139 | ## Nonlinear regression
140 |
141 | - Begin with example of nonlinear regression.
142 |
143 | - Use a standard DNN to map continuous inputs to continuous outputs.
144 |
145 | - Data in example has two inputs, one output (slices parallel to
146 | x-axis are parabolic, slices parallel to y-axis are sinusoidal).
147 |
148 | \centering
149 | ![](DNNRegressor_data.png){width="50%"}
150 |
151 | ## Load data
152 |
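The slides defer to the accompanying notebook here. A minimal sketch of the idea, assuming a CSV with columns `x1`, `x2`, `y` (the filename and column names are illustrative):

```python
import pandas as pd
import torch

df = pd.read_csv('regression_data.csv')  # hypothetical filename
x = torch.tensor(df[['x1', 'x2']].to_numpy(), dtype=torch.float)  # two inputs
y = torch.tensor(df[['y']].to_numpy(), dtype=torch.float)  # one output
```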
153 | ## Build the model
154 |
155 | Define the structure of the DNN. Here, we define two hidden layers, with
156 | 5 neurons in each layer.
157 |
158 | We also specify the activation function here. The `relu` function is
159 | commonly used, but you can use others (examples:
160 | [Wikipedia](https://en.wikipedia.org/wiki/Activation_function)):
161 |
162 | \vspace{.5cm}
163 | \hspace*{10pt}
164 | `sigmoid, softplus, tanh`, etc.
165 |
166 | \vspace{.5cm}
167 | Note that no activation is used on the final layer.
168 |
169 | \vspace{.5cm}
170 | Experiment with the hidden units and activation function.
171 |
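A minimal sketch of such a model in PyTorch, assuming two input features and one output as in the example data:

```python
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(2, 5),  # input layer -> first hidden layer (5 neurons)
    nn.ReLU(),
    nn.Linear(5, 5),  # second hidden layer (5 neurons)
    nn.ReLU(),
    nn.Linear(5, 1),  # output layer; no activation on the final layer
)
```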
172 | ## L1, L2 regularization
173 |
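The slides defer to the notebook here. A minimal sketch of both ideas in PyTorch, assuming the `model` above and a computed `loss`: L2 is commonly applied through the optimizer's `weight_decay` argument, while an L1 penalty can be added to the loss by hand.

```python
import torch.optim as optim

# L2: weight_decay adds an L2 penalty on the weights inside the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)

# L1: add the L1 norm of the weights to the loss manually
l1_lambda = 1e-4
loss = loss + l1_lambda * sum(p.abs().sum() for p in model.parameters())
```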
174 | ## Dropout
175 |
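Again a minimal sketch: in PyTorch, dropout is just another layer; it is active during training and disabled by `model.eval()`.

```python
import torch.nn as nn

model = nn.Sequential(
    nn.Linear(2, 5),
    nn.ReLU(),
    nn.Dropout(0.4),  # randomly zero 40% of this layer's outputs during training
    nn.Linear(5, 1),
)
```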
176 | ## Training
177 |
178 | Stochastic gradient descent methods use shuffled mini-batches instead of
179 | the entire data set for each training iteration. We specify the batch
180 | size and how many epochs to train for.
181 |
182 | \vspace{.5cm}
183 | An epoch is the number of training iterations required to go through the
184 | entire training set once. For example, with 1,000 datapoints and a batch
185 | size of 10, one epoch would take 100 training iterations.
186 |
187 | \vspace{.5cm}
188 | We can also specify validation data to see how the validation loss
189 | changes during training.
190 |
191 | Experiment with batch size and number of epochs.
192 |
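Schematically, with the numbers from the example above (assuming tensors `x` and `y` from the earlier slides):

```python
from torch.utils.data import DataLoader, TensorDataset

loader = DataLoader(TensorDataset(x, y), batch_size=10, shuffle=True)
for epoch in range(50):  # you choose the number of epochs
    for xbatch, ybatch in loader:  # 1,000 datapoints / batch size 10 = 100 iterations per epoch
        ...  # one training iteration on this mini-batch
```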
193 | ## Results
194 |
195 | With good settings in the code (not the current settings), we can get
196 | the following fit:
197 |
198 | \centering
199 | ![](goodRegression.png){width="80%"}
200 |
201 | ## Exercise 1
202 |
203 | - Run the code.
204 |
205 | - Identify the problem (underfitting or overfitting).
206 |
207 | - Try possible solutions to get a better fit.
208 |
209 | ## Classification
210 |
211 | - Consider the problem of classification.
212 |
213 | - Maps feature values to a category.
214 |
215 | - Use the example of irises
216 |
217 | - Four features: sepal length, sepal width, petal length, petal
218 | width
219 |
220 | - Three classes: Iris setosa, Iris virginica, Iris versicolor
221 |
222 | \centering
223 | ![Iris versicolor](iris_versicolor.jpg){width="40%"}
225 |
226 | ## Import data
227 |
228 | Data label format: Usually given as 0, 1, or 2; we need it to be [1,0,0], [0,1,0], or [0,0,1] (one-hot encoding).
229 |
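For illustration, `torch.nn.functional.one_hot` does this conversion (note that PyTorch's `nn.CrossEntropyLoss`, used in the accompanying notebook, accepts the integer labels directly):

```python
import torch
import torch.nn.functional as F

labels = torch.tensor([0, 1, 2])
F.one_hot(labels, num_classes=3)
# tensor([[1, 0, 0],
#         [0, 1, 0],
#         [0, 0, 1]])
```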
230 | ## Build the model
231 |
232 | Define the structure of the DNN. Here, we define three hidden layers,
233 | with 1000, 500, and 70 neurons, respectively.
234 |
235 | Since this is classification, apply the
236 | [softmax](https://en.wikipedia.org/wiki/Softmax_function) function to
237 | the last layer. This transforms the output to be a vector of
238 | probabilities that sum to one: $$\begin{aligned}
239 | p_i &= \frac{\exp(f_i)}{\sum\limits_j \exp(f_j)}\end{aligned}$$
240 | where $p_i$ is the probability of category $i$ being true and $f_i$ is
241 | the $i$-th component of the final layer's output.
242 |
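A quick numerical example of the softmax (values rounded):

```python
import torch
import torch.nn.functional as F

f = torch.tensor([2.0, 1.0, 0.1])  # illustrative final-layer output
F.softmax(f, dim=0)  # tensor([0.6590, 0.2424, 0.0986]); sums to one
```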
243 | ## Loss
244 |
245 | We again define the loss function and the optimizer. For classification,
246 | we use the [cross entropy](https://en.wikipedia.org/wiki/Cross_entropy)
247 | loss function. We are also interested in the accuracy metric (%
248 | correctly classified), in addition to the loss.
249 |
250 | $$\begin{aligned}
251 | \mathrm{cross\_entropy} = -\frac{1}{n_\mathrm{samples}}\sum\limits_j^{n_\mathrm{samples}}\sum\limits_i^{n_\mathrm{classes}}\hat{p}_i^j\log(p_i^j)\end{aligned}$$
252 | where $\hat{p}_i^j$ is the data and $p_i^j$ is the prediction for class
253 | $i$, sample $j$.
254 |
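In PyTorch this is `nn.CrossEntropyLoss`, which folds the softmax and the cross-entropy sum into one step and takes integer class labels (a minimal sketch):

```python
import torch
import torch.nn as nn

loss_fn = nn.CrossEntropyLoss()
logits = torch.tensor([[2.0, 1.0, 0.1]])  # raw final-layer output, one sample
target = torch.tensor([0])  # true class index
loss = loss_fn(logits, target)  # applies softmax internally
```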
255 | ## Training
256 |
257 | Training is done as before.
258 |
259 | ## Exercise 2
260 |
261 | - Run the code.
262 |
263 | - Identify the problem (underfitting or overfitting).
264 |
265 | - Try possible solutions to get a better result.
266 |
267 | ## Convolutional Neural Network (CNN)
268 |
269 | - Image recognition is often done with CNNs.
270 |
271 | - CNNs perform classification by adding new types of layers, primarily
272 | "convolutions" and "pooling".
273 |
274 | - The "convolution": scanning a filter across the image.
275 |
276 | - The "pooling": take the most significant features from a group of
277 | pixels.
278 |
279 | - Some nice explanations of CNNs by [Adam
280 | Geitgey](https://medium.com/@ageitgey/machine-learning-is-fun-part-3-deep-learning-and-convolutional-neural-networks-f40359318721)
281 | and
282 | [ujjwalkarn](https://ujjwalkarn.me/2016/08/11/intuitive-explanation-convnets/).
283 |
284 | - Our example will use the [MNIST](http://yann.lecun.com/exdb/mnist/)
285 | database of handwritten digits.
286 |
287 | - Based on [this
288 | example](https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py).
289 |
290 | \centering
291 | ![](mnist_0-9.png){width="30%"}
292 |
293 | ## Initialize model, Normalize input
294 |
295 | We shift and normalize the inputs for better fitting.
296 |
297 | We also define the input shape. The images are 28 by 28 pixels, with a
298 | grayscale value. This means each image is defined by a 3D tensor,
299 | $28\times28\times1$ (a color image of the same size would be
300 | $28\times28\times3$).
301 |
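In the accompanying notebook, this shift-and-normalize step is a batch-norm layer on the single grayscale channel:

```python
import torch.nn as nn

batchnorm = nn.BatchNorm2d(1)  # learn a shift/scale for the grayscale channel
```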
302 | ## Convolutional layer
303 |
304 | The first convolutional layer is applied. This involves sweeping a
305 | filter across the image. (Gives "translational invariance.")
306 |
307 | ![](conv2d.gif)
308 |
309 | We use 4 filters with a size of $5\times5$ pixels, with ReLU activation.
310 |
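In PyTorch (as in the accompanying notebook), this layer is:

```python
import torch.nn as nn

conv1 = nn.Conv2d(1, 4, 5)  # 1 input channel (grayscale), 4 filters, 5x5 kernel
```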
311 | ## Max pooling
312 |
313 | Max pooling involves looking at clusters of the output (in this example,
314 | $2\times2$ clusters), and sets the maximum filter value as the value for
315 | the cluster.
316 |
317 | ![](maxPool.gif)
318 |
319 | I.e. a "match" anywhere in the cluster $\implies$ a "match" for the
320 | cluster.
321 |
322 | \vspace{0.5cm}
323 | Since we are also using a stride of 2, the clusters don't overlap.
324 |
325 | Pooling reduces the size of the neural net, speeding up computations.
326 |
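In PyTorch (as in the accompanying notebook), pooling after the convolution looks like this, assuming the `conv1` layer above and an input batch `x`:

```python
import torch.nn.functional as F

x = F.max_pool2d(F.relu(conv1(x)), 2)  # 2x2 clusters, stride 2: keep the max per cluster
```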
327 | ## 2nd convolution and pooling
328 |
329 | A second convolutional layer, followed by max pooling, is used.
330 |
331 | ## Fully-connected layer
332 |
333 | The 3D tensor is converted back to a 1D tensor to act as input for a
334 | dense or fully-connected layer, the same type used with the previous
335 | regression and classification examples.
336 |
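A sketch matching the accompanying notebook, where the two rounds of convolution and pooling leave 16 feature maps of size $4\times4$:

```python
import torch.nn as nn

flatten = nn.Flatten()  # 16 feature maps of 4x4 -> vector of length 256
fc1 = nn.Linear(16 * 4 * 4, 100)  # fully-connected layer, as in the earlier examples
```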
337 | ## Dropout, Softmax
338 |
339 | We add a dropout layer here. In this example, dropout happens at a rate
340 | of 40% (i.e. 40% of the layer's activations are temporarily set to zero
341 | at each training iteration).
342 |
343 | As in the Iris classification problem, we finish with a dense layer and
344 | softmax activation function to return probabilities for each category.
345 |
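In the notebook's `nn.Sequential` version, these final layers look like:

```python
import torch.nn as nn

tail = nn.Sequential(
    nn.Dropout(0.4),     # 40% of activations zeroed each training iteration
    nn.Linear(100, 10),  # dense layer: one output per digit
    nn.Softmax(dim=1),   # probabilities that sum to one
)
```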
346 | ## Compile, Train
347 |
348 | We compile and train as in the previous classification example:
349 |
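A condensed sketch of what the accompanying notebook does at this point:

```python
import torch.nn as nn
import torch.optim as optim

loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adagrad(model.parameters(), lr=0.01)
# then loop over epochs and mini-batches, calling the train/validate closures
```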
350 | ## Exercise 3
351 |
352 | - Run the file.
353 |
354 | - Modify the CNN and training to see how high of a validation accuracy
355 | you can get.
356 |
--------------------------------------------------------------------------------
/pdf-data-extraction/pdfminer_workshop.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# PDF Text Mining using PDFMiner"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "## Installation\n",
15 | "\n",
16 | "`pip install pdfminer.six`"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "## How to Use\n",
24 |         "Below is an edited code example from [Tim Arnold's blog on *Manipulating PDFs with Python*](https://www.binpress.com/tutorial/manipulating-pdfs-with-python/167). It has been modified to be compatible with Python 3.x. Most of it is boilerplate that does not need to change. The only changes needed are the filename and the page(s) of interest."
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {
31 | "collapsed": true
32 | },
33 | "outputs": [],
34 | "source": [
35 | "from io import StringIO\n",
36 | "from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter\n",
37 | "from pdfminer.converter import TextConverter\n",
38 | "from pdfminer.layout import LAParams\n",
39 | "from pdfminer.pdfpage import PDFPage "
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "Identify file and page of interest"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 2,
52 | "metadata": {
53 | "collapsed": true
54 | },
55 | "outputs": [],
56 | "source": [
57 | "filename = 'MDOT_fastfacts02-2011_345554_7.pdf'\n",
58 | "pagenums = [3] # empty list does all pages"
59 | ]
60 | },
61 | {
62 | "cell_type": "markdown",
63 | "metadata": {},
64 | "source": [
65 | "Create instances of classes necessary to read pdf"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": 3,
71 | "metadata": {
72 | "collapsed": true
73 | },
74 | "outputs": [],
75 | "source": [
76 | "output = StringIO()\n",
77 | "manager = PDFResourceManager()\n",
78 | "converter = TextConverter(manager, output, laparams=LAParams())\n",
79 | "interpreter = PDFPageInterpreter(manager, converter)"
80 | ]
81 | },
82 | {
83 | "cell_type": "markdown",
84 | "metadata": {},
85 | "source": [
86 | "Open the pdf and read & process page(s) of interest"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 4,
92 | "metadata": {},
93 | "outputs": [],
94 | "source": [
95 | "with open(filename, 'rb') as fin:\n",
96 | " for page in PDFPage.get_pages(fin, pagenums):\n",
97 | " interpreter.process_page(page)"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "Get output string"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 5,
110 | "metadata": {
111 | "collapsed": true
112 | },
113 | "outputs": [],
114 | "source": [
115 | "text = output.getvalue()\n",
116 | "converter.close()\n",
117 | "output.close()"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "Let's look at the output text string"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 6,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "data": {
134 | "text/plain": [
135 | "'Fast Facts\\n\\n201 7\\n\\nCARPOOL LOTS\\n\\n2015 MICHIGAN \\nSTATE REVENUE PACKAGE\\n\\nn There are 261 carpool parking lots located across \\n\\nthe state, 23 of which are public-private partnerships. \\nIncluded in the public-private partnerships are 17 \\nlocations that MDOT has partnered with Meijer Corp. \\nto provide carpool parking spaces in Meijer parking lots \\nlocated near the highway.\\n\\nn MDOT continues its efforts to provide bike racks at \\n\\ncarpool lots, and to attract transit service to lots \\nwhere appropriate.\\n\\nCOST OF ROAD CONSTRUCTION \\n\\nRoadway construction costs are typically based on standard \\ndesign characteristics, materials, and the type of work performed. \\nGeneral estimates are provided for the average cost per lane mile \\nof major work by roadway type, and material costs. \\n\\nAverage Cost Per Lane Mile by \\nMajor Work Type for Various Networks \\n(2016 figures; in millions) \\n\\nWork Type \\n\\nReconstruction Rehabilitation Average R&R\\n\\n \\n \\n\\nCombined \\nStatewide\\nFreeway\\nNon-Freeway\\nStatewide \\nUrban\\nStatewide \\nRural\\n\\n$2.0 \\n\\n$2.0 \\n$1.9 \\n\\n$2.1 \\n\\n$1.2 \\n\\n$0.6 \\n\\n$0.8 \\n$0.5 \\n\\n$0.7 \\n\\n$0.5 \\n\\n$1.0 \\n\\n$1.3 \\n$0.8 \\n\\n$1.2 \\n\\n$0.6 \\n\\nCost\\n\\n $64.18\\n $48.04\\n $1.31\\n $1.06\\n\\nMaterial Cost for Construction \\n(2016 Year-to-Date)\\n\\nMaterial\\nHot Mix Asphalt (HMA) per Ton\\nConcrete per Square Yard\\nStructural Steel per Pound\\nReinforcement Steel per Pound\\n\\nState Transportation Funding Package\\nOn Nov. 10, Gov. Snyder signed a package of \\ntransportation bills approved by the Legislature. In the \\nshort term, the legislation will:\\n\\n• Provide $450 million in additional fuel tax \\n\\nrevenues, beginning in January 2017. The tax \\non gasoline and diesel fuel will rise to 26.3 cents \\nat that time, as the legislation also provides for \\ndiesel parity.\\n\\n• Provide $190 million from a 20 percent increase \\n\\nin vehicle registration fees, also beginning in \\nJanuary 2017.\\n\\nThis $600 million in new revenue will be distributed to \\nMDOT, county road commissions, cities and villages, \\nand the Comprehensive Transportation Fund through \\nthe existing Act 51 formula, providing a roughly 30 \\npercent increase by 2018.\\nThe new revenue is expected to generate an average \\nof more than 4,000 jobs per year in the first two years. \\nIt will also help address the need to repair and maintain \\nMichigan’s existing transportation systems.\\nBeginning in 2019, the Legislature intends to appropriate \\nincome tax revenue to roads agencies, according \\nto the Act 51 formula, excluding the Comprehensive \\nTransportation Fund, in these amounts:\\n \\n \\n \\nBeginning in 2016, the legislation adds transparency \\nand accountability:\\n\\n• 2019…………………..…..$150 million\\n• 2020……………………....$325 million\\n• 2021 and thereafter……..$600 million\\n\\n• Administrative Expenses: MDOT \\n\\nadministrative expenses, previously capped at \\n10 percent, are now limited to 8 percent of its \\nbudget.\\n\\n• Pavement Warranties: Road agencies are \\nrequired to buy pavement warranties, where \\nappropriate, for projects costing more than \\n$2 million. \\n\\n• Competitive Bidding: To reduce project costs, \\nall agencies are required to competitively bid out \\nprojects costing more than $100,000. 
\\n\\n• Longer-lived Pavements: MDOT will be \\n\\nrequired to prepare a report on the potential for \\nconstructing longer-lived pavements and report \\nto the Legislature by June 2016.\\n\\n4 2017 Fast Facts \\n\\n (Updated 1/2017)\\n\\n\\x0c'"
136 | ]
137 | },
138 | "execution_count": 6,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "text"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "Pretty Print Text"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 7,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "('Fast Facts\\n'\n",
164 | " '\\n'\n",
165 | " '201 7\\n'\n",
166 | " '\\n'\n",
167 | " 'CARPOOL LOTS\\n'\n",
168 | " '\\n'\n",
169 | " '2015 MICHIGAN \\n'\n",
170 | " 'STATE REVENUE PACKAGE\\n'\n",
171 | " '\\n'\n",
172 | " 'n There are 261 carpool parking lots located across \\n'\n",
173 | " '\\n'\n",
174 | " 'the state, 23 of which are public-private partnerships. \\n'\n",
175 | " 'Included in the public-private partnerships are 17 \\n'\n",
176 | " 'locations that MDOT has partnered with Meijer Corp. \\n'\n",
177 | " 'to provide carpool parking spaces in Meijer parking lots \\n'\n",
178 | " 'located near the highway.\\n'\n",
179 | " '\\n'\n",
180 | " 'n MDOT continues its efforts to provide bike racks at \\n'\n",
181 | " '\\n'\n",
182 | " 'carpool lots, and to attract transit service to lots \\n'\n",
183 | " 'where appropriate.\\n'\n",
184 | " '\\n'\n",
185 | " 'COST OF ROAD CONSTRUCTION \\n'\n",
186 | " '\\n'\n",
187 | " 'Roadway construction costs are typically based on standard \\n'\n",
188 | " 'design characteristics, materials, and the type of work performed. \\n'\n",
189 | " 'General estimates are provided for the average cost per lane mile \\n'\n",
190 | " 'of major work by roadway type, and material costs. \\n'\n",
191 | " '\\n'\n",
192 | " 'Average Cost Per Lane Mile by \\n'\n",
193 | " 'Major Work Type for Various Networks \\n'\n",
194 | " '(2016 figures; in millions) \\n'\n",
195 | " '\\n'\n",
196 | " 'Work Type \\n'\n",
197 | " '\\n'\n",
198 | " 'Reconstruction Rehabilitation Average R&R\\n'\n",
199 | " '\\n'\n",
200 | " ' \\n'\n",
201 | " ' \\n'\n",
202 | " '\\n'\n",
203 | " 'Combined \\n'\n",
204 | " 'Statewide\\n'\n",
205 | " 'Freeway\\n'\n",
206 | " 'Non-Freeway\\n'\n",
207 | " 'Statewide \\n'\n",
208 | " 'Urban\\n'\n",
209 | " 'Statewide \\n'\n",
210 | " 'Rural\\n'\n",
211 | " '\\n'\n",
212 | " '$2.0 \\n'\n",
213 | " '\\n'\n",
214 | " '$2.0 \\n'\n",
215 | " '$1.9 \\n'\n",
216 | " '\\n'\n",
217 | " '$2.1 \\n'\n",
218 | " '\\n'\n",
219 | " '$1.2 \\n'\n",
220 | " '\\n'\n",
221 | " '$0.6 \\n'\n",
222 | " '\\n'\n",
223 | " '$0.8 \\n'\n",
224 | " '$0.5 \\n'\n",
225 | " '\\n'\n",
226 | " '$0.7 \\n'\n",
227 | " '\\n'\n",
228 | " '$0.5 \\n'\n",
229 | " '\\n'\n",
230 | " '$1.0 \\n'\n",
231 | " '\\n'\n",
232 | " '$1.3 \\n'\n",
233 | " '$0.8 \\n'\n",
234 | " '\\n'\n",
235 | " '$1.2 \\n'\n",
236 | " '\\n'\n",
237 | " '$0.6 \\n'\n",
238 | " '\\n'\n",
239 | " 'Cost\\n'\n",
240 | " '\\n'\n",
241 | " ' $64.18\\n'\n",
242 | " ' $48.04\\n'\n",
243 | " ' $1.31\\n'\n",
244 | " ' $1.06\\n'\n",
245 | " '\\n'\n",
246 | " 'Material Cost for Construction \\n'\n",
247 | " '(2016 Year-to-Date)\\n'\n",
248 | " '\\n'\n",
249 | " 'Material\\n'\n",
250 | " 'Hot Mix Asphalt (HMA) per Ton\\n'\n",
251 | " 'Concrete per Square Yard\\n'\n",
252 | " 'Structural Steel per Pound\\n'\n",
253 | " 'Reinforcement Steel per Pound\\n'\n",
254 | " '\\n'\n",
255 | " 'State Transportation Funding Package\\n'\n",
256 | " 'On Nov. 10, Gov. Snyder signed a package of \\n'\n",
257 | " 'transportation bills approved by the Legislature. In the \\n'\n",
258 | " 'short term, the legislation will:\\n'\n",
259 | " '\\n'\n",
260 | " '• Provide $450 million in additional fuel tax \\n'\n",
261 | " '\\n'\n",
262 | " 'revenues, beginning in January 2017. The tax \\n'\n",
263 | " 'on gasoline and diesel fuel will rise to 26.3 cents \\n'\n",
264 | " 'at that time, as the legislation also provides for \\n'\n",
265 | " 'diesel parity.\\n'\n",
266 | " '\\n'\n",
267 | " '• Provide $190 million from a 20 percent increase \\n'\n",
268 | " '\\n'\n",
269 | " 'in vehicle registration fees, also beginning in \\n'\n",
270 | " 'January 2017.\\n'\n",
271 | " '\\n'\n",
272 | " 'This $600 million in new revenue will be distributed to \\n'\n",
273 | " 'MDOT, county road commissions, cities and villages, \\n'\n",
274 | " 'and the Comprehensive Transportation Fund through \\n'\n",
275 | " 'the existing Act 51 formula, providing a roughly 30 \\n'\n",
276 | " 'percent increase by 2018.\\n'\n",
277 | " 'The new revenue is expected to generate an average \\n'\n",
278 | " 'of more than 4,000 jobs per year in the first two years. \\n'\n",
279 | " 'It will also help address the need to repair and maintain \\n'\n",
280 | " 'Michigan’s existing transportation systems.\\n'\n",
281 | " 'Beginning in 2019, the Legislature intends to appropriate \\n'\n",
282 | " 'income tax revenue to roads agencies, according \\n'\n",
283 | " 'to the Act 51 formula, excluding the Comprehensive \\n'\n",
284 | " 'Transportation Fund, in these amounts:\\n'\n",
285 | " ' \\n'\n",
286 | " ' \\n'\n",
287 | " ' \\n'\n",
288 | " 'Beginning in 2016, the legislation adds transparency \\n'\n",
289 | " 'and accountability:\\n'\n",
290 | " '\\n'\n",
291 | " '• 2019…………………..…..$150 million\\n'\n",
292 | " '• 2020……………………....$325 million\\n'\n",
293 | " '• 2021 and thereafter……..$600 million\\n'\n",
294 | " '\\n'\n",
295 | " '• Administrative Expenses: MDOT \\n'\n",
296 | " '\\n'\n",
297 | " 'administrative expenses, previously capped at \\n'\n",
298 | " '10 percent, are now limited to 8 percent of its \\n'\n",
299 | " 'budget.\\n'\n",
300 | " '\\n'\n",
301 | " '• Pavement Warranties: Road agencies are \\n'\n",
302 | " 'required to buy pavement warranties, where \\n'\n",
303 | " 'appropriate, for projects costing more than \\n'\n",
304 | " '$2 million. \\n'\n",
305 | " '\\n'\n",
306 | " '• Competitive Bidding: To reduce project costs, \\n'\n",
307 | " 'all agencies are required to competitively bid out \\n'\n",
308 | " 'projects costing more than $100,000. \\n'\n",
309 | " '\\n'\n",
310 | " '• Longer-lived Pavements: MDOT will be \\n'\n",
311 | " '\\n'\n",
312 | " 'required to prepare a report on the potential for \\n'\n",
313 | " 'constructing longer-lived pavements and report \\n'\n",
314 | " 'to the Legislature by June 2016.\\n'\n",
315 | " '\\n'\n",
316 | " '4 2017 Fast Facts \\n'\n",
317 | " '\\n'\n",
318 | " ' (Updated 1/2017)\\n'\n",
319 | " '\\n'\n",
320 | " '\\x0c')\n"
321 | ]
322 | }
323 | ],
324 | "source": [
325 | "from pprint import pprint as prettyprint\n",
326 | "prettyprint(text)"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "Write out text to file"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 8,
339 | "metadata": {},
340 | "outputs": [],
341 | "source": [
342 | "savefile = filename.replace('pdf','txt')\n",
343 | "with open(savefile,'w') as fout:\n",
344 | " fout.write(text)"
345 | ]
346 | },
347 | {
348 | "cell_type": "markdown",
349 | "metadata": {},
350 | "source": [
351 | "# Conclusion\n",
352 | "\n",
353 |         "Trying to reconstruct tables from PDF text-mining tools looks like a formatting nightmare, in the same realm as copy and paste."
354 | ]
355 | },
356 | {
357 | "cell_type": "code",
358 | "execution_count": null,
359 | "metadata": {
360 | "collapsed": true
361 | },
362 | "outputs": [],
363 | "source": []
364 | }
365 | ],
366 | "metadata": {
367 | "kernelspec": {
368 | "display_name": "Python 3",
369 | "language": "python",
370 | "name": "python3"
371 | },
372 | "language_info": {
373 | "codemirror_mode": {
374 | "name": "ipython",
375 | "version": 3
376 | },
377 | "file_extension": ".py",
378 | "mimetype": "text/x-python",
379 | "name": "python",
380 | "nbconvert_exporter": "python",
381 | "pygments_lexer": "ipython3",
382 | "version": "3.5.1"
383 | }
384 | },
385 | "nbformat": 4,
386 | "nbformat_minor": 1
387 | }
388 |
--------------------------------------------------------------------------------
/pytorch/Workshop_CNN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Workshop CNN.ipynb",
7 | "provenance": [],
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {
19 | "id": "view-in-github",
20 | "colab_type": "text"
21 | },
22 | "source": [
23 |         "<a href=\"https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_CNN.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "aa28NQ4b50Wk",
30 | "colab_type": "text"
31 | },
32 | "source": [
33 | "# Image Classification Problem"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "metadata": {
39 | "id": "WK-LdzWl5vH6",
40 | "colab_type": "code",
41 | "outputId": "9633651c-31d5-4c8e-a623-c1e209426f7e",
42 | "colab": {
43 | "base_uri": "https://localhost:8080/",
44 | "height": 67
45 | }
46 | },
47 | "source": [
48 | "import torch\n",
49 | "import torch.nn as nn\n",
50 | "import torch.optim as optim\n",
51 | "import torch.nn.functional as F\n",
52 | "from torch.utils.data import DataLoader\n",
53 | "import torchvision\n",
54 | "from torchvision import datasets, transforms\n",
55 | "import numpy as np\n",
56 | "\n",
57 | "print('Torch version', torch.__version__)\n",
58 | "print('Torchvision version', torchvision.__version__)\n",
59 | "print('Numpy version', np.__version__)"
60 | ],
61 | "execution_count": 1,
62 | "outputs": [
63 | {
64 | "output_type": "stream",
65 | "text": [
66 | "Torch version 1.3.1\n",
67 | "Torchvision version 0.4.2\n",
68 | "Numpy version 1.17.4\n"
69 | ],
70 | "name": "stdout"
71 | }
72 | ]
73 | },
74 | {
75 | "cell_type": "markdown",
76 | "metadata": {
77 | "id": "AKCLbDM754c0",
78 | "colab_type": "text"
79 | },
80 | "source": [
81 |         "The following should say `cuda:0`. If it does not, go to *Edit* -> *Notebook settings* and change the hardware accelerator from `None` to `GPU`. You only have to do this once per notebook."
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "metadata": {
87 | "id": "a2RWBSbo53bz",
88 | "colab_type": "code",
89 | "outputId": "fa70a535-a31d-405b-90cd-5ccb15a4457a",
90 | "colab": {
91 | "base_uri": "https://localhost:8080/",
92 | "height": 34
93 | }
94 | },
95 | "source": [
96 | "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
97 | "device"
98 | ],
99 | "execution_count": 2,
100 | "outputs": [
101 | {
102 | "output_type": "execute_result",
103 | "data": {
104 | "text/plain": [
105 | "'cpu'"
106 | ]
107 | },
108 | "metadata": {
109 | "tags": []
110 | },
111 | "execution_count": 2
112 | }
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {
118 | "id": "DhBlj7GI6Npt",
119 | "colab_type": "text"
120 | },
121 | "source": [
122 | "Define a transform to convert image to PyTorch tensor"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "metadata": {
128 | "id": "VIjSGCNv53fT",
129 | "colab_type": "code",
130 | "colab": {}
131 | },
132 | "source": [
133 | "tf = transforms.ToTensor() # convert image to PyTorch tensor"
134 | ],
135 | "execution_count": 0,
136 | "outputs": []
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {
141 | "id": "0p_SPGXQ6PaD",
142 | "colab_type": "text"
143 | },
144 | "source": [
145 | "Download training **dataset** and create `DataLoader`"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "id": "V5R_cuLZ53ib",
152 | "colab_type": "code",
153 | "colab": {}
154 | },
155 | "source": [
156 | "train_loader = DataLoader(datasets.MNIST('data', download=True, train=True, transform=tf),\n",
157 | " batch_size=100, \n",
158 | " shuffle=True)"
159 | ],
160 | "execution_count": 0,
161 | "outputs": []
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {
166 | "id": "enL0Q9306QBM",
167 | "colab_type": "text"
168 | },
169 | "source": [
170 | "Download validation **dataset** and create `DataLoader`\n"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "metadata": {
176 | "id": "ASnI4ZrW53lj",
177 | "colab_type": "code",
178 | "colab": {}
179 | },
180 | "source": [
181 | "test_loader = DataLoader(datasets.MNIST('data', download=True, train=False, transform=tf),\n",
182 | " batch_size=100, \n",
183 | " shuffle=True)"
184 | ],
185 | "execution_count": 0,
186 | "outputs": []
187 | },
188 | {
189 | "cell_type": "markdown",
190 | "metadata": {
191 | "id": "ttYvEnkb6Qkb",
192 | "colab_type": "text"
193 | },
194 | "source": [
195 |         "We'll write a Python class to define our convolutional neural network."
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "metadata": {
201 | "id": "RBZtZhgy6TCk",
202 | "colab_type": "code",
203 | "colab": {}
204 | },
205 | "source": [
206 | "class TwoLayerCNN(nn.Module):\n",
207 | " def __init__(self):\n",
208 | " super().__init__()\n",
209 | " self.batchnorm = nn.BatchNorm2d(1)\n",
210 | " self.conv1 = nn.Conv2d(1,4,5) # input image channel, output channels, square kernel size\n",
211 | " self.conv2 = nn.Conv2d(4,16,5)\n",
212 | " self.fc1 = nn.Linear(16*4*4,100) # fully connected, 4x4 image size result from 2 conv layers\n",
213 | " self.fc2 = nn.Linear(100,10)\n",
214 | " \n",
215 | " def forward(self,x):\n",
216 | " x1 = self.batchnorm(x)\n",
217 | " x1 = F.max_pool2d(F.relu(self.conv1(x1)), 2)\n",
218 | " x1 = F.max_pool2d(F.relu(self.conv2(x1)), 2)\n",
219 | " x1 = x1.view(-1, self.num_flat_features(x1))\n",
220 |         "        x1 = F.dropout(F.relu(self.fc1(x1)), 0.4)\n",
221 | " x1 = F.relu(self.fc2(x1))\n",
222 | " return x1\n",
223 | " \n",
224 | " def num_flat_features(self, x):\n",
225 | " size = x.size()[1:] # all dimensions except the batch dimension\n",
226 | " num_features = np.prod(size)\n",
227 | " return num_features"
228 | ],
229 | "execution_count": 0,
230 | "outputs": []
231 | },
232 | {
233 | "cell_type": "markdown",
234 | "metadata": {
235 | "id": "M54pEgD06RoL",
236 | "colab_type": "text"
237 | },
238 | "source": [
239 | "We create an instance of this class"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "metadata": {
245 | "id": "iVdKpsuh6TS0",
246 | "colab_type": "code",
247 | "outputId": "258fd01d-b5e0-4e50-d9fd-2655d7e04704",
248 | "colab": {
249 | "base_uri": "https://localhost:8080/",
250 | "height": 134
251 | }
252 | },
253 | "source": [
254 | "model = TwoLayerCNN().to(device)\n",
255 | "model"
256 | ],
257 | "execution_count": 7,
258 | "outputs": [
259 | {
260 | "output_type": "execute_result",
261 | "data": {
262 | "text/plain": [
263 | "TwoLayerCNN(\n",
264 | " (batchnorm): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
265 | " (conv1): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))\n",
266 | " (conv2): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))\n",
267 | " (fc1): Linear(in_features=256, out_features=100, bias=True)\n",
268 | " (fc2): Linear(in_features=100, out_features=10, bias=True)\n",
269 | ")"
270 | ]
271 | },
272 | "metadata": {
273 | "tags": []
274 | },
275 | "execution_count": 7
276 | }
277 | ]
278 | },
279 | {
280 | "cell_type": "markdown",
281 | "metadata": {
282 | "id": "SFRVmzOR6SB7",
283 | "colab_type": "text"
284 | },
285 | "source": [
286 | "We'll define a template for our `fit_model` function that contains `train`, `validate`, and `accuracy` functions."
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "metadata": {
292 | "id": "gZnvxqPu53rs",
293 | "colab_type": "code",
294 | "colab": {}
295 | },
296 | "source": [
297 | "def fit_model(model, loss_fn, optimizer):\n",
298 | " def train(x,y):\n",
299 | " yhat = model(x)\n",
300 | " loss = loss_fn(yhat,y)\n",
301 | " optimizer.zero_grad()\n",
302 | " loss.backward()\n",
303 | " optimizer.step()\n",
304 | " return loss.item(), accuracy(yhat,y)\n",
305 | " \n",
306 | " def validate(x,y):\n",
307 | " yhat = model(x)\n",
308 | " loss = loss_fn(yhat,y)\n",
309 | " return loss.item(), accuracy(yhat,y)\n",
310 | " \n",
311 | " def accuracy(yhat,y):\n",
312 | " probs = np.argmax(yhat.cpu().detach().numpy(), axis=1)\n",
313 | " actual = y.cpu().detach().numpy()\n",
314 | " correct = (probs == actual).sum()\n",
315 | " total = y.shape[0]\n",
316 | " return correct / total \n",
317 | " \n",
318 | " return train, validate"
319 | ],
320 | "execution_count": 0,
321 | "outputs": []
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {
326 | "id": "qCrMhx8Q6TLd",
327 | "colab_type": "text"
328 | },
329 | "source": [
330 | "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions."
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "metadata": {
336 | "id": "XFBR4YbD53oz",
337 | "colab_type": "code",
338 | "colab": {}
339 | },
340 | "source": [
341 | "loss_fn = nn.CrossEntropyLoss()\n",
342 | "learning_rate = 0.01\n",
343 | "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n",
344 | "train, validate = fit_model(model, loss_fn, optimizer)"
345 | ],
346 | "execution_count": 0,
347 | "outputs": []
348 | },
349 | {
350 | "cell_type": "markdown",
351 | "metadata": {
352 | "id": "XNVzkMZI6Tam",
353 | "colab_type": "text"
354 | },
355 | "source": [
356 |         "Here is our training loop with mini-batch processing. We have to move each batch onto the GPU. Here we also loop over a `DataLoader` for the validation dataset in the same way."
357 | ]
358 | },
359 | {
360 | "cell_type": "code",
361 | "metadata": {
362 | "id": "AKk5nZjM6Ths",
363 | "colab_type": "code",
364 | "outputId": "8a867760-9fc7-45de-8398-08b25a395c4c",
365 | "colab": {
366 | "base_uri": "https://localhost:8080/",
367 | "height": 101
368 | }
369 | },
370 | "source": [
371 | "epochs = 5\n",
372 | "for epoch in range(epochs):\n",
373 | " # training \n",
374 | " losses, accuracy = [], []\n",
375 | " for i, (xbatch, ybatch) in enumerate(train_loader):\n",
376 | " xbatch = xbatch.to(device)\n",
377 | " ybatch = ybatch.to(device)\n",
378 | " loss, acc = train(xbatch, ybatch)\n",
379 | " losses.append(loss)\n",
380 | " accuracy.append(acc)\n",
381 | " training_loss = np.mean(losses)\n",
382 | " training_accuracy = np.mean(accuracy)\n",
383 | " # validation\n",
384 | " val_losses, val_accuracy = [], []\n",
385 | " for j, (xtest, ytest) in enumerate(test_loader):\n",
386 | " xtest = xtest.to(device)\n",
387 | " ytest = ytest.to(device)\n",
388 | " val_loss, val_acc = validate(xtest, ytest)\n",
389 | " val_losses.append(val_loss)\n",
390 | " val_accuracy.append(val_acc)\n",
391 | " validation_loss = np.mean(val_losses)\n",
392 | " validation_accuracy = np.mean(val_accuracy)\n",
393 | " # print intermediate results\n",
394 | " print(f'{epoch}, {training_loss:.4f}, {training_accuracy:.3f}, {validation_loss:.4f}, {validation_accuracy:.3f}')"
395 | ],
396 | "execution_count": 10,
397 | "outputs": [
398 | {
399 | "output_type": "stream",
400 | "text": [
401 | "0, 0.3363, 0.899, 0.1599, 0.954\n",
402 | "1, 0.1516, 0.956, 0.1300, 0.961\n",
403 | "2, 0.1271, 0.963, 0.1067, 0.965\n",
404 | "3, 0.1139, 0.967, 0.1046, 0.969\n",
405 | "4, 0.1044, 0.970, 0.0955, 0.972\n"
406 | ],
407 | "name": "stdout"
408 | }
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {
414 | "id": "1AudSc0uAqt9",
415 | "colab_type": "text"
416 | },
417 | "source": [
418 | "### nn.Sequential\n",
419 | "\n",
420 |         "If we wanted to use the simpler `nn.Sequential` container, our model construction would have looked like this."
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "metadata": {
426 | "id": "JlIMzvDyAq3U",
427 | "colab_type": "code",
428 | "outputId": "eb88f17a-b8e3-4089-d468-7fb01d45c00c",
429 | "colab": {
430 | "base_uri": "https://localhost:8080/",
431 | "height": 269
432 | }
433 | },
434 | "source": [
435 | "model_sequential = nn.Sequential(\n",
436 | " nn.BatchNorm2d(1),\n",
437 | " nn.Conv2d(1,4,5),\n",
438 | " nn.ReLU(),\n",
439 | " nn.MaxPool2d(2),\n",
440 | " nn.Conv2d(4,16,5),\n",
441 | " nn.ReLU(),\n",
442 | " nn.MaxPool2d(2),\n",
443 | " nn.Flatten(),\n",
444 | " nn.Linear(256,100),\n",
445 | " nn.ReLU(),\n",
446 | " nn.Dropout(0.4),\n",
447 | " nn.Linear(100,10),\n",
448 | " nn.Softmax(dim=1),\n",
449 | ").to(device)\n",
450 | "model_sequential"
451 | ],
452 | "execution_count": 11,
453 | "outputs": [
454 | {
455 | "output_type": "execute_result",
456 | "data": {
457 | "text/plain": [
458 | "Sequential(\n",
459 | " (0): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
460 | " (1): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))\n",
461 | " (2): ReLU()\n",
462 | " (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
463 | " (4): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))\n",
464 | " (5): ReLU()\n",
465 | " (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
466 | " (7): Flatten()\n",
467 | " (8): Linear(in_features=256, out_features=100, bias=True)\n",
468 | " (9): ReLU()\n",
469 | " (10): Dropout(p=0.4, inplace=False)\n",
470 | " (11): Linear(in_features=100, out_features=10, bias=True)\n",
471 | " (12): Softmax(dim=1)\n",
472 | ")"
473 | ]
474 | },
475 | "metadata": {
476 | "tags": []
477 | },
478 | "execution_count": 11
479 | }
480 | ]
481 | }
482 | ]
483 | }
--------------------------------------------------------------------------------
/pytorch/Workshop_Classification.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Workshop Classification.ipynb",
7 | "provenance": [],
8 | "include_colab_link": true
9 | },
10 | "kernelspec": {
11 | "name": "python3",
12 | "display_name": "Python 3"
13 | }
14 | },
15 | "cells": [
16 | {
17 | "cell_type": "markdown",
18 | "metadata": {
19 | "id": "view-in-github",
20 | "colab_type": "text"
21 | },
22 | "source": [
23 |         "<a href=\"https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Classification.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {
29 | "id": "aa28NQ4b50Wk",
30 | "colab_type": "text"
31 | },
32 | "source": [
33 | "# Classification Problem"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "metadata": {
39 | "id": "WK-LdzWl5vH6",
40 | "colab_type": "code",
41 | "colab": {
42 | "base_uri": "https://localhost:8080/",
43 | "height": 67
44 | },
45 | "outputId": "0f560fe5-5a78-4942-950e-8f5661f81fd9"
46 | },
47 | "source": [
48 | "import torch\n",
49 | "import torch.nn as nn\n",
50 | "import torch.optim as optim\n",
51 | "import torch.nn.functional as F\n",
52 | "from torch.utils.data import TensorDataset, DataLoader\n",
53 | "import numpy as np\n",
54 | "import pandas as pd\n",
55 | "\n",
56 | "print('Torch version', torch.__version__)\n",
57 | "print('Pandas version', pd.__version__)\n",
58 | "print('Numpy version', np.__version__)"
59 | ],
60 | "execution_count": 1,
61 | "outputs": [
62 | {
63 | "output_type": "stream",
64 | "text": [
65 | "Torch version 1.3.1\n",
66 | "Pandas version 0.25.3\n",
67 | "Numpy version 1.17.4\n"
68 | ],
69 | "name": "stdout"
70 | }
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "metadata": {
76 | "id": "AKCLbDM754c0",
77 | "colab_type": "text"
78 | },
79 | "source": [
80 |         "The following should say `cuda:0`. If it does not, go to *Edit* -> *Notebook settings* and change the hardware accelerator from `None` to `GPU`. You only have to do this once per notebook."
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "metadata": {
86 | "id": "a2RWBSbo53bz",
87 | "colab_type": "code",
88 | "colab": {
89 | "base_uri": "https://localhost:8080/",
90 | "height": 34
91 | },
92 | "outputId": "81fac650-e814-4b79-f433-a47d4d089dce"
93 | },
94 | "source": [
95 | "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
96 | "device"
97 | ],
98 | "execution_count": 2,
99 | "outputs": [
100 | {
101 | "output_type": "execute_result",
102 | "data": {
103 | "text/plain": [
104 | "'cpu'"
105 | ]
106 | },
107 | "metadata": {
108 | "tags": []
109 | },
110 | "execution_count": 2
111 | }
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {
117 | "id": "DhBlj7GI6Npt",
118 | "colab_type": "text"
119 | },
120 | "source": [
121 | "Read in dataset"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "metadata": {
127 | "id": "VIjSGCNv53fT",
128 | "colab_type": "code",
129 | "colab": {}
130 | },
131 | "source": [
132 | "df_train = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/iris_training.csv', header=None)\n",
133 | "df_val = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/iris_test.csv', header=None)"
134 | ],
135 | "execution_count": 0,
136 | "outputs": []
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {
141 | "id": "0p_SPGXQ6PaD",
142 | "colab_type": "text"
143 | },
144 | "source": [
145 | "Construct our x,y variables along with the training and validation dataset"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "id": "V5R_cuLZ53ib",
152 | "colab_type": "code",
153 | "colab": {}
154 | },
155 | "source": [
156 | "x_train = df_train.iloc[:,0:-1]\n",
157 | "y_train = df_train.iloc[:,-1]\n",
158 | "x_val = df_val.iloc[:,0:-1]\n",
159 | "y_val = df_val.iloc[:,-1]"
160 | ],
161 | "execution_count": 0,
162 | "outputs": []
163 | },
164 | {
165 | "cell_type": "markdown",
166 | "metadata": {
167 | "id": "enL0Q9306QBM",
168 | "colab_type": "text"
169 | },
170 | "source": [
171 | "Preprocess our data to go from a `pandas` DataFrame to a `numpy` array to a `torch` tensor."
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "metadata": {
177 | "id": "ASnI4ZrW53lj",
178 | "colab_type": "code",
179 | "colab": {}
180 | },
181 | "source": [
182 | "xtrain = torch.tensor(x_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
183 | "ytrain = torch.tensor(y_train.to_numpy(), device=device, dtype=torch.long, requires_grad=False)\n",
184 | "xval = torch.tensor(x_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
185 | "yval = torch.tensor(y_val.to_numpy(), device=device, dtype=torch.long, requires_grad=False)"
186 | ],
187 | "execution_count": 0,
188 | "outputs": []
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "metadata": {
193 | "id": "ttYvEnkb6Qkb",
194 | "colab_type": "text"
195 | },
196 | "source": [
197 |         "We'll write a Python class to define our neural network."
198 | ]
199 | },
200 | {
201 | "cell_type": "code",
202 | "metadata": {
203 | "id": "RBZtZhgy6TCk",
204 | "colab_type": "code",
205 | "colab": {}
206 | },
207 | "source": [
208 | "class FourLayerNN(nn.Module):\n",
209 | " def __init__(self, D_in, H1, H2, H3, D_out):\n",
210 | " super().__init__()\n",
211 | " self.linear1 = nn.Linear(D_in, H1)\n",
212 | " self.linear2 = nn.Linear(H1,H2)\n",
213 | " self.linear3 = nn.Linear(H2,H3)\n",
214 | " self.linear4 = nn.Linear(H3,D_out)\n",
215 | " \n",
216 | " def forward(self,x):\n",
217 | " h1_relu = self.linear1(x).clamp(min=0)\n",
218 | " h2_relu = self.linear2(h1_relu).clamp(min=0)\n",
219 | " h3_relu = self.linear3(h2_relu).clamp(min=0)\n",
220 | " y_pred = self.linear4(h3_relu)\n",
221 | " return y_pred"
222 | ],
223 | "execution_count": 0,
224 | "outputs": []
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {
229 | "id": "M54pEgD06RoL",
230 | "colab_type": "text"
231 | },
232 | "source": [
233 | "We create an instance of this class"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "metadata": {
239 | "id": "iVdKpsuh6TS0",
240 | "colab_type": "code",
241 | "colab": {
242 | "base_uri": "https://localhost:8080/",
243 | "height": 118
244 | },
245 | "outputId": "b83a76b4-a989-4f10-a52a-2f4857de6ed1"
246 | },
247 | "source": [
248 | "model = FourLayerNN(xtrain.shape[1],1000,500,70,y_train.nunique()).to(device)\n",
249 | "model"
250 | ],
251 | "execution_count": 7,
252 | "outputs": [
253 | {
254 | "output_type": "execute_result",
255 | "data": {
256 | "text/plain": [
257 | "FourLayerNN(\n",
258 | " (linear1): Linear(in_features=4, out_features=1000, bias=True)\n",
259 | " (linear2): Linear(in_features=1000, out_features=500, bias=True)\n",
260 | " (linear3): Linear(in_features=500, out_features=70, bias=True)\n",
261 | " (linear4): Linear(in_features=70, out_features=3, bias=True)\n",
262 | ")"
263 | ]
264 | },
265 | "metadata": {
266 | "tags": []
267 | },
268 | "execution_count": 7
269 | }
270 | ]
271 | },
272 | {
273 | "cell_type": "markdown",
274 | "metadata": {
275 | "id": "SFRVmzOR6SB7",
276 | "colab_type": "text"
277 | },
278 | "source": [
279 | "We'll define a template for our `fit_model` function that contains `train`, `validate`, and `accuracy` functions."
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "metadata": {
285 | "id": "gZnvxqPu53rs",
286 | "colab_type": "code",
287 | "colab": {}
288 | },
289 | "source": [
290 | "def fit_model(model, loss_fn, optimizer):\n",
291 | " def train(x,y):\n",
292 | " yhat = model(x)\n",
293 | " loss = loss_fn(yhat,y)\n",
294 | " optimizer.zero_grad()\n",
295 | " loss.backward()\n",
296 | " optimizer.step()\n",
297 | " return loss.item(), accuracy(yhat,y)\n",
298 | " \n",
299 | " def validate(x,y):\n",
300 | " yhat = model(x)\n",
301 | " loss = loss_fn(yhat,y)\n",
302 | " return loss.item(), accuracy(yhat,y)\n",
303 | " \n",
304 | " def accuracy(yhat,y):\n",
305 | " probs = np.argmax(yhat.cpu().detach().numpy(), axis=1)\n",
306 | " actual = y.cpu().detach().numpy()\n",
307 | " correct = (probs == actual).sum()\n",
308 | " total = y.shape[0]\n",
309 | " return correct / total \n",
310 | " \n",
311 | " return train, validate"
312 | ],
313 | "execution_count": 0,
314 | "outputs": []
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {
319 | "id": "qCrMhx8Q6TLd",
320 | "colab_type": "text"
321 | },
322 | "source": [
323 | "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions."
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "metadata": {
329 | "id": "XFBR4YbD53oz",
330 | "colab_type": "code",
331 | "colab": {}
332 | },
333 | "source": [
334 | "loss_fn = nn.CrossEntropyLoss()\n",
335 | "learning_rate = 0.01\n",
336 | "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n",
337 | "train, validate = fit_model(model, loss_fn, optimizer)"
338 | ],
339 | "execution_count": 0,
340 | "outputs": []
341 | },
342 | {
343 | "cell_type": "markdown",
344 | "metadata": {
345 | "id": "ME_plDOp6Slt",
346 | "colab_type": "text"
347 | },
348 | "source": [
349 | "Define a `DataLoader` for our mini-batches."
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "metadata": {
355 | "id": "5SS1NgRs6Syz",
356 | "colab_type": "code",
357 | "colab": {}
358 | },
359 | "source": [
360 | "train_data = TensorDataset(xtrain, ytrain)\n",
361 | "train_loader = DataLoader(dataset=train_data, batch_size=60, shuffle=True)"
362 | ],
363 | "execution_count": 0,
364 | "outputs": []
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {
369 | "id": "XNVzkMZI6Tam",
370 | "colab_type": "text"
371 | },
372 | "source": [
373 | "Here is our training loop with mini-batch processing. We have to move each batch onto the GPU. We also should have a `DataLoader` for the validation dataset but we'll skip that in this case since it is so small."
374 | ]
375 | },
376 | {
377 | "cell_type": "code",
378 | "metadata": {
379 | "id": "AKk5nZjM6Ths",
380 | "colab_type": "code",
381 | "colab": {
382 | "base_uri": "https://localhost:8080/",
383 | "height": 353
384 | },
385 | "outputId": "adecf0cb-e8fa-4f20-d18b-2ac258a72d8f"
386 | },
387 | "source": [
388 | "epochs = 2000\n",
389 | "for epoch in range(epochs):\n",
390 | " # training\n",
391 |         "    losses, accuracies = [], []\n",
392 |         "    for i, (xbatch, ybatch) in enumerate(train_loader):\n",
393 |         "        xbatch = xbatch.to(device)\n",
394 |         "        ybatch = ybatch.to(device)\n",
395 |         "        loss, accuracy = train(xbatch, ybatch)\n",
396 |         "        losses.append(loss)\n",
397 |         "        accuracies.append(accuracy)\n",
398 |         "    training_loss, training_accuracy = np.mean(losses), np.mean(accuracies)\n",
399 |         "    # validation\n",
400 |         "    validation_loss, validation_accuracy = validate(xval, yval)\n",
401 |         "    # print intermediate results\n",
402 |         "    if epoch%100 == 99:\n",
403 |         "        print(f'{epoch}, {training_loss:.4f}, {training_accuracy:.2f}, {validation_loss:.4f}, {validation_accuracy:.2f}')"
404 | ],
405 | "execution_count": 11,
406 | "outputs": [
407 | {
408 | "output_type": "stream",
409 | "text": [
410 | "99, 0.0790, 0.97, 0.0645, 0.97\n",
411 | "199, 0.0817, 0.97, 0.0577, 0.97\n",
412 | "299, 0.0537, 1.00, 0.0652, 1.00\n",
413 | "399, 0.0497, 0.98, 0.0516, 0.98\n",
414 | "499, 0.0403, 1.00, 0.0566, 1.00\n",
415 | "599, 0.0382, 0.98, 0.0541, 0.98\n",
416 | "699, 0.0382, 0.98, 0.0578, 0.98\n",
417 | "799, 0.0355, 0.98, 0.0596, 0.98\n",
418 | "899, 0.0338, 0.98, 0.0643, 0.98\n",
419 | "999, 0.0385, 1.00, 0.0620, 1.00\n",
420 | "1099, 0.0339, 1.00, 0.0672, 1.00\n",
421 | "1199, 0.0327, 1.00, 0.0677, 1.00\n",
422 | "1299, 0.0293, 1.00, 0.0716, 1.00\n",
423 | "1399, 0.0293, 1.00, 0.0717, 1.00\n",
424 | "1499, 0.0290, 1.00, 0.0738, 1.00\n",
425 | "1599, 0.0267, 1.00, 0.0826, 1.00\n",
426 | "1699, 0.0280, 1.00, 0.0815, 1.00\n",
427 | "1799, 0.0274, 1.00, 0.0912, 1.00\n",
428 | "1899, 0.0253, 0.98, 0.1166, 0.98\n",
429 | "1999, 0.0249, 1.00, 0.0899, 1.00\n"
430 | ],
431 | "name": "stdout"
432 | }
433 | ]
434 | },
435 | {
436 | "cell_type": "markdown",
437 | "metadata": {
438 | "id": "1AudSc0uAqt9",
439 | "colab_type": "text"
440 | },
441 | "source": [
442 | "### nn.Sequential\n",
443 | "\n",
444 | "If we wanted to user the simpler `nn.Sequential` function, our model construction would have looked like this."
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "metadata": {
450 | "id": "JlIMzvDyAq3U",
451 | "colab_type": "code",
452 | "colab": {
453 | "base_uri": "https://localhost:8080/",
454 | "height": 168
455 | },
456 | "outputId": "d4987403-3fbd-48ea-bcd7-06fbbc112df7"
457 | },
458 | "source": [
459 | "model_sequential = nn.Sequential(\n",
460 | " nn.Linear(xtrain.shape[1],1000),\n",
461 | " nn.ReLU(),\n",
462 | " nn.Linear(1000,500),\n",
463 | " nn.ReLU(),\n",
464 | " nn.Linear(500,70),\n",
465 | " nn.ReLU(),\n",
466 | " nn.Linear(70,y_train.nunique()),\n",
467 | ").to(device)\n",
468 | "print(model_sequential)"
469 | ],
470 | "execution_count": 12,
471 | "outputs": [
472 | {
473 | "output_type": "stream",
474 | "text": [
475 | "Sequential(\n",
476 | " (0): Linear(in_features=4, out_features=1000, bias=True)\n",
477 | " (1): ReLU()\n",
478 | " (2): Linear(in_features=1000, out_features=500, bias=True)\n",
479 | " (3): ReLU()\n",
480 | " (4): Linear(in_features=500, out_features=70, bias=True)\n",
481 | " (5): ReLU()\n",
482 | " (6): Linear(in_features=70, out_features=3, bias=True)\n",
483 | ")\n"
484 | ],
485 | "name": "stdout"
486 | }
487 | ]
488 | }
489 | ]
490 | }
--------------------------------------------------------------------------------
/matlab/workshop_plotname.m:
--------------------------------------------------------------------------------
1 | %% UROP Matlab Workshop
2 | % @author: Alex Cao, University of Michigan
3 | % Email: caoa AT umich DOT edu
4 | % Consulting for Statistics, Computing, and Analytics Research (CSCAR)
5 | % MATLAB Version: 9.0.0.370719 (R2016a)
6 | % Operating System: Microsoft Windows 7 Enterprise Version 6.1 (Build 7601: Service Pack 1)
7 | % Java Version: Java 1.7.0_60-b19 with Oracle Corporation Java HotSpot(TM) 64-Bit Server VM mixed mode
8 |
9 | % Students can install a free version of Matlab on their PC
10 | % https://www.itcs.umich.edu/sw-info/math/MATLABStudents.html
11 |
12 | %% Start with a clean slate
13 | clear; close all
14 |
15 | %% Creating variables
16 | a = 3.14
17 | b = 'this is a string'
18 | c = [2 4;
19 | 6 8]
20 |
21 | %% Built-In functions and constants
22 | % Constant
23 | d = pi
24 | % Imaginary numbers
25 | e = sqrt(-9)
26 | % Creating imaginary numbers
27 | f = 1-2i
28 |
29 | %% Creating vectors and matrices
30 | % creating a row vector
31 | row_vector = [2 4 6 8 10]
32 | % creating a column vector
33 | col_vector = [1;
34 | 3;
35 | 5;
36 | 7;
37 | 9]
38 | % transpose
39 | row_vector = row_vector' % or row_vector = [2 4 6 8 10]'
40 | % creating a matrix
41 | matrix = [9 8 7;
42 | 6 5 4;
43 | 3 2 1]
44 | % Adding rows or columns to an existing matrix or vector
45 | v = [10 20 30]
46 | addrow = [matrix;
47 | v]
48 | addcol = [matrix v']
49 | % Deleting rows or columns from an existing matrix or vector
50 | addrow(end,:) = []
51 | addcol(:,4) = []
52 |
53 | %% Selecting and accessing data
54 | % Select column(s) of data
55 | a = matrix(:,1)
56 | % To select multiple columns
57 | b = matrix(:,2:3)
58 | % Columns do not even have to be contiguous
59 | c = matrix(:,[3 1])
60 | % Exact same thing for rows
61 | d = matrix(1,:)
62 | e = matrix(2:3,:)
63 | f = matrix([3 1],:)
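% The end keyword (used above when deleting rows/columns) refers to
% the last index along a dimension
g = matrix(end,:) % last row
h = matrix(:,end) % last column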
64 |
65 | %% Plotting
66 | M = magic(3) % magic square
67 | plot(M(:,1),M(:,2),'o-')
68 |
69 | %% Exercise 1 (5 minutes)
70 | % Task 1: Construct a matrix of points to spell out the first letter of your name
71 | % Task 2: Plot the letter
72 | % For example,
73 | % Task 1
74 | A = [0 0;
75 | 1 4;
76 | 2 0;
77 | 1.5 2;
78 | 0.5 2];
79 | % Task 2
80 | plot(A(:,1),A(:,2),'x-')
81 |
82 | %% Running external Matlab programs
83 | % Just type the name of the m-file (the filename should not contain spaces)
84 | % Run the letters m-file to load the custom block-font alphabet created by the author
85 | letters
86 |
87 | %% We will plot our name in Matlab
88 | % Grab your letters from the alphabet (cell array) using the index number
89 | A = alphabet{1};
90 | L = alphabet{12};
91 | E = alphabet{5};
92 | X = alphabet{24};
93 |
94 | %% Matrix
95 | % Letters are stored as an Nx2 matrix
96 | % The first column holds the x-coordinates
97 | % The second column holds the y-coordinates
98 | A
99 |
100 | %% Plotting your name
101 |
102 | % Create a new cell array variable with our letters
103 | name = {A,L,E,X};
104 | % Close previous figure
105 | close
106 | % Open new figure
107 | figure(1)
108 | % Iterate through the letters using a for loop
109 | for i = 1:length(name)
110 | % Grab a letter
111 | letter = name{i};
112 | % Get x and y column
113 | x = letter(:,1);
114 | y = letter(:,2);
115 | % Plot letter with a blue line
116 | plot(x,y,'b-');
117 | % Set axis limits
118 | ylim([-1 5])
119 | axis equal
120 | % Do not overwrite previous plots
121 | hold on
122 | end
123 |
124 | % Create labels
125 | xlabel('x-axis')
126 | ylabel('y-axis')
127 | title('Plotting My Name')
128 |
129 | %%
130 | % In order to see all the letters clearly, we need to offset the letters
131 | % We'll use matrix addition/subtraction to create the offset
132 | % Creating a constant offset is easy
133 | close; figure(2)
134 | for i = 1:length(name)
135 | letter = name{i};
136 | % add offset to the x-coordinate based on letter position
137 | x = letter(:,1) + i*2.5;
138 | y = letter(:,2);
139 | % plot letter with red dash dot line and circle markers
140 | plot(x,y,'r-.o');
141 | hold on
142 | end
143 | % Alternate way to set axis limits
144 | axis([-1 15 -1 5])
145 |
146 | % See the following URLs for different point and line options
147 | % http://www.mathworks.com/help/matlab/ref/plot.html#inputarg_LineSpec
148 |
149 | %%
150 | % You can also add a vector or matrix (instead of a constant) to a matrix
151 | % (i.e. letter)
152 | F = alphabet{6}
153 | plot(F(:,1),F(:,2),'g','linewidth',2)
154 | %%
155 | % Here we add a vector to change the first row (i.e. bottom point)
156 | F(1,:) = F(1,:)+[1 1]
157 | % plot a green line with a linewidth of 2
158 | plot(F(:,1),F(:,2),'g','linewidth',2)
159 |
160 | %%
161 | % We can also scale the letters so that they are smaller or bigger
162 | % We'll use matrix element multiplication to accomplish the scaling
163 | close; figure(3)
164 | for i = 1:length(name)
165 | letter = name{i};
166 | % same x-offset as before
167 | x = letter(:,1) + i*2.5;
168 | % scale the y-coordinate by multiplication of an exponential
169 | y = letter(:,2) * exp(+i/5);
170 | % plot black line with diamond markers
171 | plot(x,y,'k-d');
172 | hold on
173 | end
174 | % Alternate way to set axis limits
175 | xlim([0 15])
176 | ylim([-1 10])
177 |
178 | %% Exercise 2 (5 minutes)
179 | % Task 1: Copy the code section above
180 | % Task 2: Plot your name vertically by using matrix addition/subtraction
181 | % Task 3: Shrink the letters in your name by using matrix-element
182 | % multiplication/division and re-plot it
183 |
184 | %% Animation
185 | % Here's how to animate the letters sequentially
186 | close; figure(4)
187 | axis([-1,15,-1,10])
188 |
189 | % Set time delay between drawing lines
190 | time_delay = 0.5;
191 |
192 | % Create empty cell array for animated objects
193 | object = {};
194 | % Iterate through the letters
195 | for i = 1:length(name)
196 | % Create an animated line object for each letter and save it to the cell array
197 | object{i} = animatedline;
198 | letter = name{i};
199 | x = letter(:,1) + i*2.5;
200 | y = letter(:,2) * exp(+i/5);
201 | % Iterate through each point defining our letter and draw it
202 | for j = 1:length(letter)
203 | addpoints(object{i},x(j),y(j));
204 | drawnow
205 | pause(time_delay)
206 | end
207 | end
208 |
209 | %%
210 | % To produce smoother animation, we need more points to plot
211 | % Make lines with more points (say 100)
212 | num_of_pts = 100;
213 | % Create an evenly spaced vector using linspace
214 | % linspace(start,end,number of points)
215 | x1 = linspace(0,1,num_of_pts);
216 | y1 = linspace(0,4,num_of_pts);
217 | x2 = linspace(1,2,num_of_pts);
218 | y2 = linspace(4,0,num_of_pts);
219 | x3 = linspace(1.5,0.5,num_of_pts);
220 | y3 = linspace(2,2,num_of_pts);
221 |
222 | % The quote symbol (') transposes a matrix;
223 | % here we use it to convert each row vector into a column vector.
224 | % We then concatenate the x and y vectors side by side and stack the
225 | % three segments on top of each other
226 | A = [x1' y1';
227 | x2' y2';
228 | x3' y3'];
229 |
230 | % get size of A
231 | size(A)
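% size returns the dimensions as [rows columns]; A is 300x2 here
% (three 100-point segments stacked on top of each other)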
232 |
233 | %% Exercise 3 (5 minutes)
234 | % Task 1: Similar to the code section above, construct a matrix for the
235 | % letter T using linspace with 100 pts. Hint: You need to create x1, y1,
236 | % x2, y2 for the vertical and horizontal lines
237 | % Task 2: Plot the matrix using x markers (e.g. plot(x,y,'x') ) and set the
238 | % axis so that the letter is not touching a border
239 |
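% One possible sketch for Exercise 3 (the exact coordinates are up to you):
% x1 = linspace(1,1,100); y1 = linspace(0,4,100); % vertical stroke
% x2 = linspace(0,2,100); y2 = linspace(4,4,100); % horizontal stroke
% T = [x1' y1'; x2' y2'];
% plot(T(:,1),T(:,2),'x'); axis([-1 3 -1 5])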
240 | %% Redraw letter A with more points and no time delay
241 | close; figure(5)
242 | hA = animatedline;
243 | axis([-1,12,-1,5])
244 |
245 | for k = 1:length(A)
246 | addpoints(hA,A(k,1),A(k,2));
247 | drawnow
248 | end
249 |
250 | %% Smoother Animation
251 | % I've written a matlab function gen_more_pts.m to add more points to
252 | % letters for you. Let's use it to animate our names.
253 | close; figure(6)
254 | axis([-1,15,-1,10])
255 |
256 | % Create empty cell array for animated objects
257 | object = {};
258 | % For loop for adding points to a line
259 | for i = 1:length(name)
260 | % Create an animated line object for each letter and save it to the cell array
261 | object{i} = animatedline;
262 | letter = name{i};
263 | % The gen_more_pts function creates more points for us
264 | animate_letter = gen_more_pts(letter);
265 | x = animate_letter(:,1) + i*2.5;
266 | y = animate_letter(:,2) * exp(+i/5);
267 | for j = 1:length(animate_letter)
268 | addpoints(object{i},x(j),y(j));
269 | drawnow
270 | end
271 | end
272 |
273 | %% Exercise 4 (5 minutes)
274 | % Task 1: Copy the code section above
275 | % Task 2: Animate the vertical version of your name
276 |
277 | %% Plot Attributes
278 | % You can change the look of your lines after they are plotted by accessing
279 | % their attributes such as Color or LineWidth or LineStyle
280 | % To get a list of a plot attributes, use the get command
281 | get(object{1})
282 | % You can also type "object{1}." followed by a tab to get a dropdown list
283 | % To make something invisible use the Visible attribute
284 | object{1}.Visible = 'off'
285 | % To make something visible again
286 | object{1}.Visible = 'on'
287 | % If you don't know what options are available to you for a specific
288 | % attribute, you can use the set command
289 | set(object{1})
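% set also accepts name-value pairs to change several attributes at once
set(object{1},'Color','g','LineStyle','--')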
290 |
291 | %% Generating Random Numbers and using a random seed
292 | % We will use random numbers to randomly change the attributes of our plot
293 | % Use a seed so that you get a predictable sequence of numbers
294 | % rng(56789)
295 | linestyle_options = {'-','--',':','-.'};
296 | for i = 1:12
297 | % generate one random integer for which letter to modify
298 | n = randi(length(name),1);
299 | % generate a 3x1 vector of random numbers from (0,1)
300 | color = rand(3,1)
301 | object{n}.Color = color;
302 | % generate one random integer for the linewidth
303 | object{n}.LineWidth = randi(10,1);
304 | % generate one random integer for the linestyle
305 | index = randi(length(linestyle_options),1);
306 | object{n}.LineStyle = linestyle_options{index};
307 | pause(1)
308 | end
309 |
310 | %% Exercise 5 (5 minutes)
311 | % Task 1: Generate a 4x1 vector of random integers from 1 to 10
312 | % Task 2: Take the sum of it
313 | % Task 3: Repeat 1 & 2
314 | % Task 4: Set a seed for the random generator using your favourite number
315 | % Task 5: Redo 1,2,3
316 |
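% One possible sketch for Exercise 5 (pick any seed you like):
% rng(42) % seed the generator for a reproducible sequence
% v = randi(10,4,1) % 4x1 vector of random integers from 1 to 10
% total = sum(v)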
317 | %% Let's redraw our name
318 | close; figure(7)
319 | for i = 1:length(name)
320 | letter = name{i};
321 | animate_letter = gen_more_pts(letter);
322 | x = animate_letter(:,1) + i*2.5;
323 | y = animate_letter(:,2);
324 | % plot letters as magenta line with pentagon markers
325 | plot(x,y,'m-p');
326 | hold on
327 | end
328 | % Alternate way to set axis limits
329 | axis([0 15 -1 5])
330 |
331 | %% Now suppose we wanted to cut our name (i.e. points) into half
332 | % We can segment our name using logical indexing
333 | close; figure(8)
334 | y_cutoff = 2.5;
335 | for i = 1:length(name)
336 | letter = name{i};
337 | animate_letter = gen_more_pts(letter);
338 | % original matrix size
339 | disp(size(animate_letter))
340 | % generate boolean of pts meeting criterion
341 | index = animate_letter(:,2) < y_cutoff;
342 | % grab matching pts using indices
343 | animate_letter = animate_letter(index,:);
344 | % new matrix size (should be smaller)
345 | disp(size(animate_letter))
346 | x = animate_letter(:,1) + i*2.5;
347 | y = animate_letter(:,2);
348 | plot(x,y,'m-p');
349 | hold on
350 | end
351 | axis([0 15 -1 5])
352 |
353 | %% You can use more than one logical operation at a time
354 | close; figure(9)
355 | % & means AND
356 | % | means OR
357 | y_cutoff = 2.5;
358 | for i = 1:length(name)
359 | letter = name{i};
360 | animate_letter = gen_more_pts(letter);
361 | size(animate_letter)
362 | % AND statement joining two criteria
363 | index = (animate_letter(:,2) < y_cutoff) & (animate_letter(:,2) > 1.25);
364 | animate_letter = animate_letter(index,:);
365 | size(animate_letter)
366 | x = animate_letter(:,1) + i*2.5;
367 | y = animate_letter(:,2);
368 | plot(x,y,'m-p');
369 | hold on
370 | end
371 | axis([0 15 -1 5])
372 |
373 | %% Exercise 6 (5 minutes)
374 | % Task 1: Copy the code section above
375 | % Task 2: Only show the portion of your name that is less than 1 or greater
376 | % than 2 on the y-axis
377 |
378 | %% Import Data Demo
379 | % There are many functions to import data into Matlab from external sources
380 | % Some choices are: uiimport, load, importdata, textscan, dlmread, fread,
381 | % fscanf, readtable, xlsread
382 | %
383 | % The most friendly method to beginners is uiimport which acts like excel
384 | uiimport('crash.txt')
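% For scripted (non-interactive) imports, readtable is a good alternative;
% a minimal sketch, assuming crash.txt is delimited text with the column
% headers referenced in Exercise 7:
% T = readtable('crash.txt');
% severity = T.CrashSeverity; % access a column by name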
385 |
386 | %% Exercise 7 (10 minutes)
387 | % Task 1: Import the CrashSeverity column into the workspace
388 | % Task 2: Extract the fatal crashes (value = 1) using logical indexing
389 | % Task 3: Count how many fatal crashes there are in dataset
390 | % Task 4: Import the Longitude/Latitude columns into the workspace
391 | % Task 5: Plot Longitude/Latitude coordinates using any triangle marker.
392 | % Are there any bad data points?
393 | % Tip: Longitude should be negative in this case.
394 | % Task 6: Remove the bad points using logical indexing and re-plot the
395 | % coordinates using a triangle marker
396 |
397 |
398 | %% Some useful Matlab commands to know
399 | % Saving your work
400 | % Saving variables in your workspace
401 | save workshop.mat
402 | % Clear the workspace
403 | clear
404 | % Reload everything
405 | load workshop.mat
406 | % If you just want to save a couple of variables
407 | save workshop X E L A
408 | % close last figure
409 | close
410 | % close all figures
411 | close all
412 | % clear command window
413 | clc
414 | % bring up command history
415 | commandhistory
416 | % ans holds the last result not assigned to a variable
417 | ans
418 |
419 | %% Formatting output
420 | z = 1534513546
421 | % To change the look of the output, use the format function
422 | format longg
423 | z
424 | % To change back to the default format
425 | format
426 |
427 | %% Getting Help
428 | % help for a function
429 | help plot
430 | doc plot
431 | % Bring up Matlab examples
432 | demo
433 | % You can also use the search bar in the top right corner or use the *?*
434 | % icon next to it to open up an equivalent window
435 |
436 | %% References
437 | % MathWorks (makers of Matlab) Resources
438 |
439 | % Matlab tutorials from MathWorks
440 | % https://www.mathworks.com/support/learn-with-matlab-tutorials.html
441 | % http://www.mathworks.com/help/matlab/getting-started-with-matlab.html
442 |
443 | % Matlab Forum for Q&A
444 | % http://www.mathworks.com/matlabcentral/answers/
445 |
446 | % Cody: Challenge yourself to Matlab coding problems
447 | % http://www.mathworks.com/matlabcentral/cody
448 |
449 | % PDF tutorial
450 | % https://www.mathworks.com/help/pdf_doc/matlab/getstart.pdf
451 |
452 | % 3rd Party Add-Ons
453 | % http://www.mathworks.com/matlabcentral/fileexchange/
454 |
455 | % Matlab Blogs
456 | % http://blogs.mathworks.com
457 |
458 | % Matlab Toolboxes
459 | % https://www.mathworks.com/products/
460 |
461 | % To see what is installed on your version of Matlab, use the ver
462 | % command
463 | ver
464 |
465 | %% Other Matlab Resources
466 |
467 | % Interactive course by the University of Edinburgh
468 | % http://www.see.ed.ac.uk/teaching/courses/matlab/
469 |
470 | % Free online book
471 | % http://greenteapress.com/matlab/
472 |
473 |
474 | %% Other Fun Stuff
475 |
476 | %% Alternate way to do animation
477 | % Rotate our name
478 | % Let's plot our name again
479 | % The plot command will be outside the for loop this time
480 | close; figure(100)
481 | alex = [];
482 | for i = 1:length(name)
483 | letter = name{i};
484 | x = letter(:,1) + i*2.5;
485 | y = letter(:,2);
486 | alex = [alex; x y];
487 | end
488 | hAlex = plot(alex(:,1),alex(:,2),'linewidth',2,'color',[0.7 0.2 0.5]);
489 | axis([-12 12 -12 12])
490 |
491 | %%
492 | % Point the data source attributes at the rotateAlex variable (created below)
493 | hAlex.XDataSource = 'rotateAlex(:,1)';
494 | hAlex.YDataSource = 'rotateAlex(:,2)';
495 | % Create an evenly spaced vector from 0 to 2*pi for rotation
496 | th = linspace(0,2*pi,500);
497 |
498 | %% Rotate about z-axis
499 | for i = 1:length(th)
500 | % Angle
501 | theta = th(i);
502 | % Rotation matrix about z-axis
503 | Rz = [cos(theta) -sin(theta);
504 | sin(theta) cos(theta)];
505 | % Matrix multiplication of rotation matrix with name points
506 | rotateAlex = (Rz*alex')';
507 | % Update figure handle
508 | refreshdata(hAlex)
509 | % Pause in seconds
510 | pause(0.01)
511 | end
512 |
513 | %% Center my name around the origin
514 | % use repmat to duplicate the 1x2 mean vector down the rows
515 | alex2 = alex - repmat(mean(alex),size(alex,1),1);
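% Note: in R2016b and later, implicit expansion lets you skip repmat:
% alex2 = alex - mean(alex);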
516 | % Add the z-value of zero to my name points
517 | alex2 = [alex2 zeros(size(alex2,1),1)];
518 |
519 | %% Rotate about y-axis
520 | for i = 1:length(th)
521 | theta = th(i);
522 | Ry = [cos(theta) 0 sin(theta);
523 | 0 1 0;
524 | -sin(theta) 0 cos(theta)];
525 | rotateAlex = (Ry*alex2')';
526 | refreshdata(hAlex)
527 | pause(0.01)
528 | end
529 |
530 | %% Rotate about x-axis
531 | % Shift my name along the y-axis before rotating
532 | alex2(:,2) = alex2(:,2) + min(alex2(:,2));
533 | for i = 1:length(th)
534 | theta = th(i);
535 | Rx = [1 0 0;
536 | 0 cos(theta) -sin(theta);
537 | 0 sin(theta) cos(theta)];
538 | rotateAlex = (Rx*alex2')';
539 | refreshdata(hAlex)
540 | pause(0.01)
541 | end
542 |
543 | %% Animation of a helix
544 | n = 5000; % determines how many pts to draw
545 | xc = 3; yc = 3;
546 | r = linspace(1,6,n); % radius
547 | t = linspace(0,12*pi,n); % how many loops to make
548 | x = 0.8*r.*cos(t) + xc;
549 | y = r.*sin(t) + yc;
550 | z = linspace(0,5,n);
551 | v = linspace(0.001,1,n);
552 | close all; figure(101)
553 | h = animatedline;
554 | axis([-10,10,-10,10,0 5])
555 | grid on
556 | xlabel('X'); ylabel('Y'); zlabel('Z')
557 | for k = 1:n
558 | h.LineWidth = (v(k)+1)*4;
559 | h.Color = [v(k) 1-v(k) v(k)];
560 | addpoints(h,x(k),y(k),z(k));
561 | % Set viewing angle
562 | view(-mod(k/120,90),90-mod(k/72,70))
563 | drawnow
564 | end
565 |
--------------------------------------------------------------------------------
/pytorch/Workshop_Regression_Class.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "Workshop Regression Class",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "include_colab_link": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | },
15 | "accelerator": "GPU"
16 | },
17 | "cells": [
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {
21 | "id": "view-in-github",
22 | "colab_type": "text"
23 | },
24 | "source": [
25 | "
"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {
31 | "id": "7G_TdHMkSL8q",
32 | "colab_type": "text"
33 | },
34 | "source": [
35 | "**Regression Problem**"
36 | ]
37 | },
38 | {
39 | "cell_type": "code",
40 | "metadata": {
41 | "id": "GWhz8RPhRfF1",
42 | "colab_type": "code",
43 | "outputId": "fb2e7b2d-d11b-4eef-fc81-62d6a7bbb725",
44 | "colab": {
45 | "base_uri": "https://localhost:8080/",
46 | "height": 67
47 | }
48 | },
49 | "source": [
50 | "import torch\n",
51 | "import torch.nn as nn\n",
52 | "import torch.optim as optim\n",
53 | "import torch.nn.functional as F\n",
54 | "from torch.utils.data import TensorDataset, DataLoader\n",
55 | "import numpy as np\n",
56 | "import pandas as pd\n",
57 | "\n",
58 | "print('Torch version', torch.__version__)\n",
59 | "print('Pandas version', pd.__version__)\n",
60 | "print('Numpy version', np.__version__)"
61 | ],
62 | "execution_count": 2,
63 | "outputs": [
64 | {
65 | "output_type": "stream",
66 | "text": [
67 | "Torch version 1.3.1\n",
68 | "Pandas version 0.25.3\n",
69 | "Numpy version 1.17.4\n"
70 | ],
71 | "name": "stdout"
72 | }
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {
78 | "id": "d80zm5dOSsOr",
79 | "colab_type": "text"
80 | },
81 | "source": [
82 | "The following should say `cuda:0`. If it does not, we need to go to *Edit* -> *Notebook settings* and change it to a `GPU` from `None`. You only have to do this once per notebook."
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "metadata": {
88 | "id": "ga1yyVAfRgK3",
89 | "colab_type": "code",
90 | "outputId": "87b9a739-2cf0-4f10-f112-544c6bf05edf",
91 | "colab": {
92 | "base_uri": "https://localhost:8080/",
93 | "height": 34
94 | }
95 | },
96 | "source": [
97 | "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
98 | "device"
99 | ],
100 | "execution_count": 3,
101 | "outputs": [
102 | {
103 | "output_type": "execute_result",
104 | "data": {
105 | "text/plain": [
106 | "'cuda:0'"
107 | ]
108 | },
109 | "metadata": {
110 | "tags": []
111 | },
112 | "execution_count": 3
113 | }
114 | ]
115 | },
116 | {
117 | "cell_type": "markdown",
118 | "metadata": {
119 | "id": "rW2RnKe3hvmh",
120 | "colab_type": "text"
121 | },
122 | "source": [
123 | "Read in dataset"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "metadata": {
129 | "id": "3U_r7UGpRf-g",
130 | "colab_type": "code",
131 | "colab": {}
132 | },
133 | "source": [
134 | "df_train = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/dataRegression_train.csv', header=None)\n",
135 | "df_val = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/dataRegression_test.csv', header=None)"
136 | ],
137 | "execution_count": 0,
138 | "outputs": []
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {
143 | "id": "okdjDnbphzjK",
144 | "colab_type": "text"
145 | },
146 | "source": [
147 | "Construct our x,y variables along with the training and validation dataset"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "metadata": {
153 | "id": "7EBgffu2RgG_",
154 | "colab_type": "code",
155 | "colab": {}
156 | },
157 | "source": [
158 | "x_train = df_train.iloc[:,0:2]\n",
159 | "y_train = df_train.iloc[:,2]\n",
160 | "x_val = df_val.iloc[:,0:2]\n",
161 | "y_val = df_val.iloc[:,2]"
162 | ],
163 | "execution_count": 0,
164 | "outputs": []
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {
169 | "id": "7D4h_C16gcjG",
170 | "colab_type": "text"
171 | },
172 | "source": [
173 | "Preprocess our data to go from a `pandas` DataFrame to a `numpy` array to a `torch` tensor."
174 | ]
175 | },
176 | {
177 | "cell_type": "code",
178 | "metadata": {
179 | "id": "vjq5O0XfRmPv",
180 | "colab_type": "code",
181 | "colab": {}
182 | },
183 | "source": [
184 | "x_train_tensor = torch.tensor(x_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
185 | "y_train_tensor = torch.tensor(y_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
186 | "x_val_tensor = torch.tensor(x_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
187 | "y_val_tensor = torch.tensor(y_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
188 | "y_train_tensor = y_train_tensor.view(-1,1)\n",
189 | "y_val_tensor = y_val_tensor.view(-1,1)"
190 | ],
191 | "execution_count": 0,
192 | "outputs": []
193 | },
194 | {
195 | "cell_type": "markdown",
196 | "metadata": {
197 | "id": "H-JVIuXegeix",
198 | "colab_type": "text"
199 | },
200 | "source": [
201 | "We'll write a python class to define out neural network."
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "metadata": {
207 | "id": "33HwoaxDR-mx",
208 | "colab_type": "code",
209 | "colab": {}
210 | },
211 | "source": [
212 | "class ThreeLayerNN(nn.Module):\n",
213 | " def __init__(self, dim_input, H):\n",
214 | " super().__init__()\n",
215 | " self.fc1 = nn.Linear(dim_input, H)\n",
216 | " self.fc2 = nn.Linear(H,H)\n",
217 | " self.fc3 = nn.Linear(H,1)\n",
218 | " \n",
219 | " def forward(self, x):\n",
220 | " x1 = F.relu(self.fc1(x))\n",
221 | " x2 = F.relu(self.fc2(x1))\n",
222 | " y_pred = self.fc3(x2)\n",
223 | " return y_pred"
224 | ],
225 | "execution_count": 0,
226 | "outputs": []
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "metadata": {
231 | "id": "NRH6Qp9VglBx",
232 | "colab_type": "text"
233 | },
234 | "source": [
235 | "We create an instance of this class."
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "metadata": {
241 | "id": "aqnHMyc9R-xI",
242 | "colab_type": "code",
243 | "outputId": "a3446684-71c0-4531-9bf7-3e544230f18d",
244 | "colab": {
245 | "base_uri": "https://localhost:8080/",
246 | "height": 101
247 | }
248 | },
249 | "source": [
250 | "model = ThreeLayerNN(x_train_tensor.shape[1],5).to(device)\n",
251 | "print(model)"
252 | ],
253 | "execution_count": 8,
254 | "outputs": [
255 | {
256 | "output_type": "stream",
257 | "text": [
258 | "ThreeLayerNN(\n",
259 | " (fc1): Linear(in_features=2, out_features=5, bias=True)\n",
260 | " (fc2): Linear(in_features=5, out_features=5, bias=True)\n",
261 | " (fc3): Linear(in_features=5, out_features=1, bias=True)\n",
262 | ")\n"
263 | ],
264 | "name": "stdout"
265 | }
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {
271 | "id": "ryc3EnW4RwqI",
272 | "colab_type": "text"
273 | },
274 | "source": [
275 | "`model.parameters()` contains the **weights** and **bias** (alternating) for each of the 3 layers\n",
276 | "\n"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "metadata": {
282 | "id": "1-VGjPHeRmWH",
283 | "colab_type": "code",
284 | "outputId": "10d21071-3079-4923-d9cf-3755f6242b22",
285 | "colab": {
286 | "base_uri": "https://localhost:8080/",
287 | "height": 437
288 | }
289 | },
290 | "source": [
291 | "params = list(model.parameters())\n",
292 | "print(f'There are {len(params)} parameters')\n",
293 | "for param in params:\n",
294 | " print(param)"
295 | ],
296 | "execution_count": 9,
297 | "outputs": [
298 | {
299 | "output_type": "stream",
300 | "text": [
301 | "There are 6 parameters\n",
302 | "Parameter containing:\n",
303 | "tensor([[-0.6722, -0.1253],\n",
304 | " [ 0.3271, -0.5386],\n",
305 | " [-0.4360, -0.6635],\n",
306 | " [-0.0597, 0.2654],\n",
307 | " [-0.4511, -0.1803]], device='cuda:0', requires_grad=True)\n",
308 | "Parameter containing:\n",
309 | "tensor([ 0.4774, 0.0608, 0.3351, 0.6132, -0.1335], device='cuda:0',\n",
310 | " requires_grad=True)\n",
311 | "Parameter containing:\n",
312 | "tensor([[-0.4279, 0.0746, -0.2874, -0.4331, 0.0757],\n",
313 | " [-0.1138, -0.2704, 0.0156, 0.3182, 0.1802],\n",
314 | " [ 0.1589, -0.3853, 0.0769, 0.0236, 0.2774],\n",
315 | " [ 0.4160, 0.0268, 0.0658, 0.0249, 0.0023],\n",
316 | " [-0.1503, 0.1482, -0.0260, 0.2199, 0.2633]], device='cuda:0',\n",
317 | " requires_grad=True)\n",
318 | "Parameter containing:\n",
319 | "tensor([ 0.1400, 0.2608, 0.2217, -0.2910, 0.0465], device='cuda:0',\n",
320 | " requires_grad=True)\n",
321 | "Parameter containing:\n",
322 | "tensor([[ 0.1069, 0.0756, -0.3563, 0.3523, -0.4246]], device='cuda:0',\n",
323 | " requires_grad=True)\n",
324 | "Parameter containing:\n",
325 | "tensor([-0.3458], device='cuda:0', requires_grad=True)\n"
326 | ],
327 | "name": "stdout"
328 | }
329 | ]
330 | },
331 | {
332 | "cell_type": "markdown",
333 | "metadata": {
334 | "id": "VQffaw77ft98",
335 | "colab_type": "text"
336 | },
337 | "source": [
338 | "We'll define a template for our `fit_model` function that contains `train` and `validate` functions.\n",
339 | "\n",
340 | "---\n",
341 | "\n"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "metadata": {
347 | "id": "amLbK4yBRmfg",
348 | "colab_type": "code",
349 | "colab": {}
350 | },
351 | "source": [
352 | "def fit_model(model, loss_fn, optimizer):\n",
353 | " def train(x,y):\n",
354 | " yhat = model(x)\n",
355 | " loss = loss_fn(yhat,y)\n",
356 | " optimizer.zero_grad()\n",
357 | " loss.backward()\n",
358 | " optimizer.step()\n",
359 | " return loss.item()\n",
360 | " \n",
361 | " def validate(x,y):\n",
362 | " yhat = model(x)\n",
363 | " loss = loss_fn(yhat,y)\n",
364 | " return loss.item()\n",
365 | " \n",
366 | " return train, validate"
367 | ],
368 | "execution_count": 0,
369 | "outputs": []
370 | },
371 | {
372 | "cell_type": "markdown",
373 | "metadata": {
374 | "id": "cKdszOgAguKD",
375 | "colab_type": "text"
376 | },
377 | "source": [
378 | "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions.\n"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "metadata": {
384 | "id": "eh_iIPQnSD40",
385 | "colab_type": "code",
386 | "colab": {}
387 | },
388 | "source": [
389 | "loss_fn = nn.MSELoss(reduction='mean') #default\n",
390 | "learning_rate = 0.1\n",
391 | "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n",
392 | "train, validate = fit_model(model, loss_fn, optimizer)"
393 | ],
394 | "execution_count": 0,
395 | "outputs": []
396 | },
397 | {
398 | "cell_type": "markdown",
399 | "metadata": {
400 | "id": "W0uIChFNfa2c",
401 | "colab_type": "text"
402 | },
403 | "source": [
404 | "## Mini-batches\n",
405 | "From the documentation: `torch.nn` only supports mini-batches. The entire `torch.nn` package only supports inputs that are a mini-batch of samples, and not a single sample."
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "metadata": {
411 | "id": "09NsOy59SD8J",
412 | "colab_type": "code",
413 | "colab": {}
414 | },
415 | "source": [
416 | "train_data = TensorDataset(x_train_tensor, y_train_tensor)\n",
417 | "train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)"
418 | ],
419 | "execution_count": 0,
420 | "outputs": []
421 | },
422 | {
423 | "cell_type": "markdown",
424 | "metadata": {
425 | "id": "y748bWpQg_5x",
426 | "colab_type": "text"
427 | },
428 | "source": [
429 | "Here is our training loop with mini-batch processing. We have to move each mini-batch onto the GPU."
430 | ]
431 | },
432 | {
433 | "cell_type": "code",
434 | "metadata": {
435 | "id": "fexqm4D9SHyh",
436 | "colab_type": "code",
437 | "outputId": "5c42c58a-b7d7-4c6a-9c70-e1e2d084ea12",
438 | "colab": {
439 | "base_uri": "https://localhost:8080/",
440 | "height": 185
441 | }
442 | },
443 | "source": [
444 | "epochs = 100\n",
445 | "for epoch in range(epochs):\n",
446 | " # training\n",
447 | " losses = []\n",
448 | " for i, (xbatch, ybatch) in enumerate(train_loader):\n",
449 | " xbatch = xbatch.to(device)\n",
450 | " ybatch = ybatch.to(device)\n",
451 | " loss = train(xbatch, ybatch)\n",
452 | " losses.append(loss)\n",
453 | " training_loss = np.mean(losses)\n",
454 | " # validation\n",
455 | " validation_loss = validate(x_val_tensor, y_val_tensor)\n",
456 | " # print intermediate results\n",
457 | " if epoch%10 == 9:\n",
458 | " print(epoch, training_loss, validation_loss)"
459 | ],
460 | "execution_count": 13,
461 | "outputs": [
462 | {
463 | "output_type": "stream",
464 | "text": [
465 | "9 5.217282251878218 8.100061416625977\n",
466 | "19 4.6458352262323555 6.509875774383545\n",
467 | "29 4.617666352878917 6.0749030113220215\n",
468 | "39 4.465590021827004 5.876566410064697\n",
469 | "49 4.46304219419306 5.840087413787842\n",
470 | "59 4.436497558246959 5.683042049407959\n",
471 | "69 4.447906385768544 5.73892068862915\n",
472 | "79 4.456741766496138 5.724264144897461\n",
473 | "89 4.4289374351501465 5.7146830558776855\n",
474 | "99 4.434686617417769 5.704777717590332\n"
475 | ],
476 | "name": "stdout"
477 | }
478 | ]
479 | },
480 | {
481 | "cell_type": "markdown",
482 | "metadata": {
483 | "id": "wri-bxVPhPHB",
484 | "colab_type": "text"
485 | },
486 | "source": [
487 | "We can view the current state of our model using the `state_dict` method."
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "metadata": {
493 | "id": "xmiD0CQvSH2D",
494 | "colab_type": "code",
495 | "outputId": "997bf898-5732-4966-dee0-88b41b452c7b",
496 | "colab": {
497 | "base_uri": "https://localhost:8080/",
498 | "height": 319
499 | }
500 | },
501 | "source": [
502 | "model.state_dict()"
503 | ],
504 | "execution_count": 14,
505 | "outputs": [
506 | {
507 | "output_type": "execute_result",
508 | "data": {
509 | "text/plain": [
510 | "OrderedDict([('fc1.weight', tensor([[-0.9870, -0.4540],\n",
511 | " [ 2.0965, -0.3272],\n",
512 | " [-0.4208, -0.8602],\n",
513 | " [ 1.4232, 0.2407],\n",
514 | " [-0.4511, -0.1803]], device='cuda:0')),\n",
515 | " ('fc1.bias',\n",
516 | " tensor([ 0.0582, 0.2425, 0.0584, 0.6218, -0.1335], device='cuda:0')),\n",
517 | " ('fc2.weight',\n",
518 | " tensor([[-0.2153, 1.3850, -0.1548, 0.3375, 0.0757],\n",
519 | " [ 0.1091, 1.0617, 0.1496, 1.1005, 0.1802],\n",
520 | " [ 0.0043, -0.5234, -0.0231, -0.1097, 0.2774],\n",
521 | " [ 0.4160, 0.0268, 0.0658, 0.0249, 0.0023],\n",
522 | " [-0.2503, -0.0960, -0.1260, 0.0717, 0.2633]], device='cuda:0')),\n",
523 | " ('fc2.bias',\n",
524 | " tensor([ 0.3495, 0.5070, 0.0802, -0.2910, -0.1100], device='cuda:0')),\n",
525 | " ('fc3.weight',\n",
526 | " tensor([[ 1.0817, 0.8173, -0.2413, 0.3523, -0.3157]], device='cuda:0')),\n",
527 | " ('fc3.bias', tensor([-0.1517], device='cuda:0'))])"
528 | ]
529 | },
530 | "metadata": {
531 | "tags": []
532 | },
533 | "execution_count": 14
534 | }
535 | ]
536 | }
537 | ]
538 | }
--------------------------------------------------------------------------------