├── pytorch
    ├── DNN.png
    ├── L2reg.png
    ├── mtn.png
    ├── conv2d.gif
    ├── maxPool.gif
    ├── mnist_0-9.png
    ├── playground.png
    ├── Workshop_DL.pdf
    ├── overfitting.png
    ├── overfitting2.png
    ├── underfitting.png
    ├── DNN_activations.png
    ├── goodRegression.png
    ├── iris_versicolor.jpg
    ├── DNNRegressor_data.png
    ├── DNNRegressor_fit.png
    ├── README.md
    ├── workshop_neural_net.md
    ├── Workshop_CNN.ipynb
    ├── Workshop_Classification.ipynb
    └── Workshop_Regression_Class.ipynb
├── D3
    ├── img
    │   ├── end_code.png
    │   ├── create_rect.png
    │   ├── exercise1.png
    │   ├── g_element.png
    │   ├── numerically.png
    │   ├── start_code.png
    │   ├── create_x_axis.png
    │   ├── data_variable.png
    │   ├── exercise1_sol.png
    │   ├── alphabetically.png
    │   ├── create_bar_element.png
    │   └── team_logo_games_labels.png
    ├── exercise_1
    │   ├── teams.csv
    │   ├── exercise_1.css
    │   ├── solution
    │   │   ├── teams.csv
    │   │   ├── solution_1.css
    │   │   ├── index.html
    │   │   └── solution_1.js
    │   ├── index.html
    │   └── exercise_1.js
    ├── exercise_2
    │   ├── teams.csv
    │   ├── exercise_2.css
    │   ├── solution
    │   │   ├── teams.csv
    │   │   ├── solution_2.css
    │   │   ├── index.html
    │   │   └── solution_2.js
    │   ├── index.html
    │   └── exercise_2.js
    ├── exercise_3
    │   ├── teams.csv
    │   ├── exercise_3.css
    │   ├── solution
    │   │   ├── teams.csv
    │   │   ├── solution_3.css
    │   │   ├── index.html
    │   │   ├── solution_3.html
    │   │   └── solution_3.js
    │   ├── index.html
    │   └── exercise_3.js
    ├── index.html
    ├── preprocessing
    │   └── preprocessing.py
    ├── urls.js
    ├── bar.css
    └── sortable.js
├── NLP
    ├── img
    │   ├── nltk_spacy.png
    │   ├── spacy_comp.PNG
    │   └── pipeline.svg
    └── README.md
├── regex
    ├── img
    │   ├── webpage.png
    │   └── pagesource.png
    ├── README.md
    └── data
    │   └── vins.txt
├── sql
    ├── img
    │   ├── screenshot.png
    │   ├── foodforthought2.png
    │   └── Visual_SQL_JOINS_orig.jpg
    └── README.md
├── sqlite
    ├── photos
    │   ├── Odie.jpg
    │   ├── lassie.jpg
    │   ├── scooby.jpg
    │   ├── snoopy.jpg
    │   └── wallace.jpg
    └── README.md
├── .gitattributes
├── fusion-tables
    ├── README.md
    └── Seattle_Parks.csv
├── thematic-maps
    ├── img
    │   ├── contour.png
    │   ├── isopleth.png
    │   ├── mi_choropleth.png
    │   ├── small
    │   │   ├── isopleth.png
    │   │   ├── mi_choropleth.png
    │   │   ├── top_20_crashes.png
    │   │   └── snowmobile_crashes.png
    │   ├── top_20_crashes.png
    │   └── snowmobile_crashes.png
    ├── README.md
    ├── snow_crashes.csv
    ├── snowmobile_crashes.txt
    └── deer_in_the_city.txt
├── pdf-data-extraction
    ├── 2013-02-005-v1.pdf
    ├── summary_of_fees_collected.pdf
    ├── MDOT_fastfacts02-2011_345554_7.pdf
    ├── README.md
    └── pdfminer_workshop.ipynb
├── matlab
    ├── README.md
    ├── gen_more_pts.m
    ├── letters.m
    └── workshop_plotname.m
├── dotmap
    └── README.md
├── flask
    └── README.md
├── geospatial-analysis
    └── README.md
├── datashader
    └── README.md
├── pandas
    └── README.md
├── network-analysis
    └── README.md
├── geopandas
    └── README.md
├── webscraping
    └── README.md
├── pyspark
    ├── README.md
    └── sample.csv
├── python-intro
    └── README.md
├── README.md
├── .gitignore
└── sql-intermediate
    └── README.md


/pytorch/DNN.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNN.png


--------------------------------------------------------------------------------
/pytorch/L2reg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/L2reg.png


--------------------------------------------------------------------------------
/pytorch/mtn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/mtn.png


--------------------------------------------------------------------------------
/D3/img/end_code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/end_code.png


--------------------------------------------------------------------------------
/pytorch/conv2d.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/conv2d.gif


--------------------------------------------------------------------------------
/pytorch/maxPool.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/maxPool.gif


--------------------------------------------------------------------------------
/D3/img/create_rect.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_rect.png


--------------------------------------------------------------------------------
/D3/img/exercise1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/exercise1.png


--------------------------------------------------------------------------------
/D3/img/g_element.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/g_element.png


--------------------------------------------------------------------------------
/D3/img/numerically.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/numerically.png


--------------------------------------------------------------------------------
/D3/img/start_code.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/start_code.png


--------------------------------------------------------------------------------
/NLP/img/nltk_spacy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/NLP/img/nltk_spacy.png


--------------------------------------------------------------------------------
/NLP/img/spacy_comp.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/NLP/img/spacy_comp.PNG


--------------------------------------------------------------------------------
/pytorch/mnist_0-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/mnist_0-9.png


--------------------------------------------------------------------------------
/pytorch/playground.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/playground.png


--------------------------------------------------------------------------------
/regex/img/webpage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/regex/img/webpage.png


--------------------------------------------------------------------------------
/sql/img/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/screenshot.png


--------------------------------------------------------------------------------
/sqlite/photos/Odie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/Odie.jpg


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Set the default behavior, in case people don't have core.autocrlf set.
2 | * text=auto


--------------------------------------------------------------------------------
/D3/img/create_x_axis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_x_axis.png


--------------------------------------------------------------------------------
/D3/img/data_variable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/data_variable.png


--------------------------------------------------------------------------------
/D3/img/exercise1_sol.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/exercise1_sol.png


--------------------------------------------------------------------------------
/fusion-tables/README.md:
--------------------------------------------------------------------------------
1 | # Fusion Tables
2 | The slide deck is available at https://goo.gl/VDtjgn
3 | 


--------------------------------------------------------------------------------
/pytorch/Workshop_DL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/Workshop_DL.pdf


--------------------------------------------------------------------------------
/pytorch/overfitting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/overfitting.png


--------------------------------------------------------------------------------
/pytorch/overfitting2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/overfitting2.png


--------------------------------------------------------------------------------
/pytorch/underfitting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/underfitting.png


--------------------------------------------------------------------------------
/regex/img/pagesource.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/regex/img/pagesource.png


--------------------------------------------------------------------------------
/sqlite/photos/lassie.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/lassie.jpg


--------------------------------------------------------------------------------
/sqlite/photos/scooby.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/scooby.jpg


--------------------------------------------------------------------------------
/sqlite/photos/snoopy.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/snoopy.jpg


--------------------------------------------------------------------------------
/D3/img/alphabetically.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/alphabetically.png


--------------------------------------------------------------------------------
/pytorch/DNN_activations.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNN_activations.png


--------------------------------------------------------------------------------
/pytorch/goodRegression.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/goodRegression.png


--------------------------------------------------------------------------------
/pytorch/iris_versicolor.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/iris_versicolor.jpg


--------------------------------------------------------------------------------
/sql/img/foodforthought2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/foodforthought2.png


--------------------------------------------------------------------------------
/sqlite/photos/wallace.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/wallace.jpg


--------------------------------------------------------------------------------
/D3/img/create_bar_element.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_bar_element.png


--------------------------------------------------------------------------------
/pytorch/DNNRegressor_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNNRegressor_data.png


--------------------------------------------------------------------------------
/pytorch/DNNRegressor_fit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNNRegressor_fit.png


--------------------------------------------------------------------------------
/thematic-maps/img/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/contour.png


--------------------------------------------------------------------------------
/D3/exercise_1/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 | 


--------------------------------------------------------------------------------
/D3/exercise_2/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 | 


--------------------------------------------------------------------------------
/D3/exercise_3/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 | 


--------------------------------------------------------------------------------
/thematic-maps/img/isopleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/isopleth.png


--------------------------------------------------------------------------------
/D3/exercise_1/exercise_1.css:
--------------------------------------------------------------------------------
1 | .label{
2 |   text-anchor: middle;
3 | }
4 | 
5 | .barlabel { 
6 |   text-anchor: middle;
7 | }


--------------------------------------------------------------------------------
/D3/exercise_1/solution/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 | 


--------------------------------------------------------------------------------
/D3/exercise_2/exercise_2.css:
--------------------------------------------------------------------------------
1 | .label{
2 |   text-anchor: middle;
3 | }
4 | 
5 | .barlabel { 
6 |   text-anchor: middle;
7 | }


--------------------------------------------------------------------------------
/D3/exercise_2/solution/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 | 


--------------------------------------------------------------------------------
/D3/exercise_3/exercise_3.css:
--------------------------------------------------------------------------------
1 | .label{
2 |   text-anchor: middle;
3 | }
4 | 
5 | .barlabel { 
6 |   text-anchor: middle;
7 | }


--------------------------------------------------------------------------------
/D3/exercise_3/solution/teams.csv:
--------------------------------------------------------------------------------
1 | team,value
2 | Boston,100
3 | Detroit,85
4 | New York,80
5 | Chicago,75
6 | Atlanta,30
7 | 


--------------------------------------------------------------------------------
/D3/img/team_logo_games_labels.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/team_logo_games_labels.png


--------------------------------------------------------------------------------
/sql/img/Visual_SQL_JOINS_orig.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/Visual_SQL_JOINS_orig.jpg


--------------------------------------------------------------------------------
/thematic-maps/img/mi_choropleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/mi_choropleth.png


--------------------------------------------------------------------------------
/thematic-maps/img/small/isopleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/isopleth.png


--------------------------------------------------------------------------------
/thematic-maps/img/top_20_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/top_20_crashes.png


--------------------------------------------------------------------------------
/D3/exercise_1/solution/solution_1.css:
--------------------------------------------------------------------------------
1 | .label{
2 |   text-anchor: middle;
3 | }
4 | 
5 | .barlabel { 
6 |   text-anchor: middle;
7 | }


--------------------------------------------------------------------------------
/D3/exercise_2/solution/solution_2.css:
--------------------------------------------------------------------------------
1 | .label{
2 |   text-anchor: middle;
3 | }
4 | 
5 | .barlabel { 
6 |   text-anchor: middle;
7 | }


--------------------------------------------------------------------------------
/pdf-data-extraction/2013-02-005-v1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/2013-02-005-v1.pdf


--------------------------------------------------------------------------------
/thematic-maps/img/small/mi_choropleth.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/mi_choropleth.png


--------------------------------------------------------------------------------
/thematic-maps/img/small/top_20_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/top_20_crashes.png


--------------------------------------------------------------------------------
/thematic-maps/img/snowmobile_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/snowmobile_crashes.png


--------------------------------------------------------------------------------
/D3/exercise_3/solution/solution_3.css:
--------------------------------------------------------------------------------
1 | .label{
2 |     text-anchor: middle;
3 |   }
4 |   
5 |   .barlabel { 
6 |     text-anchor: middle;
7 |   }


--------------------------------------------------------------------------------
/thematic-maps/img/small/snowmobile_crashes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/snowmobile_crashes.png


--------------------------------------------------------------------------------
/pdf-data-extraction/summary_of_fees_collected.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/summary_of_fees_collected.pdf


--------------------------------------------------------------------------------
/matlab/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to Matlab
2 | 
3 | This workshop was created for the UROP program as a brief (2 hour) intro to Matlab and its capabilities.
4 | 


--------------------------------------------------------------------------------
/pdf-data-extraction/MDOT_fastfacts02-2011_345554_7.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/MDOT_fastfacts02-2011_345554_7.pdf


--------------------------------------------------------------------------------
/sqlite/README.md:
--------------------------------------------------------------------------------
1 | # SQLITE
2 | The jupyter notebook can be found here  
3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/sqlite/sqlite3.ipynb


--------------------------------------------------------------------------------
/dotmap/README.md:
--------------------------------------------------------------------------------
1 | # Working with Geographical Data and Parallel Computing on Flux
2 | 
3 | The workshop code is in another repository at  
4 | https://github.com/clarkdatalabs/dotmap_workshop
5 | 
6 | 


--------------------------------------------------------------------------------
/flask/README.md:
--------------------------------------------------------------------------------
1 | # Flask
2 | 
3 | This 2 hr workshop introduces Flask for deploying web applications.
4 | 
5 | My student, Ellen Paquet, prepared the workshop materials and it is located at https://github.com/epmarie/flask_example_app


--------------------------------------------------------------------------------
/geospatial-analysis/README.md:
--------------------------------------------------------------------------------
1 | You can preview the HTML material at this Github HTML previewer:  
2 | https://htmlpreview.github.io/?https://github.com/caocscar/workshops/blob/master/geospatial%20analysis/Geospatial%2BAnalysis%2BWorkshop.html
3 | 


--------------------------------------------------------------------------------
/datashader/README.md:
--------------------------------------------------------------------------------
1 | # Datashader
2 | 
3 | This 1 hr workshop introduces the datashader visualization tool for large datasets.  
4 | https://github.com/caocscar/workshops/blob/master/datashader/datashader.ipynb
5 | 
6 | ## External Files
7 | Files are too large to be included.
8 | 


--------------------------------------------------------------------------------
/D3/exercise_1/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <link rel="stylesheet" href="exercise_1.css">
 7 | </head>
 8 | <body>
 9 |   <script src="exercise_1.js"></script>
10 | </body>
11 | </html>


--------------------------------------------------------------------------------
/D3/exercise_2/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <link rel="stylesheet" href="exercise_2.css">
 7 | </head>
 8 | <body>
 9 |   <script src="exercise_2.js"></script>
10 | </body>
11 | </html>


--------------------------------------------------------------------------------
/D3/exercise_1/solution/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <link rel="stylesheet" href="solution_1.css">
 7 | </head>
 8 | <body>
 9 |   <script src="solution_1.js"></script>
10 | </body>
11 | </html>


--------------------------------------------------------------------------------
/regex/README.md:
--------------------------------------------------------------------------------
1 | # Jupyter Notebook Viewer
2 | Regular Expression Part I
3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/regex/Regex%20Tutorial%20P1.ipynb
4 | 
5 | Regular Expression Part II
6 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/regex/Regex%20Tutorial%20P2.ipynb
7 | 


--------------------------------------------------------------------------------
/pandas/README.md:
--------------------------------------------------------------------------------
1 | # Intro to Pandas Workshop
2 | 
3 | This workshop introduces the user to the world of `pandas` and includes common data wrangling methods.  
4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pandas/pandas.ipynb
5 | 
6 | ## External Files
7 | 
8 | I've excluded them because the data files are large. [TODO] Include smaller version of files.
9 | 


--------------------------------------------------------------------------------
/network-analysis/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to Network Analysis using igraph
2 | 
3 | This 2 hr workshop introduces igraph for network analysis.
4 | 
5 | https://nbviewer.jupyter.org/github/epmarie/network_workshop/blob/master/IntroNetworkAnalysis.ipynb
6 | 
7 | My student, Ellen Paquet, prepared the workshop materials and it is located at https://github.com/epmarie/network_workshop
8 | 
9 | 


--------------------------------------------------------------------------------
/D3/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <script src="https://d3js.org/d3-array.v2.min.js"></script>
 7 |   <script src="urls.js"></script>
 8 |   <script src="sortable.js"></script>
 9 |   <link rel="stylesheet" href="bar.css">
10 | </head>
11 | <body>
12 |   <script>createChart()</script>
13 | </body>
14 | </html>


--------------------------------------------------------------------------------
/geopandas/README.md:
--------------------------------------------------------------------------------
1 | # GeoPandas
2 | This 2 hr workshop introduces `geopandas` and maybe some `fiona`, `shapely`, `rtree`, `pysal`, and `folium`.  
3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/geopandas/Geopandas.ipynb
4 | 
5 | My student, Yiming Cai, prepared the workshop materials.
6 | 
7 | ## External Files
8 | I've excluded them because the shapefiles are large. [TODO] Include smaller version of files.
9 | 


--------------------------------------------------------------------------------
/webscraping/README.md:
--------------------------------------------------------------------------------
 1 | Web Scraping in Python Notebook  
 2 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/webscraping_in_python.ipynb
 3 | 
 4 | Google API Notebook  
 5 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Google.ipynb
 6 | 
 7 | Twitter API Notebook  
 8 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Twitter.ipynb
 9 | 
10 | 


--------------------------------------------------------------------------------
/matlab/gen_more_pts.m:
--------------------------------------------------------------------------------
 1 | function M2 = gen_more_pts(M)
 2 | 
 3 | if size(M,1) > 20
 4 |     disp('You have too many points submitted. This will take forever!!!')
 5 |     M2 = M;
 6 |     return
 7 | end
 8 | M2 = [];
 9 | for i = 1:size(M,1)-1
10 |     x1 = M(i,1);
11 |     x2 = M(i+1,1);
12 |     y1 = M(i,2);
13 |     y2 = M(i+1,2);
14 |     x = [linspace(x1,x2,100)]';
15 |     y = [linspace(y1,y2,100)]';
16 |     M2 = [M2; x y];
17 | end
18 |     


--------------------------------------------------------------------------------
/pytorch/README.md:
--------------------------------------------------------------------------------
1 | # PyTorch Workshop
2 | 
3 | [**Regression Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Regression_Class.ipynb)
4 | 
5 | [**Classification Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Classification.ipynb)
6 | 
7 | [**Image Classification Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_CNN.ipynb)
8 | 


--------------------------------------------------------------------------------
/pdf-data-extraction/README.md:
--------------------------------------------------------------------------------
 1 | # Extracting Data from PDF
 2 | 
 3 | There are 2 Jupyter Notebooks for this workshop (preferably done in this order):  
 4 | Tabula  
 5 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pdf%20data%20extraction/tabula_workshop.ipynb
 6 | 
 7 | PDF Miner  
 8 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pdf%20data%20extraction/pdfminer_workshop.ipynb
 9 | 
10 | ## External Files
11 | The `workshop_registration.pdf` is missing for privacy reasons. [TODO] Find a new pdf to use
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/thematic-maps/README.md:
--------------------------------------------------------------------------------
 1 | # Create Thematic Maps with Python
 2 | 
 3 | This 2 hr workshop demonstrates how to create thematic maps using Matplotlib.  
 4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/thematic%20maps/Thematic%20Maps%20with%20Matplotlib.ipynb
 5 | 
 6 | Thematic Type|Image
 7 | :---:|---
 8 | Choropleth Map|![Choropleth](img/small/mi_choropleth.png)
 9 | Dot Map|![Dot](img/small/snowmobile_crashes.png)
10 | Proportional Dot Map|![Proportional](img/small/top_20_crashes.png)
11 | Isopleth|![Isopleth](img/small/isopleth.png)
12 | 


--------------------------------------------------------------------------------
/NLP/README.md:
--------------------------------------------------------------------------------
 1 | # Intro to Natural Language Processing
 2 | 
 3 | #### Jupyter Notebook Viewer Version
 4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/NLP/IntroNLP.ipynb
 5 | 
 6 | This 2.5 hr workshop covers the following Python packages: 
 7 | - `spaCy` (tagger, parser, named-entity recognition)
 8 | - `textacy` (n-grams)
 9 | - `gensim` (topic modelling)
10 | - `pyLDAvis` (visualization)
11 | - `textblob` (sentiment analysis)
12 | 
13 | My student, Ellen Paquet, prepared the workshop materials. Her original repo is located at https://github.com/epmarie/IntroNLP
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/D3/exercise_3/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <link rel="stylesheet" href="exercise_3.css">
 7 | </head>
 8 | <body>
 9 |     <div id="option">
10 |         <input name="alpabeticalButton" 
11 |                type="button" 
12 |                value="Sort Alphabetically" 
13 |                onclick="updateAlpha()" />
14 |     </div>
15 |     <div id="option">
16 |         <input name="numericalButton" 
17 |                type="button" 
18 |                value="Sort Numerically" 
19 |                onclick="updateNum()" />
20 |     </div>
21 |   <script src="exercise_3.js"></script>
22 | </body>
23 | </html>


--------------------------------------------------------------------------------
/D3/exercise_2/solution/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <link rel="stylesheet" href="solution_2.css">
 7 | </head>
 8 | <body>
 9 |     <div id="option">
10 |         <input name="alpabeticalButton" 
11 |                type="button" 
12 |                value="Sort Alphabetically" 
13 |                onclick="updateAlpha()" />
14 |     </div>
15 |     <div id="option">
16 |         <input name="numericalButton" 
17 |                type="button" 
18 |                value="Sort Numerically" 
19 |                onclick="updateNum()" />
20 |     </div>
21 |   <script src="solution_2.js"></script>
22 | </body>
23 | </html>


--------------------------------------------------------------------------------
/D3/exercise_3/solution/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <link rel="stylesheet" href="solution_3.css">
 7 | </head>
 8 | <body>
 9 |     <div id="option">
10 |         <input name="alpabeticalButton" 
11 |                type="button" 
12 |                value="Sort Alphabetically" 
13 |                onclick="updateAlpha()" />
14 |     </div>
15 |     <div id="option">
16 |         <input name="numericalButton" 
17 |                type="button" 
18 |                value="Sort Numerically" 
19 |                onclick="updateNum()" />
20 |     </div>
21 |   <script src="solution_3.js"></script>
22 | </body>
23 | </html>


--------------------------------------------------------------------------------
/D3/exercise_3/solution/solution_3.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | <head>
 4 |   <meta charset="UTF-8">
 5 |   <script src="https://d3js.org/d3.v5.min.js"></script>
 6 |   <!-- <link rel="stylesheet" href="button.css"> -->
 7 | </head>
 8 | <body>
 9 |     <div id="option1">
10 |         <input name="alpabeticalButton" 
11 |                type="button" 
12 |                value="Sort Alphabetically" 
13 |                onclick="updateAlpha()" />
14 |     </div>
15 |     <div id="option2">
16 |         <input name="numericalButton" 
17 |                type="button" 
18 |                value="Sort Numerically" 
19 |                onclick="updateNum()" />
20 |     </div>
21 |   <script src="animated.js"></script>
22 | </body>
23 | </html>


--------------------------------------------------------------------------------
/D3/preprocessing/preprocessing.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Fri Oct 18 16:17:58 2019
 4 | 
 5 | @author: caoa
 6 | """
 7 | import pandas as pd
 8 | 
 9 | pd.options.display.max_rows =20
10 | pd.options.display.max_columns = 20
11 | 
12 | df = pd.read_csv('GL2018.TXT', header=None, usecols=[0,3,6,9,10])
13 | df.columns = ['date','away','home','aRuns','hRuns']
14 | 
15 | #%%
16 | df['team'] = df.apply(lambda x: x['away'] if x['aRuns'] > x['hRuns'] else x['home'], axis=1)
17 | data = df[['date','team']]
18 | data.to_csv('daily_snapshot.csv', index=False)
19 | 
20 | #%% Find first day where all teams have won at least one game
21 | data['date'] = pd.to_datetime(data['date'], format='%Y%m%d')
22 | daterange = pd.date_range('2018-03-29','2018-10-01',freq='D')
23 | for day in daterange:
24 |     abc = data[data['date'] <= day]
25 |     xyz = abc.team.value_counts()
26 |     if xyz.shape[0] >= 30:
27 |         print(day)
28 |         break
29 | 


--------------------------------------------------------------------------------
/D3/urls.js:
--------------------------------------------------------------------------------
 1 | const urls = {
 2 |   'ARI':'Arizona_Diamondbacks',
 3 |   'ATL':'Atlanta_Braves',
 4 |   'SFN':'SanFrancisco_Giants',
 5 |   'CHN':'Chicago_Cubs',
 6 |   'NYN':'NewYork_Mets',
 7 |   'MIL':'Milwaukee_Brewers',
 8 |   'BAL':'Baltimore_Orioles',
 9 |   'CHA':'Chicago_White_Sox',
10 |   'OAK':'Oakland_Athletics',
11 |   'SEA':'Seattle_Mariners',
12 |   'TBA':'TampaBay_Rays',
13 |   'HOU':'Houston_Astros',
14 |   'NYA':'NewYork_Yankees',
15 |   'PHI':'Philadelphia_Phillies',
16 |   'WAS':'Washington_Nationals',
17 |   'MIA':'Miami_Marlins',
18 |   'PIT':'Pittsburgh_Pirates',
19 |   'ANA':'LosAngeles_Angels',
20 |   'BOS':'Boston_Redsox',
21 |   'TEX':'Texas_Rangers',
22 |   'COL':'Colorado_Rockies',
23 |   'LAN':'LosAngeles_Dodgers',
24 |   'MIN':'Minnesota_Twins',
25 |   'CLE':'Cleveland_Indians',
26 |   'TOR':'Toronto_Blue_Jays',
27 |   'SLN':'StLouis_Cardinals',
28 |   'CIN':'Cincinnati_Reds',
29 |   'DET':'Detroit_Tigers',
30 |   'SDN':'SanDiego_Padres',
31 |   'KCA':'KansasCity_Royals',
32 | }


--------------------------------------------------------------------------------
/D3/bar.css:
--------------------------------------------------------------------------------
 1 | /* .chart {
 2 |   clip-path: url(#clip); 
 3 | } */
 4 | 
 5 | .bar {
 6 |   fill: orange;
 7 | }
 8 | 
 9 | .x.axis text {
10 |   font: 15px sans-serif;
11 | }
12 | 
13 | .axis path, .axis line {
14 |   fill: none;
15 |   stroke: '#000';
16 |   shape-rendering: crispEdges;
17 | }
18 | 
19 | .label {
20 |   text-anchor: middle;
21 |   font: 20px helvetica;
22 | }
23 | 
24 | #date {
25 |   text-anchor: start;
26 |   font: 20px helvetica;  
27 | }
28 | 
29 | .grid line {
30 |   stroke: lightgrey;
31 |   stroke-opacity: 0.7;
32 |   shape-rendering: crispEdges;
33 | }
34 | 
35 | .grid path {
36 |   stroke-width: 0;
37 | }
38 | 
39 | .team {
40 |   fill: black;
41 |   font: 14px sans-serif;
42 |   text-anchor: end;
43 |   font-weight: 600;
44 | }
45 | 
46 | .barlabel{
47 |   fill: black;
48 |   font: 14px sans-serif;
49 |   text-anchor: left;
50 |   font-weight: 600;
51 | }
52 | 
53 | .logo {
54 |   fill: black;
55 |   font: 14px sans-serif;
56 |   text-anchor: middle;
57 | }
58 | 
59 | .divisions {
60 |   stroke: black;
61 |   stroke-width: 2;
62 |   stroke-dasharray: 12;
63 | }


--------------------------------------------------------------------------------
/pyspark/README.md:
--------------------------------------------------------------------------------
 1 | # PySpark: DataFrames, Datasets, and SparkSQL
 2 | [pyspark.md](pyspark.md) contains the markdown material for the PySpark workshop.
 3 | 
 4 | # Scala: DataFrames, Datasets, and SparkSQL
 5 | [scala.md](scala.md) contains the markdown material for the Scala workshop.
 6 | 
 7 | # PySpark vs. Scala
 8 | Here's an [article](https://www.pluralsight.com/blog/software-development/scala-vs-python) comparing the two of them.
 9 | 
10 | ## Using PySpark with the Twitter Decahose dataset on Cavium
11 | The github repo is located at https://github.com/caocscar/twitter-decahose-pyspark
12 | 
13 | **Note**: You need to get permission to access the dataset first. More information available at: https://midas.umich.edu/research-datasets/
14 | 
15 | ## Cheat Sheets
16 | My github Hadoop cheat sheet  
17 | https://github.com/caocscar/hadoopcheatsheet
18 | 
19 | DataCamp's cheat sheet for PySpark DataFrames  
20 | https://s3.amazonaws.com/assets.datacamp.com/blog_assets/PySpark_SQL_Cheat_Sheet_Python.pdf
21 | 
22 | Edrukea's cheat sheet for PySpark RDDs  
23 | https://www.edureka.co/blog/cheatsheets/pyspark-cheat-sheet-python/
24 | 
25 | 


--------------------------------------------------------------------------------
/python-intro/README.md:
--------------------------------------------------------------------------------
 1 | # Introduction to Python
 2 | 
 3 | There are no notes for this 6 hour workshop. I do it freestyle using a project-based learning approach and provide attendees with a beginner's cheat sheet.
 4 | 
 5 | Exercises  
 6 | https://goo.gl/bw1J9L
 7 | 
 8 | Mastermind Game Online  
 9 | http://www.webgamesonline.com/mastermind/
10 | 
11 | A good cheat sheet for beginners is located at  
12 | http://ehmatthes.github.io/pcc/cheatsheets/README.html
13 | 
14 | Here is a link for learning Python for programmers  
15 | https://wiki.python.org/moin/BeginnersGuide/Programmers
16 | 
17 | Here is a link for learning Python for non-programmers  
18 | https://wiki.python.org/moin/BeginnersGuide/NonProgrammers
19 | 
20 | ## Python Topics Covered
21 | ### Python Functions
22 | ```
23 | input
24 | type
25 | from import
26 | random
27 | range
28 | print
29 | len
30 | zip
31 | id
32 | time
33 | ```
34 | 
35 | ### Data Types
36 | ```
37 | int  
38 | float  
39 | string  
40 | list  
41 | tuple  
42 | dictionary  
43 | set  
44 | ```
45 | 
46 | ### Control Flow
47 | ```
48 | if elif else
49 | for
50 | while
51 | continue
52 | break
53 | pass
54 | ```
55 | 
56 | ### File I/O
57 | ```
58 | with
59 | open
60 | write
61 | read
62 | readlines
63 | ```
64 | 
65 | ### Miscelleaneous
66 | comments  
67 | list comprehension  
68 | casting variables  
69 | how to write a function  
70 | integer division  
71 | reference vs. copying variables  
72 | banker's rounding  
73 | 


--------------------------------------------------------------------------------
/sql/README.md:
--------------------------------------------------------------------------------
 1 | # Intro to SQL
 2 | Here is the [WORKSHOP SLIDE DECK](http://nbviewer.jupyter.org/format/slides/github/caocscar/workshops/blob/master/sql/SQLslides.ipynb#/).
 3 | We'll be using the [w3schools website](https://www.w3schools.com/sql/) to write queries. It also is a good reference for SQL.
 4 | 
 5 | ---
 6 | # Miscellaneous Stuff
 7 | 
 8 | ## Converting Jupyter Notebook into Slide Deck
 9 | The following command will render your Jupyter Notebook into a **reveal.js** slide deck. 
10 | 
11 | `jupyter nbconvert SQLslides.ipynb --to slides --post serve`
12 | 
13 | The `--post serve` command starts up a local server to host it. 
14 | 
15 | **Tip**: Make sure your Jupyter notebook is closed before running the command.
16 | 
17 | ### Configuration Options
18 | More options available at https://nbconvert.readthedocs.io/en/latest/config_options.html
19 | 
20 | ## How to Post Slide Deck Online
21 | 1. Go to http://nbviewer.jupyter.org
22 | 2. Enter url where the Jupyter Notebook file can be located.
23 | 3. Make sure **nbviewer** is in *slide mode* and not *notebook mode* among the icons in the top right.
24 | 
25 | ## Contributors
26 | The slide deck was created originally by my student [Maggie Orton](https://github.com/margamo/intro-to-SQL) on March 14, 2017.
27 | 
28 | And modified by my student [Kaitlin Cornwell](https://github.com/kaitcorn/intro-to-SQL) on March 16, 2018.
29 | 
30 | And further modified by my student [Jessica Zhang](https://github.com/jezzhang/sqlworkshop) on January 31, 2020.
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CSCAR Workshops
 2 | This is a list of some (but not all) CSCAR Workshops I've done (in no particular order).
 3 | - [Introduction to Matlab](matlab)
 4 | - [Introduction to Python](python-intro)
 5 | - [Pandas](pandas)
 6 | - [Introduction to SQL](sql)
 7 | - [Intermediate SQL](sql-intermediate)
 8 | - [SQLite](sqlite)
 9 | - [Regular Expressions](regex)
10 | - [Natural Language Processing with Python](NLP)
11 | - [Network Analysis with igraph](network-analysis)
12 | - [SparkSQL and DataFrames with PySpark](pyspark) (Using PySpark with the [Twitter Decahose dataset on Cavium](https://github.com/caocscar/twitter-decahose-pyspark))
13 | - [GeoPandas](geopandas)
14 | - [Geospatial Analysis with Python](geospatial-analysis)
15 | - [Working with Geographical Data and Parallel Computing on Flux](dotmap)
16 | - [Thematic Maps with Python](thematic-maps)
17 | - [Datashader](datashader)
18 | - [Google Fusion Tables](fusion-tables) (this product is no longer available)
19 | - [Web Scraping with Python](webscraping)
20 |   - [Scraping HTML](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/webscraping_in_python.ipynb)
21 |   - [Google and YouTube APIs](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Google.ipynb) 
22 |   - [Twitter APIs](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Twitter.ipynb) (instructions on how to set up a [developer account](https://github.com/caocscar/twitter-create-developer-account))
23 | - [Extracting Data from PDFs](pdf-data-extraction)
24 | - [Flask](flask)
25 | - [Introduction to PyTorch](pytorch)
26 | - [Introduction to D3.js](D3)
27 |   - explains code used to generate data viz located at https://d3-examples-caocscar.onrender.com/


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Jupyter Notebooks
  2 | webscraping/.ipynb_checkpoints/
  3 | .ipynb_checkpoints/
  4 | 
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | env/
 16 | build/
 17 | develop-eggs/
 18 | dist/
 19 | downloads/
 20 | eggs/
 21 | .eggs/
 22 | lib/
 23 | lib64/
 24 | parts/
 25 | sdist/
 26 | var/
 27 | wheels/
 28 | *.egg-info/
 29 | .installed.cfg
 30 | *.egg
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | .hypothesis/
 52 | 
 53 | # Translations
 54 | *.mo
 55 | *.pot
 56 | 
 57 | # Django stuff:
 58 | *.log
 59 | local_settings.py
 60 | 
 61 | # Flask stuff:
 62 | instance/
 63 | .webassets-cache
 64 | 
 65 | # Scrapy stuff:
 66 | .scrapy
 67 | 
 68 | # Sphinx documentation
 69 | docs/_build/
 70 | 
 71 | # PyBuilder
 72 | target/
 73 | 
 74 | # Jupyter Notebook
 75 | .ipynb_checkpoints
 76 | 
 77 | # pyenv
 78 | .python-version
 79 | 
 80 | # celery beat schedule file
 81 | celerybeat-schedule
 82 | 
 83 | # SageMath parsed files
 84 | *.sage.py
 85 | 
 86 | # dotenv
 87 | .env
 88 | 
 89 | # virtualenv
 90 | .venv
 91 | venv/
 92 | ENV/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | 


--------------------------------------------------------------------------------
/matlab/letters.m:
--------------------------------------------------------------------------------
  1 | A = [0 0;
  2 |     1 4;
  3 |     2 0;
  4 |     1.5 2;
  5 |     0.5 2];
  6 | B = [0 0;
  7 |     0 4;
  8 |     1.5 3.75;
  9 |     1.5 2.25;
 10 |     0.1 2;
 11 |     1.5 1.75
 12 |     1.5 0.25;
 13 |     0 0];
 14 | C = [1.5 0;
 15 |     0 0;
 16 |     0 4;
 17 |     1.5 4];
 18 | D = [0 0;
 19 |     0 4;
 20 |     1.5 3.75;
 21 |     1.5 0.25;
 22 |     0 0];
 23 | E = [2 0;
 24 |     0 0;
 25 |     0 2;
 26 |     1 2;
 27 |     0 2;
 28 |     0 4;
 29 |     2 4];
 30 | F = [0 0;
 31 |     0 2;
 32 |     1 2;
 33 |     0 2;
 34 |     0 4;
 35 |     2 4];
 36 | G = [1.5 4;
 37 |     0 4;
 38 |     0 0;
 39 |     1.5 0;
 40 |     1.5 2;
 41 |     1 2;
 42 |     2 2];
 43 | H = [0 0;
 44 |     0 4;
 45 |     0 2;
 46 |     2 2;
 47 |     2 0;
 48 |     2 4];
 49 | I = [0 0;
 50 |     2 0;
 51 |     1 0;
 52 |     1 4;
 53 |     0 4;
 54 |     2 4];
 55 | J = [0 0.75;
 56 |     0 0;
 57 |     1.25 0;
 58 |     1.25 4;
 59 |     0.5 4;
 60 |     2 4];
 61 | K = [0 0;
 62 |     0 4;
 63 |     0 2;
 64 |     1.5 4;
 65 |     0 2;
 66 |     1.5 0];
 67 | L = [0 4;
 68 |     0 0;
 69 |     2 0];
 70 | M = [0 0;
 71 |     0.25 4;
 72 |     1 2;
 73 |     1.75 4;
 74 |     2 0];
 75 | N = [0 0;
 76 |     0 4;
 77 |     2 0;
 78 |     2 4];
 79 | O = [0 0;
 80 |     0 4;
 81 |     1.5 4;
 82 |     1.5 0;
 83 |     0 0];
 84 | P = [0 0;
 85 |     0 4;
 86 |     1.5 4;
 87 |     1.5 2;
 88 |     0 2];
 89 | Q = [1.5 0.25;
 90 |     0 0.25;
 91 |     0 4;
 92 |     1.5 4;
 93 |     1.5 0.25;
 94 |     1.75 0;
 95 |     1.25 0.5];
 96 | R = [0 0;
 97 |     0 4;
 98 |     1.5 4;
 99 |     1.5 2;
100 |     0 2;
101 |     1.5 0];
102 | S = [0 0;
103 |     1.5 0;
104 |     1.5 2;
105 |     0 2;
106 |     0 4;
107 |     1.5 4];
108 | T = [1 0;
109 |     1 4;
110 |     0 4;
111 |     2 4];
112 | U = [0 4;
113 |     0 0;
114 |     2 0;
115 |     2 4];
116 | V = [0 4;
117 |     1 0;
118 |     2 4];
119 | W = [0 4;
120 |     0.25 0;
121 |     1 2;
122 |     1.75 0;
123 |     2 4];
124 | X = [0 4;
125 |     2 0;
126 |     1 2;
127 |     2 4;
128 |     0 0];
129 | Y = [0 0;
130 |     2 4;
131 |     1 2;
132 |     0 4];
133 | Z = [2 0;
134 |     0 0;
135 |     2 4;
136 |     0 4];
137 | alphabet = {A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z};
138 | clear A B C D E F G H I J K L M N O P Q R S T U V W X Y Z


--------------------------------------------------------------------------------
/D3/exercise_1/exercise_1.js:
--------------------------------------------------------------------------------
 1 | // set the dimensions and margins of the graph
 2 | var outerWidth = 650;
 3 | var outerHeight = 300;
 4 | 
 5 | var margin = {top: 20, right: 20, bottom: 70, left: 100},
 6 |     width = outerWidth - margin.left - margin.right - 20,
 7 |     height = outerHeight - margin.top - margin.bottom;
 8 | 
 9 | // set the ranges
10 | var x = d3.scaleLinear()
11 |     .range([0, width]);
12 |       
13 | var y = d3.scaleBand()
14 |     .range([height, 0])
15 |     .padding(0.33);
16 | 
17 | var xAxis = d3.axisTop(x)
18 |     .ticks(5)
19 | 
20 | var yAxis = d3.axisLeft(y)
21 |     .tickFormat('')
22 | 
23 | // append the svg object to the body of the page
24 | // append a 'group' element to 'svg'
25 | // moves the 'group' element to the top left margin
26 | var svg = d3.select('body').append('svg')
27 |     .attr("class", "chart")
28 |     .attr("width", outerWidth)
29 |     .attr("height", outerHeight)
30 |   .append("g")
31 |     .attr("transform", `translate(${margin.left},${margin.top})`);
32 |  
33 | // data 
34 | var data = [{'team':'Boston','value':100},
35 |         {'team':'Detroit','value':85},
36 |         {'team':'New York','value':80},
37 |         {'team':'Atlanta','value':75}, 
38 |         {'team':'Chicago','value':30}]
39 | 
40 | // scale the range of the data in the domains 
41 | x.domain([0, d3.max(data, d => d.value)])
42 | y.domain(data.map(d => d.team));
43 | 
44 | // append the rectangles for the bar chart
45 | var bar = svg.selectAll(".bar")
46 |   .data(data)
47 |   .join("g")
48 |     .attr("class","bar")
49 | 
50 | var rect = bar.append('rect')
51 |     .attr("width", d => x(d.value))
52 |     .attr("y", d => y(d.team))
53 |     .attr("height", y.bandwidth())
54 |     .attr("x", 0)
55 |     .style('fill', d => d3.interpolatePurples(d.value/100))
56 | 
57 | // add the x Axis
58 | svg.append("g")
59 |     .attr("transform", `translate(0, ${height})`)
60 |     .call(d3.axisBottom(x));
61 | 
62 | // add the y Axis
63 | svg.append("g")
64 |     .call(d3.axisLeft(y));
65 | 
66 | // add chart labels 
67 | labels = svg.append('g')
68 |     .attr('class', 'label')
69 | 
70 | // x label
71 | labels.append('text')
72 |     .attr('transform', `translate(${width/2},250)`)
73 |     .text('Wins')
74 | 
75 | // y label
76 | ylabel = labels.append('text')
77 |     .attr('transform', `translate(-65,${height/2}) rotate(-90)`) 
78 |     .text('Teams')
79 | 
80 | barLabels = bar.append('text')
81 |     .attr('class', 'barlabel')
82 |     .attr('x', d => x(d.value) - 20)
83 |     .attr('y', d => y(d.team) + (y.bandwidth()/2) + 4)
84 |     .text(d => d.value)
85 |     .style('fill', 'black')
86 | 
87 | 
88 | 
89 | 
90 |     
91 | 
92 | 
93 | 


--------------------------------------------------------------------------------
/D3/exercise_2/exercise_2.js:
--------------------------------------------------------------------------------
 1 | // set the dimensions and margins of the graph
 2 | var outerWidth = 960;
 3 | var outerHeight = 500;
 4 | 
 5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
 6 |     width = outerWidth - margin.left - margin.right,
 7 |     height = outerHeight - margin.top - margin.bottom;
 8 | 
 9 | // set the ranges
10 | var x = d3.scaleBand()
11 |     .range([0, width])
12 |     .padding(0.33);
13 | 
14 | var y= d3.scaleLinear()
15 |     .range([height, 0]);
16 |     
17 | 
18 | var xAxis = d3.axisTop(x)
19 |     .ticks(5)
20 | 
21 | var yAxis = d3.axisLeft(y)
22 |     .tickFormat('')
23 | 
24 | // append the svg object to the body of the page
25 | // append a 'group' element to 'svg'
26 | // moves the 'group' element to the top left margin
27 | var svg = d3.select('body').append('svg')
28 |     .attr("class", "chart")
29 |     .attr("width", outerWidth)
30 |     .attr("height", outerHeight)
31 |   .append("g")
32 |     .attr("transform", `translate(${margin.left},${margin.top})`);
33 |  
34 | // data 
35 | var data = [{'team':'Boston','value':100},
36 |         {'team':'Detroit','value':85},
37 |         {'team':'New York','value':80},
38 |         {'team':'Atlanta','value':75}, 
39 |         {'team':'Chicago','value':30}]
40 | 
41 | 
42 | // scale the range of the data in the domains 
43 | x.domain(data.map(d => d.team));
44 | y.domain([0, d3.max(data, d => d.value)])
45 | 
46 | 
47 | // append the rectangles for the bar chart
48 | var bar = svg.selectAll(".bar")
49 |     .data(data)
50 |     .join("g")
51 |         .attr("class","bar")
52 | 
53 | 
54 | 
55 | var rect = bar.append('rect')
56 |     .attr("height", d => height - y(d.value))
57 |     .attr("x", d => x(d.team))
58 |     .attr("width", x.bandwidth())
59 |     .attr("y", d => y(d.value))
60 |     .style('fill', d => d3.interpolatePurples(d.value/100));
61 | 
62 | // add the x Axis
63 | svg.append("g")
64 |     .attr("transform", `translate(0, ${height})`)
65 |     .call(d3.axisBottom(x));
66 | 
67 | // add the y Axis
68 | svg.append("g")
69 |     .call(d3.axisLeft(y));
70 | 
71 | // add chart labels 
72 | labels = svg.append('g')
73 |     .attr('class', 'label')
74 | 
75 | // x label
76 | labels.append('text')
77 |     .attr('transform', `translate(${width/2},450)`)
78 |     .text('Teams')
79 | 
80 | // y label
81 | ylabel = labels.append('text')
82 |     .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 
83 |     .text('Wins')
84 | 
85 | barLabels = bar.append('text')
86 |     .attr('class', 'barlabel')
87 |     .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
88 |     .attr('y', d => y(d.value) - 15)
89 |     .text(d => d.value)
90 |     .style('fill', 'black')
91 | 


--------------------------------------------------------------------------------
/D3/exercise_1/solution/solution_1.js:
--------------------------------------------------------------------------------
 1 | // set the dimensions and margins of the graph
 2 | var outerWidth = 960;
 3 | var outerHeight = 500;
 4 | 
 5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
 6 |     width = outerWidth - margin.left - margin.right,
 7 |     height = outerHeight - margin.top - margin.bottom;
 8 | 
 9 | // set the ranges
10 | var x = d3.scaleBand()
11 |     .range([0, width])
12 |     .padding(0.33);
13 | 
14 | var y= d3.scaleLinear()
15 |     .range([height, 0]);
16 |     
17 | 
18 | var xAxis = d3.axisTop(x)
19 |     .ticks(5)
20 | 
21 | var yAxis = d3.axisLeft(y)
22 |     .tickFormat('')
23 | 
24 | // append the svg object to the body of the page
25 | // append a 'group' element to 'svg'
26 | // moves the 'group' element to the top left margin
27 | var svg = d3.select('body').append('svg')
28 |     .attr("class", "chart")
29 |     .attr("width", outerWidth)
30 |     .attr("height", outerHeight)
31 |   .append("g")
32 |     .attr("transform", `translate(${margin.left},${margin.top})`);
33 |  
34 | // data 
35 | var data = [{'team':'Boston','value':100},
36 |         {'team':'Detroit','value':85},
37 |         {'team':'New York','value':80},
38 |         {'team':'Atlanta','value':75}, 
39 |         {'team':'Chicago','value':30}]
40 | 
41 | 
42 | // scale the range of the data in the domains 
43 | x.domain(data.map(d => d.team));
44 | y.domain([0, d3.max(data, d => d.value)])
45 | 
46 | 
47 | // append the rectangles for the bar chart
48 | var bar = svg.selectAll(".bar")
49 |     .data(data)
50 |     .join("g")
51 |         .attr("class","bar")
52 | 
53 | 
54 | 
55 | var rect = bar.append('rect')
56 |     .attr("height", d => height - y(d.value))
57 |     .attr("x", d => x(d.team))
58 |     .attr("width", x.bandwidth())
59 |     .attr("y", d => y(d.value))
60 |     .style('fill', d => d3.interpolatePurples(d.value/100));
61 | 
62 | // add the x Axis
63 | svg.append("g")
64 |     .attr("transform", `translate(0, ${height})`)
65 |     .call(d3.axisBottom(x));
66 | 
67 | // add the y Axis
68 | svg.append("g")
69 |     .call(d3.axisLeft(y));
70 | 
71 | // add chart labels 
72 | labels = svg.append('g')
73 |     .attr('class', 'label')
74 | 
75 | // x label
76 | labels.append('text')
77 |     .attr('transform', `translate(${width/2},450)`)
78 |     .text('Teams')
79 | 
80 | // y label
81 | ylabel = labels.append('text')
82 |     .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 
83 |     .text('Wins')
84 | 
85 | barLabels = bar.append('text')
86 |     .attr('class', 'barlabel')
87 |     .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
88 |     .attr('y', d => y(d.value) - 15)
89 |     .text(d => d.value)
90 |     .style('fill', 'black')
91 | 


--------------------------------------------------------------------------------
/NLP/img/pipeline.svg:
--------------------------------------------------------------------------------
 1 | <svg class="o-svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 923 200" width="923" height="200">
 2 |     <style>
 3 |         .svg__pipeline__text { fill: #1a1e23; font: 20px Arial, sans-serif }
 4 |         .svg__pipeline__text-small { fill: #1a1e23; font: bold 18px Arial, sans-serif }
 5 |         .svg__pipeline__text-code { fill: #1a1e23; font: 600 16px Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace }
 6 |     </style>
 7 |     <rect width="601" height="127" x="159" y="21" fill="none" stroke="#09a3d5" stroke-width="3" rx="19.1" stroke-dasharray="3 6" ry="19.1"/>
 8 |     <path fill="#e1d5e7" stroke="#9673a6" stroke-width="2" d="M801 55h120v60H801z"/>
 9 |     <text class="svg__pipeline__text" dy="0.75em" width="28" height="19" transform="translate(846.5 75.5)">Doc</text>
10 |     <path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M121.2 84.7h29.4"/>
11 |     <path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M156.6 84.7l-8 4 2-4-2-4z"/>
12 |     <path fill="#f5f5f5" stroke="#999" stroke-width="2" d="M1 55h120v60H1z"/>
13 |     <text class="svg__pipeline__text" dy="0.85em" width="34" height="22" transform="translate(43.5 73.5)">Text</text>
14 |     <path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M760 84.7h33"/>
15 |     <path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M799 84.7l-8 4 2-4-2-4z"/>
16 |     <rect width="75" height="39" x="422" y="1" fill="#dae8fc" stroke="#09a3d5" stroke-width="2" rx="5.8" ry="5.8"/>
17 |     <text class="svg__pipeline__text-code" dy="0.8em" dx="0.1em" width="29" height="17" transform="translate(444.5 11.5)">nlp</text>
18 |     <path fill="#f8cecc" stroke="#b85450" stroke-width="2" stroke-miterlimit="10" d="M176 58h103.3L296 88l-16.8 30H176l16.8-30z"/>
19 |     <text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="58" height="14" transform="translate(206.5 80.5)">tokenizer</text>
20 |     <path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M314 58h103.3L434 88l-16.8 30H314l16.8-30z"/>
21 |     <text class="svg__pipeline__text-small" dy="0.75em" dx="8" width="62" height="14" transform="translate(342.5 80.5)">tagger</text>
22 |     <path fill="none" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M296.5 88.2h24.7"/>
23 |     <path fill="#999" stroke="#999" stroke-width="2" stroke-miterlimit="10" d="M327.2 88.2l-8 4 2-4-2-4z"/>
24 |     <path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M416 58h103.3L536 88l-16.8 30H416l16.8-30z"/>
25 |     <text class="svg__pipeline__text-small" dy="0.75em" dx="-0.25em" width="40" height="14" transform="translate(455.5 80.5)">parser</text>
26 |     <path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M519 58h103.3L639 88l-16.8 30H519l16.8-30z"/>
27 |     <text class="svg__pipeline__text-small" dy="0.75em" dx="8" width="40" height="14" transform="translate(558.5 80.5)">ner</text>
28 |     <path fill="#ffe6cc" stroke="#d79b00" stroke-width="2" stroke-miterlimit="10" d="M622 58h103.3L742 88l-16.8 30H622l16.8-30z"/>
29 |     <text class="svg__pipeline__text-small" dy="0.75em" dx="8" width="20" height="14" transform="translate(671.5 80.5)">...</text>
30 | </svg>
31 | 


--------------------------------------------------------------------------------
/pyspark/sample.csv:
--------------------------------------------------------------------------------
 1 | RxDevice,FileId,TxDevice,Gentime,TxRandom,MsgCount,DSecond,Latitude,Longitude,Elevation,Speed,Heading,Ax,Ay,Az,Yawrate,PathCount,RadiusOfCurve,Confidence
 2 | 30,950898,30,286304909792863,0,29,3700,42.285103,-83.813293,253.8,0.0,26.799999,0.44999999,0.30000001,-10.0,1.22,6,3276.7,100
 3 | 30,950898,30,286304909892863,0,30,3800,42.285103,-83.813293,253.8,0.40000001,26.799999,0.38,0.22,-10.0,1.46,6,3276.7,100
 4 | 30,950898,30,286304909992863,0,31,3900,42.285103,-83.813293,253.7,0.5,26.799999,0.38,0.22,-10.0,1.46,6,3276.7,100
 5 | 30,950898,30,286304910092861,0,32,4000,42.285103,-83.813293,253.7,0.62,26.799999,0.52999997,0.30000001,-10.0,1.95,6,3276.7,100
 6 | 30,950898,30,286304910193010,0,33,4100,42.285107,-83.813293,253.7,0.72000003,26.799999,0.69,0.38,-10.0,2.4400001,6,3276.7,100
 7 | 30,950898,30,286304910292864,0,34,4200,42.285107,-83.813293,253.7,0.83999997,26.799999,0.83999997,0.30000001,-10.0,2.4400001,6,3276.7,100
 8 | 30,950898,30,286304910392995,0,35,4300,42.285107,-83.813293,253.7,0.98000002,26.799999,1.0,0.38,-10.0,3.1700001,6,3276.7,100
 9 | 30,950898,30,286304910492990,0,36,4400,42.285107,-83.813293,253.7,1.14,26.799999,1.23,0.44999999,-10.0,3.9000001,6,16.799999,100
10 | 30,950898,30,286304910593130,0,37,4500,42.285107,-83.813293,253.7,1.28,26.799999,1.3099999,0.44999999,-10.0,4.1500001,6,17.700001,100
11 | 30,950898,30,286304910693004,0,38,4600,42.28511,-83.813293,253.7,1.4400001,29.35,1.39,0.52999997,-10.0,4.8800001,6,18.299999,78
12 | 30,950898,30,286304910792863,0,39,4700,42.28511,-83.813286,253.7,1.6,29.924999,1.23,0.52999997,-10.0,5.6100001,6,18.700001,52
13 | 30,950898,30,286304910892982,0,40,4800,42.28511,-83.813286,253.7,1.78,32.299999,1.39,0.69,-10.0,6.3400002,6,18.9,45
14 | 30,950898,30,286304910992863,0,41,4900,42.285114,-83.813286,253.7,1.98,32.487499,1.39,0.69,-10.0,6.8200002,6,19.0,42
15 | 30,950898,30,286304911092864,0,42,5000,42.285114,-83.813286,253.7,2.1800001,33.637501,1.7,0.75999999,-10.0,7.8000002,6,18.9,39
16 | 30,950898,30,286304911192872,0,43,5100,42.285118,-83.813278,253.7,2.3599999,34.450001,1.7,0.75999999,-10.0,8.3000002,6,18.799999,38
17 | 30,950898,30,286304911292839,0,44,5200,42.285118,-83.813278,253.7,2.5599999,35.150002,1.77,0.92000002,-10.0,9.2700005,6,18.6,37
18 | 30,950898,30,286304911392900,0,45,5300,42.285122,-83.813278,253.7,2.76,35.137501,1.7,1.0,-10.0,10.0,6,18.4,36
19 | 30,950898,30,286304911492863,0,46,5400,42.285122,-83.813278,253.7,2.9400001,36.25,1.54,1.0,-10.0,10.73,6,18.1,36
20 | 30,950898,30,286304911592863,0,47,5500,42.285126,-83.813271,253.7,3.1600001,36.849998,1.46,1.3099999,-10.0,12.19,6,17.9,33
21 | 30,950898,30,286304911692841,0,48,5600,42.285126,-83.813271,253.60001,3.3599999,38.637501,1.3099999,1.54,-10.0,13.67,6,17.5,30
22 | 30,950898,30,286304911792896,0,49,5700,42.28513,-83.813271,253.60001,3.5,40.5,1.3099999,1.77,-10.0,14.64,6,17.200001,30
23 | 30,950898,30,286304911892840,0,50,5800,42.285133,-83.813263,253.60001,3.6800001,41.799999,1.15,1.7,-10.0,16.110001,6,16.799999,28
24 | 30,950898,30,286304911992904,0,51,5900,42.285133,-83.813263,253.60001,3.8800001,44.275002,1.15,1.7,-10.0,17.07,7,16.4,28
25 | 30,950898,30,286304912092843,0,52,6000,42.285137,-83.813255,253.60001,4.04,45.775002,1.23,1.46,-10.0,17.32,7,16.0,30
26 | 30,950898,30,286304912192874,0,53,6100,42.285137,-83.813255,253.60001,4.2199998,47.075001,1.15,2.0799999,-10.0,18.299999,7,15.7,31
27 | 


--------------------------------------------------------------------------------
/fusion-tables/Seattle_Parks.csv:
--------------------------------------------------------------------------------
 1 | "PMAID","LocID","ZIP.Code","address","icon"
 2 | 281,2545,98119,"1200 W Howe St Seattle 98119","ylw_circle"
 3 | 4159,2387,98144,"2821 12TH Ave S Seattle 98144","orange_diamond"
 4 | 4467,2382,98122,"564 12th Ave Seattle 98122","orange_diamond"
 5 | 4010,2546,98107,"4400 14th Ave NW Seattle 98107","ylw_circle"
 6 | 296,296,98112,"3001 E Madison St Seattle 98112","grn_stars"
 7 | 1000001,0,98199,"32nd Ave W Seattle 98199","donut"
 8 | 3158,2378,98117,"606  NW 76th St Seattle 98117","orange_diamond"
 9 | 4404,2533,98103,"723 N 35th St Seattle 98103","ylw_circle"
10 | 1000002,0,98118,"Lake Washington Blvd S & S Adams St Seattle 98118","donut"
11 | 244,1886,98125,"12526 27th Ave NE Seattle 98125","orange_diamond"
12 | 445,1888,98116,"1702 Alki Ave SW Seattle 98116","orange_diamond"
13 | 446,1049,98116,"5817 SW Lander St Seattle 98116","ltblu_square"
14 | 3914,1891,98122,"1504 34TH Ave Seattle 98122","orange_diamond"
15 | 426,1892,98144,"2000 Martin Luther King Jr Way S Seattle 98144","orange_diamond"
16 | 2927,1894,98116,"4000 Beach Dr SW Seattle 98116","orange_diamond"
17 | 1556,1898,98199,"3431 Arapahoe Pl W Seattle 98199","orange_diamond"
18 | 485,1907,98146,"4120 Arroyo Dr SW Seattle 98146","orange_diamond"
19 | 4081,1908,98118,"8702 Seward Park Ave S Seattle 98118","orange_diamond"
20 | 4243,2541,98144,"1501 21st Ave S Seattle 98144","ylw_circle"
21 | 241,2552,98103,"4020 Fremont Ave N Seattle 98103","ylw_circle"
22 | 4006,1910,98102,"2548 Delmar Dr E Seattle 98102","orange_diamond"
23 | 2840,1911,98117,"8347 14th Ave NW Seattle 98117","orange_diamond"
24 | 4278,1913,98107,"5701 22nd Ave NW Seattle 98107","orange_diamond"
25 | 4428,1914,98107,"1702 nw 62nd St Seattle 98107","orange_diamond"
26 | 497,1108,98107,"2644 NW 60th St Seattle 98107","ltblu_square"
27 | 4073,2553,98115,"7802 Banner Way NE Seattle 98115","ylw_circle"
28 | 3703,1919,98116,"6425 SW Admiral Way Seattle 98116","orange_diamond"
29 | 303,1110,98199,"2614 24th Ave W Seattle 98199","ltblu_square"
30 | 1000342,0,98119,"3rd Ave W & W Prospect St Seattle 98119","donut"
31 | 400,1074,98144,"1902 13th Ave S Seattle 98144","ltblu_square"
32 | 3119,1923,98104,"1110 S Dearborn St Seattle 98104","orange_diamond"
33 | 4481,1075,98125,"5th Ave NE & NE 103rd St Seattle 98125","ltblu_square"
34 | 4028,0,98105,"5809 15th Ave NE Seattle 98105","donut"
35 | 440,1925,98118,"8650 55th Ave S Seattle 98118","orange_diamond"
36 | 4472,2543,98121,"1st to 5th Ave on Bell St Seattle 98121","ylw_circle"
37 | 4022,1182,98102,"Bellevue Ave E & Bellevue Pl E Seattle 98102","ltblu_square"
38 | 4415,1186,98121,"2512 Elliott Ave Seattle 98121","ltblu_square"
39 | 346,1290,98102,"703 Belmont Pl E Seattle 98102","ltblu_square"
40 | 447,1188,98126,"3600 SW Admiral Way Seattle 98126","ltblu_square"
41 | 475,1189,98105,"3659 42nd Ave NE Seattle 98105","ltblu_square"
42 | 436,1077,98118,"9320 38th Ave S Seattle 98118","ltblu_square"
43 | 4245,1190,98122,"1401 23rd Ave S Seattle 98122","ltblu_square"
44 | 253,1191,98107,"5420 22nd Ave NW Seattle 98107","ltblu_square"
45 | 304,1193,98109,"1215 5th Ave N Seattle 98109","ltblu_square"
46 | 288,1113,98133,"13035 Linden Ave N Seattle 98133","ltblu_square"
47 | 4450,1194,98133,"14201 Linden Ave N Seattle 98133","ltblu_square"
48 | 3907,1196,98119,"513 W Olympic Pl Seattle 98119","ltblu_square"
49 | 4418,1198,98144,"1520 26th Ave S Seattle 98144","ltblu_square"
50 | 238,1199,98117,"1851 NW Blue Ridge Dr Seattle 98117","ltblu_square"
51 | 239,1200,98117,"Radford Ave NW & NW Milford Way Seattle 98117","ltblu_square"
52 | 


--------------------------------------------------------------------------------
/D3/exercise_3/exercise_3.js:
--------------------------------------------------------------------------------
  1 | // set the dimensions and margins of the graph
  2 | var outerWidth = 960;
  3 | var outerHeight = 500;
  4 | 
  5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
  6 |     width = outerWidth - margin.left - margin.right,
  7 |     height = outerHeight - margin.top - margin.bottom;
  8 | 
  9 | // set the ranges
 10 | var y= d3.scaleLinear()
 11 |     .range([height, 0]);
 12 |     
 13 | var x = d3.scaleBand()
 14 |     .range([0, width])
 15 |     .padding(0.33);
 16 | 
 17 | var xAxis = d3.axisTop(x)
 18 |     .ticks(5)
 19 | 
 20 | var yAxis = d3.axisLeft(y)
 21 |     .tickFormat('')
 22 | 
 23 | // append the svg object to the body of the page
 24 | // append a 'group' element to 'svg'
 25 | // moves the 'group' element to the top left margin
 26 | var svg = d3.select('body').append('svg')
 27 |     .attr("class", "chart")
 28 |     .attr("width", outerWidth)
 29 |     .attr("height", outerHeight)
 30 |   .append("g")
 31 |     .attr("transform", `translate(${margin.left},${margin.top})`);
 32 |  
 33 | // data 
 34 | var data = [{'team':'Boston','value':100},
 35 |         {'team':'Detroit','value':85},
 36 |         {'team':'New York','value':80},
 37 |         {'team':'Atlanta','value':75}, 
 38 |         {'team':'Chicago','value':30}]
 39 | 
 40 | 
 41 | // scale the range of the data in the domains 
 42 | y.domain([0, d3.max(data, d => d.value)])
 43 | x.domain(data.map(d => d.team));
 44 | 
 45 | 
 46 | 
 47 | // append the rectangles for the bar chart
 48 | var bar = svg.selectAll(".bar")
 49 |     .data(data)
 50 |     .join("g")
 51 |         .attr("class","bar")
 52 | 
 53 | 
 54 | var rect = bar.append('rect')
 55 |     .attr("height", d => height - y(d.value))
 56 |     .attr("x", d => x(d.team))
 57 |     .attr("width", x.bandwidth())
 58 |     .attr("y", d => y(d.value))
 59 |     .style('fill', d => d3.interpolatePurples(d.value/100));
 60 | 
 61 | 
 62 | // add the x Axis
 63 | svg.append("g")
 64 |     .attr('class', 'xaxis')
 65 |     .attr("transform", `translate(0, ${height})`)
 66 |     .call(d3.axisBottom(x));
 67 | 
 68 | // add the y Axis
 69 | svg.append("g")
 70 |     .call(d3.axisLeft(y));
 71 | 
 72 | // add chart labels 
 73 | labels = svg.append('g')
 74 |     .attr('class', 'label')
 75 | 
 76 | // x label
 77 | labels.append('text')
 78 |     .attr('transform', `translate(${width/2},450)`)
 79 |     .text('Teams')
 80 | 
 81 | // y label
 82 | ylabel = labels.append('text')
 83 |     .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 
 84 |     .text('Wins')
 85 | 
 86 | barLabels = bar.append('text')
 87 |     .attr('class', 'barlabel')
 88 |     .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
 89 |     .attr('y', d => y(d.value) - 15)
 90 |     .text(d => d.value)
 91 |     .style('fill', 'black')
 92 | 
 93 | 
 94 | function updateAlpha() { 
 95 | 
 96 |     x.domain((data.map(d => d.team)).sort());
 97 | 
 98 |     bar.selectAll('rect')
 99 |         .attr("x", d => x(d.team))
100 | 
101 |     svg.select(".xaxis")
102 |         .call(d3.axisBottom(x));
103 |         
104 | 
105 |     bar.selectAll('.barlabel')
106 |         .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
107 |     
108 | }
109 | 
110 | 
111 | function updateNum() { 
112 | 
113 |     data.sort((a,b) => d3.ascending(a.value, b.value))
114 |     
115 |     x.domain(data.map(d => d.team));
116 | 
117 |     bar.selectAll('rect')
118 |         .attr("x", d => x(d.team))
119 |     
120 |     svg.select(".xaxis")
121 |         .call(d3.axisBottom(x));
122 | 
123 |     bar.selectAll('.barlabel')
124 |         .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
125 | 
126 | }
127 | 


--------------------------------------------------------------------------------
/D3/exercise_2/solution/solution_2.js:
--------------------------------------------------------------------------------
  1 | // set the dimensions and margins of the graph
  2 | var outerWidth = 960;
  3 | var outerHeight = 500;
  4 | 
  5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
  6 |     width = outerWidth - margin.left - margin.right,
  7 |     height = outerHeight - margin.top - margin.bottom;
  8 | 
  9 | // set the ranges
 10 | var x = d3.scaleBand()
 11 |     .range([0, width])
 12 |     .padding(0.33);
 13 | 
 14 | var y= d3.scaleLinear()
 15 |     .range([height, 0]);
 16 |     
 17 | 
 18 | var xAxis = d3.axisTop(x)
 19 |     .ticks(5)
 20 | 
 21 | var yAxis = d3.axisLeft(y)
 22 |     .tickFormat('')
 23 | 
 24 | // append the svg object to the body of the page
 25 | // append a 'group' element to 'svg'
 26 | // moves the 'group' element to the top left margin
 27 | var svg = d3.select('body').append('svg')
 28 |     .attr("class", "chart")
 29 |     .attr("width", outerWidth)
 30 |     .attr("height", outerHeight)
 31 |   .append("g")
 32 |     .attr("transform", `translate(${margin.left},${margin.top})`);
 33 |  
 34 | // data 
 35 | var data = [{'team':'Boston','value':100},
 36 |         {'team':'Detroit','value':85},
 37 |         {'team':'New York','value':80},
 38 |         {'team':'Atlanta','value':75}, 
 39 |         {'team':'Chicago','value':30}]
 40 | 
 41 | 
 42 | // scale the range of the data in the domains 
 43 | x.domain(data.map(d => d.team));
 44 | y.domain([0, d3.max(data, d => d.value)])
 45 | 
 46 | 
 47 | // append the rectangles for the bar chart
 48 | var bar = svg.selectAll(".bar")
 49 |     .data(data)
 50 |     .join("g")
 51 |         .attr("class","bar")
 52 | 
 53 | 
 54 | var rect = bar.append('rect')
 55 |     .attr("height", d => height - y(d.value))
 56 |     .attr("x", d => x(d.team))
 57 |     .attr("width", x.bandwidth())
 58 |     .attr("y", d => y(d.value))
 59 |     .style('fill', d => d3.interpolatePurples(d.value/100));
 60 | 
 61 | 
 62 | // add the x Axis
 63 | svg.append("g")
 64 |     .attr('class', 'xaxis')
 65 |     .attr("transform", `translate(0, ${height})`)
 66 |     .call(d3.axisBottom(x));
 67 | 
 68 | // add the y Axis
 69 | svg.append("g")
 70 |     .call(d3.axisLeft(y));
 71 | 
 72 | // add chart labels 
 73 | labels = svg.append('g')
 74 |     .attr('class', 'label')
 75 | 
 76 | // x label
 77 | labels.append('text')
 78 |     .attr('transform', `translate(${width/2},450)`)
 79 |     .text('Teams')
 80 | 
 81 | // y label
 82 | ylabel = labels.append('text')
 83 |     .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 
 84 |     .text('Wins')
 85 | 
 86 | barLabels = bar.append('text')
 87 |     .attr('class', 'barlabel')
 88 |     .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
 89 |     .attr('y', d => y(d.value) - 15)
 90 |     .text(d => d.value)
 91 |     .style('fill', 'black')
 92 | 
 93 | 
 94 | function updateAlpha() { 
 95 | 
 96 |     x.domain((data.map(d => d.team)).sort());
 97 | 
 98 |     bar.selectAll('rect')
 99 |         .attr("x", d => x(d.team))
100 | 
101 |     svg.select(".xaxis")
102 |         .call(d3.axisBottom(x));
103 |         
104 | 
105 |     bar.selectAll('.barlabel')
106 |         .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
107 |     
108 |     
109 | 
110 | }
111 | 
112 | function updateNum() { 
113 | 
114 |     data.sort((a,b) => d3.ascending(a.value, b.value))
115 |     
116 |     x.domain(data.map(d => d.team));
117 | 
118 |     bar.selectAll('rect')
119 |         .attr("x", d => x(d.team))
120 |     
121 |     svg.select(".xaxis")
122 |         .call(d3.axisBottom(x));
123 | 
124 |     bar.selectAll('.barlabel')
125 |         .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
126 | 
127 | }
128 | 


--------------------------------------------------------------------------------
/thematic-maps/snow_crashes.csv:
--------------------------------------------------------------------------------
 1 | County	January	February	March	April	May	June	July	August	September	October	November	December	Total
 2 | Alcona	7	7	0	1	0	0	0	0	0	0	5	1	21
 3 | Alger	8	17	5	0	0	0	0	0	0	2	4	1	37
 4 | Allegan	206	169	6	0	0	0	0	0	0	1	30	17	429
 5 | Alpena	16	20	1	0	0	0	0	0	0	1	9	5	52
 6 | Antrim	30	24	10	0	0	0	0	0	0	3	5	3	75
 7 | Arenac	11	5	1	3	0	0	0	0	0	0	7	3	30
 8 | Baraga	4	4	3	1	0	0	0	0	0	0	3	7	22
 9 | Barry	39	25	2	0	0	0	0	0	0	0	13	13	92
10 | Bay	83	41	24	0	1	0	1	0	0	0	22	16	188
11 | Benzie	11	26	3	0	0	0	0	0	0	0	9	1	50
12 | Berrien	282	432	33	0	0	0	0	0	0	0	62	41	850
13 | Branch	37	46	5	0	0	0	0	0	0	0	26	29	143
14 | Calhoun	195	106	7	0	0	0	0	0	0	0	54	74	436
15 | Cass	39	69	7	0	0	0	0	0	0	0	17	18	150
16 | Charlevoix	15	8	1	0	0	0	0	0	0	0	5	4	33
17 | Cheboygan	11	5	9	0	0	0	0	1	0	4	3	5	38
18 | Chippewa	47	27	21	1	0	0	0	0	0	0	7	6	109
19 | Clare	41	22	4	3	0	0	0	0	0	0	9	12	91
20 | Clinton	52	48	8	0	0	0	0	0	0	0	25	21	154
21 | Crawford	25	14	4	0	0	0	0	0	0	1	17	3	64
22 | Delta	12	18	8	2	0	0	0	0	0	0	6	2	48
23 | Dickinson	17	8	4	7	0	0	0	0	0	0	2	3	41
24 | Eaton	87	74	5	0	0	0	0	0	0	0	37	36	239
25 | Emmet	12	11	5	0	0	0	0	0	0	1	8	8	45
26 | Genesee	326	167	61	1	0	0	0	0	0	2	123	120	800
27 | Gladwin	8	9	1	0	0	0	0	0	0	0	8	1	27
28 | Gogebic	8	16	2	0	0	0	0	0	0	0	11	9	46
29 | Grand Traverse	139	201	18	3	0	0	0	0	0	0	33	39	433
30 | Gratiot	33	25	6	0	0	0	0	0	0	0	13	0	77
31 | Hillsdale	35	39	2	0	0	0	0	0	0	0	34	8	118
32 | Houghton	42	47	19	6	0	0	0	0	0	0	16	37	167
33 | Huron	32	25	9	2	0	0	0	0	0	0	11	1	80
34 | Ingham	241	143	32	2	0	0	0	0	1	0	100	84	603
35 | Ionia	62	54	6	1	0	0	0	0	0	0	12	9	144
36 | Iosco	11	8	2	1	0	0	0	0	0	0	7	10	39
37 | Iron	2	1	0	2	0	0	0	0	0	0	3	2	10
38 | Isabella	86	55	12	2	0	0	0	0	0	1	29	5	190
39 | Jackson	201	81	14	0	0	0	0	0	0	0	91	36	423
40 | Kalamazoo	350	261	12	2	0	0	0	0	0	2	59	131	817
41 | Kalkaska	36	12	1	0	0	0	0	0	0	2	6	3	60
42 | Kent	764	519	68	8	0	0	0	0	0	1	126	226	1,712
43 | Keweenaw	6	2	0	1	0	0	0	0	0	0	1	2	12
44 | Lake	11	8	2	0	0	0	0	0	0	0	5	7	33
45 | Lapeer	78	50	33	3	0	0	0	0	0	0	50	18	232
46 | Leelanau	14	18	4	0	0	0	0	0	0	0	8	6	50
47 | Lenawee	47	47	10	0	0	0	0	0	0	0	29	12	145
48 | Livingston	159	108	19	2	0	1	0	0	0	4	102	47	442
49 | Luce	6	5	6	0	0	0	0	0	0	0	5	5	27
50 | Mackinac	19	15	9	7	0	0	0	0	0	0	1	3	54
51 | Macomb	444	355	112	1	0	0	0	0	0	0	102	60	1,074
52 | Manistee	34	30	5	0	0	0	0	0	0	0	6	12	87
53 | Marquette	51	110	19	15	0	0	0	0	0	4	23	31	253
54 | Mason	58	62	10	0	0	0	0	0	0	0	7	4	141
55 | Mecosta	36	45	5	0	0	0	0	0	0	0	13	12	111
56 | Menominee	6	4	8	1	0	0	0	0	0	0	2	2	23
57 | Midland	56	22	12	2	0	0	0	0	0	1	15	8	116
58 | Missaukee	14	9	0	1	0	0	0	0	0	2	11	6	43
59 | Monroe	109	128	18	0	0	0	0	0	0	0	15	16	286
60 | Montcalm	50	35	3	2	0	0	0	0	0	0	19	8	117
61 | Montmorency	9	1	0	1	0	0	0	0	0	0	4	3	18
62 | Muskegon	249	287	13	0	0	0	0	0	0	0	23	7	579
63 | Newaygo	49	28	5	0	0	0	0	0	0	0	18	4	104
64 | Oakland	863	633	193	10	1	0	0	0	0	0	367	220	2,287
65 | Oceana	36	39	5	0	0	0	0	0	0	0	6	1	87
66 | Ogemaw	9	19	1	3	0	0	0	0	0	0	11	7	50
67 | Ontonagon	7	5	6	0	0	0	0	0	0	0	6	12	36
68 | Osceola	38	16	2	2	0	0	0	0	0	0	10	13	81
69 | Oscoda	8	3	1	0	0	0	0	0	0	0	7	1	20
70 | Otsego	55	12	9	4	0	0	0	0	0	0	25	9	114
71 | Ottawa	399	388	16	1	0	0	0	0	0	0	46	33	883
72 | Presque Isle	17	10	2	1	0	0	0	0	0	2	1	3	36
73 | Roscommon	14	8	4	5	0	0	0	0	0	1	11	7	50
74 | Saginaw	150	85	39	7	0	0	0	0	0	0	49	25	355
75 | St. Clair	93	65	36	3	0	0	0	0	0	1	23	23	244
76 | St. Joseph	39	45	4	0	0	0	0	0	0	0	20	13	121
77 | Sanilac	32	17	14	1	0	0	0	0	0	0	13	5	82
78 | Schoolcraft	13	11	0	2	0	0	0	0	0	1	1	1	29
79 | Shiawassee	40	41	4	0	0	0	0	0	0	0	26	12	123
80 | Tuscola	33	27	16	1	0	0	0	0	0	0	20	4	101
81 | Van Buren	134	155	4	0	0	0	0	0	0	0	33	31	357
82 | Washtenaw	308	233	42	1	0	0	1	0	0	0	123	25	733
83 | Wayne	1,143	877	182	2	1	0	1	0	0	1	217	131	2,555
84 | Wexford	29	55	13	5	0	0	0	0	0	0	12	13	127


--------------------------------------------------------------------------------
/D3/exercise_3/solution/solution_3.js:
--------------------------------------------------------------------------------
  1 | // set the dimensions and margins of the graph
  2 | var outerWidth = 960;
  3 | var outerHeight = 500;
  4 | 
  5 | var margin = {top: 50, right: 20, bottom: 80, left: 80},
  6 |     width = outerWidth - margin.left - margin.right,
  7 |     height = outerHeight - margin.top - margin.bottom;
  8 | 
  9 | // set the ranges
 10 | var y= d3.scaleLinear()
 11 |     .range([height, 0]);
 12 |     
 13 | var x = d3.scaleBand()
 14 |     .range([0, width])
 15 |     .padding(0.33);
 16 | 
 17 | var xAxis = d3.axisTop(x)
 18 |     .ticks(5)
 19 | 
 20 | var yAxis = d3.axisLeft(y)
 21 |     .tickFormat('')
 22 | 
 23 | // append the svg object to the body of the page
 24 | // append a 'group' element to 'svg'
 25 | // moves the 'group' element to the top left margin
 26 | var svg = d3.select('body').append('svg')
 27 |     .attr("class", "chart")
 28 |     .attr("width", outerWidth)
 29 |     .attr("height", outerHeight)
 30 |   .append("g")
 31 |     .attr("transform", `translate(${margin.left},${margin.top})`);
 32 |  
 33 | // data 
 34 | var data = [{'team':'Boston','value':100},
 35 |         {'team':'Detroit','value':85},
 36 |         {'team':'New York','value':80},
 37 |         {'team':'Atlanta','value':75}, 
 38 |         {'team':'Chicago','value':30}]
 39 | 
 40 | 
 41 | // scale the range of the data in the domains 
 42 | y.domain([0, d3.max(data, d => d.value)])
 43 | x.domain(data.map(d => d.team));
 44 | 
 45 | 
 46 | // append the rectangles for the bar chart
 47 | var bar = svg.selectAll(".bar")
 48 |     .data(data)
 49 |     .join("g")
 50 |         .attr("class","bar")
 51 | 
 52 | 
 53 | var rect = bar.append('rect')
 54 |     .attr("height", d => height - y(d.value))
 55 |     .attr("x", d => x(d.team))
 56 |     .attr("width", x.bandwidth())
 57 |     .attr("y", d => y(d.value))
 58 |     .style('fill', d => d3.interpolatePurples(d.value/100));
 59 | 
 60 | 
 61 | // add the x Axis
 62 | svg.append("g")
 63 |     .attr('class', 'xaxis')
 64 |     .attr("transform", `translate(0, ${height})`)
 65 |     .call(d3.axisBottom(x));
 66 | 
 67 | // add the y Axis
 68 | svg.append("g")
 69 |     .call(d3.axisLeft(y));
 70 | 
 71 | // add chart labels 
 72 | labels = svg.append('g')
 73 |     .attr('class', 'label')
 74 | 
 75 | // x label
 76 | labels.append('text')
 77 |     .attr('transform', `translate(${width/2},450)`)
 78 |     .text('Teams')
 79 | 
 80 | // y label
 81 | ylabel = labels.append('text')
 82 |     .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 
 83 |     .text('Wins')
 84 | 
 85 | barLabels = bar.append('text')
 86 |     .attr('class', 'barlabel')
 87 |     .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
 88 |     .attr('y', d => y(d.value) - 15)
 89 |     .text(d => d.value)
 90 |     .style('fill', 'black')
 91 | 
 92 | 
 93 | function updateAlpha() { 
 94 |     const T = 500
 95 | 
 96 |     x.domain((data.map(d => d.team)).sort());
 97 | 
 98 |     bar.selectAll('rect')
 99 |       .transition().duration(T)
100 |         .attr("x", d => x(d.team))
101 | 
102 |     svg.select(".xaxis")
103 |       .transition().duration(T)
104 |         .call(d3.axisBottom(x))
105 |         
106 |     bar.selectAll('.barlabel')
107 |       .transition().duration(T)
108 |         .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
109 |     
110 | 
111 | }
112 | 
113 | function updateNum() { 
114 |     const T = 500
115 | 
116 |     data.sort((a,b) => d3.ascending(a.value, b.value));
117 | 
118 |     x.domain(data.map(d => d.team));
119 | 
120 |     bar.selectAll('rect')
121 |       .transition().duration(T)
122 |         .attr("x", d => x(d.team))
123 |     
124 |     svg.select(".xaxis")
125 |       .transition().duration(T)
126 |         .call(d3.axisBottom(x))
127 | 
128 |     bar.selectAll('.barlabel')
129 |       .transition().duration(T)
130 |         .attr('x', d => x(d.team) + (x.bandwidth()/2)) 
131 | 
132 | }
133 | 
134 | 


--------------------------------------------------------------------------------
/D3/sortable.js:
--------------------------------------------------------------------------------
  1 | async function createChart() {
  2 |     
  3 |   // read data
  4 |   const fileLocation = 'https://gist.githubusercontent.com/caocscar/8cdb75721ea4f6c8a032a00ebc73516c/raw/854bbee2faffb4f6947b6b6c2424b18ca5a8970e/mlb2018.csv'
  5 |   DATA = await d3.csv(fileLocation, type)
  6 |   let chartDate = new Date(2018,3,3)
  7 |   let data = filterData(chartDate)
  8 |   
  9 |   // margins
 10 |   let margin = {top: 80, right: 90, bottom: 30+50, left: 120},
 11 |     width = 900 - margin.left - margin.right,
 12 |     height = 1500 - margin.top - margin.bottom; // 760
 13 | 
 14 |   // svg setup
 15 |   let svg = d3.select('body').append('svg')
 16 |       .attr("class", "chart")
 17 |       .attr("width", width + margin.left + margin.right)
 18 |       .attr("height", height + margin.top + margin.bottom)
 19 |     .append("g")
 20 |       .attr("transform", `translate(${margin.left},${margin.top})`);
 21 | 
 22 |   // set up scales
 23 |   let y = d3.scaleBand()
 24 |       .domain(data.map(d => d.team).reverse())
 25 |       .range([height, 0])
 26 |       .padding(0.33)
 27 | 
 28 |   let x = d3.scaleLinear()
 29 |       .domain([0, Math.ceil(d3.max(data, d => d.value)/5)*5])
 30 |       .range([0, width]);
 31 |       
 32 |   // add axes
 33 |   let xAxis = d3.axisTop(x)
 34 |       .ticks(6)
 35 | 
 36 |   svg.append("g")
 37 |       .attr("class", "x axis")
 38 |       .call(xAxis);
 39 | 
 40 |   let yAxis = d3.axisLeft(y)
 41 |       .tickFormat('')
 42 | 
 43 |   svg.append("g")
 44 |       .attr("class", "y axis")
 45 |       .call(yAxis);
 46 | 
 47 |   // add the x-axis gridlines
 48 |   let gridlines = d3.axisTop(x)
 49 |       .ticks(6)
 50 |       .tickSize(-height)
 51 |       .tickFormat("")
 52 | 
 53 |   svg.append("g")			
 54 |       .attr("class", "grid")
 55 |       .call(gridlines)
 56 | 
 57 |   // set up bar groups
 58 |   let bar = svg.selectAll(".bar")
 59 |     .data(data)
 60 |     .join("g")
 61 |       .attr("class", "bar")
 62 |       .attr("transform", d => `translate(0,${y(d.team)})`)
 63 | 
 64 |   // adding bars
 65 |   let rects = bar.append('rect')
 66 |       .attr("width", (d,i) => x(d.value))
 67 |       .attr("height", y.bandwidth())
 68 |       .style('fill', d => d3.interpolateRdYlBu(d.value/100))
 69 | 
 70 |   // team labels
 71 |   bar.append('text')
 72 |       .attr('class', 'team')
 73 |       .attr('x', -10)
 74 |       .attr('y', y.bandwidth()/2 + 5)
 75 |       .text(d => d.team)
 76 | 
 77 |   // team logos
 78 |   const imgsize = 40
 79 |   let imgs = bar.append("svg:image")
 80 |       .attr('class', 'logo')
 81 |       .attr('x', d => x(d.value) + 5)
 82 |       .attr('y', -5)
 83 |       .attr('width', imgsize)
 84 |       .attr('height', imgsize)
 85 |       .attr("xlink:href", d => `http://www.capsinfo.com/images/MLB_Team_Logos/${urls[d.team]}.png`)
 86 |   
 87 |   // bar labels
 88 |   let barLabels = bar.append('text')
 89 |       .attr('class', 'barlabel')
 90 |       .attr('x', d => x(d.value) + 10 + imgsize)
 91 |       .attr('y', y.bandwidth()/2 + 5)
 92 |       .text(d => d.value)
 93 | 
 94 |   // other chart labels
 95 |   labels = svg.append('g')
 96 |       .attr('class', 'label')
 97 | 
 98 |   // x label
 99 |   labels.append('text')
100 |       .attr('transform', `translate(${width},-40)`)
101 |       .text('Wins')
102 | 
103 |   // y label
104 |   ylabel = labels.append('text')
105 |       .attr('transform', `translate(-80,${height/2}) rotate(-90)`) // order matters
106 |       .text('Teams')
107 | 
108 |   // date label
109 |   const formatDate = d3.timeFormat('%b %-d')
110 |   let dateLabel = labels.append('text')
111 |       .attr('id', 'date')
112 |       .attr('transform', 'translate(0,-40)')
113 |       .text(formatDate(chartDate))
114 | 
115 |   labels.append('text')
116 |       .attr('id', 'season')
117 |       .attr('transform', `translate(${width/2},-40)`)
118 |       .text('MLB 2018 Season')
119 | 
120 |   // clipping rectangle
121 |   const z = 0.97*(height / data.length)
122 |   d3.select('.chart').append("defs")
123 |     .append("clipPath")
124 |       .attr("id", "clip")
125 |     .append("rect")
126 |       .attr('x', 0)
127 |       .attr('y', 0)
128 |       .attr("width", width + margin.left + margin.right)
129 |       .attr("height", 0.4*height)    
130 | 
131 |   // sorting transition
132 |   const T = 300
133 |   let dailyUpdate = setInterval(function() {
134 | 
135 |     chartDate = d3.timeDay.offset(chartDate,1)
136 |     dateLabel.text(formatDate(chartDate))
137 |     data = filterData(chartDate)
138 | 
139 |     // update x-axis
140 |     x.domain([0, Math.ceil(d3.max(data, d => d.value)/5)*5]);
141 |     svg.select('.x.axis').transition().duration(T)
142 |         .call(xAxis);
143 |     svg.select('.grid').transition().duration(T)
144 |         .call(gridlines);
145 | 
146 |     // update bar chart
147 |     rects.data(data)
148 |       .transition().duration(T)
149 |         .attr("width", d => x(d.value))
150 |         .style('fill', d => d3.interpolateRdYlBu(d.value/100))
151 |     imgs.data(data)
152 |       .transition().duration(T)
153 |         .attr('x', d => x(d.value) + 5)
154 |     barLabels.data(data)
155 |       .transition().duration(T)
156 |         .attr('x', d => x(d.value) + 10 + imgsize)
157 |         .text(d => d.value)
158 |     
159 |     // sort data
160 |     data.sort((a,b) => d3.descending(a.value,b.value));
161 | 
162 |     // update y-axis
163 |     y.domain(data.map(d => d.team).reverse());
164 |     bar.transition().duration(T)
165 |         .attr("transform", d => `translate(0,${y(d.team)})`)
166 | 
167 |     // exit function
168 |     if (chartDate > new Date(2018,9,1)) {
169 |       clearInterval(dailyUpdate)
170 |     }
171 | 
172 |   }, T);
173 | 
174 | }
175 | 
176 | function type(d) {
177 |   const formatDate = d3.timeParse('%Y%m%d')
178 |   d.date = formatDate(d.date)
179 |   return d
180 | }
181 | 
182 | function filterData(chartDate) {
183 |   const snapshot = DATA.filter(d => d.date <= chartDate)
184 |   const wins = d3.rollup(snapshot, v => v.length, d => d.team) // returns Map object
185 |   return Array.from(wins, ([key, value]) => ({'team':key, 'value':value}))
186 | }


--------------------------------------------------------------------------------
/sql-intermediate/README.md:
--------------------------------------------------------------------------------
  1 | # Intermediate SQL
  2 | 
  3 | Here is the [Google Slide Deck](https://docs.google.com/presentation/d/1sx7FL58BHbzPWb59Tq1S38QBL1KjNEjse3IyqK4nohY/edit?usp=sharing) for the workshop.
  4 | 
  5 | Link to web-based database [db-fiddle](https://www.db-fiddle.com) for practicing SQL.
  6 | 
  7 | Link to the [Covid dataset](https://gist.github.com/caocscar/b9a1418e5fd9c2cd69bb6f9d67fbc05a) for the exercises.
  8 | <hr>
  9 | 
 10 | ## Workshop Material
 11 | Query Syntax Covered:
 12 | - IF
 13 | - CASE
 14 | - WHEN
 15 | - ROLLUP
 16 | - GROUPING
 17 | - REPLACE
 18 | - OVER (Window Functions)
 19 | - RANK
 20 | - DENSE_RANK
 21 | - WINDOW
 22 | - PARTITION BY
 23 | - WITH (Common Table Expressions)
 24 | 
 25 | Schema Syntax Covered:
 26 | - CREATE TABLE
 27 | - INSERT
 28 | - DELETE
 29 | - DROP
 30 | - IF [NOT] EXISTS
 31 | - NOT NULL
 32 | - PRIMARY KEY
 33 | - AUTO_INCREMENT
 34 | - SHOW COLUMNS
 35 | - INSERT IGNORE INTO
 36 | - UNIQUE
 37 | - ALTER TABLE
 38 | - ADD COLUMN
 39 | - DROP COLUMN
 40 | - MODIFY COLUMN
 41 | - UPDATE
 42 | - INDEX
 43 | 
 44 | Miscellaneous Syntax:
 45 | - SHOW COLUMNS
 46 | - DESCRIBE
 47 | - SHOW TABLES
 48 | - SHOW INDEX
 49 | 
 50 | ## Appendix
 51 | <details>
 52 |   <summary>Solutions Hiding Here</summary>
 53 |   
 54 | #### Practice 1
 55 | ```SQL
 56 | SELECT County, Day, Deaths,
 57 |     CASE
 58 |         WHEN Deaths = 0 THEN -1
 59 |         WHEN Deaths = 1 THEN 0
 60 |         ELSE LOG(Deaths)
 61 |     END AS deathIndex
 62 | FROM Covid
 63 | ORDER BY deathIndex DESC
 64 | ```
 65 | 
 66 | #### Practice 2
 67 | ```SQL
 68 | SELECT IF(GROUPING(County), 'Total', County) as County,
 69 |     SUM(Deaths) AS Total
 70 | FROM Covid
 71 | GROUP BY County WITH ROLLUP
 72 | ```
 73 | 
 74 | #### Practice 2b  
 75 | ```SQL
 76 | SELECT 
 77 |     IF(GROUPING(County),'Michigan Total', IF(GROUPING(CP), 'County Total', County)) AS COUNTY,
 78 |     SUM(Deaths) AS DeathTotal,
 79 |     CP
 80 | FROM Covid
 81 | GROUP BY County, CP WITH ROLLUP
 82 | ```
 83 | 
 84 | #### Practice 3
 85 | ```SQL
 86 | SELECT REPLACE(County, "St", "Saint") AS County, 
 87 |     Day,
 88 |     Cases, 
 89 |     RANK() OVER (PARTITION BY Day ORDER BY Cases DESC) AS 'Rank'
 90 | FROM Covid
 91 | WHERE Day BETWEEN '2020-09-24' AND '2020-09-30'
 92 | AND County LIKE 'S%'
 93 | AND CP = 'Confirmed'
 94 | ```
 95 | 
 96 | #### Practice 3b
 97 | ```SQL
 98 | SELECT County, Day, Cases,
 99 |     LAG(Cases, 7) OVER (ORDER BY Day) As 'WeekAgo' 
100 | FROM Covid
101 | WHERE County = 'Wayne' AND CP = 'Confirmed'
102 | ORDER BY Day DESC
103 | ```
104 | 
105 | #### Practice 4
106 | ```SQL
107 | WITH cte AS
108 | (
109 |     SELECT Day, 
110 |     WEEK(Day) AS Week,
111 |     CP, 
112 |     SUM(Cases) as Total
113 |     FROM Covid
114 |     GROUP BY Day, CP
115 | )
116 | 
117 | SELECT Week, MAX(Total)
118 | FROM cte
119 | GROUP BY Week
120 | ```
121 | 
122 | #### Practice A
123 | ```SQL
124 | CREATE TABLE Michigan (
125 |     Category VARCHAR(6),
126 |     Value VARCHAR(7),
127 |     `Cases` INTEGER,
128 |     `Deaths` INTEGER,
129 |     `CaseFatalityRatio` FLOAT
130 | );
131 | 
132 | INSERT INTO Michigan
133 |     (Category, `Value`, Cases, `Deaths`, `CaseFatalityRatio`)
134 | VALUES
135 |     ('Gender', 'Female', '61390', '3212', '0.051'),
136 |     ('Gender', 'Male', '57956', '3511', '0.061'),
137 |     ('Gender', 'Unknown', '281', null, null);
138 | ```
139 | 
140 | #### Practice B
141 | ```SQL
142 | CREATE TABLE MI (
143 |     ID INT AUTO_INCREMENT,
144 |     `Day` VARCHAR(3),
145 |     `Category` VARCHAR(9),
146 |     `Value` VARCHAR(19) NOT NULL,
147 |     `Pct of Cases` FLOAT,
148 |     `Pct of Deaths` FLOAT,
149 |     PRIMARY KEY (ID)
150 | );
151 | 
152 | INSERT INTO MI
153 |     (`Day`, `Category`, `Value`, `Pct of Cases`, `Pct of Deaths`)
154 | VALUES
155 |     ('Sat', 'Ethnicity', 'Hispanic/Latino', '0.08', '0.03'),
156 |     ('Sat', 'Ethnicity', 'Non-Hispanic Latino', '0.69', '0.85'),
157 |     ('Sat', 'Ethnicity', 'Unknown', '0.23', '0.12');
158 | ```
159 | 
160 | #### Practice B2
161 | ```SQL
162 | INSERT INTO MI 
163 |     (Day, Value)
164 | VALUES
165 |     ('Sun', null);
166 | 
167 | INSERT INTO MI
168 |     (ID, Day, Value)
169 | VALUES
170 |     (3, 'Sun', 'Unknown');
171 | ```
172 | 
173 | #### Practice C
174 | ```SQL
175 | CREATE TABLE mi (
176 |     `Category` VARCHAR(3),
177 |     `Value` VARCHAR(8) UNIQUE,
178 |     `Cases` INTEGER,
179 |     `Deaths` INTEGER DEFAULT 0,
180 |     `CaseFatalityRatio` FLOAT DEFAULT 0
181 | );
182 | 
183 | INSERT INTO mi
184 |     (`Category`, `Value`, `Cases`)
185 | VALUES
186 |     ('Age', '0 to 19', '13342'),
187 |     ('Age', 'Unknown', '109');
188 |   
189 | INSERT INTO mi
190 | VALUES
191 |     ('Age', '20 to 29', '23038', '29', '0.001'),
192 |     ('Age', '30 to 39', '16858', '71', '0.004'),
193 |     ('Age', '40 to 49', '17345', '219', '0.013'),
194 |     ('Age', '50 to 59', '18393', '541', '0.029'),
195 |     ('Age', '60 to 69', '14656', '1188', '0.081'),
196 |     ('Age', '70 to 79', '9374', '1808', '0.193'),
197 |     ('Age', '80+', '8312', '2864', '0.345');
198 | ```
199 | 
200 | #### Practice D
201 | ```SQL
202 | -- Schema SQL window
203 | CREATE TABLE mi (
204 |     `Category` VARCHAR(3),
205 |     `Value` VARCHAR(8),
206 |     `Cases` INTEGER,
207 |     `Deaths` INTEGER,
208 |     `CaseFatalityRatio` FLOAT
209 | );
210 | 
211 | -- Query SQL window
212 | ALTER TABLE mi
213 | ADD COLUMN day VARCHAR(10);
214 | 
215 | ALTER TABLE mi
216 | DROP COLUMN Category,
217 | DROP COLUMN CaseFatalityRatio;
218 | 
219 | ALTER TABLE mi
220 | MODIFY COLUMN Cases VARCHAR(6);
221 | 
222 | DESCRIBE mi;
223 | ```
224 | 
225 | #### Practice E
226 | ```SQL
227 | -- Schema SQL window
228 | CREATE TABLE mi (
229 |     `Category` VARCHAR(3),
230 |     `Value` VARCHAR(8),
231 |     `Cases` INTEGER,
232 |     `Deaths` INTEGER,
233 |     `CaseFatalityRatio` FLOAT,
234 |     INDEX(Cases)
235 | );
236 | 
237 | INSERT INTO mi
238 |     (`Category`, `Value`, `Cases`)
239 | VALUES
240 |     ('Age', '0 to 19', '13342'),
241 |     ('Age', 'Unknown', '109');
242 |   
243 | INSERT INTO mi
244 | VALUES
245 |     ('Age', '20 to 29', '23038', '29', '0.001'),
246 |     ('Age', '30 to 39', '16858', '71', '0.004'),
247 |     ('Age', '40 to 49', '17345', '219', '0.013'),
248 |     ('Age', '50 to 59', '18393', '541', '0.029'),
249 |     ('Age', '60 to 69', '14656', '1188', '0.081'),
250 |     ('Age', '70 to 79', '9374', '1808', '0.193'),
251 |     ('Age', '80+', '8312', '2864', '0.345');
252 | 
253 | UPDATE mi
254 | SET Cases = 1400
255 | WHERE Deaths IS NULL;
256 | 
257 | UPDATE mi
258 | SET Deaths = 5, CaseFatalityRatio = 5
259 | WHERE Deaths IS NULL;
260 | 
261 | -- Query SQL window
262 | SELECT * FROM mi; 
263 | 
264 | DESCRIBE mi;
265 | SHOW INDEX FROM mi; -- Alternatively
266 | ```
267 | </details>
268 | 


--------------------------------------------------------------------------------
/thematic-maps/snowmobile_crashes.txt:
--------------------------------------------------------------------------------
  1 | Crash Instance	Worst Injury in Crash	Crash Longitude	Crash Latitude	Crash Report
  2 | 2015100675	B - nonincapacitating injury	-85.800058172968	46.18084431116	UD-10
  3 | 201510428	B - nonincapacitating injury	-84.789822248845	45.440443992449	UD-10
  4 | 2015104495	C - possible injury	-86.740535569118	46.321992916472	UD-10
  5 | 2015104570	No injury	-85.860376433662	42.738967409964	UD-10
  6 | 2015105193	A - incapacitating injury	-85.836188702043	44.772364145356	UD-10
  7 | 2015106068	A - incapacitating injury	-85.483634043104	41.875491838558	UD-10
  8 | 2015106317	C - possible injury	-85.703826610636	46.309510725084	UD-10
  9 | 201511928	B - nonincapacitating injury	-85.72090105647	44.425194108759	UD-10
 10 | 201511992	B - nonincapacitating injury	-85.403925590975	43.865777605951	UD-10
 11 | 201512223	A - incapacitating injury	-85.849874553891	43.267780456756	UD-10
 12 | 201512836	No injury	-84.164175021592	46.363569085717	UD-10
 13 | 201513939	C - possible injury	-85.918523915019	44.5339711822	UD-10
 14 | 201517334	C - possible injury	-89.012113811027	46.556852689282	UD-10
 15 | 201517688	B - nonincapacitating injury	-83.432389693911	43.628838813508	UD-10
 16 | 201517877	C - possible injury	-85.618124874516	43.58429598679	UD-10
 17 | 201518127	A - incapacitating injury	-84.551456482753	46.360820025168	UD-10
 18 | 201518864	C - possible injury	-83.360621330608	42.627193483883	UD-10
 19 | 201520780	No injury	-85.511744264408	44.67990727913	UD-10
 20 | 201522294	B - nonincapacitating injury	-84.970398918174	44.767687018835	UD-10
 21 | 201523414	No injury	-82.621439618102	42.933385579213	UD-10
 22 | 201526594	C - possible injury	-88.533675091365	46.131239666743	UD-10
 23 | 201526708	A - incapacitating injury	-83.36768691342	43.505122056952	UD-10
 24 | 201526895	B - nonincapacitating injury	-85.039215942012	42.770683368895	UD-10
 25 | 201527866	A - incapacitating injury	-85.493314923755	44.014436135557	UD-10
 26 | 201528011	A - incapacitating injury	-82.63914987732	42.67631835496	UD-10
 27 | 201530093	No injury	-85.790072380071	43.293003810025	UD-10
 28 | 201532310	A - incapacitating injury	-84.484066105388	45.648368506774	UD-10
 29 | 201533350	No injury	-82.995564714258	42.699455054772	UD-10
 30 | 201534571	C - possible injury	-84.673468724015	45.139137069932	UD-10
 31 | 201534662	A - incapacitating injury	-84.827226280307	44.209248111641	UD-10
 32 | 201535067	B - nonincapacitating injury	-85.692815232846	43.439001714006	UD-10
 33 | 201535324	B - nonincapacitating injury	-83.661044655626	43.235162164867	UD-10
 34 | 201538347	No injury	-86.096055097901	44.628892241637	UD-10
 35 | 201539281	C - possible injury	-88.002457730606	46.250241819486	UD-10
 36 | 201539615	No injury	-84.374993926826	44.79095528009	UD-10
 37 | 201539712	No injury	-85.942129992661	42.382238641889	UD-10
 38 | 201540144	C - possible injury	-85.973182443957	41.983623401445	UD-10
 39 | 201541015	No injury	-83.525079129041	44.793058607159	UD-10
 40 | 201541016	C - possible injury	-83.710905733952	44.696924236573	UD-10
 41 | 201541104	B - nonincapacitating injury	-85.009424739563	44.64090447431	UD-10
 42 | 201541854	B - nonincapacitating injury	-84.322411066044	45.387324215226	UD-10
 43 | 201542293	A - incapacitating injury	-85.951591969289	44.238197231663	UD-10
 44 | 201545186	Fatal	-86.494607186789	41.950675232664	UD-10
 45 | 201545288	A - incapacitating injury	-85.306164346805	44.109657752931	UD-10
 46 | 201545784	A - incapacitating injury	-84.620777432775	45.847477649052	UD-10
 47 | 201545792	C - possible injury	-84.848263534832	45.017716815188	UD-10
 48 | 201545795	No injury	-84.702912985489	45.018060357971	UD-10
 49 | 201546845	A - incapacitating injury	-84.759310704179	42.853609729043	UD-10
 50 | 20154704	B - nonincapacitating injury	-83.481240471033	44.283673517502	UD-10
 51 | 201547337	C - possible injury	-85.918375795088	44.223028999258	UD-10
 52 | 201547361	C - possible injury	-85.236701016576	44.374961333914	UD-10
 53 | 201547711	No injury	-84.922743136885	44.917229501134	UD-10
 54 | 201547712	No injury	-84.922743136885	44.917229501134	UD-10
 55 | 201547817	B - nonincapacitating injury	-85.739898404349	44.352626732308	UD-10
 56 | 201547819	C - possible injury	-85.443656094647	44.223078164062	UD-10
 57 | 201547844	B - nonincapacitating injury	-84.935026772683	45.045374974985	UD-10
 58 | 201548089	A - incapacitating injury	-84.381270106224	43.092019957408	UD-10
 59 | 201550275	C - possible injury	-85.595211453211	46.514654245914	UD-10
 60 | 201551026	C - possible injury	-87.974715203904	47.389606520347	UD-10
 61 | 201551511	No injury	-88.50942705195	47.170557499475	UD-10
 62 | 201552074	C - possible injury	-85.003918064235	45.012957328429	UD-10
 63 | 201552075	B - nonincapacitating injury	-84.897606029151	45.05936146032	UD-10
 64 | 201552150	No injury	-84.69118329622	44.99026939293	UD-10
 65 | 201552194	A - incapacitating injury	-84.874932044759	44.9206778492	UD-10
 66 | 201553269	B - nonincapacitating injury	-84.770386527415	44.68163592871	UD-10
 67 | 201553320	No injury	-84.625725808129	45.852029827979	UD-10
 68 | 201553435	No injury	-84.74444312737	44.684979251624	UD-10
 69 | 201554164	A - incapacitating injury	-85.4962039299	44.230400028562	UD-10
 70 | 201559413	C - possible injury	-85.493362139309	43.636153579887	UD-10
 71 | 20156263	No injury	-84.761345269475	45.51029059808	UD-10
 72 | 20156721	No injury	-85.852005942417	41.852770907367	UD-10
 73 | 201568314	No injury	-83.858746163969	42.808999449586	UD-10
 74 | 201568330	A - incapacitating injury	-84.441878016653	45.149464470839	UD-10
 75 | 201569422	No injury	-84.947995213508	46.343717610558	UD-10
 76 | 20157359	C - possible injury	-84.016825650758	43.292010680844	UD-10
 77 | 20157453	No injury	-85.534166957762	42.575186852924	UD-10
 78 | 201574886	Fatal	-85.936715440967	44.807284068035	UD-10
 79 | 201575072	No injury	-82.630236491114	42.976273592823	UD-10
 80 | 201576009	C - possible injury	-83.931191320393	42.635436115333	UD-10
 81 | 2015801	No injury	-85.105955942282	46.752426014342	UD-10
 82 | 201589723	A - incapacitating injury	-85.696619536659	44.12359535407	UD-10
 83 | 201589732	No injury	-85.842420683732	44.139639179498	UD-10
 84 | 201595228	A - incapacitating injury	-85.404277569817	46.139802071388	UD-10
 85 | 2015106335	A - incapacitating injury	-84.947996508111	46.345976028008	UD-10
 86 | 2015106339	No injury	-84.604978501798	46.425548352807	UD-10
 87 | 2015106353	B - nonincapacitating injury	-84.32223738124	46.456877270562	UD-10
 88 | 2015106490	No injury	-84.918080727658	43.928749506565	UD-10
 89 | 2015106713	No injury	-88.294464019266	46.097688013004	UD-10
 90 | 2015107643	A - incapacitating injury	-89.908625169185	46.498840237387	UD-10
 91 | 2015107658	No injury	-89.925542849832	46.481548610279	UD-10
 92 | 2015107831	A - incapacitating injury	-85.373505567305	44.613005050686	UD-10
 93 | 2015108064	C - possible injury	-84.735796271247	43.364629423323	UD-10
 94 | 2015108242	A - incapacitating injury	-88.183440163537	47.37799512391	UD-10
 95 | 2015108290	C - possible injury	-88.805348469807	46.928776615496	UD-10
 96 | 2015109219	B - nonincapacitating injury	-85.009024201918	43.83084235905	UD-10
 97 | 2015109388	No injury	-84.374498680557	42.395729350069	UD-10
 98 | 2015110669	No injury	-86.144147390131	43.260363792897	UD-10
 99 | 2015112610	No injury	-83.410101202069	43.157321985017	UD-10
100 | 2015112789	B - nonincapacitating injury	-84.286036393895	41.966878333666	UD-10
101 | 2015115235	No injury	-86.014850998275	46.657393323565	UD-10
102 | 2015115236	No injury	-86.552985349844	46.419702140502	UD-10
103 | 2015115353	A - incapacitating injury	-87.682822892849	46.498519598707	UD-10
104 | 2015129758	B - nonincapacitating injury	-85.427315822355	44.651533921877	UD-10
105 | 2015150184	No injury	-86.737587061959	46.329205017028	UD-10
106 | 2015162719	No injury	-83.615266056207	45.177822218761	UD-10
107 | 2015183045	No injury	-83.198361994143	42.392714724347	UD-10
108 | 2015183062	No injury	-83.202282094243	42.340796094387	UD-10
109 | 2015262225	B - nonincapacitating injury	-82.557578993695	42.948125965502	UD-10
110 | 2015264644	B - nonincapacitating injury	-85.771788811445	44.647472314438	UD-10
111 | 2015272401	No injury	-83.102913294015	42.296970881048	UD-10
112 | 2015300686	C - possible injury	-88.78415655252	46.969457635212	UD-10


--------------------------------------------------------------------------------
/regex/data/vins.txt:
--------------------------------------------------------------------------------
  1 | 1FDXK84N9FVA40100
  2 | 1G1AD5F56A7186931
  3 | 2C3CCAAG3EH301682
  4 | 2FMDK48C08BA77023
  5 | 3GCPCSE08BG284714
  6 | 3N1AB7AP8DL787925
  7 | 4A3AA46L9XE004326
  8 | 4S3BE645527203527
  9 | 5J6RM4H38EL039758
 10 | 5TFDV58128X062869
 11 | JH4CU2F67EC004754
 12 | JS2RD62H865350625
 13 | KL8CB6S95EC465118
 14 | KMHCT5AE1EU146656
 15 | SAJGX2747VC015715
 16 | SALAK2D40BA576362
 17 | WMWZB3C59CWM05007
 18 | WP0AB2A76BL061330
 19 | YH4K14AA0CA001707
 20 | YV4902DZ1D2400099
 21 |          UNKNOWN
 22 |  UNK
 23 |  UNK 
 24 |  UNKNOWN
 25 |  UU
 26 | """UNK"""
 27 | (UNKNOWN)
 28 | *****UNKNOWN****
 29 | 0
 30 | 0000
 31 | 0000000000
 32 | 00000000000
 33 | 000000000000
 34 | 0000000000000
 35 | 00000000000000
 36 | 000000000000000
 37 | 0000000000000000
 38 | 00000000000000000
 39 | 000000000000000000
 40 | 0000000000000000000
 41 | 00000000000000000000
 42 | 000000000000000000000
 43 | 0000000000000000000000
 44 | 00000000000000000000000
 45 | 000000000000000000000000
 46 | 0000000000000000000000000
 47 | 000000000000000000778
 48 | 00000000000000001
 49 | 00000000000000003
 50 | 00000000000000005
 51 | 0000000000000000A
 52 | 0000000000000000M
 53 | 00000000000000012
 54 | 000000000000000AA
 55 | 000000000000000PP
 56 | 000000000000000XX
 57 | 00000000000000UNK
 58 | 00000000000000VIN
 59 | 00000000000001748
 60 | 00000000000002172
 61 | 00000000000008038
 62 | 00000000000008309
 63 | 0000000000000JBS2
 64 | 0000000000000NONE
 65 | 00000000000010979
 66 | 00000000000012129
 67 | 00000000000013433
 68 | 00000000000013537
 69 | 0000000000001996F
 70 | 00000000000022817
 71 | 00000000000023341
 72 | 00000000000030026
 73 | 00000000000030119
 74 | 00000000000030778
 75 | 00000000000037060
 76 | 00000000000041673
 77 | 00000000000042165
 78 | 00000000000048043
 79 | 00000000000053013
 80 | 00000000000054135
 81 | 00000000000054495
 82 | 00000000000055585
 83 | 00000000000060096
 84 | 00000000000061301
 85 | 00000000000061685
 86 | 00000000000067828
 87 | 00000000000070704
 88 | 00000000000075859
 89 | 00000000000080253
 90 | 0000000000008267B
 91 | 00000000000083889
 92 | 00000000000089047
 93 | 00000000000089177
 94 | 00000000000091442
 95 | 000000000000B7117
 96 | 00000000000120391
 97 | 00000000000131398
 98 | 00000000000132795
 99 | 00000000000135157
100 | 0000000000014438R
101 | 00000000000145136
102 | 00000000000157643
103 | 00000000000160083
104 | 00000000000161438
105 | 00000000000212146
106 | 00000000000214027
107 | 00000000000215886
108 | 00000000000216263
109 | 00000000000236360
110 | 00000000000240057
111 | 00000000000261261
112 | 00000000000266392
113 | 00000000000266416
114 | 00000000000270141
115 | 00000000000310406
116 | 00000000000326687
117 | 00000000000330392
118 | 00000000000332045
119 | 00000000000336748
120 | 00000000000406812
121 | 00000000000408107
122 | 00000000000441256
123 | 00000000000461011
124 | 0000000000046824B
125 | 00000000000502513
126 | 0000000000069105R
127 | 0000000000078010B
128 | 00000000000808791
129 | 00000000000813695
130 | 00000000000819921
131 | 00000000000824779
132 | 0000000000086309B
133 | 00000000000881673
134 | 0000000000097686B
135 | 00000000000A25840
136 | 00000000000A71011
137 | 00000000000AAAAAA
138 | 00000000000B72851
139 | 00000000000DW4121
140 | 00000000000E06246
141 | 00000000000F46117
142 | 00000000000J27248
143 | 00000000000K40121
144 | 00000000000KLF220
145 | 00000000000NL1G9S
146 | 0000000000108126B
147 | 00000000001091643
148 | 00000000001221553
149 | 00000000001424201
150 | 0000000000172385B
151 | 0000000000175629B
152 | 00000000001828867
153 | 00000000002210345
154 | 00000000002308190
155 | 0000000000231332
156 | 00000000002438360
157 | 00000000002467113
158 | 0000000000253213M
159 | 00000000002625567
160 | 00000000002707600
161 | 00000000002751887
162 | 00000000002767946
163 | 00000000003102348
164 | 0000000000369842M
165 | 00000000003D20077
166 | 000000000054321NK
167 | 00000000006418561
168 | 00000000007043523
169 | 00000000008211412
170 | 00000000008218428
171 | 00000000008218469
172 | 00000000009203322
173 | 00000000009203327
174 | 00000000009M18007
175 | 0000000000B70013M
176 | 0000000000BB35178
177 | 0000000000BB95807
178 | 0000000000BC72673
179 | 0000000000BD48041
180 | 0000000000C672629
181 | 0000000000C694106
182 | 0000000000D8556RX
183 | 0000000000F200238
184 | 0000000000F943733
185 | 0000000000HL11240
186 | 0000000000J503014
187 | 0000000000KY33931
188 | 0000000000M130078
189 | 0000000000N50053M
190 | 0000000000OOOOOOO
191 | 0000000000S323158
192 | 0000000000UNKNOWN
193 | 0000001E161B50184
194 | 00000544A02800430
195 | 0000UNKNOWN
196 | 000UNKOWN00000000
197 | 000XXX00000000000
198 | 01010101010101010
199 | 02112250000000000
200 | 02264130000000000
201 | 02292660000000000
202 | 02380690000000000
203 | 02516
204 | 02733800000000000
205 | 03183380000000000
206 | 05119E1994XXXXXXX
207 | 05181134JIANGDONG
208 | 085544B0000000000
209 | 09999999999999999
210 | 0XXXXXXXXXXXX
211 | 100000000000
212 | 10000000000000000
213 | 1000000000000000000
214 | 100000000000000000000
215 | 10101010101010101
216 | 11110000000000000
217 | 11111110000000000
218 | 1111111111111111
219 | 11111111111111111
220 | 1111111111111111111
221 | 11111111111111234
222 | 12121212121212121
223 | 12250000000000000
224 | 12332112232123454
225 | 12340000000000000
226 | 12345000000000000
227 | 12345678900987654
228 | 12345678901234567
229 | 123456789012345678
230 | 12345678909876543
231 | 12345678910111211
232 | 1234567891013333
233 | 12345678911234567
234 | 12345678912345678
235 | 123456789AAAAAAAA
236 | 123EWQ321QWE321QW
237 | 13686000000000000
238 | 18142700000000000
239 | 1850460000000000
240 | 19000000000000000
241 | 19999999999999999
242 | 1F000000000000000
243 | 1G999999999999997
244 | 1UNKNOWN
245 | 20055370000000000
246 | 20885820000000000
247 | 22510030000000000
248 | 24502600000000000
249 | 25049220000000000
250 | 25067290000000000
251 | 26402850000000000
252 | 26494030000000000
253 | 27239000000000000
254 | 28192690000000000
255 | 2ZK78870000000000
256 | 30236500000000000
257 | 30600000000000000
258 | 31844590000000000
259 | 33115100000000000
260 | 36051500000000000
261 | 372293L0000000000
262 | 42519600000000000
263 | 50232000000000000
264 | 51108000000000000
265 | 51727000000000000
266 | 51800000000005043
267 | 59767000000000000
268 | 5TDZT300000000000
269 | 5UNKNOWN
270 | 61234567890POIUYT
271 | 61M85570000000000
272 | 70561000000000000
273 | 7C390410000000000
274 | 80409100000000000
275 | 80756800000000000
276 | 84379070000000000
277 | 85066700000000000
278 | 86133000000000000
279 | 86567270000000000
280 | 88899000000000000
281 | 89821670000000000
282 | 90847910000000000
283 | 90902600000000000
284 | 99
285 | 99109999999999999
286 | 99139999999999999
287 | 9999999999
288 | 99999999999
289 | 999999999999
290 | 9999999999999
291 | 99999999999990909
292 | 99999999999999
293 | 9999999999999900-
294 | 999999999999999
295 | 9999999999999999
296 | 99999999999999999
297 | 999999999999999999
298 | 9999999999999999999
299 | 99999999999999999999
300 | 999999999999999999999
301 | 9999999999999999999999
302 | 999999999999999999999999
303 | 9999999999999999999999999
304 | AA000000000000000
305 | AAAAAAAAAAAAAAAAA
306 | ALL UNKNOWN
307 | B5268000000000000
308 | BB921150000000000
309 | BD688540000000000
310 | BD934900000000000
311 | BIKE0000000000000
312 | BKJ06980000000000
313 | DJ356710000000000
314 | E434TR4G4RTG4RTGR
315 | ES40DC00000000000
316 | FS3221 UNK VIN
317 | FS6DVR00000000000
318 | G00000000000
319 | G0904600000000000
320 | HD00000000000
321 | HD000000000000000
322 | HR554230000000000
323 | JA3AY26A5VV0416 0
324 | JT420000000000000
325 | KAW00000000000000
326 | KSV700A0000000000
327 | KY118720000000000
328 | KY123820000000000
329 | KZ000000000000000
330 | L0000000000000000
331 | LA5PWR00000000000
332 | MBCN6180000000000
333 | MF286000000000000
334 | MY025V00000000000
335 | NA000000000000000
336 | NKNOWN
337 | NL116F00000000000
338 | NONE
339 | NONE0000000000000
340 | NONE9999999999999
341 | NY628680000000000
342 | NZ0QA400000000000
343 | NoVIN999999999999
344 | P0032700000000000
345 | R210III0000000000
346 | SRP16130000000000
347 | SRR55500000000000
348 | T3TQ34TQ34TR34T
349 | T4756000000000000
350 | TH638500000000000
351 | U
352 | U NKNOWN0000000000
353 | UKN
354 | UKNOWN
355 | UKNOWN0000000000
356 | UKNOWN00000000000
357 | UNK
358 | UNK 
359 | UNK H AND R
360 | UNK H-R VEH
361 | UNK HIT AND RUN
362 | UNK NOWN
363 | UNK RENTAL
364 | UNK.
365 | UNK0000000000000
366 | UNK00000000000000
367 | UNK99999999999999
368 | UNKI
369 | UNKKNOWN
370 | UNKMOWN
371 | UNKN
372 | UNKN0000000000000
373 | UNKNIOWN
374 | UNKNIWN
375 | UNKNKOWN
376 | UNKNNOWN0000000000
377 | UNKNON
378 | UNKNOW
379 | UNKNOWEN
380 | UNKNOWN
381 | UNKNOWN 
382 | UNKNOWN         .
383 | UNKNOWN  HIT
384 | UNKNOWN / FLED
385 | UNKNOWN BODY TYPE
386 | UNKNOWN HI
387 | UNKNOWN HIT AND R
388 | UNKNOWN INFORMATI
389 | UNKNOWN M
390 | UNKNOWN VIN
391 | UNKNOWN VIN NUMBE
392 | UNKNOWN VINNUMBE
393 | UNKNOWN!!!!!!!!!!
394 | UNKNOWN##########
395 | UNKNOWN**********
396 | UNKNOWN..........
397 | UNKNOWN/UNKNOWN//
398 | UNKNOWN0000
399 | UNKNOWN0000000000
400 | UNKNOWN00000000000
401 | UNKNOWN999
402 | UNKNOWN9999999999
403 | UNKNOWNHIT
404 | UNKNOWNK
405 | UNKNOWNVIN
406 | UNKNOWN[O
407 | UNKNWN
408 | UNKNWON
409 | UNKOWN
410 | UNKOWN0000000000
411 | UNKOWN00000000000
412 | UNKU
413 | UNKWN
414 | UNKWON0000000000
415 | UNNKNOWN
416 | UNNOWN
417 | UNOWN
418 | UNknown
419 | UTL00000000000000
420 | UU
421 | UU000000000000
422 | UUNKNOWN
423 | UnKnown
424 | Unk
425 | Unknow
426 | Unknown
427 | Unknownn
428 | WL848090000000000
429 | WQ321QWE321QWE321
430 | XG424790000000000
431 | XXX00000000000000
432 | XXXXXX
433 | XXXXXXX
434 | XXXXXXX0000000000
435 | XXXXXXXX
436 | XXXXXXXX65131
437 | XXXXXXXXX
438 | XXXXXXXXXX
439 | XXXXXXXXXX20412
440 | XXXXXXXXXXX
441 | XXXXXXXXXXXX
442 | XXXXXXXXXXXXX
443 | XXXXXXXXXXXXXX
444 | XXXXXXXXXXXXXXX
445 | XXXXXXXXXXXXXXXX
446 | XXXXXXXXXXXXXXXXX
447 | XXXXXXXXXXXXXXXXXXXXXXXXX
448 | _________________
449 | unknown
450 | unknownn
451 | 


--------------------------------------------------------------------------------
/thematic-maps/deer_in_the_city.txt:
--------------------------------------------------------------------------------
  1 | city,Total,K,ABC,PDO,Lat,Lon
  2 | Portage,191,0,5,186,42.201154,-85.580002
  3 | Rochester Hills,150,0,2,148,42.658366,-83.149932
  4 | Midland,137,0,1,136,43.615583,-84.247212
  5 | Battle Creek,116,0,6,110,42.321152,-85.179714
  6 | Farmington Hills,95,0,4,91,42.498994,-83.367717
  7 | Ann Arbor,90,0,4,86,42.280826,-83.743038
  8 | Novi,86,0,8,78,42.480590,-83.475491
  9 | Auburn Hills,82,0,2,80,42.687532,-83.234103
 10 | Lansing,81,0,0,81,42.732535,-84.555535
 11 | Walker,77,0,2,75,43.001413,-85.768091
 12 | Grand Rapids,63,0,1,62,42.963360,-85.668086
 13 | Troy,58,0,5,53,42.606409,-83.149775
 14 | Kalamazoo,57,0,1,56,42.291707,-85.587229
 15 | Kentwood,57,0,5,52,42.869473,-85.644749
 16 | East Lansing,55,0,3,52,42.736979,-84.483865
 17 | Wyoming,46,0,0,46,42.913360,-85.705309
 18 | Southfield,44,0,6,38,42.473369,-83.221873
 19 | Sterling Heights,44,0,3,41,42.580312,-83.030203
 20 | Norton Shores,41,0,2,39,43.168904,-86.263946
 21 | Burton,40,0,1,39,42.999472,-83.616342
 22 | Livonia,40,0,0,40
 23 | Escanaba,32,0,1,31
 24 | Charlotte,32,0,0,32
 25 | Lapeer,31,0,0,31
 26 | Norway,29,0,0,29
 27 | Marquette,28,0,0,28
 28 | Rockford,28,0,0,28
 29 | Holland,27,0,1,26
 30 | Gladstone,27,0,0,27
 31 | Muskegon,25,0,1,24
 32 | Tecumseh,25,0,0,25
 33 | Alpena,25,0,1,24
 34 | Grandville,25,0,0,25
 35 | Fenton,24,0,0,24
 36 | Coldwater,24,0,1,23
 37 | Ithaca,24,0,0,24
 38 | Alma,23,0,0,23
 39 | Hillsdale,22,0,0,22
 40 | Marshall,22,0,0,22
 41 | Traverse City,21,0,1,20
 42 | Petoskey,21,0,0,21
 43 | Romulus,21,0,0,21
 44 | Wixom,21,0,0,21
 45 | Iron River,21,0,0,21
 46 | Holland,20,0,2,18
 47 | Negaunee,19,0,1,18
 48 | Iron Mountain,19,0,0,19
 49 | Sault Ste. Marie,19,0,0,19
 50 | Ludington,18,0,1,17
 51 | Swartz Creek,18,0,1,17
 52 | Chelsea,18,0,1,17
 53 | Jonesville,18,0,0,18
 54 | Adrian,17,0,0,17
 55 | Manistee,17,0,0,17
 56 | Pontiac,16,0,1,15
 57 | Warren,16,0,0,16
 58 | East Jordan,16,0,0,16
 59 | Perry,16,0,0,16
 60 | Paw Paw,15,0,0,15
 61 | Jackson,15,0,1,14
 62 | Litchfield,15,0,0,15
 63 | Lowell,14,0,1,13
 64 | Bloomfield Hills,14,0,0,14
 65 | Boyne City,14,0,0,14
 66 | Westland,13,0,1,12
 67 | Marysville,13,0,0,13
 68 | Hastings,13,0,0,13
 69 | Mt. Pleasant,13,0,0,13
 70 | Mason,13,0,1,12
 71 | Bad Axe,13,0,0,13
 72 | Howell,12,0,0,12
 73 | Reed City,12,0,0,12
 74 | Goodrich,12,0,0,12
 75 | Rochester,12,0,0,12
 76 | Rogers City,12,0,0,12
 77 | St. Louis,12,0,0,12
 78 | Big Rapids,12,0,0,12
 79 | Corunna,12,0,0,12
 80 | Flushing,12,0,0,12
 81 | Richmond,11,0,0,11
 82 | Monroe,11,0,0,11
 83 | Croswell,11,0,0,11
 84 | Hudson,11,0,0,11
 85 | Gibralter,11,0,0,11
 86 | Saline,11,0,0,11
 87 | Newaygo,10,0,0,10
 88 | Oxford,10,0,0,10
 89 | Springfield,10,0,0,10
 90 | Flint,10,0,1,9
 91 | Hart,10,0,0,10
 92 | Holly,10,0,0,10
 93 | Greenville,10,0,0,10
 94 | Caro,10,0,0,10
 95 | Flat Rock,10,0,0,10
 96 | Franklin,10,0,0,10
 97 | Grand Blanc,10,0,0,10
 98 | Portland,9,0,0,9
 99 | Clare,9,0,0,9
100 | Cheboygan,9,0,0,9
101 | Potterville,9,0,0,9
102 | Roscommon,9,0,0,9
103 | Madison Heights,9,0,1,8
104 | Lakeview,9,0,0,9
105 | East Lansing,9,0,0,9,42.736979,-84.483865
106 | Ferrysburg,9,0,0,9
107 | Rosebush,9,0,2,7
108 | Sturgis,9,0,0,9
109 | Brown City,9,0,1,8
110 | Niles,9,0,1,8
111 | Wood Haven,9,0,1,8
112 | Owosso,8,0,0,8
113 | Nashville,8,0,0,8
114 | Grand Ledge,8,0,0,8
115 | Montague,8,0,0,8
116 | Grand Haven,8,0,0,8
117 | Whitehall,8,0,0,8
118 | Coopersville,8,0,0,8
119 | Lake Isabella,8,0,0,8
120 | Saginaw,8,0,0,8
121 | Taylor,8,0,1,7
122 | Ishpeming,8,0,0,8
123 | Dundee,8,0,0,8
124 | Ionia,8,0,0,8
125 | Plainwell,8,0,0,8
126 | Colon,8,0,0,8
127 | Tawas City,8,0,0,8
128 | Charlevoix,8,0,0,8
129 | Lincoln,7,0,0,7
130 | Brighton,7,0,0,7
131 | Mattawan,7,0,0,7
132 | Perrinton,7,0,0,7
133 | Orchard Lake,7,0,0,7
134 | Hersey,7,0,0,7
135 | Alanson,7,0,0,7
136 | Wayland,7,0,0,7
137 | Peck,7,0,0,7
138 | Gaylord,7,0,0,7
139 | St. Joseph,7,0,0,7
140 | Rockwood,7,0,0,7
141 | Homer,7,0,0,7
142 | Hudsonville,7,0,0,7
143 | Beverly Hills,7,0,0,7
144 | Imlay City,7,0,0,7
145 | McBride,7,0,0,7
146 | Baraga,7,0,1,6
147 | Dearborn,7,0,0,7
148 | Albion,7,0,0,7
149 | Concord,7,0,0,7
150 | Detroit,7,0,1,6
151 | Gladwin,6,0,0,6
152 | Millington,6,0,0,6
153 | Royal Oak,6,0,1,5
154 | East Tawas,6,0,1,5
155 | Bessemer,6,0,2,4
156 | Standish,6,0,0,6
157 | South Haven,6,0,0,6
158 | Frankfort,6,0,0,6
159 | Harbor Beach,6,0,0,6
160 | Bay City,6,0,0,6
161 | Linden,6,0,0,6
162 | Romeo,6,0,0,6
163 | Bridgman,6,0,0,6
164 | Maple Rapids,6,0,0,6
165 | Applegate,6,0,0,6
166 | St. Clair,6,0,0,6
167 | Harbor Springs,6,0,0,6
168 | Marine City,5,0,0,5
169 | Stanton,5,0,0,5
170 | Trenton,5,0,0,5
171 | Ubly,5,0,0,5
172 | Dewitt,5,0,0,5
173 | Harrison,5,0,0,5
174 | Stockbridge,5,0,0,5
175 | Edmore,5,0,1,4
176 | Sanford,5,0,0,5
177 | Benzonia,5,0,0,5
178 | Cedar Springs,5,0,0,5
179 | Coleman,5,0,0,5
180 | Port Huron,5,0,0,5
181 | Mecosta,5,0,0,5
182 | Empire,5,0,0,5
183 | Lexington,5,0,0,5
184 | Central Lake,5,0,0,5
185 | Vicksburg,5,0,0,5
186 | Fremont,5,0,0,5
187 | Ortonville,5,0,0,5
188 | Wakefield,5,0,0,5
189 | Evart,5,0,0,5
190 | Fowlerville,5,0,0,5
191 | Cadillac,5,0,0,5
192 | Mayville,5,0,0,5
193 | Buchanan,5,0,0,5
194 | Lawton,5,0,0,5
195 | West Branch,5,0,0,5
196 | Beulah,5,0,0,5
197 | Port Austin,4,0,0,4
198 | New Baltimore,4,0,0,4
199 | Ypsilanti,4,0,0,4
200 | Sandusky,4,0,0,4
201 | Centreville,4,0,0,4
202 | Williamston,4,0,1,3
203 | Three Rivers,4,0,0,4
204 | Suttons Bay,4,0,0,4
205 | Scottville,4,0,0,4
206 | Barryton,4,0,0,4
207 | Union City,4,0,0,4
208 | Sterling,4,0,0,4
209 | Crystal Falls,4,0,0,4
210 | McBain,4,0,0,4
211 | Munising,4,0,0,4
212 | Gobles,4,0,1,3
213 | Casnovia,4,0,0,4
214 | Bangor,4,0,0,4
215 | Hillman,4,0,0,4
216 | Ravenna,4,0,0,4
217 | Dexter,4,0,0,4
218 | New Haven,4,0,0,4
219 | Stevensville,4,0,0,4
220 | Port Sanilac,4,0,0,4
221 | Zeeland,4,0,0,4
222 | Kingsford,4,0,0,4
223 | Middleville,4,0,0,4
224 | Farmington,4,0,0,4
225 | Muskegon Heights,4,0,0,4
226 | Elk Rapids,4,0,0,4
227 | New Buffalo,4,0,0,4
228 | Eaton Rapids,4,0,0,4
229 | Galesburg,4,0,0,4
230 | Cass City,3,0,0,3
231 | Caledonia,3,0,0,3
232 | Bellaire,3,0,0,3
233 | Saugatuck,3,0,0,3
234 | South Lyon,3,0,0,3
235 | Springport,3,0,0,3
236 | Walled Lake,3,0,0,3
237 | Au Gres,3,0,0,3
238 | Kingston,3,0,1,2
239 | Kalkaska,3,0,0,3
240 | Carsonville,3,0,0,3
241 | Zilwaukee,3,0,0,3
242 | Almont,3,0,0,3
243 | Menominee,3,0,0,3
244 | North Branch,3,0,0,3
245 | Midland,3,0,0,3
246 | Baldwin,3,0,1,2
247 | Clarkston,3,0,0,3
248 | Birmingham,3,0,1,2
249 | Unknown Community,3,0,0,3
250 | Spring Lake,3,0,0,3
251 | Allegan,3,0,0,3
252 | Mulliken,3,0,0,3
253 | Blissfield,3,0,0,3
254 | Morrice,3,0,0,3
255 | Douglas,3,0,0,3
256 | Milford,3,0,0,3
257 | Berrien Springs,3,0,0,3
258 | Ontonagon,3,0,0,3
259 | Mesick,3,0,0,3
260 | Kent City,3,0,0,3
261 | Kingsley,3,0,0,3
262 | Allen Park,3,0,0,3
263 | South Rockwood,3,0,0,3
264 | Lathrup Village,3,0,0,3
265 | Northport,3,0,0,3
266 | Emmett,3,0,0,3
267 | Southgate,3,0,0,3
268 | Quincy,3,0,1,2
269 | Mt. Clemens,2,0,0,2
270 | Clayton,2,0,0,2
271 | Onsted,2,0,0,2
272 | Barton Hills,2,0,0,2
273 | Plymouth,2,0,0,2
274 | Vassar,2,0,0,2
275 | Deckerville,2,0,0,2
276 | Mendon,2,0,0,2
277 | Laingsburg,2,0,0,2
278 | St. Charles,2,0,0,2
279 | Frankenmuth,2,0,0,2
280 | Birch Run,2,0,0,2
281 | Posen,2,0,0,2
282 | Onaway,2,0,0,2
283 | New Lothrop,2,0,0,2
284 | Vernon,2,0,0,2
285 | LeRoy,2,0,0,2
286 | Rose City,2,0,0,2
287 | Pentwater,2,0,0,2
288 | Sylvan Lake,2,0,0,2
289 | Hartford,2,0,0,2
290 | Utica,2,0,0,2
291 | Manchester,2,0,0,2
292 | Casnovia,2,0,0,2
293 | Pierson,2,0,0,2
294 | Howard City,2,0,0,2
295 | Carson City,2,0,0,2
296 | Carleton,2,0,0,2
297 | Lake City,2,0,0,2
298 | Dearborn Heights,2,0,0,2
299 | Carney,2,0,0,2
300 | Morley,2,0,0,2
301 | Freesoil,2,0,0,2
302 | Fountain,2,0,0,2
303 | Custer,2,0,0,2
304 | Kaleva,2,0,0,2
305 | New Era,2,0,0,2
306 | Harrisville,2,0,0,2
307 | Port Hope,2,0,0,2
308 | Davison,2,0,0,2
309 | Niles,2,0,0,2
310 | Cassopolis,2,0,0,2
311 | Breckenridge,2,0,0,2
312 | Lyons,2,0,0,2
313 | Bronson,2,0,0,2
314 | Omer,2,0,0,2
315 | Twining,2,0,0,2
316 | Houghton,2,0,0,2
317 | L'Anse,2,0,0,2
318 | Shepherd,2,0,0,2
319 | Shoreham,2,0,0,2
320 | Grand Beach,2,0,0,2
321 | Gaastra,2,0,0,2
322 | Auburn,2,0,0,2
323 | Benton Harbor,2,0,0,2
324 | Leslie,2,0,0,2
325 | Belding,2,0,0,2
326 | Elberta,2,0,0,2
327 | Pewamo,2,0,0,2
328 | Otisville,2,0,0,2
329 | Richland,2,0,0,2
330 | Honor,2,0,0,2
331 | Vermontville,2,0,0,2
332 | Olivet,2,0,0,2
333 | Otsego,2,0,0,2
334 | Farwell,2,0,0,2
335 | Lansing,2,0,0,2
336 | Grayling,2,0,0,2
337 | Sparta,2,0,0,2
338 | St. Johns,2,0,0,2
339 | Fennville,2,0,0,2
340 | Eagle,1,0,0,1
341 | Chesaning,1,0,0,1
342 | Lake Linden,1,0,0,1
343 | Bellevue,1,0,0,1
344 | North Muskegon,1,0,0,1
345 | Lakewood Club,1,0,0,1
346 | Vanderbilt,1,0,0,1
347 | Watervliet,1,0,0,1
348 | Kinde,1,0,0,1
349 | Gagetown,1,0,0,1
350 | Galien,1,0,0,1
351 | Elsie,1,0,0,1
352 | Hesperia,1,0,0,1
353 | Sheridan,1,0,0,1
354 | Garden,1,0,0,1
355 | Fowler,1,0,0,1
356 | Webberville,1,0,0,1
357 | Luna Pier,1,0,0,1
358 | Estral Beach,1,0,0,1
359 | Thompsonville,1,0,0,1
360 | Millersburg,1,0,0,1
361 | Merrill,1,0,0,1
362 | Capac,1,0,0,1
363 | Hancock,1,0,1,0
364 | Forestville,1,0,0,1
365 | Richmond,1,0,0,1
366 | Mackinaw City,1,0,0,1
367 | Boyne Falls,1,0,0,1
368 | Constantine,1,0,0,1
369 | Pleasant Ridge,1,0,0,1
370 | Ironwood,1,0,0,1
371 | Edwardsburg,1,0,0,1
372 | Dowagiac,1,0,0,1
373 | Northville,1,0,0,1
374 | Leonard,1,0,0,1
375 | Tekonsha,1,0,0,1
376 | Oakley,1,0,0,1
377 | Marlette,1,0,0,1
378 | Athens,1,0,0,1
379 | Manistique,1,0,0,1
380 | North Adams,1,0,0,1
381 | Reading,1,0,0,1
382 | Bancroft,1,0,0,1
383 | Bingham Farms,1,0,0,1
384 | Marion,1,0,0,1
385 | Lennon,1,0,0,1
386 | Wolverine,1,0,0,1
387 | White Cloud,1,0,0,1
388 | Metamora,1,0,0,1
389 | Parchment,1,0,0,1
390 | Caspian,1,0,0,1
391 | Clare,1,0,0,1
392 | Brooklyn,1,0,0,1
393 | Parma,1,0,0,1
394 | Copemish,1,0,0,1
395 | Bear Lake,1,0,0,1
396 | St. Clair Shores,1,0,0,1
397 | Roseville,1,0,0,1
398 | Inkster,1,0,0,1
399 | East Grand Rapids,1,0,0,1
400 | Sand Lake,1,0,0,1
401 | South Haven,1,0,0,1
402 | Morenci,1,0,0,1
403 | Clinton,1,0,0,1
404 | Wayne,1,0,0,1
405 | Buckley,1,0,0,1
406 | Traverse City,1,0,0,1
407 | Clifford,1,0,0,1
408 | Harrietta,1,0,0,1
409 | Manton,1,0,0,1
410 | Milan,1,0,0,1
411 | Ovid,1,0,0,1
412 | Pinconning,1,0,0,1
413 | Saranac,1,0,0,1
414 | Powers,1,0,0,1
415 | Stanwood,1,0,0,1
416 | Stephenson,1,0,0,1
417 | Essexville,1,0,0,1
418 | Daggett,1,0,0,1
419 | 


--------------------------------------------------------------------------------
/pytorch/workshop_neural_net.md:
--------------------------------------------------------------------------------
  1 | \titlepage
  2 | ## Deep Neural Networks (DNNs)
  3 | 
  4 | -   A DNN is a mathematical function inspired by neural networks in the
  5 |     brain.
  6 | 
  7 | -   Input layer (features), hidden layers, output layer (targets).
  8 | 
  9 | -   Your data determines number of features and targets.
 10 | 
 11 | -   You choose number of hidden layers and "neurons" (activation units)
 12 |     in each hidden layer.
 13 | 
 14 | \centering
 15 | ![](DNN.png){width="50%"} 
 16 | 
 17 | ## Deep Neural Networks (DNNs), cont'd
 18 | 
 19 | -   Hidden layers have variables (weights, biases) that are trained.
 20 | 
 21 | -   Mathematical structure: Composite of nonlinear activation functions
 22 |     acting on matrix/vector operations, e.g.
 23 |     $$f(x) = A_2{\color{red}g(A_1{\color{blue}g(A_0x+b_0)}+b_1)}+b_2$$
 24 | 
 25 | \centering
 26 | ![](DNN_activations.png){width="\textwidth"} 
 27 | 
 28 | ## Training DNNs
 29 | 
 30 | -   Training a DNN means optimizing the weights and biases to "fit"
 31 |     given data
 32 | 
 33 |     -   i.e. minimize error between DNN prediction and the given data
 34 | 
 35 | -   Optimization: Think of mountains and valleys. Your location is like
 36 |     the value of the weights/biases. Your elevation is like the value of
 37 |     the error. As you "walk down the mountain", you are changing the
 38 |     values of the weights/biases to decrease the value of the error.
 39 | 
 40 | \centering
 41 | ![](mtn.png){width="50%"}
 42 | 
 43 | ## Training DNNs, cont'd
 44 | 
 45 | -   Usually a variant of **stochastic gradient descent**:
 46 | 
 47 |     -   **Gradient**: Points toward steepest slope
 48 | 
 49 |     -   **Gradient descent** method: Take steps down steepest slope to
 50 |         get to minimum
 51 | 
 52 |     -   **Stochastic gradient descent**: Calculate the error based on a
 53 |         small number of data (a **batch**) instead of the entire data
 54 |         set
 55 | 
 56 | -   You choose: step size (learning rate), batch size
 57 | 
 58 | \centering
 59 | ![](mtn.png){width="50%"}
 60 | 
 61 | ## playground.tensorflow.org
 62 | 
 63 | \centering
 64 | ![](playground.png){width="\textwidth"}
 65 | 
 66 | Note: playground.tensorflow.org is an educational tool. It does not
 67 | actually use the TensorFlow library, nor can you use it to train with
 68 | your data.
 69 | 
 70 | ## Underfitting (high bias)
 71 | 
 72 | Symptoms:
 73 | 
 74 | -   High training and testing error
 75 | 
 76 | Possible treatments:
 77 | 
 78 | -   Make the model larger (more layers, more neurons)
 79 | 
 80 | -   Increase the number of features, artificially if necessary (e.g.
 81 |     $x_1x_2$, $\sin(x)$, etc.)
 82 | 
 83 | -   More training
 84 | 
 85 | \centering
 86 | ![](underfitting.png){width="50%"}
 87 | 
 88 | \vspace{.5cm}
 89 | ## Overfitting (high variance)
 90 | 
 91 | Symptoms:
 92 | 
 93 | -   Low training error, high testing error
 94 | 
 95 | -   (Made worse by noisy data)
 96 | 
 97 | Possible treatments:
 98 | 
 99 | -   More data
100 | 
101 | -   Regularization (L1, L2, dropout)
102 | 
103 | -   Less training (early stopping)
104 | 
105 | -   Simplify model (use w/ caution)
106 | 
107 | \centering
108 | ![](overfitting2.png){width="50%"}
109 | 
110 | \vspace{.5cm}
111 | ## Regularization
112 | 
113 | -   Regularization smooths the model; reduces complexity in the output
114 |     ([Wikipedia](https://en.wikipedia.org/wiki/Regularization_(mathematics))).
115 | 
116 | -   In neural networks, this is done by keeping the weights at a
117 |     similar, low magnitude.
118 | 
119 | -   L1 regularization adds the L1 norm of the weights to the loss.
120 | 
121 | -   L2 regularization adds the L2 norm of the weights (more sensitive to
122 |     outliers).
123 | 
124 | -   Dropout randomly and temporarily drops weights to zero during
125 |     training.
126 | 
127 | \centering
128 | ![](L2reg.png){width="50%"}
129 | 
130 | ## playground.tensorflow.org
131 | 
132 | \centering
133 | ![](playground.png){width="\textwidth"}
134 | 
135 | Note: playground.tensorflow.org is an educational tool. It does not
136 | actually use the TensorFlow library, nor can you use it to train with
137 | your data.
138 | 
139 | ## Nonlinear regression
140 | 
141 | -   Begin with example of nonlinear regression.
142 | 
143 | -   Use a standard DNN to map continuous inputs to continuous outputs.
144 | 
145 | -   Data in example has two inputs, one output (slices parallel to
146 |     x-axis are parabolic, slices parallel to y-axis are sinusoidal).
147 | 
148 | \centering
149 | ![](DNNRegressor_data.png){width="50%"}
150 | 
151 | ## Load data
152 | 
153 | ## Build the model
154 | 
155 | Define the structure of the DNN. Here, we define two hidden layers, with
156 | 5 neurons in each layer.
157 | 
158 | We also specify the activation function here. The `relu` function is
159 | commonly used, but you can use others (examples:
160 | [Wikipedia](https://en.wikipedia.org/wiki/Activation_function)):
161 | 
162 | \vspace{.5cm}
163 | \hspace*{10pt}
164 | `sigmoid, softplus, tanh`, etc.
165 | 
166 | \vspace{.5cm}
167 | Note that no activation is used on the final layer.
168 | 
169 | \vspace{.5cm}
170 | Experiment with the hidden units and activation function.
171 | 
172 | ## L1, L2 regularization
173 | 
174 | ## Dropout
175 | 
176 | ## Training
177 | 
178 | Stochastic gradient descent methods use shuffled mini-batches instead of
179 | the entire data set for each training iteration. We specify batch size,
180 | and how many epochs to train the code.
181 | 
182 | \vspace{.5cm}
183 | An epoch is the number of training iterations required to go through the
184 | entire training set once. For example, 1,000 datapoints and a batch size
185 | of 10, one epoch would take 100 training iteration.
186 | 
187 | \vspace{.5cm}
188 | We can also specify validation data to see how the validation loss
189 | changes during training.
190 | 
191 | Experiment with batch size and number of epochs.
192 | 
193 | ## Results
194 | 
195 | With good settings in the code (not the current settings), we can get
196 | the following fit:
197 | 
198 | \centering
199 | ![](goodRegression.png){width="80%"}
200 | 
201 | ## Exercise 1
202 | 
203 | -   Run the code.
204 | 
205 | -   Identify the problem (underfitting or overfitting).
206 | 
207 | -   Try possible solutions to get a better fit.
208 | 
209 | ## Classification
210 | 
211 | -   Consider the problem of classification.
212 | 
213 | -   Maps feature values to a category.
214 | 
215 | -   Use the example of irises
216 | 
217 |     -   Four features: sepal length, sepal width, petal length, petal
218 |         width
219 | 
220 |     -   Three classes: Iris setosa, Iris virginica, Iris versicolor
221 | 
222 | \centering
223 | ![Iris versicolor, by Danielle Langlois (CC BY-SA 3.0),
224 | [commons.wikimedia.org/w/index.php?curid=248095](commons.wikimedia.org/w/index.php?curid=248095)](iris_versicolor.jpg){width="40%"}
225 | 
226 | ## Import data
227 | 
228 | Data label format: Usually given as 0, 1, or 2; we need it to be [1,0,0], [0,1,0], or [0,0,1].
229 | 
230 | ## Build the model
231 | 
232 | Define the structure of the DNN. Here, we define three hidden layers,
233 | with 1000, 500, and 70 neurons in each respective layer.
234 | 
235 | Since this is classification, apply the
236 | [softmax](https://en.wikipedia.org/wiki/Softmax_function) function to
237 | the last layer. This transforms the output to be a vector of
238 | probabilities that sum to one: $$\begin{aligned}
239 |     p_i &= \frac{\exp(f_i)}{\sum\limits_j \exp(f_j)}\end{aligned}$$
240 | where $p_i$ is probability of category $i$ being true, $f_i$ is $i$-th
241 | component of the final layer's output.
242 | 
243 | ## Loss
244 | 
245 | We again define the loss function and the optimizer. For classification,
246 | we use the [cross entropy](https://en.wikipedia.org/wiki/Cross_entropy)
247 | loss function. We are also interested in the accuracy metric (%
248 | correctly classified), in addition to the loss.
249 | 
250 | $$\begin{aligned}
251 |     \mathrm{cross\_entropy} = \frac{1}{n_\mathrm{samples}}\sum\limits_j^{n_\mathrm{samples}}\sum\limits_i^{n_\mathrm{classes}}\hat{p}_i^j\log(p_i^j)\end{aligned}$$
252 | where $\hat{p}_i^j$ is the data and $p_i^j$ is the prediction for class
253 | $i$, sample $j$.
254 | 
255 | ## Training
256 | 
257 | Training is done as before.
258 | 
259 | ## Exercise 2
260 | 
261 | -   Run the code.
262 | 
263 | -   Identify the problem (underfitting or overfitting).
264 | 
265 | -   Try possible solutions to get a better result.
266 | 
267 | ## Convolutional Neural Network (CNN)
268 | 
269 | -   Image recognition is often done with CNNs.
270 | 
271 | -   CNNs perform classification by adding new types of layers, primarily
272 |     "convolutions" and "pooling".
273 | 
274 | -   The "convolution": scanning a filter across the image.
275 | 
276 | -   The "pooling": take the most significant features from a group of
277 |     pixels.
278 | 
279 | -   Some nice explanations of CNNs by [Adam
280 |     Geitgey](https://medium.com/@ageitgey/machine-learning-is-fun-part-3-deep-learning-and-convolutional-neural-networks-f40359318721)
281 |     and
282 |     [ujjwalkarn](https://ujjwalkarn.me/2016/08/11/intuitive-explanation-convnets/).
283 | 
284 | -   Our example will use the [MNIST](http://yann.lecun.com/exdb/mnist/)
285 |     database of handwritten digits.
286 | 
287 | -   Based on [this
288 |     example](https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py).
289 | 
290 |     \centering
291 |     ![](mnist_0-9.png){width="30%"}
292 | 
293 | ## Initialize model, Normalize input
294 | 
295 | We shift and normalize the inputs for better fitting.
296 | 
297 | We also define the input shape. The images are 28 by 28 pixels, with a
298 | grayscale value. This means each image is defined by a 3D tensor,
299 | $28\times28\times1$ (a color image of the same size would be
300 | $28\times28\times3$).
301 | 
302 | ## Convolutional layer
303 | 
304 | The first convolutional layer is applied. This involves sweeping a
305 | filter across the image. (Gives \"translational invariance.\")
306 | 
307 | <img src="conv2D.gif" style="display:block; margin: 0 auto; width:60%">
308 | 
309 | We use 4 filters with a size of $5\times5$ pixels, with ReLU activation.
310 | 
311 | ## Max pooling
312 | 
313 | Max pooling involves looking at clusters of the output (in this example,
314 | $2\times2$ clusters), and sets the maximum filter value as the value for
315 | the cluster.
316 | 
317 | <img src="maxPool.gif" style="display:block; margin: 0 auto; width:40%">
318 | 
319 | I.e. a "match" anywhere in the cluster $\implies$ a "match" for the
320 | cluster.
321 | 
322 | \vspace{0.5cm}
323 | Since we are also using stride of 2, the clusters don't overlap.
324 | 
325 | Pooling reduces the size of the neural net, speeding up computations.
326 | 
327 | ## 2nd convolution and pooling
328 | 
329 | A second convolutional layer, followed by max pooling, is used.
330 | 
331 | ## Fully-connected layer
332 | 
333 | The 3D tensor is converted back to a 1D tensor to act as input for a
334 | dense or fully-connected layer, the same type used with the previous
335 | regression and classification examples.
336 | 
337 | ## Dropout, Softmax
338 | 
339 | We add a dropout layer here. In this example, dropout happens at a rate
340 | of 40% (i.e. 40% of weights are temporarily set to zero at each training
341 | iteration).
342 | 
343 | As in the Iris classification problem, we finish with a dense layer and
344 | softmax activation function to return probabilities for each category.
345 | 
346 | ## Compile, Train
347 | 
348 | We compile and train as in the previous classification example:
349 | 
350 | ## Exercise 3
351 | 
352 | -   Run the file.
353 | 
354 | -   Modify the CNN and training to see how high of a validation accuracy
355 |     you can get.
356 | 


--------------------------------------------------------------------------------
/pdf-data-extraction/pdfminer_workshop.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# PDF Text Mining using PDFMiner"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Installation\n",
 15 |     "\n",
 16 |     "`pip install pdfminer.six`"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "## How to Use\n",
 24 |     "Below is an edited code example from [Tim Arnold's blog on *Manipulating PDFs with Python*]( https://www.binpress.com/tutorial/manipulating-pdfs-with-python/167). It has been modified to be compatible with Python 3.X. Most of it is boilerplate stuff that does not need to change. The only change that needs to be done is the filename and the  page(s) of interest. "
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 1,
 30 |    "metadata": {
 31 |     "collapsed": true
 32 |    },
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "from io import StringIO\n",
 36 |     "from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter\n",
 37 |     "from pdfminer.converter import TextConverter\n",
 38 |     "from pdfminer.layout import LAParams\n",
 39 |     "from pdfminer.pdfpage import PDFPage  "
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "Identify file and page of interest"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 2,
 52 |    "metadata": {
 53 |     "collapsed": true
 54 |    },
 55 |    "outputs": [],
 56 |    "source": [
 57 |     "filename = 'MDOT_fastfacts02-2011_345554_7.pdf'\n",
 58 |     "pagenums = [3] # empty list does all pages"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "markdown",
 63 |    "metadata": {},
 64 |    "source": [
 65 |     "Create instances of classes necessary to read pdf"
 66 |    ]
 67 |   },
 68 |   {
 69 |    "cell_type": "code",
 70 |    "execution_count": 3,
 71 |    "metadata": {
 72 |     "collapsed": true
 73 |    },
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "output = StringIO()\n",
 77 |     "manager = PDFResourceManager()\n",
 78 |     "converter = TextConverter(manager, output, laparams=LAParams())\n",
 79 |     "interpreter = PDFPageInterpreter(manager, converter)"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "markdown",
 84 |    "metadata": {},
 85 |    "source": [
 86 |     "Open the pdf and read & process page(s) of interest"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "code",
 91 |    "execution_count": 4,
 92 |    "metadata": {},
 93 |    "outputs": [],
 94 |    "source": [
 95 |     "with open(filename, 'rb') as fin:\n",
 96 |     "    for page in PDFPage.get_pages(fin, pagenums):\n",
 97 |     "        interpreter.process_page(page)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "Get output string"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": 5,
110 |    "metadata": {
111 |     "collapsed": true
112 |    },
113 |    "outputs": [],
114 |    "source": [
115 |     "text = output.getvalue()\n",
116 |     "converter.close()\n",
117 |     "output.close()"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {},
123 |    "source": [
124 |     "Let's look at the output text string"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "code",
129 |    "execution_count": 6,
130 |    "metadata": {},
131 |    "outputs": [
132 |     {
133 |      "data": {
134 |       "text/plain": [
135 |        "'Fast Facts\\n\\n201 7\\n\\nCARPOOL LOTS\\n\\n2015 MICHIGAN \\nSTATE REVENUE PACKAGE\\n\\nn There are 261 carpool parking lots located across \\n\\nthe state, 23 of which are public-private partnerships. \\nIncluded in the public-private partnerships are 17 \\nlocations that MDOT has partnered with Meijer Corp. \\nto provide carpool parking spaces in Meijer parking lots \\nlocated near the highway.\\n\\nn MDOT continues its efforts to provide bike racks at \\n\\ncarpool lots, and to attract transit service to lots  \\nwhere appropriate.\\n\\nCOST OF ROAD CONSTRUCTION \\n\\nRoadway construction costs are typically based on standard  \\ndesign characteristics, materials, and the type of work performed. \\nGeneral estimates are provided for the average cost per lane mile \\nof major work by roadway type, and material costs. \\n\\nAverage Cost Per Lane Mile by  \\nMajor Work Type for Various Networks  \\n(2016 figures; in millions) \\n\\nWork Type \\n\\nReconstruction Rehabilitation Average R&R\\n\\n \\n \\n\\nCombined \\nStatewide\\nFreeway\\nNon-Freeway\\nStatewide \\nUrban\\nStatewide \\nRural\\n\\n$2.0 \\n\\n$2.0 \\n$1.9 \\n\\n$2.1 \\n\\n$1.2 \\n\\n$0.6 \\n\\n$0.8 \\n$0.5 \\n\\n$0.7 \\n\\n$0.5 \\n\\n$1.0 \\n\\n$1.3 \\n$0.8 \\n\\n$1.2 \\n\\n$0.6 \\n\\nCost\\n\\n $64.18\\n $48.04\\n $1.31\\n $1.06\\n\\nMaterial Cost for Construction  \\n(2016 Year-to-Date)\\n\\nMaterial\\nHot Mix Asphalt (HMA) per Ton\\nConcrete per Square Yard\\nStructural Steel per Pound\\nReinforcement Steel per Pound\\n\\nState Transportation Funding Package\\nOn Nov. 10, Gov. Snyder signed a package of \\ntransportation bills approved by the Legislature. In the \\nshort term, the legislation will:\\n\\n•  Provide $450 million in additional fuel tax \\n\\nrevenues, beginning in January 2017. The tax \\non gasoline and diesel fuel will rise to 26.3 cents \\nat that time, as the legislation also provides for \\ndiesel parity.\\n\\n•  Provide $190 million from a 20 percent increase  \\n\\nin vehicle registration fees, also beginning in \\nJanuary 2017.\\n\\nThis $600 million in new revenue will be distributed to \\nMDOT, county road commissions, cities and villages, \\nand the Comprehensive Transportation Fund through \\nthe existing Act 51 formula, providing a roughly 30 \\npercent increase by 2018.\\nThe new revenue is expected to generate an average \\nof more than 4,000 jobs per year in the first two years. \\nIt will also help address the need to repair and maintain \\nMichigan’s existing transportation systems.\\nBeginning in 2019, the Legislature intends to appropriate \\nincome tax revenue to roads agencies, according \\nto the Act 51 formula, excluding the Comprehensive \\nTransportation Fund, in these amounts:\\n \\n \\n \\nBeginning in 2016, the legislation adds transparency \\nand accountability:\\n\\n•  2019…………………..…..$150 million\\n•  2020……………………....$325 million\\n•  2021 and thereafter……..$600 million\\n\\n•  Administrative Expenses: MDOT \\n\\nadministrative expenses, previously capped at \\n10 percent, are now limited to 8 percent of its \\nbudget.\\n\\n•  Pavement Warranties: Road agencies are \\nrequired to buy pavement warranties, where \\nappropriate, for projects costing more than  \\n$2 million. \\n\\n•  Competitive Bidding: To reduce project costs, \\nall agencies are required to competitively bid out \\nprojects costing more than $100,000. \\n\\n•  Longer-lived Pavements: MDOT will be \\n\\nrequired to prepare a report on the potential for \\nconstructing longer-lived pavements and report \\nto the Legislature by June 2016.\\n\\n4   2017 Fast Facts  \\n\\n (Updated 1/2017)\\n\\n\\x0c'"
136 |       ]
137 |      },
138 |      "execution_count": 6,
139 |      "metadata": {},
140 |      "output_type": "execute_result"
141 |     }
142 |    ],
143 |    "source": [
144 |     "text"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "Pretty Print Text"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 7,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "name": "stdout",
161 |      "output_type": "stream",
162 |      "text": [
163 |       "('Fast Facts\\n'\n",
164 |       " '\\n'\n",
165 |       " '201 7\\n'\n",
166 |       " '\\n'\n",
167 |       " 'CARPOOL LOTS\\n'\n",
168 |       " '\\n'\n",
169 |       " '2015 MICHIGAN \\n'\n",
170 |       " 'STATE REVENUE PACKAGE\\n'\n",
171 |       " '\\n'\n",
172 |       " 'n There are 261 carpool parking lots located across \\n'\n",
173 |       " '\\n'\n",
174 |       " 'the state, 23 of which are public-private partnerships. \\n'\n",
175 |       " 'Included in the public-private partnerships are 17 \\n'\n",
176 |       " 'locations that MDOT has partnered with Meijer Corp. \\n'\n",
177 |       " 'to provide carpool parking spaces in Meijer parking lots \\n'\n",
178 |       " 'located near the highway.\\n'\n",
179 |       " '\\n'\n",
180 |       " 'n MDOT continues its efforts to provide bike racks at \\n'\n",
181 |       " '\\n'\n",
182 |       " 'carpool lots, and to attract transit service to lots  \\n'\n",
183 |       " 'where appropriate.\\n'\n",
184 |       " '\\n'\n",
185 |       " 'COST OF ROAD CONSTRUCTION \\n'\n",
186 |       " '\\n'\n",
187 |       " 'Roadway construction costs are typically based on standard  \\n'\n",
188 |       " 'design characteristics, materials, and the type of work performed. \\n'\n",
189 |       " 'General estimates are provided for the average cost per lane mile \\n'\n",
190 |       " 'of major work by roadway type, and material costs. \\n'\n",
191 |       " '\\n'\n",
192 |       " 'Average Cost Per Lane Mile by  \\n'\n",
193 |       " 'Major Work Type for Various Networks  \\n'\n",
194 |       " '(2016 figures; in millions) \\n'\n",
195 |       " '\\n'\n",
196 |       " 'Work Type \\n'\n",
197 |       " '\\n'\n",
198 |       " 'Reconstruction Rehabilitation Average R&R\\n'\n",
199 |       " '\\n'\n",
200 |       " ' \\n'\n",
201 |       " ' \\n'\n",
202 |       " '\\n'\n",
203 |       " 'Combined \\n'\n",
204 |       " 'Statewide\\n'\n",
205 |       " 'Freeway\\n'\n",
206 |       " 'Non-Freeway\\n'\n",
207 |       " 'Statewide \\n'\n",
208 |       " 'Urban\\n'\n",
209 |       " 'Statewide \\n'\n",
210 |       " 'Rural\\n'\n",
211 |       " '\\n'\n",
212 |       " '$2.0 \\n'\n",
213 |       " '\\n'\n",
214 |       " '$2.0 \\n'\n",
215 |       " '$1.9 \\n'\n",
216 |       " '\\n'\n",
217 |       " '$2.1 \\n'\n",
218 |       " '\\n'\n",
219 |       " '$1.2 \\n'\n",
220 |       " '\\n'\n",
221 |       " '$0.6 \\n'\n",
222 |       " '\\n'\n",
223 |       " '$0.8 \\n'\n",
224 |       " '$0.5 \\n'\n",
225 |       " '\\n'\n",
226 |       " '$0.7 \\n'\n",
227 |       " '\\n'\n",
228 |       " '$0.5 \\n'\n",
229 |       " '\\n'\n",
230 |       " '$1.0 \\n'\n",
231 |       " '\\n'\n",
232 |       " '$1.3 \\n'\n",
233 |       " '$0.8 \\n'\n",
234 |       " '\\n'\n",
235 |       " '$1.2 \\n'\n",
236 |       " '\\n'\n",
237 |       " '$0.6 \\n'\n",
238 |       " '\\n'\n",
239 |       " 'Cost\\n'\n",
240 |       " '\\n'\n",
241 |       " ' $64.18\\n'\n",
242 |       " ' $48.04\\n'\n",
243 |       " ' $1.31\\n'\n",
244 |       " ' $1.06\\n'\n",
245 |       " '\\n'\n",
246 |       " 'Material Cost for Construction  \\n'\n",
247 |       " '(2016 Year-to-Date)\\n'\n",
248 |       " '\\n'\n",
249 |       " 'Material\\n'\n",
250 |       " 'Hot Mix Asphalt (HMA) per Ton\\n'\n",
251 |       " 'Concrete per Square Yard\\n'\n",
252 |       " 'Structural Steel per Pound\\n'\n",
253 |       " 'Reinforcement Steel per Pound\\n'\n",
254 |       " '\\n'\n",
255 |       " 'State Transportation Funding Package\\n'\n",
256 |       " 'On Nov. 10, Gov. Snyder signed a package of \\n'\n",
257 |       " 'transportation bills approved by the Legislature. In the \\n'\n",
258 |       " 'short term, the legislation will:\\n'\n",
259 |       " '\\n'\n",
260 |       " '•  Provide $450 million in additional fuel tax \\n'\n",
261 |       " '\\n'\n",
262 |       " 'revenues, beginning in January 2017. The tax \\n'\n",
263 |       " 'on gasoline and diesel fuel will rise to 26.3 cents \\n'\n",
264 |       " 'at that time, as the legislation also provides for \\n'\n",
265 |       " 'diesel parity.\\n'\n",
266 |       " '\\n'\n",
267 |       " '•  Provide $190 million from a 20 percent increase  \\n'\n",
268 |       " '\\n'\n",
269 |       " 'in vehicle registration fees, also beginning in \\n'\n",
270 |       " 'January 2017.\\n'\n",
271 |       " '\\n'\n",
272 |       " 'This $600 million in new revenue will be distributed to \\n'\n",
273 |       " 'MDOT, county road commissions, cities and villages, \\n'\n",
274 |       " 'and the Comprehensive Transportation Fund through \\n'\n",
275 |       " 'the existing Act 51 formula, providing a roughly 30 \\n'\n",
276 |       " 'percent increase by 2018.\\n'\n",
277 |       " 'The new revenue is expected to generate an average \\n'\n",
278 |       " 'of more than 4,000 jobs per year in the first two years. \\n'\n",
279 |       " 'It will also help address the need to repair and maintain \\n'\n",
280 |       " 'Michigan’s existing transportation systems.\\n'\n",
281 |       " 'Beginning in 2019, the Legislature intends to appropriate \\n'\n",
282 |       " 'income tax revenue to roads agencies, according \\n'\n",
283 |       " 'to the Act 51 formula, excluding the Comprehensive \\n'\n",
284 |       " 'Transportation Fund, in these amounts:\\n'\n",
285 |       " ' \\n'\n",
286 |       " ' \\n'\n",
287 |       " ' \\n'\n",
288 |       " 'Beginning in 2016, the legislation adds transparency \\n'\n",
289 |       " 'and accountability:\\n'\n",
290 |       " '\\n'\n",
291 |       " '•  2019…………………..…..$150 million\\n'\n",
292 |       " '•  2020……………………....$325 million\\n'\n",
293 |       " '•  2021 and thereafter……..$600 million\\n'\n",
294 |       " '\\n'\n",
295 |       " '•  Administrative Expenses: MDOT \\n'\n",
296 |       " '\\n'\n",
297 |       " 'administrative expenses, previously capped at \\n'\n",
298 |       " '10 percent, are now limited to 8 percent of its \\n'\n",
299 |       " 'budget.\\n'\n",
300 |       " '\\n'\n",
301 |       " '•  Pavement Warranties: Road agencies are \\n'\n",
302 |       " 'required to buy pavement warranties, where \\n'\n",
303 |       " 'appropriate, for projects costing more than  \\n'\n",
304 |       " '$2 million. \\n'\n",
305 |       " '\\n'\n",
306 |       " '•  Competitive Bidding: To reduce project costs, \\n'\n",
307 |       " 'all agencies are required to competitively bid out \\n'\n",
308 |       " 'projects costing more than $100,000. \\n'\n",
309 |       " '\\n'\n",
310 |       " '•  Longer-lived Pavements: MDOT will be \\n'\n",
311 |       " '\\n'\n",
312 |       " 'required to prepare a report on the potential for \\n'\n",
313 |       " 'constructing longer-lived pavements and report \\n'\n",
314 |       " 'to the Legislature by June 2016.\\n'\n",
315 |       " '\\n'\n",
316 |       " '4   2017 Fast Facts  \\n'\n",
317 |       " '\\n'\n",
318 |       " ' (Updated 1/2017)\\n'\n",
319 |       " '\\n'\n",
320 |       " '\\x0c')\n"
321 |      ]
322 |     }
323 |    ],
324 |    "source": [
325 |     "from pprint import pprint as prettyprint\n",
326 |     "prettyprint(text)"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "Write out text to file"
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "code",
338 |    "execution_count": 8,
339 |    "metadata": {},
340 |    "outputs": [],
341 |    "source": [
342 |     "savefile = filename.replace('pdf','txt')\n",
343 |     "with open(savefile,'w') as fout:\n",
344 |     "    fout.write(text)"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "markdown",
349 |    "metadata": {},
350 |    "source": [
351 |     "# Conclusion\n",
352 |     "\n",
353 |     "Trying to reconstruct tables from pdf text mining tools looks like a formatting nightmare in the same realm as copy and paste."
354 |    ]
355 |   },
356 |   {
357 |    "cell_type": "code",
358 |    "execution_count": null,
359 |    "metadata": {
360 |     "collapsed": true
361 |    },
362 |    "outputs": [],
363 |    "source": []
364 |   }
365 |  ],
366 |  "metadata": {
367 |   "kernelspec": {
368 |    "display_name": "Python 3",
369 |    "language": "python",
370 |    "name": "python3"
371 |   },
372 |   "language_info": {
373 |    "codemirror_mode": {
374 |     "name": "ipython",
375 |     "version": 3
376 |    },
377 |    "file_extension": ".py",
378 |    "mimetype": "text/x-python",
379 |    "name": "python",
380 |    "nbconvert_exporter": "python",
381 |    "pygments_lexer": "ipython3",
382 |    "version": "3.5.1"
383 |   }
384 |  },
385 |  "nbformat": 4,
386 |  "nbformat_minor": 1
387 | }
388 | 


--------------------------------------------------------------------------------
/pytorch/Workshop_CNN.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Workshop CNN.ipynb",
  7 |       "provenance": [],
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     }
 14 |   },
 15 |   "cells": [
 16 |     {
 17 |       "cell_type": "markdown",
 18 |       "metadata": {
 19 |         "id": "view-in-github",
 20 |         "colab_type": "text"
 21 |       },
 22 |       "source": [
 23 |         "<a href=\"https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_CNN.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 24 |       ]
 25 |     },
 26 |     {
 27 |       "cell_type": "markdown",
 28 |       "metadata": {
 29 |         "id": "aa28NQ4b50Wk",
 30 |         "colab_type": "text"
 31 |       },
 32 |       "source": [
 33 |         "# Image Classification Problem"
 34 |       ]
 35 |     },
 36 |     {
 37 |       "cell_type": "code",
 38 |       "metadata": {
 39 |         "id": "WK-LdzWl5vH6",
 40 |         "colab_type": "code",
 41 |         "outputId": "9633651c-31d5-4c8e-a623-c1e209426f7e",
 42 |         "colab": {
 43 |           "base_uri": "https://localhost:8080/",
 44 |           "height": 67
 45 |         }
 46 |       },
 47 |       "source": [
 48 |         "import torch\n",
 49 |         "import torch.nn as nn\n",
 50 |         "import torch.optim as optim\n",
 51 |         "import torch.nn.functional as F\n",
 52 |         "from torch.utils.data import DataLoader\n",
 53 |         "import torchvision\n",
 54 |         "from torchvision import datasets, transforms\n",
 55 |         "import numpy as np\n",
 56 |         "\n",
 57 |         "print('Torch version', torch.__version__)\n",
 58 |         "print('Torchvision version', torchvision.__version__)\n",
 59 |         "print('Numpy version', np.__version__)"
 60 |       ],
 61 |       "execution_count": 1,
 62 |       "outputs": [
 63 |         {
 64 |           "output_type": "stream",
 65 |           "text": [
 66 |             "Torch version 1.3.1\n",
 67 |             "Torchvision version 0.4.2\n",
 68 |             "Numpy version 1.17.4\n"
 69 |           ],
 70 |           "name": "stdout"
 71 |         }
 72 |       ]
 73 |     },
 74 |     {
 75 |       "cell_type": "markdown",
 76 |       "metadata": {
 77 |         "id": "AKCLbDM754c0",
 78 |         "colab_type": "text"
 79 |       },
 80 |       "source": [
 81 |         "The following should say `cuda:0`. If it does not, we need to go to *Edit* -> *Notebook settings* and change it to a `GPU` from `None`. You only have to do this once per notebook."
 82 |       ]
 83 |     },
 84 |     {
 85 |       "cell_type": "code",
 86 |       "metadata": {
 87 |         "id": "a2RWBSbo53bz",
 88 |         "colab_type": "code",
 89 |         "outputId": "fa70a535-a31d-405b-90cd-5ccb15a4457a",
 90 |         "colab": {
 91 |           "base_uri": "https://localhost:8080/",
 92 |           "height": 34
 93 |         }
 94 |       },
 95 |       "source": [
 96 |         "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
 97 |         "device"
 98 |       ],
 99 |       "execution_count": 2,
100 |       "outputs": [
101 |         {
102 |           "output_type": "execute_result",
103 |           "data": {
104 |             "text/plain": [
105 |               "'cpu'"
106 |             ]
107 |           },
108 |           "metadata": {
109 |             "tags": []
110 |           },
111 |           "execution_count": 2
112 |         }
113 |       ]
114 |     },
115 |     {
116 |       "cell_type": "markdown",
117 |       "metadata": {
118 |         "id": "DhBlj7GI6Npt",
119 |         "colab_type": "text"
120 |       },
121 |       "source": [
122 |         "Define a transform to convert image to PyTorch tensor"
123 |       ]
124 |     },
125 |     {
126 |       "cell_type": "code",
127 |       "metadata": {
128 |         "id": "VIjSGCNv53fT",
129 |         "colab_type": "code",
130 |         "colab": {}
131 |       },
132 |       "source": [
133 |         "tf = transforms.ToTensor() # convert image to PyTorch tensor"
134 |       ],
135 |       "execution_count": 0,
136 |       "outputs": []
137 |     },
138 |     {
139 |       "cell_type": "markdown",
140 |       "metadata": {
141 |         "id": "0p_SPGXQ6PaD",
142 |         "colab_type": "text"
143 |       },
144 |       "source": [
145 |         "Download training **dataset** and create `DataLoader`"
146 |       ]
147 |     },
148 |     {
149 |       "cell_type": "code",
150 |       "metadata": {
151 |         "id": "V5R_cuLZ53ib",
152 |         "colab_type": "code",
153 |         "colab": {}
154 |       },
155 |       "source": [
156 |         "train_loader = DataLoader(datasets.MNIST('data', download=True, train=True, transform=tf),\n",
157 |         "                           batch_size=100, \n",
158 |         "                           shuffle=True)"
159 |       ],
160 |       "execution_count": 0,
161 |       "outputs": []
162 |     },
163 |     {
164 |       "cell_type": "markdown",
165 |       "metadata": {
166 |         "id": "enL0Q9306QBM",
167 |         "colab_type": "text"
168 |       },
169 |       "source": [
170 |         "Download validation **dataset** and create `DataLoader`\n"
171 |       ]
172 |     },
173 |     {
174 |       "cell_type": "code",
175 |       "metadata": {
176 |         "id": "ASnI4ZrW53lj",
177 |         "colab_type": "code",
178 |         "colab": {}
179 |       },
180 |       "source": [
181 |         "test_loader = DataLoader(datasets.MNIST('data', download=True, train=False, transform=tf),\n",
182 |         "                           batch_size=100, \n",
183 |         "                           shuffle=True)"
184 |       ],
185 |       "execution_count": 0,
186 |       "outputs": []
187 |     },
188 |     {
189 |       "cell_type": "markdown",
190 |       "metadata": {
191 |         "id": "ttYvEnkb6Qkb",
192 |         "colab_type": "text"
193 |       },
194 |       "source": [
195 |         "We'll write a python class to define out convolutional neural network."
196 |       ]
197 |     },
198 |     {
199 |       "cell_type": "code",
200 |       "metadata": {
201 |         "id": "RBZtZhgy6TCk",
202 |         "colab_type": "code",
203 |         "colab": {}
204 |       },
205 |       "source": [
206 |         "class TwoLayerCNN(nn.Module):\n",
207 |         "    def __init__(self):\n",
208 |         "        super().__init__()\n",
209 |         "        self.batchnorm = nn.BatchNorm2d(1)\n",
210 |         "        self.conv1 = nn.Conv2d(1,4,5) # input image channel, output channels, square kernel size\n",
211 |         "        self.conv2 = nn.Conv2d(4,16,5)\n",
212 |         "        self.fc1 = nn.Linear(16*4*4,100) # fully connected, 4x4 image size result from 2 conv layers\n",
213 |         "        self.fc2 = nn.Linear(100,10)\n",
214 |         "        \n",
215 |         "    def forward(self,x):\n",
216 |         "        x1 = self.batchnorm(x)\n",
217 |         "        x1 = F.max_pool2d(F.relu(self.conv1(x1)), 2)\n",
218 |         "        x1 = F.max_pool2d(F.relu(self.conv2(x1)), 2)\n",
219 |         "        x1 = x1.view(-1, self.num_flat_features(x1))\n",
220 |         "        x1 = F.dropout(F.relu(self.fc1(x1), 0.4))\n",
221 |         "        x1 = F.relu(self.fc2(x1))\n",
222 |         "        return x1\n",
223 |         "                      \n",
224 |         "    def num_flat_features(self, x):\n",
225 |         "        size = x.size()[1:]  # all dimensions except the batch dimension\n",
226 |         "        num_features = np.prod(size)\n",
227 |         "        return num_features"
228 |       ],
229 |       "execution_count": 0,
230 |       "outputs": []
231 |     },
232 |     {
233 |       "cell_type": "markdown",
234 |       "metadata": {
235 |         "id": "M54pEgD06RoL",
236 |         "colab_type": "text"
237 |       },
238 |       "source": [
239 |         "We create an instance of this class"
240 |       ]
241 |     },
242 |     {
243 |       "cell_type": "code",
244 |       "metadata": {
245 |         "id": "iVdKpsuh6TS0",
246 |         "colab_type": "code",
247 |         "outputId": "258fd01d-b5e0-4e50-d9fd-2655d7e04704",
248 |         "colab": {
249 |           "base_uri": "https://localhost:8080/",
250 |           "height": 134
251 |         }
252 |       },
253 |       "source": [
254 |         "model = TwoLayerCNN().to(device)\n",
255 |         "model"
256 |       ],
257 |       "execution_count": 7,
258 |       "outputs": [
259 |         {
260 |           "output_type": "execute_result",
261 |           "data": {
262 |             "text/plain": [
263 |               "TwoLayerCNN(\n",
264 |               "  (batchnorm): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
265 |               "  (conv1): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))\n",
266 |               "  (conv2): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))\n",
267 |               "  (fc1): Linear(in_features=256, out_features=100, bias=True)\n",
268 |               "  (fc2): Linear(in_features=100, out_features=10, bias=True)\n",
269 |               ")"
270 |             ]
271 |           },
272 |           "metadata": {
273 |             "tags": []
274 |           },
275 |           "execution_count": 7
276 |         }
277 |       ]
278 |     },
279 |     {
280 |       "cell_type": "markdown",
281 |       "metadata": {
282 |         "id": "SFRVmzOR6SB7",
283 |         "colab_type": "text"
284 |       },
285 |       "source": [
286 |         "We'll define a template for our `fit_model` function that contains `train`,  `validate`, and `accuracy` functions."
287 |       ]
288 |     },
289 |     {
290 |       "cell_type": "code",
291 |       "metadata": {
292 |         "id": "gZnvxqPu53rs",
293 |         "colab_type": "code",
294 |         "colab": {}
295 |       },
296 |       "source": [
297 |         "def fit_model(model, loss_fn, optimizer):\n",
298 |         "    def train(x,y):\n",
299 |         "        yhat = model(x)\n",
300 |         "        loss = loss_fn(yhat,y)\n",
301 |         "        optimizer.zero_grad()\n",
302 |         "        loss.backward()\n",
303 |         "        optimizer.step()\n",
304 |         "        return loss.item(), accuracy(yhat,y)\n",
305 |         "    \n",
306 |         "    def validate(x,y):\n",
307 |         "        yhat = model(x)\n",
308 |         "        loss = loss_fn(yhat,y)\n",
309 |         "        return loss.item(), accuracy(yhat,y)\n",
310 |         "    \n",
311 |         "    def accuracy(yhat,y):\n",
312 |         "        probs = np.argmax(yhat.cpu().detach().numpy(), axis=1)\n",
313 |         "        actual = y.cpu().detach().numpy()\n",
314 |         "        correct = (probs == actual).sum()\n",
315 |         "        total = y.shape[0]\n",
316 |         "        return correct / total   \n",
317 |         "    \n",
318 |         "    return train, validate"
319 |       ],
320 |       "execution_count": 0,
321 |       "outputs": []
322 |     },
323 |     {
324 |       "cell_type": "markdown",
325 |       "metadata": {
326 |         "id": "qCrMhx8Q6TLd",
327 |         "colab_type": "text"
328 |       },
329 |       "source": [
330 |         "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions."
331 |       ]
332 |     },
333 |     {
334 |       "cell_type": "code",
335 |       "metadata": {
336 |         "id": "XFBR4YbD53oz",
337 |         "colab_type": "code",
338 |         "colab": {}
339 |       },
340 |       "source": [
341 |         "loss_fn = nn.CrossEntropyLoss()\n",
342 |         "learning_rate = 0.01\n",
343 |         "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n",
344 |         "train, validate = fit_model(model, loss_fn, optimizer)"
345 |       ],
346 |       "execution_count": 0,
347 |       "outputs": []
348 |     },
349 |     {
350 |       "cell_type": "markdown",
351 |       "metadata": {
352 |         "id": "XNVzkMZI6Tam",
353 |         "colab_type": "text"
354 |       },
355 |       "source": [
356 |         "Here is our training loop with mini-batch processing. We have to move each batch onto the GPU. We also should have a `DataLoader` for the validation dataset but we'll skip that in this case since it is so small."
357 |       ]
358 |     },
359 |     {
360 |       "cell_type": "code",
361 |       "metadata": {
362 |         "id": "AKk5nZjM6Ths",
363 |         "colab_type": "code",
364 |         "outputId": "8a867760-9fc7-45de-8398-08b25a395c4c",
365 |         "colab": {
366 |           "base_uri": "https://localhost:8080/",
367 |           "height": 101
368 |         }
369 |       },
370 |       "source": [
371 |         "epochs = 5\n",
372 |         "for epoch in range(epochs):\n",
373 |         "    # training    \n",
374 |         "    losses, accuracy = [], []\n",
375 |         "    for i, (xbatch, ybatch) in enumerate(train_loader):\n",
376 |         "        xbatch = xbatch.to(device)\n",
377 |         "        ybatch = ybatch.to(device)\n",
378 |         "        loss, acc = train(xbatch, ybatch)\n",
379 |         "        losses.append(loss)\n",
380 |         "        accuracy.append(acc)\n",
381 |         "    training_loss = np.mean(losses)\n",
382 |         "    training_accuracy = np.mean(accuracy)\n",
383 |         "    # validation\n",
384 |         "    val_losses, val_accuracy = [], []\n",
385 |         "    for j, (xtest, ytest) in enumerate(test_loader):\n",
386 |         "        xtest = xtest.to(device)\n",
387 |         "        ytest = ytest.to(device)\n",
388 |         "        val_loss, val_acc = validate(xtest, ytest)\n",
389 |         "        val_losses.append(val_loss)\n",
390 |         "        val_accuracy.append(val_acc)\n",
391 |         "    validation_loss = np.mean(val_losses)\n",
392 |         "    validation_accuracy = np.mean(val_accuracy)\n",
393 |         "    # print intermediate results\n",
394 |         "    print(f'{epoch}, {training_loss:.4f}, {training_accuracy:.3f}, {validation_loss:.4f}, {validation_accuracy:.3f}')"
395 |       ],
396 |       "execution_count": 10,
397 |       "outputs": [
398 |         {
399 |           "output_type": "stream",
400 |           "text": [
401 |             "0, 0.3363, 0.899, 0.1599, 0.954\n",
402 |             "1, 0.1516, 0.956, 0.1300, 0.961\n",
403 |             "2, 0.1271, 0.963, 0.1067, 0.965\n",
404 |             "3, 0.1139, 0.967, 0.1046, 0.969\n",
405 |             "4, 0.1044, 0.970, 0.0955, 0.972\n"
406 |           ],
407 |           "name": "stdout"
408 |         }
409 |       ]
410 |     },
411 |     {
412 |       "cell_type": "markdown",
413 |       "metadata": {
414 |         "id": "1AudSc0uAqt9",
415 |         "colab_type": "text"
416 |       },
417 |       "source": [
418 |         "### nn.Sequential\n",
419 |         "\n",
420 |         "If we wanted to user the simpler `nn.Sequential` function, our model construction would have looked like this."
421 |       ]
422 |     },
423 |     {
424 |       "cell_type": "code",
425 |       "metadata": {
426 |         "id": "JlIMzvDyAq3U",
427 |         "colab_type": "code",
428 |         "outputId": "eb88f17a-b8e3-4089-d468-7fb01d45c00c",
429 |         "colab": {
430 |           "base_uri": "https://localhost:8080/",
431 |           "height": 269
432 |         }
433 |       },
434 |       "source": [
435 |         "model_sequential = nn.Sequential(\n",
436 |         "    nn.BatchNorm2d(1),\n",
437 |         "    nn.Conv2d(1,4,5),\n",
438 |         "    nn.ReLU(),\n",
439 |         "    nn.MaxPool2d(2),\n",
440 |         "    nn.Conv2d(4,16,5),\n",
441 |         "    nn.ReLU(),\n",
442 |         "    nn.MaxPool2d(2),\n",
443 |         "    nn.Flatten(),\n",
444 |         "    nn.Linear(256,100),\n",
445 |         "    nn.ReLU(),\n",
446 |         "    nn.Dropout(0.4),\n",
447 |         "    nn.Linear(100,10),\n",
448 |         "    nn.Softmax(dim=1),\n",
449 |         ").to(device)\n",
450 |         "model_sequential"
451 |       ],
452 |       "execution_count": 11,
453 |       "outputs": [
454 |         {
455 |           "output_type": "execute_result",
456 |           "data": {
457 |             "text/plain": [
458 |               "Sequential(\n",
459 |               "  (0): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
460 |               "  (1): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))\n",
461 |               "  (2): ReLU()\n",
462 |               "  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
463 |               "  (4): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))\n",
464 |               "  (5): ReLU()\n",
465 |               "  (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n",
466 |               "  (7): Flatten()\n",
467 |               "  (8): Linear(in_features=256, out_features=100, bias=True)\n",
468 |               "  (9): ReLU()\n",
469 |               "  (10): Dropout(p=0.4, inplace=False)\n",
470 |               "  (11): Linear(in_features=100, out_features=10, bias=True)\n",
471 |               "  (12): Softmax(dim=1)\n",
472 |               ")"
473 |             ]
474 |           },
475 |           "metadata": {
476 |             "tags": []
477 |           },
478 |           "execution_count": 11
479 |         }
480 |       ]
481 |     }
482 |   ]
483 | }


--------------------------------------------------------------------------------
/pytorch/Workshop_Classification.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Workshop Classification.ipynb",
  7 |       "provenance": [],
  8 |       "include_colab_link": true
  9 |     },
 10 |     "kernelspec": {
 11 |       "name": "python3",
 12 |       "display_name": "Python 3"
 13 |     }
 14 |   },
 15 |   "cells": [
 16 |     {
 17 |       "cell_type": "markdown",
 18 |       "metadata": {
 19 |         "id": "view-in-github",
 20 |         "colab_type": "text"
 21 |       },
 22 |       "source": [
 23 |         "<a href=\"https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Classification.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 24 |       ]
 25 |     },
 26 |     {
 27 |       "cell_type": "markdown",
 28 |       "metadata": {
 29 |         "id": "aa28NQ4b50Wk",
 30 |         "colab_type": "text"
 31 |       },
 32 |       "source": [
 33 |         "# Classification Problem"
 34 |       ]
 35 |     },
 36 |     {
 37 |       "cell_type": "code",
 38 |       "metadata": {
 39 |         "id": "WK-LdzWl5vH6",
 40 |         "colab_type": "code",
 41 |         "colab": {
 42 |           "base_uri": "https://localhost:8080/",
 43 |           "height": 67
 44 |         },
 45 |         "outputId": "0f560fe5-5a78-4942-950e-8f5661f81fd9"
 46 |       },
 47 |       "source": [
 48 |         "import torch\n",
 49 |         "import torch.nn as nn\n",
 50 |         "import torch.optim as optim\n",
 51 |         "import torch.nn.functional as F\n",
 52 |         "from torch.utils.data import TensorDataset, DataLoader\n",
 53 |         "import numpy as np\n",
 54 |         "import pandas as pd\n",
 55 |         "\n",
 56 |         "print('Torch version', torch.__version__)\n",
 57 |         "print('Pandas version', pd.__version__)\n",
 58 |         "print('Numpy version', np.__version__)"
 59 |       ],
 60 |       "execution_count": 1,
 61 |       "outputs": [
 62 |         {
 63 |           "output_type": "stream",
 64 |           "text": [
 65 |             "Torch version 1.3.1\n",
 66 |             "Pandas version 0.25.3\n",
 67 |             "Numpy version 1.17.4\n"
 68 |           ],
 69 |           "name": "stdout"
 70 |         }
 71 |       ]
 72 |     },
 73 |     {
 74 |       "cell_type": "markdown",
 75 |       "metadata": {
 76 |         "id": "AKCLbDM754c0",
 77 |         "colab_type": "text"
 78 |       },
 79 |       "source": [
 80 |         "The following should say `cuda:0`. If it does not, we need to go to *Edit* -> *Notebook settings* and change it to a `GPU` from `None`. You only have to do this once per notebook."
 81 |       ]
 82 |     },
 83 |     {
 84 |       "cell_type": "code",
 85 |       "metadata": {
 86 |         "id": "a2RWBSbo53bz",
 87 |         "colab_type": "code",
 88 |         "colab": {
 89 |           "base_uri": "https://localhost:8080/",
 90 |           "height": 34
 91 |         },
 92 |         "outputId": "81fac650-e814-4b79-f433-a47d4d089dce"
 93 |       },
 94 |       "source": [
 95 |         "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
 96 |         "device"
 97 |       ],
 98 |       "execution_count": 2,
 99 |       "outputs": [
100 |         {
101 |           "output_type": "execute_result",
102 |           "data": {
103 |             "text/plain": [
104 |               "'cpu'"
105 |             ]
106 |           },
107 |           "metadata": {
108 |             "tags": []
109 |           },
110 |           "execution_count": 2
111 |         }
112 |       ]
113 |     },
114 |     {
115 |       "cell_type": "markdown",
116 |       "metadata": {
117 |         "id": "DhBlj7GI6Npt",
118 |         "colab_type": "text"
119 |       },
120 |       "source": [
121 |         "Read in dataset"
122 |       ]
123 |     },
124 |     {
125 |       "cell_type": "code",
126 |       "metadata": {
127 |         "id": "VIjSGCNv53fT",
128 |         "colab_type": "code",
129 |         "colab": {}
130 |       },
131 |       "source": [
132 |         "df_train = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/iris_training.csv', header=None)\n",
133 |         "df_val = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/iris_test.csv', header=None)"
134 |       ],
135 |       "execution_count": 0,
136 |       "outputs": []
137 |     },
138 |     {
139 |       "cell_type": "markdown",
140 |       "metadata": {
141 |         "id": "0p_SPGXQ6PaD",
142 |         "colab_type": "text"
143 |       },
144 |       "source": [
145 |         "Construct our x,y variables along with the training and validation dataset"
146 |       ]
147 |     },
148 |     {
149 |       "cell_type": "code",
150 |       "metadata": {
151 |         "id": "V5R_cuLZ53ib",
152 |         "colab_type": "code",
153 |         "colab": {}
154 |       },
155 |       "source": [
156 |         "x_train = df_train.iloc[:,0:-1]\n",
157 |         "y_train = df_train.iloc[:,-1]\n",
158 |         "x_val = df_val.iloc[:,0:-1]\n",
159 |         "y_val = df_val.iloc[:,-1]"
160 |       ],
161 |       "execution_count": 0,
162 |       "outputs": []
163 |     },
164 |     {
165 |       "cell_type": "markdown",
166 |       "metadata": {
167 |         "id": "enL0Q9306QBM",
168 |         "colab_type": "text"
169 |       },
170 |       "source": [
171 |         "Preprocess our data to go from a `pandas` DataFrame to a `numpy` array to a `torch` tensor."
172 |       ]
173 |     },
174 |     {
175 |       "cell_type": "code",
176 |       "metadata": {
177 |         "id": "ASnI4ZrW53lj",
178 |         "colab_type": "code",
179 |         "colab": {}
180 |       },
181 |       "source": [
182 |         "xtrain = torch.tensor(x_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
183 |         "ytrain = torch.tensor(y_train.to_numpy(), device=device, dtype=torch.long, requires_grad=False)\n",
184 |         "xval = torch.tensor(x_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
185 |         "yval = torch.tensor(y_val.to_numpy(), device=device, dtype=torch.long, requires_grad=False)"
186 |       ],
187 |       "execution_count": 0,
188 |       "outputs": []
189 |     },
190 |     {
191 |       "cell_type": "markdown",
192 |       "metadata": {
193 |         "id": "ttYvEnkb6Qkb",
194 |         "colab_type": "text"
195 |       },
196 |       "source": [
197 |         "We'll write a python class to define out neural network."
198 |       ]
199 |     },
200 |     {
201 |       "cell_type": "code",
202 |       "metadata": {
203 |         "id": "RBZtZhgy6TCk",
204 |         "colab_type": "code",
205 |         "colab": {}
206 |       },
207 |       "source": [
208 |         "class FourLayerNN(nn.Module):\n",
209 |         "    def __init__(self, D_in, H1, H2, H3, D_out):\n",
210 |         "        super().__init__()\n",
211 |         "        self.linear1 = nn.Linear(D_in, H1)\n",
212 |         "        self.linear2 = nn.Linear(H1,H2)\n",
213 |         "        self.linear3 = nn.Linear(H2,H3)\n",
214 |         "        self.linear4 = nn.Linear(H3,D_out)\n",
215 |         "        \n",
216 |         "    def forward(self,x):\n",
217 |         "        h1_relu = self.linear1(x).clamp(min=0)\n",
218 |         "        h2_relu = self.linear2(h1_relu).clamp(min=0)\n",
219 |         "        h3_relu = self.linear3(h2_relu).clamp(min=0)\n",
220 |         "        y_pred = self.linear4(h3_relu)\n",
221 |         "        return y_pred"
222 |       ],
223 |       "execution_count": 0,
224 |       "outputs": []
225 |     },
226 |     {
227 |       "cell_type": "markdown",
228 |       "metadata": {
229 |         "id": "M54pEgD06RoL",
230 |         "colab_type": "text"
231 |       },
232 |       "source": [
233 |         "We create an instance of this class"
234 |       ]
235 |     },
236 |     {
237 |       "cell_type": "code",
238 |       "metadata": {
239 |         "id": "iVdKpsuh6TS0",
240 |         "colab_type": "code",
241 |         "colab": {
242 |           "base_uri": "https://localhost:8080/",
243 |           "height": 118
244 |         },
245 |         "outputId": "b83a76b4-a989-4f10-a52a-2f4857de6ed1"
246 |       },
247 |       "source": [
248 |         "model = FourLayerNN(xtrain.shape[1],1000,500,70,y_train.nunique()).to(device)\n",
249 |         "model"
250 |       ],
251 |       "execution_count": 7,
252 |       "outputs": [
253 |         {
254 |           "output_type": "execute_result",
255 |           "data": {
256 |             "text/plain": [
257 |               "FourLayerNN(\n",
258 |               "  (linear1): Linear(in_features=4, out_features=1000, bias=True)\n",
259 |               "  (linear2): Linear(in_features=1000, out_features=500, bias=True)\n",
260 |               "  (linear3): Linear(in_features=500, out_features=70, bias=True)\n",
261 |               "  (linear4): Linear(in_features=70, out_features=3, bias=True)\n",
262 |               ")"
263 |             ]
264 |           },
265 |           "metadata": {
266 |             "tags": []
267 |           },
268 |           "execution_count": 7
269 |         }
270 |       ]
271 |     },
272 |     {
273 |       "cell_type": "markdown",
274 |       "metadata": {
275 |         "id": "SFRVmzOR6SB7",
276 |         "colab_type": "text"
277 |       },
278 |       "source": [
279 |         "We'll define a template for our `fit_model` function that contains `train`,  `validate`, and `accuracy` functions."
280 |       ]
281 |     },
282 |     {
283 |       "cell_type": "code",
284 |       "metadata": {
285 |         "id": "gZnvxqPu53rs",
286 |         "colab_type": "code",
287 |         "colab": {}
288 |       },
289 |       "source": [
290 |         "def fit_model(model, loss_fn, optimizer):\n",
291 |         "    def train(x,y):\n",
292 |         "        yhat = model(x)\n",
293 |         "        loss = loss_fn(yhat,y)\n",
294 |         "        optimizer.zero_grad()\n",
295 |         "        loss.backward()\n",
296 |         "        optimizer.step()\n",
297 |         "        return loss.item(), accuracy(yhat,y)\n",
298 |         "    \n",
299 |         "    def validate(x,y):\n",
300 |         "        yhat = model(x)\n",
301 |         "        loss = loss_fn(yhat,y)\n",
302 |         "        return loss.item(), accuracy(yhat,y)\n",
303 |         "    \n",
304 |         "    def accuracy(yhat,y):\n",
305 |         "        probs = np.argmax(yhat.cpu().detach().numpy(), axis=1)\n",
306 |         "        actual = y.cpu().detach().numpy()\n",
307 |         "        correct = (probs == actual).sum()\n",
308 |         "        total = y.shape[0]\n",
309 |         "        return correct / total   \n",
310 |         "    \n",
311 |         "    return train, validate"
312 |       ],
313 |       "execution_count": 0,
314 |       "outputs": []
315 |     },
316 |     {
317 |       "cell_type": "markdown",
318 |       "metadata": {
319 |         "id": "qCrMhx8Q6TLd",
320 |         "colab_type": "text"
321 |       },
322 |       "source": [
323 |         "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions."
324 |       ]
325 |     },
326 |     {
327 |       "cell_type": "code",
328 |       "metadata": {
329 |         "id": "XFBR4YbD53oz",
330 |         "colab_type": "code",
331 |         "colab": {}
332 |       },
333 |       "source": [
334 |         "loss_fn = nn.CrossEntropyLoss()\n",
335 |         "learning_rate = 0.01\n",
336 |         "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n",
337 |         "train, validate = fit_model(model, loss_fn, optimizer)"
338 |       ],
339 |       "execution_count": 0,
340 |       "outputs": []
341 |     },
342 |     {
343 |       "cell_type": "markdown",
344 |       "metadata": {
345 |         "id": "ME_plDOp6Slt",
346 |         "colab_type": "text"
347 |       },
348 |       "source": [
349 |         "Define a `DataLoader` for our mini-batches."
350 |       ]
351 |     },
352 |     {
353 |       "cell_type": "code",
354 |       "metadata": {
355 |         "id": "5SS1NgRs6Syz",
356 |         "colab_type": "code",
357 |         "colab": {}
358 |       },
359 |       "source": [
360 |         "train_data = TensorDataset(xtrain, ytrain)\n",
361 |         "train_loader = DataLoader(dataset=train_data, batch_size=60, shuffle=True)"
362 |       ],
363 |       "execution_count": 0,
364 |       "outputs": []
365 |     },
366 |     {
367 |       "cell_type": "markdown",
368 |       "metadata": {
369 |         "id": "XNVzkMZI6Tam",
370 |         "colab_type": "text"
371 |       },
372 |       "source": [
373 |         "Here is our training loop with mini-batch processing. We have to move each batch onto the GPU. We also should have a `DataLoader` for the validation dataset but we'll skip that in this case since it is so small."
374 |       ]
375 |     },
376 |     {
377 |       "cell_type": "code",
378 |       "metadata": {
379 |         "id": "AKk5nZjM6Ths",
380 |         "colab_type": "code",
381 |         "colab": {
382 |           "base_uri": "https://localhost:8080/",
383 |           "height": 353
384 |         },
385 |         "outputId": "adecf0cb-e8fa-4f20-d18b-2ac258a72d8f"
386 |       },
387 |       "source": [
388 |         "epochs = 2000\n",
389 |         "for epoch in range(epochs):\n",
390 |         "    # training\n",
391 |         "    losses = []\n",
392 |         "    for i, (xbatch, ybatch) in enumerate(train_loader):\n",
393 |         "        xbatch = xbatch.to(device)\n",
394 |         "        ybatch = ybatch.to(device)\n",
395 |         "        loss, accuracy = train(xbatch, ybatch)\n",
396 |         "        losses.append(loss)\n",
397 |         "    training_loss = np.mean(losses)\n",
398 |         "    training_accuracy = np.mean(accuracy)\n",
399 |         "    # validation\n",
400 |         "    validation_loss, validation_accuracy = validate(xval, yval)\n",
401 |         "    # print intermediate results\n",
402 |         "    if epoch%100 == 99:\n",
403 |         "        print(f'{epoch}, {training_loss:.4f}, {training_accuracy:.2f}, {validation_loss:.4f}, {accuracy:.2f}')"
404 |       ],
405 |       "execution_count": 11,
406 |       "outputs": [
407 |         {
408 |           "output_type": "stream",
409 |           "text": [
410 |             "99, 0.0790, 0.97, 0.0645, 0.97\n",
411 |             "199, 0.0817, 0.97, 0.0577, 0.97\n",
412 |             "299, 0.0537, 1.00, 0.0652, 1.00\n",
413 |             "399, 0.0497, 0.98, 0.0516, 0.98\n",
414 |             "499, 0.0403, 1.00, 0.0566, 1.00\n",
415 |             "599, 0.0382, 0.98, 0.0541, 0.98\n",
416 |             "699, 0.0382, 0.98, 0.0578, 0.98\n",
417 |             "799, 0.0355, 0.98, 0.0596, 0.98\n",
418 |             "899, 0.0338, 0.98, 0.0643, 0.98\n",
419 |             "999, 0.0385, 1.00, 0.0620, 1.00\n",
420 |             "1099, 0.0339, 1.00, 0.0672, 1.00\n",
421 |             "1199, 0.0327, 1.00, 0.0677, 1.00\n",
422 |             "1299, 0.0293, 1.00, 0.0716, 1.00\n",
423 |             "1399, 0.0293, 1.00, 0.0717, 1.00\n",
424 |             "1499, 0.0290, 1.00, 0.0738, 1.00\n",
425 |             "1599, 0.0267, 1.00, 0.0826, 1.00\n",
426 |             "1699, 0.0280, 1.00, 0.0815, 1.00\n",
427 |             "1799, 0.0274, 1.00, 0.0912, 1.00\n",
428 |             "1899, 0.0253, 0.98, 0.1166, 0.98\n",
429 |             "1999, 0.0249, 1.00, 0.0899, 1.00\n"
430 |           ],
431 |           "name": "stdout"
432 |         }
433 |       ]
434 |     },
435 |     {
436 |       "cell_type": "markdown",
437 |       "metadata": {
438 |         "id": "1AudSc0uAqt9",
439 |         "colab_type": "text"
440 |       },
441 |       "source": [
442 |         "### nn.Sequential\n",
443 |         "\n",
444 |         "If we wanted to user the simpler `nn.Sequential` function, our model construction would have looked like this."
445 |       ]
446 |     },
447 |     {
448 |       "cell_type": "code",
449 |       "metadata": {
450 |         "id": "JlIMzvDyAq3U",
451 |         "colab_type": "code",
452 |         "colab": {
453 |           "base_uri": "https://localhost:8080/",
454 |           "height": 168
455 |         },
456 |         "outputId": "d4987403-3fbd-48ea-bcd7-06fbbc112df7"
457 |       },
458 |       "source": [
459 |         "model_sequential = nn.Sequential(\n",
460 |         "    nn.Linear(xtrain.shape[1],1000),\n",
461 |         "    nn.ReLU(),\n",
462 |         "    nn.Linear(1000,500),\n",
463 |         "    nn.ReLU(),\n",
464 |         "    nn.Linear(500,70),\n",
465 |         "    nn.ReLU(),\n",
466 |         "    nn.Linear(70,y_train.nunique()),\n",
467 |         ").to(device)\n",
468 |         "print(model_sequential)"
469 |       ],
470 |       "execution_count": 12,
471 |       "outputs": [
472 |         {
473 |           "output_type": "stream",
474 |           "text": [
475 |             "Sequential(\n",
476 |             "  (0): Linear(in_features=4, out_features=1000, bias=True)\n",
477 |             "  (1): ReLU()\n",
478 |             "  (2): Linear(in_features=1000, out_features=500, bias=True)\n",
479 |             "  (3): ReLU()\n",
480 |             "  (4): Linear(in_features=500, out_features=70, bias=True)\n",
481 |             "  (5): ReLU()\n",
482 |             "  (6): Linear(in_features=70, out_features=3, bias=True)\n",
483 |             ")\n"
484 |           ],
485 |           "name": "stdout"
486 |         }
487 |       ]
488 |     }
489 |   ]
490 | }


--------------------------------------------------------------------------------
/matlab/workshop_plotname.m:
--------------------------------------------------------------------------------
  1 | %% UROP Matlab Workshop
  2 | % @author: Alex Cao, University of Michigan
  3 | % Email: caoa AT umich DOT edu
  4 | % Consulting for Statistics, Computing, and Analytics Research (CSCAR)
  5 | % MATLAB Version: 9.0.0.370719 (R2016a)
  6 | % Operating System: Microsoft Windows 7 Enterprise  Version 6.1 (Build 7601: Service Pack 1)
  7 | % Java Version: Java 1.7.0_60-b19 with Oracle Corporation Java HotSpot(TM) 64-Bit Server VM mixed mode
  8 | 
  9 | % Students can install a free version of Matlab on their PC
 10 | % https://www.itcs.umich.edu/sw-info/math/MATLABStudents.html
 11 | 
 12 | %% Start with a clean slate
 13 | clear; close all
 14 | 
 15 | %% Creating variables
 16 | a = 3.14
 17 | b = 'this is a string'
 18 | c = [2 4;
 19 |      6 8]
 20 | 
 21 | %% Built-In functions and constants
 22 | % Constant
 23 | d = pi
 24 | % Imaginary numbers
 25 | e = sqrt(-9)
 26 | % Creating imaginary numbers
 27 | f = 1-2i
 28 | 
 29 | %% Creating vectors and matrices
 30 | % creating a row vector
 31 | row_vector = [2 4 6 8 10]
 32 | % creating a column vector
 33 | col_vector = [1; 
 34 |               3;
 35 |               5;
 36 |               7;
 37 |               9]
 38 | % transpose
 39 | row_vector = row_vector' % or row_vector = [2 4 6 8 10]'
 40 | % creating a matrix
 41 | matrix = [9 8 7;
 42 |           6 5 4;
 43 |           3 2 1]
 44 | % Adding rows or columns to an existing matrix or vector
 45 | v = [10 20 30]
 46 | addrow = [matrix;
 47 |           v]
 48 | addcol = [matrix v'] 
 49 | % Deleting rows or columns from an existing matrix or vector
 50 | addrow(end,:) = []
 51 | addcol(:,4) = []
 52 | 
 53 | %% Selecting and accessing data
 54 | % Select column(s) of data
 55 | a = matrix(:,1)
 56 | % To select multiple columns
 57 | b = matrix(:,2:3)
 58 | % Columns do not even have to be continuous
 59 | c = matrix(:,[3 1])
 60 | % Exact same thing for rows
 61 | d = matrix(1,:)
 62 | e = matrix(2:3,:)
 63 | f = matrix([3 1],:)
 64 | 
 65 | %% Plotting
 66 | M = magic(3) % magic square
 67 | plot(M(:,1),M(:,2),'o-')
 68 | 
 69 | %% Exercise 1 (5 minutes)
 70 | % Task 1: Construct a matrix of points to spell out the first letter of your name
 71 | % Task 2: Plot the letter
 72 | % For example,
 73 | % Task 1
 74 | A = [0 0;
 75 |     1 4;
 76 |     2 0;
 77 |     1.5 2;
 78 |     0.5 2];
 79 | % Task 2
 80 | plot(A(:,1),A(:,2),'x-')
 81 | 
 82 | %% Running external Matlab programs
 83 | % Just type the name of the m-file (should not have any spaces)
 84 | % Run letters m-file to get custom block font alphabet by author
 85 | letters
 86 | 
 87 | %% We will plot our name in Matlab
 88 | % Grab your letters from the alphabet (cell array) using the index number
 89 | A = alphabet{1};
 90 | L = alphabet{12};
 91 | E = alphabet{5};
 92 | X = alphabet{24};
 93 | 
 94 | %% Matrix
 95 | % Letters are stored as a Nx2 matrix
 96 | % First column are the x-coordinates
 97 | % Second column are the y-coordinates
 98 | A
 99 | 
100 | %% Plotting your name
101 | 
102 | % Create a new cell array variable with our letters
103 | name = {A,L,E,X};
104 | % Close previous figure
105 | close
106 | % Open new figure
107 | figure(1)
108 | % Iterate through the letters using a for loop
109 | for i = 1:length(name)
110 |     % Grab a letter
111 |     letter = name{i};
112 |     % Get x and y column
113 |     x = letter(:,1);
114 |     y = letter(:,2);
115 |     % Plot letter with a blue line
116 |     plot(x,y,'b-');
117 |     % Set axis limits
118 |     ylim([-1 5])
119 |     axis equal
120 |     % Do not overwrite previous plots
121 |     hold on
122 | end
123 | 
124 | % Create labels
125 | xlabel('x-axis')
126 | ylabel('y-axis')
127 | title('Plotting My Name')
128 | 
129 | %% 
130 | % In order to see all the letters clearly, we need to offset the letters
131 | % We'll use matrix addition/subtraction to create the offset
132 | % Creating a constant offset is easy
133 | close; figure(2)
134 | for i = 1:length(name)
135 |     letter = name{i};
136 |     % add offset to the x-coordinate based on letter position
137 |     x = letter(:,1) + i*2.5;
138 |     y = letter(:,2);
139 |     % plot letter with red dash dot line and circle markers
140 |     plot(x,y,'r-.o');
141 |     hold on
142 | end
143 | % Alternate way to set axis limits
144 | axis([-1 15 -1 5])
145 | 
146 | % See the following URLs for different point and line options
147 | % http://www.mathworks.com/help/matlab/ref/plot.html#inputarg_LineSpec
148 | 
149 | %% 
150 | % You can also add a vector or matrix (instead of a constant) to a matrix
151 | % (i.e. letter)
152 | F = alphabet{6}
153 | plot(F(:,1),F(:,2),'g','linewidth',2)
154 | %%
155 | % Here we add a vector to change the first row (i.e. bottom point)
156 | F(1,:) = F(1,:)+[1 1]
157 | % plot a green line with a linewidth of 2
158 | plot(F(:,1),F(:,2),'g','linewidth',2)
159 | 
160 | %% 
161 | % We can also scale the letters so that they are smaller or bigger
162 | % We'll use matrix element multiplication to accomplish the scaling
163 | close; figure(3)
164 | for i = 1:length(name)
165 |     letter = name{i};
166 |     % same x-offset as before
167 |     x = letter(:,1) + i*2.5;
168 |     % scale the y-coordinate by multiplication of an exponential
169 |     y = letter(:,2) * exp(+i/5);
170 |     % plot black line with diamond markers
171 |     plot(x,y,'k-d');
172 |     hold on
173 | end
174 | % Alternate way to set axis limits
175 | xlim([0 15])
176 | ylim([-1 10])
177 | 
178 | %% Exercise 2 (5 minutes)
179 | % Task 1: Copy the code section above 
180 | % Task 2: Plot your name vertically by using matrix addition/subtraction
181 | % Task 3: Shrink the letters in your name by using matrix-element
182 | % multiplication/division and re-plot it
183 | 
184 | %% Animation 
185 | % Here's how to animate the letters sequentially
186 | close; figure(4)
187 | axis([-1,15,-1,10])
188 | 
189 | % Set time delay between drawing lines
190 | time_delay = 0.5;
191 | 
192 | % Create empty cell array for animated objects
193 | object = {};
194 | % Iterate thru the letters
195 | for i = 1:length(name)
196 |     % Create animated lined object for each letter and save it to cell array
197 |     object{i} = animatedline;
198 |     letter = name{i};
199 |     x = letter(:,1) + i*2.5;
200 |     y = letter(:,2) * exp(+i/5);
201 |     % Iterate through each point defining our letter and draw it
202 |     for j = 1:length(letter)
203 |         addpoints(object{i},x(j),y(j));
204 |         drawnow
205 |         pause(time_delay)
206 |     end
207 | end
208 | 
209 | %% 
210 | % To produce smoother animation, we need more points to plot
211 | % Make lines with more points (say 100)
212 | num_of_pts = 100;
213 | % Create an evenly spaced vector using linspace
214 | % linspace(start,end,number of points)
215 | x1 = linspace(0,1,num_of_pts);
216 | y1 = linspace(0,4,num_of_pts);
217 | x2 = linspace(1,2,num_of_pts);
218 | y2 = linspace(4,0,num_of_pts);
219 | x3 = linspace(1.5,0.5,num_of_pts);
220 | y3 = linspace(2,2,num_of_pts);
221 | 
222 | % quote symbol does a transpose of the matrix
223 | % we want to convert from a row vector to a column vector
224 | % we concatenate the vectors side by side and then on top of each
225 | % other
226 | A = [x1' y1';
227 |      x2' y2';
228 |      x3' y3'];
229 | 
230 | % get size of A
231 | size(A)
232 | 
233 | %% Exercise 3 (5 minutes)
234 | % Task 1: Similar to the code section above, construct a matrix for the
235 | % letter T using linspace with 100 pts. Hint: You need to create x1, y1,
236 | % x2, y2 for the vertical and horizontal lines
237 | % Task 2: Plot the matrix using x markers (e.g. plot(x,y,'x') ) and set the
238 | % axis so that the letter is not touching a border
239 | 
240 | %% Redraw letter A with more points and no time delay
241 | close; figure(5)
242 | hA = animatedline;
243 | axis([-1,12,-1,5])
244 | 
245 | for k = 1:length(A)
246 |     addpoints(hA,A(k,1),A(k,2));
247 |     drawnow
248 | end
249 | 
250 | %% Smoother Animation 
251 | % I've written a matlab function gen_more_pts.m to add more points to
252 | % letters for you. Let's use it to animate our names.
253 | close; figure(6)
254 | axis([-1,15,-1,10])
255 | 
256 | % Create empty cell array for animated objects
257 | object = {};
258 | % For loop for adding points to a line
259 | for i = 1:length(name)
260 |     % Create animated lined object for each letter and save it to cell array
261 |     object{i} = animatedline;
262 |     letter = name{i};
263 |     % gen_more_pts function creates more points for uss
264 |     animate_letter = gen_more_pts(letter);
265 |     x = animate_letter(:,1) + i*2.5;
266 |     y = animate_letter(:,2) * exp(+i/5);
267 |     for j = 1:length(animate_letter)
268 |         addpoints(object{i},x(j),y(j));
269 |         drawnow
270 |     end
271 | end
272 | 
273 | %% Exercise 4 (5 minutes)
274 | % Task 1: Copy the code section above
275 | % Task 2: Animate the vertical version of your name
276 | 
277 | %% Plot Attributes
278 | % You can change the look of your lines after they are plotted by accessing
279 | % their attributes such as Color or LineWidth or LineStyle
280 | % To get a list of a plot attributes, use the get command
281 | get(object{1})
282 | % You can also type "object{1}." followed by a tab to get a dropdown list
283 | % To make something invisible use the Visible attribute
284 | object{1}.Visible = 'off'
285 | % To make something visible again
286 | object{1}.Visible = 'on'
287 | % If you don't know what options are available to you for a specific
288 | % attribute, you can use the set command
289 | set(object{1})
290 | 
291 | %% Generating Random Numbers and using a random seed
292 | % We will use random numbers to randomly change the attributes of our plot
293 | % Use a seed so that you get a predictable sequence of numbers
294 | % rng(56789)
295 | linestyle_options = {'-','--',':','-.'};
296 | for i = 1:12
297 |     % generate one random integer for which letter to modify
298 |     n = randi(length(name),1);
299 |     % generate a 3x1 vector of random numbers from (0,1)
300 |     color = rand(3,1)
301 |     object{n}.Color = color;
302 |     % generate one random integer for the linewidth
303 |     object{n}.LineWidth = randi(10,1);
304 |     % generate one random integer for the linestyle
305 |     index = randi(length(linestyle_options),1);
306 |     object{n}.LineStyle = linestyle_options{index};
307 |     pause(1)
308 | end
309 | 
310 | %% Exercise 5 (5 minutes)
311 | % Task 1: Generate a 4x1 vector of random integers from 1 to 10
312 | % Task 2: Take the sum of it
313 | % Task 3: Repeat 1 & 2
314 | % Task 4: Set a seed for the random generator using your favourite number
315 | % Task 5: Redo 1,2,3
316 | 
317 | %% Let's redraw our name 
318 | close; figure(7)
319 | for i = 1:length(name)
320 |     letter = name{i};
321 |     animate_letter = gen_more_pts(letter);
322 |     x = animate_letter(:,1) + i*2.5;
323 |     y = animate_letter(:,2);
324 |     % plot letters as magenta line with pentagon markers
325 |     plot(x,y,'m-p');
326 |     hold on
327 | end
328 | % Alternate way to set axis limits
329 | axis([0 15 -1 5])
330 | 
331 | %% Now suppose we wanted to cut our name (i.e. points) into half
332 | % We can segment our name using logical indexing
333 | close; figure(8)
334 | y_cutoff = 2.5;
335 | for i = 1:length(name)
336 |     letter = name{i};
337 |     animate_letter = gen_more_pts(letter);
338 |     % original matrix size
339 |     disp(size(animate_letter))
340 |     % generate boolean of pts meeting criterion
341 |     index = animate_letter(:,2) < y_cutoff;
342 |     % grab matching pts using indices
343 |     animate_letter = animate_letter(index,:);
344 |     % new matrix size (should be smaller)
345 |     disp(size(animate_letter))
346 |     x = animate_letter(:,1) + i*2.5;
347 |     y = animate_letter(:,2);
348 |     plot(x,y,'m-p');
349 |     hold on
350 | end
351 | axis([0 15 -1 5])
352 | 
353 | %% You can use more than one logical operation at a time
354 | close; figure(9)
355 | % & means AND
356 | % | means OR
357 | y_cutoff = 2.5;
358 | for i = 1:length(name)
359 |     letter = name{i};
360 |     animate_letter = gen_more_pts(letter);
361 |     size(animate_letter)
362 |     % AND statement joining two criteria
363 |     index = (animate_letter(:,2) < y_cutoff) & (animate_letter(:,2) > 1.25);
364 |     animate_letter = animate_letter(index,:);
365 |     size(animate_letter)
366 |     x = animate_letter(:,1) + i*2.5;
367 |     y = animate_letter(:,2);
368 |     plot(x,y,'m-p');
369 |     hold on
370 | end
371 | axis([0 15 -1 5])
372 | 
373 | %% Exercise 6 (5 minutes)
374 | % Task 1: Copy the code section above
375 | % Task 2: Only show the portion of your name that is less than 1 or greater
376 | % than 2 on the y-axis
377 | 
378 | %% Import Data Demo
379 | % There are many functions to import data into Matlab from external sources
380 | % Some choices are: uiimport, load, importdata, textscan, dlmread, fread,
381 | % fscanf, readtable, xlsread
382 | %
383 | % The most friendly method to beginners is uiimport which acts like excel
384 | uiimport('crash.txt')
385 | 
386 | %% Exercise 7 (10 minutes)
387 | % Task 1: Import the CrashSeverity column into the workspace
388 | % Task 2: Extract the fatal crashes (value = 1) using logical indexing
389 | % Task 3: Count how many fatal crashes there are in dataset
390 | % Task 4: Import the Longitude/Latitude columns into the workspace
391 | % Task 5: Plot Longitude/Latitude coordinates using any triangle marker.
392 | % Are there any bad data points? 
393 | % Tip: Longitude should be negative in this case.
394 | % Task 6: Remove the bad points using logical indexing and re-plot the
395 | % coordinates using a triangle marker
396 | 
397 | 
398 | %% Some useful Matlab commands to know
399 | % Saving your work
400 | % Saving variables in your workspace
401 | save workshop.mat
402 | % Clear the workspace
403 | clear
404 | % Reload everything
405 | load workshop.mat
406 | % If you just want to save a couple of variables
407 | save workshop X E L A
408 | % close last figure
409 | close
410 | % close all figures
411 | close all
412 | % clear command window
413 | clc
414 | % bring up command history
415 | commandhistory
416 | % Last unassigned answer in command window
417 | ans
418 | 
419 | %% Formatting output
420 | z = 1534513546
421 | % To change the look of the output, use the format function
422 | format longg
423 | z
424 | % To change back to the default format
425 | format
426 | 
427 | %% Getting Help
428 | % help for a function
429 | help plot
430 | doc plot
431 | % Bring up Matlab examples
432 | demo
433 | % You can also use the search bar in the top right corner or use the *?* 
434 | % icon next to it to open up an equivalent window
435 | 
436 | %% References
437 | % MathWork (makers of Matlab) Resources
438 |  
439 | % Matlab tutorials from MathWorks 
440 | % https://www.mathworks.com/support/learn-with-matlab-tutorials.html
441 | % http://www.mathworks.com/help/matlab/getting-started-with-matlab.html
442 | 
443 | % Matlab Forum for Q&A
444 | % http://www.mathworks.com/matlabcentral/answers/ 
445 | 
446 | % Cody: Challenge yourself to Matlab coding problems
447 | % http://www.mathworks.com/matlabcentral/cody
448 | 
449 | % PDF tutorial
450 | % https://www.mathworks.com/help/pdf_doc/matlab/getstart.pdf
451 | 
452 | % 3rd Party Add-Ons
453 | % http://www.mathworks.com/matlabcentral/fileexchange/
454 | 
455 | % Matlab Blogs
456 | % http://blogs.mathworks.com Matlab Blog
457 | 
458 | % Matlab Toolboxes
459 | % https://www.mathworks.com/products/
460 | 
461 | % To see what is installed on your version of Matlab, use the ver
462 | % command
463 | ver
464 | 
465 | %% Other Matlab Resources
466 | 
467 | % Interactive course by the University of Edinburgh
468 | % http://www.see.ed.ac.uk/teaching/courses/matlab/ 
469 | 
470 | % Free online book
471 | % http://greenteapress.com/matlab/
472 | 
473 | 
474 | %% Other Fun Stuff
475 | 
476 | %% Alternate way to do animation
477 | % Rotate our name
478 | % Let's plot our name again
479 | % The plot command will be outside the for loop this time
480 | close; figure(100)
481 | alex = [];
482 | for i = 1:length(name)
483 |     letter = name{i};
484 |     x = letter(:,1) + i*2.5;
485 |     y = letter(:,2);
486 |     alex = [alex; x y];
487 | end
488 | hAlex = plot(alex(:,1),alex(:,2),'linewidth',2,'color',[0.7 0.2 0.5]);
489 | axis([-12 12 -12 12])
490 | 
491 | %%
492 | % Set the DataSource attribute to this variable
493 | hAlex.XDataSource = 'rotateAlex(:,1)';
494 | hAlex.YDataSource = 'rotateAlex(:,2)';
495 | % Create an evenly spaced vector from 0 to 2*pi for rotation
496 | th = linspace(0,2*pi,500);
497 | 
498 | %% Rotate about z-axis
499 | for i = 1:length(th)
500 |     % Angle
501 |     theta = th(i);
502 |     % Rotation matrix about z-axis
503 |     Rz = [cos(theta) -sin(theta);
504 |         sin(theta)  cos(theta)];
505 |     % Matrix multiplication of rotation matrix with name points
506 |     rotateAlex = (Rz*alex')';
507 |     % Update figure handle
508 |     refreshdata(hAlex)
509 |     % Pause in seconds
510 |     pause(0.01)
511 | end
512 | 
513 | %% Center my name around the origin
514 | % use repmat to duplicate 2x1 vector
515 | alex2 = alex - repmat(mean(alex),size(alex,1),1);
516 | % Add the z-value of zero to my name points
517 | alex2 = [alex2 zeros(size(alex2,1),1)];
518 | 
519 | %% Rotate about y-axis
520 | for i = 1:length(th)
521 |     theta = th(i);
522 |     Ry = [cos(theta) 0 sin(theta);
523 |           0 1 0;
524 |          -sin(theta) 0 cos(theta)];
525 |     rotateAlex = (Ry*alex2')';
526 |     refreshdata(hAlex)
527 |     pause(0.01)
528 | end
529 | 
530 | %% Rotate about x-axis
531 | % Move my name around some more
532 | alex2(:,2) = alex2(:,2) + min(alex2(:,2));
533 | for i = 1:length(th)
534 |     theta = th(i);
535 |     Rx = [1 0 0;
536 |           0 cos(theta) -sin(theta);
537 |           0 sin(theta)  cos(theta)];
538 |     rotateAlex = (Rx*alex2')';
539 |     refreshdata(hAlex)
540 |     pause(0.01)
541 | end
542 | 
543 | %% Animation of a helix
544 | n = 5000; % determines how many pts to draw
545 | xc = 3; yc = 3;
546 | r = linspace(1,6,n); % radius
547 | t = linspace(0,12*pi,n); % how many loops to make
548 | x = 0.8*r.*cos(t) + xc;
549 | y = r.*sin(t) + yc;
550 | z = linspace(0,5,n);
551 | v = linspace(0.001,1,n);
552 | close all; figure(101)
553 | h = animatedline;
554 | axis([-10,10,-10,10,0 5])
555 | grid on
556 | xlabel('X'); ylabel('Y'); zlabel('Z')
557 | for k = 1:n
558 |     h.LineWidth = (v(k)+1)*4;
559 |     h.Color = [v(k) 1-v(k) v(k)];
560 |     addpoints(h,x(k),y(k),z(k));
561 |     % Set viewing angle
562 |     view(-mod(k/120,90),90-mod(k/72,70))
563 |     drawnow
564 | end
565 | 


--------------------------------------------------------------------------------
/pytorch/Workshop_Regression_Class.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |   "nbformat": 4,
  3 |   "nbformat_minor": 0,
  4 |   "metadata": {
  5 |     "colab": {
  6 |       "name": "Workshop Regression Class",
  7 |       "provenance": [],
  8 |       "collapsed_sections": [],
  9 |       "include_colab_link": true
 10 |     },
 11 |     "kernelspec": {
 12 |       "name": "python3",
 13 |       "display_name": "Python 3"
 14 |     },
 15 |     "accelerator": "GPU"
 16 |   },
 17 |   "cells": [
 18 |     {
 19 |       "cell_type": "markdown",
 20 |       "metadata": {
 21 |         "id": "view-in-github",
 22 |         "colab_type": "text"
 23 |       },
 24 |       "source": [
 25 |         "<a href=\"https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Regression_Class.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
 26 |       ]
 27 |     },
 28 |     {
 29 |       "cell_type": "markdown",
 30 |       "metadata": {
 31 |         "id": "7G_TdHMkSL8q",
 32 |         "colab_type": "text"
 33 |       },
 34 |       "source": [
 35 |         "**Regression Problem**"
 36 |       ]
 37 |     },
 38 |     {
 39 |       "cell_type": "code",
 40 |       "metadata": {
 41 |         "id": "GWhz8RPhRfF1",
 42 |         "colab_type": "code",
 43 |         "outputId": "fb2e7b2d-d11b-4eef-fc81-62d6a7bbb725",
 44 |         "colab": {
 45 |           "base_uri": "https://localhost:8080/",
 46 |           "height": 67
 47 |         }
 48 |       },
 49 |       "source": [
 50 |         "import torch\n",
 51 |         "import torch.nn as nn\n",
 52 |         "import torch.optim as optim\n",
 53 |         "import torch.nn.functional as F\n",
 54 |         "from torch.utils.data import TensorDataset, DataLoader\n",
 55 |         "import numpy as np\n",
 56 |         "import pandas as pd\n",
 57 |         "\n",
 58 |         "print('Torch version', torch.__version__)\n",
 59 |         "print('Pandas version', pd.__version__)\n",
 60 |         "print('Numpy version', np.__version__)"
 61 |       ],
 62 |       "execution_count": 2,
 63 |       "outputs": [
 64 |         {
 65 |           "output_type": "stream",
 66 |           "text": [
 67 |             "Torch version 1.3.1\n",
 68 |             "Pandas version 0.25.3\n",
 69 |             "Numpy version 1.17.4\n"
 70 |           ],
 71 |           "name": "stdout"
 72 |         }
 73 |       ]
 74 |     },
 75 |     {
 76 |       "cell_type": "markdown",
 77 |       "metadata": {
 78 |         "id": "d80zm5dOSsOr",
 79 |         "colab_type": "text"
 80 |       },
 81 |       "source": [
 82 |         "The following should say `cuda:0`. If it does not, we need to go to *Edit* -> *Notebook settings* and change it to a `GPU` from `None`. You only have to do this once per notebook."
 83 |       ]
 84 |     },
 85 |     {
 86 |       "cell_type": "code",
 87 |       "metadata": {
 88 |         "id": "ga1yyVAfRgK3",
 89 |         "colab_type": "code",
 90 |         "outputId": "87b9a739-2cf0-4f10-f112-544c6bf05edf",
 91 |         "colab": {
 92 |           "base_uri": "https://localhost:8080/",
 93 |           "height": 34
 94 |         }
 95 |       },
 96 |       "source": [
 97 |         "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
 98 |         "device"
 99 |       ],
100 |       "execution_count": 3,
101 |       "outputs": [
102 |         {
103 |           "output_type": "execute_result",
104 |           "data": {
105 |             "text/plain": [
106 |               "'cuda:0'"
107 |             ]
108 |           },
109 |           "metadata": {
110 |             "tags": []
111 |           },
112 |           "execution_count": 3
113 |         }
114 |       ]
115 |     },
116 |     {
117 |       "cell_type": "markdown",
118 |       "metadata": {
119 |         "id": "rW2RnKe3hvmh",
120 |         "colab_type": "text"
121 |       },
122 |       "source": [
123 |         "Read in dataset"
124 |       ]
125 |     },
126 |     {
127 |       "cell_type": "code",
128 |       "metadata": {
129 |         "id": "3U_r7UGpRf-g",
130 |         "colab_type": "code",
131 |         "colab": {}
132 |       },
133 |       "source": [
134 |         "df_train = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/dataRegression_train.csv', header=None)\n",
135 |         "df_val = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/dataRegression_test.csv', header=None)"
136 |       ],
137 |       "execution_count": 0,
138 |       "outputs": []
139 |     },
140 |     {
141 |       "cell_type": "markdown",
142 |       "metadata": {
143 |         "id": "okdjDnbphzjK",
144 |         "colab_type": "text"
145 |       },
146 |       "source": [
147 |         "Construct our x,y variables along with the training and validation dataset"
148 |       ]
149 |     },
150 |     {
151 |       "cell_type": "code",
152 |       "metadata": {
153 |         "id": "7EBgffu2RgG_",
154 |         "colab_type": "code",
155 |         "colab": {}
156 |       },
157 |       "source": [
158 |         "x_train = df_train.iloc[:,0:2]\n",
159 |         "y_train = df_train.iloc[:,2]\n",
160 |         "x_val = df_val.iloc[:,0:2]\n",
161 |         "y_val = df_val.iloc[:,2]"
162 |       ],
163 |       "execution_count": 0,
164 |       "outputs": []
165 |     },
166 |     {
167 |       "cell_type": "markdown",
168 |       "metadata": {
169 |         "id": "7D4h_C16gcjG",
170 |         "colab_type": "text"
171 |       },
172 |       "source": [
173 |         "Preprocess our data to go from a `pandas` DataFrame to a `numpy` array to a `torch` tensor."
174 |       ]
175 |     },
176 |     {
177 |       "cell_type": "code",
178 |       "metadata": {
179 |         "id": "vjq5O0XfRmPv",
180 |         "colab_type": "code",
181 |         "colab": {}
182 |       },
183 |       "source": [
184 |         "x_train_tensor = torch.tensor(x_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
185 |         "y_train_tensor = torch.tensor(y_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
186 |         "x_val_tensor = torch.tensor(x_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
187 |         "y_val_tensor = torch.tensor(y_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n",
188 |         "y_train_tensor = y_train_tensor.view(-1,1)\n",
189 |         "y_val_tensor = y_val_tensor.view(-1,1)"
190 |       ],
191 |       "execution_count": 0,
192 |       "outputs": []
193 |     },
194 |     {
195 |       "cell_type": "markdown",
196 |       "metadata": {
197 |         "id": "H-JVIuXegeix",
198 |         "colab_type": "text"
199 |       },
200 |       "source": [
201 |         "We'll write a python class to define out neural network."
202 |       ]
203 |     },
204 |     {
205 |       "cell_type": "code",
206 |       "metadata": {
207 |         "id": "33HwoaxDR-mx",
208 |         "colab_type": "code",
209 |         "colab": {}
210 |       },
211 |       "source": [
212 |         "class ThreeLayerNN(nn.Module):\n",
213 |         "    def __init__(self, dim_input, H):\n",
214 |         "        super().__init__()\n",
215 |         "        self.fc1 = nn.Linear(dim_input, H)\n",
216 |         "        self.fc2 = nn.Linear(H,H)\n",
217 |         "        self.fc3 = nn.Linear(H,1)\n",
218 |         "    \n",
219 |         "    def forward(self, x):\n",
220 |         "        x1 = F.relu(self.fc1(x))\n",
221 |         "        x2 = F.relu(self.fc2(x1))\n",
222 |         "        y_pred = self.fc3(x2)\n",
223 |         "        return y_pred"
224 |       ],
225 |       "execution_count": 0,
226 |       "outputs": []
227 |     },
228 |     {
229 |       "cell_type": "markdown",
230 |       "metadata": {
231 |         "id": "NRH6Qp9VglBx",
232 |         "colab_type": "text"
233 |       },
234 |       "source": [
235 |         "We create an instance of this class."
236 |       ]
237 |     },
238 |     {
239 |       "cell_type": "code",
240 |       "metadata": {
241 |         "id": "aqnHMyc9R-xI",
242 |         "colab_type": "code",
243 |         "outputId": "a3446684-71c0-4531-9bf7-3e544230f18d",
244 |         "colab": {
245 |           "base_uri": "https://localhost:8080/",
246 |           "height": 101
247 |         }
248 |       },
249 |       "source": [
250 |         "model = ThreeLayerNN(x_train_tensor.shape[1],5).to(device)\n",
251 |         "print(model)"
252 |       ],
253 |       "execution_count": 8,
254 |       "outputs": [
255 |         {
256 |           "output_type": "stream",
257 |           "text": [
258 |             "ThreeLayerNN(\n",
259 |             "  (fc1): Linear(in_features=2, out_features=5, bias=True)\n",
260 |             "  (fc2): Linear(in_features=5, out_features=5, bias=True)\n",
261 |             "  (fc3): Linear(in_features=5, out_features=1, bias=True)\n",
262 |             ")\n"
263 |           ],
264 |           "name": "stdout"
265 |         }
266 |       ]
267 |     },
268 |     {
269 |       "cell_type": "markdown",
270 |       "metadata": {
271 |         "id": "ryc3EnW4RwqI",
272 |         "colab_type": "text"
273 |       },
274 |       "source": [
275 |         "`model.parameters()` contains the **weights** and **bias** (alternating) for each of the 3 layers\n",
276 |         "\n"
277 |       ]
278 |     },
279 |     {
280 |       "cell_type": "code",
281 |       "metadata": {
282 |         "id": "1-VGjPHeRmWH",
283 |         "colab_type": "code",
284 |         "outputId": "10d21071-3079-4923-d9cf-3755f6242b22",
285 |         "colab": {
286 |           "base_uri": "https://localhost:8080/",
287 |           "height": 437
288 |         }
289 |       },
290 |       "source": [
291 |         "params = list(model.parameters())\n",
292 |         "print(f'There are {len(params)} parameters')\n",
293 |         "for param in params:\n",
294 |         "    print(param)"
295 |       ],
296 |       "execution_count": 9,
297 |       "outputs": [
298 |         {
299 |           "output_type": "stream",
300 |           "text": [
301 |             "There are 6 parameters\n",
302 |             "Parameter containing:\n",
303 |             "tensor([[-0.6722, -0.1253],\n",
304 |             "        [ 0.3271, -0.5386],\n",
305 |             "        [-0.4360, -0.6635],\n",
306 |             "        [-0.0597,  0.2654],\n",
307 |             "        [-0.4511, -0.1803]], device='cuda:0', requires_grad=True)\n",
308 |             "Parameter containing:\n",
309 |             "tensor([ 0.4774,  0.0608,  0.3351,  0.6132, -0.1335], device='cuda:0',\n",
310 |             "       requires_grad=True)\n",
311 |             "Parameter containing:\n",
312 |             "tensor([[-0.4279,  0.0746, -0.2874, -0.4331,  0.0757],\n",
313 |             "        [-0.1138, -0.2704,  0.0156,  0.3182,  0.1802],\n",
314 |             "        [ 0.1589, -0.3853,  0.0769,  0.0236,  0.2774],\n",
315 |             "        [ 0.4160,  0.0268,  0.0658,  0.0249,  0.0023],\n",
316 |             "        [-0.1503,  0.1482, -0.0260,  0.2199,  0.2633]], device='cuda:0',\n",
317 |             "       requires_grad=True)\n",
318 |             "Parameter containing:\n",
319 |             "tensor([ 0.1400,  0.2608,  0.2217, -0.2910,  0.0465], device='cuda:0',\n",
320 |             "       requires_grad=True)\n",
321 |             "Parameter containing:\n",
322 |             "tensor([[ 0.1069,  0.0756, -0.3563,  0.3523, -0.4246]], device='cuda:0',\n",
323 |             "       requires_grad=True)\n",
324 |             "Parameter containing:\n",
325 |             "tensor([-0.3458], device='cuda:0', requires_grad=True)\n"
326 |           ],
327 |           "name": "stdout"
328 |         }
329 |       ]
330 |     },
331 |     {
332 |       "cell_type": "markdown",
333 |       "metadata": {
334 |         "id": "VQffaw77ft98",
335 |         "colab_type": "text"
336 |       },
337 |       "source": [
338 |         "We'll define a template for our `fit_model` function that contains `train` and `validate` functions.\n",
339 |         "\n",
340 |         "---\n",
341 |         "\n"
342 |       ]
343 |     },
344 |     {
345 |       "cell_type": "code",
346 |       "metadata": {
347 |         "id": "amLbK4yBRmfg",
348 |         "colab_type": "code",
349 |         "colab": {}
350 |       },
351 |       "source": [
352 |         "def fit_model(model, loss_fn, optimizer):\n",
353 |         "    def train(x,y):\n",
354 |         "        yhat = model(x)\n",
355 |         "        loss = loss_fn(yhat,y)\n",
356 |         "        optimizer.zero_grad()\n",
357 |         "        loss.backward()\n",
358 |         "        optimizer.step()\n",
359 |         "        return loss.item()\n",
360 |         "    \n",
361 |         "    def validate(x,y):\n",
362 |         "        yhat = model(x)\n",
363 |         "        loss = loss_fn(yhat,y)\n",
364 |         "        return loss.item()\n",
365 |         "    \n",
366 |         "    return train, validate"
367 |       ],
368 |       "execution_count": 0,
369 |       "outputs": []
370 |     },
371 |     {
372 |       "cell_type": "markdown",
373 |       "metadata": {
374 |         "id": "cKdszOgAguKD",
375 |         "colab_type": "text"
376 |       },
377 |       "source": [
378 |         "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions.\n"
379 |       ]
380 |     },
381 |     {
382 |       "cell_type": "code",
383 |       "metadata": {
384 |         "id": "eh_iIPQnSD40",
385 |         "colab_type": "code",
386 |         "colab": {}
387 |       },
388 |       "source": [
389 |         "loss_fn = nn.MSELoss(reduction='mean') #default\n",
390 |         "learning_rate = 0.1\n",
391 |         "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n",
392 |         "train, validate = fit_model(model, loss_fn, optimizer)"
393 |       ],
394 |       "execution_count": 0,
395 |       "outputs": []
396 |     },
397 |     {
398 |       "cell_type": "markdown",
399 |       "metadata": {
400 |         "id": "W0uIChFNfa2c",
401 |         "colab_type": "text"
402 |       },
403 |       "source": [
404 |         "## Mini-batches\n",
405 |         "From the documentation: `torch.nn` only supports mini-batches. The entire `torch.nn` package only supports inputs that are a mini-batch of samples, and not a single sample."
406 |       ]
407 |     },
408 |     {
409 |       "cell_type": "code",
410 |       "metadata": {
411 |         "id": "09NsOy59SD8J",
412 |         "colab_type": "code",
413 |         "colab": {}
414 |       },
415 |       "source": [
416 |         "train_data = TensorDataset(x_train_tensor, y_train_tensor)\n",
417 |         "train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)"
418 |       ],
419 |       "execution_count": 0,
420 |       "outputs": []
421 |     },
422 |     {
423 |       "cell_type": "markdown",
424 |       "metadata": {
425 |         "id": "y748bWpQg_5x",
426 |         "colab_type": "text"
427 |       },
428 |       "source": [
429 |         "Here is our training loop with mini-batch processing. We have to move each mini-batch onto the GPU."
430 |       ]
431 |     },
432 |     {
433 |       "cell_type": "code",
434 |       "metadata": {
435 |         "id": "fexqm4D9SHyh",
436 |         "colab_type": "code",
437 |         "outputId": "5c42c58a-b7d7-4c6a-9c70-e1e2d084ea12",
438 |         "colab": {
439 |           "base_uri": "https://localhost:8080/",
440 |           "height": 185
441 |         }
442 |       },
443 |       "source": [
444 |         "epochs = 100\n",
445 |         "for epoch in range(epochs):\n",
446 |         "    # training\n",
447 |         "    losses = []\n",
448 |         "    for i, (xbatch, ybatch) in enumerate(train_loader):\n",
449 |         "        xbatch = xbatch.to(device)\n",
450 |         "        ybatch = ybatch.to(device)\n",
451 |         "        loss = train(xbatch, ybatch)\n",
452 |         "        losses.append(loss)\n",
453 |         "    training_loss = np.mean(losses)\n",
454 |         "    # validation\n",
455 |         "    validation_loss = validate(x_val_tensor, y_val_tensor)\n",
456 |         "    # print intermediate results\n",
457 |         "    if epoch%10 == 9:\n",
458 |         "        print(epoch, training_loss, validation_loss)"
459 |       ],
460 |       "execution_count": 13,
461 |       "outputs": [
462 |         {
463 |           "output_type": "stream",
464 |           "text": [
465 |             "9 5.217282251878218 8.100061416625977\n",
466 |             "19 4.6458352262323555 6.509875774383545\n",
467 |             "29 4.617666352878917 6.0749030113220215\n",
468 |             "39 4.465590021827004 5.876566410064697\n",
469 |             "49 4.46304219419306 5.840087413787842\n",
470 |             "59 4.436497558246959 5.683042049407959\n",
471 |             "69 4.447906385768544 5.73892068862915\n",
472 |             "79 4.456741766496138 5.724264144897461\n",
473 |             "89 4.4289374351501465 5.7146830558776855\n",
474 |             "99 4.434686617417769 5.704777717590332\n"
475 |           ],
476 |           "name": "stdout"
477 |         }
478 |       ]
479 |     },
480 |     {
481 |       "cell_type": "markdown",
482 |       "metadata": {
483 |         "id": "wri-bxVPhPHB",
484 |         "colab_type": "text"
485 |       },
486 |       "source": [
487 |         "We can view the current state of our model using the `state_dict` method."
488 |       ]
489 |     },
490 |     {
491 |       "cell_type": "code",
492 |       "metadata": {
493 |         "id": "xmiD0CQvSH2D",
494 |         "colab_type": "code",
495 |         "outputId": "997bf898-5732-4966-dee0-88b41b452c7b",
496 |         "colab": {
497 |           "base_uri": "https://localhost:8080/",
498 |           "height": 319
499 |         }
500 |       },
501 |       "source": [
502 |         "model.state_dict()"
503 |       ],
504 |       "execution_count": 14,
505 |       "outputs": [
506 |         {
507 |           "output_type": "execute_result",
508 |           "data": {
509 |             "text/plain": [
510 |               "OrderedDict([('fc1.weight', tensor([[-0.9870, -0.4540],\n",
511 |               "                      [ 2.0965, -0.3272],\n",
512 |               "                      [-0.4208, -0.8602],\n",
513 |               "                      [ 1.4232,  0.2407],\n",
514 |               "                      [-0.4511, -0.1803]], device='cuda:0')),\n",
515 |               "             ('fc1.bias',\n",
516 |               "              tensor([ 0.0582,  0.2425,  0.0584,  0.6218, -0.1335], device='cuda:0')),\n",
517 |               "             ('fc2.weight',\n",
518 |               "              tensor([[-0.2153,  1.3850, -0.1548,  0.3375,  0.0757],\n",
519 |               "                      [ 0.1091,  1.0617,  0.1496,  1.1005,  0.1802],\n",
520 |               "                      [ 0.0043, -0.5234, -0.0231, -0.1097,  0.2774],\n",
521 |               "                      [ 0.4160,  0.0268,  0.0658,  0.0249,  0.0023],\n",
522 |               "                      [-0.2503, -0.0960, -0.1260,  0.0717,  0.2633]], device='cuda:0')),\n",
523 |               "             ('fc2.bias',\n",
524 |               "              tensor([ 0.3495,  0.5070,  0.0802, -0.2910, -0.1100], device='cuda:0')),\n",
525 |               "             ('fc3.weight',\n",
526 |               "              tensor([[ 1.0817,  0.8173, -0.2413,  0.3523, -0.3157]], device='cuda:0')),\n",
527 |               "             ('fc3.bias', tensor([-0.1517], device='cuda:0'))])"
528 |             ]
529 |           },
530 |           "metadata": {
531 |             "tags": []
532 |           },
533 |           "execution_count": 14
534 |         }
535 |       ]
536 |     }
537 |   ]
538 | }


--------------------------------------------------------------------------------