├── pytorch ├── DNN.png ├── L2reg.png ├── mtn.png ├── conv2d.gif ├── maxPool.gif ├── mnist_0-9.png ├── playground.png ├── Workshop_DL.pdf ├── overfitting.png ├── overfitting2.png ├── underfitting.png ├── DNN_activations.png ├── goodRegression.png ├── iris_versicolor.jpg ├── DNNRegressor_data.png ├── DNNRegressor_fit.png ├── README.md ├── workshop_neural_net.md ├── Workshop_CNN.ipynb ├── Workshop_Classification.ipynb └── Workshop_Regression_Class.ipynb ├── D3 ├── img │ ├── end_code.png │ ├── create_rect.png │ ├── exercise1.png │ ├── g_element.png │ ├── numerically.png │ ├── start_code.png │ ├── create_x_axis.png │ ├── data_variable.png │ ├── exercise1_sol.png │ ├── alphabetically.png │ ├── create_bar_element.png │ └── team_logo_games_labels.png ├── exercise_1 │ ├── teams.csv │ ├── exercise_1.css │ ├── solution │ │ ├── teams.csv │ │ ├── solution_1.css │ │ ├── index.html │ │ └── solution_1.js │ ├── index.html │ └── exercise_1.js ├── exercise_2 │ ├── teams.csv │ ├── exercise_2.css │ ├── solution │ │ ├── teams.csv │ │ ├── solution_2.css │ │ ├── index.html │ │ └── solution_2.js │ ├── index.html │ └── exercise_2.js ├── exercise_3 │ ├── teams.csv │ ├── exercise_3.css │ ├── solution │ │ ├── teams.csv │ │ ├── solution_3.css │ │ ├── index.html │ │ ├── solution_3.html │ │ └── solution_3.js │ ├── index.html │ └── exercise_3.js ├── index.html ├── preprocessing │ └── preprocessing.py ├── urls.js ├── bar.css └── sortable.js ├── NLP ├── img │ ├── nltk_spacy.png │ ├── spacy_comp.PNG │ └── pipeline.svg └── README.md ├── regex ├── img │ ├── webpage.png │ └── pagesource.png ├── README.md └── data │ └── vins.txt ├── sql ├── img │ ├── screenshot.png │ ├── foodforthought2.png │ └── Visual_SQL_JOINS_orig.jpg └── README.md ├── sqlite ├── photos │ ├── Odie.jpg │ ├── lassie.jpg │ ├── scooby.jpg │ ├── snoopy.jpg │ └── wallace.jpg └── README.md ├── .gitattributes ├── fusion-tables ├── README.md └── Seattle_Parks.csv ├── thematic-maps ├── img │ ├── contour.png │ ├── isopleth.png │ ├── 
mi_choropleth.png │ ├── small │ │ ├── isopleth.png │ │ ├── mi_choropleth.png │ │ ├── top_20_crashes.png │ │ └── snowmobile_crashes.png │ ├── top_20_crashes.png │ └── snowmobile_crashes.png ├── README.md ├── snow_crashes.csv ├── snowmobile_crashes.txt └── deer_in_the_city.txt ├── pdf-data-extraction ├── 2013-02-005-v1.pdf ├── summary_of_fees_collected.pdf ├── MDOT_fastfacts02-2011_345554_7.pdf ├── README.md └── pdfminer_workshop.ipynb ├── matlab ├── README.md ├── gen_more_pts.m ├── letters.m └── workshop_plotname.m ├── dotmap └── README.md ├── flask └── README.md ├── geospatial-analysis └── README.md ├── datashader └── README.md ├── pandas └── README.md ├── network-analysis └── README.md ├── geopandas └── README.md ├── webscraping └── README.md ├── pyspark ├── README.md └── sample.csv ├── python-intro └── README.md ├── README.md ├── .gitignore └── sql-intermediate └── README.md /pytorch/DNN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNN.png -------------------------------------------------------------------------------- /pytorch/L2reg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/L2reg.png -------------------------------------------------------------------------------- /pytorch/mtn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/mtn.png -------------------------------------------------------------------------------- /D3/img/end_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/end_code.png -------------------------------------------------------------------------------- /pytorch/conv2d.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/conv2d.gif -------------------------------------------------------------------------------- /pytorch/maxPool.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/maxPool.gif -------------------------------------------------------------------------------- /D3/img/create_rect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_rect.png -------------------------------------------------------------------------------- /D3/img/exercise1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/exercise1.png -------------------------------------------------------------------------------- /D3/img/g_element.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/g_element.png -------------------------------------------------------------------------------- /D3/img/numerically.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/numerically.png -------------------------------------------------------------------------------- /D3/img/start_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/start_code.png -------------------------------------------------------------------------------- /NLP/img/nltk_spacy.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caocscar/workshops/HEAD/NLP/img/nltk_spacy.png -------------------------------------------------------------------------------- /NLP/img/spacy_comp.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/NLP/img/spacy_comp.PNG -------------------------------------------------------------------------------- /pytorch/mnist_0-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/mnist_0-9.png -------------------------------------------------------------------------------- /pytorch/playground.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/playground.png -------------------------------------------------------------------------------- /regex/img/webpage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/regex/img/webpage.png -------------------------------------------------------------------------------- /sql/img/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/screenshot.png -------------------------------------------------------------------------------- /sqlite/photos/Odie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/Odie.jpg -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 
2 | * text=auto -------------------------------------------------------------------------------- /D3/img/create_x_axis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_x_axis.png -------------------------------------------------------------------------------- /D3/img/data_variable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/data_variable.png -------------------------------------------------------------------------------- /D3/img/exercise1_sol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/exercise1_sol.png -------------------------------------------------------------------------------- /fusion-tables/README.md: -------------------------------------------------------------------------------- 1 | # Fusion Tables 2 | The slide deck is available at https://goo.gl/VDtjgn 3 | -------------------------------------------------------------------------------- /pytorch/Workshop_DL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/Workshop_DL.pdf -------------------------------------------------------------------------------- /pytorch/overfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/overfitting.png -------------------------------------------------------------------------------- /pytorch/overfitting2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/overfitting2.png -------------------------------------------------------------------------------- 
/pytorch/underfitting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/underfitting.png -------------------------------------------------------------------------------- /regex/img/pagesource.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/regex/img/pagesource.png -------------------------------------------------------------------------------- /sqlite/photos/lassie.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/lassie.jpg -------------------------------------------------------------------------------- /sqlite/photos/scooby.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/scooby.jpg -------------------------------------------------------------------------------- /sqlite/photos/snoopy.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/snoopy.jpg -------------------------------------------------------------------------------- /D3/img/alphabetically.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/alphabetically.png -------------------------------------------------------------------------------- /pytorch/DNN_activations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNN_activations.png -------------------------------------------------------------------------------- /pytorch/goodRegression.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/goodRegression.png -------------------------------------------------------------------------------- /pytorch/iris_versicolor.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/iris_versicolor.jpg -------------------------------------------------------------------------------- /sql/img/foodforthought2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/foodforthought2.png -------------------------------------------------------------------------------- /sqlite/photos/wallace.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sqlite/photos/wallace.jpg -------------------------------------------------------------------------------- /D3/img/create_bar_element.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/create_bar_element.png -------------------------------------------------------------------------------- /pytorch/DNNRegressor_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNNRegressor_data.png -------------------------------------------------------------------------------- /pytorch/DNNRegressor_fit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pytorch/DNNRegressor_fit.png -------------------------------------------------------------------------------- /thematic-maps/img/contour.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/contour.png -------------------------------------------------------------------------------- /D3/exercise_1/teams.csv: -------------------------------------------------------------------------------- 1 | team,value 2 | Boston,100 3 | Detroit,85 4 | New York,80 5 | Chicago,75 6 | Atlanta,30 7 | -------------------------------------------------------------------------------- /D3/exercise_2/teams.csv: -------------------------------------------------------------------------------- 1 | team,value 2 | Boston,100 3 | Detroit,85 4 | New York,80 5 | Chicago,75 6 | Atlanta,30 7 | -------------------------------------------------------------------------------- /D3/exercise_3/teams.csv: -------------------------------------------------------------------------------- 1 | team,value 2 | Boston,100 3 | Detroit,85 4 | New York,80 5 | Chicago,75 6 | Atlanta,30 7 | -------------------------------------------------------------------------------- /thematic-maps/img/isopleth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/isopleth.png -------------------------------------------------------------------------------- /D3/exercise_1/exercise_1.css: -------------------------------------------------------------------------------- 1 | .label{ 2 | text-anchor: middle; 3 | } 4 | 5 | .barlabel { 6 | text-anchor: middle; 7 | } -------------------------------------------------------------------------------- /D3/exercise_1/solution/teams.csv: -------------------------------------------------------------------------------- 1 | team,value 2 | Boston,100 3 | Detroit,85 4 | New York,80 5 | Chicago,75 6 | Atlanta,30 7 | -------------------------------------------------------------------------------- /D3/exercise_2/exercise_2.css: 
-------------------------------------------------------------------------------- 1 | .label{ 2 | text-anchor: middle; 3 | } 4 | 5 | .barlabel { 6 | text-anchor: middle; 7 | } -------------------------------------------------------------------------------- /D3/exercise_2/solution/teams.csv: -------------------------------------------------------------------------------- 1 | team,value 2 | Boston,100 3 | Detroit,85 4 | New York,80 5 | Chicago,75 6 | Atlanta,30 7 | -------------------------------------------------------------------------------- /D3/exercise_3/exercise_3.css: -------------------------------------------------------------------------------- 1 | .label{ 2 | text-anchor: middle; 3 | } 4 | 5 | .barlabel { 6 | text-anchor: middle; 7 | } -------------------------------------------------------------------------------- /D3/exercise_3/solution/teams.csv: -------------------------------------------------------------------------------- 1 | team,value 2 | Boston,100 3 | Detroit,85 4 | New York,80 5 | Chicago,75 6 | Atlanta,30 7 | -------------------------------------------------------------------------------- /D3/img/team_logo_games_labels.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/D3/img/team_logo_games_labels.png -------------------------------------------------------------------------------- /sql/img/Visual_SQL_JOINS_orig.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/sql/img/Visual_SQL_JOINS_orig.jpg -------------------------------------------------------------------------------- /thematic-maps/img/mi_choropleth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/mi_choropleth.png 
-------------------------------------------------------------------------------- /thematic-maps/img/small/isopleth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/isopleth.png -------------------------------------------------------------------------------- /thematic-maps/img/top_20_crashes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/top_20_crashes.png -------------------------------------------------------------------------------- /D3/exercise_1/solution/solution_1.css: -------------------------------------------------------------------------------- 1 | .label{ 2 | text-anchor: middle; 3 | } 4 | 5 | .barlabel { 6 | text-anchor: middle; 7 | } -------------------------------------------------------------------------------- /D3/exercise_2/solution/solution_2.css: -------------------------------------------------------------------------------- 1 | .label{ 2 | text-anchor: middle; 3 | } 4 | 5 | .barlabel { 6 | text-anchor: middle; 7 | } -------------------------------------------------------------------------------- /pdf-data-extraction/2013-02-005-v1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/2013-02-005-v1.pdf -------------------------------------------------------------------------------- /thematic-maps/img/small/mi_choropleth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/mi_choropleth.png -------------------------------------------------------------------------------- /thematic-maps/img/small/top_20_crashes.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/top_20_crashes.png -------------------------------------------------------------------------------- /thematic-maps/img/snowmobile_crashes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/snowmobile_crashes.png -------------------------------------------------------------------------------- /D3/exercise_3/solution/solution_3.css: -------------------------------------------------------------------------------- 1 | .label{ 2 | text-anchor: middle; 3 | } 4 | 5 | .barlabel { 6 | text-anchor: middle; 7 | } -------------------------------------------------------------------------------- /thematic-maps/img/small/snowmobile_crashes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/thematic-maps/img/small/snowmobile_crashes.png -------------------------------------------------------------------------------- /pdf-data-extraction/summary_of_fees_collected.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/summary_of_fees_collected.pdf -------------------------------------------------------------------------------- /matlab/README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Matlab 2 | 3 | This workshop was created for the UROP program as a brief (2 hour) intro to Matlab and its capabilities. 
4 | -------------------------------------------------------------------------------- /pdf-data-extraction/MDOT_fastfacts02-2011_345554_7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caocscar/workshops/HEAD/pdf-data-extraction/MDOT_fastfacts02-2011_345554_7.pdf -------------------------------------------------------------------------------- /sqlite/README.md: -------------------------------------------------------------------------------- 1 | # SQLITE 2 | The jupyter notebook can be found here 3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/sqlite/sqlite3.ipynb -------------------------------------------------------------------------------- /dotmap/README.md: -------------------------------------------------------------------------------- 1 | # Working with Geographical Data and Parallel Computing on Flux 2 | 3 | The workshop code is in another repository at 4 | https://github.com/clarkdatalabs/dotmap_workshop 5 | 6 | -------------------------------------------------------------------------------- /flask/README.md: -------------------------------------------------------------------------------- 1 | # Flask 2 | 3 | This 2 hr workshop introduces Flask for deploying web applications. 
4 | 5 | My student, Ellen Paquet, prepared the workshop materials and it is located at https://github.com/epmarie/flask_example_app -------------------------------------------------------------------------------- /geospatial-analysis/README.md: -------------------------------------------------------------------------------- 1 | You can preview the HTML material at this Github HTML previewer: 2 | https://htmlpreview.github.io/?https://github.com/caocscar/workshops/blob/master/geospatial%20analysis/Geospatial%2BAnalysis%2BWorkshop.html 3 | -------------------------------------------------------------------------------- /datashader/README.md: -------------------------------------------------------------------------------- 1 | # Datashader 2 | 3 | This 1 hr workshop introduces the datashader visualization tool for large datasets. 4 | https://github.com/caocscar/workshops/blob/master/datashader/datashader.ipynb 5 | 6 | ## External Files 7 | Files are too large to be included. 8 | -------------------------------------------------------------------------------- /D3/exercise_1/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /D3/exercise_2/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /D3/exercise_1/solution/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /regex/README.md: -------------------------------------------------------------------------------- 1 | # Jupyter Notebook Viewer 2 | Regular Expression Part I 3 | 
https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/regex/Regex%20Tutorial%20P1.ipynb 4 | 5 | Regular Expression Part II 6 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/regex/Regex%20Tutorial%20P2.ipynb 7 | -------------------------------------------------------------------------------- /pandas/README.md: -------------------------------------------------------------------------------- 1 | # Intro to Pandas Workshop 2 | 3 | This workshop introduces the user to the world of `pandas` and includes common data wrangling methods. 4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pandas/pandas.ipynb 5 | 6 | ## External Files 7 | 8 | I've excluded them because the data files are large. [TODO] Include smaller version of files. 9 | -------------------------------------------------------------------------------- /network-analysis/README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Network Analysis using igraph 2 | 3 | This 2 hr workshop introduces igraph for network analysis. 4 | 5 | https://nbviewer.jupyter.org/github/epmarie/network_workshop/blob/master/IntroNetworkAnalysis.ipynb 6 | 7 | My student, Ellen Paquet, prepared the workshop materials and it is located at https://github.com/epmarie/network_workshop 8 | 9 | -------------------------------------------------------------------------------- /D3/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /geopandas/README.md: -------------------------------------------------------------------------------- 1 | # GeoPandas 2 | This 2 hr workshop introduces `geopandas` and maybe some `fiona`, `shapely`, `rtree`, `pysal`, and `folium`. 
3 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/geopandas/Geopandas.ipynb 4 | 5 | My student, Yiming Cai, prepared the workshop materials. 6 | 7 | ## External Files 8 | I've excluded them because the shapefiles are large. [TODO] Include smaller version of files. 9 | -------------------------------------------------------------------------------- /webscraping/README.md: -------------------------------------------------------------------------------- 1 | Web Scraping in Python Notebook 2 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/webscraping_in_python.ipynb 3 | 4 | Google API Notebook 5 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Google.ipynb 6 | 7 | Twitter API Notebook 8 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Twitter.ipynb 9 | 10 | -------------------------------------------------------------------------------- /matlab/gen_more_pts.m: -------------------------------------------------------------------------------- 1 | function M2 = gen_more_pts(M) 2 | 3 | if size(M,1) > 20 4 | disp('You have too many points submitted. 
This will take forever!!!') 5 | M2 = M; 6 | return 7 | end 8 | M2 = []; 9 | for i = 1:size(M,1)-1 10 | x1 = M(i,1); 11 | x2 = M(i+1,1); 12 | y1 = M(i,2); 13 | y2 = M(i+1,2); 14 | x = [linspace(x1,x2,100)]'; 15 | y = [linspace(y1,y2,100)]'; 16 | M2 = [M2; x y]; 17 | end 18 | -------------------------------------------------------------------------------- /pytorch/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Workshop 2 | 3 | [**Regression Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Regression_Class.ipynb) 4 | 5 | [**Classification Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_Classification.ipynb) 6 | 7 | [**Image Classification Problem**](https://colab.research.google.com/github/caocscar/workshops/blob/master/pytorch/Workshop_CNN.ipynb) 8 | -------------------------------------------------------------------------------- /pdf-data-extraction/README.md: -------------------------------------------------------------------------------- 1 | # Extracting Data from PDF 2 | 3 | There are 2 Jupyter Notebooks for this workshop (preferably done in this order): 4 | Tabula 5 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pdf%20data%20extraction/tabula_workshop.ipynb 6 | 7 | PDF Miner 8 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/pdf%20data%20extraction/pdfminer_workshop.ipynb 9 | 10 | ## External Files 11 | The `workshop_registration.pdf` is missing for privacy reasons. [TODO] Find a new pdf to use 12 | 13 | 14 | -------------------------------------------------------------------------------- /thematic-maps/README.md: -------------------------------------------------------------------------------- 1 | # Create Thematic Maps with Python 2 | 3 | This 2 hr workshop demonstrates how to create thematic maps using Matplotlib. 
4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/thematic%20maps/Thematic%20Maps%20with%20Matplotlib.ipynb 5 | 6 | Thematic Type|Image 7 | :---:|--- 8 | Choropleth Map|![Choropleth](img/small/mi_choropleth.png) 9 | Dot Map|![Dot](img/small/snowmobile_crashes.png) 10 | Proportional Dot Map|![Proportional](img/small/top_20_crashes.png) 11 | Isopleth|![Isopleth](img/small/isopleth.png) 12 | -------------------------------------------------------------------------------- /NLP/README.md: -------------------------------------------------------------------------------- 1 | # Intro to Natural Language Processing 2 | 3 | #### Jupyter Notebook Viewer Version 4 | https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/NLP/IntroNLP.ipynb 5 | 6 | This 2.5 hr workshop covers the following Python packages: 7 | - `spaCy` (tagger, parser, named-entity recognition) 8 | - `textacy` (n-grams) 9 | - `gensim` (topic modelling) 10 | - `pyLDAvis` (visualization) 11 | - `textblob` (sentiment analysis) 12 | 13 | My student, Ellen Paquet, prepared the workshop materials. Her original repo is located at https://github.com/epmarie/IntroNLP 14 | 15 | 16 | -------------------------------------------------------------------------------- /D3/exercise_3/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 14 |
15 |
16 | 20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /D3/exercise_2/solution/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 14 |
15 |
16 | 20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /D3/exercise_3/solution/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 14 |
15 |
16 | 20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /D3/exercise_3/solution/solution_3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 14 |
15 |
16 | 20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /D3/preprocessing/preprocessing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Oct 18 16:17:58 2019 4 | 5 | @author: caoa 6 | """ 7 | import pandas as pd 8 | 9 | pd.options.display.max_rows =20 10 | pd.options.display.max_columns = 20 11 | 12 | df = pd.read_csv('GL2018.TXT', header=None, usecols=[0,3,6,9,10]) 13 | df.columns = ['date','away','home','aRuns','hRuns'] 14 | 15 | #%% 16 | df['team'] = df.apply(lambda x: x['away'] if x['aRuns'] > x['hRuns'] else x['home'], axis=1) 17 | data = df[['date','team']] 18 | data.to_csv('daily_snapshot.csv', index=False) 19 | 20 | #%% Find first day where all teams have won at least one game 21 | data['date'] = pd.to_datetime(data['date'], format='%Y%m%d') 22 | daterange = pd.date_range('2018-03-29','2018-10-01',freq='D') 23 | for day in daterange: 24 | abc = data[data['date'] <= day] 25 | xyz = abc.team.value_counts() 26 | if xyz.shape[0] >= 30: 27 | print(day) 28 | break 29 | -------------------------------------------------------------------------------- /D3/urls.js: -------------------------------------------------------------------------------- 1 | const urls = { 2 | 'ARI':'Arizona_Diamondbacks', 3 | 'ATL':'Atlanta_Braves', 4 | 'SFN':'SanFrancisco_Giants', 5 | 'CHN':'Chicago_Cubs', 6 | 'NYN':'NewYork_Mets', 7 | 'MIL':'Milwaukee_Brewers', 8 | 'BAL':'Baltimore_Orioles', 9 | 'CHA':'Chicago_White_Sox', 10 | 'OAK':'Oakland_Athletics', 11 | 'SEA':'Seattle_Mariners', 12 | 'TBA':'TampaBay_Rays', 13 | 'HOU':'Houston_Astros', 14 | 'NYA':'NewYork_Yankees', 15 | 'PHI':'Philadelphia_Phillies', 16 | 'WAS':'Washington_Nationals', 17 | 'MIA':'Miami_Marlins', 18 | 'PIT':'Pittsburgh_Pirates', 19 | 'ANA':'LosAngeles_Angels', 20 | 'BOS':'Boston_Redsox', 21 | 'TEX':'Texas_Rangers', 22 | 'COL':'Colorado_Rockies', 23 | 'LAN':'LosAngeles_Dodgers', 24 | 
'MIN':'Minnesota_Twins', 25 | 'CLE':'Cleveland_Indians', 26 | 'TOR':'Toronto_Blue_Jays', 27 | 'SLN':'StLouis_Cardinals', 28 | 'CIN':'Cincinnati_Reds', 29 | 'DET':'Detroit_Tigers', 30 | 'SDN':'SanDiego_Padres', 31 | 'KCA':'KansasCity_Royals', 32 | } -------------------------------------------------------------------------------- /D3/bar.css: -------------------------------------------------------------------------------- 1 | /* .chart { 2 | clip-path: url(#clip); 3 | } */ 4 | 5 | .bar { 6 | fill: orange; 7 | } 8 | 9 | .x.axis text { 10 | font: 15px sans-serif; 11 | } 12 | 13 | .axis path, .axis line { 14 | fill: none; 15 | stroke: '#000'; 16 | shape-rendering: crispEdges; 17 | } 18 | 19 | .label { 20 | text-anchor: middle; 21 | font: 20px helvetica; 22 | } 23 | 24 | #date { 25 | text-anchor: start; 26 | font: 20px helvetica; 27 | } 28 | 29 | .grid line { 30 | stroke: lightgrey; 31 | stroke-opacity: 0.7; 32 | shape-rendering: crispEdges; 33 | } 34 | 35 | .grid path { 36 | stroke-width: 0; 37 | } 38 | 39 | .team { 40 | fill: black; 41 | font: 14px sans-serif; 42 | text-anchor: end; 43 | font-weight: 600; 44 | } 45 | 46 | .barlabel{ 47 | fill: black; 48 | font: 14px sans-serif; 49 | text-anchor: left; 50 | font-weight: 600; 51 | } 52 | 53 | .logo { 54 | fill: black; 55 | font: 14px sans-serif; 56 | text-anchor: middle; 57 | } 58 | 59 | .divisions { 60 | stroke: black; 61 | stroke-width: 2; 62 | stroke-dasharray: 12; 63 | } -------------------------------------------------------------------------------- /pyspark/README.md: -------------------------------------------------------------------------------- 1 | # PySpark: DataFrames, Datasets, and SparkSQL 2 | [pyspark.md](pyspark.md) contains the markdown material for the PySpark workshop. 3 | 4 | # Scala: DataFrames, Datasets, and SparkSQL 5 | [scala.md](scala.md) contains the markdown material for the Scala workshop. 6 | 7 | # PySpark vs. 
Scala 8 | Here's an [article](https://www.pluralsight.com/blog/software-development/scala-vs-python) comparing the two of them. 9 | 10 | ## Using PySpark with the Twitter Decahose dataset on Cavium 11 | The github repo is located at https://github.com/caocscar/twitter-decahose-pyspark 12 | 13 | **Note**: You need to get permission to access the dataset first. More information available at: https://midas.umich.edu/research-datasets/ 14 | 15 | ## Cheat Sheets 16 | My github Hadoop cheat sheet 17 | https://github.com/caocscar/hadoopcheatsheet 18 | 19 | DataCamp's cheat sheet for PySpark DataFrames 20 | https://s3.amazonaws.com/assets.datacamp.com/blog_assets/PySpark_SQL_Cheat_Sheet_Python.pdf 21 | 22 | Edrukea's cheat sheet for PySpark RDDs 23 | https://www.edureka.co/blog/cheatsheets/pyspark-cheat-sheet-python/ 24 | 25 | -------------------------------------------------------------------------------- /python-intro/README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Python 2 | 3 | There are no notes for this 6 hour workshop. I do it freestyle using a project-based learning approach and provide attendees with a beginner's cheat sheet. 
4 | 5 | Exercises 6 | https://goo.gl/bw1J9L 7 | 8 | Mastermind Game Online 9 | http://www.webgamesonline.com/mastermind/ 10 | 11 | A good cheat sheet for beginners is located at 12 | http://ehmatthes.github.io/pcc/cheatsheets/README.html 13 | 14 | Here is a link for learning Python for programmers 15 | https://wiki.python.org/moin/BeginnersGuide/Programmers 16 | 17 | Here is a link for learning Python for non-programmers 18 | https://wiki.python.org/moin/BeginnersGuide/NonProgrammers 19 | 20 | ## Python Topics Covered 21 | ### Python Functions 22 | ``` 23 | input 24 | type 25 | from import 26 | random 27 | range 28 | print 29 | len 30 | zip 31 | id 32 | time 33 | ``` 34 | 35 | ### Data Types 36 | ``` 37 | int 38 | float 39 | string 40 | list 41 | tuple 42 | dictionary 43 | set 44 | ``` 45 | 46 | ### Control Flow 47 | ``` 48 | if elif else 49 | for 50 | while 51 | continue 52 | break 53 | pass 54 | ``` 55 | 56 | ### File I/O 57 | ``` 58 | with 59 | open 60 | write 61 | read 62 | readlines 63 | ``` 64 | 65 | ### Miscelleaneous 66 | comments 67 | list comprehension 68 | casting variables 69 | how to write a function 70 | integer division 71 | reference vs. copying variables 72 | banker's rounding 73 | -------------------------------------------------------------------------------- /sql/README.md: -------------------------------------------------------------------------------- 1 | # Intro to SQL 2 | Here is the [WORKSHOP SLIDE DECK](http://nbviewer.jupyter.org/format/slides/github/caocscar/workshops/blob/master/sql/SQLslides.ipynb#/). 3 | We'll be using the [w3schools website](https://www.w3schools.com/sql/) to write queries. It also is a good reference for SQL. 4 | 5 | --- 6 | # Miscellaneous Stuff 7 | 8 | ## Converting Jupyter Notebook into Slide Deck 9 | The following command will render your Jupyter Notebook into a **reveal.js** slide deck. 
10 | 11 | `jupyter nbconvert SQLslides.ipynb --to slides --post serve` 12 | 13 | The `--post serve` command starts up a local server to host it. 14 | 15 | **Tip**: Make sure your Jupyter notebook is closed before running the command. 16 | 17 | ### Configuration Options 18 | More options available at https://nbconvert.readthedocs.io/en/latest/config_options.html 19 | 20 | ## How to Post Slide Deck Online 21 | 1. Go to http://nbviewer.jupyter.org 22 | 2. Enter url where the Jupyter Notebook file can be located. 23 | 3. Make sure **nbviewer** is in *slide mode* and not *notebook mode* among the icons in the top right. 24 | 25 | ## Contributors 26 | The slide deck was created originally by my student [Maggie Orton](https://github.com/margamo/intro-to-SQL) on March 14, 2017. 27 | 28 | And modified by my student [Kaitlin Cornwell](https://github.com/kaitcorn/intro-to-SQL) on March 16, 2018. 29 | 30 | And further modified by my student [Jessica Zhang](https://github.com/jezzhang/sqlworkshop) on January 31, 2020. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSCAR Workshops 2 | This is a list of some (but not all) CSCAR Workshops I've done (in no particular order). 
3 | - [Introduction to Matlab](matlab) 4 | - [Introduction to Python](python-intro) 5 | - [Pandas](pandas) 6 | - [Introduction to SQL](sql) 7 | - [Intermediate SQL](sql-intermediate) 8 | - [SQLite](sqlite) 9 | - [Regular Expressions](regex) 10 | - [Natural Language Processing with Python](NLP) 11 | - [Network Analysis with igraph](network-analysis) 12 | - [SparkSQL and DataFrames with PySpark](pyspark) (Using PySpark with the [Twitter Decahose dataset on Cavium](https://github.com/caocscar/twitter-decahose-pyspark)) 13 | - [GeoPandas](geopandas) 14 | - [Geospatial Analysis with Python](geospatial-analysis) 15 | - [Working with Geographical Data and Parallel Computing on Flux](dotmap) 16 | - [Thematic Maps with Python](thematic-maps) 17 | - [Datashader](datashader) 18 | - [Google Fusion Tables](fusion-tables) (this product is no longer available) 19 | - [Web Scraping with Python](webscraping) 20 | - [Scraping HTML](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/webscraping_in_python.ipynb) 21 | - [Google and YouTube APIs](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Google.ipynb) 22 | - [Twitter APIs](https://nbviewer.jupyter.org/github/caocscar/workshops/blob/master/webscraping/Twitter.ipynb) (instructions on how to set up a [developer account](https://github.com/caocscar/twitter-create-developer-account)) 23 | - [Extracting Data from PDFs](pdf-data-extraction) 24 | - [Flask](flask) 25 | - [Introduction to PyTorch](pytorch) 26 | - [Introduction to D3.js](D3) 27 | - explains code used to generate data viz located at https://d3-examples-caocscar.onrender.com/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Jupyter Notebooks 2 | webscraping/.ipynb_checkpoints/ 3 | .ipynb_checkpoints/ 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | 
*$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | .venv 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | -------------------------------------------------------------------------------- /matlab/letters.m: -------------------------------------------------------------------------------- 1 | A = [0 0; 2 | 1 4; 3 | 2 0; 4 | 1.5 2; 5 | 0.5 2]; 6 | B = [0 0; 7 | 0 4; 8 | 1.5 3.75; 9 | 1.5 2.25; 10 | 0.1 2; 11 | 1.5 1.75 12 | 1.5 0.25; 13 | 0 0]; 14 | C = [1.5 0; 15 | 0 0; 16 | 0 4; 17 | 1.5 4]; 18 | D = [0 0; 19 | 0 4; 20 | 1.5 3.75; 21 
| 1.5 0.25; 22 | 0 0]; 23 | E = [2 0; 24 | 0 0; 25 | 0 2; 26 | 1 2; 27 | 0 2; 28 | 0 4; 29 | 2 4]; 30 | F = [0 0; 31 | 0 2; 32 | 1 2; 33 | 0 2; 34 | 0 4; 35 | 2 4]; 36 | G = [1.5 4; 37 | 0 4; 38 | 0 0; 39 | 1.5 0; 40 | 1.5 2; 41 | 1 2; 42 | 2 2]; 43 | H = [0 0; 44 | 0 4; 45 | 0 2; 46 | 2 2; 47 | 2 0; 48 | 2 4]; 49 | I = [0 0; 50 | 2 0; 51 | 1 0; 52 | 1 4; 53 | 0 4; 54 | 2 4]; 55 | J = [0 0.75; 56 | 0 0; 57 | 1.25 0; 58 | 1.25 4; 59 | 0.5 4; 60 | 2 4]; 61 | K = [0 0; 62 | 0 4; 63 | 0 2; 64 | 1.5 4; 65 | 0 2; 66 | 1.5 0]; 67 | L = [0 4; 68 | 0 0; 69 | 2 0]; 70 | M = [0 0; 71 | 0.25 4; 72 | 1 2; 73 | 1.75 4; 74 | 2 0]; 75 | N = [0 0; 76 | 0 4; 77 | 2 0; 78 | 2 4]; 79 | O = [0 0; 80 | 0 4; 81 | 1.5 4; 82 | 1.5 0; 83 | 0 0]; 84 | P = [0 0; 85 | 0 4; 86 | 1.5 4; 87 | 1.5 2; 88 | 0 2]; 89 | Q = [1.5 0.25; 90 | 0 0.25; 91 | 0 4; 92 | 1.5 4; 93 | 1.5 0.25; 94 | 1.75 0; 95 | 1.25 0.5]; 96 | R = [0 0; 97 | 0 4; 98 | 1.5 4; 99 | 1.5 2; 100 | 0 2; 101 | 1.5 0]; 102 | S = [0 0; 103 | 1.5 0; 104 | 1.5 2; 105 | 0 2; 106 | 0 4; 107 | 1.5 4]; 108 | T = [1 0; 109 | 1 4; 110 | 0 4; 111 | 2 4]; 112 | U = [0 4; 113 | 0 0; 114 | 2 0; 115 | 2 4]; 116 | V = [0 4; 117 | 1 0; 118 | 2 4]; 119 | W = [0 4; 120 | 0.25 0; 121 | 1 2; 122 | 1.75 0; 123 | 2 4]; 124 | X = [0 4; 125 | 2 0; 126 | 1 2; 127 | 2 4; 128 | 0 0]; 129 | Y = [0 0; 130 | 2 4; 131 | 1 2; 132 | 0 4]; 133 | Z = [2 0; 134 | 0 0; 135 | 2 4; 136 | 0 4]; 137 | alphabet = {A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z}; 138 | clear A B C D E F G H I J K L M N O P Q R S T U V W X Y Z -------------------------------------------------------------------------------- /D3/exercise_1/exercise_1.js: -------------------------------------------------------------------------------- 1 | // set the dimensions and margins of the graph 2 | var outerWidth = 650; 3 | var outerHeight = 300; 4 | 5 | var margin = {top: 20, right: 20, bottom: 70, left: 100}, 6 | width = outerWidth - margin.left - margin.right - 20, 7 | height = outerHeight - 
margin.top - margin.bottom; 8 | 9 | // set the ranges 10 | var x = d3.scaleLinear() 11 | .range([0, width]); 12 | 13 | var y = d3.scaleBand() 14 | .range([height, 0]) 15 | .padding(0.33); 16 | 17 | var xAxis = d3.axisTop(x) 18 | .ticks(5) 19 | 20 | var yAxis = d3.axisLeft(y) 21 | .tickFormat('') 22 | 23 | // append the svg object to the body of the page 24 | // append a 'group' element to 'svg' 25 | // moves the 'group' element to the top left margin 26 | var svg = d3.select('body').append('svg') 27 | .attr("class", "chart") 28 | .attr("width", outerWidth) 29 | .attr("height", outerHeight) 30 | .append("g") 31 | .attr("transform", `translate(${margin.left},${margin.top})`); 32 | 33 | // data 34 | var data = [{'team':'Boston','value':100}, 35 | {'team':'Detroit','value':85}, 36 | {'team':'New York','value':80}, 37 | {'team':'Atlanta','value':75}, 38 | {'team':'Chicago','value':30}] 39 | 40 | // scale the range of the data in the domains 41 | x.domain([0, d3.max(data, d => d.value)]) 42 | y.domain(data.map(d => d.team)); 43 | 44 | // append the rectangles for the bar chart 45 | var bar = svg.selectAll(".bar") 46 | .data(data) 47 | .join("g") 48 | .attr("class","bar") 49 | 50 | var rect = bar.append('rect') 51 | .attr("width", d => x(d.value)) 52 | .attr("y", d => y(d.team)) 53 | .attr("height", y.bandwidth()) 54 | .attr("x", 0) 55 | .style('fill', d => d3.interpolatePurples(d.value/100)) 56 | 57 | // add the x Axis 58 | svg.append("g") 59 | .attr("transform", `translate(0, ${height})`) 60 | .call(d3.axisBottom(x)); 61 | 62 | // add the y Axis 63 | svg.append("g") 64 | .call(d3.axisLeft(y)); 65 | 66 | // add chart labels 67 | labels = svg.append('g') 68 | .attr('class', 'label') 69 | 70 | // x label 71 | labels.append('text') 72 | .attr('transform', `translate(${width/2},250)`) 73 | .text('Wins') 74 | 75 | // y label 76 | ylabel = labels.append('text') 77 | .attr('transform', `translate(-65,${height/2}) rotate(-90)`) 78 | .text('Teams') 79 | 80 | barLabels = 
bar.append('text') 81 | .attr('class', 'barlabel') 82 | .attr('x', d => x(d.value) - 20) 83 | .attr('y', d => y(d.team) + (y.bandwidth()/2) + 4) 84 | .text(d => d.value) 85 | .style('fill', 'black') 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /D3/exercise_2/exercise_2.js: -------------------------------------------------------------------------------- 1 | // set the dimensions and margins of the graph 2 | var outerWidth = 960; 3 | var outerHeight = 500; 4 | 5 | var margin = {top: 50, right: 20, bottom: 80, left: 80}, 6 | width = outerWidth - margin.left - margin.right, 7 | height = outerHeight - margin.top - margin.bottom; 8 | 9 | // set the ranges 10 | var x = d3.scaleBand() 11 | .range([0, width]) 12 | .padding(0.33); 13 | 14 | var y= d3.scaleLinear() 15 | .range([height, 0]); 16 | 17 | 18 | var xAxis = d3.axisTop(x) 19 | .ticks(5) 20 | 21 | var yAxis = d3.axisLeft(y) 22 | .tickFormat('') 23 | 24 | // append the svg object to the body of the page 25 | // append a 'group' element to 'svg' 26 | // moves the 'group' element to the top left margin 27 | var svg = d3.select('body').append('svg') 28 | .attr("class", "chart") 29 | .attr("width", outerWidth) 30 | .attr("height", outerHeight) 31 | .append("g") 32 | .attr("transform", `translate(${margin.left},${margin.top})`); 33 | 34 | // data 35 | var data = [{'team':'Boston','value':100}, 36 | {'team':'Detroit','value':85}, 37 | {'team':'New York','value':80}, 38 | {'team':'Atlanta','value':75}, 39 | {'team':'Chicago','value':30}] 40 | 41 | 42 | // scale the range of the data in the domains 43 | x.domain(data.map(d => d.team)); 44 | y.domain([0, d3.max(data, d => d.value)]) 45 | 46 | 47 | // append the rectangles for the bar chart 48 | var bar = svg.selectAll(".bar") 49 | .data(data) 50 | .join("g") 51 | .attr("class","bar") 52 | 53 | 54 | 55 | var rect = bar.append('rect') 56 | .attr("height", d => height - y(d.value)) 57 | .attr("x", d => 
x(d.team)) 58 | .attr("width", x.bandwidth()) 59 | .attr("y", d => y(d.value)) 60 | .style('fill', d => d3.interpolatePurples(d.value/100)); 61 | 62 | // add the x Axis 63 | svg.append("g") 64 | .attr("transform", `translate(0, ${height})`) 65 | .call(d3.axisBottom(x)); 66 | 67 | // add the y Axis 68 | svg.append("g") 69 | .call(d3.axisLeft(y)); 70 | 71 | // add chart labels 72 | labels = svg.append('g') 73 | .attr('class', 'label') 74 | 75 | // x label 76 | labels.append('text') 77 | .attr('transform', `translate(${width/2},450)`) 78 | .text('Teams') 79 | 80 | // y label 81 | ylabel = labels.append('text') 82 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 83 | .text('Wins') 84 | 85 | barLabels = bar.append('text') 86 | .attr('class', 'barlabel') 87 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 88 | .attr('y', d => y(d.value) - 15) 89 | .text(d => d.value) 90 | .style('fill', 'black') 91 | -------------------------------------------------------------------------------- /D3/exercise_1/solution/solution_1.js: -------------------------------------------------------------------------------- 1 | // set the dimensions and margins of the graph 2 | var outerWidth = 960; 3 | var outerHeight = 500; 4 | 5 | var margin = {top: 50, right: 20, bottom: 80, left: 80}, 6 | width = outerWidth - margin.left - margin.right, 7 | height = outerHeight - margin.top - margin.bottom; 8 | 9 | // set the ranges 10 | var x = d3.scaleBand() 11 | .range([0, width]) 12 | .padding(0.33); 13 | 14 | var y= d3.scaleLinear() 15 | .range([height, 0]); 16 | 17 | 18 | var xAxis = d3.axisTop(x) 19 | .ticks(5) 20 | 21 | var yAxis = d3.axisLeft(y) 22 | .tickFormat('') 23 | 24 | // append the svg object to the body of the page 25 | // append a 'group' element to 'svg' 26 | // moves the 'group' element to the top left margin 27 | var svg = d3.select('body').append('svg') 28 | .attr("class", "chart") 29 | .attr("width", outerWidth) 30 | .attr("height", outerHeight) 31 | .append("g") 32 | 
.attr("transform", `translate(${margin.left},${margin.top})`); 33 | 34 | // data 35 | var data = [{'team':'Boston','value':100}, 36 | {'team':'Detroit','value':85}, 37 | {'team':'New York','value':80}, 38 | {'team':'Atlanta','value':75}, 39 | {'team':'Chicago','value':30}] 40 | 41 | 42 | // scale the range of the data in the domains 43 | x.domain(data.map(d => d.team)); 44 | y.domain([0, d3.max(data, d => d.value)]) 45 | 46 | 47 | // append the rectangles for the bar chart 48 | var bar = svg.selectAll(".bar") 49 | .data(data) 50 | .join("g") 51 | .attr("class","bar") 52 | 53 | 54 | 55 | var rect = bar.append('rect') 56 | .attr("height", d => height - y(d.value)) 57 | .attr("x", d => x(d.team)) 58 | .attr("width", x.bandwidth()) 59 | .attr("y", d => y(d.value)) 60 | .style('fill', d => d3.interpolatePurples(d.value/100)); 61 | 62 | // add the x Axis 63 | svg.append("g") 64 | .attr("transform", `translate(0, ${height})`) 65 | .call(d3.axisBottom(x)); 66 | 67 | // add the y Axis 68 | svg.append("g") 69 | .call(d3.axisLeft(y)); 70 | 71 | // add chart labels 72 | labels = svg.append('g') 73 | .attr('class', 'label') 74 | 75 | // x label 76 | labels.append('text') 77 | .attr('transform', `translate(${width/2},450)`) 78 | .text('Teams') 79 | 80 | // y label 81 | ylabel = labels.append('text') 82 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 83 | .text('Wins') 84 | 85 | barLabels = bar.append('text') 86 | .attr('class', 'barlabel') 87 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 88 | .attr('y', d => y(d.value) - 15) 89 | .text(d => d.value) 90 | .style('fill', 'black') 91 | -------------------------------------------------------------------------------- /NLP/img/pipeline.svg: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | Doc 10 | 11 | 12 | 13 | Text 14 | 15 | 16 | 17 | nlp 18 | 19 | tokenizer 20 | 21 | tagger 22 | 23 | 24 | 25 | parser 26 | 27 | ner 28 | 29 | ... 
30 | 31 | -------------------------------------------------------------------------------- /pyspark/sample.csv: -------------------------------------------------------------------------------- 1 | RxDevice,FileId,TxDevice,Gentime,TxRandom,MsgCount,DSecond,Latitude,Longitude,Elevation,Speed,Heading,Ax,Ay,Az,Yawrate,PathCount,RadiusOfCurve,Confidence 2 | 30,950898,30,286304909792863,0,29,3700,42.285103,-83.813293,253.8,0.0,26.799999,0.44999999,0.30000001,-10.0,1.22,6,3276.7,100 3 | 30,950898,30,286304909892863,0,30,3800,42.285103,-83.813293,253.8,0.40000001,26.799999,0.38,0.22,-10.0,1.46,6,3276.7,100 4 | 30,950898,30,286304909992863,0,31,3900,42.285103,-83.813293,253.7,0.5,26.799999,0.38,0.22,-10.0,1.46,6,3276.7,100 5 | 30,950898,30,286304910092861,0,32,4000,42.285103,-83.813293,253.7,0.62,26.799999,0.52999997,0.30000001,-10.0,1.95,6,3276.7,100 6 | 30,950898,30,286304910193010,0,33,4100,42.285107,-83.813293,253.7,0.72000003,26.799999,0.69,0.38,-10.0,2.4400001,6,3276.7,100 7 | 30,950898,30,286304910292864,0,34,4200,42.285107,-83.813293,253.7,0.83999997,26.799999,0.83999997,0.30000001,-10.0,2.4400001,6,3276.7,100 8 | 30,950898,30,286304910392995,0,35,4300,42.285107,-83.813293,253.7,0.98000002,26.799999,1.0,0.38,-10.0,3.1700001,6,3276.7,100 9 | 30,950898,30,286304910492990,0,36,4400,42.285107,-83.813293,253.7,1.14,26.799999,1.23,0.44999999,-10.0,3.9000001,6,16.799999,100 10 | 30,950898,30,286304910593130,0,37,4500,42.285107,-83.813293,253.7,1.28,26.799999,1.3099999,0.44999999,-10.0,4.1500001,6,17.700001,100 11 | 30,950898,30,286304910693004,0,38,4600,42.28511,-83.813293,253.7,1.4400001,29.35,1.39,0.52999997,-10.0,4.8800001,6,18.299999,78 12 | 30,950898,30,286304910792863,0,39,4700,42.28511,-83.813286,253.7,1.6,29.924999,1.23,0.52999997,-10.0,5.6100001,6,18.700001,52 13 | 30,950898,30,286304910892982,0,40,4800,42.28511,-83.813286,253.7,1.78,32.299999,1.39,0.69,-10.0,6.3400002,6,18.9,45 14 | 
30,950898,30,286304910992863,0,41,4900,42.285114,-83.813286,253.7,1.98,32.487499,1.39,0.69,-10.0,6.8200002,6,19.0,42 15 | 30,950898,30,286304911092864,0,42,5000,42.285114,-83.813286,253.7,2.1800001,33.637501,1.7,0.75999999,-10.0,7.8000002,6,18.9,39 16 | 30,950898,30,286304911192872,0,43,5100,42.285118,-83.813278,253.7,2.3599999,34.450001,1.7,0.75999999,-10.0,8.3000002,6,18.799999,38 17 | 30,950898,30,286304911292839,0,44,5200,42.285118,-83.813278,253.7,2.5599999,35.150002,1.77,0.92000002,-10.0,9.2700005,6,18.6,37 18 | 30,950898,30,286304911392900,0,45,5300,42.285122,-83.813278,253.7,2.76,35.137501,1.7,1.0,-10.0,10.0,6,18.4,36 19 | 30,950898,30,286304911492863,0,46,5400,42.285122,-83.813278,253.7,2.9400001,36.25,1.54,1.0,-10.0,10.73,6,18.1,36 20 | 30,950898,30,286304911592863,0,47,5500,42.285126,-83.813271,253.7,3.1600001,36.849998,1.46,1.3099999,-10.0,12.19,6,17.9,33 21 | 30,950898,30,286304911692841,0,48,5600,42.285126,-83.813271,253.60001,3.3599999,38.637501,1.3099999,1.54,-10.0,13.67,6,17.5,30 22 | 30,950898,30,286304911792896,0,49,5700,42.28513,-83.813271,253.60001,3.5,40.5,1.3099999,1.77,-10.0,14.64,6,17.200001,30 23 | 30,950898,30,286304911892840,0,50,5800,42.285133,-83.813263,253.60001,3.6800001,41.799999,1.15,1.7,-10.0,16.110001,6,16.799999,28 24 | 30,950898,30,286304911992904,0,51,5900,42.285133,-83.813263,253.60001,3.8800001,44.275002,1.15,1.7,-10.0,17.07,7,16.4,28 25 | 30,950898,30,286304912092843,0,52,6000,42.285137,-83.813255,253.60001,4.04,45.775002,1.23,1.46,-10.0,17.32,7,16.0,30 26 | 30,950898,30,286304912192874,0,53,6100,42.285137,-83.813255,253.60001,4.2199998,47.075001,1.15,2.0799999,-10.0,18.299999,7,15.7,31 27 | -------------------------------------------------------------------------------- /fusion-tables/Seattle_Parks.csv: -------------------------------------------------------------------------------- 1 | "PMAID","LocID","ZIP.Code","address","icon" 2 | 281,2545,98119,"1200 W Howe St Seattle 98119","ylw_circle" 3 | 4159,2387,98144,"2821 12TH 
Ave S Seattle 98144","orange_diamond" 4 | 4467,2382,98122,"564 12th Ave Seattle 98122","orange_diamond" 5 | 4010,2546,98107,"4400 14th Ave NW Seattle 98107","ylw_circle" 6 | 296,296,98112,"3001 E Madison St Seattle 98112","grn_stars" 7 | 1000001,0,98199,"32nd Ave W Seattle 98199","donut" 8 | 3158,2378,98117,"606 NW 76th St Seattle 98117","orange_diamond" 9 | 4404,2533,98103,"723 N 35th St Seattle 98103","ylw_circle" 10 | 1000002,0,98118,"Lake Washington Blvd S & S Adams St Seattle 98118","donut" 11 | 244,1886,98125,"12526 27th Ave NE Seattle 98125","orange_diamond" 12 | 445,1888,98116,"1702 Alki Ave SW Seattle 98116","orange_diamond" 13 | 446,1049,98116,"5817 SW Lander St Seattle 98116","ltblu_square" 14 | 3914,1891,98122,"1504 34TH Ave Seattle 98122","orange_diamond" 15 | 426,1892,98144,"2000 Martin Luther King Jr Way S Seattle 98144","orange_diamond" 16 | 2927,1894,98116,"4000 Beach Dr SW Seattle 98116","orange_diamond" 17 | 1556,1898,98199,"3431 Arapahoe Pl W Seattle 98199","orange_diamond" 18 | 485,1907,98146,"4120 Arroyo Dr SW Seattle 98146","orange_diamond" 19 | 4081,1908,98118,"8702 Seward Park Ave S Seattle 98118","orange_diamond" 20 | 4243,2541,98144,"1501 21st Ave S Seattle 98144","ylw_circle" 21 | 241,2552,98103,"4020 Fremont Ave N Seattle 98103","ylw_circle" 22 | 4006,1910,98102,"2548 Delmar Dr E Seattle 98102","orange_diamond" 23 | 2840,1911,98117,"8347 14th Ave NW Seattle 98117","orange_diamond" 24 | 4278,1913,98107,"5701 22nd Ave NW Seattle 98107","orange_diamond" 25 | 4428,1914,98107,"1702 nw 62nd St Seattle 98107","orange_diamond" 26 | 497,1108,98107,"2644 NW 60th St Seattle 98107","ltblu_square" 27 | 4073,2553,98115,"7802 Banner Way NE Seattle 98115","ylw_circle" 28 | 3703,1919,98116,"6425 SW Admiral Way Seattle 98116","orange_diamond" 29 | 303,1110,98199,"2614 24th Ave W Seattle 98199","ltblu_square" 30 | 1000342,0,98119,"3rd Ave W & W Prospect St Seattle 98119","donut" 31 | 400,1074,98144,"1902 13th Ave S Seattle 98144","ltblu_square" 32 | 
3119,1923,98104,"1110 S Dearborn St Seattle 98104","orange_diamond" 33 | 4481,1075,98125,"5th Ave NE & NE 103rd St Seattle 98125","ltblu_square" 34 | 4028,0,98105,"5809 15th Ave NE Seattle 98105","donut" 35 | 440,1925,98118,"8650 55th Ave S Seattle 98118","orange_diamond" 36 | 4472,2543,98121,"1st to 5th Ave on Bell St Seattle 98121","ylw_circle" 37 | 4022,1182,98102,"Bellevue Ave E & Bellevue Pl E Seattle 98102","ltblu_square" 38 | 4415,1186,98121,"2512 Elliott Ave Seattle 98121","ltblu_square" 39 | 346,1290,98102,"703 Belmont Pl E Seattle 98102","ltblu_square" 40 | 447,1188,98126,"3600 SW Admiral Way Seattle 98126","ltblu_square" 41 | 475,1189,98105,"3659 42nd Ave NE Seattle 98105","ltblu_square" 42 | 436,1077,98118,"9320 38th Ave S Seattle 98118","ltblu_square" 43 | 4245,1190,98122,"1401 23rd Ave S Seattle 98122","ltblu_square" 44 | 253,1191,98107,"5420 22nd Ave NW Seattle 98107","ltblu_square" 45 | 304,1193,98109,"1215 5th Ave N Seattle 98109","ltblu_square" 46 | 288,1113,98133,"13035 Linden Ave N Seattle 98133","ltblu_square" 47 | 4450,1194,98133,"14201 Linden Ave N Seattle 98133","ltblu_square" 48 | 3907,1196,98119,"513 W Olympic Pl Seattle 98119","ltblu_square" 49 | 4418,1198,98144,"1520 26th Ave S Seattle 98144","ltblu_square" 50 | 238,1199,98117,"1851 NW Blue Ridge Dr Seattle 98117","ltblu_square" 51 | 239,1200,98117,"Radford Ave NW & NW Milford Way Seattle 98117","ltblu_square" 52 | -------------------------------------------------------------------------------- /D3/exercise_3/exercise_3.js: -------------------------------------------------------------------------------- 1 | // set the dimensions and margins of the graph 2 | var outerWidth = 960; 3 | var outerHeight = 500; 4 | 5 | var margin = {top: 50, right: 20, bottom: 80, left: 80}, 6 | width = outerWidth - margin.left - margin.right, 7 | height = outerHeight - margin.top - margin.bottom; 8 | 9 | // set the ranges 10 | var y= d3.scaleLinear() 11 | .range([height, 0]); 12 | 13 | var x = d3.scaleBand() 
14 | .range([0, width]) 15 | .padding(0.33); 16 | 17 | var xAxis = d3.axisTop(x) 18 | .ticks(5) 19 | 20 | var yAxis = d3.axisLeft(y) 21 | .tickFormat('') 22 | 23 | // append the svg object to the body of the page 24 | // append a 'group' element to 'svg' 25 | // moves the 'group' element to the top left margin 26 | var svg = d3.select('body').append('svg') 27 | .attr("class", "chart") 28 | .attr("width", outerWidth) 29 | .attr("height", outerHeight) 30 | .append("g") 31 | .attr("transform", `translate(${margin.left},${margin.top})`); 32 | 33 | // data 34 | var data = [{'team':'Boston','value':100}, 35 | {'team':'Detroit','value':85}, 36 | {'team':'New York','value':80}, 37 | {'team':'Atlanta','value':75}, 38 | {'team':'Chicago','value':30}] 39 | 40 | 41 | // scale the range of the data in the domains 42 | y.domain([0, d3.max(data, d => d.value)]) 43 | x.domain(data.map(d => d.team)); 44 | 45 | 46 | 47 | // append the rectangles for the bar chart 48 | var bar = svg.selectAll(".bar") 49 | .data(data) 50 | .join("g") 51 | .attr("class","bar") 52 | 53 | 54 | var rect = bar.append('rect') 55 | .attr("height", d => height - y(d.value)) 56 | .attr("x", d => x(d.team)) 57 | .attr("width", x.bandwidth()) 58 | .attr("y", d => y(d.value)) 59 | .style('fill', d => d3.interpolatePurples(d.value/100)); 60 | 61 | 62 | // add the x Axis 63 | svg.append("g") 64 | .attr('class', 'xaxis') 65 | .attr("transform", `translate(0, ${height})`) 66 | .call(d3.axisBottom(x)); 67 | 68 | // add the y Axis 69 | svg.append("g") 70 | .call(d3.axisLeft(y)); 71 | 72 | // add chart labels 73 | labels = svg.append('g') 74 | .attr('class', 'label') 75 | 76 | // x label 77 | labels.append('text') 78 | .attr('transform', `translate(${width/2},450)`) 79 | .text('Teams') 80 | 81 | // y label 82 | ylabel = labels.append('text') 83 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 84 | .text('Wins') 85 | 86 | barLabels = bar.append('text') 87 | .attr('class', 'barlabel') 88 | .attr('x', d => 
x(d.team) + (x.bandwidth()/2)) 89 | .attr('y', d => y(d.value) - 15) 90 | .text(d => d.value) 91 | .style('fill', 'black') 92 | 93 | 94 | function updateAlpha() { 95 | 96 | x.domain((data.map(d => d.team)).sort()); 97 | 98 | bar.selectAll('rect') 99 | .attr("x", d => x(d.team)) 100 | 101 | svg.select(".xaxis") 102 | .call(d3.axisBottom(x)); 103 | 104 | 105 | bar.selectAll('.barlabel') 106 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 107 | 108 | } 109 | 110 | 111 | function updateNum() { 112 | 113 | data.sort((a,b) => d3.ascending(a.value, b.value)) 114 | 115 | x.domain(data.map(d => d.team)); 116 | 117 | bar.selectAll('rect') 118 | .attr("x", d => x(d.team)) 119 | 120 | svg.select(".xaxis") 121 | .call(d3.axisBottom(x)); 122 | 123 | bar.selectAll('.barlabel') 124 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 125 | 126 | } 127 | -------------------------------------------------------------------------------- /D3/exercise_2/solution/solution_2.js: -------------------------------------------------------------------------------- 1 | // set the dimensions and margins of the graph 2 | var outerWidth = 960; 3 | var outerHeight = 500; 4 | 5 | var margin = {top: 50, right: 20, bottom: 80, left: 80}, 6 | width = outerWidth - margin.left - margin.right, 7 | height = outerHeight - margin.top - margin.bottom; 8 | 9 | // set the ranges 10 | var x = d3.scaleBand() 11 | .range([0, width]) 12 | .padding(0.33); 13 | 14 | var y= d3.scaleLinear() 15 | .range([height, 0]); 16 | 17 | 18 | var xAxis = d3.axisTop(x) 19 | .ticks(5) 20 | 21 | var yAxis = d3.axisLeft(y) 22 | .tickFormat('') 23 | 24 | // append the svg object to the body of the page 25 | // append a 'group' element to 'svg' 26 | // moves the 'group' element to the top left margin 27 | var svg = d3.select('body').append('svg') 28 | .attr("class", "chart") 29 | .attr("width", outerWidth) 30 | .attr("height", outerHeight) 31 | .append("g") 32 | .attr("transform", `translate(${margin.left},${margin.top})`); 33 | 34 | // 
data 35 | var data = [{'team':'Boston','value':100}, 36 | {'team':'Detroit','value':85}, 37 | {'team':'New York','value':80}, 38 | {'team':'Atlanta','value':75}, 39 | {'team':'Chicago','value':30}] 40 | 41 | 42 | // scale the range of the data in the domains 43 | x.domain(data.map(d => d.team)); 44 | y.domain([0, d3.max(data, d => d.value)]) 45 | 46 | 47 | // append the rectangles for the bar chart 48 | var bar = svg.selectAll(".bar") 49 | .data(data) 50 | .join("g") 51 | .attr("class","bar") 52 | 53 | 54 | var rect = bar.append('rect') 55 | .attr("height", d => height - y(d.value)) 56 | .attr("x", d => x(d.team)) 57 | .attr("width", x.bandwidth()) 58 | .attr("y", d => y(d.value)) 59 | .style('fill', d => d3.interpolatePurples(d.value/100)); 60 | 61 | 62 | // add the x Axis 63 | svg.append("g") 64 | .attr('class', 'xaxis') 65 | .attr("transform", `translate(0, ${height})`) 66 | .call(d3.axisBottom(x)); 67 | 68 | // add the y Axis 69 | svg.append("g") 70 | .call(d3.axisLeft(y)); 71 | 72 | // add chart labels 73 | labels = svg.append('g') 74 | .attr('class', 'label') 75 | 76 | // x label 77 | labels.append('text') 78 | .attr('transform', `translate(${width/2},450)`) 79 | .text('Teams') 80 | 81 | // y label 82 | ylabel = labels.append('text') 83 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 84 | .text('Wins') 85 | 86 | barLabels = bar.append('text') 87 | .attr('class', 'barlabel') 88 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 89 | .attr('y', d => y(d.value) - 15) 90 | .text(d => d.value) 91 | .style('fill', 'black') 92 | 93 | 94 | function updateAlpha() { 95 | 96 | x.domain((data.map(d => d.team)).sort()); 97 | 98 | bar.selectAll('rect') 99 | .attr("x", d => x(d.team)) 100 | 101 | svg.select(".xaxis") 102 | .call(d3.axisBottom(x)); 103 | 104 | 105 | bar.selectAll('.barlabel') 106 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 107 | 108 | 109 | 110 | } 111 | 112 | function updateNum() { 113 | 114 | data.sort((a,b) => d3.ascending(a.value, 
b.value)) 115 | 116 | x.domain(data.map(d => d.team)); 117 | 118 | bar.selectAll('rect') 119 | .attr("x", d => x(d.team)) 120 | 121 | svg.select(".xaxis") 122 | .call(d3.axisBottom(x)); 123 | 124 | bar.selectAll('.barlabel') 125 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 126 | 127 | } 128 | -------------------------------------------------------------------------------- /thematic-maps/snow_crashes.csv: -------------------------------------------------------------------------------- 1 | County January February March April May June July August September October November December Total 2 | Alcona 7 7 0 1 0 0 0 0 0 0 5 1 21 3 | Alger 8 17 5 0 0 0 0 0 0 2 4 1 37 4 | Allegan 206 169 6 0 0 0 0 0 0 1 30 17 429 5 | Alpena 16 20 1 0 0 0 0 0 0 1 9 5 52 6 | Antrim 30 24 10 0 0 0 0 0 0 3 5 3 75 7 | Arenac 11 5 1 3 0 0 0 0 0 0 7 3 30 8 | Baraga 4 4 3 1 0 0 0 0 0 0 3 7 22 9 | Barry 39 25 2 0 0 0 0 0 0 0 13 13 92 10 | Bay 83 41 24 0 1 0 1 0 0 0 22 16 188 11 | Benzie 11 26 3 0 0 0 0 0 0 0 9 1 50 12 | Berrien 282 432 33 0 0 0 0 0 0 0 62 41 850 13 | Branch 37 46 5 0 0 0 0 0 0 0 26 29 143 14 | Calhoun 195 106 7 0 0 0 0 0 0 0 54 74 436 15 | Cass 39 69 7 0 0 0 0 0 0 0 17 18 150 16 | Charlevoix 15 8 1 0 0 0 0 0 0 0 5 4 33 17 | Cheboygan 11 5 9 0 0 0 0 1 0 4 3 5 38 18 | Chippewa 47 27 21 1 0 0 0 0 0 0 7 6 109 19 | Clare 41 22 4 3 0 0 0 0 0 0 9 12 91 20 | Clinton 52 48 8 0 0 0 0 0 0 0 25 21 154 21 | Crawford 25 14 4 0 0 0 0 0 0 1 17 3 64 22 | Delta 12 18 8 2 0 0 0 0 0 0 6 2 48 23 | Dickinson 17 8 4 7 0 0 0 0 0 0 2 3 41 24 | Eaton 87 74 5 0 0 0 0 0 0 0 37 36 239 25 | Emmet 12 11 5 0 0 0 0 0 0 1 8 8 45 26 | Genesee 326 167 61 1 0 0 0 0 0 2 123 120 800 27 | Gladwin 8 9 1 0 0 0 0 0 0 0 8 1 27 28 | Gogebic 8 16 2 0 0 0 0 0 0 0 11 9 46 29 | Grand Traverse 139 201 18 3 0 0 0 0 0 0 33 39 433 30 | Gratiot 33 25 6 0 0 0 0 0 0 0 13 0 77 31 | Hillsdale 35 39 2 0 0 0 0 0 0 0 34 8 118 32 | Houghton 42 47 19 6 0 0 0 0 0 0 16 37 167 33 | Huron 32 25 9 2 0 0 0 0 0 0 11 1 80 34 | Ingham 241 143 32 2 0 
0 0 0 1 0 100 84 603 35 | Ionia 62 54 6 1 0 0 0 0 0 0 12 9 144 36 | Iosco 11 8 2 1 0 0 0 0 0 0 7 10 39 37 | Iron 2 1 0 2 0 0 0 0 0 0 3 2 10 38 | Isabella 86 55 12 2 0 0 0 0 0 1 29 5 190 39 | Jackson 201 81 14 0 0 0 0 0 0 0 91 36 423 40 | Kalamazoo 350 261 12 2 0 0 0 0 0 2 59 131 817 41 | Kalkaska 36 12 1 0 0 0 0 0 0 2 6 3 60 42 | Kent 764 519 68 8 0 0 0 0 0 1 126 226 1,712 43 | Keweenaw 6 2 0 1 0 0 0 0 0 0 1 2 12 44 | Lake 11 8 2 0 0 0 0 0 0 0 5 7 33 45 | Lapeer 78 50 33 3 0 0 0 0 0 0 50 18 232 46 | Leelanau 14 18 4 0 0 0 0 0 0 0 8 6 50 47 | Lenawee 47 47 10 0 0 0 0 0 0 0 29 12 145 48 | Livingston 159 108 19 2 0 1 0 0 0 4 102 47 442 49 | Luce 6 5 6 0 0 0 0 0 0 0 5 5 27 50 | Mackinac 19 15 9 7 0 0 0 0 0 0 1 3 54 51 | Macomb 444 355 112 1 0 0 0 0 0 0 102 60 1,074 52 | Manistee 34 30 5 0 0 0 0 0 0 0 6 12 87 53 | Marquette 51 110 19 15 0 0 0 0 0 4 23 31 253 54 | Mason 58 62 10 0 0 0 0 0 0 0 7 4 141 55 | Mecosta 36 45 5 0 0 0 0 0 0 0 13 12 111 56 | Menominee 6 4 8 1 0 0 0 0 0 0 2 2 23 57 | Midland 56 22 12 2 0 0 0 0 0 1 15 8 116 58 | Missaukee 14 9 0 1 0 0 0 0 0 2 11 6 43 59 | Monroe 109 128 18 0 0 0 0 0 0 0 15 16 286 60 | Montcalm 50 35 3 2 0 0 0 0 0 0 19 8 117 61 | Montmorency 9 1 0 1 0 0 0 0 0 0 4 3 18 62 | Muskegon 249 287 13 0 0 0 0 0 0 0 23 7 579 63 | Newaygo 49 28 5 0 0 0 0 0 0 0 18 4 104 64 | Oakland 863 633 193 10 1 0 0 0 0 0 367 220 2,287 65 | Oceana 36 39 5 0 0 0 0 0 0 0 6 1 87 66 | Ogemaw 9 19 1 3 0 0 0 0 0 0 11 7 50 67 | Ontonagon 7 5 6 0 0 0 0 0 0 0 6 12 36 68 | Osceola 38 16 2 2 0 0 0 0 0 0 10 13 81 69 | Oscoda 8 3 1 0 0 0 0 0 0 0 7 1 20 70 | Otsego 55 12 9 4 0 0 0 0 0 0 25 9 114 71 | Ottawa 399 388 16 1 0 0 0 0 0 0 46 33 883 72 | Presque Isle 17 10 2 1 0 0 0 0 0 2 1 3 36 73 | Roscommon 14 8 4 5 0 0 0 0 0 1 11 7 50 74 | Saginaw 150 85 39 7 0 0 0 0 0 0 49 25 355 75 | St. Clair 93 65 36 3 0 0 0 0 0 1 23 23 244 76 | St. 
Joseph 39 45 4 0 0 0 0 0 0 0 20 13 121 77 | Sanilac 32 17 14 1 0 0 0 0 0 0 13 5 82 78 | Schoolcraft 13 11 0 2 0 0 0 0 0 1 1 1 29 79 | Shiawassee 40 41 4 0 0 0 0 0 0 0 26 12 123 80 | Tuscola 33 27 16 1 0 0 0 0 0 0 20 4 101 81 | Van Buren 134 155 4 0 0 0 0 0 0 0 33 31 357 82 | Washtenaw 308 233 42 1 0 0 1 0 0 0 123 25 733 83 | Wayne 1,143 877 182 2 1 0 1 0 0 1 217 131 2,555 84 | Wexford 29 55 13 5 0 0 0 0 0 0 12 13 127 -------------------------------------------------------------------------------- /D3/exercise_3/solution/solution_3.js: -------------------------------------------------------------------------------- 1 | // set the dimensions and margins of the graph 2 | var outerWidth = 960; 3 | var outerHeight = 500; 4 | 5 | var margin = {top: 50, right: 20, bottom: 80, left: 80}, 6 | width = outerWidth - margin.left - margin.right, 7 | height = outerHeight - margin.top - margin.bottom; 8 | 9 | // set the ranges 10 | var y= d3.scaleLinear() 11 | .range([height, 0]); 12 | 13 | var x = d3.scaleBand() 14 | .range([0, width]) 15 | .padding(0.33); 16 | 17 | var xAxis = d3.axisTop(x) 18 | .ticks(5) 19 | 20 | var yAxis = d3.axisLeft(y) 21 | .tickFormat('') 22 | 23 | // append the svg object to the body of the page 24 | // append a 'group' element to 'svg' 25 | // moves the 'group' element to the top left margin 26 | var svg = d3.select('body').append('svg') 27 | .attr("class", "chart") 28 | .attr("width", outerWidth) 29 | .attr("height", outerHeight) 30 | .append("g") 31 | .attr("transform", `translate(${margin.left},${margin.top})`); 32 | 33 | // data 34 | var data = [{'team':'Boston','value':100}, 35 | {'team':'Detroit','value':85}, 36 | {'team':'New York','value':80}, 37 | {'team':'Atlanta','value':75}, 38 | {'team':'Chicago','value':30}] 39 | 40 | 41 | // scale the range of the data in the domains 42 | y.domain([0, d3.max(data, d => d.value)]) 43 | x.domain(data.map(d => d.team)); 44 | 45 | 46 | // append the rectangles for the bar chart 47 | var bar = 
svg.selectAll(".bar") 48 | .data(data) 49 | .join("g") 50 | .attr("class","bar") 51 | 52 | 53 | var rect = bar.append('rect') 54 | .attr("height", d => height - y(d.value)) 55 | .attr("x", d => x(d.team)) 56 | .attr("width", x.bandwidth()) 57 | .attr("y", d => y(d.value)) 58 | .style('fill', d => d3.interpolatePurples(d.value/100)); 59 | 60 | 61 | // add the x Axis 62 | svg.append("g") 63 | .attr('class', 'xaxis') 64 | .attr("transform", `translate(0, ${height})`) 65 | .call(d3.axisBottom(x)); 66 | 67 | // add the y Axis 68 | svg.append("g") 69 | .call(d3.axisLeft(y)); 70 | 71 | // add chart labels 72 | labels = svg.append('g') 73 | .attr('class', 'label') 74 | 75 | // x label 76 | labels.append('text') 77 | .attr('transform', `translate(${width/2},450)`) 78 | .text('Teams') 79 | 80 | // y label 81 | ylabel = labels.append('text') 82 | .attr('transform', `translate(-45,${height/2}) rotate(-90)`) 83 | .text('Wins') 84 | 85 | barLabels = bar.append('text') 86 | .attr('class', 'barlabel') 87 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 88 | .attr('y', d => y(d.value) - 15) 89 | .text(d => d.value) 90 | .style('fill', 'black') 91 | 92 | 93 | function updateAlpha() { 94 | const T = 500 95 | 96 | x.domain((data.map(d => d.team)).sort()); 97 | 98 | bar.selectAll('rect') 99 | .transition().duration(T) 100 | .attr("x", d => x(d.team)) 101 | 102 | svg.select(".xaxis") 103 | .transition().duration(T) 104 | .call(d3.axisBottom(x)) 105 | 106 | bar.selectAll('.barlabel') 107 | .transition().duration(T) 108 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 109 | 110 | 111 | } 112 | 113 | function updateNum() { 114 | const T = 500 115 | 116 | data.sort((a,b) => d3.ascending(a.value, b.value)); 117 | 118 | x.domain(data.map(d => d.team)); 119 | 120 | bar.selectAll('rect') 121 | .transition().duration(T) 122 | .attr("x", d => x(d.team)) 123 | 124 | svg.select(".xaxis") 125 | .transition().duration(T) 126 | .call(d3.axisBottom(x)) 127 | 128 | bar.selectAll('.barlabel') 129 | 
.transition().duration(T) 130 | .attr('x', d => x(d.team) + (x.bandwidth()/2)) 131 | 132 | } 133 | 134 | -------------------------------------------------------------------------------- /D3/sortable.js: -------------------------------------------------------------------------------- 1 | async function createChart() { 2 | 3 | // read data 4 | const fileLocation = 'https://gist.githubusercontent.com/caocscar/8cdb75721ea4f6c8a032a00ebc73516c/raw/854bbee2faffb4f6947b6b6c2424b18ca5a8970e/mlb2018.csv' 5 | DATA = await d3.csv(fileLocation, type) 6 | let chartDate = new Date(2018,3,3) 7 | let data = filterData(chartDate) 8 | 9 | // margins 10 | let margin = {top: 80, right: 90, bottom: 30+50, left: 120}, 11 | width = 900 - margin.left - margin.right, 12 | height = 1500 - margin.top - margin.bottom; // 760 13 | 14 | // svg setup 15 | let svg = d3.select('body').append('svg') 16 | .attr("class", "chart") 17 | .attr("width", width + margin.left + margin.right) 18 | .attr("height", height + margin.top + margin.bottom) 19 | .append("g") 20 | .attr("transform", `translate(${margin.left},${margin.top})`); 21 | 22 | // set up scales 23 | let y = d3.scaleBand() 24 | .domain(data.map(d => d.team).reverse()) 25 | .range([height, 0]) 26 | .padding(0.33) 27 | 28 | let x = d3.scaleLinear() 29 | .domain([0, Math.ceil(d3.max(data, d => d.value)/5)*5]) 30 | .range([0, width]); 31 | 32 | // add axes 33 | let xAxis = d3.axisTop(x) 34 | .ticks(6) 35 | 36 | svg.append("g") 37 | .attr("class", "x axis") 38 | .call(xAxis); 39 | 40 | let yAxis = d3.axisLeft(y) 41 | .tickFormat('') 42 | 43 | svg.append("g") 44 | .attr("class", "y axis") 45 | .call(yAxis); 46 | 47 | // add the x-axis gridlines 48 | let gridlines = d3.axisTop(x) 49 | .ticks(6) 50 | .tickSize(-height) 51 | .tickFormat("") 52 | 53 | svg.append("g") 54 | .attr("class", "grid") 55 | .call(gridlines) 56 | 57 | // set up bar groups 58 | let bar = svg.selectAll(".bar") 59 | .data(data) 60 | .join("g") 61 | .attr("class", "bar") 62 | 
.attr("transform", d => `translate(0,${y(d.team)})`) 63 | 64 | // adding bars 65 | let rects = bar.append('rect') 66 | .attr("width", (d,i) => x(d.value)) 67 | .attr("height", y.bandwidth()) 68 | .style('fill', d => d3.interpolateRdYlBu(d.value/100)) 69 | 70 | // team labels 71 | bar.append('text') 72 | .attr('class', 'team') 73 | .attr('x', -10) 74 | .attr('y', y.bandwidth()/2 + 5) 75 | .text(d => d.team) 76 | 77 | // team logos 78 | const imgsize = 40 79 | let imgs = bar.append("svg:image") 80 | .attr('class', 'logo') 81 | .attr('x', d => x(d.value) + 5) 82 | .attr('y', -5) 83 | .attr('width', imgsize) 84 | .attr('height', imgsize) 85 | .attr("xlink:href", d => `http://www.capsinfo.com/images/MLB_Team_Logos/${urls[d.team]}.png`) 86 | 87 | // bar labels 88 | let barLabels = bar.append('text') 89 | .attr('class', 'barlabel') 90 | .attr('x', d => x(d.value) + 10 + imgsize) 91 | .attr('y', y.bandwidth()/2 + 5) 92 | .text(d => d.value) 93 | 94 | // other chart labels 95 | labels = svg.append('g') 96 | .attr('class', 'label') 97 | 98 | // x label 99 | labels.append('text') 100 | .attr('transform', `translate(${width},-40)`) 101 | .text('Wins') 102 | 103 | // y label 104 | ylabel = labels.append('text') 105 | .attr('transform', `translate(-80,${height/2}) rotate(-90)`) // order matters 106 | .text('Teams') 107 | 108 | // date label 109 | const formatDate = d3.timeFormat('%b %-d') 110 | let dateLabel = labels.append('text') 111 | .attr('id', 'date') 112 | .attr('transform', 'translate(0,-40)') 113 | .text(formatDate(chartDate)) 114 | 115 | labels.append('text') 116 | .attr('id', 'season') 117 | .attr('transform', `translate(${width/2},-40)`) 118 | .text('MLB 2018 Season') 119 | 120 | // clipping rectangle 121 | const z = 0.97*(height / data.length) 122 | d3.select('.chart').append("defs") 123 | .append("clipPath") 124 | .attr("id", "clip") 125 | .append("rect") 126 | .attr('x', 0) 127 | .attr('y', 0) 128 | .attr("width", width + margin.left + margin.right) 129 | 
.attr("height", 0.4*height) 130 | 131 | // sorting transition 132 | const T = 300 133 | let dailyUpdate = setInterval(function() { 134 | 135 | chartDate = d3.timeDay.offset(chartDate,1) 136 | dateLabel.text(formatDate(chartDate)) 137 | data = filterData(chartDate) 138 | 139 | // update x-axis 140 | x.domain([0, Math.ceil(d3.max(data, d => d.value)/5)*5]); 141 | svg.select('.x.axis').transition().duration(T) 142 | .call(xAxis); 143 | svg.select('.grid').transition().duration(T) 144 | .call(gridlines); 145 | 146 | // update bar chart 147 | rects.data(data) 148 | .transition().duration(T) 149 | .attr("width", d => x(d.value)) 150 | .style('fill', d => d3.interpolateRdYlBu(d.value/100)) 151 | imgs.data(data) 152 | .transition().duration(T) 153 | .attr('x', d => x(d.value) + 5) 154 | barLabels.data(data) 155 | .transition().duration(T) 156 | .attr('x', d => x(d.value) + 10 + imgsize) 157 | .text(d => d.value) 158 | 159 | // sort data 160 | data.sort((a,b) => d3.descending(a.value,b.value)); 161 | 162 | // update y-axis 163 | y.domain(data.map(d => d.team).reverse()); 164 | bar.transition().duration(T) 165 | .attr("transform", d => `translate(0,${y(d.team)})`) 166 | 167 | // exit function 168 | if (chartDate > new Date(2018,9,1)) { 169 | clearInterval(dailyUpdate) 170 | } 171 | 172 | }, T); 173 | 174 | } 175 | 176 | function type(d) { 177 | const formatDate = d3.timeParse('%Y%m%d') 178 | d.date = formatDate(d.date) 179 | return d 180 | } 181 | 182 | function filterData(chartDate) { 183 | const snapshot = DATA.filter(d => d.date <= chartDate) 184 | const wins = d3.rollup(snapshot, v => v.length, d => d.team) // returns Map object 185 | return Array.from(wins, ([key, value]) => ({'team':key, 'value':value})) 186 | } -------------------------------------------------------------------------------- /sql-intermediate/README.md: -------------------------------------------------------------------------------- 1 | # Intermediate SQL 2 | 3 | Here is the [Google Slide 
Deck](https://docs.google.com/presentation/d/1sx7FL58BHbzPWb59Tq1S38QBL1KjNEjse3IyqK4nohY/edit?usp=sharing) for the workshop. 4 | 5 | Link to web-based database [db-fiddle](https://www.db-fiddle.com) for practicing SQL. 6 | 7 | Link to the [Covid dataset](https://gist.github.com/caocscar/b9a1418e5fd9c2cd69bb6f9d67fbc05a) for the exercises. 8 |
9 | 10 | ## Workshop Material 11 | Query Syntax Covered: 12 | - IF 13 | - CASE 14 | - WHEN 15 | - ROLLUP 16 | - GROUPING 17 | - REPLACE 18 | - OVER (Window Functions) 19 | - RANK 20 | - DENSE_RANK 21 | - WINDOW 22 | - PARTITION BY 23 | - WITH (Common Table Expressions) 24 | 25 | Schema Syntax Covered: 26 | - CREATE TABLE 27 | - INSERT 28 | - DELETE 29 | - DROP 30 | - IF [NOT] EXISTS 31 | - NOT NULL 32 | - PRIMARY KEY 33 | - AUTO_INCREMENT 34 | - SHOW COLUMNS 35 | - INSERT IGNORE INTO 36 | - UNIQUE 37 | - ALTER TABLE 38 | - ADD COLUMN 39 | - DROP COLUMN 40 | - MODIFY COLUMN 41 | - UPDATE 42 | - INDEX 43 | 44 | Miscellaneous Syntax: 45 | - SHOW COLUMNS 46 | - DESCRIBE 47 | - SHOW TABLES 48 | - SHOW INDEX 49 | 50 | ## Appendix 51 |
52 | Solutions Hiding Here 53 | 54 | #### Practice 1 55 | ```SQL 56 | SELECT County, Day, Deaths, 57 | CASE 58 | WHEN Deaths = 0 THEN -1 59 | WHEN Deaths = 1 THEN 0 60 | ELSE LOG(Deaths) 61 | END AS deathIndex 62 | FROM Covid 63 | ORDER BY deathIndex DESC 64 | ``` 65 | 66 | #### Practice 2 67 | ```SQL 68 | SELECT IF(GROUPING(County), 'Total', County) as County, 69 | SUM(Deaths) AS Total 70 | FROM Covid 71 | GROUP BY County WITH ROLLUP 72 | ``` 73 | 74 | #### Practice 2b 75 | ```SQL 76 | SELECT 77 | IF(GROUPING(County),'Michigan Total', IF(GROUPING(CP), 'County Total', County)) AS COUNTY, 78 | SUM(Deaths) AS DeathTotal, 79 | CP 80 | FROM Covid 81 | GROUP BY County, CP WITH ROLLUP 82 | ``` 83 | 84 | #### Practice 3 85 | ```SQL 86 | SELECT REPLACE(County, "St", "Saint") AS County, 87 | Day, 88 | Cases, 89 | RANK() OVER (PARTITION BY Day ORDER BY Cases DESC) AS 'Rank' 90 | FROM Covid 91 | WHERE Day BETWEEN '2020-09-24' AND '2020-09-30' 92 | AND County LIKE 'S%' 93 | AND CP = 'Confirmed' 94 | ``` 95 | 96 | #### Practice 3b 97 | ```SQL 98 | SELECT County, Day, Cases, 99 | LAG(Cases, 7) OVER (ORDER BY Day) As 'WeekAgo' 100 | FROM Covid 101 | WHERE County = 'Wayne' AND CP = 'Confirmed' 102 | ORDER BY Day DESC 103 | ``` 104 | 105 | #### Practice 4 106 | ```SQL 107 | WITH cte AS 108 | ( 109 | SELECT Day, 110 | WEEK(Day) AS Week, 111 | CP, 112 | SUM(Cases) as Total 113 | FROM Covid 114 | GROUP BY Day, CP 115 | ) 116 | 117 | SELECT Week, MAX(Total) 118 | FROM cte 119 | GROUP BY Week 120 | ``` 121 | 122 | #### Practice A 123 | ```SQL 124 | CREATE TABLE Michigan ( 125 | Category VARCHAR(6), 126 | Value VARCHAR(7), 127 | `Cases` INTEGER, 128 | `Deaths` INTEGER, 129 | `CaseFatalityRatio` FLOAT 130 | ); 131 | 132 | INSERT INTO Michigan 133 | (Category, `Value`, Cases, `Deaths`, `CaseFatalityRatio`) 134 | VALUES 135 | ('Gender', 'Female', '61390', '3212', '0.051'), 136 | ('Gender', 'Male', '57956', '3511', '0.061'), 137 | ('Gender', 'Unknown', '281', null, null); 138 | ``` 139 | 140 
| #### Practice B 141 | ```SQL 142 | CREATE TABLE MI ( 143 | ID INT AUTO_INCREMENT, 144 | `Day` VARCHAR(3), 145 | `Category` VARCHAR(9), 146 | `Value` VARCHAR(19) NOT NULL, 147 | `Pct of Cases` FLOAT, 148 | `Pct of Deaths` FLOAT, 149 | PRIMARY KEY (ID) 150 | ); 151 | 152 | INSERT INTO MI 153 | (`Day`, `Category`, `Value`, `Pct of Cases`, `Pct of Deaths`) 154 | VALUES 155 | ('Sat', 'Ethnicity', 'Hispanic/Latino', '0.08', '0.03'), 156 | ('Sat', 'Ethnicity', 'Non-Hispanic Latino', '0.69', '0.85'), 157 | ('Sat', 'Ethnicity', 'Unknown', '0.23', '0.12'); 158 | ``` 159 | 160 | #### Practice B2 161 | ```SQL 162 | INSERT INTO MI 163 | (Day, Value) 164 | VALUES 165 | ('Sun', null); 166 | 167 | INSERT INTO MI 168 | (ID, Day, Value) 169 | VALUES 170 | (3, 'Sun', 'Unknown'); 171 | ``` 172 | 173 | #### Practice C 174 | ```SQL 175 | CREATE TABLE mi ( 176 | `Category` VARCHAR(3), 177 | `Value` VARCHAR(8) UNIQUE, 178 | `Cases` INTEGER, 179 | `Deaths` INTEGER DEFAULT 0, 180 | `CaseFatalityRatio` FLOAT DEFAULT 0 181 | ); 182 | 183 | INSERT INTO mi 184 | (`Category`, `Value`, `Cases`) 185 | VALUES 186 | ('Age', '0 to 19', '13342'), 187 | ('Age', 'Unknown', '109'); 188 | 189 | INSERT INTO mi 190 | VALUES 191 | ('Age', '20 to 29', '23038', '29', '0.001'), 192 | ('Age', '30 to 39', '16858', '71', '0.004'), 193 | ('Age', '40 to 49', '17345', '219', '0.013'), 194 | ('Age', '50 to 59', '18393', '541', '0.029'), 195 | ('Age', '60 to 69', '14656', '1188', '0.081'), 196 | ('Age', '70 to 79', '9374', '1808', '0.193'), 197 | ('Age', '80+', '8312', '2864', '0.345'); 198 | ``` 199 | 200 | #### Practice D 201 | ```SQL 202 | -- Schema SQL window 203 | CREATE TABLE mi ( 204 | `Category` VARCHAR(3), 205 | `Value` VARCHAR(8), 206 | `Cases` INTEGER, 207 | `Deaths` INTEGER, 208 | `CaseFatalityRatio` FLOAT 209 | ); 210 | 211 | -- Query SQL window 212 | ALTER TABLE mi 213 | ADD COLUMN day VARCHAR(10); 214 | 215 | ALTER TABLE mi 216 | DROP COLUMN Category, 217 | DROP COLUMN CaseFatalityRatio; 218 | 219 | 
ALTER TABLE mi 220 | MODIFY COLUMN Cases VARCHAR(6); 221 | 222 | DESCRIBE mi; 223 | ``` 224 | 225 | #### Practice E 226 | ```SQL 227 | -- Schema SQL window 228 | CREATE TABLE mi ( 229 | `Category` VARCHAR(3), 230 | `Value` VARCHAR(8), 231 | `Cases` INTEGER, 232 | `Deaths` INTEGER, 233 | `CaseFatalityRatio` FLOAT, 234 | INDEX(Cases) 235 | ); 236 | 237 | INSERT INTO mi 238 | (`Category`, `Value`, `Cases`) 239 | VALUES 240 | ('Age', '0 to 19', '13342'), 241 | ('Age', 'Unknown', '109'); 242 | 243 | INSERT INTO mi 244 | VALUES 245 | ('Age', '20 to 29', '23038', '29', '0.001'), 246 | ('Age', '30 to 39', '16858', '71', '0.004'), 247 | ('Age', '40 to 49', '17345', '219', '0.013'), 248 | ('Age', '50 to 59', '18393', '541', '0.029'), 249 | ('Age', '60 to 69', '14656', '1188', '0.081'), 250 | ('Age', '70 to 79', '9374', '1808', '0.193'), 251 | ('Age', '80+', '8312', '2864', '0.345'); 252 | 253 | UPDATE mi 254 | SET Cases = 1400 255 | WHERE Deaths IS NULL; 256 | 257 | UPDATE mi 258 | SET Deaths = 5, CaseFatalityRatio = 5 259 | WHERE Deaths IS NULL; 260 | 261 | -- Query SQL window 262 | SELECT * FROM mi; 263 | 264 | DESCRIBE mi; 265 | SHOW INDEX FROM mi; -- Alternatively 266 | ``` 267 |
268 | -------------------------------------------------------------------------------- /thematic-maps/snowmobile_crashes.txt: -------------------------------------------------------------------------------- 1 | Crash Instance Worst Injury in Crash Crash Longitude Crash Latitude Crash Report 2 | 2015100675 B - nonincapacitating injury -85.800058172968 46.18084431116 UD-10 3 | 201510428 B - nonincapacitating injury -84.789822248845 45.440443992449 UD-10 4 | 2015104495 C - possible injury -86.740535569118 46.321992916472 UD-10 5 | 2015104570 No injury -85.860376433662 42.738967409964 UD-10 6 | 2015105193 A - incapacitating injury -85.836188702043 44.772364145356 UD-10 7 | 2015106068 A - incapacitating injury -85.483634043104 41.875491838558 UD-10 8 | 2015106317 C - possible injury -85.703826610636 46.309510725084 UD-10 9 | 201511928 B - nonincapacitating injury -85.72090105647 44.425194108759 UD-10 10 | 201511992 B - nonincapacitating injury -85.403925590975 43.865777605951 UD-10 11 | 201512223 A - incapacitating injury -85.849874553891 43.267780456756 UD-10 12 | 201512836 No injury -84.164175021592 46.363569085717 UD-10 13 | 201513939 C - possible injury -85.918523915019 44.5339711822 UD-10 14 | 201517334 C - possible injury -89.012113811027 46.556852689282 UD-10 15 | 201517688 B - nonincapacitating injury -83.432389693911 43.628838813508 UD-10 16 | 201517877 C - possible injury -85.618124874516 43.58429598679 UD-10 17 | 201518127 A - incapacitating injury -84.551456482753 46.360820025168 UD-10 18 | 201518864 C - possible injury -83.360621330608 42.627193483883 UD-10 19 | 201520780 No injury -85.511744264408 44.67990727913 UD-10 20 | 201522294 B - nonincapacitating injury -84.970398918174 44.767687018835 UD-10 21 | 201523414 No injury -82.621439618102 42.933385579213 UD-10 22 | 201526594 C - possible injury -88.533675091365 46.131239666743 UD-10 23 | 201526708 A - incapacitating injury -83.36768691342 43.505122056952 UD-10 24 | 201526895 B - nonincapacitating injury 
-85.039215942012 42.770683368895 UD-10 25 | 201527866 A - incapacitating injury -85.493314923755 44.014436135557 UD-10 26 | 201528011 A - incapacitating injury -82.63914987732 42.67631835496 UD-10 27 | 201530093 No injury -85.790072380071 43.293003810025 UD-10 28 | 201532310 A - incapacitating injury -84.484066105388 45.648368506774 UD-10 29 | 201533350 No injury -82.995564714258 42.699455054772 UD-10 30 | 201534571 C - possible injury -84.673468724015 45.139137069932 UD-10 31 | 201534662 A - incapacitating injury -84.827226280307 44.209248111641 UD-10 32 | 201535067 B - nonincapacitating injury -85.692815232846 43.439001714006 UD-10 33 | 201535324 B - nonincapacitating injury -83.661044655626 43.235162164867 UD-10 34 | 201538347 No injury -86.096055097901 44.628892241637 UD-10 35 | 201539281 C - possible injury -88.002457730606 46.250241819486 UD-10 36 | 201539615 No injury -84.374993926826 44.79095528009 UD-10 37 | 201539712 No injury -85.942129992661 42.382238641889 UD-10 38 | 201540144 C - possible injury -85.973182443957 41.983623401445 UD-10 39 | 201541015 No injury -83.525079129041 44.793058607159 UD-10 40 | 201541016 C - possible injury -83.710905733952 44.696924236573 UD-10 41 | 201541104 B - nonincapacitating injury -85.009424739563 44.64090447431 UD-10 42 | 201541854 B - nonincapacitating injury -84.322411066044 45.387324215226 UD-10 43 | 201542293 A - incapacitating injury -85.951591969289 44.238197231663 UD-10 44 | 201545186 Fatal -86.494607186789 41.950675232664 UD-10 45 | 201545288 A - incapacitating injury -85.306164346805 44.109657752931 UD-10 46 | 201545784 A - incapacitating injury -84.620777432775 45.847477649052 UD-10 47 | 201545792 C - possible injury -84.848263534832 45.017716815188 UD-10 48 | 201545795 No injury -84.702912985489 45.018060357971 UD-10 49 | 201546845 A - incapacitating injury -84.759310704179 42.853609729043 UD-10 50 | 20154704 B - nonincapacitating injury -83.481240471033 44.283673517502 UD-10 51 | 201547337 C - possible 
injury -85.918375795088 44.223028999258 UD-10 52 | 201547361 C - possible injury -85.236701016576 44.374961333914 UD-10 53 | 201547711 No injury -84.922743136885 44.917229501134 UD-10 54 | 201547712 No injury -84.922743136885 44.917229501134 UD-10 55 | 201547817 B - nonincapacitating injury -85.739898404349 44.352626732308 UD-10 56 | 201547819 C - possible injury -85.443656094647 44.223078164062 UD-10 57 | 201547844 B - nonincapacitating injury -84.935026772683 45.045374974985 UD-10 58 | 201548089 A - incapacitating injury -84.381270106224 43.092019957408 UD-10 59 | 201550275 C - possible injury -85.595211453211 46.514654245914 UD-10 60 | 201551026 C - possible injury -87.974715203904 47.389606520347 UD-10 61 | 201551511 No injury -88.50942705195 47.170557499475 UD-10 62 | 201552074 C - possible injury -85.003918064235 45.012957328429 UD-10 63 | 201552075 B - nonincapacitating injury -84.897606029151 45.05936146032 UD-10 64 | 201552150 No injury -84.69118329622 44.99026939293 UD-10 65 | 201552194 A - incapacitating injury -84.874932044759 44.9206778492 UD-10 66 | 201553269 B - nonincapacitating injury -84.770386527415 44.68163592871 UD-10 67 | 201553320 No injury -84.625725808129 45.852029827979 UD-10 68 | 201553435 No injury -84.74444312737 44.684979251624 UD-10 69 | 201554164 A - incapacitating injury -85.4962039299 44.230400028562 UD-10 70 | 201559413 C - possible injury -85.493362139309 43.636153579887 UD-10 71 | 20156263 No injury -84.761345269475 45.51029059808 UD-10 72 | 20156721 No injury -85.852005942417 41.852770907367 UD-10 73 | 201568314 No injury -83.858746163969 42.808999449586 UD-10 74 | 201568330 A - incapacitating injury -84.441878016653 45.149464470839 UD-10 75 | 201569422 No injury -84.947995213508 46.343717610558 UD-10 76 | 20157359 C - possible injury -84.016825650758 43.292010680844 UD-10 77 | 20157453 No injury -85.534166957762 42.575186852924 UD-10 78 | 201574886 Fatal -85.936715440967 44.807284068035 UD-10 79 | 201575072 No injury 
-82.630236491114 42.976273592823 UD-10 80 | 201576009 C - possible injury -83.931191320393 42.635436115333 UD-10 81 | 2015801 No injury -85.105955942282 46.752426014342 UD-10 82 | 201589723 A - incapacitating injury -85.696619536659 44.12359535407 UD-10 83 | 201589732 No injury -85.842420683732 44.139639179498 UD-10 84 | 201595228 A - incapacitating injury -85.404277569817 46.139802071388 UD-10 85 | 2015106335 A - incapacitating injury -84.947996508111 46.345976028008 UD-10 86 | 2015106339 No injury -84.604978501798 46.425548352807 UD-10 87 | 2015106353 B - nonincapacitating injury -84.32223738124 46.456877270562 UD-10 88 | 2015106490 No injury -84.918080727658 43.928749506565 UD-10 89 | 2015106713 No injury -88.294464019266 46.097688013004 UD-10 90 | 2015107643 A - incapacitating injury -89.908625169185 46.498840237387 UD-10 91 | 2015107658 No injury -89.925542849832 46.481548610279 UD-10 92 | 2015107831 A - incapacitating injury -85.373505567305 44.613005050686 UD-10 93 | 2015108064 C - possible injury -84.735796271247 43.364629423323 UD-10 94 | 2015108242 A - incapacitating injury -88.183440163537 47.37799512391 UD-10 95 | 2015108290 C - possible injury -88.805348469807 46.928776615496 UD-10 96 | 2015109219 B - nonincapacitating injury -85.009024201918 43.83084235905 UD-10 97 | 2015109388 No injury -84.374498680557 42.395729350069 UD-10 98 | 2015110669 No injury -86.144147390131 43.260363792897 UD-10 99 | 2015112610 No injury -83.410101202069 43.157321985017 UD-10 100 | 2015112789 B - nonincapacitating injury -84.286036393895 41.966878333666 UD-10 101 | 2015115235 No injury -86.014850998275 46.657393323565 UD-10 102 | 2015115236 No injury -86.552985349844 46.419702140502 UD-10 103 | 2015115353 A - incapacitating injury -87.682822892849 46.498519598707 UD-10 104 | 2015129758 B - nonincapacitating injury -85.427315822355 44.651533921877 UD-10 105 | 2015150184 No injury -86.737587061959 46.329205017028 UD-10 106 | 2015162719 No injury -83.615266056207 
45.177822218761 UD-10 107 | 2015183045 No injury -83.198361994143 42.392714724347 UD-10 108 | 2015183062 No injury -83.202282094243 42.340796094387 UD-10 109 | 2015262225 B - nonincapacitating injury -82.557578993695 42.948125965502 UD-10 110 | 2015264644 B - nonincapacitating injury -85.771788811445 44.647472314438 UD-10 111 | 2015272401 No injury -83.102913294015 42.296970881048 UD-10 112 | 2015300686 C - possible injury -88.78415655252 46.969457635212 UD-10 -------------------------------------------------------------------------------- /regex/data/vins.txt: -------------------------------------------------------------------------------- 1 | 1FDXK84N9FVA40100 2 | 1G1AD5F56A7186931 3 | 2C3CCAAG3EH301682 4 | 2FMDK48C08BA77023 5 | 3GCPCSE08BG284714 6 | 3N1AB7AP8DL787925 7 | 4A3AA46L9XE004326 8 | 4S3BE645527203527 9 | 5J6RM4H38EL039758 10 | 5TFDV58128X062869 11 | JH4CU2F67EC004754 12 | JS2RD62H865350625 13 | KL8CB6S95EC465118 14 | KMHCT5AE1EU146656 15 | SAJGX2747VC015715 16 | SALAK2D40BA576362 17 | WMWZB3C59CWM05007 18 | WP0AB2A76BL061330 19 | YH4K14AA0CA001707 20 | YV4902DZ1D2400099 21 | UNKNOWN 22 | UNK 23 | UNK 24 | UNKNOWN 25 | UU 26 | """UNK""" 27 | (UNKNOWN) 28 | *****UNKNOWN**** 29 | 0 30 | 0000 31 | 0000000000 32 | 00000000000 33 | 000000000000 34 | 0000000000000 35 | 00000000000000 36 | 000000000000000 37 | 0000000000000000 38 | 00000000000000000 39 | 000000000000000000 40 | 0000000000000000000 41 | 00000000000000000000 42 | 000000000000000000000 43 | 0000000000000000000000 44 | 00000000000000000000000 45 | 000000000000000000000000 46 | 0000000000000000000000000 47 | 000000000000000000778 48 | 00000000000000001 49 | 00000000000000003 50 | 00000000000000005 51 | 0000000000000000A 52 | 0000000000000000M 53 | 00000000000000012 54 | 000000000000000AA 55 | 000000000000000PP 56 | 000000000000000XX 57 | 00000000000000UNK 58 | 00000000000000VIN 59 | 00000000000001748 60 | 00000000000002172 61 | 00000000000008038 62 | 00000000000008309 63 | 0000000000000JBS2 64 | 
0000000000000NONE 65 | 00000000000010979 66 | 00000000000012129 67 | 00000000000013433 68 | 00000000000013537 69 | 0000000000001996F 70 | 00000000000022817 71 | 00000000000023341 72 | 00000000000030026 73 | 00000000000030119 74 | 00000000000030778 75 | 00000000000037060 76 | 00000000000041673 77 | 00000000000042165 78 | 00000000000048043 79 | 00000000000053013 80 | 00000000000054135 81 | 00000000000054495 82 | 00000000000055585 83 | 00000000000060096 84 | 00000000000061301 85 | 00000000000061685 86 | 00000000000067828 87 | 00000000000070704 88 | 00000000000075859 89 | 00000000000080253 90 | 0000000000008267B 91 | 00000000000083889 92 | 00000000000089047 93 | 00000000000089177 94 | 00000000000091442 95 | 000000000000B7117 96 | 00000000000120391 97 | 00000000000131398 98 | 00000000000132795 99 | 00000000000135157 100 | 0000000000014438R 101 | 00000000000145136 102 | 00000000000157643 103 | 00000000000160083 104 | 00000000000161438 105 | 00000000000212146 106 | 00000000000214027 107 | 00000000000215886 108 | 00000000000216263 109 | 00000000000236360 110 | 00000000000240057 111 | 00000000000261261 112 | 00000000000266392 113 | 00000000000266416 114 | 00000000000270141 115 | 00000000000310406 116 | 00000000000326687 117 | 00000000000330392 118 | 00000000000332045 119 | 00000000000336748 120 | 00000000000406812 121 | 00000000000408107 122 | 00000000000441256 123 | 00000000000461011 124 | 0000000000046824B 125 | 00000000000502513 126 | 0000000000069105R 127 | 0000000000078010B 128 | 00000000000808791 129 | 00000000000813695 130 | 00000000000819921 131 | 00000000000824779 132 | 0000000000086309B 133 | 00000000000881673 134 | 0000000000097686B 135 | 00000000000A25840 136 | 00000000000A71011 137 | 00000000000AAAAAA 138 | 00000000000B72851 139 | 00000000000DW4121 140 | 00000000000E06246 141 | 00000000000F46117 142 | 00000000000J27248 143 | 00000000000K40121 144 | 00000000000KLF220 145 | 00000000000NL1G9S 146 | 0000000000108126B 147 | 00000000001091643 148 | 00000000001221553 
149 | 00000000001424201 150 | 0000000000172385B 151 | 0000000000175629B 152 | 00000000001828867 153 | 00000000002210345 154 | 00000000002308190 155 | 0000000000231332 156 | 00000000002438360 157 | 00000000002467113 158 | 0000000000253213M 159 | 00000000002625567 160 | 00000000002707600 161 | 00000000002751887 162 | 00000000002767946 163 | 00000000003102348 164 | 0000000000369842M 165 | 00000000003D20077 166 | 000000000054321NK 167 | 00000000006418561 168 | 00000000007043523 169 | 00000000008211412 170 | 00000000008218428 171 | 00000000008218469 172 | 00000000009203322 173 | 00000000009203327 174 | 00000000009M18007 175 | 0000000000B70013M 176 | 0000000000BB35178 177 | 0000000000BB95807 178 | 0000000000BC72673 179 | 0000000000BD48041 180 | 0000000000C672629 181 | 0000000000C694106 182 | 0000000000D8556RX 183 | 0000000000F200238 184 | 0000000000F943733 185 | 0000000000HL11240 186 | 0000000000J503014 187 | 0000000000KY33931 188 | 0000000000M130078 189 | 0000000000N50053M 190 | 0000000000OOOOOOO 191 | 0000000000S323158 192 | 0000000000UNKNOWN 193 | 0000001E161B50184 194 | 00000544A02800430 195 | 0000UNKNOWN 196 | 000UNKOWN00000000 197 | 000XXX00000000000 198 | 01010101010101010 199 | 02112250000000000 200 | 02264130000000000 201 | 02292660000000000 202 | 02380690000000000 203 | 02516 204 | 02733800000000000 205 | 03183380000000000 206 | 05119E1994XXXXXXX 207 | 05181134JIANGDONG 208 | 085544B0000000000 209 | 09999999999999999 210 | 0XXXXXXXXXXXX 211 | 100000000000 212 | 10000000000000000 213 | 1000000000000000000 214 | 100000000000000000000 215 | 10101010101010101 216 | 11110000000000000 217 | 11111110000000000 218 | 1111111111111111 219 | 11111111111111111 220 | 1111111111111111111 221 | 11111111111111234 222 | 12121212121212121 223 | 12250000000000000 224 | 12332112232123454 225 | 12340000000000000 226 | 12345000000000000 227 | 12345678900987654 228 | 12345678901234567 229 | 123456789012345678 230 | 12345678909876543 231 | 12345678910111211 232 | 1234567891013333 233 
| 12345678911234567 234 | 12345678912345678 235 | 123456789AAAAAAAA 236 | 123EWQ321QWE321QW 237 | 13686000000000000 238 | 18142700000000000 239 | 1850460000000000 240 | 19000000000000000 241 | 19999999999999999 242 | 1F000000000000000 243 | 1G999999999999997 244 | 1UNKNOWN 245 | 20055370000000000 246 | 20885820000000000 247 | 22510030000000000 248 | 24502600000000000 249 | 25049220000000000 250 | 25067290000000000 251 | 26402850000000000 252 | 26494030000000000 253 | 27239000000000000 254 | 28192690000000000 255 | 2ZK78870000000000 256 | 30236500000000000 257 | 30600000000000000 258 | 31844590000000000 259 | 33115100000000000 260 | 36051500000000000 261 | 372293L0000000000 262 | 42519600000000000 263 | 50232000000000000 264 | 51108000000000000 265 | 51727000000000000 266 | 51800000000005043 267 | 59767000000000000 268 | 5TDZT300000000000 269 | 5UNKNOWN 270 | 61234567890POIUYT 271 | 61M85570000000000 272 | 70561000000000000 273 | 7C390410000000000 274 | 80409100000000000 275 | 80756800000000000 276 | 84379070000000000 277 | 85066700000000000 278 | 86133000000000000 279 | 86567270000000000 280 | 88899000000000000 281 | 89821670000000000 282 | 90847910000000000 283 | 90902600000000000 284 | 99 285 | 99109999999999999 286 | 99139999999999999 287 | 9999999999 288 | 99999999999 289 | 999999999999 290 | 9999999999999 291 | 99999999999990909 292 | 99999999999999 293 | 9999999999999900- 294 | 999999999999999 295 | 9999999999999999 296 | 99999999999999999 297 | 999999999999999999 298 | 9999999999999999999 299 | 99999999999999999999 300 | 999999999999999999999 301 | 9999999999999999999999 302 | 999999999999999999999999 303 | 9999999999999999999999999 304 | AA000000000000000 305 | AAAAAAAAAAAAAAAAA 306 | ALL UNKNOWN 307 | B5268000000000000 308 | BB921150000000000 309 | BD688540000000000 310 | BD934900000000000 311 | BIKE0000000000000 312 | BKJ06980000000000 313 | DJ356710000000000 314 | E434TR4G4RTG4RTGR 315 | ES40DC00000000000 316 | FS3221 UNK VIN 317 | FS6DVR00000000000 318 
| G00000000000 319 | G0904600000000000 320 | HD00000000000 321 | HD000000000000000 322 | HR554230000000000 323 | JA3AY26A5VV0416 0 324 | JT420000000000000 325 | KAW00000000000000 326 | KSV700A0000000000 327 | KY118720000000000 328 | KY123820000000000 329 | KZ000000000000000 330 | L0000000000000000 331 | LA5PWR00000000000 332 | MBCN6180000000000 333 | MF286000000000000 334 | MY025V00000000000 335 | NA000000000000000 336 | NKNOWN 337 | NL116F00000000000 338 | NONE 339 | NONE0000000000000 340 | NONE9999999999999 341 | NY628680000000000 342 | NZ0QA400000000000 343 | NoVIN999999999999 344 | P0032700000000000 345 | R210III0000000000 346 | SRP16130000000000 347 | SRR55500000000000 348 | T3TQ34TQ34TR34T 349 | T4756000000000000 350 | TH638500000000000 351 | U 352 | U NKNOWN0000000000 353 | UKN 354 | UKNOWN 355 | UKNOWN0000000000 356 | UKNOWN00000000000 357 | UNK 358 | UNK 359 | UNK H AND R 360 | UNK H-R VEH 361 | UNK HIT AND RUN 362 | UNK NOWN 363 | UNK RENTAL 364 | UNK. 365 | UNK0000000000000 366 | UNK00000000000000 367 | UNK99999999999999 368 | UNKI 369 | UNKKNOWN 370 | UNKMOWN 371 | UNKN 372 | UNKN0000000000000 373 | UNKNIOWN 374 | UNKNIWN 375 | UNKNKOWN 376 | UNKNNOWN0000000000 377 | UNKNON 378 | UNKNOW 379 | UNKNOWEN 380 | UNKNOWN 381 | UNKNOWN 382 | UNKNOWN . 383 | UNKNOWN HIT 384 | UNKNOWN / FLED 385 | UNKNOWN BODY TYPE 386 | UNKNOWN HI 387 | UNKNOWN HIT AND R 388 | UNKNOWN INFORMATI 389 | UNKNOWN M 390 | UNKNOWN VIN 391 | UNKNOWN VIN NUMBE 392 | UNKNOWN VINNUMBE 393 | UNKNOWN!!!!!!!!!! 394 | UNKNOWN########## 395 | UNKNOWN********** 396 | UNKNOWN.......... 
397 | UNKNOWN/UNKNOWN// 398 | UNKNOWN0000 399 | UNKNOWN0000000000 400 | UNKNOWN00000000000 401 | UNKNOWN999 402 | UNKNOWN9999999999 403 | UNKNOWNHIT 404 | UNKNOWNK 405 | UNKNOWNVIN 406 | UNKNOWN[O 407 | UNKNWN 408 | UNKNWON 409 | UNKOWN 410 | UNKOWN0000000000 411 | UNKOWN00000000000 412 | UNKU 413 | UNKWN 414 | UNKWON0000000000 415 | UNNKNOWN 416 | UNNOWN 417 | UNOWN 418 | UNknown 419 | UTL00000000000000 420 | UU 421 | UU000000000000 422 | UUNKNOWN 423 | UnKnown 424 | Unk 425 | Unknow 426 | Unknown 427 | Unknownn 428 | WL848090000000000 429 | WQ321QWE321QWE321 430 | XG424790000000000 431 | XXX00000000000000 432 | XXXXXX 433 | XXXXXXX 434 | XXXXXXX0000000000 435 | XXXXXXXX 436 | XXXXXXXX65131 437 | XXXXXXXXX 438 | XXXXXXXXXX 439 | XXXXXXXXXX20412 440 | XXXXXXXXXXX 441 | XXXXXXXXXXXX 442 | XXXXXXXXXXXXX 443 | XXXXXXXXXXXXXX 444 | XXXXXXXXXXXXXXX 445 | XXXXXXXXXXXXXXXX 446 | XXXXXXXXXXXXXXXXX 447 | XXXXXXXXXXXXXXXXXXXXXXXXX 448 | _________________ 449 | unknown 450 | unknownn 451 | -------------------------------------------------------------------------------- /thematic-maps/deer_in_the_city.txt: -------------------------------------------------------------------------------- 1 | city,Total,K,ABC,PDO,Lat,Lon 2 | Portage,191,0,5,186,42.201154,-85.580002 3 | Rochester Hills,150,0,2,148,42.658366,-83.149932 4 | Midland,137,0,1,136,43.615583,-84.247212 5 | Battle Creek,116,0,6,110,42.321152,-85.179714 6 | Farmington Hills,95,0,4,91,42.498994,-83.367717 7 | Ann Arbor,90,0,4,86,42.280826,-83.743038 8 | Novi,86,0,8,78,42.480590,-83.475491 9 | Auburn Hills,82,0,2,80,42.687532,-83.234103 10 | Lansing,81,0,0,81,42.732535,-84.555535 11 | Walker,77,0,2,75,43.001413,-85.768091 12 | Grand Rapids,63,0,1,62,42.963360,-85.668086 13 | Troy,58,0,5,53,42.606409,-83.149775 14 | Kalamazoo,57,0,1,56,42.291707,-85.587229 15 | Kentwood,57,0,5,52,42.869473,-85.644749 16 | East Lansing,55,0,3,52,42.736979,-84.483865 17 | Wyoming,46,0,0,46,42.913360,-85.705309 18 | 
Southfield,44,0,6,38,42.473369,-83.221873 19 | Sterling Heights,44,0,3,41,42.580312,-83.030203 20 | Norton Shores,41,0,2,39,43.168904,-86.263946 21 | Burton,40,0,1,39,42.999472,-83.616342 22 | Livonia,40,0,0,40 23 | Escanaba,32,0,1,31 24 | Charlotte,32,0,0,32 25 | Lapeer,31,0,0,31 26 | Norway,29,0,0,29 27 | Marquette,28,0,0,28 28 | Rockford,28,0,0,28 29 | Holland,27,0,1,26 30 | Gladstone,27,0,0,27 31 | Muskegon,25,0,1,24 32 | Tecumseh,25,0,0,25 33 | Alpena,25,0,1,24 34 | Grandville,25,0,0,25 35 | Fenton,24,0,0,24 36 | Coldwater,24,0,1,23 37 | Ithaca,24,0,0,24 38 | Alma,23,0,0,23 39 | Hillsdale,22,0,0,22 40 | Marshall,22,0,0,22 41 | Traverse City,21,0,1,20 42 | Petoskey,21,0,0,21 43 | Romulus,21,0,0,21 44 | Wixom,21,0,0,21 45 | Iron River,21,0,0,21 46 | Holland,20,0,2,18 47 | Negaunee,19,0,1,18 48 | Iron Mountain,19,0,0,19 49 | Sault Ste. Marie,19,0,0,19 50 | Ludington,18,0,1,17 51 | Swartz Creek,18,0,1,17 52 | Chelsea,18,0,1,17 53 | Jonesville,18,0,0,18 54 | Adrian,17,0,0,17 55 | Manistee,17,0,0,17 56 | Pontiac,16,0,1,15 57 | Warren,16,0,0,16 58 | East Jordan,16,0,0,16 59 | Perry,16,0,0,16 60 | Paw Paw,15,0,0,15 61 | Jackson,15,0,1,14 62 | Litchfield,15,0,0,15 63 | Lowell,14,0,1,13 64 | Bloomfield Hills,14,0,0,14 65 | Boyne City,14,0,0,14 66 | Westland,13,0,1,12 67 | Marysville,13,0,0,13 68 | Hastings,13,0,0,13 69 | Mt. Pleasant,13,0,0,13 70 | Mason,13,0,1,12 71 | Bad Axe,13,0,0,13 72 | Howell,12,0,0,12 73 | Reed City,12,0,0,12 74 | Goodrich,12,0,0,12 75 | Rochester,12,0,0,12 76 | Rogers City,12,0,0,12 77 | St. 
Louis,12,0,0,12 78 | Big Rapids,12,0,0,12 79 | Corunna,12,0,0,12 80 | Flushing,12,0,0,12 81 | Richmond,11,0,0,11 82 | Monroe,11,0,0,11 83 | Croswell,11,0,0,11 84 | Hudson,11,0,0,11 85 | Gibralter,11,0,0,11 86 | Saline,11,0,0,11 87 | Newaygo,10,0,0,10 88 | Oxford,10,0,0,10 89 | Springfield,10,0,0,10 90 | Flint,10,0,1,9 91 | Hart,10,0,0,10 92 | Holly,10,0,0,10 93 | Greenville,10,0,0,10 94 | Caro,10,0,0,10 95 | Flat Rock,10,0,0,10 96 | Franklin,10,0,0,10 97 | Grand Blanc,10,0,0,10 98 | Portland,9,0,0,9 99 | Clare,9,0,0,9 100 | Cheboygan,9,0,0,9 101 | Potterville,9,0,0,9 102 | Roscommon,9,0,0,9 103 | Madison Heights,9,0,1,8 104 | Lakeview,9,0,0,9 105 | East Lansing,9,0,0,9,42.736979,-84.483865 106 | Ferrysburg,9,0,0,9 107 | Rosebush,9,0,2,7 108 | Sturgis,9,0,0,9 109 | Brown City,9,0,1,8 110 | Niles,9,0,1,8 111 | Wood Haven,9,0,1,8 112 | Owosso,8,0,0,8 113 | Nashville,8,0,0,8 114 | Grand Ledge,8,0,0,8 115 | Montague,8,0,0,8 116 | Grand Haven,8,0,0,8 117 | Whitehall,8,0,0,8 118 | Coopersville,8,0,0,8 119 | Lake Isabella,8,0,0,8 120 | Saginaw,8,0,0,8 121 | Taylor,8,0,1,7 122 | Ishpeming,8,0,0,8 123 | Dundee,8,0,0,8 124 | Ionia,8,0,0,8 125 | Plainwell,8,0,0,8 126 | Colon,8,0,0,8 127 | Tawas City,8,0,0,8 128 | Charlevoix,8,0,0,8 129 | Lincoln,7,0,0,7 130 | Brighton,7,0,0,7 131 | Mattawan,7,0,0,7 132 | Perrinton,7,0,0,7 133 | Orchard Lake,7,0,0,7 134 | Hersey,7,0,0,7 135 | Alanson,7,0,0,7 136 | Wayland,7,0,0,7 137 | Peck,7,0,0,7 138 | Gaylord,7,0,0,7 139 | St. 
Joseph,7,0,0,7 140 | Rockwood,7,0,0,7 141 | Homer,7,0,0,7 142 | Hudsonville,7,0,0,7 143 | Beverly Hills,7,0,0,7 144 | Imlay City,7,0,0,7 145 | McBride,7,0,0,7 146 | Baraga,7,0,1,6 147 | Dearborn,7,0,0,7 148 | Albion,7,0,0,7 149 | Concord,7,0,0,7 150 | Detroit,7,0,1,6 151 | Gladwin,6,0,0,6 152 | Millington,6,0,0,6 153 | Royal Oak,6,0,1,5 154 | East Tawas,6,0,1,5 155 | Bessemer,6,0,2,4 156 | Standish,6,0,0,6 157 | South Haven,6,0,0,6 158 | Frankfort,6,0,0,6 159 | Harbor Beach,6,0,0,6 160 | Bay City,6,0,0,6 161 | Linden,6,0,0,6 162 | Romeo,6,0,0,6 163 | Bridgman,6,0,0,6 164 | Maple Rapids,6,0,0,6 165 | Applegate,6,0,0,6 166 | St. Clair,6,0,0,6 167 | Harbor Springs,6,0,0,6 168 | Marine City,5,0,0,5 169 | Stanton,5,0,0,5 170 | Trenton,5,0,0,5 171 | Ubly,5,0,0,5 172 | Dewitt,5,0,0,5 173 | Harrison,5,0,0,5 174 | Stockbridge,5,0,0,5 175 | Edmore,5,0,1,4 176 | Sanford,5,0,0,5 177 | Benzonia,5,0,0,5 178 | Cedar Springs,5,0,0,5 179 | Coleman,5,0,0,5 180 | Port Huron,5,0,0,5 181 | Mecosta,5,0,0,5 182 | Empire,5,0,0,5 183 | Lexington,5,0,0,5 184 | Central Lake,5,0,0,5 185 | Vicksburg,5,0,0,5 186 | Fremont,5,0,0,5 187 | Ortonville,5,0,0,5 188 | Wakefield,5,0,0,5 189 | Evart,5,0,0,5 190 | Fowlerville,5,0,0,5 191 | Cadillac,5,0,0,5 192 | Mayville,5,0,0,5 193 | Buchanan,5,0,0,5 194 | Lawton,5,0,0,5 195 | West Branch,5,0,0,5 196 | Beulah,5,0,0,5 197 | Port Austin,4,0,0,4 198 | New Baltimore,4,0,0,4 199 | Ypsilanti,4,0,0,4 200 | Sandusky,4,0,0,4 201 | Centreville,4,0,0,4 202 | Williamston,4,0,1,3 203 | Three Rivers,4,0,0,4 204 | Suttons Bay,4,0,0,4 205 | Scottville,4,0,0,4 206 | Barryton,4,0,0,4 207 | Union City,4,0,0,4 208 | Sterling,4,0,0,4 209 | Crystal Falls,4,0,0,4 210 | McBain,4,0,0,4 211 | Munising,4,0,0,4 212 | Gobles,4,0,1,3 213 | Casnovia,4,0,0,4 214 | Bangor,4,0,0,4 215 | Hillman,4,0,0,4 216 | Ravenna,4,0,0,4 217 | Dexter,4,0,0,4 218 | New Haven,4,0,0,4 219 | Stevensville,4,0,0,4 220 | Port Sanilac,4,0,0,4 221 | Zeeland,4,0,0,4 222 | Kingsford,4,0,0,4 223 | 
Middleville,4,0,0,4 224 | Farmington,4,0,0,4 225 | Muskegon Heights,4,0,0,4 226 | Elk Rapids,4,0,0,4 227 | New Buffalo,4,0,0,4 228 | Eaton Rapids,4,0,0,4 229 | Galesburg,4,0,0,4 230 | Cass City,3,0,0,3 231 | Caledonia,3,0,0,3 232 | Bellaire,3,0,0,3 233 | Saugatuck,3,0,0,3 234 | South Lyon,3,0,0,3 235 | Springport,3,0,0,3 236 | Walled Lake,3,0,0,3 237 | Au Gres,3,0,0,3 238 | Kingston,3,0,1,2 239 | Kalkaska,3,0,0,3 240 | Carsonville,3,0,0,3 241 | Zilwaukee,3,0,0,3 242 | Almont,3,0,0,3 243 | Menominee,3,0,0,3 244 | North Branch,3,0,0,3 245 | Midland,3,0,0,3 246 | Baldwin,3,0,1,2 247 | Clarkston,3,0,0,3 248 | Birmingham,3,0,1,2 249 | Unknown Community,3,0,0,3 250 | Spring Lake,3,0,0,3 251 | Allegan,3,0,0,3 252 | Mulliken,3,0,0,3 253 | Blissfield,3,0,0,3 254 | Morrice,3,0,0,3 255 | Douglas,3,0,0,3 256 | Milford,3,0,0,3 257 | Berrien Springs,3,0,0,3 258 | Ontonagon,3,0,0,3 259 | Mesick,3,0,0,3 260 | Kent City,3,0,0,3 261 | Kingsley,3,0,0,3 262 | Allen Park,3,0,0,3 263 | South Rockwood,3,0,0,3 264 | Lathrup Village,3,0,0,3 265 | Northport,3,0,0,3 266 | Emmett,3,0,0,3 267 | Southgate,3,0,0,3 268 | Quincy,3,0,1,2 269 | Mt. Clemens,2,0,0,2 270 | Clayton,2,0,0,2 271 | Onsted,2,0,0,2 272 | Barton Hills,2,0,0,2 273 | Plymouth,2,0,0,2 274 | Vassar,2,0,0,2 275 | Deckerville,2,0,0,2 276 | Mendon,2,0,0,2 277 | Laingsburg,2,0,0,2 278 | St. 
Charles,2,0,0,2 279 | Frankenmuth,2,0,0,2 280 | Birch Run,2,0,0,2 281 | Posen,2,0,0,2 282 | Onaway,2,0,0,2 283 | New Lothrop,2,0,0,2 284 | Vernon,2,0,0,2 285 | LeRoy,2,0,0,2 286 | Rose City,2,0,0,2 287 | Pentwater,2,0,0,2 288 | Sylvan Lake,2,0,0,2 289 | Hartford,2,0,0,2 290 | Utica,2,0,0,2 291 | Manchester,2,0,0,2 292 | Casnovia,2,0,0,2 293 | Pierson,2,0,0,2 294 | Howard City,2,0,0,2 295 | Carson City,2,0,0,2 296 | Carleton,2,0,0,2 297 | Lake City,2,0,0,2 298 | Dearborn Heights,2,0,0,2 299 | Carney,2,0,0,2 300 | Morley,2,0,0,2 301 | Freesoil,2,0,0,2 302 | Fountain,2,0,0,2 303 | Custer,2,0,0,2 304 | Kaleva,2,0,0,2 305 | New Era,2,0,0,2 306 | Harrisville,2,0,0,2 307 | Port Hope,2,0,0,2 308 | Davison,2,0,0,2 309 | Niles,2,0,0,2 310 | Cassopolis,2,0,0,2 311 | Breckenridge,2,0,0,2 312 | Lyons,2,0,0,2 313 | Bronson,2,0,0,2 314 | Omer,2,0,0,2 315 | Twining,2,0,0,2 316 | Houghton,2,0,0,2 317 | L'Anse,2,0,0,2 318 | Shepherd,2,0,0,2 319 | Shoreham,2,0,0,2 320 | Grand Beach,2,0,0,2 321 | Gaastra,2,0,0,2 322 | Auburn,2,0,0,2 323 | Benton Harbor,2,0,0,2 324 | Leslie,2,0,0,2 325 | Belding,2,0,0,2 326 | Elberta,2,0,0,2 327 | Pewamo,2,0,0,2 328 | Otisville,2,0,0,2 329 | Richland,2,0,0,2 330 | Honor,2,0,0,2 331 | Vermontville,2,0,0,2 332 | Olivet,2,0,0,2 333 | Otsego,2,0,0,2 334 | Farwell,2,0,0,2 335 | Lansing,2,0,0,2 336 | Grayling,2,0,0,2 337 | Sparta,2,0,0,2 338 | St. 
Johns,2,0,0,2 339 | Fennville,2,0,0,2 340 | Eagle,1,0,0,1 341 | Chesaning,1,0,0,1 342 | Lake Linden,1,0,0,1 343 | Bellevue,1,0,0,1 344 | North Muskegon,1,0,0,1 345 | Lakewood Club,1,0,0,1 346 | Vanderbilt,1,0,0,1 347 | Watervliet,1,0,0,1 348 | Kinde,1,0,0,1 349 | Gagetown,1,0,0,1 350 | Galien,1,0,0,1 351 | Elsie,1,0,0,1 352 | Hesperia,1,0,0,1 353 | Sheridan,1,0,0,1 354 | Garden,1,0,0,1 355 | Fowler,1,0,0,1 356 | Webberville,1,0,0,1 357 | Luna Pier,1,0,0,1 358 | Estral Beach,1,0,0,1 359 | Thompsonville,1,0,0,1 360 | Millersburg,1,0,0,1 361 | Merrill,1,0,0,1 362 | Capac,1,0,0,1 363 | Hancock,1,0,1,0 364 | Forestville,1,0,0,1 365 | Richmond,1,0,0,1 366 | Mackinaw City,1,0,0,1 367 | Boyne Falls,1,0,0,1 368 | Constantine,1,0,0,1 369 | Pleasant Ridge,1,0,0,1 370 | Ironwood,1,0,0,1 371 | Edwardsburg,1,0,0,1 372 | Dowagiac,1,0,0,1 373 | Northville,1,0,0,1 374 | Leonard,1,0,0,1 375 | Tekonsha,1,0,0,1 376 | Oakley,1,0,0,1 377 | Marlette,1,0,0,1 378 | Athens,1,0,0,1 379 | Manistique,1,0,0,1 380 | North Adams,1,0,0,1 381 | Reading,1,0,0,1 382 | Bancroft,1,0,0,1 383 | Bingham Farms,1,0,0,1 384 | Marion,1,0,0,1 385 | Lennon,1,0,0,1 386 | Wolverine,1,0,0,1 387 | White Cloud,1,0,0,1 388 | Metamora,1,0,0,1 389 | Parchment,1,0,0,1 390 | Caspian,1,0,0,1 391 | Clare,1,0,0,1 392 | Brooklyn,1,0,0,1 393 | Parma,1,0,0,1 394 | Copemish,1,0,0,1 395 | Bear Lake,1,0,0,1 396 | St. 
Clair Shores,1,0,0,1 397 | Roseville,1,0,0,1 398 | Inkster,1,0,0,1 399 | East Grand Rapids,1,0,0,1 400 | Sand Lake,1,0,0,1 401 | South Haven,1,0,0,1 402 | Morenci,1,0,0,1 403 | Clinton,1,0,0,1 404 | Wayne,1,0,0,1 405 | Buckley,1,0,0,1 406 | Traverse City,1,0,0,1 407 | Clifford,1,0,0,1 408 | Harrietta,1,0,0,1 409 | Manton,1,0,0,1 410 | Milan,1,0,0,1 411 | Ovid,1,0,0,1 412 | Pinconning,1,0,0,1 413 | Saranac,1,0,0,1 414 | Powers,1,0,0,1 415 | Stanwood,1,0,0,1 416 | Stephenson,1,0,0,1 417 | Essexville,1,0,0,1 418 | Daggett,1,0,0,1 419 | -------------------------------------------------------------------------------- /pytorch/workshop_neural_net.md: -------------------------------------------------------------------------------- 1 | \titlepage 2 | ## Deep Neural Networks (DNNs) 3 | 4 | - A DNN is a mathematical function inspired by neural networks in the 5 | brain. 6 | 7 | - Input layer (features), hidden layers, output layer (targets). 8 | 9 | - Your data determines number of features and targets. 10 | 11 | - You choose number of hidden layers and "neurons" (activation units) 12 | in each hidden layer. 13 | 14 | \centering 15 | ![](DNN.png){width="50%"} 16 | 17 | ## Deep Neural Networks (DNNs), cont'd 18 | 19 | - Hidden layers have variables (weights, biases) that are trained. 20 | 21 | - Mathematical structure: Composite of nonlinear activation functions 22 | acting on matrix/vector operations, e.g. 23 | $$f(x) = A_2{\color{red}g(A_1{\color{blue}g(A_0x+b_0)}+b_1)}+b_2$$ 24 | 25 | \centering 26 | ![](DNN_activations.png){width="\textwidth"} 27 | 28 | ## Training DNNs 29 | 30 | - Training a DNN means optimizing the weights and biases to "fit" 31 | given data 32 | 33 | - i.e. minimize error between DNN prediction and the given data 34 | 35 | - Optimization: Think of mountains and valleys. Your location is like 36 | the value of the weights/biases. Your elevation is like the value of 37 | the error. 
As you "walk down the mountain", you are changing the 38 | values of the weights/biases to decrease the value of the error. 39 | 40 | \centering 41 | ![](mtn.png){width="50%"} 42 | 43 | ## Training DNNs, cont'd 44 | 45 | - Usually a variant of **stochastic gradient descent**: 46 | 47 | - **Gradient**: Points toward steepest slope 48 | 49 | - **Gradient descent** method: Take steps down steepest slope to 50 | get to minimum 51 | 52 | - **Stochastic gradient descent**: Calculate the error based on a 53 | small number of data (a **batch**) instead of the entire data 54 | set 55 | 56 | - You choose: step size (learning rate), batch size 57 | 58 | \centering 59 | ![](mtn.png){width="50%"} 60 | 61 | ## playground.tensorflow.org 62 | 63 | \centering 64 | ![](playground.png){width="\textwidth"} 65 | 66 | Note: playground.tensorflow.org is an educational tool. It does not 67 | actually use the TensorFlow library, nor can you use it to train with 68 | your data. 69 | 70 | ## Underfitting (high bias) 71 | 72 | Symptoms: 73 | 74 | - High training and testing error 75 | 76 | Possible treatments: 77 | 78 | - Make the model larger (more layers, more neurons) 79 | 80 | - Increase the number of features, artificially if necessary (e.g. 81 | $x_1x_2$, $\sin(x)$, etc.) 82 | 83 | - More training 84 | 85 | \centering 86 | ![](underfitting.png){width="50%"} 87 | 88 | \vspace{.5cm} 89 | ## Overfitting (high variance) 90 | 91 | Symptoms: 92 | 93 | - Low training error, high testing error 94 | 95 | - (Made worse by noisy data) 96 | 97 | Possible treatments: 98 | 99 | - More data 100 | 101 | - Regularization (L1, L2, dropout) 102 | 103 | - Less training (early stopping) 104 | 105 | - Simplify model (use w/ caution) 106 | 107 | \centering 108 | ![](overfitting2.png){width="50%"} 109 | 110 | \vspace{.5cm} 111 | ## Regularization 112 | 113 | - Regularization smooths the model; reduces complexity in the output 114 | ([Wikipedia](https://en.wikipedia.org/wiki/Regularization_(mathematics))). 
115 | 116 | - In neural networks, this is done by keeping the weights at a 117 | similar, low magnitude. 118 | 119 | - L1 regularization adds the L1 norm of the weights to the loss. 120 | 121 | - L2 regularization adds the L2 norm of the weights (more sensitive to 122 | outliers). 123 | 124 | - Dropout randomly and temporarily drops weights to zero during 125 | training. 126 | 127 | \centering 128 | ![](L2reg.png){width="50%"} 129 | 130 | ## playground.tensorflow.org 131 | 132 | \centering 133 | ![](playground.png){width="\textwidth"} 134 | 135 | Note: playground.tensorflow.org is an educational tool. It does not 136 | actually use the TensorFlow library, nor can you use it to train with 137 | your data. 138 | 139 | ## Nonlinear regression 140 | 141 | - Begin with example of nonlinear regression. 142 | 143 | - Use a standard DNN to map continuous inputs to continuous outputs. 144 | 145 | - Data in example has two inputs, one output (slices parallel to 146 | x-axis are parabolic, slices parallel to y-axis are sinusoidal). 147 | 148 | \centering 149 | ![](DNNRegressor_data.png){width="50%"} 150 | 151 | ## Load data 152 | 153 | ## Build the model 154 | 155 | Define the structure of the DNN. Here, we define two hidden layers, with 156 | 5 neurons in each layer. 157 | 158 | We also specify the activation function here. The `relu` function is 159 | commonly used, but you can use others (examples: 160 | [Wikipedia](https://en.wikipedia.org/wiki/Activation_function)): 161 | 162 | \vspace{.5cm} 163 | \hspace*{10pt} 164 | `sigmoid, softplus, tanh`, etc. 165 | 166 | \vspace{.5cm} 167 | Note that no activation is used on the final layer. 168 | 169 | \vspace{.5cm} 170 | Experiment with the hidden units and activation function. 171 | 172 | ## L1, L2 regularization 173 | 174 | ## Dropout 175 | 176 | ## Training 177 | 178 | Stochastic gradient descent methods use shuffled mini-batches instead of 179 | the entire data set for each training iteration. 
We specify batch size, 180 | and how many epochs to train the code. 181 | 182 | \vspace{.5cm} 183 | An epoch is the number of training iterations required to go through the 184 | entire training set once. For example, with 1,000 datapoints and a batch size 185 | of 10, one epoch would take 100 training iterations. 186 | 187 | \vspace{.5cm} 188 | We can also specify validation data to see how the validation loss 189 | changes during training. 190 | 191 | Experiment with batch size and number of epochs. 192 | 193 | ## Results 194 | 195 | With good settings in the code (not the current settings), we can get 196 | the following fit: 197 | 198 | \centering 199 | ![](goodRegression.png){width="80%"} 200 | 201 | ## Exercise 1 202 | 203 | - Run the code. 204 | 205 | - Identify the problem (underfitting or overfitting). 206 | 207 | - Try possible solutions to get a better fit. 208 | 209 | ## Classification 210 | 211 | - Consider the problem of classification. 212 | 213 | - Maps feature values to a category. 214 | 215 | - Use the example of irises 216 | 217 | - Four features: sepal length, sepal width, petal length, petal 218 | width 219 | 220 | - Three classes: Iris setosa, Iris virginica, Iris versicolor 221 | 222 | \centering 223 | ![Iris versicolor, by Danielle Langlois (CC BY-SA 3.0), 224 | [commons.wikimedia.org/w/index.php?curid=248095](commons.wikimedia.org/w/index.php?curid=248095)](iris_versicolor.jpg){width="40%"} 225 | 226 | ## Import data 227 | 228 | Data label format: Usually given as 0, 1, or 2; we need it to be [1,0,0], [0,1,0], or [0,0,1]. 229 | 230 | ## Build the model 231 | 232 | Define the structure of the DNN. Here, we define three hidden layers, 233 | with 1000, 500, and 70 neurons in each respective layer. 234 | 235 | Since this is classification, apply the 236 | [softmax](https://en.wikipedia.org/wiki/Softmax_function) function to 237 | the last layer.
This transforms the output to be a vector of 238 | probabilities that sum to one: $$\begin{aligned} 239 | p_i &= \frac{\exp(f_i)}{\sum\limits_j \exp(f_j)}\end{aligned}$$ 240 | where $p_i$ is the probability of category $i$ being true, $f_i$ is the $i$-th 241 | component of the final layer's output. 242 | 243 | ## Loss 244 | 245 | We again define the loss function and the optimizer. For classification, 246 | we use the [cross entropy](https://en.wikipedia.org/wiki/Cross_entropy) 247 | loss function. We are also interested in the accuracy metric (% 248 | correctly classified), in addition to the loss. 249 | 250 | $$\begin{aligned} 251 | \mathrm{cross\_entropy} = -\frac{1}{n_\mathrm{samples}}\sum\limits_j^{n_\mathrm{samples}}\sum\limits_i^{n_\mathrm{classes}}\hat{p}_i^j\log(p_i^j)\end{aligned}$$ 252 | where $\hat{p}_i^j$ is the data and $p_i^j$ is the prediction for class 253 | $i$, sample $j$. 254 | 255 | ## Training 256 | 257 | Training is done as before. 258 | 259 | ## Exercise 2 260 | 261 | - Run the code. 262 | 263 | - Identify the problem (underfitting or overfitting). 264 | 265 | - Try possible solutions to get a better result. 266 | 267 | ## Convolutional Neural Network (CNN) 268 | 269 | - Image recognition is often done with CNNs. 270 | 271 | - CNNs perform classification by adding new types of layers, primarily 272 | "convolutions" and "pooling". 273 | 274 | - The "convolution": scanning a filter across the image. 275 | 276 | - The "pooling": take the most significant features from a group of 277 | pixels. 278 | 279 | - Some nice explanations of CNNs by [Adam 280 | Geitgey](https://medium.com/@ageitgey/machine-learning-is-fun-part-3-deep-learning-and-convolutional-neural-networks-f40359318721) 281 | and 282 | [ujjwalkarn](https://ujjwalkarn.me/2016/08/11/intuitive-explanation-convnets/). 283 | 284 | - Our example will use the [MNIST](http://yann.lecun.com/exdb/mnist/) 285 | database of handwritten digits.
286 | 287 | - Based on [this 288 | example](https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py). 289 | 290 | \centering 291 | ![](mnist_0-9.png){width="30%"} 292 | 293 | ## Initialize model, Normalize input 294 | 295 | We shift and normalize the inputs for better fitting. 296 | 297 | We also define the input shape. The images are 28 by 28 pixels, with a 298 | grayscale value. This means each image is defined by a 3D tensor, 299 | $28\times28\times1$ (a color image of the same size would be 300 | $28\times28\times3$). 301 | 302 | ## Convolutional layer 303 | 304 | The first convolutional layer is applied. This involves sweeping a 305 | filter across the image. (Gives \"translational invariance.\") 306 | 307 | 308 | 309 | We use 4 filters with a size of $5\times5$ pixels, with ReLU activation. 310 | 311 | ## Max pooling 312 | 313 | Max pooling involves looking at clusters of the output (in this example, 314 | $2\times2$ clusters), and sets the maximum filter value as the value for 315 | the cluster. 316 | 317 | 318 | 319 | I.e. a "match" anywhere in the cluster $\implies$ a "match" for the 320 | cluster. 321 | 322 | \vspace{0.5cm} 323 | Since we are also using stride of 2, the clusters don't overlap. 324 | 325 | Pooling reduces the size of the neural net, speeding up computations. 326 | 327 | ## 2nd convolution and pooling 328 | 329 | A second convolutional layer, followed by max pooling, is used. 330 | 331 | ## Fully-connected layer 332 | 333 | The 3D tensor is converted back to a 1D tensor to act as input for a 334 | dense or fully-connected layer, the same type used with the previous 335 | regression and classification examples. 336 | 337 | ## Dropout, Softmax 338 | 339 | We add a dropout layer here. In this example, dropout happens at a rate 340 | of 40% (i.e. 40% of weights are temporarily set to zero at each training 341 | iteration). 
342 | 343 | As in the Iris classification problem, we finish with a dense layer and 344 | softmax activation function to return probabilities for each category. 345 | 346 | ## Compile, Train 347 | 348 | We compile and train as in the previous classification example: 349 | 350 | ## Exercise 3 351 | 352 | - Run the file. 353 | 354 | - Modify the CNN and training to see how high of a validation accuracy 355 | you can get. 356 | -------------------------------------------------------------------------------- /pdf-data-extraction/pdfminer_workshop.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# PDF Text Mining using PDFMiner" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Installation\n", 15 | "\n", 16 | "`pip install pdfminer.six`" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## How to Use\n", 24 | "Below is an edited code example from [Tim Arnold's blog on *Manipulating PDFs with Python*]( https://www.binpress.com/tutorial/manipulating-pdfs-with-python/167). It has been modified to be compatible with Python 3.X. Most of it is boilerplate stuff that does not need to change. The only change that needs to be done is the filename and the page(s) of interest. 
" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 1, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "from io import StringIO\n", 36 | "from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter\n", 37 | "from pdfminer.converter import TextConverter\n", 38 | "from pdfminer.layout import LAParams\n", 39 | "from pdfminer.pdfpage import PDFPage " 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "Identify file and page of interest" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "filename = 'MDOT_fastfacts02-2011_345554_7.pdf'\n", 58 | "pagenums = [3] # empty list does all pages" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": {}, 64 | "source": [ 65 | "Create instances of classes necessary to read pdf" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "metadata": { 72 | "collapsed": true 73 | }, 74 | "outputs": [], 75 | "source": [ 76 | "output = StringIO()\n", 77 | "manager = PDFResourceManager()\n", 78 | "converter = TextConverter(manager, output, laparams=LAParams())\n", 79 | "interpreter = PDFPageInterpreter(manager, converter)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Open the pdf and read & process page(s) of interest" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 4, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "with open(filename, 'rb') as fin:\n", 96 | " for page in PDFPage.get_pages(fin, pagenums):\n", 97 | " interpreter.process_page(page)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "Get output string" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 5, 110 | "metadata": { 111 | 
"collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "text = output.getvalue()\n", 116 | "converter.close()\n", 117 | "output.close()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "Let's look at the output text string" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 6, 130 | "metadata": {}, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "'Fast Facts\\n\\n201 7\\n\\nCARPOOL LOTS\\n\\n2015 MICHIGAN \\nSTATE REVENUE PACKAGE\\n\\nn There are 261 carpool parking lots located across \\n\\nthe state, 23 of which are public-private partnerships. \\nIncluded in the public-private partnerships are 17 \\nlocations that MDOT has partnered with Meijer Corp. \\nto provide carpool parking spaces in Meijer parking lots \\nlocated near the highway.\\n\\nn MDOT continues its efforts to provide bike racks at \\n\\ncarpool lots, and to attract transit service to lots \\nwhere appropriate.\\n\\nCOST OF ROAD CONSTRUCTION \\n\\nRoadway construction costs are typically based on standard \\ndesign characteristics, materials, and the type of work performed. \\nGeneral estimates are provided for the average cost per lane mile \\nof major work by roadway type, and material costs. 
\\n\\nAverage Cost Per Lane Mile by \\nMajor Work Type for Various Networks \\n(2016 figures; in millions) \\n\\nWork Type \\n\\nReconstruction Rehabilitation Average R&R\\n\\n \\n \\n\\nCombined \\nStatewide\\nFreeway\\nNon-Freeway\\nStatewide \\nUrban\\nStatewide \\nRural\\n\\n$2.0 \\n\\n$2.0 \\n$1.9 \\n\\n$2.1 \\n\\n$1.2 \\n\\n$0.6 \\n\\n$0.8 \\n$0.5 \\n\\n$0.7 \\n\\n$0.5 \\n\\n$1.0 \\n\\n$1.3 \\n$0.8 \\n\\n$1.2 \\n\\n$0.6 \\n\\nCost\\n\\n $64.18\\n $48.04\\n $1.31\\n $1.06\\n\\nMaterial Cost for Construction \\n(2016 Year-to-Date)\\n\\nMaterial\\nHot Mix Asphalt (HMA) per Ton\\nConcrete per Square Yard\\nStructural Steel per Pound\\nReinforcement Steel per Pound\\n\\nState Transportation Funding Package\\nOn Nov. 10, Gov. Snyder signed a package of \\ntransportation bills approved by the Legislature. In the \\nshort term, the legislation will:\\n\\n• Provide $450 million in additional fuel tax \\n\\nrevenues, beginning in January 2017. The tax \\non gasoline and diesel fuel will rise to 26.3 cents \\nat that time, as the legislation also provides for \\ndiesel parity.\\n\\n• Provide $190 million from a 20 percent increase \\n\\nin vehicle registration fees, also beginning in \\nJanuary 2017.\\n\\nThis $600 million in new revenue will be distributed to \\nMDOT, county road commissions, cities and villages, \\nand the Comprehensive Transportation Fund through \\nthe existing Act 51 formula, providing a roughly 30 \\npercent increase by 2018.\\nThe new revenue is expected to generate an average \\nof more than 4,000 jobs per year in the first two years. 
\\nIt will also help address the need to repair and maintain \\nMichigan’s existing transportation systems.\\nBeginning in 2019, the Legislature intends to appropriate \\nincome tax revenue to roads agencies, according \\nto the Act 51 formula, excluding the Comprehensive \\nTransportation Fund, in these amounts:\\n \\n \\n \\nBeginning in 2016, the legislation adds transparency \\nand accountability:\\n\\n• 2019…………………..…..$150 million\\n• 2020……………………....$325 million\\n• 2021 and thereafter……..$600 million\\n\\n• Administrative Expenses: MDOT \\n\\nadministrative expenses, previously capped at \\n10 percent, are now limited to 8 percent of its \\nbudget.\\n\\n• Pavement Warranties: Road agencies are \\nrequired to buy pavement warranties, where \\nappropriate, for projects costing more than \\n$2 million. \\n\\n• Competitive Bidding: To reduce project costs, \\nall agencies are required to competitively bid out \\nprojects costing more than $100,000. \\n\\n• Longer-lived Pavements: MDOT will be \\n\\nrequired to prepare a report on the potential for \\nconstructing longer-lived pavements and report \\nto the Legislature by June 2016.\\n\\n4 2017 Fast Facts \\n\\n (Updated 1/2017)\\n\\n\\x0c'" 136 | ] 137 | }, 138 | "execution_count": 6, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "text" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "Pretty Print Text" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 7, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "('Fast Facts\\n'\n", 164 | " '\\n'\n", 165 | " '201 7\\n'\n", 166 | " '\\n'\n", 167 | " 'CARPOOL LOTS\\n'\n", 168 | " '\\n'\n", 169 | " '2015 MICHIGAN \\n'\n", 170 | " 'STATE REVENUE PACKAGE\\n'\n", 171 | " '\\n'\n", 172 | " 'n There are 261 carpool parking lots located across \\n'\n", 173 | " 
'\\n'\n", 174 | " 'the state, 23 of which are public-private partnerships. \\n'\n", 175 | " 'Included in the public-private partnerships are 17 \\n'\n", 176 | " 'locations that MDOT has partnered with Meijer Corp. \\n'\n", 177 | " 'to provide carpool parking spaces in Meijer parking lots \\n'\n", 178 | " 'located near the highway.\\n'\n", 179 | " '\\n'\n", 180 | " 'n MDOT continues its efforts to provide bike racks at \\n'\n", 181 | " '\\n'\n", 182 | " 'carpool lots, and to attract transit service to lots \\n'\n", 183 | " 'where appropriate.\\n'\n", 184 | " '\\n'\n", 185 | " 'COST OF ROAD CONSTRUCTION \\n'\n", 186 | " '\\n'\n", 187 | " 'Roadway construction costs are typically based on standard \\n'\n", 188 | " 'design characteristics, materials, and the type of work performed. \\n'\n", 189 | " 'General estimates are provided for the average cost per lane mile \\n'\n", 190 | " 'of major work by roadway type, and material costs. \\n'\n", 191 | " '\\n'\n", 192 | " 'Average Cost Per Lane Mile by \\n'\n", 193 | " 'Major Work Type for Various Networks \\n'\n", 194 | " '(2016 figures; in millions) \\n'\n", 195 | " '\\n'\n", 196 | " 'Work Type \\n'\n", 197 | " '\\n'\n", 198 | " 'Reconstruction Rehabilitation Average R&R\\n'\n", 199 | " '\\n'\n", 200 | " ' \\n'\n", 201 | " ' \\n'\n", 202 | " '\\n'\n", 203 | " 'Combined \\n'\n", 204 | " 'Statewide\\n'\n", 205 | " 'Freeway\\n'\n", 206 | " 'Non-Freeway\\n'\n", 207 | " 'Statewide \\n'\n", 208 | " 'Urban\\n'\n", 209 | " 'Statewide \\n'\n", 210 | " 'Rural\\n'\n", 211 | " '\\n'\n", 212 | " '$2.0 \\n'\n", 213 | " '\\n'\n", 214 | " '$2.0 \\n'\n", 215 | " '$1.9 \\n'\n", 216 | " '\\n'\n", 217 | " '$2.1 \\n'\n", 218 | " '\\n'\n", 219 | " '$1.2 \\n'\n", 220 | " '\\n'\n", 221 | " '$0.6 \\n'\n", 222 | " '\\n'\n", 223 | " '$0.8 \\n'\n", 224 | " '$0.5 \\n'\n", 225 | " '\\n'\n", 226 | " '$0.7 \\n'\n", 227 | " '\\n'\n", 228 | " '$0.5 \\n'\n", 229 | " '\\n'\n", 230 | " '$1.0 \\n'\n", 231 | " '\\n'\n", 232 | " '$1.3 \\n'\n", 233 | " '$0.8 
\\n'\n", 234 | " '\\n'\n", 235 | " '$1.2 \\n'\n", 236 | " '\\n'\n", 237 | " '$0.6 \\n'\n", 238 | " '\\n'\n", 239 | " 'Cost\\n'\n", 240 | " '\\n'\n", 241 | " ' $64.18\\n'\n", 242 | " ' $48.04\\n'\n", 243 | " ' $1.31\\n'\n", 244 | " ' $1.06\\n'\n", 245 | " '\\n'\n", 246 | " 'Material Cost for Construction \\n'\n", 247 | " '(2016 Year-to-Date)\\n'\n", 248 | " '\\n'\n", 249 | " 'Material\\n'\n", 250 | " 'Hot Mix Asphalt (HMA) per Ton\\n'\n", 251 | " 'Concrete per Square Yard\\n'\n", 252 | " 'Structural Steel per Pound\\n'\n", 253 | " 'Reinforcement Steel per Pound\\n'\n", 254 | " '\\n'\n", 255 | " 'State Transportation Funding Package\\n'\n", 256 | " 'On Nov. 10, Gov. Snyder signed a package of \\n'\n", 257 | " 'transportation bills approved by the Legislature. In the \\n'\n", 258 | " 'short term, the legislation will:\\n'\n", 259 | " '\\n'\n", 260 | " '• Provide $450 million in additional fuel tax \\n'\n", 261 | " '\\n'\n", 262 | " 'revenues, beginning in January 2017. The tax \\n'\n", 263 | " 'on gasoline and diesel fuel will rise to 26.3 cents \\n'\n", 264 | " 'at that time, as the legislation also provides for \\n'\n", 265 | " 'diesel parity.\\n'\n", 266 | " '\\n'\n", 267 | " '• Provide $190 million from a 20 percent increase \\n'\n", 268 | " '\\n'\n", 269 | " 'in vehicle registration fees, also beginning in \\n'\n", 270 | " 'January 2017.\\n'\n", 271 | " '\\n'\n", 272 | " 'This $600 million in new revenue will be distributed to \\n'\n", 273 | " 'MDOT, county road commissions, cities and villages, \\n'\n", 274 | " 'and the Comprehensive Transportation Fund through \\n'\n", 275 | " 'the existing Act 51 formula, providing a roughly 30 \\n'\n", 276 | " 'percent increase by 2018.\\n'\n", 277 | " 'The new revenue is expected to generate an average \\n'\n", 278 | " 'of more than 4,000 jobs per year in the first two years. 
\\n'\n", 279 | " 'It will also help address the need to repair and maintain \\n'\n", 280 | " 'Michigan’s existing transportation systems.\\n'\n", 281 | " 'Beginning in 2019, the Legislature intends to appropriate \\n'\n", 282 | " 'income tax revenue to roads agencies, according \\n'\n", 283 | " 'to the Act 51 formula, excluding the Comprehensive \\n'\n", 284 | " 'Transportation Fund, in these amounts:\\n'\n", 285 | " ' \\n'\n", 286 | " ' \\n'\n", 287 | " ' \\n'\n", 288 | " 'Beginning in 2016, the legislation adds transparency \\n'\n", 289 | " 'and accountability:\\n'\n", 290 | " '\\n'\n", 291 | " '• 2019…………………..…..$150 million\\n'\n", 292 | " '• 2020……………………....$325 million\\n'\n", 293 | " '• 2021 and thereafter……..$600 million\\n'\n", 294 | " '\\n'\n", 295 | " '• Administrative Expenses: MDOT \\n'\n", 296 | " '\\n'\n", 297 | " 'administrative expenses, previously capped at \\n'\n", 298 | " '10 percent, are now limited to 8 percent of its \\n'\n", 299 | " 'budget.\\n'\n", 300 | " '\\n'\n", 301 | " '• Pavement Warranties: Road agencies are \\n'\n", 302 | " 'required to buy pavement warranties, where \\n'\n", 303 | " 'appropriate, for projects costing more than \\n'\n", 304 | " '$2 million. \\n'\n", 305 | " '\\n'\n", 306 | " '• Competitive Bidding: To reduce project costs, \\n'\n", 307 | " 'all agencies are required to competitively bid out \\n'\n", 308 | " 'projects costing more than $100,000. 
\\n'\n", 309 | " '\\n'\n", 310 | " '• Longer-lived Pavements: MDOT will be \\n'\n", 311 | " '\\n'\n", 312 | " 'required to prepare a report on the potential for \\n'\n", 313 | " 'constructing longer-lived pavements and report \\n'\n", 314 | " 'to the Legislature by June 2016.\\n'\n", 315 | " '\\n'\n", 316 | " '4 2017 Fast Facts \\n'\n", 317 | " '\\n'\n", 318 | " ' (Updated 1/2017)\\n'\n", 319 | " '\\n'\n", 320 | " '\\x0c')\n" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "from pprint import pprint as prettyprint\n", 326 | "prettyprint(text)" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "Write out text to file" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 8, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "savefile = filename.replace('pdf','txt')\n", 343 | "with open(savefile,'w') as fout:\n", 344 | " fout.write(text)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "markdown", 349 | "metadata": {}, 350 | "source": [ 351 | "# Conclusion\n", 352 | "\n", 353 | "Trying to reconstruct tables from pdf text mining tools looks like a formatting nightmare in the same realm as copy and paste." 
354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": { 360 | "collapsed": true 361 | }, 362 | "outputs": [], 363 | "source": [] 364 | } 365 | ], 366 | "metadata": { 367 | "kernelspec": { 368 | "display_name": "Python 3", 369 | "language": "python", 370 | "name": "python3" 371 | }, 372 | "language_info": { 373 | "codemirror_mode": { 374 | "name": "ipython", 375 | "version": 3 376 | }, 377 | "file_extension": ".py", 378 | "mimetype": "text/x-python", 379 | "name": "python", 380 | "nbconvert_exporter": "python", 381 | "pygments_lexer": "ipython3", 382 | "version": "3.5.1" 383 | } 384 | }, 385 | "nbformat": 4, 386 | "nbformat_minor": 1 387 | } 388 | -------------------------------------------------------------------------------- /pytorch/Workshop_CNN.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Workshop CNN.ipynb", 7 | "provenance": [], 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "view-in-github", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "\"Open" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "aa28NQ4b50Wk", 30 | "colab_type": "text" 31 | }, 32 | "source": [ 33 | "# Image Classification Problem" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "metadata": { 39 | "id": "WK-LdzWl5vH6", 40 | "colab_type": "code", 41 | "outputId": "9633651c-31d5-4c8e-a623-c1e209426f7e", 42 | "colab": { 43 | "base_uri": "https://localhost:8080/", 44 | "height": 67 45 | } 46 | }, 47 | "source": [ 48 | "import torch\n", 49 | "import torch.nn as nn\n", 50 | "import torch.optim as optim\n", 51 | "import torch.nn.functional as F\n", 52 | "from torch.utils.data import DataLoader\n", 53 | 
"import torchvision\n", 54 | "from torchvision import datasets, transforms\n", 55 | "import numpy as np\n", 56 | "\n", 57 | "print('Torch version', torch.__version__)\n", 58 | "print('Torchvision version', torchvision.__version__)\n", 59 | "print('Numpy version', np.__version__)" 60 | ], 61 | "execution_count": 1, 62 | "outputs": [ 63 | { 64 | "output_type": "stream", 65 | "text": [ 66 | "Torch version 1.3.1\n", 67 | "Torchvision version 0.4.2\n", 68 | "Numpy version 1.17.4\n" 69 | ], 70 | "name": "stdout" 71 | } 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": { 77 | "id": "AKCLbDM754c0", 78 | "colab_type": "text" 79 | }, 80 | "source": [ 81 | "The following should say `cuda:0`. If it does not, we need to go to *Edit* -> *Notebook settings* and change it to a `GPU` from `None`. You only have to do this once per notebook." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "metadata": { 87 | "id": "a2RWBSbo53bz", 88 | "colab_type": "code", 89 | "outputId": "fa70a535-a31d-405b-90cd-5ccb15a4457a", 90 | "colab": { 91 | "base_uri": "https://localhost:8080/", 92 | "height": 34 93 | } 94 | }, 95 | "source": [ 96 | "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n", 97 | "device" 98 | ], 99 | "execution_count": 2, 100 | "outputs": [ 101 | { 102 | "output_type": "execute_result", 103 | "data": { 104 | "text/plain": [ 105 | "'cpu'" 106 | ] 107 | }, 108 | "metadata": { 109 | "tags": [] 110 | }, 111 | "execution_count": 2 112 | } 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "DhBlj7GI6Npt", 119 | "colab_type": "text" 120 | }, 121 | "source": [ 122 | "Define a transform to convert image to PyTorch tensor" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "metadata": { 128 | "id": "VIjSGCNv53fT", 129 | "colab_type": "code", 130 | "colab": {} 131 | }, 132 | "source": [ 133 | "tf = transforms.ToTensor() # convert image to PyTorch tensor" 134 | ], 135 | "execution_count": 0, 136 | "outputs": [] 137 
| }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": { 141 | "id": "0p_SPGXQ6PaD", 142 | "colab_type": "text" 143 | }, 144 | "source": [ 145 | "Download training **dataset** and create `DataLoader`" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "V5R_cuLZ53ib", 152 | "colab_type": "code", 153 | "colab": {} 154 | }, 155 | "source": [ 156 | "train_loader = DataLoader(datasets.MNIST('data', download=True, train=True, transform=tf),\n", 157 | " batch_size=100, \n", 158 | " shuffle=True)" 159 | ], 160 | "execution_count": 0, 161 | "outputs": [] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": { 166 | "id": "enL0Q9306QBM", 167 | "colab_type": "text" 168 | }, 169 | "source": [ 170 | "Download validation **dataset** and create `DataLoader`\n" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "metadata": { 176 | "id": "ASnI4ZrW53lj", 177 | "colab_type": "code", 178 | "colab": {} 179 | }, 180 | "source": [ 181 | "test_loader = DataLoader(datasets.MNIST('data', download=True, train=False, transform=tf),\n", 182 | " batch_size=100, \n", 183 | " shuffle=True)" 184 | ], 185 | "execution_count": 0, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": { 191 | "id": "ttYvEnkb6Qkb", 192 | "colab_type": "text" 193 | }, 194 | "source": [ 195 | "We'll write a python class to define our convolutional neural network."
196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "metadata": { 201 | "id": "RBZtZhgy6TCk", 202 | "colab_type": "code", 203 | "colab": {} 204 | }, 205 | "source": [ 206 | "class TwoLayerCNN(nn.Module):\n", 207 | " def __init__(self):\n", 208 | " super().__init__()\n", 209 | " self.batchnorm = nn.BatchNorm2d(1)\n", 210 | " self.conv1 = nn.Conv2d(1,4,5) # input image channel, output channels, square kernel size\n", 211 | " self.conv2 = nn.Conv2d(4,16,5)\n", 212 | " self.fc1 = nn.Linear(16*4*4,100) # fully connected, 4x4 image size result from 2 conv layers\n", 213 | " self.fc2 = nn.Linear(100,10)\n", 214 | " \n", 215 | " def forward(self,x):\n", 216 | " x1 = self.batchnorm(x)\n", 217 | " x1 = F.max_pool2d(F.relu(self.conv1(x1)), 2)\n", 218 | " x1 = F.max_pool2d(F.relu(self.conv2(x1)), 2)\n", 219 | " x1 = x1.view(-1, self.num_flat_features(x1))\n", 220 | " x1 = F.dropout(F.relu(self.fc1(x1)), 0.4, training=self.training) # p=0.4; disabled in eval mode\n", 221 | " x1 = F.relu(self.fc2(x1))\n", 222 | " return x1\n", 223 | " \n", 224 | " def num_flat_features(self, x):\n", 225 | " size = x.size()[1:] # all dimensions except the batch dimension\n", 226 | " num_features = np.prod(size)\n", 227 | " return num_features" 228 | ], 229 | "execution_count": 0, 230 | "outputs": [] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": { 235 | "id": "M54pEgD06RoL", 236 | "colab_type": "text" 237 | }, 238 | "source": [ 239 | "We create an instance of this class" 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "metadata": { 245 | "id": "iVdKpsuh6TS0", 246 | "colab_type": "code", 247 | "outputId": "258fd01d-b5e0-4e50-d9fd-2655d7e04704", 248 | "colab": { 249 | "base_uri": "https://localhost:8080/", 250 | "height": 134 251 | } 252 | }, 253 | "source": [ 254 | "model = TwoLayerCNN().to(device)\n", 255 | "model" 256 | ], 257 | "execution_count": 7, 258 | "outputs": [ 259 | { 260 | "output_type": "execute_result", 261 | "data": { 262 | "text/plain": [ 263 | "TwoLayerCNN(\n", 264 | " (batchnorm): BatchNorm2d(1,
eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", 265 | " (conv1): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))\n", 266 | " (conv2): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))\n", 267 | " (fc1): Linear(in_features=256, out_features=100, bias=True)\n", 268 | " (fc2): Linear(in_features=100, out_features=10, bias=True)\n", 269 | ")" 270 | ] 271 | }, 272 | "metadata": { 273 | "tags": [] 274 | }, 275 | "execution_count": 7 276 | } 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": { 282 | "id": "SFRVmzOR6SB7", 283 | "colab_type": "text" 284 | }, 285 | "source": [ 286 | "We'll define a template for our `fit_model` function that contains `train`, `validate`, and `accuracy` functions." 287 | ] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "metadata": { 292 | "id": "gZnvxqPu53rs", 293 | "colab_type": "code", 294 | "colab": {} 295 | }, 296 | "source": [ 297 | "def fit_model(model, loss_fn, optimizer):\n", 298 | " def train(x,y):\n", 299 | " yhat = model(x)\n", 300 | " loss = loss_fn(yhat,y)\n", 301 | " optimizer.zero_grad()\n", 302 | " loss.backward()\n", 303 | " optimizer.step()\n", 304 | " return loss.item(), accuracy(yhat,y)\n", 305 | " \n", 306 | " def validate(x,y):\n", 307 | " yhat = model(x)\n", 308 | " loss = loss_fn(yhat,y)\n", 309 | " return loss.item(), accuracy(yhat,y)\n", 310 | " \n", 311 | " def accuracy(yhat,y):\n", 312 | " probs = np.argmax(yhat.cpu().detach().numpy(), axis=1)\n", 313 | " actual = y.cpu().detach().numpy()\n", 314 | " correct = (probs == actual).sum()\n", 315 | " total = y.shape[0]\n", 316 | " return correct / total \n", 317 | " \n", 318 | " return train, validate" 319 | ], 320 | "execution_count": 0, 321 | "outputs": [] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": { 326 | "id": "qCrMhx8Q6TLd", 327 | "colab_type": "text" 328 | }, 329 | "source": [ 330 | "We define our *loss function*, *learning rate*, and our *optimizer*. 
We pass this to `fit_model` to return our `train` and `validate` functions." 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "metadata": { 336 | "id": "XFBR4YbD53oz", 337 | "colab_type": "code", 338 | "colab": {} 339 | }, 340 | "source": [ 341 | "loss_fn = nn.CrossEntropyLoss()\n", 342 | "learning_rate = 0.01\n", 343 | "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n", 344 | "train, validate = fit_model(model, loss_fn, optimizer)" 345 | ], 346 | "execution_count": 0, 347 | "outputs": [] 348 | }, 349 | { 350 | "cell_type": "markdown", 351 | "metadata": { 352 | "id": "XNVzkMZI6Tam", 353 | "colab_type": "text" 354 | }, 355 | "source": [ 356 | "Here is our training loop with mini-batch processing. We have to move each batch onto the GPU. We iterate over the validation `DataLoader` (`test_loader`) in the same way to compute the validation loss and accuracy each epoch." 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "metadata": { 362 | "id": "AKk5nZjM6Ths", 363 | "colab_type": "code", 364 | "outputId": "8a867760-9fc7-45de-8398-08b25a395c4c", 365 | "colab": { 366 | "base_uri": "https://localhost:8080/", 367 | "height": 101 368 | } 369 | }, 370 | "source": [ 371 | "epochs = 5\n", 372 | "for epoch in range(epochs):\n", 373 | " # training \n", 374 | " losses, accuracy = [], []\n", 375 | " for i, (xbatch, ybatch) in enumerate(train_loader):\n", 376 | " xbatch = xbatch.to(device)\n", 377 | " ybatch = ybatch.to(device)\n", 378 | " loss, acc = train(xbatch, ybatch)\n", 379 | " losses.append(loss)\n", 380 | " accuracy.append(acc)\n", 381 | " training_loss = np.mean(losses)\n", 382 | " training_accuracy = np.mean(accuracy)\n", 383 | " # validation\n", 384 | " val_losses, val_accuracy = [], []\n", 385 | " for j, (xtest, ytest) in enumerate(test_loader):\n", 386 | " xtest = xtest.to(device)\n", 387 | " ytest = ytest.to(device)\n", 388 | " val_loss, val_acc = validate(xtest, ytest)\n", 389 | " val_losses.append(val_loss)\n", 390 | " 
val_accuracy.append(val_acc)\n", 391 | " validation_loss = np.mean(val_losses)\n", 392 | " validation_accuracy = np.mean(val_accuracy)\n", 393 | " # print intermediate results\n", 394 | " print(f'{epoch}, {training_loss:.4f}, {training_accuracy:.3f}, {validation_loss:.4f}, {validation_accuracy:.3f}')" 395 | ], 396 | "execution_count": 10, 397 | "outputs": [ 398 | { 399 | "output_type": "stream", 400 | "text": [ 401 | "0, 0.3363, 0.899, 0.1599, 0.954\n", 402 | "1, 0.1516, 0.956, 0.1300, 0.961\n", 403 | "2, 0.1271, 0.963, 0.1067, 0.965\n", 404 | "3, 0.1139, 0.967, 0.1046, 0.969\n", 405 | "4, 0.1044, 0.970, 0.0955, 0.972\n" 406 | ], 407 | "name": "stdout" 408 | } 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": { 414 | "id": "1AudSc0uAqt9", 415 | "colab_type": "text" 416 | }, 417 | "source": [ 418 | "### nn.Sequential\n", 419 | "\n", 420 | "If we wanted to use the simpler `nn.Sequential` function, our model construction would have looked like this." 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "metadata": { 426 | "id": "JlIMzvDyAq3U", 427 | "colab_type": "code", 428 | "outputId": "eb88f17a-b8e3-4089-d468-7fb01d45c00c", 429 | "colab": { 430 | "base_uri": "https://localhost:8080/", 431 | "height": 269 432 | } 433 | }, 434 | "source": [ 435 | "model_sequential = nn.Sequential(\n", 436 | " nn.BatchNorm2d(1),\n", 437 | " nn.Conv2d(1,4,5),\n", 438 | " nn.ReLU(),\n", 439 | " nn.MaxPool2d(2),\n", 440 | " nn.Conv2d(4,16,5),\n", 441 | " nn.ReLU(),\n", 442 | " nn.MaxPool2d(2),\n", 443 | " nn.Flatten(),\n", 444 | " nn.Linear(256,100),\n", 445 | " nn.ReLU(),\n", 446 | " nn.Dropout(0.4),\n", 447 | " nn.Linear(100,10),\n", 448 | " nn.Softmax(dim=1),\n", 449 | ").to(device)\n", 450 | "model_sequential" 451 | ], 452 | "execution_count": 11, 453 | "outputs": [ 454 | { 455 | "output_type": "execute_result", 456 | "data": { 457 | "text/plain": [ 458 | "Sequential(\n", 459 | " (0): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True)\n", 460 | " (1): Conv2d(1, 4, kernel_size=(5, 5), stride=(1, 1))\n", 461 | " (2): ReLU()\n", 462 | " (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 463 | " (4): Conv2d(4, 16, kernel_size=(5, 5), stride=(1, 1))\n", 464 | " (5): ReLU()\n", 465 | " (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", 466 | " (7): Flatten()\n", 467 | " (8): Linear(in_features=256, out_features=100, bias=True)\n", 468 | " (9): ReLU()\n", 469 | " (10): Dropout(p=0.4, inplace=False)\n", 470 | " (11): Linear(in_features=100, out_features=10, bias=True)\n", 471 | " (12): Softmax(dim=1)\n", 472 | ")" 473 | ] 474 | }, 475 | "metadata": { 476 | "tags": [] 477 | }, 478 | "execution_count": 11 479 | } 480 | ] 481 | } 482 | ] 483 | } -------------------------------------------------------------------------------- /pytorch/Workshop_Classification.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Workshop Classification.ipynb", 7 | "provenance": [], 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | } 14 | }, 15 | "cells": [ 16 | { 17 | "cell_type": "markdown", 18 | "metadata": { 19 | "id": "view-in-github", 20 | "colab_type": "text" 21 | }, 22 | "source": [ 23 | "\"Open" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "aa28NQ4b50Wk", 30 | "colab_type": "text" 31 | }, 32 | "source": [ 33 | "# Classification Problem" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "metadata": { 39 | "id": "WK-LdzWl5vH6", 40 | "colab_type": "code", 41 | "colab": { 42 | "base_uri": "https://localhost:8080/", 43 | "height": 67 44 | }, 45 | "outputId": "0f560fe5-5a78-4942-950e-8f5661f81fd9" 46 | }, 47 | "source": [ 48 | "import torch\n", 49 | "import torch.nn as nn\n", 50 | "import 
torch.optim as optim\n", 51 | "import torch.nn.functional as F\n", 52 | "from torch.utils.data import TensorDataset, DataLoader\n", 53 | "import numpy as np\n", 54 | "import pandas as pd\n", 55 | "\n", 56 | "print('Torch version', torch.__version__)\n", 57 | "print('Pandas version', pd.__version__)\n", 58 | "print('Numpy version', np.__version__)" 59 | ], 60 | "execution_count": 1, 61 | "outputs": [ 62 | { 63 | "output_type": "stream", 64 | "text": [ 65 | "Torch version 1.3.1\n", 66 | "Pandas version 0.25.3\n", 67 | "Numpy version 1.17.4\n" 68 | ], 69 | "name": "stdout" 70 | } 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": { 76 | "id": "AKCLbDM754c0", 77 | "colab_type": "text" 78 | }, 79 | "source": [ 80 | "The following should say `cuda:0`. If it does not, we need to go to *Edit* -> *Notebook settings* and change it to a `GPU` from `None`. You only have to do this once per notebook." 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "metadata": { 86 | "id": "a2RWBSbo53bz", 87 | "colab_type": "code", 88 | "colab": { 89 | "base_uri": "https://localhost:8080/", 90 | "height": 34 91 | }, 92 | "outputId": "81fac650-e814-4b79-f433-a47d4d089dce" 93 | }, 94 | "source": [ 95 | "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n", 96 | "device" 97 | ], 98 | "execution_count": 2, 99 | "outputs": [ 100 | { 101 | "output_type": "execute_result", 102 | "data": { 103 | "text/plain": [ 104 | "'cpu'" 105 | ] 106 | }, 107 | "metadata": { 108 | "tags": [] 109 | }, 110 | "execution_count": 2 111 | } 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": { 117 | "id": "DhBlj7GI6Npt", 118 | "colab_type": "text" 119 | }, 120 | "source": [ 121 | "Read in dataset" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "metadata": { 127 | "id": "VIjSGCNv53fT", 128 | "colab_type": "code", 129 | "colab": {} 130 | }, 131 | "source": [ 132 | "df_train = 
pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/iris_training.csv', header=None)\n", 133 | "df_val = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/iris_test.csv', header=None)" 134 | ], 135 | "execution_count": 0, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": { 141 | "id": "0p_SPGXQ6PaD", 142 | "colab_type": "text" 143 | }, 144 | "source": [ 145 | "Construct our x,y variables along with the training and validation dataset" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "V5R_cuLZ53ib", 152 | "colab_type": "code", 153 | "colab": {} 154 | }, 155 | "source": [ 156 | "x_train = df_train.iloc[:,0:-1]\n", 157 | "y_train = df_train.iloc[:,-1]\n", 158 | "x_val = df_val.iloc[:,0:-1]\n", 159 | "y_val = df_val.iloc[:,-1]" 160 | ], 161 | "execution_count": 0, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": { 167 | "id": "enL0Q9306QBM", 168 | "colab_type": "text" 169 | }, 170 | "source": [ 171 | "Preprocess our data to go from a `pandas` DataFrame to a `numpy` array to a `torch` tensor." 
172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "ASnI4ZrW53lj", 178 | "colab_type": "code", 179 | "colab": {} 180 | }, 181 | "source": [ 182 | "xtrain = torch.tensor(x_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n", 183 | "ytrain = torch.tensor(y_train.to_numpy(), device=device, dtype=torch.long, requires_grad=False)\n", 184 | "xval = torch.tensor(x_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n", 185 | "yval = torch.tensor(y_val.to_numpy(), device=device, dtype=torch.long, requires_grad=False)" 186 | ], 187 | "execution_count": 0, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": { 193 | "id": "ttYvEnkb6Qkb", 194 | "colab_type": "text" 195 | }, 196 | "source": [ 197 | "We'll write a python class to define our neural network." 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "metadata": { 203 | "id": "RBZtZhgy6TCk", 204 | "colab_type": "code", 205 | "colab": {} 206 | }, 207 | "source": [ 208 | "class FourLayerNN(nn.Module):\n", 209 | " def __init__(self, D_in, H1, H2, H3, D_out):\n", 210 | " super().__init__()\n", 211 | " self.linear1 = nn.Linear(D_in, H1)\n", 212 | " self.linear2 = nn.Linear(H1,H2)\n", 213 | " self.linear3 = nn.Linear(H2,H3)\n", 214 | " self.linear4 = nn.Linear(H3,D_out)\n", 215 | " \n", 216 | " def forward(self,x):\n", 217 | " h1_relu = self.linear1(x).clamp(min=0)\n", 218 | " h2_relu = self.linear2(h1_relu).clamp(min=0)\n", 219 | " h3_relu = self.linear3(h2_relu).clamp(min=0)\n", 220 | " y_pred = self.linear4(h3_relu)\n", 221 | " return y_pred" 222 | ], 223 | "execution_count": 0, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "id": "M54pEgD06RoL", 230 | "colab_type": "text" 231 | }, 232 | "source": [ 233 | "We create an instance of this class" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "metadata": { 239 | "id": "iVdKpsuh6TS0", 240 | "colab_type": 
"code", 241 | "colab": { 242 | "base_uri": "https://localhost:8080/", 243 | "height": 118 244 | }, 245 | "outputId": "b83a76b4-a989-4f10-a52a-2f4857de6ed1" 246 | }, 247 | "source": [ 248 | "model = FourLayerNN(xtrain.shape[1],1000,500,70,y_train.nunique()).to(device)\n", 249 | "model" 250 | ], 251 | "execution_count": 7, 252 | "outputs": [ 253 | { 254 | "output_type": "execute_result", 255 | "data": { 256 | "text/plain": [ 257 | "FourLayerNN(\n", 258 | " (linear1): Linear(in_features=4, out_features=1000, bias=True)\n", 259 | " (linear2): Linear(in_features=1000, out_features=500, bias=True)\n", 260 | " (linear3): Linear(in_features=500, out_features=70, bias=True)\n", 261 | " (linear4): Linear(in_features=70, out_features=3, bias=True)\n", 262 | ")" 263 | ] 264 | }, 265 | "metadata": { 266 | "tags": [] 267 | }, 268 | "execution_count": 7 269 | } 270 | ] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": { 275 | "id": "SFRVmzOR6SB7", 276 | "colab_type": "text" 277 | }, 278 | "source": [ 279 | "We'll define a template for our `fit_model` function that contains `train`, `validate`, and `accuracy` functions." 
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "metadata": { 285 | "id": "gZnvxqPu53rs", 286 | "colab_type": "code", 287 | "colab": {} 288 | }, 289 | "source": [ 290 | "def fit_model(model, loss_fn, optimizer):\n", 291 | " def train(x,y):\n", 292 | " yhat = model(x)\n", 293 | " loss = loss_fn(yhat,y)\n", 294 | " optimizer.zero_grad()\n", 295 | " loss.backward()\n", 296 | " optimizer.step()\n", 297 | " return loss.item(), accuracy(yhat,y)\n", 298 | " \n", 299 | " def validate(x,y):\n", 300 | " yhat = model(x)\n", 301 | " loss = loss_fn(yhat,y)\n", 302 | " return loss.item(), accuracy(yhat,y)\n", 303 | " \n", 304 | " def accuracy(yhat,y):\n", 305 | " probs = np.argmax(yhat.cpu().detach().numpy(), axis=1)\n", 306 | " actual = y.cpu().detach().numpy()\n", 307 | " correct = (probs == actual).sum()\n", 308 | " total = y.shape[0]\n", 309 | " return correct / total \n", 310 | " \n", 311 | " return train, validate" 312 | ], 313 | "execution_count": 0, 314 | "outputs": [] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": { 319 | "id": "qCrMhx8Q6TLd", 320 | "colab_type": "text" 321 | }, 322 | "source": [ 323 | "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions." 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "metadata": { 329 | "id": "XFBR4YbD53oz", 330 | "colab_type": "code", 331 | "colab": {} 332 | }, 333 | "source": [ 334 | "loss_fn = nn.CrossEntropyLoss()\n", 335 | "learning_rate = 0.01\n", 336 | "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n", 337 | "train, validate = fit_model(model, loss_fn, optimizer)" 338 | ], 339 | "execution_count": 0, 340 | "outputs": [] 341 | }, 342 | { 343 | "cell_type": "markdown", 344 | "metadata": { 345 | "id": "ME_plDOp6Slt", 346 | "colab_type": "text" 347 | }, 348 | "source": [ 349 | "Define a `DataLoader` for our mini-batches." 
350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "metadata": { 355 | "id": "5SS1NgRs6Syz", 356 | "colab_type": "code", 357 | "colab": {} 358 | }, 359 | "source": [ 360 | "train_data = TensorDataset(xtrain, ytrain)\n", 361 | "train_loader = DataLoader(dataset=train_data, batch_size=60, shuffle=True)" 362 | ], 363 | "execution_count": 0, 364 | "outputs": [] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "metadata": { 369 | "id": "XNVzkMZI6Tam", 370 | "colab_type": "text" 371 | }, 372 | "source": [ 373 | "Here is our training loop with mini-batch processing. We have to move each batch onto the GPU. We also should have a `DataLoader` for the validation dataset but we'll skip that in this case since it is so small." 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "metadata": { 379 | "id": "AKk5nZjM6Ths", 380 | "colab_type": "code", 381 | "colab": { 382 | "base_uri": "https://localhost:8080/", 383 | "height": 353 384 | }, 385 | "outputId": "adecf0cb-e8fa-4f20-d18b-2ac258a72d8f" 386 | }, 387 | "source": [ 388 | "epochs = 2000\n", 389 | "for epoch in range(epochs):\n", 390 | " # training\n", 391 | " losses = []\n", 392 | " for i, (xbatch, ybatch) in enumerate(train_loader):\n", 393 | " xbatch = xbatch.to(device)\n", 394 | " ybatch = ybatch.to(device)\n", 395 | " loss, accuracy = train(xbatch, ybatch)\n", 396 | " losses.append(loss)\n", 397 | " training_loss = np.mean(losses)\n", 398 | " training_accuracy = np.mean(accuracy)\n", 399 | " # validation\n", 400 | " validation_loss, validation_accuracy = validate(xval, yval)\n", 401 | " # print intermediate results\n", 402 | " if epoch%100 == 99:\n", 403 | " print(f'{epoch}, {training_loss:.4f}, {training_accuracy:.2f}, {validation_loss:.4f}, {accuracy:.2f}')" 404 | ], 405 | "execution_count": 11, 406 | "outputs": [ 407 | { 408 | "output_type": "stream", 409 | "text": [ 410 | "99, 0.0790, 0.97, 0.0645, 0.97\n", 411 | "199, 0.0817, 0.97, 0.0577, 0.97\n", 412 | "299, 0.0537, 1.00, 0.0652, 1.00\n", 413 | 
"399, 0.0497, 0.98, 0.0516, 0.98\n", 414 | "499, 0.0403, 1.00, 0.0566, 1.00\n", 415 | "599, 0.0382, 0.98, 0.0541, 0.98\n", 416 | "699, 0.0382, 0.98, 0.0578, 0.98\n", 417 | "799, 0.0355, 0.98, 0.0596, 0.98\n", 418 | "899, 0.0338, 0.98, 0.0643, 0.98\n", 419 | "999, 0.0385, 1.00, 0.0620, 1.00\n", 420 | "1099, 0.0339, 1.00, 0.0672, 1.00\n", 421 | "1199, 0.0327, 1.00, 0.0677, 1.00\n", 422 | "1299, 0.0293, 1.00, 0.0716, 1.00\n", 423 | "1399, 0.0293, 1.00, 0.0717, 1.00\n", 424 | "1499, 0.0290, 1.00, 0.0738, 1.00\n", 425 | "1599, 0.0267, 1.00, 0.0826, 1.00\n", 426 | "1699, 0.0280, 1.00, 0.0815, 1.00\n", 427 | "1799, 0.0274, 1.00, 0.0912, 1.00\n", 428 | "1899, 0.0253, 0.98, 0.1166, 0.98\n", 429 | "1999, 0.0249, 1.00, 0.0899, 1.00\n" 430 | ], 431 | "name": "stdout" 432 | } 433 | ] 434 | }, 435 | { 436 | "cell_type": "markdown", 437 | "metadata": { 438 | "id": "1AudSc0uAqt9", 439 | "colab_type": "text" 440 | }, 441 | "source": [ 442 | "### nn.Sequential\n", 443 | "\n", 444 | "If we wanted to user the simpler `nn.Sequential` function, our model construction would have looked like this." 
445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "metadata": { 450 | "id": "JlIMzvDyAq3U", 451 | "colab_type": "code", 452 | "colab": { 453 | "base_uri": "https://localhost:8080/", 454 | "height": 168 455 | }, 456 | "outputId": "d4987403-3fbd-48ea-bcd7-06fbbc112df7" 457 | }, 458 | "source": [ 459 | "model_sequential = nn.Sequential(\n", 460 | " nn.Linear(xtrain.shape[1],1000),\n", 461 | " nn.ReLU(),\n", 462 | " nn.Linear(1000,500),\n", 463 | " nn.ReLU(),\n", 464 | " nn.Linear(500,70),\n", 465 | " nn.ReLU(),\n", 466 | " nn.Linear(70,y_train.nunique()),\n", 467 | ").to(device)\n", 468 | "print(model_sequential)" 469 | ], 470 | "execution_count": 12, 471 | "outputs": [ 472 | { 473 | "output_type": "stream", 474 | "text": [ 475 | "Sequential(\n", 476 | " (0): Linear(in_features=4, out_features=1000, bias=True)\n", 477 | " (1): ReLU()\n", 478 | " (2): Linear(in_features=1000, out_features=500, bias=True)\n", 479 | " (3): ReLU()\n", 480 | " (4): Linear(in_features=500, out_features=70, bias=True)\n", 481 | " (5): ReLU()\n", 482 | " (6): Linear(in_features=70, out_features=3, bias=True)\n", 483 | ")\n" 484 | ], 485 | "name": "stdout" 486 | } 487 | ] 488 | } 489 | ] 490 | } -------------------------------------------------------------------------------- /matlab/workshop_plotname.m: -------------------------------------------------------------------------------- 1 | %% UROP Matlab Workshop 2 | % @author: Alex Cao, University of Michigan 3 | % Email: caoa AT umich DOT edu 4 | % Consulting for Statistics, Computing, and Analytics Research (CSCAR) 5 | % MATLAB Version: 9.0.0.370719 (R2016a) 6 | % Operating System: Microsoft Windows 7 Enterprise Version 6.1 (Build 7601: Service Pack 1) 7 | % Java Version: Java 1.7.0_60-b19 with Oracle Corporation Java HotSpot(TM) 64-Bit Server VM mixed mode 8 | 9 | % Students can install a free version of Matlab on their PC 10 | % https://www.itcs.umich.edu/sw-info/math/MATLABStudents.html 11 | 12 | %% Start with a clean slate 13 | 
clear; close all 14 | 15 | %% Creating variables 16 | a = 3.14 17 | b = 'this is a string' 18 | c = [2 4; 19 | 6 8] 20 | 21 | %% Built-In functions and constants 22 | % Constant 23 | d = pi 24 | % Imaginary numbers 25 | e = sqrt(-9) 26 | % Creating imaginary numbers 27 | f = 1-2i 28 | 29 | %% Creating vectors and matrices 30 | % creating a row vector 31 | row_vector = [2 4 6 8 10] 32 | % creating a column vector 33 | col_vector = [1; 34 | 3; 35 | 5; 36 | 7; 37 | 9] 38 | % transpose 39 | row_vector = row_vector' % or row_vector = [2 4 6 8 10]' 40 | % creating a matrix 41 | matrix = [9 8 7; 42 | 6 5 4; 43 | 3 2 1] 44 | % Adding rows or columns to an existing matrix or vector 45 | v = [10 20 30] 46 | addrow = [matrix; 47 | v] 48 | addcol = [matrix v'] 49 | % Deleting rows or columns from an existing matrix or vector 50 | addrow(end,:) = [] 51 | addcol(:,4) = [] 52 | 53 | %% Selecting and accessing data 54 | % Select column(s) of data 55 | a = matrix(:,1) 56 | % To select multiple columns 57 | b = matrix(:,2:3) 58 | % Columns do not even have to be continuous 59 | c = matrix(:,[3 1]) 60 | % Exact same thing for rows 61 | d = matrix(1,:) 62 | e = matrix(2:3,:) 63 | f = matrix([3 1],:) 64 | 65 | %% Plotting 66 | M = magic(3) % magic square 67 | plot(M(:,1),M(:,2),'o-') 68 | 69 | %% Exercise 1 (5 minutes) 70 | % Task 1: Construct a matrix of points to spell out the first letter of your name 71 | % Task 2: Plot the letter 72 | % For example, 73 | % Task 1 74 | A = [0 0; 75 | 1 4; 76 | 2 0; 77 | 1.5 2; 78 | 0.5 2]; 79 | % Task 2 80 | plot(A(:,1),A(:,2),'x-') 81 | 82 | %% Running external Matlab programs 83 | % Just type the name of the m-file (should not have any spaces) 84 | % Run letters m-file to get custom block font alphabet by author 85 | letters 86 | 87 | %% We will plot our name in Matlab 88 | % Grab your letters from the alphabet (cell array) using the index number 89 | A = alphabet{1}; 90 | L = alphabet{12}; 91 | E = alphabet{5}; 92 | X = alphabet{24}; 93 | 94 | 
%% Matrix 95 | % Letters are stored as a Nx2 matrix 96 | % First column are the x-coordinates 97 | % Second column are the y-coordinates 98 | A 99 | 100 | %% Plotting your name 101 | 102 | % Create a new cell array variable with our letters 103 | name = {A,L,E,X}; 104 | % Close previous figure 105 | close 106 | % Open new figure 107 | figure(1) 108 | % Iterate through the letters using a for loop 109 | for i = 1:length(name) 110 | % Grab a letter 111 | letter = name{i}; 112 | % Get x and y column 113 | x = letter(:,1); 114 | y = letter(:,2); 115 | % Plot letter with a blue line 116 | plot(x,y,'b-'); 117 | % Set axis limits 118 | ylim([-1 5]) 119 | axis equal 120 | % Do not overwrite previous plots 121 | hold on 122 | end 123 | 124 | % Create labels 125 | xlabel('x-axis') 126 | ylabel('y-axis') 127 | title('Plotting My Name') 128 | 129 | %% 130 | % In order to see all the letters clearly, we need to offset the letters 131 | % We'll use matrix addition/subtraction to create the offset 132 | % Creating a constant offset is easy 133 | close; figure(2) 134 | for i = 1:length(name) 135 | letter = name{i}; 136 | % add offset to the x-coordinate based on letter position 137 | x = letter(:,1) + i*2.5; 138 | y = letter(:,2); 139 | % plot letter with red dash dot line and circle markers 140 | plot(x,y,'r-.o'); 141 | hold on 142 | end 143 | % Alternate way to set axis limits 144 | axis([-1 15 -1 5]) 145 | 146 | % See the following URLs for different point and line options 147 | % http://www.mathworks.com/help/matlab/ref/plot.html#inputarg_LineSpec 148 | 149 | %% 150 | % You can also add a vector or matrix (instead of a constant) to a matrix 151 | % (i.e. letter) 152 | F = alphabet{6} 153 | plot(F(:,1),F(:,2),'g','linewidth',2) 154 | %% 155 | % Here we add a vector to change the first row (i.e. 
bottom point) 156 | F(1,:) = F(1,:)+[1 1] 157 | % plot a green line with a linewidth of 2 158 | plot(F(:,1),F(:,2),'g','linewidth',2) 159 | 160 | %% 161 | % We can also scale the letters so that they are smaller or bigger 162 | % We'll use matrix element multiplication to accomplish the scaling 163 | close; figure(3) 164 | for i = 1:length(name) 165 | letter = name{i}; 166 | % same x-offset as before 167 | x = letter(:,1) + i*2.5; 168 | % scale the y-coordinate by multiplication of an exponential 169 | y = letter(:,2) * exp(+i/5); 170 | % plot black line with diamond markers 171 | plot(x,y,'k-d'); 172 | hold on 173 | end 174 | % Alternate way to set axis limits 175 | xlim([0 15]) 176 | ylim([-1 10]) 177 | 178 | %% Exercise 2 (5 minutes) 179 | % Task 1: Copy the code section above 180 | % Task 2: Plot your name vertically by using matrix addition/subtraction 181 | % Task 3: Shrink the letters in your name by using matrix-element 182 | % multiplication/division and re-plot it 183 | 184 | %% Animation 185 | % Here's how to animate the letters sequentially 186 | close; figure(4) 187 | axis([-1,15,-1,10]) 188 | 189 | % Set time delay between drawing lines 190 | time_delay = 0.5; 191 | 192 | % Create empty cell array for animated objects 193 | object = {}; 194 | % Iterate thru the letters 195 | for i = 1:length(name) 196 | % Create animated lined object for each letter and save it to cell array 197 | object{i} = animatedline; 198 | letter = name{i}; 199 | x = letter(:,1) + i*2.5; 200 | y = letter(:,2) * exp(+i/5); 201 | % Iterate through each point defining our letter and draw it 202 | for j = 1:length(letter) 203 | addpoints(object{i},x(j),y(j)); 204 | drawnow 205 | pause(time_delay) 206 | end 207 | end 208 | 209 | %% 210 | % To produce smoother animation, we need more points to plot 211 | % Make lines with more points (say 100) 212 | num_of_pts = 100; 213 | % Create an evenly spaced vector using linspace 214 | % linspace(start,end,number of points) 215 | x1 = 
linspace(0,1,num_of_pts); 216 | y1 = linspace(0,4,num_of_pts); 217 | x2 = linspace(1,2,num_of_pts); 218 | y2 = linspace(4,0,num_of_pts); 219 | x3 = linspace(1.5,0.5,num_of_pts); 220 | y3 = linspace(2,2,num_of_pts); 221 | 222 | % quote symbol does a transpose of the matrix 223 | % we want to convert from a row vector to a column vector 224 | % we concatenate the vectors side by side and then on top of each 225 | % other 226 | A = [x1' y1'; 227 | x2' y2'; 228 | x3' y3']; 229 | 230 | % get size of A 231 | size(A) 232 | 233 | %% Exercise 3 (5 minutes) 234 | % Task 1: Similar to the code section above, construct a matrix for the 235 | % letter T using linspace with 100 pts. Hint: You need to create x1, y1, 236 | % x2, y2 for the vertical and horizontal lines 237 | % Task 2: Plot the matrix using x markers (e.g. plot(x,y,'x') ) and set the 238 | % axis so that the letter is not touching a border 239 | 240 | %% Redraw letter A with more points and no time delay 241 | close; figure(5) 242 | hA = animatedline; 243 | axis([-1,12,-1,5]) 244 | 245 | for k = 1:length(A) 246 | addpoints(hA,A(k,1),A(k,2)); 247 | drawnow 248 | end 249 | 250 | %% Smoother Animation 251 | % I've written a matlab function gen_more_pts.m to add more points to 252 | % letters for you. Let's use it to animate our names. 
253 | close; figure(6) 254 | axis([-1,15,-1,10]) 255 | 256 | % Create empty cell array for animated objects 257 | object = {}; 258 | % For loop for adding points to a line 259 | for i = 1:length(name) 260 | % Create animated line object for each letter and save it to cell array 261 | object{i} = animatedline; 262 | letter = name{i}; 263 | % gen_more_pts function creates more points for us 264 | animate_letter = gen_more_pts(letter); 265 | x = animate_letter(:,1) + i*2.5; 266 | y = animate_letter(:,2) * exp(+i/5); 267 | for j = 1:length(animate_letter) 268 | addpoints(object{i},x(j),y(j)); 269 | drawnow 270 | end 271 | end 272 | 273 | %% Exercise 4 (5 minutes) 274 | % Task 1: Copy the code section above 275 | % Task 2: Animate the vertical version of your name 276 | 277 | %% Plot Attributes 278 | % You can change the look of your lines after they are plotted by accessing 279 | % their attributes such as Color or LineWidth or LineStyle 280 | % To get a list of a plot's attributes, use the get command 281 | get(object{1}) 282 | % You can also type "object{1}." 
followed by a tab to get a dropdown list 283 | % To make something invisible use the Visible attribute 284 | object{1}.Visible = 'off' 285 | % To make something visible again 286 | object{1}.Visible = 'on' 287 | % If you don't know what options are available to you for a specific 288 | % attribute, you can use the set command 289 | set(object{1}) 290 | 291 | %% Generating Random Numbers and using a random seed 292 | % We will use random numbers to randomly change the attributes of our plot 293 | % Use a seed so that you get a predictable sequence of numbers 294 | % rng(56789) 295 | linestyle_options = {'-','--',':','-.'}; 296 | for i = 1:12 297 | % generate one random integer for which letter to modify 298 | n = randi(length(name),1); 299 | % generate a 3x1 vector of random numbers from (0,1) 300 | color = rand(3,1) 301 | object{n}.Color = color; 302 | % generate one random integer for the linewidth 303 | object{n}.LineWidth = randi(10,1); 304 | % generate one random integer for the linestyle 305 | index = randi(length(linestyle_options),1); 306 | object{n}.LineStyle = linestyle_options{index}; 307 | pause(1) 308 | end 309 | 310 | %% Exercise 5 (5 minutes) 311 | % Task 1: Generate a 4x1 vector of random integers from 1 to 10 312 | % Task 2: Take the sum of it 313 | % Task 3: Repeat 1 & 2 314 | % Task 4: Set a seed for the random generator using your favourite number 315 | % Task 5: Redo 1,2,3 316 | 317 | %% Let's redraw our name 318 | close; figure(7) 319 | for i = 1:length(name) 320 | letter = name{i}; 321 | animate_letter = gen_more_pts(letter); 322 | x = animate_letter(:,1) + i*2.5; 323 | y = animate_letter(:,2); 324 | % plot letters as magenta line with pentagon markers 325 | plot(x,y,'m-p'); 326 | hold on 327 | end 328 | % Alternate way to set axis limits 329 | axis([0 15 -1 5]) 330 | 331 | %% Now suppose we wanted to cut our name (i.e. 
points) into half 332 | % We can segment our name using logical indexing 333 | close; figure(8) 334 | y_cutoff = 2.5; 335 | for i = 1:length(name) 336 | letter = name{i}; 337 | animate_letter = gen_more_pts(letter); 338 | % original matrix size 339 | disp(size(animate_letter)) 340 | % generate boolean of pts meeting criterion 341 | index = animate_letter(:,2) < y_cutoff; 342 | % grab matching pts using indices 343 | animate_letter = animate_letter(index,:); 344 | % new matrix size (should be smaller) 345 | disp(size(animate_letter)) 346 | x = animate_letter(:,1) + i*2.5; 347 | y = animate_letter(:,2); 348 | plot(x,y,'m-p'); 349 | hold on 350 | end 351 | axis([0 15 -1 5]) 352 | 353 | %% You can use more than one logical operation at a time 354 | close; figure(9) 355 | % & means AND 356 | % | means OR 357 | y_cutoff = 2.5; 358 | for i = 1:length(name) 359 | letter = name{i}; 360 | animate_letter = gen_more_pts(letter); 361 | size(animate_letter) 362 | % AND statement joining two criteria 363 | index = (animate_letter(:,2) < y_cutoff) & (animate_letter(:,2) > 1.25); 364 | animate_letter = animate_letter(index,:); 365 | size(animate_letter) 366 | x = animate_letter(:,1) + i*2.5; 367 | y = animate_letter(:,2); 368 | plot(x,y,'m-p'); 369 | hold on 370 | end 371 | axis([0 15 -1 5]) 372 | 373 | %% Exercise 6 (5 minutes) 374 | % Task 1: Copy the code section above 375 | % Task 2: Only show the portion of your name that is less than 1 or greater 376 | % than 2 on the y-axis 377 | 378 | %% Import Data Demo 379 | % There are many functions to import data into Matlab from external sources 380 | % Some choices are: uiimport, load, importdata, textscan, dlmread, fread, 381 | % fscanf, readtable, xlsread 382 | % 383 | % The most friendly method to beginners is uiimport which acts like excel 384 | uiimport('crash.txt') 385 | 386 | %% Exercise 7 (10 minutes) 387 | % Task 1: Import the CrashSeverity column into the workspace 388 | % Task 2: Extract the fatal crashes (value = 1) using 
logical indexing 389 | % Task 3: Count how many fatal crashes there are in dataset 390 | % Task 4: Import the Longitude/Latitude columns into the workspace 391 | % Task 5: Plot Longitude/Latitude coordinates using any triangle marker. 392 | % Are there any bad data points? 393 | % Tip: Longitude should be negative in this case. 394 | % Task 6: Remove the bad points using logical indexing and re-plot the 395 | % coordinates using a triangle marker 396 | 397 | 398 | %% Some useful Matlab commands to know 399 | % Saving your work 400 | % Saving variables in your workspace 401 | save workshop.mat 402 | % Clear the workspace 403 | clear 404 | % Reload everything 405 | load workshop.mat 406 | % If you just want to save a couple of variables 407 | save workshop X E L A 408 | % close last figure 409 | close 410 | % close all figures 411 | close all 412 | % clear command window 413 | clc 414 | % bring up command history 415 | commandhistory 416 | % Last unassigned answer in command window 417 | ans 418 | 419 | %% Formatting output 420 | z = 1534513546 421 | % To change the look of the output, use the format function 422 | format longg 423 | z 424 | % To change back to the default format 425 | format 426 | 427 | %% Getting Help 428 | % help for a function 429 | help plot 430 | doc plot 431 | % Bring up Matlab examples 432 | demo 433 | % You can also use the search bar in the top right corner or use the *?* 434 | % icon next to it to open up an equivalent window 435 | 436 | %% References 437 | % MathWork (makers of Matlab) Resources 438 | 439 | % Matlab tutorials from MathWorks 440 | % https://www.mathworks.com/support/learn-with-matlab-tutorials.html 441 | % http://www.mathworks.com/help/matlab/getting-started-with-matlab.html 442 | 443 | % Matlab Forum for Q&A 444 | % http://www.mathworks.com/matlabcentral/answers/ 445 | 446 | % Cody: Challenge yourself to Matlab coding problems 447 | % http://www.mathworks.com/matlabcentral/cody 448 | 449 | % PDF tutorial 450 | % 
https://www.mathworks.com/help/pdf_doc/matlab/getstart.pdf 451 | 452 | % 3rd Party Add-Ons 453 | % http://www.mathworks.com/matlabcentral/fileexchange/ 454 | 455 | % Matlab Blogs 456 | % http://blogs.mathworks.com Matlab Blog 457 | 458 | % Matlab Toolboxes 459 | % https://www.mathworks.com/products/ 460 | 461 | % To see what is installed on your version of Matlab, use the ver 462 | % command 463 | ver 464 | 465 | %% Other Matlab Resources 466 | 467 | % Interactive course by the University of Edinburgh 468 | % http://www.see.ed.ac.uk/teaching/courses/matlab/ 469 | 470 | % Free online book 471 | % http://greenteapress.com/matlab/ 472 | 473 | 474 | %% Other Fun Stuff 475 | 476 | %% Alternate way to do animation 477 | % Rotate our name 478 | % Let's plot our name again 479 | % The plot command will be outside the for loop this time 480 | close; figure(100) 481 | alex = []; 482 | for i = 1:length(name) 483 | letter = name{i}; 484 | x = letter(:,1) + i*2.5; 485 | y = letter(:,2); 486 | alex = [alex; x y]; 487 | end 488 | hAlex = plot(alex(:,1),alex(:,2),'linewidth',2,'color',[0.7 0.2 0.5]); 489 | axis([-12 12 -12 12]) 490 | 491 | %% 492 | % Set the DataSource attribute to this variable 493 | hAlex.XDataSource = 'rotateAlex(:,1)'; 494 | hAlex.YDataSource = 'rotateAlex(:,2)'; 495 | % Create an evenly spaced vector from 0 to 2*pi for rotation 496 | th = linspace(0,2*pi,500); 497 | 498 | %% Rotate about z-axis 499 | for i = 1:length(th) 500 | % Angle 501 | theta = th(i); 502 | % Rotation matrix about z-axis 503 | Rz = [cos(theta) -sin(theta); 504 | sin(theta) cos(theta)]; 505 | % Matrix multiplication of rotation matrix with name points 506 | rotateAlex = (Rz*alex')'; 507 | % Update figure handle 508 | refreshdata(hAlex) 509 | % Pause in seconds 510 | pause(0.01) 511 | end 512 | 513 | %% Center my name around the origin 514 | % use repmat to duplicate 2x1 vector 515 | alex2 = alex - repmat(mean(alex),size(alex,1),1); 516 | % Add the z-value of zero to my name points 517 | 
alex2 = [alex2 zeros(size(alex2,1),1)]; 518 | 519 | %% Rotate about y-axis 520 | for i = 1:length(th) 521 | theta = th(i); 522 | Ry = [cos(theta) 0 sin(theta); 523 | 0 1 0; 524 | -sin(theta) 0 cos(theta)]; 525 | rotateAlex = (Ry*alex2')'; 526 | refreshdata(hAlex) 527 | pause(0.01) 528 | end 529 | 530 | %% Rotate about x-axis 531 | % Move my name around some more 532 | alex2(:,2) = alex2(:,2) + min(alex2(:,2)); 533 | for i = 1:length(th) 534 | theta = th(i); 535 | Rx = [1 0 0; 536 | 0 cos(theta) -sin(theta); 537 | 0 sin(theta) cos(theta)]; 538 | rotateAlex = (Rx*alex2')'; 539 | refreshdata(hAlex) 540 | pause(0.01) 541 | end 542 | 543 | %% Animation of a helix 544 | n = 5000; % determines how many pts to draw 545 | xc = 3; yc = 3; 546 | r = linspace(1,6,n); % radius 547 | t = linspace(0,12*pi,n); % how many loops to make 548 | x = 0.8*r.*cos(t) + xc; 549 | y = r.*sin(t) + yc; 550 | z = linspace(0,5,n); 551 | v = linspace(0.001,1,n); 552 | close all; figure(101) 553 | h = animatedline; 554 | axis([-10,10,-10,10,0 5]) 555 | grid on 556 | xlabel('X'); ylabel('Y'); zlabel('Z') 557 | for k = 1:n 558 | h.LineWidth = (v(k)+1)*4; 559 | h.Color = [v(k) 1-v(k) v(k)]; 560 | addpoints(h,x(k),y(k),z(k)); 561 | % Set viewing angle 562 | view(-mod(k/120,90),90-mod(k/72,70)) 563 | drawnow 564 | end 565 | -------------------------------------------------------------------------------- /pytorch/Workshop_Regression_Class.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Workshop Regression Class", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "include_colab_link": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | }, 15 | "accelerator": "GPU" 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 
25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "7G_TdHMkSL8q", 32 | "colab_type": "text" 33 | }, 34 | "source": [ 35 | "**Regression Problem**" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "metadata": { 41 | "id": "GWhz8RPhRfF1", 42 | "colab_type": "code", 43 | "outputId": "fb2e7b2d-d11b-4eef-fc81-62d6a7bbb725", 44 | "colab": { 45 | "base_uri": "https://localhost:8080/", 46 | "height": 67 47 | } 48 | }, 49 | "source": [ 50 | "import torch\n", 51 | "import torch.nn as nn\n", 52 | "import torch.optim as optim\n", 53 | "import torch.nn.functional as F\n", 54 | "from torch.utils.data import TensorDataset, DataLoader\n", 55 | "import numpy as np\n", 56 | "import pandas as pd\n", 57 | "\n", 58 | "print('Torch version', torch.__version__)\n", 59 | "print('Pandas version', pd.__version__)\n", 60 | "print('Numpy version', np.__version__)" 61 | ], 62 | "execution_count": 2, 63 | "outputs": [ 64 | { 65 | "output_type": "stream", 66 | "text": [ 67 | "Torch version 1.3.1\n", 68 | "Pandas version 0.25.3\n", 69 | "Numpy version 1.17.4\n" 70 | ], 71 | "name": "stdout" 72 | } 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": { 78 | "id": "d80zm5dOSsOr", 79 | "colab_type": "text" 80 | }, 81 | "source": [ 82 | "The following should say `cuda:0`. If it does not, we need to go to *Edit* -> *Notebook settings* and change it to a `GPU` from `None`. You only have to do this once per notebook." 
83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "metadata": { 88 | "id": "ga1yyVAfRgK3", 89 | "colab_type": "code", 90 | "outputId": "87b9a739-2cf0-4f10-f112-544c6bf05edf", 91 | "colab": { 92 | "base_uri": "https://localhost:8080/", 93 | "height": 34 94 | } 95 | }, 96 | "source": [ 97 | "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n", 98 | "device" 99 | ], 100 | "execution_count": 3, 101 | "outputs": [ 102 | { 103 | "output_type": "execute_result", 104 | "data": { 105 | "text/plain": [ 106 | "'cuda:0'" 107 | ] 108 | }, 109 | "metadata": { 110 | "tags": [] 111 | }, 112 | "execution_count": 3 113 | } 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": { 119 | "id": "rW2RnKe3hvmh", 120 | "colab_type": "text" 121 | }, 122 | "source": [ 123 | "Read in dataset" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "metadata": { 129 | "id": "3U_r7UGpRf-g", 130 | "colab_type": "code", 131 | "colab": {} 132 | }, 133 | "source": [ 134 | "df_train = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/dataRegression_train.csv', header=None)\n", 135 | "df_val = pd.read_csv('https://raw.githubusercontent.com/greght/Workshop-Keras-DNN/master/ChallengeProblems/dataRegression_test.csv', header=None)" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": { 143 | "id": "okdjDnbphzjK", 144 | "colab_type": "text" 145 | }, 146 | "source": [ 147 | "Construct our x,y variables along with the training and validation dataset" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "metadata": { 153 | "id": "7EBgffu2RgG_", 154 | "colab_type": "code", 155 | "colab": {} 156 | }, 157 | "source": [ 158 | "x_train = df_train.iloc[:,0:2]\n", 159 | "y_train = df_train.iloc[:,2]\n", 160 | "x_val = df_val.iloc[:,0:2]\n", 161 | "y_val = df_val.iloc[:,2]" 162 | ], 163 | "execution_count": 0, 164 | "outputs": [] 165 | }, 166 | { 167 | 
"cell_type": "markdown", 168 | "metadata": { 169 | "id": "7D4h_C16gcjG", 170 | "colab_type": "text" 171 | }, 172 | "source": [ 173 | "Preprocess our data to go from a `pandas` DataFrame to a `numpy` array to a `torch` tensor." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "metadata": { 179 | "id": "vjq5O0XfRmPv", 180 | "colab_type": "code", 181 | "colab": {} 182 | }, 183 | "source": [ 184 | "x_train_tensor = torch.tensor(x_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n", 185 | "y_train_tensor = torch.tensor(y_train.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n", 186 | "x_val_tensor = torch.tensor(x_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n", 187 | "y_val_tensor = torch.tensor(y_val.to_numpy(), device=device, dtype=torch.float, requires_grad=True)\n", 188 | "y_train_tensor = y_train_tensor.view(-1,1)\n", 189 | "y_val_tensor = y_val_tensor.view(-1,1)" 190 | ], 191 | "execution_count": 0, 192 | "outputs": [] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "id": "H-JVIuXegeix", 198 | "colab_type": "text" 199 | }, 200 | "source": [ 201 | "We'll write a python class to define out neural network." 
202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "metadata": { 207 | "id": "33HwoaxDR-mx", 208 | "colab_type": "code", 209 | "colab": {} 210 | }, 211 | "source": [ 212 | "class ThreeLayerNN(nn.Module):\n", 213 | " def __init__(self, dim_input, H):\n", 214 | " super().__init__()\n", 215 | " self.fc1 = nn.Linear(dim_input, H)\n", 216 | " self.fc2 = nn.Linear(H,H)\n", 217 | " self.fc3 = nn.Linear(H,1)\n", 218 | " \n", 219 | " def forward(self, x):\n", 220 | " x1 = F.relu(self.fc1(x))\n", 221 | " x2 = F.relu(self.fc2(x1))\n", 222 | " y_pred = self.fc3(x2)\n", 223 | " return y_pred" 224 | ], 225 | "execution_count": 0, 226 | "outputs": [] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": { 231 | "id": "NRH6Qp9VglBx", 232 | "colab_type": "text" 233 | }, 234 | "source": [ 235 | "We create an instance of this class." 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "metadata": { 241 | "id": "aqnHMyc9R-xI", 242 | "colab_type": "code", 243 | "outputId": "a3446684-71c0-4531-9bf7-3e544230f18d", 244 | "colab": { 245 | "base_uri": "https://localhost:8080/", 246 | "height": 101 247 | } 248 | }, 249 | "source": [ 250 | "model = ThreeLayerNN(x_train_tensor.shape[1],5).to(device)\n", 251 | "print(model)" 252 | ], 253 | "execution_count": 8, 254 | "outputs": [ 255 | { 256 | "output_type": "stream", 257 | "text": [ 258 | "ThreeLayerNN(\n", 259 | " (fc1): Linear(in_features=2, out_features=5, bias=True)\n", 260 | " (fc2): Linear(in_features=5, out_features=5, bias=True)\n", 261 | " (fc3): Linear(in_features=5, out_features=1, bias=True)\n", 262 | ")\n" 263 | ], 264 | "name": "stdout" 265 | } 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": { 271 | "id": "ryc3EnW4RwqI", 272 | "colab_type": "text" 273 | }, 274 | "source": [ 275 | "`model.parameters()` contains the **weights** and **bias** (alternating) for each of the 3 layers\n", 276 | "\n" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "metadata": { 282 | "id": 
"1-VGjPHeRmWH", 283 | "colab_type": "code", 284 | "outputId": "10d21071-3079-4923-d9cf-3755f6242b22", 285 | "colab": { 286 | "base_uri": "https://localhost:8080/", 287 | "height": 437 288 | } 289 | }, 290 | "source": [ 291 | "params = list(model.parameters())\n", 292 | "print(f'There are {len(params)} parameters')\n", 293 | "for param in params:\n", 294 | " print(param)" 295 | ], 296 | "execution_count": 9, 297 | "outputs": [ 298 | { 299 | "output_type": "stream", 300 | "text": [ 301 | "There are 6 parameters\n", 302 | "Parameter containing:\n", 303 | "tensor([[-0.6722, -0.1253],\n", 304 | " [ 0.3271, -0.5386],\n", 305 | " [-0.4360, -0.6635],\n", 306 | " [-0.0597, 0.2654],\n", 307 | " [-0.4511, -0.1803]], device='cuda:0', requires_grad=True)\n", 308 | "Parameter containing:\n", 309 | "tensor([ 0.4774, 0.0608, 0.3351, 0.6132, -0.1335], device='cuda:0',\n", 310 | " requires_grad=True)\n", 311 | "Parameter containing:\n", 312 | "tensor([[-0.4279, 0.0746, -0.2874, -0.4331, 0.0757],\n", 313 | " [-0.1138, -0.2704, 0.0156, 0.3182, 0.1802],\n", 314 | " [ 0.1589, -0.3853, 0.0769, 0.0236, 0.2774],\n", 315 | " [ 0.4160, 0.0268, 0.0658, 0.0249, 0.0023],\n", 316 | " [-0.1503, 0.1482, -0.0260, 0.2199, 0.2633]], device='cuda:0',\n", 317 | " requires_grad=True)\n", 318 | "Parameter containing:\n", 319 | "tensor([ 0.1400, 0.2608, 0.2217, -0.2910, 0.0465], device='cuda:0',\n", 320 | " requires_grad=True)\n", 321 | "Parameter containing:\n", 322 | "tensor([[ 0.1069, 0.0756, -0.3563, 0.3523, -0.4246]], device='cuda:0',\n", 323 | " requires_grad=True)\n", 324 | "Parameter containing:\n", 325 | "tensor([-0.3458], device='cuda:0', requires_grad=True)\n" 326 | ], 327 | "name": "stdout" 328 | } 329 | ] 330 | }, 331 | { 332 | "cell_type": "markdown", 333 | "metadata": { 334 | "id": "VQffaw77ft98", 335 | "colab_type": "text" 336 | }, 337 | "source": [ 338 | "We'll define a template for our `fit_model` function that contains `train` and `validate` functions.\n", 339 | "\n", 340 | "---\n", 341 
| "\n" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "metadata": { 347 | "id": "amLbK4yBRmfg", 348 | "colab_type": "code", 349 | "colab": {} 350 | }, 351 | "source": [ 352 | "def fit_model(model, loss_fn, optimizer):\n", 353 | " def train(x,y):\n", 354 | " yhat = model(x)\n", 355 | " loss = loss_fn(yhat,y)\n", 356 | " optimizer.zero_grad()\n", 357 | " loss.backward()\n", 358 | " optimizer.step()\n", 359 | " return loss.item()\n", 360 | " \n", 361 | " def validate(x,y):\n", 362 | " yhat = model(x)\n", 363 | " loss = loss_fn(yhat,y)\n", 364 | " return loss.item()\n", 365 | " \n", 366 | " return train, validate" 367 | ], 368 | "execution_count": 0, 369 | "outputs": [] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": { 374 | "id": "cKdszOgAguKD", 375 | "colab_type": "text" 376 | }, 377 | "source": [ 378 | "We define our *loss function*, *learning rate*, and our *optimizer*. We pass this to `fit_model` to return our `train` and `validate` functions.\n" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "metadata": { 384 | "id": "eh_iIPQnSD40", 385 | "colab_type": "code", 386 | "colab": {} 387 | }, 388 | "source": [ 389 | "loss_fn = nn.MSELoss(reduction='mean') #default\n", 390 | "learning_rate = 0.1\n", 391 | "optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)\n", 392 | "train, validate = fit_model(model, loss_fn, optimizer)" 393 | ], 394 | "execution_count": 0, 395 | "outputs": [] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": { 400 | "id": "W0uIChFNfa2c", 401 | "colab_type": "text" 402 | }, 403 | "source": [ 404 | "## Mini-batches\n", 405 | "From the documentation: `torch.nn` only supports mini-batches. The entire `torch.nn` package only supports inputs that are a mini-batch of samples, and not a single sample." 
406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "metadata": { 411 | "id": "09NsOy59SD8J", 412 | "colab_type": "code", 413 | "colab": {} 414 | }, 415 | "source": [ 416 | "train_data = TensorDataset(x_train_tensor, y_train_tensor)\n", 417 | "train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)" 418 | ], 419 | "execution_count": 0, 420 | "outputs": [] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "metadata": { 425 | "id": "y748bWpQg_5x", 426 | "colab_type": "text" 427 | }, 428 | "source": [ 429 | "Here is our training loop with mini-batch processing. We have to move each mini-batch onto the GPU." 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "metadata": { 435 | "id": "fexqm4D9SHyh", 436 | "colab_type": "code", 437 | "outputId": "5c42c58a-b7d7-4c6a-9c70-e1e2d084ea12", 438 | "colab": { 439 | "base_uri": "https://localhost:8080/", 440 | "height": 185 441 | } 442 | }, 443 | "source": [ 444 | "epochs = 100\n", 445 | "for epoch in range(epochs):\n", 446 | " # training\n", 447 | " losses = []\n", 448 | " for i, (xbatch, ybatch) in enumerate(train_loader):\n", 449 | " xbatch = xbatch.to(device)\n", 450 | " ybatch = ybatch.to(device)\n", 451 | " loss = train(xbatch, ybatch)\n", 452 | " losses.append(loss)\n", 453 | " training_loss = np.mean(losses)\n", 454 | " # validation\n", 455 | " validation_loss = validate(x_val_tensor, y_val_tensor)\n", 456 | " # print intermediate results\n", 457 | " if epoch%10 == 9:\n", 458 | " print(epoch, training_loss, validation_loss)" 459 | ], 460 | "execution_count": 13, 461 | "outputs": [ 462 | { 463 | "output_type": "stream", 464 | "text": [ 465 | "9 5.217282251878218 8.100061416625977\n", 466 | "19 4.6458352262323555 6.509875774383545\n", 467 | "29 4.617666352878917 6.0749030113220215\n", 468 | "39 4.465590021827004 5.876566410064697\n", 469 | "49 4.46304219419306 5.840087413787842\n", 470 | "59 4.436497558246959 5.683042049407959\n", 471 | "69 4.447906385768544 5.73892068862915\n", 472 | 
"79 4.456741766496138 5.724264144897461\n", 473 | "89 4.4289374351501465 5.7146830558776855\n", 474 | "99 4.434686617417769 5.704777717590332\n" 475 | ], 476 | "name": "stdout" 477 | } 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "metadata": { 483 | "id": "wri-bxVPhPHB", 484 | "colab_type": "text" 485 | }, 486 | "source": [ 487 | "We can view the current state of our model using the `state_dict` method." 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "metadata": { 493 | "id": "xmiD0CQvSH2D", 494 | "colab_type": "code", 495 | "outputId": "997bf898-5732-4966-dee0-88b41b452c7b", 496 | "colab": { 497 | "base_uri": "https://localhost:8080/", 498 | "height": 319 499 | } 500 | }, 501 | "source": [ 502 | "model.state_dict()" 503 | ], 504 | "execution_count": 14, 505 | "outputs": [ 506 | { 507 | "output_type": "execute_result", 508 | "data": { 509 | "text/plain": [ 510 | "OrderedDict([('fc1.weight', tensor([[-0.9870, -0.4540],\n", 511 | " [ 2.0965, -0.3272],\n", 512 | " [-0.4208, -0.8602],\n", 513 | " [ 1.4232, 0.2407],\n", 514 | " [-0.4511, -0.1803]], device='cuda:0')),\n", 515 | " ('fc1.bias',\n", 516 | " tensor([ 0.0582, 0.2425, 0.0584, 0.6218, -0.1335], device='cuda:0')),\n", 517 | " ('fc2.weight',\n", 518 | " tensor([[-0.2153, 1.3850, -0.1548, 0.3375, 0.0757],\n", 519 | " [ 0.1091, 1.0617, 0.1496, 1.1005, 0.1802],\n", 520 | " [ 0.0043, -0.5234, -0.0231, -0.1097, 0.2774],\n", 521 | " [ 0.4160, 0.0268, 0.0658, 0.0249, 0.0023],\n", 522 | " [-0.2503, -0.0960, -0.1260, 0.0717, 0.2633]], device='cuda:0')),\n", 523 | " ('fc2.bias',\n", 524 | " tensor([ 0.3495, 0.5070, 0.0802, -0.2910, -0.1100], device='cuda:0')),\n", 525 | " ('fc3.weight',\n", 526 | " tensor([[ 1.0817, 0.8173, -0.2413, 0.3523, -0.3157]], device='cuda:0')),\n", 527 | " ('fc3.bias', tensor([-0.1517], device='cuda:0'))])" 528 | ] 529 | }, 530 | "metadata": { 531 | "tags": [] 532 | }, 533 | "execution_count": 14 534 | } 535 | ] 536 | } 537 | ] 538 | } 
--------------------------------------------------------------------------------