├── CarlsonPythonSyllabus.pdf ├── README.md ├── homework ├── README.md ├── hw1.pdf ├── hw2.pdf ├── hw3.pdf ├── hw4.pdf ├── immSurvey.csv ├── solutions │ ├── hw1.py │ └── hw1_test.py └── trend2.csv └── in-classMaterial ├── day1 └── INTL450Intro.pdf ├── day10 └── IntroBayes.pdf ├── day11 ├── 8schools.stan ├── IntroBayesDay2.pdf ├── __pycache__ │ └── createdata.cpython-36.pyc ├── createdata.py ├── cty.dat ├── exampleStan.py ├── exampleStanSolutions.py ├── srrs2.dat ├── stan.pdf └── turnout.csv ├── day12 └── ML1.py ├── day13 ├── FremontHourly.csv ├── ML2.py └── SeaTacWeather.csv ├── day14 └── ML3.py ├── day15 └── ML4.py ├── day16 └── GP1.pdf ├── day17 ├── GP2.py ├── gp-fit.stan ├── gp-pred.stan └── immSurvey.csv ├── day18 ├── NN1.pdf ├── NN2.py ├── mnist.pkl.gz └── wine_data.csv ├── day19 ├── dcgan.gif ├── hello.png ├── image_at_epoch_0001.png ├── image_at_epoch_0002.png ├── image_at_epoch_0003.png ├── image_at_epoch_0004.png ├── image_at_epoch_0005.png ├── image_at_epoch_0006.png ├── image_at_epoch_0007.png ├── image_at_epoch_0008.png ├── image_at_epoch_0009.png ├── image_at_epoch_0010.png ├── image_at_epoch_0011.png ├── image_at_epoch_0012.png ├── image_at_epoch_0013.png ├── image_at_epoch_0014.png ├── image_at_epoch_0015.png ├── image_at_epoch_0016.png ├── image_at_epoch_0017.png ├── image_at_epoch_0018.png ├── image_at_epoch_0019.png ├── image_at_epoch_0020.png ├── image_at_epoch_0021.png ├── image_at_epoch_0022.png ├── image_at_epoch_0023.png ├── image_at_epoch_0024.png ├── image_at_epoch_0025.png ├── image_at_epoch_0026.png ├── image_at_epoch_0027.png ├── image_at_epoch_0028.png ├── image_at_epoch_0029.png ├── image_at_epoch_0030.png ├── image_at_epoch_0031.png ├── image_at_epoch_0032.png ├── image_at_epoch_0033.png ├── image_at_epoch_0034.png ├── image_at_epoch_0035.png ├── image_at_epoch_0036.png ├── image_at_epoch_0037.png ├── image_at_epoch_0038.png ├── image_at_epoch_0039.png ├── image_at_epoch_0040.png ├── image_at_epoch_0041.png ├── image_at_epoch_0042.png ├── image_at_epoch_0043.png ├── image_at_epoch_0044.png ├── image_at_epoch_0045.png ├── image_at_epoch_0046.png ├── image_at_epoch_0047.png ├── image_at_epoch_0048.png ├── image_at_epoch_0049.png ├── image_at_epoch_0050.png ├── photorec.ses └── tf.py ├── day2 ├── INTL450Syntax.pdf ├── lab1.py └── lab1_solutions.py ├── day3 ├── ClassesSlides.pdf ├── clock_lab.py ├── clock_solution.py ├── parent-child.py ├── polymorphism.py └── school.py ├── day4 ├── TestingSlides.pdf ├── __pycache__ │ └── fizzbuzz.cpython-36.pyc ├── exception.py ├── exceptions_example.py ├── fizzbuzz.py ├── fizzbuzz_test.py └── print_integer.py ├── day5 ├── Docket05-1.html ├── csvstuff.py ├── faculty.csv ├── filestuff.py ├── mathofpolitics.csv ├── readfile.txt ├── scrape.py ├── test.csv ├── test1.csv ├── test_with_fields.csv ├── urlparsing.py ├── webcrawler.py ├── whitehouse-petitions.csv ├── whitehouse.py └── writefile.txt ├── day6 ├── econ.csv ├── geo.py ├── tweepy.py ├── twint.py ├── wb.py └── ziya.csv ├── day7 ├── Seattle2014.csv ├── numpy.py └── president_heights.csv ├── day8 ├── pdintro.py ├── state-abbrevs.csv ├── state-areas.csv └── state-population.csv ├── day9 ├── births.csv ├── california_cities.csv └── matplotlib.py └── linearModels └── OLSReviewSlides.pdf /CarlsonPythonSyllabus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/CarlsonPythonSyllabus.pdf 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KocPython2020 2 | 3 | # Final project due the 1st of June 4 | -------------------------------------------------------------------------------- /homework/README.md: -------------------------------------------------------------------------------- 1 | # Homework 1 Due before class February 19 2 | 3 | # Homework 2 Due before class March 11 4 | 5 | # Homework 3 Due before class April 22 6 | 7 | # Homework 4 Due end of day May 22 8 | -------------------------------------------------------------------------------- /homework/hw1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw1.pdf -------------------------------------------------------------------------------- /homework/hw2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw2.pdf -------------------------------------------------------------------------------- /homework/hw3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw3.pdf -------------------------------------------------------------------------------- /homework/hw4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw4.pdf -------------------------------------------------------------------------------- /homework/solutions/hw1.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | class Portfolio(object): 4 | def __init__(self): 5 | self.cash = 0.00 6 | self.assets = {"stock" : {}, "mutual funds" : {}, "bonds" : {}} #dictionary will use asset classes as reference to number owned 7 | self.hist = "Portfolio initialized\n" 8 | 9 | def addCash(self, cash): 10 | self.cash += int(100*cash)/100.0 #ensures adding currency compatible numbers 11 | self.hist+="Added $%.2f\n" %(int(100*cash)/100.0) 12 | 13 | def withdrawCash(self, cash): 14 | if cash > self.cash: print("Portfolio does not contain enough cash.") 15 | else: 16 | self.cash -= int(100*cash)/100.0 17 | self.hist+="Withdrew $%.2f\n" %(int(100*cash)/100.0) 18 | 19 | def buyAsset(self, number, asset): 20 | if self.cash < number*asset.price: 21 | print("Portfolio does not contain enough cash.") 22 | return None 23 | self.withdrawCash(number*asset.price) 24 | if asset in self.assets[asset.getClass()]: 25 | self.assets[asset.getClass()][asset]+=number #see below for getClass() 26 | else: self.assets[asset.getClass()][asset] = number 27 | self.hist+="Bought %d of %s named %s\n" % (number, asset.getClass(), asset.name) 28 | 29 | def buyStock(self, number, asset): self.buyAsset(int(number), asset) #same as buyAsset, but enforcing integer purchases 30 | 31 | buyMutualFund = buyBonds = buyAsset #exactly the same as buyAsset 32 | 33 | def sellAsset(self, number, asset): 34 | if asset in self.assets[asset.getClass()]: #check that it's in the portfolio 35 | if self.assets[asset.getClass()][asset] < number: #check that there is enough to sell 36 | print("The 
portfolio does not contain enough of %s %s" %(asset.name, asset.getClass())) 37 | else: 38 | self.assets[asset.getClass()][asset]-=number 39 | if self.assets[asset.getClass()][asset] == 0: #check if sold all of it - delete key if so 40 | del self.assets[asset.getClass()][asset] 41 | self.addCash(number*asset.SellPrice()) #call function asset.SellPrice to calculate price of asset 42 | self.hist+="Sold %d of %s named %s\n" % (number, asset.getClass(), asset.name) 43 | else: print("The portfolio does not contain %s with name %s" %(asset.getClass(), asset.name)) 44 | 45 | def sellStock(self, number, asset): self.sellAsset(int(number), asset) #enforce integer sales 46 | 47 | sellMutualFund = sellBonds = sellAsset 48 | 49 | def __str__(self): 50 | output = "cash: $%-15.2f\n" %self.cash 51 | for asset in self.assets: 52 | output+= "%s: \n"%asset 53 | if not self.assets[asset]: output+='\tnone\n' 54 | for ast in self.assets[asset]: 55 | output += str(self.assets[asset][ast]).rjust(5) + str(ast.name).rjust(5) + "\n" 56 | return output 57 | 58 | def history(self): print(self.hist) 59 | 60 | class Asset(object): #superclass for stocks, bonds, and mutual funds 61 | def __init__(self, price, name): 62 | self.price = price 63 | self.name = name 64 | 65 | def SellPrice(self): 66 | return int(100*random.uniform(.9*self.price, 1.2*self.price))/100.0 #we'll make bonds and mutual funds sell by the same distribution 67 | 68 | 69 | class Stock(Asset): 70 | def __init__(self, price, name): 71 | Asset.__init__(self, price, name) 72 | 73 | def getClass(self): return "stock" #a simple way to get the class as a string to use for calling the asset dictionary 74 | 75 | def SellPrice(self): 76 | return int(100*random.uniform(.5*self.price, 1.5*self.price))/100.0 #change the distribution for stock sales 77 | 78 | class MutualFund(Asset): 79 | def __init__(self, name): 80 | Asset.__init__(self, 1.0, name) 81 | 82 | def getClass(self): return "mutual funds" 83 | 84 | 85 | class Bonds(Asset): 86 | def __init__(self, price, name): 87 | Asset.__init__(self, price, name) 88 | 89 | def getClass(self): return "bonds" 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /homework/solutions/hw1_test.py: -------------------------------------------------------------------------------- 1 | from hw1 import * 2 | import unittest 3 | 4 | class PortfolioTest(unittest.TestCase): 5 | def setUp(self): 6 | self.portfolio = Portfolio() 7 | self.mut1 = MutualFund("MUTA") 8 | self.mut2 = MutualFund("MUTB") 9 | self.stock1 = Stock(25, "STKA") 10 | self.stock2 = Stock(36.52, "STKB") 11 | self.bond1 = Bonds(19, "BNDA") 12 | self.bond2 = Bonds(44, "BNDB") 13 | 14 | def test_an_empty_portfolio(self): 15 | self.assertEqual(0.0, self.portfolio.cash) 16 | self.assertEqual({}, self.portfolio.assets['mutual funds']) 17 | self.assertEqual({}, self.portfolio.assets['bonds']) 18 | self.assertEqual({}, self.portfolio.assets['stock']) 19 | self.assertEqual("Portfolio initialized\n", self.portfolio.hist) 20 | 21 | def test_assets(self): 22 | self.assertEqual(1, self.mut1.price) 23 | self.assertEqual(1, self.mut2.price) 24 | self.assertEqual(25.00, self.stock1.price) 25 | self.assertEqual(36.52, self.stock2.price) 26 | self.assertEqual(19.00, self.bond1.price) 27 | self.assertEqual(44, self.bond2.price) 28 | 29 | self.assertEqual("MUTA", self.mut1.name) 30 | self.assertEqual("MUTB", self.mut2.name) 31 | self.assertEqual("STKA", self.stock1.name) 32 | self.assertEqual("STKB", self.stock2.name) 33 | 
self.assertEqual("BNDA", self.bond1.name) 34 | self.assertEqual("BNDB", self.bond2.name) 35 | 36 | def test_add_cash(self): 37 | self.portfolio.addCash(30956.45) 38 | self.assertEqual(30956.45, self.portfolio.cash) 39 | self.assertTrue("Added $30956.45" in self.portfolio.hist) 40 | 41 | def test_withdraw_cash(self): 42 | self.portfolio.withdrawCash(345) 43 | self.assertEqual(0, self.portfolio.cash) 44 | 45 | self.portfolio.addCash(30956.45) 46 | self.portfolio.withdrawCash(56.45) 47 | self.assertEqual(30900, self.portfolio.cash) 48 | self.assertTrue("Withdrew $56.45" in self.portfolio.hist) 49 | 50 | def test_buy_stock(self): 51 | self.portfolio.buyStock(100, self.stock1) 52 | self.assertEqual({}, self.portfolio.assets['stock']) 53 | 54 | self.portfolio.addCash(10000) 55 | self.portfolio.buyStock(100, self.stock1) 56 | self.assertEqual({self.stock1: 100}, self.portfolio.assets['stock']) 57 | self.assertEqual(10000-100*25.0, self.portfolio.cash) 58 | self.assertTrue("Bought 100 of stock named STKA" in self.portfolio.hist) 59 | 60 | self.assertTrue(self.stock2 not in self.portfolio.assets['stock']) 61 | 62 | def test_sell_stock(self): 63 | self.portfolio.sellStock(100, self.stock1) 64 | self.assertEqual({}, self.portfolio.assets['stock']) 65 | self.assertEqual(0.0, self.portfolio.cash) 66 | 67 | self.portfolio.addCash(10000) 68 | self.portfolio.buyStock(100, self.stock1) 69 | self.portfolio.sellStock(50, self.stock1) 70 | self.assertEqual(50, self.portfolio.assets['stock'][self.stock1]) 71 | newcash = 10000 - 100*25.0 72 | self.assertTrue(self.portfolio.cash <= newcash +50*25.0*1.5 and 50*25.0*.5 + newcash <= self.portfolio.cash) 73 | self.assertTrue("Sold 50 of stock named STKA" in self.portfolio.hist) 74 | 75 | 76 | def test_buy_mutual_fund(self): 77 | self.portfolio.buyMutualFund(100, self.mut1) 78 | self.assertEqual({}, self.portfolio.assets['mutual funds']) 79 | 80 | self.portfolio.addCash(10000) 81 | self.portfolio.buyMutualFund(100, self.mut1) 82 | self.assertEqual({self.mut1: 100}, self.portfolio.assets['mutual funds']) 83 | self.assertEqual(10000-100, self.portfolio.cash) 84 | self.assertTrue("Bought 100 of mutual funds named MUTA" in self.portfolio.hist) 85 | 86 | self.assertTrue(self.mut2 not in self.portfolio.assets['mutual funds']) 87 | 88 | def test_sell_mutual_fund(self): 89 | self.portfolio.sellMutualFund(100, self.mut1) 90 | self.assertEqual({}, self.portfolio.assets['mutual funds']) 91 | self.assertEqual(0.0, self.portfolio.cash) 92 | 93 | self.portfolio.addCash(10000) 94 | self.portfolio.buyMutualFund(100, self.mut1) 95 | self.portfolio.sellMutualFund(50, self.mut1) 96 | self.assertEqual(50, self.portfolio.assets['mutual funds'][self.mut1]) 97 | newcash = 10000 - 100 98 | self.assertTrue(self.portfolio.cash <= newcash +50*1.2 and 50*.9 + newcash <= self.portfolio.cash) 99 | self.assertTrue("Sold 50 of mutual funds named MUTA" in self.portfolio.hist) 100 | 101 | def test_buy_bonds(self): 102 | self.portfolio.buyBonds(100, self.bond1) 103 | self.assertEqual({}, self.portfolio.assets['bonds']) 104 | 105 | self.portfolio.addCash(10000) 106 | self.portfolio.buyBonds(100, self.bond1) 107 | self.assertEqual({self.bond1: 100}, self.portfolio.assets['bonds']) 108 | self.assertEqual(10000-100*19.0, self.portfolio.cash) 109 | self.assertTrue("Bought 100 of bonds named BNDA" in self.portfolio.hist) 110 | 111 | self.assertTrue(self.bond2 not in self.portfolio.assets['bonds']) 112 | 113 | def test_sell_bonds(self): 114 | self.portfolio.sellBonds(100, self.bond1) 115 | 
self.assertEqual({}, self.portfolio.assets['bonds']) 116 | self.assertEqual(0.0, self.portfolio.cash) 117 | 118 | self.portfolio.addCash(10000) 119 | self.portfolio.buyBonds(100, self.bond1) 120 | self.portfolio.sellBonds(50, self.bond1) 121 | self.assertEqual(50, self.portfolio.assets['bonds'][self.bond1]) 122 | newcash = 10000 - 100*19.0 123 | self.assertTrue(self.portfolio.cash <= newcash +50*19*1.2 and 50*.9*19 + newcash <= self.portfolio.cash) 124 | self.assertTrue("Sold 50 of bonds named BNDA" in self.portfolio.hist) 125 | 126 | def test_print(self): 127 | self.assertTrue('cash: $' in self.portfolio.__str__()) 128 | self.assertTrue('mutual funds:' in self.portfolio.__str__()) 129 | self.assertTrue('stock:' in self.portfolio.__str__()) 130 | self.assertTrue('bonds:' in self.portfolio.__str__()) 131 | 132 | self.portfolio.addCash(10000) 133 | self.portfolio.buyBonds(100, self.bond1) 134 | self.portfolio.buyStock(5, self.stock1) 135 | self.portfolio.buyMutualFund(10, self.mut1) 136 | self.assertTrue('BNDA' in self.portfolio.__str__()) 137 | self.assertTrue('STKA' in self.portfolio.__str__()) 138 | self.assertTrue('MUTA' in self.portfolio.__str__()) 139 | self.assertTrue('100' in self.portfolio.__str__()) 140 | self.assertTrue('5' in self.portfolio.__str__()) 141 | self.assertTrue('10' in self.portfolio.__str__()) 142 | 143 | 144 | if __name__ == '__main__': 145 | unittest.main() 146 | 147 | -------------------------------------------------------------------------------- /in-classMaterial/day1/INTL450Intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day1/INTL450Intro.pdf -------------------------------------------------------------------------------- /in-classMaterial/day10/IntroBayes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day10/IntroBayes.pdf -------------------------------------------------------------------------------- /in-classMaterial/day11/8schools.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int<lower=0> J; // number of schools 3 | vector[J] y; // estimated treatment effects 4 | vector<lower=0>[J] sigma; // s.e.
of effect estimates 5 | } 6 | parameters { 7 | real mu; 8 | real<lower=0> tau; 9 | vector[J] eta; 10 | } 11 | transformed parameters { 12 | vector[J] theta; 13 | theta = mu + tau * eta; 14 | } 15 | model { 16 | eta ~ normal(0, 1); 17 | y ~ normal(theta, sigma); 18 | } 19 | -------------------------------------------------------------------------------- /in-classMaterial/day11/IntroBayesDay2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day11/IntroBayesDay2.pdf -------------------------------------------------------------------------------- /in-classMaterial/day11/__pycache__/createdata.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day11/__pycache__/createdata.cpython-36.pyc -------------------------------------------------------------------------------- /in-classMaterial/day11/createdata.py: -------------------------------------------------------------------------------- 1 | """ 2 | Set up a model and data 3 | """ 4 | 5 | import numpy as np 6 | 7 | # set the true values of the model parameters for creating the data 8 | m = 3.5 # gradient of the line 9 | c = 1.2 # y-intercept of the line 10 | 11 | # set the "predictor variable"/abscissa 12 | M = 100 13 | xmin = 0. 14 | xmax = 10. 15 | stepsize = (xmax-xmin)/M 16 | x = np.arange(xmin, xmax, stepsize) 17 | 18 | # define the model function 19 | def straight_line(x, m, c): 20 | """ 21 | A straight line model: y = m*x + c 22 | 23 | Args: 24 | x (list): a set of abscissa points at which the model is defined 25 | m (float): the gradient of the line 26 | c (float): the y-intercept of the line 27 | """ 28 | 29 | return m*x + c 30 | 31 | # create the data - the model plus Gaussian noise 32 | sigma = 0.5 # standard deviation of the noise 33 | data = straight_line(x, m, c) + sigma*np.random.randn(M) 34 | 35 | -------------------------------------------------------------------------------- /in-classMaterial/day11/stan.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day11/stan.pdf -------------------------------------------------------------------------------- /in-classMaterial/day12/ML1.py: -------------------------------------------------------------------------------- 1 | #Supervised learning: Model relationship between measured features of data and label associated with the data - apply labels to new data - classification and regression 2 | #Unsupervised learning: Clustering and dimensionality reduction 3 | import seaborn as sns 4 | iris = sns.load_dataset('iris') 5 | iris.head() 6 | #rows are samples, number of rows is n_samples 7 | #columns are features, number is n_features 8 | #features matrix - 2D data representation - often stored as X 9 | #NumPy array, Pandas DataFrame, or SciPy sparse matrices 10 | #label or target array - y (dependent variable) 11 | sns.pairplot(iris, hue='species', height=1.5); 12 | 13 | X_iris = iris.drop('species', axis=1) 14 | X_iris.shape 15 | y_iris = iris['species'] 16 | y_iris.shape 17 | 18 | #Estimator API 19 | #Consistency 20 | #All objects share a common interface drawn from a limited set of methods, with consistent documentation. 
21 | #Inspection 22 | #All specified parameter values are exposed as public attributes. 23 | #Limited object hierarchy 24 | #Only algorithms are represented by Python classes; datasets are represented in standard formats (NumPy arrays, Pandas DataFrames, SciPy sparse matrices) and parameter names use standard Python strings. 25 | #Composition 26 | #Many machine learning tasks can be expressed as sequences of more fundamental algorithms, and Scikit-Learn makes use of this wherever possible. 27 | #Sensible defaults 28 | #When models require user-specified parameters, the library defines an appropriate default value 29 | 30 | #1. Choose a class of model by importing the appropriate estimator class from Scikit-Learn. 31 | #2. Choose model hyperparameters by instantiating this class with desired values. 32 | #3. Arrange data into a features matrix and target vector following the discussion from before. 33 | #4. Fit the model to your data by calling the fit() method of the model instance. 34 | #5. Apply the model to new data: 35 | # • For supervised learning, often we predict labels for unknown data using the predict() method. 36 | # • For unsupervised learning, we often transform or infer properties of the data using the transform() or predict() method. 37 | 38 | #Supervised learning: Simple linear regression 39 | import matplotlib.pyplot as plt 40 | import numpy as np 41 | rng = np.random.RandomState(42) 42 | x = 10 * rng.rand(50) 43 | y = 2 * x - 1 + rng.randn(50) 44 | plt.scatter(x, y); 45 | 46 | from sklearn.linear_model import LinearRegression 47 | model = LinearRegression(fit_intercept=True) #storing of hyperparameter values 48 | model 49 | X = x[:, np.newaxis] 50 | X.shape 51 | model.fit(X, y) 52 | model.coef_ 53 | model.intercept_ 54 | #interpreting model parameters is more a statistical modeling question than a machine learning question, but if you want inferences: 55 | import statsmodels.api as sm 56 | ols = sm.OLS(y, sm.add_constant(X)) #add_constant appends the intercept column that LinearRegression fit above 57 | ols_result = ols.fit() 58 | # Now you have at your disposal several error estimates, e.g. 59 | ols_result.HC0_se 60 | # and covariance estimates 61 | ols_result.cov_HC0 62 | #confidence intervals 63 | ols_result.conf_int() 64 | #p-values (which may come in handy for pubs, but please do not rely on them for this class!) 65 | ols_result.pvalues 66 | 67 | #prediction 68 | xfit = np.linspace(-1, 11) 69 | Xfit = xfit[:, np.newaxis] 70 | yfit = model.predict(Xfit) 71 | plt.scatter(x, y) 72 | plt.plot(xfit, yfit); 73 | 74 | #TODO: Fit the same linear regression you did for homework 2 using the LinearRegression model, and compare the results 75 | 76 | #evaluate efficacy of model by comparing results to known baseline 77 | #given a model trained on a portion of the Iris data, how well can we predict the remaining labels? 78 | #Because it is so fast and has no hyperparameters to choose, Gaussian naive Bayes is often a good model to use as a baseline classification, before you explore whether improvements can be found through more sophisticated models. 79 | from sklearn.model_selection import train_test_split 80 | Xtrain, Xtest, ytrain, ytest = train_test_split(X_iris, y_iris, 81 | random_state=1) 82 | 83 | from sklearn.naive_bayes import GaussianNB # 1. choose model class 84 | model = GaussianNB() # 2. instantiate model 85 | model.fit(Xtrain, ytrain) # 3. fit model to data 86 | y_model = model.predict(Xtest) # 4. 
predict on new data 87 | 88 | #accuracy 89 | from sklearn.metrics import accuracy_score 90 | accuracy_score(ytest, y_model) 91 | 92 | #TODO: Using the same homework 2 data, split your data into a train and test sample, and test the accuracy score 93 | 94 | #Unsupervised learning: Dimensionality reduction 95 | #Principal components analysis: fast linear dimensionality reduction technique 96 | from sklearn.decomposition import PCA # 1. Choose the model class 97 | model = PCA(n_components=2) # 2. Instantiate the model with hyperparameters 98 | model.fit(X_iris) # 3. Fit to data. Notice y is not specified! 99 | X_2D = model.transform(X_iris) # 4. Transform the data to two dimensions 100 | 101 | #2D species are well separated, even without labels 102 | iris['PCA1'] = X_2D[:, 0] 103 | iris['PCA2'] = X_2D[:, 1] 104 | sns.lmplot("PCA1", "PCA2", hue='species', data=iris, fit_reg=False); 105 | 106 | #Unsupervised learning: Iris clustering 107 | #Gaussian mixture model: Model data as collection of Gaussian blobs 108 | from sklearn.mixture import GaussianMixture # 1. Choose the model class 109 | model = GaussianMixture(n_components=3, covariance_type='full') # 2. Instantiate the model w/ hyperparameters 110 | model.fit(X_iris) # 3. Fit to data. Notice y is not specified! 111 | y_gmm = model.predict(X_iris) # 4. Determine cluster labels 112 | 113 | #Add cluster label - Automatically identify presence of different groups of species 114 | iris['cluster'] = y_gmm 115 | sns.lmplot("PCA1", "PCA2", data=iris, hue='species', col='cluster', fit_reg=False); 116 | 117 | #Application: Exploring Handwritten Digits 118 | from sklearn.datasets import load_digits 119 | digits = load_digits() 120 | digits.images.shape 121 | 122 | #The images data is a three-dimensional array: 1,797 samples, each consisting of an 8×8 grid of pixels. Let’s visualize the first hundred of these 123 | fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks':[], 'yticks':[]}, gridspec_kw=dict(hspace=0.1, wspace=0.1)) 124 | for i, ax in enumerate(axes.flat): 125 | ax.imshow(digits.images[i], cmap='binary', interpolation='nearest') 126 | ax.text(0.05, 0.05, str(digits.target[i]), transform=ax.transAxes, color='green') 127 | 128 | #Treat each pixel as a feature - flatten out the array so we have length-64 array of pixel values representing each digit 129 | X = digits.data 130 | X.shape 131 | y = digits.target 132 | y.shape 133 | 134 | #Unsupervised learning: Dimensionality reduction - Isomap 135 | from sklearn.manifold import Isomap 136 | iso = Isomap(n_components=2) 137 | iso.fit(digits.data) 138 | data_projected = iso.transform(digits.data) 139 | data_projected.shape 140 | 141 | plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target, edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('Spectral', 10)) 142 | plt.colorbar(label='digit label', ticks=range(10)) 143 | plt.clim(-0.5, 9.5); 144 | #generally good separation in parameter space 145 | 146 | #classification 147 | Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0) 148 | #Gaussian naive Bayes 149 | from sklearn.naive_bayes import GaussianNB 150 | model = GaussianNB() 151 | model.fit(Xtrain, ytrain) 152 | y_model = model.predict(Xtest) 153 | 154 | accuracy_score(ytest, y_model) #good considering the simplicity of the model 155 | #where did we go wrong? 
Confusion matrix shows frequency of misclassification 156 | from sklearn.metrics import confusion_matrix 157 | mat = confusion_matrix(ytest, y_model) 158 | sns.heatmap(mat, square=True, annot=True, cbar=False) 159 | plt.xlabel('predicted value') 160 | plt.ylabel('true value'); 161 | #plot test inputs with predicted labels 162 | fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks':[], 'yticks':[]}, gridspec_kw=dict(hspace=0.1, wspace=0.1)) 163 | for i, ax in enumerate(axes.flat): 164 | ax.imshow(Xtest[i].reshape(8, 8), cmap='binary', interpolation='nearest') #show the test images that y_model actually labels 165 | ax.text(0.05, 0.05, str(y_model[i]), transform=ax.transAxes, color='green' if (ytest[i] == y_model[i]) else 'red') 166 | 167 | 168 | #to make an informed choice, we need a way to validate that our model and our hyperparameters are a good fit to the data 169 | #Model validation the wrong way 170 | from sklearn.datasets import load_iris 171 | iris = load_iris() 172 | X = iris.data 173 | y = iris.target 174 | #Here we'll use a k-neighbors classifier with n_neighbors=1. This is a very simple and intuitive model that says 'the label of an unknown point is the same as the label of its closest training point' 175 | from sklearn.neighbors import KNeighborsClassifier 176 | model = KNeighborsClassifier(n_neighbors=1) 177 | #Then we train the model, and use it to predict labels for data we already know 178 | model.fit(X, y) 179 | y_model = model.predict(X) 180 | accuracy_score(y, y_model) 181 | 182 | #Model validation the right way: Holdout sets 183 | # split the data with 50% in each set 184 | X1, X2, y1, y2 = train_test_split(X, y, random_state=0, train_size=0.5, test_size=0.5) 185 | # fit the model on one set of data 186 | model.fit(X1, y1) 187 | # evaluate the model on the second set of data 188 | y2_model = model.predict(X2) 189 | accuracy_score(y2, y2_model) 190 | 191 | #Model validation via cross-validation 192 | y2_model = model.fit(X1, y1).predict(X2) 193 | y1_model = model.fit(X2, y2).predict(X1) 194 | accuracy_score(y1, y1_model), accuracy_score(y2, y2_model) 195 | #more than 2 sets 196 | from sklearn.model_selection import cross_val_score 197 | cross_val_score(model, X, y, cv=5) 198 | 199 | #loo 200 | from sklearn.model_selection import LeaveOneOut 201 | scores = cross_val_score(model, X, y, cv=LeaveOneOut()) #LeaveOneOut takes no arguments in current scikit-learn 202 | scores 203 | 204 | scores.mean() 205 | 206 | #Selecting the Best Model 207 | # Use a more complicated/more flexible model 208 | # Use a less complicated/less flexible model 209 | # Gather more training samples 210 | # Gather more data to add features to each sample 211 | 212 | #The bias-variance trade-off 213 | #High-bias model: Underfits the data 214 | #High-variance model: Overfits the data 215 | #For high-bias models, the performance of the model on the validation set is similar to the performance on the training set. 216 | #For high-variance models, the performance of the model on the validation set is far worse than the performance on the training set. 217 | 218 | #The training score is everywhere higher than the validation score. This is generally the case: the model will be a better fit to data it has seen than to data it has not seen. 219 | #For very low model complexity (a high-bias model), the training data is underfit, which means that the model is a poor predictor both for the training data and for any previously unseen data. 
220 | #For very high model complexity (a high-variance model), the training data is overfit, which means that the model predicts the training data very well, but fails for any previously unseen data. 221 | #For some intermediate value, the validation curve has a maximum. This level of complexity indicates a suitable trade-off between bias and variance. 222 | 223 | #Validation curves 224 | from sklearn.preprocessing import PolynomialFeatures 225 | from sklearn.linear_model import LinearRegression 226 | from sklearn.pipeline import make_pipeline 227 | def PolynomialRegression(degree=2, **kwargs): 228 | return make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs)) 229 | 230 | def make_data(N, err=1.0, rseed=1): 231 | # randomly sample the data 232 | rng = np.random.RandomState(rseed) 233 | X = rng.rand(N, 1) ** 2 234 | y = 10 - 1. / (X.ravel() + 0.1) 235 | if err > 0: 236 | y += err * rng.randn(N) 237 | return X, y 238 | 239 | X, y = make_data(40) 240 | 241 | import seaborn; seaborn.set() # plot formatting 242 | X_test = np.linspace(-0.1, 1.1, 500)[:, None] 243 | plt.scatter(X.ravel(), y, color='black') 244 | axis = plt.axis() 245 | for degree in [1, 3, 5]: 246 | y_test = PolynomialRegression(degree).fit(X, y).predict(X_test) 247 | plt.plot(X_test.ravel(), y_test, label='degree={0}'.format(degree)) 248 | plt.xlim(-0.1, 1.0) 249 | plt.ylim(-2, 12) 250 | plt.legend(loc='best'); 251 | #degree of polynomial is knob controlling model complexity 252 | 253 | from sklearn.model_selection import validation_curve 254 | degree = np.arange(0, 21) 255 | train_score, val_score = validation_curve(PolynomialRegression(), X, y, 'polynomialfeatures__degree', degree, cv=7) 256 | plt.plot(degree, np.median(train_score, 1), color='blue', label='training score') 257 | plt.plot(degree, np.median(val_score, 1), color='red', label='validation score') 258 | plt.legend(loc='best') 259 | plt.ylim(0, 1) 260 | plt.xlabel('degree') 261 | plt.ylabel('score'); 262 | 263 | plt.scatter(X.ravel(), y) 264 | lim = plt.axis() 265 | y_test = PolynomialRegression(3).fit(X, y).predict(X_test) 266 | plt.plot(X_test.ravel(), y_test); 267 | plt.axis(lim); 268 | 269 | #TODO: Again using the same data you used for homework 2, fit polynomials and determine the optimal degree to use 270 | 271 | #Optimal model will generally depend on size of training data 272 | X2, y2 = make_data(200) 273 | plt.scatter(X2.ravel(), y2); 274 | 275 | degree = np.arange(21) 276 | train_score2, val_score2 = validation_curve(PolynomialRegression(), X2, y2, 277 | 'polynomialfeatures__degree', 278 | degree, cv=7) 279 | plt.plot(degree, np.median(train_score2, 1), color='blue', 280 | label='training score') 281 | plt.plot(degree, np.median(val_score2, 1), color='red', label='validation score') 282 | plt.plot(degree, np.median(train_score, 1), color='blue', alpha=0.3, 283 | linestyle='dashed') 284 | plt.plot(degree, np.median(val_score, 1), color='red', alpha=0.3, 285 | linestyle='dashed') 286 | plt.legend(loc='lower center') 287 | plt.ylim(0, 1) 288 | plt.xlabel('degree') 289 | plt.ylabel('score'); 290 | #behavior of validation curve has two important inputs: complexity and number of training points 291 | 292 | #plot of the training/validation score with respect to the size of the training set is known as a learning curve 293 | #A model of a given complexity will overfit a small dataset: this means the training score will be relatively high, while the validation score will be relatively low. 
294 | #A model of a given complexity will underfit a large dataset: this means that the training score will decrease, but the validation score will increase. 295 | #A model will never, except by chance, give a better score to the validation set than the training set: this means the curves should keep getting closer together but never cross. 296 | 297 | #The notable feature of the learning curve is the convergence to a particular score as the number of training samples grows. In particular, once you have enough points that a particular model has converged, adding more training data will not help you! The only way to increase model performance in this case is to use another (often more complex) model. 298 | 299 | from sklearn.model_selection import learning_curve 300 | fig, ax = plt.subplots(1, 2, figsize=(16, 6)) 301 | fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1) 302 | for i, degree in enumerate([2, 9]): 303 | N, train_lc, val_lc = learning_curve(PolynomialRegression(degree), X, y, cv=7, train_sizes=np.linspace(0.3, 1, 25)) 304 | ax[i].plot(N, np.mean(train_lc, 1), color='blue', label='training score') 305 | ax[i].plot(N, np.mean(val_lc, 1), color='red', label='validation score') 306 | ax[i].hlines(np.mean([train_lc[-1], val_lc[-1]]), N[0], N[-1], color='gray', linestyle='dashed') 307 | ax[i].set_ylim(0, 1) 308 | ax[i].set_xlim(N[0], N[-1]) 309 | ax[i].set_xlabel('training size') 310 | ax[i].set_ylabel('score') 311 | ax[i].set_title('degree = {0}'.format(degree), size=14) 312 | ax[i].legend(loc='best') 313 | 314 | 315 | #In practice, models generally have more than one knob to turn, and thus plots of validation and learning curves change from lines to multidimensional surfaces. In these cases, such visualizations are difficult and we would rather simply find the particular model that maximizes the validation score. 316 | 317 | #We will explore a three-dimensional grid of model features—namely, the polynomial degree, the flag telling us whether to fit the intercept, and the flag telling us whether to normalize the problem 318 | 319 | from sklearn.model_selection import GridSearchCV 320 | param_grid = {'polynomialfeatures__degree': np.arange(21), 321 | 'linearregression__fit_intercept': [True, False], 322 | 'linearregression__normalize': [True, False]} 323 | grid = GridSearchCV(PolynomialRegression(), param_grid, cv=7) 324 | grid.fit(X, y); 325 | grid.best_params_ 326 | 327 | #with normalize == True, why is fit_intercept == False? 
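#One way to probe that question is to inspect grid.cv_results_, which records the mean cross-validated score for every parameter combination tried. A minimal sketch (pandas is imported here only for display; cv_results is a hypothetical name):
import pandas as pd
cv_results = pd.DataFrame(grid.cv_results_)
cv_results[['param_polynomialfeatures__degree',
            'param_linearregression__fit_intercept',
            'param_linearregression__normalize',
            'mean_test_score']].sort_values('mean_test_score', ascending=False).head()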
328 | 329 | model = grid.best_estimator_ 330 | 331 | plt.scatter(X.ravel(), y) 332 | lim = plt.axis() 333 | y_test = model.fit(X, y).predict(X_test) 334 | plt.plot(X_test.ravel(), y_test); 335 | plt.axis(lim); 336 | 337 | 338 | #Feature engineering 339 | #one of the more important steps in using machine learning in practice is feature engineering—that is, taking whatever information you have about your problem and turning it into numbers that you can use to build your feature matrix 340 | 341 | #categorical features 342 | data = [ 343 | {'price': 850000, 'rooms': 4, 'neighborhood': 'Queen Anne'}, 344 | {'price': 700000, 'rooms': 3, 'neighborhood': 'Fremont'}, 345 | {'price': 650000, 'rooms': 3, 'neighborhood': 'Wallingford'}, 346 | {'price': 600000, 'rooms': 2, 'neighborhood': 'Fremont'} 347 | ] 348 | #one-hot encoding; extra columns indicating the presence or absence of a category with a value of 1 or 0 349 | from sklearn.feature_extraction import DictVectorizer 350 | vec = DictVectorizer(sparse=False, dtype=int) 351 | vec.fit_transform(data) #notice it is in alphabetical order 352 | 353 | vec.get_feature_names() 354 | 355 | #if your category has many possible values, this can greatly increase the size of your dataset. However, because the encoded data contains mostly zeros, a sparse output can be a very efficient solution 356 | vec = DictVectorizer(sparse=True, dtype=int) 357 | vec.fit_transform(data) 358 | 359 | 360 | #text features 361 | #word counts 362 | sample = ['problem of evil', 363 | 'evil queen', 364 | 'horizon problem'] 365 | from sklearn.feature_extraction.text import CountVectorizer 366 | vec = CountVectorizer() 367 | X = vec.fit_transform(sample) 368 | X 369 | 370 | import pandas as pd 371 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names()) 372 | 373 | #down-weighting frequent words; term frequency–inverse document frequency (TF–IDF), which weights the word counts by a measure of how often they appear in the documents 374 | from sklearn.feature_extraction.text import TfidfVectorizer 375 | vec = TfidfVectorizer() 376 | X = vec.fit_transform(sample) 377 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names()) 378 | 379 | #if interested in image feature extraction, see SciKit-Image project 380 | 381 | #derived features: transforming input - basis function regression 382 | x = np.array([1, 2, 3, 4, 5]) 383 | y = np.array([4, 2, 1, 3, 7]) 384 | plt.scatter(x, y); 385 | 386 | from sklearn.linear_model import LinearRegression 387 | X = x[:, np.newaxis] 388 | model = LinearRegression().fit(X, y) 389 | yfit = model.predict(X) 390 | plt.scatter(x, y) 391 | plt.plot(x, yfit); 392 | 393 | from sklearn.preprocessing import PolynomialFeatures 394 | poly = PolynomialFeatures(degree=3, include_bias=False) 395 | X2 = poly.fit_transform(X) 396 | print(X2) 397 | 398 | model = LinearRegression().fit(X2, y) 399 | yfit = model.predict(X2) 400 | plt.scatter(x, y) 401 | plt.plot(x, yfit); 402 | 403 | #TODO: Do the same as above, fitting a polynomial to your data, but use this PolynomialFeatures method instead 404 | 405 | #imputation of missing data 406 | from numpy import nan 407 | X = np.array([[ nan, 0, 3], 408 | [ 3, 7, 9], 409 | [ 3, 5, 2], 410 | [ 4, nan, 6], 411 | [ 8, 8, 1]]) 412 | y = np.array([14, 16, -1, 8, -5]) 413 | #simply use the mean (also can use median or most_frequent value) 414 | from sklearn.impute import SimpleImputer #SimpleImputer replaced the removed sklearn.preprocessing.Imputer 415 | imp = SimpleImputer(strategy='mean') 416 | X2 = imp.fit_transform(X) 417 | X2 418 | 419 | model = LinearRegression().fit(X2, y) 420 | 
model.predict(X2) 421 | #if missingness is problematic, consider MICE 422 | 423 | #TODO: Fill in missing values in your data using different methods, and see if your substantive results change when modeling 424 | 425 | #feature pipelines - suppose we want to: 426 | #1. Impute missing values using the mean 427 | #2. Transform features to quadratic 428 | #3. Fit a linear regression 429 | from sklearn.pipeline import make_pipeline 430 | model = make_pipeline(SimpleImputer(strategy='mean'), 431 | PolynomialFeatures(degree=2), 432 | LinearRegression()) 433 | 434 | model.fit(X, y) # X with missing values, from above 435 | print(y) 436 | print(model.predict(X)) 437 | 438 | 439 | 440 | 441 | 442 | -------------------------------------------------------------------------------- /in-classMaterial/day13/ML2.py: -------------------------------------------------------------------------------- 1 | #Naive Bayes classification 2 | #fast and simple classification algorithm 3 | #P(L|features) = P(features|L)P(L)/P(features) 4 | # -> P(L1|features)/P(L2|features) = P(features|L1)P(L1)/[P(features|L2)P(L2)] 5 | #generative model: specifies the hypothetical random process that generates data 6 | 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import seaborn as sns; sns.set() 10 | 11 | #Gaussian naive Bayes: data from each label is drawn from simple Gaussian distribution 12 | 13 | from sklearn.datasets import make_blobs 14 | X, y = make_blobs(100, 2, centers=2, random_state=2, cluster_std=1.5) 15 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='RdBu'); 16 | 17 | #find mean and standard deviation of points within a label, which defines the distribution 18 | #can then compute posterior ratio for given point 19 | 20 | from sklearn.naive_bayes import GaussianNB 21 | model = GaussianNB() 22 | model.fit(X, y); 23 | 24 | rng = np.random.RandomState(0) 25 | Xnew = [-6, -14] + [14, 18] * rng.rand(2000, 2) 26 | ynew = model.predict(Xnew) 27 | 28 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='RdBu') 29 | lim = plt.axis() 30 | plt.scatter(Xnew[:, 0], Xnew[:, 1], c=ynew, s=20, cmap='RdBu', alpha=0.1) 31 | plt.axis(lim); 32 | 33 | #in general, boundary in Gaussian naive Bayes is quadratic 34 | #allows for probabilistic classification 35 | 36 | yprob = model.predict_proba(Xnew) 37 | yprob[-8:].round(2) 38 | 39 | #Multinomial naive Bayes 40 | #features assumed to be generated from simple multinomial distribution (prob of observing counts among a number of categories - most useful when features represent counts or rates) 41 | 42 | from sklearn.datasets import fetch_20newsgroups 43 | data = fetch_20newsgroups() 44 | data.target_names 45 | 46 | #select a few categories for simplicity 47 | categories = ['talk.religion.misc', 'soc.religion.christian', 'sci.space', 48 | 'comp.graphics'] 49 | train = fetch_20newsgroups(subset='train', categories=categories) 50 | test = fetch_20newsgroups(subset='test', categories=categories) 51 | print(train.data[5]) 52 | 53 | #TF-IDF vectorizer 54 | from sklearn.feature_extraction.text import TfidfVectorizer 55 | from sklearn.naive_bayes import MultinomialNB 56 | from sklearn.pipeline import make_pipeline 57 | 58 | #create a pipeline 59 | model = make_pipeline(TfidfVectorizer(), MultinomialNB()) 60 | 61 | #fit the model and predict 62 | model.fit(train.data, train.target) 63 | labels = model.predict(test.data) 64 | 65 | #confusion matrix 66 | from sklearn.metrics import confusion_matrix 67 | mat = confusion_matrix(test.target, labels) 68 | sns.heatmap(mat.T, square=True, annot=True, 
fmt='d', cbar=False, 69 | xticklabels=train.target_names, yticklabels=train.target_names) 70 | plt.xlabel('true label') 71 | plt.ylabel('predicted label'); 72 | 73 | #prediction for a single string 74 | def predict_category(s, train=train, model=model): 75 | pred = model.predict([s]) 76 | return train.target_names[pred[0]] 77 | 78 | predict_category('sending a payload to the ISS') 79 | predict_category('discussing islam vs atheism') 80 | predict_category('determining the screen resolution') 81 | 82 | #naive Bayes is usually out-performed by more complicated models, but 83 | # They are extremely fast for both training and prediction 84 | # They provide straightforward probabilistic prediction 85 | # They are often very easily interpretable 86 | # They have very few (if any) tunable parameters 87 | 88 | #Works well when: 89 | # When the naive assumptions actually match the data (very rare in practice) 90 | # For very well-separated categories, when model complexity is less important 91 | # For very high-dimensional data, when model complexity is less important 92 | 93 | #Linear regression 94 | #good starting point for regression tasks 95 | 96 | import numpy as np 97 | 98 | rng = np.random.RandomState(1) 99 | x = 10 * rng.rand(50) 100 | y = 2 * x - 5 + rng.randn(50) 101 | plt.scatter(x, y); 102 | 103 | from sklearn.linear_model import LinearRegression 104 | model = LinearRegression(fit_intercept=True) 105 | model.fit(x[:, np.newaxis], y) 106 | xfit = np.linspace(0, 10, 1000) 107 | yfit = model.predict(xfit[:, np.newaxis]) 108 | plt.scatter(x, y) 109 | plt.plot(xfit, yfit); 110 | 111 | print("Model slope:", model.coef_[0]) 112 | print("Model intercept:", model.intercept_) 113 | 114 | #multidimensional 115 | rng = np.random.RandomState(1) 116 | X = 10 * rng.rand(100, 3) 117 | y = 0.5 + np.dot(X, [1.5, -2., 1.]) 118 | model.fit(X, y) 119 | print(model.intercept_) 120 | print(model.coef_) 121 | 122 | #basis functions - transform the data through a function 123 | 124 | #polynomial basis functions 125 | from sklearn.preprocessing import PolynomialFeatures 126 | x = np.array([2, 3, 4]) 127 | poly = PolynomialFeatures(3, include_bias=False) 128 | poly.fit_transform(x[:, None]) 129 | 130 | from sklearn.pipeline import make_pipeline 131 | poly_model = make_pipeline(PolynomialFeatures(7), 132 | LinearRegression()) 133 | 134 | rng = np.random.RandomState(1) 135 | x = 10 * rng.rand(50) 136 | y = np.sin(x) + 0.1 * rng.randn(50) 137 | poly_model.fit(x[:, np.newaxis], y) 138 | yfit = poly_model.predict(xfit[:, np.newaxis]) 139 | plt.scatter(x, y) 140 | plt.plot(xfit, yfit); 141 | 142 | #Gaussian basis functions 143 | from sklearn.base import BaseEstimator, TransformerMixin 144 | 145 | class GaussianFeatures(BaseEstimator, TransformerMixin): 146 | """Uniformly spaced Gaussian features for one-dimensional input""" 147 | def __init__(self, N, width_factor=2.0): 148 | self.N = N 149 | self.width_factor = width_factor 150 | 151 | @staticmethod 152 | def _gauss_basis(x, y, width, axis=None): 153 | arg = (x - y) / width 154 | return np.exp(-0.5 * np.sum(arg ** 2, axis)) 155 | 156 | def fit(self, X, y=None): 157 | # create N centers spread along the data range 158 | self.centers_ = np.linspace(X.min(), X.max(), self.N) 159 | self.width_ = self.width_factor * (self.centers_[1] - self.centers_[0]) 160 | return self 161 | 162 | def transform(self, X): 163 | return self._gauss_basis(X[:, :, np.newaxis], self.centers_, self.width_, axis=1) 164 | 165 | gauss_model = make_pipeline(GaussianFeatures(20), LinearRegression()) 
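#Before fitting, it can help to sanity-check what the first pipeline step produces: each column of the transformed matrix is one Gaussian bump evaluated at the inputs. A minimal sketch, reusing the x defined above (X_gauss is a hypothetical name):
X_gauss = GaussianFeatures(20).fit(x[:, np.newaxis]).transform(x[:, np.newaxis])
X_gauss.shape #(50, 20): one column per basis function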
166 | gauss_model.fit(x[:, np.newaxis], y) 167 | yfit = gauss_model.predict(xfit[:, np.newaxis]) 168 | plt.scatter(x, y) 169 | plt.plot(xfit, yfit) 170 | plt.xlim(0, 10); 171 | 172 | #regularization 173 | #if use too many basis functions leads to overfitting 174 | model = make_pipeline(GaussianFeatures(30), 175 | LinearRegression()) 176 | model.fit(x[:, np.newaxis], y) 177 | plt.scatter(x, y) 178 | plt.plot(xfit, model.predict(xfit[:, np.newaxis])) 179 | plt.xlim(0, 10) 180 | plt.ylim(-1.5, 1.5); 181 | 182 | #plot the coefficients of Gaussian bases with respect to their location 183 | def basis_plot(model, title=None): 184 | fig, ax = plt.subplots(2, sharex=True) 185 | model.fit(x[:, np.newaxis], y) 186 | ax[0].scatter(x, y) 187 | ax[0].plot(xfit, model.predict(xfit[:, np.newaxis])) 188 | ax[0].set(xlabel='x', ylabel='y', ylim=(-1.5, 1.5)) 189 | 190 | if title: 191 | ax[0].set_title(title) 192 | 193 | ax[1].plot(model.steps[0][1].centers_, model.steps[1][1].coef_) 194 | ax[1].set(xlabel='basis location', ylabel='coefficient', xlim=(0, 10)) 195 | 196 | model = make_pipeline(GaussianFeatures(30), LinearRegression()) 197 | basis_plot(model) 198 | 199 | #This is typical overfitting behavior when basis functions overlap: the coefficients of adjacent basis functions blow up and cancel each other out. We know that such behavior is problematic, and it would be nice if we could limit such spikes explicitly in the model by penalizing large values of the model parameters. Such a penalty is known as regularization, and comes in several forms. 200 | 201 | #Ridge regression (L2 regularization) 202 | #penalizing sum of squares of model coefficients 203 | #P = α∑ θ^2_n 204 | #where α is a free parameter that controls the strength of the penalty 205 | from sklearn.linear_model import Ridge 206 | model = make_pipeline(GaussianFeatures(30), Ridge(alpha=0.1)) 207 | basis_plot(model, title='Ridge Regression') 208 | 209 | #as alpha goes to zero, recover standard linear regression, as it goes to infinity, all model responses will be suppressed 210 | #ridge regression is very efficient computationally 211 | 212 | #Lasso regularization (L1) 213 | #P = α∑ |θ_n| 214 | #due to geometric reasons lasso regression tends to favor sparse models where possible; that is, it preferentially sets model coefficients to exactly zero 215 | from sklearn.linear_model import Lasso 216 | model = make_pipeline(GaussianFeatures(30), Lasso(alpha=0.01)) 217 | basis_plot(model, title='Lasso Regression') 218 | 219 | 220 | 221 | import pandas as pd 222 | #read in data 223 | counts = pd.read_csv('KocPython2020/in-classMaterial/day13/FremontHourly.csv', index_col='Date', parse_dates=True) 224 | weather = pd.read_csv('KocPython2020/in-classMaterial/day13/SeaTacWeather.csv', index_col='DATE', parse_dates=True) 225 | 226 | #get totals in day 227 | daily = counts.resample('d').sum() 228 | daily['Total'] = daily.sum(axis=1) 229 | daily = daily[['Total']] # remove other columns 230 | days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] 231 | for i in range(7): 232 | daily[days[i]] = (daily.index.dayofweek == i).astype(float) 233 | 234 | #include holidays 235 | from pandas.tseries.holiday import USFederalHolidayCalendar 236 | cal = USFederalHolidayCalendar() 237 | holidays = cal.holidays('2012', '2016') 238 | daily = daily.join(pd.Series(1, index=holidays, name='holiday')) 239 | daily['holiday'].fillna(0, inplace=True) 240 | 241 | #hours of daylight 242 | def hours_of_daylight(date, axis=23.44, latitude=47.61): 243 | """Compute the hours of 
daylight for the given date""" 244 | days = (date - pd.datetime(2000, 12, 21)).days 245 | m = (1. - np.tan(np.radians(latitude)) * np.tan(np.radians(axis) * np.cos(days * 2 * np.pi / 365.25))) 246 | return 24. * np.degrees(np.arccos(1 - np.clip(m, 0, 2))) / 180. 247 | 248 | #plot sunlight 249 | daily['daylight_hrs'] = list(map(hours_of_daylight, daily.index)) 250 | daily[['daylight_hrs']].plot(); 251 | 252 | # temperatures are in 1/10 deg C; convert to C 253 | weather['TMIN'] /= 10 254 | weather['TMAX'] /= 10 255 | weather['Temp (C)'] = 0.5 * (weather['TMIN'] + weather['TMAX']) 256 | # precip is in 1/10 mm; convert to inches 257 | weather['PRCP'] /= 254 258 | weather['dry day'] = (weather['PRCP'] == 0).astype(int) 259 | daily = daily.join(weather[['PRCP', 'Temp (C)', 'dry day']]) 260 | 261 | daily['annual'] = (daily.index - daily.index[0]).days / 365. 262 | 263 | daily.head() 264 | 265 | #linear regression 266 | column_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'holiday', 267 | 'daylight_hrs', 'PRCP', 'dry day', 'Temp (C)', 'annual'] 268 | X = daily[column_names] 269 | y = daily['Total'] 270 | model = LinearRegression(fit_intercept=False) #why do we set intercept to false? 271 | model.fit(X, y) 272 | daily['predicted'] = model.predict(X) 273 | 274 | daily[['Total', 'predicted']].plot(alpha=0.5); 275 | 276 | #how much does each feature contribute? 277 | params = pd.Series(model.coef_, index=X.columns) 278 | params 279 | 280 | #measure uncertainty through bootstrap 281 | from sklearn.utils import resample 282 | np.random.seed(1) 283 | err = np.std([model.fit(*resample(X, y)).coef_ for i in range(1000)], 0) 284 | 285 | print(pd.DataFrame({'effect': params.round(0), 'error': err.round(0)})) 286 | 287 | #Our model is almost certainly missing some relevant information. For example, non‐linear effects (such as effects of precipitation and cold temperature) and nonlinear trends within each variable (such as disinclination to ride at very cold and very hot temperatures) cannot be accounted for in this model. 
Additionally, we have thrown away some of the finer-grained information (such as the difference between a rainy morning and a rainy afternoon), and we have ignored correlations between days (such as the possible effect of a rainy Tuesday on Wednesday’s numbers, or the effect of an unexpected sunny day after a streak of rainy days) 288 | 289 | 290 | #Support vector machines (SVMs) 291 | #both classification and regression 292 | import numpy as np 293 | import matplotlib.pyplot as plt 294 | from scipy import stats 295 | # use Seaborn plotting defaults 296 | import seaborn as sns; sns.set() 297 | 298 | #discriminative classification: rather than modeling each class, we simply find a line or curve (in two dimensions) or manifold (in multiple dimensions) that divides the classes from each other 299 | 300 | from sklearn.datasets.samples_generator import make_blobs 301 | X, y = make_blobs(n_samples=50, centers=2, 302 | random_state=0, cluster_std=0.60) 303 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn'); 304 | 305 | #multiple lines can discriminate 306 | xfit = np.linspace(-1, 3.5) 307 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn') 308 | plt.plot([0.6], [2.1], 'x', color='red', markeredgewidth=2, markersize=10) #one marked x will vary in prediction based on line choice 309 | for m, b in [(1, 0.65), (0.5, 1.6), (-0.2, 2.9)]: 310 | plt.plot(xfit, m * xfit + b, '-k') 311 | 312 | plt.xlim(-1, 3.5); 313 | 314 | #rather than simply drawing a zero-width line between the classes, we can draw around each line a margin of some width, up to the nearest point 315 | 316 | xfit = np.linspace(-1, 3.5) 317 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn') 318 | 319 | for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]: 320 | yfit = m * xfit + b 321 | plt.plot(xfit, yfit, '-k') 322 | plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none', color='#AAAAAA', alpha=0.4) 323 | 324 | plt.xlim(-1, 3.5); 325 | 326 | #In support vector machines, the line that maximizes this margin is the one we will choose as the optimal model. 
Support vector machines are an example of such a maximum margin estimator 327 | 328 | from sklearn.svm import SVC # "Support vector classifier" 329 | model = SVC(kernel='linear', C=1E10) 330 | model.fit(X, y) 331 | 332 | #To better visualize what’s happening here, let’s create a quick convenience function that will plot SVM decision boundaries for us 333 | 334 | def plot_svc_decision_function(model, ax=None, plot_support=True): 335 | """Plot the decision function for a two-dimensional SVC""" 336 | if ax is None: 337 | ax = plt.gca() 338 | xlim = ax.get_xlim() 339 | ylim = ax.get_ylim() 340 | 341 | # create grid to evaluate model 342 | x = np.linspace(xlim[0], xlim[1], 30) 343 | y = np.linspace(ylim[0], ylim[1], 30) 344 | Y, X = np.meshgrid(y, x) 345 | xy = np.vstack([X.ravel(), Y.ravel()]).T 346 | P = model.decision_function(xy).reshape(X.shape) 347 | 348 | # plot decision boundary and margins 349 | ax.contour(X, Y, P, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--']) 350 | 351 | # plot support vectors 352 | if plot_support: 353 | ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=300, linewidth=1, facecolors='none'); 354 | ax.set_xlim(xlim) 355 | ax.set_ylim(ylim) 356 | 357 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn') 358 | plot_svc_decision_function(model); 359 | 360 | #points touching line are the pivotal elements of this fit, and are known as the support vectors, and give the algorithm its name 361 | model.support_vectors_ 362 | #notice that these points define the model, and new points will not necessarily change it 363 | 364 | 365 | #Beyond linear boundaries: Kernel SVM 366 | from sklearn.datasets.samples_generator import make_circles 367 | X, y = make_circles(100, factor=.1, noise=.1) 368 | clf = SVC(kernel='linear').fit(X, y) 369 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn') 370 | plot_svc_decision_function(clf, plot_support=False); 371 | 372 | #radial basis function 373 | r = np.exp(-(X ** 2).sum(1)) 374 | #makes data trivially linear 375 | 376 | 377 | #One strategy to this end is to compute a basis function centered at every point in the dataset, and let the SVM algorithm sift through the results. This type of basis function transformation is known as a kernel transformation, as it is based on a similarity relationship (or kernel) between each pair of points 378 | 379 | #kernel trick, a fit on kernel-transformed data can be done implicitly-that is, without ever building the full N - dimensional representation of the kernel projection 380 | 381 | clf = SVC(kernel='rbf', C=1E6) 382 | clf.fit(X, y) 383 | 384 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn') 385 | plot_svc_decision_function(clf) 386 | plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], 387 | s=300, lw=1, facecolors='none'); 388 | 389 | 390 | #Tuning the SVM: Softening margins 391 | #messy data 392 | X, y = make_blobs(n_samples=100, centers=2, 393 | random_state=0, cluster_std=1.2) 394 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn'); 395 | 396 | #The hardness of the margin is controlled by a tuning parameter, most often known as C . For very large C , the margin is hard, and points cannot lie in it. For smaller C , the margin is softer, and can grow to encompass some points. 
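#Since C is a hyperparameter, a sensible way to choose it is cross-validation; the comparison below and the face-detection grid search later do this more systematically. A minimal sketch on the blobs above (the candidate values are arbitrary):
from sklearn.model_selection import cross_val_score
for C_try in [0.01, 0.1, 1.0, 10.0, 100.0]:
    scores = cross_val_score(SVC(kernel='linear', C=C_try), X, y, cv=5)
    print(C_try, scores.mean().round(3))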
397 | X, y = make_blobs(n_samples=100, centers=2, random_state=0, cluster_std=0.8) 398 | fig, ax = plt.subplots(1, 2, figsize=(16, 6)) 399 | fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1) 400 | 401 | for axi, C in zip(ax, [10.0, 0.1]): 402 | model = SVC(kernel='linear', C=C).fit(X, y) 403 | axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn') 404 | plot_svc_decision_function(model, axi) 405 | axi.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=300, lw=1, facecolors='none'); 406 | axi.set_title('C = {0:.1f}'.format(C), size=14) 407 | 408 | #the optimum value of C needs to be tuned through cross-validation 409 | 410 | 411 | 412 | #example: face detection 413 | from sklearn.datasets import fetch_lfw_people 414 | faces = fetch_lfw_people(min_faces_per_person=60) 415 | print(faces.target_names) 416 | print(faces.images.shape) 417 | 418 | fig, ax = plt.subplots(3, 5) 419 | for i, axi in enumerate(ax.flat): 420 | axi.imshow(faces.images[i], cmap='bone') 421 | axi.set(xticks=[], yticks=[], xlabel=faces.target_names[faces.target[i]]) 422 | 423 | 424 | #use PCA to lower the dimensionality 425 | from sklearn.svm import SVC 426 | from sklearn.decomposition import PCA 427 | from sklearn.pipeline import make_pipeline 428 | pca = PCA(n_components=150, whiten=True, random_state=42, svd_solver = 'randomized') 429 | svc = SVC(kernel='rbf', class_weight='balanced') 430 | model = make_pipeline(pca, svc) 431 | 432 | #split the data 433 | from sklearn.model_selection import train_test_split 434 | Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target, random_state=42) 435 | 436 | #grid search - use cross-validation to explore the parameters C (hardness of the margin) and gamma (size of the radial basis function kernel) 437 | from sklearn.model_selection import GridSearchCV #sklearn.grid_search was removed; GridSearchCV now lives in model_selection 438 | param_grid = {'svc__C': [1, 5, 10, 50], 'svc__gamma': [0.0001, 0.0005, 0.001, 0.005]} 439 | grid = GridSearchCV(model, param_grid) 440 | 441 | #run the search and time it 442 | %time grid.fit(Xtrain, ytrain) 443 | print(grid.best_params_) 444 | 445 | #predict 446 | model = grid.best_estimator_ 447 | yfit = model.predict(Xtest) 448 | 449 | fig, ax = plt.subplots(4, 6) 450 | for i, axi in enumerate(ax.flat): 451 | axi.imshow(Xtest[i].reshape(62, 47), cmap='bone') 452 | axi.set(xticks=[], yticks=[]) 453 | axi.set_ylabel(faces.target_names[yfit[i]].split()[-1], 454 | color='black' if yfit[i] == ytest[i] else 'red') 455 | 456 | fig.suptitle('Predicted Names; Incorrect Labels in Red', size=14); 457 | 458 | #classification report 459 | from sklearn.metrics import classification_report 460 | print(classification_report(ytest, yfit, target_names=faces.target_names)) 461 | 462 | #confusion matrix 463 | from sklearn.metrics import confusion_matrix 464 | mat = confusion_matrix(ytest, yfit) 465 | sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False, xticklabels=faces.target_names, yticklabels=faces.target_names) 466 | plt.xlabel('true label') 467 | plt.ylabel('predicted label'); 468 | 469 | 470 | #SVMs are a powerful classification method for a number of reasons: 471 | # Their dependence on relatively few support vectors means that they are very compact models, and take up very little memory. 472 | # Once the model is trained, the prediction phase is very fast. 473 | # Because they are affected only by points near the margin, they work well with high-dimensional data—even data with more dimensions than samples, which is a challenging regime for other algorithms.
474 | # Their integration with kernel methods makes them very versatile, able to adapt to many types of data. 475 | 476 | #However, SVMs have several disadvantages as well: 477 | # The scaling with the number of samples N is O(N^3) at worst, or O(N^2) for efficient implementations. For large numbers of training samples, this computational cost can be prohibitive. 478 | # The results are strongly dependent on a suitable choice for the softening parameter C. This must be carefully chosen via cross-validation, which can be expensive as datasets grow in size. 479 | # The results do not have a direct probabilistic interpretation. Probabilities can be estimated via an internal cross-validation (see the probability parameter of SVC), but this extra estimation is costly. 480 | 481 | 482 | 483 | 484 | -------------------------------------------------------------------------------- /in-classMaterial/day16/GP1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day16/GP1.pdf -------------------------------------------------------------------------------- /in-classMaterial/day17/GP2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def kernel(X1, X2, l=1.0, sigma_f=1.0): 4 | ''' Isotropic squared exponential kernel. Computes a covariance matrix from points in X1 and X2. Args: X1: Array of m points (m x d). X2: Array of n points (n x d). Returns: Covariance matrix (m x n). ''' 5 | sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T) 6 | return sigma_f**2 * np.exp(-0.5 / l**2 * sqdist) 7 | 8 | import matplotlib.pyplot as plt 9 | 10 | from matplotlib import cm 11 | from mpl_toolkits.mplot3d import Axes3D 12 | 13 | def plot_gp(mu, cov, X, X_train=None, Y_train=None, samples=[]): 14 | X = X.ravel() 15 | mu = mu.ravel() 16 | uncertainty = 1.96 * np.sqrt(np.diag(cov)) 17 | 18 | plt.fill_between(X, mu + uncertainty, mu - uncertainty, alpha=0.1) 19 | plt.plot(X, mu, label='Mean') 20 | for i, sample in enumerate(samples): 21 | plt.plot(X, sample, lw=1, ls='--', label=f'Sample {i+1}') 22 | if X_train is not None: 23 | plt.plot(X_train, Y_train, 'rx') 24 | plt.legend() 25 | 26 | def plot_gp_2D(gx, gy, mu, X_train, Y_train, title, i): 27 | ax = plt.gcf().add_subplot(1, 2, i, projection='3d') 28 | ax.plot_surface(gx, gy, mu.reshape(gx.shape), cmap=cm.coolwarm, linewidth=0, alpha=0.2, antialiased=False) 29 | ax.scatter(X_train[:,0], X_train[:,1], Y_train, c=Y_train, cmap=cm.coolwarm) 30 | ax.set_title(title) 31 | 32 | # Finite number of points 33 | X = np.arange(-5, 5, 0.2).reshape(-1, 1) 34 | 35 | # Mean and covariance of the prior 36 | mu = np.zeros(X.shape) 37 | cov = kernel(X, X) 38 | 39 | # Draw three samples from the prior 40 | samples = np.random.multivariate_normal(mu.ravel(), cov, 3) 41 | 42 | # Plot GP mean, confidence interval and samples 43 | plot_gp(mu, cov, X, samples=samples) 44 | 45 | 46 | from numpy.linalg import inv 47 | 48 | def posterior_predictive(X_s, X_train, Y_train, l=1.0, sigma_f=1.0, sigma_y=1e-8): 49 | ''' Computes the sufficient statistics of the GP posterior predictive distribution from m training data X_train and Y_train and n new inputs X_s. Args: X_s: New input locations (n x d). X_train: Training locations (m x d). Y_train: Training targets (m x 1). l: Kernel length parameter. sigma_f: Kernel vertical variation parameter. sigma_y: Noise parameter.
Returns: Posterior mean vector (n x d) and covariance matrix (n x n). ''' 50 | K = kernel(X_train, X_train, l, sigma_f) + sigma_y**2 * np.eye(len(X_train)) 51 | K_s = kernel(X_train, X_s, l, sigma_f) 52 | K_ss = kernel(X_s, X_s, l, sigma_f) + 1e-8 * np.eye(len(X_s)) 53 | K_inv = inv(K) 54 | 55 | # Equation (4) 56 | mu_s = K_s.T.dot(K_inv).dot(Y_train) 57 | 58 | # Equation (5) 59 | cov_s = K_ss - K_s.T.dot(K_inv).dot(K_s) 60 | 61 | return mu_s, cov_s 62 | 63 | # Noise free training data 64 | X_train = np.array([-4, -3, -2, -1, 1]).reshape(-1, 1) 65 | Y_train = np.sin(X_train) 66 | 67 | # Compute mean and covariance of the posterior predictive distribution 68 | mu_s, cov_s = posterior_predictive(X, X_train, Y_train) 69 | 70 | samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 3) 71 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train, samples=samples) 72 | 73 | noise = 0.4 74 | 75 | # Noisy training data 76 | X_train = np.arange(-3, 4, 1).reshape(-1, 1) 77 | Y_train = np.sin(X_train) + noise * np.random.randn(*X_train.shape) 78 | 79 | # Compute mean and covariance of the posterior predictive distribution 80 | mu_s, cov_s = posterior_predictive(X, X_train, Y_train, sigma_y=noise) 81 | 82 | samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 3) 83 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train, samples=samples) 84 | 85 | 86 | params = [ 87 | (0.3, 1.0, 0.2), 88 | (3.0, 1.0, 0.2), 89 | (1.0, 0.3, 0.2), 90 | (1.0, 3.0, 0.2), 91 | (1.0, 1.0, 0.05), 92 | (1.0, 1.0, 1.5), 93 | ] 94 | 95 | plt.figure(figsize=(12, 5)) 96 | 97 | for i, (l, sigma_f, sigma_y) in enumerate(params): 98 | mu_s, cov_s = posterior_predictive(X, X_train, Y_train, l=l, 99 | sigma_f=sigma_f, 100 | sigma_y=sigma_y) 101 | plt.subplot(3, 2, i + 1) 102 | plt.title(f'l = {l}, sigma_f = {sigma_f}, sigma_y = {sigma_y}') 103 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train) 104 | 105 | from numpy.linalg import cholesky 106 | from scipy.optimize import minimize 107 | 108 | def nll_fn(X_train, Y_train, noise): 109 | ''' Returns a function that computes the negative log-likelihood for training data X_train and Y_train and given noise level. Args: X_train: training locations (m x d). Y_train: training targets (m x 1). noise: known noise level of Y_train. Returns: Minimization objective. ''' 110 | def step(theta): 111 | K = kernel(X_train, X_train, l=theta[0], sigma_f=theta[1]) + \ 112 | noise**2 * np.eye(len(X_train)) 113 | # Compute determinant via Cholesky decomposition 114 | return np.sum(np.log(np.diagonal(cholesky(K)))) + \ 115 | 0.5 * Y_train.T.dot(inv(K).dot(Y_train)) + \ 116 | 0.5 * len(X_train) * np.log(2*np.pi) 117 | return step 118 | 119 | # Minimize the negative log-likelihood w.r.t. parameters l and sigma_f. 120 | # We should actually run the minimization several times with different 121 | # initializations to avoid local minima but this is skipped here for 122 | # simplicity. 123 | res = minimize(nll_fn(X_train, Y_train, noise), [1, 1], 124 | bounds=((1e-5, None), (1e-5, None)), 125 | method='L-BFGS-B') 126 | 127 | # Store the optimization results in global variables so that we can 128 | # compare it later with the results from other implementations. 
129 | l_opt, sigma_f_opt = res.x 130 | l_opt, sigma_f_opt 131 | 132 | # Compute the posterior predictive statistics with optimized kernel parameters and plot the results 133 | mu_s, cov_s = posterior_predictive(X, X_train, Y_train, l=l_opt, sigma_f=sigma_f_opt, sigma_y=noise) 134 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train) 135 | 136 | noise_2D = 0.1 137 | 138 | rx, ry = np.arange(-5, 5, 0.3), np.arange(-5, 5, 0.3) 139 | gx, gy = np.meshgrid(rx, ry) 140 | 141 | X_2D = np.c_[gx.ravel(), gy.ravel()] 142 | 143 | X_2D_train = np.random.uniform(-4, 4, (100, 2)) 144 | Y_2D_train = np.sin(0.5 * np.linalg.norm(X_2D_train, axis=1)) + \ 145 | noise_2D * np.random.randn(len(X_2D_train)) 146 | 147 | plt.figure(figsize=(14,7)) 148 | 149 | mu_s, _ = posterior_predictive(X_2D, X_2D_train, Y_2D_train, sigma_y=noise_2D) 150 | plot_gp_2D(gx, gy, mu_s, X_2D_train, Y_2D_train, 151 | f'Before parameter optimization: l={1.00} sigma_f={1.00}', 1) 152 | 153 | res = minimize(nll_fn(X_2D_train, Y_2D_train, noise_2D), [1, 1], 154 | bounds=((1e-5, None), (1e-5, None)), 155 | method='L-BFGS-B') 156 | 157 | mu_s, _ = posterior_predictive(X_2D, X_2D_train, Y_2D_train, *res.x, sigma_y=noise_2D) 158 | plot_gp_2D(gx, gy, mu_s, X_2D_train, Y_2D_train, 159 | f'After parameter optimization: l={res.x[0]:.2f} sigma_f={res.x[1]:.2f}', 2) 160 | 161 | from sklearn.gaussian_process import GaussianProcessRegressor 162 | from sklearn.gaussian_process.kernels import ConstantKernel, RBF 163 | 164 | rbf = ConstantKernel(1.0) * RBF(length_scale=1.0) 165 | gpr = GaussianProcessRegressor(kernel=rbf, alpha=noise**2) 166 | 167 | # Reuse training data from previous 1D example 168 | gpr.fit(X_train, Y_train) 169 | 170 | # Compute posterior predictive mean and covariance 171 | mu_s, cov_s = gpr.predict(X, return_cov=True) 172 | 173 | # Obtain optimized kernel parameters 174 | l = gpr.kernel_.k2.get_params()['length_scale'] 175 | sigma_f = np.sqrt(gpr.kernel_.k1.get_params()['constant_value']) 176 | 177 | # Compare with previous results 178 | assert(np.isclose(l_opt, l)) 179 | assert(np.isclose(np.round(sigma_f_opt,4), np.round(sigma_f,4))) 180 | 181 | # Plot the results 182 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train) 183 | 184 | import GPy 185 | 186 | rbf = GPy.kern.RBF(input_dim=1, variance=1.0, lengthscale=1.0) 187 | gpr = GPy.models.GPRegression(X_train, Y_train, rbf) 188 | 189 | # Fix the noise variance to known value 190 | gpr.Gaussian_noise.variance = noise**2 191 | gpr.Gaussian_noise.variance.fix() 192 | 193 | # Run optimization 194 | gpr.optimize(); 195 | 196 | # Obtain optimized kernel parameters 197 | l = gpr.rbf.lengthscale.values[0] 198 | sigma_f = np.sqrt(gpr.rbf.variance.values[0]) 199 | 200 | # Compare with previous results 201 | assert(np.isclose(l_opt, l)) 202 | assert(np.isclose(np.round(sigma_f_opt,4), np.round(sigma_f,4))) 203 | 204 | # Plot the results with the built-in plot function 205 | gpr.plot(); 206 | 207 | #The legacy sklearn GaussianProcess class (with its corr=, theta0=, nugget=, and eval_MSE= arguments) has been removed from scikit-learn; the same introductory example is reproduced below with its replacement, GaussianProcessRegressor 208 | from matplotlib import pyplot as pl 209 | 210 | np.random.seed(1) 211 | 212 | 213 | def f(x): 214 | """The function to predict.""" 215 | return x * np.sin(x) 216 | 217 | #---------------------------------------------------------------------- 218 | # First the noiseless case 219 | X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T 220 | 221 | # Observations 222 | y = f(X).ravel() 223 | 224 | # Mesh the input space for evaluations of the real function and the prediction 225 | x = np.atleast_2d(np.linspace(0, 10, 1000)).T 226 | 227 | # Instantiate a Gaussian Process model (an RBF kernel stands in for the old corr= options) 228 | kernel_f = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) 229 | gp = GaussianProcessRegressor(kernel=kernel_f, n_restarts_optimizer=9) 230 | 231 | # Fit to data using Maximum Likelihood Estimation of the parameters 232 | gp.fit(X, y) 233 | 234 | # Make the prediction on the meshed x-axis (ask for the standard deviation as well) 235 | y_pred, sigma = gp.predict(x, return_std=True) 236 | 237 | # Plot the function, the prediction and the 95% confidence interval based on the predictive standard deviation 238 | fig = pl.figure() 239 | pl.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$') 240 | pl.plot(X, y, 'r.', markersize=10, label='Observations') 241 | pl.plot(x, y_pred, 'b-', label='Prediction') 242 | pl.fill(np.concatenate([x, x[::-1]]), 243 | np.concatenate([y_pred - 1.9600 * sigma, 244 | (y_pred + 1.9600 * sigma)[::-1]]), 245 | alpha=.5, fc='b', ec='None', label='95% confidence interval') 246 | pl.xlabel('$x$') 247 | pl.ylabel('$f(x)$') 248 | pl.ylim(-10, 20) 249 | pl.legend(loc='upper left') 250 | 251 | #---------------------------------------------------------------------- 252 | # now the noisy case 253 | X = np.linspace(0.1, 9.9, 20) 254 | X = np.atleast_2d(X).T 255 | 256 | # Observations and noise 257 | y = f(X).ravel() 258 | dy = 0.5 + 1.0 * np.random.random(y.shape) 259 | noise = np.random.normal(0, dy) 260 | y += noise 261 | 262 | # Mesh the input space for evaluations of the real function and the prediction 263 | x = np.atleast_2d(np.linspace(0, 10, 1000)).T 264 | 265 | # Instantiate a Gaussian Process model; the per-point noise variance enters through alpha (the replacement for the old nugget argument) 266 | gp = GaussianProcessRegressor(kernel=kernel_f, alpha=dy ** 2, n_restarts_optimizer=10) 267 | 268 | # Fit to data using Maximum Likelihood Estimation of the parameters 269 | gp.fit(X, y) 270 | 271 | # Make the prediction on the meshed x-axis (ask for the standard deviation as well) 272 | y_pred, sigma = gp.predict(x, return_std=True) 273 | 274 | # Plot the function, the prediction and the 95% confidence interval 275 | fig = pl.figure() 276 | pl.plot(x, f(x), 'r:', label=r'$f(x) = x\,\sin(x)$') 277 | pl.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label='Observations') 278 | pl.plot(x, y_pred, 'b-', label='Prediction') 279 | pl.fill(np.concatenate([x, x[::-1]]), 280 | np.concatenate([y_pred - 1.9600 * sigma, 281 | (y_pred + 1.9600 * sigma)[::-1]]), 282 | alpha=.5, fc='b', ec='None', label='95% confidence interval') 283 | pl.xlabel('$x$') 284 | pl.ylabel('$f(x)$') 285 | pl.ylim(-10, 20) 286 | pl.legend(loc='upper left') 287 | 288 | pl.show() 289 | 290 | 291 | import pandas as pd 292 | import os 293 | os.chdir('KocPython2020/in-classMaterial/day17') 294 | tt = pd.read_csv('immSurvey.csv') 295 | tt.head() 296 | 297 | alphas = tt.stanMeansNewSysPooled 298 | sample = tt.textToSend 299 | 300 | from sklearn.feature_extraction.text import CountVectorizer 301 | vec = CountVectorizer() 302 | X = vec.fit_transform(sample) 303 | X 304 | 305 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names()) 306 | 307 | #down-weighting frequent words; term frequency–inverse document frequency (TF–IDF), which weights the word counts by a measure of how often they appear in the documents 308 | from sklearn.feature_extraction.text import TfidfVectorizer 309 | vec = TfidfVectorizer() 310 | X = vec.fit_transform(sample) 311 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names()) 312 | 313 | from sklearn.model_selection import train_test_split #sklearn.cross_validation has been removed; train_test_split now lives in model_selection 314 | Xtrain, Xtest,
ytrain, ytest = train_test_split(X, alphas, 315 | random_state=1) 316 | 317 | rbf = ConstantKernel(1.0) * RBF(length_scale=1.0) 318 | gpr = GaussianProcessRegressor(kernel=rbf, alpha=1e-8) 319 | 320 | gpr.fit(Xtrain.toarray(), ytrain) 321 | 322 | # Compute posterior predictive mean and covariance 323 | mu_s, cov_s = gpr.predict(Xtest.toarray(), return_cov=True) 324 | 325 | #correlation between the held-out test values and the posterior predictive means 326 | np.corrcoef(ytest, mu_s) 327 | 328 | #how might we improve this? 329 | 330 | -------------------------------------------------------------------------------- /in-classMaterial/day17/gp-fit.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int<lower=1> N; 3 | int<lower=1> K; 4 | int<lower=1> M; 5 | matrix[N,K] X; 6 | matrix[N,M] X_corr; 7 | vector[N] y; 8 | } 9 | parameters { 10 | real<lower=0> nug; 11 | real<lower=0> sig_sq; 12 | vector<lower=0>[M] d1; 13 | vector<lower=0>[M] d2; 14 | vector[K] b; 15 | } 16 | model { 17 | matrix[N,N] Sigma; 18 | vector[N] mu; 19 | matrix[N,K] Mu; 20 | vector[M] d; 21 | 22 | for(m in 1:M){ 23 | d1[m] ~ gamma(1,20); 24 | d2[m] ~ gamma(10,10); 25 | d[m] = .5*(d1[m] + d2[m]); 26 | } 27 | for (i in 1:(N-1)) { 28 | for (j in (i+1):N) { 29 | vector[M] summand; 30 | for(m in 1:M){ 31 | summand[m] = -pow(X_corr[i,m] - X_corr[j,m],2)/d[m]; 32 | } 33 | Sigma[i,j] = exp(sum(summand)); 34 | Sigma[j,i] = Sigma[i,j]; 35 | } 36 | } 37 | for (i in 1:N){ 38 | for(k in 1:K){ 39 | Mu[i,k] = X[i,k]*b[k]; 40 | } 41 | mu[i]=sum(Mu[i,1:K]); 42 | } 43 | for (i in 1:N) Sigma[i,i] = 1 + nug; // + jitter 44 | 45 | sig_sq ~ inv_gamma(1,1); 46 | 47 | nug ~ exponential(1); 48 | 49 | b ~ normal(0,3); 50 | y ~ multi_normal(mu,sig_sq*Sigma); 51 | } 52 | 53 | -------------------------------------------------------------------------------- /in-classMaterial/day17/gp-pred.stan: -------------------------------------------------------------------------------- 1 | data { 2 | int<lower=1> N; 3 | int<lower=0> zN; 4 | int<lower=1> K; 5 | int<lower=1> M; 6 | matrix[N+zN,K] XZ; 7 | matrix[N+zN,M] XZ_corr; 8 | vector[N] y; 9 | } 10 | parameters { 11 | real<lower=0> nug; 12 | real<lower=0> sig_sq; 13 | vector<lower=0>[M] d1; 14 | vector<lower=0>[M] d2; 15 | vector[K] b; 16 | vector[zN] z; 17 | } 18 | model { 19 | matrix[N+zN,N+zN] Sigma; 20 | vector[N+zN] mu; 21 | matrix[N+zN,K] Mu; 22 | vector[M] d; 23 | 24 | vector[N+zN] yz; 25 | 26 | for(m in 1:M){ 27 | d1[m] ~ gamma(1,20); 28 | d2[m] ~ gamma(10,10); 29 | d[m] = .5*(d1[m] + d2[m]); 30 | } 31 | for (i in 1:(N+zN-1)) { 32 | for (j in (i+1):(N+zN)) { 33 | vector[M] summand; 34 | for(m in 1:M){ 35 | summand[m] = -pow(XZ_corr[i,m] - XZ_corr[j,m],2)/d[m]; 36 | } 37 | Sigma[i,j] = exp(sum(summand)); 38 | Sigma[j,i] = Sigma[i,j]; 39 | } 40 | } 41 | for (i in 1:(N+zN)){ 42 | for(k in 1:K){ 43 | Mu[i,k] = XZ[i,k]*b[k]; 44 | } 45 | mu[i]=sum(Mu[i,1:K]); 46 | } 47 | for (i in 1:(N+zN)) 48 | Sigma[i,i] = 1 + nug; // + jitter 49 | 50 | sig_sq ~ inv_gamma(1,1); 51 | nug ~ exponential(1); 52 | 53 | b ~ normal(0,3); 54 | 55 | for(n in 1:N) yz[n] = y[n]; 56 | for(n in 1:zN) yz[N+n] = z[n]; 57 | 58 | yz ~ multi_normal(mu,sig_sq*Sigma); 59 | 60 | } 61 | 62 | -------------------------------------------------------------------------------- /in-classMaterial/day18/NN1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day18/NN1.pdf -------------------------------------------------------------------------------- /in-classMaterial/day18/NN2.py:
-------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | class Network(object): 6 | 7 | def __init__(self, sizes): 8 | """The list ``sizes`` contains the number of neurons in the 9 | respective layers of the network. For example, if the list 10 | was [2, 3, 1] then it would be a three-layer network, with the 11 | first layer containing 2 neurons, the second layer 3 neurons, 12 | and the third layer 1 neuron. The biases and weights for the 13 | network are initialized randomly, using a Gaussian 14 | distribution with mean 0, and variance 1. Note that the first 15 | layer is assumed to be an input layer, and by convention we 16 | won't set any biases for those neurons, since biases are only 17 | ever used in computing the outputs from later layers.""" 18 | self.num_layers = len(sizes) 19 | self.sizes = sizes 20 | self.biases = [np.random.randn(y, 1) for y in sizes[1:]] 21 | self.weights = [np.random.randn(y, x) 22 | for x, y in zip(sizes[:-1], sizes[1:])] 23 | 24 | def feedforward(self, a): 25 | """Return the output of the network if ``a`` is input.""" 26 | for b, w in zip(self.biases, self.weights): 27 | a = sigmoid(np.dot(w, a)+b) 28 | return a 29 | 30 | def SGD(self, training_data, epochs, mini_batch_size, eta, 31 | test_data=None): 32 | """Train the neural network using mini-batch stochastic 33 | gradient descent. The ``training_data`` is a list of tuples 34 | ``(x, y)`` representing the training inputs and the desired 35 | outputs. The other non-optional parameters are 36 | self-explanatory. If ``test_data`` is provided then the 37 | network will be evaluated against the test data after each 38 | epoch, and partial progress printed out. This is useful for 39 | tracking progress, but slows things down substantially.""" 40 | if test_data: n_test = len(test_data) 41 | n = len(training_data) 42 | for j in range(epochs): 43 | random.shuffle(training_data) 44 | mini_batches = [ 45 | training_data[k:k+mini_batch_size] 46 | for k in range(0, n, mini_batch_size)] 47 | for mini_batch in mini_batches: 48 | self.update_mini_batch(mini_batch, eta) 49 | if test_data: 50 | print("Epoch {0}: {1} / {2}".format( 51 | j, self.evaluate(test_data), n_test)) 52 | else: 53 | print("Epoch {0} complete".format(j)) 54 | 55 | def update_mini_batch(self, mini_batch, eta): 56 | """Update the network's weights and biases by applying 57 | gradient descent using backpropagation to a single mini batch. 58 | The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta`` 59 | is the learning rate.""" 60 | nabla_b = [np.zeros(b.shape) for b in self.biases] 61 | nabla_w = [np.zeros(w.shape) for w in self.weights] 62 | for x, y in mini_batch: 63 | delta_nabla_b, delta_nabla_w = self.backprop(x, y) 64 | nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)] 65 | nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)] 66 | self.weights = [w-(eta/len(mini_batch))*nw 67 | for w, nw in zip(self.weights, nabla_w)] 68 | self.biases = [b-(eta/len(mini_batch))*nb 69 | for b, nb in zip(self.biases, nabla_b)] 70 | 71 | def backprop(self, x, y): 72 | """Return a tuple ``(nabla_b, nabla_w)`` representing the 73 | gradient for the cost function C_x. 
``nabla_b`` and 74 | ``nabla_w`` are layer-by-layer lists of numpy arrays, similar 75 | to ``self.biases`` and ``self.weights``.""" 76 | nabla_b = [np.zeros(b.shape) for b in self.biases] 77 | nabla_w = [np.zeros(w.shape) for w in self.weights] 78 | # feedforward 79 | activation = x 80 | activations = [x] # list to store all the activations, layer by layer 81 | zs = [] # list to store all the z vectors, layer by layer 82 | for b, w in zip(self.biases, self.weights): 83 | z = np.dot(w, activation)+b 84 | zs.append(z) 85 | activation = sigmoid(z) 86 | activations.append(activation) 87 | # backward pass 88 | delta = self.cost_derivative(activations[-1], y) * \ 89 | sigmoid_prime(zs[-1]) 90 | nabla_b[-1] = delta 91 | nabla_w[-1] = np.dot(delta, activations[-2].transpose()) 92 | # Note that the variable l in the loop below is used a little 93 | # differently to the notation in Chapter 2 of the book. Here, 94 | # l = 1 means the last layer of neurons, l = 2 is the 95 | # second-last layer, and so on. It's a renumbering of the 96 | # scheme in the book, used here to take advantage of the fact 97 | # that Python can use negative indices in lists. 98 | for l in range(2, self.num_layers): 99 | z = zs[-l] 100 | sp = sigmoid_prime(z) 101 | delta = np.dot(self.weights[-l+1].transpose(), delta) * sp 102 | nabla_b[-l] = delta 103 | nabla_w[-l] = np.dot(delta, activations[-l-1].transpose()) 104 | return (nabla_b, nabla_w) 105 | 106 | def evaluate(self, test_data): 107 | """Return the number of test inputs for which the neural 108 | network outputs the correct result. Note that the neural 109 | network's output is assumed to be the index of whichever 110 | neuron in the final layer has the highest activation.""" 111 | test_results = [(np.argmax(self.feedforward(x)), y) 112 | for (x, y) in test_data] 113 | return sum(int(x == y) for (x, y) in test_results) 114 | 115 | def cost_derivative(self, output_activations, y): 116 | """Return the vector of partial derivatives \partial C_x / 117 | \partial a for the output activations.""" 118 | return (output_activations-y) 119 | 120 | #### Miscellaneous functions 121 | def sigmoid(z): 122 | """The sigmoid function.""" 123 | return 1.0/(1.0+np.exp(-z)) 124 | 125 | def sigmoid_prime(z): 126 | """Derivative of the sigmoid function.""" 127 | return sigmoid(z)*(1-sigmoid(z)) 128 | 129 | """ 130 | mnist_loader 131 | ~~~~~~~~~~~~ 132 | 133 | A library to load the MNIST image data. For details of the data 134 | structures that are returned, see the doc strings for ``load_data`` 135 | and ``load_data_wrapper``. In practice, ``load_data_wrapper`` is the 136 | function usually called by our neural network code. 137 | """ 138 | 139 | import pickle as cPickle 140 | import gzip 141 | 142 | 143 | def load_data(): 144 | """Return the MNIST data as a tuple containing the training data, 145 | the validation data, and the test data. 146 | 147 | The ``training_data`` is returned as a tuple with two entries. 148 | The first entry contains the actual training images. This is a 149 | numpy ndarray with 50,000 entries. Each entry is, in turn, a 150 | numpy ndarray with 784 values, representing the 28 * 28 = 784 151 | pixels in a single MNIST image. 152 | 153 | The second entry in the ``training_data`` tuple is a numpy ndarray 154 | containing 50,000 entries. Those entries are just the digit 155 | values (0...9) for the corresponding images contained in the first 156 | entry of the tuple. 
157 | 158 | The ``validation_data`` and ``test_data`` are similar, except 159 | each contains only 10,000 images. 160 | 161 | This is a nice data format, but for use in neural networks it's 162 | helpful to modify the format of the ``training_data`` a little. 163 | That's done in the wrapper function ``load_data_wrapper()``, see 164 | below. 165 | """ 166 | with gzip.open('mnist.pkl.gz', 'rb') as ff: 167 | u = cPickle._Unpickler(ff) 168 | u.encoding = 'latin1' 169 | train, val, test = u.load() 170 | return (train, val, test) 171 | 172 | def load_data_wrapper(): 173 | """Return a tuple containing ``(training_data, validation_data, 174 | test_data)``. Based on ``load_data``, but the format is more 175 | convenient for use in our implementation of neural networks. 176 | 177 | In particular, ``training_data`` is a list containing 50,000 178 | 2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray 179 | containing the input image. ``y`` is a 10-dimensional 180 | numpy.ndarray representing the unit vector corresponding to the 181 | correct digit for ``x``. 182 | 183 | ``validation_data`` and ``test_data`` are lists containing 10,000 184 | 2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional 185 | numpy.ndarray containing the input image, and ``y`` is the 186 | corresponding classification, i.e., the digit values (integers) 187 | corresponding to ``x``. 188 | 189 | Obviously, this means we're using slightly different formats for 190 | the training data and the validation / test data. These formats 191 | turn out to be the most convenient for use in our neural network 192 | code.""" 193 | tr_d, va_d, te_d = load_data() 194 | training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]] 195 | training_results = [vectorized_result(y) for y in tr_d[1]] 196 | training_data = zip(training_inputs, training_results) 197 | validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]] 198 | validation_data = zip(validation_inputs, va_d[1]) 199 | test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]] 200 | test_data = zip(test_inputs, te_d[1]) 201 | return (training_data, validation_data, test_data) 202 | 203 | def vectorized_result(j): 204 | """Return a 10-dimensional unit vector with a 1.0 in the jth 205 | position and zeroes elsewhere. This is used to convert a digit 206 | (0...9) into a corresponding desired output from the neural 207 | network.""" 208 | e = np.zeros((10, 1)) 209 | e[j] = 1.0 210 | return e 211 | 212 | import os 213 | os.chdir('KocPython2020/in-classMaterial/day18') 214 | 215 | training_data, validation_data, test_data = load_data_wrapper() 216 | 217 | training_data, validation_data, test_data = list(training_data), list(validation_data), list(test_data) 218 | 219 | net = Network([784, 30, 10]) 220 | 221 | net.SGD(training_data, 30, 10, 3.0, test_data=test_data) 222 | 223 | #TODO: change the number of layers, the number of neurons per layer, the SGD parameters, etc.
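#One way to act on the TODO above (a sketch, not part of the original notes):
#a wider hidden layer trained with a smaller learning rate. These particular
#settings are illustrative, not tuned; accuracy and runtime will both change.
net = Network([784, 100, 10])
net.SGD(training_data, 30, 10, 0.5, test_data=test_data)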
224 | 225 | 226 | import pandas as pd 227 | 228 | # Location of dataset 229 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data" 230 | 231 | # Assign column names to the dataset 232 | names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class'] 233 | 234 | # Read dataset to pandas dataframe 235 | irisdata = pd.read_csv(url, names=names) 236 | 237 | irisdata.head() 238 | 239 | # Assign data from the first four columns to the X variable 240 | X = irisdata.iloc[:, 0:4] 241 | 242 | # Assign data from the fifth column to the y variable 243 | y = irisdata.select_dtypes(include=[object]) 244 | 245 | y.head() 246 | 247 | y.Class.unique() 248 | 249 | from sklearn import preprocessing 250 | le = preprocessing.LabelEncoder() 251 | 252 | y = y.apply(le.fit_transform) 253 | 254 | from sklearn.model_selection import train_test_split 255 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20) 256 | 257 | 258 | from sklearn.preprocessing import StandardScaler 259 | scaler = StandardScaler() 260 | scaler.fit(X_train) 261 | 262 | X_train = scaler.transform(X_train) 263 | X_test = scaler.transform(X_test) 264 | 265 | from sklearn.neural_network import MLPClassifier 266 | mlp = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000) 267 | mlp.fit(X_train, y_train.values.ravel()) 268 | 269 | predictions = mlp.predict(X_test) 270 | 271 | from sklearn.metrics import classification_report, confusion_matrix 272 | print(confusion_matrix(y_test,predictions)) 273 | print(classification_report(y_test,predictions)) 274 | 275 | #TODO: try to change the parameters to get a better score 276 | 277 | import os 278 | os.chdir('KocPython2020/in-classMaterial/day18') 279 | 280 | import pandas as pd 281 | wine = pd.read_csv('wine_data.csv', names = ["Cultivator", "Alchol", "Malic_Acid", "Ash", "Alcalinity_of_Ash", "Magnesium", "Total_phenols", "Falvanoids", "Nonflavanoid_phenols", "Proanthocyanins", "Color_intensity", "Hue", "OD280", "Proline"]) 282 | 283 | wine.head() 284 | 285 | wine.describe().transpose() 286 | 287 | wine.shape 288 | 289 | X = wine.drop('Cultivator',axis=1) 290 | y = wine['Cultivator'] 291 | 292 | from sklearn.model_selection import train_test_split 293 | X_train, X_test, y_train, y_test = train_test_split(X, y) 294 | 295 | from sklearn.preprocessing import StandardScaler 296 | 297 | scaler = StandardScaler() 298 | 299 | # Fit only to the training data 300 | scaler.fit(X_train) 301 | 302 | # Now apply the transformations to the data: 303 | X_train = scaler.transform(X_train) 304 | X_test = scaler.transform(X_test) 305 | 306 | from sklearn.neural_network import MLPClassifier 307 | mlp = MLPClassifier(hidden_layer_sizes=(13,13,13),max_iter=500) 308 | 309 | mlp.fit(X_train,y_train) 310 | 311 | predictions = mlp.predict(X_test) 312 | 313 | from sklearn.metrics import classification_report, confusion_matrix 314 | 315 | print(confusion_matrix(y_test,predictions)) 316 | 317 | print(classification_report(y_test,predictions)) 318 | 319 | #coefs_ is a list of weight matrices, where the weight matrix at index i represents the weights between layer i and layer i+1. 320 | 321 | #intercepts_ is a list of bias vectors, where the vector at index i represents the bias values added to layer i+1.
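#A small sketch (an addition to the notes) making those shapes concrete for the
#wine network fitted above; each weight matrix maps layer i to layer i+1, and
#each bias vector belongs to layer i+1:
for i, (W, b) in enumerate(zip(mlp.coefs_, mlp.intercepts_)):
    print('layer %d -> layer %d: weights %s, biases %s' % (i, i + 1, W.shape, b.shape))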
324 | 325 | len(mlp.coefs_) 326 | 327 | len(mlp.coefs_[0]) 328 | 329 | len(mlp.intercepts_[0]) 330 | 331 | 332 | from sklearn.pipeline import make_pipeline 333 | 334 | from matplotlib import pyplot as plt 335 | 336 | %config InlineBackend.figure_format = 'retina' 337 | plt.style.use('bmh') 338 | 339 | def make_data(): 340 | N = 2000 341 | X = 0.5*np.random.normal(size=N)+0.35 342 | 343 | Xt = 0.75*X-0.35 344 | X = X.reshape((N,1)) 345 | 346 | Y = -(8 * Xt**2 + 0.1*Xt + 0.1) + 0.05 * np.random.normal(size=N) 347 | Y = np.exp(Y) + 0.05 * np.random.normal(size=N) 348 | Y /= max(np.abs(Y)) 349 | return X, Y 350 | 351 | np.random.seed(0) 352 | X, Y = make_data() 353 | 354 | from sklearn.metrics import mean_squared_error, r2_score 355 | 356 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.5, random_state=0) 357 | 358 | plt.plot(Xtest[:,0], Ytest, '.'); 359 | 360 | from sklearn.linear_model import Ridge 361 | 362 | ridge = Ridge() 363 | ridge.fit(Xtrain, Ytrain) 364 | 365 | Yguess = ridge.predict(Xtest) 366 | 367 | plt.plot(Xtest[:,0], Ytest, '.') 368 | plt.plot(Xtest[:,0], Yguess, 'r.') 369 | 370 | mean_squared_error(Ytest, Yguess), r2_score(Ytest, Yguess) 371 | 372 | from sklearn.neural_network import MLPRegressor 373 | 374 | mlp = MLPRegressor(random_state=0, activation='relu', hidden_layer_sizes=16) 375 | 376 | mlp.fit(Xtrain, Ytrain) 377 | 378 | Yguess = mlp.predict(Xtest) 379 | 380 | plt.plot(Xtest[:,0], Ytest, '.') 381 | plt.plot(Xtest[:,0], Yguess, 'r.') 382 | 383 | mean_squared_error(Ytest, Yguess), r2_score(Ytest, Yguess) 384 | 385 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(16,8)) 386 | 387 | mlp.fit(Xtrain, Ytrain) 388 | 389 | Yguess = mlp.predict(Xtest) 390 | 391 | plt.plot(Xtest[:,0], Ytest, '.') 392 | plt.plot(Xtest[:,0], Yguess, 'r.') 393 | 394 | mean_squared_error(Ytest, Yguess), r2_score(Ytest, Yguess) 395 | 396 | 397 | 398 | from sklearn.ensemble import RandomForestRegressor 399 | from sklearn.metrics import mean_squared_error 400 | plt.style.use('seaborn-poster') 401 | 402 | np.random.seed(0) 403 | x = 10 * np.random.rand(100) 404 | 405 | def model(x, sigma=0.3): 406 | fast_oscillation = np.sin(5 * x) 407 | slow_oscillation = np.sin(0.5 * x) 408 | noise = sigma * np.random.randn(len(x)) 409 | 410 | return slow_oscillation + fast_oscillation + noise 411 | 412 | plt.figure(figsize = (12,10)) 413 | y = model(x) 414 | plt.errorbar(x, y, 0.3, fmt='o') 415 | 416 | xfit = np.linspace(0, 10, 1000) 417 | 418 | # fit the model and get the estimation for each data points 419 | yfit = RandomForestRegressor(100, random_state=42).fit(x[:, None], y).predict(xfit[:, None]) 420 | ytrue = model(xfit, 0) 421 | 422 | plt.figure(figsize = (12,10)) 423 | plt.errorbar(x, y, 0.3, fmt='o') 424 | plt.plot(xfit, yfit, '-r', label = 'predicted', zorder = 10) 425 | plt.plot(xfit, ytrue, '-k', alpha=0.5, label = 'true model', zorder = 10) 426 | plt.legend() 427 | 428 | mse = mean_squared_error(ytrue, yfit) 429 | print(mse) 430 | 431 | mlp = MLPRegressor(hidden_layer_sizes=(200,200,200), max_iter = 2000, solver='lbfgs', \ 432 | alpha=0.01, activation = 'tanh', random_state = 8) 433 | 434 | yfit = mlp.fit(x[:, None], y).predict(xfit[:, None]) 435 | 436 | plt.figure(figsize = (12,10)) 437 | plt.errorbar(x, y, 0.3, fmt='o') 438 | plt.plot(xfit, yfit, '-r', label = 'predicted', zorder = 10) 439 | plt.plot(xfit, ytrue, '-k', alpha=0.5, label = 'true model', zorder = 10) 440 | plt.legend() 441 | 442 | mse = mean_squared_error(ytrue, yfit) 443 | print(mse) 444 | 
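#Rather than hand-tuning architectures as in the examples above and below, the
#hyperparameters can be searched with cross-validation, mirroring the earlier
#SVM grid search. A sketch on the oscillation data (the grid values here are
#illustrative, not recommendations):
from sklearn.model_selection import GridSearchCV
param_grid = {'hidden_layer_sizes': [(16,), (16, 8), (64, 32)],
              'activation': ['relu', 'tanh']}
search = GridSearchCV(MLPRegressor(random_state=0, max_iter=2000), param_grid, cv=3)
search.fit(x[:, None], y)
print(search.best_params_, search.best_score_)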
445 | 446 | 447 | from sklearn.svm import SVR 448 | 449 | # define your model 450 | svr = SVR(C=1000) 451 | 452 | # get the estimation from the model 453 | yfit = svr.fit(x[:, None], y).predict(xfit[:, None]) 454 | 455 | # plot the results as above 456 | plt.figure(figsize = (12,10)) 457 | plt.errorbar(x, y, 0.3, fmt='o') 458 | plt.plot(xfit, yfit, '-r', label = 'predicted', zorder = 10) 459 | plt.plot(xfit, ytrue, '-k', alpha=0.5, label = 'true model', zorder = 10) 460 | plt.legend() 461 | 462 | mse = mean_squared_error(ytrue, yfit) 463 | print(mse) 464 | 465 | 466 | tt = pd.read_csv('../day17/immSurvey.csv') 467 | tt.head() 468 | 469 | alphas = tt.stanMeansNewSysPooled 470 | sample = tt.textToSend 471 | 472 | from sklearn.feature_extraction.text import CountVectorizer 473 | vec = CountVectorizer() 474 | X = vec.fit_transform(sample) 475 | X 476 | 477 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names()) 478 | 479 | #down-weighting frequent words; term frequency–inverse document frequency (TF–IDF), which weights the word counts by a measure of how often they appear in the documents 480 | from sklearn.feature_extraction.text import TfidfVectorizer 481 | vec = TfidfVectorizer() 482 | X = vec.fit_transform(sample) 483 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names()) 484 | 485 | Xtrain, Xtest, ytrain, ytest = train_test_split(X, alphas, 486 | random_state=1) 487 | 488 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(100,50)) 489 | 490 | mlp.fit(Xtrain, ytrain) 491 | 492 | yguess = mlp.predict(Xtest) 493 | 494 | np.corrcoef(ytest, yguess) 495 | 496 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(1000,500)) 497 | 498 | mlp.fit(Xtrain, ytrain) 499 | 500 | yguess = mlp.predict(Xtest) 501 | 502 | np.corrcoef(ytest, yguess) 503 | 504 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(50,25)) 505 | 506 | mlp.fit(Xtrain, ytrain) 507 | 508 | yguess = mlp.predict(Xtest) 509 | 510 | np.corrcoef(ytest, yguess) 511 | 512 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(20,10)) 513 | 514 | mlp.fit(Xtrain, ytrain) 515 | 516 | yguess = mlp.predict(Xtest) 517 | 518 | np.corrcoef(ytest, yguess) 519 | 520 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(10,5)) 521 | 522 | mlp.fit(Xtrain, ytrain) 523 | 524 | yguess = mlp.predict(Xtest) 525 | 526 | np.corrcoef(ytest, yguess) 527 | 528 | #TODO: play around with number of layers, layer sizes, different activations, etc. 
see if you can do better 529 | -------------------------------------------------------------------------------- /in-classMaterial/day18/mnist.pkl.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day18/mnist.pkl.gz -------------------------------------------------------------------------------- /in-classMaterial/day18/wine_data.csv: -------------------------------------------------------------------------------- 1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065 2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050 3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185 4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480 5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735 6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450 7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290 8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295 9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680 45 | 
1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060 50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625 98 | 
2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660 100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406 101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710 102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562 103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438 104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415 105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672 106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315 107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510 108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488 109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312 110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680 111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562 112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325 113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607 114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434 115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385 116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407 117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495 118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345 119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372 120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564 121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625 122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465 123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365 124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380 125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380 126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378 127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352 128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466 129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342 130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580 131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630 132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530 133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560 134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600 135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650 136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695 137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720 138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515 139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580 140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590 141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600 142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780 143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520 144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550 145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855 146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830 147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415 148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625 149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650 150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550 151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500 152 | 
3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480 153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425 154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675 155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640 156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725 157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480 158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880 159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660 160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620 161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520 162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680 163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570 164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675 165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615 166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520 167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695 168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685 169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750 170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630 171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510 172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470 173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660 174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740 175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750 176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835 177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840 178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560 179 | -------------------------------------------------------------------------------- /in-classMaterial/day19/dcgan.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/dcgan.gif -------------------------------------------------------------------------------- /in-classMaterial/day19/hello.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/hello.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0001.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0002.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0003.png 
-------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0004.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0005.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0006.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0007.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0008.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0009.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0010.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0011.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0012.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0013.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0013.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0014.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0015.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0016.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0017.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0018.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0019.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0020.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0021.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0022.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0023.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0023.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0024.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0025.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0025.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0026.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0026.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0027.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0027.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0028.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0028.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0029.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0029.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0030.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0030.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0031.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0031.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0032.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0032.png -------------------------------------------------------------------------------- 
/in-classMaterial/day19/image_at_epoch_0033.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0033.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0034.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0034.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0035.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0035.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0036.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0036.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0037.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0037.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0038.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0038.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0039.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0039.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0040.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0040.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0041.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0041.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0042.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0042.png 
-------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0043.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0043.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0044.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0044.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0045.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0045.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0046.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0046.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0047.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0047.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0048.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0048.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0049.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0049.png -------------------------------------------------------------------------------- /in-classMaterial/day19/image_at_epoch_0050.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0050.png -------------------------------------------------------------------------------- /in-classMaterial/day19/tf.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | mnist = tf.keras.datasets.mnist 3 | 4 | (x_train, y_train),(x_test, y_test) = mnist.load_data() 5 | x_train, x_test = x_train / 255.0, x_test / 255.0 6 | 7 | model = tf.keras.models.Sequential([ 8 | tf.keras.layers.Flatten(input_shape=(28, 28)), 9 | tf.keras.layers.Dense(128, activation='relu'), 10 | tf.keras.layers.Dropout(0.2), 11 | tf.keras.layers.Dense(10, activation='softmax') 12 | ]) 13 | 14 | model.compile(optimizer='adam', 15 | loss='sparse_categorical_crossentropy', 
16 | metrics=['accuracy']) 17 | 18 | model.fit(x_train, y_train, epochs=5) 19 | model.evaluate(x_test, y_test) 20 | 21 | 22 | #Generative Adversarial Networks (GANs) are one of the most interesting ideas in computer science today. Two models are trained simultaneously by an adversarial process. A generator ("the artist") learns to create images that look real, while a discriminator ("the art critic") learns to tell real images apart from fakes. 23 | 24 | #During training, the generator progressively becomes better at creating images that look real, while the discriminator becomes better at telling them apart. The process reaches equilibrium when the discriminator can no longer distinguish real images from fakes. 25 | 26 | #This code demonstrates this process on the MNIST dataset. The included animation shows a series of images produced by the generator as it was trained for 50 epochs. The images begin as random noise and increasingly resemble handwritten digits over time. 27 | 28 | import glob 29 | import imageio 30 | import matplotlib.pyplot as plt 31 | import numpy as np 32 | import os 33 | import PIL 34 | from tensorflow.keras import layers 35 | import time 36 | 37 | from IPython import display 38 | 39 | #You will use the MNIST dataset to train the generator and the discriminator. The generator will generate handwritten digits resembling the MNIST data. 40 | 41 | (train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data() 42 | 43 | train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32') 44 | train_images = (train_images - 127.5) / 127.5 # Normalize the images to [-1, 1] 45 | 46 | BUFFER_SIZE = 60000 47 | BATCH_SIZE = 256 48 | 49 | # Batch and shuffle the data 50 | 51 | train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE) 52 | 53 | #Both the generator and discriminator are defined using the Keras Sequential API. 54 | 55 | #The generator uses tf.keras.layers.Conv2DTranspose (upsampling) layers to produce an image from a seed (random noise). Start with a Dense layer that takes this seed as input, then upsample several times until you reach the desired image size of 28x28x1. Notice the tf.keras.layers.LeakyReLU activation for each layer, except the output layer, which uses tanh. 56 | 57 | def make_generator_model(): 58 | model = tf.keras.Sequential() 59 | model.add(layers.Dense(7*7*256, use_bias=False, input_shape=(100,))) 60 | model.add(layers.BatchNormalization()) 61 | model.add(layers.LeakyReLU()) 62 | 63 | model.add(layers.Reshape((7, 7, 256))) 64 | assert model.output_shape == (None, 7, 7, 256) # Note: None is the batch size 65 | 66 | model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False)) 67 | assert model.output_shape == (None, 7, 7, 128) 68 | model.add(layers.BatchNormalization()) 69 | model.add(layers.LeakyReLU()) 70 | 71 | model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False)) 72 | assert model.output_shape == (None, 14, 14, 64) 73 | model.add(layers.BatchNormalization()) 74 | model.add(layers.LeakyReLU()) 75 | 76 | model.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh')) 77 | assert model.output_shape == (None, 28, 28, 1) 78 | 79 | return model 80 | 81 | #Use the (as yet untrained) generator to create an image.
82 | 83 | generator = make_generator_model() 84 | 85 | noise = tf.random.normal([1, 100]) 86 | generated_image = generator(noise, training=False) 87 | 88 | plt.imshow(generated_image[0, :, :, 0], cmap='gray') 89 | 90 | #The discriminator is a CNN-based image classifier. 91 | 92 | def make_discriminator_model(): 93 | model = tf.keras.Sequential() 94 | model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same', 95 | input_shape=[28, 28, 1])) 96 | model.add(layers.LeakyReLU()) 97 | model.add(layers.Dropout(0.3)) 98 | 99 | model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same')) 100 | model.add(layers.LeakyReLU()) 101 | model.add(layers.Dropout(0.3)) 102 | 103 | model.add(layers.Flatten()) 104 | model.add(layers.Dense(1)) 105 | 106 | return model 107 | 108 | #Use the (as yet untrained) discriminator to classify the generated images as real or fake. The model will be trained to output positive values for real images, and negative values for fake images. 109 | 110 | discriminator = make_discriminator_model() 111 | decision = discriminator(generated_image) 112 | print(decision) 113 | 114 | #Define loss functions and optimizers for both models. 115 | # This method returns a helper function to compute cross entropy loss 116 | cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True) 117 | 118 | 119 | #This method quantifies how well the discriminator is able to distinguish real images from fakes. It compares the discriminator's predictions on real images to an array of 1s, and the discriminator's predictions on fake (generated) images to an array of 0s. 120 | 121 | def discriminator_loss(real_output, fake_output): 122 | real_loss = cross_entropy(tf.ones_like(real_output), real_output) 123 | fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output) 124 | total_loss = real_loss + fake_loss 125 | return total_loss 126 | 127 | #The generator's loss quantifies how well it was able to trick the discriminator. Intuitively, if the generator is performing well, the discriminator will classify the fake images as real (or 1). Here, we will compare the discriminator's decisions on the generated images to an array of 1s. 128 | 129 | def generator_loss(fake_output): 130 | return cross_entropy(tf.ones_like(fake_output), fake_output) 131 | 132 | #The discriminator and the generator optimizers are different since we will train two networks separately. 133 | 134 | generator_optimizer = tf.keras.optimizers.Adam(1e-4) 135 | discriminator_optimizer = tf.keras.optimizers.Adam(1e-4) 136 | 137 | #This code also demonstrates how to save and restore models, which can be helpful in case a long-running training task is interrupted. 138 | 139 | checkpoint_dir = './training_checkpoints' 140 | checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") 141 | checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer, 142 | discriminator_optimizer=discriminator_optimizer, 143 | generator=generator, 144 | discriminator=discriminator) 145 | 146 | 147 | EPOCHS = 50 148 | noise_dim = 100 149 | num_examples_to_generate = 16 150 | 151 | # We will reuse this seed over time (so it's easier 152 | # to visualize progress in the animated GIF) 153 | seed = tf.random.normal([num_examples_to_generate, noise_dim]) 154 | 155 | #The training loop begins with the generator receiving a random seed as input. That seed is used to produce an image. The discriminator is then used to classify real images (drawn from the training set) and fake images (produced by the generator).
The loss is calculated for each of these models, and the gradients are used to update the generator and discriminator. 156 | 157 | # Notice the use of `tf.function` 158 | # This annotation causes the function to be "compiled". 159 | @tf.function 160 | def train_step(images): 161 | noise = tf.random.normal([BATCH_SIZE, noise_dim]) 162 | 163 | with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape: 164 | generated_images = generator(noise, training=True) 165 | 166 | real_output = discriminator(images, training=True) 167 | fake_output = discriminator(generated_images, training=True) 168 | 169 | gen_loss = generator_loss(fake_output) 170 | disc_loss = discriminator_loss(real_output, fake_output) 171 | 172 | gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables) 173 | gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables) 174 | 175 | generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables)) 176 | discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables)) 177 | 178 | def train(dataset, epochs): 179 | for epoch in range(epochs): 180 | start = time.time() 181 | 182 | for image_batch in dataset: 183 | train_step(image_batch) 184 | 185 | # Produce images for the GIF as we go 186 | display.clear_output(wait=True) 187 | generate_and_save_images(generator, 188 | epoch + 1, 189 | seed) 190 | 191 | # Save the model every 15 epochs 192 | if (epoch + 1) % 15 == 0: 193 | checkpoint.save(file_prefix = checkpoint_prefix) 194 | 195 | print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start)) 196 | 197 | # Generate after the final epoch 198 | display.clear_output(wait=True) 199 | generate_and_save_images(generator, 200 | epochs, 201 | seed) 202 | 203 | 204 | def generate_and_save_images(model, epoch, test_input): 205 | # Notice `training` is set to False. 206 | # This is so all layers run in inference mode (batchnorm). 207 | predictions = model(test_input, training=False) 208 | 209 | fig = plt.figure(figsize=(4,4)) 210 | 211 | for i in range(predictions.shape[0]): 212 | plt.subplot(4, 4, i+1) 213 | plt.imshow(predictions[i, :, :, 0] * 127.5 + 127.5, cmap='gray') 214 | plt.axis('off') 215 | 216 | plt.savefig('image_at_epoch_{:04d}.png'.format(epoch)) 217 | plt.show() 218 | 219 | #Call the train() method defined above to train the generator and discriminator simultaneously. Note, training GANs can be tricky. It's important that the generator and discriminator do not overpower each other (e.g., that they train at a similar rate). 220 | 221 | #At the beginning of the training, the generated images look like random noise. As training progresses, the generated digits will look increasingly real. After about 50 epochs, they resemble MNIST digits. This may take about one minute / epoch with the default settings on Colab. 
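#(Aside, not part of the original script: because the `checkpoint` object above tracks both models and both optimizers, an interrupted run could be resumed by restoring the most recent checkpoint before calling train() again. A minimal sketch, with `latest_ckpt` a name introduced here for illustration:)
# latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir) # returns None if nothing has been saved yet
# if latest_ckpt:
#     checkpoint.restore(latest_ckpt)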
222 | 223 | train(train_dataset, EPOCHS) 224 | 225 | checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir)) 226 | 227 | # Display a single image using the epoch number 228 | def display_image(epoch_no): 229 | return PIL.Image.open('image_at_epoch_{:04d}.png'.format(epoch_no)) 230 | 231 | display_image(EPOCHS) 232 | 233 | anim_file = 'dcgan.gif' 234 | 235 | with imageio.get_writer(anim_file, mode='I') as writer: 236 | filenames = glob.glob('image*.png') 237 | filenames = sorted(filenames) 238 | last = -1 239 | for i,filename in enumerate(filenames): 240 | frame = 2*(i**0.5) 241 | if round(frame) > round(last): 242 | last = frame 243 | else: 244 | continue 245 | image = imageio.imread(filename) 246 | writer.append_data(image) 247 | image = imageio.imread(filename) 248 | writer.append_data(image) 249 | 250 | import IPython 251 | if IPython.version_info > (6,2,0,''): 252 | display.Image(filename=anim_file) 253 | 254 | 255 | 256 | 257 | -------------------------------------------------------------------------------- /in-classMaterial/day2/INTL450Syntax.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day2/INTL450Syntax.pdf -------------------------------------------------------------------------------- /in-classMaterial/day2/lab1.py: -------------------------------------------------------------------------------- 1 | def binarify(num): 2 | """convert positive integer to base 2""" 3 | if num<=0: return '0' 4 | digits = [] 5 | return ''.join(digits) 6 | 7 | def int_to_base(num, base): 8 | """convert positive integer to a string in any base""" 9 | if num<=0: return '0' 10 | digits = [] 11 | return ''.join(digits) 12 | 13 | def base_to_int(string, base): 14 | """take a string-formatted number and its base and return the base-10 integer""" 15 | if string=="0" or base <= 0 : return 0 16 | result = 0 17 | return result 18 | 19 | def flexibase_add(str1, str2, base1, base2): 20 | """add two numbers of different bases and return the sum""" 21 | result = int_to_base(tmp, base1) 22 | return result 23 | 24 | def flexibase_multiply(str1, str2, base1, base2): 25 | """multiply two numbers of different bases and return the product""" 26 | result = int_to_base(tmp, base1) 27 | return result 28 | 29 | def romanify(num): 30 | """given an integer, return the Roman numeral version""" 31 | result = "" 32 | return result 33 | 34 | 35 | -------------------------------------------------------------------------------- /in-classMaterial/day2/lab1_solutions.py: -------------------------------------------------------------------------------- 1 | def binarify(num): 2 | """convert positive integer to base 2""" 3 | if num<=0: return '0' 4 | digits=[] 5 | while num>0: 6 | digits.append(num%2) 7 | num=num//2 8 | digits=digits[::-1] 9 | return ''.join(str(e) for e in digits) 10 | 11 | def int_to_base(num, base): 12 | """convert positive integer to a string in any base""" 13 | if num==0: return '0' 14 | if base<=0: return '0' 15 | if base==1: return '1'*num 16 | digits = [] 17 | negative=False 18 | if num<0: num*=(-1); negative=True 19 | while num>0: 20 | digits.append(num%base) 21 | num=num//base 22 | digits=digits[::-1] 23 | if negative: return '-'+''.join(str(e) for e in digits) 24 | return ''.join(str(e) for e in digits) 25 | 26 | def base_to_int(string, base): 27 | """take a string-formatted number and its base and return the base-10 integer""" 28 | if string=="0" or 
base <= 0 : return 0 29 | negative=False 30 | if string[0]=='-': string=string[1:]; negative=True 31 | result = 0 32 | num=len(string) 33 | for i in string: 34 | num-=1 35 | result+=((base**num)*int(i)) 36 | if negative: return result*(-1) 37 | return result 38 | 39 | def flexibase_add(str1, str2, base1, base2): 40 | """add two numbers of different bases and return the sum""" 41 | return base_to_int(str1, base1)+base_to_int(str2,base2) 42 | 43 | 44 | def flexibase_multiply(str1, str2, base1, base2): 45 | """multiply two numbers of different bases and return the product""" 46 | return base_to_int(str1,base1)*base_to_int(str2,base2) 47 | 48 | def romanify(num): 49 | """given an integer, return the Roman numeral version""" 50 | result = "" 51 | result+=(num//1000*'M') 52 | num%=1000 53 | hold=num//100 54 | num%=100 55 | if hold<=3: result+=hold*'C' 56 | elif hold==4: result+='CD' 57 | elif hold>4 and hold<9: result+=('D'+'C'*(hold-5)) 58 | else: result+='CM' 59 | hold=num//10 60 | if hold<=3: result+=hold*'X' 61 | elif hold==4: result+='XL' 62 | elif hold>4 and hold<9: result+=('L'+'X'*(hold-5)) 63 | else: result+='XC' 64 | hold=num%10 65 | if hold<=3: result+=hold*'I' 66 | elif hold==4: result+='IV' 67 | elif hold>4 and hold<9: result+=('V'+'I'*(hold-5)) 68 | else: result+='IX' 69 | return result 70 | 71 | -------------------------------------------------------------------------------- /in-classMaterial/day3/ClassesSlides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day3/ClassesSlides.pdf -------------------------------------------------------------------------------- /in-classMaterial/day3/clock_lab.py: -------------------------------------------------------------------------------- 1 | class Clock(object): 2 | def __init__(self, hour, minutes): 3 | self.minutes = minutes 4 | self.hour = hour 5 | 6 | @classmethod 7 | def at(cls, hour, minutes=0): 8 | return cls(hour, minutes) 9 | 10 | def __str__(self): pass #lab stub: replace each pass below with your implementation (pass keeps the skeleton syntactically valid) 11 | 12 | def __add__(self,minutes): pass 13 | 14 | def __sub__(self,minutes): pass 15 | 16 | def __eq__(self, other): pass 17 | 18 | def __ne__(self, other): pass 19 | -------------------------------------------------------------------------------- /in-classMaterial/day3/clock_solution.py: -------------------------------------------------------------------------------- 1 | class Clock(object): 2 | def __init__(self, hour, minutes=0): 3 | self.minutes = '0'*(2-len(str(minutes)))+str(minutes) 4 | self.hour = '0'*(2-len(str(hour)))+str(hour) 5 | def __str__(self): 6 | return self.hour+":"+self.minutes 7 | def __repr__(self): 8 | return self.__str__() 9 | @classmethod 10 | def at(cls, hour, minutes=0): 11 | return cls(hour, minutes) 12 | def __add__(self,minutes): 13 | time=(int(self.hour)*60+int(self.minutes)+int(minutes))%(24*60) 14 | return Clock(time//60,time%60) 15 | def __sub__(self,minutes): 16 | return self+((-1)*minutes) 17 | def __eq__(self, other): 18 | return (self.hour==other.hour and self.minutes==other.minutes) 19 | def __ne__(self, other): 20 | return not self==other 21 | -------------------------------------------------------------------------------- /in-classMaterial/day3/parent-child.py: -------------------------------------------------------------------------------- 1 | class Parent(): 2 | def __init__(self, sex, firstname, lastname): 3 | self.sex = sex 4 | self.firstname = firstname 5 | self.lastname = lastname 6 | self.kids = [] 7 | 8 |
def role(self): 9 | if self.sex == "Male": 10 | return "Father" 11 | else: 12 | return "Mother" 13 | 14 | def have_child(self, name): 15 | child = Child(name, self) 16 | print(self.firstname + " is having a child named " + child.name()) 17 | print("They will make a very good " + self.role()) 18 | self.kids.append(child) 19 | return child 20 | 21 | def list_children(self): 22 | for kid in self.kids: 23 | print("I am the " + self.role() + " of " + kid.name()) 24 | 25 | class Child(): 26 | def __init__(self, firstname, parent): 27 | self.parent = parent 28 | self.lastname = parent.lastname 29 | self.firstname = firstname 30 | 31 | def set_name(self, new_first_name, new_last_name): 32 | self.firstname = new_first_name 33 | self.lastname = new_last_name 34 | 35 | def name(self): 36 | return "%s %s" % (self.firstname, self.lastname) 37 | 38 | def introduce(self): 39 | return "Hi I'm " + self.name() 40 | 41 | def siblings(self): 42 | for kid in self.parent.kids: 43 | if kid != self: 44 | print("I have a sibling named " + kid.name()) 45 | 46 | def __str__(self): 47 | return "%s" %self.firstname 48 | 49 | mom = Parent("Female", "Jane", "Smith") 50 | mom.list_children() 51 | jill=mom.have_child("Jill") 52 | jill.firstname 53 | jill.parent.firstname 54 | jill.set_name("Jillian", "Jones") 55 | print(jill.introduce()) 56 | print(jill == mom.kids[0]) 57 | jack = mom.have_child("Jack") 58 | print(jack.introduce()) 59 | jack.parent.kids[0].parent.list_children() 60 | jack.siblings() 61 | 62 | 63 | -------------------------------------------------------------------------------- /in-classMaterial/day3/polymorphism.py: -------------------------------------------------------------------------------- 1 | class Animal(object): 2 | living="Yes!" 3 | def __init__(self, name): # Constructor of the class 4 | self.name = name 5 | 6 | def talk(self): # Abstract method, defined by convention only 7 | raise NotImplementedError("Subclass must implement abstract method") 8 | 9 | class Cat(Animal): 10 | def talk(self): 11 | return self.meow() 12 | 13 | def meow(self): 14 | return 'Meow!' 15 | 16 | class Dog(Animal): 17 | def talk(self): 18 | return self.bark() 19 | 20 | def bark(self): 21 | return 'Woof! Woof!' 22 | 23 | class Fish(Animal): 24 | 25 | def swim(self): 26 | pass 27 | 28 | def __str__(self): 29 | return "I am a fish!" 30 | 31 | animals = [Cat('Foo'), 32 | Dog('Bar'), 33 | Fish('nemo')] 34 | 35 | # for animal in animals: 36 | # print(animal.name + ': ' + animal.talk()) 37 | 38 | # f = Fish("foo") 39 | # print("Hi, " + str(f)) 40 | 41 | 42 | -------------------------------------------------------------------------------- /in-classMaterial/day3/school.py: -------------------------------------------------------------------------------- 1 | # - Add a student's name to the roster for a grade 2 | # - Get a list of all students enrolled in a grade 3 | # - Get a sorted list of all students in all grades. 4 | # 5 | # Note that all our students only have one name. 6 | # (It's a small town, what do you want?) 
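# A quick usage sketch of the class defined below (the school and student names are made up for illustration):
# s = school("Koc Elementary")
# s.add("Ali", 2); s.add("Ayse", 2); s.add("Can", 4)
# s.grade(2) # -> {'Ali', 'Ayse'} (a set, so unordered)
# print(s) # -> the school name, then {2: ('Ali', 'Ayse'), 4: ('Can',)}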
7 | 8 | class school(): 9 | def __init__(self, school_name): #initialize instance of class School with parameter name 10 | self.school_name = school_name #user must put name, no default 11 | self.db = {} #initialize empty dictionary to store kids and grades 12 | 13 | def add(self, name, student_grade): #add a kid to a grade in instance of School 14 | if student_grade in self.db: #need to check if the key for the grade already exists, otherwise assigning it will return error 15 | self.db[student_grade].add(name) #add kid to the set of kids within the dictionary 16 | else: self.db[student_grade] = {name} #if the key doesn't exist, create it and put kid in 17 | 18 | def sort(self): #sorts kids alphabetically and returns them in tuples (because they are immutable) 19 | sorted_students={} #sets up empty dictionary to store sorted tuples 20 | for key in self.db.keys(): #loop through each key 21 | sorted_students[key] = tuple(sorted(self.db[key])) #add dictionary entry with key being the grade and the entry the tuple of kids 22 | return sorted_students 23 | 24 | def grade(self, check_grade): 25 | if check_grade not in self.db: return None #if the key doesn't exist, there are no kids in that grade: return None 26 | return self.db[check_grade] #if None wasn't returned above, return elements within dictionary, or kids in grade 27 | 28 | def __str__(self): #print function will display the school name on one line, and sorted kids on other line 29 | return "%s\n%s" %(self.school_name, self.sort()) 30 | -------------------------------------------------------------------------------- /in-classMaterial/day4/TestingSlides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day4/TestingSlides.pdf -------------------------------------------------------------------------------- /in-classMaterial/day4/__pycache__/fizzbuzz.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day4/__pycache__/fizzbuzz.cpython-36.pyc -------------------------------------------------------------------------------- /in-classMaterial/day4/exception.py: -------------------------------------------------------------------------------- 1 | raise Exception 2 | print("I raised an exception!") 3 | 4 | raise Exception('I raised an exception!') 5 | 6 | try: 7 | print(a) 8 | except NameError: 9 | print("oops name error") 10 | except: 11 | print("oops") 12 | finally: 13 | print("Yes! 
I did it!") 14 | 15 | 16 | for i in range(1,10): 17 | if i==5: 18 | print("I found five!") 19 | continue 20 | print("Here is five!") 21 | else: 22 | print(i) 23 | else: 24 | print("I went through all iterations!") 25 | 26 | -------------------------------------------------------------------------------- /in-classMaterial/day4/exceptions_example.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | 3 | class CustomException(Exception): # inherits from Exception 4 | def __init__(self, value): 5 | self.value = value 6 | 7 | def __str__(self): 8 | return self.value 9 | 10 | def i_call_a_function_with_errors(): 11 | try: 12 | print("Calling a function....") 13 | #function_with_generic_error() 14 | #function_with_custom_error() 15 | #function_with_unknown_error(1) 16 | function_that_does_not_exist() 17 | print("Tada!") 18 | except CustomException as inst: # `as' gives us access to the exception 19 | print("Custom Error Caught! Error({0})".format(inst.value)) 20 | except NameError or AttributeError: 21 | print("Whoa, chill out") 22 | except: # any exception is caught, even ones you don't know about 23 | print("Default Error Caught!") 24 | else: # if nothing broke, then run this block 25 | print("No error raised.") 26 | traceback.print_exc() # this prints the traceback 27 | finally: # this block is always run 28 | print("Goodbye!") 29 | 30 | def function_with_generic_error(): 31 | raise Exception("Foo!") # this method doesn't know what to do with the exception 32 | 33 | def function_with_custom_error(): 34 | raise CustomException("Foo Bar!") # this will be handled in the function above} 35 | 36 | def function_with_unknown_error(foo): 37 | foo.bar() 38 | 39 | i_call_a_function_with_errors() 40 | -------------------------------------------------------------------------------- /in-classMaterial/day4/fizzbuzz.py: -------------------------------------------------------------------------------- 1 | 2 | def FizzBuzz(i): 3 | try: 4 | if i % 15 == 0: 5 | raise Exception("Divisible by 3 and 5!") 6 | if i % 3 == 0: 7 | return "Fizz" 8 | if i % 5 == 0: 9 | return "Buzz" 10 | print("finally") 11 | except: 12 | if i % 15 == 0: 13 | return("FizzBuzz") 14 | else: 15 | return str(i) 16 | finally: 17 | print("finally") 18 | 19 | 20 | for i in range(18): 21 | print(str(i) + ": " + FizzBuzz(i)) 22 | -------------------------------------------------------------------------------- /in-classMaterial/day4/fizzbuzz_test.py: -------------------------------------------------------------------------------- 1 | #FizzBuzzTest 2 | 3 | import unittest 4 | import fizzbuzz 5 | 6 | class FizzBuzzTest(unittest.TestCase): 7 | 8 | def test_fizz(self): 9 | self.assertEqual('Fizz',fizzbuzz.FizzBuzz(9)) 10 | self.assertNotEqual('Fizz',fizzbuzz.FizzBuzz(15)) 11 | 12 | def test_buzz(self): 13 | self.assertEqual('Buzz',fizzbuzz.FizzBuzz(10)) 14 | 15 | def test_fizzbuzz(self): 16 | self.assertEqual('FizzBuzz',fizzbuzz.FizzBuzz(15)) 17 | 18 | def test_error(self): 19 | with self.assertRaises(TypeError): 20 | fizzbuzz.FizzBuzz('b') 21 | 22 | def test5(self): 23 | self.assertEqual('Buzz',fizzbuzz.FizzBuzz(15)) 24 | 25 | if __name__ == '__main__': #Add this if you want to run the test with this script. 
26 | unittest.main() 27 | -------------------------------------------------------------------------------- /in-classMaterial/day4/print_integer.py: -------------------------------------------------------------------------------- 1 | def print_integer(integer): 2 | return "Here is my integer: " + str(integer) 3 | 4 | def print_integer(integer): 5 | try: 6 | int(integer) 7 | except ValueError: 8 | print("Put in a number.") 9 | else: 10 | print("Here is my integer: " + str(integer)) 11 | 12 | def print_integer(integer): 13 | if type(integer)==int: 14 | print("Here is my integer: " + str(integer)) 15 | else: 16 | raise Exception("This is not an integer") 17 | 18 | def print_integer(integer): 19 | if type(integer)==int: 20 | return "Here is my integer: " + str(integer) 21 | else: 22 | raise TypeError("Enter an integer!") 23 | 24 | def print_integer(integer): 25 | try: 26 | if integer %1==0: 27 | return "Here is my integer: " + str(integer) 28 | else: 29 | return "This has decimals!" 30 | except: 31 | raise TypeError("Enter a number!") 32 | 33 | def print_integer(integer): 34 | try: 35 | if integer %1==0: 36 | print("Congratulations! You entered an integer!") 37 | else: 38 | raise Exception 39 | except: 40 | raise TypeError("This is not an integer!") 41 | else: 42 | return "Here is my integer: " + str(integer) 43 | 44 | 45 | def print_integer(integer): 46 | try: 47 | if integer %1==0: 48 | print("Here is my integer: " + str(integer)) 49 | else: 50 | raise Exception 51 | except TypeError: 52 | print("Enter a number!") 53 | except: 54 | print("Integers can't have decimals!") 55 | 56 | 57 | def print_integer(integer): 58 | try: 59 | if integer %1==0: 60 | print("Congratulations! You entered an integer!") 61 | else: 62 | raise Exception 63 | except TypeError: 64 | raise TypeError("Enter a number!") 65 | except: 66 | raise TypeError("Integers can't have decimals!" ) 67 | else: 68 | return "Here is my integer: " + str(integer) 69 | 70 | 71 | #Create your own exception 72 | class CustomException(Exception): 73 | def __init__(self, value): 74 | self.value = value 75 | def __str__(self): 76 | return str(self.value) 77 | 78 | 79 | def print_integer(integer): 80 | try: 81 | if integer %1==0: 82 | print("Congratulations! You entered an integer!") 83 | else: 84 | raise CustomException(integer%1) 85 | except CustomException as e: 86 | raise TypeError("Your number has a decimal: " + str(e.value)) 87 | except TypeError: 88 | # pass 89 | raise TypeError("Enter a number!") 90 | else: 91 | return "Here is my integer: " + str(integer) 92 | finally: 93 | print("I'm done!") 94 | -------------------------------------------------------------------------------- /in-classMaterial/day5/csvstuff.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | 4 | #Open a file stream and create a CSV writer object 5 | os.getcwd() 6 | os.chdir("KocPython2020/in-classMaterial/day5/") 7 | f = open('test.csv', 'w') 8 | my_writer = csv.writer(f) 9 | 10 | for i in range(1, 100): 11 | my_writer.writerow([i, i-1]) 12 | 13 | f.flush() 14 | f.close() 15 | 16 | #The correct way! 
17 | with open('test1.csv', 'w') as f: 18 | my_writer = csv.writer(f) 19 | for i in range(1, 100): 20 | my_writer.writerow([i, i-1]) 21 | 22 | #How about with field names 23 | with open('test_with_fields.csv', 'w') as f: 24 | my_writer = csv.DictWriter(f, fieldnames=("A", "B")) 25 | my_writer.writeheader() 26 | for i in range(1, 100): 27 | my_writer.writerow({"B":i, "A":i-1}) 28 | 29 | #Now lets read some things 30 | with open('test1.csv', 'r') as f: 31 | print("Reading test1.csv") 32 | my_reader = csv.reader(f) 33 | for row in my_reader: 34 | print(row) 35 | 36 | #Now lets read some things with field names 37 | with open('test_with_fields.csv', 'r') as f: 38 | print("\nReading test_with_fields.csv") 39 | my_reader = csv.DictReader(f) 40 | for row in my_reader: 41 | print(row) 42 | -------------------------------------------------------------------------------- /in-classMaterial/day5/faculty.csv: -------------------------------------------------------------------------------- 1 | Name,Email 2 | Belgin San Akca,bakca@ku.edu.tr 3 | Şener Aktürk,sakturk@ku.edu.tr 4 | Özlem Altan,ozaltan@ku.edu.tr 5 | Merih Angın,mangin@ku.edu.tr 6 | Altay Atlı,aatli@ku.edu.tr 7 | Selim Erdem Aytaç,saytac@ku.edu.tr 8 | Caner Bakır,cbakir@ku.edu.tr 9 | Umur Başdaş,ubasdas@ku.edu.tr 10 | Reşat Bayer,rbayer@ku.edu.tr 11 | David Carlson,dcarlson@ku.edu.tr 12 | Ali Çarkoğlu,acarkoglu@ku.edu.tr 13 | Boğaç Erozan,herozan@ku.edu.tr 14 | Güneş Ertan,gunesertan@ku.edu.tr 15 | Ahmet İçduygu,aicduygu@ku.edu.tr 16 | Ziya Öniş,zonis@ku.edu.tr 17 | İpek Ruacan,iruacan@ku.edu.tr 18 | Bahar Rumelili,brumelili@ku.edu.tr 19 | Murat Somer,musomer@ku.edu.tr 20 | Sedef Turper,sturper@ku.edu.tr 21 | Ayşen Üstübici,austubici@ku.edu.tr 22 | Şuhnaz Yılmaz,syilmaz@ku.edu.tr 23 | -------------------------------------------------------------------------------- /in-classMaterial/day5/filestuff.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | os.chdir('KocPython2020/in-classMaterial/day5') 5 | 6 | #The cleanest way to handle files (gracefully handles exceptions) 7 | with open('readfile.txt') as f: 8 | #We can read files in chunks 9 | the_whole_thing = f.read() 10 | print("The Whole Thing\n*****************************************************************************\n{0}".format(the_whole_thing)) 11 | 12 | #We can read files line by line 13 | print("\nLooping over lines\n*****************************************************************************\n") 14 | f.seek(0) 15 | lines = f.readlines() 16 | for l in lines: 17 | print("{0}".format(l)) 18 | 19 | #More efficiently we can loop over the file object (i.e. 
we don't need the variable lines) 20 | print("\nLooping over the file object\n********************\n") 21 | f.seek(0) 22 | for l in f: 23 | print("{0}".format(l)) 24 | 25 | #You can also go byte by byte (don't do this) 26 | print("\nByte by Byte\n********************\n") 27 | f.seek(0) 28 | next_byte = f.read(1) 29 | while next_byte != "": 30 | sys.stdout.write(next_byte) 31 | next_byte = f.read(1) 32 | 33 | # We can also manually open and close files, now we need to handle exceptions and closing files 34 | f = open('readfile.txt', 'r') 35 | print("\nManually Opened File\n********************\n") 36 | print(f.read()) 37 | f.close() 38 | 39 | #Writing files is easy, open command takes r, w, a plus some others 40 | with open('writefile.txt', 'w') as f: 41 | #wipes the file clean and opens it 42 | f.write("Hi guys.") 43 | f.write("Does this go on the second line?") 44 | f.writelines(['a', 'b', 'c']) 45 | # f.flush() # If using the file object interactively you may need to flush the buffer 46 | 47 | with open('writefile.txt', 'a') as f: 48 | #just tacks some things on the end 49 | f.write("\nI got appended!") 50 | f.flush() 51 | -------------------------------------------------------------------------------- /in-classMaterial/day5/mathofpolitics.csv: -------------------------------------------------------------------------------- 1 | url,is_post,publish_date,post_title,comment_count 2 | http://www.mathofpolitics.com,0,NA,NA,NA 3 | http://www.mathofpolitics.com/2016/06/22/trump-has-raised-little-money-much-unitemized-so-sad/,1,"June 22, 2016","Trump Has Raised Little Money, Much Unitemized. SO SAD! | The Math Of Politics",0 4 | http://www.mathofpolitics.com/2016/05/12/extreme-and-unpredictable-is-ideology-collapsing-in-the-senate-gop/,1,"May 12, 2016",Extreme and Unpredictable: Is Ideology Collapsing in the Senate GOP? | The Math Of Politics,0 5 | http://www.mathofpolitics.com/2016/05/11/comparing-the-legislative-records-of-the-candidates/,1,"May 11, 2016",Comparing the Legislative Records of the Candidates | The Math Of Politics,0 6 | http://www.mathofpolitics.com/2016/04/28/whos-got-the-power-measuring-how-much-trump-went-banzhaf-on-tuesday/,1,"April 28, 2016",Who’s Got The Power? Measuring How Much Trump Went Banzhaf On Tuesday | The Math Of Politics,0 7 | http://www.mathofpolitics.com/2016/02/07/trump-cruz-rubio-the-game-theory-of-when-the-enemy-of-your-enemy-is-your-enemy/,1,"February 7, 2016","Trump, Cruz, Rubio: The Game Theory of When The Enemy of Your Enemy Is Your Enemy. 
| The Math Of Politics",0 8 | http://www.mathofpolitics.com/2016/02/03/the-gops-reality-is-truel-indeed/,1,"February 3, 2016","The GOP’s Reality is Truel, Indeed | The Math Of Politics",0 9 | http://www.mathofpolitics.com/2016/01/25/the-patriots-are-commonly-uncommon/,1,"January 25, 2016",The Patriots Are Commonly Uncommon | The Math Of Politics,0 10 | http://www.mathofpolitics.com/2015/11/10/one-thing-leads-to-another-delaying-da-rt-standards-to-discuss-better-da-rt-standards-will-be-ironic/,1,"November 10, 2015",One Thing Leads to Another: “Delaying“ DA-RT Standards to Discuss Better DA-RT Standards Will Be Ironic | The Math Of Politics,2 11 | http://www.mathofpolitics.com/2015/11/06/responding-to-a-petition-to-nobody-or-everybody/,1,"November 6, 2015",Responding To A Petition To Nobody (Or Everybody) | The Math Of Politics,2 12 | http://www.mathofpolitics.com/2015/08/12/super-pac-bites-man/,1,"August 12, 2015",Super PAC (Bites) Man | The Math Of Politics,0 13 | http://www.mathofpolitics.com/page/2/,0,NA,NA,NA 14 | http://www.mathofpolitics.com/2015/08/05/this-thursday-at-10-fox-news-is-correct/,1,"August 5, 2015","This Thursday, At 10, FOX News Is Correct | The Math Of Politics",0 15 | http://www.mathofpolitics.com/2015/08/02/the-true-trump-card-you-cant-buy-credibility/,1,"August 2, 2015",The True Trump Card: You Can’t Buy Credibility | The Math Of Politics,0 16 | http://www.mathofpolitics.com/2015/07/30/in-comes-volatility-nonplussing-both-fairness-inequality/,1,"July 30, 2015","In Comes Volatility, Nonplussing Both Fairness & Inequality | The Math Of Politics",0 17 | http://www.mathofpolitics.com/2015/06/27/the-statistical-realities-of-measuring-segregation-its-hard-being-both-diverse-homogeneous/,1,"June 27, 2015",The Statistical Realities of Measuring Segregation: It’s Hard Being Both Diverse & Homogeneous | The Math Of Politics,0 18 | http://www.mathofpolitics.com/2015/03/19/cotton-pickin/,1,"March 19, 2015",Cotton Pickin’? 
| The Math Of Politics,0 19 | http://www.mathofpolitics.com/2015/03/03/how-two-peoples-rights-can-do-both-people-wrong-vaccines-anti-social-choice-theory/,1,"March 3, 2015",How Two People’s Rights Can Do Both People Wrong: Vaccines & (Anti-)Social Choice Theory | The Math Of Politics,0 20 | http://www.mathofpolitics.com/2015/02/20/default-in-our-stars-kant-ankerous-varoufakis/,1,"February 20, 2015",Default In Our Stars: Kant-ankerous Varoufakis | The Math Of Politics,0 21 | http://www.mathofpolitics.com/2014/10/29/on-the-possibility-of-an-ethical-election-experiment/,1,"October 29, 2014",On The Possibility of An Ethical Election Experiment | The Math Of Politics,0 22 | http://www.mathofpolitics.com/2014/10/28/ethics-experiments-and-election-administration/,1,"October 28, 2014","Ethics, Experiments, and Election Administration | The Math Of Politics",0 23 | http://www.mathofpolitics.com/2014/10/27/well-in-a-worst-case-scenario-your-treatment-works/,1,"October 27, 2014","Well, In a Worst Case Scenario, Your Treatment Works… | The Math Of Politics",4 24 | http://www.mathofpolitics.com/page/3/,0,NA,NA,NA 25 | http://www.mathofpolitics.com/2014/09/29/so-many-smells-so-little-time-in-defense-of-stinky-academic-writing/,1,"September 29, 2014","So Many Smells, So Little Time: In Defense of “Stinky” Academic Writing | The Math Of Politics",0 26 | http://www.mathofpolitics.com/2014/08/20/ferguson-the-racial-disconnect-on-race/,1,"August 20, 2014",#Ferguson: The Racial Disconnect On Race | The Math Of Politics,2 27 | http://www.mathofpolitics.com/2014/08/18/makes-us-stronger-the-math-of-protest-and-repression/,1,"August 18, 2014",Makes Us Stronger: The Math of Protest and Repression | The Math Of Politics,2 28 | http://www.mathofpolitics.com/2014/08/06/the-bigger-the-data-the-harder-the-theory-of-measurement/,1,"August 6, 2014","The Bigger The Data, The Harder The (Theory of) Measurement | The Math Of Politics",0 29 | http://www.mathofpolitics.com/2014/07/30/the-math-of-getting-a-job-in-political-science/,1,"July 30, 2014",The Math of Getting a Job in Political Science | The Math Of Politics,0 30 | http://www.mathofpolitics.com/2014/05/17/if-keyser-soze-ruled-america-would-we-know/,1,"May 17, 2014","If Keyser Söze Ruled America, Would We Know? | The Math Of Politics",0 31 | http://www.mathofpolitics.com/2014/04/24/how-political-science-makes-politics-make-us-less-stupid/,1,"April 24, 2014",How Political Science Makes Politics Make Us Less Stupid | The Math Of Politics,0 32 | http://www.mathofpolitics.com/2014/04/12/shining-a-little-more-light-on-transparency/,1,"April 12, 2014",Shining A Little More Light On Transparency | The Math Of Politics,0 33 | http://www.mathofpolitics.com/2014/04/10/why-separate-when-you-can-lustrate/,1,"April 10, 2014",Why Separate When You Can…Lustrate!?! 
| The Math Of Politics,0 34 | http://www.mathofpolitics.com/2014/04/10/how-transparency-could-harm-you-me-and-the-fomc/,1,"April 10, 2014","How Transparency Could Harm You, Me, and the FOMC | The Math Of Politics",0 35 | http://www.mathofpolitics.com/page/4/,0,NA,NA,NA 36 | http://www.mathofpolitics.com/2014/04/09/mind-the-gap-the-wages-of-aggregation-evaluation-and-conflict/,1,"April 9, 2014","Mind The Gap: The Wages of Aggregation, Evaluation, and Conflict | The Math Of Politics",0 37 | http://www.mathofpolitics.com/2014/04/08/its-better-to-fight-when-you-can-win-or-at-least-look-like-you-did/,1,"April 8, 2014","It’s Better To Fight When You Can Win, Or At Least Look Like You Did | The Math Of Politics",0 38 | http://www.mathofpolitics.com/2014/03/12/donation-discrimination-denotes-deliverance-of-democracy/,1,"March 12, 2014",Donation Discrimination Denotes Deliverance of Democracy | The Math Of Politics,0 39 | http://www.mathofpolitics.com/2014/04/08/my-ignorance-provokes-me-i-know-where-ukraine-is-and-i-still-want-to-fight/,1,"April 8, 2014",My Ignorance Provokes Me: I know Where Ukraine is and I Still Want to Fight | The Math Of Politics,2 40 | http://www.mathofpolitics.com/2014/02/08/game-theory-is-punk-and-a-flying-f-to-the-q/,1,"February 8, 2014",Game Theory is Punk | The Math Of Politics,0 41 | http://www.mathofpolitics.com/2014/02/06/speech-y-keen-or-why-nobody-worries-about-the-right-to-praise-the-government/,1,"February 6, 2014","Speech-y Keen, or Why Nobody Worries About the “Right to Praise the Government” | The Math Of Politics",1 42 | http://www.mathofpolitics.com/2014/02/04/ceiling-the-deal-quid-pro-keystone/,1,"February 4, 2014",Ceiling the Deal: Quid Pro Keystone | The Math Of Politics,0 43 | http://www.mathofpolitics.com/2014/02/02/ill-show-you-by-not-showing-up/,1,"February 2, 2014",I’ll Show You…By Not Showing Up | The Math Of Politics,0 44 | http://www.mathofpolitics.com/2014/01/31/plumbing-presidential-power-pens-phones-paperwork/,1,"January 31, 2014","Plumbing Presidential Power: Pens, Phones, & Paperwork | The Math Of Politics",0 45 | http://www.mathofpolitics.com/2014/01/30/poor-work-counting-the-working-poor/,1,"January 30, 2014",Poor Work Counting the Working Poor | The Math Of Politics,1 46 | http://www.mathofpolitics.com/page/5/,0,NA,NA,NA 47 | http://www.mathofpolitics.com/2014/01/28/what-didnt-he-say-and-how-didnt-he-say-it/,1,"January 28, 2014",What Didn’t He Say? …And How Didn’t He Say it? | The Math Of Politics,0 48 | http://www.mathofpolitics.com/2014/01/28/going-down-in-flames-to-rise-like-a-phoenix-in-the-primary/,1,"January 28, 2014",Going Down in Flames…To Rise Like A Phoenix (in the Primary) | The Math Of Politics,0 49 | http://www.mathofpolitics.com/2014/01/23/so-what-now/,1,"January 23, 2014","So, What Now? 
| The Math Of Politics",2 50 | http://www.mathofpolitics.com/2014/01/16/the-noted-is-always-notable/,1,"January 16, 2014",The Noted Is Always Notable | The Math Of Politics,0 51 | http://www.mathofpolitics.com/2014/01/13/youre-welcome-for-the-thankless-thanks/,1,"January 13, 2014",You’re Welcome for the Thankless Thanks | The Math Of Politics,0 52 | http://www.mathofpolitics.com/2014/01/10/ironic-quick-second-takes-on-sequential-rationality/,1,"January 10, 2014","Ironic, quick second takes on sequential rationality | The Math Of Politics",0 53 | http://www.mathofpolitics.com/2014/01/10/oh-i-thought-you-said-you-wanted-to-sell-a-bus/,1,"January 10, 2014","Oh, I Thought You Said You Wanted To Sell A Bus… | The Math Of Politics",0 54 | http://www.mathofpolitics.com/2013/12/20/cia-see-i-am-policy-relevant/,1,"December 20, 2013","CIA? See, I Am Policy Relevant | The Math Of Politics",0 55 | http://www.mathofpolitics.com/2013/12/19/the-ties-that-bind-theory/,1,"December 19, 2013",The Ties That Bind Theory | The Math Of Politics,2 56 | http://www.mathofpolitics.com/2013/12/05/mc-grammar-presents-u-shant-correct-this/,1,"December 5, 2013",MC Grammar Presents “U Shan’t Correct This” | The Math Of Politics,0 57 | http://www.mathofpolitics.com/page/6/,0,NA,NA,NA 58 | http://www.mathofpolitics.com/2013/10/25/inside-baseball-making-models-of-minds-making-models-behave/,1,"October 25, 2013","Inside Baseball: Making Models of Minds, Making Models “Behave” | The Math Of Politics",0 59 | http://www.mathofpolitics.com/2013/10/21/youre-better-than-this/,1,"October 21, 2013",You’re Better Than This | The Math Of Politics,4 60 | http://www.mathofpolitics.com/2013/10/20/let-me-confirm-your-belief-that-your-irrationality-is-rational/,1,"October 20, 2013",Let Me Confirm Your Belief That Your Irrationality Is Rational | The Math Of Politics,0 61 | http://www.mathofpolitics.com/2013/10/18/no-seriously-that-was-such-a-bad-idea-we-must-do-it-again/,1,"October 18, 2013","No, Seriously, That Was Such A Bad Idea, WE MUST DO IT AGAIN | The Math Of Politics",2 62 | http://www.mathofpolitics.com/2013/10/17/dis-spence-ing-with-the-debt-debacle/,1,"October 17, 2013",Dis-Spence-ing with the Debt Debacle | The Math Of Politics,4 63 | http://www.mathofpolitics.com/2013/10/16/boehner-in-a-manger-the-entitativity-scene-in-dc/,1,"October 16, 2013",Boehner in a Manger? The Entitativity Scene in DC | The Math Of Politics,0 64 | http://www.mathofpolitics.com/2013/10/15/my-bad-dispelling-the-implied-suspension-of-discharge/,1,"October 15, 2013",My Bad: Dispelling The Implied Suspension of Discharge | The Math Of Politics,0 65 | http://www.mathofpolitics.com/2013/10/14/legerdeboehner-or-the-rules-rule/,1,"October 14, 2013","LegerdeBoehner, or “The Rules Rule.” | The Math Of Politics",0 66 | http://www.mathofpolitics.com/2013/10/10/boehner-in-the-middle/,1,"October 10, 2013",Boehner in the Middle? 
| The Math Of Politics,0 67 | http://www.mathofpolitics.com/2013/10/08/why-a-clean-cr-is-a-no-boehner/,1,"October 8, 2013",Why a Clean CR is A No Boehner | The Math Of Politics,0 68 | http://www.mathofpolitics.com/page/7/,0,NA,NA,NA 69 | http://www.mathofpolitics.com/2013/10/05/why-the-house-cant-discharge-its-duties/,1,"October 5, 2013",Why The House Can’t Discharge Its Duties | The Math Of Politics,0 70 | http://www.mathofpolitics.com/2013/10/04/winning-at-all-costs-will-make-winning-costlier-than-it-needs-to-be/,1,"October 4, 2013",Winning At All Costs Will Make Winning Costlier (Than It Needs To Be) | The Math Of Politics,0 71 | http://www.mathofpolitics.com/2013/10/03/shutdown-the-inherent-tension-between-responsive-responsible-governing/,1,"October 3, 2013",SHUTDOWN: The Inherent Tension Between Responsive & Responsible Governing | The Math Of Politics,0 72 | http://www.mathofpolitics.com/2013/09/21/putting-the-come-at-me-bro-in-comity-or-boehner-is-a-painer-to-mcconnell/,1,"September 21, 2013","Putting the “Come At Me, Bro” in “Comity” or, Boehner is a Painer to McConnell | The Math Of Politics",0 73 | http://www.mathofpolitics.com/2013/09/20/the-politics-of-going-public/,1,"September 20, 2013",The Politics of Going Public | The Math Of Politics,1 74 | http://www.mathofpolitics.com/2013/09/12/damn-he-asked-us-about-damascus-or-cjt-meets-wmd/,1,"September 12, 2013","Damn, He Asked US About Damascus or, ‘CJT Meets WMD’ | The Math Of Politics",0 75 | http://www.mathofpolitics.com/2013/09/08/no-war-left-behind/,1,"September 8, 2013",No War Left Behind? | The Math Of Politics,0 76 | http://www.mathofpolitics.com/2013/09/08/a-whip-applied-twice-is-half-a-whip/,1,"September 8, 2013",A Whip Applied Twice Is Half A Whip | The Math Of Politics,0 77 | http://www.mathofpolitics.com/2013/09/06/if-you-whip-me-the-voters-will-whup-me/,1,"September 6, 2013","If You Whip Me, The Voters Will Whup Me | The Math Of Politics",4 78 | http://www.mathofpolitics.com/2013/09/05/my-research-is-kind-of-obscene-but-i-knew-it-only-when-i-blogged-it/,1,"September 5, 2013",My Research Is Kind Of Obscene…But I Knew It Only When I Blogged It. | The Math Of Politics,1 79 | http://www.mathofpolitics.com/page/8/,0,NA,NA,NA 80 | http://www.mathofpolitics.com/2013/09/03/which-comes-first-theory-or-data/,1,"September 3, 2013","Which Comes First, Theory or Data? | The Math Of Politics",0 81 | http://www.mathofpolitics.com/2013/08/16/there-is-no-networking-without-two-and-work-or-incentives-smelt-at-apsa/,1,"August 16, 2013","There is no Networking without “two” and “work” or, Incentives & Smelt at APSA! | The Math Of Politics",0 82 | http://www.mathofpolitics.com/2013/08/08/dont-panic-theory-and-empirics-are-both-alive-well-at-least-in-political-science/,1,"August 8, 2013",DON’T PANIC. Theory and Empirics Are Both Alive & Well…at least in political science. | The Math Of Politics,0 83 | http://www.mathofpolitics.com/2013/07/17/strength-numbers-is-a-weak-argument-better-than-a-strong-one/,1,"July 17, 2013",“Strength & Numbers”: Is a Weak Argument Better Than A Strong One? | The Math Of Politics,0 84 | http://www.mathofpolitics.com/2013/07/16/want-it-now-oh-well-give-it-to-you-later/,1,"July 16, 2013","Want It Now? 
Oh, We’ll Give It To You…Later | The Math Of Politics",0 85 | http://www.mathofpolitics.com/2013/07/15/i-would-manipulate-it-if-it-werent-so-duggan-the-gibbardish-of-measurement/,1,"July 15, 2013",I Would Manipulate It If It Weren’t So Duggan: The Gibbardish of Measurement | The Math Of Politics,0 86 | http://www.mathofpolitics.com/2013/07/12/a-byrd-in-the-hand-or-the-3-rs-of-the-senate-reid-rules-retribution/,1,"July 12, 2013","A Byrd in the Hand, or the 3 R’s of the Senate: Reid, Rules, & Retribution | The Math Of Politics",0 87 | http://www.mathofpolitics.com/2013/07/10/remuneration-of-the-nerds-or-putting-the-in-latex/,1,"July 10, 2013","Remuneration Of The Nerds, Or “Putting the $$ in LaTeX” | The Math Of Politics",0 88 | http://www.mathofpolitics.com/2013/07/08/syllogism-i-hardly-know-him-the-uneasy-wedding-of-gay-marriage-political-conservativism/,1,"July 8, 2013",“Syllogism? I Hardly Know Him!”: The Uneasy Wedding of Gay Marriage & (Political) Conservativism | The Math Of Politics,0 89 | http://www.mathofpolitics.com/2013/06/28/believe-me-when-i-say-that-i-want-to-believe-that-i-cant-believe-in-you/,1,"June 28, 2013",Believe Me When I Say That I Want To Believe That I Can’t Believe In You. | The Math Of Politics,0 90 | http://www.mathofpolitics.com/page/9/,0,NA,NA,NA 91 | http://www.mathofpolitics.com/2013/06/02/just-so-you-know-i-wont-know-the-politics-of-plausible-deniability/,1,"June 2, 2013","Just So You Know, I Won’t Know: The Politics of Plausible Deniability | The Math Of Politics",0 92 | http://www.mathofpolitics.com/2013/05/29/uninsurable-risk-adverse-selection-and-the-politics-of-scandals/,1,"May 29, 2013",Uninsurable Risk: Adverse Selection and the Politics of Scandals | The Math Of Politics,0 93 | http://www.mathofpolitics.com/2013/05/20/inside-baseball-weather-you-like-it-or-not-models-are-useful/,1,"May 20, 2013","Inside Baseball: Weather you like it or not, models are useful. | The Math Of Politics",0 94 | http://www.mathofpolitics.com/2013/05/06/the-impermissibility-of-permission-structures/,1,"May 6, 2013",The Impermissibility of Permission Structures | The Math Of Politics,0 95 | http://www.mathofpolitics.com/2013/04/22/unraveling-miranda-was-dzhokhar-told-of-the-public-safety-exception/,1,"April 22, 2013",Unraveling Miranda: Was Dzhokhar Told of the Public Safety Exception? 
| The Math Of Politics,0 96 | http://www.mathofpolitics.com/2013/04/18/political-issues-are-like-cookies/,1,"April 18, 2013",Political Issues are Like Cookies | The Math Of Politics,0 97 | http://www.mathofpolitics.com/2013/04/18/have-gun-will-vote/,1,"April 18, 2013","Have Gun, Will Vote | The Math Of Politics",0 98 | http://www.mathofpolitics.com/2013/04/17/inside-baseball-the-off-the-path-less-traveled/,1,"April 17, 2013",Inside Baseball: The Off-The-Path Less Traveled | The Math Of Politics,0 99 | http://www.mathofpolitics.com/2013/04/15/now-ill-show-you-mine-why-obama-budged-a-bit-on-the-budget/,1,"April 15, 2013","Now, I’ll Show You Mine: Why Obama Budged A Bit on the Budget | The Math Of Politics",0 100 | http://www.mathofpolitics.com/2013/04/02/inequality-smaller-ginis-can-fit-in-smaller-bottles/,1,"April 2, 2013",Inequality: Smaller GINIs Can Fit in Smaller Bottles | The Math Of Politics,0 101 | http://www.mathofpolitics.com/page/10/,0,NA,NA,NA 102 | http://www.mathofpolitics.com/2013/04/01/inside-baseball-uncommon-knowledge/,1,"April 1, 2013",Inside Baseball: Uncommon Knowledge | The Math Of Politics,0 103 | http://www.mathofpolitics.com/2013/03/24/the-slow-burn-of-coburn-or-get-the-hell-off-my-lawn/,1,"March 24, 2013","The Slow Burn of Coburn or, “Get The Hell Off My Lawn!” | The Math Of Politics",0 104 | http://www.mathofpolitics.com/2013/03/19/consensual-resolution/,1,"March 19, 2013",Consensual Resolution? | The Math Of Politics,0 105 | http://www.mathofpolitics.com/2013/03/17/quid-pro-status-quo-a-tale-of-two-tails/,1,"March 17, 2013",Quid Pro Status Quo: A Tale of Two Tails | The Math Of Politics,0 106 | http://www.mathofpolitics.com/2013/03/16/showdown-at-uzi-gulch-putting-the-glock-in-the-spiel/,1,"March 16, 2013",Showdown at Uzi Gulch: Putting the Glock in the Spiel | The Math Of Politics,0 107 | http://www.mathofpolitics.com/2013/03/07/greshams-law-in-the-senate-how-filibuster-reform-begot-rand-pauls-filibuster/,1,"March 7, 2013",Gresham’s Law in the Senate: How Filibuster Reform Begot Rand Paul’s Filibuster | The Math Of Politics,0 108 | http://www.mathofpolitics.com/2013/02/21/i-study-political-science-youre-welcome/,1,"February 21, 2013",I Study Political Science. You’re Welcome. | The Math Of Politics,0 109 | http://www.mathofpolitics.com/2013/02/17/immigration-reform-you-do-it-so-i-dont-have-to-really/,1,"February 17, 2013",Immigration Reform: You do it…So I Don’t Have To…Really. | The Math Of Politics,0 110 | http://www.mathofpolitics.com/2013/02/06/who-has-a-secret-list-and-flies-around-the-world-with-gifts/,1,"February 6, 2013",Who Has A Secret List and Flies Around The World With Gifts? | The Math Of Politics,0 111 | http://www.mathofpolitics.com/2013/02/02/so-optimal-you-hardly-notice/,1,"February 2, 2013",So Optimal You Hardly Notice | The Math Of Politics,0 112 | http://www.mathofpolitics.com/page/11/,0,NA,NA,NA 113 | http://www.mathofpolitics.com/2013/01/27/the-recesses-of-recess/,1,"January 27, 2013",The Recesses of Recess | The Math Of Politics,0 114 | http://www.mathofpolitics.com/2012/12/21/losing-to-win-nobody-puts-boehner-in-the-corner/,1,"December 21, 2012",Losing to Win: Nobody Puts Boehner In The Corner | The Math Of Politics,0 115 | http://www.mathofpolitics.com/2012/12/21/apocacliff-now-boehner-lost-but-does-he-really-mayan/,1,"December 21, 2012","ApocaCliff Now: Boehner “Lost,” But Does He Really Mayan? 
| The Math Of Politics",0 116 | http://www.mathofpolitics.com/2012/12/14/make-me-an-offer-i-cant-refuse-to-reject/,1,"December 14, 2012",Make Me an Offer I Can’t Refuse (to Reject) | The Math Of Politics,0 117 | http://www.mathofpolitics.com/2012/12/12/the-triple-ex-budget-trick-or-the-alternative-maximum-cliff/,1,"December 12, 2012","The Triple-Ex Budget Trick or, the Alternative Maximum Cliff | The Math Of Politics",0 118 | http://www.mathofpolitics.com/2012/11/21/naming-rites/,1,"November 21, 2012",Naming Rites | The Math Of Politics,0 119 | http://www.mathofpolitics.com/2012/11/16/churches-campaigns-and-taxes-the-411-on-501c3/,1,"November 16, 2012","Churches, Campaigns, and Taxes: The 411 on 501(c)(3) | The Math Of Politics",0 120 | http://www.mathofpolitics.com/2012/11/16/lets-get-fiscal-cliff/,1,"November 16, 2012","Let’s Get Fiscal, Cliff! | The Math Of Politics",0 121 | http://www.mathofpolitics.com/2012/09/10/penetrating-the-ill-logic-of-double-taxation/,1,"September 10, 2012",Penetrating the Ill Logic of Double Taxation | The Math Of Politics,0 122 | http://www.mathofpolitics.com/2012/08/06/political-antisocial-dismal-science-economics-getting-cut-next/,1,"August 6, 2012","Political, Antisocial, Dismal Science: Economics Getting Cut Next? | The Math Of Politics",0 123 | http://www.mathofpolitics.com/page/12/,0,NA,NA,NA 124 | http://www.mathofpolitics.com/2012/08/02/keeping-tract-is-income-segregation-getting-worse-in-the-us/,1,"August 2, 2012",Keeping Tract: Is Income Segregation Getting Worse in the US? | The Math Of Politics,0 125 | http://www.mathofpolitics.com/2012/07/31/vitali-statistics-measurability-issues-in-education/,1,"July 31, 2012",Vitali Statistics: Measurability Issues in Education | The Math Of Politics,0 126 | http://www.mathofpolitics.com/2012/07/29/47/,1,"July 29, 2012","But, Algebra is f(u)=n! 
| The Math Of Politics",0 127 | http://www.mathofpolitics.com/2012/07/25/regulatory-rithmetic/,1,"July 25, 2012",Regulatory ‘Rithmetic | The Math Of Politics,0 128 | http://www.mathofpolitics.com/2012/07/21/debits-and-credits-simple-budget-algebra/,1,"July 21, 2012",Debits and Credits: Simple Budget Algebra | The Math Of Politics,0 129 | -------------------------------------------------------------------------------- /in-classMaterial/day5/scrape.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import urllib 3 | import random 4 | import time 5 | import os 6 | import re 7 | 8 | # Open a web page 9 | web_address='https://case.ku.edu.tr/en/academics/international-relations/faculty/' 10 | web_page = urllib.request.urlopen(web_address) 11 | 12 | # Parse it 13 | soup = BeautifulSoup(web_page.read()) 14 | soup.prettify() 15 | 16 | # Find all cases of a certain tag 17 | soup.find_all('a') 18 | 19 | 20 | # Get the attributes 21 | my_a_tag=soup.find_all('a')[2] 22 | re.sub(r'<[^>]+>', '', str(my_a_tag)) #remove tags 23 | my_a_tag.attrs #Gives a dictionary with the attributes 24 | my_a_tag.attrs.keys() 25 | my_a_tag['href'] 26 | 27 | # Refine search by using attributes 28 | soup.find_all('span', {'class':'name'}) 29 | 30 | # There may be tags within tags 31 | mysection=soup.find_all('div')[0] 32 | mysection.a #Gives the 'a' tag within the 'div' tag 33 | mysection.find_all('a') #Gives the list of all 'a' tags within the 'div' tag 34 | mysection.get_text() 35 | 36 | 37 | # Creating a tree of objects 38 | 39 | mysection.contents #Gives a list of all children 40 | mysection.children #Creates an iterator for children 41 | 42 | for child in mysection.children: 43 | print(child) 44 | 45 | mysection.descendants #Creates an iterator for children, grandchildren, etc. 
46 | 47 | # Other methods to check family: 48 | # parent 49 | # parents 50 | # next_siblings 51 | # previous_siblings 52 | 53 | # Beautiful Soup documentation 54 | # http://www.crummy.com/software/BeautifulSoup/bs4/doc/ 55 | 56 | # Function to save a web page 57 | 58 | def download_page(address,path,filename,wait=5): 59 | time.sleep(random.uniform(0,wait)) 60 | page = urllib.request.urlopen(address) 61 | page_content = page.read() 62 | if os.path.exists(path+filename)==False: 63 | with open(path+filename, 'wb') as p_html: 64 | p_html.write(page_content) 65 | else: 66 | print("Can't overwrite file" + filename) 67 | 68 | download_page('http://www.crummy.com/software/BeautifulSoup/bs4/doc/', '', 'Docket05-1.html',0) 69 | 70 | #You can also parse a page that is saved on your computer 71 | with open('Docket05-1.html') as f: 72 | #We can read files in chunks 73 | myfile = f.read() 74 | 75 | soup = BeautifulSoup(myfile) 76 | soup.prettify() 77 | 78 | #Scrape the names and email addresses of INTL faculty and save the result as a csv 79 | web_address='https://case.ku.edu.tr/en/academics/international-relations/faculty/' 80 | web_page = urllib.request.urlopen(web_address) 81 | 82 | # Parse it 83 | soup = BeautifulSoup(web_page.read()) 84 | soup.prettify() 85 | 86 | namesTags = soup.find_all('span', {'class':'name'}) 87 | names = [] 88 | for name in namesTags: 89 | names.append(name.get_text()) 90 | 91 | emailsTags = soup.find_all('a', {'class':'message'}) 92 | emails = [] 93 | for email in emailsTags: 94 | emails.append(re.sub('mailto:', '', email['href'])) 95 | 96 | with open('faculty.csv', 'w') as f: 97 | my_writer = csv.DictWriter(f, fieldnames=("Name", "Email")) 98 | my_writer.writeheader() 99 | for i in range(len(names)): 100 | my_writer.writerow({"Name":names[i], "Email":emails[i]}) 101 | 102 | -------------------------------------------------------------------------------- /in-classMaterial/day5/test.csv: -------------------------------------------------------------------------------- 1 | 1,0 2 | 2,1 3 | 3,2 4 | 4,3 5 | 5,4 6 | 6,5 7 | 7,6 8 | 8,7 9 | 9,8 10 | 10,9 11 | 11,10 12 | 12,11 13 | 13,12 14 | 14,13 15 | 15,14 16 | 16,15 17 | 17,16 18 | 18,17 19 | 19,18 20 | 20,19 21 | 21,20 22 | 22,21 23 | 23,22 24 | 24,23 25 | 25,24 26 | 26,25 27 | 27,26 28 | 28,27 29 | 29,28 30 | 30,29 31 | 31,30 32 | 32,31 33 | 33,32 34 | 34,33 35 | 35,34 36 | 36,35 37 | 37,36 38 | 38,37 39 | 39,38 40 | 40,39 41 | 41,40 42 | 42,41 43 | 43,42 44 | 44,43 45 | 45,44 46 | 46,45 47 | 47,46 48 | 48,47 49 | 49,48 50 | 50,49 51 | 51,50 52 | 52,51 53 | 53,52 54 | 54,53 55 | 55,54 56 | 56,55 57 | 57,56 58 | 58,57 59 | 59,58 60 | 60,59 61 | 61,60 62 | 62,61 63 | 63,62 64 | 64,63 65 | 65,64 66 | 66,65 67 | 67,66 68 | 68,67 69 | 69,68 70 | 70,69 71 | 71,70 72 | 72,71 73 | 73,72 74 | 74,73 75 | 75,74 76 | 76,75 77 | 77,76 78 | 78,77 79 | 79,78 80 | 80,79 81 | 81,80 82 | 82,81 83 | 83,82 84 | 84,83 85 | 85,84 86 | 86,85 87 | 87,86 88 | 88,87 89 | 89,88 90 | 90,89 91 | 91,90 92 | 92,91 93 | 93,92 94 | 94,93 95 | 95,94 96 | 96,95 97 | 97,96 98 | 98,97 99 | 99,98 100 | -------------------------------------------------------------------------------- /in-classMaterial/day5/test1.csv: -------------------------------------------------------------------------------- 1 | 1,0 2 | 2,1 3 | 3,2 4 | 4,3 5 | 5,4 6 | 6,5 7 | 7,6 8 | 8,7 9 | 9,8 10 | 10,9 11 | 11,10 12 | 12,11 13 | 13,12 14 | 14,13 15 | 15,14 16 | 16,15 17 | 17,16 18 | 18,17 19 | 19,18 20 | 20,19 21 | 21,20 22 | 22,21 23 | 23,22 24 | 24,23 25 | 25,24 26 | 26,25 27 | 27,26 28 | 
28,27 29 | 29,28 30 | 30,29 31 | 31,30 32 | 32,31 33 | 33,32 34 | 34,33 35 | 35,34 36 | 36,35 37 | 37,36 38 | 38,37 39 | 39,38 40 | 40,39 41 | 41,40 42 | 42,41 43 | 43,42 44 | 44,43 45 | 45,44 46 | 46,45 47 | 47,46 48 | 48,47 49 | 49,48 50 | 50,49 51 | 51,50 52 | 52,51 53 | 53,52 54 | 54,53 55 | 55,54 56 | 56,55 57 | 57,56 58 | 58,57 59 | 59,58 60 | 60,59 61 | 61,60 62 | 62,61 63 | 63,62 64 | 64,63 65 | 65,64 66 | 66,65 67 | 67,66 68 | 68,67 69 | 69,68 70 | 70,69 71 | 71,70 72 | 72,71 73 | 73,72 74 | 74,73 75 | 75,74 76 | 76,75 77 | 77,76 78 | 78,77 79 | 79,78 80 | 80,79 81 | 81,80 82 | 82,81 83 | 83,82 84 | 84,83 85 | 85,84 86 | 86,85 87 | 87,86 88 | 88,87 89 | 89,88 90 | 90,89 91 | 91,90 92 | 92,91 93 | 93,92 94 | 94,93 95 | 95,94 96 | 96,95 97 | 97,96 98 | 98,97 99 | 99,98 100 | -------------------------------------------------------------------------------- /in-classMaterial/day5/test_with_fields.csv: -------------------------------------------------------------------------------- 1 | A,B 2 | 0,1 3 | 1,2 4 | 2,3 5 | 3,4 6 | 4,5 7 | 5,6 8 | 6,7 9 | 7,8 10 | 8,9 11 | 9,10 12 | 10,11 13 | 11,12 14 | 12,13 15 | 13,14 16 | 14,15 17 | 15,16 18 | 16,17 19 | 17,18 20 | 18,19 21 | 19,20 22 | 20,21 23 | 21,22 24 | 22,23 25 | 23,24 26 | 24,25 27 | 25,26 28 | 26,27 29 | 27,28 30 | 28,29 31 | 29,30 32 | 30,31 33 | 31,32 34 | 32,33 35 | 33,34 36 | 34,35 37 | 35,36 38 | 36,37 39 | 37,38 40 | 38,39 41 | 39,40 42 | 40,41 43 | 41,42 44 | 42,43 45 | 43,44 46 | 44,45 47 | 45,46 48 | 46,47 49 | 47,48 50 | 48,49 51 | 49,50 52 | 50,51 53 | 51,52 54 | 52,53 55 | 53,54 56 | 54,55 57 | 55,56 58 | 56,57 59 | 57,58 60 | 58,59 61 | 59,60 62 | 60,61 63 | 61,62 64 | 62,63 65 | 63,64 66 | 64,65 67 | 65,66 68 | 66,67 69 | 67,68 70 | 68,69 71 | 69,70 72 | 70,71 73 | 71,72 74 | 72,73 75 | 73,74 76 | 74,75 77 | 75,76 78 | 76,77 79 | 77,78 80 | 78,79 81 | 79,80 82 | 80,81 83 | 81,82 84 | 82,83 85 | 83,84 86 | 84,85 87 | 85,86 88 | 86,87 89 | 87,88 90 | 88,89 91 | 89,90 92 | 90,91 93 | 91,92 94 | 92,93 95 | 93,94 96 | 94,95 97 | 95,96 98 | 96,97 99 | 97,98 100 | 98,99 101 | -------------------------------------------------------------------------------- /in-classMaterial/day5/urlparsing.py: -------------------------------------------------------------------------------- 1 | import urllib.parse, urllib.request #the star import from urllib does not actually expose these submodules 2 | from bs4 import BeautifulSoup #used below to pull links out of a page 3 | url1 = urllib.parse.urljoin("http://www.wustl.edu", "bob/test.html") 4 | url2 = urllib.parse.urljoin("http://www.wustl.edu", "/") 5 | url3 = urllib.parse.urljoin("http://www.wustl.edu", "http://www.cnn.com") 6 | url4 = urllib.parse.urljoin("http://www.wustl.edu", "http://www.cnn.com/test.html") 7 | 8 | for url in [url1, url2, url3, url4]: 9 | p = urllib.parse.urlsplit(url) 10 | print("{0}://{1}{2}: {3}".format(p.scheme, p.hostname, p.path, "is wustl" if (p.hostname == "www.wustl.edu") else "is not wustl")) 11 | 12 | #go to a webpage and extract all links.
then filter which ones are of the same host 13 | web_address='https://case.ku.edu.tr/en/academics/international-relations/faculty/' 14 | web_page = urllib.request.urlopen(web_address) 15 | 16 | # Parse it 17 | soup = BeautifulSoup(web_page.read()) 18 | soup.prettify() 19 | 20 | linksTags = soup.find_all('a') 21 | links = [] 22 | for link in linksTags: 23 | links.append(link['href']) 24 | 25 | for url in links: 26 | p = urllib.parse.urlsplit(url) 27 | print("{0}://{1}{2}: {3}".format(p.scheme, p.hostname, p.path, "is KU" if (p.hostname == "www.ku.edu.tr") else "is not KU")) 28 | 29 | 30 | -------------------------------------------------------------------------------- /in-classMaterial/day5/webcrawler.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | import csv 3 | from nltk.util import clean_html 4 | import urllib.request 5 | import re 6 | 7 | def webcrawler(csvwriter, page_to_scrape = 'http://www.mathofpolitics.com'): 8 | webpage = urllib.request.urlopen(page_to_scrape) 9 | soup = BeautifulSoup(webpage.read(), 'lxml') 10 | soup.prettify() 11 | links=[] 12 | for link in soup.findAll('a'): 13 | try: 14 | if link['rel']==['bookmark']: #if link is a bookmark, points to a blog post 15 | links.append(link['href']) 16 | except KeyError: 17 | pass 18 | links2 = [] 19 | for link in links: #get rid of duplicates 20 | if link not in links2: 21 | links2.append(link) 22 | csvwriter.writerow([page_to_scrape, 0, 'NA', 'NA', 'NA']) #if there are bookmark links on the page, it is not a post page, so all entries are NA 23 | for link in links2: 24 | getInfo(csvwriter, str(link)) #get the info for each blog post 25 | prev_divs = soup.findAll('div', attrs = {'class':'nav-previous'}) #checks for older posts link on nonblog post pages (may be absent on the last page) 26 | if prev_divs and prev_divs[0].findAll('a'): #if it exists and contains a link 27 | webcrawler(csvwriter, str(prev_divs[0].findAll('a')[0]['href'])) #recursively run this function with older post link 28 | 29 | def getInfo(csvwriter, page_to_scrape): 30 | webpage = urllib.request.urlopen(page_to_scrape) 31 | soup = BeautifulSoup(webpage.read(), 'lxml') 32 | soup.prettify() 33 | date = soup.findAll('time', attrs={'class':'entry-date'})[0] #find time 34 | date = re.sub(r'<[^>]+>', '', str(date)) #remove tags 35 | title = soup.findAll('title')[0] #find title of blog post 36 | title = re.sub(r'<[^>]+>', '', str(title)) #remove tags 37 | comment_count = len(soup.findAll('div', attrs={'class':'comment-content'})) #counts number of comments - all div of class comment-content 38 | csvwriter.writerow([page_to_scrape, 1, date, title, comment_count]) #add row 39 | 40 | headers = ["url", "is_post", "publish_date", "post_title", "comment_count"] #header 41 | filename = "mathofpolitics.csv" 42 | readFile = open(filename, "w") 43 | csvwriter = csv.writer(readFile) 44 | csvwriter.writerow(headers) 45 | webcrawler(csvwriter) 46 | readFile.flush() 47 | readFile.close() 48 | 49 | -------------------------------------------------------------------------------- /in-classMaterial/day5/whitehouse-petitions.csv: -------------------------------------------------------------------------------- 1 | Summary,Signatures 2 | -------------------------------------------------------------------------------- /in-classMaterial/day5/whitehouse.py: -------------------------------------------------------------------------------- 1 | # Scraper to collect petition info from petitions.whitehouse.gov 2 | 3 | from bs4 import BeautifulSoup 4 | import csv 5 | from nltk.util import
clean_html 6 | import urllib.request 7 | import re 8 | 9 | # What page? 10 | page_to_scrape = 'https://petitions.whitehouse.gov/' 11 | 12 | # What info do we want? 13 | headers = ["Summary", "Signatures"] 14 | 15 | # Where do we save info? 16 | filename = "whitehouse-petitions.csv" 17 | readFile = open(filename, "w") 18 | csvwriter = csv.writer(readFile) 19 | csvwriter.writerow(headers) 20 | 21 | # Open webpage 22 | webpage = urllib.request.urlopen(page_to_scrape) 23 | 24 | # Parse it 25 | soup = BeautifulSoup(webpage.read()) 26 | soup.prettify() 27 | 28 | # Extract petitions on page 29 | petitions = soup.findAll("a", href=re.compile('^/petition')) 30 | 31 | print(len(petitions)) 32 | for petition in petitions: 33 | p = BeautifulSoup.get_text(petition) 34 | print(p) 35 | 36 | pets = [] 37 | for petition in petitions: 38 | p = BeautifulSoup.get_text(petition) 39 | if 'Sign It' not in p and 'Create a Petition' not in p and 'Load More' not in p: pets.append(p) 40 | 41 | #signatures 42 | #html tag: 43 | #<span class="signatures-number">364,223</span> 44 | signatures = soup.findAll("span", attrs={'class':'signatures-number'}) 45 | print(len(signatures)) 46 | sigs = [] 47 | for signature in signatures: 48 | s = BeautifulSoup.get_text(signature) 49 | sigs.append(s) 50 | 51 | for i in range(20): 52 | csvwriter.writerow([pets[i], sigs[i]]) 53 | 54 | readFile.close() 55 | 56 | #change this file to loop through all pages and scrape every petition (hint: look at the url of the page when you click load more) 57 | #then add a third column for goal, and a fourth for percentage of goal reached 58 | 59 | 60 | 61 | 62 | 63 | # Scraper to collect petition info from petitions.whitehouse.gov 64 | 65 | from bs4 import BeautifulSoup 66 | import csv 67 | from nltk.util import clean_html 68 | import urllib.request 69 | import re 70 | 71 | # What page? 72 | page_to_scrape = 'https://petitions.whitehouse.gov/' 73 | 74 | # What info do we want? 75 | headers = ["Summary", "Signatures", "Goal", "Prop"] 76 | 77 | # Where do we save info? 78 | filename = "whitehouse-petitions.csv" 79 | readFile = open(filename, "w") 80 | csvwriter = csv.writer(readFile) 81 | csvwriter.writerow(headers) 82 | 83 | # Open webpage 84 | webpage = urllib.request.urlopen(page_to_scrape) 85 | 86 | # Parse it 87 | soup = BeautifulSoup(webpage.read()) 88 | soup.prettify() 89 | 90 | # Extract petitions on page 91 | petitions = soup.findAll("a", href=re.compile('^/petition')) 92 | 93 | print(len(petitions)) 94 | for petition in petitions: 95 | p = BeautifulSoup.get_text(petition) 96 | print(p) 97 | 98 | pets = [] 99 | for petition in petitions: 100 | p = BeautifulSoup.get_text(petition) 101 | if 'Sign It' not in p and 'Create a Petition' not in p and 'Load More' not in p: pets.append(p) 102 | 103 | #signatures 104 | #html tag: 105 | #<span class="signatures-number">364,223</span> 106 | #<div class="goal-text-container">
100,000</div> 107 | signatures = soup.findAll("span", attrs={'class':'signatures-number'}) 108 | goals = soup.findAll("div", attrs={'class':'goal-text-container'}) 109 | print(len(signatures)) 110 | print(len(goals)) 111 | sigs = [] 112 | gls = [] 113 | props = [] 114 | for i in range(len(signatures)): 115 | s = BeautifulSoup.get_text(signatures[i]) 116 | sigs.append(s) 117 | g = BeautifulSoup.get_text(goals[i]) 118 | g = re.sub(',', '', g) 119 | g = re.sub('\ngoal\n', '', g) 120 | gls.append(g) 121 | props.append(float(re.sub(',' , '', s))/float(g)) 122 | 123 | i = 1 124 | while True: 125 | try: 126 | webpage = urllib.request.urlopen(page_to_scrape + '?page=' + str(i)) #request the next page of petitions 127 | #parse this page and extend pets, sigs, gls, and props exactly as above 128 | i += 1 129 | except: break 130 | 131 | for i in range(len(sigs)): 132 | csvwriter.writerow([pets[i], sigs[i], gls[i], props[i]]) 133 | 134 | readFile.close() 135 | 136 | -------------------------------------------------------------------------------- /in-classMaterial/day5/writefile.txt: -------------------------------------------------------------------------------- 1 | Hi guys.Does this go on the second line?abc 2 | I got appended! -------------------------------------------------------------------------------- /in-classMaterial/day6/geo.py: -------------------------------------------------------------------------------- 1 | from geopy.geocoders import Nominatim 2 | geolocator = Nominatim(user_agent = 'dcarlson@ku.edu.tr') #use your email address 3 | location = geolocator.geocode('Washington, DC') 4 | print(location.address) 5 | print((location.latitude, location.longitude)) 6 | 7 | location2 = geolocator.geocode('Mexico City') 8 | print(location2.address) 9 | print((location2.latitude, location2.longitude)) 10 | 11 | #distance between capitals 12 | from math import radians, sin, cos, acos 13 | 14 | def distance(loc1, loc2): 15 | return 6371.01 * acos(sin(radians(loc1.latitude))*sin(radians(loc2.latitude)) + cos(radians(loc1.latitude))*cos(radians(loc2.latitude))*cos(radians(loc1.longitude) - radians(loc2.longitude))) #great-circle distance in km via the spherical law of cosines 16 | 17 | distance(location, location2) 18 | 19 | #TODO: Create a distance matrix of 5 capitals 20 | 21 | locations = [] 22 | locations.append(geolocator.geocode('Washington, DC')) 23 | locations.append(geolocator.geocode('Mexico City')) 24 | locations.append(geolocator.geocode('Sarajevo')) 25 | locations.append(geolocator.geocode('Ankara')) 26 | locations.append(geolocator.geocode('Beijing')) 27 | 28 | Matrix = [[0 for x in range(5)] for y in range(5)] 29 | 30 | for x in range(5): 31 | for y in range(5): 32 | Matrix[x][y] = distance(locations[x], locations[y]) 33 | 34 | 35 | -------------------------------------------------------------------------------- /in-classMaterial/day6/tweepy.py: -------------------------------------------------------------------------------- 1 | import tweepy 2 | auth = tweepy.OAuthHandler('your consumer key', 'your consumer secret') 3 | auth.set_access_token('your access token', 'your access token secret') 4 | api = tweepy.API(auth) 5 | 6 | #See rate limit 7 | api.rate_limit_status() 8 | 9 | 10 | 11 | #Get some users 12 | mike_ward = api.get_user('3876') 13 | 14 | #How many favorites does he have? 15 | mike_ward.favourites_count 16 | 17 | #Who does Mike follow?
18 | mikes_friends = api.friends(id=mike_ward.screen_name) 19 | for f in mikes_friends: 20 | #Note I am handling UTF encoded strings so I convert them to ASCII-compatible for macs 21 | print("{0}".format(f.screen_name.encode('ascii', 'ignore'))) 22 | 23 | mikes_friends = api.friends(id=mike_ward.screen_name) 24 | for f in mikes_friends: 25 | #Note I am handling UTF encoded strings for linux 26 | print("{0}".format(f.screen_name.encode('utf-8', 'ignore'))) 27 | 28 | 29 | #or get info from a screen name 30 | gary_king = api.get_user('kinggary') 31 | gary_friends = api.friends(id=gary_king.screen_name) 32 | for f in gary_friends: 33 | #Note I am handling UTF encoded strings so I convert them to ASCII-compatible for macs 34 | print("{0}".format(f.screen_name.encode('ascii', 'ignore'))) 35 | 36 | 37 | import time 38 | from datetime import timedelta 39 | 40 | followers = api.followers_ids('davidgcarlson') # Extract IDs for my followers. 41 | followers_count = 0 # Creating baseline of 0 followers. 42 | i=0 43 | while i < len(followers): 44 | try: 45 | user = api.get_user(followers[i]) 46 | if user.followers_count > followers_count: 47 | followers_count = user.followers_count 48 | most_followed = str(user.name) 49 | i+=1 50 | except: time.sleep(.25) # Makes request every 0.25 seconds. Should we hit the limit, waits 0.25 before making another request. Permits for loop to remain active until limit is reset. 51 | 52 | 53 | followed = api.friends_ids('mcdickenson') # Extract IDs for those users Matt is following. 54 | i = 0 55 | max_tweets = 0 # Creating baseline for number of tweets. 56 | while i < len(followed): 57 | try: 58 | user = api.get_user(followed[i]) 59 | if user.statuses_count > max_tweets: 60 | max_tweets = user.statuses_count 61 | most_active = str(user.name) 62 | i+=1 63 | except: time.sleep(.25) -------------------------------------------------------------------------------- /in-classMaterial/day7/numpy.py: -------------------------------------------------------------------------------- 397 | inches = rainfall / 254.0 # 1/10mm -> inches 398 | inches.shape 399 | plt.hist(inches, 40); 400 | 401 | # booleans 402 | x = np.array([1, 2, 3, 4, 5]) 403 | x < 3 # less than 404 | x > 3 # greater than 405 | x <= 3 406 | x == 3 407 | x != 3 408 | x >= 3 409 | (2 * x) == (x ** 2) 410 | 411 | rng = np.random.RandomState(0) 412 | x = rng.randint(10, size=(3, 4)) 413 | x 414 | x < 6 415 | np.count_nonzero(x < 6) 416 | np.sum(x < 6) 417 | # how many values less than 6 in each row? 418 | np.sum(x < 6, axis=1) 419 | # are there any values greater than 8? 420 | np.any(x > 8) 421 | # are there any values less than zero? 422 | np.any(x < 0) 423 | # are all values less than 10? 424 | np.all(x < 10) 425 | # are all values equal to 6? 426 | np.all(x == 6) 427 | # are all values in each row less than 8?
428 | np.all(x < 8, axis=1) 429 | 430 | np.sum((inches > 0.5) & (inches < 1)) 431 | np.sum(~( (inches <= 0.5) | (inches >= 1) )) 432 | 433 | print("Number days without rain:", np.sum(inches == 0)) 434 | print("Number days with rain:", np.sum(inches != 0)) 435 | print("Days with more than 0.5 inches:", np.sum(inches > 0.5)) 436 | print("Rainy days with < 0.2 inches :", np.sum((inches > 0) & (inches < 0.2))) 437 | 438 | 439 | x[x < 5] 440 | # construct a mask of all rainy days 441 | rainy = (inches > 0) 442 | # construct a mask of all summer days (June 21st is the 172nd day) 443 | summer = (np.arange(365) - 172 < 90) & (np.arange(365) - 172 > 0) 444 | print("Median precip on rainy days in 2014 (inches):", np.median(inches[rainy])) 445 | print("Median precip on summer days in 2014 (inches): ", np.median(inches[summer])) 446 | print("Maximum precip on summer days in 2014 (inches): ", np.max(inches[summer])) 447 | print("Median precip on non-summer rainy days (inches):", np.median(inches[rainy & ~summer])) 448 | 449 | rand = np.random.RandomState(42) 450 | x = rand.randint(100, size=10) 451 | print(x) 452 | [x[3], x[7], x[2]] 453 | ind = [3, 7, 2] 454 | x[ind] 455 | ind = np.array([[3, 7], [4, 5]]) 456 | x[ind] 457 | 458 | X = np.arange(12).reshape((3, 4)) 459 | X 460 | row = np.array([0, 1, 2]) 461 | col = np.array([2, 1, 3]) 462 | X[row, col] 463 | #broadcasting indices 464 | X[row[:, np.newaxis], col] 465 | 466 | X[2, [2, 0, 1]] 467 | X[1:, [2, 0, 1]] 468 | mask = np.array([1, 0, 1, 0], dtype=bool) 469 | X[row[:, np.newaxis], mask] 470 | 471 | mean = [0, 0] 472 | cov = [[1, 2], 473 | [2, 5]] 474 | X = random.multivariate_normal(mean, cov, 100) 475 | X.shape 476 | 477 | plt.scatter(X[:, 0], X[:, 1]); 478 | 479 | indices = np.random.choice(X.shape[0], 20, replace=False) 480 | indices 481 | selection = X[indices] 482 | selection.shape 483 | plt.scatter(X[:, 0], X[:, 1], alpha=0.3) 484 | plt.scatter(selection[:, 0], selection[:, 1]); 485 | 486 | 487 | 488 | x = np.zeros(10) 489 | x[[0, 0]] = [4, 6] 490 | print(x) 491 | 492 | i = [2, 3, 3, 4, 4, 4] 493 | x 494 | 495 | x = np.zeros(10) 496 | np.add.at(x, i, 1) 497 | print(x) 498 | 499 | x = np.array([2, 1, 4, 3, 5]) 500 | np.sort(x) 501 | 502 | x = np.array([2, 1, 4, 3, 5]) 503 | i = np.argsort(x) 504 | print(i) 505 | x[i] 506 | 507 | rand = np.random.RandomState(42) 508 | X = rand.randint(0, 10, (4, 6)) 509 | print(X) 510 | 511 | # sort each column of X 512 | np.sort(X, axis=0) 513 | 514 | # sort each row of X 515 | np.sort(X, axis=1) 516 | 517 | 518 | x = np.array([7, 2, 3, 1, 6, 5, 4]) 519 | np.partition(x, 3) 520 | 521 | np.partition(X, 2, axis=1) 522 | 523 | #nearest neighbor 524 | 525 | X = random.rand(10, 2) 526 | plt.scatter(X[:, 0], X[:, 1], s=100); 527 | dist_sq = np.sum((X[:,np.newaxis,:] - X[np.newaxis,:,:]) ** 2, axis=-1) 528 | dist_sq 529 | nearest = np.argsort(dist_sq, axis=1) 530 | print(nearest) 531 | K = 2 532 | nearest_partition = np.argpartition(dist_sq, K + 1, axis=1) 533 | 534 | plt.scatter(X[:, 0], X[:, 1], s=100) 535 | # draw lines from each point to its two nearest neighbors 536 | K = 2 537 | for i in range(X.shape[0]): 538 | for j in nearest_partition[i, :K+1]: 539 | # plot a line from X[i] to X[j] 540 | # use some zip magic to make it happen: 541 | plt.plot(*zip(X[j], X[i]), color='black') 542 | 543 | 544 | name = ['Alice', 'Bob', 'Cathy', 'Doug'] 545 | age = [25, 45, 37, 19] 546 | weight = [55.0, 85.5, 68.0, 61.5] 547 | # Use a compound data type for structured arrays 548 | data = np.zeros(4, dtype={'names':('name', 
'age', 'weight'), 'formats':('U10', 'i4', 'f8')}) #unicode, int, float 549 | print(data.dtype) 550 | data['name'] = name 551 | data['age'] = age 552 | data['weight'] = weight 553 | print(data) 554 | # Get all names 555 | data['name'] 556 | # Get first row of data 557 | data[0] 558 | # Get the name from the last row 559 | data[-1]['name'] 560 | # Get names where age is under 30 561 | data[data['age'] < 30]['name'] 562 | 563 | # transpose 564 | 565 | a = np.array([1,2,3,4]) 566 | a.T 567 | a.reshape(1,4).T 568 | 569 | M = np.array(np.arange(16)).reshape(4,4) 570 | M 571 | M.T 572 | 573 | # matrix multiplication 574 | 575 | a @ M 576 | M @ a 577 | M @ a.reshape(4,1) 578 | a @ M 579 | 580 | # inverse 581 | 582 | np.linalg.inv(M) 583 | 584 | np.linalg.inv(M.T @ M) #singular 585 | 586 | X = np.random.random((15, 3)) 587 | X.T @ X 588 | np.linalg.inv(X.T @ X) 589 | 590 | # linear regression 591 | 592 | y = np.random.random((15,1)) 593 | b = np.linalg.inv(X.T @ X) @ X.T @ y 594 | 595 | 596 | 597 | # empty filled with NaN 598 | 599 | p = np.empty((4,4)) 600 | p 601 | p.fill(np.nan) 602 | p 603 | 604 | 605 | #TODO: Answer the following questions (solutions: https://www.machinelearningplus.com/python/101-numpy-exercises-python/ continue on the site if you finish) 606 | 607 | #Create a 3×3 numpy array of all True’s 608 | 609 | 610 | #Extract all odd numbers from arr 611 | #Input: 612 | 613 | #arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 614 | 615 | #Desired output: 616 | 617 | ##> array([1, 3, 5, 7, 9]) 618 | 619 | 620 | #Replace all odd numbers in arr with -1 621 | 622 | 623 | #Replace all odd numbers in arr with -1 without changing arr 624 | 625 | 626 | #Convert a 1D array to a 2D array with 2 rows 627 | 628 | 629 | #Stack arrays a and b vertically 630 | #Input 631 | 632 | #a = np.arange(10).reshape(2,-1) 633 | #b = np.repeat(1, 10).reshape(2,-1) 634 | 635 | #Desired Output: 636 | 637 | #> array([[0, 1, 2, 3, 4], 638 | #> [5, 6, 7, 8, 9], 639 | #> [1, 1, 1, 1, 1], 640 | #> [1, 1, 1, 1, 1]]) 641 | 642 | 643 | #Stack the arrays a and b horizontally. 644 | 645 | 646 | #Create the following pattern without hardcoding. Use only numpy functions and the below input array a. 647 | #Input: 648 | 649 | #a = np.array([1,2,3])` 650 | 651 | #Desired Output: 652 | 653 | #> array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3]) 654 | 655 | 656 | #Get the common items between a and b 657 | #Input: 658 | 659 | #a = np.array([1,2,3,2,3,4,3,4,5,6]) 660 | #b = np.array([7,2,10,2,7,4,9,4,9,8]) 661 | 662 | #Desired Output: 663 | 664 | #array([2, 4]) 665 | 666 | 667 | #From array a remove all items present in array b 668 | #Input: 669 | 670 | #a = np.array([1,2,3,4,5]) 671 | #b = np.array([5,6,7,8,9]) 672 | 673 | #Desired Output: 674 | 675 | #array([1,2,3,4]) 676 | 677 | 678 | #Get the positions where elements of a and b match 679 | #Input: 680 | 681 | #a = np.array([1,2,3,2,3,4,3,4,5,6]) 682 | #b = np.array([7,2,10,2,7,4,9,4,9,8]) 683 | 684 | #Desired Output: 685 | 686 | #> (array([1, 3, 5, 7]),) 687 | 688 | 689 | #Get all items between 5 and 10 from a. 
690 | #Input: 691 | 692 | #a = np.array([2, 6, 1, 9, 10, 3, 27]) 693 | 694 | #Desired Output: 695 | 696 | #(array([6, 9, 10]),) 697 | 698 | 699 | 700 | 701 | -------------------------------------------------------------------------------- /in-classMaterial/day7/president_heights.csv: -------------------------------------------------------------------------------- 1 | order,name,height(cm) 2 | 1,George Washington,189 3 | 2,John Adams,170 4 | 3,Thomas Jefferson,189 5 | 4,James Madison,163 6 | 5,James Monroe,183 7 | 6,John Quincy Adams,171 8 | 7,Andrew Jackson,185 9 | 8,Martin Van Buren,168 10 | 9,William Henry Harrison,173 11 | 10,John Tyler,183 12 | 11,James K. Polk,173 13 | 12,Zachary Taylor,173 14 | 13,Millard Fillmore,175 15 | 14,Franklin Pierce,178 16 | 15,James Buchanan,183 17 | 16,Abraham Lincoln,193 18 | 17,Andrew Johnson,178 19 | 18,Ulysses S. Grant,173 20 | 19,Rutherford B. Hayes,174 21 | 20,James A. Garfield,183 22 | 21,Chester A. Arthur,183 23 | 23,Benjamin Harrison,168 24 | 25,William McKinley,170 25 | 26,Theodore Roosevelt,178 26 | 27,William Howard Taft,182 27 | 28,Woodrow Wilson,180 28 | 29,Warren G. Harding,183 29 | 30,Calvin Coolidge,178 30 | 31,Herbert Hoover,182 31 | 32,Franklin D. Roosevelt,188 32 | 33,Harry S. Truman,175 33 | 34,Dwight D. Eisenhower,179 34 | 35,John F. Kennedy,183 35 | 36,Lyndon B. Johnson,193 36 | 37,Richard Nixon,182 37 | 38,Gerald Ford,183 38 | 39,Jimmy Carter,177 39 | 40,Ronald Reagan,185 40 | 41,George H. W. Bush,188 41 | 42,Bill Clinton,188 42 | 43,George W. Bush,182 43 | 44,Barack Obama,185 44 | -------------------------------------------------------------------------------- /in-classMaterial/day8/state-abbrevs.csv: -------------------------------------------------------------------------------- 1 | "state","abbreviation" 2 | "Alabama","AL" 3 | "Alaska","AK" 4 | "Arizona","AZ" 5 | "Arkansas","AR" 6 | "California","CA" 7 | "Colorado","CO" 8 | "Connecticut","CT" 9 | "Delaware","DE" 10 | "District of Columbia","DC" 11 | "Florida","FL" 12 | "Georgia","GA" 13 | "Hawaii","HI" 14 | "Idaho","ID" 15 | "Illinois","IL" 16 | "Indiana","IN" 17 | "Iowa","IA" 18 | "Kansas","KS" 19 | "Kentucky","KY" 20 | "Louisiana","LA" 21 | "Maine","ME" 22 | "Montana","MT" 23 | "Nebraska","NE" 24 | "Nevada","NV" 25 | "New Hampshire","NH" 26 | "New Jersey","NJ" 27 | "New Mexico","NM" 28 | "New York","NY" 29 | "North Carolina","NC" 30 | "North Dakota","ND" 31 | "Ohio","OH" 32 | "Oklahoma","OK" 33 | "Oregon","OR" 34 | "Maryland","MD" 35 | "Massachusetts","MA" 36 | "Michigan","MI" 37 | "Minnesota","MN" 38 | "Mississippi","MS" 39 | "Missouri","MO" 40 | "Pennsylvania","PA" 41 | "Rhode Island","RI" 42 | "South Carolina","SC" 43 | "South Dakota","SD" 44 | "Tennessee","TN" 45 | "Texas","TX" 46 | "Utah","UT" 47 | "Vermont","VT" 48 | "Virginia","VA" 49 | "Washington","WA" 50 | "West Virginia","WV" 51 | "Wisconsin","WI" 52 | "Wyoming","WY" -------------------------------------------------------------------------------- /in-classMaterial/day8/state-areas.csv: -------------------------------------------------------------------------------- 1 | state,area (sq. 
mi) 2 | Alabama,52423 3 | Alaska,656425 4 | Arizona,114006 5 | Arkansas,53182 6 | California,163707 7 | Colorado,104100 8 | Connecticut,5544 9 | Delaware,1954 10 | Florida,65758 11 | Georgia,59441 12 | Hawaii,10932 13 | Idaho,83574 14 | Illinois,57918 15 | Indiana,36420 16 | Iowa,56276 17 | Kansas,82282 18 | Kentucky,40411 19 | Louisiana,51843 20 | Maine,35387 21 | Maryland,12407 22 | Massachusetts,10555 23 | Michigan,96810 24 | Minnesota,86943 25 | Mississippi,48434 26 | Missouri,69709 27 | Montana,147046 28 | Nebraska,77358 29 | Nevada,110567 30 | New Hampshire,9351 31 | New Jersey,8722 32 | New Mexico,121593 33 | New York,54475 34 | North Carolina,53821 35 | North Dakota,70704 36 | Ohio,44828 37 | Oklahoma,69903 38 | Oregon,98386 39 | Pennsylvania,46058 40 | Rhode Island,1545 41 | South Carolina,32007 42 | South Dakota,77121 43 | Tennessee,42146 44 | Texas,268601 45 | Utah,84904 46 | Vermont,9615 47 | Virginia,42769 48 | Washington,71303 49 | West Virginia,24231 50 | Wisconsin,65503 51 | Wyoming,97818 52 | District of Columbia,68 53 | Puerto Rico,3515 54 | -------------------------------------------------------------------------------- /in-classMaterial/linearModels/OLSReviewSlides.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/linearModels/OLSReviewSlides.pdf --------------------------------------------------------------------------------