├── CarlsonPythonSyllabus.pdf
├── README.md
├── homework
    ├── README.md
    ├── hw1.pdf
    ├── hw2.pdf
    ├── hw3.pdf
    ├── hw4.pdf
    ├── immSurvey.csv
    ├── solutions
    │   ├── hw1.py
    │   └── hw1_test.py
    └── trend2.csv
└── in-classMaterial
    ├── day1
        └── INTL450Intro.pdf
    ├── day10
        └── IntroBayes.pdf
    ├── day11
        ├── 8schools.stan
        ├── IntroBayesDay2.pdf
        ├── __pycache__
        │   └── createdata.cpython-36.pyc
        ├── createdata.py
        ├── cty.dat
        ├── exampleStan.py
        ├── exampleStanSolutions.py
        ├── srrs2.dat
        ├── stan.pdf
        └── turnout.csv
    ├── day12
        └── ML1.py
    ├── day13
        ├── FremontHourly.csv
        ├── ML2.py
        └── SeaTacWeather.csv
    ├── day14
        └── ML3.py
    ├── day15
        └── ML4.py
    ├── day16
        └── GP1.pdf
    ├── day17
        ├── GP2.py
        ├── gp-fit.stan
        ├── gp-pred.stan
        └── immSurvey.csv
    ├── day18
        ├── NN1.pdf
        ├── NN2.py
        ├── mnist.pkl.gz
        └── wine_data.csv
    ├── day19
        ├── dcgan.gif
        ├── hello.png
        ├── image_at_epoch_0001.png
        ├── image_at_epoch_0002.png
        ├── image_at_epoch_0003.png
        ├── image_at_epoch_0004.png
        ├── image_at_epoch_0005.png
        ├── image_at_epoch_0006.png
        ├── image_at_epoch_0007.png
        ├── image_at_epoch_0008.png
        ├── image_at_epoch_0009.png
        ├── image_at_epoch_0010.png
        ├── image_at_epoch_0011.png
        ├── image_at_epoch_0012.png
        ├── image_at_epoch_0013.png
        ├── image_at_epoch_0014.png
        ├── image_at_epoch_0015.png
        ├── image_at_epoch_0016.png
        ├── image_at_epoch_0017.png
        ├── image_at_epoch_0018.png
        ├── image_at_epoch_0019.png
        ├── image_at_epoch_0020.png
        ├── image_at_epoch_0021.png
        ├── image_at_epoch_0022.png
        ├── image_at_epoch_0023.png
        ├── image_at_epoch_0024.png
        ├── image_at_epoch_0025.png
        ├── image_at_epoch_0026.png
        ├── image_at_epoch_0027.png
        ├── image_at_epoch_0028.png
        ├── image_at_epoch_0029.png
        ├── image_at_epoch_0030.png
        ├── image_at_epoch_0031.png
        ├── image_at_epoch_0032.png
        ├── image_at_epoch_0033.png
        ├── image_at_epoch_0034.png
        ├── image_at_epoch_0035.png
        ├── image_at_epoch_0036.png
        ├── image_at_epoch_0037.png
        ├── image_at_epoch_0038.png
        ├── image_at_epoch_0039.png
        ├── image_at_epoch_0040.png
        ├── image_at_epoch_0041.png
        ├── image_at_epoch_0042.png
        ├── image_at_epoch_0043.png
        ├── image_at_epoch_0044.png
        ├── image_at_epoch_0045.png
        ├── image_at_epoch_0046.png
        ├── image_at_epoch_0047.png
        ├── image_at_epoch_0048.png
        ├── image_at_epoch_0049.png
        ├── image_at_epoch_0050.png
        ├── photorec.ses
        └── tf.py
    ├── day2
        ├── INTL450Syntax.pdf
        ├── lab1.py
        └── lab1_solutions.py
    ├── day3
        ├── ClassesSlides.pdf
        ├── clock_lab.py
        ├── clock_solution.py
        ├── parent-child.py
        ├── polymorphism.py
        └── school.py
    ├── day4
        ├── TestingSlides.pdf
        ├── __pycache__
        │   └── fizzbuzz.cpython-36.pyc
        ├── exception.py
        ├── exceptions_example.py
        ├── fizzbuzz.py
        ├── fizzbuzz_test.py
        └── print_integer.py
    ├── day5
        ├── Docket05-1.html
        ├── csvstuff.py
        ├── faculty.csv
        ├── filestuff.py
        ├── mathofpolitics.csv
        ├── readfile.txt
        ├── scrape.py
        ├── test.csv
        ├── test1.csv
        ├── test_with_fields.csv
        ├── urlparsing.py
        ├── webcrawler.py
        ├── whitehouse-petitions.csv
        ├── whitehouse.py
        └── writefile.txt
    ├── day6
        ├── econ.csv
        ├── geo.py
        ├── tweepy.py
        ├── twint.py
        ├── wb.py
        └── ziya.csv
    ├── day7
        ├── Seattle2014.csv
        ├── numpy.py
        └── president_heights.csv
    ├── day8
        ├── pdintro.py
        ├── state-abbrevs.csv
        ├── state-areas.csv
        └── state-population.csv
    ├── day9
        ├── births.csv
        ├── california_cities.csv
        └── matplotlib.py
    └── linearModels
        └── OLSReviewSlides.pdf


/CarlsonPythonSyllabus.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/CarlsonPythonSyllabus.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # KocPython2020
2 | 
3 | # Final project due the 1st of June
4 | 


--------------------------------------------------------------------------------
/homework/README.md:
--------------------------------------------------------------------------------
1 | # Homework 1 Due before class February 19
2 | 
3 | # Homework 2 Due before class March 11
4 | 
5 | # Homework 3 Due before class April 22
6 | 
7 | # Homework 4 Due end of day May 22
8 | 


--------------------------------------------------------------------------------
/homework/hw1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw1.pdf


--------------------------------------------------------------------------------
/homework/hw2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw2.pdf


--------------------------------------------------------------------------------
/homework/hw3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw3.pdf


--------------------------------------------------------------------------------
/homework/hw4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/homework/hw4.pdf


--------------------------------------------------------------------------------
/homework/solutions/hw1.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | class Portfolio(object):
 4 |     def __init__(self):
 5 |         self.cash = 0.00
 6 |         self.assets = {"stock" : {}, "mutual funds" : {}, "bonds" : {}} #dictionary will use asset classes as reference to number owned
 7 |         self.hist = "Portfolio initialized\n"
 8 | 
 9 |     def addCash(self, cash):
10 |         self.cash += int(100*cash)/100.0    #ensures adding currency compatible numbers
11 |         self.hist+="Added $%.2f\n" %(int(100*cash)/100.0)
12 | 
13 |     def withdrawCash(self, cash):
14 |         if cash > self.cash: print("Portfolio does not contain enough cash.")
15 |         else:
16 |             self.cash -= int(100*cash)/100.0
17 |             self.hist+="Withdrew $%.2f\n" %(int(100*cash)/100.0)
18 | 
19 |     def buyAsset(self, number, asset):
20 |         if self.cash < number*asset.price:
21 |             print("Portfolio does not contain enough cash.")
22 |             return None
23 |         self.withdrawCash(number*asset.price)
24 |         if asset in self.assets[asset.getClass()]:
25 |             self.assets[asset.getClass()][asset]+=number #see below for getClass()
26 |         else: self.assets[asset.getClass()][asset] = number
27 |         self.hist+="Bought %d of %s named %s\n" % (number, asset.getClass(), asset.name)
28 | 
29 |     def buyStock(self, number, asset): self.buyAsset(int(number), asset) #same as buyAsset, but enforcing integer purchases
30 | 
31 |     buyMutualFund = buyBonds = buyAsset #exactly the same as buyAsset
32 | 
33 |     def sellAsset(self, number, asset):
34 |         if asset in self.assets[asset.getClass()]: #check that it's in the portfolio
35 |             if self.assets[asset.getClass()][asset] < number: #check that there is enough to sell
36 |                 print("The portfolio does not contain enough of %s %s" %(asset.name, asset.getClass()))
37 |             else:
38 |                 self.assets[asset.getClass()][asset]-=number
39 |                 if self.assets[asset.getClass()][asset] == 0: #check if sold all of it - delete key if so
40 |                     del self.assets[asset.getClass()][asset]
41 |                 self.addCash(number*asset.SellPrice()) #call function asset.SellPrice to calculate price of asset
42 |                 self.hist+="Sold %d of %s named %s\n" % (number, asset.getClass(), asset.name)
43 |         else: print("The portfolio does not contain %s with name %s" %(asset.getClass(), asset.name))
44 | 
45 |     def sellStock(self, number, asset): self.sellAsset(int(number), asset) #enforce integer sales
46 | 
47 |     sellMutualFund = sellBonds = sellAsset
48 | 
49 |     def __str__(self):
50 |         output = "cash: $%-15.2f\n" %self.cash
51 |         for asset in self.assets:
52 |             output+= "%s: \n"%asset
53 |             if not self.assets[asset]: output+='\tnone\n'
54 |             for ast in self.assets[asset]:
55 |                 output += str(self.assets[asset][ast]).rjust(5) + str(ast.name).rjust(5) + "\n"
56 |         return output
57 | 
58 |     def history(self): print(self.hist)
59 | 
60 | class Asset(object): #superclass for stocks, bonds, and mutual funds
61 |     def __init__(self, price, name):
62 |         self.price = price
63 |         self.name = name
64 | 
65 |     def SellPrice(self):
66 |         return int(100*random.uniform(.9*self.price, 1.2*self.price))/100.0 #we'll make bonds and mutual funds sell by the same distribution
67 |     
68 |     
69 | class Stock(Asset):
70 |     def __init__(self, price, name):
71 |         Asset.__init__(self, price, name)
72 | 
73 |     def getClass(self): return "stock" #a simple way to get the class as a string to use for calling the asset dictionary
74 | 
75 |     def SellPrice(self):
76 |         return int(100*random.uniform(.5*self.price, 1.5*self.price))/100.0 #change the distribution for stock sales
77 | 
78 | class MutualFund(Asset):
79 |     def __init__(self, name):
80 |         Asset.__init__(self, 1.0, name)
81 | 
82 |     def getClass(self): return "mutual funds"
83 | 
84 | 
85 | class Bonds(Asset):
86 |     def __init__(self, price, name):
87 |         Asset.__init__(self, price, name)
88 | 
89 |     def getClass(self): return "bonds"
90 | 
91 | 
92 | 
93 | 


--------------------------------------------------------------------------------
/homework/solutions/hw1_test.py:
--------------------------------------------------------------------------------
  1 | from hw1 import *
  2 | import unittest
  3 | 
  4 | class PortfolioTest(unittest.TestCase):
  5 |     def setUp(self):
  6 |         self.portfolio = Portfolio()
  7 |         self.mut1 = MutualFund("MUTA")
  8 |         self.mut2 = MutualFund("MUTB")
  9 |         self.stock1 = Stock(25, "STKA")
 10 |         self.stock2 = Stock(36.52, "STKB")
 11 |         self.bond1 = Bonds(19, "BNDA")
 12 |         self.bond2 = Bonds(44, "BNDB")
 13 | 
 14 |     def test_an_empty_portfolio(self):
 15 |         self.assertEqual(0.0, self.portfolio.cash)
 16 |         self.assertEqual({}, self.portfolio.assets['mutual funds'])
 17 |         self.assertEqual({}, self.portfolio.assets['bonds'])
 18 |         self.assertEqual({}, self.portfolio.assets['stock'])
 19 |         self.assertEqual("Portfolio initialized\n", self.portfolio.hist)
 20 | 
 21 |     def test_assets(self):
 22 |         self.assertEqual(1, self.mut1.price)
 23 |         self.assertEqual(1, self.mut2.price)
 24 |         self.assertEqual(25.00, self.stock1.price)
 25 |         self.assertEqual(36.52, self.stock2.price)
 26 |         self.assertEqual(19.00, self.bond1.price)
 27 |         self.assertEqual(44, self.bond2.price)
 28 | 
 29 |         self.assertEqual("MUTA", self.mut1.name)
 30 |         self.assertEqual("MUTB", self.mut2.name)
 31 |         self.assertEqual("STKA", self.stock1.name)
 32 |         self.assertEqual("STKB", self.stock2.name)
 33 |         self.assertEqual("BNDA", self.bond1.name)
 34 |         self.assertEqual("BNDB", self.bond2.name)
 35 | 
 36 |     def test_add_cash(self):
 37 |         self.portfolio.addCash(30956.45)
 38 |         self.assertEqual(30956.45, self.portfolio.cash)
 39 |         self.assertTrue("Added $30956.45" in self.portfolio.hist)
 40 | 
 41 |     def test_withdraw_cash(self):
 42 |         self.portfolio.withdrawCash(345)
 43 |         self.assertEqual(0, self.portfolio.cash)
 44 | 
 45 |         self.portfolio.addCash(30956.45)
 46 |         self.portfolio.withdrawCash(56.45)
 47 |         self.assertEqual(30900, self.portfolio.cash)
 48 |         self.assertTrue("Withdrew $56.45" in self.portfolio.hist)
 49 | 
 50 |     def test_buy_stock(self):
 51 |         self.portfolio.buyStock(100, self.stock1)
 52 |         self.assertEqual({}, self.portfolio.assets['stock'])
 53 | 
 54 |         self.portfolio.addCash(10000)
 55 |         self.portfolio.buyStock(100, self.stock1)
 56 |         self.assertEqual({self.stock1: 100}, self.portfolio.assets['stock'])
 57 |         self.assertEqual(10000-100*25.0, self.portfolio.cash)
 58 |         self.assertTrue("Bought 100 of stock named STKA" in self.portfolio.hist)
 59 | 
 60 |         self.assertTrue(self.stock2 not in self.portfolio.assets['stock'])
 61 | 
 62 |     def test_sell_stock(self):
 63 |         self.portfolio.sellStock(100, self.stock1)
 64 |         self.assertEqual({}, self.portfolio.assets['stock'])
 65 |         self.assertEqual(0.0, self.portfolio.cash)
 66 | 
 67 |         self.portfolio.addCash(10000)
 68 |         self.portfolio.buyStock(100, self.stock1)
 69 |         self.portfolio.sellStock(50, self.stock1)
 70 |         self.assertEqual(50, self.portfolio.assets['stock'][self.stock1])
 71 |         newcash = 10000 - 100*25.0
 72 |         self.assertTrue(self.portfolio.cash <= newcash +50*25.0*1.5 and 50*25.0*.5 + newcash <= self.portfolio.cash)
 73 |         self.assertTrue("Sold 50 of stock named STKA" in self.portfolio.hist)
 74 | 
 75 |     
 76 |     def test_buy_mutual_fund(self):
 77 |         self.portfolio.buyMutualFund(100, self.mut1)
 78 |         self.assertEqual({}, self.portfolio.assets['mutual funds'])
 79 | 
 80 |         self.portfolio.addCash(10000)
 81 |         self.portfolio.buyMutualFund(100, self.mut1)
 82 |         self.assertEqual({self.mut1: 100}, self.portfolio.assets['mutual funds'])
 83 |         self.assertEqual(10000-100, self.portfolio.cash)
 84 |         self.assertTrue("Bought 100 of mutual funds named MUTA" in self.portfolio.hist)
 85 | 
 86 |         self.assertTrue(self.mut2 not in self.portfolio.assets['mutual funds'])
 87 | 
 88 |     def test_sell_mutual_fund(self):
 89 |         self.portfolio.sellMutualFund(100, self.mut1)
 90 |         self.assertEqual({}, self.portfolio.assets['mutual funds'])
 91 |         self.assertEqual(0.0, self.portfolio.cash)
 92 | 
 93 |         self.portfolio.addCash(10000)
 94 |         self.portfolio.buyMutualFund(100, self.mut1)
 95 |         self.portfolio.sellMutualFund(50, self.mut1)
 96 |         self.assertEqual(50, self.portfolio.assets['mutual funds'][self.mut1])
 97 |         newcash = 10000 - 100
 98 |         self.assertTrue(self.portfolio.cash <= newcash +50*1.2 and 50*.9 + newcash <= self.portfolio.cash)
 99 |         self.assertTrue("Sold 50 of mutual funds named MUTA" in self.portfolio.hist)
100 |         
101 |     def test_buy_bonds(self):
102 |         self.portfolio.buyBonds(100, self.bond1)
103 |         self.assertEqual({}, self.portfolio.assets['bonds'])
104 | 
105 |         self.portfolio.addCash(10000)
106 |         self.portfolio.buyBonds(100, self.bond1)
107 |         self.assertEqual({self.bond1: 100}, self.portfolio.assets['bonds'])
108 |         self.assertEqual(10000-100*19.0, self.portfolio.cash)
109 |         self.assertTrue("Bought 100 of bonds named BNDA" in self.portfolio.hist)
110 | 
111 |         self.assertTrue(self.bond2 not in self.portfolio.assets['bonds'])
112 | 
113 |     def test_sell_bonds(self):
114 |         self.portfolio.sellBonds(100, self.bond1)
115 |         self.assertEqual({}, self.portfolio.assets['bonds'])
116 |         self.assertEqual(0.0, self.portfolio.cash)
117 | 
118 |         self.portfolio.addCash(10000)
119 |         self.portfolio.buyBonds(100, self.bond1)
120 |         self.portfolio.sellBonds(50, self.bond1)
121 |         self.assertEqual(50, self.portfolio.assets['bonds'][self.bond1])
122 |         newcash = 10000 - 100*19.0
123 |         self.assertTrue(self.portfolio.cash <= newcash +50*19*1.2 and 50*.9*19 + newcash <= self.portfolio.cash)
124 |         self.assertTrue("Sold 50 of bonds named BNDA" in self.portfolio.hist)
125 | 
126 |     def test_print(self):
127 |         self.assertTrue('cash: $' in self.portfolio.__str__())
128 |         self.assertTrue('mutual funds:' in self.portfolio.__str__())
129 |         self.assertTrue('stock:' in self.portfolio.__str__())
130 |         self.assertTrue('bonds:' in self.portfolio.__str__())
131 | 
132 |         self.portfolio.addCash(10000)
133 |         self.portfolio.buyBonds(100, self.bond1)
134 |         self.portfolio.buyStock(5, self.stock1)
135 |         self.portfolio.buyMutualFund(10, self.mut1)
136 |         self.assertTrue('BNDA' in self.portfolio.__str__())
137 |         self.assertTrue('STKA' in self.portfolio.__str__())
138 |         self.assertTrue('MUTA' in self.portfolio.__str__())
139 |         self.assertTrue('100' in self.portfolio.__str__())
140 |         self.assertTrue('5' in self.portfolio.__str__())
141 |         self.assertTrue('10' in self.portfolio.__str__())
142 | 
143 |                 
144 | if __name__ == '__main__':
145 |     unittest.main()
146 | 
147 | 


--------------------------------------------------------------------------------
/in-classMaterial/day1/INTL450Intro.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day1/INTL450Intro.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day10/IntroBayes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day10/IntroBayes.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day11/8schools.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |     int<lower=0> J; // number of schools
 3 |     vector[J] y; // estimated treatment effects
 4 |     vector<lower=0>[J] sigma; // s.e. of effect estimates
 5 | }
 6 | parameters {
 7 |     real mu;
 8 |     real<lower=0> tau;
 9 |     vector[J] eta;
10 | }
11 | transformed parameters {
12 |     vector[J] theta;
13 |     theta = mu + tau * eta;
14 | }
15 | model {
16 |     eta ~ normal(0, 1);
17 |     y ~ normal(theta, sigma);
18 | }
19 | 


--------------------------------------------------------------------------------
/in-classMaterial/day11/IntroBayesDay2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day11/IntroBayesDay2.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day11/__pycache__/createdata.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day11/__pycache__/createdata.cpython-36.pyc


--------------------------------------------------------------------------------
/in-classMaterial/day11/createdata.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Setup a model and data
 3 | """
 4 | 
 5 | import numpy as np
 6 | 
 7 | # set the true values of the model parameters for creating the data
 8 | m = 3.5 # gradient of the line
 9 | c = 1.2 # y-intercept of the line
10 | 
11 | # set the "predictor variable"/abscissa
12 | M = 100
13 | xmin = 0.
14 | xmax = 10.
15 | stepsize = (xmax-xmin)/M
16 | x = np.arange(xmin, xmax, stepsize)
17 | 
18 | # define the model function
19 | def straight_line(x, m, c):
20 |     """
21 |     A straight line model: y = m*x + c
22 |     
23 |     Args:
24 |         x (list): a set of abscissa points at which the model is defined
25 |         m (float): the gradient of the line
26 |         c (float): the y-intercept of the line
27 |     """
28 |     
29 |     return m*x + c
30 | 
31 | # create the data - the model plus Gaussian noise
32 | sigma = 0.5 # standard deviation of the noise
33 | data = straight_line(x, m, c) + sigma*np.random.randn(M)
34 | 
35 | 


--------------------------------------------------------------------------------
/in-classMaterial/day11/stan.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day11/stan.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day12/ML1.py:
--------------------------------------------------------------------------------
  1 | #Supervised learning: Model relationship between measured features of data and label associated with the data - apply labels to new data - classification and regression
  2 | #Unsupervised learning: Clustering and dimensionality reduction
  3 | import seaborn as sns
  4 | iris = sns.load_dataset('iris')
  5 | iris.head()
  6 | #rows are samples, number of rows is n_samples
  7 | #columns are features, number is n_features
  8 | #features matrix - 2D data representation - often stored as X
  9 | #NumPy array, Pandas DataFrame, or SciPy sparse matrices
 10 | #label or target array - y (dependent variable)
 11 | sns.pairplot(iris, hue='species', size=1.5);
 12 | 
 13 | X_iris = iris.drop('species', axis=1)
 14 | X_iris.shape
 15 | y_iris = iris['species']
 16 | y_iris.shape
 17 | 
 18 | #Estimator API
 19 | #Consistency
 20 | #All objects share a common interface drawn from a limited set of methods, with consistent documentation.
 21 | #Inspection
 22 | #All specified parameter values are exposed as public attributes.
 23 | #Limited object hierarchy
 24 | #Only algorithms are represented by Python classes; datasets are represented in standard formats (NumPy arrays, Pandas DataFrame s, SciPy sparse matrices) and parameter names use standard Python strings.
 25 | #Composition
 26 | #Many machine learning tasks can be expressed as sequences of more fundamental algorithms, and Scikit-Learn makes use of this wherever possible.
 27 | #Sensible defaults
 28 | #When models require user-specified parameters, the library defines an appropriate default value
 29 | 
 30 | #1. Choose a class of model by importing the appropriate estimator class from Scikit-Learn.
 31 | #2. Choose model hyperparameters by instantiating this class with desired values.
 32 | #3. Arrange data into a features matrix and target vector following the discussion from before.
 33 | #4. Fit the model to your data by calling the fit() method of the model instance.
 34 | #5. Apply the model to new data:
 35 | #    • For supervised learning, often we predict labels for unknown data using the predict() method.
 36 | #    • For unsupervised learning, we often transform or infer properties of the data using the transform() or predict() method.
 37 | 
 38 | #Supervised learning: Simple linear regression
 39 | import matplotlib.pyplot as plt
 40 | import numpy as np
 41 | rng = np.random.RandomState(42)
 42 | x = 10 * rng.rand(50)
 43 | y = 2 * x - 1 + rng.randn(50)
 44 | plt.scatter(x, y);
 45 | 
 46 | from sklearn.linear_model import LinearRegression
 47 | model = LinearRegression(fit_intercept=True) #storing of hyperparameter values
 48 | model
 49 | X = x[:, np.newaxis]
 50 | X.shape
 51 | model.fit(X, y)
 52 | model.coef_
 53 | model.intercept_
 54 | #interpreting model parameters is more a statistical modeling question than a machine learning question, but if you want inferences:
 55 | import statsmodels.api as sm
 56 | ols = sm.OLS(y, X)
 57 | ols_result = ols.fit()
 58 | # Now you have at your disposition several error estimates, e.g.
 59 | ols_result.HC0_se
 60 | # and covariance estimates
 61 | ols_result.cov_HC0
 62 | #confidence intervals
 63 | ols_result.conf_int()
 64 | #p-values (which may come in handy for pubs, but please do not rely on them for this class!)
 65 | ols_result.pvalues
 66 | 
 67 | #prediction
 68 | xfit = np.linspace(-1, 11)
 69 | Xfit = xfit[:, np.newaxis]
 70 | yfit = model.predict(Xfit)
 71 | plt.scatter(x, y)
 72 | plt.plot(xfit, yfit);
 73 | 
 74 | #TODO: Fit the same linear regression you did for homework 2 using the LinearRegression model, and compare the results
 75 | 
 76 | #evaluate efficacy of model by comparing results to known baseline
 77 | #given a model trained on a portion of the Iris data, how well can we predict the remaining labels?
 78 | #Because it is so fast and has no hyperparameters to choose, Gaussian naive Bayes is often a good model to use as a baseline classification, before you explore whether improvements can be found through more sophisticated models.
 79 | from sklearn.model_selection import train_test_split
 80 | Xtrain, Xtest, ytrain, ytest = train_test_split(X_iris, y_iris,
 81 | random_state=1)
 82 | 
 83 | from sklearn.naive_bayes import GaussianNB # 1. choose model class
 84 | model = GaussianNB() # 2. instantiate model
 85 | model.fit(Xtrain, ytrain) # 3. fit model to data
 86 | y_model = model.predict(Xtest) # 4. predict on new data
 87 | 
 88 | #accuracy
 89 | from sklearn.metrics import accuracy_score
 90 | accuracy_score(ytest, y_model)
 91 | 
 92 | #TODO: Using the same homework 2 data, split your data into a train and test sample, and test the accuracy score
 93 | 
 94 | #Unsupervised learning: Dimensionality reduction
 95 | #Principal components analysis: fast linear dimensionality reduction technique
 96 | from sklearn.decomposition import PCA # 1. Choose the model class
 97 | model = PCA(n_components=2) # 2. Instantiate the model with hyperparameters
 98 | model.fit(X_iris) # 3. Fit to data. Notice y is not specified!
 99 | X_2D = model.transform(X_iris) # 4. Transform the data to two dimensions
100 | 
101 | #2D species are well separated, even without labels
102 | iris['PCA1'] = X_2D[:, 0]
103 | iris['PCA2'] = X_2D[:, 1]
104 | sns.lmplot("PCA1", "PCA2", hue='species', data=iris, fit_reg=False);
105 | 
106 | #Unsupervised learning: Iris clustering
107 | #Gaussian mixture model: Model data as collection of Gaussian blobs
108 | from sklearn.mixture import GaussianMixture # 1. Choose the model class
109 | model = GaussianMixture(n_components=3, covariance_type='full') # 2. Instantiate the model w/ hyperparameters
110 | model.fit(X_iris) # 3. Fit to data. Notice y is not specified!
111 | y_gmm = model.predict(X_iris) # 4. Determine cluster labels
112 | 
113 | #Add cluster label - Automatically identify presence of different groups of species
114 | iris['cluster'] = y_gmm
115 | sns.lmplot("PCA1", "PCA2", data=iris, hue='species', col='cluster', fit_reg=False);
116 | 
117 | #Application: Exploring Handwritten Digits
118 | from sklearn.datasets import load_digits
119 | digits = load_digits()
120 | digits.images.shape
121 | 
122 | #The images data is a three-dimensional array: 1,797 samples, each consisting of an 8×8 grid of pixels. Let’s visualize the first hundred of these
123 | fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks':[], 'yticks':[]}, gridspec_kw=dict(hspace=0.1, wspace=0.1))
124 | for i, ax in enumerate(axes.flat):
125 |     ax.imshow(digits.images[i], cmap='binary', interpolation='nearest')
126 |     ax.text(0.05, 0.05, str(digits.target[i]), transform=ax.transAxes, color='green')
127 | 
128 | #Treat each pixel as a feature - flatten out the array so we have length-64 array of pixel values representing each digit
129 | X = digits.data
130 | X.shape
131 | y = digits.target
132 | y.shape
133 | 
134 | #Unsupervised learning: Dimensionality reduction - Isomap
135 | from sklearn.manifold import Isomap
136 | iso = Isomap(n_components=2)
137 | iso.fit(digits.data)
138 | data_projected = iso.transform(digits.data)
139 | data_projected.shape
140 | 
141 | plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target, edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('Spectral', 10))
142 | plt.colorbar(label='digit label', ticks=range(10))
143 | plt.clim(-0.5, 9.5);
144 | #generally good separation in parameter space
145 | 
146 | #classification
147 | Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
148 | #Gaussian naive Bayes
149 | from sklearn.naive_bayes import GaussianNB
150 | model = GaussianNB()
151 | model.fit(Xtrain, ytrain)
152 | y_model = model.predict(Xtest)
153 | 
154 | accuracy_score(ytest, y_model) #good considering the simplicity of the model
155 | #where did we go wrong? Confusion matrix shows frequency of misclassification
156 | from sklearn.metrics import confusion_matrix
157 | mat = confusion_matrix(ytest, y_model)
158 | sns.heatmap(mat, square=True, annot=True, cbar=False)
159 | plt.xlabel('predicted value')
160 | plt.ylabel('true value');
161 | #plot inputs with predicted labels
162 | fig, axes = plt.subplots(10, 10, figsize=(8, 8), subplot_kw={'xticks':[], 'yticks':[]}, gridspec_kw=dict(hspace=0.1, wspace=0.1))
163 | for i, ax in enumerate(axes.flat):
164 |     ax.imshow(digits.images[i], cmap='binary', interpolation='nearest')
165 |     ax.text(0.05, 0.05, str(y_model[i]), transform=ax.transAxes, color='green' if (ytest[i] == y_model[i]) else 'red')
166 | 
167 | 
168 | #to make an informed choice, we need a way to validate that our model and our hyperparameters are a good fit to the data
169 | #Model validation the wrong way
170 | from sklearn.datasets import load_iris
171 | iris = load_iris()
172 | X = iris.data
173 | y = iris.target
174 | #Here we'll use a k-neighbors classifier with n_neighbors=1 . This is a very simple and intuitive model that says 'the label of an unknown point is the same as the label of its closest training point'
175 | from sklearn.neighbors import KNeighborsClassifier
176 | model = KNeighborsClassifier(n_neighbors=1)
177 | #Then we train the model, and use it to predict labels for data we already know
178 | model.fit(X, y)
179 | y_model = model.predict(X)
180 | accuracy_score(y, y_model)
181 | 
182 | #Model validation the right way: Holdout sets
183 | # split the data with 50% in each set
184 | X1, X2, y1, y2 = train_test_split(X, y, random_state=0, train_size=0.5, test_size=0.5)
185 | # fit the model on one set of data
186 | model.fit(X1, y1)
187 | # evaluate the model on the second set of data
188 | y2_model = model.predict(X2)
189 | accuracy_score(y2, y2_model)
190 | 
191 | #Model validation via cross-validation
192 | y2_model = model.fit(X1, y1).predict(X2)
193 | y1_model = model.fit(X2, y2).predict(X1)
194 | accuracy_score(y1, y1_model), accuracy_score(y2, y2_model)
195 | #more than 2 sets
196 | from sklearn.model_selection import cross_val_score
197 | cross_val_score(model, X, y, cv=5)
198 | 
199 | #loo
200 | from sklearn.model_selection import LeaveOneOut
201 | scores = cross_val_score(model, X, y, cv=LeaveOneOut(len(X)))
202 | scores
203 | 
204 | scores.mean()
205 | 
206 | #Selecting the Best Model
207 | # Use a more complicated/more flexible model
208 | # Use a less complicated/less flexible model
209 | # Gather more training samples
210 | # Gather more data to add features to each sample
211 | 
212 | #The bias-variance trade-off
213 | #High-bias model: Underfits the data
214 | #High-variance model: Overfits the data
215 | #For high-bias models, the performance of the model on the validation set is similar to the performance on the training set.
216 | #For high-variance models, the performance of the model on the validation set is far worse than the performance on the training set.
217 | 
218 | #The training score is everywhere higher than the validation score. This is generally the case: the model will be a better fit to data it has seen than to data it has not seen.
219 | #For very low model complexity (a high-bias model), the training data is underfit, which means that the model is a poor predictor both for the training data and for any previously unseen data.
220 | #For very high model complexity (a high-variance model), the training data is overfit, which means that the model predicts the training data very well, but fails for any previously unseen data.
221 | #For some intermediate value, the validation curve has a maximum. This level of complexity indicates a suitable trade-off between bias and variance.
222 | 
223 | #Validation curves
224 | from sklearn.preprocessing import PolynomialFeatures
225 | from sklearn.linear_model import LinearRegression
226 | from sklearn.pipeline import make_pipeline
227 | def PolynomialRegression(degree=2, **kwargs):
228 |     return make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))
229 | 
230 | def make_data(N, err=1.0, rseed=1):
231 |     # randomly sample the data
232 |     rng = np.random.RandomState(rseed)
233 |     X = rng.rand(N, 1) ** 2
234 |     y = 10 - 1. / (X.ravel() + 0.1)
235 |     if err > 0:
236 |         y += err * rng.randn(N)
237 |     return X, y
238 | 
239 | X, y = make_data(40)
240 | 
241 | import seaborn; seaborn.set() # plot formatting
242 | X_test = np.linspace(-0.1, 1.1, 500)[:, None]
243 | plt.scatter(X.ravel(), y, color='black')
244 | axis = plt.axis()
245 | for degree in [1, 3, 5]:
246 |     y_test = PolynomialRegression(degree).fit(X, y).predict(X_test)
247 |     plt.plot(X_test.ravel(), y_test, label='degree={0}'.format(degree))
248 | plt.xlim(-0.1, 1.0)
249 | plt.ylim(-2, 12)
250 | plt.legend(loc='best');
251 | #degree of polynomial is knob controlling model complexity
252 | 
253 | from sklearn.model_selection import validation_curve
254 | degree = np.arange(0, 21)
255 | train_score, val_score = validation_curve(PolynomialRegression(), X, y, 'polynomialfeatures__degree', degree, cv=7)
256 | plt.plot(degree, np.median(train_score, 1), color='blue', label='training score')
257 | plt.plot(degree, np.median(val_score, 1), color='red', label='validation score')
258 | plt.legend(loc='best')
259 | plt.ylim(0, 1)
260 | plt.xlabel('degree')
261 | plt.ylabel('score');
262 | 
263 | plt.scatter(X.ravel(), y)
264 | lim = plt.axis()
265 | y_test = PolynomialRegression(3).fit(X, y).predict(X_test)
266 | plt.plot(X_test.ravel(), y_test);
267 | plt.axis(lim);
268 | 
269 | #TODO: Again using the same data you used for homework 2, fit polynomials and determine the optimal degree to use
270 | 
271 | #Optimal model will generally depend on size of training data
272 | X2, y2 = make_data(200)
273 | plt.scatter(X2.ravel(), y2);
274 | 
275 | degree = np.arange(21)
276 | train_score2, val_score2 = validation_curve(PolynomialRegression(), X2, y2,
277 | 'polynomialfeatures__degree',
278 | degree, cv=7)
279 | plt.plot(degree, np.median(train_score2, 1), color='blue',
280 | label='training score')
281 | plt.plot(degree, np.median(val_score2, 1), color='red', label='validation score')
282 | plt.plot(degree, np.median(train_score, 1), color='blue', alpha=0.3,
283 | linestyle='dashed')
284 | plt.plot(degree, np.median(val_score, 1), color='red', alpha=0.3,
285 | linestyle='dashed')
286 | plt.legend(loc='lower center')
287 | plt.ylim(0, 1)
288 | plt.xlabel('degree')
289 | plt.ylabel('score');
290 | #behavior of validation curve has two important inputs: complexity and number of training points
291 | 
292 | #plot of the training/validation score with respect to the size of the training set is known as a learning curve
293 | #A model of a given complexity will overfit a small dataset: this means the training score will be relatively high, while the validation score will be relatively low.
294 | #A model of a given complexity will underfit a large dataset: this means that the training score will decrease, but the validation score will increase.
295 | #A model will never, except by chance, give a better score to the validation set than the training set: this means the curves should keep getting closer together but never cross.
296 | 
297 | #The notable feature of the learning curve is the convergence to a particular score as the number of training samples grows. In particular, once you have enough points that a particular model has converged, adding more training data will not help you! The only way to increase model performance in this case is to use another (often more complex) model.
298 | 
299 | from sklearn.model_selection import learning_curve
300 | fig, ax = plt.subplots(1, 2, figsize=(16, 6))
301 | fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)
302 | for i, degree in enumerate([2, 9]):
303 |     N, train_lc, val_lc = learning_curve(PolynomialRegression(degree), X, y, cv=7, train_sizes=np.linspace(0.3, 1, 25))
304 |     ax[i].plot(N, np.mean(train_lc, 1), color='blue', label='training score')
305 |     ax[i].plot(N, np.mean(val_lc, 1), color='red', label='validation score')
306 |     ax[i].hlines(np.mean([train_lc[-1], val_lc[-1]]), N[0], N[-1], color='gray', linestyle='dashed')
307 |     ax[i].set_ylim(0, 1)
308 |     ax[i].set_xlim(N[0], N[-1])
309 |     ax[i].set_xlabel('training size')
310 |     ax[i].set_ylabel('score')
311 |     ax[i].set_title('degree = {0}'.format(degree), size=14)
312 |     ax[i].legend(loc='best')
313 | 
314 | 
315 | #In practice, models generally have more than one knob to turn, and thus plots of validation and learning curves change from lines to multidimensional surfaces. In these cases, such visualizations are difficult and we would rather simply find the particular model that maximizes the validation score.
316 | 
317 | #We will explore a three-dimensional grid of model features—namely, the polynomial degree, the flag telling us whether to fit the intercept, and the flag telling us whether to normalize the problem
318 | 
319 | from sklearn.model_selection import GridSearchCV
320 | param_grid = {'polynomialfeatures__degree': np.arange(21),
321 | 'linearregression__fit_intercept': [True, False],
322 | 'linearregression__normalize': [True, False]}
323 | grid = GridSearchCV(PolynomialRegression(), param_grid, cv=7)
324 | grid.fit(X, y);
325 | grid.best_params_
326 | 
327 | #with normalize == True, why is fit_intercept == False?
328 | 
329 | model = grid.best_estimator_
330 | 
331 | plt.scatter(X.ravel(), y)
332 | lim = plt.axis()
333 | y_test = model.fit(X, y).predict(X_test)
334 | plt.plot(X_test.ravel(), y_test);
335 | plt.axis(lim);
336 | 
337 | 
338 | #Feature engineering
339 | #one of the more important steps in using machine learning in practice is feature engineering—that is, taking whatever information you have about your problem and turning it into numbers that you can use to build your feature matrix
340 | 
341 | #categorical features
342 | data = [
343 |     {'price': 850000, 'rooms': 4, 'neighborhood': 'Queen Anne'},
344 |     {'price': 700000, 'rooms': 3, 'neighborhood': 'Fremont'},
345 |     {'price': 650000, 'rooms': 3, 'neighborhood': 'Wallingford'},
346 |     {'price': 600000, 'rooms': 2, 'neighborhood': 'Fremont'}
347 | ]
348 | #one-hot encoding; extra columns indicating the presence or absence of a category with a value of 1 or 0
349 | from sklearn.feature_extraction import DictVectorizer
350 | vec = DictVectorizer(sparse=False, dtype=int)
351 | vec.fit_transform(data) #notice it is in alphabetical order
352 | 
353 | vec.get_feature_names()
354 | 
355 | #if your category has many possible values, this can greatly increase the size of your dataset. However, because the encoded data contains mostly zeros, a sparse output can be a very efficient solution
356 | vec = DictVectorizer(sparse=True, dtype=int)
357 | vec.fit_transform(data)
358 | 
359 | 
360 | #text features
361 | #word counts
362 | sample = ['problem of evil',
363 | 'evil queen',
364 | 'horizon problem']
365 | from sklearn.feature_extraction.text import CountVectorizer
366 | vec = CountVectorizer()
367 | X = vec.fit_transform(sample)
368 | X
369 | 
370 | import pandas as pd
371 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names())
372 | 
373 | #down-weighting frequent words; term frequency–inverse document frequency (TF–IDF), which weights the word counts by a measure of how often they appear in the documents
374 | from sklearn.feature_extraction.text import TfidfVectorizer
375 | vec = TfidfVectorizer()
376 | X = vec.fit_transform(sample)
377 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names())
378 | 
379 | #if interested in image feature extraction, see SciKit-Image project
380 | 
381 | #derived features: transforming input - basis function regression
382 | x = np.array([1, 2, 3, 4, 5])
383 | y = np.array([4, 2, 1, 3, 7])
384 | plt.scatter(x, y);
385 | 
386 | from sklearn.linear_model import LinearRegression
387 | X = x[:, np.newaxis]
388 | model = LinearRegression().fit(X, y)
389 | yfit = model.predict(X)
390 | plt.scatter(x, y)
391 | plt.plot(x, yfit);
392 | 
393 | from sklearn.preprocessing import PolynomialFeatures
394 | poly = PolynomialFeatures(degree=3, include_bias=False)
395 | X2 = poly.fit_transform(X)
396 | print(X2)
397 | 
398 | model = LinearRegression().fit(X2, y)
399 | yfit = model.predict(X2)
400 | plt.scatter(x, y)
401 | plt.plot(x, yfit);
402 | 
403 | #TODO: Do the same as above, fitting a polynomial to your data, but use this PolynomialFeatures method instead
404 | 
405 | #imputation of missing data
406 | from numpy import nan
407 | X = np.array([[ nan, 0, 3],
408 | [ 3, 7, 9],
409 | [ 3, 5, 2],
410 | [ 4, nan, 6],
411 | [ 8, 8, 1]])
412 | y = np.array([14, 16, -1, 8, -5])
413 | #simply use the mean (also can use median or most_frequent value)
414 | from sklearn.preprocessing import Imputer
415 | imp = Imputer(strategy='mean')
416 | X2 = imp.fit_transform(X)
417 | X2
418 | 
419 | model = LinearRegression().fit(X2, y)
420 | model.predict(X2)
421 | #if missingness is problematic, consider MICE
422 | 
423 | #TODO: Fill in missing values in your data using different methods, and see if your substantive results change when modeling
424 | 
425 | #feature pipelines - suppose we want to:
426 | #1. Impute missing values using the mean
427 | #2. Transform features to quadratic
428 | #3. Fit a linear regression
429 | from sklearn.pipeline import make_pipeline
430 | model = make_pipeline(Imputer(strategy='mean'),
431 |     PolynomialFeatures(degree=2),
432 |     LinearRegression())
433 | 
434 | model.fit(X, y) # X with missing values, from above
435 | print(y)
436 | print(model.predict(X))
437 | 
438 | 
439 | 
440 | 
441 | 
442 | 


--------------------------------------------------------------------------------
/in-classMaterial/day13/ML2.py:
--------------------------------------------------------------------------------
  1 | #Naive Bayes classification
  2 | #fast and simple classification algorithm
  3 | #P(L|features) = P(features|L)P(L)/P(features)
  4 | #   -> P(L1|features)/P(L2|features) = P(features|L1)P(L1)/[P(features|L2)P(L2)]
  5 | #generative model: specifies the hypothetical random process that generates data
  6 | 
  7 | import numpy as np
  8 | import matplotlib.pyplot as plt
  9 | import seaborn as sns; sns.set()
 10 | 
 11 | #Gaussian naive Bayes: data from each label is drawn from simple Gaussian distribution
 12 | 
 13 | from sklearn.datasets import make_blobs
 14 | X, y = make_blobs(100, 2, centers=2, random_state=2, cluster_std=1.5)
 15 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='RdBu');
 16 | 
 17 | #find mean and standard deviation of points within a label, which defines the distribution
 18 | #can then compute posterior ratio for given point
 19 | 
 20 | from sklearn.naive_bayes import GaussianNB
 21 | model = GaussianNB()
 22 | model.fit(X, y);
 23 | 
 24 | rng = np.random.RandomState(0)
 25 | Xnew = [-6, -14] + [14, 18] * rng.rand(2000, 2)
 26 | ynew = model.predict(Xnew)
 27 | 
 28 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='RdBu')
 29 | lim = plt.axis()
 30 | plt.scatter(Xnew[:, 0], Xnew[:, 1], c=ynew, s=20, cmap='RdBu', alpha=0.1)
 31 | plt.axis(lim);
 32 | 
 33 | #in general, boundary in Gaussian naive Bayes is quadratic
 34 | #allows for probabilistic classification
 35 | 
 36 | yprob = model.predict_proba(Xnew)
 37 | yprob[-8:].round(2)
 38 | 
 39 | #Multinomial naive Bayes
 40 | #features assumed to be generated from simple multinomial distribution (prob of observing counts among a number of categories - most useful when features that represent counts or rates)
 41 | 
 42 | from sklearn.datasets import fetch_20newsgroups
 43 | data = fetch_20newsgroups()
 44 | data.target_names
 45 | 
 46 | #select a few categories for simplicity
 47 | categories = ['talk.religion.misc', 'soc.religion.christian', 'sci.space',
 48 | 'comp.graphics']
 49 | train = fetch_20newsgroups(subset='train', categories=categories)
 50 | test = fetch_20newsgroups(subset='test', categories=categories)
 51 | print(train.data[5])
 52 | 
 53 | #TF-IDF vectorizer
 54 | from sklearn.feature_extraction.text import TfidfVectorizer
 55 | from sklearn.naive_bayes import MultinomialNB
 56 | from sklearn.pipeline import make_pipeline
 57 | 
 58 | #create a pipeline
 59 | model = make_pipeline(TfidfVectorizer(), MultinomialNB())
 60 | 
 61 | #fit the model and predict
 62 | model.fit(train.data, train.target)
 63 | labels = model.predict(test.data)
 64 | 
 65 | #confusion matrix
 66 | from sklearn.metrics import confusion_matrix
 67 | mat = confusion_matrix(test.target, labels)
 68 | sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
 69 | xticklabels=train.target_names, yticklabels=train.target_names)
 70 | plt.xlabel('true label')
 71 | plt.ylabel('predicted label');
 72 | 
 73 | #prediction for a single string
 74 | def predict_category(s, train=train, model=model):
 75 |     pred = model.predict([s])
 76 |     return train.target_names[pred[0]]
 77 | 
 78 | predict_category('sending a payload to the ISS')
 79 | predict_category('discussing islam vs atheism')
 80 | predict_category('determining the screen resolution')
 81 | 
 82 | #naive Bayes is usually out-performed by more complicated models, but
 83 | # They are extremely fast for both training and prediction
 84 | # They provide straightforward probabilistic prediction
 85 | # They are often very easily interpretable
 86 | # They have very few (if any) tunable parameters
 87 | 
 88 | #Works well when:
 89 | # When the naive assumptions actually match the data (very rare in practice)
 90 | # For very well-separated categories, when model complexity is less important
 91 | # For very high-dimensional data, when model complexity is less important
 92 | 
 93 | #Linear regression
 94 | #good starting point for regression tasks
 95 | 
 96 | import numpy as np
 97 | 
 98 | rng = np.random.RandomState(1)
 99 | x = 10 * rng.rand(50)
100 | y = 2 * x - 5 + rng.randn(50)
101 | plt.scatter(x, y);
102 | 
103 | from sklearn.linear_model import LinearRegression
104 | model = LinearRegression(fit_intercept=True)
105 | model.fit(x[:, np.newaxis], y)
106 | xfit = np.linspace(0, 10, 1000)
107 | yfit = model.predict(xfit[:, np.newaxis])
108 | plt.scatter(x, y)
109 | plt.plot(xfit, yfit);
110 | 
111 | print("Model slope:", model.coef_[0])
112 | print("Model intercept:", model.intercept_)
113 | 
114 | #multidimensional
115 | rng = np.random.RandomState(1)
116 | X = 10 * rng.rand(100, 3)
117 | y = 0.5 + np.dot(X, [1.5, -2., 1.])
118 | model.fit(X, y)
119 | print(model.intercept_)
120 | print(model.coef_)
121 | 
122 | #basis functions - transform the data through a function
123 | 
124 | #polynomial basis functions
125 | from sklearn.preprocessing import PolynomialFeatures
126 | x = np.array([2, 3, 4])
127 | poly = PolynomialFeatures(3, include_bias=False)
128 | poly.fit_transform(x[:, None])
129 | 
130 | from sklearn.pipeline import make_pipeline
131 | poly_model = make_pipeline(PolynomialFeatures(7),
132 | LinearRegression())
133 | 
134 | rng = np.random.RandomState(1)
135 | x = 10 * rng.rand(50)
136 | y = np.sin(x) + 0.1 * rng.randn(50)
137 | poly_model.fit(x[:, np.newaxis], y)
138 | yfit = poly_model.predict(xfit[:, np.newaxis])
139 | plt.scatter(x, y)
140 | plt.plot(xfit, yfit);
141 | 
142 | #Gaussian basis functions
143 | from sklearn.base import BaseEstimator, TransformerMixin
144 | 
145 | class GaussianFeatures(BaseEstimator, TransformerMixin):
146 |     """Uniformly spaced Gaussian features for one-dimensional input"""
147 |     def __init__(self, N, width_factor=2.0):
148 |         self.N = N
149 |         self.width_factor = width_factor
150 | 
151 |     @staticmethod
152 |     def _gauss_basis(x, y, width, axis=None):
153 |         arg = (x - y) / width
154 |         return np.exp(-0.5 * np.sum(arg ** 2, axis))
155 | 
156 |     def fit(self, X, y=None):
157 |         # create N centers spread along the data range
158 |         self.centers_ = np.linspace(X.min(), X.max(), self.N)
159 |         self.width_ = self.width_factor *  (self.centers_[1] - self.centers_[0])
160 |         return self
161 | 
162 |     def transform(self, X):
163 |         return self._gauss_basis(X[:, :, np.newaxis], self.centers_, self.width_, axis=1)
164 | 
165 | gauss_model = make_pipeline(GaussianFeatures(20), LinearRegression())
166 | gauss_model.fit(x[:, np.newaxis], y)
167 | yfit = gauss_model.predict(xfit[:, np.newaxis])
168 | plt.scatter(x, y)
169 | plt.plot(xfit, yfit)
170 | plt.xlim(0, 10);
171 | 
172 | #regularization
173 | #if use too many basis functions leads to overfitting
174 | model = make_pipeline(GaussianFeatures(30),
175 | LinearRegression())
176 | model.fit(x[:, np.newaxis], y)
177 | plt.scatter(x, y)
178 | plt.plot(xfit, model.predict(xfit[:, np.newaxis]))
179 | plt.xlim(0, 10)
180 | plt.ylim(-1.5, 1.5);
181 | 
182 | #plot the coefficients of Gaussian bases with respect to their location
183 | def basis_plot(model, title=None):
184 |     fig, ax = plt.subplots(2, sharex=True)
185 |     model.fit(x[:, np.newaxis], y)
186 |     ax[0].scatter(x, y)
187 |     ax[0].plot(xfit, model.predict(xfit[:, np.newaxis]))
188 |     ax[0].set(xlabel='x', ylabel='y', ylim=(-1.5, 1.5))
189 | 
190 |     if title:
191 |         ax[0].set_title(title)
192 | 
193 |     ax[1].plot(model.steps[0][1].centers_, model.steps[1][1].coef_)
194 |     ax[1].set(xlabel='basis location', ylabel='coefficient', xlim=(0, 10))
195 | 
196 | model = make_pipeline(GaussianFeatures(30), LinearRegression())
197 | basis_plot(model)
198 | 
199 | #This is typical overfitting behavior when basis functions overlap: the coefficients of adjacent basis functions blow up and cancel each other out. We know that such behavior is problematic, and it would be nice if we could limit such spikes explicitly in the model by penalizing large values of the model parameters. Such a penalty is known as regularization, and comes in several forms.
200 | 
201 | #Ridge regression (L2 regularization)
202 | #penalizing sum of squares of model coefficients
203 | #P = α∑ θ^2_n
204 | #where α is a free parameter that controls the strength of the penalty
205 | from sklearn.linear_model import Ridge
206 | model = make_pipeline(GaussianFeatures(30), Ridge(alpha=0.1))
207 | basis_plot(model, title='Ridge Regression')
208 | 
209 | #as alpha goes to zero, recover standard linear regression, as it goes to infinity, all model responses will be suppressed
210 | #ridge regression is very efficient computationally
211 | 
212 | #Lasso regularization (L1)
213 | #P = α∑ |θ_n|
214 | #due to geometric reasons lasso regression tends to favor sparse models where possible; that is, it preferentially sets model coefficients to exactly zero
215 | from sklearn.linear_model import Lasso
216 | model = make_pipeline(GaussianFeatures(30), Lasso(alpha=0.01))
217 | basis_plot(model, title='Lasso Regression')
218 | 
219 | 
220 | 
221 | import pandas as pd
222 | #read in data
223 | counts = pd.read_csv('KocPython2020/in-classMaterial/day13/FremontHourly.csv', index_col='Date', parse_dates=True)
224 | weather = pd.read_csv('KocPython2020/in-classMaterial/day13/SeaTacWeather.csv', index_col='DATE', parse_dates=True)
225 | 
226 | #get totals in day
227 | daily = counts.resample('d').sum()
228 | daily['Total'] = daily.sum(axis=1)
229 | daily = daily[['Total']] # remove other columns
230 | days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
231 | for i in range(7):
232 |     daily[days[i]] = (daily.index.dayofweek == i).astype(float)
233 | 
234 | #include holidays
235 | from pandas.tseries.holiday import USFederalHolidayCalendar
236 | cal = USFederalHolidayCalendar()
237 | holidays = cal.holidays('2012', '2016')
238 | daily = daily.join(pd.Series(1, index=holidays, name='holiday'))
239 | daily['holiday'].fillna(0, inplace=True)
240 | 
241 | #hours of daylight
242 | def hours_of_daylight(date, axis=23.44, latitude=47.61):
243 |     """Compute the hours of daylight for the given date"""
244 |     days = (date - pd.datetime(2000, 12, 21)).days
245 |     m = (1. - np.tan(np.radians(latitude)) * np.tan(np.radians(axis) * np.cos(days * 2 * np.pi / 365.25)))
246 |     return 24. * np.degrees(np.arccos(1 - np.clip(m, 0, 2))) / 180.
247 | 
248 | #plot sunlight
249 | daily['daylight_hrs'] = list(map(hours_of_daylight, daily.index))
250 | daily[['daylight_hrs']].plot();
251 | 
252 | # temperatures are in 1/10 deg C; convert to C
253 | weather['TMIN'] /= 10
254 | weather['TMAX'] /= 10
255 | weather['Temp (C)'] = 0.5 * (weather['TMIN'] + weather['TMAX'])
256 | # precip is in 1/10 mm; convert to inches
257 | weather['PRCP'] /= 254
258 | weather['dry day'] = (weather['PRCP'] == 0).astype(int)
259 | daily = daily.join(weather[['PRCP', 'Temp (C)', 'dry day']])
260 | 
261 | daily['annual'] = (daily.index - daily.index[0]).days / 365.
262 | 
263 | daily.head()
264 | 
265 | #linear regression
266 | column_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun', 'holiday',
267 | 'daylight_hrs', 'PRCP', 'dry day', 'Temp (C)', 'annual']
268 | X = daily[column_names]
269 | y = daily['Total']
270 | model = LinearRegression(fit_intercept=False) #why do we set intercept to false?
271 | model.fit(X, y)
272 | daily['predicted'] = model.predict(X)
273 | 
274 | daily[['Total', 'predicted']].plot(alpha=0.5);
275 | 
276 | #how much does each feature contribute?
277 | params = pd.Series(model.coef_, index=X.columns)
278 | params
279 | 
280 | #measure uncertainty through bootstrap
281 | from sklearn.utils import resample
282 | np.random.seed(1)
283 | err = np.std([model.fit(*resample(X, y)).coef_ for i in range(1000)], 0)
284 | 
285 | print(pd.DataFrame({'effect': params.round(0), 'error': err.round(0)}))
286 | 
287 | #Our model is almost certainly missing some relevant information. For example, non‐linear effects (such as effects of precipitation and cold temperature) and nonlinear trends within each variable (such as disinclination to ride at very cold and very hot temperatures) cannot be accounted for in this model. Additionally, we have thrown away some of the finer-grained information (such as the difference between a rainy morning and a rainy afternoon), and we have ignored correlations between days (such as the possible effect of a rainy Tuesday on Wednesday’s numbers, or the effect of an unexpected sunny day after a streak of rainy days)
288 | 
289 | 
290 | #Support vector machines (SVMs)
291 | #both classification and regression
292 | import numpy as np
293 | import matplotlib.pyplot as plt
294 | from scipy import stats
295 | # use Seaborn plotting defaults
296 | import seaborn as sns; sns.set()
297 | 
298 | #discriminative classification: rather than modeling each class, we simply find a line or curve (in two dimensions) or manifold (in multiple dimensions) that divides the classes from each other
299 | 
300 | from sklearn.datasets.samples_generator import make_blobs
301 | X, y = make_blobs(n_samples=50, centers=2,
302 | random_state=0, cluster_std=0.60)
303 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn');
304 | 
305 | #multiple lines can discriminate
306 | xfit = np.linspace(-1, 3.5)
307 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
308 | plt.plot([0.6], [2.1], 'x', color='red', markeredgewidth=2, markersize=10) #one marked x will vary in prediction based on line choice
309 | for m, b in [(1, 0.65), (0.5, 1.6), (-0.2, 2.9)]:
310 |     plt.plot(xfit, m * xfit + b, '-k')
311 | 
312 | plt.xlim(-1, 3.5);
313 | 
314 | #rather than simply drawing a zero-width line between the classes, we can draw around each line a margin of some width, up to the nearest point
315 | 
316 | xfit = np.linspace(-1, 3.5)
317 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
318 | 
319 | for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
320 |     yfit = m * xfit + b
321 |     plt.plot(xfit, yfit, '-k')
322 |     plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none', color='#AAAAAA', alpha=0.4)
323 |     
324 | plt.xlim(-1, 3.5);
325 | 
326 | #In support vector machines, the line that maximizes this margin is the one we will choose as the optimal model. Support vector machines are an example of such a maximum margin estimator
327 | 
328 | from sklearn.svm import SVC # "Support vector classifier"
329 | model = SVC(kernel='linear', C=1E10)
330 | model.fit(X, y)
331 | 
332 | #To better visualize what’s happening here, let’s create a quick convenience function that will plot SVM decision boundaries for us
333 | 
334 | def plot_svc_decision_function(model, ax=None, plot_support=True):
335 |     """Plot the decision function for a two-dimensional SVC"""
336 |     if ax is None:
337 |         ax = plt.gca()
338 |     xlim = ax.get_xlim()
339 |     ylim = ax.get_ylim()
340 | 
341 |     # create grid to evaluate model
342 |     x = np.linspace(xlim[0], xlim[1], 30)
343 |     y = np.linspace(ylim[0], ylim[1], 30)
344 |     Y, X = np.meshgrid(y, x)
345 |     xy = np.vstack([X.ravel(), Y.ravel()]).T
346 |     P = model.decision_function(xy).reshape(X.shape)
347 | 
348 |     # plot decision boundary and margins
349 |     ax.contour(X, Y, P, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--'])
350 |     
351 |     # plot support vectors
352 |     if plot_support:
353 |         ax.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=300, linewidth=1, facecolors='none');
354 |     ax.set_xlim(xlim)
355 |     ax.set_ylim(ylim)
356 | 
357 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
358 | plot_svc_decision_function(model);
359 | 
360 | #points touching line are the pivotal elements of this fit, and are known as the support vectors, and give the algorithm its name
361 | model.support_vectors_
362 | #notice that these points define the model, and new points will not necessarily change it
363 | 
364 | 
365 | #Beyond linear boundaries: Kernel SVM
366 | from sklearn.datasets.samples_generator import make_circles
367 | X, y = make_circles(100, factor=.1, noise=.1)
368 | clf = SVC(kernel='linear').fit(X, y)
369 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
370 | plot_svc_decision_function(clf, plot_support=False);
371 | 
372 | #radial basis function
373 | r = np.exp(-(X ** 2).sum(1))
374 | #makes data trivially linear
375 | 
376 | 
377 | #One strategy to this end is to compute a basis function centered at every point in the dataset, and let the SVM algorithm sift through the results. This type of basis function transformation is known as a kernel transformation, as it is based on a similarity relationship (or kernel) between each pair of points
378 | 
379 | #kernel trick, a fit on kernel-transformed data can be done implicitly-that is, without ever building the full N - dimensional representation of the kernel projection
380 | 
381 | clf = SVC(kernel='rbf', C=1E6)
382 | clf.fit(X, y)
383 | 
384 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn')
385 | plot_svc_decision_function(clf)
386 | plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
387 | s=300, lw=1, facecolors='none');
388 | 
389 | 
390 | #Tuning the SVM: Softening margins
391 | #messy data
392 | X, y = make_blobs(n_samples=100, centers=2,
393 | random_state=0, cluster_std=1.2)
394 | plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='autumn');
395 | 
396 | #The hardness of the margin is controlled by a tuning parameter, most often known as C . For very large C , the margin is hard, and points cannot lie in it. For smaller C , the margin is softer, and can grow to encompass some points.
397 | X, y = make_blobs(n_samples=100, centers=2, random_state=0, cluster_std=0.8)
398 | fig, ax = plt.subplots(1, 2, figsize=(16, 6))
399 | fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)
400 | 
401 | for axi, C in zip(ax, [10.0, 0.1]):
402 |     model = SVC(kernel='linear', C=C).fit(X, y)
403 |     axi.scatter(X[:, 0], X[:, 1], c=y, s=50,   cmap='autumn')
404 |     plot_svc_decision_function(model, axi)
405 |     axi.scatter(model.support_vectors_[:, 0], model.support_vectors_[:, 1], s=300, lw=1, facecolors='none');
406 |     axi.set_title('C = {0:.1f}'.format(C), size=14)
407 | 
408 | #optimum value of C needs to be tuned through cross-validation
409 | 
410 | 
411 | 
412 | #example: face detection
413 | from sklearn.datasets import fetch_lfw_people
414 | faces = fetch_lfw_people(min_faces_per_person=60)
415 | print(faces.target_names)
416 | print(faces.images.shape)
417 | 
418 | fig, ax = plt.subplots(3, 5)
419 | for i, axi in enumerate(ax.flat):
420 |     axi.imshow(faces.images[i], cmap='bone')
421 |     axi.set(xticks=[], yticks=[], xlabel=faces.target_names[faces.target[i]])
422 | 
423 | 
424 | #use PCA to lower the dimensionality
425 | from sklearn.svm import SVC
426 | from sklearn.decomposition import PCA
427 | from sklearn.pipeline import make_pipeline
428 | pca = PCA(n_components=150, whiten=True, random_state=42, svd_solver = 'randomized')
429 | svc = SVC(kernel='rbf', class_weight='balanced')
430 | model = make_pipeline(pca, svc)
431 | 
432 | #split the data
433 | from sklearn.model_selection import train_test_split
434 | Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target, random_state=42)
435 | 
436 | #grid search - CV to emplore parameters C (margin of hardness) and gamma (size of radial basis function kernel)
437 | from sklearn.grid_search import GridSearchCV
438 | param_grid = {'svc__C': [1, 5, 10, 50], 'svc__gamma': [0.0001, 0.0005, 0.001, 0.005]}
439 | grid = GridSearchCV(model, param_grid)
440 | 
441 | #run the search and time it
442 | %time grid.fit(Xtrain, ytrain)
443 | print(grid.best_params_)
444 | 
445 | #predict
446 | model = grid.best_estimator_
447 | yfit = model.predict(Xtest)
448 | 
449 | fig, ax = plt.subplots(4, 6)
450 | for i, axi in enumerate(ax.flat):
451 |     axi.imshow(Xtest[i].reshape(62, 47), cmap='bone')
452 |     axi.set(xticks=[], yticks=[])
453 |     axi.set_ylabel(faces.target_names[yfit[i]].split()[-1],
454 |     color='black' if yfit[i] == ytest[i] else 'red')
455 |     
456 | fig.suptitle('Predicted Names; Incorrect Labels in Red', size=14);
457 | 
458 | #classification report
459 | from sklearn.metrics import classification_report
460 | print(classification_report(ytest, yfit, target_names=faces.target_names))
461 | 
462 | #confusion matrix
463 | from sklearn.metrics import confusion_matrix
464 | mat = confusion_matrix(ytest, yfit)
465 | sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False, xticklabels=faces.target_names, yticklabels=faces.target_names)
466 | plt.xlabel('true label')
467 | plt.ylabel('predicted label');
468 | 
469 | 
470 | #These methods are a powerful classification method for a number of reasons:
471 | # Their dependence on relatively few support vectors means that they are very compact models, and take up very little memory.
472 | # Once the model is trained, the prediction phase is very fast.
473 | # Because they are affected only by points near the margin, they work well with high-dimensional data—even data with more dimensions than samples, which is a challenging regime for other algorithms.
474 | # Their integration with kernel methods makes them very versatile, able to adapt to many types of data.
475 | 
476 | #However, SVMs have several disadvantages as well:
477 | # The scaling with the number of samples N is 0 N 3 at worst, or 0 N 2 for efficient implementations. For large numbers of training samples, this computational cost can be prohibitive.
478 | # The results are strongly dependent on a suitable choice for the softening parameter C . This must be carefully chosen via cross-validation, which can be expensive as datasets grow in size.
479 | # The results do not have a direct probabilistic interpretation. This can be estimated via an internal cross-validation (see the probability parameter of SVC ), but this extra estimation is costly.
480 | 
481 | 
482 | 
483 | 
484 | 


--------------------------------------------------------------------------------
/in-classMaterial/day16/GP1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day16/GP1.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day17/GP2.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | def kernel(X1, X2, l=1.0, sigma_f=1.0):
  4 |     ''' Isotropic squared exponential kernel. Computes a covariance matrix from points in X1 and X2. Args: X1: Array of m points (m x d). X2: Array of n points (n x d). Returns: Covariance matrix (m x n). '''
  5 |     sqdist = np.sum(X1**2, 1).reshape(-1, 1) + np.sum(X2**2, 1) - 2 * np.dot(X1, X2.T)
  6 |     return sigma_f**2 * np.exp(-0.5 / l**2 * sqdist)
  7 | 
  8 | import matplotlib.pyplot as plt
  9 | 
 10 | from matplotlib import cm
 11 | from mpl_toolkits.mplot3d import Axes3D
 12 | 
 13 | def plot_gp(mu, cov, X, X_train=None, Y_train=None, samples=[]):
 14 |     X = X.ravel()
 15 |     mu = mu.ravel()
 16 |     uncertainty = 1.96 * np.sqrt(np.diag(cov))
 17 |     
 18 |     plt.fill_between(X, mu + uncertainty, mu - uncertainty, alpha=0.1)
 19 |     plt.plot(X, mu, label='Mean')
 20 |     for i, sample in enumerate(samples):
 21 |         plt.plot(X, sample, lw=1, ls='--', label=f'Sample {i+1}')
 22 |     if X_train is not None:
 23 |         plt.plot(X_train, Y_train, 'rx')
 24 |     plt.legend()
 25 | 
 26 | def plot_gp_2D(gx, gy, mu, X_train, Y_train, title, i):
 27 |     ax = plt.gcf().add_subplot(1, 2, i, projection='3d')
 28 |     ax.plot_surface(gx, gy, mu.reshape(gx.shape), cmap=cm.coolwarm, linewidth=0, alpha=0.2, antialiased=False)
 29 |     ax.scatter(X_train[:,0], X_train[:,1], Y_train, c=Y_train, cmap=cm.coolwarm)
 30 |     ax.set_title(title)
 31 | 
 32 | # Finite number of points
 33 | X = np.arange(-5, 5, 0.2).reshape(-1, 1)
 34 | 
 35 | # Mean and covariance of the prior
 36 | mu = np.zeros(X.shape)
 37 | cov = kernel(X, X)
 38 | 
 39 | # Draw three samples from the prior
 40 | samples = np.random.multivariate_normal(mu.ravel(), cov, 3)
 41 | 
 42 | # Plot GP mean, confidence interval and samples
 43 | plot_gp(mu, cov, X, samples=samples)
 44 | 
 45 | 
 46 | from numpy.linalg import inv
 47 | 
 48 | def posterior_predictive(X_s, X_train, Y_train, l=1.0, sigma_f=1.0, sigma_y=1e-8):
 49 |     ''' Computes the suffifient statistics of the GP posterior predictive distribution from m training data X_train and Y_train and n new inputs X_s. Args: X_s: New input locations (n x d). X_train: Training locations (m x d). Y_train: Training targets (m x 1). l: Kernel length parameter. sigma_f: Kernel vertical variation parameter. sigma_y: Noise parameter. Returns: Posterior mean vector (n x d) and covariance matrix (n x n). '''
 50 |     K = kernel(X_train, X_train, l, sigma_f) + sigma_y**2 * np.eye(len(X_train))
 51 |     K_s = kernel(X_train, X_s, l, sigma_f)
 52 |     K_ss = kernel(X_s, X_s, l, sigma_f) + 1e-8 * np.eye(len(X_s))
 53 |     K_inv = inv(K)
 54 |     
 55 |     # Equation (4)
 56 |     mu_s = K_s.T.dot(K_inv).dot(Y_train)
 57 | 
 58 |     # Equation (5)
 59 |     cov_s = K_ss - K_s.T.dot(K_inv).dot(K_s)
 60 |     
 61 |     return mu_s, cov_s
 62 | 
 63 | # Noise free training data
 64 | X_train = np.array([-4, -3, -2, -1, 1]).reshape(-1, 1)
 65 | Y_train = np.sin(X_train)
 66 | 
 67 | # Compute mean and covariance of the posterior predictive distribution
 68 | mu_s, cov_s = posterior_predictive(X, X_train, Y_train)
 69 | 
 70 | samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 3)
 71 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train, samples=samples)
 72 | 
 73 | noise = 0.4
 74 | 
 75 | # Noisy training data
 76 | X_train = np.arange(-3, 4, 1).reshape(-1, 1)
 77 | Y_train = np.sin(X_train) + noise * np.random.randn(*X_train.shape)
 78 | 
 79 | # Compute mean and covariance of the posterior predictive distribution
 80 | mu_s, cov_s = posterior_predictive(X, X_train, Y_train, sigma_y=noise)
 81 | 
 82 | samples = np.random.multivariate_normal(mu_s.ravel(), cov_s, 3)
 83 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train, samples=samples)
 84 | 
 85 | 
 86 | params = [
 87 |     (0.3, 1.0, 0.2),
 88 |     (3.0, 1.0, 0.2),
 89 |     (1.0, 0.3, 0.2),
 90 |     (1.0, 3.0, 0.2),
 91 |     (1.0, 1.0, 0.05),
 92 |     (1.0, 1.0, 1.5),
 93 | ]
 94 | 
 95 | plt.figure(figsize=(12, 5))
 96 | 
 97 | for i, (l, sigma_f, sigma_y) in enumerate(params):
 98 |     mu_s, cov_s = posterior_predictive(X, X_train, Y_train, l=l, 
 99 |                                        sigma_f=sigma_f, 
100 |                                        sigma_y=sigma_y)
101 |     plt.subplot(3, 2, i + 1)
102 |     plt.title(f'l = {l}, sigma_f = {sigma_f}, sigma_y = {sigma_y}')
103 |     plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)
104 | 
105 | from numpy.linalg import cholesky
106 | from scipy.optimize import minimize
107 | 
108 | def nll_fn(X_train, Y_train, noise):
109 |     ''' Returns a function that computes the negative log-likelihood for training data X_train and Y_train and given noise level. Args: X_train: training locations (m x d). Y_train: training targets (m x 1). noise: known noise level of Y_train. Returns: Minimization objective. '''
110 |     def step(theta):
111 |         K = kernel(X_train, X_train, l=theta[0], sigma_f=theta[1]) + \
112 |             noise**2 * np.eye(len(X_train))
113 |         # Compute determinant via Cholesky decomposition
114 |         return np.sum(np.log(np.diagonal(cholesky(K)))) + \
115 |                0.5 * Y_train.T.dot(inv(K).dot(Y_train)) + \
116 |                0.5 * len(X_train) * np.log(2*np.pi)
117 |     return step
118 | 
119 | # Minimize the negative log-likelihood w.r.t. parameters l and sigma_f.
120 | # We should actually run the minimization several times with different
121 | # initializations to avoid local minima but this is skipped here for
122 | # simplicity.
123 | res = minimize(nll_fn(X_train, Y_train, noise), [1, 1], 
124 |                bounds=((1e-5, None), (1e-5, None)),
125 |                method='L-BFGS-B')
126 | 
127 | # Store the optimization results in global variables so that we can
128 | # compare it later with the results from other implementations.
129 | l_opt, sigma_f_opt = res.x
130 | l_opt, sigma_f_opt
131 | 
132 | # Compute the prosterior predictive statistics with optimized kernel parameters and plot the results
133 | mu_s, cov_s = posterior_predictive(X, X_train, Y_train, l=l_opt, sigma_f=sigma_f_opt, sigma_y=noise)
134 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)
135 | 
136 | noise_2D = 0.1
137 | 
138 | rx, ry = np.arange(-5, 5, 0.3), np.arange(-5, 5, 0.3)
139 | gx, gy = np.meshgrid(rx, rx)
140 | 
141 | X_2D = np.c_[gx.ravel(), gy.ravel()]
142 | 
143 | X_2D_train = np.random.uniform(-4, 4, (100, 2))
144 | Y_2D_train = np.sin(0.5 * np.linalg.norm(X_2D_train, axis=1)) + \
145 |              noise_2D * np.random.randn(len(X_2D_train))
146 | 
147 | plt.figure(figsize=(14,7))
148 | 
149 | mu_s, _ = posterior_predictive(X_2D, X_2D_train, Y_2D_train, sigma_y=noise_2D)
150 | plot_gp_2D(gx, gy, mu_s, X_2D_train, Y_2D_train, 
151 |            f'Before parameter optimization: l={1.00} sigma_f={1.00}', 1)
152 | 
153 | res = minimize(nll_fn(X_2D_train, Y_2D_train, noise_2D), [1, 1], 
154 |                bounds=((1e-5, None), (1e-5, None)),
155 |                method='L-BFGS-B')
156 | 
157 | mu_s, _ = posterior_predictive(X_2D, X_2D_train, Y_2D_train, *res.x, sigma_y=noise_2D)
158 | plot_gp_2D(gx, gy, mu_s, X_2D_train, Y_2D_train,
159 |            f'After parameter optimization: l={res.x[0]:.2f} sigma_f={res.x[1]:.2f}', 2)
160 |            
161 | from sklearn.gaussian_process import GaussianProcessRegressor
162 | from sklearn.gaussian_process.kernels import ConstantKernel, RBF
163 | 
164 | rbf = ConstantKernel(1.0) * RBF(length_scale=1.0)
165 | gpr = GaussianProcessRegressor(kernel=rbf, alpha=noise**2)
166 | 
167 | # Reuse training data from previous 1D example
168 | gpr.fit(X_train, Y_train)
169 | 
170 | # Compute posterior predictive mean and covariance
171 | mu_s, cov_s = gpr.predict(X, return_cov=True)
172 | 
173 | # Obtain optimized kernel parameters
174 | l = gpr.kernel_.k2.get_params()['length_scale']
175 | sigma_f = np.sqrt(gpr.kernel_.k1.get_params()['constant_value'])
176 | 
177 | # Compare with previous results
178 | assert(np.isclose(l_opt, l))
179 | assert(np.isclose(np.round(sigma_f_opt,4), np.round(sigma_f,4)))
180 | 
181 | # Plot the results
182 | plot_gp(mu_s, cov_s, X, X_train=X_train, Y_train=Y_train)
183 | 
184 | import GPy
185 | 
186 | rbf = GPy.kern.RBF(input_dim=1, variance=1.0, lengthscale=1.0)
187 | gpr = GPy.models.GPRegression(X_train, Y_train, rbf)
188 | 
189 | # Fix the noise variance to known value
190 | gpr.Gaussian_noise.variance = noise**2
191 | gpr.Gaussian_noise.variance.fix()
192 | 
193 | # Run optimization
194 | gpr.optimize();
195 | 
196 | # Obtain optimized kernel parameters
197 | l = gpr.rbf.lengthscale.values[0]
198 | sigma_f = np.sqrt(gpr.rbf.variance.values[0])
199 | 
200 | # Compare with previous results
201 | assert(np.isclose(l_opt, l))
202 | assert(np.isclose(np.round(sigma_f_opt,4), np.round(sigma_f,4)))
203 | 
204 | # Plot the results with the built-in plot function
205 | gpr.plot();
206 | 
207 | from sklearn.gaussian_process import GaussianProcess
208 | from matplotlib import pyplot as pl
209 | 
210 | np.random.seed(1)
211 | 
212 | 
213 | def f(x):
214 |     """The function to predict."""
215 |     return x * np.sin(x)
216 | 
217 | #----------------------------------------------------------------------
218 | #  First the noiseless case
219 | X = np.atleast_2d([1., 3., 5., 6., 7., 8.]).T
220 | 
221 | # Observations
222 | y = f(X).ravel()
223 | 
224 | # Mesh the input space for evaluations of the real function, the prediction and
225 | # its MSE
226 | x = np.atleast_2d(np.linspace(0, 10, 1000)).T
227 | 
228 | # Instanciate a Gaussian Process model
229 | gp = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4, thetaU=1e-1,
230 |                      random_start=100)
231 | 
232 | # Fit to data using Maximum Likelihood Estimation of the parameters
233 | gp.fit(X, y)
234 | 
235 | # Make the prediction on the meshed x-axis (ask for MSE as well)
236 | y_pred, MSE = gp.predict(x, eval_MSE=True)
237 | sigma = np.sqrt(MSE)
238 | 
239 | # Plot the function, the prediction and the 95% confidence interval based on
240 | # the MSE
241 | fig = pl.figure()
242 | pl.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
243 | pl.plot(X, y, 'r.', markersize=10, label=u'Observations')
244 | pl.plot(x, y_pred, 'b-', label=u'Prediction')
245 | pl.fill(np.concatenate([x, x[::-1]]),
246 |         np.concatenate([y_pred - 1.9600 * sigma,
247 |                        (y_pred + 1.9600 * sigma)[::-1]]),
248 |         alpha=.5, fc='b', ec='None', label='95% confidence interval')
249 | pl.xlabel('$x$')
250 | pl.ylabel('$f(x)$')
251 | pl.ylim(-10, 20)
252 | pl.legend(loc='upper left')
253 | 
254 | #----------------------------------------------------------------------
255 | # now the noisy case
256 | X = np.linspace(0.1, 9.9, 20)
257 | X = np.atleast_2d(X).T
258 | 
259 | # Observations and noise
260 | y = f(X).ravel()
261 | dy = 0.5 + 1.0 * np.random.random(y.shape)
262 | noise = np.random.normal(0, dy)
263 | y += noise
264 | 
265 | # Mesh the input space for evaluations of the real function, the prediction and
266 | # its MSE
267 | x = np.atleast_2d(np.linspace(0, 10, 1000)).T
268 | 
269 | # Instanciate a Gaussian Process model
270 | gp = GaussianProcess(corr='squared_exponential', theta0=1e-1,
271 |                      thetaL=1e-3, thetaU=1,
272 |                      nugget=(dy / y) ** 2,
273 |                      random_start=100)
274 | 
275 | # Fit to data using Maximum Likelihood Estimation of the parameters
276 | gp.fit(X, y)
277 | 
278 | # Make the prediction on the meshed x-axis (ask for MSE as well)
279 | y_pred, MSE = gp.predict(x, eval_MSE=True)
280 | sigma = np.sqrt(MSE)
281 | 
282 | # Plot the function, the prediction and the 95% confidence interval based on
283 | # the MSE
284 | fig = pl.figure()
285 | pl.plot(x, f(x), 'r:', label=u'$f(x) = x\,\sin(x)$')
286 | pl.errorbar(X.ravel(), y, dy, fmt='r.', markersize=10, label=u'Observations')
287 | pl.plot(x, y_pred, 'b-', label=u'Prediction')
288 | pl.fill(np.concatenate([x, x[::-1]]),
289 |         np.concatenate([y_pred - 1.9600 * sigma,
290 |                        (y_pred + 1.9600 * sigma)[::-1]]),
291 |         alpha=.5, fc='b', ec='None', label='95% confidence interval')
292 | pl.xlabel('$x$')
293 | pl.ylabel('$f(x)$')
294 | pl.ylim(-10, 20)
295 | pl.legend(loc='upper left')
296 | 
297 | pl.show()
298 | 
299 | 
300 | import pandas as pd
301 | import os
302 | os.chdir('KocPython2020/in-classMaterial/day17')
303 | tt = pd.read_csv('immSurvey.csv')
304 | tt.head()
305 | 
306 | alphas = tt.stanMeansNewSysPooled
307 | sample = tt.textToSend
308 | 
309 | from sklearn.feature_extraction.text import CountVectorizer
310 | vec = CountVectorizer()
311 | X = vec.fit_transform(sample)
312 | X
313 | 
314 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names())
315 | 
316 | #down-weighting frequent words; term frequency–inverse document frequency (TF–IDF), which weights the word counts by a measure of how often they appear in the documents
317 | from sklearn.feature_extraction.text import TfidfVectorizer
318 | vec = TfidfVectorizer()
319 | X = vec.fit_transform(sample)
320 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names())
321 | 
322 | from sklearn.cross_validation import train_test_split
323 | Xtrain, Xtest, ytrain, ytest = train_test_split(X, alphas,
324 | random_state=1)
325 | 
326 | rbf = ConstantKernel(1.0) * RBF(length_scale=1.0)
327 | gpr = GaussianProcessRegressor(kernel=rbf, alpha=1e-8)
328 | 
329 | gpr.fit(Xtrain.toarray(), ytrain)
330 | 
331 | # Compute posterior predictive mean and covariance
332 | mu_s, cov_s = gpr.predict(Xtest.toarray(), return_cov=True)
333 | 
334 | #test correlation between test and mus
335 | np.corrcoef(ytest, mu_s)
336 | 
337 | #how might we improve this?
338 | 
339 | 


--------------------------------------------------------------------------------
/in-classMaterial/day17/gp-fit.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 |     int<lower=1> N;
 3 |     int<lower=1> K;
 4 |     int<lower=1> M;
 5 |     matrix[N,K] X;
 6 |     matrix[N,M] X_corr;
 7 |     vector[N] y;
 8 | }
 9 | parameters {
10 |     real<lower=0> nug;
11 |     real<lower=0> sig_sq;
12 |     vector<lower=0>[M] d1;
13 |     vector<lower=0>[M] d2;
14 |     vector[K] b;
15 | }
16 | model {
17 |     matrix[N,N] Sigma;
18 |     vector[N] mu;
19 |     matrix[N,K] Mu;
20 |     vector[M] d;
21 | 
22 |     for(m in 1:M){
23 |         d1[m] ~ gamma(1,20);
24 |         d2[m] ~ gamma(10,10);
25 |         d[m] = .5*(d1[m] + d2[m]);
26 |     }
27 |     for (i in 1:(N-1)) {
28 |     for (j in (i+1):N) {
29 |         vector[M] summand;
30 |         for(m in 1:M){
31 |             summand[m] = -pow(X_corr[i,m] - X_corr[j,m],2)/d[m];
32 |         }
33 |         Sigma[i,j] = exp(sum(summand));
34 |         Sigma[j,i] = Sigma[i,j];
35 |     }
36 | }
37 |     for (i in 1:N){
38 |         for(k in 1:K){
39 |             Mu[i,k] = X[i,k]*b[k];
40 |         }
41 |         mu[i]=sum(Mu[i,1:K]);
42 |     }
43 |     for (i in 1:N) Sigma[i,i] = 1 + nug; // + jitter
44 | 
45 |     sig_sq ~ inv_gamma(1,1);
46 | 
47 |     nug ~ exponential(1);
48 | 
49 |     b ~ normal(0,3);
50 |     y ~ multi_normal(mu,sig_sq*Sigma);
51 | }
52 | 
53 | 


--------------------------------------------------------------------------------
/in-classMaterial/day17/gp-pred.stan:
--------------------------------------------------------------------------------
 1 | data {
 2 | int<lower=1> N;
 3 | int<lower=1> zN;
 4 | int<lower=1> K;
 5 | int<lower=1> M;
 6 | matrix[N+zN,K] XZ;
 7 | matrix[N+zN,M] XZ_corr;
 8 | vector[N] y;
 9 | }
10 | parameters {
11 | real<lower=0> nug;
12 | real<lower=0> sig_sq;
13 | vector<lower=0>[M] d1;
14 | vector<lower=0>[M] d2;
15 | vector[K] b;
16 | vector[zN] z;
17 | }
18 | model {
19 | matrix[N+zN,N+zN] Sigma;
20 | vector[N+zN] mu;
21 | matrix[N+zN,K] Mu;
22 | vector[M] d;
23 | 
24 | vector[N+zN] yz;
25 | 
26 | for(m in 1:M){
27 | d1[m] ~ gamma(1,20);
28 | d2[m] ~ gamma(10,10);
29 | d[m] = .5*(d1[m] + d2[m]);
30 | }
31 | for (i in 1:(N+zN-1)) {
32 | for (j in (i+1):(N+zN)) {
33 | vector[M] summand;
34 | for(m in 1:M){
35 | summand[m] = -pow(XZ_corr[i,m] - XZ_corr[j,m],2)/d[m];
36 | }
37 | Sigma[i,j] = exp(sum(summand));
38 | Sigma[j,i] = Sigma[i,j];
39 | }
40 | }
41 | for (i in 1:(N+zN)){
42 | for(k in 1:K){
43 | Mu[i,k] = XZ[i,k]*b[k];
44 | }
45 | mu[i]=sum(Mu[i,1:K]);
46 | }
47 | for (i in 1:(N+zN))
48 | Sigma[i,i] = 1 + nug; // + jitter
49 | 
50 | sig_sq ~ inv_gamma(1,1);
51 | nug ~ exponential(1);
52 | 
53 | b ~ normal(0,3);
54 | 
55 | for(n in 1:N) yz[n] = y[n];
56 | for(n in 1:zN) yz[N+n] = z[n];
57 | 
58 | yz ~ multi_normal(mu,sig_sq*Sigma);
59 | 
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/in-classMaterial/day18/NN1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day18/NN1.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day18/NN2.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | 
  3 | import numpy as np
  4 | 
  5 | class Network(object):
  6 | 
  7 |     def __init__(self, sizes):
  8 |         """The list ``sizes`` contains the number of neurons in the
  9 |         respective layers of the network.  For example, if the list
 10 |         was [2, 3, 1] then it would be a three-layer network, with the
 11 |         first layer containing 2 neurons, the second layer 3 neurons,
 12 |         and the third layer 1 neuron.  The biases and weights for the
 13 |         network are initialized randomly, using a Gaussian
 14 |         distribution with mean 0, and variance 1.  Note that the first
 15 |         layer is assumed to be an input layer, and by convention we
 16 |         won't set any biases for those neurons, since biases are only
 17 |         ever used in computing the outputs from later layers."""
 18 |         self.num_layers = len(sizes)
 19 |         self.sizes = sizes
 20 |         self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
 21 |         self.weights = [np.random.randn(y, x)
 22 |                         for x, y in zip(sizes[:-1], sizes[1:])]
 23 | 
 24 |     def feedforward(self, a):
 25 |         """Return the output of the network if ``a`` is input."""
 26 |         for b, w in zip(self.biases, self.weights):
 27 |             a = sigmoid(np.dot(w, a)+b)
 28 |         return a
 29 | 
 30 |     def SGD(self, training_data, epochs, mini_batch_size, eta,
 31 |             test_data=None):
 32 |         """Train the neural network using mini-batch stochastic
 33 |         gradient descent.  The ``training_data`` is a list of tuples
 34 |         ``(x, y)`` representing the training inputs and the desired
 35 |         outputs.  The other non-optional parameters are
 36 |         self-explanatory.  If ``test_data`` is provided then the
 37 |         network will be evaluated against the test data after each
 38 |         epoch, and partial progress printed out.  This is useful for
 39 |         tracking progress, but slows things down substantially."""
 40 |         if test_data: n_test = len(test_data)
 41 |         n = len(training_data)
 42 |         for j in range(epochs):
 43 |             random.shuffle(training_data)
 44 |             mini_batches = [
 45 |                 training_data[k:k+mini_batch_size]
 46 |                 for k in range(0, n, mini_batch_size)]
 47 |             for mini_batch in mini_batches:
 48 |                 self.update_mini_batch(mini_batch, eta)
 49 |             if test_data:
 50 |                 print("Epoch {0}: {1} / {2}".format(
 51 |                     j, self.evaluate(test_data), n_test))
 52 |             else:
 53 |                 print("Epoch {0} complete".format(j))
 54 | 
 55 |     def update_mini_batch(self, mini_batch, eta):
 56 |         """Update the network's weights and biases by applying
 57 |         gradient descent using backpropagation to a single mini batch.
 58 |         The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
 59 |         is the learning rate."""
 60 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
 61 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
 62 |         for x, y in mini_batch:
 63 |             delta_nabla_b, delta_nabla_w = self.backprop(x, y)
 64 |             nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
 65 |             nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
 66 |         self.weights = [w-(eta/len(mini_batch))*nw
 67 |                         for w, nw in zip(self.weights, nabla_w)]
 68 |         self.biases = [b-(eta/len(mini_batch))*nb
 69 |                        for b, nb in zip(self.biases, nabla_b)]
 70 | 
 71 |     def backprop(self, x, y):
 72 |         """Return a tuple ``(nabla_b, nabla_w)`` representing the
 73 |         gradient for the cost function C_x.  ``nabla_b`` and
 74 |         ``nabla_w`` are layer-by-layer lists of numpy arrays, similar
 75 |         to ``self.biases`` and ``self.weights``."""
 76 |         nabla_b = [np.zeros(b.shape) for b in self.biases]
 77 |         nabla_w = [np.zeros(w.shape) for w in self.weights]
 78 |         # feedforward
 79 |         activation = x
 80 |         activations = [x] # list to store all the activations, layer by layer
 81 |         zs = [] # list to store all the z vectors, layer by layer
 82 |         for b, w in zip(self.biases, self.weights):
 83 |             z = np.dot(w, activation)+b
 84 |             zs.append(z)
 85 |             activation = sigmoid(z)
 86 |             activations.append(activation)
 87 |         # backward pass
 88 |         delta = self.cost_derivative(activations[-1], y) * \
 89 |             sigmoid_prime(zs[-1])
 90 |         nabla_b[-1] = delta
 91 |         nabla_w[-1] = np.dot(delta, activations[-2].transpose())
 92 |         # Note that the variable l in the loop below is used a little
 93 |         # differently to the notation in Chapter 2 of the book.  Here,
 94 |         # l = 1 means the last layer of neurons, l = 2 is the
 95 |         # second-last layer, and so on.  It's a renumbering of the
 96 |         # scheme in the book, used here to take advantage of the fact
 97 |         # that Python can use negative indices in lists.
 98 |         for l in range(2, self.num_layers):
 99 |             z = zs[-l]
100 |             sp = sigmoid_prime(z)
101 |             delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
102 |             nabla_b[-l] = delta
103 |             nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
104 |         return (nabla_b, nabla_w)
105 | 
106 |     def evaluate(self, test_data):
107 |         """Return the number of test inputs for which the neural
108 |         network outputs the correct result. Note that the neural
109 |         network's output is assumed to be the index of whichever
110 |         neuron in the final layer has the highest activation."""
111 |         test_results = [(np.argmax(self.feedforward(x)), y)
112 |                         for (x, y) in test_data]
113 |         return sum(int(x == y) for (x, y) in test_results)
114 | 
115 |     def cost_derivative(self, output_activations, y):
116 |         """Return the vector of partial derivatives \partial C_x /
117 |         \partial a for the output activations."""
118 |         return (output_activations-y)
119 | 
120 | #### Miscellaneous functions
121 | def sigmoid(z):
122 |     """The sigmoid function."""
123 |     return 1.0/(1.0+np.exp(-z))
124 | 
125 | def sigmoid_prime(z):
126 |     """Derivative of the sigmoid function."""
127 |     return sigmoid(z)*(1-sigmoid(z))
128 |     
129 | """
130 | mnist_loader
131 | ~~~~~~~~~~~~
132 | 
133 | A library to load the MNIST image data.  For details of the data
134 | structures that are returned, see the doc strings for ``load_data``
135 | and ``load_data_wrapper``.  In practice, ``load_data_wrapper`` is the
136 | function usually called by our neural network code.
137 | """
138 | 
139 | import pickle as cPickle
140 | import gzip
141 | 
142 | 
143 | def load_data():
144 |     """Return the MNIST data as a tuple containing the training data,
145 |     the validation data, and the test data.
146 | 
147 |     The ``training_data`` is returned as a tuple with two entries.
148 |     The first entry contains the actual training images.  This is a
149 |     numpy ndarray with 50,000 entries.  Each entry is, in turn, a
150 |     numpy ndarray with 784 values, representing the 28 * 28 = 784
151 |     pixels in a single MNIST image.
152 | 
153 |     The second entry in the ``training_data`` tuple is a numpy ndarray
154 |     containing 50,000 entries.  Those entries are just the digit
155 |     values (0...9) for the corresponding images contained in the first
156 |     entry of the tuple.
157 | 
158 |     The ``validation_data`` and ``test_data`` are similar, except
159 |     each contains only 10,000 images.
160 | 
161 |     This is a nice data format, but for use in neural networks it's
162 |     helpful to modify the format of the ``training_data`` a little.
163 |     That's done in the wrapper function ``load_data_wrapper()``, see
164 |     below.
165 |     """
166 |     with gzip.open('mnist.pkl.gz','rb') as ff :
167 |         u = cPickle._Unpickler( ff )
168 |         u.encoding = 'latin1'
169 |         train, val, test = u.load()
170 |     return (train, val, test)
171 | 
172 | def load_data_wrapper():
173 |     """Return a tuple containing ``(training_data, validation_data,
174 |     test_data)``. Based on ``load_data``, but the format is more
175 |     convenient for use in our implementation of neural networks.
176 | 
177 |     In particular, ``training_data`` is a list containing 50,000
178 |     2-tuples ``(x, y)``.  ``x`` is a 784-dimensional numpy.ndarray
179 |     containing the input image.  ``y`` is a 10-dimensional
180 |     numpy.ndarray representing the unit vector corresponding to the
181 |     correct digit for ``x``.
182 | 
183 |     ``validation_data`` and ``test_data`` are lists containing 10,000
184 |     2-tuples ``(x, y)``.  In each case, ``x`` is a 784-dimensional
185 |     numpy.ndarry containing the input image, and ``y`` is the
186 |     corresponding classification, i.e., the digit values (integers)
187 |     corresponding to ``x``.
188 | 
189 |     Obviously, this means we're using slightly different formats for
190 |     the training data and the validation / test data.  These formats
191 |     turn out to be the most convenient for use in our neural network
192 |     code."""
193 |     tr_d, va_d, te_d = load_data()
194 |     training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
195 |     training_results = [vectorized_result(y) for y in tr_d[1]]
196 |     training_data = zip(training_inputs, training_results)
197 |     validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
198 |     validation_data = zip(validation_inputs, va_d[1])
199 |     test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
200 |     test_data = zip(test_inputs, te_d[1])
201 |     return (training_data, validation_data, test_data)
202 | 
203 | def vectorized_result(j):
204 |     """Return a 10-dimensional unit vector with a 1.0 in the jth
205 |     position and zeroes elsewhere.  This is used to convert a digit
206 |     (0...9) into a corresponding desired output from the neural
207 |     network."""
208 |     e = np.zeros((10, 1))
209 |     e[j] = 1.0
210 |     return e
211 |     
212 | import os
213 | os.chdir('KocPython2020/in-classMaterial/day18')
214 | 
215 | training_data, validation_data, test_data = load_data_wrapper()    
216 | 
217 | training_data, validation_data, test_data = list(training_data), list(validation_data), list(test_data)
218 | 
219 | net = Network([784, 30, 10])
220 | 
221 | net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
222 | 
223 | #TODO: change the number of layers, the number of neurons per layer, the SGD parameters, etc.
224 | 
225 | 
226 | import pandas as pd
227 | 
228 | # Location of dataset
229 | url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
230 | 
231 | # Assign column names to the dataset
232 | names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
233 | 
234 | # Read dataset to pandas dataframe
235 | irisdata = pd.read_csv(url, names=names)
236 | 
237 | irisdata.head()  
238 | 
239 | # Assign data from first four columns to X variable
240 | X = irisdata.iloc[:, 0:4]
241 | 
242 | # Assign data from first fifth columns to y variable
243 | y = irisdata.select_dtypes(include=[object]) 
244 | 
245 | y.head()
246 | 
247 | y.Class.unique()  
248 | 
249 | from sklearn import preprocessing  
250 | le = preprocessing.LabelEncoder()
251 | 
252 | y = y.apply(le.fit_transform)
253 | 
254 | from sklearn.model_selection import train_test_split  
255 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)  
256 | 
257 | 
258 | from sklearn.preprocessing import StandardScaler  
259 | scaler = StandardScaler()  
260 | scaler.fit(X_train)
261 | 
262 | X_train = scaler.transform(X_train)  
263 | X_test = scaler.transform(X_test)
264 | 
265 | from sklearn.neural_network import MLPClassifier  
266 | mlp = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)  
267 | mlp.fit(X_train, y_train.values.ravel()) 
268 | 
269 | predictions = mlp.predict(X_test)  
270 | 
271 | from sklearn.metrics import classification_report, confusion_matrix  
272 | print(confusion_matrix(y_test,predictions))  
273 | print(classification_report(y_test,predictions)) 
274 | 
275 | #TODO: try to change the parameters to get a better score
276 | 
277 | import os
278 | os.chdir('KocPython2020/in-classMaterial/day18')
279 | 
280 | import pandas as pd
281 | wine = pd.read_csv('wine_data.csv', names = ["Cultivator", "Alchol", "Malic_Acid", "Ash", "Alcalinity_of_Ash", "Magnesium", "Total_phenols", "Falvanoids", "Nonflavanoid_phenols", "Proanthocyanins", "Color_intensity", "Hue", "OD280", "Proline"])
282 | 
283 | wine.head()
284 | 
285 | wine.describe().transpose()
286 | 
287 | wine.shape
288 | 
289 | X = wine.drop('Cultivator',axis=1)
290 | y = wine['Cultivator']
291 | 
292 | from sklearn.model_selection import train_test_split  
293 | X_train, X_test, y_train, y_test = train_test_split(X, y)
294 | 
295 | from sklearn.preprocessing import StandardScaler
296 | 
297 | scaler = StandardScaler()
298 | 
299 | # Fit only to the training data
300 | scaler.fit(X_train)
301 | 
302 | StandardScaler(copy=True, with_mean=True, with_std=True)
303 | 
304 | # Now apply the transformations to the data:
305 | X_train = scaler.transform(X_train)
306 | X_test = scaler.transform(X_test)
307 | 
308 | from sklearn.neural_network import MLPClassifier  
309 | mlp = MLPClassifier(hidden_layer_sizes=(13,13,13),max_iter=500)
310 | 
311 | mlp.fit(X_train,y_train)
312 | 
313 | predictions = mlp.predict(X_test)
314 | 
315 | from sklearn.metrics import classification_report, confusion_matrix
316 | 
317 | print(confusion_matrix(y_test,predictions))
318 | 
319 | print(classification_report(y_test,predictions))
320 | 
321 | #coefs_ is a list of weight matrices, where weight matrix at index i represents the weights between layer i and layer i+1.
322 | 
323 | #intercepts_ is a list of bias vectors, where the vector at index i represents the bias values added to layer i+1.
324 | 
325 | len(mlp.coefs_)
326 | 
327 | len(mlp.coefs_[0])
328 | 
329 | len(mlp.intercepts_[0])
330 | 
331 | 
332 | from sklearn.pipeline import make_pipeline
333 | 
334 | from matplotlib import pyplot as plt
335 | 
336 | %config InlineBackend.figure_format = 'retina' 
337 | plt.style.use('bmh')
338 | 
339 | def make_data():
340 |     N = 2000
341 |     X = 0.5*np.random.normal(size=N)+0.35
342 | 
343 |     Xt = 0.75*X-0.35
344 |     X = X.reshape((N,1))
345 | 
346 |     Y = -(8 * Xt**2 + 0.1*Xt + 0.1) + 0.05 * np.random.normal(size=N)
347 |     Y = np.exp(Y) + 0.05 * np.random.normal(size=N)
348 |     Y /= max(np.abs(Y))
349 |     return X, Y
350 | 
351 | np.random.seed(0)
352 | X, Y = make_data()
353 | 
354 | from sklearn.metrics import mean_squared_error, r2_score
355 | 
356 | Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.5, random_state=0)
357 | 
358 | plt.plot(Xtest[:,0], Ytest, '.');
359 | 
360 | from sklearn.linear_model import Ridge
361 | 
362 | ridge = Ridge()
363 | ridge.fit(Xtrain, Ytrain)
364 | 
365 | Yguess = ridge.predict(Xtest)
366 | 
367 | plt.plot(Xtest[:,0], Ytest, '.')
368 | plt.plot(Xtest[:,0], Yguess, 'r.')
369 | 
370 | mean_squared_error(Ytest, Yguess), r2_score(Ytest, Yguess)
371 | 
372 | from sklearn.neural_network import MLPRegressor
373 | 
374 | mlp = MLPRegressor(random_state=0, activation='relu', hidden_layer_sizes=16)
375 | 
376 | mlp.fit(Xtrain, Ytrain)
377 | 
378 | Yguess = mlp.predict(Xtest)
379 | 
380 | plt.plot(Xtest[:,0], Ytest, '.')
381 | plt.plot(Xtest[:,0], Yguess, 'r.')
382 | 
383 | mean_squared_error(Ytest, Yguess), r2_score(Ytest, Yguess)
384 | 
385 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(16,8))
386 | 
387 | mlp.fit(Xtrain, Ytrain)
388 | 
389 | Yguess = mlp.predict(Xtest)
390 | 
391 | plt.plot(Xtest[:,0], Ytest, '.')
392 | plt.plot(Xtest[:,0], Yguess, 'r.')
393 | 
394 | mean_squared_error(Ytest, Yguess), r2_score(Ytest, Yguess)
395 | 
396 | 
397 | 
398 | from sklearn.ensemble import RandomForestRegressor
399 | from sklearn.metrics import mean_squared_error
400 | plt.style.use('seaborn-poster')
401 | 
402 | np.random.seed(0)
403 | x = 10 * np.random.rand(100)
404 | 
405 | def model(x, sigma=0.3):
406 |     fast_oscillation = np.sin(5 * x)
407 |     slow_oscillation = np.sin(0.5 * x)
408 |     noise = sigma * np.random.randn(len(x))
409 | 
410 |     return slow_oscillation + fast_oscillation + noise
411 | 
412 | plt.figure(figsize = (12,10))
413 | y = model(x)
414 | plt.errorbar(x, y, 0.3, fmt='o')
415 | 
416 | xfit = np.linspace(0, 10, 1000)
417 | 
418 | # fit the model and get the estimation for each data points
419 | yfit = RandomForestRegressor(100, random_state=42).fit(x[:, None], y).predict(xfit[:, None])
420 | ytrue = model(xfit, 0)
421 | 
422 | plt.figure(figsize = (12,10))
423 | plt.errorbar(x, y, 0.3, fmt='o')
424 | plt.plot(xfit, yfit, '-r', label = 'predicted', zorder = 10)
425 | plt.plot(xfit, ytrue, '-k', alpha=0.5, label = 'true model', zorder = 10)
426 | plt.legend()
427 | 
428 | mse = mean_squared_error(ytrue, yfit)
429 | print(mse)
430 | 
431 | mlp = MLPRegressor(hidden_layer_sizes=(200,200,200), max_iter = 2000, solver='lbfgs', \
432 |                    alpha=0.01, activation = 'tanh', random_state = 8)
433 | 
434 | yfit = mlp.fit(x[:, None], y).predict(xfit[:, None])
435 | 
436 | plt.figure(figsize = (12,10))
437 | plt.errorbar(x, y, 0.3, fmt='o')
438 | plt.plot(xfit, yfit, '-r', label = 'predicted', zorder = 10)
439 | plt.plot(xfit, ytrue, '-k', alpha=0.5, label = 'true model', zorder = 10)
440 | plt.legend()
441 | 
442 | mse = mean_squared_error(ytrue, yfit)
443 | print(mse)
444 | 
445 | 
446 | 
447 | from sklearn.svm import SVR
448 | 
449 | # define your model
450 | svr = SVR(C=1000)
451 | 
452 | # get the estimation from the model
453 | yfit = svr.fit(x[:, None], y).predict(xfit[:, None])
454 | 
455 | # plot the results as above
456 | plt.figure(figsize = (12,10))
457 | plt.errorbar(x, y, 0.3, fmt='o')
458 | plt.plot(xfit, yfit, '-r', label = 'predicted', zorder = 10)
459 | plt.plot(xfit, ytrue, '-k', alpha=0.5, label = 'true model', zorder = 10)
460 | plt.legend()
461 | 
462 | mse = mean_squared_error(ytrue, yfit)
463 | print(mse)
464 | 
465 | 
466 | tt = pd.read_csv('../day17/immSurvey.csv')
467 | tt.head()
468 | 
469 | alphas = tt.stanMeansNewSysPooled
470 | sample = tt.textToSend
471 | 
472 | from sklearn.feature_extraction.text import CountVectorizer
473 | vec = CountVectorizer()
474 | X = vec.fit_transform(sample)
475 | X
476 | 
477 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names())
478 | 
479 | #down-weighting frequent words; term frequency–inverse document frequency (TF–IDF), which weights the word counts by a measure of how often they appear in the documents
480 | from sklearn.feature_extraction.text import TfidfVectorizer
481 | vec = TfidfVectorizer()
482 | X = vec.fit_transform(sample)
483 | pd.DataFrame(X.toarray(), columns=vec.get_feature_names())
484 | 
485 | Xtrain, Xtest, ytrain, ytest = train_test_split(X, alphas,
486 | random_state=1)
487 | 
488 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(100,50))
489 | 
490 | mlp.fit(Xtrain, ytrain)
491 | 
492 | yguess = mlp.predict(Xtest)
493 | 
494 | np.corrcoef(ytest, yguess)
495 | 
496 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(1000,500))
497 | 
498 | mlp.fit(Xtrain, ytrain)
499 | 
500 | yguess = mlp.predict(Xtest)
501 | 
502 | np.corrcoef(ytest, yguess)
503 | 
504 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(50,25))
505 | 
506 | mlp.fit(Xtrain, ytrain)
507 | 
508 | yguess = mlp.predict(Xtest)
509 | 
510 | np.corrcoef(ytest, yguess)
511 | 
512 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(20,10))
513 | 
514 | mlp.fit(Xtrain, ytrain)
515 | 
516 | yguess = mlp.predict(Xtest)
517 | 
518 | np.corrcoef(ytest, yguess)
519 | 
520 | mlp = MLPRegressor(random_state=0, activation='tanh', hidden_layer_sizes=(10,5))
521 | 
522 | mlp.fit(Xtrain, ytrain)
523 | 
524 | yguess = mlp.predict(Xtest)
525 | 
526 | np.corrcoef(ytest, yguess)
527 | 
528 | #TODO: play around with number of layers, layer sizes, different activations, etc. see if you can do better
529 | 


--------------------------------------------------------------------------------
/in-classMaterial/day18/mnist.pkl.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day18/mnist.pkl.gz


--------------------------------------------------------------------------------
/in-classMaterial/day18/wine_data.csv:
--------------------------------------------------------------------------------
  1 | 1,14.23,1.71,2.43,15.6,127,2.8,3.06,.28,2.29,5.64,1.04,3.92,1065
  2 | 1,13.2,1.78,2.14,11.2,100,2.65,2.76,.26,1.28,4.38,1.05,3.4,1050
  3 | 1,13.16,2.36,2.67,18.6,101,2.8,3.24,.3,2.81,5.68,1.03,3.17,1185
  4 | 1,14.37,1.95,2.5,16.8,113,3.85,3.49,.24,2.18,7.8,.86,3.45,1480
  5 | 1,13.24,2.59,2.87,21,118,2.8,2.69,.39,1.82,4.32,1.04,2.93,735
  6 | 1,14.2,1.76,2.45,15.2,112,3.27,3.39,.34,1.97,6.75,1.05,2.85,1450
  7 | 1,14.39,1.87,2.45,14.6,96,2.5,2.52,.3,1.98,5.25,1.02,3.58,1290
  8 | 1,14.06,2.15,2.61,17.6,121,2.6,2.51,.31,1.25,5.05,1.06,3.58,1295
  9 | 1,14.83,1.64,2.17,14,97,2.8,2.98,.29,1.98,5.2,1.08,2.85,1045
 10 | 1,13.86,1.35,2.27,16,98,2.98,3.15,.22,1.85,7.22,1.01,3.55,1045
 11 | 1,14.1,2.16,2.3,18,105,2.95,3.32,.22,2.38,5.75,1.25,3.17,1510
 12 | 1,14.12,1.48,2.32,16.8,95,2.2,2.43,.26,1.57,5,1.17,2.82,1280
 13 | 1,13.75,1.73,2.41,16,89,2.6,2.76,.29,1.81,5.6,1.15,2.9,1320
 14 | 1,14.75,1.73,2.39,11.4,91,3.1,3.69,.43,2.81,5.4,1.25,2.73,1150
 15 | 1,14.38,1.87,2.38,12,102,3.3,3.64,.29,2.96,7.5,1.2,3,1547
 16 | 1,13.63,1.81,2.7,17.2,112,2.85,2.91,.3,1.46,7.3,1.28,2.88,1310
 17 | 1,14.3,1.92,2.72,20,120,2.8,3.14,.33,1.97,6.2,1.07,2.65,1280
 18 | 1,13.83,1.57,2.62,20,115,2.95,3.4,.4,1.72,6.6,1.13,2.57,1130
 19 | 1,14.19,1.59,2.48,16.5,108,3.3,3.93,.32,1.86,8.7,1.23,2.82,1680
 20 | 1,13.64,3.1,2.56,15.2,116,2.7,3.03,.17,1.66,5.1,.96,3.36,845
 21 | 1,14.06,1.63,2.28,16,126,3,3.17,.24,2.1,5.65,1.09,3.71,780
 22 | 1,12.93,3.8,2.65,18.6,102,2.41,2.41,.25,1.98,4.5,1.03,3.52,770
 23 | 1,13.71,1.86,2.36,16.6,101,2.61,2.88,.27,1.69,3.8,1.11,4,1035
 24 | 1,12.85,1.6,2.52,17.8,95,2.48,2.37,.26,1.46,3.93,1.09,3.63,1015
 25 | 1,13.5,1.81,2.61,20,96,2.53,2.61,.28,1.66,3.52,1.12,3.82,845
 26 | 1,13.05,2.05,3.22,25,124,2.63,2.68,.47,1.92,3.58,1.13,3.2,830
 27 | 1,13.39,1.77,2.62,16.1,93,2.85,2.94,.34,1.45,4.8,.92,3.22,1195
 28 | 1,13.3,1.72,2.14,17,94,2.4,2.19,.27,1.35,3.95,1.02,2.77,1285
 29 | 1,13.87,1.9,2.8,19.4,107,2.95,2.97,.37,1.76,4.5,1.25,3.4,915
 30 | 1,14.02,1.68,2.21,16,96,2.65,2.33,.26,1.98,4.7,1.04,3.59,1035
 31 | 1,13.73,1.5,2.7,22.5,101,3,3.25,.29,2.38,5.7,1.19,2.71,1285
 32 | 1,13.58,1.66,2.36,19.1,106,2.86,3.19,.22,1.95,6.9,1.09,2.88,1515
 33 | 1,13.68,1.83,2.36,17.2,104,2.42,2.69,.42,1.97,3.84,1.23,2.87,990
 34 | 1,13.76,1.53,2.7,19.5,132,2.95,2.74,.5,1.35,5.4,1.25,3,1235
 35 | 1,13.51,1.8,2.65,19,110,2.35,2.53,.29,1.54,4.2,1.1,2.87,1095
 36 | 1,13.48,1.81,2.41,20.5,100,2.7,2.98,.26,1.86,5.1,1.04,3.47,920
 37 | 1,13.28,1.64,2.84,15.5,110,2.6,2.68,.34,1.36,4.6,1.09,2.78,880
 38 | 1,13.05,1.65,2.55,18,98,2.45,2.43,.29,1.44,4.25,1.12,2.51,1105
 39 | 1,13.07,1.5,2.1,15.5,98,2.4,2.64,.28,1.37,3.7,1.18,2.69,1020
 40 | 1,14.22,3.99,2.51,13.2,128,3,3.04,.2,2.08,5.1,.89,3.53,760
 41 | 1,13.56,1.71,2.31,16.2,117,3.15,3.29,.34,2.34,6.13,.95,3.38,795
 42 | 1,13.41,3.84,2.12,18.8,90,2.45,2.68,.27,1.48,4.28,.91,3,1035
 43 | 1,13.88,1.89,2.59,15,101,3.25,3.56,.17,1.7,5.43,.88,3.56,1095
 44 | 1,13.24,3.98,2.29,17.5,103,2.64,2.63,.32,1.66,4.36,.82,3,680
 45 | 1,13.05,1.77,2.1,17,107,3,3,.28,2.03,5.04,.88,3.35,885
 46 | 1,14.21,4.04,2.44,18.9,111,2.85,2.65,.3,1.25,5.24,.87,3.33,1080
 47 | 1,14.38,3.59,2.28,16,102,3.25,3.17,.27,2.19,4.9,1.04,3.44,1065
 48 | 1,13.9,1.68,2.12,16,101,3.1,3.39,.21,2.14,6.1,.91,3.33,985
 49 | 1,14.1,2.02,2.4,18.8,103,2.75,2.92,.32,2.38,6.2,1.07,2.75,1060
 50 | 1,13.94,1.73,2.27,17.4,108,2.88,3.54,.32,2.08,8.90,1.12,3.1,1260
 51 | 1,13.05,1.73,2.04,12.4,92,2.72,3.27,.17,2.91,7.2,1.12,2.91,1150
 52 | 1,13.83,1.65,2.6,17.2,94,2.45,2.99,.22,2.29,5.6,1.24,3.37,1265
 53 | 1,13.82,1.75,2.42,14,111,3.88,3.74,.32,1.87,7.05,1.01,3.26,1190
 54 | 1,13.77,1.9,2.68,17.1,115,3,2.79,.39,1.68,6.3,1.13,2.93,1375
 55 | 1,13.74,1.67,2.25,16.4,118,2.6,2.9,.21,1.62,5.85,.92,3.2,1060
 56 | 1,13.56,1.73,2.46,20.5,116,2.96,2.78,.2,2.45,6.25,.98,3.03,1120
 57 | 1,14.22,1.7,2.3,16.3,118,3.2,3,.26,2.03,6.38,.94,3.31,970
 58 | 1,13.29,1.97,2.68,16.8,102,3,3.23,.31,1.66,6,1.07,2.84,1270
 59 | 1,13.72,1.43,2.5,16.7,108,3.4,3.67,.19,2.04,6.8,.89,2.87,1285
 60 | 2,12.37,.94,1.36,10.6,88,1.98,.57,.28,.42,1.95,1.05,1.82,520
 61 | 2,12.33,1.1,2.28,16,101,2.05,1.09,.63,.41,3.27,1.25,1.67,680
 62 | 2,12.64,1.36,2.02,16.8,100,2.02,1.41,.53,.62,5.75,.98,1.59,450
 63 | 2,13.67,1.25,1.92,18,94,2.1,1.79,.32,.73,3.8,1.23,2.46,630
 64 | 2,12.37,1.13,2.16,19,87,3.5,3.1,.19,1.87,4.45,1.22,2.87,420
 65 | 2,12.17,1.45,2.53,19,104,1.89,1.75,.45,1.03,2.95,1.45,2.23,355
 66 | 2,12.37,1.21,2.56,18.1,98,2.42,2.65,.37,2.08,4.6,1.19,2.3,678
 67 | 2,13.11,1.01,1.7,15,78,2.98,3.18,.26,2.28,5.3,1.12,3.18,502
 68 | 2,12.37,1.17,1.92,19.6,78,2.11,2,.27,1.04,4.68,1.12,3.48,510
 69 | 2,13.34,.94,2.36,17,110,2.53,1.3,.55,.42,3.17,1.02,1.93,750
 70 | 2,12.21,1.19,1.75,16.8,151,1.85,1.28,.14,2.5,2.85,1.28,3.07,718
 71 | 2,12.29,1.61,2.21,20.4,103,1.1,1.02,.37,1.46,3.05,.906,1.82,870
 72 | 2,13.86,1.51,2.67,25,86,2.95,2.86,.21,1.87,3.38,1.36,3.16,410
 73 | 2,13.49,1.66,2.24,24,87,1.88,1.84,.27,1.03,3.74,.98,2.78,472
 74 | 2,12.99,1.67,2.6,30,139,3.3,2.89,.21,1.96,3.35,1.31,3.5,985
 75 | 2,11.96,1.09,2.3,21,101,3.38,2.14,.13,1.65,3.21,.99,3.13,886
 76 | 2,11.66,1.88,1.92,16,97,1.61,1.57,.34,1.15,3.8,1.23,2.14,428
 77 | 2,13.03,.9,1.71,16,86,1.95,2.03,.24,1.46,4.6,1.19,2.48,392
 78 | 2,11.84,2.89,2.23,18,112,1.72,1.32,.43,.95,2.65,.96,2.52,500
 79 | 2,12.33,.99,1.95,14.8,136,1.9,1.85,.35,2.76,3.4,1.06,2.31,750
 80 | 2,12.7,3.87,2.4,23,101,2.83,2.55,.43,1.95,2.57,1.19,3.13,463
 81 | 2,12,.92,2,19,86,2.42,2.26,.3,1.43,2.5,1.38,3.12,278
 82 | 2,12.72,1.81,2.2,18.8,86,2.2,2.53,.26,1.77,3.9,1.16,3.14,714
 83 | 2,12.08,1.13,2.51,24,78,2,1.58,.4,1.4,2.2,1.31,2.72,630
 84 | 2,13.05,3.86,2.32,22.5,85,1.65,1.59,.61,1.62,4.8,.84,2.01,515
 85 | 2,11.84,.89,2.58,18,94,2.2,2.21,.22,2.35,3.05,.79,3.08,520
 86 | 2,12.67,.98,2.24,18,99,2.2,1.94,.3,1.46,2.62,1.23,3.16,450
 87 | 2,12.16,1.61,2.31,22.8,90,1.78,1.69,.43,1.56,2.45,1.33,2.26,495
 88 | 2,11.65,1.67,2.62,26,88,1.92,1.61,.4,1.34,2.6,1.36,3.21,562
 89 | 2,11.64,2.06,2.46,21.6,84,1.95,1.69,.48,1.35,2.8,1,2.75,680
 90 | 2,12.08,1.33,2.3,23.6,70,2.2,1.59,.42,1.38,1.74,1.07,3.21,625
 91 | 2,12.08,1.83,2.32,18.5,81,1.6,1.5,.52,1.64,2.4,1.08,2.27,480
 92 | 2,12,1.51,2.42,22,86,1.45,1.25,.5,1.63,3.6,1.05,2.65,450
 93 | 2,12.69,1.53,2.26,20.7,80,1.38,1.46,.58,1.62,3.05,.96,2.06,495
 94 | 2,12.29,2.83,2.22,18,88,2.45,2.25,.25,1.99,2.15,1.15,3.3,290
 95 | 2,11.62,1.99,2.28,18,98,3.02,2.26,.17,1.35,3.25,1.16,2.96,345
 96 | 2,12.47,1.52,2.2,19,162,2.5,2.27,.32,3.28,2.6,1.16,2.63,937
 97 | 2,11.81,2.12,2.74,21.5,134,1.6,.99,.14,1.56,2.5,.95,2.26,625
 98 | 2,12.29,1.41,1.98,16,85,2.55,2.5,.29,1.77,2.9,1.23,2.74,428
 99 | 2,12.37,1.07,2.1,18.5,88,3.52,3.75,.24,1.95,4.5,1.04,2.77,660
100 | 2,12.29,3.17,2.21,18,88,2.85,2.99,.45,2.81,2.3,1.42,2.83,406
101 | 2,12.08,2.08,1.7,17.5,97,2.23,2.17,.26,1.4,3.3,1.27,2.96,710
102 | 2,12.6,1.34,1.9,18.5,88,1.45,1.36,.29,1.35,2.45,1.04,2.77,562
103 | 2,12.34,2.45,2.46,21,98,2.56,2.11,.34,1.31,2.8,.8,3.38,438
104 | 2,11.82,1.72,1.88,19.5,86,2.5,1.64,.37,1.42,2.06,.94,2.44,415
105 | 2,12.51,1.73,1.98,20.5,85,2.2,1.92,.32,1.48,2.94,1.04,3.57,672
106 | 2,12.42,2.55,2.27,22,90,1.68,1.84,.66,1.42,2.7,.86,3.3,315
107 | 2,12.25,1.73,2.12,19,80,1.65,2.03,.37,1.63,3.4,1,3.17,510
108 | 2,12.72,1.75,2.28,22.5,84,1.38,1.76,.48,1.63,3.3,.88,2.42,488
109 | 2,12.22,1.29,1.94,19,92,2.36,2.04,.39,2.08,2.7,.86,3.02,312
110 | 2,11.61,1.35,2.7,20,94,2.74,2.92,.29,2.49,2.65,.96,3.26,680
111 | 2,11.46,3.74,1.82,19.5,107,3.18,2.58,.24,3.58,2.9,.75,2.81,562
112 | 2,12.52,2.43,2.17,21,88,2.55,2.27,.26,1.22,2,.9,2.78,325
113 | 2,11.76,2.68,2.92,20,103,1.75,2.03,.6,1.05,3.8,1.23,2.5,607
114 | 2,11.41,.74,2.5,21,88,2.48,2.01,.42,1.44,3.08,1.1,2.31,434
115 | 2,12.08,1.39,2.5,22.5,84,2.56,2.29,.43,1.04,2.9,.93,3.19,385
116 | 2,11.03,1.51,2.2,21.5,85,2.46,2.17,.52,2.01,1.9,1.71,2.87,407
117 | 2,11.82,1.47,1.99,20.8,86,1.98,1.6,.3,1.53,1.95,.95,3.33,495
118 | 2,12.42,1.61,2.19,22.5,108,2,2.09,.34,1.61,2.06,1.06,2.96,345
119 | 2,12.77,3.43,1.98,16,80,1.63,1.25,.43,.83,3.4,.7,2.12,372
120 | 2,12,3.43,2,19,87,2,1.64,.37,1.87,1.28,.93,3.05,564
121 | 2,11.45,2.4,2.42,20,96,2.9,2.79,.32,1.83,3.25,.8,3.39,625
122 | 2,11.56,2.05,3.23,28.5,119,3.18,5.08,.47,1.87,6,.93,3.69,465
123 | 2,12.42,4.43,2.73,26.5,102,2.2,2.13,.43,1.71,2.08,.92,3.12,365
124 | 2,13.05,5.8,2.13,21.5,86,2.62,2.65,.3,2.01,2.6,.73,3.1,380
125 | 2,11.87,4.31,2.39,21,82,2.86,3.03,.21,2.91,2.8,.75,3.64,380
126 | 2,12.07,2.16,2.17,21,85,2.6,2.65,.37,1.35,2.76,.86,3.28,378
127 | 2,12.43,1.53,2.29,21.5,86,2.74,3.15,.39,1.77,3.94,.69,2.84,352
128 | 2,11.79,2.13,2.78,28.5,92,2.13,2.24,.58,1.76,3,.97,2.44,466
129 | 2,12.37,1.63,2.3,24.5,88,2.22,2.45,.4,1.9,2.12,.89,2.78,342
130 | 2,12.04,4.3,2.38,22,80,2.1,1.75,.42,1.35,2.6,.79,2.57,580
131 | 3,12.86,1.35,2.32,18,122,1.51,1.25,.21,.94,4.1,.76,1.29,630
132 | 3,12.88,2.99,2.4,20,104,1.3,1.22,.24,.83,5.4,.74,1.42,530
133 | 3,12.81,2.31,2.4,24,98,1.15,1.09,.27,.83,5.7,.66,1.36,560
134 | 3,12.7,3.55,2.36,21.5,106,1.7,1.2,.17,.84,5,.78,1.29,600
135 | 3,12.51,1.24,2.25,17.5,85,2,.58,.6,1.25,5.45,.75,1.51,650
136 | 3,12.6,2.46,2.2,18.5,94,1.62,.66,.63,.94,7.1,.73,1.58,695
137 | 3,12.25,4.72,2.54,21,89,1.38,.47,.53,.8,3.85,.75,1.27,720
138 | 3,12.53,5.51,2.64,25,96,1.79,.6,.63,1.1,5,.82,1.69,515
139 | 3,13.49,3.59,2.19,19.5,88,1.62,.48,.58,.88,5.7,.81,1.82,580
140 | 3,12.84,2.96,2.61,24,101,2.32,.6,.53,.81,4.92,.89,2.15,590
141 | 3,12.93,2.81,2.7,21,96,1.54,.5,.53,.75,4.6,.77,2.31,600
142 | 3,13.36,2.56,2.35,20,89,1.4,.5,.37,.64,5.6,.7,2.47,780
143 | 3,13.52,3.17,2.72,23.5,97,1.55,.52,.5,.55,4.35,.89,2.06,520
144 | 3,13.62,4.95,2.35,20,92,2,.8,.47,1.02,4.4,.91,2.05,550
145 | 3,12.25,3.88,2.2,18.5,112,1.38,.78,.29,1.14,8.21,.65,2,855
146 | 3,13.16,3.57,2.15,21,102,1.5,.55,.43,1.3,4,.6,1.68,830
147 | 3,13.88,5.04,2.23,20,80,.98,.34,.4,.68,4.9,.58,1.33,415
148 | 3,12.87,4.61,2.48,21.5,86,1.7,.65,.47,.86,7.65,.54,1.86,625
149 | 3,13.32,3.24,2.38,21.5,92,1.93,.76,.45,1.25,8.42,.55,1.62,650
150 | 3,13.08,3.9,2.36,21.5,113,1.41,1.39,.34,1.14,9.40,.57,1.33,550
151 | 3,13.5,3.12,2.62,24,123,1.4,1.57,.22,1.25,8.60,.59,1.3,500
152 | 3,12.79,2.67,2.48,22,112,1.48,1.36,.24,1.26,10.8,.48,1.47,480
153 | 3,13.11,1.9,2.75,25.5,116,2.2,1.28,.26,1.56,7.1,.61,1.33,425
154 | 3,13.23,3.3,2.28,18.5,98,1.8,.83,.61,1.87,10.52,.56,1.51,675
155 | 3,12.58,1.29,2.1,20,103,1.48,.58,.53,1.4,7.6,.58,1.55,640
156 | 3,13.17,5.19,2.32,22,93,1.74,.63,.61,1.55,7.9,.6,1.48,725
157 | 3,13.84,4.12,2.38,19.5,89,1.8,.83,.48,1.56,9.01,.57,1.64,480
158 | 3,12.45,3.03,2.64,27,97,1.9,.58,.63,1.14,7.5,.67,1.73,880
159 | 3,14.34,1.68,2.7,25,98,2.8,1.31,.53,2.7,13,.57,1.96,660
160 | 3,13.48,1.67,2.64,22.5,89,2.6,1.1,.52,2.29,11.75,.57,1.78,620
161 | 3,12.36,3.83,2.38,21,88,2.3,.92,.5,1.04,7.65,.56,1.58,520
162 | 3,13.69,3.26,2.54,20,107,1.83,.56,.5,.8,5.88,.96,1.82,680
163 | 3,12.85,3.27,2.58,22,106,1.65,.6,.6,.96,5.58,.87,2.11,570
164 | 3,12.96,3.45,2.35,18.5,106,1.39,.7,.4,.94,5.28,.68,1.75,675
165 | 3,13.78,2.76,2.3,22,90,1.35,.68,.41,1.03,9.58,.7,1.68,615
166 | 3,13.73,4.36,2.26,22.5,88,1.28,.47,.52,1.15,6.62,.78,1.75,520
167 | 3,13.45,3.7,2.6,23,111,1.7,.92,.43,1.46,10.68,.85,1.56,695
168 | 3,12.82,3.37,2.3,19.5,88,1.48,.66,.4,.97,10.26,.72,1.75,685
169 | 3,13.58,2.58,2.69,24.5,105,1.55,.84,.39,1.54,8.66,.74,1.8,750
170 | 3,13.4,4.6,2.86,25,112,1.98,.96,.27,1.11,8.5,.67,1.92,630
171 | 3,12.2,3.03,2.32,19,96,1.25,.49,.4,.73,5.5,.66,1.83,510
172 | 3,12.77,2.39,2.28,19.5,86,1.39,.51,.48,.64,9.899999,.57,1.63,470
173 | 3,14.16,2.51,2.48,20,91,1.68,.7,.44,1.24,9.7,.62,1.71,660
174 | 3,13.71,5.65,2.45,20.5,95,1.68,.61,.52,1.06,7.7,.64,1.74,740
175 | 3,13.4,3.91,2.48,23,102,1.8,.75,.43,1.41,7.3,.7,1.56,750
176 | 3,13.27,4.28,2.26,20,120,1.59,.69,.43,1.35,10.2,.59,1.56,835
177 | 3,13.17,2.59,2.37,20,120,1.65,.68,.53,1.46,9.3,.6,1.62,840
178 | 3,14.13,4.1,2.74,24.5,96,2.05,.76,.56,1.35,9.2,.61,1.6,560
179 | 


--------------------------------------------------------------------------------
/in-classMaterial/day19/dcgan.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/dcgan.gif


--------------------------------------------------------------------------------
/in-classMaterial/day19/hello.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/hello.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0001.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0002.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0003.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0004.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0005.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0006.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0006.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0007.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0007.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0008.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0008.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0009.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0009.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0010.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0010.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0011.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0011.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0012.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0012.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0013.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0013.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0014.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0014.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0015.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0015.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0016.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0016.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0017.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0017.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0018.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0018.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0019.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0019.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0020.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0020.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0021.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0021.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0022.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0022.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0023.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0023.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0024.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0024.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0025.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0025.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0026.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0026.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0027.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0027.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0028.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0028.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0029.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0029.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0030.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0030.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0031.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0031.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0032.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0032.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0033.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0033.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0034.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0034.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0035.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0035.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0036.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0036.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0037.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0037.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0038.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0038.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0039.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0039.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0040.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0040.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0041.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0041.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0042.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0042.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0043.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0043.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0044.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0044.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0045.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0045.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0046.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0046.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0047.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0047.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0048.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0048.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0049.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0049.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/image_at_epoch_0050.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day19/image_at_epoch_0050.png


--------------------------------------------------------------------------------
/in-classMaterial/day19/tf.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | mnist = tf.keras.datasets.mnist
  3 | 
  4 | (x_train, y_train),(x_test, y_test) = mnist.load_data()
  5 | x_train, x_test = x_train / 255.0, x_test / 255.0
  6 | 
  7 | model = tf.keras.models.Sequential([
  8 |   tf.keras.layers.Flatten(input_shape=(28, 28)),
  9 |   tf.keras.layers.Dense(128, activation='relu'),
 10 |   tf.keras.layers.Dropout(0.2),
 11 |   tf.keras.layers.Dense(10, activation='softmax')
 12 | ])
 13 | 
 14 | model.compile(optimizer='adam',
 15 |               loss='sparse_categorical_crossentropy',
 16 |               metrics=['accuracy'])
 17 | 
 18 | model.fit(x_train, y_train, epochs=5)
 19 | model.evaluate(x_test, y_test)
 20 | 
 21 | 
 22 | #Generative Adversarial Networks (GANs) are one of the most interesting ideas in computer science today. Two models are trained simultaneously by an adversarial process. A generator ("the artist") learns to create images that look real, while a discriminator ("the art critic") learns to tell real images apart from fakes.
 23 | 
 24 | #During training, the generator progressively becomes better at creating images that look real, while the discriminator becomes better at telling them apart. The process reaches equilibrium when the discriminator can no longer distinguish real images from fakes.
 25 | 
 26 | #This code demonstrates this process on the MNIST dataset. The included animation shows a series of images produced by the generator as it was trained for 50 epochs. The images begin as random noise, and increasingly resemble hand written digits over time.
 27 | 
 28 | import glob
 29 | import imageio
 30 | import matplotlib.pyplot as plt
 31 | import numpy as np
 32 | import os
 33 | import PIL
 34 | from tensorflow.keras import layers
 35 | import time
 36 | 
 37 | from IPython import display
 38 | 
 39 | #You will use the MNIST dataset to train the generator and the discriminator. The generator will generate handwritten digits resembling the MNIST data.
 40 | 
 41 | (train_images, train_labels), (_, _) = tf.keras.datasets.mnist.load_data()
 42 | 
 43 | train_images = train_images.reshape(train_images.shape[0], 28, 28, 1).astype('float32')
 44 | train_images = (train_images - 127.5) / 127.5 # Normalize the images to [-1, 1]
 45 | 
 46 | BUFFER_SIZE = 60000
 47 | BATCH_SIZE = 256
 48 | 
 49 | # Batch and shuffle the data
 50 | 
 51 | train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
 52 | 
 53 | #Both the generator and discriminator are defined using the Keras Sequential API.
 54 | 
 55 | #The generator uses tf.keras.layers.Conv2DTranspose (upsampling) layers to produce an image from a seed (random noise). Start with a Dense layer that takes this seed as input, then upsample several times until you reach the desired image size of 28x28x1. Notice the tf.keras.layers.LeakyReLU activation for each layer, except the output layer which uses tanh.
 56 | 
 57 | def make_generator_model():
 58 |     model = tf.keras.Sequential()
 59 |     model.add(layers.Dense(7*7*256, use_bias=False, input_shape=(100,)))
 60 |     model.add(layers.BatchNormalization())
 61 |     model.add(layers.LeakyReLU())
 62 | 
 63 |     model.add(layers.Reshape((7, 7, 256)))
 64 |     assert model.output_shape == (None, 7, 7, 256) # Note: None is the batch size
 65 | 
 66 |     model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
 67 |     assert model.output_shape == (None, 7, 7, 128)
 68 |     model.add(layers.BatchNormalization())
 69 |     model.add(layers.LeakyReLU())
 70 | 
 71 |     model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
 72 |     assert model.output_shape == (None, 14, 14, 64)
 73 |     model.add(layers.BatchNormalization())
 74 |     model.add(layers.LeakyReLU())
 75 | 
 76 |     model.add(layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'))
 77 |     assert model.output_shape == (None, 28, 28, 1)
 78 | 
 79 |     return model
 80 | 
 81 | #Use the (as yet untrained) generator to create an image.
 82 | 
 83 | generator = make_generator_model()
 84 | 
 85 | noise = tf.random.normal([1, 100])
 86 | generated_image = generator(noise, training=False)
 87 | 
 88 | plt.imshow(generated_image[0, :, :, 0], cmap='gray')
 89 | 
 90 | #The discriminator is a CNN-based image classifier.
 91 | 
 92 | def make_discriminator_model():
 93 |     model = tf.keras.Sequential()
 94 |     model.add(layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same',
 95 |                                      input_shape=[28, 28, 1]))
 96 |     model.add(layers.LeakyReLU())
 97 |     model.add(layers.Dropout(0.3))
 98 | 
 99 |     model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
100 |     model.add(layers.LeakyReLU())
101 |     model.add(layers.Dropout(0.3))
102 | 
103 |     model.add(layers.Flatten())
104 |     model.add(layers.Dense(1))
105 | 
106 |     return model
107 | 
108 | #Use the (as yet untrained) discriminator to classify the generated images as real or fake. The model will be trained to output positive values for real images, and negative values for fake images.
109 | 
110 | discriminator = make_discriminator_model()
111 | decision = discriminator(generated_image)
112 | print (decision)
113 | 
114 | #Define loss functions and optimizers for both models.
115 | # This method returns a helper function to compute cross entropy loss
116 | cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
117 | 
118 | 
119 | #This method quantifies how well the discriminator is able to distinguish real images from fakes. It compares the discriminator's predictions on real images to an array of 1s, and the discriminator's predictions on fake (generated) images to an array of 0s.
120 | 
121 | def discriminator_loss(real_output, fake_output):
122 |     real_loss = cross_entropy(tf.ones_like(real_output), real_output)
123 |     fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
124 |     total_loss = real_loss + fake_loss
125 |     return total_loss
126 | 
127 | #The generator's loss quantifies how well it was able to trick the discriminator. Intuitively, if the generator is performing well, the discriminator will classify the fake images as real (or 1). Here, we will compare the discriminators decisions on the generated images to an array of 1s.
128 | 
129 | def generator_loss(fake_output):
130 |     return cross_entropy(tf.ones_like(fake_output), fake_output)
131 | 
132 | #The discriminator and the generator optimizers are different since we will train two networks separately.
133 | 
134 | generator_optimizer = tf.keras.optimizers.Adam(1e-4)
135 | discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)
136 | 
137 | #This code also demonstrates how to save and restore models, which can be helpful in case a long running training task is interrupted.
138 | 
139 | checkpoint_dir = './training_checkpoints'
140 | checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
141 | checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
142 |                                  discriminator_optimizer=discriminator_optimizer,
143 |                                  generator=generator,
144 |                                  discriminator=discriminator)
145 | 
146 | 
147 | EPOCHS = 50
148 | noise_dim = 100
149 | num_examples_to_generate = 16
150 | 
151 | # We will reuse this seed overtime (so it's easier)
152 | # to visualize progress in the animated GIF)
153 | seed = tf.random.normal([num_examples_to_generate, noise_dim])
154 | 
155 | #The training loop begins with generator receiving a random seed as input. That seed is used to produce an image. The discriminator is then used to classify real images (drawn from the training set) and fakes images (produced by the generator). The loss is calculated for each of these models, and the gradients are used to update the generator and discriminator.
156 | 
157 | # Notice the use of `tf.function`
158 | # This annotation causes the function to be "compiled".
159 | @tf.function
160 | def train_step(images):
161 |     noise = tf.random.normal([BATCH_SIZE, noise_dim])
162 | 
163 |     with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
164 |       generated_images = generator(noise, training=True)
165 | 
166 |       real_output = discriminator(images, training=True)
167 |       fake_output = discriminator(generated_images, training=True)
168 | 
169 |       gen_loss = generator_loss(fake_output)
170 |       disc_loss = discriminator_loss(real_output, fake_output)
171 | 
172 |     gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
173 |     gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)
174 | 
175 |     generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
176 |     discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))
177 | 
178 | def train(dataset, epochs):
179 |   for epoch in range(epochs):
180 |     start = time.time()
181 | 
182 |     for image_batch in dataset:
183 |       train_step(image_batch)
184 | 
185 |     # Produce images for the GIF as we go
186 |     display.clear_output(wait=True)
187 |     generate_and_save_images(generator,
188 |                              epoch + 1,
189 |                              seed)
190 | 
191 |     # Save the model every 15 epochs
192 |     if (epoch + 1) % 15 == 0:
193 |       checkpoint.save(file_prefix = checkpoint_prefix)
194 | 
195 |     print ('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))
196 | 
197 |   # Generate after the final epoch
198 |   display.clear_output(wait=True)
199 |   generate_and_save_images(generator,
200 |                            epochs,
201 |                            seed)
202 | 
203 | 
204 | def generate_and_save_images(model, epoch, test_input):
205 |   # Notice `training` is set to False.
206 |   # This is so all layers run in inference mode (batchnorm).
207 |   predictions = model(test_input, training=False)
208 | 
209 |   fig = plt.figure(figsize=(4,4))
210 | 
211 |   for i in range(predictions.shape[0]):
212 |       plt.subplot(4, 4, i+1)
213 |       plt.imshow(predictions[i, :, :, 0] * 127.5 + 127.5, cmap='gray')
214 |       plt.axis('off')
215 | 
216 |   plt.savefig('image_at_epoch_{:04d}.png'.format(epoch))
217 |   plt.show()
218 | 
219 | #Call the train() method defined above to train the generator and discriminator simultaneously. Note, training GANs can be tricky. It's important that the generator and discriminator do not overpower each other (e.g., that they train at a similar rate).
220 | 
221 | #At the beginning of the training, the generated images look like random noise. As training progresses, the generated digits will look increasingly real. After about 50 epochs, they resemble MNIST digits. This may take about one minute / epoch with the default settings on Colab.
222 | 
223 | train(train_dataset, EPOCHS)
224 | 
225 | checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
226 | 
227 | # Display a single image using the epoch number
228 | def display_image(epoch_no):
229 |   return PIL.Image.open('image_at_epoch_{:04d}.png'.format(epoch_no))
230 | 
231 | display_image(EPOCHS)
232 | 
233 | anim_file = 'dcgan.gif'
234 | 
235 | with imageio.get_writer(anim_file, mode='I') as writer:
236 |   filenames = glob.glob('image*.png')
237 |   filenames = sorted(filenames)
238 |   last = -1
239 |   for i,filename in enumerate(filenames):
240 |     frame = 2*(i**0.5)
241 |     if round(frame) > round(last):
242 |       last = frame
243 |     else:
244 |       continue
245 |     image = imageio.imread(filename)
246 |     writer.append_data(image)
247 |   image = imageio.imread(filename)
248 |   writer.append_data(image)
249 | 
250 | import IPython
251 | if IPython.version_info > (6,2,0,''):
252 |   display.Image(filename=anim_file)
253 | 
254 | 
255 | 
256 | 
257 | 


--------------------------------------------------------------------------------
/in-classMaterial/day2/INTL450Syntax.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day2/INTL450Syntax.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day2/lab1.py:
--------------------------------------------------------------------------------
 1 | def binarify(num): 
 2 |   """convert positive integer to base 2"""
 3 |   if num<=0: return '0'
 4 |   digits = []
 5 |   return ''.join(digits)
 6 | 
 7 | def int_to_base(num, base):
 8 |   """convert positive integer to a string in any base"""
 9 |   if num<=0:  return '0' 
10 |   digits = []
11 |   return ''.join(digits)
12 | 
13 | def base_to_int(string, base):
14 |   """take a string-formatted number and its base and return the base-10 integer"""
15 |   if string=="0" or base <= 0 : return 0 
16 |   result = 0 
17 |   return result 
18 | 
19 | def flexibase_add(str1, str2, base1, base2):
20 |   """add two numbers of different bases and return the sum"""
21 |   result = int_to_base(tmp, base1)
22 |   return result 
23 | 
24 | def flexibase_multiply(str1, str2, base1, base2):
25 |   """multiply two numbers of different bases and return the product"""
26 |   result = int_to_base(tmp, base1)
27 |   return result 
28 | 
29 | def romanify(num):
30 |   """given an integer, return the Roman numeral version"""
31 |   result = ""
32 |   return result
33 |   
34 | 
35 | 


--------------------------------------------------------------------------------
/in-classMaterial/day2/lab1_solutions.py:
--------------------------------------------------------------------------------
 1 | def binarify(num): 
 2 |   """convert positive integer to base 2"""
 3 |   if num<=0: return '0'
 4 |   digits=[]
 5 |   while num>0:
 6 |     digits.append(num%2)
 7 |     num=num//2
 8 |   digits=digits[::-1]
 9 |   return ''.join(str(e) for e in digits)
10 | 
11 | def int_to_base(num, base):
12 |   """convert positive integer to a string in any base"""
13 |   if num==0:  return '0' 
14 |   if base<=0: return '0'
15 |   if base==1: return '1'*num
16 |   digits = []
17 |   negative=False
18 |   if num<0: num*=(-1); negative=True
19 |   while num>0:
20 |     digits.append(num%base)
21 |     num=num//base
22 |   digits=digits[::-1]
23 |   if negative: return '-'+''.join(str(e) for e in digits)
24 |   return ''.join(str(e) for e in digits)
25 | 
26 | def base_to_int(string, base):
27 |   """take a string-formatted number and its base and return the base-10 integer"""
28 |   if string=="0" or base <= 0 : return 0 
29 |   negative=False
30 |   if string[0]=='-': string=string[1:]; negative=True
31 |   result = 0 
32 |   num=len(string)
33 |   for i in string:
34 |     num-=1
35 |     result+=((base**num)*int(i))
36 |   if negative: return result*(-1)
37 |   return result 
38 | 
39 | def flexibase_add(str1, str2, base1, base2):
40 |   """add two numbers of different bases and return the sum"""
41 |   return base_to_int(str1, base1)+base_to_int(str2,base2)
42 | 
43 | 
44 | def flexibase_multiply(str1, str2, base1, base2):
45 |   """multiply two numbers of different bases and return the product"""
46 |   return base_to_int(str1,base1)*base_to_int(str2,base2) 
47 | 
48 | def romanify(num):
49 |   """given an integer, return the Roman numeral version"""
50 |   result = ""
51 |   result+=(num//1000*'M')
52 |   num%=1000
53 |   hold=num//100
54 |   num%=100
55 |   if hold<=3: result+=hold*'C'
56 |   elif hold==4: result+='CD'
57 |   elif hold>4 and hold<9: result+=('D'+'C'*(hold-5))
58 |   else: result+='CM'
59 |   hold=num//10
60 |   if hold<=3: result+=hold*'X'
61 |   elif hold==4: result+='XL'
62 |   elif hold>4 and hold<9: result+=('L'+'X'*(hold-5))
63 |   else: result+='XC'
64 |   hold=num%10
65 |   if hold<=3: result+=hold*'I'
66 |   elif hold==4: result+='IV'
67 |   elif hold>4 and hold<9: result+=('V'+'I'*(hold-5))
68 |   else: result+='IX'
69 |   return result
70 | 
71 | 


--------------------------------------------------------------------------------
/in-classMaterial/day3/ClassesSlides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day3/ClassesSlides.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day3/clock_lab.py:
--------------------------------------------------------------------------------
 1 | class Clock(object):
 2 |     def __init__(self, hour, minutes):
 3 |         self.minutes = minutes
 4 |         self.hour = hour
 5 | 
 6 |     @classmethod
 7 |     def at(cls, hour, minutes=0):
 8 |         return cls(hour, minutes)
 9 | 
10 |     def __str__(self):
11 |     
12 |     def __add__(self,minutes):
13 |     
14 |     def __sub__(self,minutes):
15 |     
16 |     def __eq__(self, other):
17 |     
18 |     def __ne__(self, other):
19 | 


--------------------------------------------------------------------------------
/in-classMaterial/day3/clock_solution.py:
--------------------------------------------------------------------------------
 1 | class Clock(object):
 2 |     def __init__(self, hour, minutes=0):
 3 |         self.minutes = '0'*(2-len(str(minutes)))+str(minutes)
 4 |         self.hour = '0'*(2-len(str(hour)))+str(hour)
 5 |     def __str__(self):
 6 |         return self.hour+":"+self.minutes
 7 |     def __repr__(self):
 8 |         return self.__str__()
 9 |     @classmethod
10 |     def at(cls, hour, minutes=0):
11 |         return cls(hour, minutes)
12 |     def __add__(self,minutes):
13 |         time=(int(self.hour)*60+int(self.minutes)+int(minutes))%(24*60)
14 |         return Clock(time//60,time%60)
15 |     def __sub__(self,minutes):
16 |         return self+((-1)*minutes)
17 |     def __eq__(self, other):
18 |         return (self.hour==other.hour and self.minutes==other.minutes)
19 |     def __ne__(self, other):
20 |         return not self==other
21 | 


--------------------------------------------------------------------------------
/in-classMaterial/day3/parent-child.py:
--------------------------------------------------------------------------------
 1 | class Parent():
 2 |   def __init__(self, sex, firstname, lastname):
 3 |     self.sex = sex
 4 |     self.firstname = firstname
 5 |     self.lastname = lastname
 6 |     self.kids = []
 7 | 
 8 |   def role(self):
 9 |     if self.sex == "Male":
10 |       return "Father"
11 |     else:
12 |       return "Mother"
13 | 
14 |   def have_child(self, name):
15 |     child = Child(name, self)
16 |     print(self.firstname + " is having a child named " + child.name())
17 |     print("They will make a very good " + self.role())
18 |     self.kids.append(child)
19 |     return child
20 | 
21 |   def list_children(self):
22 |     for kid in self.kids:
23 |       print("I am the " + self.role() + " of " + kid.name())
24 | 
25 | class Child():
26 |   def __init__(self, firstname, parent):
27 |     self.parent = parent 
28 |     self.lastname = parent.lastname
29 |     self.firstname = firstname
30 | 
31 |   def set_name(self, new_first_name, new_last_name):
32 |     self.firstname = new_first_name
33 |     self.lastname = new_last_name
34 | 
35 |   def name(self):
36 |     return "%s %s" % (self.firstname, self.lastname)
37 | 
38 |   def introduce(self):
39 |     return "Hi I'm " + self.name()
40 | 
41 |   def siblings(self):
42 |     for kid in self.parent.kids:
43 |       if kid != self:
44 |         print("I have a sibling named " + kid.name())
45 |   
46 |   def __str__(self):
47 |   	return "%s" %self.firstname 
48 | 
49 | mom = Parent("Female", "Jane", "Smith")
50 | mom.list_children()
51 | jill=mom.have_child("Jill")
52 | jill.firstname
53 | jill.parent.firstname
54 | jill.set_name("Jillian", "Jones")
55 | print(jill.introduce())
56 | print(jill == mom.kids[0])
57 | jack = mom.have_child("Jack")
58 | print(jack.introduce())
59 | jack.parent.kids[0].parent.list_children()
60 | jack.siblings()
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/in-classMaterial/day3/polymorphism.py:
--------------------------------------------------------------------------------
 1 | class Animal(object):
 2 |   living="Yes!"
 3 |   def __init__(self, name):    # Constructor of the class
 4 |       self.name = name
 5 |       
 6 |   def talk(self):              # Abstract method, defined by convention only
 7 |   	raise NotImplementedError("Subclass must implement abstract method")
 8 |   	 
 9 | class Cat(Animal):
10 |   def talk(self):
11 |     return self.meow()
12 |     
13 |   def meow(self):
14 |     return 'Meow!'
15 |  
16 | class Dog(Animal):
17 |   def talk(self):
18 |     return self.bark()
19 |   
20 |   def bark(self):
21 |     return 'Woof! Woof!'
22 |       
23 | class Fish(Animal):
24 |   
25 |   def swim(self):
26 |     pass
27 |   
28 |   def __str__(self):
29 |     return "I am a fish!"
30 |       
31 | animals = [Cat('Foo'),
32 |            Dog('Bar'),
33 |            Fish('nemo')]
34 |  
35 | # for animal in animals:
36 | #   print(animal.name + ': ' + animal.talk())
37 |   
38 | # f = Fish("foo")
39 | # print("Hi, " + str(f))
40 | 
41 | 
42 | 


--------------------------------------------------------------------------------
/in-classMaterial/day3/school.py:
--------------------------------------------------------------------------------
 1 | # - Add a student's name to the roster for a grade
 2 | # - Get a list of all students enrolled in a grade
 3 | # - Get a sorted list of all students in all grades.
 4 | # 
 5 | # Note that all our students only have one name.
 6 | # (It's a small town, what do you want?)
 7 | 
 8 | class school():
 9 |     def __init__(self, school_name): #initialize instance of class School with parameter name
10 |         self.school_name = school_name #user must put name, no default
11 |         self.db = {} #initialize empty dictionary to store kids and grades
12 |         
13 |     def add(self, name, student_grade): #add a kid to a grade in instance of School
14 |         if student_grade in self.db: #need to check if the key for the grade already exists, otherwise assigning it will return error
15 |             self.db[student_grade].add(name) #add kid to the set of kids within the dictionary
16 |         else: self.db[student_grade] = {name} #if the key doesn't exist, create it and put kid in
17 | 
18 |     def sort(self): #sorts kids alphabetically and returns them in tuples (because they are immutable)
19 |         sorted_students={} #sets up empty dictionary to store sorted tuples
20 |         for key in self.db.keys(): #loop through each key
21 |             sorted_students[key] = tuple(sorted(self.db[key])) #add dictionary entry with key being the grade and the entry the tuple of kids
22 |         return sorted_students
23 | 
24 |     def grade(self, check_grade):
25 |         if check_grade not in self.db: return None #if the key doesn't exist, there are no kids in that grade: return None
26 |         return self.db[check_grade] #if None wasn't returned above, return elements within dictionary, or kids in grade
27 | 
28 |     def __str__(self): #print function will display the school name on one line, and sorted kids on other line
29 |         return "%s\n%s" %(self.school_name, self.sort())
30 | 


--------------------------------------------------------------------------------
/in-classMaterial/day4/TestingSlides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day4/TestingSlides.pdf


--------------------------------------------------------------------------------
/in-classMaterial/day4/__pycache__/fizzbuzz.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/day4/__pycache__/fizzbuzz.cpython-36.pyc


--------------------------------------------------------------------------------
/in-classMaterial/day4/exception.py:
--------------------------------------------------------------------------------
 1 | raise Exception
 2 | print("I raised an exception!")
 3 | 
 4 | raise Exception('I raised an exception!')
 5 | 
 6 | try:
 7 |     print(a)
 8 | except NameError:
 9 | 	print("oops name error")	
10 | except:
11 | 	print("oops")
12 | finally:
13 | 	print("Yes! I did it!")
14 | 	
15 | 	
16 | for i in range(1,10):
17 | 	if i==5:
18 | 		print("I found five!")
19 | 		continue
20 | 		print("Here is five!")
21 | 	else:
22 | 		print(i)
23 | else:
24 | 	print("I went through all iterations!")
25 | 
26 | 


--------------------------------------------------------------------------------
/in-classMaterial/day4/exceptions_example.py:
--------------------------------------------------------------------------------
 1 | import traceback
 2 | 
 3 | class CustomException(Exception): # inherits from Exception
 4 |   def __init__(self, value):
 5 |     self.value = value
 6 |       
 7 |   def __str__(self):
 8 |     return self.value
 9 | 
10 |     def i_call_a_function_with_errors():
11 |       try:
12 |         print("Calling a function....")
13 |         #function_with_generic_error()
14 |         #function_with_custom_error()
15 |         #function_with_unknown_error(1)
16 |         function_that_does_not_exist()
17 |         print("Tada!")
18 |       except CustomException as inst: # `as' gives us access to the exception
19 |         print("Custom Error Caught! Error({0})".format(inst.value))
20 |       except NameError or AttributeError:
21 |         print("Whoa, chill out")
22 |       except: # any exception is caught, even ones you don't know about
23 |         print("Default Error Caught!")
24 |       else: # if nothing broke, then run this block
25 |         print("No error raised.")
26 |         traceback.print_exc() # this prints the traceback
27 |       finally: # this block is always run
28 |         print("Goodbye!")
29 |           
30 |     def function_with_generic_error():
31 |       raise Exception("Foo!") # this method doesn't know what to do with the exception
32 |         
33 |     def function_with_custom_error():
34 |       raise CustomException("Foo Bar!") # this will be handled in the function above}
35 |         
36 |     def function_with_unknown_error(foo):
37 |       foo.bar()
38 | 
39 |     i_call_a_function_with_errors()
40 | 


--------------------------------------------------------------------------------
/in-classMaterial/day4/fizzbuzz.py:
--------------------------------------------------------------------------------
 1 | 
 2 | def FizzBuzz(i):
 3 |   try:
 4 |     if i % 15 == 0:
 5 |       raise Exception("Divisible by 3 and 5!")
 6 |     if i % 3 == 0:
 7 |       return "Fizz"
 8 |     if i % 5 == 0:
 9 |       return "Buzz"
10 |     print("finally")
11 |   except:
12 |     if i % 15 == 0:
13 |       return("FizzBuzz")
14 |   else:
15 |     return str(i)
16 |   finally:
17 |     print("finally")
18 |   
19 | 
20 | for i in range(18):
21 |   print(str(i) + ": " + FizzBuzz(i))
22 | 


--------------------------------------------------------------------------------
/in-classMaterial/day4/fizzbuzz_test.py:
--------------------------------------------------------------------------------
 1 | #FizzBuzzTest
 2 | 
 3 | import unittest
 4 | import fizzbuzz
 5 | 
 6 | class FizzBuzzTest(unittest.TestCase):
 7 | 	
 8 | 	def test_fizz(self):
 9 | 		self.assertEqual('Fizz',fizzbuzz.FizzBuzz(9))
10 | 		self.assertNotEqual('Fizz',fizzbuzz.FizzBuzz(15))
11 | 	
12 | 	def test_buzz(self):
13 | 		self.assertEqual('Buzz',fizzbuzz.FizzBuzz(10))
14 | 		
15 | 	def test_fizzbuzz(self):
16 | 		self.assertEqual('FizzBuzz',fizzbuzz.FizzBuzz(15))
17 | 
18 | 	def test_error(self):
19 | 		with self.assertRaises(TypeError):
20 | 			fizzbuzz.FizzBuzz('b')
21 | 			
22 | 	def test5(self):
23 | 	    self.assertEqual('Buzz',fizzbuzz.FizzBuzz(15))
24 |   
25 | if __name__ == '__main__': #Add this if you want to run the test with this script.
26 |   unittest.main()
27 | 


--------------------------------------------------------------------------------
/in-classMaterial/day4/print_integer.py:
--------------------------------------------------------------------------------
 1 | def print_integer(integer):
 2 | 	return "Here is my integer: " + str(integer)
 3 | 	
 4 | def print_integer(integer):
 5 | 	try:
 6 | 		int(integer)
 7 | 	except ValueError:
 8 | 		print("Put in a number.")
 9 | 	else:
10 | 		print("Here is my integer: " + str(integer))
11 | 
12 | def print_integer(integer):
13 | 	if type(integer)==int:
14 | 		print("Here is my integer: " + str(integer))
15 | 	else:
16 | 		raise Exception("This is not an integer")
17 | 
18 | def print_integer(integer):
19 | 	if type(integer)==int: 
20 | 		return "Here is my integer: " + str(integer)
21 | 	else:
22 | 		raise TypeError("Enter an integer!")
23 | 				
24 | def print_integer(integer):
25 | 	try:
26 | 		if integer %1==0:
27 | 			return "Here is my integer: " + str(integer)
28 | 		else:
29 | 			return "This has decimals!"
30 | 	except:
31 | 		raise TypeError("Enter a number!")
32 | 		
33 | def print_integer(integer):
34 | 	try:
35 | 		if integer %1==0:
36 | 			print("Congratulations! You entered an integer!")
37 | 		else:
38 | 			raise Exception
39 | 	except:
40 | 		raise TypeError("This is not an integer!")
41 | 	else:
42 | 		return "Here is my integer: " + str(integer)
43 | 
44 | 
45 | def print_integer(integer):
46 | 	try:
47 | 		if integer %1==0:
48 | 			print("Here is my integer: " + str(integer))
49 | 		else:
50 | 			raise Exception
51 | 	except TypeError:
52 | 		print("Enter a number!")
53 | 	except:
54 | 		print("Integers can't have decimals!")
55 | 
56 | 				
57 | def print_integer(integer):
58 | 	try:
59 | 		if integer %1==0:
60 | 			print("Congratulations! You entered an integer!")
61 | 		else:
62 | 			raise Exception
63 | 	except TypeError:
64 | 		raise TypeError("Enter a number!")
65 | 	except:
66 | 		raise TypeError("Integers can't have decimals!"	)
67 | 	else:
68 | 		return "Here is my integer: " + str(integer)
69 | 
70 | 		
71 | #Create your own exception		
72 | class CustomException(Exception): 
73 |   def __init__(self, value):
74 |     self.value = value
75 |   def __str__(self):
76 |     return str(self.value)
77 |     
78 |     
79 | def print_integer(integer):
80 | 	try:
81 | 		if integer %1==0:
82 | 			print("Congratulations! You entered an integer!")
83 | 		else:
84 | 			raise CustomException(integer%1)
85 | 	except CustomException as e:
86 | 		raise TypeError("Your number has a decimal: " + str(e.value))
87 | 	except TypeError:
88 | # 		pass
89 | 		raise TypeError("Enter a number!")
90 | 	else:
91 | 		return "Here is my integer: " + str(integer)
92 | 	finally:
93 | 		print("I'm done!")
94 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/csvstuff.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import os
 3 | 
 4 | #Open a file stream and create a CSV writer object
 5 | os.getcwd()
 6 | os.chdir("KocPython2020/in-classMaterial/day5/")
 7 | f = open('test.csv', 'w')
 8 | my_writer = csv.writer(f)
 9 | 
10 | for i in range(1, 100):
11 |   my_writer.writerow([i, i-1])
12 |   
13 | f.flush()
14 | f.close()
15 | 
16 | #The correct way!
17 | with open('test1.csv', 'w') as f:
18 |   my_writer = csv.writer(f)
19 |   for i in range(1, 100):
20 |     my_writer.writerow([i, i-1])
21 |     
22 | #How about with field names
23 | with open('test_with_fields.csv', 'w') as f:
24 |   my_writer = csv.DictWriter(f, fieldnames=("A", "B"))
25 |   my_writer.writeheader()
26 |   for i in range(1, 100):
27 |     my_writer.writerow({"B":i, "A":i-1})
28 | 
29 | #Now lets read some things
30 | with open('test1.csv', 'r') as f:
31 |   print("Reading test1.csv")
32 |   my_reader = csv.reader(f)
33 |   for row in my_reader:
34 |     print(row)
35 | 
36 | #Now lets read some things with field names
37 | with open('test_with_fields.csv', 'r') as f:
38 |   print("\nReading test_with_fields.csv")
39 |   my_reader = csv.DictReader(f)
40 |   for row in my_reader:
41 |     print(row)
42 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/faculty.csv:
--------------------------------------------------------------------------------
 1 | Name,Email
 2 | Belgin San Akca,bakca@ku.edu.tr
 3 | Şener Aktürk,sakturk@ku.edu.tr
 4 | Özlem Altan,ozaltan@ku.edu.tr
 5 | Merih Angın,mangin@ku.edu.tr
 6 | Altay Atlı,aatli@ku.edu.tr
 7 | Selim Erdem Aytaç,saytac@ku.edu.tr
 8 | Caner Bakır,cbakir@ku.edu.tr
 9 | Umur Başdaş,ubasdas@ku.edu.tr
10 | Reşat Bayer,rbayer@ku.edu.tr
11 | David Carlson,dcarlson@ku.edu.tr
12 | Ali Çarkoğlu,acarkoglu@ku.edu.tr
13 | Boğaç Erozan,herozan@ku.edu.tr
14 | Güneş Ertan,gunesertan@ku.edu.tr
15 | Ahmet İçduygu,aicduygu@ku.edu.tr
16 | Ziya Öniş,zonis@ku.edu.tr
17 | İpek Ruacan,iruacan@ku.edu.tr
18 | Bahar Rumelili,brumelili@ku.edu.tr
19 | Murat Somer,musomer@ku.edu.tr
20 | Sedef Turper,sturper@ku.edu.tr
21 | Ayşen Üstübici,austubici@ku.edu.tr
22 | Şuhnaz Yılmaz,syilmaz@ku.edu.tr
23 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/filestuff.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | os.chdir('KocPython2020/in-classMaterial/day5')
 5 | 
 6 | #The cleanest way to handle files (gracefully handles exceptions)
 7 | with open('readfile.txt') as f:
 8 |   #We can read files in chunks
 9 |   the_whole_thing = f.read()
10 |   print("The Whole Thing\n*****************************************************************************\n{0}".format(the_whole_thing))
11 |   
12 |   #We can read files line by line
13 |   print("\nLooping over lines\n*****************************************************************************\n")
14 |   f.seek(0)
15 |   lines = f.readlines()
16 |   for l in lines:
17 |     print("{0}".format(l))
18 |     
19 |   #More efficiently we can loop over the file object (i.e. we don't need the variable lines)
20 |   print("\nLooping over the file object\n********************\n")
21 |   f.seek(0)
22 |   for l in f:
23 |     print("{0}".format(l))
24 |     
25 |   #You can also go byte by byte (don't do this)
26 |   print("\nByte by Byte\n********************\n")
27 |   f.seek(0)
28 |   next_byte = f.read(1)
29 |   while next_byte != "":
30 |     sys.stdout.write(next_byte)
31 |     next_byte = f.read(1)
32 |     
33 | # We can also manually open and close files, now we need to handle exceptions and closing files
34 | f =  open('readfile.txt', 'r')
35 | print("\nManually Opened File\n********************\n")
36 | print(f.read())
37 | f.close()
38 | 
39 | #Writing files is easy, open command takes r, w, a plus some others
40 | with open('writefile.txt', 'w') as f:
41 |   #wipes the file clean and opens it
42 |   f.write("Hi guys.")
43 |   f.write("Does this go on the second line?")
44 |   f.writelines(['a', 'b', 'c'])
45 |   # f.flush() # If using the file object interactively you may need to flush the buffer
46 | 
47 | with open('writefile.txt', 'a') as f:
48 |   #just tacks some things on the end
49 |   f.write("\nI got appended!")
50 |   f.flush()
51 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/mathofpolitics.csv:
--------------------------------------------------------------------------------
  1 | url,is_post,publish_date,post_title,comment_count
  2 | http://www.mathofpolitics.com,0,NA,NA,NA
  3 | http://www.mathofpolitics.com/2016/06/22/trump-has-raised-little-money-much-unitemized-so-sad/,1,"June 22, 2016","Trump Has Raised Little Money, Much Unitemized. SO SAD! | The Math Of Politics",0
  4 | http://www.mathofpolitics.com/2016/05/12/extreme-and-unpredictable-is-ideology-collapsing-in-the-senate-gop/,1,"May 12, 2016",Extreme and Unpredictable: Is Ideology Collapsing in the Senate GOP? | The Math Of Politics,0
  5 | http://www.mathofpolitics.com/2016/05/11/comparing-the-legislative-records-of-the-candidates/,1,"May 11, 2016",Comparing the Legislative Records of the Candidates | The Math Of Politics,0
  6 | http://www.mathofpolitics.com/2016/04/28/whos-got-the-power-measuring-how-much-trump-went-banzhaf-on-tuesday/,1,"April 28, 2016",Who’s Got The Power? Measuring How Much Trump Went Banzhaf On Tuesday | The Math Of Politics,0
  7 | http://www.mathofpolitics.com/2016/02/07/trump-cruz-rubio-the-game-theory-of-when-the-enemy-of-your-enemy-is-your-enemy/,1,"February 7, 2016","Trump, Cruz, Rubio: The Game Theory of When The Enemy of Your Enemy Is Your Enemy. | The Math Of Politics",0
  8 | http://www.mathofpolitics.com/2016/02/03/the-gops-reality-is-truel-indeed/,1,"February 3, 2016","The GOP’s Reality is Truel, Indeed | The Math Of Politics",0
  9 | http://www.mathofpolitics.com/2016/01/25/the-patriots-are-commonly-uncommon/,1,"January 25, 2016",The Patriots Are Commonly Uncommon | The Math Of Politics,0
 10 | http://www.mathofpolitics.com/2015/11/10/one-thing-leads-to-another-delaying-da-rt-standards-to-discuss-better-da-rt-standards-will-be-ironic/,1,"November 10, 2015",One Thing Leads to Another: “Delaying“ DA-RT Standards to Discuss Better DA-RT Standards Will Be Ironic | The Math Of Politics,2
 11 | http://www.mathofpolitics.com/2015/11/06/responding-to-a-petition-to-nobody-or-everybody/,1,"November 6, 2015",Responding To A Petition To Nobody (Or Everybody) | The Math Of Politics,2
 12 | http://www.mathofpolitics.com/2015/08/12/super-pac-bites-man/,1,"August 12, 2015",Super PAC (Bites) Man | The Math Of Politics,0
 13 | http://www.mathofpolitics.com/page/2/,0,NA,NA,NA
 14 | http://www.mathofpolitics.com/2015/08/05/this-thursday-at-10-fox-news-is-correct/,1,"August 5, 2015","This Thursday, At 10, FOX News Is Correct | The Math Of Politics",0
 15 | http://www.mathofpolitics.com/2015/08/02/the-true-trump-card-you-cant-buy-credibility/,1,"August 2, 2015",The True Trump Card: You Can’t Buy Credibility | The Math Of Politics,0
 16 | http://www.mathofpolitics.com/2015/07/30/in-comes-volatility-nonplussing-both-fairness-inequality/,1,"July 30, 2015","In Comes Volatility, Nonplussing Both Fairness &amp; Inequality | The Math Of Politics",0
 17 | http://www.mathofpolitics.com/2015/06/27/the-statistical-realities-of-measuring-segregation-its-hard-being-both-diverse-homogeneous/,1,"June 27, 2015",The Statistical Realities of Measuring Segregation: It’s Hard Being Both Diverse &amp; Homogeneous | The Math Of Politics,0
 18 | http://www.mathofpolitics.com/2015/03/19/cotton-pickin/,1,"March 19, 2015",Cotton Pickin’? | The Math Of Politics,0
 19 | http://www.mathofpolitics.com/2015/03/03/how-two-peoples-rights-can-do-both-people-wrong-vaccines-anti-social-choice-theory/,1,"March 3, 2015",How Two People’s Rights Can Do Both People Wrong: Vaccines &amp; (Anti-)Social Choice Theory | The Math Of Politics,0
 20 | http://www.mathofpolitics.com/2015/02/20/default-in-our-stars-kant-ankerous-varoufakis/,1,"February 20, 2015",Default In Our Stars: Kant-ankerous Varoufakis | The Math Of Politics,0
 21 | http://www.mathofpolitics.com/2014/10/29/on-the-possibility-of-an-ethical-election-experiment/,1,"October 29, 2014",On The Possibility of An Ethical Election Experiment | The Math Of Politics,0
 22 | http://www.mathofpolitics.com/2014/10/28/ethics-experiments-and-election-administration/,1,"October 28, 2014","Ethics, Experiments, and Election Administration | The Math Of Politics",0
 23 | http://www.mathofpolitics.com/2014/10/27/well-in-a-worst-case-scenario-your-treatment-works/,1,"October 27, 2014","Well, In a Worst Case Scenario, Your Treatment Works… | The Math Of Politics",4
 24 | http://www.mathofpolitics.com/page/3/,0,NA,NA,NA
 25 | http://www.mathofpolitics.com/2014/09/29/so-many-smells-so-little-time-in-defense-of-stinky-academic-writing/,1,"September 29, 2014","So Many Smells, So Little Time: In Defense of “Stinky” Academic Writing | The Math Of Politics",0
 26 | http://www.mathofpolitics.com/2014/08/20/ferguson-the-racial-disconnect-on-race/,1,"August 20, 2014",#Ferguson: The Racial Disconnect On Race | The Math Of Politics,2
 27 | http://www.mathofpolitics.com/2014/08/18/makes-us-stronger-the-math-of-protest-and-repression/,1,"August 18, 2014",Makes Us Stronger: The Math of Protest and Repression | The Math Of Politics,2
 28 | http://www.mathofpolitics.com/2014/08/06/the-bigger-the-data-the-harder-the-theory-of-measurement/,1,"August 6, 2014","The Bigger The Data, The Harder The (Theory of) Measurement  | The Math Of Politics",0
 29 | http://www.mathofpolitics.com/2014/07/30/the-math-of-getting-a-job-in-political-science/,1,"July 30, 2014",The Math of Getting a Job in Political Science | The Math Of Politics,0
 30 | http://www.mathofpolitics.com/2014/05/17/if-keyser-soze-ruled-america-would-we-know/,1,"May 17, 2014","If Keyser Söze Ruled America, Would We Know? | The Math Of Politics",0
 31 | http://www.mathofpolitics.com/2014/04/24/how-political-science-makes-politics-make-us-less-stupid/,1,"April 24, 2014",How Political Science Makes Politics Make Us Less Stupid | The Math Of Politics,0
 32 | http://www.mathofpolitics.com/2014/04/12/shining-a-little-more-light-on-transparency/,1,"April 12, 2014",Shining A Little More Light On Transparency | The Math Of Politics,0
 33 | http://www.mathofpolitics.com/2014/04/10/why-separate-when-you-can-lustrate/,1,"April 10, 2014",Why Separate When You Can…Lustrate!?! | The Math Of Politics,0
 34 | http://www.mathofpolitics.com/2014/04/10/how-transparency-could-harm-you-me-and-the-fomc/,1,"April 10, 2014","How Transparency Could Harm You, Me, and the FOMC | The Math Of Politics",0
 35 | http://www.mathofpolitics.com/page/4/,0,NA,NA,NA
 36 | http://www.mathofpolitics.com/2014/04/09/mind-the-gap-the-wages-of-aggregation-evaluation-and-conflict/,1,"April 9, 2014","Mind The Gap: The Wages of Aggregation, Evaluation, and Conflict | The Math Of Politics",0
 37 | http://www.mathofpolitics.com/2014/04/08/its-better-to-fight-when-you-can-win-or-at-least-look-like-you-did/,1,"April 8, 2014","It’s Better To Fight When You Can Win, Or At Least Look Like You Did | The Math Of Politics",0
 38 | http://www.mathofpolitics.com/2014/03/12/donation-discrimination-denotes-deliverance-of-democracy/,1,"March 12, 2014",Donation Discrimination Denotes Deliverance of Democracy  | The Math Of Politics,0
 39 | http://www.mathofpolitics.com/2014/04/08/my-ignorance-provokes-me-i-know-where-ukraine-is-and-i-still-want-to-fight/,1,"April 8, 2014",My Ignorance Provokes Me: I know Where Ukraine is and I Still Want to Fight | The Math Of Politics,2
 40 | http://www.mathofpolitics.com/2014/02/08/game-theory-is-punk-and-a-flying-f-to-the-q/,1,"February 8, 2014",Game Theory is Punk | The Math Of Politics,0
 41 | http://www.mathofpolitics.com/2014/02/06/speech-y-keen-or-why-nobody-worries-about-the-right-to-praise-the-government/,1,"February 6, 2014","Speech-y Keen, or Why Nobody Worries About the “Right to Praise the Government” | The Math Of Politics",1
 42 | http://www.mathofpolitics.com/2014/02/04/ceiling-the-deal-quid-pro-keystone/,1,"February 4, 2014",Ceiling the Deal: Quid Pro Keystone | The Math Of Politics,0
 43 | http://www.mathofpolitics.com/2014/02/02/ill-show-you-by-not-showing-up/,1,"February 2, 2014",I’ll Show You…By Not Showing Up | The Math Of Politics,0
 44 | http://www.mathofpolitics.com/2014/01/31/plumbing-presidential-power-pens-phones-paperwork/,1,"January 31, 2014","Plumbing Presidential Power: Pens, Phones, &amp; Paperwork | The Math Of Politics",0
 45 | http://www.mathofpolitics.com/2014/01/30/poor-work-counting-the-working-poor/,1,"January 30, 2014",Poor Work Counting the Working Poor | The Math Of Politics,1
 46 | http://www.mathofpolitics.com/page/5/,0,NA,NA,NA
 47 | http://www.mathofpolitics.com/2014/01/28/what-didnt-he-say-and-how-didnt-he-say-it/,1,"January 28, 2014",What Didn’t He Say? …And How Didn’t He Say it? | The Math Of Politics,0
 48 | http://www.mathofpolitics.com/2014/01/28/going-down-in-flames-to-rise-like-a-phoenix-in-the-primary/,1,"January 28, 2014",Going Down in Flames…To Rise Like A Phoenix (in the Primary) | The Math Of Politics,0
 49 | http://www.mathofpolitics.com/2014/01/23/so-what-now/,1,"January 23, 2014","So, What Now? | The Math Of Politics",2
 50 | http://www.mathofpolitics.com/2014/01/16/the-noted-is-always-notable/,1,"January 16, 2014",The Noted Is Always Notable | The Math Of Politics,0
 51 | http://www.mathofpolitics.com/2014/01/13/youre-welcome-for-the-thankless-thanks/,1,"January 13, 2014",You’re Welcome for the Thankless Thanks | The Math Of Politics,0
 52 | http://www.mathofpolitics.com/2014/01/10/ironic-quick-second-takes-on-sequential-rationality/,1,"January 10, 2014","Ironic, quick second takes on sequential rationality | The Math Of Politics",0
 53 | http://www.mathofpolitics.com/2014/01/10/oh-i-thought-you-said-you-wanted-to-sell-a-bus/,1,"January 10, 2014","Oh, I Thought You Said You Wanted To Sell A Bus… | The Math Of Politics",0
 54 | http://www.mathofpolitics.com/2013/12/20/cia-see-i-am-policy-relevant/,1,"December 20, 2013","CIA? See, I Am Policy Relevant | The Math Of Politics",0
 55 | http://www.mathofpolitics.com/2013/12/19/the-ties-that-bind-theory/,1,"December 19, 2013",The Ties That Bind Theory | The Math Of Politics,2
 56 | http://www.mathofpolitics.com/2013/12/05/mc-grammar-presents-u-shant-correct-this/,1,"December 5, 2013",MC Grammar Presents “U Shan’t Correct This” | The Math Of Politics,0
 57 | http://www.mathofpolitics.com/page/6/,0,NA,NA,NA
 58 | http://www.mathofpolitics.com/2013/10/25/inside-baseball-making-models-of-minds-making-models-behave/,1,"October 25, 2013","Inside Baseball: Making Models of Minds, Making Models “Behave” | The Math Of Politics",0
 59 | http://www.mathofpolitics.com/2013/10/21/youre-better-than-this/,1,"October 21, 2013",You’re Better Than This | The Math Of Politics,4
 60 | http://www.mathofpolitics.com/2013/10/20/let-me-confirm-your-belief-that-your-irrationality-is-rational/,1,"October 20, 2013",Let Me Confirm Your Belief That Your Irrationality Is Rational | The Math Of Politics,0
 61 | http://www.mathofpolitics.com/2013/10/18/no-seriously-that-was-such-a-bad-idea-we-must-do-it-again/,1,"October 18, 2013","No, Seriously, That Was Such A Bad Idea, WE MUST DO IT AGAIN | The Math Of Politics",2
 62 | http://www.mathofpolitics.com/2013/10/17/dis-spence-ing-with-the-debt-debacle/,1,"October 17, 2013",Dis-Spence-ing with the Debt Debacle | The Math Of Politics,4
 63 | http://www.mathofpolitics.com/2013/10/16/boehner-in-a-manger-the-entitativity-scene-in-dc/,1,"October 16, 2013",Boehner in a Manger? The Entitativity Scene in DC | The Math Of Politics,0
 64 | http://www.mathofpolitics.com/2013/10/15/my-bad-dispelling-the-implied-suspension-of-discharge/,1,"October 15, 2013",My Bad: Dispelling The Implied Suspension of Discharge | The Math Of Politics,0
 65 | http://www.mathofpolitics.com/2013/10/14/legerdeboehner-or-the-rules-rule/,1,"October 14, 2013","LegerdeBoehner, or “The Rules Rule.” | The Math Of Politics",0
 66 | http://www.mathofpolitics.com/2013/10/10/boehner-in-the-middle/,1,"October 10, 2013",Boehner in the Middle? | The Math Of Politics,0
 67 | http://www.mathofpolitics.com/2013/10/08/why-a-clean-cr-is-a-no-boehner/,1,"October 8, 2013",Why a Clean CR is A No Boehner | The Math Of Politics,0
 68 | http://www.mathofpolitics.com/page/7/,0,NA,NA,NA
 69 | http://www.mathofpolitics.com/2013/10/05/why-the-house-cant-discharge-its-duties/,1,"October 5, 2013",Why The House Can’t Discharge Its Duties | The Math Of Politics,0
 70 | http://www.mathofpolitics.com/2013/10/04/winning-at-all-costs-will-make-winning-costlier-than-it-needs-to-be/,1,"October 4, 2013",Winning At All Costs Will Make Winning Costlier (Than It Needs To Be) | The Math Of Politics,0
 71 | http://www.mathofpolitics.com/2013/10/03/shutdown-the-inherent-tension-between-responsive-responsible-governing/,1,"October 3, 2013",SHUTDOWN: The Inherent Tension Between Responsive &amp; Responsible Governing | The Math Of Politics,0
 72 | http://www.mathofpolitics.com/2013/09/21/putting-the-come-at-me-bro-in-comity-or-boehner-is-a-painer-to-mcconnell/,1,"September 21, 2013","Putting the “Come At Me, Bro” in “Comity” or, Boehner is a Painer to McConnell | The Math Of Politics",0
 73 | http://www.mathofpolitics.com/2013/09/20/the-politics-of-going-public/,1,"September 20, 2013",The Politics of Going Public | The Math Of Politics,1
 74 | http://www.mathofpolitics.com/2013/09/12/damn-he-asked-us-about-damascus-or-cjt-meets-wmd/,1,"September 12, 2013","Damn, He Asked US About Damascus or, ‘CJT Meets WMD’ | The Math Of Politics",0
 75 | http://www.mathofpolitics.com/2013/09/08/no-war-left-behind/,1,"September 8, 2013",No War Left Behind? | The Math Of Politics,0
 76 | http://www.mathofpolitics.com/2013/09/08/a-whip-applied-twice-is-half-a-whip/,1,"September 8, 2013",A Whip Applied Twice Is Half A Whip | The Math Of Politics,0
 77 | http://www.mathofpolitics.com/2013/09/06/if-you-whip-me-the-voters-will-whup-me/,1,"September 6, 2013","If You Whip Me, The Voters Will Whup Me | The Math Of Politics",4
 78 | http://www.mathofpolitics.com/2013/09/05/my-research-is-kind-of-obscene-but-i-knew-it-only-when-i-blogged-it/,1,"September 5, 2013",My Research Is Kind Of Obscene…But I Knew It Only When I Blogged It. | The Math Of Politics,1
 79 | http://www.mathofpolitics.com/page/8/,0,NA,NA,NA
 80 | http://www.mathofpolitics.com/2013/09/03/which-comes-first-theory-or-data/,1,"September 3, 2013","Which Comes First, Theory or Data? | The Math Of Politics",0
 81 | http://www.mathofpolitics.com/2013/08/16/there-is-no-networking-without-two-and-work-or-incentives-smelt-at-apsa/,1,"August 16, 2013","There is no Networking without “two” and “work” or, Incentives &amp; Smelt at APSA! | The Math Of Politics",0
 82 | http://www.mathofpolitics.com/2013/08/08/dont-panic-theory-and-empirics-are-both-alive-well-at-least-in-political-science/,1,"August 8, 2013",DON’T PANIC. Theory and Empirics Are Both Alive &amp; Well…at least in political science. | The Math Of Politics,0
 83 | http://www.mathofpolitics.com/2013/07/17/strength-numbers-is-a-weak-argument-better-than-a-strong-one/,1,"July 17, 2013",“Strength &amp; Numbers”: Is a Weak Argument Better Than A Strong One? | The Math Of Politics,0
 84 | http://www.mathofpolitics.com/2013/07/16/want-it-now-oh-well-give-it-to-you-later/,1,"July 16, 2013","Want It Now? Oh, We’ll Give It To You…Later | The Math Of Politics",0
 85 | http://www.mathofpolitics.com/2013/07/15/i-would-manipulate-it-if-it-werent-so-duggan-the-gibbardish-of-measurement/,1,"July 15, 2013",I Would Manipulate It If It Weren’t So Duggan: The Gibbardish of Measurement | The Math Of Politics,0
 86 | http://www.mathofpolitics.com/2013/07/12/a-byrd-in-the-hand-or-the-3-rs-of-the-senate-reid-rules-retribution/,1,"July 12, 2013","A Byrd in the Hand, or the 3 R’s of the Senate: Reid, Rules, &amp; Retribution | The Math Of Politics",0
 87 | http://www.mathofpolitics.com/2013/07/10/remuneration-of-the-nerds-or-putting-the-in-latex/,1,"July 10, 2013","Remuneration Of The Nerds, Or “Putting the $$ in LaTeX” | The Math Of Politics",0
 88 | http://www.mathofpolitics.com/2013/07/08/syllogism-i-hardly-know-him-the-uneasy-wedding-of-gay-marriage-political-conservativism/,1,"July 8, 2013",“Syllogism?  I Hardly Know Him!”: The Uneasy Wedding of Gay Marriage &amp; (Political) Conservativism | The Math Of Politics,0
 89 | http://www.mathofpolitics.com/2013/06/28/believe-me-when-i-say-that-i-want-to-believe-that-i-cant-believe-in-you/,1,"June 28, 2013",Believe Me When I Say That I Want To Believe That I Can’t Believe In You. | The Math Of Politics,0
 90 | http://www.mathofpolitics.com/page/9/,0,NA,NA,NA
 91 | http://www.mathofpolitics.com/2013/06/02/just-so-you-know-i-wont-know-the-politics-of-plausible-deniability/,1,"June 2, 2013","Just So You Know, I Won’t Know: The Politics of Plausible Deniability | The Math Of Politics",0
 92 | http://www.mathofpolitics.com/2013/05/29/uninsurable-risk-adverse-selection-and-the-politics-of-scandals/,1,"May 29, 2013",Uninsurable Risk: Adverse Selection and the Politics of Scandals | The Math Of Politics,0
 93 | http://www.mathofpolitics.com/2013/05/20/inside-baseball-weather-you-like-it-or-not-models-are-useful/,1,"May 20, 2013","Inside Baseball: Weather you like it or not, models are useful. | The Math Of Politics",0
 94 | http://www.mathofpolitics.com/2013/05/06/the-impermissibility-of-permission-structures/,1,"May 6, 2013",The Impermissibility of Permission Structures | The Math Of Politics,0
 95 | http://www.mathofpolitics.com/2013/04/22/unraveling-miranda-was-dzhokhar-told-of-the-public-safety-exception/,1,"April 22, 2013",Unraveling Miranda: Was Dzhokhar Told of the Public Safety Exception? | The Math Of Politics,0
 96 | http://www.mathofpolitics.com/2013/04/18/political-issues-are-like-cookies/,1,"April 18, 2013",Political Issues are Like Cookies | The Math Of Politics,0
 97 | http://www.mathofpolitics.com/2013/04/18/have-gun-will-vote/,1,"April 18, 2013","Have Gun, Will Vote | The Math Of Politics",0
 98 | http://www.mathofpolitics.com/2013/04/17/inside-baseball-the-off-the-path-less-traveled/,1,"April 17, 2013",Inside Baseball: The Off-The-Path Less Traveled | The Math Of Politics,0
 99 | http://www.mathofpolitics.com/2013/04/15/now-ill-show-you-mine-why-obama-budged-a-bit-on-the-budget/,1,"April 15, 2013","Now, I’ll Show You Mine: Why Obama Budged A Bit on the Budget | The Math Of Politics",0
100 | http://www.mathofpolitics.com/2013/04/02/inequality-smaller-ginis-can-fit-in-smaller-bottles/,1,"April 2, 2013",Inequality: Smaller GINIs Can Fit in Smaller Bottles | The Math Of Politics,0
101 | http://www.mathofpolitics.com/page/10/,0,NA,NA,NA
102 | http://www.mathofpolitics.com/2013/04/01/inside-baseball-uncommon-knowledge/,1,"April 1, 2013",Inside Baseball: Uncommon Knowledge | The Math Of Politics,0
103 | http://www.mathofpolitics.com/2013/03/24/the-slow-burn-of-coburn-or-get-the-hell-off-my-lawn/,1,"March 24, 2013","The Slow Burn of Coburn or, “Get The Hell Off My Lawn!” | The Math Of Politics",0
104 | http://www.mathofpolitics.com/2013/03/19/consensual-resolution/,1,"March 19, 2013",Consensual Resolution?  | The Math Of Politics,0
105 | http://www.mathofpolitics.com/2013/03/17/quid-pro-status-quo-a-tale-of-two-tails/,1,"March 17, 2013",Quid Pro Status Quo: A Tale of Two Tails | The Math Of Politics,0
106 | http://www.mathofpolitics.com/2013/03/16/showdown-at-uzi-gulch-putting-the-glock-in-the-spiel/,1,"March 16, 2013",Showdown at Uzi Gulch: Putting the Glock in the Spiel | The Math Of Politics,0
107 | http://www.mathofpolitics.com/2013/03/07/greshams-law-in-the-senate-how-filibuster-reform-begot-rand-pauls-filibuster/,1,"March 7, 2013",Gresham’s Law in the Senate: How Filibuster Reform Begot Rand Paul’s Filibuster | The Math Of Politics,0
108 | http://www.mathofpolitics.com/2013/02/21/i-study-political-science-youre-welcome/,1,"February 21, 2013",I Study Political Science.  You’re Welcome. | The Math Of Politics,0
109 | http://www.mathofpolitics.com/2013/02/17/immigration-reform-you-do-it-so-i-dont-have-to-really/,1,"February 17, 2013",Immigration Reform: You do it…So I Don’t Have To…Really. | The Math Of Politics,0
110 | http://www.mathofpolitics.com/2013/02/06/who-has-a-secret-list-and-flies-around-the-world-with-gifts/,1,"February 6, 2013",Who Has A Secret List and Flies Around The World With Gifts? | The Math Of Politics,0
111 | http://www.mathofpolitics.com/2013/02/02/so-optimal-you-hardly-notice/,1,"February 2, 2013",So Optimal You Hardly Notice | The Math Of Politics,0
112 | http://www.mathofpolitics.com/page/11/,0,NA,NA,NA
113 | http://www.mathofpolitics.com/2013/01/27/the-recesses-of-recess/,1,"January 27, 2013",The Recesses of Recess | The Math Of Politics,0
114 | http://www.mathofpolitics.com/2012/12/21/losing-to-win-nobody-puts-boehner-in-the-corner/,1,"December 21, 2012",Losing to Win: Nobody Puts Boehner In The Corner | The Math Of Politics,0
115 | http://www.mathofpolitics.com/2012/12/21/apocacliff-now-boehner-lost-but-does-he-really-mayan/,1,"December 21, 2012","ApocaCliff Now: Boehner “Lost,” But Does He Really Mayan? | The Math Of Politics",0
116 | http://www.mathofpolitics.com/2012/12/14/make-me-an-offer-i-cant-refuse-to-reject/,1,"December 14, 2012",Make Me an Offer I Can’t Refuse (to Reject) | The Math Of Politics,0
117 | http://www.mathofpolitics.com/2012/12/12/the-triple-ex-budget-trick-or-the-alternative-maximum-cliff/,1,"December 12, 2012","The Triple-Ex Budget Trick or, the Alternative Maximum Cliff | The Math Of Politics",0
118 | http://www.mathofpolitics.com/2012/11/21/naming-rites/,1,"November 21, 2012",Naming Rites | The Math Of Politics,0
119 | http://www.mathofpolitics.com/2012/11/16/churches-campaigns-and-taxes-the-411-on-501c3/,1,"November 16, 2012","Churches, Campaigns, and Taxes: The 411 on 501(c)(3) | The Math Of Politics",0
120 | http://www.mathofpolitics.com/2012/11/16/lets-get-fiscal-cliff/,1,"November 16, 2012","Let’s Get Fiscal, Cliff! | The Math Of Politics",0
121 | http://www.mathofpolitics.com/2012/09/10/penetrating-the-ill-logic-of-double-taxation/,1,"September 10, 2012",Penetrating the Ill Logic of Double Taxation | The Math Of Politics,0
122 | http://www.mathofpolitics.com/2012/08/06/political-antisocial-dismal-science-economics-getting-cut-next/,1,"August 6, 2012","Political, Antisocial, Dismal Science: Economics Getting Cut Next? | The Math Of Politics",0
123 | http://www.mathofpolitics.com/page/12/,0,NA,NA,NA
124 | http://www.mathofpolitics.com/2012/08/02/keeping-tract-is-income-segregation-getting-worse-in-the-us/,1,"August 2, 2012",Keeping Tract: Is Income Segregation Getting Worse in the US? | The Math Of Politics,0
125 | http://www.mathofpolitics.com/2012/07/31/vitali-statistics-measurability-issues-in-education/,1,"July 31, 2012",Vitali Statistics: Measurability Issues in Education | The Math Of Politics,0
126 | http://www.mathofpolitics.com/2012/07/29/47/,1,"July 29, 2012","But, Algebra is f(u)=n!  | The Math Of Politics",0
127 | http://www.mathofpolitics.com/2012/07/25/regulatory-rithmetic/,1,"July 25, 2012",Regulatory ‘Rithmetic  | The Math Of Politics,0
128 | http://www.mathofpolitics.com/2012/07/21/debits-and-credits-simple-budget-algebra/,1,"July 21, 2012",Debits and Credits: Simple Budget Algebra | The Math Of Politics,0
129 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/scrape.py:
--------------------------------------------------------------------------------
  1 | from bs4 import BeautifulSoup
  2 | import urllib 
  3 | import random
  4 | import time
  5 | import os
  6 | import re
  7 | 
  8 | # Open a web page
  9 | web_address='https://case.ku.edu.tr/en/academics/international-relations/faculty/'
 10 | web_page = urllib.request.urlopen(web_address)
 11 | 
 12 | # Parse it
 13 | soup = BeautifulSoup(web_page.read())
 14 | soup.prettify()
 15 | 
 16 | # Find all cases of a certain tag
 17 | soup.find_all('a')
 18 | 
 19 | 
 20 | # Get the attributes
 21 | my_a_tag=soup.find_all('a')[2]
 22 | re.sub(r'<[^>]+>', '', str(my_a_tag)) #remove tags
 23 | my_a_tag.attrs #Gives a dictionary with the attributes
 24 | my_a_tag.attrs.keys()
 25 | my_a_tag['href']
 26 | 
 27 | # Refine search by using attributes
 28 | soup.find_all('span', {'class':'name'})
 29 | 
 30 | # There may be tags within tags
 31 | mysection=soup.find_all('div')[0]
 32 | mysection.a #Gives the 'a' tag within the 'div' tag
 33 | mysection.find_all('a') #Gives the list of all 'a' tags within the 'div' tag
 34 | mysection.get_text()
 35 | 
 36 | 
 37 | # Creating a tree of objects
 38 | 
 39 | mysection.contents #Gives a list of all children
 40 | mysection.children #Creates an iterator for children
 41 | 
 42 | for child in mysection.children:
 43 | 	print(child)
 44 | 
 45 | mysection.descendants #Creates an iterator for children, grandchildren, etc.
 46 | 
 47 | # Other methods to check family:
 48 | # parent
 49 | # parents
 50 | # next_siblings
 51 | # previous_siblings
 52 | 
 53 | # Beautiful Soup documentation
 54 | # http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 55 | 
 56 | # Function to save a web page
 57 | 
 58 | def download_page(address,path,filename,wait=5):
 59 | 	time.sleep(random.uniform(0,wait))
 60 | 	page = urllib.request.urlopen(address)
 61 | 	page_content = page.read()
 62 | 	if os.path.exists(path+filename)==False:
 63 | 		with open(path+filename, 'wb') as p_html:
 64 | 			p_html.write(page_content)
 65 | 	else:
 66 | 		print("Can't overwrite file" + filename)
 67 | 
 68 | download_page('http://www.crummy.com/software/BeautifulSoup/bs4/doc/', '', 'Docket05-1.html',0)
 69 | 
 70 | #You can also parse a page that is saved on your computer
 71 | with open('Docket05-1.html') as f:
 72 |   #We can read files in chunks
 73 |   myfile = f.read()
 74 |   
 75 | soup = BeautifulSoup(myfile)
 76 | soup.prettify()
 77 | 
 78 | #Scrape the names and email addresses of INTL faculty and save the result as a csv
 79 | web_address='https://case.ku.edu.tr/en/academics/international-relations/faculty/'
 80 | web_page = urllib.request.urlopen(web_address)
 81 | 
 82 | # Parse it
 83 | soup = BeautifulSoup(web_page.read())
 84 | soup.prettify()
 85 | 
 86 | namesTags = soup.find_all('span', {'class':'name'})
 87 | names = []
 88 | for name in namesTags:
 89 |   names.append(name.get_text())
 90 |   
 91 | emailsTags = soup.find_all('a', {'class':'message'})
 92 | emails = []
 93 | for email in emailsTags:
 94 |   emails.append(re.sub('mailto:', '', email['href']))
 95 | 
 96 | with open('faculty.csv', 'w') as f:
 97 |   my_writer = csv.DictWriter(f, fieldnames=("Name", "Email"))
 98 |   my_writer.writeheader()
 99 |   for i in range(len(names)):
100 |     my_writer.writerow({"Name":names[i], "Email":emails[i]})
101 | 
102 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/test.csv:
--------------------------------------------------------------------------------
  1 | 1,0
  2 | 2,1
  3 | 3,2
  4 | 4,3
  5 | 5,4
  6 | 6,5
  7 | 7,6
  8 | 8,7
  9 | 9,8
 10 | 10,9
 11 | 11,10
 12 | 12,11
 13 | 13,12
 14 | 14,13
 15 | 15,14
 16 | 16,15
 17 | 17,16
 18 | 18,17
 19 | 19,18
 20 | 20,19
 21 | 21,20
 22 | 22,21
 23 | 23,22
 24 | 24,23
 25 | 25,24
 26 | 26,25
 27 | 27,26
 28 | 28,27
 29 | 29,28
 30 | 30,29
 31 | 31,30
 32 | 32,31
 33 | 33,32
 34 | 34,33
 35 | 35,34
 36 | 36,35
 37 | 37,36
 38 | 38,37
 39 | 39,38
 40 | 40,39
 41 | 41,40
 42 | 42,41
 43 | 43,42
 44 | 44,43
 45 | 45,44
 46 | 46,45
 47 | 47,46
 48 | 48,47
 49 | 49,48
 50 | 50,49
 51 | 51,50
 52 | 52,51
 53 | 53,52
 54 | 54,53
 55 | 55,54
 56 | 56,55
 57 | 57,56
 58 | 58,57
 59 | 59,58
 60 | 60,59
 61 | 61,60
 62 | 62,61
 63 | 63,62
 64 | 64,63
 65 | 65,64
 66 | 66,65
 67 | 67,66
 68 | 68,67
 69 | 69,68
 70 | 70,69
 71 | 71,70
 72 | 72,71
 73 | 73,72
 74 | 74,73
 75 | 75,74
 76 | 76,75
 77 | 77,76
 78 | 78,77
 79 | 79,78
 80 | 80,79
 81 | 81,80
 82 | 82,81
 83 | 83,82
 84 | 84,83
 85 | 85,84
 86 | 86,85
 87 | 87,86
 88 | 88,87
 89 | 89,88
 90 | 90,89
 91 | 91,90
 92 | 92,91
 93 | 93,92
 94 | 94,93
 95 | 95,94
 96 | 96,95
 97 | 97,96
 98 | 98,97
 99 | 99,98
100 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/test1.csv:
--------------------------------------------------------------------------------
  1 | 1,0
  2 | 2,1
  3 | 3,2
  4 | 4,3
  5 | 5,4
  6 | 6,5
  7 | 7,6
  8 | 8,7
  9 | 9,8
 10 | 10,9
 11 | 11,10
 12 | 12,11
 13 | 13,12
 14 | 14,13
 15 | 15,14
 16 | 16,15
 17 | 17,16
 18 | 18,17
 19 | 19,18
 20 | 20,19
 21 | 21,20
 22 | 22,21
 23 | 23,22
 24 | 24,23
 25 | 25,24
 26 | 26,25
 27 | 27,26
 28 | 28,27
 29 | 29,28
 30 | 30,29
 31 | 31,30
 32 | 32,31
 33 | 33,32
 34 | 34,33
 35 | 35,34
 36 | 36,35
 37 | 37,36
 38 | 38,37
 39 | 39,38
 40 | 40,39
 41 | 41,40
 42 | 42,41
 43 | 43,42
 44 | 44,43
 45 | 45,44
 46 | 46,45
 47 | 47,46
 48 | 48,47
 49 | 49,48
 50 | 50,49
 51 | 51,50
 52 | 52,51
 53 | 53,52
 54 | 54,53
 55 | 55,54
 56 | 56,55
 57 | 57,56
 58 | 58,57
 59 | 59,58
 60 | 60,59
 61 | 61,60
 62 | 62,61
 63 | 63,62
 64 | 64,63
 65 | 65,64
 66 | 66,65
 67 | 67,66
 68 | 68,67
 69 | 69,68
 70 | 70,69
 71 | 71,70
 72 | 72,71
 73 | 73,72
 74 | 74,73
 75 | 75,74
 76 | 76,75
 77 | 77,76
 78 | 78,77
 79 | 79,78
 80 | 80,79
 81 | 81,80
 82 | 82,81
 83 | 83,82
 84 | 84,83
 85 | 85,84
 86 | 86,85
 87 | 87,86
 88 | 88,87
 89 | 89,88
 90 | 90,89
 91 | 91,90
 92 | 92,91
 93 | 93,92
 94 | 94,93
 95 | 95,94
 96 | 96,95
 97 | 97,96
 98 | 98,97
 99 | 99,98
100 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/test_with_fields.csv:
--------------------------------------------------------------------------------
  1 | A,B
  2 | 0,1
  3 | 1,2
  4 | 2,3
  5 | 3,4
  6 | 4,5
  7 | 5,6
  8 | 6,7
  9 | 7,8
 10 | 8,9
 11 | 9,10
 12 | 10,11
 13 | 11,12
 14 | 12,13
 15 | 13,14
 16 | 14,15
 17 | 15,16
 18 | 16,17
 19 | 17,18
 20 | 18,19
 21 | 19,20
 22 | 20,21
 23 | 21,22
 24 | 22,23
 25 | 23,24
 26 | 24,25
 27 | 25,26
 28 | 26,27
 29 | 27,28
 30 | 28,29
 31 | 29,30
 32 | 30,31
 33 | 31,32
 34 | 32,33
 35 | 33,34
 36 | 34,35
 37 | 35,36
 38 | 36,37
 39 | 37,38
 40 | 38,39
 41 | 39,40
 42 | 40,41
 43 | 41,42
 44 | 42,43
 45 | 43,44
 46 | 44,45
 47 | 45,46
 48 | 46,47
 49 | 47,48
 50 | 48,49
 51 | 49,50
 52 | 50,51
 53 | 51,52
 54 | 52,53
 55 | 53,54
 56 | 54,55
 57 | 55,56
 58 | 56,57
 59 | 57,58
 60 | 58,59
 61 | 59,60
 62 | 60,61
 63 | 61,62
 64 | 62,63
 65 | 63,64
 66 | 64,65
 67 | 65,66
 68 | 66,67
 69 | 67,68
 70 | 68,69
 71 | 69,70
 72 | 70,71
 73 | 71,72
 74 | 72,73
 75 | 73,74
 76 | 74,75
 77 | 75,76
 78 | 76,77
 79 | 77,78
 80 | 78,79
 81 | 79,80
 82 | 80,81
 83 | 81,82
 84 | 82,83
 85 | 83,84
 86 | 84,85
 87 | 85,86
 88 | 86,87
 89 | 87,88
 90 | 88,89
 91 | 89,90
 92 | 90,91
 93 | 91,92
 94 | 92,93
 95 | 93,94
 96 | 94,95
 97 | 95,96
 98 | 96,97
 99 | 97,98
100 | 98,99
101 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/urlparsing.py:
--------------------------------------------------------------------------------
 1 | from urllib import *
 2 | 
 3 | url1 = urllib.parse.urljoin("http://www.wustl.edu", "bob/test.html")
 4 | url2 = urllib.parse.urljoin("http://www.wustl.edu", "/")
 5 | url3 = urllib.parse.urljoin("http://www.wustl.edu", "http://www.cnn.com")
 6 | url4 = urllib.parse.urljoin("http://www.wustl.edu", "http://www.cnn.com/test.html")
 7 | 
 8 | for url in [url1, url2, url3, url4]:
 9 |   p = urllib.parse.urlsplit(url)
10 |   print("{0}://{1}{2}: {3}".format(p.scheme, p.hostname, p.path, "is wustl" if (p.hostname == "www.wustl.edu") else "is not wustl"))
11 |   
12 | #go to a webpage and extract all links. then filter which ones are of the same host
13 | web_address='https://case.ku.edu.tr/en/academics/international-relations/faculty/'
14 | web_page = urllib.request.urlopen(web_address)
15 | 
16 | # Parse it
17 | soup = BeautifulSoup(web_page.read())
18 | soup.prettify()
19 | 
20 | linksTags = soup.find_all('a')
21 | links = []
22 | for link in linksTags:
23 |   links.append(link['href'])
24 | 
25 | for url in links:
26 |   p = urllib.parse.urlsplit(url)
27 |   print("{0}://{1}{2}: {3}".format(p.scheme, p.hostname, p.path, "is KU" if (p.hostname == "www.ku.edu.tr") else "is not KU"))
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/webcrawler.py:
--------------------------------------------------------------------------------
 1 | from bs4 import BeautifulSoup
 2 | import csv 
 3 | from nltk.util import clean_html
 4 | import urllib
 5 | import re
 6 | 
 7 | def webcrawler(csvwriter, page_to_scrape = 'http://www.mathofpolitics.com'):
 8 |     webpage = urllib.request.urlopen(page_to_scrape)
 9 |     soup = BeautifulSoup(webpage.read(), 'lxml')
10 |     soup.prettify()
11 |     links=[]
12 |     for link in soup.findAll('a'):
13 |       try:
14 |         if link['rel']==['bookmark']: #if link is a bookmark, points to a blog post
15 |             links.append(link['href'])
16 |       except KeyError:
17 |         pass
18 |     links2 = []
19 |     for link in links: #get rid of duplicates
20 |         if link not in links2:
21 |             links2.append(link)
22 |     csvwriter.writerow([page_to_scrape, 0, 'NA', 'NA', 'NA']) #if there are bookmark links on the page, it is not a post page, so all entries are NA
23 |     for link in links2:
24 |         getInfo(csvwriter, str(link)) #get the info for each blog post
25 |     prev_div = soup.findAll('div', attrs = {'class':'nav-previous'})[0] #checks for older posts link on nonblog post pages
26 |     if prev_div.findAll('a'): #if it contains a link
27 |         webcrawler(csvwriter, str(prev_div.findAll('a')[0]['href'])) #recursively run this function with older post link
28 | 
29 | def getInfo(csvwriter, page_to_scrape):
30 |     webpage = urllib.request.urlopen(page_to_scrape)
31 |     soup = BeautifulSoup(webpage.read(), 'lxml')
32 |     soup.prettify()
33 |     date = soup.findAll('time', attrs={'class':'entry-date'})[0] #find time
34 |     date = re.sub(r'<[^>]+>', '', str(date)) #remove tags
35 |     title = soup.findAll('title')[0] #find title of blog post
36 |     title = re.sub(r'<[^>]+>', '', str(title)) #remove tags
37 |     comment_count = len(soup.findAll('div', attrs={'class':'comment-content'})) #counts number of comments - all div of class comment-content
38 |     csvwriter.writerow([page_to_scrape, 1, date, title, comment_count]) #add row
39 | 
40 | headers = ["url", "is_post", "publish_date", "post_title", "comment_count"] #header
41 | filename = "mathofpolitics.csv"
42 | readFile = open(filename, "w")
43 | csvwriter = csv.writer(readFile)
44 | csvwriter.writerow(headers)
45 | webcrawler(csvwriter)
46 | readFile.flush()
47 | readFile.close()
48 | 
49 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/whitehouse-petitions.csv:
--------------------------------------------------------------------------------
1 | Summary,Signatures
2 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/whitehouse.py:
--------------------------------------------------------------------------------
  1 | # Scraper to collect petition info from petitions.whitehouse.gov
  2 | 
  3 | from bs4 import BeautifulSoup
  4 | import csv 
  5 | from nltk.util import clean_html
  6 | import urllib 
  7 | import re
  8 | 
  9 | # What page? 
 10 | page_to_scrape = 'https://petitions.whitehouse.gov/'
 11 | 
 12 | # What info do we want? 
 13 | headers = ["Summary", "Signatures"]
 14 | 
 15 | # Where do we save info?
 16 | filename = "whitehouse-petitions.csv"
 17 | readFile = open(filename, "w")
 18 | csvwriter = csv.writer(readFile)
 19 | csvwriter.writerow(headers)
 20 | 
 21 | # Open webpage
 22 | webpage = urllib.request.urlopen(page_to_scrape)
 23 | 
 24 | # Parse it
 25 | soup = BeautifulSoup(webpage.read())
 26 | soup.prettify()
 27 | 
 28 | # Extract petitions on page
 29 | petitions = soup.findAll("a", href=re.compile('^/petition'))
 30 | 
 31 | print(len(petitions))
 32 | for petition in petitions:
 33 |   p = BeautifulSoup.get_text(petition)
 34 |   print(p)
 35 |   
 36 | pets = []  
 37 | for petition in petitions:
 38 |   p = BeautifulSoup.get_text(petition)
 39 |   if 'Sign It' not in p and 'Create a Petition' not in p and 'Load More' not in p: pets.append(p)
 40 | 
 41 | #signatures
 42 | #html tag:
 43 | #<span class="signatures-number">364,223</span>
 44 | signatures = soup.findAll("span", attrs={'class':'signatures-number'})
 45 | print(len(signatures))
 46 | sigs = []
 47 | for signature in signatures:
 48 |   s = BeautifulSoup.get_text(signature)
 49 |   sigs.append(s)
 50 | 
 51 | for i in range(20):
 52 |   csvwriter.writerow([pets[i], sigs[i]])
 53 | 
 54 | readFile.close()
 55 | 
 56 | #change this file to loop through all pages and scrape every petition (hint: look at the url of the page when you click load more)
 57 | #then add a third column for goal, and a fourth for percentage of goal reached
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 
 63 | # Scraper to collect petition info from petitions.whitehouse.gov
 64 | 
 65 | from bs4 import BeautifulSoup
 66 | import csv 
 67 | from nltk.util import clean_html
 68 | import urllib 
 69 | import re
 70 | 
 71 | # What page? 
 72 | page_to_scrape = 'https://petitions.whitehouse.gov/'
 73 | 
 74 | # What info do we want? 
 75 | headers = ["Summary", "Signatures", "Goal", "Prop"]
 76 | 
 77 | # Where do we save info?
 78 | filename = "whitehouse-petitions.csv"
 79 | readFile = open(filename, "w")
 80 | csvwriter = csv.writer(readFile)
 81 | csvwriter.writerow(headers)
 82 | 
 83 | # Open webpage
 84 | webpage = urllib.request.urlopen(page_to_scrape)
 85 | 
 86 | # Parse it
 87 | soup = BeautifulSoup(webpage.read())
 88 | soup.prettify()
 89 | 
 90 | # Extract petitions on page
 91 | petitions = soup.findAll("a", href=re.compile('^/petition'))
 92 | 
 93 | print(len(petitions))
 94 | for petition in petitions:
 95 |   p = BeautifulSoup.get_text(petition)
 96 |   print(p)
 97 |   
 98 | pets = []  
 99 | for petition in petitions:
100 |   p = BeautifulSoup.get_text(petition)
101 |   if 'Sign It' not in p and 'Create a Petition' not in p and 'Load More' not in p: pets.append(p)
102 | 
103 | #signatures
104 | #html tag:
105 | #<span class="signatures-number">364,223</span>
106 | #<div class="goal-text-container"><span class="goal">100,000</span>
107 | signatures = soup.findAll("span", attrs={'class':'signatures-number'})
108 | goals = soup.findAll("div", attrs={'class':'goal-text-container'})
109 | print(len(signatures))
110 | print(len(goals))
111 | sigs = []
112 | gls = []
113 | props = []
114 | for i in range(len(signatures)):
115 |   s = BeautifulSoup.get_text(signatures[i])
116 |   sigs.append(s)
117 |   g = BeautifulSoup.get_text(goals[i])
118 |   g = re.sub(',', '', g)
119 |   g = re.sub('\ngoal\n', '', g)
120 |   gls.append(g)
121 |   props.append(float(re.sub(',' , '', s))/float(g))
122 | 
123 | i = 1
124 | while True:
125 |   try:
126 |     new_webpage = page_to_scrape + '?page=' + str(i)
127 |     #do above
128 |     i += 1
129 |   except: break 
130 | 
131 | for i in range(len(sigs):
132 |   csvwriter.writerow([pets[i], sigs[i], gls[i], props[i])
133 | 
134 | readFile.close()
135 | 
136 | 


--------------------------------------------------------------------------------
/in-classMaterial/day5/writefile.txt:
--------------------------------------------------------------------------------
1 | Hi guys.Does this go on the second line?abc
2 | I got appended!


--------------------------------------------------------------------------------
/in-classMaterial/day6/geo.py:
--------------------------------------------------------------------------------
 1 | from geopy.geocoders import Nominatim
 2 | geolocator = Nominatim(user_agent = 'dcarlson@ku.edu.tr') #use your email address
 3 | location = geolocator.geocode('Washington, DC')
 4 | print(location.address)
 5 | print((location.latitude, location.longitude))
 6 | 
 7 | location2 = geolocator.geocode('Mexico City')
 8 | print(location2.address)
 9 | print((location2.latitude, location2.longitude))
10 | 
11 | #distance between capitals
12 | from math import radians, sin, cos, acos
13 | 
14 | def distance(loc1, loc2):
15 | 	return 6371.01 * acos(sin(radians(loc1.latitude))*sin(radians(loc2.latitude)) + cos(radians(loc1.latitude))*cos(radians(loc2.latitude))*cos(radians(loc1.longitude) - radians(loc2.longitude)))
16 | 	
17 | distance(location, location2)
18 | 
19 | #TODO: Create a distance matrix of 5 capitals
20 | 
21 | locations = []
22 | locations.append(geolocator.geocode('Washington, DC'))
23 | locations.append(geolocator.geocode('Mexico City'))
24 | locations.append(geolocator.geocode('Sarajevo'))
25 | locations.append(geolocator.geocode('Ankara'))
26 | locations.append(geolocator.geocode('Beijing'))
27 | 
28 | Matrix = [[0 for x in range(5)] for y in range(5)]
29 | 
30 | for x in range(5):
31 | 	for y in range(5):
32 | 		Matrix[x][y] = distance(locations[x], locations[y])
33 | 		
34 | 
35 | 


--------------------------------------------------------------------------------
/in-classMaterial/day6/tweepy.py:
--------------------------------------------------------------------------------
 1 | import tweepy
 2 | auth = tweepy.OAuthHandler('your consumer key', '')
 3 | auth.set_access_token('your consumer secret', '')    
 4 | api = tweepy.API(auth)
 5 | 
 6 | #See rate limit
 7 | api.rate_limit_status()
 8 | 
 9 | 
10 | 
11 | #Get some users
12 | mike_ward = api.get_user('3876')
13 | 
14 | #How many favorites does he have?
15 | mike_ward.favourites_count
16 | 
17 | #Who does Mike follow?
18 | mikes_friends = api.friends(id=mike_ward.screen_name)
19 | for f in mikes_friends:
20 |   #Note I am handling UTF encoded strings so I convert them to ASCII-compatible for macs
21 |     print("{0}".format(f.screen_name.encode('ascii', 'ignore')))
22 |         
23 | mikes_friends = api.friends(id=mike_ward.screen_name)
24 | for f in mikes_friends:
25 |   #Note I am handling UTF encoded strings for linux
26 |         print("{0}".format(f.screen_name.encode('utf', 'ignore')))
27 |         
28 |         
29 | #or get info from a screen name
30 | gary_king = api.get_user('kinggary')
31 | gary_friends = api.friends(id=gary_king.screen_name)
32 | for f in gary_friends:
33 |   #Note I am handling UTF encoded strings so I convert them to ASCII-compatible for macs
34 |     print("{0}".format(f.screen_name.encode('ascii', 'ignore')))
35 | 
36 | 
37 | import time
38 | from datetime import timedelta
39 | 
40 | followers = api.followers_ids('davidgcarlson') # Extract IDs for my followers.
41 | followers_count = 0 # Creating baseline of 0 followers.
42 | i=0
43 | while i<len(followers): # Code below loops through all users who follow me, and continues to update who stored as the "most_followed" as loop runs.
44 |     try:
45 |         user = api.get_user(followers[i])
46 |         if user.followers_count > followers_count:
47 |             followers_count = user.followers_count
48 |             most_followed = str(user.name)
49 |         i+=1
50 |     except: time.sleep(.25) # Makes request every 0.25 seconds. Should we hit the limit, waits 0.25 before making another request. Permits for loop to remain active until limit is reset.
51 | 
52 | 
53 | followed = api.friends_ids('mcdickenson') # Extract IDs for those users Matt is following.
54 | i = 0
55 | max_tweets = 0 # Creating baseline for number of tweets.
56 | while i<len(followed): # Code below counts total tweets of each followed
57 |     try:
58 |         user = api.get_user(followed[i])
59 |         tweets = user.statuses_count
60 |         if max_tweets < tweets:
61 |             max_tweets = tweets
62 |             most_active = str(user.name)
63 |         i+=1
64 |     except: time.sleep(.25) # Makes request every 0.25 seconds. Should we hit the limit, waits 0.25 before making another request. Permits for loop to remain active until limit is reset.
65 | 
66 | print(most_followed)
67 | print(most_active)
68 | 
69 | #TODO: pick a user that is not too active. create a network of followers, followers of the followers, etc. until you find the same user - note how many levels you have to go down
70 | 
71 | 


--------------------------------------------------------------------------------
/in-classMaterial/day6/twint.py:
--------------------------------------------------------------------------------
 1 | import twint
 2 | 
 3 | c = twint.Config()
 4 | c.Username = "Jacob_Montg" #user name to search under
 5 | c.Links = "include" #return tweets sent by user containing links
 6 | twint.run.Search(c)
 7 | 
 8 | c = twint.Config()
 9 | c.Search = "medicare for all"
10 | c.Min_likes = 5 #only return tweets that have at least 5 likes
11 | twint.run.Search(c)
12 | 
13 | #search for up to 100 tweets by ZiyaOnis that were written in Turkish, translate them to English, and save to a csv
14 | c = twint.Config()
15 | c.Username = "ZiyaOnis"
16 | c.Limit = 100
17 | c.Store_csv = True
18 | c.Output = "KocPython2020/in-classMaterial/day6/ziya.csv"
19 | c.Lang = "tr"
20 | c.Translate = True
21 | c.TranslateDest = "en"
22 | twint.run.Search(c)
23 | 


--------------------------------------------------------------------------------
/in-classMaterial/day6/wb.py:
--------------------------------------------------------------------------------
 1 | #ease of business
 2 | 
 3 | import wbdata
 4 | wbdata.get_source()
 5 | wbdata.get_indicator(source=1)
 6 | #get country codes with a search
 7 | wbdata.search_countries('Turkey') #TUR
 8 | #get indicators with a search
 9 | wbdata.search_indicators('ease of doing business') #IC.BUS.DFRN.XQ
10 | wbdata.get_data('IC.BUS.DFRN.XQ', country='TUR')[0]
11 | wbdata.search_countries('united') #GBR
12 | wbdata.get_data('IC.BUS.DFRN.XQ', country='GBR')[0]
13 | 
14 | import datetime
15 | data_date = (datetime.datetime(2017, 1, 1), datetime.datetime(2019, 1, 1))
16 | wbdata.get_data("IC.BUS.DFRN.XQ", country=("USA", "GBR"), data_date=data_date)
17 | 
18 | wbdata.search_indicators("gdp per capita")
19 | wbdata.get_data('NY.GDP.PCAP.KD.ZG')
20 | 
21 | wbdata.get_data('NY.GDP.PCAP.KD.ZG', country = 'USA')
22 | wbdata.get_data('NY.GDP.PCAP.KD.ZG', country = 'OED')
23 | 
24 | #income level filter
25 | wbdata.get_incomelevel()
26 | countries = [i['id'] for i in wbdata.get_country(incomelevel="HIC", display=False)]
27 | indicators = {"IC.BUS.DFRN.XQ": "doing_business", "NY.GDP.PCAP.PP.KD": "gdppc"}
28 | df = wbdata.get_dataframe(indicators, country=countries, convert_date=True)
29 | 
30 | df.to_csv('KocPython2020/in-classMaterial/day6/econ.csv')
31 | df.describe()
32 | 
33 | #TODO: explore the API and pick some interesting variables that may have a theoretical connection, then run a regression (using any software is fine)
34 | 
35 | 


--------------------------------------------------------------------------------
/in-classMaterial/day7/numpy.py:
--------------------------------------------------------------------------------
  1 | #ipython --pylab
  2 | 
  3 | import numpy as np
  4 | np.__version__
  5 | 
  6 | np?
  7 | #http://www.numpy.org
  8 | 
  9 | #unlike Python lists, NumPy is constrained to arrays that all contain the same type
 10 | np.array([3.14, 4, 2, 3])
 11 | np.array([1, 2, 3, 4], dtype='float32')
 12 | 
 13 | #can be multidimensional - inner lists are treated as rows
 14 | np.array([range(i, i + 3) for i in [2, 4, 6]])
 15 | 
 16 | # Create a length-10 integer array filled with zeros
 17 | np.zeros(10, dtype=int)
 18 | 
 19 | # Create a 3x5 floating-point array filled with 1s
 20 | np.ones((3, 5), dtype=float)
 21 | 
 22 | # Create a 3x5 array filled with 3.14
 23 | np.full((3, 5), 3.14)
 24 | 
 25 | # Create an array filled with a linear sequence
 26 | # Starting at 0, ending at 20, stepping by 2
 27 | # (this is similar to the built-in range() function)
 28 | np.arange(0, 20, 2)
 29 | 
 30 | # Create an array of five values evenly spaced between 0 and 1
 31 | np.linspace(0, 1, 5)
 32 | 
 33 | # Create a 3x3 array of uniformly distributed
 34 | # random values between 0 and 1
 35 | np.random.random((3, 3))
 36 | 
 37 | # Create a 3x3 array of normally distributed random values
 38 | # with mean 0 and standard deviation 1
 39 | np.random.normal(0, 1, (3, 3))
 40 | 
 41 | # Create a 3x3 array of random integers in the interval [0, 10)
 42 | np.random.randint(0, 10, (3, 3))
 43 | 
 44 | # Create a 3x3 identity matrix
 45 | np.eye(3)
 46 | 
 47 | # Create an uninitialized array of three integers
 48 | # The values will be whatever happens to already exist at that
 49 | # memory location
 50 | np.empty(3)
 51 | 
 52 | #data types
 53 | 
 54 | #bool_ Boolean (True or False) stored as a byte
 55 | #int_ Default integer type (same as C long ; normally either int64 or int32 )
 56 | #intc Identical to C int (normally int32 or int64 )
 57 | #intp Integer used for indexing (same as C ssize_t ; normally either int32 or int64 )
 58 | #int8 Byte (–128 to 127)
 59 | #int16 Integer (–32768 to 32767)
 60 | #int32 Integer (–2147483648 to 2147483647)
 61 | #int64 Integer (–9223372036854775808 to 9223372036854775807)
 62 | #uint8 Unsigned integer (0 to 255)
 63 | #uint16 Unsigned integer (0 to 65535)
 64 | #uint32 Unsigned integer (0 to 4294967295)
 65 | #uint64 Unsigned integer (0 to 18446744073709551615)
 66 | #float_ Shorthand for float64
 67 | #float16 Half-precision float: sign bit, 5 bits exponent, 10 bits mantissa
 68 | #float32 Single-precision float: sign bit, 8 bits exponent, 23 bits mantissa
 69 | #float64 Double-precision float: sign bit, 11 bits exponent, 52 bits mantissa
 70 | #complex_ Shorthand for complex128
 71 | #complex64 Complex number, represented by two 32-bit floats
 72 | #complex128 Complex number, represented by two 64-bit floats
 73 | 
 74 | np.random.seed(0) # seed for reproducibility (can be any number, just used to rerun)
 75 | x1 = np.random.randint(10, size=6) # One-dimensional array
 76 | x2 = np.random.randint(10, size=(3, 4)) # Two-dimensional array
 77 | x3 = np.random.randint(10, size=(3, 4, 5)) # Three-dimensional array
 78 | 
 79 | print("x3 ndim: ", x3.ndim)
 80 | print("x3 shape:", x3.shape)
 81 | print("x3 size: ", x3.size)
 82 | 
 83 | x2
 84 | x2[0,0]
 85 | x2[2,0]
 86 | x2[2,-1]
 87 | 
 88 | x2[0, 0] = 12
 89 | x2
 90 | 
 91 | x1[0] = 3.14159
 92 | x1
 93 | 
 94 | #slicing similar to Python standard
 95 | x = np.arange(10)
 96 | x
 97 | x[:5]
 98 | x[5:]
 99 | x[4:7]
100 | x[::2]
101 | x[1::2]
102 | x[::-1]
103 | x[5::-2]
104 | 
105 | #multidimensional slicing
106 | x2
107 | x2[:2, :3]
108 | x2[:3, ::2]
109 | x2[::-1, ::-1]
110 | 
111 | print(x2[:, 0]) # first column of x2
112 | print(x2[0, :]) # first row of x2
113 | print(x2[0]) # equivalent to x2[0, :]
114 | 
115 | #slices are not copies! they are views
116 | print(x2)
117 | x2_sub = x2[:2, :2]
118 | print(x2_sub)
119 | x2_sub[0, 0] = 99
120 | print(x2_sub)
121 | print(x2)
122 | 
123 | #to create copies, use .copy()
124 | x2_sub_copy = x2[:2, :2].copy()
125 | x2_sub_copy[0, 0] = 42
126 | print(x2_sub_copy)
127 | print(x2)
128 | 
129 | #reshaping
130 | grid = np.arange(1, 10).reshape((3, 3))
131 | print(grid)
132 | 
133 | x = np.array([1, 2, 3])
134 | # row vector via reshape
135 | x.reshape((1, 3))
136 | 
137 | # row vector via newaxis
138 | x[np.newaxis, :]
139 | 
140 | # column vector via reshape
141 | x.reshape((3, 1))
142 | 
143 | # column vector via newaxis
144 | x[:, np.newaxis]
145 | 
146 | #concatenation
147 | 
148 | x = np.array([1, 2, 3])
149 | y = np.array([3, 2, 1])
150 | np.concatenate([x, y])
151 | z = [99, 99, 99]
152 | print(np.concatenate([x, y, z]))
153 | 
154 | grid = np.array([[1, 2, 3], [4, 5, 6]])
155 | # concatenate along the first axis
156 | np.concatenate([grid, grid])
157 | # concatenate along the second axis (zero-indexed)
158 | np.concatenate([grid, grid], axis=1)
159 | 
160 | x = np.array([1, 2, 3])
161 | grid = np.array([[9, 8, 7],
162 | [6, 5, 4]])
163 | # vertically stack the arrays
164 | np.vstack([x, grid])
165 | # horizontally stack the arrays
166 | y = np.array([[99],
167 | [99]])
168 | np.hstack([grid, y])
169 | #third axis
170 | np.dstack([x3, x3])
171 | 
172 | 
173 | #splitting
174 | x = [1, 2, 3, 99, 99, 3, 2, 1]
175 | x1, x2, x3 = np.split(x, [3, 5])
176 | print(x1, x2, x3)
177 | 
178 | grid = np.arange(16).reshape((4, 4))
179 | grid
180 | upper, lower = np.vsplit(grid, [2])
181 | print(upper)
182 | print(lower)
183 | left, right = np.hsplit(grid, [2])
184 | print(left)
185 | print(right)
186 | x3 = np.random.randint(10, size=(3, 4, 5))
187 | np.dsplit(x3, [2])
188 | 
189 | 
190 | #vectorize!
191 | def compute_reciprocals(values):
192 |     output = np.empty(len(values))
193 |     for i in range(len(values)):
194 |         output[i] = 1.0 / values[i]
195 |     return output
196 | 
197 | values = np.random.randint(1, 10, size=5)
198 | compute_reciprocals(values)
199 | 
200 | big_array = np.random.randint(1, 100, size=1000000)
201 | %timeit compute_reciprocals(big_array)
202 | 
203 | print(compute_reciprocals(values))
204 | print(1.0 / values)
205 | 
206 | %timeit (1.0 / big_array)
207 | 
208 | x = np.arange(9).reshape((3, 3))
209 | 2 ** x
210 | 
211 | np.arange(9) + np.arange(1,10)
212 | 
213 | x = np.array([-2, -1, 0, 1, 2])
214 | abs(x)
215 | 
216 | x = np.array([3 - 4j, 4 - 3j, 2 + 0j, 0 + 1j])
217 | abs(x)
218 | 
219 | #trigonometric functions
220 | 
221 | theta = np.linspace(0, np.pi, 3) #array of angles
222 | theta
223 | np.sin(theta)
224 | np.cos(theta)
225 | np.tan(theta)
226 | x = [-1, 0, 1]
227 | np.arcsin(x)
228 | np.arccos(x)
229 | np.arctan(x)
230 | 
231 | #exponentials
232 | 
233 | x = [1, 2, 3]
234 | print("x=", x)
235 | print("e^x=", np.exp(x))
236 | print("2^x=", np.exp2(x))
237 | print("3^x=", np.power(3, x))
238 | 
239 | #logarithms
240 | 
241 | x = [1, 2, 4, 10]
242 | np.log(x)
243 | np.log2(x)
244 | np.log10(x)
245 | 
246 | x = [0, 0.001, 0.01, 0.1]
247 | print("exp(x) - 1 =", np.expm1(x))
248 | print("log(1 + x) =", np.log1p(x))
249 | 
250 | from scipy import special
251 | # Gamma functions (generalized factorials) and related functions
252 | x = [1, 5, 10]
253 | print("gamma(x) =", special.gamma(x))
254 | print("ln|gamma(x)| =", special.gammaln(x))
255 | print("beta(x, 2) =", special.beta(x, 2))
256 | 
257 | # Error function (integral of Gaussian)
258 | # its complement, and its inverse
259 | x = np.array([0, 0.3, 0.7, 1.0])
260 | print("erf(x) =", special.erf(x))
261 | print("erfc(x) =", special.erfc(x))
262 | print("erfinv(x) =", special.erfinv(x))
263 | 
264 | 
265 | #out argument can save time by allocation of memory
266 | x = np.arange(5)
267 | y = np.empty(5)
268 | np.multiply(x, 10, out=y)
269 | print(y)
270 | 
271 | y = np.zeros(10)
272 | np.power(2, x, out=y[::2])
273 | print(y)
274 | 
275 | #outer product
276 | x = np.arange(1, 6)
277 | np.multiply.outer(x, x)
278 | 
279 | #sum
280 | big_array = np.random.rand(1000000)
281 | %timeit sum(big_array)
282 | %timeit np.sum(big_array)
283 | 
284 | #min and max
285 | np.min(big_array), np.max(big_array)
286 | %timeit min(big_array)
287 | %timeit np.min(big_array)
288 | 
289 | print(big_array.min(), big_array.max(), big_array.sum())
290 | 
291 | M = np.random.random((3, 4))
292 | print(M)
293 | 
294 | M.sum()
295 | M.min(axis=0)
296 | M.max(axis=1)
297 | M.max()
298 | 
299 | #np.sum np.nansum Compute sum of elements
300 | #np.prod np.nanprod Compute product of elements
301 | #np.mean np.nanmean Compute median of elements
302 | #np.std np.nanstd Compute standard deviation
303 | #np.var np.nanvar Compute variance
304 | #np.min np.nanmin Find minimum value
305 | #np.max np.nanmax Find maximum value
306 | #np.argmin np.nanargmin Find index of minimum value
307 | #np.argmax np.nanargmax Find index of maximum value
308 | #np.median np.nanmedian Compute median of elements
309 | #np.percentile np.nanpercentile Compute rank-based statistics of elements
310 | #np.any N/A Evaluate whether any elements are true
311 | #np.all N/A Evaluate whether all elements are true
312 | 
313 | #presidential height
314 | import os
315 | os.chdir('KocPython2020/in-classMaterial/day7')
316 | !head -4 president_heights.csv
317 | 
318 | import pandas as pd
319 | data = pd.read_csv('president_heights.csv')
320 | heights = np.array(data['height(cm)'])
321 | print(heights)
322 | 
323 | print("Mean height:", heights.mean())
324 | print("Standard deviation:", heights.std())
325 | print("Minimum height:", heights.min())
326 | print("Maximum height:", heights.max())
327 | 
328 | print("25th percentile:", np.percentile(heights, 25))
329 | print("Median:", np.median(heights))
330 | print("75th percentile:", np.percentile(heights, 75))
331 | 
332 | 
333 | %matplotlib osx
334 | import matplotlib.pyplot as plt
335 | import seaborn; seaborn.set() # set plot style
336 | plt.hist(heights)
337 | plt.title('Height Distribution of US Presidents')
338 | plt.xlabel('height (cm)')
339 | plt.ylabel('number');
340 | 
341 | #broadcasting
342 | a = np.array([0, 1, 2])
343 | b = np.array([5, 5, 5])
344 | a + b
345 | a + 5
346 | M = np.ones((3, 3))
347 | M
348 | M + a
349 | a = np.arange(3)
350 | b = np.arange(3)[:, np.newaxis]
351 | print(a)
352 | print(b)
353 | a + b
354 | 
355 | #Rule 1: If the two arrays differ in their number of dimensions, the shape of the
356 | #one with fewer dimensions is padded with ones on its leading (left) side.
357 | a.shape
358 | M.shape
359 | a+M
360 | #Rule 2: If the shape of the two arrays does not match in any dimension, the array
361 | #with shape equal to 1 in that dimension is stretched to match the other shape.
362 | #Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is
363 | #raised.
364 | 
365 | M = np.ones((2, 3))
366 | a = np.arange(3)
367 | M
368 | a
369 | M + a
370 | 
371 | a = np.arange(3).reshape((3, 1))
372 | b = np.arange(3)
373 | a + b
374 | 
375 | M = np.ones((3, 2))
376 | a = np.arange(3)
377 | M + a
378 | M + a[:, np.newaxis] #right padding
379 | 
380 | #TODO: scale the mean of the following array by column (center and standardize)
381 | X = np.random.random((10, 3))
382 | 
383 | scaledX=(X-X.mean(axis=0))/X.std(0)
384 | 
385 | # x and y have 50 steps from 0 to 5
386 | x = np.linspace(0, 5, 50)
387 | y = np.linspace(0, 5, 50)[:, np.newaxis]
388 | z = np.sin(x) ** 10 + np.cos(10 + y * x) * np.cos(x)
389 | 
390 | plt.imshow(z, origin='lower', extent=[0, 5, 0, 5], cmap='viridis')
391 | plt.colorbar();
392 | 
393 | 
394 | # use Pandas to extract rainfall inches as a NumPy array
395 | import pandas as pd
396 | rainfall = pd.read_csv('Seattle2014.csv')['PRCP'].values
397 | inches = rainfall / 254 # 1/10mm -> inches
398 | inches.shape
399 | plt.hist(inches, 40);
400 | 
401 | # booleans
402 | x = np.array([1, 2, 3, 4, 5])
403 | x < 3 # less than
404 | x > 3 # greater than
405 | x <= 3
406 | x == 3
407 | x != 3
408 | x >= 3
409 | (2 * x) == (x ** 2)
410 | 
411 | rng = np.random.RandomState(0)
412 | x = rng.randint(10, size=(3, 4))
413 | x
414 | x < 6
415 | np.count_nonzero(x < 6)
416 | np.sum(x < 6)
417 | # how many values less than 6 in each row?
418 | np.sum(x < 6, axis=1)
419 | # are there any values greater than 8?
420 | np.any(x > 8)
421 | # are there any values less than zero?
422 | np.any(x < 0)
423 | # are all values less than 10?
424 | np.all(x < 10)
425 | # are all values equal to 6?
426 | np.all(x == 6)
427 | # are all values in each row less than 8?
428 | np.all(x < 8, axis=1)
429 | 
430 | np.sum((inches > 0.5) & (inches < 1))
431 | np.sum(~( (inches <= 0.5) | (inches >= 1) ))
432 | 
433 | print("Number days without rain:", np.sum(inches == 0))
434 | print("Number days with rain:", np.sum(inches != 0))
435 | print("Days with more than 0.5 inches:", np.sum(inches > 0.5))
436 | print("Rainy days with < 0.2 inches :", np.sum((inches > 0) & (inches < 0.2)))
437 | 
438 | 
439 | x[x < 5]
440 | # construct a mask of all rainy days
441 | rainy = (inches > 0)
442 | # construct a mask of all summer days (June 21st is the 172nd day)
443 | summer = (np.arange(365) - 172 < 90) & (np.arange(365) - 172 > 0)
444 | print("Median precip on rainy days in 2014 (inches):", np.median(inches[rainy]))
445 | print("Median precip on summer days in 2014 (inches): ", np.median(inches[summer]))
446 | print("Maximum precip on summer days in 2014 (inches): ", np.max(inches[summer]))
447 | print("Median precip on non-summer rainy days (inches):", np.median(inches[rainy & ~summer]))
448 | 
449 | rand = np.random.RandomState(42)
450 | x = rand.randint(100, size=10)
451 | print(x)
452 | [x[3], x[7], x[2]]
453 | ind = [3, 7, 2]
454 | x[ind]
455 | ind = np.array([[3, 7], [4, 5]])
456 | x[ind]
457 | 
458 | X = np.arange(12).reshape((3, 4))
459 | X
460 | row = np.array([0, 1, 2])
461 | col = np.array([2, 1, 3])
462 | X[row, col]
463 | #broadcasting indices
464 | X[row[:, np.newaxis], col]
465 | 
466 | X[2, [2, 0, 1]]
467 | X[1:, [2, 0, 1]]
468 | mask = np.array([1, 0, 1, 0], dtype=bool)
469 | X[row[:, np.newaxis], mask]
470 | 
471 | mean = [0, 0]
472 | cov = [[1, 2],
473 | [2, 5]]
474 | X = random.multivariate_normal(mean, cov, 100)
475 | X.shape
476 | 
477 | plt.scatter(X[:, 0], X[:, 1]);
478 | 
479 | indices = np.random.choice(X.shape[0], 20, replace=False)
480 | indices
481 | selection = X[indices]
482 | selection.shape
483 | plt.scatter(X[:, 0], X[:, 1], alpha=0.3)
484 | plt.scatter(selection[:, 0], selection[:, 1]);
485 | 
486 | 
487 | 
488 | x = np.zeros(10)
489 | x[[0, 0]] = [4, 6]
490 | print(x)
491 | 
492 | i = [2, 3, 3, 4, 4, 4]
493 | x
494 | 
495 | x = np.zeros(10)
496 | np.add.at(x, i, 1)
497 | print(x)
498 | 
499 | x = np.array([2, 1, 4, 3, 5])
500 | np.sort(x)
501 | 
502 | x = np.array([2, 1, 4, 3, 5])
503 | i = np.argsort(x)
504 | print(i)
505 | x[i]
506 | 
507 | rand = np.random.RandomState(42)
508 | X = rand.randint(0, 10, (4, 6))
509 | print(X)
510 | 
511 | # sort each column of X
512 | np.sort(X, axis=0)
513 | 
514 | # sort each row of X
515 | np.sort(X, axis=1)
516 | 
517 | 
518 | x = np.array([7, 2, 3, 1, 6, 5, 4])
519 | np.partition(x, 3)
520 | 
521 | np.partition(X, 2, axis=1)
522 | 
523 | #nearest neighbor
524 | 
525 | X = random.rand(10, 2)
526 | plt.scatter(X[:, 0], X[:, 1], s=100);
527 | dist_sq = np.sum((X[:,np.newaxis,:] - X[np.newaxis,:,:]) ** 2, axis=-1)
528 | dist_sq
529 | nearest = np.argsort(dist_sq, axis=1)
530 | print(nearest)
531 | K = 2
532 | nearest_partition = np.argpartition(dist_sq, K + 1, axis=1)
533 | 
534 | plt.scatter(X[:, 0], X[:, 1], s=100)
535 | # draw lines from each point to its two nearest neighbors
536 | K = 2
537 | for i in range(X.shape[0]):
538 |     for j in nearest_partition[i, :K+1]:
539 |         # plot a line from X[i] to X[j]
540 |         # use some zip magic to make it happen:
541 |         plt.plot(*zip(X[j], X[i]), color='black')
542 | 
543 | 
544 | name = ['Alice', 'Bob', 'Cathy', 'Doug']
545 | age = [25, 45, 37, 19]
546 | weight = [55.0, 85.5, 68.0, 61.5]
547 | # Use a compound data type for structured arrays
548 | data = np.zeros(4, dtype={'names':('name', 'age', 'weight'), 'formats':('U10', 'i4', 'f8')}) #unicode, int, float
549 | print(data.dtype)
550 | data['name'] = name
551 | data['age'] = age
552 | data['weight'] = weight
553 | print(data)
554 | # Get all names
555 | data['name']
556 | # Get first row of data
557 | data[0]
558 | # Get the name from the last row
559 | data[-1]['name']
560 | # Get names where age is under 30
561 | data[data['age'] < 30]['name']
562 | 
563 | # transpose
564 | 
565 | a = np.array([1,2,3,4])
566 | a.T
567 | a.reshape(1,4).T
568 | 
569 | M = np.array(np.arange(16)).reshape(4,4)
570 | M
571 | M.T
572 | 
573 | # matrix multiplication
574 | 
575 | a @ M
576 | M @ a
577 | M @ a.reshape(4,1)
578 | a @ M
579 | 
580 | # inverse
581 | 
582 | np.linalg.inv(M)
583 | 
584 | np.linalg.inv(M.T @ M) #singular
585 | 
586 | X = np.random.random((15, 3))
587 | X.T @ X
588 | np.linalg.inv(X.T @ X)
589 | 
590 | # linear regression
591 | 
592 | y = np.random.random((15,1))
593 | b = np.linalg.inv(X.T @ X) @ X.T @ y
594 | 
595 | 
596 | 
597 | # empty filled with NaN
598 | 
599 | p = np.empty((4,4))
600 | p
601 | p.fill(np.nan)
602 | p
603 | 
604 | 
605 | #TODO: Answer the following questions (solutions: https://www.machinelearningplus.com/python/101-numpy-exercises-python/ continue on the site if you finish)
606 | 
607 | #Create a 3×3 numpy array of all True’s
608 | 
609 | 
610 | #Extract all odd numbers from arr
611 | #Input:
612 | 
613 | #arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
614 | 
615 | #Desired output:
616 | 
617 | ##> array([1, 3, 5, 7, 9])
618 | 
619 | 
620 | #Replace all odd numbers in arr with -1
621 | 
622 | 
623 | #Replace all odd numbers in arr with -1 without changing arr
624 | 
625 | 
626 | #Convert a 1D array to a 2D array with 2 rows
627 | 
628 | 
629 | #Stack arrays a and b vertically
630 | #Input
631 | 
632 | #a = np.arange(10).reshape(2,-1)
633 | #b = np.repeat(1, 10).reshape(2,-1)
634 | 
635 | #Desired Output:
636 | 
637 | #> array([[0, 1, 2, 3, 4],
638 | #>        [5, 6, 7, 8, 9],
639 | #>        [1, 1, 1, 1, 1],
640 | #>        [1, 1, 1, 1, 1]])
641 | 
642 | 
643 | #Stack the arrays a and b horizontally.
644 | 
645 | 
646 | #Create the following pattern without hardcoding. Use only numpy functions and the below input array a.
647 | #Input:
648 | 
649 | #a = np.array([1,2,3])`
650 | 
651 | #Desired Output:
652 | 
653 | #> array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])
654 | 
655 | 
656 | #Get the common items between a and b
657 | #Input:
658 | 
659 | #a = np.array([1,2,3,2,3,4,3,4,5,6])
660 | #b = np.array([7,2,10,2,7,4,9,4,9,8])
661 | 
662 | #Desired Output:
663 | 
664 | #array([2, 4])
665 | 
666 | 
667 | #From array a remove all items present in array b
668 | #Input:
669 | 
670 | #a = np.array([1,2,3,4,5])
671 | #b = np.array([5,6,7,8,9])
672 | 
673 | #Desired Output:
674 | 
675 | #array([1,2,3,4])
676 | 
677 | 
678 | #Get the positions where elements of a and b match
679 | #Input:
680 | 
681 | #a = np.array([1,2,3,2,3,4,3,4,5,6])
682 | #b = np.array([7,2,10,2,7,4,9,4,9,8])
683 | 
684 | #Desired Output:
685 | 
686 | #> (array([1, 3, 5, 7]),)
687 | 
688 | 
689 | #Get all items between 5 and 10 from a.
690 | #Input:
691 | 
692 | #a = np.array([2, 6, 1, 9, 10, 3, 27])
693 | 
694 | #Desired Output:
695 | 
696 | #(array([6, 9, 10]),)
697 | 
698 | 
699 | 
700 | 
701 | 


--------------------------------------------------------------------------------
/in-classMaterial/day7/president_heights.csv:
--------------------------------------------------------------------------------
 1 | order,name,height(cm)
 2 | 1,George Washington,189
 3 | 2,John Adams,170
 4 | 3,Thomas Jefferson,189
 5 | 4,James Madison,163
 6 | 5,James Monroe,183
 7 | 6,John Quincy Adams,171
 8 | 7,Andrew Jackson,185
 9 | 8,Martin Van Buren,168
10 | 9,William Henry Harrison,173
11 | 10,John Tyler,183
12 | 11,James K. Polk,173
13 | 12,Zachary Taylor,173
14 | 13,Millard Fillmore,175
15 | 14,Franklin Pierce,178
16 | 15,James Buchanan,183
17 | 16,Abraham Lincoln,193
18 | 17,Andrew Johnson,178
19 | 18,Ulysses S. Grant,173
20 | 19,Rutherford B. Hayes,174
21 | 20,James A. Garfield,183
22 | 21,Chester A. Arthur,183
23 | 23,Benjamin Harrison,168
24 | 25,William McKinley,170
25 | 26,Theodore Roosevelt,178
26 | 27,William Howard Taft,182
27 | 28,Woodrow Wilson,180
28 | 29,Warren G. Harding,183
29 | 30,Calvin Coolidge,178
30 | 31,Herbert Hoover,182
31 | 32,Franklin D. Roosevelt,188
32 | 33,Harry S. Truman,175
33 | 34,Dwight D. Eisenhower,179
34 | 35,John F. Kennedy,183
35 | 36,Lyndon B. Johnson,193
36 | 37,Richard Nixon,182
37 | 38,Gerald Ford,183
38 | 39,Jimmy Carter,177
39 | 40,Ronald Reagan,185
40 | 41,George H. W. Bush,188
41 | 42,Bill Clinton,188
42 | 43,George W. Bush,182
43 | 44,Barack Obama,185
44 | 


--------------------------------------------------------------------------------
/in-classMaterial/day8/state-abbrevs.csv:
--------------------------------------------------------------------------------
 1 | "state","abbreviation"
 2 | "Alabama","AL"
 3 | "Alaska","AK"
 4 | "Arizona","AZ"
 5 | "Arkansas","AR"
 6 | "California","CA"
 7 | "Colorado","CO"
 8 | "Connecticut","CT"
 9 | "Delaware","DE"
10 | "District of Columbia","DC"
11 | "Florida","FL"
12 | "Georgia","GA"
13 | "Hawaii","HI"
14 | "Idaho","ID"
15 | "Illinois","IL"
16 | "Indiana","IN"
17 | "Iowa","IA"
18 | "Kansas","KS"
19 | "Kentucky","KY"
20 | "Louisiana","LA"
21 | "Maine","ME"
22 | "Montana","MT"
23 | "Nebraska","NE"
24 | "Nevada","NV"
25 | "New Hampshire","NH"
26 | "New Jersey","NJ"
27 | "New Mexico","NM"
28 | "New York","NY"
29 | "North Carolina","NC"
30 | "North Dakota","ND"
31 | "Ohio","OH"
32 | "Oklahoma","OK"
33 | "Oregon","OR"
34 | "Maryland","MD"
35 | "Massachusetts","MA"
36 | "Michigan","MI"
37 | "Minnesota","MN"
38 | "Mississippi","MS"
39 | "Missouri","MO"
40 | "Pennsylvania","PA"
41 | "Rhode Island","RI"
42 | "South Carolina","SC"
43 | "South Dakota","SD"
44 | "Tennessee","TN"
45 | "Texas","TX"
46 | "Utah","UT"
47 | "Vermont","VT"
48 | "Virginia","VA"
49 | "Washington","WA"
50 | "West Virginia","WV"
51 | "Wisconsin","WI"
52 | "Wyoming","WY"


--------------------------------------------------------------------------------
/in-classMaterial/day8/state-areas.csv:
--------------------------------------------------------------------------------
 1 | state,area (sq. mi)
 2 | Alabama,52423
 3 | Alaska,656425
 4 | Arizona,114006
 5 | Arkansas,53182
 6 | California,163707
 7 | Colorado,104100
 8 | Connecticut,5544
 9 | Delaware,1954
10 | Florida,65758
11 | Georgia,59441
12 | Hawaii,10932
13 | Idaho,83574
14 | Illinois,57918
15 | Indiana,36420
16 | Iowa,56276
17 | Kansas,82282
18 | Kentucky,40411
19 | Louisiana,51843
20 | Maine,35387
21 | Maryland,12407
22 | Massachusetts,10555
23 | Michigan,96810
24 | Minnesota,86943
25 | Mississippi,48434
26 | Missouri,69709
27 | Montana,147046
28 | Nebraska,77358
29 | Nevada,110567
30 | New Hampshire,9351
31 | New Jersey,8722
32 | New Mexico,121593
33 | New York,54475
34 | North Carolina,53821
35 | North Dakota,70704
36 | Ohio,44828
37 | Oklahoma,69903
38 | Oregon,98386
39 | Pennsylvania,46058
40 | Rhode Island,1545
41 | South Carolina,32007
42 | South Dakota,77121
43 | Tennessee,42146
44 | Texas,268601
45 | Utah,84904
46 | Vermont,9615
47 | Virginia,42769
48 | Washington,71303
49 | West Virginia,24231
50 | Wisconsin,65503
51 | Wyoming,97818
52 | District of Columbia,68
53 | Puerto Rico,3515
54 | 


--------------------------------------------------------------------------------
/in-classMaterial/linearModels/OLSReviewSlides.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/carlson9/KocPython2020/508876a25ba13f074b39ae3823eaf9454af57695/in-classMaterial/linearModels/OLSReviewSlides.pdf


--------------------------------------------------------------------------------