├── AmesHousing-05-2010.csv ├── Fashion_MNIST.ipynb ├── House_price_regression.ipynb └── README.md /AmesHousing-05-2010.csv: -------------------------------------------------------------------------------- 1 | Order,PID,MS SubClass,MS Zoning,Lot Frontage,Lot Area,Street,Alley,Lot Shape,Land Contour,Utilities,Lot Config,Land Slope,Neighborhood,Condition 1,Condition 2,Bldg Type,House Style,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Roof Style,Roof Matl,Exterior 1st,Exterior 2nd,Mas Vnr Type,Mas Vnr Area,Exter Qual,Exter Cond,Foundation,Bsmt Qual,Bsmt Cond,Bsmt Exposure,BsmtFin Type 1,BsmtFin SF 1,BsmtFin Type 2,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,Heating,Heating QC,Central Air,Electrical,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,Kitchen Qual,TotRms AbvGrd,Functional,Fireplaces,Fireplace Qu,Garage Type,Garage Yr Blt,Garage Finish,Garage Cars,Garage Area,Garage Qual,Garage Cond,Paved Drive,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Pool QC,Fence,Misc Feature,Misc Val,Mo Sold,Yr Sold,Sale Type,Sale Condition,SalePrice 2 | 1,526301100,20,RL,141,31770,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,5,1960,1960,Hip,CompShg,BrkFace,Plywood,Stone,112,TA,TA,CBlock,TA,Gd,Gd,BLQ,639,Unf,0,441,1080,GasA,Fa,Y,SBrkr,1656,0,0,1656,1,0,1,0,3,1,TA,7,Typ,2,Gd,Attchd,1960,Fin,2,528,TA,TA,P,210,62,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,215000 3 | 13,527166040,60,RL,63,8402,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,789,789,GasA,Gd,Y,SBrkr,789,676,0,1465,0,0,2,1,3,1,TA,7,Typ,1,Gd,Attchd,1998,Fin,2,393,TA,TA,Y,0,75,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,180400 4 | 38,528112020,20,RL,98,11478,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NridgHt,Norm,Norm,1Fam,1Story,8,5,2007,2008,Gable,CompShg,VinylSd,VinylSd,Stone,200,Gd,TA,PConc,Ex,TA,No,GLQ,1218,Unf,0,486,1704,GasA,Ex,Y,SBrkr,1704,0,0,1704,1,0,2,0,3,1,Gd,7,Typ,1,Gd,Attchd,2008,RFn,3,772,TA,TA,Y,0,50,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,306000 5 | 56,528240070,60,RL,,7851,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,5,2002,2002,Gable,CompShg,VinylSd,VinylSd,,,Gd,TA,PConc,Gd,TA,No,GLQ,625,Unf,0,235,860,GasA,Ex,Y,SBrkr,860,1100,0,1960,1,0,2,1,4,1,Gd,8,Typ,2,TA,BuiltIn,2002,Fin,2,440,TA,TA,Y,288,48,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,216500 6 | 59,528292020,60,RL,,9505,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,Gilbert,Norm,Norm,1Fam,2Story,7,5,2001,2001,Gable,CompShg,VinylSd,VinylSd,BrkFace,180,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,884,884,GasA,Ex,Y,SBrkr,884,1151,0,2035,0,0,2,1,3,1,Gd,8,Typ,1,Gd,BuiltIn,2001,Fin,2,434,TA,TA,Y,144,48,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,222500 7 | 60,528328100,60,RL,108,14774,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,NoRidge,Norm,Norm,1Fam,2Story,9,5,1999,1999,Gable,CompShg,VinylSd,VinylSd,BrkFace,165,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1393,1393,GasA,Ex,Y,SBrkr,1422,1177,0,2599,0,0,2,1,4,1,Gd,10,Typ,1,TA,BuiltIn,1999,Fin,3,779,TA,TA,Y,668,30,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,333168 8 | 67,528445060,20,RL,73,8987,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Somerst,Norm,Norm,1Fam,1Story,8,5,2005,2006,Gable,CompShg,VinylSd,VinylSd,BrkFace,226,Gd,TA,PConc,Gd,TA,,Unf,0,Unf,0,1595,1595,GasA,Ex,Y,SBrkr,1595,0,0,1595,0,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005,RFn,3,880,TA,TA,Y,144,0,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,221500 9 | 
71,528477080,60,FV,100,12552,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,2Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,222,Unf,0,769,991,GasA,Ex,Y,SBrkr,991,956,0,1947,0,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2004,RFn,2,678,TA,TA,Y,0,136,0,0,0,0,NA,GdWo,NA,0,5,2010,WD ,Normal,254900 10 | 72,528480090,20,FV,84,10440,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,Somerst,Norm,Norm,1Fam,1Story,6,5,2005,2005,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Ex,TA,No,GLQ,1414,Unf,0,54,1468,GasA,Ex,Y,SBrkr,1468,0,0,1468,1,0,2,0,2,1,Gd,6,Typ,1,Gd,Attchd,2005,Fin,2,528,TA,TA,Y,0,102,0,0,216,0,NA,NA,NA,0,5,2010,WD ,Normal,271500 11 | 75,531380080,60,RL,,8880,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1994,2002,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,695,Unf,0,253,948,GasA,Ex,Y,SBrkr,1222,888,0,2110,1,0,2,1,3,1,Gd,8,Typ,2,Fa,Attchd,1994,RFn,2,463,TA,TA,Y,0,130,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,205000 12 | 77,531451280,60,RL,70,11218,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,5,1992,1992,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1055,1055,GasA,Ex,Y,SBrkr,1055,790,0,1845,0,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1992,RFn,2,462,TA,TA,Y,635,104,0,0,0,0,NA,GdPrv,Shed,400,5,2010,WD ,Normal,189000 13 | 81,531453010,20,RL,81,9672,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,1Story,6,5,1984,1985,Hip,CompShg,HdBoard,Plywood,None,0,TA,TA,PConc,Gd,TA,No,GLQ,338,Unf,0,702,1040,GasA,TA,Y,SBrkr,1097,0,0,1097,0,0,2,0,3,1,TA,6,Typ,0,NA,Attchd,1986,Unf,2,480,TA,TA,Y,0,0,0,0,0,0,NA,GdPrv,NA,0,5,2010,WD ,Normal,152000 14 | 111,534129040,20,RL,,10456,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NWAmes,Norm,Norm,1Fam,1Story,6,6,1967,1967,Hip,CompShg,HdBoard,HdBoard,BrkFace,120,TA,TA,CBlock,TA,TA,No,GLQ,506,Unf,0,1323,1829,GasA,Gd,Y,SBrkr,1829,0,0,1829,1,0,2,0,4,1,TA,8,Typ,0,NA,Attchd,1967,RFn,2,535,TA,TA,Y,0,76,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,218500 15 | 131,534451150,30,RL,55,5350,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1Story,3,2,1940,1966,Gable,CompShg,Wd Sdng,Plywood,None,0,TA,Po,CBlock,TA,TA,No,Unf,0,Unf,0,728,728,GasA,Ex,Y,SBrkr,1306,0,0,1306,0,0,1,0,3,1,Fa,6,Mod,0,NA,NA,,NA,0,0,NA,NA,Y,263,0,0,0,0,0,NA,GdWo,Shed,450,5,2010,WD ,Normal,76500 16 | 141,535152130,20,RL,,8050,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1959,1959,Hip,CompShg,MetalSd,MetalSd,BrkFace,150,TA,TA,CBlock,TA,TA,No,BLQ,856,Rec,162,125,1143,GasA,TA,Y,SBrkr,1143,0,0,1143,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,RFn,1,308,TA,TA,Y,0,0,0,0,0,0,NA,GdPrv,NA,0,5,2010,WD ,Normal,136000 17 | 147,535175180,20,RL,87,10725,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,5,1959,1959,Hip,CompShg,MetalSd,MetalSd,BrkFace,91,TA,TA,CBlock,TA,TA,No,Rec,936,Unf,0,270,1206,GasA,Fa,Y,SBrkr,1206,0,0,1206,0,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1959,RFn,1,312,TA,TA,Y,0,21,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,108538 18 | 150,535302080,20,RL,60,10950,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1952,1952,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,BLQ,339,Unf,0,525,864,GasA,TA,Y,SBrkr,1064,0,0,1064,0,1,1,0,2,1,Fa,4,Typ,0,NA,Detchd,1952,Unf,1,318,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,135000 19 | 156,535350030,20,RL,85,10200,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1954,2003,Gable,CompShg,Wd Sdng,Wd 
Sdng,BrkFace,104,TA,TA,CBlock,TA,TA,No,ALQ,320,BLQ,362,404,1086,GasA,Gd,Y,SBrkr,1086,0,0,1086,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1989,Unf,2,490,TA,TA,Y,0,0,0,0,0,0,NA,GdWo,NA,0,5,2010,WD ,Normal,144900 20 | 157,535350040,20,RL,74,5868,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1956,2000,Gable,CompShg,MetalSd,MetalSd,None,0,TA,TA,CBlock,TA,TA,No,BLQ,248,Rec,240,448,936,GasA,Ex,Y,SBrkr,936,0,0,936,1,0,1,0,2,1,TA,4,Typ,0,NA,Attchd,1956,Fin,1,308,TA,TA,Y,0,0,80,0,160,0,NA,NA,NA,0,5,2010,WD ,Normal,129000 21 | 162,535450210,50,RL,60,8064,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Artery,Norm,1Fam,1.5Fin,6,8,1948,2004,Gable,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,ALQ,481,Rec,174,161,816,GasA,TA,Y,SBrkr,816,408,0,1224,1,0,1,0,3,1,TA,5,Typ,0,NA,Detchd,1950,Unf,1,280,TA,TA,Y,414,0,0,0,0,0,NA,GdWo,NA,0,5,2010,WD ,Normal,132000 22 | 163,535453070,20,RL,75,7500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1959,1994,Hip,CompShg,BrkFace,BrkFace,None,0,TA,TA,CBlock,TA,TA,No,LwQ,340,Rec,906,0,1246,GasA,Ex,Y,SBrkr,1246,0,0,1246,1,0,1,1,3,1,Gd,6,Typ,0,NA,Attchd,1959,RFn,1,305,TA,TA,Y,218,0,0,0,0,0,NA,GdPrv,NA,0,5,2010,WD ,Normal,154000 23 | 165,535456110,20,RL,60,7200,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,5,7,1951,2000,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,CBlock,TA,TA,Mn,BLQ,234,Rec,486,180,900,GasA,TA,Y,SBrkr,900,0,0,900,0,1,1,0,3,1,Gd,5,Typ,0,NA,Detchd,2005,Unf,2,576,TA,TA,Y,222,32,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,134800 24 | 168,535476350,20,RL,80,9760,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,NAmes,Norm,Norm,1Fam,1Story,6,7,1963,1984,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,218,TA,TA,CBlock,TA,TA,Gd,BLQ,717,LwQ,263,415,1395,GasA,TA,Y,SBrkr,1395,0,0,1395,1,0,1,0,2,1,TA,7,Min1,1,TA,Attchd,1963,RFn,2,440,TA,TA,Y,657,0,113,0,240,0,NA,NA,NA,0,5,2010,WD ,Normal,192000 25 | 172,902106130,30,RM,56,4485,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Artery,Norm,1Fam,1Story,5,7,1920,1950,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,PConc,TA,TA,No,BLQ,579,Unf,0,357,936,GasA,TA,Y,SBrkr,936,0,0,936,1,0,1,0,2,1,TA,5,Typ,1,Gd,NA,,NA,0,0,NA,NA,P,51,0,135,0,0,0,NA,MnPrv,NA,0,5,2010,WD ,Normal,109500 26 | 175,902125080,50,RM,60,5790,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,OldTown,Norm,Norm,1Fam,2Story,3,6,1915,1950,Gambrel,CompShg,VinylSd,VinylSd,None,0,Gd,Gd,CBlock,Fa,TA,No,Unf,0,Unf,0,840,840,GasA,Gd,N,SBrkr,840,765,0,1605,0,0,2,0,3,2,TA,8,Typ,0,NA,Detchd,1915,Unf,1,379,TA,TA,Y,0,0,202,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,107400 27 | 183,902207140,30,RM,60,8520,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,1Story,6,8,1928,2003,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,BrkTil,TA,TA,No,Unf,0,Unf,0,624,624,GasA,Gd,Y,SBrkr,720,0,0,720,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,2005,Unf,2,484,TA,TA,Y,106,0,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,105900 28 | 193,903206120,75,RL,,7793,Pave,NA,IR1,Bnk,AllPub,Corner,Gtl,BrkSide,Norm,Norm,1Fam,2.5Unf,7,7,1922,2005,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,TA,TA,BrkTil,Gd,TA,No,BLQ,474,Unf,0,634,1108,GasA,TA,N,FuseA,1160,908,0,2068,0,0,1,1,3,1,Gd,8,Typ,1,Gd,Detchd,1928,Unf,1,315,TA,TA,Y,0,0,60,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,209500 29 | 199,903232190,50,RM,52,6240,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,BrkSide,Norm,Norm,1Fam,1.5Fin,5,7,1936,1980,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,CBlock,TA,Fa,No,Rec,276,Unf,0,252,528,GasA,Gd,Y,SBrkr,548,492,0,1040,0,0,1,0,2,1,TA,5,Typ,0,NA,Detchd,1979,Fin,2,624,TA,TA,P,306,0,32,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,123900 30 | 
204,903426200,70,RM,60,12900,Pave,Grvl,Reg,Lvl,AllPub,Inside,Gtl,OldTown,Norm,Norm,1Fam,2Story,6,8,1912,2009,Gable,CompShg,Wd Sdng,Wd Sdng,None,0,Gd,Gd,PConc,TA,TA,No,Unf,0,Unf,0,780,780,GasA,Ex,Y,SBrkr,780,780,0,1560,0,0,1,1,3,1,Gd,7,Typ,0,NA,NA,,NA,0,0,NA,NA,N,344,0,0,0,168,0,NA,NA,NA,0,5,2010,WD ,Normal,159900 31 | 210,904101110,20,RL,100,15263,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,ClearCr,Feedr,Norm,1Fam,1Story,5,5,1959,1959,Gable,CompShg,HdBoard,HdBoard,BrkFace,90,TA,TA,CBlock,Gd,TA,No,Rec,766,Unf,0,656,1422,GasA,Gd,Y,SBrkr,1675,0,0,1675,0,0,2,0,3,1,TA,8,Typ,2,Gd,Attchd,1959,Unf,1,365,TA,TA,Y,0,132,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,173000 32 | 216,905101100,50,RL,54,6342,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Feedr,Norm,1Fam,1.5Fin,5,8,1875,1996,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,CBlock,TA,TA,No,Unf,0,Unf,0,780,780,GasA,Gd,N,SBrkr,780,240,0,1020,0,0,1,0,2,1,TA,6,Typ,0,NA,NA,,NA,0,0,NA,NA,N,0,0,176,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,94000 33 | 220,905103060,20,RL,,11341,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,6,1957,1996,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,180,TA,TA,CBlock,Gd,TA,No,ALQ,1302,Unf,0,90,1392,GasA,TA,Y,SBrkr,1392,0,0,1392,1,0,1,1,3,1,TA,5,Mod,1,Gd,Detchd,1957,Unf,2,528,TA,TA,Y,0,0,0,0,95,0,NA,NA,NA,0,5,2010,WD ,Normal,121500 34 | 221,905104210,20,RL,70,8521,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,Sawyer,Feedr,Norm,1Fam,1Story,5,5,1967,1967,Gable,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,ALQ,842,Unf,0,70,912,GasA,TA,Y,SBrkr,912,0,0,912,0,0,1,0,3,1,TA,5,Typ,1,Fa,Detchd,1974,Unf,1,336,TA,TA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD ,Normal,125000 35 | 222,905105070,20,RL,,8246,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,8,1968,2001,Gable,CompShg,Plywood,Plywood,None,0,TA,Gd,CBlock,TA,TA,Mn,Rec,188,ALQ,668,204,1060,GasA,Ex,Y,SBrkr,1060,0,0,1060,1,0,1,0,3,1,Gd,6,Typ,1,TA,Attchd,1968,Unf,1,270,TA,TA,Y,406,90,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD ,Normal,154000 36 | 225,905106170,20,RL,65,7832,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Sawyer,Norm,Norm,1Fam,1Story,5,5,1968,1968,Hip,CompShg,HdBoard,HdBoard,None,0,TA,TA,CBlock,TA,TA,No,GLQ,775,Unf,0,89,864,GasA,Ex,Y,SBrkr,864,0,0,864,1,0,1,0,2,1,TA,4,Typ,0,NA,Detchd,1969,Unf,1,280,TA,TA,Y,226,0,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,137250 37 | 234,905352140,60,RL,,17082,Pave,NA,IR1,Low,AllPub,CulDSac,Mod,ClearCr,Norm,Norm,1Fam,2Story,6,5,1978,1992,Gable,CompShg,VinylSd,VinylSd,BrkFace,288,TA,TA,PConc,Gd,TA,Av,ALQ,964,Unf,0,153,1117,GasA,Ex,Y,SBrkr,1117,864,0,1981,1,0,2,1,4,1,Gd,8,Typ,1,TA,Attchd,1978,Fin,2,522,TA,TA,Y,336,104,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,230000 38 | 247,906204230,60,RL,87,12361,Pave,NA,IR1,Lvl,AllPub,CulDSac,Gtl,SawyerW,Norm,Norm,1Fam,2Story,6,7,1993,1993,Gable,CompShg,VinylSd,VinylSd,BrkFace,85,Gd,Gd,PConc,Gd,TA,No,GLQ,860,Unf,0,86,946,GasA,Ex,Y,SBrkr,964,838,0,1802,0,1,2,1,3,1,Gd,8,Typ,1,Gd,2Types,2000,RFn,4,1017,TA,TA,Y,450,92,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,218000 39 | 252,906385010,20,RL,94,10402,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2009,2009,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1226,1226,GasA,Ex,Y,SBrkr,1226,0,0,1226,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2009,RFn,3,740,TA,TA,Y,0,36,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,198900 40 | 
254,906402200,60,RL,90,12376,Pave,NA,Reg,Lvl,AllPub,Corner,Gtl,SawyerW,Norm,Norm,1Fam,2Story,7,5,1990,1990,Hip,CompShg,Plywood,Plywood,None,0,TA,TA,PConc,Gd,TA,Mn,GLQ,1470,Unf,0,203,1673,GasA,Gd,Y,SBrkr,1699,1523,0,3222,1,0,3,0,5,1,Gd,11,Typ,2,TA,Attchd,1990,Unf,3,594,TA,TA,Y,367,0,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,320000 41 | 264,907250030,20,RL,80,10389,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2003,2003,Hip,CompShg,CemntBd,CmentBd,BrkFace,320,Gd,TA,PConc,Gd,TA,No,GLQ,1682,Unf,0,296,1978,GasA,Ex,Y,SBrkr,1978,0,0,1978,1,0,2,1,3,1,Gd,8,Typ,1,Gd,Attchd,2003,RFn,3,850,TA,TA,Y,188,25,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,318000 42 | 265,907252120,20,RL,,11423,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,479,Gd,TA,PConc,Gd,TA,Av,GLQ,1358,Unf,0,223,1581,GasA,Ex,Y,SBrkr,1601,0,0,1601,1,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2001,RFn,2,670,TA,TA,Y,180,0,0,0,0,0,NA,MnPrv,Shed,2000,5,2010,WD ,Normal,272000 43 | 268,907290040,20,RL,48,12137,Pave,NA,IR2,Lvl,AllPub,CulDSac,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,442,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,1649,1649,GasA,Ex,Y,SBrkr,1661,0,0,1661,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,1998,RFn,2,598,TA,TA,Y,0,34,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,224900 44 | 271,907401090,20,RL,48,10635,Pave,NA,IR2,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,1Story,8,5,2003,2003,Hip,CompShg,VinylSd,VinylSd,BrkFace,171,Gd,TA,PConc,Gd,TA,Av,BLQ,370,GLQ,972,315,1657,GasA,Ex,Y,SBrkr,1668,0,0,1668,1,0,2,0,3,1,Gd,8,Typ,1,TA,Attchd,2003,Fin,2,502,TA,TA,Y,0,262,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,232000 45 | 272,907410080,20,RL,70,8400,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2005,Gable,CompShg,VinylSd,VinylSd,BrkFace,109,Gd,TA,PConc,Gd,TA,Av,GLQ,712,Unf,0,761,1473,GasA,Ex,Y,SBrkr,1484,0,0,1484,1,0,2,0,3,1,Gd,7,Typ,0,NA,Attchd,2004,RFn,2,606,TA,TA,Y,0,35,0,144,0,0,NA,NA,NA,0,5,2010,WD ,Normal,213000 46 | 273,907414030,20,RL,65,8773,Pave,NA,Reg,Lvl,AllPub,FR2,Gtl,CollgCr,Norm,Norm,1Fam,1Story,7,5,2004,2004,Gable,CompShg,VinylSd,VinylSd,BrkFace,98,Gd,TA,PConc,Gd,TA,Av,GLQ,24,Unf,0,1390,1414,GasA,Ex,Y,SBrkr,1414,0,0,1414,0,0,2,0,3,1,Gd,6,Typ,0,NA,Attchd,2004,RFn,2,494,TA,TA,Y,132,105,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,185500 47 | 281,908203100,20,RL,64,6410,Pave,NA,Reg,HLS,AllPub,Inside,Gtl,Edwards,Norm,Norm,1Fam,1Story,4,5,1958,1958,Hip,CompShg,WdShing,Wd Shng,None,0,TA,TA,CBlock,TA,TA,No,Unf,0,Unf,0,960,960,GasA,Ex,Y,SBrkr,960,0,0,960,0,0,1,0,3,1,TA,5,Typ,0,NA,NA,,NA,0,0,NA,NA,Y,0,0,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD ,Normal,90000 48 | 285,908275300,20,RL,68,8562,Pave,NA,Reg,Lvl,AllPub,Inside,Mod,Edwards,Norm,Norm,1Fam,1Story,5,6,1957,2002,Hip,CompShg,HdBoard,HdBoard,Stone,145,TA,TA,CBlock,TA,TA,Av,Rec,383,Unf,0,833,1216,GasA,Ex,Y,FuseA,1526,0,0,1526,0,0,1,0,4,1,TA,7,Min2,1,Gd,Basment,1957,Unf,1,364,TA,TA,Y,116,78,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,144500 49 | 286,909100080,30,RL,67,4853,Pave,NA,Reg,Bnk,AllPub,Inside,Gtl,SWISU,Artery,Norm,1Fam,1Story,5,6,1924,1999,Gable,CompShg,MetalSd,VinylSd,BrkFace,203,TA,TA,BrkTil,TA,TA,Mn,Rec,133,Unf,0,974,1107,GasA,Fa,N,FuseA,1296,0,0,1296,0,0,2,0,2,1,Fa,5,Typ,1,Gd,Detchd,1979,Unf,1,260,TA,TA,Y,0,0,36,0,0,0,NA,MnPrv,NA,0,5,2010,WD ,Normal,104000 50 | 287,909100110,70,RL,66,6858,Pave,NA,Reg,Bnk,AllPub,Corner,Gtl,SWISU,Norm,Norm,1Fam,2Story,6,4,1915,1950,Gable,CompShg,Wd Sdng,Wd 
Sdng,None,0,TA,TA,PConc,Gd,TA,No,Unf,0,Unf,0,806,806,GasA,TA,N,FuseF,841,806,0,1647,1,0,1,1,4,1,Fa,6,Typ,0,NA,Detchd,1920,Unf,1,216,TA,TA,Y,0,66,136,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,128000 51 | 289,909176140,50,RL,50,5000,Pave,Pave,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Norm,Norm,1Fam,1.5Fin,5,6,1924,1950,Gable,CompShg,BrkFace,Wd Sdng,None,0,TA,TA,BrkTil,TA,TA,No,LwQ,218,Unf,0,808,1026,GasA,TA,Y,SBrkr,1026,665,0,1691,0,0,2,0,3,1,Gd,6,Typ,1,Gd,Detchd,1924,Unf,1,308,TA,TA,Y,0,0,242,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,127000 52 | 292,909250030,50,RL,51,9839,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,SWISU,Feedr,Norm,1Fam,1.5Fin,5,2,1931,1950,Gable,CompShg,VinylSd,VinylSd,None,0,TA,TA,PConc,TA,Fa,No,Unf,0,Unf,0,894,894,GasA,Ex,Y,SBrkr,894,772,0,1666,1,0,1,0,3,1,TA,7,Typ,1,Gd,NA,,NA,0,0,NA,NA,N,0,156,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,100000 53 | 296,909427230,70,RL,66,9042,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,9,1941,2006,Gable,CompShg,CemntBd,CmentBd,None,0,Ex,Gd,Stone,TA,Gd,No,GLQ,275,Unf,0,877,1152,GasA,Ex,Y,SBrkr,1188,1152,0,2340,0,0,2,0,4,1,Gd,9,Typ,2,Gd,Attchd,1941,RFn,1,252,TA,TA,Y,0,60,0,0,0,0,NA,GdPrv,Shed,2500,5,2010,WD ,Normal,266500 54 | 297,909428280,20,RL,100,17500,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Crawfor,PosA,Norm,1Fam,1Story,7,8,1959,2002,Gable,CompShg,BrkFace,HdBoard,None,0,Gd,Gd,PConc,Gd,TA,Av,GLQ,1406,Unf,0,496,1902,GasA,TA,Y,SBrkr,1902,0,0,1902,1,0,2,0,3,1,Ex,7,Typ,2,TA,Attchd,1959,Fin,2,567,TA,TA,Y,0,207,162,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,335000 55 | 302,910202060,50,RM,40,5436,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,IDOTRR,Norm,Norm,1Fam,1.5Fin,4,8,1922,2007,Gable,CompShg,VinylSd,VinylSd,None,0,TA,Gd,BrkTil,TA,TA,No,BLQ,735,Unf,0,61,796,GasA,Gd,Y,SBrkr,796,358,0,1154,1,0,1,0,3,1,Gd,7,Typ,0,NA,Detchd,1922,Unf,1,240,TA,TA,N,0,96,0,0,0,0,NA,MnPrv,NA,0,5,2010,WD ,Normal,125500 56 | 310,914467040,60,RL,85,11050,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,2Story,8,5,1998,1999,Gable,CompShg,VinylSd,VinylSd,None,0,Gd,TA,PConc,Gd,TA,No,GLQ,615,Unf,0,434,1049,GasA,Ex,Y,SBrkr,1036,880,0,1916,1,0,2,1,3,1,Gd,8,Typ,1,TA,Attchd,1998,Unf,3,741,TA,TA,Y,0,70,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,254900 57 | 311,914475090,80,RL,74,9620,Pave,NA,Reg,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,7,1977,1977,Gable,CompShg,Plywood,Plywood,None,0,TA,TA,CBlock,TA,TA,No,ALQ,679,Unf,0,564,1243,GasA,TA,Y,SBrkr,1285,0,0,1285,0,1,2,0,3,1,Gd,6,Typ,1,Fa,Attchd,1977,Unf,2,473,TA,TA,Y,375,26,0,0,0,0,NA,GdPrv,Shed,80,5,2010,WD ,Normal,190000 58 | 313,914478045,80,RL,,12328,Pave,NA,IR1,Lvl,AllPub,Inside,Gtl,Mitchel,Norm,Norm,1Fam,SLvl,6,5,1976,1976,Gable,CompShg,HdBoard,HdBoard,BrkFace,335,TA,TA,CBlock,TA,TA,Av,GLQ,539,Unf,0,473,1012,GasA,TA,Y,SBrkr,1034,0,0,1034,1,0,1,0,3,1,TA,6,Typ,0,NA,Attchd,1976,Unf,3,888,TA,TA,Y,0,0,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,169900 59 | 316,916386010,20,RL,88,11896,Pave,NA,IR1,Lvl,AllPub,Corner,Gtl,Timber,Norm,Norm,1Fam,1Story,7,5,2008,2008,Gable,CompShg,VinylSd,VinylSd,Stone,60,Gd,TA,PConc,Gd,TA,No,GLQ,78,Unf,0,1258,1336,GasA,Ex,Y,SBrkr,1346,0,0,1346,1,0,2,0,3,1,Gd,6,Typ,1,TA,Attchd,2008,Fin,3,660,TA,TA,Y,100,48,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,220000 60 | 322,921128020,20,RL,89,13214,Pave,NA,IR1,HLS,AllPub,Inside,Gtl,Timber,Norm,Norm,1Fam,1Story,9,5,2008,2009,Hip,CompShg,Stucco,CmentBd,None,0,Ex,TA,PConc,Ex,TA,Gd,Unf,0,Unf,0,2002,2002,GasA,Ex,Y,SBrkr,2018,0,0,2018,0,0,2,0,3,1,Ex,10,Typ,1,Gd,Attchd,2009,Fin,3,746,TA,TA,Y,144,76,0,0,0,0,NA,NA,NA,0,5,2010,WD ,Normal,378500 61 | 
-------------------------------------------------------------------------------- /Fashion_MNIST.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Fashion_MNIST.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "bC7-g3yGJCdW", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "# Fashion-MNIST Neural Network\n", 25 | "\n", 26 | "This notebook implements a simple NN consisting of two dense layers, and uses this network to classify Fashion-MNIST images.\n", 27 | "\n", 28 | "The Fashion-MNIST images are of clothing items from ten classes (*T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot*)." 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "tL0ZtKVL6NN4", 35 | "colab_type": "text" 36 | }, 37 | "source": [ 38 | "## Load correct version of TensorFlow\n", 39 | "\n", 40 | "Before we use TensorFlow we must load the correct version. We want version 2.x. To do this we execute the Colab commands below. Note that these commands ONLY WORK IN COLAB." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "metadata": { 46 | "id": "leSo_zfF6QXV", 47 | "colab_type": "code", 48 | "colab": {} 49 | }, 50 | "source": [ 51 | "# Install TensorFlow using Colab's tensorflow_version command\n", 52 | "try:\n", 53 | " # %tensorflow_version only exists in Colab.\n", 54 | " %tensorflow_version 2.x\n", 55 | "except Exception:\n", 56 | " pass\n" 57 | ], 58 | "execution_count": 0, 59 | "outputs": [] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "metadata": { 64 | "id": "Sy_y_E9_NFc8", 65 | "colab_type": "text" 66 | }, 67 | "source": [ 68 | "## Import Libraries\n", 69 | "\n", 70 | "We import the TensorFlow, NumPy, and Matplotlib libraries. \n", 71 | "\n", 72 | "NumPy is a powerful n-dimensional array library that\n", 73 | "allows us to easily create and manipulate arrays of data, and more!\n", 74 | "\n", 75 | "NumPy also allows us to convert TensorFlow's native data structures\n", 76 | "to Python native data types.\n", 77 | "\n", 78 | "Matplotlib is a plotting library." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "metadata": { 84 | "id": "RgDK_bJeJpua", 85 | "colab_type": "code", 86 | "colab": {} 87 | }, 88 | "source": [ 89 | "# Import libraries\n", 90 | "import tensorflow as tf\n", 91 | "import numpy as np\n", 92 | "import matplotlib.pyplot as plt\n", 93 | "\n", 94 | "print(tf.__version__)" 95 | ], 96 | "execution_count": 0, 97 | "outputs": [] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": { 102 | "id": "EMWdw_E_QPhY", 103 | "colab_type": "text" 104 | }, 105 | "source": [ 106 | "## Problem Statement\n", 107 | "\n", 108 | "Using example data, train a model that will correctly predict the class of images of fashion items.\n", 109 | "\n" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": { 115 | "id": "N49Xfo21RTr9", 116 | "colab_type": "text" 117 | }, 118 | "source": [ 119 | "## Get Data\n", 120 | "\n", 121 | "### About the Fashion-MNIST dataset\n", 122 | "\n", 123 | "The Fashion-MNIST dataset is widely available. 
It is documented at https://github.com/zalandoresearch/fashion-mnist\n", 124 | "\n", 125 | "Each image in the dataset is 28 X 28 pixels. The pixel values are grayscale values ranging from 0 to 255.\n", 126 | "\n", 127 | "Associated with each image is a number from 0 to 9 specifying the class of the fashion item in the image. The values for the classes are:\n", 128 | "\n", 129 | "| Label | Description |\n", 130 | "|-------|-------------|\n", 131 | "| 0 | T-shirt/top |\n", 132 | "| 1 | Trouser | \n", 133 | "| 2 | Pullover |\n", 134 | "| 3 | Dress |\n", 135 | "| 4 | Coat |\n", 136 | "| 5 | Sandal |\n", 137 | "| 6 | Shirt |\n", 138 | "| 7 | Sneaker |\n", 139 | "| 8 | Bag |\n", 140 | "| 9 | Ankle boot |\n", 141 | "\n", 142 | "There are 60,000 training examples and 10,000 testing examples. " 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": { 148 | "id": "qswhm4c7nMVl", 149 | "colab_type": "text" 150 | }, 151 | "source": [ 152 | "We will need these class names later in the code. So we create a list of names we can index by the class label number. For example, class_names[2] = 'Pullover' and class_names[9] = 'Ankle boot'." 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "metadata": { 158 | "id": "zK737-ABm7od", 159 | "colab_type": "code", 160 | "colab": {} 161 | }, 162 | "source": [ 163 | "# Define class names to display\n", 164 | "class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',\n", 165 | " 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']" 166 | ], 167 | "execution_count": 0, 168 | "outputs": [] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": { 173 | "id": "YcAJeHCCm_TW", 174 | "colab_type": "text" 175 | }, 176 | "source": [ 177 | "Fashion-MNIST is one of the datasets provided with Keras in TensorFlow. Keras' dataset library provides a load_data() method that will download the 60,000 training images with their corresponding labels, and the 10,000 test images with their corresponding labels." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "id": "1MMs613GNLe4", 184 | "colab_type": "code", 185 | "colab": {} 186 | }, 187 | "source": [ 188 | "fashion_mnist = tf.keras.datasets.fashion_mnist\n", 189 | "(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()\n" 190 | ], 191 | "execution_count": 0, 192 | "outputs": [] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "id": "oAcxPIjJkLKZ", 198 | "colab_type": "text" 199 | }, 200 | "source": [ 201 | "### Explore the data\n", 202 | "\n", 203 | "Let's look at our data." 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "metadata": { 209 | "id": "Umx96CdYV-yz", 210 | "colab_type": "code", 211 | "colab": {} 212 | }, 213 | "source": [ 214 | "# Print the shape of the data tensors we loaded. Should be:\n", 215 | "# 60,000 training 28X28 images and their labels, and 10,000 testing images and their labels\n", 216 | "print ('Training data:', train_images.shape, train_labels.shape)\n", 217 | "print ('Test data:', test_images.shape, test_labels.shape)" 218 | ], 219 | "execution_count": 0, 220 | "outputs": [] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": { 225 | "id": "4Jq_XBs9Xa6C", 226 | "colab_type": "text" 227 | }, 228 | "source": [ 229 | "### Inspect data\n", 230 | "Each image should be 28 X 28 pixels with grayscale values of 0-255. 
Let's display an arbitrary image.\n", 231 | "\n", 232 | "You should see: \n", 233 | "* A title that specifies the class of the image, whose description is shown in the table above.\n", 234 | "* The image \n", 235 | "* The 28 by 28 pixel indices on the width and height axes \n", 236 | "* And, on the right, the grayscale legend associated with the pixel values." 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "metadata": { 242 | "id": "ni0bRFhjWP1y", 243 | "colab_type": "code", 244 | "colab": {} 245 | }, 246 | "source": [ 247 | "def show_training_image(index):\n", 248 | " img_label = str(train_labels[index]) + ' (' + class_names[train_labels[index]] + ')'\n", 249 | " plt.figure()\n", 250 | " plt.title('Image Label ' + img_label) \n", 251 | " plt.imshow(train_images[index], cmap='gray') # data is grayscale, but displays in color without cmap='gray'\n", 252 | " plt.colorbar()\n", 253 | " plt.show()\n" 254 | ], 255 | "execution_count": 0, 256 | "outputs": [] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "metadata": { 261 | "id": "gJa1Wq_4acop", 262 | "colab_type": "code", 263 | "colab": {} 264 | }, 265 | "source": [ 266 | "img_index = 100\n", 267 | "show_training_image(img_index)" 268 | ], 269 | "execution_count": 0, 270 | "outputs": [] 271 | }, 272 | { 273 | "cell_type": "markdown", 274 | "metadata": { 275 | "id": "Db_TW6wccWHw", 276 | "colab_type": "text" 277 | }, 278 | "source": [ 279 | "## Prepare Data\n", 280 | "\n", 281 | "As usual we need to do some pre-processing of the data. Here we want to scale the pixel values from the range 0 to 255 down to 0.0 to 1.0. We scale both the training and testing image values." 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "metadata": { 287 | "id": "o0vDE9tBYUWx", 288 | "colab_type": "code", 289 | "colab": {} 290 | }, 291 | "source": [ 292 | "# scale training and testing image values\n", 293 | "train_images = train_images / 255.0\n", 294 | "test_images = test_images / 255.0\n", 295 | "\n", 296 | "# Print the image again and notice the values now range from 0 to 1.\n", 297 | "# And the image looks the same, just on a different scale. \n", 298 | "show_training_image(img_index)" 299 | ], 300 | "execution_count": 0, 301 | "outputs": [] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": { 306 | "id": "ZAAj8oXkeBsY", 307 | "colab_type": "text" 308 | }, 309 | "source": [ 310 | "## Create Model\n", 311 | "\n", 312 | "We can now create the model we are going to train with the data.\n", 313 | "\n", 314 | "This will be a simple model that:\n", 315 | "* Flattens the 28 X 28 pixel values into a long stream of 28 by 28 = 784 pixel values - Note this is a Keras layer but not a neural network layer. Notice the input_shape parameter contains only the shape of a single data element. There are n data elements each with this same shape. \n", 316 | "* Passes each pixel value as the input to each of the 128 neurons. \n", 317 | "* And finally passes values to the last layer that contains 10 neurons, one for each of the ten classes (t-shirt, pullover, etc.). Each of these 10 neurons uses the Softmax activation function to determine the probability that the image belongs to each class. The final output from the model is a vector of probabilities that the image is of each class, such as:\n", 318 | "[0.01, 0.05, 0.04, 0.06, 0.50, 0.20, 0.04, 0.00, 0.03, 0.07]. 
This example vector adds up to 1.0 and shows that the probability the image is class 0 (T-shirt/top) is 0.01 (1%), class 1 (Trouser) is 0.05 (5%), class 2 (Pullover) is 0.04 (4%), ....\n" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "metadata": { 324 | "id": "0N2R893MdSZn", 325 | "colab_type": "code", 326 | "colab": {} 327 | }, 328 | "source": [ 329 | "\n", 330 | "model = tf.keras.models.Sequential() # Create a new sequential model\n", 331 | "model.add(tf.keras.layers.Flatten(input_shape=(28,28))) # keras processing layer - no neurons\n", 332 | "model.add(tf.keras.layers.Dense(128, activation='relu', name='dense-128-relu')) # 128 neurons connected to pixels\n", 333 | "model.add(tf.keras.layers.Dense(10, activation='softmax', name='dense-10-softmax')) # determines probability of each of the 10 classes\n" 334 | ], 335 | "execution_count": 0, 336 | "outputs": [] 337 | }, 338 | { 339 | "cell_type": "markdown", 340 | "metadata": { 341 | "id": "6nu2185LTBq8", 342 | "colab_type": "text" 343 | }, 344 | "source": [ 345 | "### Structure of the model\n", 346 | "\n", 347 | "To make it easy for us to visualize the model, Keras models have a summary method. When we call it we see our model structure. \n", 348 | "\n", 349 | "For completeness we also show the shape of the input data.\n" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "metadata": { 355 | "id": "0gXXuAqQURdH", 356 | "colab_type": "code", 357 | "colab": {} 358 | }, 359 | "source": [ 360 | "print('Input Shape:', train_images.shape)\n", 361 | "print()\n", 362 | "print(model.summary())" 363 | ], 364 | "execution_count": 0, 365 | "outputs": [] 366 | }, 367 | { 368 | "cell_type": "markdown", 369 | "metadata": { 370 | "id": "BJTmzxCJ3Bpi", 371 | "colab_type": "text" 372 | }, 373 | "source": [ 374 | "### Compile the model\n", 375 | "\n", 376 | "Before we can train the model we need to specify how the model will learn from the training data. To do this we specify:\n", 377 | "* loss - how we measure loss (error). We will use sparse_categorical_crossentropy, which takes each image's integer class label and computes the loss as the negative log of the probability the model assigned to that correct class.\n", 378 | "* optimizer - how the model will update its weights to reduce the loss. We use Adam, a variant of mini-batch gradient descent.\n", 379 | "* metrics - the metrics used for evaluation of training and test. In this case we use accuracy, that is, how often the images are correctly classified. Higher is better. (1.00 would be perfect (100%), 0.75 = 75%, 0.25 = 25%, ...) \n" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "metadata": { 385 | "id": "TW9pZ8kw2Nid", 386 | "colab_type": "code", 387 | "colab": {} 388 | }, 389 | "source": [ 390 | "model.compile(optimizer='adam',\n", 391 | " loss='sparse_categorical_crossentropy',\n", 392 | " metrics=['accuracy'])\n" 393 | ], 394 | "execution_count": 0, 395 | "outputs": [] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": { 400 | "id": "T7wyz8jh6ZA6", 401 | "colab_type": "text" 402 | }, 403 | "source": [ 404 | "## Train the Model\n", 405 | "Now that we have our data and model, and have specified how the model will learn from the data, we can train it."
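, "\n", "Training will minimize the loss we just specified. As a small worked example of that loss (added for illustration; the probability vector is the hypothetical one from the Create Model section):\n", "\n", "```python\n", "import numpy as np\n", "\n", "# Sparse categorical cross-entropy for one example is -log(p_true_class)\n", "probs = np.array([0.01, 0.05, 0.04, 0.06, 0.50, 0.20, 0.04, 0.00, 0.03, 0.07])\n", "true_class = 4  # suppose the image really is a Coat\n", "print(-np.log(probs[true_class]))  # ~0.69; approaches 0.0 as the true-class probability approaches 1.0\n", "```"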
406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "metadata": { 411 | "id": "osxdZq5y6qF1", 412 | "colab_type": "code", 413 | "colab": {} 414 | }, 415 | "source": [ 416 | "train_hist = model.fit(train_images, train_labels, epochs=40)" 417 | ], 418 | "execution_count": 0, 419 | "outputs": [] 420 | }, 421 | { 422 | "cell_type": "markdown", 423 | "metadata": { 424 | "id": "R2TLdEfR9tus", 425 | "colab_type": "text" 426 | }, 427 | "source": [ 428 | "Plot the training history to make sure it is converging." 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "metadata": { 434 | "id": "5hCztJ2o90DS", 435 | "colab_type": "code", 436 | "colab": {} 437 | }, 438 | "source": [ 439 | "def plot_acc(hist):\n", 440 | " # plot the accuracy\n", 441 | " plt.title('Accuracy History')\n", 442 | " plt.plot(hist.history['accuracy'])\n", 443 | " plt.ylabel('Accuracy')\n", 444 | " plt.xlabel('epoch')\n", 445 | " plt.show()\n", 446 | " \n", 447 | "def plot_loss(hist):\n", 448 | " # plot the loss\n", 449 | " plt.title('Loss History')\n", 450 | " plt.plot(hist.history['loss'])\n", 451 | " plt.ylabel('loss')\n", 452 | " plt.xlabel('epoch')\n", 453 | " plt.show()\n", 454 | "\n" 455 | ], 456 | "execution_count": 0, 457 | "outputs": [] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "metadata": { 462 | "id": "Zljp5aHqv514", 463 | "colab_type": "code", 464 | "colab": {} 465 | }, 466 | "source": [ 467 | "plot_loss(train_hist)\n", 468 | "plot_acc(train_hist)" 469 | ], 470 | "execution_count": 0, 471 | "outputs": [] 472 | }, 473 | { 474 | "cell_type": "markdown", 475 | "metadata": { 476 | "id": "0WOXQpc99Ttj", 477 | "colab_type": "text" 478 | }, 479 | "source": [ 480 | "## Evaluate Trained Model\n", 481 | "\n", 482 | "Once we have the model trained, we need to see how it will perform on data that it was not trained on, that is, test data. \n", 483 | "\n", 484 | "We do this with Keras' evaluation function and the test dataset we retrieved earlier.\n" 485 | ] 486 | }, 487 | { 488 | "cell_type": "code", 489 | "metadata": { 490 | "id": "9yEEjOV69uay", 491 | "colab_type": "code", 492 | "colab": {} 493 | }, 494 | "source": [ 495 | "test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=0)\n", 496 | "\n", 497 | "print('max training accuracy:', max(train_hist.history['accuracy']), ' test accuracy:', test_acc)" 498 | ], 499 | "execution_count": 0, 500 | "outputs": [] 501 | }, 502 | { 503 | "cell_type": "markdown", 504 | "metadata": { 505 | "id": "nAB01AmFAYw6", 506 | "colab_type": "text" 507 | }, 508 | "source": [ 509 | "## Training Results\n", 510 | "\n", 511 | "The model has **about 96% accuracy** on the training data, but **only 88% accuracy** on testing data on which it was not trained. This is a classic sign that the model overfits the training data. \n", 512 | "\n", 513 | "We need to determine how we can reduce this overfitting and get **good accuracy on both training and test data!**\n", 514 | "\n", 515 | "\n" 516 | ] 517 | }, 518 | { 519 | "cell_type": "markdown", 520 | "metadata": { 521 | "id": "6olDUcpiVkxe", 522 | "colab_type": "text" 523 | }, 524 | "source": [ 525 | "# Monitoring and Improving our Trained Model’s Performance \n", 526 | "\n", 527 | "In this section, we want to make changes to improve the performance of our trained model. By improved performance we mean having the model not overfit the training data and then perform poorly on the testing data.\n", 528 | "\n", 529 | "There are several common techniques to fix this problem. These include:\n", 530 | "\n", 531 | "1. 
Reducing Model Complexity - removing neurons or layers\n", 532 | "2. Dropout - Randomly removing the contributions from some neurons\n", 533 | "3. Early Stopping - Terminating training as early as possible\n", 534 | "\n", 535 | "We will try these below.\n", 536 | "\n" 537 | ] 538 | }, 539 | { 540 | "cell_type": "markdown", 541 | "metadata": { 542 | "id": "gxM0Nan9ZjmK", 543 | "colab_type": "text" 544 | }, 545 | "source": [ 546 | "## Monitoring performance with TensorBoard\n", 547 | "\n", 548 | "We want to ensure we are getting good data to evaluate the performance of our changes. Fortunately the TensorFlow family includes the tool **TensorBoard**. \n", 549 | "\n", 550 | "TensorBoard provides us with various ways to monitor the performance of our models including:\n", 551 | "\n", 552 | "* Visualizing metrics such as loss and accuracy\n", 553 | "* Comparison of training and evaluation metrics\n", 554 | "* Visualizing the model graph (ops and layers)\n", 555 | "* Viewing histograms of weights, biases, or other tensors as they change over time\n" 556 | ] 557 | }, 558 | { 559 | "cell_type": "markdown", 560 | "metadata": { 561 | "id": "rdydWnoYe7gy", 562 | "colab_type": "text" 563 | }, 564 | "source": [ 565 | "TensorBoard can be run inside a Colab notebook, or if you are running your code directly in Python you can invoke TensorBoard from the command line." 566 | ] 567 | }, 568 | { 569 | "cell_type": "code", 570 | "metadata": { 571 | "id": "3d1xtI7LjJyf", 572 | "colab_type": "code", 573 | "colab": {} 574 | }, 575 | "source": [ 576 | "import datetime" 577 | ], 578 | "execution_count": 0, 579 | "outputs": [] 580 | }, 581 | { 582 | "cell_type": "code", 583 | "metadata": { 584 | "id": "m6dqMFnCbkC_", 585 | "colab_type": "code", 586 | "colab": {} 587 | }, 588 | "source": [ 589 | "# Load the tensorboard extension\n", 590 | "% reload_ext tensorboard" 591 | ], 592 | "execution_count": 0, 593 | "outputs": [] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "metadata": { 598 | "id": "vaXgSi2yi550", 599 | "colab_type": "code", 600 | "colab": {} 601 | }, 602 | "source": [ 603 | "# Clear any logs from previous runs\n", 604 | "!rm -rf ./logs/ " 605 | ], 606 | "execution_count": 0, 607 | "outputs": [] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "metadata": { 612 | "id": "08RQKiAXi_Yz", 613 | "colab_type": "code", 614 | "colab": {} 615 | }, 616 | "source": [ 617 | "# Start with a fresh model\n", 618 | "model = tf.keras.models.Sequential() # Create a new sequential model\n", 619 | "model.add(tf.keras.layers.Flatten(input_shape=(28,28))) # keras processing layer - no neurons\n", 620 | "model.add(tf.keras.layers.Dense(128, activation='relu', name='dense-128-relu')) # 128 neurons connected to pixels\n", 621 | "model.add(tf.keras.layers.Dense(10, activation='softmax', name='dense-10-softmax')) # determine probability of each of the 10 classes" 622 | ], 623 | "execution_count": 0, 624 | "outputs": [] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "metadata": { 629 | "id": "Ig5AjqzkjEwM", 630 | "colab_type": "code", 631 | "colab": {} 632 | }, 633 | "source": [ 634 | "model.compile(optimizer='adam',\n", 635 | " loss='sparse_categorical_crossentropy',\n", 636 | " metrics=['accuracy'])\n", 637 | "\n", 638 | "\n", 639 | "log_dir='logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')\n", 640 | "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n", 641 | "\n", 642 | "# We pass the validation/test data to the fit method. 
This will cause the model\n", 643 | "# to evaluate itself on the validation/test data at each epoch. This provides\n", 644 | "# per-epoch data points that TensorBoard can plot so we can see the trend.\n", 645 | "train_hist = model.fit(train_images, train_labels, epochs=40,\n", 646 | " validation_data=(test_images, test_labels), \n", 647 | " callbacks=[tensorboard_callback])\n", 648 | "\n" 649 | ], 650 | "execution_count": 0, 651 | "outputs": [] 652 | }, 653 | { 654 | "cell_type": "code", 655 | "metadata": { 656 | "id": "wx9ATGksjWyV", 657 | "colab_type": "code", 658 | "colab": {} 659 | }, 660 | "source": [ 661 | "#!kill 1234 # sometimes TensorBoard does not show all data. If it reports reusing a previous instance, use the kill command listed\n", 662 | "%tensorboard --logdir logs/fit" 663 | ], 664 | "execution_count": 0, 665 | "outputs": [] 666 | }, 667 | { 668 | "cell_type": "markdown", 669 | "metadata": { 670 | "id": "v05icTF1mDfB", 671 | "colab_type": "text" 672 | }, 673 | "source": [ 674 | "## Fixing Fashion MNIST " 675 | ] 676 | }, 677 | { 678 | "cell_type": "markdown", 679 | "metadata": { 680 | "id": "E6zDDPjrFm17", 681 | "colab_type": "text" 682 | }, 683 | "source": [ 684 | "### Reducing Model Complexity\n", 685 | "\n", 686 | "In a large model we can consider reducing the number of hidden layers. But our model only has one hidden layer and we need it. So the only thing to do is reduce the number of neurons in the hidden layer. Everything else is the same \n", 687 | "as above." 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "metadata": { 693 | "colab_type": "code", 694 | "id": "I1UvcFL9Qmps", 695 | "colab": {} 696 | }, 697 | "source": [ 698 | "# Load the tensorboard extension\n", 699 | "% reload_ext tensorboard" 700 | ], 701 | "execution_count": 0, 702 | "outputs": [] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "metadata": { 707 | "colab_type": "code", 708 | "id": "UNwSQKJyQmp-", 709 | "colab": {} 710 | }, 711 | "source": [ 712 | "# Clear any logs from previous runs\n", 713 | "!rm -rf ./logs/ " 714 | ], 715 | "execution_count": 0, 716 | "outputs": [] 717 | }, 718 | { 719 | "cell_type": "code", 720 | "metadata": { 721 | "colab_type": "code", 722 | "id": "CCUSydw2QmqJ", 723 | "colab": {} 724 | }, 725 | "source": [ 726 | "\n", 727 | "model = tf.keras.models.Sequential() # Create a new sequential model\n", 728 | "model.add(tf.keras.layers.Flatten(input_shape=(28,28))) # keras processing layer - no neurons\n", 729 | "model.add(tf.keras.layers.Dense(64, activation='relu', name='dense-64-relu')) # 64 neurons connected to pixels\n", 730 | "#model.add(tf.keras.layers.Dense(128, activation='relu', name='dense-128-relu')) # 128 neurons connected to pixels\n", 731 | "model.add(tf.keras.layers.Dense(10, activation='softmax', name='dense-10-softmax')) # determine probability of each of the 10 classes" 732 | ], 733 | "execution_count": 0, 734 | "outputs": [] 735 | }, 736 | { 737 | "cell_type": "code", 738 | "metadata": { 739 | "colab_type": "code", 740 | "id": "n8yt3SeaQmqN", 741 | "colab": {} 742 | }, 743 | "source": [ 744 | "model.compile(optimizer='adam',\n", 745 | " loss='sparse_categorical_crossentropy',\n", 746 | " metrics=['accuracy'])\n", 747 | "\n", 748 | "\n", 749 | "log_dir= 'logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')\n", 750 | "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n", 751 | "\n", 752 | "# We pass the validation/test data to the fit method. 
This will cause the model\n", 753 | "# to evaluate itself on the validation/test data at each epoch. This provides\n", 754 | "# per-epoch data points that TensorBoard can plot so we can see the trend.\n", 755 | "train_hist = model.fit(train_images, train_labels, epochs=40,\n", 756 | " validation_data=(test_images, test_labels), \n", 757 | " callbacks=[tensorboard_callback])\n", 758 | "\n" 759 | ], 760 | "execution_count": 0, 761 | "outputs": [] 762 | }, 763 | { 764 | "cell_type": "markdown", 765 | "metadata": { 766 | "id": "Ke-rDI6UmGaX", 767 | "colab_type": "text" 768 | }, 769 | "source": [ 770 | "Show the results with TensorBoard" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "metadata": { 776 | "id": "hox4ABkzmFWm", 777 | "colab_type": "code", 778 | "colab": {} 779 | }, 780 | "source": [ 781 | "#!kill 1234 # sometimes TensorBoard does not show all data. If it reports reusing a previous instance, use the kill command listed\n", 782 | "%tensorboard --logdir logs/fit" 783 | ], 784 | "execution_count": 0, 785 | "outputs": [] 786 | }, 787 | { 788 | "cell_type": "markdown", 789 | "metadata": { 790 | "id": "qBwxV4HjFOwT", 791 | "colab_type": "text" 792 | }, 793 | "source": [ 794 | "### Randomly drop out some neurons\n", 795 | "\n", 796 | "To randomly shut down the contribution from some neurons, we add a Keras dropout layer. This layer randomly sets the outputs from the previous layer to 0. How many outputs are set to 0 is defined by the parameter we pass. To set 50% of \n", 797 | "the outputs to 0, pass 0.5. For 20%, pass 0.2.\n", 798 | "\n", 799 | "Also, the decision on which outputs get dropped is a random selection that \n", 800 | "changes from batch to batch as training proceeds." 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "metadata": { 806 | "colab_type": "code", 807 | "id": "xSMjk8zBpc2A", 808 | "colab": {} 809 | }, 810 | "source": [ 811 | "# Load the tensorboard extension\n", 812 | "% reload_ext tensorboard" 813 | ], 814 | "execution_count": 0, 815 | "outputs": [] 816 | }, 817 | { 818 | "cell_type": "code", 819 | "metadata": { 820 | "colab_type": "code", 821 | "id": "3GcFNsSBpc2O", 822 | "colab": {} 823 | }, 824 | "source": [ 825 | "# Clear any logs from previous runs\n", 826 | "!rm -rf ./logs/ " 827 | ], 828 | "execution_count": 0, 829 | "outputs": [] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "metadata": { 834 | "id": "W9SazRfplKGA", 835 | "colab_type": "code", 836 | "colab": {} 837 | }, 838 | "source": [ 839 | "model = tf.keras.models.Sequential() # Create a new sequential model\n", 840 | "model.add(tf.keras.layers.Flatten(input_shape=(28,28))) # keras processing layer - no neurons\n", 841 | "model.add(tf.keras.layers.Dense(128, activation='relu', name='dense-128-relu')) # 128 neurons connected to pixels\n", 842 | "model.add(tf.keras.layers.Dropout(0.2)) # dropout 20%\n", 843 | "model.add(tf.keras.layers.Dense(10, activation='softmax', name='dense-10-softmax')) # determine probability of each of the 10 classes" 844 | ], 845 | "execution_count": 0, 846 | "outputs": [] 847 | }, 848 | { 849 | "cell_type": "code", 850 | "metadata": { 851 | "id": "OM9BuhuBlWo0", 852 | "colab_type": "code", 853 | "colab": {} 854 | }, 855 | "source": [ 856 | "model.compile(optimizer='adam',\n", 857 | " loss='sparse_categorical_crossentropy',\n", 858 | " metrics=['accuracy'])" 859 | ], 860 | "execution_count": 0, 861 | "outputs": [] 862 | }, 863 | { 864 | "cell_type": "code", 865 | "metadata": { 866 | "id": "jM5sr1YGlcIb", 867 | "colab_type": "code", 868 | "colab": {} 869 | }, 870 | "source": [ 
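"# A brief aside (added for illustration; not part of the original notebook flow):\n", "# Keras Dropout zeroes a random fraction of its inputs on each training step and\n", "# scales the surviving values by 1/(1 - rate), so the expected activation is\n", "# unchanged; at inference time the layer is the identity. Uncomment to see the\n", "# effect on a vector of ones:\n", "# print(tf.keras.layers.Dropout(0.2)(tf.ones((1, 8)), training=True))  # ~20% zeros, rest 1.25\n", "\n", 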
871 | "log_dir='logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')\n", 872 | "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n", 873 | "\n", 874 | "model.fit(x=train_images, \n", 875 | " y=train_labels, \n", 876 | " epochs=40, \n", 877 | " validation_data=(test_images, test_labels), \n", 878 | " callbacks=[tensorboard_callback])\n" 879 | ], 880 | "execution_count": 0, 881 | "outputs": [] 882 | }, 883 | { 884 | "cell_type": "code", 885 | "metadata": { 886 | "id": "qlUy6ZlRlOZ8", 887 | "colab_type": "code", 888 | "colab": {} 889 | }, 890 | "source": [ 891 | "#!kill 1234 # sometimes TensorBoard does not show all data. If it reports reusing a previous instance, use the kill command listed\n", 892 | "%tensorboard --logdir logs/fit" 893 | ], 894 | "execution_count": 0, 895 | "outputs": [] 896 | }, 897 | { 898 | "cell_type": "markdown", 899 | "metadata": { 900 | "id": "3wrym729ZWre", 901 | "colab_type": "text" 902 | }, 903 | "source": [ 904 | "### Implementing Early Stopping\n", 905 | "\n", 906 | "Early stopping involves having the training stop when the model has been trained enough. \n", 907 | "\n", 908 | "Often training is slow in terms of time, and expensive in terms of compute time on big clusters or a cloud service. So it is useful to be able to stop training as soon as reasonable. Reasonable is usually when a metric like loss stops decreasing. So the question is, how can we know when the loss has stopped decreasing? \n", 909 | "\n", 910 | "We could run a bunch of trials and use a tool like TensorBoard to plot curves. But it is tedious to write a model with 20 epochs, run it, see loss was decreasing, then repeat that with 40 epochs, etc. Worse, if we change the model or its parameters, like adjusting the dropout percentage, the number of epochs required to see when the loss stops decreasing might be different.\n", 911 | "\n", 912 | "So what we need is an automated way to detect when training has stopped improving and terminate training. And fortunately Keras has an early stopping feature that does just this!" 913 | ] 914 | }, 915 | { 916 | "cell_type": "markdown", 917 | "metadata": { 918 | "id": "_lKTkBVvN2dI", 919 | "colab_type": "text" 920 | }, 921 | "source": [ 922 | "We start with the same model, optimizer, loss, and log clearing code as before."
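, "\n", "One option worth knowing about (a sketch; not used in the cells below): `EarlyStopping` can also restore the weights from the best epoch it saw, rather than keeping the weights from the final, slightly worse epoch:\n", "\n", "```python\n", "early_stopping_callback = tf.keras.callbacks.EarlyStopping(\n", "    monitor='val_loss', patience=4, restore_best_weights=True)\n", "```"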
923 | ] 924 | }, 925 | { 926 | "cell_type": "code", 927 | "metadata": { 928 | "id": "D0FSUxUT3Cuf", 929 | "colab_type": "code", 930 | "colab": {} 931 | }, 932 | "source": [ 933 | "# Load the tensorboard extension\n", 934 | "% reload_ext tensorboard" 935 | ], 936 | "execution_count": 0, 937 | "outputs": [] 938 | }, 939 | { 940 | "cell_type": "code", 941 | "metadata": { 942 | "id": "dGm6LMEL2--j", 943 | "colab_type": "code", 944 | "colab": {} 945 | }, 946 | "source": [ 947 | "# Clear any logs from previous runs\n", 948 | "!rm -rf ./logs/ " 949 | ], 950 | "execution_count": 0, 951 | "outputs": [] 952 | }, 953 | { 954 | "cell_type": "code", 955 | "metadata": { 956 | "colab_type": "code", 957 | "id": "62luPqB8NqaI", 958 | "colab": {} 959 | }, 960 | "source": [ 961 | "model = tf.keras.models.Sequential() # Create a new sequential model\n", 962 | "model.add(tf.keras.layers.Flatten(input_shape=(28,28))) # keras processing layer - no neurons\n", 963 | "model.add(tf.keras.layers.Dense(128, activation='relu', name='dense-128-relu')) # 128 neurons connected to pixels\n", 964 | "model.add(tf.keras.layers.Dense(10, activation='softmax', name='dense-10-softmax')) # determine probability of each of the 10 classes" 965 | ], 966 | "execution_count": 0, 967 | "outputs": [] 968 | }, 969 | { 970 | "cell_type": "code", 971 | "metadata": { 972 | "colab_type": "code", 973 | "id": "ROXcHFavNqaS", 974 | "colab": {} 975 | }, 976 | "source": [ 977 | "model.compile(optimizer='adam',\n", 978 | " loss='sparse_categorical_crossentropy',\n", 979 | " metrics=['accuracy'])" 980 | ], 981 | "execution_count": 0, 982 | "outputs": [] 983 | }, 984 | { 985 | "cell_type": "markdown", 986 | "metadata": { 987 | "id": "F6TY_BVDOou_", 988 | "colab_type": "text" 989 | }, 990 | "source": [ 991 | "We create a new EarlyStopping callback.\n", 992 | "\n", 993 | "We set the **monitor** parameter to the quantity to monitor, which here is **validation loss**, and the **patience** parameter to the maximum number of epochs without improvement allowed. If the monitored quantity does not improve within that number of epochs, the training will be terminated.\n", 994 | "\n", 995 | "This new callback gets added to the list of callbacks." 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "metadata": { 1001 | "id": "1o3tiyB_NyhV", 1002 | "colab_type": "code", 1003 | "colab": {} 1004 | }, 1005 | "source": [ 1006 | "log_dir='logs/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S')\n", 1007 | "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)\n", 1008 | "early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)\n", 1009 | "\n", 1010 | "model.fit(x=train_images, \n", 1011 | " y=train_labels, \n", 1012 | " epochs=40, \n", 1013 | " validation_data=(test_images, test_labels), \n", 1014 | " callbacks=[tensorboard_callback, early_stopping_callback])\n" 1015 | ], 1016 | "execution_count": 0, 1017 | "outputs": [] 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "metadata": { 1022 | "id": "vUIOxIxcQSRH", 1023 | "colab_type": "code", 1024 | "colab": {} 1025 | }, 1026 | "source": [ 1027 | "#!kill 1234 # sometimes TensorBoard does not show all data. 
If it reports reusing a previous instance, use the kill command listed\n", 1028 | "%tensorboard --logdir logs/fit" 1029 | ], 1030 | "execution_count": 0, 1031 | "outputs": [] 1032 | }, 1033 | { 1034 | "cell_type": "markdown", 1035 | "metadata": { 1036 | "colab_type": "text", 1037 | "id": "AwGPItyphqXT" 1038 | }, 1039 | "source": [ 1040 | "## Save Your Model\n", 1041 | "\n", 1042 | "Now that we have our model working well, we can save it for reuse.\n", 1043 | "\n", 1044 | "By saving the model's structure and the associated trained weights we preserve our work. Those weights are well over 101,000 values that we worked hard to train to the proper numbers!\n" 1045 | ] 1046 | }, 1047 | { 1048 | "cell_type": "code", 1049 | "metadata": { 1050 | "colab_type": "code", 1051 | "id": "0w5Rq8SsgWE6", 1052 | "colab": {} 1053 | }, 1054 | "source": [ 1055 | "# We use the Python tempfile library to create files in a generated folder. \n", 1056 | "# If you want to use a defined path, replace this code with your own \n", 1057 | "# path definitions. \n", 1058 | "import tempfile\n", 1059 | "import os\n", 1060 | "\n", 1061 | "# get the tempfile location for this Colab session\n", 1062 | "MODEL_DIR = tempfile.gettempdir()\n", 1063 | "version = 1 # NOTE: Adjust if you don't want to replace a version of your model\n", 1064 | "export_path = os.path.join(MODEL_DIR, str(version)) # the final path includes the version\n", 1065 | "print('Saving model to : {}\\n'.format(export_path))\n", 1066 | "\n", 1067 | "# if path already exists delete everything at the location\n", 1068 | "if os.path.isdir(export_path):\n", 1069 | " print('\\nPreviously saved model found, deleting it\\n')\n", 1070 | " !rm -r {export_path}\n", 1071 | "\n", 1072 | "# Save the model \n", 1073 | "tf.saved_model.save(model, export_path)\n", 1074 | "\n", 1075 | "# Print save complete message\n", 1076 | "print('Model saved')\n" 1077 | ], 1078 | "execution_count": 0, 1079 | "outputs": [] 1080 | }, 1081 | { 1082 | "cell_type": "markdown", 1083 | "metadata": { 1084 | "id": "8YbP9MjFLkRJ", 1085 | "colab_type": "text" 1086 | }, 1087 | "source": [ 1088 | "\n", 1089 | "\n", 1090 | "# Deploying our Trained Model\n", 1091 | "\n", 1092 | "Once we are satisfied with the model's performance we can deploy it so other programs can use it. This is a complex subject since the deployment strategies depend on how our model will be used.\n", 1093 | "\n", 1094 | "In this script we will show a very basic way of deploying the model. We will do just enough so we can make predictions with our trained model from Python.\n", 1095 | "\n" 1096 | ] 1097 | }, 1098 | { 1099 | "cell_type": "markdown", 1100 | "metadata": { 1101 | "id": "BjXJXg2dZKlZ", 1102 | "colab_type": "text" 1103 | }, 1104 | "source": [ 1105 | "## TensorFlow ModelServer\n", 1106 | "\n", 1107 | "We are going to use the TensorFlow ModelServer to serve our model. This install may only work in Colab. \n", 1108 | "\n", 1109 | "*The recommendation for general installation and usage of the server is to use a Docker container. 
This is documented in the repo, https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/setup.md*" 1110 | ] 1111 | }, 1112 | { 1113 | "cell_type": "markdown", 1114 | "metadata": { 1115 | "id": "BTWb0-KjnHSP", 1116 | "colab_type": "text" 1117 | }, 1118 | "source": [ 1119 | "## Add TensorFlow Serving as installable\n", 1120 | "\n", 1121 | "Before we can install the TensorFlow ModelServer we need to make it known to the APT installer so the installer knows where to fetch the bits.\n", 1122 | "\n", 1123 | "We do this by adding a key to the APT database referring to the tensorflow-serving file location." 1124 | ] 1125 | }, 1126 | { 1127 | "cell_type": "code", 1128 | "metadata": { 1129 | "id": "yadLd4SqnwMT", 1130 | "colab_type": "code", 1131 | "colab": {} 1132 | }, 1133 | "source": [ 1134 | "# add the key\n", 1135 | "!echo 'deb http://storage.googleapis.com/tensorflow-serving-apt stable tensorflow-model-server tensorflow-model-server-universal' | tee /etc/apt/sources.list.d/tensorflow-serving.list && \\\n", 1136 | "curl https://storage.googleapis.com/tensorflow-serving-apt/tensorflow-serving.release.pub.gpg | apt-key add -\n", 1137 | "# update the database with the new key\n", 1138 | "!apt update" 1139 | ], 1140 | "execution_count": 0, 1141 | "outputs": [] 1142 | }, 1143 | { 1144 | "cell_type": "markdown", 1145 | "metadata": { 1146 | "colab_type": "text", 1147 | "id": "W1ZVp_VOU7Wu" 1148 | }, 1149 | "source": [ 1150 | "### Install TensorFlow ModelServer\n", 1151 | "\n", 1152 | "We use apt-get to fetch and install TensorFlow ModelServer" 1153 | ] 1154 | }, 1155 | { 1156 | "cell_type": "code", 1157 | "metadata": { 1158 | "colab_type": "code", 1159 | "id": "ygwa9AgRloYy", 1160 | "colab": {} 1161 | }, 1162 | "source": [ 1163 | "!apt-get install tensorflow-model-server" 1164 | ], 1165 | "execution_count": 0, 1166 | "outputs": [] 1167 | }, 1168 | { 1169 | "cell_type": "markdown", 1170 | "metadata": { 1171 | "colab_type": "text", 1172 | "id": "k5NrYdQeVm52" 1173 | }, 1174 | "source": [ 1175 | "### Start TensorFlow ModelServer\n", 1176 | "\n", 1177 | "This is where we start TensorFlow ModelServer and load our model. After it loads we can start making inference requests using REST. There are some important parameters:\n", 1178 | "\n", 1179 | "* `rest_api_port`: The port that you'll use for REST requests.\n", 1180 | "* `model_name`: You'll use this in the URL of the REST requests. It can be anything.\n", 1181 | "* `model_base_path`: This is the path to the directory where you've saved your model.\n", 1182 | "\n", 1183 | "We need to define these as Python variables and as shell environment variables. This will make this information available in Python code and in \n", 1184 | "the shell where we will start the TensorFlow ModelServer process."
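, "\n", "With these values, the REST prediction endpoint takes the form below (the concrete URL is built later in this notebook when we make the request):\n", "\n", "```\n", "http://localhost:<rest_api_port>/v1/models/<model_name>:predict\n", "```"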
1185 | ] 1186 | }, 1187 | { 1188 | "cell_type": "code", 1189 | "metadata": { 1190 | "colab_type": "code", 1191 | "id": "aUgp3vUdU5GS", 1192 | "colab": {} 1193 | }, 1194 | "source": [ 1195 | "# Define the environment variable where our saved model resides\n", 1196 | "os.environ['MODEL_DIR'] = MODEL_DIR\n", 1197 | "# Define the Python constant and environment variable to point to the port number used to access our model\n", 1198 | "REST_PORT = '8501'\n", 1199 | "os.environ['REST_PORT'] = REST_PORT\n", 1200 | "# Define the Python constant and environment variable to point to our model's name.\n", 1201 | "# This is an arbitrary name.\n", 1202 | "MODEL_NAME = 'fashion_mnist'\n", 1203 | "os.environ['MODEL_NAME'] = MODEL_NAME" 1204 | ], 1205 | "execution_count": 0, 1206 | "outputs": [] 1207 | }, 1208 | { 1209 | "cell_type": "code", 1210 | "metadata": { 1211 | "colab_type": "code", 1212 | "id": "kJDhHNJVnaLN", 1213 | "colab": {} 1214 | }, 1215 | "source": [ 1216 | "%%bash --bg \n", 1217 | "# Launch tensorflow_model_server (the %%bash cell magic must be the first line of the cell).\n", 1218 | "# Status and error messages go to the file server.log\n", 1219 | "nohup tensorflow_model_server \\\n", 1220 | " --rest_api_port=\"${REST_PORT}\" \\\n", 1221 | " --model_name=\"${MODEL_NAME}\" \\\n", 1222 | " --model_base_path=\"${MODEL_DIR}\" >server.log 2>&1\n" 1223 | ], 1224 | "execution_count": 0, 1225 | "outputs": [] 1226 | }, 1227 | { 1228 | "cell_type": "code", 1229 | "metadata": { 1230 | "colab_type": "code", 1231 | "id": "IxbeiOCUUs2z", 1232 | "colab": {} 1233 | }, 1234 | "source": [ 1235 | "# Display the tail (last few lines) of the server log, which will show any errors\n", 1236 | "!tail server.log" 1237 | ], 1238 | "execution_count": 0, 1239 | "outputs": [] 1240 | }, 1241 | { 1242 | "cell_type": "markdown", 1243 | "metadata": { 1244 | "colab_type": "text", 1245 | "id": "vwg1JKaGXWAg" 1246 | }, 1247 | "source": [ 1248 | "### Function to display image with user-defined title" 1249 | ] 1250 | }, 1251 | { 1252 | "cell_type": "code", 1253 | "metadata": { 1254 | "colab_type": "code", 1255 | "id": "Luqm_Jyff9iR", 1256 | "colab": {} 1257 | }, 1258 | "source": [ 1259 | "def show_image(index, title, show_colorbar=False):\n", 1260 | " plt.figure()\n", 1261 | " plt.title('\\n\\n{}'.format(title), fontdict={'size': 16})\n", 1262 | " plt.imshow(test_images[index].reshape(28,28), cmap='gray') # data is grayscale, but displays in color without cmap='gray'\n", 1263 | " if (show_colorbar):\n", 1264 | " plt.colorbar()\n", 1265 | " plt.axis('off')\n", 1266 | " plt.show()\n", 1267 | " \n", 1268 | " \n" 1269 | ], 1270 | "execution_count": 0, 1271 | "outputs": [] 1272 | }, 1273 | { 1274 | "cell_type": "markdown", 1275 | "metadata": { 1276 | "colab_type": "text", 1277 | "id": "x-Cc6WAFf4eg" 1278 | }, 1279 | "source": [ 1280 | "## Make a REST request to predict class of our example image\n", 1281 | "\n", 1282 | "\n" 1283 | ] 1284 | }, 1285 | { 1286 | "cell_type": "code", 1287 | "metadata": { 1288 | "id": "ReGh9AElrE4_", 1289 | "colab_type": "code", 1290 | "colab": {} 1291 | }, 1292 | "source": [ 1293 | "!pip install -q requests # Install the requests library, which makes HTTP requests to the TensorFlow server \n", 1294 | " # serving our trained model \n", 1295 | "import requests # import the requests library " 1296 | ], 1297 | "execution_count": 0, 1298 | "outputs": [] 1299 | },
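{ "cell_type": "markdown", "metadata": {}, "source": [ "### Check that the server sees our model\n", "\n", "*A hypothetical sanity check (an addition, not part of the original course flow): TensorFlow Serving also answers GET requests on the model's base URL with the model's load status. If the server started correctly, the response should report the state as AVAILABLE.*" ] }, { "cell_type": "code", "metadata": {}, "source": [ "# Sketch of a status check against the running ModelServer.\n", "# Assumes the server launched above is still listening on REST_PORT.\n", "status_url = 'http://localhost:' + REST_PORT + '/v1/models/' + MODEL_NAME\n", "status_response = requests.get(status_url)\n", "print(status_response.text) # expect a state of AVAILABLE once loading finishes\n" ], "execution_count": 0, "outputs": [] }, 1300 | { 1301 | "cell_type": "code", 1302 | "metadata": { 1303 | "id": "3dJFAJ9LqXMk", 1304 | "colab_type": "code", 1305 | "colab": {} 1306 | }, 1307 | "source": [ 1308 | "# Select a 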
random image to classify\n", 1309 | "import random\n", 1310 | "image_index = random.randint(0,len(test_images)-1)\n" 1311 | ], 1312 | "execution_count": 0, 1313 | "outputs": [] 1314 | }, 1315 | { 1316 | "cell_type": "code", 1317 | "metadata": { 1318 | "id": "pll06kNWsCBF", 1319 | "colab_type": "code", 1320 | "colab": {} 1321 | }, 1322 | "source": [ 1323 | "# Import the json library and create the JSON data structure to be passed in the request.\n", 1324 | "import json\n", 1325 | "\n", 1326 | "# The data is assumed to be a list of images, a 4D tensor of the shape [*,28,28,1]. \n", 1327 | "# Convert the single 3D image to a 4D batch of one image\n", 1328 | "check_images = np.reshape(test_images[image_index],(-1,28,28,1))\n", 1329 | "\n", 1330 | "# Construct json data passed to server. \"instances\" will hold the image(s) we \n", 1331 | "# want the model to classify\n", 1332 | "data = json.dumps({'signature_name': 'serving_default', 'instances': check_images.tolist()}) " 1333 | ], 1334 | "execution_count": 0, 1335 | "outputs": [] 1336 | }, 1337 | { 1338 | "cell_type": "code", 1339 | "metadata": { 1340 | "colab_type": "code", 1341 | "id": "gelYGr-s2RXw", 1342 | "colab": {} 1343 | }, 1344 | "source": [ 1345 | "# Create the request\n", 1346 | "\n", 1347 | "# HTTP request header\n", 1348 | "headers = {'content-type': 'application/json'}\n", 1349 | "\n", 1350 | "# Build the URL to the service using the constants we defined earlier; it should be of the form:\n", 1351 | "# http://localhost:8501/v1/models/fashion_mnist:predict\n", 1352 | "predict_service_url = 'http://localhost:' + REST_PORT + '/v1/models/' + MODEL_NAME + ':predict'\n", 1353 | "\n", 1354 | "# Make the request to the service. Pass the headers and data. Wait for the server's response.\n", 1355 | "json_response = requests.post(predict_service_url, data=data, headers=headers)\n", 1356 | "\n", 1357 | "# Parse the response. For each image we pass we get a list of probabilities \n", 1358 | "predictions = json.loads(json_response.text)['predictions']\n", 1359 | "# predictions[0] is the 10 probabilities for our first and only image.\n", 1360 | "# predictions[0] has values in the form [0.1, 0.05, ...., 0.6]\n", 1361 | "\n", 1362 | "# The numpy argmax function returns the index of the highest value. This is \n", 1363 | "# the highest probability class predicted by our model. \n", 1364 | "predicted_class = np.argmax(predictions[0])\n", 1365 | "\n", 1366 | "# Display the image, the actual class, and the predicted class\n", 1367 | "show_image(image_index, 'Model predicted class: {} (class {}). 
\\n Actual class: {} (class {})'.format(\n", 1368 | " class_names[predicted_class], predicted_class, class_names[test_labels[image_index]], test_labels[image_index]))" 1369 | ], 1370 | "execution_count": 0, 1371 | "outputs": [] 1372 | } 1373 | ] 1374 | } -------------------------------------------------------------------------------- /House_price_regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "House_price_regression.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "accelerator": "GPU" 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "gpzj90oD__lN", 21 | "colab_type": "text" 22 | }, 23 | "source": [ 24 | "# TensorFlow House Price Prediction\n", 25 | "\n", 26 | "This is a simple TensorFlow-based notebook that applies the steps of the machine learning workflow to create, train, and test a model that will predict house prices.\n", 27 | "\n", 28 | "The notebook:\n", 29 | "\n", 30 | "* Uses the steps of the machine learning workflow discussed in the course\n", 31 | "* Illustrates how TensorFlow makes it easier to implement a machine learning solution\n", 32 | "* Describes what is happening in various processing steps\n", 33 | "\n" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": { 39 | "id": "HMyAUbOqOGgU", 40 | "colab_type": "text" 41 | }, 42 | "source": [ 43 | "## Load the correct version of TensorFlow\n", 44 | "Before we use TensorFlow we must load the correct version. We want version 2.x." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "metadata": { 50 | "id": "uwTnmk_IPQbz", 51 | "colab_type": "code", 52 | "colab": {} 53 | }, 54 | "source": [ 55 | "# Install TensorFlow using Colab's tensorflow_version command\n", 56 | "try:\n", 57 | " # %tensorflow_version only exists in Colab.\n", 58 | " %tensorflow_version 2.x\n", 59 | "except Exception:\n", 60 | " pass" 61 | ], 62 | "execution_count": 0, 63 | "outputs": [] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "id": "SaSlAAqoIiv6", 69 | "colab_type": "text" 70 | }, 71 | "source": [ 72 | "## Import Libraries\n", 73 | "\n", 74 | "We import the TensorFlow, Numpy, and Matplotlib libraries. 
\n", 75 | "\n", 76 | "Numpy is a powerful n-dimensional array library that\n", 77 | "allows us to easily create and manipulate arrays of data, and more!\n", 78 | "\n", 79 | "Numpy also allows us to convert TensorFlow's native data structures,\n", 80 | "to Python native data types.\n", 81 | "\n", 82 | "Matplotlib is a graphics plot library" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "metadata": { 88 | "id": "qVJKEAqsIXEi", 89 | "colab_type": "code", 90 | "colab": {} 91 | }, 92 | "source": [ 93 | "# Import libraries\n", 94 | "import tensorflow as tf\n", 95 | "import numpy as np\n", 96 | "import matplotlib.pyplot as plt" 97 | ], 98 | "execution_count": 0, 99 | "outputs": [] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "metadata": { 104 | "id": "_vjZGJOwSxmR", 105 | "colab_type": "code", 106 | "colab": {} 107 | }, 108 | "source": [ 109 | "# Check TensorFlow version\n", 110 | "print(\"TensorFlow version:\", tf.__version__)" 111 | ], 112 | "execution_count": 0, 113 | "outputs": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "MHIiwqsORnEb", 119 | "colab_type": "text" 120 | }, 121 | "source": [ 122 | "## Problem Statement\n", 123 | "\n", 124 | "Using example data, develop a model that predicts house prices based on the size of a house.\n" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": { 130 | "id": "I7iNMKHlJa-F", 131 | "colab_type": "text" 132 | }, 133 | "source": [ 134 | "## Get Data\n", 135 | "\n", 136 | "In this example we will be using a truncated version of the Ames dataset that only contains information on homes sold in May 2010.\n", 137 | "\n", 138 | "### About the Ames dataset\n", 139 | "\n", 140 | "The Ames dataset is a widely available dataset that has become one of the standard datasets used when\n", 141 | "predicting home prices based on features of the home. It is based on the great work of Dean De Cock. His rational\n", 142 | "and insight into this dataset can be found at http://jse.amstat.org/v19n3/decock.pdf.\n", 143 | "\n", 144 | "\n", 145 | "### Getting the truncated dataset we use\n", 146 | "This dataset can be found with the exercise files for this course. The filename is **AmesHousing-05-2010.csv** .\n", 147 | "\n", 148 | "Before you run the code below, ensure you have downloaded the file to your computer. \n", 149 | "\n", 150 | "And when prompted, browse to the file's location on your computer and upload the file.\n", 151 | "\n", 152 | "*Warning: The file upload function will only show the file selection dialog for 30 seconds. After that time it will close and cause an error because a file was not specified. This timeout prevents the file dialog from blocking events in Colab. If you experience this timeout select the dataset file sooner. 
If you stick with the dialog, it may help to move the dataset file to an easier-to-specify location such as your desktop.*\n", 153 | "\n" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "metadata": { 159 | "id": "UtsmZTmC6qcS", 160 | "colab_type": "code", 161 | "colab": {} 162 | }, 163 | "source": [ 164 | "# You need to upload the file AmesHousing-05-2010.csv, provided with this course\n", 165 | "from google.colab import files\n", 166 | "\n", 167 | "# only load the single file\n", 168 | "uploaded = files.upload()\n", 169 | "# csv_housefile contains the name of the first (and only) file uploaded\n", 170 | "csv_housefile = next(iter(uploaded.keys()))\n", 171 | "\n", 172 | "print('User uploaded file \"{name}\" with length {length} bytes'.format(\n", 173 | " name=csv_housefile, length=len(uploaded[csv_housefile])))\n" 174 | ], 175 | "execution_count": 0, 176 | "outputs": [] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": { 181 | "id": "-hiXAp_v6urP", 182 | "colab_type": "text" 183 | }, 184 | "source": [ 185 | "### Load the data into a pandas dataframe\n", 186 | "\n", 187 | "Pandas lets us easily review and manipulate the data. If you are new to pandas, see https://pandas.pydata.org/ and the pandas 10 minute intro http://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html to understand how pandas and pandas dataframes make working with tabular data in Python easy." 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "metadata": { 193 | "id": "zveOHSXBItTU", 194 | "colab_type": "code", 195 | "colab": {} 196 | }, 197 | "source": [ 198 | "import pandas as pd\n", 199 | "\n", 200 | "# load the housing data into a pandas dataframe for easy viewing and manipulation\n", 201 | "df_housing = pd.read_csv(csv_housefile)" 202 | ], 203 | "execution_count": 0, 204 | "outputs": [] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": { 209 | "id": "QC9Ygn0jynyW", 210 | "colab_type": "text" 211 | }, 212 | "source": [ 213 | "### Visualize the House Price data\n", 214 | "\n", 215 | "We use the pandas head method to look at the first few rows of data. This will give us an understanding of the data we are working with.\n" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "metadata": { 221 | "id": "YtdGnno4zBeT", 222 | "colab_type": "code", 223 | "colab": {} 224 | }, 225 | "source": [ 226 | "# Visualize the dataset\n", 227 | " \n", 228 | "# turn on option to display all columns, otherwise some columns may be hidden\n", 229 | "pd.set_option('display.max_columns', None) \n", 230 | "# print the column names and first 5 rows of data\n", 231 | "df_housing.head(5)" 232 | ], 233 | "execution_count": 0, 234 | "outputs": [] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": { 239 | "id": "H8urfea9zAhH", 240 | "colab_type": "text" 241 | }, 242 | "source": [ 243 | "Lots of feature columns. From documentation of previous work with this data we can account for most of the difference in price (SalePrice) based on the size of the house. \n", 244 | "\n", 245 | "Looking through the columns we see the following features contain square feet ('SF'): BsmtFin SF 1, BsmtFin SF 2, Bsmt Unf SF, Total Bsmt SF, 1st Flr SF, 2nd Flr SF, Low Qual Fin SF. \n", 246 | "\n", 247 | "A little more checking and we see that the data contains summary columns for the basement square footage and the above-basement square footage. 
Specifically, the feature columns Total Bsmt SF and Gr Liv Area sum the space as follows:\n", 248 | "\n", 249 | "\n", 250 | "\n", 251 | "> Total Bsmt SF = BsmtFin SF 1 + BsmtFin SF 2 + Bsmt Unf SF \n", 252 | "\n", 253 | "> Gr Liv Area = 1st Flr SF + 2nd Flr SF\n", 254 | "\n", 255 | "But there is no feature for the total space in the house, which is the sum of the basement and above-ground floors (Total Bsmt SF + Gr Liv Area). So let's add a new feature column, 'Total SF', containing this value to the dataset.\n", 256 | "\n", 257 | "But before we proceed, let's see if the feature columns we depend upon contain missing values that will affect our results. If so, we can remove the rows with missing values or determine a way of imputing (filling in) the missing values.\n" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "metadata": { 263 | "id": "9gc0jzZYvLsl", 264 | "colab_type": "code", 265 | "colab": {} 266 | }, 267 | "source": [ 268 | "df_housing[['Total Bsmt SF', 'Gr Liv Area']].isnull().values.any()" 269 | ], 270 | "execution_count": 0, 271 | "outputs": [] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": { 276 | "id": "oDHSGCWCw6KJ", 277 | "colab_type": "text" 278 | }, 279 | "source": [ 280 | "No missing values, so no additional work is required and we can add the new column Total SF." 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "metadata": { 286 | "id": "Lh3dHm0U-aY9", 287 | "colab_type": "code", 288 | "colab": {} 289 | }, 290 | "source": [ 291 | "# Add new feature column Total SF = Total Bsmt SF + Gr Liv Area\n", 292 | "df_housing['Total SF'] = df_housing['Total Bsmt SF'] + df_housing['Gr Liv Area']\n" 293 | ], 294 | "execution_count": 0, 295 | "outputs": [] 296 | }, 297 | { 298 | "cell_type": "markdown", 299 | "metadata": { 300 | "id": "I8WXLZlB3l0N", 301 | "colab_type": "text" 302 | }, 303 | "source": [ 304 | "And we can check that the numbers add up and see the associated sale price." 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "metadata": { 310 | "id": "l4LQHNxWDBRc", 311 | "colab_type": "code", 312 | "colab": {} 313 | }, 314 | "source": [ 315 | "# see the basement and living area, total sf, and price columns\n", 316 | "print(df_housing[['Total Bsmt SF', 'Gr Liv Area', 'Total SF', 'SalePrice']].head(5))" 317 | ], 318 | "execution_count": 0, 319 | "outputs": [] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": { 324 | "id": "aORObOZrKIMA", 325 | "colab_type": "text" 326 | }, 327 | "source": [ 328 | "### Visualize the data\n", 329 | "\n", 330 | "We visualize the data to give us some idea of the relationships between the various features. 
To do that we create a function to plot the data.\n", 331 | " \n", 332 | " \n", 333 | " \n" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "metadata": { 339 | "id": "E7hTDDs6qw0R", 340 | "colab_type": "code", 341 | "colab": {} 342 | }, 343 | "source": [ 344 | "# This function visualizes our data and optionally a learned line\n", 345 | "def visualize_data(x_vals, y_vals,\n", 346 | " addn_x_vals=None, addn_y_vals=None, add_addn_reg_line=False):\n", 347 | " \n", 348 | " f, ax = plt.subplots(figsize=(8,8))\n", 349 | " plt.plot(x_vals, y_vals, 'ro') # red dot for each data point\n", 350 | " # Optionally plot another set of data points in a different color and symbol\n", 351 | " if (addn_x_vals is not None):\n", 352 | " plt.plot(addn_x_vals, addn_y_vals, 'g^') # green triangles for additional data points\n", 353 | " # Optionally, plot a regression line.\n", 354 | " if (add_addn_reg_line):\n", 355 | " x_min_index = addn_x_vals.argmin()\n", 356 | " x_max_index = addn_x_vals.argmax()\n", 357 | " print(x_min_index,[addn_x_vals[x_min_index],addn_y_vals[x_min_index]] ) # show the (x, y) endpoints of the line \n", 358 | " print(x_max_index,[addn_x_vals[x_max_index],addn_y_vals[x_max_index]] ) \n", 359 | " plt.plot([addn_x_vals[x_min_index],addn_x_vals[x_max_index]], \n", 360 | " [addn_y_vals[x_min_index],addn_y_vals[x_max_index]], \n", 361 | " 'b-') # draw a blue regression line between the two endpoints (x values first, then y values)\n", 362 | " \n", 363 | " plt.tick_params(axis='both', which='major', labelsize=14)\n", 364 | " \n", 365 | " plt.show() # now plot the line showing the data and the optional line" 366 | ], 367 | "execution_count": 0, 368 | "outputs": [] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": { 373 | "id": "7d887fwIbFP1", 374 | "colab_type": "text" 375 | }, 376 | "source": [ 377 | "### Visualizing Total SF and Price\n", 378 | "\n", 379 | "Using the visualize_data function we can see the relationship between Total Square Feet (Total SF) and Price." 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "metadata": { 385 | "id": "9PyYXnSZ9v9R", 386 | "colab_type": "code", 387 | "colab": {} 388 | }, 389 | "source": [ 390 | "# Plot Total SF vs. Price\n", 391 | "visualize_data(df_housing['Total SF'], df_housing['SalePrice'])\n", 392 | "\n" 393 | ], 394 | "execution_count": 0, 395 | "outputs": [] 396 | }, 397 | { 398 | "cell_type": "markdown", 399 | "metadata": { 400 | "id": "eJjuAi3sb76r", 401 | "colab_type": "text" 402 | }, 403 | "source": [ 404 | "### What does the visualization tell us?\n", 405 | "\n", 406 | "There seems to be a **linear** relationship between the size of the house (as shown in Total SF) and Price. \n", 407 | "\n", 408 | "This suggests that we should perform Linear Regression, where the relationship in the data is:\n", 409 | " y = mx + b, \n", 410 | " or in our case, Price = m * Total SF + b.\n", 411 | " \n", 412 | "With respect to defining the appropriate equation, we can see the difference between traditional programming and Machine Learning as:\n", 413 | "\n", 414 | "* In traditional programming **we define what the equation is** through structures such as conditional statements\n", 415 | "\n", 416 | "* In Machine Learning, our model **learns from the data** what the appropriate equation is." 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": { 422 | "id": "kWm_EBhwfTvX", 423 | "colab_type": "text" 424 | }, 425 | "source": [ 426 | "## Prepare Data\n", 427 | "\n", 428 | "If values are on very different scales it will be difficult for the model to determine the relationships between features. 
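Min-max scaling addresses this by mapping each value x to (x - min) / (max - min), which puts every feature on a 0.0 to 1.0 scale. 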
With our data, Square Footage (SF) ranges from 800 to 4,200, and Prices range from 80,000 to 400,000. This means there is a nearly 100 times difference in scale. When we normalize we will reduce both quantities to the same scale while preserving the differences between prices and sizes of homes. This will help our model learn the relationship between price and size." 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "metadata": { 434 | "id": "5n-zGuMN6np6", 435 | "colab_type": "code", 436 | "colab": {} 437 | }, 438 | "source": [ 439 | "# Scale data so SF and Sale Price are on similar scales with values \n", 440 | "# from 0.0 to 1.0\n", 441 | "\n", 442 | "from sklearn.preprocessing import MinMaxScaler\n", 443 | "\n", 444 | "sf_scaler = MinMaxScaler()\n", 445 | "sf_scaled = sf_scaler.fit_transform(df_housing['Total SF'].values.reshape(-1,1).astype(np.float64))\n", 446 | " \n", 447 | "price_scaler = MinMaxScaler()\n", 448 | "price_scaled = price_scaler.fit_transform(df_housing['SalePrice'].values.reshape(-1,1).astype(np.float64))" 449 | ], 450 | "execution_count": 0, 451 | "outputs": [] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": { 456 | "id": "2VSO8dVIW5Fb", 457 | "colab_type": "text" 458 | }, 459 | "source": [ 460 | "## Create Model\n", 461 | "\n", 462 | "Here we create a model to learn from our data. Our data looks linearly related, so we can use a straight line\n", 463 | "to fit our data. So we are going to create a model that is based on the equation of a straight line.\n", 464 | "\n", 465 | "From our training data we will learn this line's parameters; once they are defined we can use its equation to predict the value of any future data.\n", 466 | "\n", 467 | "We use TensorFlow's Keras library to make the process easy.\n", 468 | "\n", 469 | "We create a sequential model where the output of one layer becomes the input of the next layer.\n", 470 | "\n", 471 | "This model is a simple Neural Network that implements y = wx + bias, where **w is a weight** and **bias is an offset**.\n", 472 | "\n", 473 | "*You may recognize from Algebra or Geometry that y = wx + bias is the equation of a straight line, where w is the slope and bias is the offset. (See https://en.wikipedia.org/wiki/Line_(geometry) for a description.)*\n", 474 | "\n", 475 | "Therefore the goal is to use our data to train the model and learn the best values for w and bias.\n" 476 | ] 477 | }, 478 | { 479 | "cell_type": "markdown", 480 | "metadata": { 481 | "id": "o30MyqTYepiV", 482 | "colab_type": "text" 483 | }, 484 | "source": [ 485 | "### Our Model\n", 486 | "\n", 487 | "We create our linear model using TensorFlow's Keras library. 
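The whole network is a single Dense layer with one unit and one input, so it has exactly two trainable parameters: the weight w and the bias (after training, they could be read back with `model.get_weights()`, a Keras call we don't otherwise use here). 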
Using Keras makes it easy for us to create, train, and evaluate our model.\n", 488 | "\n" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "metadata": { 494 | "id": "SgcsUBmf_guO", 495 | "colab_type": "code", 496 | "colab": {} 497 | }, 498 | "source": [ 499 | "# Create model using the TensorFlow Keras library\n", 500 | "model = tf.keras.Sequential()\n", 501 | "model.add(tf.keras.layers.Dense(units=1, activation='linear', input_shape=(1,),\n", 502 | " kernel_initializer='random_uniform',\n", 503 | " bias_initializer='zeros'))\n" 504 | ], 505 | "execution_count": 0, 506 | "outputs": [] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": { 511 | "id": "LH040S8DdMTS", 512 | "colab_type": "text" 513 | }, 514 | "source": [ 515 | "### Compile the model\n", 516 | "\n", 517 | "The model is now defined, but it is not yet trained, or even ready to be trained.\n", 518 | "\n", 519 | "We will train the model by passing training data through it and adjusting the weight and bias to reduce loss (error). To perform these tasks we need a method for \n", 520 | "* measuring loss and \n", 521 | "* optimizing the values of the weight and bias to minimize this loss. \n", 522 | "\n", 523 | "There are many ways of measuring loss and optimizing the values. We are going to use **Mean-Squared Error** ('mean_squared_error') to measure loss, and the process of **Stochastic Gradient Descent** ('sgd') to find the optimal weight and bias to minimize the loss. *Note: We use the variant of sgd called mini-batch gradient descent, and set the batch size in the fit method below.*\n", 524 | "\n", 525 | "We set these parameters for the model with the **compile** statement. " 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "metadata": { 531 | "id": "KMYudNJpY9dY", 532 | "colab_type": "code", 533 | "colab": {} 534 | }, 535 | "source": [ 536 | "# Compile model\n", 537 | "optimizer = \"sgd\"\n", 538 | "model.compile(loss='mean_squared_error', optimizer=optimizer )\n" 539 | ], 540 | "execution_count": 0, 541 | "outputs": [] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": { 546 | "id": "O8a2y8xinVmf", 547 | "colab_type": "text" 548 | }, 549 | "source": [ 550 | "## Train the Model\n", 551 | "\n", 552 | "Once the model has been defined and compiled we can train the model. We do this with training data. \n", 553 | "\n", 554 | "We want to split our prepared dataset into 2 datasets. One dataset will be used for training, and the other dataset will be used for testing. **We never use testing data for training or training data for testing.**" 555 | ] 556 | }, 557 | { 558 | "cell_type": "code", 559 | "metadata": { 560 | "id": "EeE7HxU_l0MP", 561 | "colab_type": "code", 562 | "colab": {} 563 | }, 564 | "source": [ 565 | "# Split the dataset into training dataset - 70%, Testing dataset - 30%\n", 566 | "# we do this using the sklearn train_test_split method\n", 567 | "from sklearn.model_selection import train_test_split\n", 568 | "\n", 569 | "sf_train_scaled, sf_test_scaled, price_train_scaled, price_test_scaled = train_test_split(sf_scaled, \n", 570 | " price_scaled, \n", 571 | " test_size=0.3, random_state=42)" 572 | ], 573 | "execution_count": 0, 574 | "outputs": [] 575 | }, 576 | { 577 | "cell_type": "markdown", 578 | "metadata": { 579 | "id": "RJPTS2h1yGD3", 580 | "colab_type": "text" 581 | }, 582 | "source": [ 583 | "We pass the training data through the model multiple times. On each pass, the loss function (that we defined in the model compile) will be used to calculate loss. 
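(With mean squared error, that loss is the average of (predicted - actual)^2 over the batch.) 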
And the optimizer will be used to make adjustments to the weights and bias (just called weights from here on) to minimize this error. \n", 584 | "\n", 585 | "We repeat this process of calculating the loss with the current weights and updating the weights to minimize loss for the specified number of **epochs**. *We should see the loss decrease over time.*\n", 586 | "\n", 587 | "We can set the *verbose* flag to tell Keras how much information to show during the training process: 0 is silent, while 1 and 2 show more in-progress information." 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "metadata": { 593 | "id": "HOCRKtgMnSLk", 594 | "colab_type": "code", 595 | "colab": {} 596 | }, 597 | "source": [ 598 | "\n", 599 | "# Train model using data\n", 600 | "initial_epochs = 8\n", 601 | "batch_size = 10\n", 602 | "train_hist = model.fit(sf_train_scaled, price_train_scaled, \n", 603 | " epochs=initial_epochs, batch_size=batch_size, verbose=1)\n", 604 | "\n", 605 | "# Are 8 epochs enough??? Maybe/Maybe not" 606 | ], 607 | "execution_count": 0, 608 | "outputs": [] 609 | }, 610 | { 611 | "cell_type": "markdown", 612 | "metadata": { 613 | "id": "NXB_5zlkvEpt", 614 | "colab_type": "text" 615 | }, 616 | "source": [ 617 | "### Visually Confirm Model Training\n", 618 | "\n", 619 | "Our model learned the best weight and bias for our training data that it could within the defined number of epochs. Let's see if a line drawn with the learned weight and bias looks reasonable." 620 | ] 621 | }, 622 | { 623 | "cell_type": "code", 624 | "metadata": { 625 | "id": "1KPJ8zLCJYw1", 626 | "colab_type": "code", 627 | "colab": {} 628 | }, 629 | "source": [ 630 | "\n", 631 | "# predict the price with our trained model\n", 632 | "price_predicted_scaled = model.predict(sf_train_scaled)\n", 633 | "visualize_data(sf_scaler.inverse_transform(sf_train_scaled), price_scaler.inverse_transform(price_train_scaled),\n", 634 | " sf_scaler.inverse_transform(sf_train_scaled), price_scaler.inverse_transform(price_predicted_scaled),\n", 635 | " add_addn_reg_line=False)\n", 636 | " \n", 637 | "# If the line looks bad, what do we do?" 638 | ], 639 | "execution_count": 0, 640 | "outputs": [] 641 | }, 642 | { 643 | "cell_type": "markdown", 644 | "metadata": { 645 | "id": "vP7E7zNb71zw", 646 | "colab_type": "text" 647 | }, 648 | "source": [ 649 | "### Evaluate Training\n", 650 | "\n", 651 | "\n", 652 | "Our trained model does not look too good!\n", 653 | "\n", 654 | "What can we do? \n", 655 | "\n", 656 | "Should we change model architecture? Does linear look incorrect? \n", 657 | "\n", 658 | "> NO\n", 659 | "\n", 660 | "\n", 661 | "What does loss look like?\n" 662 | ] 663 | }, 664 | { 665 | "cell_type": "code", 666 | "metadata": { 667 | "id": "bVoztVaenFsr", 668 | "colab_type": "code", 669 | "colab": {} 670 | }, 671 | "source": [ 672 | "def plot_loss(hist):\n", 673 | " # plot the loss\n", 674 | " plt.title('Loss History')\n", 675 | " plt.plot(hist.history['loss'])\n", 676 | " plt.ylabel('loss')\n", 677 | " plt.xlabel('epoch')\n", 678 | " plt.show()\n", 679 | "\n", 680 | "print(type(train_hist))\n", 681 | "plot_loss(train_hist)" 682 | ], 683 | "execution_count": 0, 684 | "outputs": [] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": { 689 | "id": "bWEO8ONjm9gm", 690 | "colab_type": "text" 691 | }, 692 | "source": [ 693 | "\n", 694 | "\n", 695 | "> LOSS STILL LOOKS HIGH. BUT IS DECREASING.\n", 696 | "\n", 697 | "\n", 698 | "\n", 699 | "Perhaps we have not converged to the lowest loss. What can we do???? 
\n", 700 | "\n", 701 | "Train with more data? \n", 702 | "\n", 703 | "> NO. WE HAVE ALL THE DATA\n", 704 | "\n", 705 | "Train for more epochs and check loss to ensure values have converged?\n", 706 | " > YES!!!" 707 | ] 708 | }, 709 | { 710 | "cell_type": "markdown", 711 | "metadata": { 712 | "id": "Nez9pMAyhNll", 713 | "colab_type": "text" 714 | }, 715 | "source": [ 716 | "### Training more epochs\n", 717 | "\n", 718 | "The model is partially trained but the results are not great. Let's go back and train the model some more. \n", 719 | "\n", 720 | "The question is how much more? \n", 721 | "\n", 722 | "We want to ensure the model is converged to a range of loss values. So what we will see in the training output is the loss value starts to stay in a specific range . It may go up or down a little, but there should be a stable range." 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "metadata": { 728 | "id": "h2azhJj0hd7t", 729 | "colab_type": "code", 730 | "colab": {} 731 | }, 732 | "source": [ 733 | "# Train model using data. This time, train for additional epochs\n", 734 | "# Also capture the changes over time in the history variable, train_hist_addn so we \n", 735 | "# can see if loss is converging\n", 736 | "addn_epochs = 1200\n", 737 | "train_hist_addn =model.fit(sf_train_scaled, price_train_scaled, \n", 738 | " epochs=addn_epochs, verbose=1)" 739 | ], 740 | "execution_count": 0, 741 | "outputs": [] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": { 746 | "id": "KRhVDSCDj_pu", 747 | "colab_type": "text" 748 | }, 749 | "source": [ 750 | "Plot the loss history values to see if our model converged.\n", 751 | "\n", 752 | "Because a model can have a history of many different metrics over time, there is a history object which has a keys dictionary of \n", 753 | "metrics whose values were stored on each epoch. By plotting the values for the 'loss' key we can answer:\n", 754 | "\n", 755 | "\n", 756 | "1. Did the loss converge?\n", 757 | "2. How many epochs were required for the loss to converge?\n", 758 | "\n" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "metadata": { 764 | "id": "OEXR-tQpjmJw", 765 | "colab_type": "code", 766 | "colab": {} 767 | }, 768 | "source": [ 769 | "# add the addition epochs to the training history\n", 770 | "train_hist.history['loss'].extend(train_hist_addn.history['loss']) \n", 771 | "plot_loss(train_hist)" 772 | ], 773 | "execution_count": 0, 774 | "outputs": [] 775 | }, 776 | { 777 | "cell_type": "markdown", 778 | "metadata": { 779 | "id": "XoTv9B0Inx19", 780 | "colab_type": "text" 781 | }, 782 | "source": [ 783 | "### Visually Confirm Model after Additional Training\n", 784 | "\n", 785 | "As the plot showed, the model's loss converged by 1000 epochs. So the model should be trained to allow us to make accurate predictions. \n", 786 | "\n", 787 | "Let's again plot the predicted price for each size. 
And see if a line drawn with the learned weight and bias looks reasonable.\n" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "metadata": { 793 | "id": "lUntqHjoko-n", 794 | "colab_type": "code", 795 | "colab": {} 796 | }, 797 | "source": [ 798 | "# Visualize data and regression line for learned weights\n", 799 | "price_predicted_scaled = model.predict(sf_train_scaled)\n", 800 | "visualize_data(sf_scaler.inverse_transform(sf_train_scaled), \n", 801 | " price_scaler.inverse_transform(price_train_scaled),\n", 802 | " sf_scaler.inverse_transform(sf_train_scaled), \n", 803 | " price_scaler.inverse_transform(price_predicted_scaled),\n", 804 | " add_addn_reg_line=False)\n" 805 | ], 806 | "execution_count": 0, 807 | "outputs": [] 808 | }, 809 | { 810 | "cell_type": "markdown", 811 | "metadata": { 812 | "id": "YrrEbGQOvUrv", 813 | "colab_type": "text" 814 | }, 815 | "source": [ 816 | "**Yes. The predicted line looks good!!!**" 817 | ] 818 | }, 819 | { 820 | "cell_type": "markdown", 821 | "metadata": { 822 | "id": "fZmXIWpZXmwe", 823 | "colab_type": "text" 824 | }, 825 | "source": [ 826 | "## Evaluate Trained Model\n", 827 | "\n", 828 | "Once we have the model trained, we want to test **with data not used to train the model!!!**\n", 829 | "\n", 830 | "The use of data not used in training to test the model is key. If we use training data, our testing results will be overly optimistic and will not reflect *real-life* usage of the trained model to make predictions." 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "metadata": { 836 | "id": "XjV3pUQ4bP2t", 837 | "colab_type": "code", 838 | "colab": {} 839 | }, 840 | "source": [ 841 | "# Make a price prediction on data the model has never seen before, i.e. \n", 842 | "# the Test data square footage\n", 843 | "price_test_pred_scaled = model.predict(sf_test_scaled)\n", 844 | "\n", 845 | "# calculate the mean squared error for the prediction, lower is better\n", 846 | "from sklearn.metrics import mean_squared_error\n", 847 | "print(\"prediction mean squared error:\", \n", 848 | " mean_squared_error(price_test_scaled, price_test_pred_scaled ))\n", 849 | "\n", 850 | "# Visualize data\n", 851 | "# plus the test square footage and predicted price\n", 852 | "print('\\nGreen triangles are Test square footage with predicted Price')\n", 853 | "visualize_data(sf_scaler.inverse_transform(sf_test_scaled),\n", 854 | " price_scaler.inverse_transform(price_test_scaled),\n", 855 | " sf_scaler.inverse_transform(sf_test_scaled), \n", 856 | " price_scaler.inverse_transform(price_test_pred_scaled),\n", 857 | " add_addn_reg_line=False)\n", 858 | "print('\\nOur predicted values fit the data well!')" 859 | ], 860 | "execution_count": 0, 861 | "outputs": [] 862 | }, 863 | { 864 | "cell_type": "markdown", 865 | "metadata": { 866 | "id": "r-otP7n7v4QJ", 867 | "colab_type": "text" 868 | }, 869 | "source": [ 870 | "## What we have learned\n", 871 | "\n", 872 | "\n", 873 | "\n", 874 | "* In Machine Learning we often spend considerable effort getting data into a form that our models can learn from.\n", 875 | "* A single neuron model can learn the appropriate slope and offset for the data. 
\n", 876 | "* Building this model was made easier by TensorFlow's implementation of the Keras library.\n", 877 | "* But to learn from the data we need to pass the data through the model enough times to get the loss to converge.\n", 878 | "* When we do that, the model learns the data's slope and offset more accurately.\n", 879 | "* And when we apply these learned values, the predict prices from the test data's square footage are reasonable. \n", 880 | "\n" 881 | ] 882 | } 883 | ] 884 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # First-NN-Tensorflow 2 | Files for the course "Build, Train, and Deploy your First Neural Network in TensorFlow" 3 | --------------------------------------------------------------------------------