├── Chapter 07
├── Data
│ └── Image.jpg
├── 1Self Organizing Map - Visualisations of heatmaps.r
└── 2Vector Quantization - Image Clustering.r
├── Chapter 14
├── Data
│ └── DT_4_ind
└── Case Study 3 - Forecast of Electricity Consumption.r
├── Chapter 02
├── 4Poisson Regression.r
├── Data
│ ├── brine.txt
│ ├── gala.txt
│ ├── tobit.csv
│ └── hsbdemo.csv
├── 2Multinomial Logistic Regression.r
├── 3Tobit Regression.r
└── 1Discriminant Function Analysis script.r
├── Chapter 13
├── Data
│ └── publicdatamay2007.xls
└── Case Study 2 - Pricing Reinsurance Contracts.r
├── Chapter 09
├── 2Hidden Markov Models for Regime Detection.r
└── 1HMM - EUR & USD.r
├── Chapter 08
├── 2Markov Chains - Multi-Channel Attribution Model.r
├── 3Markov Chains - Car Rental Agency Service.r
├── 1Markov Chains - Stocks Regime Switching Model.r
├── Data
│ └── StocksRegimeSwitching.csv
├── 4Continuous Time Markov Chains - Vehicle Service at Gas Station.r
└── 5Monte Carlo Simulations - Calibrated Hull and White short-rates.r
├── .gitattributes
├── Chapter 03
├── Data
│ ├── foodstuffs.txt
│ ├── GSE4051_design.csv
│ ├── Europenaprotein.csv
│ ├── NASAUnderstory.csv
│ ├── WBClust2013.csv
│ └── math test.txt
├── 6K-means clustering - Foodstuff.r
├── 5K-means clustering - European Protein Consumption.r
├── 4Binary Clustering - Math test.r
├── 2Hierarchical Clustering - NASA Under story.r
├── 3Hierarchical Clustering - Gene Clustering.r
└── 1Hierarchical Clustering - World Bank.r
├── .gitignore
├── LICENSE
├── Chapter 06
├── 1Decision tree learning - Advance Health Directive for Patients with Chest Pain.r
├── 2Decision tree learning - Income Based Distribution of Real Estate Value.r
├── 6Support Vector Machine - Currency Trading Strategy.r
├── 4Naive Bayes - Predicting the Direction of Stock Movement.r
├── 3Decision tree learning - Predicting the Direction of Stock Movement.r
├── 5Random Forest - Currency Trading Strategy.r
└── 7Stochastic Gradient Descent - Adult Income.r
├── Chapter 04
├── Data
│ ├── Calories_Burnt.csv
│ └── delta.csv
├── 3Principal Component Analysis - Understanding world cuisine.r
├── 2Dimension Reduction Methods - Delta's aircraft fleet.r
└── 1Shrinkage Methods - Calories burnt per day.r
├── Chapter 11
└── 1Recurrent Neural Networks - Predicting periodic signal.r
├── Chapter 10
├── 1Modelling S&P 500.r
├── 2Measuring Unemployment Rate.r
└── Data
│ └── FRED-WIUR.csv
├── README.md
├── Chapter 05
├── 2Smooth Splines.r
├── 1Generalized additive model - Measuring household income of New Zealand.r
└── 3Loess - United States Geological Survey.r
└── Chapter 12
└── Case Study 1 - World Bank data Analysis.r
/Chapter 07/Data/Image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Practical-Machine-Learning-Cookbook/HEAD/Chapter 07/Data/Image.jpg
--------------------------------------------------------------------------------
/Chapter 14/Data/DT_4_ind:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Practical-Machine-Learning-Cookbook/HEAD/Chapter 14/Data/DT_4_ind
--------------------------------------------------------------------------------
/Chapter 02/4Poisson Regression.r:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Practical-Machine-Learning-Cookbook/HEAD/Chapter 02/4Poisson Regression.r
--------------------------------------------------------------------------------
/Chapter 13/Data/publicdatamay2007.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Practical-Machine-Learning-Cookbook/HEAD/Chapter 13/Data/publicdatamay2007.xls
--------------------------------------------------------------------------------
/Chapter 09/2Hidden Markov Models for Regime Detection.r:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Practical-Machine-Learning-Cookbook/HEAD/Chapter 09/2Hidden Markov Models for Regime Detection.r
--------------------------------------------------------------------------------
/Chapter 08/2Markov Chains - Multi-Channel Attribution Model.r:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Practical-Machine-Learning-Cookbook/HEAD/Chapter 08/2Markov Chains - Multi-Channel Attribution Model.r
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/Chapter 03/Data/foodstuffs.txt:
--------------------------------------------------------------------------------
1 | Food Energy Protein Fat Calcium Iron
2 | BB 340 20 28 9 2.6
3 | HR 245 21 17 9 2.7
4 | BR 420 15 39 7 2.0
5 | BS 375 19 32 9 2.5
6 | BC 180 22 10 17 3.7
7 | CB 115 20 3 8 1.4
8 | CC 170 25 7 12 1.5
9 | BH 160 26 5 14 5.9
10 | LL 265 20 20 9 2.6
11 | LS 300 18 25 9 2.3
12 | HS 340 20 28 9 2.5
13 | PR 340 19 29 9 2.5
14 | PS 355 19 30 9 2.4
15 | BT 205 18 14 7 2.5
16 | VC 185 23 9 9 2.7
17 | FB 135 22 4 25 0.6
18 | AR 70 11 1 82 6.0
19 | AC 45 7 1 74 5.4
20 | TC 90 14 2 38 0.8
21 | HF 135 16 5 15 0.5
22 | MB 200 19 13 5 1.0
23 | MC 155 16 9 157 1.8
24 | PF 195 16 11 14 1.3
25 | SC 120 17 5 159 0.7
26 | DC 180 22 9 367 2.5
27 | UC 170 25 7 7 1.2
28 | RC 110 23 1 98 2.6
--------------------------------------------------------------------------------
/Chapter 02/Data/brine.txt:
--------------------------------------------------------------------------------
1 | ,HCO3,SO4,Cl,Ca,Mg,Na,GROUP
2 | 1,10.4,30.0,967.1,95.9,53.7,857.7,1
3 | 2,6.2,29.6,1174.9,111.7,43.9,1054.7,1
4 | 3,2.1,11.4,2387.1,348.3,119.3,1932.4,1
5 | 4,8.5,22.5,2186.1,339.6,73.6,1803.4,1
6 | 5,6.7,32.8,2015.5,287.6,75.1,1691.8,1
7 | 6,3.8,18.9,2175.8,340.4,63.8,1793.9,1
8 | 7,1.5,16.5,2367.0,412.0,95.8,1872.5,1
9 | 8,25.6,0,134.7,12.7,7.1,134.7,2
10 | 9,12.0,104.6,3163.8,95.6,90.1,3093.9,2
11 | 10,9.0,104.0,1342.6,104.9,160.2,1190.1,2
12 | 11,13.7,103.3,2151.6,103.7,70.0,2054.6,2
13 | 12,16.6,92.3,905.1,91.5,50.9,871.4,2
14 | 13,14.1,80.1,554.8,118.9,62.3,472.4,2
15 | 14,1.3,10.4,3399.5,532.3,235.6,2642.5,3
16 | 15,3.6,5.2,974.5,147.5,69.0,768.1,3
17 | 16,0.8,9.8,1430.2,295.7,118.4,1027.1,3
18 | 17,1.8,25.6,183.2,35.4,13.5,161.5,3
19 | 18,8.8,3.4,289.9,32.8,22.4,225.2,3
20 | 19,6.3,16.7,360.9,41.9,24.0,318.1,3
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Windows image file caches
2 | Thumbs.db
3 | ehthumbs.db
4 |
5 | # Folder config file
6 | Desktop.ini
7 |
8 | # Recycle Bin used on file shares
9 | $RECYCLE.BIN/
10 |
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 |
17 | # Windows shortcuts
18 | *.lnk
19 |
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 |
24 | # OSX
25 | # =========================
26 |
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 |
31 | # Thumbnails
32 | ._*
33 |
34 | # Files that might appear in the root of a volume
35 | .DocumentRevisions-V100
36 | .fseventsd
37 | .Spotlight-V100
38 | .TemporaryItems
39 | .Trashes
40 | .VolumeIcon.icns
41 |
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 |
--------------------------------------------------------------------------------
/Chapter 03/Data/GSE4051_design.csv:
--------------------------------------------------------------------------------
1 | sidChar,sidNum,devStage,gType
2 | Sample_20,20,E16,wt
3 | Sample_21,21,E16,wt
4 | Sample_22,22,E16,wt
5 | Sample_23,23,E16,wt
6 | Sample_16,16,E16,NrlKO
7 | Sample_17,17,E16,NrlKO
8 | Sample_6,6,E16,NrlKO
9 | Sample_24,24,P2,wt
10 | Sample_25,25,P2,wt
11 | Sample_26,26,P2,wt
12 | Sample_27,27,P2,wt
13 | Sample_14,14,P2,NrlKO
14 | Sample_3,3,P2,NrlKO
15 | Sample_5,5,P2,NrlKO
16 | Sample_8,8,P2,NrlKO
17 | Sample_28,28,P6,wt
18 | Sample_29,29,P6,wt
19 | Sample_30,30,P6,wt
20 | Sample_31,31,P6,wt
21 | Sample_1,1,P6,NrlKO
22 | Sample_10,10,P6,NrlKO
23 | Sample_4,4,P6,NrlKO
24 | Sample_7,7,P6,NrlKO
25 | Sample_32,32,P10,wt
26 | Sample_33,33,P10,wt
27 | Sample_34,34,P10,wt
28 | Sample_35,35,P10,wt
29 | Sample_13,13,P10,NrlKO
30 | Sample_15,15,P10,NrlKO
31 | Sample_18,18,P10,NrlKO
32 | Sample_19,19,P10,NrlKO
33 | Sample_36,36,4_weeks,wt
34 | Sample_37,37,4_weeks,wt
35 | Sample_38,38,4_weeks,wt
36 | Sample_39,39,4_weeks,wt
37 | Sample_11,11,4_weeks,NrlKO
38 | Sample_12,12,4_weeks,NrlKO
39 | Sample_2,2,4_weeks,NrlKO
40 | Sample_9,9,4_weeks,NrlKO
41 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Packt
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Chapter 02/2Multinomial Logistic Regression.r:
--------------------------------------------------------------------------------
1 | require(foreign)
2 | require(nnet)
3 | require(ggplot2)
4 | require(reshape2)
5 |
6 | ml <- read.table("d:/hsbdemo.csv", header=TRUE, sep=",", row.names="id")
7 | with(ml, table(ses, prog))
8 |
9 |
10 | with(ml, do.call(rbind, tapply(write, prog,
11 | function(x) c(M = mean(x), SD = sd(x)))))
12 |
13 | ml$prog2 <- relevel(ml$prog, ref = "academic")
14 | test <- multinom(prog2 ~ ses + write, data = ml)
15 |
16 | summary(test)
17 |
18 | z <- summary(test)$coefficients/summary(test)$standard.errors
19 | z
20 |
21 | p <- (1 - pnorm(abs(z), 0, 1))*2
22 | p
23 |
24 | exp(coef(test))
25 |
26 | head(pp <- fitted(test))
27 |
28 | dses <- data.frame(ses = c("low", "middle", "high"),write = mean(ml$write))
29 |
30 | predict(test, newdata = dses, "probs")
31 |
32 | dwrite <- data.frame(ses = rep(c("low", "middle", "high"), each = 41), write = rep(c(30:70), 3))
33 |
34 | pp.write <- cbind(dwrite, predict(test, newdata = dwrite, type = "probs", se = TRUE))
35 |
36 | by(pp.write[, 3:5], pp.write$ses, colMeans)
37 |
38 | lpp <- melt(pp.write, id.vars = c("ses", "write"), value.name = "probability")
39 | head(lpp)
40 |
41 | ggplot(lpp, aes(x = write, y = probability, colour = ses)) +
42 | geom_line() +
43 | facet_grid(variable ~ ., scales="free")
--------------------------------------------------------------------------------
/Chapter 03/Data/Europenaprotein.csv:
--------------------------------------------------------------------------------
1 | Country,RedMeat,WhiteMeat,Eggs,Milk,Fish,Cereals,Starch,Nuts,Fr&Veg
2 | Albania,10.1,1.4,0.5,8.9,0.2,42.3,0.6,5.5,1.7
3 | Austria,8.9,14,4.3,19.9,2.1,28,3.6,1.3,4.3
4 | Belgium,13.5,9.3,4.1,17.5,4.5,26.6,5.7,2.1,4
5 | Bulgaria,7.8,6,1.6,8.3,1.2,56.7,1.1,3.7,4.2
6 | Czechoslovakia,9.7,11.4,2.8,12.5,2,34.3,5,1.1,4
7 | Denmark,10.6,10.8,3.7,25,9.9,21.9,4.8,0.7,2.4
8 | E Germany,8.4,11.6,3.7,11.1,5.4,24.6,6.5,0.8,3.6
9 | Finland,9.5,4.9,2.7,33.7,5.8,26.3,5.1,1,1.4
10 | France,18,9.9,3.3,19.5,5.7,28.1,4.8,2.4,6.5
11 | Greece,10.2,3,2.8,17.6,5.9,41.7,2.2,7.8,6.5
12 | Hungary,5.3,12.4,2.9,9.7,0.3,40.1,4,5.4,4.2
13 | Ireland,13.9,10,4.7,25.8,2.2,24,6.2,1.6,2.9
14 | Italy,9,5.1,2.9,13.7,3.4,36.8,2.1,4.3,6.7
15 | Netherlands,9.5,13.6,3.6,23.4,2.5,22.4,4.2,1.8,3.7
16 | Norway,9.4,4.7,2.7,23.3,9.7,23,4.6,1.6,2.7
17 | Poland,6.9,10.2,2.7,19.3,3,36.1,5.9,2,6.6
18 | Portugal,6.2,3.7,1.1,4.9,14.2,27,5.9,4.7,7.9
19 | Romania,6.2,6.3,1.5,11.1,1,49.6,3.1,5.3,2.8
20 | Spain,7.1,3.4,3.1,8.6,7,29.2,5.7,5.9,7.2
21 | Sweden,9.9,7.8,3.5,24.7,7.5,19.5,3.7,1.4,2
22 | Switzerland,13.1,10.1,3.1,23.8,2.3,25.6,2.8,2.4,4.9
23 | UK,17.4,5.7,4.7,20.6,4.3,24.3,4.7,3.4,3.3
24 | USSR,9.3,4.6,2.1,16.6,3,43.6,6.4,3.4,2.9
25 | W Germany,11.4,12.5,4.1,18.8,3.4,18.6,5.2,1.5,3.8
26 | Yugoslavia,4.4,5,1.2,9.5,0.6,55.9,3,5.7,3.2
27 |
--------------------------------------------------------------------------------
/Chapter 02/Data/gala.txt:
--------------------------------------------------------------------------------
1 | Species Endemics Area Elevation Nearest Scruz Adjacent
2 | Baltra 58 23 25.09 346 0.6 0.6 1.84
3 | Bartolome 31 21 1.24 109 0.6 26.3 572.33
4 | Caldwell 3 3 0.21 114 2.8 58.7 0.78
5 | Champion 25 9 0.1 46 1.9 47.4 0.18
6 | Coamano 2 1 0.05 77 1.9 1.9 903.82
7 | Daphne.Major 18 11 0.34 119 8 8 1.84
8 | Daphne.Minor 24 0 0.08 93 6 12 0.34
9 | Darwin 10 7 2.33 168 34.1 290.2 2.85
10 | Eden 8 4 0.03 71 0.4 0.4 17.95
11 | Enderby 2 2 0.18 112 2.6 50.2 0.1
12 | Espanola 97 26 58.27 198 1.1 88.3 0.57
13 | Fernandina 93 35 634.49 1494 4.3 95.3 4669.32
14 | Gardner1 58 17 0.57 49 1.1 93.1 58.27
15 | Gardner2 5 4 0.78 227 4.6 62.2 0.21
16 | Genovesa 40 19 17.35 76 47.4 92.2 129.49
17 | Isabela 347 89 4669.32 1707 0.7 28.1 634.49
18 | Marchena 51 23 129.49 343 29.1 85.9 59.56
19 | Onslow 2 2 0.01 25 3.3 45.9 0.1
20 | Pinta 104 37 59.56 777 29.1 119.6 129.49
21 | Pinzon 108 33 17.95 458 10.7 10.7 0.03
22 | Las.Plazas 12 9 0.23 94 0.5 0.6 25.09
23 | Rabida 70 30 4.89 367 4.4 24.4 572.33
24 | SanCristobal 280 65 551.62 716 45.2 66.6 0.57
25 | SanSalvador 237 81 572.33 906 0.2 19.8 4.89
26 | SantaCruz 444 95 903.82 864 0.6 0 0.52
27 | SantaFe 62 28 24.08 259 16.5 16.5 0.52
28 | SantaMaria 285 73 170.92 640 2.6 49.2 0.1
29 | Seymour 44 16 1.84 147 0.6 9.6 25.09
30 | Tortuga 16 8 1.24 186 6.8 50.9 17.95
31 | Wolf 21 12 2.85 253 34.1 254.7 2.33
--------------------------------------------------------------------------------
/Chapter 06/1Decision tree learning - Advance Health Directive for Patients with Chest Pain.r:
--------------------------------------------------------------------------------
1 | R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 | install.packages("tree")
19 | install.packages("caret")
20 | install.packages("e1071")
21 | library(tree)
22 | library(caret)
23 |
24 | AHD_data <- read.csv("d:/Heart.csv", header = TRUE)
25 |
26 | str(AHD_data)
27 |
28 | head(AHD_data)
29 |
30 | dim(AHD_data)
31 |
32 | split <- createDataPartition(y=AHD_data$AHD, p = 0.5, list=FALSE)
33 |
34 | split
35 |
36 | train <- AHD_data[split,]
37 |
38 | train
39 |
40 | test <- AHD_data[-split,]
41 |
42 | test
43 |
44 | trees <- tree(AHD ~., train)
45 |
46 | plot(trees)
47 |
48 | cv.trees <- cv.tree(trees, FUN=prune.misclass)
49 |
50 | cv.trees
51 |
52 | plot(cv.trees)
53 |
54 | prune.trees <- prune.misclass(trees, best=4)
55 |
56 | plot(prune.trees)
57 |
58 | text(prune.trees, pretty=0)
59 |
60 | tree.pred <- predict(prune.trees, test, type='class')
61 |
62 | tree.pred
63 |
64 | confusionMatrix(tree.pred, test$AHD)
65 |
--------------------------------------------------------------------------------
/Chapter 03/6K-means clustering - Foodstuff.r:
--------------------------------------------------------------------------------
1 | library(cluster)
2 |
3 | food.energycontent <- read.table("d:/foodstuffs.txt", header=T)
4 | head(food.energycontent)
5 | Food Energy Protein Fat Calcium Iron
6 | 1 BB 340 20 28 9 2.6
7 | 2 HR 245 21 17 9 2.7
8 | 3 BR 420 15 39 7 2.0
9 | 4 BS 375 19 32 9 2.5
10 | 5 BC 180 22 10 17 3.7
11 | 6 CB 115 20 3 8 1.4
12 |
13 | str(food.energycontent)
14 |
15 | standard.deviation <- apply(food.energycontent[,-1], 2, sd)
16 | standard.deviation
17 |
18 | foodergycnt.stddev <- sweep(food.energycontent[,-1],2,standard.deviation,FUN="/")
19 | foodergycnt.stddev
20 |
21 | food.5cluster <- kmeans(foodergycnt.stddev, centers=5, iter.max=100, nstart=25)
22 | food.5cluster
23 |
24 | food.4cluster <- kmeans(foodergycnt.stddev, centers=4, iter.max=100, nstart=25)
25 | food.4cluster
26 |
27 | food.4cluster$cluster
28 |
29 | food.4cluster.clust <- lapply(1:4, function(nc) protein[food.4cluster$cluster==nc])
30 | food.4cluster.clust
31 |
32 | pairs(food.energycontent[,-1], panel=function(x,y) text(x,y,food.4cluster$cluster))
33 |
34 | food.pc <- princomp(food.energycontent[,-1],cor=T)
35 | my.color.vector <- rep("green", times=nrow(food.energycontent))
36 | my.color.vector[food.4cluster$cluster==2] <- "blue"
37 | my.color.vector[food.4cluster$cluster==3] <- "red"
38 | my.color.vector[food.4cluster$cluster==4] <- "orange"
39 | par(pty="s")
40 | plot(food.pc$scores[,1], food.pc$scores[,2], ylim=range(food.pc$scores[,1]),
41 | xlab="PC 1", ylab="PC 2", type ='n', lwd=2)
42 | text(food.pc$scores[,1], food.pc$scores[,2], labels=Food, cex=0.7, lwd=2,
43 | col=my.color.vector)
--------------------------------------------------------------------------------
/Chapter 03/5K-means clustering - European Protein Consumption.r:
--------------------------------------------------------------------------------
1 |
2 |
3 | R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
4 | Copyright (C) 2015 The R Foundation for Statistical Computing
5 | Platform: x86_64-w64-mingw32/x64 (64-bit)
6 |
7 | R is free software and comes with ABSOLUTELY NO WARRANTY.
8 | You are welcome to redistribute it under certain conditions.
9 | Type 'license()' or 'licence()' for distribution details.
10 |
11 | R is a collaborative project with many contributors.
12 | Type 'contributors()' for more information and
13 | 'citation()' on how to cite R or R packages in publications.
14 |
15 | Type 'demo()' for some demos, 'help()' for on-line help, or
16 | 'help.start()' for an HTML browser interface to help.
17 | Type 'q()' to quit R.
18 |
19 | [Workspace loaded from ~/.RData]
20 |
21 | protein = read.csv("d:/Europenaprotein.csv",header=T)
22 | head(protein)
23 |
24 | set.seed(123456789)
25 | groupMeat <- kmeans(protein[,c("WhiteMeat","RedMeat")], centers=3, nstart=10)
26 |
27 | groupMeat
28 |
29 | o=order(groupMeat$cluster)
30 |
31 | data.frame(protein$Country[o],groupMeat$cluster[o])
32 |
33 | plot(protein$Red, protein$White, type="n", xlim=c(3,19), xlab="Red Meat", ylab="White Meat")
34 | text(x=protein$Red, y=protein$White, labels=protein$Country,col=groupMeat$cluster+1)
35 |
36 |
37 | set.seed(123456789)
38 | groupProtein <- kmeans(protein[,-1], centers=7, nstart=10)
39 | o=order(groupProtein$cluster)
40 | data.frame(protein$Country[o],groupProtein$cluster[o])
41 |
42 | library(cluster)
43 | clusplot(protein[,-1], groupProtein$cluster, main='2D representation of the Cluster solution', color=TRUE, shade=TRUE, labels=2, lines=0)
44 | foodagg=agnes(protein,diss=FALSE,metric="euclidian")
45 | foodagg
46 | plot(foodagg, main='Dendrogram')
47 |
48 | groups <- cutree(foodagg, k=4)
49 | rect.hclust(foodagg, k=4, border="red")
--------------------------------------------------------------------------------
/Chapter 02/3Tobit Regression.r:
--------------------------------------------------------------------------------
1 | require(ggplot2)
2 | require(GGally)
3 | require(VGAM)
4 |
5 | dat <- read.table("d:/tobit.csv", header=TRUE, sep=",", row.names="id")
6 |
7 | summary(dat)
8 |
9 | f <- function(x, var, bw = 15) {
10 | dnorm(x, mean = mean(var), sd(var)) * length(var) * bw
11 | }
12 |
13 | p <- ggplot(dat, aes(x = apt, fill=prog))
14 |
15 | p + stat_bin(binwidth=15) +
16 | stat_function(fun = f, size = 1,
17 | args = list(var = dat$apt))
18 |
19 | p + stat_bin(binwidth = 1) + stat_function(fun = f, size = 1, args = list(var = dat$apt,
20 | bw = 1))
21 |
22 | cor(dat[, c("read", "math", "apt")])
23 |
24 | ggpairs(dat[, c("read", "math", "apt")])
25 |
26 | ------------------------------------------------------------------------------------------
27 |
28 | #Below we run the tobit model, using the vglm function of the VGAM package.
29 |
30 | summary(m <- vglm(apt ~ read + math + prog, tobit(Upper = 800), data = dat))
31 |
32 | ctable <- coef(summary(m))
33 | pvals <- 2 * pt(abs(ctable[, "z value"]), df.residual(m), lower.tail = FALSE)
34 | cbind(ctable, pvals)
35 |
36 | m2 <- vglm(apt ~ read + math, tobit(Upper = 800), data = dat)
37 | (p <- pchisq(2 * (logLik(m) - logLik(m2)), df = 2, lower.tail = FALSE))
38 |
39 | b <- coef(m)
40 | se <- sqrt(diag(vcov(m)))
41 |
42 |
43 | cbind(LL = b - qnorm(0.975) * se, UL = b + qnorm(0.975) * se)
44 |
45 |
46 | dat$yhat <- fitted(m)[,1]
47 | dat$rr <- resid(m, type = "response")
48 | dat$rp <- resid(m, type = "pearson")[,1]
49 | par(mfcol = c(2, 3))
50 |
51 | with(dat, {
52 | plot(yhat, rr, main = "Fitted vs Residuals")
53 | qqnorm(rr)
54 | plot(yhat, rp, main = "Fitted vs Pearson Residuals")
55 | qqnorm(rp)
56 | plot(apt, rp, main = "Actual vs Pearson Residuals")
57 | plot(apt, yhat, main = "Actual vs Fitted")
58 | })
59 |
60 | (r <- with(dat, cor(yhat, apt)))
61 | r^2
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/Chapter 08/3Markov Chains - Car Rental Agency Service.r:
--------------------------------------------------------------------------------
1 | R version 3.2.2 (2015-08-14) -- "Fire Safety"
2 | Copyright (C) 2015 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 | [Workspace loaded from ~/.RData]
17 |
18 | install.packages("markovchain")
19 | library(markovchain)
20 |
21 | RentalStates <- c("Downtown", "East", "West")
22 |
23 | RentalStates
24 |
25 | RentalTransitionMatrix <- matrix(c(0.3, 0.3, 0.4,
26 | 0.4, 0.4, 0.2,
27 | 0.5, 0.3, 0.2),
28 | byrow = T, nrow = 3, dimnames = list(RentalStates, RentalStates))
29 |
30 |
31 | RentalTransitionMatrix
32 |
33 | mcRental <- new("markovchain", states = RentalStates, byrow = T, transitionMatrix = RentalTransitionMatrix, name = "Rental Cars")
34 |
35 | mcRental
36 |
37 | mcRental[2]
38 |
39 | plot(mcRental)
40 |
41 | transitionProbability(mcRental, "East", "West")
42 |
43 | x <- 0.3 * 0.3
44 | y <- 0.3 * 0.4
45 | z <- 0.4 * 0.5
46 | x + y + z
47 |
48 | mcRental ^ 2
49 |
50 | mcRental^20
51 |
52 | mcRental ^ 30
53 |
54 | 70 * steadyStates(mcRental)
55 |
56 | summary(mcRental)
57 |
58 | conditionalDistribution(mcRental, "Downtown")
59 |
60 | conditionalDistribution(mcRental, "West")
61 |
62 | conditionalDistribution(mcRental, "East")
63 |
--------------------------------------------------------------------------------
/Chapter 04/Data/Calories_Burnt.csv:
--------------------------------------------------------------------------------
1 | Activities,,,,,,,,,
2 | Date,Calories Burned,Steps,Distance,Floors,Minutes Sedentary,Minutes Lightly Active,Minutes Fairly Active,Minutes Very Active,Activity Calories
3 | 7/07/2016,"2,682","12,541",9.02,13,667,171,18,60,"1,248"
4 | 8/07/2016,"2,423","8,029",5.7,35,760,208,13,6,928
5 | 9/07/2016,"2,875","10,801",7.67,3,496,148,18,46,"1,040"
6 | 10/07/2016,"2,638","11,997",8.52,22,771,248,3,27,"1,285"
7 | 11/07/2016,"2,423","9,039",6.42,12,714,232,10,16,"1,044"
8 | 12/07/2016,"3,102","17,721",12.58,8,519,226,30,107,"1,805"
9 | 13/07/2016,"2,450","10,544",7.49,4,637,191,8,45,"1,064"
10 | 14/07/2016,"2,555","10,047",7.16,68,711,161,10,57,"1,060"
11 | 15/07/2016,"2,245","4,733",3.36,9,783,139,20,6,667
12 | 16/07/2016,"2,936","12,056",8.56,25,604,282,32,36,"1,576"
13 | 17/07/2016,"2,717","11,791",8.37,46,870,235,28,45,"1,340"
14 | 18/07/2016,"2,690","10,721",7.61,7,481,248,10,33,"1,226"
15 | 19/07/2016,"3,147","13,007",9.23,110,448,199,70,80,"1,836"
16 | 20/07/2016,"2,837","13,401",9.51,115,559,201,20,77,"1,450"
17 | 21/07/2016,"2,851","15,281",10.85,143,589,208,18,82,"1,523"
18 | 22/07/2016,"2,611","11,337",8.05,15,620,242,7,31,"1,234"
19 | 23/07/2016,"2,307","7,738",5.49,24,738,133,18,37,806
20 | 24/07/2016,"3,109","11,767",8.4,41,534,333,53,31,"1,840"
21 | 25/07/2016,"3,164","13,324",9.46,41,582,242,40,98,"1,842"
22 | 26/07/2016,"2,593","5,957",4.23,11,694,262,7,4,"1,080"
23 | 27/07/2016,"2,490","10,206",7.25,1,692,204,10,19,"1,060"
24 | 28/07/2016,"2,638","11,557",8.21,2,498,179,46,49,"1,252"
25 | 29/07/2016,"2,769","11,013",7.84,33,478,221,40,40,"1,399"
26 | 30/07/2016,"2,629","10,168",7.22,9,800,245,12,9,"1,164"
27 | 31/07/2016,"2,555","11,686",8.34,28,824,151,14,62,"1,158"
28 | 1/08/2016,"3,010","13,991",9.93,38,632,207,56,62,"1,643"
29 | 2/08/2016,"2,694","13,444",9.59,30,858,161,12,80,"1,273"
30 | 3/08/2016,"2,713","12,398",8.8,53,680,187,10,67,"1,271"
31 | 4/08/2016,"2,640","11,986",8.51,44,651,269,4,12,"1,255"
32 | 5/08/2016,"2,680","12,858",9.15,49,756,169,6,61,"1,220"
33 |
--------------------------------------------------------------------------------
/Chapter 03/4Binary Clustering - Math test.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
3 | Copyright (C) 2015 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 |
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 |
18 | [Workspace loaded from ~/.RData]
19 |
20 | Mathtest = read.table("d:/math test.txt",header=T)
21 | dist.items <- dist(Mathtest[,-1], method='euclidean')^2
22 | dist.items
23 |
24 | dist.items.2 <- dist(Mathtest[,-1], method='binary')
25 | dist.items.2
26 |
27 | dist.items.3 <- dist(1 - Mathtest[,-1], method='binary')
28 | dist.items.3
29 |
30 | items.complete.link <- hclust(dist.items, method='complete')
31 | items.complete.link
32 |
33 | plot(items.complete.link, labels=Mathtest[,1], ylab="Distance")
34 |
35 | items.sing.link <- hclust(dist.items, method='single')
36 | items.sing.link
37 |
38 | plot(items.sing.link, labels=Mathtest[,1], ylab="Distance")
39 |
40 | library(cluster)
41 | my.k.choices <- 2:8
42 |
43 | avg.sil.width <- rep(0, times=length(my.k.choices))
44 | for (ii in (1:length(my.k.choices)) ){
45 | avg.sil.width[ii] <- pam(dist.items, k=my.k.choices[ii])$silinfo$avg.width
46 | }
47 | print( cbind(my.k.choices, avg.sil.width) )
48 |
49 | items.kmed.2 <- pam(dist.items, k=2, diss=T)
50 | items.kmed.2
51 |
52 | items.2.clust <- lapply(1:2, function(nc) Mathtest[,1][items.kmed.2$clustering==nc])
53 | items.2.clust
54 |
55 | items.kmed.3 <- pam(dist.items, k=3, diss=T)
56 | items.kmed.3
57 |
58 | items.3.clust <- lapply(1:3, function(nc) Mathtest[,1][items.kmed.3$clustering==nc])
59 | items.3.clust
--------------------------------------------------------------------------------
/Chapter 11/1Recurrent Neural Networks - Predicting periodic signal.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
3 | Copyright (C) 2016 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 |
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 | install.packages("rnn")
18 | library(rnn)
19 |
20 | set.seed(10)
21 |
22 | f <- 5
23 |
24 | w <- 2*pi*f
25 |
26 | t <- seq(0.005,2,by=0.005)
27 |
28 | x <- sin(t*w) + rnorm(200, 0, 0.25)
29 |
30 | y <- cos(t*w)
31 |
32 | X <- matrix(x, nrow = 40)
33 |
34 |
35 | Y <- matrix(y, nrow = 40)
36 |
37 |
38 | plot(as.vector(X), col='blue', type='l', ylab = "x-matrix, y-matrix", main = "Noisy waves")
39 |
40 |
41 | lines(as.vector(Y), col = "red")
42 |
43 |
44 | X <- (X - min(X)) / (max(X) - min(X))
45 |
46 | X
47 |
48 | Y <- (Y - min(Y)) / (max(Y) - min(Y))
49 |
50 | Y
51 |
52 | X <- t(X)
53 |
54 | Y <- t(Y)
55 |
56 | train <- 1:8
57 |
58 | test <- 9:10
59 |
60 | model <- trainr(Y = Y[train,],
61 | X = X[train,],
62 | learningrate = 0.05,
63 | hidden_dim = 16,
64 | numepochs = 1500)
65 |
66 |
67 |
68 | Y_predicted <- predictr(model, X)
69 |
70 |
71 | plot(as.vector(t(Y)), col = 'red', type = 'l', main = "Actual values vs Predicted values", ylab = "Y, Y-predicted")
72 |
73 |
74 | lines(as.vector(t(Y_predicted)), type = 'l', col = 'blue')
75 |
76 |
77 | plot(as.vector(t(Y[test,])), col = 'red', type='l', main = "Actual vs predicted: testing set", ylab = "Y,Y-predicted")
78 |
79 |
80 |
81 | lines(as.vector(t(Y_predicted[test,])), type = 'l', col = 'blue')
82 |
--------------------------------------------------------------------------------
/Chapter 06/2Decision tree learning - Income Based Distribution of Real Estate Value.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
3 | Copyright (C) 2016 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 |
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 |
18 | [Workspace loaded from ~/.RData]
19 |
20 | library(tree)
21 |
22 | realEstate <- read.table("d:/RealEstate.txt", header=TRUE)
23 |
24 | dim(realEstate)
25 |
26 | str(realEstate)
27 |
28 | head(realEstate)
29 |
30 | summary(realEstate)
31 |
32 | treeModel <- tree(log(MedianHouseValue) ~ Longitude + Latitude, data=realEstate)
33 |
34 | summary(treeModel)
35 |
36 | plot(treeModel)
37 |
38 | text(treeModel, cex=.75)
39 |
40 | priceDeciles <- quantile(realEstate$MedianHouseValue, 0:10/10)
41 |
42 | priceDeciles
43 |
44 | summary(priceDeciles)
45 |
46 | cutPrices <- cut(realEstate$MedianHouseValue, priceDeciles, include.lowest=TRUE)
47 |
48 | head(cutPrices)
49 |
50 | summary(cutPrices)
51 |
52 | plot(realEstate$Longitude, realEstate$Latitude, col=grey(10:2/11)[cutPrices], pch=20, xlab="Longitude",ylab="Latitude")
53 |
54 | summary(realEstate$Longitude)
55 |
56 | head(realEstate$Longitude)
57 |
58 | summary( realEstate$Latitude)
59 |
60 | head( realEstate$Latitude)
61 |
62 | partition.tree(treeModel, ordvars=c("Longitude","Latitude"), add=TRUE)
63 |
64 | treeModel2 <- tree(log(MedianHouseValue) ~ Longitude + Latitude, data=realEstate, mindev=0.001)
65 |
66 | summary(treeModel2)
67 |
68 | plot(treeModel2)
69 |
70 | text(treeModel2, cex=.65)
71 |
72 | treeModel3 <- tree(log(MedianHouseValue) ~ ., data=realEstate)
73 |
74 | summary(treeModel3)
75 |
76 | plot(treeModel3)
77 |
78 | text(treeModel3, cex=.75)
--------------------------------------------------------------------------------
/Chapter 03/2Hierarchical Clustering - NASA Under story.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
3 | Copyright (C) 2015 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 |
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 |
18 | [Workspace loaded from ~/.RData]
19 |
20 | NASA = read.csv("d:/NASAUnderstory.csv",header=T)
21 | NASA.lab=NASA$Labels
22 | NASA.lab
23 | NASA=NASA[,-32]
24 | NASA
25 |
26 | NASAscale<-scale(NASA[,3:31])
27 | NASAscale
28 |
29 | rownames(NASAscale)=as.factor(NASA$Overstory.Species)
30 | rownames(NASAscale)
31 |
32 | dist1<-dist(NASAscale, method="euclidean")
33 |
34 | clust1<-hclust(dist1,method="ward.D")
35 | clust1
36 |
37 | plot(clust1,labels= NASA[,2], cex=0.5, xlab="",ylab="Distance",main="Clustering for NASA Understory Data")
38 |
39 | rect.hclust(clust1,k=2)
40 |
41 | cuts=cutree(clust1,k=2)
42 |
43 | cuts
44 |
45 |
46 | library(vegan)
47 |
48 | dist1<-vegdist(NASA[,3:31], method="jaccard", upper=T)
49 | clust1<-hclust(dist1,method="ward.D")
50 |
51 | clust1
52 |
53 | plot(clust1,labels= NASA[,2], cex=0.5, xlab="",ylab="Distance",main="Clustering for NASA Understory Data")
54 | rect.hclust(clust1,k=2)
55 | cuts=cutree(clust1,k=2)
56 | cuts
57 |
58 |
59 | clusplot(NASA, cuts, color=TRUE, shade=TRUE, labels=2, lines=0,
60 | main="NASA Two Cluster Plot, Ward's Method, First two PC")
61 |
62 | library(fpc)
63 | NASAtrans=t(NASAscale)
64 |
65 |
66 | dist1<-dist(NASAtrans, method="minkowski", p=3)
67 |
68 |
69 | clust1<-hclust(dist1,method="ward.D")
70 | clust1
71 |
72 | plot(clust1,labels= NASA.lab[1:29], cex=1, xlab="",ylab="Distance",main="Clustering for NASA Understory Data")
73 | rect.hclust(clust1,k=3)
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/Chapter 08/1Markov Chains - Stocks Regime Switching Model.r:
--------------------------------------------------------------------------------
1 | R version 3.2.2 (2015-08-14) -- "Fire Safety"
2 | Copyright (C) 2015 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 | install.packages("MSwM")
20 |
21 | library(MSwM)
22 |
23 | MarkovSwitchData <- read.csv("d:/StocksRegimeSwitching.csv", header = TRUE)
24 |
25 | attach(MarkovSwitchData)
26 |
27 | head(MarkovSwitchData)
28 |
29 | dim(MarkovSwitchData)
30 |
31 | summary(MarkovSwitchData)
32 |
33 | yLogValueStocks <- cbind(LVS)
34 |
35 | head(yLogValueStocks)
36 |
37 | yLogGrowthStocks <- cbind(LGS)
38 |
39 | head(yLogGrowthStocks)
40 |
41 | x <- cbind(LRY, LRC, INT, LRV)
42 |
43 | olsLogValueStocks <- lm(yLogValueStocks~x)
44 |
45 | summary(olsLogValueStocks)
46 |
47 | olsLogGrowthStocks <- lm(yLogGrowthStocks~x)
48 |
49 | summary(olsLogGrowthStocks)
50 |
51 | MarkovSwtchLogValueStocks <- msmFit(olsLogValueStocks, k = 2, sw = rep(TRUE, 6))
52 |
53 | summary(MarkovSwtchLogValueStocks)
54 |
55 | MarkoSwtchLogGrowthStocks <- msmFit(olsLogGrowthStocks, k = 2, sw = rep(TRUE, 6))
56 |
57 | summary(MarkoSwtchLogGrowthStocks)
58 |
59 | par(mar=c(3,3,3,3))
60 |
61 | plotProb(MarkovSwtchLogValueStocks, which=1)
62 |
63 | plotProb(MarkoSwtchLogValueStocks, which=2)
64 |
65 | plotProb(MarkoSwtchLogGrowthStocks, which=1)
66 |
67 | plotProb(MarkoSwtchLogGrowthStocks, which=2)
68 |
69 | par(mar=c(3,3,3,3))
70 |
71 | plotDiag(MarkovSwtchLogValueStocks, regime=1, which=1)
72 |
73 | plotDiag(MarkovSwtchLogValueStocks, regime=1, which=2)
74 |
75 | plotDiag(MarkoSwtchLogGrowthStocks, regime=1, which=1)
76 |
77 | plotDiag(MarkoSwtchLogGrowthStocks, regime=1, which=2)
78 |
79 | plotDiag(MarkoSwtchLogGrowthStocks, regime=1, which=3)
80 |
81 |
--------------------------------------------------------------------------------
/Chapter 09/1HMM - EUR & USD.r:
--------------------------------------------------------------------------------
1 | R version 3.2.2 (2015-08-14) -- "Fire Safety"
2 | Copyright (C) 2015 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 | install.packages("depmixS4")
17 | library(depmixS4)
18 | install.packages("quantmod")
19 | library(quantmod)
20 | install.packages("ggplot2")
21 | library(ggplot2)
22 |
23 |
24 | EuroUSD <- read.csv("d:/EURUSD1d.csv", header = TRUE)
25 |
26 | head(EuroUSD)
27 |
28 |
29 | summary(EuroUSD)
30 |
31 |
32 | str(EuroUSD)
33 |
34 | Date <- as.character(EuroUSD[,1])
35 |
36 | DateTimeSeries <- as.POSIXlt(Date, format = "%Y.%m.%d %H:%M:%S")
37 |
38 | TimeSeriesData <- data.frame(EuroUSD[,2:5], row.names = DateTimeSeries)
39 |
40 |
41 | head(TimeSeriesData)
42 |
43 |
44 | TimeSeriesData <-as.xts(TimeSeriesData)
45 |
46 | ATRindicator <- ATR(TimeSeriesData[,2:4],n=14)
47 |
48 | head(ATRindicator)
49 |
50 |
51 | TrueRange <- ATRindicator[,2]
52 |
53 | head(TrueRange)
54 |
55 |
56 | LogReturns <- log(EuroUSD$Close) - log(EuroUSD$Open)
57 |
58 | summary(LogReturns)
59 |
60 | HMMModel <- data.frame(LogReturns, TrueRange)
61 |
62 | HMMModel <- HMMModel[-c(1:14),]
63 |
64 | head(HMMModel)
65 |
66 | colnames(HMMModel) <- c("LogReturns","TrueRange")
67 |
68 |
69 | colnames(HMMModel)
70 |
71 | set.seed(1)
72 |
73 | HMM <- depmix(list(LogReturns~1, TrueRange~1), data = HMMModel, nstates=3, family=list(gaussian(), gaussian()))
74 |
75 | HMMfit <- fit(HMM, verbose = FALSE)
76 |
77 |
78 | print(HMMfit)
79 |
80 | summary(HMMfit)
81 |
82 | HMMstate <- posterior(HMMfit)
83 |
84 | head(HMMstate)
85 |
86 |
87 | DFIndicators <- data.frame(DateTimeSeries, LogReturns, TrueRange)
88 |
89 | DFIndicatorsClean <- DFIndicators[-c(1:14), ]
90 |
91 | Plot1Data <- data.frame(DFIndicatorsClean, HMMstate$state)
92 |
93 | LogReturnsPlot <- ggplot(Plot1Data,aes(x=Plot1Data[,1],y=Plot1Data[,2]))+geom_line(color="darkred")+labs(title="Euro USD Daily Log Returns",y="Log Return Values",x="Date")
94 | LogReturnsPlot
95 |
--------------------------------------------------------------------------------
/Chapter 04/3Principal Component Analysis - Understanding world cuisine.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 | install.packages("glmnet")
19 |
20 | library(ggplot2)
21 | library(glmnet)
22 |
23 | datafile <- file.path("d:","epic_recipes.txt")
24 |
25 | recipes_data <- read.table(datafile, fill=TRUE, col.names=1:max(count.fields(datafile)), na.strings=c("", "NA"), stringsAsFactors = FALSE)
26 |
27 |
28 | agg <- aggregate(recipes_data[,-1], by=list(recipes_data[,1]), paste, collapse=",")
29 |
30 | agg$combined <- apply(agg[,2:ncol(agg)], 1, paste, collapse=",")
31 |
32 | agg$combined <- gsub(",NA","",agg$combined)
33 |
34 |
35 |
36 | cuisines <- as.data.frame(table(recipes_data[,1]))
37 |
38 | cuisines
39 |
40 |
41 | ingredients_freq <- lapply(lapply(strsplit(a$combined,","), table), as.data.frame)
42 |
43 |
44 | names(ingredients_freq) <- agg[,1]
45 |
46 | proportion <- lapply(seq_along(ingredients_freq), function(i) {
47 | colnames(ingredients_freq[[i]])[2] <- names(ingredients_freq)[i]
48 | ingredients_freq[[i]][,2] <- ingredients_freq[[i]][,2]/cuisines[i,2]
49 | ingredients_freq[[i]]}
50 | )
51 |
52 |
53 | names(proportion) <- a[,1]
54 |
55 |
56 | final <- Reduce(function(...) merge(..., all=TRUE, by="Var1"), proportion)
57 |
58 |
59 | row.names(final) <- final[,1]
60 |
61 | final <- final[,-1]
62 |
63 | final[is.na(final)] <- 0
64 |
65 | prop_matrix <- t(final)
66 |
67 | s <- sort(apply(prop_matrix, 2, sd), decreasing=TRUE)
68 |
69 | final_imp<- scale(subset(prop_matrix, select=names(which(s > 0.1))))
70 |
71 | heatmap.2(final_imp, trace="none", margins = c(6,11), col=topo.colors(7), key=TRUE, key.title=NA, keysize=1.2, density.info="none")
72 |
73 |
74 | pca_computation <- princomp(final_imp)
75 |
76 |
77 | pca_computation
78 |
79 |
80 | biplot(pca_computation, pc.biplot=TRUE, col=c("black","red"), cex=c(0.9,0.8), xlim=c(-2.5,2.5), xlab="PC1, 39.7%", ylab="PC2, 24.5%")
--------------------------------------------------------------------------------
/Chapter 06/6Support Vector Machine - Currency Trading Strategy.r:
--------------------------------------------------------------------------------
1 | R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | install.packages("quantmod")
18 | library(quantmod)
19 | install.packages("e1071")
20 | library(e1071)
21 | install.packages("ggplot2")
22 | install.packages("Hmisc")
23 | library(ggplot2)
24 |
25 | library(Hmisc)
26 |
27 | PoundDollar <- read.csv("d:/PoundDollar.csv")
28 |
29 | head(PoundDollar)
30 |
31 | str(PoundDollar)
32 |
33 | relativeStrengthIndex3 <- RSI(Op(PoundDollar), n= 3)
34 |
35 | summary(relativeStrengthIndex3)
36 |
37 | SeriesMeanAvg50 <- SMA(Op(PoundDollar),n=50)
38 |
39 | summary(SeriesMeanAvg50)
40 |
41 | describe(SeriesMeanAvg50)
42 |
43 | Trend <- Op(PoundDollar) - SeriesMeanAvg50
44 |
45 | summary(Trend)
46 |
47 | PriceDiff <- Cl(PoundDollar) - Op(PoundDollar)
48 |
49 | summary(PriceDiff)
50 |
51 | binaryClassification <- ifelse(PriceDiff>0,"UP","DOWN")
52 |
53 | summary(binaryClassification)
54 |
55 | DataSet <- data.frame(relativeStrengthIndex3, Trend, binaryClassification)
56 |
57 | str(DataSet)
58 |
59 | DataSet<-DataSet[-c(1:49),]
60 |
61 | dim(DataSet)
62 |
63 | TrainingDataSet <- DataSet[1:4528,]
64 |
65 | dim(TrainingDataSet)
66 |
67 | summary(TrainingDataSet)
68 |
69 | TestDataSet <- DataSet[4529:6038,]
70 |
71 | dim(TestDataSet)
72 |
73 | summary(TestDataSet)
74 |
75 | SVM <- svm(binaryClassification~relativeStrengthIndex3+Trend, data=TrainingDataSet, kernel="radial",cost=1,gamma=1/2)
76 |
77 | summary(SVM)
78 |
79 | TrainingPredictions <- predict(SVM,TrainingDataSet, type="class")
80 |
81 | summary(TrainingPredictions)
82 |
83 | describe(TrainingPredictions)
84 |
85 | TrainingData <- data.frame(TrainingDataSet, TrainingPredictions)
86 |
87 | summary(TrainingData)
88 |
89 | ggplot(TrainingData,aes(x=Trend,y=relativeStrengthIndex3))+stat_density2d(geom="contour",aes(color=TrainingPredictions))+labs(title="SVM Relative Strength Index & Trend Predictions",x="Open - SMA50",y="RSI3",color="Training Predictions")
--------------------------------------------------------------------------------
/Chapter 07/1Self Organizing Map - Visualisations of heatmaps.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R
16 |
17 | library(kohonen)
18 |
19 | training_frame <- data[, c(2,4,5,8)]
20 |
21 | training_matrix <- as.matrix(scale(training_frame))
22 |
23 | training_matrix
24 | zn chas nox dis
25 | 1 0.28454827 -0.2723291 -0.14407485 0.1400749840
26 | 2 -0.48724019 -0.2723291 -0.73953036 0.5566090496
27 | 3 -0.48724019 -0.2723291 -0.73953036 0.5566090496
28 | 4 -0.48724019 -0.2723291 -0.83445805 1.0766711351
29 | 5 -0.48724019 -0.2723291 -0.83445805 1.0766711351
30 | 6 -0.48724019 -0.2723291 -0.83445805 1.0766711351
31 | 7 0.04872402 -0.2723291 -0.26489191 0.8384142195
32 | 8 0.04872402 -0.2723291 -0.26489191 1.0236248974
33 | 9 0.04872402 -0.2723291 -0.26489191 1.0861216287
34 | 10 0.04872402 -0.2723291 -0.26489191 1.3283202075
35 | 11 0.04872402 -0.2723291 -0.26489191 1.2117799501
36 | 12 0.04872402 -0.2723291 -0.26489191 1.1547920492
37 | 13 0.04872402 -0.2723291 -0.26489191 0.7863652700
38 | 14 -0.48724019 -0.2723291 -0.14407485 0.4333252240
39 | 15 -0.48724019 -0.2723291 -0.14407485 0.3166899868
40 | 16 -0.48724019 -0.2723291 -0.14407485 0.3341187865
41 | 17 -0.48724019 -0.2723291 -0.14407485 0.3341187865
42 | 18 -0.48724019 -0.2723291 -0.14407485 0.2198105553
43 | 19 -0.48724019 -0.2723291 -0.14407485 0.0006920764
44 |
45 |
46 |
47 | som_grid <- somgrid(xdim = 20, ydim=20, topo="hexagonal")
48 |
49 |
50 | som_model <- som(training_matrix,
51 | grid=som_grid,
52 | rlen=1000,
53 | alpha=c(0.05,0.01),
54 | keep.data = TRUE,
55 | n.hood="circular")
56 |
57 | plot(som_model, main ="Training Progress", type="changes", col = "red")
58 |
59 |
60 |
61 | plot(som_model, main ="Node Count", type="count")
62 |
63 |
64 | plot(som_model, main ="Neighbour Distances", type="dist.neighbours")
65 |
66 |
67 | plot(som_model, type="codes")
68 |
69 | plot(som_model, type = "property", property = som_model$codes[,4], main=names(som_model$data)[4])
70 |
--------------------------------------------------------------------------------
/Chapter 10/1Modelling S&P 500.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
3 | Copyright (C) 2016 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 |
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 |
18 | install.packages("quantmod")
19 | install.packages("neuralnet")
20 | library(quantmod)
21 | library(neuralnet)
22 |
23 | startDate = as.Date("2009-01-01")
24 |
25 | endDate = as.Date("2014-01-01")
26 |
27 | getSymbols("^GSPC",src="yahoo",from=startDate,to=endDate)
28 |
29 | relativeStrengthIndex3 <- RSI(Op(GSPC),n=3)
30 |
31 | summary(relativeStrengthIndex3)
32 |
33 | exponentialMovingAverage5 <- EMA(Op(GSPC),n=5)
34 |
35 |
36 | head(exponentialMovingAverage5)
37 |
38 | summary(exponentialMovingAverage5)
39 |
40 | exponentialMovingAverageDiff <- Op(GSPC) - exponentialMovingAverage5
41 |
42 | head(exponentialMovingAverageDiff)
43 |
44 | summary(exponentialMovingAverageDiff)
45 |
46 |
47 | MACD <- MACD(Op(GSPC),fast = 12, slow = 26, signal = 9)
48 |
49 | tail(MACD)
50 |
51 |
52 | summary(MACD)
53 |
54 | MACDsignal <- MACD[,2]
55 |
56 | BollingerBands <- BBands(Op(GSPC),n=20,sd=2)
57 |
58 | tail(BollingerBands)
59 |
60 | summary(BollingerBands)
61 |
62 | PercentageChngpctB <- BollingerBands[,4]
63 |
64 |
65 |
66 | tail(PercentageChngpctB)
67 |
68 | summary(PercentageChngpctB)
69 |
70 |
71 | Price <- Cl(GSPC)-Op(GSPC)
72 |
73 | tail(Price)
74 |
75 | DataSet<-data.frame(relativeStrengthIndex3, exponentialMovingAverage5, MACDsignal, PercentageChngpctB, Price)
76 |
77 | str(DataSet)
78 |
79 | DataSet<-DataSet[-c(1:33),]
80 |
81 | dim(DataSet)
82 |
83 | colnames(DataSet)<-c("RSI3","EMAcross","MACDsignal","BollingerB","Price")
84 |
85 | str(DataSet)
86 |
87 | Normalized <-function(x) {(x-min(x))/(max(x)-min(x))}
88 |
89 | NormalizedData<-as.data.frame(lapply(DataSet,Normalized))
90 |
91 | tail(NormalizedData)
92 |
93 | TrainingSet<-NormalizedData[1:816,]
94 |
95 | dim(TrainingSet)
96 |
97 | summary(TrainingSet)
98 |
99 |
100 | TestSet<-NormalizedData[817:1225 ,]
101 |
102 | dim(TestSet)
103 |
104 | summary(TestSet)
105 |
106 | nn1<-neuralnet(Price~RSI3+EMAcross+MACDsignal+BollingerB,data=TrainingSet, hidden=c(3,3), learningrate=.001,algorithm="backprop")
107 |
108 |
109 | plot(nn1)
--------------------------------------------------------------------------------
/Chapter 04/2Dimension Reduction Methods - Delta's aircraft fleet.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R
16 |
17 | install.packages("rgl")
18 | install.packages("RColorBrewer")
19 | install.packages("scales")
20 |
21 |
22 | library(rgl)
23 | library(RColorBrewer)
24 | library(scales)
25 |
26 | delta <- read.csv(file="d:/delta.csv", header=T, sep=",", row.names=1)
27 |
28 | str(delta)
29 |
30 | plot(delta[,16:22], main = "Aircraft Physical Characteristics", col = "red")
31 |
32 | principal_comp_analysis <- princomp(delta)
33 |
34 | principal_comp_analysis
35 |
36 | plot(principal_comp_analysis, main ="Principal Components Analysis of Raw Data", col ="blue")
37 |
38 |
39 | loadings(principal_comp_analysis)
40 |
41 | mar <- par()$mar
42 | par(mar=mar+c(0,5,0,0))
43 |
44 | barplot(sapply(delta, var), horiz=T, las=1, cex.names=0.8, main = "Regular Scaling of Variance", col = "Red", xlab = "Variance")
45 |
46 |
47 | barplot(sapply(delta, var), horiz=T, las=1, cex.names=0.8, log='x', main = "Logarithmic Scaling of Variance", col = "Blue", xlab = "Variance")
48 |
49 |
50 | par(mar=mar)
51 |
52 |
53 | delta2 <- data.frame(scale(delta))
54 |
55 |
56 | plot(sapply(delta2, var), main = "Variances Across Different Variables", ylab = "Variances")
57 |
58 | principal_comp_analysis <- princomp(delta2)
59 |
60 |
61 |
62 | plot(principal_comp_analysis, main ="Principal Components Analysis of Scaled Data", col ="red")
63 |
64 |
65 |
66 | plot(principal_comp_analysis, type='l', main ="Principal Components Analysis of Scaled Data")
67 |
68 |
69 |
70 | summary(principal_comp_analysis)
71 |
72 |
73 | principal_comp_vectors <- prcomp(delta2)
74 |
75 | comp <- data.frame(principal_comp_vectors$x[,1:4])
76 |
77 | k_means <- kmeans(comp, 4, nstart=25, iter.max=1000)
78 |
79 |
80 |
81 | palette(alpha(brewer.pal(9,'Set1'), 0.5))
82 |
83 | plot(comp, col=k_means$clust, pch=16)
84 |
85 |
86 |
87 | plot3d(comp$PC1, comp$PC2, comp$PC3, col=k_means$clust)
88 |
89 |
90 | plot3d(comp$PC1, comp$PC3, comp$PC4, col=k_means$clust)
91 |
92 | sort(table(k_means$clust))
93 |
94 |
95 | clust <- names(sort(table(k_means$clust)))
96 |
97 | row.names(delta[k_means$clust==clust[1],])
98 |
99 | row.names(delta[k_means$clust==clust[2],])
100 |
101 |
102 | row.names(delta[k_means$clust==clust[3],])
103 |
104 | row.names(delta[k_means$clust==clust[4],])
105 |
--------------------------------------------------------------------------------
/Chapter 10/2Measuring Unemployment Rate.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | library(forecast)
18 | library(lmtest)
19 | library(caret)
20 |
21 | ud <- read.csv("d:/FRED-WIUR.csv", colClasses=c('Date'='Date'))
22 |
23 |
24 | tail(ud)
25 |
26 | colnames(ud) <- c('date', 'rate')
27 |
28 | ud$date <- as.Date(ud$date)
29 |
30 | summary(ud)
31 |
32 | ud.b <- ud[1:436,]
33 |
34 |
35 | summary(ud.b)
36 |
37 | ud.p <- ud[437:448,]
38 |
39 | summary(ud.p)
40 |
41 | ud.ts <- ts(ud.b$rate, start=c(1976, 1), frequency=12)
42 |
43 | ud.ts
44 |
45 | ud.p.ts<-ts(ud.p$rate, start=c(2012, 5), frequency=12)
46 |
47 | ud.p.ts
48 |
49 | plot.ts(ud.ts)
50 |
51 |
52 | plot.ts(ud.p.ts)
53 |
54 |
55 | mean <- meanf(ud.ts, 12)
56 |
57 | forecast_randomwalk <- rwf(ud.ts, 12)
58 |
59 | forecast_arima <- snaive(ud.ts, 12)
60 |
61 | drift <- rwf(ud.ts, 12, drift=T)
62 |
63 |
64 | m1<-tslm(ud.ts~trend)
65 |
66 | m2<-tslm(ud.ts~trend+season)
67 |
68 | residual_1 <- residuals(m1)
69 |
70 | par(mfrow=c(1,2))
71 |
72 | plot(residual_1, ylab="Residuals",xlab="Year", title("Residual - Trends"), col = "red")
73 |
74 | acf(residual_1, main="ACF of residuals")
75 |
76 |
77 | residual_2 <- residuals(m2)
78 |
79 | par(mfrow=c(1,2))
80 |
81 | plot(residual_2, ylab="Residuals",xlab="Year",title("Residual - Trends + Seasonality"), col = "red")
82 |
83 | acf(residual_2, main="ACF of residuals")
84 |
85 |
86 | dwtest(m1, alt="two.sided")
87 |
88 | m3 <- stl(ud.ts, s.window='periodic')
89 |
90 | plot(m3)
91 |
92 | m4<-ets(ud.ts, model='ZZZ')
93 |
94 | plot(m4)
95 |
96 | m5 <- auto.arima(ud.ts)
97 |
98 | plot(forecast(m5, h=12))
99 |
100 | m6<-nnetar(ud.ts)
101 |
102 | m6
103 |
104 | plot(forecast(m6, h=12))
105 |
106 |
107 | a1 <- accuracy(mean, ud.p.ts)
108 |
109 | a2 <- accuracy(forecast_randomwalk, ud.p.ts)
110 |
111 | a3<-accuracy(forecast_arima, ud.p.ts)
112 |
113 | a4<-accuracy(drift, ud.p.ts)
114 |
115 | a.table<-rbind(a1, a2, a3, a4)
116 |
117 | a.table
118 |
119 | f1<-forecast(m1, h=12)
120 |
121 | f2<-forecast(m2, h=12)
122 |
123 | f3<-forecast(m3, h=12)
124 |
125 | f4<-forecast(m4, h=12)
126 |
127 | f5<-forecast(m5, h=12)
128 |
129 | f6<-forecast(m6, h=12)
130 |
131 | a5 <- accuracy(f1, ud.p.ts)
132 |
133 | a6 <- accuracy(f2, ud.p.ts)
134 |
135 | a7 <- accuracy(f3, ud.p.ts)
136 |
137 | a8 <- accuracy(f4, ud.p.ts)
138 |
139 | a9 <- accuracy(f5, ud.p.ts)
140 |
141 | a10 <- accuracy(f6, ud.p.ts)
142 |
143 | a.table.1 <- rbind(a5, a6, a7, a8, a9, a10)
144 |
145 | a.table.1
146 |
--------------------------------------------------------------------------------
/Chapter 06/4Naive Bayes - Predicting the Direction of Stock Movement.r:
--------------------------------------------------------------------------------
1 | R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 | install.packages("quantmod")
20 | library(quantmod)
21 | install.packages("lubridate")
22 | library(lubridate)
23 | install.packages("e1071")
24 | library(e1071)
25 |
26 | startDate = as.Date("2012-01-01")
27 |
28 | endDate = as.Date("2014-01-01")
29 |
30 | getSymbols("AAPL", env = .GlobalEnv, src = "yahoo", from = startDate, to = endDate)
31 |
32 | weekDays <- wday(AAPL, label=TRUE)
33 |
34 | head(weekDays)
35 |
36 | changeInPrices <- Cl(AAPL) - Op(AAPL)
37 |
38 | head(changeInPrices)
39 |
40 | summary(changeInPrices)
41 |
42 | dim(changeInPrices)
43 |
44 | binaryClassification <- ifelse(changeInPrices>0,"UP","DOWN")
45 |
46 | binaryClassification
47 |
48 | summary(binaryClassification)
49 |
50 | AAPLDataSet <- data.frame(weekDays,binaryClassification)
51 |
52 | AAPLDataSet
53 |
54 | head(AAPLDataSet)
55 |
56 | dim(AAPLDataSet)
57 |
58 | NaiveBayesclassifier <- naiveBayes(AAPLDataSet[,1], AAPLDataSet[,2])
59 |
60 | NaiveBayesclassifier
61 |
62 | exponentialMovingAverage5 <- EMA(Op(AAPL),n = 5)
63 |
64 | exponentialMovingAverage5
65 |
66 | summary(exponentialMovingAverage5)
67 |
68 | exponentialMovingAverage10 <-EMA(Op(AAPL),n = 10)
69 |
70 | exponentialMovingAverage10
71 |
72 | summary(exponentialMovingAverage10)
73 |
74 | dim(exponentialMovingAverage10)
75 |
76 | exponentialMovingAverageDiff <- exponentialMovingAverage5 - exponentialMovingAverage10
77 |
78 | exponentialMovingAverageDiff
79 |
80 | summary(exponentialMovingAverageDiff)
81 |
82 | exponentialMovingAverageDiffRound <- round(exponentialMovingAverageDiff, 2)
83 |
84 | summary(exponentialMovingAverageDiffRound)
85 |
86 | AAPLDataSetNew <- data.frame(weekDays,exponentialMovingAverageDiffRound, binaryClassification)
87 |
88 | AAPLDataSetNew
89 |
90 | summary(AAPLDataSetNew)
91 |
92 | AAPLDataSetNew <- AAPLDataSetNew[-c(1:10),]
93 |
94 | AAPLDataSetNew
95 |
96 | summary(AAPLDataSetNew)
97 |
98 | dim(AAPLDataSetNew)
99 |
100 | trainingDataSet <- AAPLDataSetNew[1:328,]
101 |
102 | dim(trainingDataSet)
103 |
104 | summary(trainingDataSet)
105 |
106 | TestDataSet <- AAPLDataSetNew[329:492,]
107 |
108 | dim(TestDataSet)
109 |
110 | summary(TestDataSet)
111 |
112 | exponentialMovingAverageDiffRoundModel <- naiveBayes(trainingDataSet[,1:2],trainingDataSet[,3])
113 |
114 | exponentialMovingAverageDiffRoundModel
115 |
116 | table(predict(exponentialMovingAverageDiffRoundModel,TestDataSet),TestDataSet[,3],dnn=list('Predicted','Actual'))
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | #Practical Machine Learning Cookbook
5 | This is the code repository for [Practical Machine Learning Cookbook](https://www.packtpub.com/big-data-and-business-intelligence/practical-machine-learning-cookbook?utm_source=github&utm_medium=repository&utm_campaign=9781785280511), published by [Packt](https://www.packtpub.com/?utm_source=github). It contains all the supporting project files necessary to work through the book from start to finish.
6 | ## About the Book
7 | Data in today’s world is the new black gold which is growing exponentially. This growth can be attributed to the growth of existing data, and new data in a structured and unstructured format from multiple sources such as social media, Internet, documents and the Internet of Things. The flow of data must be collected, processed, analyzed, and finally presented in real time to ensure that the consumers of the data are able to take informed decisions in today’s fast-changing environment. Machine learning techniques are applied to the data using the context of the problem to be solved to ensure that fast arriving and complex data can be analyzed in a scientific manner using statistical techniques. Using machine learning algorithms that iteratively learn from data, hidden patterns can be discovered. The iterative aspect of machine learning is important because as models are exposed to new data, they are able to independently adapt and learn to produce reliable decisions from new data sets.
8 | ##Instructions and Navigation
9 | All of the code is organized into folders. Each folder starts with a number followed by the application name. For example, Chapter02.
10 |
11 | Chapter 1 covers overviews of concepts and does not contain code.
12 | The required support files can be found in the Data folder within the chapter folders.
13 |
14 | The code will look like the following:
15 | ```
16 | install.packages("ggplot2")
17 | ```
18 |
19 | This book is focused on building machine learning-based applications in R. We have used R to build various solutions. We focused on how to utilize various R libraries and functions in the best possible way to overcome real-world challenges. We have tried to keep all the code as friendly and readable as possible. We feel that this will enable our readers to easily understand the code and readily use it in different scenarios.
20 |
21 | ## Related Products
22 | * [Python Machine Learning](https://www.packtpub.com/big-data-and-business-intelligence/python-machine-learning?utm_source=github&utm_medium=repository&utm_campaign=9781783555130)
23 |
24 | * [Practical Data Analysis - Second Edition](https://www.packtpub.com/big-data-and-business-intelligence/practical-data-analysis-second-edition?utm_source=github&utm_medium=repository&utm_campaign=9781785289712)
25 |
26 | * [Practical Machine Learning](https://www.packtpub.com/big-data-and-business-intelligence/practical-machine-learning?utm_source=github&utm_medium=repository&utm_campaign=9781784399689)
27 |
28 | ### Suggestions and Feedback
29 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSe5qwunkGf6PUvzPirPDtuy1Du5Rlzew23UBp2S-P3wB-GcwQ/viewform) if you have any feedback or suggestions.
30 | ### Download a free PDF
31 |
32 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
33 |
https://packt.link/free-ebook/9781785280511
--------------------------------------------------------------------------------
/Chapter 06/3Decision tree learning - Predicting the Direction of Stock Movement.r:
--------------------------------------------------------------------------------
1 | R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 | install.packages("quantmod")
20 | library("quantmod")
21 |
22 | install.packages("rpart")
23 | library("rpart")
24 |
25 | install.packages("rpart.plot")
26 | library("rpart.plot")
27 |
28 |
29 | startDate = as.Date("2012-01-01")
30 |
31 | endDate = as.Date("2014-01-01")
32 |
33 | getSymbols("BAC", env = .GlobalEnv, src = "yahoo", from = startDate, to = endDate)
34 |
35 | relativeStrengthIndex3<-RSI(Op(BAC), n= 3)
36 |
37 | relativeStrengthIndex3
38 |
39 | exponentialMovingAverage5<-EMA(Op(BAC),n=5)
40 |
41 | exponentialMovingAverage5
42 |
43 | dim(exponentialMovingAverage5)
44 |
45 | str(exponentialMovingAverage5)
46 |
47 | exponentialMovingAverageDiff<- Op(BAC)-exponentialMovingAverage5
48 |
49 | exponentialMovingAverageDiff
50 |
51 | MACD<-MACD(Op(BAC),fast = 12, slow = 26, signal = 9)
52 |
53 | MACD
54 |
55 | head(MACD)
56 |
57 | MACDsignal<-MACD[,2]
58 |
59 | MACDsignal
60 |
61 | stochasticOscillator <- SMI(Op(BAC),n=13,slow=25,fast=2,signal=9)
62 |
63 |
64 | stochasticOscillator
65 |
66 |
67 | stochasticOscillatorSignal <- stochasticOscillator[,1]
68 |
69 | stochasticOscillatorSignal
70 |
71 |
72 | PriceChange <- Cl(BAC) - Op(BAC)
73 |
74 | PriceChange
75 |
76 | binaryClassification <- ifelse(PriceChange>0,"UP","DOWN")
77 |
78 | binaryClassification
79 |
80 | str(binaryClassification)
81 |
82 |
83 | DataSet<-data.frame(relativeStrengthIndex3, exponentialMovingAverageDiff, MACDsignal, stochasticOscillator, binaryClassification)
84 |
85 | DataSet
86 |
87 |
88 | head(DataSet)
89 |
90 | str(DataSet)
91 |
92 | colnames(DataSet) <- c("relativeStrengthIndex3", "exponentialMovingAverageDiff", "MACDsignal", "stochasticOscillator", "binaryClassification")
93 |
94 |
95 | colnames(DataSet)
96 |
97 | DataSet<-DataSet[-c(1:33),]
98 |
99 |
100 | DataSet
101 |
102 |
103 | head(DataSet)
104 |
105 | str(DataSet)
106 |
107 | dim(DataSet)
108 |
109 | TrainingDataSet <- DataSet[1:312,]
110 | TrainingDataSet
111 |
112 | str(TrainingDataSet)
113 |
114 | TestDataSet <- DataSet[313:469,]
115 |
116 | TestDataSet
117 |
118 | str(TestDataSet)
119 |
120 | DecisionTree<-rpart(binaryClassification~relativeStrengthIndex3+exponentialMovingAverageDiff+MACDsignal+stochasticOscillator,data=TrainingDataSet, cp=.001)
121 |
122 |
123 | prp(DecisionTree,type=2)
124 |
125 | printcp(DecisionTree)
126 |
127 | plotcp(DecisionTree,upper="splits")
128 |
129 | PrunedDecisionTree<-prune(DecisionTree,cp=0.041428)
130 |
131 | prp(PrunedDecisionTree, type=4)
132 |
133 | table(predict(PrunedDecisionTree,TestDataSet),TestDataSet[,5],dnn=list('predicted','actual'))
134 |
--------------------------------------------------------------------------------
/Chapter 06/5Random Forest - Currency Trading Strategy.r:
--------------------------------------------------------------------------------
1 | R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | install.packages("quantmod")
18 | library(quantmod)
19 | install.packages("randomForest")
20 | library(randomForest)
21 | install.packages("Hmisc")
22 | library(describe)
23 |
24 | PoundDollar <- read.csv("d:/PoundDollar.csv")
25 |
26 | head(PoundDollar)
27 |
28 | summary(PoundDollar)
29 |
30 | dim(PoundDollar)
31 |
32 | DateAndTime <- as.POSIXlt(PoundDollar[,2],format="%m/%d/%y %H:%M")
33 |
34 | HighLowClose <- PoundDollar[,4:6]
35 |
36 | head(HighLowClose)
37 |
38 | summary(HighLowClose)
39 |
40 | str(HighLowClose)
41 |
42 | HighLowClosets <- data.frame(HighLowClose,row.names=DateAndTime)
43 |
44 | describe(HighLowClosets)
45 |
46 | HighLowClosexts <- as.xts(HighLowClosets)
47 |
48 | BollingerBands <- BBands(HighLowClosexts,n=20,SMA,sd=2)
49 |
50 | describe(BollingerBands)
51 |
52 | Upper <- BollingerBands$up - HighLowClosexts$Close
53 |
54 | summary(Upper)
55 |
56 | Lower <- BollingerBands$dn - HighLowClosexts$Close
57 |
58 | summary(Lower)
59 |
60 | Middle <- BollingerBands$mavg - HighLowClosexts$Close
61 |
62 | summary(Middle)
63 |
64 | PercentageChngpctB <- Delt(BollingerBands$pctB,k=1)
65 |
66 | describe(PercentageChngpctB)
67 |
68 | PercentageChngUp <-Delt(Upper,k=1)
69 |
70 | describe(PercentageChngUp)
71 |
72 | PercentageChngLow <- Delt(Lower,k=1)
73 |
74 | describe(PercentageChngLow)
75 |
76 | PercentageChngMid <- Delt(Middle,k=1)
77 |
78 | describe(PercentageChngMid)
79 |
80 | Returns <- Delt(HighLowClosexts$Close,k=1)
81 |
82 | binaryClassification <- ifelse(Returns>0,"Up","Down")
83 |
84 |
85 | summary(binaryClassification)
86 |
87 | ClassShifted <- binaryClassification[-1]
88 |
89 | FeaturesCombined <- data.frame(Upper, Lower, Middle, BollingerBands$pctB, PercentageChngpctB, PercentageChngUp, PercentageChngLow, PercentageChngMid)
90 |
91 | summary(FeaturesCombined)
92 |
93 | FeaturesShifted <- FeaturesCombined[-5257,]
94 |
95 | FeaturesClassData <- data.frame(FeaturesShifted,ClassShifted)
96 |
97 | summary(FeaturesClassData)
98 |
99 | FinalModelData <- FeaturesClassData[-c(1:20),]
100 |
101 | colnames(FinalModelData) <- c("pctB","LowDiff","UpDiff","MidDiff","PercentageChngpctB","PercentageChngUp","PercentageChngLow","PercentageChngMid","binaryClassification")
102 |
103 | str(FinalModelData)
104 |
105 | set.seed(1)
106 |
107 | FeatureNumber <- tuneRF(FinalModelData[,-9],FinalModelData[,9],ntreeTry=100, stepFactor=1.5,improve=0.01, trace=TRUE, plot=TRUE, dobest=FALSE)
108 |
109 | RandomForest <- randomForest(binaryClassification~.,data=FinalModelData,mtry=2,ntree=2000,keep.forest=TRUE,importance=TRUE)
110 |
111 | varImpPlot(RandomForest, main = 'Random Forest: Measurement of Importance of Each Feature',pch=16,col='blue' )
112 |
113 |
114 |
115 |
116 |
--------------------------------------------------------------------------------
/Chapter 08/Data/StocksRegimeSwitching.csv:
--------------------------------------------------------------------------------
1 | DATE,LRY, LRV ,INT,LRC,LVS,LGS
2 | 1997Q3, 11.49 , 6.74 , 10.73 , 13.35 , 4.95 , 4.90
3 | 1997Q4, 11.51 , 12.97 , 11.09 , 13.33 , 4.63 , 4.43
4 | 1998Q1, 11.44 , 13.41 , 12.45 , 13.30 , 4.36 , 3.99
5 | 1998Q2, 11.42 , 4.26 , 13.51 , 13.31 , 4.55 , 4.20
6 | 1998Q3, 11.39 , 33.71 , 12.55 , 13.28 , 4.05 , 3.83
7 | 1998Q4, 11.39 , 3.54 , 10.00 , 13.31 , 3.90 , 3.36
8 | 1999Q1, 11.43 , 2.64 , 9.59 , 13.31 , 4.49 , 3.83
9 | 1999Q2, 11.47 , 3.53 , 8.73 , 13.29 , 4.40 , 3.70
10 | 1999Q3, 11.47 , 3.93 , 8.10 , 13.28 , 4.91 , 4.15
11 | 1999Q4, 11.50 , 1.34 , 7.82 , 13.26 , 4.63 , 3.93
12 | 2000Q1, 11.54 , 2.04 , 7.77 , 13.23 , 4.78 , 4.19
13 | 2000Q2, 11.55 , 2.25 , 7.73 , 13.23 , 4.95 , 4.30
14 | 2000Q3, 11.56 , 1.49 , 7.63 , 13.22 , 4.75 , 4.06
15 | 2000Q4, 11.57 , 1.34 , 7.54 , 13.23 , 4.58 , 3.89
16 | 2001Q1, 11.55 , 0.67 , 7.36 , 13.26 , 4.55 , 3.90
17 | 2001Q2, 11.55 , 2.52 , 7.25 , 13.28 , 4.52 , 3.89
18 | 2001Q3, 11.55 , 2.22 , 7.10 , 13.30 , 4.52 , 3.67
19 | 2001Q4, 11.57 , 0.67 , 6.80 , 13.32 , 4.60 , 3.74
20 | 2002Q1, 11.58 , 0.61 , 6.64 , 13.32 , 4.69 , 3.82
21 | 2002Q2, 11.60 , 0.63 , 6.61 , 13.30 , 4.84 , 3.87
22 | 2002Q3, 11.62 , 0.73 , 6.63 , 13.30 , 4.83 , 3.83
23 | 2002Q4, 11.64 , 0.54 , 6.55 , 13.31 , 4.68 , 3.71
24 | 2003Q1, 11.64 , 0.58 , 6.53 , 13.31 , 4.65 , 3.66
25 | 2003Q2, 11.66 , 0.41 , 6.34 , 13.34 , 4.63 , 3.70
26 | 2003Q3, 11.67 , 0.41 , 6.19 , 13.36 , 4.75 , 3.81
27 | 2003Q4, 11.70 , 0.62 , 6.13 , 13.35 , 4.80 , 3.81
28 | 2004Q1, 11.72 , 0.66 , 6.12 , 13.33 , 4.83 , 3.87
29 | 2004Q2, 11.73 , 0.71 , 6.05 , 13.34 , 4.93 , 3.99
30 | 2004Q3, 11.73 , 0.31 , 6.03 , 13.33 , 4.81 , 3.89
31 | 2004Q4, 11.75 , 0.32 , 5.99 , 13.33 , 4.83 , 3.90
32 | 2005Q1, 11.81 , 0.24 , 5.94 , 13.35 , 4.89 , 3.96
33 | 2005Q2, 11.81 , 0.32 , 5.94 , 13.34 , 4.85 , 3.88
34 | 2005Q3, 11.82 , 0.23 , 5.90 , 13.33 , 4.83 , 3.86
35 | 2005Q4, 11.84 , 0.11 , 6.02 , 13.35 , 4.87 , 3.89
36 | 2006Q1, 11.87 , 0.16 , 6.24 , 13.36 , 4.84 , 3.85
37 | 2006Q2, 11.86 , 0.28 , 6.51 , 13.36 , 4.87 , 3.90
38 | 2006Q3, 11.87 , 0.18 , 6.65 , 13.36 , 4.83 , 3.88
39 | 2006Q4, 11.89 , 0.45 , 6.58 , 13.39 , 4.90 , 3.94
40 | 2007Q1, 11.92 , 1.58 , 6.55 , 13.40 , 5.02 , 4.09
41 | 2007Q2, 11.92 , 0.47 , 6.46 , 13.39 , 5.13 , 4.20
42 | 2007Q3, 11.94 , 1.54 , 6.33 , 13.41 , 5.20 , 4.26
43 | 2007Q4, 11.96 , 0.59 , 6.29 , 13.36 , 5.16 , 4.20
44 | 2008Q1, 11.99 , 3.35 , 6.25 , 13.37 , 5.19 , 4.23
45 | 2008Q2, 11.99 , 0.61 , 6.13 , 13.36 , 5.03 , 4.02
46 | 2008Q3, 11.99 , 1.15 , 5.99 , 13.38 , 4.91 , 3.90
47 | 2008Q4, 11.96 , 2.14 , 5.95 , 13.46 , 4.88 , 3.79
48 | 2009Q1, 11.93 , 0.88 , 5.47 , 13.51 , 4.85 , 3.66
49 | 2009Q2, 11.95 , 1.03 , 5.06 , 13.53 , 4.90 , 3.70
50 | 2009Q3, 11.98 , 0.43 , 4.92 , 13.54 , 5.05 , 3.87
51 | 2009Q4, 12.00 , 0.19 , 4.88 , 13.53 , 5.13 , 3.92
52 | 2010Q1, 12.02 , 0.32 , 4.89 , 13.54 , 5.17 , 3.96
53 | 2010Q2, 12.04 , 0.40 , 5.00 , 13.57 , 5.22 , 4.02
54 | 2010Q3, 12.04 , 0.23 , 5.20 , 13.58 , 5.17 , 3.98
55 | 2010Q4, 12.05 , 0.20 , 4.92 , 13.59 , 5.25 , 4.10
56 | 2011Q1, 12.07 , 0.38 , 4.95 , 13.58 , 5.27 , 4.13
57 | 2011Q2, 12.08 , 0.18 , 4.95 , 13.62 , 5.28 , 4.14
58 | 2011Q3, 12.10 , 0.77 , 4.89 , 13.64 , 5.31 , 4.16
59 | 2011Q4, 12.10 , 0.65 , 4.87 , 13.67 , 5.17 , 3.97
60 | 2012Q1, 12.12 , 0.20 , 4.83 , 13.67 , 5.24 , 4.09
61 | 2012Q2, 12.14 , 0.23 , 4.85 , 13.72 , 5.29 , 4.14
62 | 2012Q3, 12.15 , 0.17 , 4.73 , 13.76 , 5.31 , 4.12
63 | 2012Q4, 12.17 , 0.13 , 4.73 , 13.79 , 5.31 , 4.15
64 | 2013Q1, 12.16 , 0.26 , 4.70 , 13.80 , 5.30 , 4.17
65 | 2013Q2, 12.18 , 0.49 , 4.68 , 13.83 , 5.34 , 4.19
66 | 2013Q3, 12.20 , 0.33 , 4.52 , 13.84 , 5.40 , 4.23
67 | 2013Q4, 12.21 , 0.12 , 4.55 , 13.85 , 5.37 , 4.19
--------------------------------------------------------------------------------
/Chapter 08/4Continuous Time Markov Chains - Vehicle Service at Gas Station.r:
--------------------------------------------------------------------------------
1 | R version 3.2.2 (2015-08-14) -- "Fire Safety"
2 | Copyright (C) 2015 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 | install.packages("simmer")
20 | library(simmer)
21 | library(ggplot2)
22 |
23 | ArrivalRate <- 3/20
24 |
25 | ArrivalRate
26 |
27 | ServiceRate <- c(1/8, 1/3)
28 |
29 | ServiceRate
30 |
31 | p <- 0.75
32 |
33 | TransitionMatrix <- matrix(c(1, ServiceRate[1], 0,
34 | 1, -ArrivalRate, (1-p)*ArrivalRate,
35 | 1, ServiceRate[2], -ServiceRate[2]), byrow=T, ncol=3)
36 |
37 | TransitionMatrix
38 |
39 | B <- c(1, 0, 0)
40 |
41 | P <- solve(t(A), B)
42 |
43 | P
44 |
45 | Resolution <- sum(P * c(1, 0, 1))
46 |
47 | Resolution
48 |
49 | set.seed(1234)
50 |
51 | option.1 <- function(t) {
52 | car <- create_trajectory() %>%
53 | seize("pump", amount=1) %>%
54 | timeout(function() rexp(1, ServiceRate[1])) %>%
55 | release("pump", amount=1)
56 |
57 | motorcycle <- create_trajectory() %>%
58 | seize("pump", amount=1) %>%
59 | timeout(function() rexp(1, ServiceRate[2])) %>%
60 | release("pump", amount=1)
61 |
62 | simmer() %>%
63 | add_resource("pump", capacity=1, queue_size=0) %>%
64 | add_generator("car", car, function() rexp(1, p*ArrivalRate)) %>%
65 | add_generator("motorcycle", motorcycle, function() rexp(1, (1-p)*ArrivalRate)) %>%
66 | run(until=t)
67 | }
68 |
69 |
70 |
71 | option.2 <- function(t) {
72 | vehicle <- create_trajectory() %>%
73 | seize("pump", amount=1) %>%
74 | branch(function() sample(c(1, 2), 1, prob=c(p, 1-p)), merge=c(T, T),
75 | create_trajectory("car") %>%
76 | timeout(function() rexp(1, ServiceRate[1])),
77 | create_trajectory("motorcycle") %>%
78 | timeout(function() rexp(1, ServiceRate[2]))) %>%
79 | release("pump", amount=1)
80 |
81 | simmer() %>%
82 | add_resource("pump", capacity=1, queue_size=0) %>%
83 | add_generator("vehicle", vehicle, function() rexp(1, ArrivalRate)) %>%
84 | run(until=t)
85 | }
86 |
87 | option.3 <- function(t) {
88 | vehicle <- create_trajectory() %>%
89 | seize("pump", amount=1) %>%
90 | timeout(function() {
91 | if (runif(1) < p) rexp(1, ServiceRate[1])
92 | else rexp(1, ServiceRate[2])
93 | }) %>%
94 | release("pump", amount=1)
95 |
96 | simmer() %>%
97 | add_resource("pump", capacity=1, queue_size=0) %>%
98 | add_generator("vehicle", vehicle, function() rexp(1, ArrivalRate)) %>%
99 | run(until=t)
100 | }
101 |
102 | gas.station <- option.3(5000)
103 |
104 | graph <- plot_resource_usage(gas.station, "pump", items="system")
105 |
106 | graph + geom_hline(yintercept = Resolution)
107 |
--------------------------------------------------------------------------------
/Chapter 05/2Smooth Splines.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
3 | Copyright (C) 2016 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 |
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 |
18 | library(graphics)
19 | library(splines)
20 |
21 | matrx = matrix(cbind(1,.99, .99,1),nrow=2)
22 |
23 | cholsky = t(chol(matrx))
24 |
25 | nvars = dim(cholsky)[1]
26 |
27 | numobs = 1000
28 |
29 |
30 | set.seed(1)
31 |
32 | random_normal = matrix(rnorm(nvars*numobs,10,1), nrow=nvars, ncol=numobs)
33 |
34 |
35 | X = cholsky %*% random_normal
36 |
37 |
38 |
39 | newX = t(X)
40 |
41 | raw = as.data.frame(newX)
42 |
43 | head(raw)
44 |
45 | raw_original = as.data.frame(t(random_normal))
46 |
47 | names(raw) = c("response","predictor1")
48 |
49 | raw$predictor1_3 = raw$predictor1^3
50 |
51 | head(raw$predictor1_3)
52 |
53 | raw$predictor1_2 = raw$predictor1^2
54 |
55 | head(raw$predictor1_2)
56 |
57 |
58 | fit = lm(raw$response ~ raw$predictor1_3)
59 |
60 |
61 | fit
62 |
63 | plot(raw$response ~ raw$predictor1_3, pch=16, cex=.4, xlab="Predictor", ylab="Response", col ="red", main="Simulated Data with Slight Curve")
64 |
65 |
66 | abline(fit)
67 |
68 | x_axis <- with(cars, speed)
69 |
70 | y_axis <- with(cars, dist)
71 |
72 |
73 | eval_length = 50
74 |
75 | fit_loess <- loess.smooth(x_axis, y_axis, evaluation = eval_length, family="gaussian", span=.75, degree=1)
76 |
77 | fit_loess
78 |
79 | fit_loess_2 <- loess(y_axis ~ x_axis, family="gaussian", span=.75, degree=1)
80 |
81 | fit_loess_2
82 |
83 | new_x_axis = seq(min(x_axis),max(x_axis), length.out=eval_length)
84 |
85 | new_x_axis
86 |
87 | conf_int = cbind(
88 | predict(fit_loess_2, data.frame(x=new_x_axis)),
89 | predict(fit_loess_2, data.frame(x=new_x_axis))+
90 | predict(fit_loess_2, data.frame(x=new_x_axis), se=TRUE)$se.fit*qnorm(1-.05/2),
91 | predict(fit_loess_2, data.frame(x=new_x_axis))-
92 | predict(fit_loess_2, data.frame(x=new_x_axis), se=TRUE)$se.fit*qnorm(1-.05/2)
93 | )
94 |
95 |
96 |
97 |
98 | fit_lm = lm(y_axis ~ x_axis)
99 |
100 |
101 | fit_lm
102 |
103 | fit_poly = lm(y_axis ~ poly(x_axis,3) )
104 |
105 | fit_poly
106 |
107 |
108 | fit_nat_spline = lm(y_axis ~ ns(x_axis, 3) )
109 |
110 | fit_nat_spline
111 |
112 | fit_smth_spline = smooth.spline(y_axis ~ x_axis, nknots=15)
113 |
114 | fit_smth_spline
115 |
116 |
117 | plot(x_axis, y_axis, xlim=c(min(x_axis),max(x_axis)), ylim=c(min(y_axis),max(y_axis)), pch=16, cex=.5, ylab = "Stopping Distance (feet)", xlab= "Speed (MPH)", main="Comparison of Models", sub="Splines")
118 |
119 |
120 | matplot(new_x_axis, conf_int, lty = c(1,2,2), col=c(1,2,2), type = "l", add=T)
121 |
122 |
123 | lines(new_x_axis, predict(fit_lm, data.frame(x=new_x_axis)), col="red", lty=3)
124 |
125 |
126 | lines(new_x_axis, predict(fit_poly, data.frame(x=new_x_axis)), col="blue", lty=4)
127 |
128 | lines(new_x_axis, predict(fit_nat_spline, data.frame(x=new_x_axis)), col="green", lty=5)
129 |
130 | lines(fit_smth_spline, col="dark grey", lty=6)
131 |
132 | lines(ksmooth(x_axis, y_axis, "normal", bandwidth = 5), col = 'purple', lty=7)
133 |
134 |
135 |
--------------------------------------------------------------------------------
/Chapter 05/1Generalized additive model - Measuring household income of New Zealand.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
3 | Copyright (C) 2016 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | Type 'contributors()' for more information and
12 | 'citation()' on how to cite R or R packages in publications.
13 |
14 | Type 'demo()' for some demos, 'help()' for on-line help, or
15 | 'help.start()' for an HTML browser interface to help.
16 | Type 'q()' to quit R.
17 |
18 | devtools::install_github("ellisp/nzelect/pkg2")
19 |
20 | library(leaflet)
21 | library(nzcensus)
22 | library(Metrics)
23 | library(ggplot2)
24 | library(scales)
25 | library(boot)
26 | library(dplyr)
27 | library(Hmisc)
28 | library(mgcv)
29 | library(caret)
30 | library(grid)
31 | library(stringr)
32 | library(ggrepel)
33 | library(glmnet)
34 | library(maps)
35 |
36 | tmp <- AreaUnits2013[AreaUnits2013$WGS84Longitude 0 & !is.na(AreaUnits2013$MedianIncome2013), ]
37 |
38 | palette <- colorQuantile("RdBu", NULL, n = 10)
39 |
40 | labels <- paste0(tmp$AU_NAM, " $", format(tmp$MedianIncome2013, big.mark = ","))
41 |
42 | leaflet() %>%
43 | addProviderTiles("CartoDB.Positron") %>%
44 | addCircles(lng = tmp$WGS84Longitude, lat = tmp$WGS84Latitude,
45 | color = pal(-tmp$MedianIncome2013),
46 | popup = labs,
47 | radius = 500) %>%
48 | addLegend(
49 | pal = pal,
50 | values = -tmp$MedianIncome2013,
51 | title = "Quantile of median%
57 | select(-AU2014, -AU_NAM, -NZTM2000Easting, -NZTM2000Northing) %>%
58 | select(-PropWorked40_49hours2013, -Prop35to39_2013, -PropFemale2013)
59 |
60 | row.names(au) <- AreaUnits2013$AU_NAM
61 |
62 |
63 | names(au) <- gsub("_2013", "", names(au))
64 |
65 | names(au) <- gsub("2013", "", names(au))
66 |
67 | names(au) <- gsub("Prop", "", names(au))
68 |
69 | au <- au[complete.cases(au), ]
70 |
71 | data_use <- au
72 |
73 | dim(data_use)
74 |
75 | data_use <- data_use[the_data$WGS84Longitude > 100, ]
76 |
77 | names(data_use) <- make.names(names(data_use))
78 |
79 | names(data_use)
80 |
81 | reg_data <- spearman2(MedianIncome ~ ., data = data_use)
82 | reg_data[order(-reg_data[ ,6])[1:15], ]
83 |
84 |
85 | reg_formula <- terms(MedianIncome ~
86 | s(FullTimeEmployed, k = 6) +
87 | s(InternetHH, k = 6) +
88 | s(NoQualification, k = 5) +
89 | s(UnemploymentBenefit, k = 5) +
90 | s(Smoker, k = 5) +
91 | s(Partnered, k = 5) +
92 | s(Managers, k = 4) +
93 | s(Bachelor, k = 4) +
94 | s(SelfEmployed, k = 4) +
95 | s(NoMotorVehicle, k = 4) +
96 | s(Unemployed, k = 3) +
97 | s(Labourers, k = 3) +
98 | s(Worked50_59hours, k = 3) +
99 | s(Separated, k = 3) +
100 | s(Maori, k = 3) +
101 | s(WGS84Longitude, WGS84Latitude) +
102 | .,
103 | data = data_use)
104 |
105 |
106 |
107 | gam_model <- gam(reg_formula, data = data_use)
108 |
109 |
110 | par(bty = "l", mar = c(5,4, 2, 1))
111 |
112 |
113 | par(mar = rep(2, 4))
114 |
115 | plot(gam_model, residuals = TRUE, pages = 1, shade = TRUE, seWithMean = TRUE, ylab = "")
116 |
117 | rmses_gam_boot <- boot(data = data_use, statistic = fit_gam, R = 99)
118 |
119 | rmses_gam_boot
120 |
121 |
122 | gam_rmse <- mean(rmses_gam_boot$t)
123 |
124 | gam_rmse
125 |
--------------------------------------------------------------------------------
/Chapter 07/2Vector Quantization - Image Clustering.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R
16 |
17 | library(jpeg)
18 | library(ggplot2)
19 |
20 | img <- readJPEG("d:/Image.jpg")
21 |
22 | img_Dim <- dim(img)
23 |
24 | img_Dim
25 | [1] 526 800 3
26 |
27 |
28 |
29 | img_RGB_channels <- data.frame(
30 | x = rep(1:img_Dim[2], each = img_Dim[1]),
31 | y = rep(img_Dim[1]:1, img_Dim[2]),
32 | R = as.vector(img[,,1]),
33 | G = as.vector(img[,,2]),
34 | B = as.vector(img[,,3])
35 | )
36 |
37 |
38 | plotTheme <- function() {
39 | theme(
40 | panel.background = element_rect(
41 | size = 3,
42 | colour = "black",
43 | fill = "white"),
44 | axis.ticks = element_line(
45 | size = 2),
46 | panel.grid.major = element_line(
47 | colour = "gray80",
48 | linetype = "dotted"),
49 | panel.grid.minor = element_line(
50 | colour = "gray90",
51 | linetype = "dashed"),
52 | axis.title.x = element_text(
53 | size = rel(1.2),
54 | face = "bold"),
55 | axis.title.y = element_text(
56 | size = rel(1.2),
57 | face = "bold"),
58 | plot.title = element_text(
59 | size = 20,
60 | face = "bold",
61 | vjust = 1.5)
62 | )
63 | }
64 |
65 |
66 |
67 | ggplot(data = img_RGB_channels, aes(x = x, y = y)) +
68 | geom_point(colour = rgb(img_RGB_channels[c("R", "G", "B")])) +
69 | labs(title = "Original Image: Colorful Bird") +
70 | xlab("x") +
71 | ylab("y") +
72 | plotTheme()
73 |
74 | kClusters <- 3
75 |
76 | kMeans_clst <- kmeans(img_RGB_channels[, c("R", "G", "B")], centers = kClusters)
77 |
78 | kColours <- rgb(kMeans_clst$centers[kMeans_clst$cluster,])
79 |
80 | ggplot(data = img_RGB_channels, aes(x = x, y = y)) +
81 | geom_point(colour = kColours) +
82 | labs(title = paste("k-Means Clustering of", kClusters, "Colours")) +
83 | xlab("x") +
84 | ylab("y") +
85 | plotTheme()
86 |
87 | kClusters <- 5
88 |
89 | kMeans_clst <- kmeans(img_RGB_channels[, c("R", "G", "B")], centers = kClusters)
90 |
91 | kColours <- rgb(kMeans_clst$centers[kMeans_clst$cluster,])
92 |
93 | ggplot(data = img_RGB_channels, aes(x = x, y = y)) +
94 | geom_point(colour = kColours) +
95 | labs(title = paste("k-Means Clustering of", kClusters, "Colours")) +
96 | xlab("x") +
97 | ylab("y") +
98 | plotTheme()
99 |
100 |
101 | ggplot(data = img_RGB_channels, aes(x = x, y = y)) +
102 | geom_point(colour = rgb(img_RGB_channels[c("R", "G", "B")])) +
103 | labs(title = "Original Image: Colorful Bird") +
104 | xlab("x") +
105 | ylab("y") +
106 | plotTheme()
107 |
108 | kClusters <- 3
109 |
110 | kMeans_clst <- kmeans(imgRGB[, c("R", "G", "B")], centers = kClusters)
111 |
112 | kColours <- rgb(kMeans_clst$centers[kMeans_clst$cluster,])
113 |
114 | ggplot(data = imgRGB, aes(x = x, y = y)) +
115 | geom_point(colour = kColours) +
116 | labs(title = paste("k-Means Clustering of", kClusters, "Colours")) +
117 | xlab("x") +
118 | ylab("y") +
119 | plotTheme()
120 |
121 | kClusters <- 5
122 |
123 | kMeans_clst <- kmeans(imgRGB[, c("R", "G", "B")], centers = kClusters)
124 |
125 | kColours <- rgb(kMeans_clst$centers[kMeans_clst$cluster,])
126 |
127 | ggplot(data = imgRGB, aes(x = x, y = y)) +
128 | geom_point(colour = kColours) +
129 | labs(title = paste("k-Means Clustering of", kClusters, "Colours")) +
130 | xlab("x") +
131 | ylab("y") +
132 | plotTheme()
133 |
134 |
--------------------------------------------------------------------------------
/Chapter 13/Case Study 2 - Pricing Reinsurance Contracts.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 | install.packages("gdata")
20 | install.packages("evir")
21 | library(gdata)
22 | library(evir)
23 |
24 | StormDamageData <- read.xls("d:/publicdatamay2007.xls", sheet = 1)
25 |
26 | head(StormDamageData)
27 |
28 | tail(StormDamageData)
29 |
30 | dim(StormDamageData)
31 |
32 | ChangeFormat <- function(x){
33 | x=as.character(x)
34 | for(i in 1:10){x=sub(",","",as.character(x))}
35 | return(as.numeric(x))}
36 |
37 | base <- StormDamageData[,1:4]
38 |
39 | base$Base.Economic.Damage <- Vectorize(ChangeFormat)(StormDamageData$Base.Economic.Damage)
40 |
41 | base$Normalized.PL05 <- Vectorize(ChangeFormat)(StormDamageData$Normalized.PL05)
42 |
43 | base$Normalized.CL05 <- Vectorize(ChangeFormat)(StormDamageData$Normalized.CL05)
44 |
45 | head(base)
46 |
47 | plot(base$Normalized.PL05/1e9,type="h",ylim=c(0,155), main = "207 Hurricanes, Normalized Costs: 1900 - 2005", xlab = "Index of Loss", ylab = "Normalized Costs", col = "red")
48 |
49 | TestBase <- table(base$Year)
50 |
51 | TestBase
52 |
53 | years <- as.numeric(names(TestBase))
54 |
55 | years
56 |
57 | frequency <- as.numeric(TestBase)
58 |
59 | frequency
60 |
61 | years0frequency <- (1900:2005)[which(!(1900:2005)%in%years)]
62 |
63 | years0frequency
64 |
65 | StormDamageData <- data.frame(years=c(years, years0frequency), frequency=c(frequency, rep(0,length(years0frequency))))
66 |
67 | head(StormDamageData)
68 |
69 | plot(years, frequency, type="h", main = "Frequency of Hurricanes: 1900 - 2005", xlab = "Time (Years)", ylab = "Annual Frequency", col = "red")
70 |
71 | mean(StormDamageData$frequency)
72 |
73 | LinearTrend <- glm(frequency~years, data = StormDamageData, family=poisson(link="identity"), start=lm(frequency~years, data = StormDamageData)$coefficients)
74 |
75 | LinearTrend
76 |
77 | ExpTrend <- glm(frequency~years, data=StormDamageData, family = poisson(link="log"))
78 |
79 | ExpTrend
80 |
81 | plot(years, frequency, type='h', ylim=c(0,6), main = "No. of Major Hurricanes Predicted for 2014", xlim=c(1900,2020))
82 |
83 | cpred1 <- predict(ExpTrend, newdata = data.frame(years=1890:2030), type="response")
84 |
85 | cpred1
86 |
87 | lines(1890:2030,cpred1,col="blue")
88 |
89 | cpred0 <- predict(LinearTrend, newdata=data.frame(years=1890:2030), type="response")
90 |
91 | cpred0
92 |
93 | lines(1890:2030, cpred0, col="red")
94 |
95 | abline(h = mean(StormDamageData$frequency), col="black")
96 |
97 | predictions <- cbind(constant = mean(StormDamageData$frequency), linear= cpred0[126], exponential=cpred1[126])
98 |
99 | predictions
100 |
101 | points(rep((1890:2030)[126],3), predictions, col=c("black","red","blue"), pch=19)
102 |
103 | hill(base$Normalized.PL05)
104 |
105 | threshold <- .5
106 |
107 | gpd.PL <- gpd(base$Normalized.PL05/1e9/20, threshold)$par.ests
108 |
109 | mean(base$Normalized.CL05/1e9/20 >.5)
110 |
111 | ExpectedValue <- function(yinf,ysup,xi,beta){
112 | as.numeric(integrate(function(x) (x-yinf) * dgpd(x,xi,mu=threshold,beta),
113 | lower=yinf,upper=ysup)$value +
114 | (1-pgpd(ysup,xi,mu=threshold,beta))*(ysup-yinf))
115 | }
116 |
117 | predictions[1]
118 |
119 | mean(base$Normalized.PL05/1e9/20.5)
120 |
121 | ExpectedValue(2,6,gpd.PL[1],gpd.PL[2])*1e3
122 |
123 | predictions[1] * mean(base$Normalized.PL05/1e9/20 >.5) * ExpectedValue(2, 6, gpd.PL[1], gpd.PL[2]) * 1e3
124 |
--------------------------------------------------------------------------------
/Chapter 02/1Discriminant Function Analysis script.r:
--------------------------------------------------------------------------------
1 |
2 | R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
3 | Copyright (C) 2015 The R Foundation for Statistical Computing
4 | Platform: x86_64-w64-mingw32/x64 (64-bit)
5 |
6 | R is free software and comes with ABSOLUTELY NO WARRANTY.
7 | You are welcome to redistribute it under certain conditions.
8 | Type 'license()' or 'licence()' for distribution details.
9 |
10 | R is a collaborative project with many contributors.
11 | library(MASS)
12 |
13 | brine <- read.table("d:/brine.txt", header=TRUE, sep=",", row.names=1)
14 | head(brine)
15 |
16 | HCO3 SO4 Cl Ca Mg Na GROUP
17 | 1 10.4 30.0 967.1 95.9 53.7 857.7 1
18 | 2 6.2 29.6 1174.9 111.7 43.9 1054.7 1
19 | 3 2.1 11.4 2387.1 348.3 119.3 1932.4 1
20 | 4 8.5 22.5 2186.1 339.6 73.6 1803.4 1
21 | 5 6.7 32.8 2015.5 287.6 75.1 1691.8 1
22 | 6 3.8 18.9 2175.8 340.4 63.8 1793.9 1
23 |
24 | pairs(brine[ ,1:6])
25 |
26 | brine.log <- brine
27 | brine.log[ ,1:6] <- log(brine[ ,1:6]+1)
28 | pairs(brine.log[ ,1:6])
29 |
30 |
31 | brine.log.lda <- lda(GROUP ~ HCO3 + SO4 + Cl + Ca + Mg + Na, data=brine.log)
32 |
33 | brine.log.lda
34 |
35 | Call:
36 | lda(GROUP ~ HCO3 + SO4 + Cl + Ca + Mg + Na, data = brine.log)
37 |
38 | Prior probabilities of groups:
39 | 1 2 3
40 | 0.3684211 0.3157895 0.3157895
41 |
42 | Group means:
43 | HCO3 SO4 Cl Ca Mg Na
44 | 1 1.759502 3.129009 7.496891 5.500942 4.283490 7.320686
45 | 2 2.736481 3.815399 6.829565 4.302573 4.007725 6.765017
46 | 3 1.374438 2.378965 6.510211 4.641049 3.923851 6.289692
47 |
48 | Coefficients of linear discriminants:
49 | LD1 LD2
50 | HCO3 -1.67799521 0.64415802
51 | SO4 0.07983656 0.02903096
52 | Cl 22.27520614 -0.31427770
53 | Ca -1.26859368 2.54458682
54 | Mg -1.88732009 -2.89413332
55 | Na -20.86566883 1.29368129
56 |
57 | Proportion of trace:
58 | LD1 LD2
59 | 0.7435 0.2565
60 |
61 | brine.log.hat <- predict(brine.log.lda)
62 |
63 | brine.log.hat
64 |
65 | $class
66 | [1] 2 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3
67 | Levels: 1 2 3
68 |
69 | $posterior
70 | 1 2 3
71 | 1 2.312733e-01 7.627845e-01 5.942270e-03
72 | 2 9.488842e-01 3.257237e-02 1.854347e-02
73 | 3 8.453057e-01 9.482540e-04 1.537461e-01
74 | 4 9.990242e-01 8.794725e-04 9.632578e-05
75 | 5 9.965920e-01 2.849903e-03 5.581176e-04
76 | 6 9.984987e-01 1.845534e-05 1.482872e-03
77 | 7 8.676660e-01 7.666611e-06 1.323263e-01
78 | 8 4.938019e-03 9.949035e-01 1.584755e-04
79 | 9 4.356152e-03 9.956351e-01 8.770078e-06
80 | 10 2.545287e-05 9.999439e-01 3.066264e-05
81 | 11 2.081510e-02 9.791728e-01 1.210748e-05
82 | 12 1.097540e-03 9.989023e-01 1.455693e-07
83 | 13 1.440307e-02 9.854613e-01 1.356671e-04
84 | 14 4.359641e-01 2.367602e-03 5.616683e-01
85 | 15 6.169265e-02 1.540353e-04 9.381533e-01
86 | 16 7.500357e-04 4.706701e-09 9.992500e-01
87 | 17 1.430433e-03 1.095281e-06 9.985685e-01
88 | 18 2.549733e-04 3.225658e-07 9.997447e-01
89 | 19 6.433759e-02 8.576694e-03 9.270857e-01
90 |
91 | $x
92 | LD1 LD2
93 | 1 -1.1576284 -0.1998499
94 | 2 -0.1846803 0.6655823
95 | 3 1.0179998 0.6827867
96 | 4 -0.3939366 2.6798084
97 | 5 -0.3167164 2.0188002
98 | 6 1.0061340 2.6434491
99 | 7 2.0725443 1.5714400
100 | 8 -2.0387449 -0.9731745
101 | 9 -2.6054261 -0.2774844
102 | 10 -2.5191350 -2.8304663
103 | 11 -2.4915044 0.3194247
104 | 12 -3.4448401 0.1869864
105 | 13 -2.0343204 -0.4674925
106 | 14 1.0441237 -0.0991014
107 | 15 1.6987023 -0.6036252
108 | 16 3.9138884 -0.7211078
109 | 17 2.7083649 -1.3896956
110 | 18 2.9310268 -1.9243611
111 | 19 0.7941483 -1.2819190
112 |
113 |
114 | apply(brine.log.hat$posterior, MARGIN=1, FUN=max)
115 | 1 2 3 4 5 6 7 8
116 | 0.7627845 0.9488842 0.8453057 0.9990242 0.9965920 0.9984987 0.8676660 0.9949035
117 | 9 10 11 12 13 14 15 16
118 | 0.9956351 0.9999439 0.9791728 0.9989023 0.9854613 0.5616683 0.9381533 0.9992500
119 | 17 18 19
120 | 0.9985685 0.9997447 0.9270857
121 |
122 | plot(brine.log.lda)
123 | plot(brine.log.lda, dimen=1, type="both")
124 | tab <- table(brine.log$GROUP, brine.log.hat$class)
125 | tab
126 |
127 | 1 2 3
128 | 1 6 1 0
129 | 2 0 6 0
130 | 3 0 0 6
131 | sum(tab[row(tab) == col(tab)]) / sum(tab)
132 | [1] 0.9473684
133 |
134 | brine.log.lda <- lda(GROUP ~ HCO3 + SO4 + Cl + Ca + Mg + Na, data=brine.log, CV=TRUE)
135 | tab <- table(brine.log$GROUP, brine.log.lda$class)
136 |
137 | tab
138 |
139 | 1 2 3
140 | 1 6 1 0
141 | 2 1 4 1
142 | 3 1 0 5
143 | sum(tab[row(tab) == col(tab)]) / sum(tab)
144 | [1] 0.7894737
--------------------------------------------------------------------------------
/Chapter 08/5Monte Carlo Simulations - Calibrated Hull and White short-rates.r:
--------------------------------------------------------------------------------
1 | R version 3.2.2 (2015-08-14) -- "Fire Safety"
2 | Copyright (C) 2015 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 | install.packages("RQuantLib", type="binary")
19 | install.packages("ESGtoolkit")
20 | library(RQuantLib)
21 | library(ESGtoolkit)
22 |
23 |
24 | freq <- "monthly"
25 |
26 | delta_t <- 1/12
27 |
28 | delta_t
29 |
30 | params <- list(tradeDate=as.Date('2002-2-15'),
31 | settleDate=as.Date('2002-2-19'),
32 | payFixed=TRUE,
33 | dt=delta_t,
34 | strike=.06,
35 | method="HWAnalytic",
36 | interpWhat="zero",
37 | interpHow= "spline")
38 |
39 |
40 | TermQuotes <- list(d1w =0.0382,
41 | d1m =0.0372,
42 | d3m = 0.0363,
43 | d6m = 0.0353,
44 | d9m = 0.0348,
45 | d1y = 0.0345,
46 | s2y = 0.037125,
47 | s3y =0.0398,
48 | s5y =0.0443,
49 | s10y =0.05165,
50 | s15y =0.055175)
51 |
52 |
53 | SwaptionMaturities <- c(1,2,3,4,5)
54 |
55 | SwaptionMaturities
56 |
57 | SwapTenors <- c(1,2,3,4,5)
58 |
59 | SwapTenors
60 |
61 | VolatilityMatrix <- matrix(
62 | c(0.1490, 0.1340, 0.1228, 0.1189, 0.1148,
63 | 0.1290, 0.1201, 0.1146, 0.1108, 0.1040,
64 | 0.1149, 0.1112, 0.1070, 0.1010, 0.0957,
65 | 0.1047, 0.1021, 0.0980, 0.0951, 0.1270,
66 | 0.1000, 0.0950, 0.0900, 0.1230, 0.1160),
67 | ncol=5, byrow=TRUE)
68 |
69 |
70 | BermudanSwaption <- RQuantLib::BermudanSwaption(params, TermQuotes, SwaptionMaturities, SwapTenors, VolatilityMatrix)
71 |
72 | summary(BermudanSwaption)
73 |
74 |
75 | BermudanSwaption
76 |
77 |
78 | times <- seq(from = delta_t, to = 5, by = delta_t)
79 |
80 |
81 | DiscountCurve <- RQuantLib::DiscountCurve(params, TermQuotes, times)
82 |
83 |
84 | str(DiscountCurve)
85 |
86 | maturities <- DiscountCurve$times
87 |
88 | maturities
89 |
90 |
91 | MarketZeroRates <- DiscountCurve$zerorates
92 |
93 | MarketZeroRates
94 |
95 | MarketPrices <- DiscountCurve$discounts
96 |
97 | MarketPrices
98 |
99 | horizon <- 5
100 |
101 | NoSimulations <- 10000
102 |
103 | a <- BermudanSwaption$a
104 | a
105 |
106 | sigma <- BermudanSwaption$sigma
107 |
108 | sigma
109 |
110 |
111 | GaussianShocks <- ESGtoolkit::simshocks(n = NoSimulations, horizon = horizon, frequency = freq)
112 |
113 | x <- ESGtoolkit::simdiff(n = NoSimulations, horizon = horizon, frequency = freq, model = "OU", x0 = 0, theta1 = 0, theta2 = a, theta3 = sigma, eps = GaussianShocks)
114 |
115 |
116 | ForwardRates <- ts(replicate(nb.sims, DiscountCurve$forwards), start = start(x), deltat = deltat(x))
117 |
118 | t.out <- seq(from = 0, to = horizon, by = delta_t)
119 |
120 | param.alpha <- ts(replicate(NoSimulations, 0.5*(sigma^2)*(1 - exp(-a*t.out))^2/(a^2)), start = start(x), deltat = deltat(x))
121 |
122 |
123 | alpha <- ForwardRates + param.alpha
124 |
125 |
126 | ShortRates <- x + alpha
127 |
128 | StochasticDiscount <- ESGtoolkit::esgdiscountfactor(r = ShortRates, X = 1)
129 |
130 | MonteCarloPrices <- rowMeans(StochasticDiscount)
131 |
132 | MonteCarloPrices
133 |
134 |
135 | MonteCarloZeroRates <- -log(MonteCarloPrices)/maturities
136 |
137 | MonteCarloZeroRates
138 |
139 | ConfidenceInterval <- t(apply((StochasticDiscount - MarketPrices)[-1, ], 1, function(x) t.test(x)$conf.int))
140 |
141 |
142 | head(ConfidenceInterval)
143 |
144 |
145 | par(mfrow = c(2, 2))
146 |
147 |
148 |
149 | ESGtoolkit::esgplotbands(ShortRates, xlab = "maturities", ylab = "short-rate quantiles", main = "Short Rate Quantiles")
150 |
151 |
152 | plot(maturities, MonteCarloZeroRates, type='l', col = 'blue', lwd = 1, main = "Monte Carlo v/s Market n Zero Rates")
153 |
154 | points(maturities, MonteCarloZeroRates, col = 'red')
155 |
156 |
157 | plot(maturities, MonteCarloPrices, type='l', col = 'blue', lwd = 1, main = "Monte Carlo v/s Market n Zero Rates")
158 |
159 |
160 |
161 | points(maturities, MonteCarloPrices, col = 'red')
162 |
163 |
164 | matplot(maturities[-1], conf.int, type = 'l', main = "Confidence Interval for the price difference")
165 |
166 |
167 |
168 |
169 |
--------------------------------------------------------------------------------
/Chapter 03/Data/NASAUnderstory.csv:
--------------------------------------------------------------------------------
1 | PlotID,Overstory Species,SPHA,BLIT,ASMA,MOSS,LEGR,CHCA,GRAS,SEDG,SMTR,PTAQ,COCA,VAAN,GAHI,ARNU,LYOB,PIMA,RUBU,VAOX,ACSP,COCO,ACRU,TRBO,MACA,CLBO,STRO,FUNG,DILO,ERIO,GATR,Labels
2 | 2,Aspen,68,14,0,3,33,5,5,0,14,0,0,4,28,0,0,1,0,6,0,0,0,0,0,0,0,1,0,0,0,Sphagnum Moss
3 | 3,Spruce,0,6,18,2,0,0,5,0,0,2,0,0,0,7,0,0,6,0,2,1,2,2,2,1,1,1,3,0,4,Brown Litter
4 | 12,Aspen,60,1,0,0,5,9,12,1,14,0,0,0,2,0,0,3,0,4,0,0,0,0,0,0,0,4,0,3,0,Big-leaved Aster
5 | 14,Aspen,16,14,0,72,14,1,2,0,13,0,0,5,7,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,Mosses (Non-Sphagnum)
6 | 15,Aspen,68,7,0,30,27,8,4,0,12,0,0,5,13,0,0,3,0,3,0,0,0,0,0,0,0,4,0,0,0,Labrador Tea
7 | 16,Spruce,0,14,14,3,0,0,2,0,0,1,0,0,0,2,1,0,1,0,17,6,10,1,0,3,4,1,1,0,2,Leatherleaf
8 | 18,Aspen,62,7,0,3,6,12,4,0,0,0,0,0,0,0,0,13,0,5,0,0,0,0,0,0,0,1,0,11,0,Grasses (Unidentified)
9 | 19,Aspen,62,5,0,0,6,28,1,0,0,0,0,0,0,0,0,15,0,5,0,0,0,0,0,0,0,0,0,10,0,Sedges (Unidentified)
10 | 20,Spruce,0,5,8,14,0,0,4,6,0,14,10,3,0,0,6,0,0,0,1,8,1,1,0,3,0,0,0,0,0,Bog False Solomon Seal
11 | 21,Spruce,0,16,26,5,0,0,1,0,0,18,6,10,0,10,11,0,3,0,1,5,5,4,4,2,0,0,1,0,0,Bracken Fern
12 | 36,Spruce,0,25,38,3,0,0,5,3,0,6,7,0,0,0,0,0,1,0,0,3,6,4,4,6,3,0,1,0,3,Bunchberry
13 | 38,Aspen,82,10,0,10,24,12,8,0,6,0,0,4,2,0,0,2,0,1,0,0,0,0,0,2,0,1,0,0,0,Lowbush Blueberry
14 | 39,Aspen,60,3,0,34,24,0,5,0,9,0,1,5,4,0,1,6,0,4,0,0,0,0,0,0,0,0,0,0,0,Creeping Snowberry
15 | 41,Aspen,72,17,0,4,6,0,3,0,5,0,0,5,6,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,Wild Sarsaparilla
16 | 42,Aspen,34,14,0,36,15,1,0,5,6,0,4,9,8,0,1,4,0,2,0,0,0,1,0,0,0,0,0,0,0,Ground Pine
17 | 43,Aspen,32,13,0,34,28,3,0,0,6,0,0,5,5,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,Spruce (Black)
18 | 45,Aspen,55,25,0,8,7,4,0,10,4,0,0,5,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,Brier
19 | 47,Aspen,64,8,0,5,9,8,1,32,20,0,0,5,4,0,0,2,0,5,0,0,0,0,0,0,0,3,0,0,0,Small Cranberry
20 | 48,Aspen,64,12,0,13,6,0,10,2,10,0,0,0,1,0,7,1,0,0,0,0,0,0,0,0,0,3,0,0,0,Maple (Mountain)
21 | 49,Aspen,48,5,0,6,1,0,5,14,5,0,1,2,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,Hazelnut (Beaked)
22 | 50,Aspen,38,18,0,38,11,0,2,6,4,0,2,0,3,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,Maple (Red)
23 | 51,Aspen,74,3,0,2,34,17,1,0,2,0,0,0,0,0,0,6,0,3,0,0,0,0,0,0,0,0,0,8,0,Starflower
24 | 52,Aspen,86,3,0,5,36,9,17,1,0,0,0,0,4,0,0,1,0,2,0,0,0,0,0,0,0,1,0,0,0,Canadian Mayflower
25 | 54,Aspen,62,16,0,0,22,10,5,0,7,1,0,4,3,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,Blue-bead Lily
26 | 55,Aspen,38,30,0,18,4,2,0,2,6,0,0,5,6,0,0,4,0,2,0,0,0,0,0,0,0,0,0,0,0,Twisted Stalk
27 | 56,Aspen,68,13,0,6,34,5,0,5,2,0,0,2,3,0,0,6,0,4,0,0,0,0,0,0,0,4,0,0,0,Fungi
28 | 57,Aspen,72,5,0,4,17,20,0,14,0,0,0,0,4,0,0,3,0,4,0,0,0,0,0,0,0,3,0,0,0,Bush Honeysuckle
29 | 62,Aspen,66,1,0,12,7,20,17,6,0,0,0,0,0,0,0,4,0,4,0,0,0,0,0,0,0,0,0,0,0,Cotton Grass
30 | 63,Aspen,56,8,0,1,12,12,6,0,0,0,0,0,0,0,0,8,0,3,0,0,0,0,0,0,0,0,0,24,0,Bedstraw (Narrow Leaves)
31 | 64,Aspen,56,0,0,6,6,24,0,34,0,0,0,0,0,0,0,7,0,4,0,0,0,0,0,0,0,0,0,0,0,
32 | 68,Aspen,62,8,0,9,4,5,1,10,1,0,0,0,5,0,0,1,0,5,0,0,0,0,0,0,0,2,0,0,0,
33 | 69,Spruce,1,15,11,5,0,0,2,0,0,2,17,7,1,1,3,0,0,0,0,2,1,1,3,4,1,0,0,0,0,
34 | 71,Spruce,1,20,16,5,0,0,2,2,0,4,5,3,0,4,2,0,3,0,0,1,2,0,2,5,4,0,2,0,1,
35 | 72,Spruce,0,32,14,4,0,0,1,0,0,0,4,0,0,5,5,0,4,0,3,2,2,5,4,3,5,0,3,0,2,
36 | 73,Spruce,0,34,21,5,0,0,4,0,0,1,3,1,0,6,4,0,2,0,4,2,4,4,4,2,4,0,1,0,4,
37 | 74,Spruce,0,24,4,2,0,0,5,0,0,3,4,1,0,7,3,0,4,0,4,5,2,4,3,2,4,0,2,0,1,
38 | 75,Spruce,0,20,14,6,0,0,6,1,0,0,4,1,0,4,0,0,5,0,7,2,5,5,5,2,4,0,2,0,2,
39 | 77,Spruce,0,46,2,7,0,0,2,1,0,0,4,0,0,4,2,0,3,0,4,0,2,4,3,5,3,0,0,0,2,
40 | 79,Spruce,0,14,14,7,0,0,3,1,0,7,8,2,0,14,2,0,7,0,9,1,3,5,3,3,5,0,8,0,4,
41 | 80,Spruce,0,6,18,2,0,0,5,0,0,2,0,0,0,7,0,0,6,0,2,1,2,2,2,1,1,1,3,0,4,
42 | 81,Spruce,0,32,31,5,0,0,4,0,0,0,0,0,0,6,0,0,0,0,8,0,3,0,4,0,2,0,2,0,3,
43 | 82,Spruce,0,18,48,6,0,0,5,0,0,1,2,0,0,8,0,0,4,0,12,0,1,2,3,1,3,0,3,0,4,
44 | 83,Spruce,0,24,22,6,0,0,5,1,0,2,0,0,0,6,0,0,5,0,3,2,4,1,3,0,4,0,3,0,4,
45 | 84,Spruce,0,22,23,4,0,0,1,1,0,20,1,3,0,8,3,0,0,0,1,1,5,3,2,0,1,1,0,0,0,
46 | 85,Spruce,0,24,30,11,0,0,4,0,0,7,2,5,0,0,4,0,0,0,2,0,4,4,0,1,0,1,1,0,0,
47 | 86,Spruce,2,22,10,4,0,0,5,0,0,4,6,5,0,0,4,0,4,0,0,2,0,0,0,0,0,1,1,0,0,
48 | 87,Spruce,0,30,7,5,0,0,3,0,0,0,0,0,0,0,3,0,1,0,0,3,0,2,0,2,0,3,1,0,0,
49 | 88,Spruce,0,42,12,4,0,0,2,1,0,0,4,2,0,4,6,0,2,0,1,3,2,3,2,4,1,3,1,0,0,
50 | 89,Spruce,0,24,3,17,0,0,1,0,0,0,8,3,0,0,6,0,1,0,1,1,1,0,0,4,0,1,2,0,0,
51 | 90,Spruce,0,24,13,6,0,0,5,1,0,0,4,2,0,0,6,0,1,0,0,3,5,2,0,2,0,2,0,0,0,
52 | 92,Spruce,0,32,10,3,0,0,5,0,0,1,4,0,0,5,8,0,1,0,2,7,1,4,4,6,5,0,1,0,0,
53 | 93,Spruce,0,26,12,1,0,0,4,0,0,3,3,2,0,3,6,0,3,0,4,6,2,4,4,4,5,1,1,0,4,
54 | 94,Spruce,0,10,36,0,1,0,4,0,0,4,4,0,0,6,5,0,3,0,0,5,1,0,0,1,1,0,1,0,1,
55 | 95,Spruce,0,16,26,2,1,0,7,0,0,3,3,6,0,2,0,0,8,0,0,9,0,1,1,0,0,0,1,0,2,
56 | 96,Spruce,0,31,17,3,0,0,4,1,0,6,8,2,0,2,3,0,4,0,1,1,6,5,6,1,1,0,2,0,2,
57 | 97,Spruce,0,11,36,2,0,0,3,2,0,18,4,1,0,2,3,0,3,0,0,1,3,2,5,2,2,0,8,0,1,
58 | 98,Spruce,0,14,38,3,0,0,3,0,0,7,0,2,0,0,6,0,2,0,0,3,0,2,3,0,1,0,1,0,1,
59 | 99,Spruce,0,42,20,4,0,0,5,1,0,9,4,5,0,0,1,0,5,0,0,3,0,0,0,0,1,0,1,0,0,
60 | 100,Aspen,60,17,0,10,0,0,0,20,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,0,0,5,0,0,0,
61 | 101,Spruce,36,35,0,10,0,0,0,1,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,
62 | 102,Aspen,50,8,0,10,8,4,6,6,2,0,0,0,4,0,0,3,0,4,0,0,0,0,0,0,0,3,0,0,0,
63 | 103,Aspen,60,2,0,9,10,15,0,26,1,0,0,0,1,0,0,8,0,5,0,0,0,0,0,0,0,3,0,0,0,
64 | 105,Aspen,70,9,0,16,6,0,2,11,3,0,0,0,3,0,0,3,0,5,0,0,0,0,0,0,0,5,0,0,0,
65 |
--------------------------------------------------------------------------------
/Chapter 05/3Loess - United States Geological Survey.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | library(dataRetrieval)
18 | library(dplyr)
19 |
20 | siteNumber<-c("01538000")
21 |
22 | parameterCd <- "00060"
23 |
24 |
25 | Q_daily <- readNWISdv(siteNumber, parameterCd)
26 |
27 |
28 | tail(Q_daily)
29 |
30 |
31 | str(Q_daily)
32 |
33 |
34 | Q_daily <- renameNWISColumns(Q_daily)
35 |
36 | tail(Q_daily)
37 |
38 | stationInfo <- readNWISsite(siteNumber)
39 |
40 | if(as.numeric(diff(range(Q_daily$Date))) != (nrow(Q_daily)+1)){
41 | fullDates <- seq(from=min(Q_daily$Date),
42 | to = max(Q_daily$Date), by="1 day")
43 | fullDates <- data.frame(Date = fullDates,
44 | agency_cd = Q_daily$agency_cd[1],
45 | site_no = Q_daily$site_no[1],
46 | stringsAsFactors = FALSE)
47 | Q_daily <- full_join(Q_daily, fullDates,
48 | by=c("Date","agency_cd","site_no")) %>%
49 | arrange(Date)
50 | }
51 |
52 | moving_avg <- function(x,n=30){stats::filter(x,rep(1/n,n), sides=1)}
53 |
54 |
55 | Q_daily <- Q_daily %>% mutate(rollMean = as.numeric(moving_avg(Flow)), day.of.year = as.numeric(strftime(Date, format = "%j")))
56 |
57 | tail(Q_daily)
58 |
59 |
60 | Q_summary <- Q_daily %>%
61 | group_by(day.of.year) %>%
62 | summarize(p75 = quantile(rollMean, probs = .75, na.rm = TRUE),
63 | p25 = quantile(rollMean, probs = .25, na.rm = TRUE),
64 | p10 = quantile(rollMean, probs = 0.1, na.rm = TRUE),
65 | p05 = quantile(rollMean, probs = 0.05, na.rm = TRUE),
66 | p00 = quantile(rollMean, probs = 0, na.rm = TRUE))
67 |
68 | current_year <- as.numeric(strftime(Sys.Date(), format = "%Y"))
69 |
70 |
71 | summary.0 <- Q_summary %>% mutate(Date = as.Date(day.of.year - 1, origin = paste0(current_year-2,"-01-01")), day.of.year = day.of.year - 365)
72 |
73 |
74 | summary.1 <- Q_summary %>% mutate(Date = as.Date(day.of.year - 1, origin = paste0(current_year-1,"-01-01")))
75 |
76 |
77 | summary.2 <- Q_summary %>% mutate(Date = as.Date(day.of.year - 1, origin = paste0(current_year,"-01-01")), day.of.year = day.of.year 365)
78 |
79 |
80 | Q_summary <- bind_rows(summary.0, summary.1, summary.2)
81 |
82 | Q_summary
83 |
84 | smooth.span <- 0.3
85 |
86 |
87 | Q_summary$sm.75 <- predict(loess(p75~day.of.year, data = Q_summary, span = smooth.span))
88 |
89 |
90 | head(Q_summary$sm.75)
91 |
92 | Q_summary$sm.25 <- predict(loess(p25~day.of.year, data = Q_summary, span = smooth.span))
93 |
94 | head(summaryQ$sm.25)
95 |
96 | Q_summary$sm.10 <- predict(loess(p10~day.of.year, data = Q_summary, span = smooth.span))
97 |
98 | head(summaryQ$sm.10)
99 |
100 | Q_summary$sm.05 <- predict(loess(p05~day.of.year, data = Q_summary, span = smooth.span))
101 |
102 | head(summaryQ$sm.05)
103 |
104 | Q_summary$sm.00 <- predict(loess(p00~day.of.year, data = Q_summary, span = smooth.span))
105 |
106 | head(summaryQ$sm.00)
107 |
108 | Q_summary <- select(Q_summary, Date, day.of.year, sm.75, sm.25, sm.10, sm.05, sm.00) %>% filter(Date = as.Date(paste0(current_year-1,"-01-01")))
109 |
110 | Q_summary
111 |
112 | latest.years <- Q_daily %>% filter(Date = as.Date(paste0(current_year-1,"-01-01"))) %>% mutate(day.of.year = 1:nrow(.))
113 |
114 | title.text <- paste0(stationInfo$station_nm,"\n", "Provisional Data - Subject to change\n", "Record Start = ", min(Q_daily$Date), " Number of years = ", as.integer(as.numeric(difftime(time1 = max(Q_daily$Date), time2 = min(Q_daily$Date), units = "weeks"))/52.25), "\nDate of plot = ",Sys.Date(), " Drainage Area = ",stationInfo$drain_area_va, "mi^2")
115 |
116 |
117 | mid.month.days <- c(15, 45, 74, 105, 135, 166, 196, 227, 258, 288, 319, 349)
118 |
119 | month.letters <- c("J","F","M","A","M","J","J","A","S","O","N","D")
120 |
121 | start.month.days <- c(1, 32, 61, 92, 121, 152, 182, 214, 245, 274, 305, 335)
122 |
123 | label.text <- c("Normal","Drought Watch","Drought Warning","Drought Emergency")
124 |
125 |
126 | year1_summary <- data.frame(Q_summary[2:366,])
127 |
128 | head(year1_summary)
129 |
130 | year2_summary <- data.frame(Q_summary[367:733,])
131 |
132 | head(year2_summary)
133 |
134 | simple.plot <- ggplot(data = Q_summary, aes(x = day.of.year))
135 | geom_ribbon(aes(ymin = sm.25, ymax = sm.75, fill = "Normal"))
136 | geom_ribbon(aes(ymin = sm.10, ymax = sm.25, fill = "Drought Watch"))
137 | geom_ribbon(aes(ymin = sm.05, ymax = sm.10, fill = "Drought Warning"))
138 | geom_ribbon(aes(ymin = sm.00, ymax = sm.05, fill = "Drought Emergency"))
139 | scale_y_log10(limits = c(1,1000))
140 | geom_line(data = latest.years, aes(x=day.of.year, y=rollMean, color = "30-Day Mean"),size=2)
141 | geom_vline(xintercept = 365)
142 |
143 | simple.plot
144 |
145 |
146 |
--------------------------------------------------------------------------------
/Chapter 06/7Stochastic Gradient Descent - Adult Income.r:
--------------------------------------------------------------------------------
1 | R version 3.3.0 (2016-05-03) -- "Supposedly Educational"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | library("klaR")
18 | library("caret")
19 | library ("stringr")
20 |
21 | allData <- read.table('d:/adult.txt', header = FALSE)
22 |
23 | str(allData)
24 |
25 | labels <- as.factor(allData[,15])
26 |
27 | allFeatures <- allData[,-c(15)]
28 |
29 | head(allFeatures)
30 |
31 | continuousFeatures <- scale(continuousFeatures)
32 |
33 |
34 | head(continuousFeatures)
35 |
36 | labels.n = rep(0,length(labels))
37 | labels.n[labels==" <=50K"] = -1
38 | labels.n[labels==" 50K"] = 1
39 | labels = labels.n
40 | rm(labels.n)
41 |
42 |
43 | trainingData <- createDataPartition(y=labels, p=.8, list=FALSE)
44 |
45 | dim(trainingData)
46 |
47 | remainingLabels <- labels[-trainingData]
48 | remainingFeatures <- continuousFeatures[-trainingData,]
49 |
50 | testingData <- createDataPartition(y=remainingLabels, p=.5, list=FALSE)
51 | testingLabels <- remainingLabels[testingData]
52 | testingFeatures <- remainingFeatures[testingData,]
53 |
54 | validationLabels <- remainingLabels[-testingData]
55 | validationFeatures <- remainingFeatures[-testingData,]
56 |
57 |
58 | getAccuracy <- function(a,b,features,labels){
59 | estFxn = features %*% a + b
60 | predictedLabels = rep(0,length(labels))
61 | predictedLabels [estFxn < 0] = -1
62 | predictedLabels [estFxn = 0] = 1
63 |
64 | return(sum(predictedLabels == labels) / length(labels))
65 | }
66 |
67 | numEpochs = 100
68 | numStepsPerEpoch = 500
69 | nStepsPerPlot = 30
70 | evalidationSetSize = 50
71 | c1 = 0.01
72 | c2 = 50
73 |
74 | lambda_vals = c(0.001, 0.01, 0.1, 1)
75 | bestAccuracy = 0
76 |
77 |
78 | str(lambda_vals)
79 |
80 | accMat <- matrix(NA, nrow = (numStepsPerEpoch/nStepsPerPlot)*numEpochs+1, ncol = length(lambda_vals))
81 | accMatv <- matrix(NA, nrow = (numStepsPerEpoch/nStepsPerPlot)*numEpochs+1, ncol = length(lambda_vals))
82 |
83 |
84 | for(i in 1:4){
85 | lambda = lambda_vals[i]
86 | accMatRow = 1
87 | accMatCol = i
88 |
89 | a = rep(0,ncol(continuousFeatures))
90 | b = 0
91 |
92 | stepIndex = 0
93 |
94 | for (e in 1:numEpochs){
95 |
96 | etrainingData <- createDataPartition(y=trainingLabels, p=(1 - evalidationSetSize/length(trainingLabels)), list=FALSE)
97 |
98 | etrainingFeatures <- trainingFeatures[etrainingData,]
99 | etrainingLabels <- trainingLabels[etrainingData]
100 |
101 | evalidationFeatures <- trainingFeatures[-etrainingData,]
102 | evalidationLabels <- trainingLabels[-etrainingData]
103 |
104 | steplength = 1 / (e*c1 + c2)
105 |
106 | for (step in 1:numStepsPerEpoch){
107 | stepIndex = stepIndex+1
108 | index = sample.int(nrow(etrainingFeatures),1)
109 | xk = etrainingFeatures[index,]
110 | yk = etrainingLabels[index]
111 |
112 | costfxn = yk * (a %*% xk + b)
113 |
114 | if(costfxn >= 1){
115 |
116 | a_dir = lambda * a
117 | a = a - steplength * a_dir
118 |
119 | } else {
120 |
121 | a_dir = (lambda * a) - (yk * xk)
122 | a = a - steplength * a_dir
123 | b_dir = -yk
124 | b = b - (steplength * b_dir)
125 |
126 | }
127 |
128 |
129 | if (stepIndex %>% nStepsPerPlot == 1){#30){
130 | accMat[accMatRow,accMatCol] = getAccuracy(a,b,evalidationFeatures,evalidationLabels)
131 | accMatv[accMatRow,accMatCol] = getAccuracy(a,b,validationFeatures,validationLabels)
132 | accMatRow = accMatRow + 1
133 | }
134 |
135 | }
136 |
137 | }
138 |
139 | tempAccuracy = getAccuracy(a,b,validationFeatures,validationLabels)
140 | print(str_c("tempAcc = ", tempAccuracy," and bestAcc = ", bestAccuracy) )
141 | if(tempAccuracy > bestAccuracy){
142 | bestAccuracy = tempAccuracy
143 | best_a = a
144 | best_b = b
145 | best_lambdaIndex = i
146 | }
147 |
148 | }
149 |
150 | getAccuracy(best_a,best_b, testingFeatures, testingLabels)
151 |
152 | colors = c("red","blue","green","black")
153 |
154 | xaxislabel = "Step"
155 |
156 | yaxislabels = c("Accuracy on Randomized Epoch Validation Set","Accuracy on Validation Set")
157 |
158 | title="Accuracy as a Function of Step and Lambda"
159 |
160 | ylims=c(0,1)
161 |
162 | stepValues = seq(1,15000,length=500)
163 |
164 | mats = list(accMat,accMatv)
165 |
166 | for(j in 1:length(mats)){
167 |
168 | mat = mats[[j]]
169 |
170 | for(i in 1:4){
171 |
172 | if(i == 1){
173 | plot(stepValues, mat[1:500,i], type = "l",xlim=c(0, 15000), ylim=ylims,
174 | col=colors[i],xlab=xaxislabel,ylab=yaxislabels[j],main=title)
175 | } else{
176 | lines(stepValues, mat[1:500,i], type = "l",xlim=c(0, 15000), ylim=ylims,
177 | col=colors[i],xlab=xaxislabel,ylab=yaxislabels[j],main=title)
178 | }
179 | Sys.sleep(1)
180 | }
181 | legend(x=10000,y=.5,legend=c("lambda=.001","lambda=.01","lambda=.1","lambda=1"),fill=colors)
182 |
183 | }
184 |
--------------------------------------------------------------------------------
/Chapter 04/Data/delta.csv:
--------------------------------------------------------------------------------
1 | Aircraft,Seat Width (Club),Seat Pitch (Club),Seat (Club),Seat Width (First Class),Seat Pitch (First Class),Seats (First Class),Seat Width (Business),Seat Pitch (Business),Seats (Business),Seat Width (Eco Comfort),Seat Pitch (Eco Comfort),Seats (Eco Comfort),Seat Width (Economy),Seat Pitch (Economy),Seats (Economy),Accommodation,Cruising Speed (mph),Range (miles),Engines,Wingspan (ft),Tail Height (ft),Length (ft),Wifi,Video,Power,Satellite,Flat-bed,Sleeper,Club,First Class,Business,Eco Comfort,Economy
2 | Airbus A319,0,0,0,21,36,12,0,0,0,17.2,34,18,17.2,30.5,96,126,517,2399,2,111.83,38.583,111,1,0,0,0,0,0,0,1,0,1,1
3 | Airbus A319 VIP,19.4,44,12,19.4,40,28,21,59,14,0,0,0,0,0,0,54,517,3119,2,111.83,38.583,111,1,1,0,0,0,0,1,1,1,0,0
4 | Airbus A320,0,0,0,21,36,12,0,0,0,17.2,34,18,17.2,31.5,120,150,517,2420,2,111.83,38.583,123.25,1,0,0,0,0,0,0,1,0,1,1
5 | Airbus A320 32-R,0,0,0,21,36,12,0,0,0,17.2,34,18,17.2,31.5,120,150,517,2420,2,111.83,38.583,123.25,1,0,0,0,0,0,0,1,0,1,1
6 | Airbus A330-200,0,0,0,0,0,0,21,60,32,18,35,30,18,30.5,181,243,531,6536,2,197.83,59.83,188.67,0,1,1,0,1,0,0,0,1,1,1
7 | Airbus A330-200 (3L2),0,0,0,0,0,0,21,80,34,18,35,32,18,30.5,168,243,531,6536,2,197.83,59.83,188.67,0,1,1,0,0,1,0,0,1,1,1
8 | Airbus A330-200 (3L3),0,0,0,0,0,0,21,80,34,18,35,32,18,30.5,227,293,531,5343,2,197.83,56.33,208.83,0,1,1,0,1,0,0,0,1,1,1
9 | Airbus A330-300,0,0,0,0,0,0,20,60,34,18,35,32,18,30.5,232,298,531,5343,2,197.83,56.33,208.83,0,1,1,0,0,1,0,0,1,1,1
10 | Boeing 717,0,0,0,19.6,37,12,0,0,0,18.1,34,15,18.1,31,83,110,504,1510,2,93.33,29.083,120,1,0,1,0,0,0,0,1,0,1,1
11 | Boeing 737-700 (73W),0,0,0,21,37,12,0,0,0,17.2,34,18,17.2,30.5,94,124,517,2925,2,117.416,41.167,110.33,1,1,1,1,0,0,0,1,0,1,1
12 | Boeing 737-800 (738),0,0,0,21,37,16,0,0,0,17.2,34,18,17.2,30.5,126,124,517,2925,2,117.416,41.167,110.33,1,1,1,1,0,0,0,1,0,1,1
13 | Boeing 737-800 (73H),0,0,0,21,38,16,0,0,0,17.2,34,18,17.2,30.5,126,160,517,2850,2,117.416,41.167,129.5,1,1,1,1,0,0,0,1,0,1,1
14 | Boeing 737-900ER (739),0,0,0,21,37,20,0,0,0,17.2,34,21,17.2,30.5,139,180,517,2870,2,117.416,41.167,138.167,1,1,1,0,0,0,0,1,0,1,1
15 | Boeing 747-400 (74S),0,0,0,0,0,0,20.5,82,48,17.2,35,42,17.2,30.5,286,376,564,7365,4,213,62.5416,231.83,0,1,1,0,1,0,0,0,1,1,1
16 | Boeing 757-200 (75A),0,0,0,21,40,24,0,0,0,17.2,35,18,17.2,31,132,174,517,4344,2,134.75,44.5,155.25,0,1,0,0,0,0,0,1,0,1,1
17 | Boeing 757-200 (75E),0,0,0,0,0,0,21,54.5,16,17.2,35,25,17.2,30.5,130,171,517,4705,2,134.75,44.5,155.25,1,1,1,0,0,1,0,0,1,1,1
18 | Boeing 757-200 (75M),0,0,0,21,37,22,0,0,0,17.2,34.5,18,17.2,31,141,181,517,2854,2,134.75,44.5,155.25,1,0,0,0,0,0,0,1,0,1,1
19 | Boeing 757-200 (75N),0,0,0,21,37,22,0,0,0,17.2,34,19,17.2,31.5,141,182,517,2854,2,134.75,44.5,155.25,1,0,0,0,0,0,0,1,0,1,1
20 | Boeing 757-200 (757),0,0,0,21,37.5,24,0,0,0,17.2,34.5,20,17.2,31.5,136,180,517,3393,2,111.83,38.583,123.25,1,1,0,0,0,0,0,1,0,1,1
21 | Boeing 757-200 (75V),0,0,0,21,45,22,0,0,0,17.2,34.5,21,17.2,32,132,175,517,3764,2,134.75,44.5,155.25,1,1,0,0,0,0,0,1,0,1,1
22 | Boeing 757-200 (75X),0,0,0,0,0,0,21,38,26,17.2,34,26,17.2,31.5,132,184,517,3764,2,134.75,44.5,155.25,1,1,1,1,0,0,0,0,1,1,1
23 | Boeing 757-300,0,0,0,21,37,24,0,0,0,17.2,34,23,17.2,31,177,224,517,3228,2,124.83,45.083,177.416,1,1,0,0,0,0,0,1,0,1,1
24 | Boeing 767-300 (76G),0,0,0,0,0,0,18.5,60,30,17.9,35,38,17.9,31.5,140,208,517,6408,2,156.083,52,180.25,0,1,1,0,0,1,0,0,1,1,1
25 | Boeing 767-300 (76L),0,0,0,0,0,0,21,78.95,36,17.9,35,32,17.9,31.5,143,211,517,6408,2,156.083,52,180.25,0,1,1,0,1,0,0,0,1,1,1
26 | Boeing 767-300 (76P),0,0,0,18.5,37.5,30,0,0,0,17.9,34,28,17.9,31.5,203,261,517,3515,2,156.083,52,180.25,1,1,1,1,0,0,0,1,0,1,1
27 | Boeing 767-300 (76Q),0,0,0,18.5,37.5,30,0,0,0,17.9,34,28,17.9,31.5,203,261,517,3515,2,156.083,52,180.25,1,1,1,1,0,0,0,1,0,1,1
28 | Boeing 767-300 (76T),0,0,0,0,0,0,21,78.95,36,17.9,35,29,17.9,31.5,143,208,517,6640,2,156.083,52,180.25,0,1,1,0,1,0,0,0,1,1,1
29 | Boeing 767-300 (76U),0,0,0,18.5,60,36,0,0,0,17.9,35,31,17.9,31.5,143,210,517,6408,2,156.083,52,180.25,0,1,1,0,0,1,0,0,1,1,1
30 | Boeing 767-300 (76Z V.1),0,0,0,0,0,0,21,78.95,26,17.9,35,35,17.9,31.5,171,226,517,6221,2,156.083,52,180.25,0,1,1,0,1,0,0,0,1,1,1
31 | Boeing 767-300 (76Z V.2),0,0,0,0,0,0,21,78.95,26,17.9,35,29,17.9,31.5,171,226,517,6221,2,156.083,52,180.25,0,1,1,0,0,1,0,0,1,1,1
32 | Boeing 767-400 (76D),0,0,0,0,0,0,21,78.95,40,17.9,35,28,17.9,31.5,178,246,517,6336,2,171,54.917,201.33,0,1,1,0,1,0,0,0,1,1,1
33 | Boeing 777-200ER,0,0,0,0,0,0,21,78,45,18.5,36,36,18.5,31.5,188,269,550,8542,2,212.583,61.33,209.083,0,1,1,0,1,0,0,0,1,1,1
34 | Boeing 777-200LR,0,0,0,0,0,0,21,78,45,18.5,35,36,18.5,31.5,188,269,550,10375,2,212.583,61.33,209.083,0,1,1,0,1,0,0,0,1,1,1
35 | CRJ 100/200 Pinnacle/SkyWest,0,0,0,0,0,0,0,0,0,0,0,0,17.3,31,50,50,488,1265,2,69.583,20.416,87.83,0,0,0,0,0,0,0,0,0,0,1
36 | CRJ 100/200 ExpressJet,0,0,0,0,0,0,0,0,0,0,0,0,17.3,31,50,50,488,1265,2,69.583,20.416,87.83,0,0,0,0,0,0,0,0,0,0,1
37 | CRJ 700,0,0,0,19.6,36,9,0,0,0,17.3,34,8,17.3,31,48,65,515,1650,2,76.25,24.83,106.083,1,0,0,0,0,0,0,1,0,1,1
38 | CRJ 900,0,0,0,19.6,37,12,0,0,0,17.3,34,12,17.3,31,52,76,515,1744,2,81.583,24.583,118.916,1,0,0,0,0,0,0,1,0,1,1
39 | E120,0,0,0,0,0,0,0,0,0,0,0,0,18.2,31,28,28,364,920,2,64.916,20.83,65.583,0,0,0,0,0,0,0,0,0,0,1
40 | E170,0,0,0,20,37,9,0,0,0,18.25,34,12,18.25,31,48,69,545,1800,2,85.416,32.33,98.083,1,0,0,0,0,0,0,1,0,1,1
41 | E175,0,0,0,20,37,12,0,0,0,18.25,34,12,18.25,31,52,76,545,1800,2,85.416,31.916,103.916,1,0,0,0,0,0,0,1,0,1,1
42 | ERJ-145,0,0,0,0,0,0,0,0,0,0,0,0,17.3,31,50,50,517,1496,2,65.75,22.167,98,0,0,0,0,0,0,0,0,0,0,1
43 | MD-88,0,0,0,19.6,37,16,0,0,0,18.1,34,15,18.1,32,118,149,498,1510,2,107.83,30.5,147.83,1,0,1,0,0,0,0,1,0,1,1
44 | MD-90,0,0,0,19.6,37,16,0,0,0,18.1,34,15,18.1,31.5,129,160,498,1992,2,107.83,30.583,152.583,1,0,1,0,0,0,0,1,0,1,1
45 | MD-DC9-50,0,0,0,19.6,38,16,0,0,0,18.1,35,14,18.1,31,90,120,504,731,2,93.83,27.5,133.5,1,0,0,0,0,0,0,1,0,1,1
46 |
--------------------------------------------------------------------------------
/Chapter 02/Data/tobit.csv:
--------------------------------------------------------------------------------
1 | id,read,math,prog,apt
2 | 1,34,40,"vocational",352
3 | 2,39,33,"vocational",449
4 | 3,63,48,"general",648
5 | 4,44,41,"general",501
6 | 5,47,43,"general",762
7 | 6,47,46,"general",658
8 | 7,57,59,"general",800
9 | 8,39,52,"general",613
10 | 9,48,52,"vocational",531
11 | 10,47,49,"academic",528
12 | 11,34,45,"general",584
13 | 12,37,45,"vocational",610
14 | 13,47,39,"vocational",586
15 | 14,47,54,"general",769
16 | 15,39,44,"vocational",402
17 | 16,47,44,"vocational",521
18 | 17,47,48,"general",478
19 | 18,50,49,"vocational",629
20 | 19,28,43,"academic",603
21 | 20,60,57,"general",633
22 | 21,44,61,"academic",724
23 | 22,42,39,"vocational",515
24 | 23,65,64,"general",748
25 | 24,52,66,"general",634
26 | 25,47,42,"academic",630
27 | 26,60,62,"general",800
28 | 27,53,61,"general",652
29 | 28,39,54,"academic",621
30 | 29,52,49,"academic",683
31 | 30,41,42,"general",531
32 | 31,55,52,"academic",625
33 | 32,50,66,"vocational",605
34 | 33,57,72,"general",698
35 | 34,73,57,"general",679
36 | 35,60,50,"academic",691
37 | 36,44,44,"academic",612
38 | 37,41,40,"vocational",572
39 | 38,45,50,"general",625
40 | 39,66,67,"general",734
41 | 40,42,43,"academic",551
42 | 41,50,45,"general",549
43 | 42,46,55,"vocational",622
44 | 43,47,43,"general",557
45 | 44,47,45,"vocational",678
46 | 45,34,41,"vocational",467
47 | 46,45,44,"general",631
48 | 47,47,49,"general",625
49 | 48,57,52,"general",584
50 | 49,50,39,"vocational",485
51 | 50,50,42,"academic",568
52 | 51,42,42,"academic",593
53 | 52,50,53,"general",590
54 | 53,34,46,"vocational",529
55 | 54,47,46,"academic",661
56 | 55,52,49,"general",579
57 | 56,55,46,"vocational",502
58 | 57,71,72,"general",794
59 | 58,55,40,"vocational",529
60 | 59,65,63,"general",703
61 | 60,57,51,"general",635
62 | 61,76,60,"general",765
63 | 62,65,48,"academic",732
64 | 63,52,60,"academic",537
65 | 64,50,45,"vocational",648
66 | 65,55,66,"general",667
67 | 66,68,56,"vocational",576
68 | 67,37,42,"vocational",476
69 | 68,73,71,"general",797
70 | 69,44,40,"vocational",548
71 | 70,57,41,"academic",599
72 | 71,57,56,"academic",766
73 | 72,42,47,"vocational",596
74 | 73,50,53,"general",716
75 | 74,57,50,"general",661
76 | 75,60,51,"vocational",548
77 | 76,47,51,"general",595
78 | 77,61,49,"general",689
79 | 78,39,54,"general",577
80 | 79,60,49,"general",633
81 | 80,65,68,"general",713
82 | 81,63,59,"general",668
83 | 82,68,65,"general",800
84 | 83,50,41,"vocational",571
85 | 84,63,54,"academic",636
86 | 85,55,57,"academic",691
87 | 86,44,54,"academic",682
88 | 87,50,46,"academic",605
89 | 88,68,64,"general",618
90 | 89,35,40,"vocational",522
91 | 90,42,50,"general",671
92 | 91,50,56,"vocational",666
93 | 92,52,57,"academic",739
94 | 93,73,62,"general",800
95 | 94,55,61,"general",782
96 | 95,73,71,"general",800
97 | 96,65,61,"general",749
98 | 97,60,58,"general",613
99 | 98,57,51,"vocational",648
100 | 99,47,56,"academic",640
101 | 100,63,71,"general",793
102 | 101,60,67,"general",800
103 | 102,52,51,"general",698
104 | 103,76,64,"general",676
105 | 104,54,57,"general",630
106 | 105,50,45,"general",598
107 | 106,36,37,"vocational",404
108 | 107,47,47,"vocational",629
109 | 108,34,41,"academic",637
110 | 109,42,42,"academic",574
111 | 110,52,50,"vocational",620
112 | 111,39,39,"academic",622
113 | 112,52,48,"general",689
114 | 113,44,51,"general",556
115 | 114,68,62,"general",725
116 | 115,42,43,"academic",571
117 | 116,57,54,"general",681
118 | 117,34,39,"vocational",565
119 | 118,55,58,"academic",629
120 | 119,42,45,"academic",584
121 | 120,63,54,"general",589
122 | 121,68,53,"vocational",788
123 | 122,52,58,"general",779
124 | 123,68,56,"academic",605
125 | 124,42,41,"vocational",614
126 | 125,68,58,"general",768
127 | 126,42,57,"academic",715
128 | 127,63,57,"general",770
129 | 128,39,38,"general",508
130 | 129,44,46,"academic",527
131 | 130,43,55,"academic",685
132 | 131,65,57,"general",649
133 | 132,73,73,"general",800
134 | 133,50,40,"vocational",535
135 | 134,44,39,"academic",474
136 | 135,63,65,"general",696
137 | 136,65,70,"general",792
138 | 137,63,65,"general",800
139 | 138,43,40,"vocational",427
140 | 139,68,61,"general",800
141 | 140,44,40,"vocational",399
142 | 141,63,47,"vocational",566
143 | 142,47,52,"vocational",523
144 | 143,63,75,"vocational",800
145 | 144,60,58,"academic",712
146 | 145,42,38,"vocational",458
147 | 146,55,64,"general",688
148 | 147,47,53,"general",619
149 | 148,42,51,"vocational",565
150 | 149,63,49,"academic",727
151 | 150,42,57,"vocational",554
152 | 151,47,52,"vocational",633
153 | 152,55,56,"general",687
154 | 153,39,40,"vocational",665
155 | 154,65,66,"general",796
156 | 155,44,46,"academic",614
157 | 156,50,53,"general",618
158 | 157,68,58,"academic",733
159 | 158,52,55,"academic",657
160 | 159,55,54,"general",592
161 | 160,55,55,"general",746
162 | 161,57,72,"general",800
163 | 162,57,40,"vocational",702
164 | 163,52,64,"general",800
165 | 164,31,46,"vocational",516
166 | 165,36,54,"vocational",604
167 | 166,52,53,"general",669
168 | 167,63,35,"academic",563
169 | 168,52,57,"general",695
170 | 169,55,63,"academic",779
171 | 170,47,61,"general",712
172 | 171,60,60,"general",678
173 | 172,47,57,"general",618
174 | 173,50,61,"academic",650
175 | 174,68,71,"general",750
176 | 175,36,42,"academic",454
177 | 176,47,41,"general",586
178 | 177,55,62,"general",688
179 | 178,47,57,"vocational",640
180 | 179,47,60,"general",609
181 | 180,71,69,"general",800
182 | 181,50,45,"general",662
183 | 182,44,43,"general",462
184 | 183,63,49,"general",591
185 | 184,50,53,"vocational",496
186 | 185,63,55,"general",647
187 | 186,57,63,"general",681
188 | 187,57,57,"academic",800
189 | 188,63,56,"general",796
190 | 189,47,63,"general",669
191 | 190,47,54,"general",661
192 | 191,47,43,"general",567
193 | 192,65,63,"general",800
194 | 193,44,48,"general",666
195 | 194,63,69,"general",800
196 | 195,57,60,"academic",727
197 | 196,44,49,"general",539
198 | 197,50,50,"general",594
199 | 198,47,51,"general",616
200 | 199,52,50,"general",558
201 | 200,68,75,"general",800
202 |
--------------------------------------------------------------------------------
/Chapter 12/Case Study 1 - World Bank data Analysis.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 | install.packages("wbstats")
20 | install.packages("data.table")
21 | install.packages("googleVis")
22 |
23 | library(wbstats)
24 | library(data.table)
25 | library(googleVis)
26 |
27 |
28 | Pop_LifeExp_FertRt <- data.table(wb(indicator = c("SP.POP.TOTL", "SP.DYN.LE00.IN", "SP.DYN.TFRT.IN"), startdate = 1960, enddate = 2016))
29 |
30 | Pop_GDPUSD_HeadCnt <- data.table(wb(indicator = c("SP.POP.TOTL", "NY.GDP.MKTP.CD", "SI.POV.2DAY"), startdate = 1960, enddate = 2016))
31 |
32 | Pop_GDPUSD_Sanitation <- data.table(wb(indicator = c("SP.POP.TOTL", "NY.GDP.MKTP.CD", "SH.STA.ACSN"), startdate = 1960, enddate = 2016))
33 |
34 | GDPUSD_Electricity_CO2 <- data.table(wb(indicator = c("NY.GDP.MKTP.CD", "EG.ELC.ACCS.ZS", "EN.ATM.CO2E.KT"), startdate = 1960, enddate = 2016))
35 |
36 | dim(Pop_LifeExp_FertRt)
37 |
38 | dim(Pop_GDPUSD_HeadCnt)
39 |
40 | dim(Pop_GDPUSD_Sanitation)
41 |
42 | dim(GDPUSD_Electricity_CO2)
43 |
44 | str(Pop_LifeExp_FertRt)
45 |
46 | str(Pop_GDPUSD_HeadCnt)
47 |
48 | str(Pop_GDPUSD_Sanitation)
49 |
50 | str(GDPUSD_Electricity_CO2)
51 |
52 | head(Pop_LifeExp_FertRt)
53 |
54 | head(Pop_GDPUSD_HeadCnt)
55 |
56 |
57 | head(Pop_GDPUSD_Sanitation)
58 |
59 |
60 |
61 | head(GDPUSD_Electricity_CO2)
62 |
63 | dim(wb(indicator = "SP.POP.TOTL"))
64 |
65 | dim(wb(indicator = "SP.DYN.LE00.IN"))
66 |
67 | dim(wb(indicator = "SP.DYN.TFRT.IN"))
68 |
69 | dim(wb(indicator = "NY.GDP.MKTP.CD"))
70 |
71 | dim(wb(indicator = "SI.POV.2DAY"))
72 |
73 | dim(wb(indicator = "SH.STA.ACSN"))
74 |
75 | dim(wb(indicator = "EG.ELC.ACCS.ZS"))
76 |
77 | dim(wb(indicator = "EN.ATM.CO2E.KT"))
78 |
79 | Countries <- data.table(wbcountries())
80 |
81 | head(Countries)
82 |
83 | setkey(Pop_LifeExp_FertRt, iso2c)
84 |
85 | setkey(Pop_GDPUSD_HeadCnt, iso2c)
86 |
87 | setkey(Pop_GDPUSD_Sanitation, iso2c)
88 |
89 | setkey(GDPUSD_Electricity_CO2, iso2c)
90 |
91 |
92 | setkey(Countries, iso2c)
93 |
94 | head(setkey(Countries, iso2c))
95 |
96 | Pop_LifeExp_FertRt <- Countries[Pop_LifeExp_FertRt][ ! region %in% "Aggregates"]
97 |
98 |
99 | head(Pop_LifeExp_FertRt)
100 |
101 | Pop_GDPUSD_HeadCnt <- Countries[Pop_GDPUSD_HeadCnt][ ! region %in% "Aggregates"]
102 |
103 | Pop_GDPUSD_Sanitation <- Countries[Pop_GDPUSD_Sanitation][ ! region %in% "Aggregates"]
104 |
105 | GDPUSD_Electricity_CO2 <- Countries[GDPUSD_Electricity_CO2][ ! region %in% "Aggregates"]
106 |
107 |
108 | wPop_LifeExp_FertRt <- reshape(Pop_LifeExp_FertRt[, list(country, region, date, value, indicator)], v.names = "value", idvar=c("date", "country", "region"), timevar="indicator", direction = "wide")
109 |
110 | wPop_GDPUSD_HeadCnt <- reshape(Pop_GDPUSD_HeadCnt[, list(country, region, date, value, indicator)], v.names = "value", idvar=c("date", "country", "region"), timevar="indicator", direction = "wide")
111 |
112 | wPop_GDPUSD_Sanitation <- reshape(Pop_GDPUSD_Sanitation[, list(country, region, date, value, indicator)], v.names = "value", idvar=c("date", "country", "region"), timevar="indicator", direction = "wide")
113 |
114 | wGDPUSD_Electricity_CO2 <- reshape(GDPUSD_Electricity_CO2[, list(country, region, date, value, indicator)], v.names = "value", idvar=c("date", "country", "region"), timevar="indicator", direction = "wide")
115 |
116 | wPop_LifeExp_FertRt
117 |
118 | wGDPUSD_Electricity_CO2
119 |
120 | wPop_LifeExp_FertRt[, date := as.integer(date)]
121 |
122 | wPop_GDPUSD_HeadCnt[, date := as.integer(date)]
123 |
124 | wPop_GDPUSD_Sanitation[, date := as.integer(date)]
125 |
126 | wGDPUSD_Electricity_CO2[, date := as.integer(date)]
127 |
128 |
129 | setnames(wPop_LifeExp_FertRt, names(wPop_LifeExp_FertRt), c("Country", "Region", "Year", "Population", "Fertility", "LifeExpectancy"))
130 |
131 | setnames(wPop_GDPUSD_HeadCnt, names(wPop_GDPUSD_HeadCnt), c("Country", "Region", "Year", "Population", "GDPUSD", "PovertyHead"))
132 |
133 | setnames(wPop_GDPUSD_Sanitation, names(wPop_GDPUSD_Sanitation), c("Country", "Region", "Year", "Population", "GDPUSD", "SanitationAccess"))
134 |
135 | setnames(wGDPUSD_Electricity_CO2, names(wGDPUSD_Electricity_CO2), c("Country", "Region", "Year", "GDPUSD", "ElectricityConsumption", "CO2Emissions"))
136 |
137 | pltPop_LifeExp_FertRt <- gvisMotionChart(wPop_LifeExp_FertRt, idvar = "Country", timevar = "Year", xvar = "LifeExpectancy", yvar = "Fertility", sizevar = "Population", colorvar = "Region")
138 |
139 | plot(pltPop_LifeExp_FertRt)
140 |
141 | pltPop_GDPUSD_HeadCnt <- gvisMotionChart(wPop_GDPUSD_HeadCnt, idvar = "Country", timevar = "Year", xvar = "GDPUSD", yvar = "PovertyHead", sizevar = "Population", colorvar = "Region")
142 |
143 | plot(pltPop_GDPUSD_HeadCnt)
144 |
145 | pltPop_GDPUSD_Sanitation <- gvisMotionChart(wPop_GDPUSD_Sanitation, idvar = "Country", timevar = "Year", xvar = "GDPUSD", yvar = "SanitationAccess", sizevar = "Population", colorvar = "Region")
146 |
147 | plot(pltPop_GDPUSD_Sanitation)
148 |
149 | pltGDPUSD_Electricity_CO2 <- gvisMotionChart(wGDPUSD_Electricity_CO2, idvar = "Country", timevar = "Year", xvar = "GDPUSD", yvar = "ElectricityAccess", sizevar = "CO2Emissions", colorvar = "Region")
150 |
151 | plot(pltGDPUSD_Electricity_CO2)
152 |
--------------------------------------------------------------------------------
/Chapter 03/3Hierarchical Clustering - Gene Clustering.r:
--------------------------------------------------------------------------------
1 | install.packages(c("RColorBrewer", "cluster", "pvclust", "xtable", "plyr"))
2 | library(RColorBrewer)
3 | library(cluster)
4 | library(pvclust)
5 | library(xtable)
6 | library(plyr)
7 |
8 |
9 | GSE4051_data <- read.csv("d:/GSE4051_data.csv", header = TRUE)
10 | str(GSE4051_data, max.level = 0)
11 | 'data.frame': 29949 obs. of 39 variables:
12 |
13 |
14 | GSE4051_design <- read.csv("d:/GSE4051_design.csv", header = TRUE)
15 | str(GSE4051_design)
16 | 'data.frame': 39 obs. of 4 variables:
17 | $ sidChar : Factor w/ 39 levels "Sample_1","Sample_10",..: 13 14 15 16 8 9 36 17 18 19 ...
18 | $ sidNum : int 20 21 22 23 16 17 6 24 25 26 ...
19 | $ devStage: Factor w/ 5 levels "4_weeks","E16",..: 2 2 2 2 2 2 2 4 4 4 ...
20 | $ gType : Factor w/ 2 levels "NrlKO","wt": 2 2 2 2 1 1 1 2 2 2 ...
21 |
22 |
23 | trans_GSE4051_data <- t(scale(t(GSE4051_data)))
24 | str(trans_GSE4051_data, max.level = 0, give.attr = FALSE)
25 | num [1:29949, 1:39] 0.0838 0.1758 0.7797 -0.3196 0.8358 ...
26 |
27 |
28 | round(data.frame(avgBefore = rowMeans(head(GSE4051_data)),
29 | avgAfter = rowMeans(head(trans_GSE4051_data)),
30 | varBefore = apply(head(GSE4051_data), 1, var),
31 | varAfter = apply(head(trans_GSE4051_data), 1, var)), 2)
32 | avgBefore avgAfter varBefore varAfter
33 | 1 7.22 0 0.02 1
34 | 2 9.37 0 0.35 1
35 | 3 9.70 0 0.15 1
36 | 4 8.42 0 0.03 1
37 | 5 8.47 0 0.02 1
38 | 6 9.67 0 0.03 1
39 |
40 |
41 | pair_dist_GSE4051_data <- dist(t(trans_GSE4051_data), method = 'euclidean')
42 |
43 |
44 | GSE4051_design$group <- with(GSE4051_design, interaction(gType, devStage))
45 | summary(GSE4051_design$group)
46 | NrlKO.4_weeks wt.4_weeks NrlKO.E16 wt.E16 NrlKO.P10 wt.P10 NrlKO.P2 wt.P2 NrlKO.P6
47 | 4 4 3 4 4 4 4 4 4
48 | wt.P6
49 | 4
50 |
51 |
52 | pr.hc.single <- hclust(pair_dist_GSE4051_data, method = 'single')
53 | pr.hc.single
54 | Call:
55 | hclust(d = pr.dis, method = "single")
56 |
57 | Cluster method : single
58 | Distance : euclidean
59 | Number of objects: 39
60 |
61 |
62 | pr.hc.complete <- hclust(pair_dist_GSE4051_data, method = 'complete')
63 | pr.hc.complete
64 |
65 | Call:
66 | hclust(d = pr.dis, method = "complete")
67 |
68 | Cluster method : complete
69 |
70 |
71 | pr.hc.average <- hclust(pair_dist_GSE4051_data, method = 'average')
72 | pr.hc.average
73 |
74 | Call:
75 | hclust(d = pr.dis, method = "average")
76 |
77 | Cluster method : average
78 | Distance : euclidean
79 | Number of objects: 39
80 |
81 |
82 | pr.hc.ward <- hclust(pair_dist_GSE4051_data, method = 'ward.D2')
83 | pr.hc.ward
84 |
85 | Call:
86 | hclust(d = pr.dis, method = "ward.D2")
87 |
88 | Cluster method : ward.D2
89 | Distance : euclidean
90 | Number of objects: 39
91 |
92 |
93 | Distance : euclidean
94 | Number of objects: 39
95 |
96 |
97 | op <- par(mar = c(0,4,4,2), mfrow = c(2,2))
98 |
99 |
100 | plot(pr.hc.single, labels = FALSE, main = "Single Linkage Representation", xlab = "")
101 |
102 | plot(pr.hc.complete, labels = FALSE, main = "Complete Linkage Representation", xlab = "")
103 |
104 | plot(pr.hc.average, labels = FALSE, main = "Average Linkage Representation", xlab = "")
105 |
106 | plot(pr.hc.ward, labels = FALSE, main = "Ward Linkage Representation", xlab = "")
107 |
108 | par(op)
109 |
110 | op <- par(mar = c(1,4,4,1))
111 |
112 | plot(pr.hc.single, labels = GSE4051_design$group, cex = 0.6, main = "Single Hierarchical Cluster - 10 clusters")
113 | rect.hclust(pr.hc.single, k = 10)
114 | par(op)
115 | jGraysFun <- colorRampPalette(brewer.pal(n = 9, "Blues"))
116 | gTypeCols <- brewer.pal(9, "Spectral")[c(4,7)]
117 | heatmap(as.matrix(trans_GSE4051_data), Rowv = NA, col = jGraysFun(256), hclustfun = function(x) hclust(x, method = 'single'),
118 | scale = "none", labCol = GSE4051_design $group, labRow = NA, margins = c(8,1),
119 | ColSideColor = gTypeCols[unclass(GSE4051_design$gType)])
120 | legend("topright", legend = levels(GSE4051_design$gType), col = gTypeCols, lty = 1, lwd = 5, cex = 0.5)
121 |
122 |
123 | plot(pr.hc.complete, labels = GSE4051_design$group, cex = 0.6, main = "Complete Hierarchical Cluster - 10 clusters")
124 | rect.hclust(pr.hc.complete, k = 10)
125 | par(op)
126 | jGraysFun <- colorRampPalette(brewer.pal(n = 9, "Greens"))
127 | gTypeCols <- brewer.pal(11, "PRGn")[c(4,7)]
128 | heatmap(as.matrix(trans_GSE4051_data), Rowv = NA, col = jGraysFun(256), hclustfun = function(x) hclust(x, method = 'complete'),
129 | scale = "none", labCol = GSE4051_design$group, labRow = NA, margins = c(8,1),
130 | ColSideColor = gTypeCols[unclass(GSE4051_design$gType)])
131 | legend("topright", legend = levels(GSE4051_design$gType), col = gTypeCols, lty = 1, lwd = 5, cex = 0.5)
132 |
133 |
134 | plot(pr.hc.average, labels = GSE4051_design$group, cex = 0.6, main = "Average Hierarchical Cluster - 10 clusters")
135 | rect.hclust(pr.hc.average, k = 10)
136 | jGraysFun <- colorRampPalette(brewer.pal(n = 9, "Oranges"))
137 | gTypeCols <- brewer.pal(9, "Oranges")[c(4,7)]
138 | heatmap(as.matrix(trans_GSE4051_data), Rowv = NA, col = jGraysFun(256), hclustfun = function(x) hclust(x, method = 'average'),
139 | scale = "none", labCol = GSE4051_design$group, labRow = NA, margins = c(8,1),
140 | ColSideColor = gTypeCols[unclass(GSE4051_design$gType)])
141 | legend("topright", legend = levels(GSE4051_design$gType), col = gTypeCols, lty = 1, lwd = 5, cex = 0.5)
142 |
143 |
144 | plot(pr.hc.ward, labels = GSE4051_design$group, cex = 0.6, main = "Ward Hierarchical Cluster - 10 clusters")
145 | rect.hclust(pr.hc.ward, k = 10)
146 | jGraysFun <- colorRampPalette(brewer.pal(n = 9, "Reds"))
147 | gTypeCols <- brewer.pal(9, "Reds")[c(4,7)]
148 | heatmap(as.matrix(trans_GSE4051_data), Rowv = NA, col = jGraysFun(256), hclustfun = function(x) hclust(x, method = 'ward.D2'),
149 | scale = "none", labCol = GSE4051_design$group, labRow = NA, margins = c(8,1),
150 | ColSideColor = gTypeCols[unclass(GSE4051_design$gType)])
151 | legend("topright", legend = levels(GSE4051_design$gType), col = gTypeCols, lty = 1, lwd = 5, cex = 0.5)
152 |
153 |
154 |
155 |
--------------------------------------------------------------------------------
/Chapter 03/1Hierarchical Clustering - World Bank.r:
--------------------------------------------------------------------------------
1 | R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
2 | Copyright (C) 2015 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 |
20 | wbclust=read.csv("d:/WBClust2013.csv",header=T)
21 | head(wbclust)
22 |
23 | wbnorm<-scale(wbclust[,2:13])
24 | wbnorm
25 |
26 | rownames(wbnorm)=wbclust[,1]
27 | rownames(wbnorm)
28 |
29 | dist1<-dist(wbnorm, method="euclidean")
30 |
31 | clust1<-hclust(dist1,method="ward.D")
32 | clust1
33 |
34 | plot(clust1,labels= wbclust$Country, cex=0.7, xlab="",ylab="Distance",main="Clustering for 80 Most Populous Countries")
35 |
36 |
37 | rect.hclust(clust1,k=5)
38 |
39 | cuts=cutree(clust1,k=5)
40 |
41 | cuts
42 | China India United States Indonesia
43 | 1 2 1 2
44 | Brazil Pakistan Nigeria Bangladesh
45 | 2 3 4 3
46 | Russian Federation Japan Mexico Philippines
47 | 1 1 2 2
48 | Ethiopia Vietnam Egypt, Arab Rep. Germany
49 | 4 5 2 5
50 | Turkey Thailand France United Kingdom
51 | 1 5 1 1
52 | Italy South Africa Korea, Rep. Tanzania
53 | 1 1 5 4
54 | Colombia Spain Ukraine Kenya
55 | 2 1 5 4
56 | Algeria Sudan Canada Iraq
57 | 2 3 1 2
58 | Morocco Peru Uzbekistan Malaysia
59 | 2 2 2 5
60 | Saudi Arabia Nepal Ghana Mozambique
61 | 1 3 3 4
62 | Australia Cameroon Angola Sri Lanka
63 | 1 4 4 3
64 | Cote d'Ivoire Chile Kazakhstan Netherlands
65 | 4 1 1 5
66 | Ecuador Guatemala Cambodia Zambia
67 | 2 2 3 4
68 | Zimbabwe Senegal Belgium Greece
69 | 3 3 5 1
70 | Tunisia Bolivia Czech Republic Portugal
71 | 5 2 5 1
72 | Dominican Republic Benin Haiti Hungary
73 | 2 3 3 5
74 | Sweden Belarus Azerbaijan United Arab Emirates
75 | 5 5 2 5
76 | Austria Tajikistan Honduras Switzerland
77 | 5 3 3 5
78 | Israel Bulgaria Serbia Togo
79 | 1 5 1 4
80 | Paraguay Jordan El Salvador Nicaragua
81 | 3 5 2 3
82 |
83 | for (i in 1:5){
84 | print(paste("Countries in Cluster ",i))
85 | print(wbclust$Country[cuts==i])
86 | print (" ")
87 | }
88 | [1] "Countries in Cluster 1"
89 | [1] China United States Russian Federation Japan
90 | [5] Turkey France United Kingdom Italy
91 | [9] South Africa Spain Canada Saudi Arabia
92 | [13] Australia Chile Kazakhstan Greece
93 | [17] Portugal Israel Serbia
94 | 80 Levels: Algeria Angola Australia Austria Azerbaijan Bangladesh Belarus ... Zimbabwe
95 | [1] " "
96 | [1] "Countries in Cluster 2"
97 | [1] India Indonesia Brazil Mexico
98 | [5] Philippines Egypt, Arab Rep. Colombia Algeria
99 | [9] Iraq Morocco Peru Uzbekistan
100 | [13] Ecuador Guatemala Bolivia Dominican Republic
101 | [17] Azerbaijan El Salvador
102 | 80 Levels: Algeria Angola Australia Austria Azerbaijan Bangladesh Belarus ... Zimbabwe
103 | [1] " "
104 | [1] "Countries in Cluster 3"
105 | [1] Pakistan Bangladesh Sudan Nepal Ghana Sri Lanka Cambodia
106 | [8] Zimbabwe Senegal Benin Haiti Tajikistan Honduras Paraguay
107 | [15] Nicaragua
108 | 80 Levels: Algeria Angola Australia Austria Azerbaijan Bangladesh Belarus ... Zimbabwe
109 | [1] " "
110 | [1] "Countries in Cluster 4"
111 | [1] Nigeria Ethiopia Tanzania Kenya Mozambique
112 | [6] Cameroon Angola Cote d'Ivoire Zambia Togo
113 | 80 Levels: Algeria Angola Australia Austria Azerbaijan Bangladesh Belarus ... Zimbabwe
114 | [1] " "
115 | [1] "Countries in Cluster 5"
116 | [1] Vietnam Germany Thailand
117 | [4] Korea, Rep. Ukraine Malaysia
118 | [7] Netherlands Belgium Tunisia
119 | [10] Czech Republic Hungary Sweden
120 | [13] Belarus United Arab Emirates Austria
121 | [16] Switzerland Bulgaria Jordan
122 | 80 Levels: Algeria Angola Australia Austria Azerbaijan Bangladesh Belarus ... Zimbabwe
123 | [1] " "
--------------------------------------------------------------------------------
/Chapter 04/1Shrinkage Methods - Calories burnt per day.r:
--------------------------------------------------------------------------------
1 | R version 3.2.3 (2015-12-10) -- "Wooden Christmas-Tree"
2 | Copyright (C) 2015 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 | install.packages("glmnet")
19 | install.packages("dplyr")
20 | install.packages("tidyr")
21 | install.packages("ggplot2")
22 | install.packages("caret")
23 | install.packages("glmnet")
24 | install.packages("boot")
25 | install.packages("RColorBrewer")
26 | install.packages("glmnet")
27 | install.packages("Metrics")
28 |
29 | library(dplyr)
30 | library(tidyr)
31 | library(ggplot2)
32 | library(caret)
33 | library(glmnet)
34 | library(boot)
35 | library(RColorBrewer)
36 | library(glmnet)
37 | library(Metrics)
38 |
39 | fitbit_details <- read.csv("https://raw.githubusercontent.com/ellisp/ellisp.github.io/source/data/fitbit_export_20160806.csv",
40 | skip = 1, stringsAsFactors = FALSE) %>%
41 | mutate(
42 | Calories.Burned = as.numeric(gsub(",", "", Calories.Burned)),
43 | Steps = as.numeric(gsub(",", "", Steps)),
44 | Activity.Calories = as.numeric(gsub(",", "", Activity.Calories)),
45 | Date = as.Date(Date, format = "%d/%m/%Y")
46 | )
47 |
48 | fitbit <- fitbit_details
49 |
50 | head(fitbit)
51 |
52 |
53 |
54 | fitbit$Activity.Calories <- NULL
55 |
56 | fitbit$Date <- NULL
57 |
58 | fitbit$Steps <- fitbit$Steps / 1000
59 |
60 | fitbit$Steps
61 |
62 | panel_correlations <- function(x, y, digits = 2, prefix = "", cex.cor, ...){
63 | usr <- par("usr"); on.exit(par(usr))
64 | par(usr = c(0, 1, 0, 1))
65 | r <- abs(cor(x, y))
66 | txt <- format(c(r, 0.123456789), digits = digits)[1]
67 | txt <- paste0(prefix, txt)
68 | if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
69 | text(0.5, 0.5, txt, cex = cex.cor * r)
70 | }
71 |
72 | pairs(fitbit[ , -1], lower.panel = panel_correlations, main = "Pairwise Relationship - Fitbit's Measured Activities")
73 |
74 | ggplot(fitbit, aes(x = Distance / Steps)) + geom_rug() + geom_density() +ggtitle("Stride Length Reverse- Engineered from Fitbit Data", subtitle = "Not all strides identical, due to rounding or other jitter")
75 |
76 | moderate <- lm(Calories.Burned ~ Steps, data = fitbit)
77 |
78 | moderate
79 |
80 |
81 | round(coef(moderate))
82 |
83 | plot(moderate, which = 1, bty = "l", main ="Predicted Calories compared with Residuals")
84 |
85 | pacf(resid(moderate), main = "Partial Autocorrelation of residuals from single variable regression")
86 | grid()
87 |
88 | X <- as.matrix(fitbit[ , -1])
89 |
90 | head(X)
91 |
92 | Y <- fitbit$Calories.Burned
93 |
94 | Y
95 |
96 |
97 | set.seed(123)
98 |
99 |
100 | alphas <- seq(from = 0, to = 1, length.out = 10)
101 |
102 |
103 | res <- matrix(0, nrow = length(alphas), ncol = 6)
104 |
105 |
106 | for(i in 1:length(alphas)){
107 | for(j in 2:6){
108 | cvmod <- cv.glmnet(X, Y, alpha = alphas[i])
109 | res[i, c(1, j)] <- c(alphas[i], sqrt(min(cvmod$cvm)))
110 | }
111 | }
112 |
113 | res <- data.frame(res)
114 |
115 | res
116 |
117 | res$average_rmse <- apply(res[ , 2:6], 1, mean)
118 |
119 | res$average_rmse
120 | [1] 109.2894 108.6945 108.4060 109.9822 111.0122 108.6028 106.0425 108.7103 109.5337 109.7035
121 |
122 |
123 |
124 | res <- res[order(res$average_rmse), ]
125 |
126 | res
127 |
128 |
129 | names(res)[1] <- "alpha"
130 |
131 | res %>%
132 | select(-average_rmse) %>%
133 | gather(trial, rmse, -alpha) %>%
134 | ggplot(aes(x = alpha, y = rmse)) +
135 | geom_point() +
136 | geom_smooth(se = FALSE) +
137 | labs(y = "Root Mean Square Error") +
138 | ggtitle("Cross Validation best RMSE for differing values of alpha")
139 |
140 |
141 |
142 | bestalpha <- res[1, 1]
143 |
144 | bestalpha
145 |
146 | crossvalidated <- cv.glmnet(X, Y, alpha = bestalpha)
147 |
148 |
149 | moderate1 <- glmnet(X, Y, alpha = bestalpha)
150 |
151 |
152 | OLSmodel <- lm(Calories.Burned ~ ., data = fitbit)
153 |
154 | OLSmodel
155 |
156 |
157 | coeffs <- data.frame(original = coef(OLSmodel),
158 | shrunk = as.vector(coef(moderate1, s = crossvalidated$lambda.min)),
159 | very.shrunk = as.vector(coef(moderate1, s = crossvalidated$lambda.1se)))
160 |
161 | coeffs
162 |
163 |
164 | round(coeffs, 3)
165 |
166 | moderate2 <- glmnet(X, Y, lambda = 0)
167 |
168 |
169 | moderate2
170 |
171 |
172 | round(data.frame("elastic, lambda = 0" = as.vector(coef(moderate2)), "lm" = coef(OLSmodel), check.names = FALSE), 3)
173 |
174 |
175 | moderate3 <- glmnet(X[ , -2], Y, lambda = 0)
176 |
177 | moderate3
178 |
179 |
180 | moderate4 <- lm(Y ~ X[ , -2])
181 |
182 | moderate4
183 |
184 |
185 | round(data.frame("elastic, lambda = 0" = as.vector(coef(moderate3)), "lm" = coef(moderate4), check.names = FALSE), 3)
186 |
187 |
188 | > modellingfucn1 <- function(data, i){
189 | X <- as.matrix(data[i , -1])
190 | Y <- data[i , 1]
191 | # k-fold cross-validation for glmnet
192 | crossvalidated <- cv.glmnet(X, Y, alpha = 1, nfolds = 30)
193 | # Fitting a generalized linear model via penalized maximum likelihood
194 | moderate1 <- glmnet(X, Y, alpha = 1)
195 | # Computing the root mean squared error
196 | rmse(predict(moderate1, newx = as.matrix(data[ , -1]), s = crossvalidated$lambda.min), data[ , 1])
197 | }
198 |
199 |
200 |
201 | elastic_boot <- boot(fitbit, statistic = modellingfucn1, R = 99)
202 |
203 |
204 | elastic_boot
205 |
206 |
207 |
208 | modellingfucn2 <- function(data, i){
209 | OLSmodel <- lm(Calories.Burned ~ ., data = data[i, ])
210 | rmse(predict(OLSmodel, newdata = data), data[ , 1])
211 | }
212 |
213 | lm_boot <- boot(fitbit, statistic = modellingfucn2, R = 99)
214 |
215 | lm_boot
216 |
217 |
218 |
219 | modellingOLS <- function(data, i){
220 | mod0 <- lm(Calories.Burned ~ Steps, data = data[i, ])
221 | rmse(predict(moderate, newdata = data), data[ , 1])
222 | }
223 |
224 |
225 | lmOLS_boot <- boot(fitbit, statistic = modellingOLS, R = 99)
226 |
227 | lmOLS_boot
228 |
229 |
230 |
231 | round(c("elastic modelling" = mean(elastic_boot$t),
232 | "OLS modelling" = mean(lm_boot$t),
233 | "OLS modelling, only one explanatory variable" = mean(lmOLS_boot$t)), 1)
234 | elastic modelling OLS modelling
235 | 95.8 99.7
236 | OLS modelling, only one explanatory variable
237 | 159.7
238 |
239 |
240 |
241 |
242 | ordering <- c(7,5,6,2,1,3,4)
243 | par(mar = c(5.1, 4.1, 6.5, 1), bg = "grey90")
244 |
245 | model_scaled <- glmnet(scale(X), Y, alpha = bestalpha)
246 |
247 | the_palette <- brewer.pal(7, "Set1")
248 |
249 |
250 | plot(model_scaled, xvar = "dev", label = TRUE, col = the_palette, lwd = 2, main = "Increasing contribution of different explanatory variables\nas penalty for including them is relaxed")
251 |
252 | legend("topleft", legend = colnames(X)[ordering], text.col = the_palette[ordering], lwd = 2, bty = "n", col = the_palette[ordering])
253 |
254 |
--------------------------------------------------------------------------------
/Chapter 10/Data/FRED-WIUR.csv:
--------------------------------------------------------------------------------
1 | DATE,VALUE
2 | 2013-04-01,6.8
3 | 2013-03-01,6.9
4 | 2013-02-01,6.9
5 | 2013-01-01,6.9
6 | 2012-12-01,6.9
7 | 2012-11-01,6.9
8 | 2012-10-01,6.9
9 | 2012-09-01,7.0
10 | 2012-08-01,7.0
11 | 2012-07-01,7.1
12 | 2012-06-01,7.1
13 | 2012-05-01,7.1
14 | 2012-04-01,7.1
15 | 2012-03-01,7.1
16 | 2012-02-01,7.1
17 | 2012-01-01,7.1
18 | 2011-12-01,7.2
19 | 2011-11-01,7.4
20 | 2011-10-01,7.5
21 | 2011-09-01,7.6
22 | 2011-08-01,7.7
23 | 2011-07-01,7.8
24 | 2011-06-01,7.8
25 | 2011-05-01,7.8
26 | 2011-04-01,7.9
27 | 2011-03-01,7.9
28 | 2011-02-01,7.9
29 | 2011-01-01,8.0
30 | 2010-12-01,8.1
31 | 2010-11-01,8.2
32 | 2010-10-01,8.3
33 | 2010-09-01,8.3
34 | 2010-08-01,8.4
35 | 2010-07-01,8.4
36 | 2010-06-01,8.5
37 | 2010-05-01,8.7
38 | 2010-04-01,8.9
39 | 2010-03-01,9.0
40 | 2010-02-01,9.1
41 | 2010-01-01,9.2
42 | 2009-12-01,9.2
43 | 2009-11-01,9.1
44 | 2009-10-01,9.1
45 | 2009-09-01,9.0
46 | 2009-08-01,9.0
47 | 2009-07-01,9.0
48 | 2009-06-01,8.9
49 | 2009-05-01,8.8
50 | 2009-04-01,8.6
51 | 2009-03-01,8.2
52 | 2009-02-01,7.7
53 | 2009-01-01,7.2
54 | 2008-12-01,6.6
55 | 2008-11-01,6.0
56 | 2008-10-01,5.6
57 | 2008-09-01,5.2
58 | 2008-08-01,5.0
59 | 2008-07-01,4.8
60 | 2008-06-01,4.6
61 | 2008-05-01,4.5
62 | 2008-04-01,4.4
63 | 2008-03-01,4.4
64 | 2008-02-01,4.5
65 | 2008-01-01,4.6
66 | 2007-12-01,4.8
67 | 2007-11-01,4.9
68 | 2007-10-01,5.0
69 | 2007-09-01,5.0
70 | 2007-08-01,5.1
71 | 2007-07-01,5.0
72 | 2007-06-01,5.0
73 | 2007-05-01,4.9
74 | 2007-04-01,4.9
75 | 2007-03-01,4.9
76 | 2007-02-01,4.9
77 | 2007-01-01,4.9
78 | 2006-12-01,4.9
79 | 2006-11-01,4.9
80 | 2006-10-01,4.8
81 | 2006-09-01,4.8
82 | 2006-08-01,4.8
83 | 2006-07-01,4.7
84 | 2006-06-01,4.7
85 | 2006-05-01,4.7
86 | 2006-04-01,4.7
87 | 2006-03-01,4.7
88 | 2006-02-01,4.7
89 | 2006-01-01,4.7
90 | 2005-12-01,4.8
91 | 2005-11-01,4.8
92 | 2005-10-01,4.8
93 | 2005-09-01,4.8
94 | 2005-08-01,4.7
95 | 2005-07-01,4.7
96 | 2005-06-01,4.6
97 | 2005-05-01,4.6
98 | 2005-04-01,4.6
99 | 2005-03-01,4.6
100 | 2005-02-01,4.7
101 | 2005-01-01,4.7
102 | 2004-12-01,4.8
103 | 2004-11-01,4.8
104 | 2004-10-01,4.8
105 | 2004-09-01,4.9
106 | 2004-08-01,4.9
107 | 2004-07-01,5.0
108 | 2004-06-01,5.0
109 | 2004-05-01,5.1
110 | 2004-04-01,5.1
111 | 2004-03-01,5.2
112 | 2004-02-01,5.3
113 | 2004-01-01,5.3
114 | 2003-12-01,5.4
115 | 2003-11-01,5.5
116 | 2003-10-01,5.6
117 | 2003-09-01,5.7
118 | 2003-08-01,5.8
119 | 2003-07-01,5.8
120 | 2003-06-01,5.8
121 | 2003-05-01,5.8
122 | 2003-04-01,5.8
123 | 2003-03-01,5.8
124 | 2003-02-01,5.7
125 | 2003-01-01,5.6
126 | 2002-12-01,5.5
127 | 2002-11-01,5.4
128 | 2002-10-01,5.4
129 | 2002-09-01,5.3
130 | 2002-08-01,5.3
131 | 2002-07-01,5.3
132 | 2002-06-01,5.3
133 | 2002-05-01,5.4
134 | 2002-04-01,5.4
135 | 2002-03-01,5.5
136 | 2002-02-01,5.5
137 | 2002-01-01,5.4
138 | 2001-12-01,5.3
139 | 2001-11-01,5.1
140 | 2001-10-01,4.9
141 | 2001-09-01,4.8
142 | 2001-08-01,4.6
143 | 2001-07-01,4.5
144 | 2001-06-01,4.5
145 | 2001-05-01,4.4
146 | 2001-04-01,4.3
147 | 2001-03-01,4.2
148 | 2001-02-01,4.1
149 | 2001-01-01,3.9
150 | 2000-12-01,3.8
151 | 2000-11-01,3.7
152 | 2000-10-01,3.7
153 | 2000-09-01,3.7
154 | 2000-08-01,3.7
155 | 2000-07-01,3.7
156 | 2000-06-01,3.6
157 | 2000-05-01,3.5
158 | 2000-04-01,3.4
159 | 2000-03-01,3.3
160 | 2000-02-01,3.2
161 | 2000-01-01,3.2
162 | 1999-12-01,3.2
163 | 1999-11-01,3.2
164 | 1999-10-01,3.1
165 | 1999-09-01,3.1
166 | 1999-08-01,3.1
167 | 1999-07-01,3.0
168 | 1999-06-01,3.0
169 | 1999-05-01,3.0
170 | 1999-04-01,3.1
171 | 1999-03-01,3.1
172 | 1999-02-01,3.2
173 | 1999-01-01,3.4
174 | 1998-12-01,3.5
175 | 1998-11-01,3.6
176 | 1998-10-01,3.6
177 | 1998-09-01,3.6
178 | 1998-08-01,3.5
179 | 1998-07-01,3.4
180 | 1998-06-01,3.3
181 | 1998-05-01,3.2
182 | 1998-04-01,3.1
183 | 1998-03-01,3.1
184 | 1998-02-01,3.2
185 | 1998-01-01,3.2
186 | 1997-12-01,3.3
187 | 1997-11-01,3.4
188 | 1997-10-01,3.4
189 | 1997-09-01,3.5
190 | 1997-08-01,3.5
191 | 1997-07-01,3.6
192 | 1997-06-01,3.6
193 | 1997-05-01,3.6
194 | 1997-04-01,3.7
195 | 1997-03-01,3.7
196 | 1997-02-01,3.7
197 | 1997-01-01,3.6
198 | 1996-12-01,3.6
199 | 1996-11-01,3.5
200 | 1996-10-01,3.5
201 | 1996-09-01,3.5
202 | 1996-08-01,3.5
203 | 1996-07-01,3.5
204 | 1996-06-01,3.6
205 | 1996-05-01,3.7
206 | 1996-04-01,3.7
207 | 1996-03-01,3.8
208 | 1996-02-01,3.8
209 | 1996-01-01,3.8
210 | 1995-12-01,3.8
211 | 1995-11-01,3.8
212 | 1995-10-01,3.8
213 | 1995-09-01,3.7
214 | 1995-08-01,3.7
215 | 1995-07-01,3.7
216 | 1995-06-01,3.7
217 | 1995-05-01,3.7
218 | 1995-04-01,3.8
219 | 1995-03-01,3.8
220 | 1995-02-01,3.8
221 | 1995-01-01,3.8
222 | 1994-12-01,3.9
223 | 1994-11-01,4.1
224 | 1994-10-01,4.2
225 | 1994-09-01,4.3
226 | 1994-08-01,4.4
227 | 1994-07-01,4.5
228 | 1994-06-01,4.5
229 | 1994-05-01,4.6
230 | 1994-04-01,4.7
231 | 1994-03-01,4.8
232 | 1994-02-01,4.9
233 | 1994-01-01,4.9
234 | 1993-12-01,4.9
235 | 1993-11-01,4.8
236 | 1993-10-01,4.7
237 | 1993-09-01,4.7
238 | 1993-08-01,4.8
239 | 1993-07-01,4.8
240 | 1993-06-01,4.9
241 | 1993-05-01,4.9
242 | 1993-04-01,4.8
243 | 1993-03-01,4.7
244 | 1993-02-01,4.7
245 | 1993-01-01,4.6
246 | 1992-12-01,4.7
247 | 1992-11-01,4.8
248 | 1992-10-01,5.0
249 | 1992-09-01,5.2
250 | 1992-08-01,5.3
251 | 1992-07-01,5.3
252 | 1992-06-01,5.3
253 | 1992-05-01,5.2
254 | 1992-04-01,5.1
255 | 1992-03-01,5.1
256 | 1992-02-01,5.1
257 | 1992-01-01,5.2
258 | 1991-12-01,5.3
259 | 1991-11-01,5.3
260 | 1991-10-01,5.4
261 | 1991-09-01,5.4
262 | 1991-08-01,5.4
263 | 1991-07-01,5.4
264 | 1991-06-01,5.5
265 | 1991-05-01,5.6
266 | 1991-04-01,5.7
267 | 1991-03-01,5.7
268 | 1991-02-01,5.6
269 | 1991-01-01,5.4
270 | 1990-12-01,5.1
271 | 1990-11-01,4.9
272 | 1990-10-01,4.7
273 | 1990-09-01,4.5
274 | 1990-08-01,4.4
275 | 1990-07-01,4.2
276 | 1990-06-01,4.1
277 | 1990-05-01,4.1
278 | 1990-04-01,4.1
279 | 1990-03-01,4.1
280 | 1990-02-01,4.2
281 | 1990-01-01,4.2
282 | 1989-12-01,4.3
283 | 1989-11-01,4.3
284 | 1989-10-01,4.3
285 | 1989-09-01,4.4
286 | 1989-08-01,4.4
287 | 1989-07-01,4.4
288 | 1989-06-01,4.4
289 | 1989-05-01,4.4
290 | 1989-04-01,4.4
291 | 1989-03-01,4.3
292 | 1989-02-01,4.2
293 | 1989-01-01,4.2
294 | 1988-12-01,4.2
295 | 1988-11-01,4.2
296 | 1988-10-01,4.2
297 | 1988-09-01,4.2
298 | 1988-08-01,4.2
299 | 1988-07-01,4.3
300 | 1988-06-01,4.3
301 | 1988-05-01,4.4
302 | 1988-04-01,4.5
303 | 1988-03-01,4.6
304 | 1988-02-01,4.8
305 | 1988-01-01,5.0
306 | 1987-12-01,5.2
307 | 1987-11-01,5.4
308 | 1987-10-01,5.6
309 | 1987-09-01,5.7
310 | 1987-08-01,5.7
311 | 1987-07-01,5.8
312 | 1987-06-01,5.9
313 | 1987-05-01,6.0
314 | 1987-04-01,6.1
315 | 1987-03-01,6.3
316 | 1987-02-01,6.4
317 | 1987-01-01,6.5
318 | 1986-12-01,6.6
319 | 1986-11-01,6.7
320 | 1986-10-01,6.8
321 | 1986-09-01,6.8
322 | 1986-08-01,6.9
323 | 1986-07-01,7.0
324 | 1986-06-01,7.1
325 | 1986-05-01,7.1
326 | 1986-04-01,7.2
327 | 1986-03-01,7.2
328 | 1986-02-01,7.1
329 | 1986-01-01,7.1
330 | 1985-12-01,7.2
331 | 1985-11-01,7.2
332 | 1985-10-01,7.2
333 | 1985-09-01,7.2
334 | 1985-08-01,7.2
335 | 1985-07-01,7.2
336 | 1985-06-01,7.2
337 | 1985-05-01,7.2
338 | 1985-04-01,7.2
339 | 1985-03-01,7.1
340 | 1985-02-01,7.1
341 | 1985-01-01,7.0
342 | 1984-12-01,6.9
343 | 1984-11-01,6.9
344 | 1984-10-01,7.0
345 | 1984-09-01,7.0
346 | 1984-08-01,7.2
347 | 1984-07-01,7.3
348 | 1984-06-01,7.4
349 | 1984-05-01,7.5
350 | 1984-04-01,7.6
351 | 1984-03-01,7.7
352 | 1984-02-01,7.9
353 | 1984-01-01,8.2
354 | 1983-12-01,8.5
355 | 1983-11-01,8.8
356 | 1983-10-01,9.2
357 | 1983-09-01,9.5
358 | 1983-08-01,9.8
359 | 1983-07-01,10.0
360 | 1983-06-01,10.3
361 | 1983-05-01,10.6
362 | 1983-04-01,11.0
363 | 1983-03-01,11.4
364 | 1983-02-01,11.7
365 | 1983-01-01,11.9
366 | 1982-12-01,11.8
367 | 1982-11-01,11.6
368 | 1982-10-01,11.4
369 | 1982-09-01,11.1
370 | 1982-08-01,10.9
371 | 1982-07-01,10.7
372 | 1982-06-01,10.5
373 | 1982-05-01,10.3
374 | 1982-04-01,10.0
375 | 1982-03-01,9.7
376 | 1982-02-01,9.4
377 | 1982-01-01,9.1
378 | 1981-12-01,8.8
379 | 1981-11-01,8.5
380 | 1981-10-01,8.3
381 | 1981-09-01,8.0
382 | 1981-08-01,7.8
383 | 1981-07-01,7.7
384 | 1981-06-01,7.6
385 | 1981-05-01,7.6
386 | 1981-04-01,7.7
387 | 1981-03-01,7.9
388 | 1981-02-01,8.0
389 | 1981-01-01,8.1
390 | 1980-12-01,8.3
391 | 1980-11-01,8.4
392 | 1980-10-01,8.6
393 | 1980-09-01,8.7
394 | 1980-08-01,8.5
395 | 1980-07-01,7.1
396 | 1980-06-01,7.1
397 | 1980-05-01,6.8
398 | 1980-04-01,6.5
399 | 1980-03-01,6.0
400 | 1980-02-01,5.5
401 | 1980-01-01,5.2
402 | 1979-12-01,4.9
403 | 1979-11-01,4.7
404 | 1979-10-01,4.6
405 | 1979-09-01,4.5
406 | 1979-08-01,4.5
407 | 1979-07-01,4.4
408 | 1979-06-01,4.4
409 | 1979-05-01,4.4
410 | 1979-04-01,4.5
411 | 1979-03-01,4.6
412 | 1979-02-01,4.7
413 | 1979-01-01,4.9
414 | 1978-12-01,5.0
415 | 1978-11-01,5.1
416 | 1978-10-01,5.1
417 | 1978-09-01,5.1
418 | 1978-08-01,5.0
419 | 1978-07-01,4.9
420 | 1978-06-01,4.9
421 | 1978-05-01,4.9
422 | 1978-04-01,4.9
423 | 1978-03-01,4.9
424 | 1978-02-01,4.9
425 | 1978-01-01,4.9
426 | 1977-12-01,5.0
427 | 1977-11-01,5.0
428 | 1977-10-01,5.0
429 | 1977-09-01,5.0
430 | 1977-08-01,5.0
431 | 1977-07-01,5.0
432 | 1977-06-01,5.0
433 | 1977-05-01,5.1
434 | 1977-04-01,5.2
435 | 1977-03-01,5.3
436 | 1977-02-01,5.4
437 | 1977-01-01,5.4
438 | 1976-12-01,5.5
439 | 1976-11-01,5.4
440 | 1976-10-01,5.4
441 | 1976-09-01,5.4
442 | 1976-08-01,5.4
443 | 1976-07-01,5.4
444 | 1976-06-01,5.4
445 | 1976-05-01,5.5
446 | 1976-04-01,5.6
447 | 1976-03-01,5.7
448 | 1976-02-01,5.9
449 | 1976-01-01,6.0
450 |
--------------------------------------------------------------------------------
/Chapter 14/Case Study 3 - Forecast of Electricity Consumption.r:
--------------------------------------------------------------------------------
1 | R version 3.3.2 (2016-10-31) -- "Sincere Pumpkin Patch"
2 | Copyright (C) 2016 The R Foundation for Statistical Computing
3 | Platform: x86_64-w64-mingw32/x64 (64-bit)
4 |
5 | R is free software and comes with ABSOLUTELY NO WARRANTY.
6 | You are welcome to redistribute it under certain conditions.
7 | Type 'license()' or 'licence()' for distribution details.
8 |
9 | R is a collaborative project with many contributors.
10 | Type 'contributors()' for more information and
11 | 'citation()' on how to cite R or R packages in publications.
12 |
13 | Type 'demo()' for some demos, 'help()' for on-line help, or
14 | 'help.start()' for an HTML browser interface to help.
15 | Type 'q()' to quit R.
16 |
17 | [Workspace loaded from ~/.RData]
18 |
19 | install.packages("feather")
20 | install.packages("data.table")
21 | install.packages("ggplot2")
22 | install.packages("plotly")
23 | install.packages("animation")
24 |
25 | library(feather)
26 | library(data.table)
27 | library(ggplot2)
28 | library(plotly)
29 | library(animation)
30 |
31 | AggData <- as.data.table(read_feather("d:/DT_4_ind"))
32 |
33 |
34 | str(AggData)
35 |
36 |
37 |
38 | head(AggData)
39 |
40 |
41 |
42 | ggplot(data = AggData, aes(x = date, y = value)) +
43 | geom_line() +
44 | facet_grid(type ~ ., scales = "free_y") +
45 | theme(panel.border = element_blank(),
46 | panel.background = element_blank(),
47 | panel.grid.minor = element_line(colour = "grey90"),
48 | panel.grid.major = element_line(colour = "green"),
49 | panel.grid.major.x = element_line(colour = "red"),
50 | axis.text = element_text(size = 10),
51 | axis.title = element_text(size = 12, face = "bold"),
52 | strip.text = element_text(size = 9, face = "bold")) +
53 | labs(title = "Electricity Consumption - Industry", x = "Date", y = "Load (kW)")
54 |
55 |
56 |
57 | AggData
58 |
59 |
60 | AggData[, week_num := as.integer(as.factor(AggData[, week]))]
61 |
62 |
63 | AggData
64 |
65 |
66 | n_type <- unique(AggData[, type])
67 |
68 | n_type
69 |
70 |
71 | n_date <- unique(AggData[, date])
72 |
73 |
74 | n_weekdays <- unique(AggData[, week])
75 |
76 | period <- 48
77 |
78 | data_reg <- AggData[(type == n_type[2] & date %in% n_date[57:70])]
79 |
80 | data_reg
81 |
82 |
83 | ggplot(data_reg, aes(date_time, value)) +
84 | geom_line() +
85 | theme(panel.border = element_blank(),
86 | panel.background = element_blank(),
87 | panel.grid.minor = element_line(colour = "grey90"),
88 | panel.grid.major = element_line(colour = "green"),
89 | panel.grid.major.x = element_line(colour = "red"),
90 | axis.text = element_text(size = 10),
91 | axis.title = element_text(size = 12, face = "bold")) +
92 | labs(title = "Regression Analysis - Education Buildings", x = "Date", y = "Load (kW)")
93 |
94 |
95 | N <- nrow(data_reg)
96 |
97 | trainset_window <- N / period
98 |
99 | matrix_train <- data.table(Load = data_reg[, value], Daily = as.factor(rep(1:period, trainset_window)), Weekly = as.factor(data_reg[, week_num]))
100 |
101 | matrix_train
102 |
103 | linear_model_1 <- lm(Load ~ 0 + ., data = matrix_train)
104 |
105 | linear_model_1
106 |
107 | summary_1 <- summary(linear_model_1)
108 |
109 | summary_1
110 |
111 | paste("R-squared: ", round(summary_1$r.squared, 3), ", p-value of F test: ", 1-pf(summary_1$fstatistic[1], summary_1$fstatistic[2], summary_1$fstatistic[3]))
112 |
113 |
114 |
115 |
116 | datas <- rbindlist(list(data_reg[, .(value, date_time)], data.table(value = linear_model_1$fitted.values, data_time = data_reg[, date_time])))
117 |
118 | datas
119 |
120 |
121 | datas[, type := rep(c("Real", "Fitted"), each = nrow(data_reg))]
122 |
123 | datas
124 |
125 |
126 | ggplot(data = datas, aes(date_time, value, group = type, colour = type)) + geom_line(size = 0.8) + theme_bw() +
127 | labs(x = "Time", y = "Load (kW)", title = "Fit from Multiple Linear Regression")
128 |
129 | ggplot(data = data.table(Fitted_values = linear_model_1$fitted.values,
130 | Residuals = linear_model_1$residuals),
131 | aes(Fitted_values, Residuals)) +
132 | geom_point(size = 1.7) +
133 | geom_smooth() +
134 | geom_hline(yintercept = 0, color = "red", size = 1) +
135 | labs(title = "Fitted values vs Residuals")
136 |
137 |
138 | ggQQ <- function(lm){
139 | d <- data.frame(std.resid = rstandard(lm))
140 | y <- quantile(d$std.resid[!is.na(d$std.resid)], c(0.25, 0.75))
141 | x <- qnorm(c(0.25, 0.75))
142 | slope <- diff(y)/diff(x)
143 | int <- y[1L] - slope * x[1L]
144 |
145 | p <- ggplot(data = d, aes(sample = std.resid)) +
146 | stat_qq(shape = 1, size = 3) +
147 | labs(title = "Normal Q-Q",
148 | x = "Theoretical Quantiles",
149 | y = "Standardized Residuals") +
150 | geom_abline(slope = slope, intercept = int, linetype = "dashed",
151 | size = 1, col = "firebrick1")
152 | return(p)
153 | }
154 |
155 |
156 | ggQQ(linear_model_1)
157 |
158 | linear_model_2 <- lm(Load ~ 0 + Daily + Weekly + Daily:Weekly, data = matrix_train)
159 |
160 | linear_model_2
161 |
162 |
163 | c(Previous = summary(linear_model_1)$r.squared, New = summary(linear_model_2)$r.squared)
164 |
165 |
166 | ggplot(data.table(Residuals = c(linear_model_1$residuals, linear_model_2$residuals), Type = c(rep("Multiple Linear Reg - simple", nrow(data_reg)), rep("Multiple Linear Reg with interactions", nrow(data_reg)))), aes(Type, Residuals, fill = Type)) + geom_boxplot()
167 |
168 |
169 | ggplotly()
170 |
171 | datas <- rbindlist(list(data_reg[, .(value, date_time)], data.table(value = linear_model_2$fitted.values, data_time = data_reg[, date_time])))
172 |
173 | datas
174 |
175 |
176 | datas[, type := rep(c("Real", "Fitted"), each = nrow(data_reg))]
177 |
178 | datas
179 |
180 | ggplot(data = datas, aes(date_time, value, group = type, colour = type)) + geom_line(size = 0.8) + theme_bw() + labs(x = "Time", y = "Load (kW)", title = "Fit from Multiple Linear Reg")
181 |
182 |
183 | ggplot(data = data.table(Fitted_values = linear_model_2$fitted.values, Residuals = linear_model_2$residuals), aes(Fitted_values, Residuals)) + geom_point(size = 1.7) + geom_hline(yintercept = 0, color = "red", size = 1) +
184 | labs(title = "Fitted values vs Residuals")
185 |
186 | ggQQ(linear_model_2)
187 |
188 |
189 | predWeekReg <- function(data, set_of_date){
190 | data_train <- data[date %in% set_of_date]
191 |
192 | N <- nrow(data_train)
193 | window <- N / period # number of days in the train set
194 | matrix_train <- data.table(Load = data_train[, value],
195 | Daily = as.factor(rep(1:period, window)),
196 | Weekly = as.factor(data_train[, week_num]))
197 | lm_m <- lm(Load ~ 0 + Daily + Weekly + Daily:Weekly, data = matrix_train)
198 |
199 | pred_week <- predict(lm_m, matrix_train[1:(7*period), -1, with = FALSE])
200 |
201 | return(as.vector(pred_week))
202 | }
203 |
204 |
205 | mape <- function(real, pred){
206 | return(100 * mean(abs((real - pred)/real)))
207 | }
208 |
209 |
210 |
211 | n_weeks <- floor(length(n_date)/7) - 2
212 |
213 | n_weeks
214 |
215 |
216 |
217 | lm_pred_weeks_1 <- sapply(0:(n_weeks-1), function(i)
218 | predWeekReg(AggData[type == n_type[1]], n_date[((i*7)+1):((i*7)+7*2)]))
219 |
220 |
221 | lm_pred_weeks_2 <- sapply(0:(n_weeks-1), function(i)
222 | predWeekReg(AggData[type == n_type[2]], n_date[((i*7)+1):((i*7)+7*2)]))
223 |
224 |
225 | lm_pred_weeks_3 <- sapply(0:(n_weeks-1), function(i)
226 | predWeekReg(AggData[type == n_type[3]], n_date[((i*7)+1):((i*7)+7*2)]))
227 |
228 |
229 | lm_pred_weeks_4 <- sapply(0:(n_weeks-1), function(i)
230 | predWeekReg(AggData[type == n_type[4]], n_date[((i*7)+1):((i*7)+7*2)]))
231 |
232 |
233 | lm_err_mape_1 <- sapply(0:(n_weeks-1), function(i)
234 | mape(AggData[(type == n_type[1] & date %in% n_date[(15+(i*7)):(21+(i*7))]), value],
235 | lm_pred_weeks_1[, i+1]))
236 |
237 |
238 | lm_err_mape_1
239 |
240 | lm_err_mape_2 <- sapply(0:(n_weeks-1), function(i)
241 | mape(AggData[(type == n_type[2] & date %in% n_date[(15+(i*7)):(21+(i*7))]), value],
242 | lm_pred_weeks_2[, i+1]))
243 |
244 | lm_err_mape_2
245 |
246 |
247 | lm_err_mape_3 <- sapply(0:(n_weeks-1), function(i)
248 | mape(AggData[(type == n_type[3] & date %in% n_date[(15+(i*7)):(21+(i*7))]), value],
249 | lm_pred_weeks_3[, i+1]))
250 |
251 | lm_err_mape_3
252 |
253 | lm_err_mape_4 <- sapply(0:(n_weeks-1), function(i)
254 | mape(AggData[(type == n_type[4] & date %in% n_date[(15+(i*7)):(21+(i*7))]), value],
255 | lm_pred_weeks_4[, i+1]))
256 |
257 | lm_err_mape_4
258 |
259 |
260 | datas <- data.table(value = c(as.vector(lm_pred_weeks_1),
261 | AggData[(type == n_type[1]) & (date %in% n_date[-c(1:14,365)]), value]),
262 | date_time = c(rep(AggData[-c(1:(14*48), (17473:nrow(AggData))), date_time], 2)),
263 | type = c(rep("MLR", nrow(lm_pred_weeks_1)*ncol(lm_pred_weeks_1)),
264 | rep("Real", nrow(lm_pred_weeks_1)*ncol(lm_pred_weeks_1))),
265 | week = c(rep(1:50, each = 336), rep(1:50, each = 336)))
266 |
267 |
268 |
269 |
270 | saveGIF({
271 | oopt = ani.options(interval = 0.9, nmax = 50)
272 | for(i in 1:ani.options("nmax")){
273 | print(ggplot(data = datas[week == i], aes(date_time, value, group = type, colour = type)) +
274 | geom_line(size = 0.8) +
275 | scale_y_continuous(limits = c(min(datas[, value]), max(datas[, value]))) +
276 | theme(panel.border = element_blank(), panel.background = element_blank(),
277 | panel.grid.minor = element_line(colour = "grey90"),
278 | panel.grid.major = element_line(colour = "grey90"),
279 | panel.grid.major.x = element_line(colour = "grey90"),
280 | title = element_text(size = 15),
281 | axis.text = element_text(size = 10),
282 | axis.title = element_text(size = 12, face = "bold")) +
283 | labs(x = "Time", y = "Load (kW)",
284 | title = paste("Forecast of MLR (", n_type[1], "); ", "week: ", i, "; MAPE: ",
285 | round(lm_err_mape_1[i], 2), "%", sep = "")))
286 | ani.pause()
287 | }}, movie.name = "industry_1.gif", ani.height = 450, ani.width = 750)
--------------------------------------------------------------------------------
/Chapter 03/Data/WBClust2013.csv:
--------------------------------------------------------------------------------
1 | Country,new.forest,Rural,log.CO2,log.GNI,log.Energy.2011,LifeExp,Fertility,InfMort,log.Exports,log.Imports,CellPhone,RuralWater,Pop
2 | China,-5.929374602,46.832,1.839733042,8.651724084,7.615477213,75.1995122,1.663,10.9,3.350966008,3.259900191,88.70833462,84.9,1357380000
3 | India,-2.735634368,68.006,0.568835582,7.34601021,6.419536925,66.21085366,2.505,41.4,3.17248544,3.408213415,70.78318167,90.7,1252139596
4 | United States,-1.688899022,18.723,2.871537728,10.86570722,8.858292522,78.74146341,1.8805,5.9,2.604612908,2.84419291,95.52954726,98,316128839
5 | Indonesia,4.636429459,47.748,0.6435402,8.13739583,6.753775337,70.6072439,2.37,24.5,3.271910744,3.216864756,121.5434067,76.4,249865631
6 | Brazil,3.222813308,14.829,0.81104934,9.362202721,7.223404787,73.61787805,1.811,12.3,2.475627706,2.535484629,135.3050481,85.3,200361925
7 | Pakistan,6.053448784,62.14,0.031613484,7.13089883,6.177147466,66.43587805,3.264,69,2.636212664,2.942568239,70.13037598,89,182142594
8 | Nigeria,7.231399689,53.906,-0.520722812,7.807916629,6.580144764,52.10902439,6.002,74.3,3.444569888,3.066391075,73.29196161,49.1,173615345
9 | Bangladesh,1.95669311,67.247,-0.751700659,6.721425701,5.321644762,70.29485366,2.208,33.2,3.131174391,3.453487433,67.08453366,84.4,156594962
10 | Russian Federation,-0.179277519,26.149,2.511694234,9.452501929,8.539587423,70.46097561,1.59,8.6,3.410135198,3.078918034,152.8353524,92.2,143499861
11 | Japan,-0.432173052,7.509,2.228470291,10.77247701,8.191565931,83.09609756,1.41,2.1,2.716517908,2.774735635,115.1885934,100,127338621
12 | Mexico,2.872378525,21.309,1.351592205,9.181940897,7.352266494,77.13507317,2.216,12.5,3.441960374,3.481173501,85.83640136,90.8,122332399
13 | Philippines,-4.281921929,55.367,-0.027218803,7.989560449,6.053432903,68.55373171,3.076,23.5,3.464773594,3.572818237,104.5023215,91.2,98393574
14 | Ethiopia,5.311115459,81.41,-1.745458287,6.01615716,5.942948958,62.96595122,4.642,44.4,2.833157626,3.469950184,27.25468539,42.1,94100756
15 | Vietnam,-7.101588109,67.691,0.603287261,7.34601021,6.546547749,75.60668293,1.768,19,4.374383052,4.425129792,130.8904335,93.6,89708900
16 | "Egypt, Arab Rep.",-7.862453931,56.975,1.001657573,7.999678579,6.88554715,70.9072439,2.807,18.6,3.023708641,3.206325662,121.507893,98.8,82056378
17 | Germany,-1.766038005,25.11,2.220815392,10.71818843,8.245770767,80.89268293,1.38,3.2,3.924572763,3.816259065,119.0306009,100,80621788
18 | Turkey,-4.420556788,27.63,1.442445644,9.288226911,7.339078748,74.86243902,2.06,16.5,3.177080078,3.485767145,92.96497103,98.8,74932641
19 | Thailand,1.673362851,52.057,1.514435908,8.565983356,7.489767958,74.18797561,1.412,11.3,4.343071632,4.28230261,137.9828493,95.3,67010502
20 | France,-3.226131523,20.945,1.73272852,10.642086,8.260800196,82.56585366,2.01,3.5,3.291951557,3.396475544,98.49547248,100,66028467
21 | United Kingdom,-3.300360344,17.908,2.074751655,10.55320518,7.997354088,81.5,1.9,3.9,3.467897284,3.514009229,123.7698841,100,64097085
22 | Italy,-4.753474563,31.314,1.939365189,10.45765998,7.944286271,82.93658537,1.4,3,3.3615716,3.408910595,158.8835332,100,59831093
23 | South Africa,0,36.212,2.21271853,8.917310693,7.916023279,56.09831707,2.412,32.8,3.421193457,3.406965992,147.4645166,88.3,52981991
24 | "Korea, Rep.",1.590789818,17.751,2.449866985,10.11212642,8.562524185,81.36829268,1.297,3.2,4.020832108,3.993650513,110.9984678,87.9,50219669
25 | Tanzania,4.62441641,69.804,-1.377414,6.345636361,6.103834615,60.84643902,5.287,36.4,3.43583762,3.916313924,55.71793149,44,49253126
26 | Colombia,1.885237266,24.117,0.547804309,8.85509298,6.509500831,73.77707317,2.316,14.5,2.930452491,2.992364745,104.0845439,73.6,48321405
27 | Spain,-5.836325814,20.871,1.773236362,10.28670705,7.895964563,82.37804878,1.32,3.6,3.428904749,3.462392333,106.9052178,100,46647421
28 | Ukraine,-2.282128106,30.726,1.908781792,8.199738961,7.92527176,70.94414634,1.531,8.6,3.995830396,4.104093527,138.0646549,97.7,45489600
29 | Kenya,2.663225936,75.22,-0.906880984,6.768493212,6.174061246,61.08317073,4.459,47.5,3.350727323,3.808683742,70.58942851,55.1,44353691
30 | Algeria,3.399071824,30.49,1.233000936,8.511175119,7.010566012,70.88217073,2.82,21.6,3.656764718,3.358265995,102.0107812,79.5,39208194
31 | Sudan,10,66.54,-0.889506147,7.286191715,5.872339646,61.86395122,4.491,51.2,2.866595849,2.74376606,72.85231291,50.2,37964306
32 | Canada,0,18.528,2.693155429,10.83269451,8.900178041,81.23804878,1.61,4.6,3.424879915,3.464530426,78.39781723,99,35158304
33 | Iraq,-1.616149838,30.738,1.335903996,8.720950029,7.14391003,69.24192683,4.086,28,3.76483461,3.29457351,96.10477428,68.5,33417476
34 | Morocco,-1.421338109,40.8,0.530265511,7.97590836,6.289868653,70.64312195,2.706,26.1,3.571652109,3.885385772,128.5252097,63.6,33008150
35 | Peru,1.874079206,22.046,0.726416441,8.681011277,6.543904875,74.51553659,2.448,12.9,3.389643637,3.242063257,98.08298127,71.6,30375603
36 | Uzbekistan,-2.704372182,63.783,1.323535003,7.43838353,7.394906704,68.104,2.53,36.7,3.498969714,3.44183483,74.30678167,80.9,30241100
37 | Malaysia,3.058832023,26.716,2.049944156,9.192176401,7.878318976,74.84229268,1.975,7.2,4.5175056,4.319877314,144.6850309,98.5,29716965
38 | Saudi Arabia,0,17.281,2.841409863,10.11293778,8.815580129,75.49690244,2.702,13.4,4.028814483,3.386780262,176.4978024,97,28828870
39 | Nepal,4.951498202,82.123,-1.427649513,6.551080335,5.947085623,67.98270732,2.389,32.2,2.186504031,3.494208685,71.46285,87.6,27797457
40 | Ghana,6.06401582,47.265,-0.753135077,7.365180126,6.052172677,60.94712195,3.92,52.3,3.786865769,3.905954439,108.1911482,81.3,25904598
41 | Mozambique,3.319133077,68.331,-1.512954434,6.234410726,6.028359065,49.83626829,5.263,61.5,3.427117769,4.102937082,48.00421557,35,25833752
42 | Australia,2.135839951,10.847,2.835195569,10.9985937,8.612647033,82.09512195,1.927,3.4,3.05100135,3.002864811,106.8434974,100,23130900
43 | Cameroon,4.461455992,46.75,-0.796734565,7.081708586,5.760866249,54.5875122,4.859,60.8,3.38393443,3.473099603,70.3904685,51.9,22253959
44 | Angola,2.121969647,57.51,0.5043843,8.414052432,6.511357758,51.464,5.979,101.6,4.174032974,3.768313569,61.87329711,34.3,21471618
45 | Sri Lanka,4.700384774,81.7,-0.334915767,7.97590836,6.21328392,74.06804878,2.346,8.2,3.137885442,3.627225265,95.49632054,92.9,20483000
46 | Cote d'Ivoire,-1.327729042,47.234,-0.901658487,7.106606138,6.361823919,50.40173171,4.894,71.3,4.054932466,3.634853836,95.44605196,67.8,20316086
47 | Chile,-2.614350651,10.825,1.461661878,9.567315271,7.57029898,79.57265854,1.832,7.1,3.638230558,3.551478589,134.2782809,91.3,17619708
48 | Kazakhstan,1.90511324,46.639,2.73041606,9.188094763,8.458975784,69.61,2.59,14.6,3.901466767,3.323198525,180.5020819,86,17037508
49 | Netherlands,-2.407717062,10.729,2.403187149,10.78124533,8.441988283,81.10487805,1.72,3.3,4.429212809,4.321228728,113.7299037,100,16804224
50 | Ecuador,5.609161336,36.702,0.822242809,8.550627968,6.743854336,76.19256098,2.59,19.1,3.451303806,3.538436942,111.4620027,75.2,15737878
51 | Guatemala,5.034422369,49.339,-0.133242755,8.048788284,6.538262585,71.66385366,3.844,25.8,3.281866431,3.620567904,140.393535,88.6,15468203
52 | Cambodia,4.897591548,79.681,-0.939014768,6.779921907,5.90028732,71.40882927,2.894,32.5,3.990478409,4.085994592,133.8902393,65.6,15135169
53 | Zambia,2.634704284,59.973,-1.259946258,7.251344983,6.43078853,57.02258537,5.731,55.8,3.829700204,3.611910261,71.50463179,49.2,14538640
54 | Zimbabwe,5.697195807,67.346,-0.197292168,6.684611728,6.5468933,58.04597561,3.564,55,3.757931516,4.353266577,96.3498668,68.7,14149648
55 | Senegal,3.196562198,56.921,-0.438395115,6.937314081,5.574629633,63.20217073,4.981,43.9,3.225526038,3.730177095,92.92798275,60.3,14133280
56 | Belgium,-0.65223171,2.224,2.310210309,10.71018661,8.584660157,80.38536585,1.79,3.5,4.442465493,4.432656894,110.9031799,100,11195138
57 | Greece,-4.487698318,22.657,2.063683065,10.0719637,7.784234934,80.63414634,1.34,3.7,3.221070705,3.500906635,116.8206478,99.4,11032328
58 | Tunisia,-7.845712839,33.544,0.937309116,8.352318548,6.791647597,75.1,2.17,13.1,3.899300587,4.033103188,115.6035799,90.5,10886500
59 | Bolivia,3.14579256,32.304,0.483542083,7.705262475,6.614935656,66.92663415,3.264,31.2,3.786876556,3.648367932,97.69945273,71.9,10671200
60 | Czech Republic,-1.103264632,26.94,2.376674692,9.805323304,8.32788531,78.07560976,1.45,2.9,4.288957918,4.230372041,131.2534239,99.6,10521468
61 | Portugal,-2.026277627,37.662,1.619842272,9.934016758,7.690060059,80.37317073,1.28,3.1,3.574632428,3.690155767,113.035602,99.9,10459806
62 | Dominican Republic,0,22.918,0.785228718,8.599694413,6.589548632,73.23297561,2.517,23.6,3.22071992,3.565713716,88.43350015,77.2,10403761
63 | Benin,4.750318602,56.914,-0.437532943,6.620073207,5.952102492,59.1194878,4.927,56.2,2.706843759,3.294401447,93.25782193,69.1,10323474
64 | Haiti,3.782901641,43.832,-1.157816532,6.633318433,5.768448855,62.70265854,3.212,54.7,2.597975432,3.992060125,69.39885695,47.5,10317461
65 | Hungary,-3.698822539,29.694,1.640597019,9.429475902,7.82542454,75.06341463,1.34,5.2,4.543275192,4.460179918,116.4278724,100,9897247
66 | Sweden,-1.838380132,14.486,1.740421303,10.93524753,8.554554297,81.70487805,1.91,2.4,3.909128179,3.790550341,124.3952501,100,9592552
67 | Belarus,-3.452952241,24.123,1.895601277,8.764053269,8.0437322,72.06341463,1.62,3.7,4.396002026,4.419677139,118.786176,99,9466000
68 | Azerbaijan,0,45.9,1.639142113,8.74671635,7.222071643,70.62495122,2,29.9,4.032947422,3.181425825,107.6134072,70.7,9416598
69 | United Arab Emirates,-5.511398208,15.019,2.993419487,10.56152556,8.910182683,76.95787805,1.82,7,4.503306079,4.28053198,171.8737993,100,9346129
70 | Austria,-1.790097526,34.116,2.088605428,10.77812261,8.275864605,80.93658537,1.44,3.2,4.048318047,3.994698209,156.2304116,100,8473786
71 | Tajikistan,-0.700140042,73.379,-0.744436956,6.779921907,5.725201254,67.25717073,3.819,40.9,2.882112659,4.207266166,91.8281242,64,8207834
72 | Honduras,6.255774127,46.463,0.151724357,7.668561108,6.412618747,73.49343902,3.05,18.9,3.936864994,4.262104691,95.91916853,81.5,8097688
73 | Switzerland,-2.920909371,26.213,1.619975771,11.30158696,8.073079874,82.69756098,1.52,3.6,3.936759386,3.71046252,133.79826,100,8081482
74 | Israel,-4.045199175,7.988,2.237302864,10.37442824,8.004515131,81.70487805,3.04,3.2,3.56958338,3.583483237,122.8478352,100,8059400
75 | Bulgaria,-4.620884102,26.7,1.815029748,8.832003931,7.869032869,74.31463415,1.5,10.1,4.19715621,4.197481553,145.1876887,99,7265115
76 | Serbia,-4.625162855,44.627,1.856857806,8.58485184,7.713059061,75.23414634,1.32,5.8,3.599265979,3.969914259,119.3917104,98.9,7163976
77 | Togo,7.809782374,61.021,-1.066435013,6.194405391,6.056903482,56.15034146,4.695,55.8,3.674645665,4.031228887,62.53490181,40.3,6816982
78 | Paraguay,4.311126401,40.826,-0.121425016,8.107720062,6.605306221,72.19373171,2.897,18.7,3.963456083,3.913950214,103.6893284,83.4,6802295
79 | Jordan,0,16.789,1.26520012,8.446770727,7.041326875,73.74739024,3.314,16,3.86398806,4.302160013,141.7962745,90.5,6459000
80 | El Salvador,5.119267189,34.225,0.099740838,8.188689124,6.537127246,72.10453659,2.209,13.5,3.331491174,3.842956045,136.1874402,81,6340454
81 | Nicaragua,5.840897474,41.854,-0.12671037,7.432483808,6.243199302,74.46539024,2.541,20,3.690407785,4.021385091,111.9801766,67.8,6080478
82 |
--------------------------------------------------------------------------------
/Chapter 03/Data/math test.txt:
--------------------------------------------------------------------------------
1 | 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1
2 | 2 0 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1
3 | 3 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
4 | 4 1 1 1 1 0 1 1 0 0 1 1 1 1 1 0 1 0 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 0 1 0 1 1 0 1 1 1
5 | 5 1 0 1 1 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 1 1 1 1 0 0 0 0 1 1 1 1 1 0 0 0 1 1 1 0 0 1 0 0 1 1 1 1 1 0 0 1 1 1 1 1
6 | 6 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 0 0 1 1 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1
7 | 7 0 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1
8 | 8 0 0 1 1 0 1 1 1 0 1 1 1 1 1 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 0 1 1 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1
9 | 9 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 1 0 1 1 1 0 1 0 0 1 1 0 0 0 1 1 0 0 1 1 1 1 0 1 1 1
10 | 10 0 0 0 1 0 1 1 0 0 1 1 0 1 1 0 0 0 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
11 | 11 1 0 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1
12 | 12 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 1 0 1 1 1 1 1 0 0 0 1 1 0 1 1 1 1 0 0 0 0 0 0 1 0 1 0 0 1 1 0 1 0 0 1 1 0 1 1 1
13 | 13 0 0 1 0 0 0 1 1 0 1 1 0 1 1 1 0 0 1 1 1 1 1 0 1 1 0 0 0 1 0 0 1 0 1 1 0 1 0 1 1 1 0 1 0 1 0 1 1 1 1 1 0 1 1 1
14 | 14 1 0 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 0 0 0 1 1 1 1 0 0 1 0 0 0 0 0 0 1 1 1 1 1 1 0 1 1 0 0 1 0 1 1 1
15 | 15 0 1 1 1 1 1 0 1 0 1 0 1 1 1 0 1 0 0 0 1 1 1 0 0 1 1 1 0 1 1 1 1 1 1 0 1 0 0 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1
16 | 16 0 0 1 0 0 1 1 1 0 1 1 1 0 0 1 1 1 1 0 1 1 1 1 0 1 0 1 0 0 1 0 1 0 0 0 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0 1 1 1
17 | 17 0 1 1 0 1 1 1 1 0 1 1 1 1 0 0 0 1 1 0 1 1 1 0 0 0 1 1 1 1 0 1 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 0 1 0 1
18 | 18 0 1 0 1 0 1 1 0 0 1 1 1 1 0 0 1 1 0 1 0 0 1 0 0 1 0 0 0 1 1 0 0 1 0 0 1 0 1 1 1 1 1 0 1 0 0 0 0 0 1 0 0 1 1 1
19 | 19 0 1 1 1 0 1 1 0 1 1 1 1 0 1 0 1 1 0 0 1 1 1 0 0 1 1 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1
20 | 20 1 0 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 0 0 1 0 1 0 1 1 1 0 0 1 0 0 0 0 1 0
21 | 21 1 0 1 1 0 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 1 1 1
22 | 22 1 0 0 0 0 0 1 1 0 1 1 0 0 1 0 1 1 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 1 1 0 1 0 0 1 0 0 0 0 1 1 1 1 1 0 0 0 0 1 1 1
23 | 23 0 1 1 0 0 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 1 1 0 0 1 1 0 1 0 1 0 1 0 0 0 1 0 0 1 1 0 0 1 1 0 0 1 1 1 0 1 0 1 1 1
24 | 24 0 1 0 1 1 1 1 0 0 1 1 0 1 1 0 1 0 1 1 0 1 1 0 0 1 0 0 1 1 0 1 1 0 1 0 1 0 1 1 1 0 1 0 1 1 1 1 1 0 0 1 0 1 1 1
25 | 25 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 0 1 1 0 0 1 1 0 1 1 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 0 1 1 0 0 1 1 1 1 1
26 | 26 1 0 1 0 0 1 1 0 0 1 1 0 0 1 1 0 1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 1 0 0 0 1 1 1 1 0 0 0 1 1 1 1 0 0 1 0 1 0 1 1 1
27 | 27 0 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1 0 1 1 1 1 1 0 0 0 1 0 1 1 1
28 | 28 0 0 1 1 0 1 1 0 0 1 1 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 0 1 1 0 1 1 1 0 1 0 0 1 0 1 1 1
29 | 29 0 0 0 1 0 1 1 0 0 1 1 1 1 1 0 1 0 0 0 0 1 1 0 0 1 0 1 0 0 1 0 1 0 0 0 1 0 0 0 1 1 0 0 1 1 1 0 1 0 1 0 0 1 1 1
30 | 30 0 0 0 1 1 1 0 0 0 1 1 0 1 0 0 1 1 1 1 1 1 0 1 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 1 1
31 | 31 0 0 0 1 1 1 1 0 0 1 1 0 1 0 0 1 0 1 1 0 1 0 0 0 1 0 0 0 1 1 0 1 0 1 1 0 0 0 0 1 1 0 0 1 1 0 1 1 1 0 0 0 1 1 1
32 | 32 0 0 1 1 1 1 1 0 0 1 1 0 1 0 1 1 1 1 0 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 0 1 1
33 | 33 1 1 0 1 0 1 0 1 0 1 1 0 1 0 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 1 0 0 1 1 0 1 1 1 0 1 0 0 1 1
34 | 34 0 1 0 0 0 1 1 1 0 1 0 0 1 1 0 1 0 1 1 0 1 1 0 0 1 1 1 1 0 1 0 0 1 0 0 0 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 1
35 | 35 1 1 1 0 1 1 1 0 1 1 1 0 0 1 0 1 0 1 1 1 1 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 0 0 0 1 1 0 0 1 1 0 1 1 1 1 1 0 0 1 1
36 | 36 0 0 1 1 1 1 1 1 0 1 1 1 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 0 0 1 0 0 1 0 0 1 0 0 0 0 0 1 0 1 1 0 0 1 1 1 1 1
37 | 37 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 0 1 1 1 0 1 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 1 1 0 1 1 1
38 | 38 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 1 0 0 1 1 1 1 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 1 1 0 1 1 0 1 0 1 1 1
39 | 39 0 0 1 1 0 1 1 0 1 1 0 0 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 1 0 1 0 0 1 1 0 0 1 0 1 0 0 1 1 0 1 1 1 1 0 0 1 1 1
40 | 40 1 1 1 0 1 1 1 0 1 0 1 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0
41 | 41 0 0 1 0 0 0 0 0 0 1 1 0 1 1 0 0 0 1 1 0 1 1 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 0 1 1 0
42 | 42 1 1 0 0 0 1 1 0 0 1 1 0 1 1 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 1 1
43 | 43 1 0 1 0 1 1 1 0 0 1 1 1 1 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 1 0 1 0 0 1 1 1
44 | 44 1 0 0 1 0 0 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 1 1 0
45 | 45 0 0 1 0 1 1 0 0 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 1 0 0 1 0 0 0 1 0 0 1 1 1 0 0 0 1 1 1
46 | 46 0 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 0
47 | 47 1 0 0 0 0 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 1 1 0 1 0 1 1 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0
48 | 48 1 0 0 0 0 1 1 0 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 1 0 0 1 1 0 1 1 0 1 0 0 1 1 1
49 | 49 0 1 0 1 0 1 0 0 0 0 1 0 1 1 1 0 0 1 0 0 1 0 1 1 1 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
50 | 50 0 0 0 0 0 1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 0 1 1 0 0 0 0 1 1 1
51 | 51 0 0 1 0 0 1 1 0 0 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 1 0 0 0 0 1 1 0
52 | 52 0 0 0 1 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 1 1
53 | 53 0 0 1 0 0 1 1 0 0 1 1 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0 1
54 | 54 1 0 1 0 0 1 1 0 0 1 1 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 1 1 1 0 0 0 0 1 1 1
55 | 55 0 0 0 0 0 1 1 0 0 1 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 0 1 1 1 0 1 0 1 0 0 1 1 1
56 | 56 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
57 | 57 0 0 1 0 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 0 0 0 1 1 0
58 | 58 0 1 0 0 0 1 1 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 1 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 1 1 1 0 1 0 0 1 0 0
59 | 59 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 1 1 0
60 | 60 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
61 |
--------------------------------------------------------------------------------
/Chapter 02/Data/hsbdemo.csv:
--------------------------------------------------------------------------------
1 | id,female,ses,schtyp,prog,read,write,math,science,socst,honors,awards,cid
2 | 45,female,low,public,vocation,34,35,41,29,26,not enrolled,0,1
3 | 108,male,middle,public,general,34,33,41,36,36,not enrolled,0,1
4 | 15,male,high,public,vocation,39,39,44,26,42,not enrolled,0,1
5 | 67,male,low,public,vocation,37,37,42,33,32,not enrolled,0,1
6 | 153,male,middle,public,vocation,39,31,40,39,51,not enrolled,0,1
7 | 51,female,high,public,general,42,36,42,31,39,not enrolled,0,1
8 | 164,male,middle,public,vocation,31,36,46,39,46,not enrolled,0,1
9 | 133,male,middle,public,vocation,50,31,40,34,31,not enrolled,0,1
10 | 2,female,middle,public,vocation,39,41,33,42,41,not enrolled,0,1
11 | 53,male,middle,public,vocation,34,37,46,39,31,not enrolled,0,1
12 | 1,female,low,public,vocation,34,44,40,39,41,not enrolled,0,1
13 | 128,male,high,public,academic,39,33,38,47,41,not enrolled,0,2
14 | 16,male,low,public,vocation,47,31,44,36,36,not enrolled,0,2
15 | 106,female,middle,public,vocation,36,44,37,42,41,not enrolled,0,2
16 | 89,female,low,public,vocation,35,35,40,51,33,not enrolled,0,2
17 | 134,male,low,public,general,44,44,39,34,46,not enrolled,0,2
18 | 19,female,low,public,general,28,46,43,44,51,not enrolled,0,2
19 | 145,female,middle,public,vocation,42,46,38,36,46,not enrolled,0,2
20 | 11,male,middle,public,academic,34,46,45,39,36,not enrolled,0,2
21 | 117,male,high,public,vocation,34,49,39,42,56,not enrolled,0,2
22 | 109,female,middle,public,general,42,39,42,42,41,not enrolled,0,2
23 | 12,male,middle,public,vocation,37,44,45,39,46,not enrolled,0,3
24 | 37,female,low,public,vocation,41,47,40,39,51,not enrolled,0,3
25 | 69,female,low,public,vocation,44,44,40,40,31,not enrolled,0,3
26 | 43,female,low,public,academic,47,37,43,42,46,not enrolled,0,3
27 | 196,male,high,private,academic,44,38,49,39,46,not enrolled,0,3
28 | 36,female,low,public,general,44,49,44,35,51,not enrolled,0,3
29 | 155,male,middle,public,general,44,44,46,39,51,not enrolled,0,3
30 | 6,female,low,public,academic,47,41,46,40,41,not enrolled,0,3
31 | 4,female,low,public,academic,44,50,41,39,51,not enrolled,1,3
32 | 25,female,middle,public,general,47,44,42,42,36,not enrolled,0,4
33 | 107,male,low,public,vocation,47,39,47,42,26,not enrolled,0,4
34 | 5,male,low,public,academic,47,40,43,45,31,not enrolled,0,4
35 | 47,female,low,public,academic,47,46,49,33,41,not enrolled,0,4
36 | 140,male,middle,public,vocation,44,41,40,50,26,not enrolled,0,4
37 | 22,male,middle,public,vocation,42,39,39,56,46,not enrolled,0,4
38 | 18,male,middle,public,vocation,50,33,49,44,36,not enrolled,0,4
39 | 30,female,high,public,academic,41,59,42,34,51,not enrolled,2,4
40 | 40,male,low,public,general,42,41,43,50,41,not enrolled,0,4
41 | 176,male,middle,private,academic,47,47,41,42,51,not enrolled,0,4
42 | 126,male,middle,public,general,42,31,57,47,51,not enrolled,0,4
43 | 197,male,high,private,academic,50,42,50,36,61,not enrolled,0,5
44 | 46,female,low,public,academic,45,55,44,34,41,not enrolled,2,5
45 | 49,male,high,public,vocation,50,40,39,49,47,not enrolled,0,5
46 | 8,female,low,public,academic,39,44,52,44,48,not enrolled,0,5
47 | 124,female,low,public,vocation,42,54,41,42,41,not enrolled,1,5
48 | 13,female,middle,public,vocation,47,46,39,47,61,not enrolled,0,5
49 | 111,female,low,public,general,39,54,39,47,36,not enrolled,1,5
50 | 142,female,middle,public,vocation,47,42,52,39,51,not enrolled,0,5
51 | 193,female,middle,private,academic,44,49,48,39,51,not enrolled,0,5
52 | 105,female,middle,public,academic,50,41,45,44,56,not enrolled,0,5
53 | 58,male,middle,public,vocation,55,41,40,44,41,not enrolled,0,5
54 | 129,female,low,public,general,44,44,46,47,51,not enrolled,0,6
55 | 38,male,low,public,academic,45,57,50,31,56,not enrolled,2,6
56 | 182,female,middle,private,academic,44,52,43,44,51,not enrolled,1,6
57 | 115,male,low,public,general,42,49,43,50,56,not enrolled,0,6
58 | 14,male,high,public,academic,47,41,54,42,56,not enrolled,0,6
59 | 175,female,high,private,general,36,57,42,50,41,not enrolled,2,6
60 | 44,female,low,public,vocation,47,62,45,34,46,enrolled,3,6
61 | 86,male,high,public,general,44,33,54,58,31,not enrolled,0,6
62 | 72,female,middle,public,vocation,42,54,47,47,46,not enrolled,1,6
63 | 41,male,middle,public,academic,50,40,45,55,56,not enrolled,0,7
64 | 191,female,high,private,academic,47,52,43,48,61,not enrolled,1,7
65 | 138,female,middle,public,vocation,43,57,40,50,51,not enrolled,2,7
66 | 9,male,middle,public,vocation,48,49,52,44,51,not enrolled,0,7
67 | 151,female,middle,public,vocation,47,46,52,48,46,not enrolled,0,7
68 | 119,female,low,public,general,42,57,45,50,43,not enrolled,2,7
69 | 55,female,middle,private,academic,52,49,49,44,61,not enrolled,0,7
70 | 73,female,middle,public,academic,50,52,53,39,56,not enrolled,1,7
71 | 28,female,middle,public,general,39,53,54,50,41,not enrolled,1,7
72 | 90,female,high,public,academic,42,54,50,50,52,not enrolled,1,8
73 | 17,female,middle,public,academic,47,57,48,44,41,not enrolled,2,8
74 | 102,male,high,public,academic,52,41,51,53,56,not enrolled,0,8
75 | 70,male,low,public,general,57,52,41,47,57,not enrolled,1,8
76 | 148,female,middle,public,vocation,42,57,51,47,61,not enrolled,2,8
77 | 54,female,low,private,general,47,54,46,50,56,not enrolled,1,8
78 | 42,female,middle,public,vocation,46,52,55,44,56,not enrolled,1,8
79 | 87,female,middle,public,general,50,52,46,50,56,not enrolled,1,8
80 | 21,male,middle,public,general,44,44,61,50,46,not enrolled,0,8
81 | 181,male,middle,private,academic,50,46,45,58,61,not enrolled,0,8
82 | 165,male,low,public,vocation,36,49,54,61,36,not enrolled,0,8
83 | 78,female,middle,public,academic,39,54,54,53,41,not enrolled,1,9
84 | 76,male,high,public,academic,47,52,51,50,56,not enrolled,1,9
85 | 29,male,low,public,general,52,44,49,55,41,not enrolled,0,9
86 | 91,female,high,public,vocation,50,49,56,47,46,not enrolled,0,9
87 | 52,female,low,public,academic,50,46,53,53,66,not enrolled,0,9
88 | 10,female,middle,public,general,47,54,49,53,61,not enrolled,1,9
89 | 85,male,middle,public,general,55,39,57,53,46,not enrolled,0,9
90 | 50,male,middle,public,general,50,59,42,53,61,not enrolled,2,9
91 | 56,male,middle,public,vocation,55,45,46,58,51,not enrolled,0,9
92 | 64,female,high,public,vocation,50,52,45,58,36,not enrolled,1,10
93 | 130,female,high,public,general,43,54,55,55,46,not enrolled,1,10
94 | 141,male,high,public,vocation,63,44,47,53,56,not enrolled,0,10
95 | 74,female,middle,public,academic,57,50,50,51,58,not enrolled,1,10
96 | 83,female,middle,public,vocation,50,62,41,55,31,enrolled,3,10
97 | 31,female,middle,private,general,55,59,52,42,56,not enrolled,2,10
98 | 172,male,middle,public,academic,47,52,57,53,61,not enrolled,1,10
99 | 184,female,middle,private,vocation,50,52,53,55,56,not enrolled,1,10
100 | 75,male,middle,public,vocation,60,46,51,53,61,not enrolled,0,10
101 | 187,female,middle,private,general,57,41,57,55,52,not enrolled,0,10
102 | 113,male,middle,public,academic,44,52,51,63,61,not enrolled,1,11
103 | 162,female,middle,public,vocation,57,52,40,61,56,not enrolled,1,11
104 | 110,female,middle,public,vocation,52,55,50,54,61,not enrolled,2,11
105 | 150,male,middle,public,vocation,42,41,57,72,31,not enrolled,0,11
106 | 167,male,middle,public,general,63,49,35,66,41,not enrolled,0,11
107 | 77,female,low,public,academic,61,59,49,44,66,not enrolled,2,11
108 | 35,female,low,private,general,60,54,50,50,51,not enrolled,1,11
109 | 158,female,middle,public,general,52,54,55,53,51,not enrolled,1,11
110 | 112,female,middle,public,academic,52,59,48,55,61,not enrolled,2,11
111 | 48,male,middle,public,academic,57,55,52,50,51,not enrolled,2,11
112 | 147,female,low,public,academic,47,62,53,53,61,enrolled,3,11
113 | 7,male,middle,public,academic,57,54,59,47,51,not enrolled,1,11
114 | 65,female,middle,public,academic,55,54,66,42,56,not enrolled,1,12
115 | 168,male,middle,public,academic,52,54,57,55,51,not enrolled,1,12
116 | 190,female,middle,private,academic,47,59,54,58,46,not enrolled,2,12
117 | 178,male,middle,private,vocation,47,57,57,58,46,not enrolled,2,12
118 | 159,male,high,public,academic,55,61,54,49,61,enrolled,3,12
119 | 120,female,high,public,academic,63,52,54,50,51,not enrolled,1,12
120 | 116,female,middle,public,academic,57,59,54,50,56,not enrolled,2,12
121 | 79,female,middle,public,academic,60,62,49,50,51,enrolled,3,12
122 | 98,female,low,public,vocation,57,60,51,53,37,enrolled,2,12
123 | 122,female,middle,public,academic,52,59,58,53,66,not enrolled,2,12
124 | 179,female,middle,private,academic,47,65,60,50,56,enrolled,5,13
125 | 198,female,high,private,academic,47,61,51,63,31,enrolled,3,13
126 | 189,male,middle,private,academic,47,59,63,53,46,not enrolled,2,13
127 | 199,male,high,private,academic,52,59,50,61,61,not enrolled,2,13
128 | 156,female,middle,public,academic,50,59,53,61,61,not enrolled,2,13
129 | 166,female,middle,public,academic,52,59,53,61,51,not enrolled,2,13
130 | 160,female,middle,public,academic,55,65,55,50,61,enrolled,5,13
131 | 152,female,high,public,academic,55,57,56,58,61,not enrolled,2,13
132 | 183,male,middle,private,academic,63,59,49,55,71,not enrolled,2,13
133 | 94,male,high,public,academic,55,49,61,61,56,not enrolled,0,14
134 | 149,male,low,public,general,63,49,49,66,46,not enrolled,0,14
135 | 131,female,high,public,academic,65,59,57,46,66,not enrolled,2,14
136 | 24,male,middle,public,academic,52,62,66,47,46,enrolled,3,14
137 | 99,female,high,public,general,47,59,56,66,61,not enrolled,2,14
138 | 171,male,middle,public,academic,60,54,60,55,66,not enrolled,1,14
139 | 104,male,high,public,academic,54,63,57,55,46,enrolled,4,14
140 | 81,male,low,public,academic,63,43,59,65,44,not enrolled,0,14
141 | 97,male,high,public,academic,60,54,58,58,61,not enrolled,1,14
142 | 20,male,high,public,academic,60,52,57,61,61,not enrolled,1,14
143 | 163,female,low,public,academic,52,57,64,58,56,not enrolled,2,15
144 | 195,male,middle,private,general,57,57,60,58,56,not enrolled,2,15
145 | 84,male,middle,public,general,63,57,54,58,51,not enrolled,2,15
146 | 27,male,middle,public,academic,53,61,61,57,56,enrolled,3,15
147 | 118,female,middle,public,general,55,62,58,58,61,enrolled,3,15
148 | 71,female,middle,public,general,57,62,56,58,66,enrolled,3,15
149 | 63,female,low,public,general,52,65,60,56,51,enrolled,5,15
150 | 185,male,middle,private,academic,63,57,55,58,41,not enrolled,2,15
151 | 127,male,high,public,academic,63,59,57,55,56,not enrolled,2,15
152 | 177,male,middle,private,academic,55,59,62,58,51,not enrolled,2,15
153 | 188,female,high,private,academic,63,62,56,55,61,enrolled,3,16
154 | 60,male,middle,public,academic,57,65,51,63,61,enrolled,5,16
155 | 66,female,middle,public,vocation,68,62,56,50,51,enrolled,3,16
156 | 173,female,low,public,general,50,62,61,63,51,enrolled,3,16
157 | 186,female,middle,private,academic,57,62,63,55,41,enrolled,3,16
158 | 96,female,high,public,academic,65,54,61,58,56,not enrolled,1,16
159 | 101,female,high,public,academic,60,62,67,50,56,enrolled,3,16
160 | 3,male,low,public,academic,63,65,48,63,56,enrolled,5,16
161 | 170,male,high,public,academic,47,62,61,69,66,enrolled,3,16
162 | 92,female,high,public,general,52,67,57,63,61,enrolled,7,16
163 | 62,male,high,public,general,65,65,48,63,66,enrolled,5,16
164 | 135,female,low,public,academic,63,60,65,54,66,enrolled,2,17
165 | 26,female,high,public,academic,60,59,62,61,51,not enrolled,2,17
166 | 139,female,middle,public,academic,68,59,61,55,71,not enrolled,2,17
167 | 121,female,middle,public,vocation,68,59,53,63,61,not enrolled,2,17
168 | 144,male,high,public,general,60,65,58,61,66,enrolled,5,17
169 | 146,male,high,public,academic,55,62,64,63,66,enrolled,3,17
170 | 137,female,high,public,academic,63,65,65,53,61,enrolled,5,17
171 | 123,male,high,public,general,68,59,56,63,66,not enrolled,2,18
172 | 169,male,low,public,general,55,59,63,69,46,not enrolled,2,18
173 | 34,female,high,private,academic,73,61,57,55,66,enrolled,3,18
174 | 33,female,low,public,academic,57,65,72,54,56,enrolled,5,18
175 | 32,female,high,public,vocation,50,67,66,66,56,enrolled,7,18
176 | 114,male,high,public,academic,68,65,62,55,61,enrolled,5,18
177 | 125,female,low,public,academic,68,65,58,59,56,enrolled,5,18
178 | 59,female,middle,public,academic,65,67,63,55,71,enrolled,7,18
179 | 23,female,low,public,academic,65,65,64,58,71,enrolled,5,18
180 | 161,female,low,public,academic,57,62,72,61,61,enrolled,3,18
181 | 103,male,high,public,academic,76,52,64,64,61,not enrolled,1,19
182 | 194,female,high,private,academic,63,63,69,61,61,enrolled,4,19
183 | 136,male,middle,public,academic,65,59,70,63,51,not enrolled,2,19
184 | 154,male,high,public,academic,65,65,66,61,66,enrolled,5,19
185 | 157,male,middle,public,general,68,59,58,74,66,not enrolled,2,19
186 | 93,female,high,public,academic,73,67,62,58,66,enrolled,7,19
187 | 39,female,high,public,academic,66,67,67,61,66,enrolled,7,19
188 | 88,female,high,public,academic,68,60,64,69,66,enrolled,2,19
189 | 192,male,high,private,academic,65,67,63,66,71,enrolled,7,19
190 | 80,male,high,public,academic,65,62,68,66,66,enrolled,3,19
191 | 200,male,middle,private,academic,68,54,75,66,66,not enrolled,1,19
192 | 180,female,high,private,academic,71,65,69,58,71,enrolled,5,20
193 | 82,female,high,public,academic,68,62,65,69,61,enrolled,3,20
194 | 174,male,middle,private,academic,68,59,71,66,56,not enrolled,2,20
195 | 95,male,high,public,academic,73,60,71,61,71,enrolled,2,20
196 | 61,female,high,public,academic,76,63,60,67,66,enrolled,4,20
197 | 100,female,high,public,academic,63,65,71,69,71,enrolled,5,20
198 | 143,male,middle,public,vocation,63,63,75,72,66,enrolled,4,20
199 | 68,male,middle,public,academic,73,67,71,63,66,enrolled,7,20
200 | 57,female,middle,public,academic,71,65,72,66,56,enrolled,5,20
201 | 132,male,middle,public,academic,73,62,73,69,66,enrolled,3,20
202 |
--------------------------------------------------------------------------------