├── .gitignore
├── Holidays
│   ├── data
│   │   └── country_data.csv
│   └── scripts
│       └── Holidays.R
├── Twitter Sentiment Analysis
│   ├── app
│   │   └── www
│   │       ├── Page2.PNG
│   │       ├── Panels.PNG
│   │       ├── Loading1.gif
│   │       ├── Loading2.gif
│   │       ├── Nextgraph.PNG
│   │       ├── busyIndicator.css
│   │       └── style.css
│   └── scripts
│       └── Sentiment Analysis - Telcos.R
├── Consitent Billionaire Guide
│   ├── app
│   │   ├── www
│   │   │   ├── Loading.gif
│   │   │   ├── Loading1.gif
│   │   │   ├── Loading2.gif
│   │   │   ├── Loading9.gif
│   │   │   ├── Typing.gif
│   │   │   ├── The Ghost.PNG
│   │   │   ├── The Ghosts.PNG
│   │   │   ├── The Newbie.png
│   │   │   ├── The Hustler.PNG
│   │   │   ├── The Consistent.PNG
│   │   │   ├── busyIndicator.css
│   │   │   ├── Loading1.htm
│   │   │   └── style.css
│   │   └── app.R
│   ├── data
│   │   ├── year_list.csv
│   │   └── billionaire_data.csv
│   └── scripts
│       ├── Machine Learning for App.R
│       ├── billionaire_functions.R
│       ├── Billionaires.R
│       └── Billionaires - Compressed.R
├── Friends Analysis Laughter Prediction
│   ├── S1
│   │   ├── Friends - 1x03 - The One With The Thumb.en.srt
│   │   ├── Friends - 1x23 - The One With The Birth.en.srt
│   │   ├── Friends - 1x18 - The One With All The Poker.en.srt
│   │   ├── Friends - 1x08 - The One Where Nana Dies Twice.en.srt
│   │   ├── Friends - 1x21 - The One With The Fake Monica.en.srt
│   │   ├── Friends - 1x22 - The One With The Ick Factor.en.srt
│   │   ├── Friends - 1x24 - The One Where Rachel Finds Out.en.srt
│   │   ├── Friends - 1x04 - The One With George Stephanopoulos.en.srt
│   │   ├── Friends - 1x07 - The One With The Blackout.720p HDTV.TvR.en.srt
│   │   ├── Friends - 1x08 - The One Where Nana Dies Twice.720p HDTV.Morphz.en.srt
│   │   ├── Friends - 1x04 - The One With George Stephanopoulos.720p HDTV.TvR.en.srt
│   │   ├── Friends - 1x05 - The One With The East German Laundry Detergent.720p HDTV.TvR.en.srt
│   │   └── Friends - 1x07 - The One With The Blackout.en.srt
│   ├── Laughter_Detection.csv
│   ├── Audio File to Dataset Conversion.ipynb
│   └── canned_laughter_detection.R
├── Twitter Crawler
│   └── scripts
│       ├── Database Initial Setup.R
│       └── Telecoms twitter crawler.R
├── README.md
├── The Making of Great Music
│   ├── data
│   │   └── topic_dataset.csv
│   ├── README.md
│   └── scripts
│       ├── music_sentiment.py
│       └── music_sentiment.R
├── Football Analysis
│   ├── scripts
│   │   ├── FootballAnalysis - 2017_functions.R
│   │   ├── FootballAnalysis - 2017.R
│   │   └── FootballAnalysis - 2016.R
│   └── data
│       └── epl_data.csv
└── Others
    └── CBN.R
/.gitignore:
--------------------------------------------------------------------------------
1 | /packrat
2 | /.spyproject
3 | .httr-oauth
4 | .RData
5 | .Rhistory
6 | .Rproj.user
7 | README.md
8 |
--------------------------------------------------------------------------------
/Holidays/data/country_data.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Holidays/data/country_data.csv
--------------------------------------------------------------------------------
/Twitter Sentiment Analysis/app/www/Page2.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Twitter Sentiment Analysis/app/www/Page2.PNG
--------------------------------------------------------------------------------
/Twitter Sentiment Analysis/app/www/Panels.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Twitter Sentiment Analysis/app/www/Panels.PNG
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/Loading.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/Loading.gif
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/Loading1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/Loading1.gif
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/Loading2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/Loading2.gif
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/Loading9.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/Loading9.gif
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/Typing.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/Typing.gif
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/data/year_list.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/data/year_list.csv
--------------------------------------------------------------------------------
/Twitter Sentiment Analysis/app/www/Loading1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Twitter Sentiment Analysis/app/www/Loading1.gif
--------------------------------------------------------------------------------
/Twitter Sentiment Analysis/app/www/Loading2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Twitter Sentiment Analysis/app/www/Loading2.gif
--------------------------------------------------------------------------------
/Twitter Sentiment Analysis/app/www/Nextgraph.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Twitter Sentiment Analysis/app/www/Nextgraph.PNG
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/The Ghost.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/The Ghost.PNG
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/The Ghosts.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/The Ghosts.PNG
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/The Newbie.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/The Newbie.png
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/The Hustler.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/The Hustler.PNG
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/data/billionaire_data.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/data/billionaire_data.csv
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/The Consistent.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Consitent Billionaire Guide/app/www/The Consistent.PNG
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x03 - The One With The Thumb.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x03 - The One With The Thumb.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x23 - The One With The Birth.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x23 - The One With The Birth.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x18 - The One With All The Poker.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x18 - The One With All The Poker.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x08 - The One Where Nana Dies Twice.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x08 - The One Where Nana Dies Twice.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x21 - The One With The Fake Monica.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x21 - The One With The Fake Monica.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x22 - The One With The Ick Factor.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x22 - The One With The Ick Factor.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x24 - The One Where Rachel Finds Out.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x24 - The One Where Rachel Finds Out.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x04 - The One With George Stephanopoulos.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x04 - The One With George Stephanopoulos.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x07 - The One With The Blackout.720p HDTV.TvR.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x07 - The One With The Blackout.720p HDTV.TvR.en.srt
--------------------------------------------------------------------------------
/Twitter Crawler/scripts/Database Initial Setup.R:
--------------------------------------------------------------------------------
1 | #Telecoms-tweets-database/Database initial setup.R
2 | #Setup of sqlite database to store tweets
3 | library(dplyr)
4 | telecoms_db = src_sqlite("Telecoms tweets database",create = T)
5 | copy_to(telecoms_db,final_file,temporary = F) #final_file is the combined tweets data frame built by the Telecoms twitter crawler script
--------------------------------------------------------------------------------
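Note that dplyr::src_sqlite() is deprecated in current dplyr/dbplyr releases. A minimal sketch of the same setup with DBI/RSQLite, assuming the final_file data frame produced by the crawler script (this is not the repo's original code):

    library(DBI)
    library(RSQLite)

    #Create (or open) the SQLite database file and persist the tweets table
    con <- dbConnect(RSQLite::SQLite(), "Telecoms tweets database")
    dbWriteTable(con, "final_file", final_file, overwrite = TRUE)
    dbDisconnect(con)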
/Friends Analysis Laughter Prediction/S1/Friends - 1x08 - The One Where Nana Dies Twice.720p HDTV.Morphz.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x08 - The One Where Nana Dies Twice.720p HDTV.Morphz.en.srt
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x04 - The One With George Stephanopoulos.720p HDTV.TvR.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x04 - The One With George Stephanopoulos.720p HDTV.TvR.en.srt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TheArtandScienceofData
2 | This is the code repository for my blog, The Art and Science of Data.
3 | 
4 | Feel free to message me about any issues you have running the scripts or any suggestions you would like to make.
5 |
6 | The link for my blog is: https://theartandscienceofdata.wordpress.com/
7 |
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/S1/Friends - 1x05 - The One With The East German Laundry Detergent.720p HDTV.TvR.en.srt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RosebudAnwuri/TheArtandScienceofData/HEAD/Friends Analysis Laughter Prediction/S1/Friends - 1x05 - The One With The East German Laundry Detergent.720p HDTV.TvR.en.srt
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/busyIndicator.css:
--------------------------------------------------------------------------------
1 | div.shinysky-busy-indicator {
2 |
3 | top: 20%;
4 | left: 40%;
5 | margin-top: 30%;
6 | margin-left: auto;
7 | display:none;
8 | background: none;
9 | text-align: center;
10 | padding-top: 20px;
11 | padding-left: 30px;
12 | padding-bottom: 40px;
13 | padding-right: 30px;
14 | border-radius: 5px;
15 | }
--------------------------------------------------------------------------------
/The Making of Great Music/data/topic_dataset.csv:
--------------------------------------------------------------------------------
1 | document,topic,gamma
2 | 60s,1,0.9999971261483629
3 | 50s,1,0.9999926749082607
4 | 70s,1,0.9999174345245359
5 | 80s,1,0.9998679405308054
6 | 00s,2,0.9985746025827739
7 | 10s,2,0.9945805765167997
8 | 90s,1,0.5665976165494422
9 | 90s,2,0.43340238345055787
10 | 10s,1,0.00541942348320034
11 | 00s,1,0.0014253974172261596
12 | 80s,2,1.320594691946073e-4
13 | 70s,2,8.256547546413914e-5
14 | 50s,2,7.325091739302442e-6
15 | 60s,2,2.8738516370438335e-6
16 |
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/Loading1.htm:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/The Making of Great Music/README.md:
--------------------------------------------------------------------------------
1 | # The Making of Great Music
2 | 
3 | ## Dataset Explanations
4 | 
5 | 
6 | music_df.csv: Music dataset with features from Spotify's API and Kevin Schaich's Billboard Hot 100 dataset.
7 | 
8 | features_dataset.csv: Contains features and genres for all artist and featured-artist combinations.
9 | 
10 | topic_dataset.csv: Dataset with the most frequent topics in each decade.
11 | 
12 | Feel free to message me about any issues you have running the scripts or any suggestions you would like to make.
13 | 
14 | The link for my blog is: https://theartandscienceofdata.wordpress.com/
15 | 
--------------------------------------------------------------------------------
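A quick sketch of how topic_dataset.csv above can be read and summarised in R (repo-relative path assumed; this is not part of the original scripts):

    library(readr)
    library(dplyr)

    topics <- read_csv("The Making of Great Music/data/topic_dataset.csv")

    #Dominant topic per decade: the topic with the highest gamma for each document
    topics %>%
      group_by(document) %>%
      slice_max(gamma, n = 1) %>%
      arrange(document)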
/Twitter Sentiment Analysis/app/www/busyIndicator.css:
--------------------------------------------------------------------------------
1 | div.shinysky-busy-indicator {
2 |
3 | top: 20% !important;
4 | left: 3% !important;
5 | position:absolute;
6 | margin-left: auto;
7 | display:none;
8 | background: none;
9 | text-align: center;
10 | margin-top: 0% !important;
11 | margin-left: auto;
12 | display: none;
13 | background: none;
14 | text-align: center;
15 | padding-top: 0% !important;
16 | padding-left: 0% !important;
17 | padding-bottom: 0% !important;
18 | padding-right: 0% !important;
19 | border-radius: 5px;
20 |
21 |
22 |
23 | }
24 | #shiny-tab-Table > div > div:nth-child(2) > div > div.box-body > div.shinysky-busy-indicator > img{
25 |
26 | width:500px;
27 | }
--------------------------------------------------------------------------------
/Twitter Sentiment Analysis/app/www/style.css:
--------------------------------------------------------------------------------
1 | @import url('https://fonts.googleapis.com/css?family=Raleway:300,400');
2 | body{
3 | font-family:'Raleway', sans-serif ;
4 | }
5 | .logo{
6 | font-family:'Raleway', sans-serif !important;
7 | }
8 | h1,h2,h3,h4,h5,h6{
9 | font-family:'Raleway', sans-serif ;
10 |
11 | }
12 | #Sarch1, #Sarch{
13 | border-radius:0px;
14 | border-color:#689FB0;
15 | background-color:#689FB0;
16 | animation-duration:2s;
17 | -moz-animation-duration: 2s;
18 | -webkit-animation-duration: 2s;
19 | -o-animation-duration:.2s;
20 | -ms-animation-duration:.2s;
21 |
22 | }
23 | #hider{
24 | font-size:14pt;
25 | }
26 | #severalPlot{
27 | box-sizing:unset;
28 | }
29 |
30 | #twitterTable{
31 | margin-left: 5%;
32 | }
33 | .shinysky-busy-indicator{
34 | width:300px;
35 | height:300px;
36 | }
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/Laughter_Detection.csv:
--------------------------------------------------------------------------------
1 | File,Start,End,Length,Talk Over
2 | Friends.S01.E01.480p,63,65,2,
3 | Friends.S01.E01.480p,68,70,2,
4 | Friends.S01.E01.480p,72,72,0,
5 | Friends.S01.E01.480p,76,78,2,1
6 | Friends.S01.E01.480p,87,89,2,
7 | Friends.S01.E01.480p,105,107,2,
8 | Friends.S01.E01.480p,109,110,1,1
9 | Friends.S01.E01.480p,121,122,1,
10 | Friends.S01.E01.480p,125,126,1,
11 | Friends.S01.E01.480p,135,136,1,
12 | Friends.S01.E01.480p,141,143,2,
13 | Friends.S01.E01.480p,148,149,1,
14 | Friends.S01.E01.480p,158,159,1,
15 | Friends.S01.E01.480p,173,173,0,
16 | Friends.S01.E01.480p,178,178,0,
17 | Friends.S01.E01.480p,186,187,1,
18 | Friends.S01.E01.480p,189,192,3,
19 | Friends.S01.E01.480p,194,195,1,1
20 | Friends.S01.E01.480p,202,203,1,
21 | Friends.S01.E01.480p,211,213,2,
22 | Friends.S01.E01.480p,226,226,0,
23 | Friends.S01.E01.480p,240,241,1,
24 | Friends.S01.E01.480p,244,244,0,
25 | Friends.S01.E01.480p,252,254,2,
26 | Friends.S01.E01.480p,256,261,5,
27 | Friends.S01.E01.480p,275,276,1,
28 | Friends.S01.E01.480p,288,294,6,
29 | Friends.S01.E01.480p,301,302,1,
30 | Friends.S01.E01.480p,320,321,1,
31 | Friends.S01.E01.480p,331,332,1,1
32 | Friends.S01.E01.480p,335,337,2,
33 | Friends.S01.E01.480p,358,360,2,
34 | Friends.S01.E01.480p,379,380,1,
35 | Friends.S01.E01.480p,384,385,1,
36 | Friends.S01.E01.480p,388,389,1,
37 | Friends.S01.E01.480p,400,403,3,
38 | Friends.S01.E01.480p,408,409,1,
39 | Friends.S01.E01.480p,413,414,1,
40 | Friends.S01.E01.480p,447,449,2,
41 | Friends.S01.E01.480p,463,464,1,
42 | Friends.S01.E01.480p,467,468,1,
43 | Friends.S01.E01.480p,476,478,2,
44 | Friends.S01.E01.480p,493,494,1,
45 | Friends.S01.E01.480p,498,499,1,
46 | Friends.S01.E01.480p,505,506,1,
47 | Friends.S01.E01.480p,508,510,2,
48 | Friends.S01.E01.480p,527,528,1,
49 | Friends.S01.E01.480p,534,535,1,
50 | Friends.S01.E01.480p,560,561,1,
51 | Friends.S01.E01.480p,573,574,1,
52 | Friends.S01.E01.480p,582,583,1,
53 | Friends.S01.E01.480p,588,588,0,
54 | Friends.S01.E01.480p,595,596,1,
55 | Friends.S01.E01.480p,602,603,1,
56 | Friends.S01.E01.480p,613,618,5,
57 | Friends.S01.E01.480p,629,630,1,
58 | Friends.S01.E01.480p,631,633,2,
59 | Friends.S01.E01.480p,641,642,1,
60 | Friends.S01.E01.480p,654,658,4,
61 | Friends.S01.E01.480p,667,667,0,
62 |
--------------------------------------------------------------------------------
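A small sketch of how this annotation file can be summarised (repo-relative path assumed; Start, End, and Length appear to be second offsets, matching the per-second MFCCs in the conversion notebook below):

    library(readr)
    library(dplyr)

    laughs <- read_csv("Friends Analysis Laughter Prediction/Laughter_Detection.csv")

    #Number of laughs, total annotated laughter length, and talk-over count per episode file
    laughs %>%
      group_by(File) %>%
      summarise(laughs = n(),
                total_length = sum(Length),
                talk_over = sum(`Talk Over`, na.rm = TRUE))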
/Consitent Billionaire Guide/scripts/Machine Learning for App.R:
--------------------------------------------------------------------------------
1 | library(h2o)
2 | library(nnet)
3 | library(plyr)
4 | library(dplyr)
5 | library(purrr)
6 | library(scales)
7 | library(caTools)
8 | library(lime)
9 | h2o.init()
10 |
11 |
12 |
13 | #############################
14 | ## Pre Processing ##
15 | #############################
16 | dat = read.csv("C:/Users/rose.anwuri/Documents/TheArtandScienceofData/Consitent Billionaire Guide/data/billionaire_data2.csv")
17 | dat1=dat %>%
18 | filter(Cluster !="The Newbie")
19 | data = dat1 %>%
20 | select(-Detail1,-Detail2) %>%
21 | as.h2o()
22 |
23 | splits = h2o.splitFrame(data,ratios = c(0.6,0.2),destination_frames = c("train", "valid", "test"), seed = 1234)
24 | train = h2o.getFrame("train")
25 | val = h2o.getFrame("valid")
26 | test = h2o.getFrame("test")
27 | #After some feature selection, these are the final features
28 | features=2:21
29 | response=22
30 |
31 |
32 | ##########################################
33 | ##Model Selection, Tuning and Validation##
34 | ## in one step! ##
35 | #########################################
36 |
37 | #Select the best model (already tuned) using the automl function
38 | #Cap the maximum number of models to be trained at 10
39 | model_selection=h2o.automl(x=features,y=response,training_frame = train,validation_frame = val,max_models = 10,stopping_metric = "AUC")
40 |
41 |
42 | #Extract the best model
43 | final_model = model_selection@leader
44 |
45 | model = do.call(h2o.gbm,
46 | {
47 | p <- final_model@parameters
48 | p$model_id = NULL ## do not overwrite the original grid model
49 | p$training_frame = data ## use the full dataset
50 | p$validation_frame = NULL ## no validation frame
51 | p$nfolds = 5 ## cross-validation
52 | p
53 | })
54 |
55 | #Save Model to be loaded into Shiny App!
56 | h2o.saveModel(model,"local_model1")
57 |
58 | #Lime Predictions [Optional]
59 | df_lime = as.data.frame(data)[,c(features,response)]
60 | df_lime=read.csv("C:/Users/rose.anwuri/Documents/TheArtandScienceofData/Consitent Billionaire Guide/app/billionaire_data_for_ml.csv")
61 | model=h2o.loadModel("C:/Users/rose.anwuri/Documents/TheArtandScienceofData/Consitent Billionaire Guide/app/local_model")
62 | df_lime$prediction =predict(model,data)[,1] %>% as.vector()
63 |
64 | explainer <- lime::lime(
65 | df_lime,
66 | model = model,
67 | bin_continuous = T)
68 |
69 | explanation <- lime::explain(
70 | df_lime[c(7)],
71 | explainer = explainer,
72 | n_labels = 1,
73 | n_features = 8,
74 | kernel_width = 0.5)
75 |
76 | plot_features(explanation)
77 |
78 |
--------------------------------------------------------------------------------
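For the Shiny side, a sketch (not the repo's app.R) of reloading the saved model and scoring one new observation; new_billionaire is a hypothetical one-row data frame with the same feature columns used in training:

    library(h2o)
    h2o.init()

    #h2o.saveModel() returns the path of the saved model; pass that path to h2o.loadModel()
    saved_path <- h2o.saveModel(model, path = "local_model1", force = TRUE)
    loaded_model <- h2o.loadModel(saved_path)

    #Score a hypothetical new row with the training feature columns
    pred <- h2o.predict(loaded_model, as.h2o(new_billionaire))
    as.data.frame(pred)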
/Friends Analysis Laughter Prediction/Audio File to Dataset Conversion.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import librosa\n",
12 | "import os\n",
13 | "import re\n",
14 | "import requests\n",
15 | "import urllib\n",
16 | "import io\n",
17 | "import gzip\n",
18 | "import warnings\n",
19 | "import time\n",
20 | "warnings.filterwarnings('ignore')"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "def create_mfcc(path, offset=0, duration=1, n_mfcc=40):\n",
30 | " start_time = time.time() \n",
31 | " audio_array = librosa.load(path, offset=offset, duration=duration)[0]\n",
32 | " audio_array = audio_array.reshape(duration, -1)\n",
33 | " mfcc = np.array([np.mean(librosa.feature.mfcc(ary, n_mfcc=n_mfcc), axis=1) for ary in audio_array])\n",
34 | " episode=re.search('E\\d{2}',url).group(0)\n",
35 | " season=re.search('S\\d{2}',url).group(0)\n",
36 | " duration=np.arange(0,duration,1).reshape(duration,-1)\n",
37 | " extras = np.full((mfcc.shape[0], 2), (season, episode))\n",
38 | " elapsed_time = int(time.time() - start_time)\n",
39 | " print('{:02d}:{:02d}:{:02d}'.format(elapsed_time // 3600, (elapsed_time % 3600 // 60), elapsed_time % 60))\n",
40 | " return np.hstack((mfcc, extras,duration))"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": null,
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "list_of_urls=['FRIENDS.S02E'+\"{:02d}\".format(i)+'.480p.mkv' for i in range(1,25,1)]\n",
50 | "list_of_urls"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": null,
56 | "metadata": {},
57 | "outputs": [],
58 | "source": [
59 | "season_two_audio=[]\n",
60 | "for url in list_of_urls:\n",
61 | " Y, sample_rate=librosa.load(url,res_type='kaiser_fast')\n",
62 | " print('Reading File for Episode ' + re.search('E\\d{2}',url).group(0)+'...')\n",
63 | " duration=(np.floor(len(Y)/sample_rate))\n",
64 | " result=create_mfcc(url,duration=int(duration))\n",
65 | " season_two_audio.append(result)\n"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "metadata": {},
72 | "outputs": [],
73 | "source": [
74 | "pd.DataFrame(np.vstack(season_two_audio)).to_csv('season_two_audio.csv')\n"
75 | ]
76 | }
77 | ],
78 | "metadata": {
79 | "kernelspec": {
80 | "display_name": "Python 3",
81 | "language": "python",
82 | "name": "python3"
83 | },
84 | "language_info": {
85 | "codemirror_mode": {
86 | "name": "ipython",
87 | "version": 3
88 | },
89 | "file_extension": ".py",
90 | "mimetype": "text/x-python",
91 | "name": "python",
92 | "nbconvert_exporter": "python",
93 | "pygments_lexer": "ipython3",
94 | "version": "3.7.3"
95 | }
96 | },
97 | "nbformat": 4,
98 | "nbformat_minor": 2
99 | }
100 |
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/www/style.css:
--------------------------------------------------------------------------------
1 | @import url('https://fonts.googleapis.com/css?family=Raleway:300,400');
2 |
3 | .navbar-default {
4 | background-color: teal;
5 | border-color: #E7E7E7;
6 | }
7 | .navbar-default .navbar-nav > li > a {
8 | color: white;
9 | }
10 | .navbar-default .navbar-nav > .active > a, .navbar-default .navbar-nav > .active > a:hover, .navbar-default .navbar-nav > .active > a:focus {
11 | background-color: #00897b;
12 | color: white;
13 | }
14 | body > nav > div{
15 | background-color: #009688;
16 | }
17 | body{
18 | font-family:'Raleway', sans-serif;
19 | }
20 | .selectize-input {border-radius:0px;
21 | background-color: rgba(236, 240, 241,0) !important;
22 | border-width:1px;
23 |
24 | border-top: none;
25 | border-right: none;
26 | border-left: none;
27 | border-bottom-color: #546e7a;
28 | border-radius: 0;}
29 |
30 | .selectize-input.dropdown-active {border-radius:0px;
31 | background-color: rgba(236, 240, 241,0);
32 | border-width:1px;
33 | border-top: none;
34 | border-right: none;
35 | border-left: none;
36 | border-bottom-color: #546e7a;
37 | border-radius: 0}
38 | .selectize-dropdown {border-radius:0px;
39 | background-color: rgba(236, 240, 241,0);
40 | border-width:1px;
41 | border-top: none;
42 | border-right: none;
43 | border-left: none;
44 | border-bottom-color: #546e7a;
45 | border-radius: 0}
46 |
47 |
48 |
49 | body > nav{
50 | background-color:#009688;
51 | }
52 | .card-action {
53 | box-shadow: 0 1px 3px rgba(0,0,0,0.12), 0 1px 2px rgba(0,0,0,0.24);
54 | transition: all 0.3s cubic-bezier(.25,.8,.25,1);
55 | height:50px;
56 | text-align:center;
57 | }
58 | .card-action:hover {
59 | box-shadow: 0 14px 28px rgba(0,0,0,0.25), 0 10px 10px rgba(0,0,0,0.22);
60 | }
61 | @media only screen and (max-width : 500px) {
62 | img{
63 | width:100%;
64 | min-width:100%;
65 | }
66 | }
67 |
68 | #run{
69 | animation-duration:2s;
70 | -moz-animation-duration: 2s;
71 | -webkit-animation-duration: 2s;
72 | -o-animation-duration:.2s;
73 | -ms-animation-duration:.2s;
74 | border-radius: 0px;
75 | }
76 | #run1{
77 | animation-duration:2s;
78 | -moz-animation-duration: 2s;
79 | -webkit-animation-duration: 2s;
80 | -o-animation-duration:.2s;
81 | -ms-animation-duration:.2s;
82 | }
83 | .pulse{
84 | animation-duration:2s;
85 | -moz-animation-duration: 2s;
86 | -webkit-animation-duration: 2s;
87 | -o-animation-duration:.2s;
88 | -ms-animation-duration:.2s;
89 | }
90 | #close-button{
91 | animation-duration:2s;
92 | -moz-animation-duration: 2s;
93 | -webkit-animation-duration: 2s;
94 | -o-animation-duration:.2s;
95 | -ms-animation-duration:.2s;
96 | }
97 | .modal-content{
98 | background: none;
99 | border: none;
100 | box-shadow: none;
101 | }
102 | .modal-header{
103 | border:none;
104 | }
105 | .modal .close{
106 | color:white;
107 | }
108 | .modal-body{
109 | display:none;
110 | }
111 | .modal-footer{
112 | display:none;
113 | }
114 |
115 | #prefix_final{
116 | font-size: 6vh; text-align:center;
117 | }
118 | #prediction_final{
119 | font-size: 6vh; text-align:center;
120 | }
121 | #sidebar{background-color: rgba(236, 240, 241,0.4);}
122 | .form-group label{}
123 | .form-group{color: #90a4ae ;text-transform:uppercase;font-size:10pt;}
124 | #Age,#founding_year{background-color: rgba(236, 240, 241,0);
125 | border-width:1px;
126 |
127 | border-top: none;
128 | border-right: none;
129 | border-left: none;
130 | border-bottom-color: #546e7a;
131 | border-radius: 0}
--------------------------------------------------------------------------------
/The Making of Great Music/scripts/music_sentiment.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Jan 19 17:18:58 2018
4 |
5 | @author: rose.anwuri
6 | """
7 | import numpy as np
8 | import pandas as pd
9 | import matplotlib.pyplot as plt
10 | import nltk
11 | from nltk.corpus import stopwords
12 | nltk.download('stopwords')
13 | music_df = pd.read_csv('https://raw.githubusercontent.com/walkerkq/musiclyrics/master/billboard_lyrics_1964-2015.csv')
14 | music_df.head(n=5)
15 | music_df['Lyrics']=music_df.Lyrics.fillna("")
16 | from afinn import Afinn
17 | afinn = Afinn()
18 | music_df['sentiment_score']=list(map(afinn.score,music_df['Lyrics']))
19 | music_df.head(n=5)
20 | year_and_avg_sentiment=music_df[["Year","sentiment_score"]].groupby('Year').mean()
21 | plt.plot( year_and_avg_sentiment.index,year_and_avg_sentiment.sentiment_score, label='linear')
22 | plt.title("Sentiment of Popular Music between 1965-2015")
23 | plt.show()
24 | nrc_db = pd.read_table("https://raw.githubusercontent.com/mhbashari/NRC-Persian-Lexicon/master/NRC-emotion-lexicon-wordlevel-alphabetized-v0.92.txt",
25 | names = ["Word", "Sentiment", "Exists"])
26 | nrc_db.head(n=5)
27 | nrc_db=nrc_db[nrc_db.Exists >0]
28 | nrc_db.head(n=5)
29 | sentiment=list(set(nrc_db.Sentiment))
30 | for i in sentiment:
31 | music_df[i]=0
32 |
33 | music_df.head(n=5)
34 | stop_words=set(stopwords.words('english'))
35 | def add_sentiment(lyrics):
36 | words=lyrics.split()
37 | words = set(words) - stop_words
38 | words = list(words)
39 | for i in words:
40 | sentiment=list(nrc_db[nrc_db.Word==i]["Sentiment"])
41 | if len(sentiment)>0:
42 | music_df.loc[music_df.Lyrics==lyrics,sentiment] +=1
43 | else:
44 | music_df.loc[music_df.Lyrics==lyrics,list(set(nrc_db.Sentiment))]+=0
45 |
46 | for i in range(5100):
47 | add_sentiment(music_df.Lyrics[i])
48 | print(i)
49 |
50 | import spotipy
51 | from spotipy.oauth2 import SpotifyClientCredentials
52 | #Authenticate against the Spotify Web API with client credentials
53 | client_credentials_manager = SpotifyClientCredentials(client_id="769ef3519e8444238fde9c8981c6371c",client_secret="b17e4a7ca0b4426f9962645ba5c74a63")
54 | sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
55 | from collections import OrderedDict
56 | def get_spotify_features(track, artist):
57 | #Search for Spofity song ID
58 | songs=sp.search(q='track:'+track+' '+'artist:'+artist+'*' , type='track')
59 | items = songs['tracks']['items']
60 | if len(items) ==0:
61 | return([0]*len(features))
62 | else:
63 | track = items[0]
64 | song_id = str(track["id"])
65 | #Use ID to get Song features
66 | track_features=sp.audio_features(song_id)
67 | if len(track_features[0]) <18:
68 | return([0]*len(features))
69 | else:
70 | features_to_df = np.array(track_features)[0]
71 | #Order Dictionary
72 | features_to_df = OrderedDict(features_to_df)
73 | #Get Dictionary values
74 | feature_values = features_to_df.values()
75 | return(feature_values)
76 |
77 | music_df.loc[:,features]= music_df.loc[:,].apply(lambda row: pd.Series(get_spotify_features(row["Song"],row["artist_shortened"]),index=features) ,axis=1)
78 |
79 | ind=np.linspace(0,5100,num=5101-1)
80 | for i in ind:
81 | music_df.loc[i,features]=pd.Series(get_spotify_features(music_df.loc[i,"Song"],music_df.loc[i,"artist_shortened"]),index=features)
82 |     print(i)
83 |
84 | music_df = pd.read_csv('C:/Users/rose.anwuri/Documents/TheArtandScienceofData/Music Sentiment Analysis/data/music_data.csv')
85 |
86 | def gather( df, key, value, cols ):
87 | id_vars = [ col for col in df.columns if col not in cols ]
88 | id_values = cols
89 | var_name = key
90 | value_name = value
91 | return pd.melt( df, id_vars, id_values, var_name, value_name )
92 |
93 | music_df_gathered = gather(music_df,"Sentiment","Score",list(music_df.columns[9:19]))
94 | music_df_gathered = gather(music_df,"audio_feature","feature_value",list(music_df_gathered.columns[10:28]))
95 |
--------------------------------------------------------------------------------
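The AFINN scoring step above has an R counterpart in the repo (music_sentiment.R, not shown in this dump); a sketch of how that step might look with tidytext, assuming the same Billboard lyrics CSV and its Lyrics column:

    library(readr)
    library(dplyr)
    library(tidytext)

    music_df <- read_csv("https://raw.githubusercontent.com/walkerkq/musiclyrics/master/billboard_lyrics_1964-2015.csv")

    #One row per word, joined to the AFINN lexicon, then summed back to one score per song
    afinn <- get_sentiments("afinn")
    sentiment_scores <- music_df %>%
      mutate(song_id = row_number()) %>%
      unnest_tokens(word, Lyrics) %>%
      inner_join(afinn, by = "word") %>%
      group_by(song_id) %>%
      summarise(sentiment_score = sum(value))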
/Twitter Crawler/scripts/Telecoms twitter crawler.R:
--------------------------------------------------------------------------------
1 | library(twitteR)
2 | library(base64enc)
3 | library(RCurl)
4 | library(httr)
5 | library(RJSONIO)
6 | library(stringr)
7 |
8 |
9 | api_key <- "UE0sCwrNmxHb8YL759R7SuLEc" # From dev.twitter.com
10 | api_secret <- "C9OxWPmBAOwzQ6G4VXbCKeXd3XEHG5XvJjzTA1AVLoKbtwnpJy" # From dev.twitter.com
11 | token <- "370018889-WKxIRFsc8OJhvdtW3BOOdgIy1qGco48d7QlUO0in" # From dev.twitter.com
12 | token_secret <- "7Ah8qplWJf5ey4zB4IPTTBlypMCUenXnQsrCH7808UbRE" # From dev.twitter.com
13 |
14 | # Create Twitter Connection
15 | setup_twitter_oauth(api_key, api_secret, token, token_secret)
16 | set_config(config (ssl_verifypeer= 0L))
17 | date_time = Sys.Date()-1
18 | date_time = as.character.Date(date_time)
19 | tweetdetails <- searchTwitter("etisalat network -from:etisalat_9ja", n=1500, lang="en", since = date_time,until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
20 | tweetData_network = twListToDF(tweetData)
21 | tweetdetails <- searchTwitter("etisalat data -from:etisalat_9ja", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
22 | tweetData = append(tweetData, tweetdetails)
23 |
24 | tweetData = twListToDF(tweetData)
25 | tweetDataFinal = rbind(tweetData, tweetData_network)
26 | tweetdetails <- searchTwitter("etisalat call -from:etisalat_9ja", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
27 | tweetData = twListToDF(tweetData)
28 | etisalat = rbind(tweetData, tweetDataFinal)
29 | etisalat$service_provider = "Etisalat"
30 |
31 | #MTN
32 | tweetdetails <- searchTwitter("mtn network -from:mtnng", n=1500, lang="en", since = date_time,until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
33 | tweetData_network = twListToDF(tweetData)
34 | tweetdetails <- searchTwitter("mtn data -from:mtnng", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
35 | tweetData = append(tweetData, tweetdetails)
36 |
37 | tweetData = twListToDF(tweetData)
38 | tweetDataFinal = rbind(tweetData, tweetData_network)
39 | tweetdetails <- searchTwitter("mtn call -from:mtnng", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
40 | tweetData = twListToDF(tweetData)
41 | mtn = rbind(tweetData, tweetDataFinal)
42 | mtn$serviceProvider = "MTN"
43 |
44 | #Airtel
45 | tweetdetails <- searchTwitter("airtel network -from:airtelnigeria", n=1500, lang="en", since = date_time,until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
46 | tweetData_network = twListToDF(tweetData)
47 | tweetdetails <- searchTwitter("airtel data -from:airtelnigeria", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
48 | tweetData = append(tweetData, tweetdetails)
49 |
50 | tweetData = twListToDF(tweetData)
51 | tweetDataFinal = rbind(tweetData, tweetData_network)
52 | tweetdetails <- searchTwitter("airtel call -from:airtelnigeria", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
53 | tweetData = twListToDF(tweetData)
54 | airtel = rbind(tweetData, tweetDataFinal)
55 | airtel$service_provider = "Airtel"
56 | #Glo
57 | tweetdetails <- searchTwitter("glo network -from:gloworld", n=1500, lang="en", since = date_time,until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
58 | tweetData_network = twListToDF(tweetData)
59 | tweetdetails <- searchTwitter("glo data -from:gloworld", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
60 | tweetData = append(tweetData, tweetdetails)
61 |
62 | tweetData = twListToDF(tweetData)
63 | tweetDataFinal = rbind(tweetData, tweetData_network)
64 | tweetdetails <- searchTwitter("glo call -from:gloworld", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
65 | tweetData = twListToDF(tweetData)
66 | glo = rbind(tweetData, tweetDataFinal)
67 | glo$service_provider = "Glo"
68 |
69 | final_file = rbind(glo,etisalat,mtn,airtel)
70 | write.csv(final_file,paste0("CMT DATA_",Sys.Date(),".csv"),row.names = F)
--------------------------------------------------------------------------------
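The four provider blocks above repeat the same three searches; a compact sketch of that pattern as a helper (a hypothetical crawl_provider(), built only from the twitteR calls already used in the script):

    #Hypothetical helper: run the "network", "data" and "call" searches for one provider
    crawl_provider <- function(provider, exclude_handle) {
      topics <- c("network", "data", "call")
      frames <- lapply(topics, function(topic) {
        query <- paste0(provider, " ", topic, " -from:", exclude_handle)
        twListToDF(searchTwitter(query, n = 1500, lang = "en",
                                 since = date_time, until = date_time,
                                 geocode = '10,9, 200km'))
      })
      out <- do.call(rbind, frames)
      out$service_provider <- provider
      out
    }

    #e.g. etisalat <- crawl_provider("etisalat", "etisalat_9ja")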
/Football Analysis/scripts/FootballAnalysis - 2017_functions.R:
--------------------------------------------------------------------------------
1 | #Apply moving average
2 |
3 | roll_apply = function(pts, no_of_lags){
4 | if(length(pts) ul > li:nth-child(3) > a')
15 | remDr$mouseMoveToLocation(webElement = clicktype)
16 | clicktype$click()
17 | doc = remDr$getPageSource()[[1]]
18 | current_doc = remDr$getPageSource()[[1]] %>%
19 | read_html(doc) %>%
20 | html_table(fill=T)
21 | current_doc= current_doc[c(1,3,6)]
22 | team_stat= current_doc %>%
23 | Reduce(function(x,y) merge(x,y,by="Team"),.) %>%
24 | select(-R.x,-R.y,-R)
25 | return(team_stat)
26 |
27 | }
28 | percent_to_numberic = function(percent){
29 | require(stringr)
30 | percent = str_replace(percent,"%","") %>%
31 | as.numeric
32 | return(percent/100)
33 |
34 | }
35 | defensive_gathering_pipeline = function(number){
36 | remDr$navigate(paste0("https://www.whoscored.com/Regions/252/Tournaments/2/Seasons/",number,"/England-Premier-League"))
37 | #Go to Team Statistics
38 | clicktype = remDr$findElement(using = "css selector", '#sub-navigation > ul > li:nth-child(3) > a')
39 | remDr$mouseMoveToLocation(webElement = clicktype)
40 | clicktype$click()
41 | clicktype = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(2) > a')
42 | remDr$mouseMoveToLocation(webElement = clicktype)
43 | clicktype$click()
44 | Sys.sleep(2)
45 | doc = remDr$findElement(using = "css selector", '#statistics-team-table-defensive')
46 | doc =doc$getElementAttribute("innerHTML")[[1]] %>%
47 | read_html %>%
48 | html_table()
49 | defense_stat= doc[[1]] %>%
50 | select(-R)
51 | return(defense_stat)
52 |
53 | }
54 |
55 | gathering_pipeline = function(type,number){
56 | remDr$navigate(paste0("https://www.whoscored.com/Regions/252/Tournaments/2/Seasons/",number,"/England-Premier-League"))
57 | #Go to Team Statistics
58 | clicktype = remDr$findElement(using = "css selector", '#sub-navigation > ul > li:nth-child(3) > a')
59 | remDr$mouseMoveToLocation(webElement = clicktype)
60 | clicktype$click()
61 | if(type=="Offensive"){
62 | clicktype = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(3) > a')
63 | remDr$mouseMoveToLocation(webElement = clicktype)
64 | clicktype$click()
65 | }
66 | else if(type=="Detailed"){
67 | clicktype = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(4) > a')
68 | remDr$mouseMoveToLocation(webElement = clicktype)
69 | clicktype$click()
70 | }
71 | else if(type=="Pass Types"){
72 | clicktype = remDr$findElement(using = "css selector", '#stage-situation-stats-options > li:nth-child(2) > a')
73 | remDr$mouseMoveToLocation(webElement = clicktype)
74 | clicktype$click()
75 | }
76 | else if(type=="Card Situations"){
77 | clicktype = remDr$findElement(using = "css selector", '#stage-situation-stats-options > li:nth-child(3) > a')
78 | remDr$mouseMoveToLocation(webElement = clicktype)
79 | clicktype$click()
80 | }
81 | Sys.sleep(2)
82 | if(type=="Offensive"){
83 | doc = remDr$findElement(using = "css selector", '#statistics-team-table-offensive')
84 | }
85 | else if(type=="Detailed"){
86 | doc = remDr$findElement(using = "css selector", '#top-team-stats-summary-grid')
87 | }
88 | else if(type=="Pass Types"){
89 | doc = remDr$findElement(using = "css selector", '#stage-passes-grid')
90 | }
91 | else if(type=="Card Situations"){
92 | doc = remDr$findElement(using = "css selector", '#stage-cards-grid')
93 | }
94 | doc =doc$getElementAttribute("outerHTML")[[1]] %>%
95 | read_html %>%
96 | html_table()
97 | stat= doc[[1]] %>%
98 | select(-R)
99 | return(stat)
100 |
101 | }
102 |
103 | cummean_fn = function(df, no_of_cols,no_of_rolls){
104 | if(nrow(df_split[[df]])% as.data.frame
124 | test.df$rank_pred=rank(-test.df$test_pred,ties.method = "min")
125 | rmse= RMSE(test.df$rank_pred,as.numeric(as.character(test.df$rank)),na.rm = T)
126 | return(rmse)
127 | }
128 |
129 | getID = function(model,newID){
130 | ID=model@model_id
131 | return(ID)
132 | }
--------------------------------------------------------------------------------
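The bodies of roll_apply() and cummean_fn() are truncated in this dump. A minimal sketch of a lag-window rolling average in the spirit of the surviving signature, offered as an assumption about intent rather than a reconstruction of the original code:

    library(zoo)

    #Assumed intent: right-aligned rolling mean of points over the previous no_of_lags matches
    roll_apply <- function(pts, no_of_lags) {
      if (length(pts) < no_of_lags) {
        return(cumsum(pts) / seq_along(pts))   #cumulative mean when there are too few points
      }
      zoo::rollapply(pts, width = no_of_lags, FUN = mean, align = "right", partial = TRUE)
    }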
/Holidays/scripts/Holidays.R:
--------------------------------------------------------------------------------
1 | library(rvest)
2 | library(stringr)
3 | library(dplyr)
4 | library(doParallel)
5 | library(foreach)
6 | library(readr)
7 | library(tidyr)
8 | url1 <- "https://www.timeanddate.com/holidays/"
9 |
10 | links <- url1 %>% read_html %>% html_nodes(".main-content-div a") %>% html_attr("href")
11 | links <- links [4:231]
12 | links <- paste0("https://www.timeanddate.com",links)
13 | country <- str_replace_all(str_replace(links,"https://www.timeanddate.com/holidays/",""),"-"," ")
14 | first_word_cap <- function(word){
15 | require(stringr)
16 | words <- unlist(str_split(word, " "))
17 | words <- lapply(words, function(x) str_replace(x,"^\\w",toupper(substr(x,1,1))))
18 | word <- paste(words, collapse = " ")
19 | }
20 | country <-unlist(lapply(country,first_word_cap))
21 |
22 | no_of_holiday = function(x){
23 | require(rvest)
24 | require(dplyr)
25 | ifelse(is.list(try(x %>%
26 | read_html %>%
27 | html_node(".zebra") %>%
28 | html_table %>%
29 | filter (grepl("Holiday",`Holiday type`,ignore.case = T)),silent = T))
30 | ,nrow(x %>% read_html %>% html_node(".zebra") %>% html_table %>% filter (grepl("Holiday",`Holiday type`,ignore.case = T)))-1,0)
31 | }
32 | cores=detectCores()-1
33 | cl = makeCluster(cores)
34 | registerDoParallel(cl)
35 | no_of_holidays = unlist(foreach(n=links,.packages=c("rvest","dplyr")) %dopar% no_of_holiday(x=n))
36 | country_holidays <- cbind(data.frame(country),data.frame(no_of_holidays))
37 | hist(country_holidays$no_of_holidays)
38 |
39 | #Investigated outliers and realized that some countries have county and regional holidays.
40 | #I wanted to focus on more general holidays, so I extracted countries that had a column stating in what region the holiday is observed.
41 | outliers = function(x) {
42 | if("Where it is observed" %in% names(x %>%
43 | read_html %>%
44 | html_node(".zebra") %>%
45 | html_table) == T){
46 | x
47 | }
48 |
49 | }
50 | outliers = unlist(foreach(n=links,.packages=c("rvest","dplyr")) %dopar% outliers(x=n))
51 |
52 | #After extracting that, I extracted the new number of holidays.
53 | updated_holidays= function (x){
54 | nrow(x %>%
55 | read_html %>%
56 | html_node(".zebra") %>%
57 | html_table %>%
58 | filter( grepl('holiday',`Holiday type`,ignore.case =T)) %>%
59 | filter(Encoding(`Where it is observed`) == "UTF-8"))
60 | }
61 | updated_holidays = unlist(foreach(n=outliers,.packages=c("rvest","dplyr")) %dopar% updated_holidays(x=n))
62 |
63 | #Housekeeping... getting the country names and putting them in a consistent format
64 | outliers_country <- str_replace(outliers,"https://www.timeanddate.com/holidays/","")
65 | outliers_country <- str_replace_all(outliers_country,"-"," ")
66 | outliers_country <-unlist(lapply(outliers_country,first_word_cap))
67 |
68 | #Updated the dataframe with the new holidays
69 | country_holidays$no_of_holidays[country_holidays$country %in% outliers_country]=unlist(lapply(1:10,
70 | function(x)
71 | country_holidays$no_of_holidays[country_holidays$country==outliers_country[x]]=updated_holidays[x])
72 | )
73 | #remove countries with no holidays! 0_0 (Actually not countries. just the UN)
74 | country_holidays = subset(country_holidays,no_of_holidays>0)
75 |
76 | GDP.Data <- read.csv("~/Data Analysis/Datasets/GDP Data.csv")
77 | ###Merging the GDP data frame with the holiday dataframe
78 | #At this point, I should point out that I really had to clean the country name formats outside R
79 | #It would have been terribly cumbersome doing it any other way
80 | country_holidays=rename(country_holidays,Country_Name = country)
81 | country_data = merge(country_holidays,GDP.Data,by = "Country_Name",all.y = T)
82 | country_data$no_of_holidays[210] = 9
83 | country_data$no_of_holidays[213] = 13
84 | country_data$no_of_holidays[214] = 10
85 | country_data$no_of_holidays[219] = 15
86 | country_data = country_data[complete.cases(country_data),]
87 |
88 | #Health Spend
89 | health_spend = read_csv("~/Data Analysis/Datasets/Health expenditure, total (current US).csv")
90 |
91 | #Reshaping the data into a better format
92 | health_spend=health_spend %>% gather(Year, GDP,YR2007:YR2015)
93 | health_spend$Year = str_replace(health_spend$Year,"YR",'')
94 | avg_health_spend=health_spend %>%
95 | filter(Year != 2015) %>%
96 | group_by(Country_Name) %>%
97 | summarise(Average_Total = mean(GDP))
98 | country_data = merge(avg_health_spend,country_data,by = "Country_Name",all.y = T)
99 | country_data = country_data%>% select(Country_Name,Average,no_of_holidays,Avg_seven)
100 |
101 | Health_expenditure_PPP <- read.csv("~/Data Analysis/Datasets/Health expenditure per capita, PPP.csv")
102 | Health_expenditure_PPP=Health_expenditure_PPP %>% gather(Year, GDP,YR2007:YR2014)
103 | Health_expenditure_PPP$Year = str_replace(Health_expenditure_PPP$Year,"YR",'')
104 | Health_expenditure_PPP = rename(Health_expenditure_PPP,Country_Name=Country.Name)
105 | avg_health_PPP= Health_expenditure_PPP %>%
106 | filter(Year != 2015) %>%
107 | group_by(Country_Name) %>%
108 | summarise(Average_PPP = mean(GDP))
109 |
110 | country_data = merge(avg_health_PPP,country_data,by = "Country_Name",all.y = T)
111 | country_data = country_data%>% select(Country_Name,Average,no_of_holidays,Avg_seven,Average_PPP)
112 |
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/scripts/billionaire_functions.R:
--------------------------------------------------------------------------------
1 | #All Functions for Billionaires.R
2 | get_historical_ranks = function(year){
3 | year_update = ifelse(year <10,paste0(0,year),year)
4 | url = paste0("http://stats.areppim.com/listes/list_billionairesx",year_update,"xwor.htm")
5 | tbl="http://stats.areppim.com/listes/list_billionairesx17xwor.htm" %>%
6 | read_html() %>%
7 | html_node("body > table") %>%
8 | html_table(trim = T)
9 | names(tbl) = tbl[2,]
10 | tbl = tbl[3:nrow(tbl),]
11 | tbl = tbl[,-6]
12 | names(tbl) = c("Rank","Name","Citizenship","Age","Net Worth")
13 | tbl=tbl %>% mutate(Year= paste0(20,year_update))
14 | }
15 |
16 | joining_year = function(row){
17 | yr=names(full_list)[(which(is.na(full_list[row,3:13])==F))[1]+2]
18 | return(yr)
19 | }
20 |
21 | leavers = function(row){
22 | total =0
23 | for(i in 1:(length(row)-1)){
24 | if(is.na(as.character(row[i+1])) ==T && is.na(as.character(row[i])) ==F ){
25 | total = total+1
26 | }
27 | }
28 | return(total)
29 | }
30 |
31 | comers = function(row){
32 | total =0
33 | first_time = min(which(is.na(row)==F))
34 |
35 | if (first_time>=10){
36 | return(0)
37 | }
38 | else{
39 | for(i in first_time:(length(row)-1)){
40 | if(is.na(as.character(row[i+1])) ==F && is.na(as.character(row[i])) ==T ){
41 | total = total+1
42 | }
43 | }
44 | }
45 | return(total)
46 | }
47 |
48 | max_distance = function(row){
49 | lst=which(is.na(row)==F)
50 | lt = NULL
51 | if( length(lst)==1){
52 | return(0)
53 | }
54 | for (i in 1:(length(lst)-1)){
55 | h= lst[i+1]-(lst[i]+1)
56 | lt = append(lt,h)
57 | }
58 | return(max(lt))
59 | }
60 |
61 |
62 | last_year_on_list = function(row){
63 | years_onList=which(is.na(row)==F)
64 | year_left = names(full_list[,5:15])[max(years_onList)]
65 | return(year_left)
66 |
67 | }
68 | last_year_left = function(row){
69 | lst=NULL
70 | if (length(which(is.na(row)==T))==0){
71 | return ("")
72 | }
73 | else{
74 | for (i in 2:length(row)){
75 | if (is.na(row[i])==T&is.na(row[i-1])==F){
76 | lst = append(lst,i)
77 | }
78 | }
79 | year_left = names(full_list[,5:15])[max(lst)]
80 | return(year_left)
81 | }
82 | }
83 |
84 | #Make the names into URLs
85 | names_to_URLs = function(name){
86 | name = as.character(name)
87 | name = str_trim(name)
88 | name=str_replace_all(name,"[^0-9A-Za-z\\- ]",'')
89 | name=str_replace_all(name,"\\s+",' ')
90 | name = str_replace_all(name," ","-")
91 | name=tolower(name)
92 | url = paste0("https://www.forbes.com/profile/",name,"/")
93 | return(url)
94 | }
95 |
96 | #Making a function to display name backwards e.g Joy Jones as Jones Joy
97 | #This will come in handy when getting the billionaire's info
98 | backward_names = function(name){
99 | name = unlist(str_split(name," "))
100 | name = rev(name)
101 | name = paste(name,collapse = ' ')
102 | name = str_trim(name)
103 | name=names_to_URLs(name)
104 | return(name)
105 | }
106 |
107 |
108 | get_billionaire_info = function(names_of_billionaires){
109 | url=names_to_URLs(names_of_billionaires)
110 | data=try(url %>%
111 | read_html %>%
112 | html_nodes(".stats li") %>%
113 | html_text(trim=T)
114 | ,silent = T)
115 | if(class(data) =="try-error"){
116 | url = backward_names(names_of_billionaires)
117 | data=try(url %>%
118 | read_html %>%
119 | html_nodes(".stats li") %>%
120 | html_text(trim=T)
121 | ,silent = T)
122 | }
123 | sector = ifelse(length(data[grepl("Source of Wealth",data,T)])>0,data[grepl("Source of Wealth",data,T)],NA)
124 | education = ifelse(length(data[grepl("Education",data,T)])>0,data[grepl("Education",data,T)],NA)
125 | sector = str_replace_all(sector,".*\t|.*\n","")
126 | education = str_replace_all(education,".*\t|.*\n","")
127 | return(c(sector,education))
128 | }
129 |
130 | education_columns_creator=function(df){
131 | df$Self_Made = ifelse(grepl("self made",df$detail1,ignore.case = T),"Y","N")
132 | df$dropped_out = ifelse(grepl("drop out",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
133 | df$bachelors_degree = ifelse(grepl("bachelor",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
134 | df$masters_degree = ifelse(grepl("master of arts|master of science",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
135 | df$MBA = ifelse(grepl("Master of Business Administration",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
136 | df$phd_or_professional_degree = ifelse(grepl("doctor|llb",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
137 | return(df)
138 | }
139 |
140 | #Extract Model ID from model
141 | getID = function(model){
142 | ID=model@model_id
143 | return(ID)
144 | }
145 |
146 | #Function to check validation set accuracy
147 | test_accuracy = function(model){
148 | table_accuracy=h2o.hit_ratio_table(model,valid = T)[1,2]
149 | return(table_accuracy)
150 | }
151 |
152 |
153 | model_type.H2OMultinomialModel = function(x, ...) {
154 | return("classification")
155 |
156 | }
157 |
158 | predict_model.H2OMultinomialModel = function(x, newdata, type, ...) {
159 | # Function performs prediction and returns dataframe with Response
160 | #
161 | # x is h2o model
162 | # newdata is data frame
163 | # type is only setup for data frame
164 |
165 | pred <- h2o.predict(x, as.h2o(newdata))
166 |
167 | # return classification probabilities only
168 | return(as.data.frame(pred[,-1]))
169 |
170 | }
171 |
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/scripts/Billionaires.R:
--------------------------------------------------------------------------------
1 | library(RSelenium)
2 | library(rvest)
3 | library(stringr)
4 | library(dplyr)
5 | library(lubridate)
6 | library(readr)
7 | library(purrr)
8 | library(lime)
9 | library(tidyr)
10 | #List of billionaires from 2007-2017
11 |
12 | years = 7:17
13 |
14 |
15 | full_list = lapply(years,get_historical_ranks)
16 | full_list = do.call("rbind",full_list)
17 |
18 | #At this point I had to do some Excel work to harmonize the names, e.g. in some years Bill Gates appears as William Gates III
19 | #This is where I spent most of my time
20 | #Mark X for each billionaire in each year (this will come in handy later)
21 | full_list = full_list %>% mutate(markings= "X")
22 |
23 | #Three billionaires have exactly the same names - Wang Wei, Robert Miller and Jim Davis
24 | full_list[9865,2] = "Jim Davis, new balance"
25 | full_list[12711,2] ="Jim Davis, recruitment"
26 | full_list[11497,2] = "Jim Davis, new balance"
27 | full_list[11030,2] ="Jim Davis, recruitment"
28 | full_list[9783,2] = "Wang Wei, delivery"
29 | full_list[10981,2] = "Wang Wei, computer hardware"
30 |
31 | #Use spread to make the year columns - the markings are useful here
32 | full_list=full_list %>%
33 | select(Name_updated,Citizenship,Year,markings) %>%
34 | spread(Year,markings)
35 |
36 | #Create year billionaire joined
37 |
38 | full_list$joining_year = unlist(lapply(1:nrow(full_list),joining_year))
39 |
40 | #No of time billionaire has been on the list
41 | full_list=full_list %>%
42 | mutate(no_of_times_on_list =rowSums(is.na(full_list[,3:13])==F))
43 |
44 | #No of times billionaires left
45 |
46 |
47 | full_list$no_of_times_left =apply(full_list[,3:13],1,leavers)
48 |
49 | #No of times billionaires have comeback to the list
50 |
51 |
52 |
53 | full_list$no_of_comebacks=apply(full_list[,3:13],1,comers)
54 |
55 | #Longest time away from the list
56 |
57 | full_list$longest_time_away = apply(full_list[,3:13],1,max_distance)
58 |
59 | #Last year on list and last year off it
60 |
61 | full_list$last_off_list = apply(full_list[,5:15],1,last_year_left)
62 |
63 |
64 | #Make the names into URLs
65 |
66 |
67 | #Making a function to display name backwards e.g Joy Jones as Jones Joy
68 | #This will come in handy when getting the billionaire's info
69 |
70 | #Get the billionaires info
71 |
72 |
73 | #Setting up the columns for the billionaires' details
74 | full_list$detail1 =""
75 | full_list$detail2 =""
76 |
77 | #I opted for a for loop as opposed to the sexy apply function as this gives me the ability
78 | #to store intermediate results
79 | for (i in 1:nrow(full_list)){
80 | dat=get_billionaire_info(full_list$Name[i])
81 | #dat = list(dat)
82 | full_list$detail1[i]=dat[1]
83 | full_list$detail2[i]=dat[2]
84 | print(paste0(round((i/nrow(full_list))*100,2),"% done at ",i))
85 | }
86 |
87 | #Create columns for educational information
88 |
89 | full_list = education_columns_creator(full_list)
90 | full_list$detail1 = unlist(full_list$detail1)
91 | full_list$detail2 = unlist(full_list$detail2)
92 |
93 | #Data Munging complete! :)
94 |
95 | #Clusters were made in Tableau using Sets and imported back into the csv file
96 | #================================Machine Learning======================================#
97 | library(h2o)
98 | library(plyr)
99 | library(dplyr)
100 | library(purrr)
101 | library(scales)
102 | library(lime)
103 | #Initialize H2o cluster
104 | h2o.init()
105 |
106 | dat1 = full_list %>%
107 | filter(Cluster !="The Newbie")
108 |
109 | data = dat1 %>%
110 | select(-Detail1,-Detail2) %>%
111 | as.h2o()
112 |
113 | splits = h2o.splitFrame(data,ratios = c(0.6,0.2),destination_frames = c("train", "valid", "test"), seed = 1234)
114 | train = h2o.getFrame("train")
115 | val = h2o.getFrame("valid")
116 | test = h2o.getFrame("test")
117 |
118 |
119 |
120 | #Column index numbers
121 | features=c(5,7,9,18,25,26,28)
122 | #c(4:11,18:19,25:26,28)
123 |
124 | #
125 | #2,4:11,18:19,25:26,29
126 | response=27
127 |
128 | #Models we would like to train and test the accuracy
129 | models = c("h2o.randomForest", "h2o.deeplearning" ,"h2o.gbm")
130 | names_of_models = c("Random Forest", "Deep Learning" ,"GBM")
131 |
132 | #The map function will invoke the functions in the "models" vector on the parameters below
133 | list_of_models =invoke_map(models, x=features,y=response,training_frame =train,validation_frame = val)
134 |
135 |
136 | #Store IDs for retrieval later
137 | IDs = list_of_models %>%
138 | map_chr(getID)
139 |
140 |
141 | #check accuracy on validation set
142 | list_of_models %>%
143 | map_dbl(test_accuracy) %>%
144 | set_names(.,names_of_models)
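#Illustrative aside (not part of the original analysis): the winning model could
#also be picked programmatically from the same validation-accuracy vector
best_id = IDs[which.max(list_of_models %>% map_dbl(test_accuracy))]
best_model = h2o.getModel(best_id)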
145 | #GBM performed best so let's compute that variable importance
146 | model_gbm = h2o.getModel(IDs[3])
147 | h2o.varimp_plot(h2o.getModel(IDs[3]))
148 | #Store variable importance in csv for visualization
149 | var_imp_rf=h2o.varimp(model_gbm)
150 | write.csv(var_imp_rf,"Variable Importance GBM.csv",row.names = F)
151 | model_gbm=h2o.loadModel("C:/Users/rose.anwuri/Documents/TheArtandScienceofData/Consitent Billionaire Guide/app/GBM_Model")
152 | features_lime=model_gbm@parameters$x
153 | train_lime = train[,c(features_lime,"Cluster")] %>% h2o.na_omit()
154 | test_lime = val[,c(features_lime,"Cluster")]%>% h2o.na_omit()
155 |
156 | predict_model(x = model_gbm, newdata =test_lime[,-9], type = 'raw') %>%
157 | tibble::as_tibble()
158 | train_lime_df=as.data.frame(train_lime[,-9])
159 | train_lime_df=train_lime_df[complete.cases(train_lime_df),]
160 |
161 | explainer <- lime::lime(
162 | train_lime_df,
163 | model = model_gbm,
164 | bin_continuous = FALSE)
165 | test_lime_df = as.data.frame(test_lime) %>% filter(Cluster=="The Hustler") %>%as_tibble()
166 | test_lime_df=test_lime_df[complete.cases(test_lime_df),]
167 | explanation <- lime::explain(
168 | as.data.frame(test_lime_df[,-9]),
169 | explainer = explainer,
170 | n_labels = 1,
171 | n_features = 4,
172 | kernel_width = 0.5)
173 | plot_features(explanation)
174 |
--------------------------------------------------------------------------------
/Football Analysis/scripts/FootballAnalysis - 2017.R:
--------------------------------------------------------------------------------
1 | library(plyr)
2 | library(dplyr)
3 | library(stringr)
4 | library(RSelenium)
5 | library(rvest)
6 | library(XML)
7 | library(zoo)
8 | library(h2o)
9 | library(purrr)
10 | library(caret)
11 |
12 | options(stringsAsFactors = F)
13 | #Get standard league table from skysports.com
14 | Dat = NULL
15 | for (i in 2009:2016){
16 | teamStats = readHTMLTable(paste0('http://www.skysports.com/premier-league-table/',i))
17 | names(teamStats) = NULL
18 | teamStats = data.frame(teamStats)
19 | teamStats$Year = i
20 | Dat = rbind(Dat,teamStats)
21 | print(i)
22 | flush.console()
23 | }
24 |
25 | ####Housekeeping!#####
26 | Dat= subset(Dat, Year !=2006)
27 | Dat$X. = NULL
28 | Dat$Last.6 = NULL
29 | Dat[,2:9] = apply(Dat[,2:9],2,function(x) as.integer(as.character(x)))
30 | Dat$Team = factor(str_replace_all(as.character(Dat$Team),pattern = "[*]",''))
31 | Dat$Team = str_trim(Dat$Team)
32 |
33 | Dat1=Dat
34 | Dat$rank = rep(1:20,length.out=320)
35 |
36 | epl_db = ddply(Dat, ~Team,transform, W=roll_apply(W,2),
37 | L=roll_apply(L,2),
38 | Cummulative.Pts=roll_apply(Pts,2),
39 | Cummulative.rank=roll_apply(rank,2),
40 | GD=roll_apply(GD,2),
41 | D=roll_apply(D,2),
42 | F=roll_apply(F,2),
43 | A=roll_apply(A,2),
44 | Pts = lead(Pts),
45 | rank=lead(rank))
46 |
47 | ####Rendering the whoscored.com website using a remote Selenium driver and getting the data####
48 |
49 | rD=rsDriver()
50 | remDr = rD[["client"]]
51 |
52 | #Whoscored's weird way of encoding their year variables. These codes correspond to 2009-2016
53 | year_coding=c(1849,2458,2935,3389,3853,4311,5826,6335)
54 |
55 | remDr$setImplicitWaitTimeout(30000)
56 |
57 | team_statistics = ldply(year_coding,summary_gathering_pipeline)
58 |
59 | team_statistics[,14:16] = apply(team_statistics[,14:16],2,percent_to_numberic)
60 |
61 | defense_statistics = ldply(year_coding,defensive_gathering_pipeline)
62 |
63 | offense_stat = ldply(year_coding,function(x) gathering_pipeline(type = "Offensive",x))
64 | detailed_stat = ldply(year_coding,function(x) gathering_pipeline(type = "Detailed",x))
65 | pass_stat = ldply(year_coding,function(x) gathering_pipeline(type = "Pass Types",x))
66 | card_stat = ldply(year_coding,function(x) gathering_pipeline(type = "Card Situations",x))
67 |
68 |
69 | remDr$close()
70 |
71 | #Adding a Year column to the final table
72 | team_statistics$Year = rep(2009:2016, each=20, length.out = 160)
73 | defense_statistics$Year = rep(2009:2016, each=20, length.out = 160)
74 | offense_stat$Year = rep(2009:2016, each=20, length.out = 160)
75 | detailed_stat$Year = rep(2009:2016, each=20, length.out = 160)
76 | pass_stat$Year = rep(2009:2016, each=20, length.out = 160)
77 | card_stat$Year = rep(2009:2016, each=20, length.out = 160)
78 | offense_stat$`Shots pg` = NULL
79 | names(defense_statistics)[2] = "Shots.conceded.pg"
80 | in_c = left_join(team_statistics,defense_statistics,by=c("Team","Year"))
81 | in_c = left_join(in_c,offense_stat,by=c("Team","Year"))
82 | in_c = left_join(in_c,pass_stat,by=c("Team","Year"))
83 | epl_database = left_join(in_c,card_stat,by=c("Team","Year"))
84 | epl_database=epl_database %>%
85 | select(-matches("Rating"),-matches("\\X."),-X)
86 | rm(in_c)
87 | #Create cumulative means for all numerical variables
88 | df_split=split(epl_database,epl_database$Team)
89 | epl_data =ldply(names(df_split),function(x) cummean_fn(x,c(2:15,17:32),2))
90 | epl_data$Team = str_trim(epl_data$Team)
91 |
92 | ####Housekeeping! Again! I am about to join two tables based on their team names. Hence, I have to ensure that they are of the same format####
93 | epl_db$Team = as.character(epl_db$Team)
94 | epl_db$Team[epl_db$Team == "Birmingham City"] = "Birmingham"
95 | epl_db$Team[epl_db$Team == "Blackburn Rovers"] = "Blackburn"
96 | epl_db$Team[epl_db$Team == "Bolton Wanderers"] = "Bolton"
97 | epl_db$Team[epl_db$Team == "Cardiff City"] = "Cardiff"
98 | epl_db$Team[epl_db$Team == "Norwich City"] = "Norwich"
99 | epl_db$Team[epl_db$Team == "Swansea City"] = "Swansea"
100 | epl_db$Team[epl_db$Team == "Stoke City"] = "Stoke"
101 | epl_db$Team[epl_db$Team == "Tottenham Hotspur"] = "Tottenham"
102 | epl_db$Team[epl_db$Team == "West Ham United"] = "West Ham"
103 | epl_db$Team[epl_db$Team == "Wigan Athletic"] = "Wigan"
104 | epl_db$Team[epl_db$Team == "Hull City"] = "Hull"
105 | epl_db$Team[epl_db$Team == "Leicester City"] = "Leicester"
106 |
107 | #Merge final tables from skysports and whoscored together and remove duplicate columns
108 | epl_database = right_join(epl_db,epl_data,by = c("Year","Team"))
109 | write.csv(epl_database,"Football Analysis/data/epl_data.csv",row.names = F)
110 |
111 | #####Preprocessing#####
112 | h2o.init()
113 | model_data = epl_database %>%
114 | select(-Pl)
115 |
116 | train = model_data %>% filter(Year<2016) %>%
117 | as.h2o %>%
118 | h2o.na_omit()
119 | test = model_data %>% filter(Year == 2016) %>%
120 | as.h2o
121 |
122 | train$Team = as.factor(train$Team)
123 | test$Team = as.factor(test$Team)
124 | cor=h2o.cor(model_data[,2:42] %>% as.h2o %>% h2o.na_omit)
125 | row.names(cor) = colnames(cor)
126 | cor$Pts = abs(cor$Pts)
127 | features = c(2,4,7,11:13)
128 | #7,13,6,11
129 |
130 | response = 8
131 |
132 | #####Model Selection########
133 | models = c("h2o.glm","h2o.gbm","h2o.deeplearning","h2o.randomForest")
134 |
135 | list_of_models = invoke_map(models,x=features,y=response,training_frame =train, validation_frame = test)
136 |
137 | #Extract Model ID from model
138 |
139 | #Store IDs for retrieval later
140 | IDs = list_of_models %>%
141 | map_chr(getID)
142 |
143 | #######Model Validation########
144 | list_of_models %>%
145 | map_dbl(test.r2) %>%
146 | set_names(.,models)
147 |
148 | list_of_models %>%
149 | map_dbl(test.rmse) %>%
150 | set_names(.,models)
151 |
152 |
153 |
154 |
155 | #######Model Tuning#######
156 |
157 | ##GLM###
158 | hyper_params =list(alpha=seq(0,1,0.1))
159 |
160 |
161 | glm_Grid = h2o.grid(algorithm = "glm",
162 | grid_id = "glm_tuning1",
163 | hyper_params = hyper_params,
164 | x=features,
165 | y=response,training_frame = train,
166 | validation_frame = test,
167 | seed=5.388119e+18,lambda_search=T)
168 |
169 |
170 | grid_sorted=h2o.getGrid("glm_tuning1",sort_by = "r2",decreasing = T)
171 | rmse=ldply(grid_sorted@model_ids %>% unlist, function(x) h2o.getModel(x) %>% test.rmse)
172 | cbind(alpha=grid_sorted@summary_table$alpha,rmse=rmse) %>% arrange(V1)
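#Illustrative aside (not in the original tuning run): since h2o.getGrid returns
#its models already sorted by the chosen metric, the best grid model can be
#pulled out directly
best_glm = h2o.getModel(grid_sorted@model_ids[[1]])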
173 |
174 | #GBM
175 | hyper_params =list(learn_rate=seq(0.2,0.3,0.01),ntrees=seq(5,100,5),min_rows=1:10)
176 | gbm_Grid = h2o.grid(algorithm = "gbm",
177 |
178 | hyper_params = hyper_params,
179 | x=features,seed=8.973598e+18,
180 | y=response,training_frame = train,
181 | validation_frame = test
182 |
183 | )
184 |
185 | grid_sorted=h2o.getGrid(gbm_Grid@grid_id,sort_by = "r2",decreasing = T)
186 | rmse=ldply(grid_sorted@model_ids %>% unlist, function(x) h2o.getModel(x) %>% test.rmse)
187 | cbind(val=grid_sorted@summary_table[,1],rmse=rmse) %>% arrange(V1) %>%top_n(-5)
188 | model_gbm=h2o.getModel("gbm_grid7_model_0")
189 | hyper_params =list(col_sample_rate_per_tree=seq(0.1,1,0.1),min_rows=1:10,ntrees=seq(5,100,5),col_sample_rate_change_per_level=seq(1,2,0.1))
190 | rf_Grid = h2o.grid(algorithm = "randomForest",
191 | grid_id = "rfD_tuning8",
192 | hyper_params = hyper_params,
193 | x=features,
194 | y=response,training_frame = train,
195 | validation_frame = test,
196 | seed=-4.02063e+18
197 |
198 | )
199 |
200 |
201 | rmse=ldply(grid_sorted@model_ids %>% unlist, function(x) h2o.getModel(x) %>% test.rmse)
202 | cbind(val=grid_sorted@summary_table[,1],rmse=rmse) %>% arrange(V1) %>%top_n(-5)
203 |
204 |
205 |
206 | #####Final Model######
207 | #What if I told you all that tuning was of no use?
208 | model_gbm=h2o.gbm(x=features,
209 | y=response,training_frame = train,
210 | validation_frame = test
211 | )
212 | test$predict = h2o.predict(model_gbm,test)
213 | test.df = as.data.frame(test)
214 | test.df$pred_rank = rank(-test.df$predict,ties.method = "first")
215 |
--------------------------------------------------------------------------------
/Others/CBN.R:
--------------------------------------------------------------------------------
1 | library(h2o)
2 | library(shiny)
3 | library(shinyjs)
4 | library(shinydashboard)
5 | library(shinythemes)
6 | library(plyr)
7 | library(dplyr)
8 | library(wesanderson)
9 | library(scales)
10 | library(lubridate)
11 | #library(shinysky)
12 | library(extrafont)
13 | library(stringr)
14 | h2o.init()
15 |
16 | df = read.csv("C:/Users/rose.anwuri/OneDrive/TheArtandScienceofData/billionaire_data.csv")
17 | dat = df %>%
18 | filter(Cluster !="The Newbie") %>%
19 | filter(is.na(Age)==F)
20 |
21 | #model = h2o.loadModel("C:\\Users\\rose.anwuri\\Documents\\TheArtandScienceofData\\DRF_model_R_1501438266398_1")
22 | Countries=levels(dat$Country)
23 | Sectors = levels(dat$Sector)
24 | Relations = levels(dat$relation)
25 | ui = shinyUI(
26 | navbarPage("Billion Dollar Questions",inverse = F,collapsible = T,fluid = T,
27 | theme = shinytheme("flatly"),
28 | tabPanel("What Type of Billionaire Are You?", icon = icon("money"),
29 | sidebarLayout(position = 'left',
30 | sidebarPanel(id = "sidebar",
31 | selectizeInput("Country",label = h4("What country are you from?",style ="font-size: 12pt;"),choices=Countries),
32 | numericInput('Age',h4("How Old Are You?",style ="font-size: 12pt;"),value = 20,min = 12,max = 100),
33 | selectizeInput("selfMade",h4("Do you have your own company (or plan to have one)?",style ="font-size: 12pt;"),choices = c("Yes","No")),
34 | conditionalPanel("input.selfMade=='Yes'",selectizeInput("relation",h4("Choose one below that best describes your role in the business:",style ="font-size: 12pt;"),choices = Relations)),
35 | conditionalPanel("input.selfMade=='No'",selectizeInput("buffer",h4("That's okay. Think of a business where you are likely to make money from. What best describes your role in that company?",style ="font-size: 12pt;"),choices = Relations)),
36 | numericInput("founding_year",h4("When was/will this business (be) established?",style ="font-size: 12pt;"),value = 1999,min=1600,max=year(Sys.Date())+10),
37 | selectizeInput("Sector",h4("What Sector is this business?",style ="font-size: 12pt;"),choices = Sectors),
38 | selectizeInput("Bsc",h4("Do you have a Bachelor's Degree?",style ="font-size: 12pt;"),choices = c("Yes","No")),
39 | tags$head(
40 | tags$style(HTML('#run{border-radius: 0px;}'))
41 | ),
42 | div(actionButton("run",div("PREDICT",icon("flask"),style="text-align:center;font-size:10pt;"),styleclass = "success",size = "mini",css.class = "z-depth-5"),style="text-align:left;")
43 |
44 |
45 | ),
46 | mainPanel(position="left",
47 | br(),
48 |
49 | busyIndicator(text = h4("Running Model...",style="font-size: 40px; font-family:Papyrus;"),img = "shinysky/busyIndicator/Loading3.gif"),
50 |
51 | textOutput("prefix_final"),
52 | div(imageOutput("image_final"), style="text-align: center;"),
53 | textOutput("prediction_final"),
54 | br(),
55 | tags$head(tags$style("#prefix_final{font-size: 40px; text-align:center;}")),
56 | tags$head(tags$style("#prediction_final{font-size: 40px; text-align:center;}")),
57 | conditionalPanel("typeof output.image_final !== 'undefined'",div(uiOutput("urlInput"),style="text-align:center;"))
58 |
59 | ))),
60 | tabPanel("About", icon = icon("info"),
61 |
62 | box(width = 12,title= h2(strong("The Art and Science of Data",img(src="Typing.gif",height=60)),style= "font-size: 36pt;text-align:center; font-family:Papyrus; color:#009688;"),
63 | br(),
64 | div("This application was humbly created by me (Rosebud Anwuri). It's purely for fun and in no way guarantees you'd become a billionaire (I'm sorry). If you like this and would like to see more of the stuff I work on, you can visit my blog here:",a("The Art and Science of Data.",style= "font-size: 16pt; display: inline;color:#009688;",href="http://theartandscienceofdata.wordpress.com",target="_blank"),"My blog is mostly focused on the application of Data Science to everyday life and culture to make it more accessible, less technical and much more fun to a wider audience. Feel free to add a comment or drop me a message for questions or suggestions. Thank you!",style= "font-size: 14pt; font-family:Helvetica;")
65 | ))
66 | ))
67 |
68 |
69 |
70 |
71 |
72 |
73 | server = shinyServer(function(input, output, session){
74 | category_predictors = function(){
75 | self_made=ifelse(input$selfMade=="Yes","Y","N")
76 | year_born = year(Sys.Date())-input$Age
77 |     age_at_start = ifelse(input$founding_year
--------------------------------------------------------------------------------
/Football Analysis/scripts/FootballAnalysis - 2016.R:
--------------------------------------------------------------------------------
46 | remDr$mouseMoveToLocation(webElement = clicktype)
47 | clicktype$click()
48 | clicktype = remDr$findElement(using = "css selector", '#sub-navigation > ul:nth-child(1) > li:nth-child(3) > a:nth-child(1)')
49 | remDr$mouseMoveToLocation(webElement = clicktype)
50 | clicktype$click()
51 | doc = remDr$getPageSource()[[1]]
52 | current_doc = read_html(doc)
53 | firstTable = htmlParse(remDr$getPageSource()[[1]])
54 |
55 | #Housekeeping!
56 | v=readHTMLTable(firstTable, as.data.frame = T)
57 | v= v[c(1,3,6)]
58 | names(v) = NULL
59 | v=lapply(v, function(x){data.frame(x)})
60 | testData =Reduce(function(x,y) merge(x,y,by="Team", all = T),v)
61 | testData[,2:15] = apply(testData[,2:15],2,function(x) as.numeric(as.character(x)))
62 | testData$RedCards = substring(as.character(testData$Discipline),nchar(as.character(testData$Discipline)))
63 | testData$YellowCards = substring(as.character(testData$Discipline),1,nchar(as.character(testData$Discipline))-1)
64 | testData$Discipline = NULL
65 |
66 |
67 | #Defense Table
68 | clicktype = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(2) > a:nth-child(1)')
69 |
70 | remDr$mouseMoveToLocation(webElement = clicktype)
71 | clicktype$click()
72 | webElem = remDr$findElement(using = "css selector", '#statistics-team-table-defensive')
73 |
74 |   #Dealing with the web can be a real pain, especially living in a place with terrible internet
75 |   #This area is for error handling. Whenever the table is not populated but instead has class try-error (which means the read failed), we retry all the steps for getting that table until it works
76 |   #Crude, I know - I will keep thinking of ways to refine this.
77 | webElemtext = try(webElem$getElementAttribute("outerHTML")[[1]])
78 | defenseTable = try(readHTMLTable(webElemtext, header = T,as.data.frame = T)[[1]])
79 | while (class(defenseTable) == "try-error"){
80 | clicktype = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(2) > a:nth-child(1)')
81 | remDr$mouseMoveToLocation(webElement = clicktype)
82 | clicktype$click()
83 | webElem = remDr$findElement(using = "css selector", '#statistics-team-table-defensive')
84 | webElemtext = try(webElem$getElementAttribute("outerHTML")[[1]],silent = F)
85 | defenseTable = try(readHTMLTable(webElemtext, header = T,as.data.frame = T)[[1]])
86 | }
87 |
88 | defenseTable[,3:8] = apply(defenseTable[,3:8],2,function(x) as.numeric(x))
89 | bigTable = merge(testData,defenseTable,by = "Team",all = T)
90 |
91 |
92 | #Offensive
93 | clicktype1 = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(3) > a:nth-child(1)')
94 | remDr$mouseMoveToLocation(webElement = clicktype1)
95 | clicktype1$click()
96 | webElem1 = remDr$findElement(using = "css selector", '#statistics-team-table-offensive')
97 | webElemtext = try(webElem1$getElementAttribute("outerHTML")[[1]],silent = F)
98 | offenseTable = try(readHTMLTable(webElemtext, header = T,as.data.frame = T)[[1]])
99 | while(class(offenseTable) == "try-error"){
100 | clicktype1 = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(3) > a:nth-child(1)')
101 | remDr$mouseMoveToLocation(webElement = clicktype1)
102 | clicktype1$click()
103 | webElem1 = remDr$findElement(using = "css selector", '#statistics-team-table-offensive')
104 | webElemtext = try(webElem1$getElementAttribute("outerHTML")[[1]], silent = F)
105 | offenseTable = try(readHTMLTable(webElemtext, header = T,as.data.frame = T)[[1]])
106 | }
107 |
108 | offenseTable[,3:7] = apply(offenseTable[,3:7],2,function(x) as.numeric(x))
109 |
110 | bigTable = merge(bigTable,offenseTable,by = "Team",all = T)
111 |
112 |
113 |
114 | #Detailed
115 | clicktype3 = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(4) > a:nth-child(1)')
116 | remDr$mouseMoveToLocation(webElement = clicktype3)
117 | clicktype3$click()
118 | webElem2 = remDr$findElement(using = "css selector", '#statistics-team-table-detailed')
119 | webElemtext = try(webElem$getElementAttribute("outerHTML")[[1]])
120 | detailedTable = try(readHTMLTable(webElemtext, header = T,as.data.frame = T)[[1]])
121 | while (class(detailedTable) == "try-error"){
122 | clicktype3 = remDr$findElement(using = "css selector", '#stage-team-stats-options > li:nth-child(4) > a:nth-child(1)')
123 | remDr$mouseMoveToLocation(webElement = clicktype3)
124 | clicktype3$click()
125 | webElem2 = remDr$findElement(using = "css selector", '#statistics-team-table-detailed')
126 | webElemtext = try(webElem$getElementAttribute("outerHTML")[[1]])
127 | detailedTable = try(readHTMLTable(webElemtext, header = T,as.data.frame = T)[[1]])
128 | }
129 | bigTable = merge(bigTable,detailedTable,by = "Team",all = T)
130 | inTable = rbind(inTable,bigTable)
131 | print(i)
132 | flush.console()
133 | }
134 | remDr$close()
135 | browseURL("http://localhost:4444/selenium-server/driver/?cmd=shutDownSeleniumServer")
136 |
137 | #Adding a Year column to the final table
138 | inTable$Year = rep(2015:2009, each=20, length.out = 140)
139 |
140 | ####Housekeeping! Again! I am about to join two tables based on their team names. Hence, I have to ensure that they are of the same format####
141 | inTable$Team[inTable$Team == "Birmingham City"] = "Birmingham"
142 | inTable$Team[inTable$Team == "Blackburn Rovers"] = "Blackburn"
143 | inTable$Team[inTable$Team == "Bolton Wanderers"] = "Bolton"
144 | inTable$Team[inTable$Team == "Cardiff City"] = "Cardiff"
145 | inTable$Team[inTable$Team == "Norwich City"] = "Norwich"
146 | inTable$Team[inTable$Team == "Swansea City"] = "Swansea"
147 | inTable$Team[inTable$Team == "Stoke City"] = "Stoke"
148 | inTable$Team[inTable$Team == "Tottenham Hotspur"] = "Tottenham"
149 | inTable$Team[inTable$Team == "West Ham United"] = "West Ham"
150 | inTable$Team[inTable$Team == "Wigan Athletic"] = "Wigan"
151 |
152 | #Removing trailing and leading spaces in the team names
153 | inTable$Team = gsub("^\\s+|\\s+$", "",inTable$Team)
154 |
155 | #Merge final tables from skysports and whoscored together and remove duplicate columns
156 | Datasetfinal = merge(inTable,datasetLagged,by = c("Year","Team"),all = T)
157 | Datasetfinal <- Datasetfinal[, !duplicated(colnames(Datasetfinal), fromLast = TRUE)]
158 |
159 | #Converting the percentages to numbers
160 | Datasetfinal$Left.Side = as.numeric(str_replace(Datasetfinal$Left.Side,"%",""))/100
161 | Datasetfinal$Middle.of.the.pitch = as.numeric(str_replace(Datasetfinal$Middle.of.the.pitch,"%",""))/100
162 | Datasetfinal$Right.Side = as.numeric(str_replace(Datasetfinal$Right.Side,"%",""))/100
163 |
164 | #Removing not so useful columns
165 | Datasetfinal$R.x = NULL
166 | Datasetfinal$Rating.x = NULL
167 | Datasetfinal$R.y = NULL
168 | Datasetfinal$Rating.y = NULL
169 | Datasetfinal$Pl = NULL
170 |
171 | #Convert column types. Again.
172 | Datasetfinal[,3:37] = apply(Datasetfinal[,3:37],2,function(x) as.numeric(x))
173 |
174 | #Now, we have our data!
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/scripts/Billionaires - Compressed.R:
--------------------------------------------------------------------------------
1 | library(RSelenium)
2 | library(rvest)
3 | library(stringr)
4 | library(dplyr)
5 | library(lubridate)
6 | library(readr)
7 | library(purrr)
8 | library(lime)
9 |
10 | #All Functions
11 | get_historical_ranks = function(year){
12 | year_update = ifelse(year <10,paste0(0,year),year)
13 | url = paste0("http://stats.areppim.com/listes/list_billionairesx",year_update,"xwor.htm")
14 |   tbl=url %>%
15 | read_html() %>%
16 | html_node("body > table") %>%
17 | html_table(trim = T)
18 | names(tbl) = tbl[2,]
19 | tbl = tbl[3:nrow(tbl),]
20 | tbl = tbl[,-6]
21 | names(tbl) = c("Rank","Name","Citizenship","Age","Net Worth")
22 | tbl=tbl %>% mutate(Year= paste0(20,year_update))
23 | }
24 |
25 | joining_year = function(row){
26 | yr=names(full_list)[(which(is.na(full_list[row,3:13])==F))[1]+2]
27 | return(yr)
28 | }
29 |
30 | leavers = function(row){
31 | total =0
32 | for(i in 1:(length(row)-1)){
33 | if(is.na(as.character(row[i+1])) ==T && is.na(as.character(row[i])) ==F ){
34 | total = total+1
35 | }
36 | }
37 | return(total)
38 | }
39 |
40 | comers = function(row){
41 | total =0
42 | first_time = min(which(is.na(row)==F))
43 |
44 | if (first_time>=10){
45 | return(0)
46 | }
47 | else{
48 | for(i in first_time:(length(row)-1)){
49 | if(is.na(as.character(row[i+1])) ==F && is.na(as.character(row[i])) ==T ){
50 | total = total+1
51 | }
52 | }
53 | }
54 | return(total)
55 | }
56 |
57 | max_distance = function(row){
58 | lst=which(is.na(row)==F)
59 | lt = NULL
60 | if( length(lst)==1){
61 | return(0)
62 | }
63 | for (i in 1:(length(lst)-1)){
64 | h= lst[i+1]-(lst[i]+1)
65 | lt = append(lt,h)
66 | }
67 | return(max(lt))
68 | }
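#Worked example (illustrative, not in the original script): for year markings
#c("X","X",NA,NA,"X",NA,"X"), leavers() counts the X -> NA transitions (2),
#comers() counts the NA -> X returns after the first appearance (2), and
#max_distance() is the longest run of NAs between two appearances (2).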
69 |
70 |
71 | last_year_on_list = function(row){
72 | years_onList=which(is.na(row)==F)
73 | year_left = names(full_list[,5:15])[max(years_onList)]
74 | return(year_left)
75 |
76 | }
77 | last_year_left = function(row){
78 | lst=NULL
79 | if (length(which(is.na(row)==T))==0){
80 | return ("")
81 | }
82 | else{
83 | for (i in 2:length(row)){
84 | if (is.na(row[i])==T&is.na(row[i-1])==F){
85 | lst = append(lst,i)
86 | }
87 | }
88 | year_left = names(full_list[,5:15])[max(lst)]
89 | return(year_left)
90 | }
91 | }
92 |
93 | #Make the names into URLs
94 | names_to_URLs = function(name){
95 | name = as.character(name)
96 | name = str_trim(name)
97 | name=str_replace_all(name,"[^0-9A-Za-z\\- ]",'')
98 | name=str_replace_all(name,"\\s+",' ')
99 | name = str_replace_all(name," ","-")
100 | name=tolower(name)
101 | url = paste0("https://www.forbes.com/profile/",name,"/")
102 | return(url)
103 | }
104 |
105 | #Making a function to display name backwards e.g Joy Jones as Jones Joy
106 | #This will come in handy when getting the billionaire's info
107 | backward_names = function(name){
108 | name = unlist(str_split(name," "))
109 | name = rev(name)
110 | name = paste(name,collapse = ' ')
111 | name = str_trim(name)
112 | name=names_to_URLs(name)
113 | return(name)
114 | }
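#For illustration (not in the original script): names_to_URLs("Bill Gates")
#returns "https://www.forbes.com/profile/bill-gates/", while
#backward_names("Joy Jones") returns "https://www.forbes.com/profile/jones-joy/",
#the reversed form tried when scraping the forward-name URL fails.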
115 |
116 |
117 | get_billionaire_info = function(names_of_billionaires){
118 | url=names_to_URLs(names_of_billionaires)
119 | data=try(url %>%
120 | read_html %>%
121 | html_nodes(".stats li") %>%
122 | html_text(trim=T)
123 | ,silent = T)
124 | if(class(data) =="try-error"){
125 | url = backward_names(names_of_billionaires)
126 | data=try(url %>%
127 | read_html %>%
128 | html_nodes(".stats li") %>%
129 | html_text(trim=T)
130 | ,silent = T)
131 | }
132 | sector = ifelse(length(data[grepl("Source of Wealth",data,T)])>0,data[grepl("Source of Wealth",data,T)],NA)
133 | education = ifelse(length(data[grepl("Education",data,T)])>0,data[grepl("Education",data,T)],NA)
134 | sector = str_replace_all(sector,".*\t|.*\n","")
135 | education = str_replace_all(education,".*\t|.*\n","")
136 | return(c(sector,education))
137 | }
138 |
139 | education_columns_creator=function(df){
140 | df$Self_Made = ifelse(grepl("self made",df$detail1,ignore.case = T),"Y","N")
141 | df$dropped_out = ifelse(grepl("drop out",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
142 | df$bachelors_degree = ifelse(grepl("bachelor",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
143 | df$masters_degree = ifelse(grepl("master of arts|master of science",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
144 | df$MBA = ifelse(grepl("Master of Business Administration",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
145 | df$phd_or_professional_degree = ifelse(grepl("doctor|llb",df$detail2,ignore.case = T),"Y",ifelse(is.na(df$detail2),NA,"N"))
146 | return(df)
147 | }
148 |
149 | #Extract Model ID from model
150 | getID = function(model){
151 | ID=model@model_id
152 | return(ID)
153 | }
154 |
155 | #Function to check validation set accuracy
156 | test_accuracy = function(model){
157 | table_accuracy=h2o.hit_ratio_table(model,valid = T)[1,2]
158 | return(table_accuracy)
159 | }
160 |
161 |
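#The two functions below are S3 methods that the lime package dispatches on:
#model_type() declares the H2O multinomial model as a classifier and
#predict_model() tells lime how to get class probabilities out of it.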
162 | model_type.H2OMultinomialModel = function(x, ...) {
163 | return("classification")
164 |
165 | }
166 |
167 | predict_model.H2OMultinomialModel = function(x, newdata, type, ...) {
168 | # Function performs prediction and returns dataframe with Response
169 | #
170 | # x is h2o model
171 | # newdata is data frame
172 | # type is only setup for data frame
173 |
174 | pred <- h2o.predict(x, as.h2o(newdata))
175 |
176 | # return classification probabilities only
177 | return(as.data.frame(pred[,-1]))
178 |
179 | }
180 |
181 | #List of billionaires from 2007-2017
182 |
183 | years = 7:17
184 |
185 |
186 | full_list = lapply(years,get_historical_ranks)
187 | full_list = do.call("rbind",full_list)
188 |
189 | #At this point I had to do some Excel work to harmonize the names e.g. in some years Bill Gates was listed as William Gates III
190 | #This is where I spent most of my time
191 | #Mark X for each billionaire in each year (this will come in handy later)
192 | full_list = full_list %>% mutate(markings= "X")
193 |
194 | #Three billionaires have exactly the same names - Wang Wei, Robert Miller and Jim Davis
195 | full_list[9865,2] = "Jim Davis, new balance"
196 | full_list[12711,2] ="Jim Davis, recruitment"
197 | full_list[11497,2] = "Jim Davis, new balance"
198 | full_list[11030,2] ="Jim Davis, recruitment"
199 | full_list[9783,2] = "Wang Wei, delivery"
200 | full_list[10981,2] = "Wang Wei, computer hardware"
201 |
202 | #Use spread to make the year columns - the markings are useful here
203 | full_list=full_list %>%
204 | select(Name_updated,Citizenship,Year,markings) %>%
205 | spread(Year,markings)
206 |
207 | #Create year billionaire joined
208 |
209 | full_list$joining_year = unlist(lapply(1:nrow(full_list),joining_year))
210 |
211 | #No of times the billionaire has been on the list
212 | full_list=full_list %>%
213 | mutate(no_of_times_on_list =rowSums(is.na(full_list[,3:13])==F))
214 |
215 | #No of times billionaires left
216 |
217 |
218 | full_list$no_of_times_left =apply(full_list[,3:13],1,leavers)
219 |
220 | #No of times billionaires have come back to the list
221 |
222 |
223 |
224 | full_list$no_of_comebacks=apply(full_list[,3:13],1,comers)
225 |
226 | #Longest time away from the list
227 |
228 | full_list$longest_time_away = apply(full_list[,3:13],1,max_distance)
229 |
230 | #Last year on list and last year off it
231 |
232 | full_list$last_off_list = apply(full_list[,5:15],1,last_year_left)
233 |
234 |
235 | #Make the names into URLs
236 |
237 |
238 | #Making a function to display name backwards e.g Joy Jones as Jones Joy
239 | #This will come in handy when getting the billionaire's info
240 |
241 | #Get the billionaires info
242 |
243 |
244 | #Setting up the columns for the billionaires' details
245 | full_list$detail1 =""
246 | full_list$detail2 =""
247 |
248 | #I opted for a for loop as opposed to the sexy apply function as this gives me the ability
249 | #to store intermediate results
250 | for (i in 1:nrow(full_list)){
251 | dat=get_billionaire_info(full_list$Name[i])
252 | #dat = list(dat)
253 | full_list$detail1[i]=dat[1]
254 | full_list$detail2[i]=dat[2]
255 | print(paste0(round((i/nrow(full_list))*100,2),"% done at ",i))
256 | }
257 |
258 | #Create columns for educational information
259 |
260 | full_list = education_columns_creator(full_list)
261 | full_list$detail1 = unlist(full_list$detail1)
262 | full_list$detail2 = unlist(full_list$detail2)
263 |
264 | #Data Munging complete! :)
265 |
266 | #Clusters were made in Tableau using Sets and imported back into the csv file
267 | #================================Machine Learning======================================#
268 | library(h2o)
269 | library(plyr)
270 | library(dplyr)
271 | library(purrr)
272 | library(scales)
273 | library(lime)
274 | #Initialize H2o cluster
275 | h2o.init()
276 |
277 | dat1 = full_list %>%
278 | filter(Cluster !="The Newbie")
279 |
280 | data = dat1 %>%
281 | select(-Detail1,-Detail2) %>%
282 | as.h2o()
283 |
284 | splits = h2o.splitFrame(data,ratios = c(0.6,0.2),destination_frames = c("train", "valid", "test"), seed = 1234)
285 | train = h2o.getFrame("train")
286 | val = h2o.getFrame("valid")
287 | test = h2o.getFrame("test")
288 |
289 |
290 |
291 | #Column index numbers
292 | features=c(5,7,9,18,25,26,28)
293 | #c(4:11,18:19,25:26,28)
294 |
295 | #
296 | #2,4:11,18:19,25:26,29
297 | response=27
298 |
299 | #Models we would like to train and test the accuracy
300 | models = c("h2o.randomForest", "h2o.deeplearning" ,"h2o.gbm")
301 | names_of_models = c("Random Forest", "Deep Learning" ,"GBM")
302 |
303 | #The map function will invoke the functions in the "models" vector on the parameters below
304 | list_of_models =invoke_map(models, x=features,y=response,training_frame =train,validation_frame = val)
305 |
306 |
307 | #Store IDs for retrieval later
308 | IDs = list_of_models %>%
309 | map_chr(getID)
310 |
311 |
312 | #check accuracy on validation set
313 | list_of_models %>%
314 | map_dbl(test_accuracy) %>%
315 | set_names(.,names_of_models)
316 | #GBM performed best so let's compute that variable importance
317 | model_gbm = h2o.getModel(IDs[3])
318 | h2o.varimp_plot(h2o.getModel(IDs[3]))
319 | #Store variable importance in csv for visualization
320 | var_imp_rf=h2o.varimp(model_gbm)
321 | write.csv(var_imp_rf,"Variable Importance GBM.csv",row.names = F)
322 | model_gbm=h2o.loadModel("C:/Users/rose.anwuri/Documents/TheArtandScienceofData/Consitent Billionaire Guide/app/GBM_Model")
323 | features_lime=model_gbm@parameters$x
324 | train_lime = train[,c(features_lime,"Cluster")] %>% h2o.na_omit()
325 | test_lime = val[,c(features_lime,"Cluster")]%>% h2o.na_omit()
326 |
327 | predict_model(x = model_gbm, newdata =test_lime[,-9], type = 'raw') %>%
328 | tibble::as_tibble()
329 | train_lime_df=as.data.frame(train_lime[,-9])
330 | train_lime_df=train_lime_df[complete.cases(train_lime_df),]
331 |
332 | explainer <- lime::lime(
333 | train_lime_df,
334 | model = model_gbm,
335 | bin_continuous = FALSE)
336 | test_lime_df = as.data.frame(test_lime) %>% filter(Cluster=="The Hustler") %>%as_tibble()
337 | test_lime_df=test_lime_df[complete.cases(test_lime_df),]
338 | explanation <- lime::explain(
339 | as.data.frame(test_lime_df[,-9]),
340 | explainer = explainer,
341 | n_labels = 1,
342 | n_features = 4,
343 | kernel_width = 0.5)
344 | plot_features(explanation)
345 |
--------------------------------------------------------------------------------
/Twitter Sentiment Analysis/scripts/Sentiment Analysis - Telcos.R:
--------------------------------------------------------------------------------
1 | library(twitteR)
2 | library(base64enc)
3 | library(RCurl)
4 | library(httr)
5 | library(RJSONIO)
6 | library(stringr)
7 | library(tm)
8 | library(SnowballC)
9 | library(wordcloud)
10 | library(dplyr)
11 | library(tidytext)
12 |
13 | ####This has been removed for privacy####
14 | api_key <- "xxx" # From dev.twitter.com
15 | api_secret <- "xxx" # From dev.twitter.com
16 | token <- "xxx" # From dev.twitter.com
17 | token_secret <- "xxx" # From dev.twitter.com
18 | ####This has been removed for privacy####
19 | # Create Twitter Connection
20 | setup_twitter_oauth(api_key, api_secret, token, token_secret)
21 | set_config(config (ssl_verifypeer= 0L))
22 | date_time = Sys.Date()-10
23 | tweetData = NULL
24 |
25 | #The search words change for each of the telcos; I also exclude tweets from the companies' own accounts
26 | #I had to search for keywords like data, call and network alongside the telco names to avoid getting stuff like "mtn bike", but some still slipped through!
27 | while (date_time <= Sys.Date()+1){
28 | date_time = as.character.Date(date_time)
29 | tweetdetails <- searchTwitter("etisalat network -from:etisalat_9ja", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
30 | tweetData = append(tweetData, tweetdetails)
31 | date_time = as.Date(date_time) +1
32 | print(date_time)
33 | flush.console()
34 | }
35 |
36 | tweetData_network = twListToDF(tweetData)
37 |
38 | date_time = Sys.Date()-10
39 | tweetData = NULL
40 | while (date_time <= Sys.Date()+1){
41 | date_time = as.character.Date(date_time)
42 | tweetdetails <- searchTwitter("etisalat data -from:etisalat_9ja", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
43 | tweetData = append(tweetData, tweetdetails)
44 | date_time = as.Date(date_time) +1
45 | print(date_time)
46 | flush.console()
47 | }
48 | tweetData = twListToDF(tweetData)
49 | tweetDataFinal = rbind(tweetData, tweetData_network)
50 |
51 | date_time = Sys.Date()-10
52 | tweetData = NULL
53 | while (date_time <= Sys.Date()+1){
54 | date_time = as.character.Date(date_time)
55 | tweetdetails <- searchTwitter("etisalat call -from:etisalat_9ja", n=1500, lang="en", since = "2016-05-03",until= date_time, geocode = '10,9, 200km')# Transform tweets list into a data frame
56 | tweetData = append(tweetData, tweetdetails)
57 | date_time = as.Date(date_time) +1
58 | print(date_time)
59 | flush.console()
60 | }
61 | tweetData = twListToDF(tweetData)
62 | tweetDataFinal = rbind(tweetData, tweetDataFinal)
63 | View(tweetDataFinal)
64 |
65 | tweetDataFinal$text = as.character(tweetDataFinal$text)
66 | #Just to make sure the twitter search only picked tweets that are actually about the telco (etisalat in this run)
67 | tweetDataFinal = subset(tweetDataFinal,grepl('etisalat',text,ignore.case = T))
68 |
69 | #We shouldn't be analyzing tweets that the telco itself tweeted, right?
70 | tweetDataFinal = subset(tweetDataFinal,!grepl('etisalat',screenName,ignore.case = T))
71 |
72 | #So we have emojis to deal with, and because they are beyond basic UTF-8 they appear...well...weird
73 | #One option is to remove them completely
74 | #But I'd be losing a lot of information, right? Because emojis show sentiment
75 | #So I'd rather do some housekeeping and deal with this
76 | #An awesome human compiled a list of emoji bytes, UTF-8 characters and their descriptions on GitHub. Shout-out to Jessica Peterka-Bonetta!
77 |
78 | emoticons = read.csv("https://raw.githubusercontent.com/today-is-a-good-day/Emoticons/master/emDict.csv",sep = ";")
79 | emoticons$Native = NULL
80 |
81 | #Ensuring the text is in the native R encoding
82 | tweetDataFinal$text = enc2native(tweetDataFinal$text)
83 |
84 | #Remove all the U+00 and leave the R encoding only
85 | tweetDataFinal$text = tolower(str_replace_all(tweetDataFinal$text,"U\\+00",''))
86 |
87 | #The function below finds the first emoji in a tweet and puts it in a new column.
88 | #The idea behind picking only one emoji comes from the assumption that one emoji out of the many that people put in a tweet is enough to find the sentiment of that tweet
89 | #In fact, a lot of the time emojis are just repetitions or are conveying the same emotion
90 | extract_emojis = function(df){
91 | df$emoticon = ''
92 | pt = txtProgressBar(min = 0, max=nrow(df),initial = 0)
93 | for (i in 1:nrow(df)){
94 |     if (str_count(df$text[i],"<")>0){
95 | emoji_placeholder = " nchar(df$text[i])){
101 | break
102 | }
103 | }
104 |
105 | df$emoticon[i] = emoji_placeholder
106 | }
107 |
108 | setTxtProgressBar(pt,i)
109 | }
110 |
111 | return (df)
112 | }
113 |
114 | tweetDataFinal = extract_emojis(tweetDataFinal)
115 |
116 | #Removing trailing and leading white space
117 | tweetDataFinal$emoticon = gsub("^\\s+|\\s+$", "",tweetDataFinal$emoticon)
118 |
119 | #By spot checking (really trial and error), I see that two emoticons do not exist in the emoticon csv, hence the loop searches to the end of the tweet
120 | #and because it checks till the end of the tweet, it picks up some stuff like hashtags, links and the like. This function removes everything after
121 | #the last occurrence of ">" in the emoticon column
122 | tweetDataFinal$emoticon=str_replace(tweetDataFinal$emoticon,"(?<=>)[^>]*$",'')
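#e.g. (illustrative) an emoticon value like "<...><...>#somehashtag" is trimmed
#back to "<...><...>", keeping only the encoded emoji and dropping the trailing junk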
123 |
124 | #Get the description of the emoticons by merging the emoticons table with the tweets dataframe, tweetDataFinal
125 | tweetDataFinal = merge(tweetDataFinal,emoticons,by.x = "emoticon",by.y = "R.encoding",all.x = T)
126 |
127 |
128 | #We see that some emoticons (two or three if we look carefully) have no description for them.
129 | #Let's take a closer look at them
130 |
131 | exploreUnknown = subset(tweetDataFinal,is.na(Description) & emoticon!='')
132 |
133 | #Nothing particularly strange aside from the fact that they are repetitions of the same tweet by the same person at the same time
134 | #These will be removed anyway.
135 |
136 | #I'm going to remove mentions and @s in the tweet because they add no value to our analysis
137 | tweetDataFinal$text = str_replace_all(tweetDataFinal$text,'@\\S+',"")
138 |
139 | #Also removing RT because that information is already captured in the column isRetweet
140 | tweetDataFinal$text=str_replace_all(tweetDataFinal$text,'rt ','')
141 |
142 | #Removing links from the tweets because again, there is no feasible way that this will make our analysis better
143 | tweetDataFinal$text = gsub('http\\S+\\s*', "", tweetDataFinal$text)
144 |
145 | #Let's not forget to remove the R encoding since we have extracted the sentiment we needed from it
146 | tweetDataFinal$text = gsub("<.*>",'',tweetDataFinal$text)
147 |
148 | #Now, it's safe to remove all other non-alphanumeric characters
149 | tweetDataFinal$text = gsub("[^0-9A-Za-z/// ]", "", tweetDataFinal$text, T)
150 |
151 | #If you look carefully, there are some duplicate tweets from the exact same person at the same time. This happens sometimes on twitter when a person tweets once and
152 | #for some network reasons, it is duplicated (but 6 times though? Weird.)
153 | #Furthermore, a lot of duplicate tweets are probably promotional tweets
154 | #The first step is to pull out all the tweets that are not retweets and then remove duplicates
155 | notRetweets = subset(tweetDataFinal,isRetweet == F)
156 | notRetweets = notRetweets[!duplicated(notRetweets["text"]),]
157 |
158 | #I noticed something pretty weird with the retweeted tweets. I found duplicates of the same person retweeting the same thing within the same second
159 | #Pretty dodgy, eh? So I factored that in and removed the duplicates
160 | Retweet = subset(tweetDataFinal,isRetweet == T)
161 | Retweets = Retweet[!duplicated(Retweet[c("text","screenName")]),]
162 |
163 | #Let's just put these two datasets together
164 | tweetDataFinal = rbind(Retweets,notRetweets)
165 |
166 |
167 | #######SPECIFIC TO MTN#########
168 | #Are we done cleaning now? Not quite but we are almost there. Apparently people spell mountain as mtn.
169 | #Natural language. Sigh. So just in case any of them passed through the network, call, data filter, let's remove them
170 | tweetDataFinal = subset(tweetDataFinal,!grepl("dew",text))
171 | tweetDataFinal = subset(tweetDataFinal,!grepl("bike",text))
172 | #######SPECIFIC TO MTN#########
173 |
174 | #######SPECIFIC TO GLO#########
175 | tweetDataFinal = subset(tweetDataFinal,!grepl("glo up",text))
176 | tweetDataFinal = subset(tweetDataFinal,!grepl("glo'd",text))
177 | tweetDataFinal = subset(tweetDataFinal,!grepl("glo gang",text))
178 | tweetDataFinal = subset(tweetDataFinal,!grepl("glo ing",text))
179 | tweetDataFinal = subset(tweetDataFinal,!grepl("gloing",text))
180 | tweetDataFinal = subset(tweetDataFinal,!grepl("glogang",text))
181 | tweetDataFinal = subset(tweetDataFinal,!grepl("lil glo",text))
182 | #######SPECIFIC TO GLO#########
183 |
184 | #Now let's add the description of our emoticons to the tweets
185 | #First, let's remove those pesky NAs in the description column and convert them to empty strings
186 | tweetDataFinal$Description = apply(data.frame(tweetDataFinal$Description),1,function(x) ifelse(is.na(x),"",x))
187 |
188 | #Now we are going to concatenate the tweets and emoji description
189 | tweetDataFinal$text = paste(tweetDataFinal$text,tolower(tweetDataFinal$Description))
190 |
191 | # I assume that because some descriptions were empty we would have some trailing spaces.
192 | #Let's remove trailing and leading spaces one last time, just in case.
193 | tweetDataFinal$text = gsub("^\\s+|\\s+$", "",tweetDataFinal$text)
194 |
195 | #And we are done! Now for some sentiment analysis!
196 |
197 | #From the tidytext package, we will use the AFINN lexicon which scores words between -5 and 5 based on
198 | #negative or positive sentiment
199 | lexicon = sentiments %>% filter(lexicon == "AFINN")
200 |
201 | #This function uses the AFINN lexicon: we loop through the words of a tweet and score them accordingly
202 | #It also takes into account when the word 'not' is in a sentence. If the word 'not' is there,
203 | #I need to look at the next word and check if that "not + word" pair exists in the lexicon
204 | #If it does not, I check if the second word alone exists in the lexicon and simply
205 | #reverse the sign of its score e.g. "not great": "great" exists with a score of 3, so with this function
206 | #"not great" gets a score of -3. Pretty awesome, eh?
207 | sentiment_score = function(sentence) {
208 | score = 0
209 | words = str_split(sentence, " ")
210 | words = unlist(words)
211 | for (i in 1:length(words)) {
212 | if (words[i] == "not") {
213 | word = paste("not", words[i + 1])
214 | word.split = unlist(str_split(word, " "))
215 | if (word %in% lexicon$word == T) {
216 | score = score + lexicon$score[lexicon$word == word]
217 |
218 | }
219 | else if (word.split[2] %in% lexicon$word == T) {
220 | score = score - lexicon$score[lexicon$word == word.split[2]]
221 |
222 | }
223 |
224 |
225 | }
226 | else if (i > 1 && words[i - 1] == "not")
227 | next
228 |
229 | else if (words[i] %in% lexicon$word == T) {
230 | score = score + lexicon$score[lexicon$word == words[i]]
231 |
232 | }
233 |
234 |
235 | }
236 | return (score)
237 |
238 | }
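#Quick illustrative check of the negation rule described above (not part of the
#original script; assumes the AFINN lexicon is loaded into `lexicon` as before)
sentiment_score("the network is great")     # "great" is +3 in AFINN, so returns 3
sentiment_score("the network is not great") # sign flipped by the "not" rule, returns -3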
239 |
240 | #Let's apply this function to every row in the data. So much faster than a for loop!
241 | tweetDataFinal$score=apply(data.frame(tweetDataFinal$text),1,sentiment_score)
242 |
243 | #A Histogram of the scores
244 | hist(tweetDataFinal$score[tweetDataFinal$score !=0])
245 |
246 | table(tweetDataFinal$score[tweetDataFinal$score !=0]>0)
247 | median(tweetDataFinal$score[tweetDataFinal$score !=0])
248 | tweetDataFinal$Telecoms = rep("Etisalat",nrow(tweetDataFinal))
249 | write.csv(tweetDataFinal,'etisalat.csv',row.names = F)
250 |
251 |
252 |
253 | #Let's create a term-frequency document using the tm package by creating a corpus of all the words in our data
254 |
255 | corpus = Corpus(VectorSource(tweetDataFinal$text[tweetDataFinal$score !=0]))
256 | corpus = tm_map(corpus, PlainTextDocument)
257 | corpus = tm_map(corpus, removeNumbers)
258 | corpus = tm_map(corpus, content_transformer(stripWhitespace))
259 | corpus = tm_map(corpus, content_transformer(tolower))
260 | corpus = tm_map(corpus, removeWords, c("etisalat","9ja","etisalat_9ja","vodacom","nigeria","home","sleep","start","tweet","phone","aaaaay","face","datamustfall","datamustfal","network","data","call","amp"))
261 | corpus = tm_map(corpus, stemDocument)
262 | corpus = tm_map(corpus, removeWords, stop_words$word)
263 | corpus = tm_map(corpus, removeWords, removePunctuation(stop_words$word))
264 | corpus = tm_map(corpus, removePunctuation)
265 |
266 | frequencies = DocumentTermMatrix(corpus)
267 |
268 | frequencies.common = removeSparseTerms(frequencies,0.9996)
269 |
270 | etisalatTweets = as.data.frame(as.matrix(frequencies.common))
271 |
272 | termFreq = data.frame(word = colnames(etisalatTweets),frequency = colSums(etisalatTweets))
273 |
274 | wordcloud(colnames(etisalatTweets), colSums(etisalatTweets), scale = c(4, 0.5),colors = 'darkgreen')
275 |
276 |
--------------------------------------------------------------------------------
/Friends Analysis Laughter Prediction/canned_laughter_detection.R:
--------------------------------------------------------------------------------
1 | library(parsnip)
2 | library(rsample)
3 | library(tidyverse)
4 | library(recipes)
5 | library(ggthemr)
6 | library(DALEX)
7 | library(subtools) #devtools::install_github("fkeck/subtools")
8 | library(httr)
9 | library(jsonlite)
10 | library(rvest)
11 | library(stringdist)
12 | laughter_transcribed <- read_csv('Laughter_Detection.csv')
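#Each row of the transcription marks a laughter interval (Start-End, in seconds);
#the step below expands it into one row per second so it can be joined onto the
#per-second audio features later.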
13 | laughter_transcribed=laughter_transcribed%>%
14 | rowwise()%>%
15 | mutate(seconds=paste0(seq(Start,End),collapse = ','))%>%
16 | separate_rows(seconds,sep = ',')%>%
17 | mutate(seconds=as.numeric(seconds))
18 |
19 | season_one_audio=read_csv('season_one_audio.csv')
20 | season_one_audio=season_one_audio%>%
21 | select(-X1,-`Unnamed: 0`)%>%
22 | mutate(seconds=seconds-1)
23 |
24 | audio_training_data <- season_one_audio%>%
25 | filter(Episode=='E01')%>%
26 | mutate(seconds=seconds-1)%>%
27 | left_join(laughter_transcribed)%>%
28 | filter(seconds %
29 | mutate(label=ifelse(is.na(File),0,1))%>%
30 | select(-File,-Start,-End,-Length,-`Talk Over`,-seconds)
31 |
32 | audio_training_data <- select_if(audio_training_data,is.numeric)
33 |
34 |
35 | audio_training_data=audio_training_data%>%
36 | mutate(label=as.factor(label))
37 |
38 | data_split <- initial_split(audio_training_data, prop = 0.7)
39 |
40 | train=data_split %>%
41 | training()
42 |
43 | test=data_split %>%
44 | testing()
45 |
46 |
47 | model1=svm_poly( mode = "classification")%>%
48 | set_engine("kernlab") %>%
49 | parsnip::fit(label~., data = train)
50 |
51 | model2=mlp( mode = "classification")%>%
52 | set_engine("nnet") %>%
53 | parsnip::fit(label~., data = train)
54 |
55 | model3=rand_forest( mode = "classification")%>%
56 | set_engine("randomForest") %>%
57 | parsnip::fit(label~., data = train)
58 |
59 |
60 | model4=boost_tree( mode = "classification")%>%
61 | set_engine("xgboost") %>%
62 | parsnip::fit(label~., data = train)
63 |
64 | model5=logistic_reg( mode = "classification")%>%
65 | set_engine("glm") %>%
66 | parsnip::fit(label~., data = train)
67 |
68 | model6=rand_forest( mode = "classification")%>%
69 | set_engine("ranger") %>%
70 | parsnip::fit(label~., data = train)
71 |
72 |
73 | custom_predict_classif <- function(objectPred, set){
74 | as.data.frame(predict(objectPred, set, type = "prob"))[,2]
75 | }
76 |
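#DALEX::explain() wants a numeric 0/1 response and a predict function that
#returns the positive-class probability, which is why custom_predict_classif
#above takes the second probability column (.pred_1 for these parsnip fits) and
#build_explainer below coerces the label column to numeric.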
77 | build_explainer <- function(model,test,label){
78 | explainer <- DALEX::explain(model, data=test, y=pull(test,label)%>%as.character()%>%as.numeric(), label =label,
79 | predict_function = custom_predict_classif, colorize = FALSE,verbose=FALSE)
80 | return(explainer)
81 |
82 | }
83 |
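#Despite the name, the helper below computes plain accuracy, precision, recall
#and F1 from the 2x2 confusion matrix of actual label vs. predicted class.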
84 | get_weighted_precision <- function(model,data){
85 | data$prediction=predict(model,data)%>%pull()
86 | #data$prediction=ifelse(data$prediction>=0.5,'Uplift','Drop')
87 | model_name=model$spec$engine
88 | #f1_score=data%>%mutate_if(is.character,as.factor)%>%f_meas(revenue_success,prediction)%>%pull(.estimate)
89 | conf=table(data$label,data$prediction)
90 | res=conf[2,2]/(conf[2,2]+conf[1,2])
91 | recall=conf[2,2]/(conf[2,2]+conf[2,1])
92 | res1=(conf[2,2]+conf[1,1])/(conf[2,2]+conf[1,2]+conf[1,1]+conf[2,1])
93 | f1_score=2*((res*recall)/(res+recall))
94 | df=tibble(name=model_name,accuracy=100*res1,precision=100*res,f1_score=100*f1_score,recall=100*recall)
95 | return(df)
96 | }
97 |
98 |
99 | compare_explainers=pmap(list(
100 | model=list(model1,model2,model3,model4,model5,model6),
101 | test=list(test),
102 | label=c('svm_poly','nnet','rf','xgboost','glm','ranger')
103 | ),build_explainer)
104 |
105 | do.call('plot',c(map(compare_explainers,DALEX::model_performance),list(geom='roc')))+
106 | ggtitle('AUC for Test')
107 |
108 |
109 | test_results=map2_dfr(list(model1,model2,model3,model4,model5,model6),list(test),get_weighted_precision)
110 |
111 | test_results%>%
112 | arrange(-f1_score)
113 |
114 | canned_laughter_detection_model=model3
115 | saveRDS(canned_laughter_detection_model,'canned_laughter_detection_model')
116 |
117 |
118 | subtitles=list.files('S1')
119 | subtitles=subtitles[str_detect(subtitles,'SAiNTS')]
120 | subtitles=subtitles[c(1:15,17:23)]
121 | subtitles[23:24]=c('Friends - 1x16 - The One With Two Parts (1).en.srt','Friends - 1x17 - The One With Two Parts (2).en.srt')
122 | subtitles=sort(subtitles)
123 | read_subtitles_custom<- function(file_path){
124 | path <- paste0('S1/',file_path)
125 |
126 | season_ep=str_extract(file_path,'\\dx\\d{2}')%>%str_split('x',simplify = T)%>%as.vector()
127 | season=as.numeric(season_ep[1])
128 | episode=as.numeric(season_ep[2])
129 | subs=read_subtitles(path)
130 | subs <- subs%>%
131 | mutate(season=season,
132 | epsiode=episode)
133 | return(subs)
134 |
135 | }
136 |
137 | subtitles_df=map_dfr(subtitles,read_subtitles_custom)
138 |
139 | scripts_links=paste0('https://fangj.github.io/friends/season/01',sprintf('%02d', seq_len(24)),'.html')
140 | read_scripts <- function(url){
141 | script_text=url%>%
142 | read_html%>%
143 | html_nodes('p[align="left"],p')%>%
144 | html_text()
145 | script_df=tibble(script_text)%>%
146 | filter(!str_detect(script_text,'Written by:') & !script_text %in% c('End','Opening Credits','Closing Credits','Commercial Break'))%>%
147 | mutate(scene=ifelse(str_detect(script_text,'^\\['),script_text,NA))%>%
148 | fill(scene,.direction = 'down')%>%
149 | filter(!str_detect(script_text,'^\\[|^\\(|^\\{'))%>%
150 | mutate(speaker=str_extract(script_text,'.*?:'))%>%
151 | mutate(script_text=str_remove_all(script_text,speaker)%>%str_trim())%>%
152 | mutate(addressee=str_extract(script_text,'Monica|Joey|Chandler|Phoebe|Ross|Rachel'),
153 | addressee=ifelse(is.na(addressee),'All',addressee))%>%
154 | filter(!is.na(script_text))%>%
155 | mutate(speaker=str_remove_all(speaker,':'))%>%
156 | mutate(script_text=str_replace_all(script_text,'\n',' '))
157 | season_ep=str_extract(url,'\\d{4}')
158 | season=str_sub(season_ep,1,2)%>%as.numeric()
159 | episode=str_sub(season_ep,3,4)%>%as.numeric()
160 | script_df=script_df%>%
161 | mutate(season=season,
162 | episode=episode)
163 | return(script_df)
164 | }
165 | scripts_df=map_dfr(scripts_links,read_scripts)
166 |
167 | scripts_df=scripts_df%>%
168 | mutate(script_text=str_remove_all(script_text,'[:punct:]'))
169 |
170 | subtitles_df=subtitles_df%>%
171 | mutate(Text_content=str_remove_all(Text_content,'[:punct:]'))
172 | library(fuzzyjoin)
173 |
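#For each row index, the function below looks at the subtitle lines within a
#+/-5-row window, keeps the ones whose text appears verbatim in the script line
#at that index, and otherwise falls back to the closest subtitle by
#Damerau-Levenshtein distance (stringdist method 'dl').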
174 | script_to_subtitle_match <- function(sub_df,script_df,row){
175 | window=seq(row-5,row+5)
176 | window=window[window>0]
177 | matched=sub_df%>%
178 | slice(window)%>%
179 | mutate(check=str_detect(script_df$script_text[row],Text_content))%>%
180 | filter(check)%>%
181 | mutate(
182 | scene=script_df$scene[row],
183 | speaker=script_df$speaker[row],
184 | addressee=script_df$addressee[row],
185 | script_text=script_df$script_text[row]
186 | )
187 | if(nrow(matched)==0){
188 | matched=sub_df%>%
189 | slice(window)%>%
190 | mutate(check=stringdist(script_df$script_text[row],Text_content,method = 'dl'))%>%
191 | top_n(1,desc(check))%>%
192 | mutate(
193 | scene=script_df$scene[row],
194 | speaker=script_df$speaker[row],
195 | addressee=script_df$addressee[row],
196 | script_text=script_df$script_text[row]
197 | )
198 | }
199 | return(matched)
200 |
201 |
202 | }
203 |
204 | final=tibble()
205 | for(i in 1:24){
206 | print(paste('Running Episode',i))
207 | sub_df=subtitles_df%>%
208 | filter(epsiode==i)
209 | script_df=scripts_df%>%
210 | filter(episode==i)
211 | matched=map_dfr(seq_len(nrow(sub_df)),~script_to_subtitle_match(sub_df,script_df,.))
212 | final=final%>%bind_rows(matched)
213 | }
214 | season_one_text=final
215 | season_one_text=season_one_text%>%
216 | separate(Timecode_in,c('hour','minute','second'),sep = ':',remove = F)%>%
217 | mutate_at(3:5,as.numeric)%>%
218 | rowwise()%>%
219 |   mutate(Timecode_in=sum(hour*3600,minute*60,second))%>%
220 | select(-hour,-minute,-second)%>%
221 | separate(Timecode_out,c('hour','minute','second'),sep = ':',remove = F)%>%
222 | mutate_at(4:6,as.numeric)%>%
223 | rowwise()%>%
224 |   mutate(Timecode_out=sum(hour*3600,minute*60,second))%>%
225 | select(-check,-hour,-minute,-second)
226 |
227 | write_csv(season_one_audio,'season_one_audio.csv')
228 | write_csv(season_one_text,'season_one_text.csv')
229 | season_one_text=read_csv('season_one_text.csv')
230 | season_one_audio=read_csv('season_one_audio.csv')
231 | canned_laughter_detection_model=readRDS('canned_laughter_detection_model')
232 | season_one_audio$laughter=predict(canned_laughter_detection_model,season_one_audio)%>%pull(.pred_class)
233 | season_one_text=season_one_text%>%
234 | distinct(Timecode_in,Timecode_out,season,epsiode,scene,speaker,addressee,script_text)%>%
235 | group_by(epsiode,season,scene,speaker,addressee,script_text)%>%
236 | summarise(Timecode_in=min(Timecode_in),Timecode_out=max(Timecode_out))%>%
237 | ungroup()%>%
238 | arrange(epsiode,Timecode_in)
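# The distinct()/group_by()/summarise() step above collapses the several subtitle
# cues that matched the same script line into a single row spanning the earliest
# cue start (Timecode_in) to the latest cue end (Timecode_out).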
239 |
240 | audio_text_join <- function(episode,seconds){
241 | audio=season_one_text %>%
242 | filter(epsiode==episode)%>%
243 | mutate(diff=abs(seconds-Timecode_out))%>%
244 | top_n(1,desc(diff))%>%
245 | mutate(seconds=seconds)
246 | return(audio)
247 |
248 | }
249 | audio_text_dataset = tibble()
250 |
251 | for(i in 1:24){
252 | ep_df=season_one_text%>%
253 | filter(epsiode==i)
254 | one_ep_test=map_dfr(seq_len(nrow(ep_df)),~audio_text_join(i,.))
255 | audio_text_dataset=audio_text_dataset%>%bind_rows(one_ep_test)
256 |
257 | }
258 |
259 |
260 |
261 |
262 |
263 | library(ggformula)
264 | library(hrbrthemes)
265 | library(magick)
266 | library(grid)
267 | library(extrafont)
library(ggthemr) # provides define_palette() and ggthemr() used below
268 | font_import(pattern="GABRWFFR",paths = '/Users/rosebudanwuri/Downloads')
269 | color_palette=c('#F7483E','#e7d509','#93AAC9','#F7483D','#D6D2CE','#C59A80','#FFEAD5','#E78D86')
270 | theme_friends <- define_palette(
271 | swatch = color_palette,
272 | background = 'white',
273 | text = 'black',
274 | gradient = c(lower = '#f27789', upper = '#31c2a4'),
275 | line = '#f3f4f8'
276 | )
277 | ggthemr(theme_friends)
278 | chandler=image_read('chandler.png')%>%rasterGrob(interpolate=TRUE)
279 | joey=image_read('joey.png')%>%rasterGrob(interpolate=TRUE)
280 | monica=image_read('monica.png')%>%rasterGrob(interpolate=TRUE)
281 | phoebe=image_read('phoebe.png')%>%rasterGrob(interpolate=TRUE)
282 | rachel=image_read('rachel.png')%>%rasterGrob(interpolate=TRUE)
283 | ross=image_read('ross.png')%>%rasterGrob(interpolate=TRUE)
284 | p1=audio_text_dataset%>%
285 | left_join(season_one_audio%>%
286 | select(Season,Episode,seconds,laughter)%>%
287 | mutate(epsiode=str_extract(Episode,'\\d{2}')%>%as.numeric,
288 | season=str_extract(Season,'\\d{2}')%>%as.numeric))%>%
289 | distinct(season,epsiode,script_text,laughter,speaker,addressee)%>%
290 | filter(laughter==1)%>%
291 | count(epsiode,speaker)%>%
292 | arrange(-n)%>%
293 | filter(speaker %in% c('Ross','Joey','Chandler','Monica','Phoebe','Rachel'))%>%
294 | ggplot(aes(epsiode,n,color=speaker))+
295 | geom_spline(size=1,spar = 0.5)+
296 | ggtitle('Who Was the Funniest Friend in Each Episode?')+
297 | theme_ipsum()+
298 | theme( panel.grid.major = element_blank(),plot.title = element_text(size=16, family="Gabriel Weiss' Friends Font"),legend.position="top",
299 | text=element_text( family="Gabriel Weiss' Friends Font"), legend.key.width = unit(1, "cm"))+
300 | guides(color=guide_legend(ncol=6))+
301 | scale_color_discrete(name = " ")
302 |
303 |
304 | p2=audio_text_dataset%>%
305 | left_join(season_one_audio%>%
306 | select(Season,Episode,seconds,laughter)%>%
307 | mutate(epsiode=str_extract(Episode,'\\d{2}')%>%as.numeric,
308 | season=str_extract(Season,'\\d{2}')%>%as.numeric))%>%
309 | distinct(season,epsiode,script_text,laughter,speaker,addressee)%>%
310 | filter(laughter==1)%>%
311 | count(speaker)%>%
312 | arrange(-n)%>%
313 | filter(speaker %in% c('Ross','Joey','Chandler','Monica','Phoebe','Rachel'))%>%
314 | ggplot(aes(reorder(speaker,n),n,fill=speaker))+
315 | geom_col(color='black')+
316 | coord_flip()+
317 | theme_ipsum()+
318 | theme( panel.grid.major = element_blank(),plot.title = element_text(size=16, family="Gabriel Weiss' Friends Font"),legend.position="top",
319 | text=element_text( family="Gabriel Weiss' Friends Font"), legend.key.width = unit(1, "cm"))+
320 | guides(fill=guide_legend(ncol=6))+
321 | scale_fill_discrete(name = " ")
322 |
323 | library(patchwork)
324 | p2/p1
325 | audio_text_dataset%>%
326 | left_join(season_one_audio%>%
327 | select(Season,Episode,seconds,laughter)%>%
328 | mutate(epsiode=str_extract(Episode,'\\d{2}')%>%as.numeric,
329 | season=str_extract(Season,'\\d{2}')%>%as.numeric))%>%
330 | distinct(season,epsiode,script_text,laughter,speaker,addressee)%>%
331 | filter(laughter==1)%>%
332 | count(speaker,addressee)%>%
333 | filter(addressee!='All')%>%
334 | arrange(-n)
335 |
336 | audio_text_dataset%>%
337 | left_join(season_one_audio%>%
338 | select(Season,Episode,seconds,laughter)%>%
339 | mutate(epsiode=str_extract(Episode,'\\d{2}')%>%as.numeric,
340 | season=str_extract(Season,'\\d{2}')%>%as.numeric))%>%
341 | distinct(season,epsiode,script_text,speaker,addressee)%>%
342 | count(speaker,name='total_dialogue')%>%
343 | arrange(-total_dialogue)%>%
344 | filter(speaker %in% c('Ross','Joey','Chandler','Monica','Phoebe','Rachel'))%>%
345 | left_join(
346 | audio_text_dataset%>%
347 | left_join(season_one_audio%>%
348 | select(Season,Episode,seconds,laughter)%>%
349 | mutate(epsiode=str_extract(Episode,'\\d{2}')%>%as.numeric,
350 | season=str_extract(Season,'\\d{2}')%>%as.numeric))%>%
351 | distinct(season,epsiode,script_text,laughter,speaker,addressee)%>%
352 | filter(laughter==1)%>%
353 | count(speaker,name='funny_dialogue')%>%
354 | arrange(-funny_dialogue)%>%
355 | filter(speaker %in% c('Ross','Joey','Chandler','Monica','Phoebe','Rachel')))%>%
356 | mutate(funny_ratio=100*funny_dialogue/total_dialogue)%>%
357 | arrange(-funny_ratio)
358 | audio_text_dataset <- audio_text_dataset%>%
359 | left_join(season_one_audio%>%
360 | select(Season,Episode,seconds,laughter)%>%
361 | mutate(epsiode=str_extract(Episode,'\\d{2}')%>%as.numeric,
362 | season=str_extract(Season,'\\d{2}')%>%as.numeric,
363 | laughter=as.character(laughter)%>%as.numeric))%>%
364 | group_by(season,epsiode,script_text,speaker,addressee)%>%
365 | summarise(laughter=max(laughter),seconds=max(seconds))%>%
366 | arrange(epsiode,seconds)
367 | write_csv(audio_text_dataset,'season_one_final.csv')
368 | season_one_final=read_csv('season_one_final.csv')
369 | season_one_final=season_one_final%>%
370 | mutate(word_count=str_count(script_text," ")+1)%>%
371 | filter(word_count>2)
372 | write_csv(season_one_final,'season_one_final.csv')
373 |
--------------------------------------------------------------------------------
/The Making of Great Music/scripts/music_sentiment.R:
--------------------------------------------------------------------------------
1 | library(jsonlite)
2 | library(plyr)
3 | library(dplyr)
4 | library(tidyr)
5 | library(devtools)
6 | library(readr)
7 | library(stringr)
8 | library(tm)
9 | library(SnowballC)
10 | library(tidytext)
11 | library(wordcloud)
12 | library(httr)
13 | #List of Years with Billboard data available
14 | year_list=1950:2015
15 |
16 | set_config(config(ssl_verifypeer = 0L))
17 | get_billboard_data = function(year){
18 | df =fromJSON(paste0("https://raw.githubusercontent.com/kevinschaich/billboard-top-100-lyrics/master/data/years/",year,".json"))
19 | df[,c("neg","neu","pos","compound")]=df$sentiment
20 | df$sentiment=NULL
21 | return(df)
22 | }
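# Optional sanity check on a single year before mapping over all of them
# (assumes network access; the year chosen is arbitrary):
# str(get_billboard_data(1965))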
23 |
24 | music_df=ldply(year_list,get_billboard_data)
25 | music_df=music_df %>%
26 | select(-tags)
27 | #install_github("tiagomendesdantas/Rspotify")
28 | library(Rspotify)
29 |
30 | keys <- spotifyOAuth(app_id="ASD","769ef3519e8444238fde9c8981c6371c","b17e4a7ca0b4426f9962645ba5c74a63")
31 |
32 | #name of Spotify Features
33 | features_name=c("id","danceability","energy","key","loudness","mode","speechiness",
34 | "acousticness","instrumentalness","liveness","valence","tempo",
35 | "duration_ms","time_signature","uri","analysis_url")
36 |
37 | #Initializing the feature columns in the data
38 | music_df[,c("id","danceability","energy","key","loudness","mode","speechiness",
39 | "acousticness","instrumentalness","liveness","valence","tempo",
40 | "duration_ms","time_signature","uri","analysis_url")]=0
41 |
42 |
43 | get_spotify_features=function(track, artist){
44 | songs= try(searchTrack(paste0("track:",track ," artist:",artist),keys),silent = T)
45 | if (class(songs)=="try-error"){
46 | return(rep(0,length(features_name)))
47 | }
48 | else{
49 | song_id=songs[,"id"][1]
50 | features=getFeatures(song_id,keys)
51 | return(features)
52 | }
53 | }
54 |
55 | #Two options for this
56 | #Option 1 Mapply: cleaner code but no intermediate data is saved
57 | #so if your internet suddenly goes off, you would have to start from scratch
58 |
59 | #music_df[,17:32]=mapply(get_spotify_features,music_df$title,music_df$artist)
60 |
61 | #Option 2 for loop: slower and less elegant but intermediate data is saved
62 | #so if your internet suddenly goes off, you can start from where it was aborted
63 | for(i in 1:nrow(music_df)){
64 | music_df[i,17:32]= get_spotify_features(track = music_df$title[i],music_df$artist[i])
65 | print(i)
66 |
67 | }
68 |
69 | #To keep the code from breaking when Spotify's API cannot find an artist,
70 | #we initialized all song features above. Now we retry the failed IDs with partial matches
71 | #on the artist's first name only, since misses are usually due to spelling differences
72 | failed_ids = which(music_df$id =="0")
73 | get_spotify_features1=function(track, artist){
74 | artist=str_split(artist," ",simplify = T)[[1]]
75 | songs= try(searchTrack(paste0("track:",track ," artist:",artist,"*"),keys),silent = T)
76 | if (class(songs)=="try-error"){
77 | return(rep(0,length(features_name)))
78 | }
79 | else{
80 | song_id=songs[,"id"][1]
81 | features=getFeatures(song_id,keys)
82 | return(features)
83 | }
84 | }
85 |
86 | #Update the data for the failed IDs
87 | for(i in failed_ids){
88 | music_df[i,17:32]= get_spotify_features1(track = music_df$title[i],music_df$artist[i])
89 | print(i)
90 |
91 | }
92 |
93 | #Function to get spotify's featured and main artists in a song
94 | get_featured_artists=function(track, artist){
95 | songs= try(searchTrack(paste0("track:",track ," artist:",artist),keys),silent = T)
96 | if (class(songs)=="try-error"){
97 | return(NA)
98 | }
99 | else{
100 | song_id=songs[,"id"][1]
101 | aboutSong=getTrack(song_id,keys)
102 | artists=aboutSong$artists
103 | artists=as.character(artists)
104 | return(artists)
105 | }
106 | }
107 |
108 | ###Get the featured artists in each song###
109 |
110 | #Initialize column
111 | music_df$artist_with_features=""
112 |
113 | #Update new column with function output using Option 2: for loop
114 | for (i in 1:nrow(music_df)){
115 | music_df$artist_with_features[i]=get_featured_artists(music_df$title[i],(music_df$artist[i]))
116 | }
117 |
118 | #Take the main artist out of the "artists with features" column
119 | remove_main_artist=function(main_artist,artists){
120 | regex_pattern=paste0("(?<=;).*",main_artist,";|;.*",main_artist,"|",main_artist,".?;")
121 | new_list=str_replace(artists,regex_pattern,"")
122 | return(new_list)
123 | }
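# Illustrative call with made-up values: for main_artist = "drake" and
# artists = "drake;rihanna", the pattern strips the main artist and returns "rihanna".
# remove_main_artist("drake", "drake;rihanna")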
124 | music_df$artist_with_features=remove_main_artist(music_df$artist,music_df$artist_with_features)
125 |
126 | music_df$artist_with_features=ifelse(music_df$artist_with_features==music_df$artist,"",music_df$artist_with_features)
127 |
128 | #Get Images of Artist
129 | get_artist_image=function(artist){
130 | base_lang=Encoding(artist)
131 | base_lang=ifelse(base_lang=="unknown","UTF-8",base_lang)
132 | artist=iconv(artist,from=base_lang,to="ASCII//TRANSLIT")
133 | artists_tbl= try(searchArtist(artist,keys),silent = T)
134 | if (class(artists_tbl)=="try-error" ){
135 | #Picture not available image
136 | return("https://www.tabithaknowel.com/integrated/uploads/2017/05/noPhotoFound.png")
137 | }
138 | if (nrow(artists_tbl)==0){
139 | return("https://www.tabithaknowel.com/integrated/uploads/2017/05/noPhotoFound.png")
140 | }
141 | id=artists_tbl$id[1]
142 | req <- httr::GET(paste0("https://api.spotify.com/v1/artists/",
143 | id), httr::config(token = keys))
144 | json1 <- httr::content(req)
145 | no_of_images=length(json1$images)
146 | if(no_of_images <=0){
147 | return("https://www.tabithaknowel.com/integrated/uploads/2017/05/noPhotoFound.png")
148 | }
149 | image=json1$images[[1]]$url
150 | return(image)
151 | }
152 |
153 |
154 | artistImage=data.frame(artist=character(),image=character(), stringsAsFactors=FALSE)
155 | unique_artists=unique(music_df$artist)
156 | for (i in unique_artists[1:length(unique_artists)]){
157 | url=get_artist_image(i)
158 | artist_and_image=c(i,url)
159 | idx=which(unique_artists %in% i)
160 | artistImage[idx,]=artist_and_image
161 |
162 | }
163 |
164 | music_df=music_df %>%
165 | left_join(artistImage,by="artist")
166 |
167 | #Create the decades in the data
168 | music_df = music_df %>% mutate(year_bin= case_when(
169 | year<1960 ~"50s",
170 | year<1970 ~"60s",
171 | year<1980 ~"70s",
172 | year<1990 ~"80s",
173 | year<2000 ~"90s",
174 | year<2010 ~"00s",
175 | year>=2010 ~"10s"
176 | ))
177 | #Using the semi-colon separator, we split each featured artist into its own column, from
178 | #Features1 to Features6. Features6 because six is the most featured artists on any
179 | #song in this dataset
180 | music_df_with_features=music_df %>%
181 | select(artist,title,artist_with_features)%>%
182 | separate(artist_with_features,paste0("Features",1:6),";")
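# Illustrative result: an artist_with_features value such as "jay z;beyonce" becomes
# Features1 = "jay z", Features2 = "beyonce" and Features3 to Features6 = NA.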
183 |
184 | #We will then gather this into an "artist, featured artist" key-value pair
185 | music_df_with_features=music_df_with_features%>%
186 | gather(ArtistFeatures,FeaturedArtists,Features1:Features6) %>%
187 | select(-ArtistFeatures)
188 |
189 |
190 | #List of genres as defined by Kevin Schaich
191 | genres = list("rock"= c("symphonic rock", "jazz-rock", "heartland rock", "rap rock", "garage rock", "folk-rock", "roots rock", "adult alternative pop rock", "rock roll", "punk rock", "arena rock", "pop-rock", "glam rock", "southern rock", "indie rock", "funk rock", "country rock", "piano rock", "art rock", "rockabilly", "acoustic rock", "progressive rock", "folk rock", "psychedelic rock", "rock & roll", "blues rock", "alternative rock", "rock and roll", "soft rock", "rock and indie", "hard rock", "pop/rock", "pop rock", "rock", "classic pop and rock", "psychedelic", "british psychedelia", "punk", "metal", "heavy metal"),
192 | "alternative/indie"= c("adult alternative pop rock", "alternative rock", "alternative metal", "alternative", "lo-fi indie", "indie", "indie folk", "indietronica", "indie pop", "indie rock", "rock and indie"),
193 | "electronic/dance"= c("dance and electronica", "electro house", "electronic", "electropop", "progressive house", "hip house", "house", "eurodance", "dancehall", "dance", "trap"),
194 | "soul"= c("psychedelic soul", "deep soul", "neo-soul", "neo soul", "southern soul", "smooth soul", "blue-eyed soul", "soul and reggae", "soul"),
195 | "classical/soundtrack"= c("classical", "orchestral", "film soundtrack", "composer"),
196 | "pop"= c("country-pop", "latin pop", "classical pop", "pop-metal", "orchestral pop", "instrumental pop", "indie pop", "sophisti-pop", "pop punk", "pop reggae", "britpop", "traditional pop", "power pop", "sunshine pop", "baroque pop", "synthpop", "art pop", "teen pop", "psychedelic pop", "folk pop", "country pop", "pop rap", "pop soul", "pop and chart", "dance-pop", "pop", "top 40"),
197 | "hip-hop"= c("conscious hip hop", "east coast hip hop", "hardcore hip hop", "west coast hip hop", "hiphop", "southern hip hop", "hip-hop", "hip hop", "hip hop rnb and dance hall", "gangsta rap", "rapper", "rap"),
198 | "rnb"=c("contemporary r b","rhythm and blues", "contemporary rnb", "contemporary r&b", "rnb", "rhythm & blues","r&b", "blues"),
199 | "disco"= c("disco"),
200 | "swing"= c("swing"),
201 | "folk"= c("contemporary folk", "folk"),
202 | "country"= c("country rock", "country-pop", "country pop", "contemporary country", "country"),
203 | "jazz"= c("vocal jazz", "jazz", "jazz-rock"),
204 | "religious"= c("christian", "christmas music", "gospel"),
205 | "blues"= c("delta blues", "rock blues", "urban blues", "electric blues", "acoustic blues", "soul blues", "country blues", "jump blues", "classic rock. blues rock", "jazz and blues", "piano blues", "british blues", "british rhythm & blues", "rhythm and blues", "blues", "blues rock", "rhythm & blues"),
206 | "reggae"= c("reggae fusion", "roots reggae", "reggaeton", "pop reggae", "reggae", "soul and reggae"))
207 |
208 |
209 | #Get Genre of an artist from Spotify
210 | get_artist_genre=function(artist){
211 | base_lang=Encoding(artist)
212 | #Takes care of non-UTF characters
213 | base_lang=ifelse(base_lang=="unknown","UTF-8",base_lang)
214 | artist=iconv(artist,from=base_lang,to="ASCII//TRANSLIT")
215 | artists_tbl= try(searchArtist(artist,keys),silent = T)
216 | if (class(artists_tbl)=="try-error" ){
217 | return("")
218 | }
219 | if (nrow(artists_tbl)==0){
220 | return("")
221 | }
222 | lst1=artists_tbl$genre[1]
223 | genre_list=str_split(lst1,",")[[1]]
224 | num_list=NULL
225 | for (i in 1:length(genres)){
226 | chk= genre_list%in% genres[i][[1]]
227 | totchk=sum(chk)
228 | num_list=append(num_list,totchk)
229 |
230 | }
231 | if (sum(num_list)==0){
232 | return("")
233 | }
234 | idx=which(num_list == max(num_list))
235 |
236 | final_genre=names(genres)[idx]
237 | final_genre=paste0(final_genre,collapse = "&")
238 | return(final_genre)
239 | }
240 |
241 |
242 | #Get genre of main artist i.e. artist that owns the song
243 | unique_main_artists=unique(music_df_with_features$artist)
244 |
245 | main_genre_df=data.frame(artist=character(),genre=character())
246 | for (i in unique_main_artists){
247 | gn=get_artist_genre(i)
248 | lt=list(i,gn)
249 | lt=data.frame(lt)
250 | names(lt)=c("artist","genre")
251 | main_genre_df=rbind(main_genre_df,lt)
252 | }
253 |
254 | #Like the features, separate the genres into artist, genre key-value pairs, from genre1
255 | #to genre5. genre5 because five is the highest number of genres any one artist is affiliated with
256 | main_genre_df=main_genre_df %>%
257 | separate(genre,paste0("genre",1:5),"&")
258 |
259 | #Update the music features data with the genre for the main artists
260 | music_df_with_features=music_df_with_features%>%
261 | left_join(main_genre_df,by = "artist")%>%
262 | gather(xx,main_genre,genre1:genre5)
263 |
264 | #Filter out rows with no genre information and take out the dummy key column xx created above
265 | music_df_with_features=music_df_with_features%>%
266 | filter(!is.na(main_genre)) %>%
267 | select(-xx)
268 |
269 | #Get Genres for featured artists
270 | unique_featured_artists=unique(music_df_with_features$FeaturedArtists)
271 |
272 | feat_genre_df=data.frame(artist=character(),genre=character(), stringsAsFactors=FALSE)
273 |
274 | for (i in unique_featured_artists){
275 | i= str_replace_all(i,pattern = '[:punct:]',"")
276 | gn=get_artist_genre(i)
277 | lt=list(i,gn)
278 | lt=data.frame(lt)
279 | names(lt)=c("artist","genre")
280 | feat_genre_df=rbind(feat_genre_df,lt)
281 | }
282 |
283 | feat_genre_df=feat_genre_df %>%
284 | separate(genre,paste0("genre",1:5),"&")
285 |
286 | #Also make a key-value pair of main artist to featured artist genre
287 | music_df_with_features=music_df_with_features%>%
288 | left_join(feat_genre_df,by=c("FeaturedArtists"="artist")) %>%
289 | gather(xx,featured_genre,genre1:genre5)%>%
290 | filter(!is.na(featured_genre)) %>%
291 | select(-xx)
292 |
293 | ################################Topic Modelling######################################
294 | library(topicmodels)
295 | library(tidytext)
296 | lyrics_words=music_df%>%
297 | unnest_tokens(word,lyrics) %>%
298 | select(word,year_bin,artist)
299 |
300 | #Take out stop words
301 | lyrics_words =lyrics_words %>%
302 | anti_join(stop_words)
303 |
304 | lyrics_words=lyrics_words %>%
305 | group_by(word,year_bin) %>%
306 | summarise(count =n())
307 |
308 | lyrics_dtm = lyrics_words %>%
309 | cast_dtm(year_bin,word,count)
310 |
311 | #Topic Modeling
312 | yearbin_lda <- LDA(lyrics_dtm, k = 2, control = list(seed = 1234))
313 |
314 | yearbin_lda_tidy = tidy(yearbin_lda)
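# tidy() above returns the per-topic word probabilities (the "beta" matrix),
# one row per (topic, term) pair; top_n(5, beta) below keeps the five most
# probable terms for each of the two topics.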
315 |
316 | #select Top 5 terms for each topic
317 | top_terms=yearbin_lda_tidy %>%
318 | group_by(topic)%>%
319 | top_n(5,beta)
320 |
321 |
322 |
323 | library(ggplot2)
324 | library(hrbrthemes)
325 |
326 | #Plot Terms
327 | theme_set(theme_bw())
328 | top_terms$topic=paste("Topic", top_terms$topic)
329 | top_terms %>%
330 | ggplot(aes(reorder(term,-beta), beta)) +
331 | geom_bar(stat = "identity",aes(fill=as.character(topic)))+
332 | scale_fill_manual(values = c('Topic 1' = "skyblue3","Topic 2" = "seagreen3"))+
333 | facet_wrap(~ topic, scales = "free")+
334 | theme(axis.text.x = element_text(size = 15, angle = 90, hjust = 1))
335 |
336 | #Get presence of each topic in each decade
337 | year_gamma = tidy(yearbin_lda,matrix="gamma")
338 | topics_df=year_gamma %>%
339 | arrange(-gamma)
340 |
341 |
342 | write_csv(topics_df,"topic_dataset.csv")
343 |
344 | ##################CLUSTERING#############################
345 | #Prepare the data
346 |
347 | #Group artists by the median of all their song features
348 | cols=c(1:2,4:8,9,10:14,16:28,31)
349 |
350 | firstDf=music_df %>%
351 | mutate(year_bin=as.character(year_bin))%>%
352 | select(-lyrics,-title) %>%
353 | select(cols)%>%
354 | group_by(artist) %>%
355 | summarise_if(is.numeric,median)
356 |
357 | #Function to return mode of a vector
358 | Mode <- function(x) {
359 | ux <- unique(x)
360 | ux[which.max(tabulate(match(x, ux)))]
361 | }
362 |
363 | #Group artists by the decade they appear in most
364 | secondDf=music_df %>%
365 | mutate(year_bin=as.character(year_bin))%>%
366 | select(-lyrics,-title) %>%
367 | group_by(artist) %>%
368 | summarise(year_bin=Mode(year_bin)) %>%
369 | as_tibble()
370 |
371 | #Join both tables
372 | artist_df=firstDf %>%
373 | inner_join(secondDf,by="artist")
374 |
375 | library(h2o)
376 | h2o.init()
377 |
378 |
379 | data = artist_df %>% as.h2o()
380 |
381 | #Split frame into train and validation sets
382 | splits = h2o.splitFrame(data,ratios = 0.7,destination_frames = c("train", "valid"), seed = 1234)
383 | train = h2o.getFrame("train")
384 | val = h2o.getFrame("valid")
385 |
386 | #Column indices for all song features
387 | song_features=2:24
388 |
389 | #create k-means model
390 | knn_model=h2o.kmeans(train,song_features,validation_frame = val,k=2)
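# Note: despite the name, knn_model is a k-means clustering model (k = 2) fit on the
# song-feature columns, with the hold-out frame used only to report validation metrics.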
391 |
392 | #Update the full dataset with the k-means cluster prediction
393 | data$cluster=predict(knn_model,data)
394 | data$year_bin=h2o.asfactor(data$year_bin)
395 |
396 | #View tabular results
397 | h2o.table(data$year_bin,data$cluster)
398 |
399 | #Make H2O frame a dataframe and update cluster names
400 | cluster_df=as.data.frame(data)
401 | cluster_df=cluster_df %>%
402 | mutate(cluster=ifelse(cluster==0,"String Lover","Poetic"))
403 |
404 | #Update music dataset
405 | music_df=music_df%>%
406 | left_join((cluster_df%>%
407 | select(artist,cluster)),by="artist")
408 |
409 | #Find the centers of both clusters
410 | ctrs = h2o.centers(knn_model)
411 | ctrs = as.data.frame(ctrs)
412 |
413 | #Calculate the relative difference in each song feature between the two cluster centers
414 | ctrs[3,]=abs((ctrs[2,]-ctrs[1,])/ctrs[1,])
415 |
416 |
417 |
418 |
419 |
420 |
421 | ######################
422 | data = music_df %>%
423 | select(-lyrics,-title) %>%
424 | as.h2o()
425 | splits = h2o.splitFrame(data,ratios = c(0.6,0.2),destination_frames = c("train", "valid", "test"), seed = 1234)
426 | train = h2o.getFrame("train")
427 | features=c(1:2,4:8,10:14,16:28)
428 | test = h2o.getFrame("test")
429 | val = h2o.getFrame("valid")
431 | knn_model=h2o.kmeans(train,features,validation_frame = val,k=7)
432 | predict(knn_model,train)
433 | data$cluster=predict(knn_model,data)
434 |
435 | write_csv(music_df,"music_df.csv")
436 | write_csv(music_df_with_features,"features_dataset.csv")
437 |
--------------------------------------------------------------------------------
/Consitent Billionaire Guide/app/app.R:
--------------------------------------------------------------------------------
1 | library(h2o)
2 | library(shiny)
3 | library(shinydashboard)
4 | library(shinyjs)
5 | library(shinyBS)
6 | library(shinythemes)
7 | library(plyr)
8 | library(dplyr)
9 | library(scales)
10 | library(lubridate)
11 | library(shinysky)
12 | library(stringr)
13 | library(lime)
14 | library(ggplot2)
15 |
16 |
17 | dat = read.csv("billionaire_data_for_ml.csv")
18 |
19 | model_type.H2OMultinomialModel <<- function(x, ...)
20 | "classification"
21 |
22 | predict_model.H2OMultinomialModel <<-
23 | function(x, newdata, type, ...) {
24 | # Function performs prediction and returns dataframe with Response
25 | #
26 | # x is h2o model
27 | # newdata is data frame
28 | # type is only setup for data frame
29 |
30 | pred <- h2o.predict(x, as.h2o(newdata))
31 |
32 | # return classification probabilities only
33 | return(as.data.frame(pred[, -1]))
34 |
35 | }
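# lime discovers custom model support through the model_type() and predict_model()
# generics, so defining them here (globally, via <<-) is what allows lime explanations
# for the H2O multinomial model this app uses.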
36 |
37 |
38 | Countries = levels(dat$Country)
39 | Sectors = levels(dat$Sector)
40 | Relations = levels(dat$relation)
41 | busyIndicators <-
42 | function(text = "Calculation in progress..",
43 | img = "shinysky/busyIndicator/ajaxloaderq.gif",
44 | wait = 1000) {
45 | tagList(
46 | singleton(tags$head(
47 | tags$link(rel = "stylesheet", type = "text/css", href = "busyIndicator.css")
48 | ))
49 | ,
50 | div(class = "shinysky-busy-indicator", p(text), img(src = img))
51 | ,
52 | tags$script(
53 | sprintf(
54 | " setInterval(function(){
55 | if ($('html').hasClass('shiny-busy')) {
56 | setTimeout(function() {
57 | if ($('html').hasClass('shiny-busy')) {
58 | $('div.shinysky-busy-indicator').show()
59 | }
60 | }, %d)
61 | } else {
62 | $('div.shinysky-busy-indicator').hide()
63 | }
64 | },100)
65 | ",
66 | wait
67 | )
68 | )
69 | )
70 | }
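# A sketch of how this helper is meant to be used (assumption: called once inside
# the UI definition so the indicator div and its polling script are rendered):
# busyIndicators(text = "Predicting...", wait = 500)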
71 | ui = shinyUI(
72 | navbarPage(
73 | "Billion Dollar Questions",
74 | inverse = F,
75 | collapsible = T,
76 | fluid = T,
77 | theme = shinytheme("flatly"),
78 | tabPanel(
79 | "What Type of Billionaire Are You?",
80 | icon = icon("money"),
81 | sidebarLayout(
82 | position = 'left',
83 |
84 | sidebarPanel(
85 | id = "sidebar",
86 | selectizeInput(
87 | "Country",
88 | label = h4("What country are you from?", style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"),
89 | choices = Countries
90 | ),
91 | numericInput(
92 | 'Age',
93 | h4("How Old Are You?", style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"),
94 | value = 20,
95 | min = 12,
96 | max = 100
97 | ),
98 | selectizeInput(
99 | "selfMade",
100 | h4("Do you have your own company (or plan to have one)?", style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"),
101 | choices = c("Yes", "No")
102 | ),
103 | selectizeInput(
104 | "relation",
105 | h4(
106 | "Choose one below that best describes your role in the business:",
107 | style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"
108 | ),
109 | choices = Relations
110 | ),
111 |
112 | numericInput(
113 | "founding_year",
114 | h4("When was/will this business (be) established?", style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"),
115 | value = 1999,
116 | min = 1600,
117 | max = year(Sys.Date()) + 10
118 | ),
119 | selectizeInput(
120 | "Sector",
121 | h4("What Sector is this business?", style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"),
122 | choices = Sectors
123 | ),
124 | selectizeInput(
125 | "Bsc",
126 | h4("Do you have a Bachelor's Degree?", style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"),
127 | choices = c("Yes", "No")
128 | ),
129 | selectizeInput(
130 | "MBA",
131 | h4("Do you have an MBA?", style = "font-size: 10pt;font-family: 'Raleway';text-transform: uppercase;"),
132 | choices = c("Yes", "No")
133 | ),
134 | div(
135 | conditionalPanel(
136 | "!$('html').hasClass('shiny-busy')",
137 | actionButton(
138 | "run",
139 | div("PREDICT", icon("flask"), style = "text-align:center;font-size:10pt;width: 150px;"),
140 | styleclass = "success",
141 | size = "mini",
142 | css.class = "animated infinite rubberBand"
143 | )
144 | ),
145 | style = "text-align:center;"
146 | )
147 |
148 |
149 | ),
150 | mainPanel(
151 | position = "left",
152 |
153 |
154 | tags$head(
155 | tags$link(rel = "stylesheet", type = "text/css", href = "animate.min.css")
156 | ),tags$head(
157 | tags$link(rel = "stylesheet", type = "text/css", href = "style.css")
158 | ),
159 | bsModal(
160 | id = "startupMessage",
161 | trigger = '',
162 | size = 'large',
163 | HTML(
164 | '
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
Have you ever wondered what sort of billionaire you would end up as? A Consistent one, A Hustler or a Ghost?
173 | Learn a bit more about it on the link below and predict the one you could be with this simple app!