├── README.md ├── Rank 1: Rock n Rolla ├── README.md ├── ensemble.py ├── getOutcome.py ├── model_10.R ├── finalModel_v3.py └── DataExploration.ipynb ├── Rank 3: sonny └── Final_Model_Sonny_Knoctober_Submited_to_AV.R └── Rank 2: Nut Crackers └── final_model.R /README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Rank 1: Rock n Rolla /README.md: -------------------------------------------------------------------------------- 1 | Codes and Files used for AV [Data hack - Knocktober](https://datahack.analyticsvidhya.com/contest/knocktober-2016/) 2 | 3 | The leaderboard can be accessed [here](https://datahack.analyticsvidhya.com/contest/knocktober-2016/lb) 4 | 5 | The code file - vopani_final.R is written by Rohan Rao. 6 | 7 | The code files getOutcome.py, finalModel_v3.py, ensemble.py are written by Sudalai Rajkumar. 8 | 9 | 10 | -------------------------------------------------------------------------------- /Rank 1: Rock n Rolla /ensemble.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | path = "../input/Train/" 5 | s1 = pd.read_csv(path+"model_10.csv") 6 | s2 = pd.read_csv(path+"sub35.csv") 7 | 8 | s1 = s1.merge(s2, on=['Patient_ID','Health_Camp_ID'], how='left') 9 | print s1.columns 10 | print np.corrcoef(s1.Outcome_x.values, s1.Outcome_y.values) 11 | 12 | s1["Outcome"] = (0.48*s1.Outcome_x.values + 0.52*s1.Outcome_y.values) 13 | s1.drop(["Outcome_x", "Outcome_y"], axis=1, inplace=True) 14 | s1.to_csv("final.csv", index=False) 15 | 16 | -------------------------------------------------------------------------------- /Rank 1: Rock n Rolla /getOutcome.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | data_path = "../input/Train/" 5 | first_camp = pd.read_csv( data_path 
+ "First_Health_Camp_Attended.csv" ) 6 | second_camp = pd.read_csv( data_path + "Second_Health_Camp_Attended.csv" ) 7 | third_camp = pd.read_csv( data_path + "Third_Health_Camp_Attended.csv" ) 8 | print first_camp.shape, second_camp.shape, third_camp.shape 9 | 10 | col_names = [['Patient_ID','Health_Camp_ID','Outcome']] 11 | first_camp = first_camp[['Patient_ID','Health_Camp_ID','Health_Score']] 12 | first_camp.columns = col_names 13 | second_camp = second_camp[['Patient_ID','Health_Camp_ID','Health Score']] 14 | second_camp.columns = col_names 15 | third_camp = third_camp[['Patient_ID','Health_Camp_ID','Number_of_stall_visited']] 16 | third_camp = third_camp[third_camp['Number_of_stall_visited']>0] 17 | third_camp.columns = col_names 18 | print third_camp.shape 19 | 20 | all_camps = pd.concat([first_camp, second_camp, third_camp]) 21 | all_camps['Outcome'] = 1 22 | print all_camps.shape 23 | 24 | train = pd.read_csv(data_path + "Train.csv") 25 | print train.shape 26 | 27 | train = train.merge(all_camps, on=['Patient_ID','Health_Camp_ID'], how='left') 28 | train['Outcome'] = train['Outcome'].fillna(0).astype('int') 29 | train.to_csv(data_path+'train_with_outcome.csv', index=False) 30 | print train.Outcome.value_counts() 31 | -------------------------------------------------------------------------------- /Rank 1: Rock n Rolla /model_10.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | seed <- 235 5 | set.seed(seed) 6 | 7 | 8 | ## loading libraries 9 | library(data.table) 10 | library(xgboost) 11 | 12 | 13 | ## loading data 14 | train <- fread("Train.csv") 15 | test <- fread("Test.csv") 16 | 17 | health_camp <- fread("Health_Camp_Detail.csv") 18 | 19 | health_1 <- fread("First_Health_Camp_Attended.csv") 20 | health_2 <- fread("Second_Health_Camp_Attended.csv") 21 | health_3 <- fread("Third_Health_Camp_Attended.csv") 22 | 23 | health_1[, V5 := NULL] 24 | setnames(health_1, "Health_Score", "Health_Score_1") 25 | 
setnames(health_2, "Health Score", "Health_Score_2") 26 | 27 | patient <- fread("Patient_Profile.csv") 28 | 29 | train[, train_flag := 1] 30 | test[, train_flag := 0] 31 | 32 | 33 | ## processing data 34 | X_panel <- rbind(train, test) 35 | 36 | X_panel <- merge(X_panel, health_1, all.x = TRUE, by = c("Patient_ID", "Health_Camp_ID")) 37 | X_panel <- merge(X_panel, health_2, all.x = TRUE, by = c("Patient_ID", "Health_Camp_ID")) 38 | X_panel <- merge(X_panel, health_3, all.x = TRUE, by = c("Patient_ID", "Health_Camp_ID")) 39 | 40 | X_panel <- merge(X_panel, health_camp, all.x = TRUE, by = "Health_Camp_ID") 41 | X_panel <- merge(X_panel, patient, all.x = TRUE, by = "Patient_ID") 42 | 43 | X_panel[, target := 0] 44 | 45 | X_panel$target[X_panel$Category1 != "Third" & (X_panel$Health_Score_1 > 0 | X_panel$Health_Score_2 > 0)] <- 1 46 | X_panel$target[X_panel$Category1 == "Third" & X_panel$Number_of_stall_visited > 0] <- 1 47 | 48 | X_panel[, ":="(Registration_Date = as.Date(Registration_Date, "%d-%b-%y"), 49 | Camp_Start_Date = as.Date(Camp_Start_Date, "%d-%b-%y"), 50 | Camp_End_Date = as.Date(Camp_End_Date, "%d-%b-%y"), 51 | First_Interaction = as.Date(First_Interaction, "%d-%b-%y"), 52 | Category1 = as.numeric(as.factor(Category1)), 53 | Category2 = as.numeric(as.factor(Category2)), 54 | City_Type = as.numeric(as.factor(City_Type)), 55 | Income = as.numeric(as.factor(Income)), 56 | Employer_Category = as.numeric(as.factor(Employer_Category)), 57 | Education_Score = as.numeric(Education_Score), 58 | Age = as.numeric(Age))] 59 | 60 | setorder(X_panel, Patient_ID, Registration_Date) 61 | X_panel$order <- seq(1, nrow(X_panel)) 62 | 63 | X_date <- X_panel[, c("Patient_ID", "Registration_Date", "order"), with = FALSE] 64 | X_date$order <- X_date$order + 1 65 | names(X_date)[2] <- "Prev_Date" 66 | 67 | X_panel <- merge(X_panel, X_date, all.x = TRUE, by = c("Patient_ID", "order")) 68 | 69 | X_date$order <- X_date$order - 2 70 | names(X_date)[2] <- "Next_Date" 71 | 72 | 
X_panel <- merge(X_panel, X_date, all.x = TRUE, by = c("Patient_ID", "order")) 73 | 74 | X_panel[, ":="(Start_Date_Diff = as.numeric(Registration_Date - Camp_Start_Date), 75 | End_Date_Diff = as.numeric(Camp_End_Date - Registration_Date), 76 | Interaction_Date_Diff = as.numeric(Registration_Date - First_Interaction), 77 | Prev_Date_Diff = as.numeric(Registration_Date - Prev_Date), 78 | Next_Date_Diff = as.numeric(Registration_Date - Next_Date), 79 | Camp_Start_Year = year(Camp_Start_Date), 80 | Registration_Year = year(Registration_Date), 81 | Registration_Month = month(Registration_Date), 82 | Registration_Day = wday(Registration_Date))] 83 | 84 | X_panel <- X_panel[Camp_Start_Year >= 2005] 85 | X_panel <- X_panel[!is.na(Registration_Date)] 86 | X_panel <- X_panel[Category3 == 2] 87 | 88 | X_patient <- X_panel[, .(Count_Patient = .N), .(Patient_ID)] 89 | X_panel <- merge(X_panel, X_patient, by = "Patient_ID") 90 | 91 | X_patient_date <- X_panel[, .(Count_Patient_Date = .N), .(Patient_ID, Registration_Date)] 92 | X_panel <- merge(X_panel, X_patient_date, by = c("Patient_ID", "Registration_Date")) 93 | 94 | X_donation <- X_panel[Donation > 0, .(Min_Date_Donation = min(Registration_Date)), .(Patient_ID)] 95 | X_panel <- merge(X_panel, X_donation, all.x = T, by = "Patient_ID") 96 | 97 | X_panel[, Donation_Flag := ifelse(is.na(Min_Date_Donation), 0, ifelse(Registration_Date > Min_Date_Donation, 1, 0))] 98 | 99 | X_train <- X_panel[train_flag == 1] 100 | X_test <- X_panel[train_flag == 0] 101 | 102 | X_features <- c("Count_Patient", "Count_Patient_Date", "Donation_Flag", 103 | "City_Type", "Income", "Education_Score", "Age", 104 | "Category1", "Category2", 105 | "Start_Date_Diff", "End_Date_Diff", "Prev_Date_Diff", "Next_Date_Diff") 106 | X_target <- X_train$target 107 | 108 | xgtrain <- xgb.DMatrix(data = as.matrix(X_train[, X_features, with = FALSE]), label = X_target, missing = NA) 109 | xgtest <- xgb.DMatrix(data = as.matrix(X_test[, X_features, with = FALSE]), 
missing = NA) 110 | 111 | 112 | ## xgboost 113 | params <- list() 114 | params$objective <- "binary:logistic" 115 | params$eta <- 0.1 116 | params$max_depth <- 5 117 | params$subsample <- 0.9 118 | params$colsample_bytree <- 0.9 119 | params$min_child_weight <- 2 120 | params$eval_metric <- "auc" 121 | 122 | model_xgb_cv <- xgb.cv(params=params, xgtrain, nrounds = 100, nfold = 5, early.stop.round = 30, prediction = TRUE) 123 | 124 | model_xgb <- xgb.train(params = params, xgtrain, nrounds = 100) 125 | 126 | vimp <- xgb.importance(model = model_xgb, feature_names = X_features) 127 | View(vimp) 128 | 129 | 130 | ## submission 131 | pred <- predict(model_xgb, xgtest) 132 | 133 | submit <- data.table(Patient_ID = X_test$Patient_ID, 134 | Health_Camp_ID = X_test$Health_Camp_ID, 135 | Outcome = pred) 136 | 137 | write.csv(submit, "model_10.csv", row.names = FALSE) 138 | -------------------------------------------------------------------------------- /Rank 3: sonny/Final_Model_Sonny_Knoctober_Submited_to_AV.R: -------------------------------------------------------------------------------- 1 | library(readr) 2 | library(dplyr) 3 | library(caret) 4 | library(xgboost) 5 | library(randomForest) 6 | library(lubridate) 7 | library(gbm) 8 | 9 | train <- read_csv("Train.csv") 10 | H1 <- read_csv("First_Health_Camp_Attended.csv") 11 | H1$"NA" <- NULL 12 | 13 | H2 <- read_csv("Second_Health_Camp_Attended.csv") 14 | H3 <- read_csv("Third_Health_Camp_Attended.csv") 15 | 16 | Camp_details <- read_csv("Health_Camp_Detail.csv") 17 | patient <- read_csv("Patient_Profile.csv") 18 | test <- read_csv("Test.csv") 19 | 20 | train$isTrain <- T 21 | test$isTrain <- F 22 | 23 | df_all <- bind_rows(train, test) 24 | 25 | df_all <- left_join(df_all, H1, by = c("Patient_ID", "Health_Camp_ID")) 26 | df_all <- left_join(df_all, H2, by = c("Patient_ID", "Health_Camp_ID")) 27 | df_all <- left_join(df_all, H3, by = c("Patient_ID", "Health_Camp_ID")) 28 | df_all <- left_join(df_all, Camp_details, by = 
c("Health_Camp_ID")) 29 | df_all <- left_join(df_all, patient, by = c("Patient_ID")) 30 | 31 | #Create the desired Target Column 32 | df_all$Outcome <- ifelse(!is.na(df_all$Health_Score) | !is.na(df_all$"Health Score") | (df_all$Number_of_stall_visited > 0), 1 ,0) 33 | df_all$Outcome <- ifelse(is.na(df_all$Outcome), 0, df_all$Outcome ) 34 | 35 | gen_feature_oneHot <- function(column, data) { 36 | 37 | deltaData <- select(data, -get(column)) 38 | data <- select(data, get(column)) 39 | dummies <- dummyVars(~ . -1, data = data) 40 | df2 <- predict(dummies, newdata = data) 41 | 42 | df2 <- cbind(deltaData, df2) 43 | return(df2) 44 | } 45 | 46 | featureEngg <- function(dat) { 47 | #Drop these 48 | for (i in c("Donation", 49 | "Health_Score", 50 | "Health Score", 51 | "Number_of_stall_visited", 52 | "Last_Stall_Visited_Number")) { 53 | print(i) 54 | dat[[i]] <- NULL 55 | } 56 | 57 | #Convert the Date fields into date format 58 | for (i in c("Registration_Date", 59 | "Camp_Start_Date", 60 | "Camp_End_Date", 61 | "First_Interaction")) { 62 | print(i) 63 | dat[[i]] <- dmy(dat[[i]]) 64 | } 65 | 66 | dat$feat_durationOfCamp <- as.numeric(difftime(dat$Camp_End_Date , dat$Camp_Start_Date, units = "days")) 67 | dat$feat_didUserRegisterBeforeEventStarts <- ifelse(dat$Registration_Date < dat$Camp_Start_Date, 1, 0) 68 | dat$feat_daysLeftForEventSinceRegistraion <- as.numeric(difftime(dat$Camp_End_Date, dat$Registration_Date, units = "days")) 69 | dat$feat_ratio_daysLeftForEventSinceRegistraion_by_durationOfCamp <- dat$feat_daysLeftForEventSinceRegistraion / dat$feat_durationOfCamp 70 | 71 | dat$feat_weekdayEvent <- wday(dat$Camp_Start_Date) 72 | dat$feat_weekOfEvent <- week(dat$Camp_Start_Date) 73 | dat$feat_monthOfEvent <- month(dat$Camp_Start_Date) 74 | dat$feat_quarterOfEvent <- quarter(dat$Camp_Start_Date) 75 | 76 | dat$feat_weekdayRegistration <- wday(dat$Registration_Date) 77 | #dat$feat_weekOfRegistration <- week(dat$Registration_Date) 78 | dat$feat_monthOfRegistration <- 
month(dat$Registration_Date) 79 | #dat$feat_quarterOfRegistration <- quarter(dat$Registration_Date) 80 | 81 | dat$feat_weekdayEndDate <- wday(dat$Camp_End_Date) 82 | dat$feat_weekOfEndDate <- week(dat$Camp_End_Date) 83 | dat$feat_monthOfEndDate <- month(dat$Camp_End_Date) 84 | #dat$feat_quarterOfEndDate <- quarter(dat$Camp_End_Date) 85 | 86 | 87 | dat$feat_sum_of_socialMediaShares <- rowSums(dat[, c("Online_Follower", 88 | "LinkedIn_Shared", 89 | "Twitter_Shared", 90 | "Facebook_Shared")], na.rm = T) 91 | dat$Income <- as.numeric(ifelse(dat$Income == "None", -9999, dat$Income)) 92 | 93 | dat$Education_Score <- as.numeric(ifelse(dat$Education_Score == "None", -9999, dat$Education_Score)) 94 | 95 | dat$Age <- as.numeric(ifelse(dat$Age == "None", -9999, dat$Age)) 96 | #Recalculate Age 97 | dat$Age <- ifelse(dat$Age != -9999, 98 | dat$Age + as.numeric(difftime(dat$Registration_Date, dat$First_Interaction, units = "days"))/365, 99 | dat$Age) 100 | 101 | dat$feat_agebin <- NA 102 | dat$feat_agebin <- ifelse(dat$Age > 39, 1, dat$feat_agebin) 103 | dat$feat_agebin <- ifelse(dat$Age > 30 & dat$Age <= 39, 2, dat$feat_agebin) 104 | dat$feat_agebin <- ifelse(dat$Age > 20 & dat$Age <= 30, 3, dat$feat_agebin) 105 | dat$feat_agebin <- ifelse(dat$Age < 20, 4, dat$feat_agebin) 106 | 107 | dat$feat_daysBetweenFirstInteraction_and_registration <- as.numeric(difftime(dat$Registration_Date , dat$First_Interaction, units = "days")) 108 | dat$feat_daysBetweenFirstInteraction_and_EventStart <- as.numeric(difftime(dat$Camp_Start_Date , dat$First_Interaction, units = "days")) 109 | 110 | #Feature of Football in each event 111 | dat %>% 112 | group_by(Health_Camp_ID) %>% 113 | summarise(feat_CountOfEventsFootfall = n()) -> df_temp 114 | dat <- left_join(dat, df_temp, by = "Health_Camp_ID") 115 | 116 | #Feature of How many events has the patient registered 117 | dat %>% 118 | group_by(Patient_ID) %>% 119 | summarise(feat_CountOfPatientVisits = n()) -> df_temp 120 | dat <- left_join(dat, 
df_temp, by = "Patient_ID") 121 | 122 | #Feature of how many days have elapsed since the last registration made by any patient 123 | dat %>% 124 | group_by(Patient_ID) %>% 125 | arrange(Registration_Date) %>% 126 | mutate(feat_elapseDays = as.numeric(difftime(lead(Registration_Date), Registration_Date, units = "days"))) %>% 127 | ungroup() -> dat 128 | dat$feat_elapseDays <- ifelse(is.na(dat$feat_elapseDays) , -9999, dat$feat_elapseDays) 129 | 130 | #OneHot Encode all Categorical variables 131 | OneHotList <- c("Category1", 132 | "Category2", 133 | "Category3", 134 | "Income", 135 | "City_Type", 136 | "Var1", 137 | "Employer_Category") 138 | for (i in OneHotList) { 139 | cat("One Hot Features ", i, "\n") 140 | dat <- gen_feature_oneHot(i, dat) 141 | } 142 | 143 | 144 | #Drop any columns with no variation 145 | for (i in names(dat)) { 146 | if (length(unique(dat[[i]])) <= 1) { 147 | cat("Dropping no variation column - ", i, "\n") 148 | dat[[i]] <- NULL 149 | } 150 | } 151 | 152 | return(dat) 153 | } 154 | 155 | df_all <- featureEngg(df_all) 156 | 157 | #Split back to train and test 158 | train <- df_all[df_all$isTrain == T, ] 159 | test <- df_all[df_all$isTrain == F, ] 160 | 161 | TARGET = "Outcome" 162 | DropList = c("Patient_ID", 163 | "Health_Camp_ID", 164 | "isTrain", 165 | "Registration_Date", 166 | "Camp_Start_Date", 167 | "Camp_End_Date", 168 | "First_Interaction", 169 | TARGET) 170 | ETA <- 0.01 171 | MAX_DEPTH <- 2 172 | SUB_SAMPLE <- 0.8 173 | MIN_CHILD_WEIGHT <- 1 174 | COL_SAMPLE <- 0.7 175 | GAMMA <- 0 176 | seed <- c(1000, 5000) #Any 2 random seeds 177 | BOOSTER <- "gbtree" # "gblinear" "gbtree" 178 | nrounds <- 3800 179 | 180 | X_train <- train 181 | Y_train <- train[[TARGET]] 182 | 183 | p <- test$Patient_ID 184 | h <- test$Health_Camp_ID 185 | 186 | for (i in DropList) { 187 | cat("Dropping", i, "\n") 188 | X_train[[i]] <- NULL 189 | test[[i]] <- NULL 190 | } 191 | 192 | EVAL_METRIC <- "auc" 193 | OBJECTIVE <- "binary:logistic" 194 | BOOSTER <- 
BOOSTER 195 | nthread <- parallel::detectCores() 196 | isMaximize <- T 197 | EARLY_STOPPING <- 50 198 | print.every.n <- 10 199 | param <- list( 200 | objective = OBJECTIVE, 201 | booster = BOOSTER, 202 | eval_metric = EVAL_METRIC, 203 | eta = ETA, 204 | max_depth = MAX_DEPTH, 205 | subsample = SUB_SAMPLE, 206 | min_child_weight = MIN_CHILD_WEIGHT, 207 | colsample_bytree = COL_SAMPLE, 208 | gamma = GAMMA, 209 | nthread = nthread, 210 | num_parallel_tree = 1 211 | ) 212 | 213 | dtrain <- xgb.DMatrix( data = data.matrix(X_train), 214 | label = data.matrix(Y_train), 215 | missing = NA) 216 | watchlist <- list(train = dtrain) 217 | 218 | test_target_xgb <- rep(0, nrow(test)) 219 | for (s in seed) { 220 | set.seed(s) 221 | cat("########## XGB Seed ", s, "\n") 222 | bst <- xgb.train( params = param, 223 | data = dtrain, 224 | nrounds = nrounds, 225 | verbose = 1, 226 | print.every.n = print.every.n, 227 | early.stop.round = EARLY_STOPPING, 228 | watchlist = watchlist, 229 | maximize = isMaximize 230 | ) 231 | tmp <- predict(bst, data.matrix(test), missing=NA) 232 | test_target_xgb <- tmp + test_target_xgb 233 | } 234 | test_target_xgb <- test_target_xgb / length(seed) 235 | probs <- as.data.frame(matrix(test_target_xgb, nrow=nrow(test), byrow = TRUE)) 236 | 237 | #GBM 238 | ntree <- 2200 239 | test_target_gbm <- rep(0, nrow(test)) 240 | for (s in seed) { 241 | set.seed(s) 242 | cat("########## GBM Seed ", s, "\n") 243 | bst <- gbm.fit(x = X_train, 244 | y = Y_train, 245 | distribution = "bernoulli", 246 | n.trees = ntree, 247 | interaction.depth = 3, 248 | n.minobsinnode = 10, 249 | bag.fraction = 0.8, 250 | shrinkage = 0.01) 251 | tmp <- predict(bst, test, n.trees = ntree, type = "response") 252 | test_target_gbm <- tmp + test_target_gbm 253 | } 254 | test_target_gbm <- test_target_gbm / length(seed) 255 | 256 | final_test <- data.frame(Patient_ID = p, 257 | Health_Camp_ID = h, 258 | Outcome1 = probs$V1, 259 | Outcome2 = test_target_gbm) 260 | 261 | #Build a rank 
average ensemble 262 | final_test <- mutate(final_test, rank1 = dense_rank((Outcome1))) 263 | final_test <- mutate(final_test, rank2 = dense_rank((Outcome2))) 264 | final_test$WeightedScore <- (final_test$rank1 * 0.5 + final_test$rank2 * 0.5) 265 | final_test$WeightedScore <- final_test$WeightedScore / max(final_test$WeightedScore) 266 | 267 | final_test$Outcome <- final_test$WeightedScore 268 | write_csv(final_test[, c("Patient_ID", "Health_Camp_ID", "Outcome")], "Sub_final.csv") 269 | 270 | -------------------------------------------------------------------------------- /Rank 2: Nut Crackers /final_model.R: -------------------------------------------------------------------------------- 1 | # Analytics Vidhya 2 | # Knocktober 3 | # Team : Nut Crackers 4 | # Members : Naveen Kumar Kaveti & Suprit Saha 5 | 6 | # Load Required packages 7 | # ========================================================================= 8 | package_names <- c("lubridate","tidyr","Metrics","dplyr") 9 | 10 | loadPackage <- function(pkg) 11 | { 12 | if(missing(pkg) || !is.character(pkg)) 13 | { 14 | stop("Package not correctly entered !!!") 15 | } 16 | new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])] 17 | if(length(new.pkg)) 18 | { 19 | install.packages(new.pkg, dependencies = TRUE) 20 | } 21 | sapply(pkg, require, character.only = TRUE) 22 | cat("Packages Loaded !!!") 23 | } 24 | suppressPackageStartupMessages(suppressWarnings(loadPackage(package_names))) 25 | 26 | 27 | # Reading datasets 28 | # ================================================================================= 29 | FHCA <- read.csv("First_Health_Camp_Attended.csv",header = TRUE,na.strings = "") 30 | HCD <- read.csv("Health_Camp_Detail.csv",header = TRUE,na.strings = "") 31 | PP <- read.csv("Patient_Profile.csv",header = TRUE,na.strings = "") 32 | SHCA <- read.csv("Second_Health_Camp_Attended.csv",header = TRUE,na.strings = "") 33 | THCA <- read.csv("Third_Health_Camp_Attended.csv",header = TRUE,na.strings = "") 
34 | Train <- read.csv("Train.csv",header = TRUE,na.strings = "") 35 | 36 | # Reading test data 37 | # ================================================================================= 38 | Test <- read.csv("Test.csv",header = TRUE,na.strings = "") 39 | 40 | # Feature extraction from existing datasets 41 | # ================================================================================= 42 | Train <- left_join(Train, PP, by = "Patient_ID") 43 | Train <- left_join(Train, HCD, by = "Health_Camp_ID") 44 | Train <- left_join(Train, FHCA, by = c("Patient_ID", "Health_Camp_ID")) 45 | Train <- left_join(Train, SHCA, by = c("Patient_ID", "Health_Camp_ID")) 46 | Train <- left_join(Train, THCA, by = c("Patient_ID", "Health_Camp_ID")) 47 | 48 | Test <- left_join(Test, PP, by = "Patient_ID") 49 | Test <- left_join(Test, HCD, by = "Health_Camp_ID") 50 | 51 | # Dropping variables 52 | # ================================================================================= 53 | Train$Category3 <- NULL 54 | Test$Category3 <- NULL 55 | Train$Donation <- NULL 56 | 57 | Train$X <- NULL 58 | Train$Last_Stall_Visited_Number <- NULL 59 | 60 | 61 | # Defining Target variable 62 | # ================================================================================= 63 | Train$Y <- ifelse((is.na(Train$Health_Score) & is.na(Train$Health.Score) & Train$Number_of_stall_visited <1),0,1) 64 | Train$Y[is.na(Train$Y)] <- 0 65 | 66 | table(Train$Y) 67 | 68 | 69 | # Feature engineering 70 | # ================================================================================== 71 | Cleansing <- function(df) 72 | { 73 | if(length(formals(Cleansing)) != nargs()) 74 | { 75 | stop("Check for missing arguments !!!") 76 | } 77 | if(is.character(df)) 78 | { 79 | df <- eval(parse(text = df)) 80 | } 81 | if(missing(df) | !is.data.frame(df) ) 82 | { 83 | stop("Enter valid data frame !!!") 84 | } 85 | 86 | df$Camp_Start_Date <- as.Date(df$Camp_Start_Date, format = "%d-%b-%y") 87 | df$CSD_Day <- 
lubridate::day(df$Camp_Start_Date) 88 | df$CSD_Mon <- lubridate::month(df$Camp_Start_Date) 89 | df$CSD_Year <- lubridate::year(df$Camp_Start_Date) 90 | 91 | df$Camp_End_Date <- as.Date(df$Camp_End_Date, format = "%d-%b-%y") 92 | df$CED_Day <- lubridate::day(df$Camp_End_Date) 93 | df$CED_Mon <- lubridate::month(df$Camp_End_Date) 94 | df$CED_Year <- lubridate::year(df$Camp_End_Date) 95 | 96 | df$Camp_Duration <- difftime(df$Camp_End_Date, df$Camp_Start_Date, units = c("days")) 97 | df$Camp_Duration <- as.numeric(df$Camp_Duration) 98 | 99 | df$Registration_Date <- as.Date(df$Registration_Date, format = "%d-%b-%y") 100 | if(sum(is.na(df$Registration_Date)) > 0){ 101 | df$Registration_Date[is.na(df$Registration_Date)] <- df$Camp_Start_Date[is.na(df$Registration_Date)] + days(round(df$Camp_Duration[is.na(df$Registration_Date)]/2)) 102 | } 103 | df$Reg_Day <- lubridate::day(df$Registration_Date) 104 | df$Reg_Mon <- lubridate::month(df$Registration_Date) 105 | df$Reg_Year <- lubridate::year(df$Registration_Date) 106 | 107 | df$Online_Follower <- as.factor(df$Online_Follower) 108 | df$LinkedIn_Shared <- as.factor(df$LinkedIn_Shared) 109 | df$Twitter_Shared <- as.factor(df$Twitter_Shared) 110 | df$Facebook_Shared <- as.factor(df$Facebook_Shared) 111 | 112 | levels(df$Income)[levels(df$Income) == "None"] <- 7 113 | 114 | df$Education_Score <- as.character(df$Education_Score) 115 | df$Education_Score[df$Education_Score == "None"] <- 0 116 | df$Education_Score <- as.numeric(df$Education_Score) 117 | df$Education_Score[df$Education_Score == 0] <- median(df$Education_Score[df$Education_Score != 0]) 118 | 119 | df$Age <- as.character(df$Age) 120 | df$Age[df$Age == "None"] <- 0 121 | df$Age <- as.numeric(df$Age) 122 | df$Age[df$Age == 0] <- median(df$Age[df$Age != 0]) 123 | 124 | df$First_Interaction <- as.Date(df$First_Interaction, format = "%d-%b-%y") 125 | df$FI_Day <- lubridate::day(df$First_Interaction) 126 | df$FI_Mon <- lubridate::month(df$Registration_Date) 127 | # 
df$FI_Year <- year(df$Registration_Date) # Equals to some other variable (Perfect multi-collinearity) 128 | 129 | levels(df$City_Type) <- c(1:length(levels(df$City_Type))) 130 | 131 | levels(df$Employer_Category) <- c(1:length(levels(df$Employer_Category))) 132 | 133 | levels(df$Category1) <- c(1:length(levels(df$Category1))) 134 | 135 | levels(df$Category2) <- c(1:length(levels(df$Category2))) 136 | 137 | df$Reg_Year <- factor(df$Reg_Year, levels = c(2003, 2004, 2005, 2006, 2007)) 138 | 139 | df$CSD_Year <- factor(df$CSD_Year, levels = c(2003, 2004, 2005, 2006, 2007)) 140 | 141 | return(df) 142 | } 143 | Train <- Cleansing(Train) 144 | Test <- Cleansing(Test) 145 | 146 | # Recency & Frequency variables 147 | # ======================================================================== 148 | tab1 <- rbind(Train[, c("Patient_ID", "Registration_Date")], Test[, c("Patient_ID", "Registration_Date")]) 149 | 150 | tab2 <- as.data.frame(tab1 %>% group_by(Patient_ID) %>% summarise(PRRD = max(Registration_Date))) 151 | tab2$Recency <- as.integer(difftime(max(Train$Camp_End_Date, Test$Camp_End_Date), tab2$PRRD, units = "days")) 152 | tab2$PRRD <- NULL 153 | 154 | 155 | tab3 <- as.data.frame(table(tab1$Patient_ID)) 156 | colnames(tab3) <- c("Patient_ID", "Frequency") 157 | tab3$Patient_ID <- as.integer(as.character(tab3$Patient_ID)) 158 | 159 | Train <- left_join(Train, tab2, by = c("Patient_ID")) 160 | Train <- left_join(Train, tab3, by = c("Patient_ID")) 161 | Test <- left_join(Test, tab2, by = c("Patient_ID")) 162 | Test <- left_join(Test, tab3, by = c("Patient_ID")) 163 | 164 | Train$Recency <- as.integer(Train$Recency) 165 | Test$Recency <- as.integer(Test$Recency) 166 | 167 | # Time Difference features 168 | # ================================================================================= 169 | Train$Lag <- as.integer(difftime(Train$Registration_Date, Train$Camp_Start_Date, units = "days")) 170 | Train$Lag2 <- as.integer(difftime(Train$Registration_Date, 
Train$Camp_End_Date, units = "days")) 171 | Train$Lag3 <- as.integer(difftime(Train$Registration_Date, Train$First_Interaction, units = "days")) 172 | 173 | Test$Lag <- as.integer(difftime(Test$Registration_Date, Test$Camp_Start_Date, units = "days")) 174 | Test$Lag2 <- as.integer(difftime(Test$Registration_Date, Test$Camp_End_Date, units = "days")) 175 | Test$Lag3 <- as.integer(difftime(Test$Registration_Date, Test$First_Interaction, units = "days")) 176 | 177 | 178 | # Assigning probabilities for each Age group 179 | # ================================================================================== 180 | library(classInt) 181 | temp <- classIntervals(Train$Age, 10, style = "fixed", fixedBreaks = c(30, 40, 50, 60, 70, 80)) 182 | Train$Age_Bucket <- as.factor(findCols(temp)) 183 | 184 | temp2 <- classIntervals(Test$Age, 10, style = "fixed", fixedBreaks = c(30, 40, 50, 60, 70, 80)) 185 | Test$Age_Bucket <- as.factor(findCols(temp2)) 186 | 187 | Age_of <- as.data.frame(Train %>% group_by(Age_Bucket) %>% summarise(Age_of = mean(as.integer(as.character(Y))))) 188 | 189 | Train <- left_join(Train, Age_of, by = "Age_Bucket") 190 | Test <- left_join(Test, Age_of, by = "Age_Bucket") 191 | 192 | # Feature set 193 | # =================================================================================== 194 | Features <- Train[, c("LinkedIn_Shared", "Income", "Education_Score", "Age", "City_Type", "Employer_Category", "Category1", "Category2", "CSD_Mon", "CED_Year", "Camp_Duration", "Reg_Year", "Recency", "Frequency", "Age_of", "Lag", "Lag2", "Lag3", "Y")] 195 | 196 | Train_XY <- data.frame(Features) 197 | Train_XY$Y <- as.factor(Train_XY$Y) 198 | 199 | 200 | # Using GBM from h2o package 201 | # ============================================================= 202 | library(h2o) 203 | h2o.init() 204 | 205 | train_h2o <- as.h2o(Train_XY) # Creating h2o dataframe 206 | 207 | splits <- h2o.splitFrame( 208 | train_h2o, ## splitting the H2O frame we read above 209 | c(0.6,0.2), ## 
create splits of 60% and 20%; 210 | seed=1234) ## setting a seed will ensure reproducible results (not R's seed) 211 | 212 | train <- h2o.assign(splits[[1]], "train.hex") 213 | valid <- h2o.assign(splits[[2]], "valid.hex") 214 | holdout <- h2o.assign(splits[[3]], "test.hex") 215 | 216 | gbm <- h2o.gbm( 217 | training_frame = train, ## the H2O frame for training 218 | validation_frame = valid, ## the H2O frame for validation (not required) 219 | x=c(1:18), ## the predictor columns, by column index 220 | y=19, ## the target index (what we are predicting) 221 | model_id = "gbm_covType1", ## name the model in H2O 222 | seed = 2000000) ## Set the random seed for reproducability 223 | 224 | summary(gbm) ## View information about the model. 225 | 226 | # holdout predictions 227 | # =================================================================== 228 | pred_test_gbm <- predict(gbm, holdout) 229 | pred_test_gbm <- data.frame(outcome = as.data.frame(holdout[19]), prob = as.data.frame(pred_test_gbm)[, "p1"]) 230 | auc(pred_test_gbm[,1], pred_test_gbm[,2]) 231 | 232 | # Test Data predictions 233 | # ================================================================== 234 | Test_h2o <- as.h2o(Test) 235 | pred_Test_gbm <- predict(gbm, Test_h2o) 236 | result <- as.data.frame(pred_Test_gbm) 237 | result <- data.frame(Patient_ID = Test$Patient_ID, Health_Camp_ID = Test$Health_Camp_ID, Outcome = result$p1) 238 | write.csv(result, file = "Predictions_GBM.csv", row.names = FALSE) 239 | 240 | -------------------------------------------------------------------------------- /Rank 1: Rock n Rolla /finalModel_v3.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import operator 3 | import pandas as pd 4 | import numpy as np 5 | from sklearn import preprocessing, model_selection, metrics, ensemble 6 | import xgboost as xgb 7 | 8 | def getCountVar(compute_df, count_df, var_name, count_var="v1"): 9 | grouped_df = count_df.groupby(var_name, 
as_index=False).agg('size').reset_index() 10 | grouped_df.columns = [var_name, "var_count"] 11 | merged_df = pd.merge(compute_df, grouped_df, how="left", on=var_name) 12 | merged_df.fillna(-1, inplace=True) 13 | return list(merged_df["var_count"]) 14 | 15 | def create_feature_map(features): 16 | outfile = open('xgb.fmap', 'w') 17 | for i, feat in enumerate(features): 18 | outfile.write('{0}\t{1}\tq\n'.format(i,feat)) 19 | outfile.close() 20 | 21 | def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, extra_X=None, seed_val=0, num_rounds=200): 22 | params = {} 23 | params["objective"] = "binary:logistic" 24 | params['eval_metric'] = 'auc' 25 | params["eta"] = 0.02 26 | params["subsample"] = 0.8 27 | params["min_child_weight"] = 5 28 | params["colsample_bytree"] = 0.7 29 | params["max_depth"] = 6 30 | params["silent"] = 1 31 | params["seed"] = seed_val 32 | 33 | plst = list(params.items()) 34 | xgtrain = xgb.DMatrix(train_X, label=train_y) 35 | 36 | if test_y is not None: 37 | xgtest = xgb.DMatrix(test_X, label=test_y) 38 | watchlist = [ (xgtrain,'train'), (xgtest, 'test') ] 39 | model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=300) 40 | else: 41 | xgtest = xgb.DMatrix(test_X) 42 | model = xgb.train(plst, xgtrain, num_rounds) 43 | 44 | if feature_names is not None: 45 | create_feature_map(feature_names) 46 | model.dump_model('xgbmodel.txt', 'xgb.fmap', with_stats=True) 47 | importance = model.get_fscore(fmap='xgb.fmap') 48 | importance = sorted(importance.items(), key=operator.itemgetter(1), reverse=True) 49 | imp_df = pd.DataFrame(importance, columns=['feature','fscore']) 50 | imp_df['fscore'] = imp_df['fscore'] / imp_df['fscore'].sum() 51 | imp_df.to_csv("imp_feat.txt", index=False) 52 | 53 | pred_test_y = model.predict(xgtest) 54 | loss = 0 55 | 56 | if extra_X is not None: 57 | xgtest = xgb.DMatrix(extra_X) 58 | pred_extra_y = model.predict(xgtest) 59 | return pred_test_y, pred_extra_y, loss 60 | 61 | if test_y is not 
None: 62 | loss = metrics.roc_auc_score(test_y, pred_test_y) 63 | print loss 64 | return pred_test_y, loss 65 | else: 66 | return pred_test_y,loss 67 | 68 | if __name__ == "__main__": 69 | ## Reading the files and converting the dates ## 70 | data_path = "../input/Train/" 71 | train = pd.read_csv(data_path + "train_with_outcome.csv") 72 | test = pd.read_csv(data_path + "Test.csv") 73 | train['Registration_Date'].fillna('10-jan-90', inplace=True) 74 | test['Registration_Date'].fillna('10-jan-90', inplace=True) 75 | train['Registration_Date'] = pd.to_datetime(train['Registration_Date'], format="%d-%b-%y") 76 | test['Registration_Date'] = pd.to_datetime(test['Registration_Date'], format="%d-%b-%y") 77 | train['Registration_Date'] = train['Registration_Date'].apply(lambda x: x.toordinal()) 78 | test['Registration_Date'] = test['Registration_Date'].apply(lambda x: x.toordinal()) 79 | print train.shape, test.shape 80 | 81 | ## Getting patient details and merging with train and test ## 82 | patient = pd.read_csv(data_path + "Patient_Profile.csv", na_values=['None','']) 83 | patient['First_Interaction'] = pd.to_datetime(patient['First_Interaction'], format="%d-%b-%y") 84 | patient['First_Interaction'] = patient['First_Interaction'].apply(lambda x: x.toordinal()) 85 | print patient.shape 86 | train = train.merge(patient, on=['Patient_ID'], how='left') 87 | test = test.merge(patient, on=['Patient_ID'], how='left') 88 | print train.shape, test.shape 89 | 90 | ## Getting health camp details and merging with train and test ## 91 | hc_details = pd.read_csv(data_path + "Health_Camp_Detail.csv") 92 | hc_ids = list(hc_details.Health_Camp_ID.values) 93 | hc_details['Camp_Start_Date'] = pd.to_datetime(hc_details['Camp_Start_Date'], format="%d-%b-%y") 94 | hc_details['Camp_End_Date'] = pd.to_datetime(hc_details['Camp_End_Date'], format="%d-%b-%y") 95 | hc_details['Camp_Start_Date'] = hc_details['Camp_Start_Date'].apply(lambda x: x.toordinal()) 96 | hc_details['Camp_End_Date'] = 
hc_details['Camp_End_Date'].apply(lambda x: x.toordinal()) 97 | hc_details['Camp_Duration_Days'] = hc_details['Camp_End_Date'] - hc_details['Camp_Start_Date'] 98 | print hc_details.head() 99 | train = train.merge(hc_details, on=['Health_Camp_ID'], how='left') 100 | test = test.merge(hc_details, on=['Health_Camp_ID'], how='left') 101 | print train.shape, test.shape 102 | 103 | ## Reading the camp files ## 104 | first_camp_details = pd.read_csv(data_path + "First_Health_Camp_Attended.csv") 105 | first_camp_details = first_camp_details[["Patient_ID","Health_Camp_ID","Donation","Health_Score"]] 106 | train = train.merge(first_camp_details, on=["Patient_ID","Health_Camp_ID"], how='left') 107 | third_camp_details = pd.read_csv(data_path + "Third_Health_Camp_Attended.csv") 108 | third_camp_details = third_camp_details[["Patient_ID","Health_Camp_ID","Number_of_stall_visited","Last_Stall_Visited_Number"]] 109 | train = train.merge(third_camp_details, on=["Patient_ID","Health_Camp_ID"], how='left') 110 | train["Number_of_stall_visited"].fillna(0, inplace=True) 111 | train["Donation"].fillna(0, inplace=True) 112 | train["Health_Score"].fillna(0, inplace=True) 113 | print train.shape, test.shape 114 | 115 | 116 | ## Filling NA with -99 ## 117 | train.fillna(-99, inplace=True) 118 | test.fillna(-99, inplace=True) 119 | 120 | ## print create additional features ## 121 | print "Getting additional features." 
122 | train["Diff_CampStart_Registration"] = train["Camp_Start_Date"] - train["Registration_Date"] 123 | test["Diff_CampStart_Registration"] = test["Camp_Start_Date"] - test["Registration_Date"] 124 | 125 | train["Diff_CampEnd_Registration"] = train["Camp_End_Date"] - train["Registration_Date"] 126 | test["Diff_CampEnd_Registration"] = test["Camp_End_Date"] - test["Registration_Date"] 127 | 128 | train["Diff_Registration_FirstInteraction"] = train["Registration_Date"] - train["First_Interaction"] 129 | test["Diff_Registration_FirstInteraction"] = test["Registration_Date"] - test["First_Interaction"] 130 | 131 | train["Diff_CampStart_FirstInteraction"] = train["Camp_Start_Date"] - train["First_Interaction"] 132 | test["Diff_CampStart_FirstInteraction"] = test["Camp_Start_Date"] - test["First_Interaction"] 133 | print train.shape, test.shape 134 | 135 | ## Getitng the cat columns and label encode them ## 136 | cat_columns = [] 137 | for col in train.columns: 138 | if train[col].dtype == 'object': 139 | print col 140 | cat_columns.append(col) 141 | enc = preprocessing.LabelEncoder() 142 | full_list = list(train[col].values) + list(test[col].values) 143 | enc.fit(full_list) 144 | train[col] = enc.transform(list(train[col].values)) 145 | test[col] = enc.transform(list(test[col].values)) 146 | 147 | # getting count # 148 | for col in ["Patient_ID", "Health_Camp_ID"]: 149 | print "Count : ", col 150 | full_df = pd.concat([train, test]) 151 | train["Count_"+col] = getCountVar(train, full_df, col) 152 | test["Count_"+col] = getCountVar(test, full_df, col) 153 | 154 | 155 | ## do sorting so as to compute the next variables ## 156 | train = train.sort_values(['Camp_Start_Date', 'Camp_End_Date', 'Patient_ID']).reset_index(drop=True) 157 | test = test.sort_values(['Camp_Start_Date', 'Camp_End_Date', 'Patient_ID']).reset_index(drop=True) 158 | print train.head() 159 | 160 | print "First pass to get necessary details.." 
161 | people_camp_dict = {} 162 | people_date_dict = {} 163 | people_dv_dict = {} 164 | people_cat1_dict = {} 165 | people_cdate_dict = {} 166 | people_donation_dict = {} 167 | people_num_stall_dict = {} 168 | people_last_stall_dict = {} 169 | people_fscore_dict = {} 170 | for ind, row in train.iterrows(): 171 | pid = row['Patient_ID'] 172 | cid = row['Health_Camp_ID'] 173 | reg_date = row['Registration_Date'] 174 | dv = row['Outcome'] 175 | cat1 = row['Category1'] 176 | cdate = row['Camp_Start_Date'] 177 | donation = row['Donation'] 178 | num_stall = row['Number_of_stall_visited'] 179 | fscore = row['Health_Score'] 180 | 181 | tlist = people_camp_dict.get(pid,[]) 182 | tlist.append(cid) 183 | people_camp_dict[pid] = tlist[:] 184 | 185 | tlist = people_date_dict.get(pid,[]) 186 | tlist.append(reg_date) 187 | people_date_dict[pid] = tlist[:] 188 | 189 | tlist = people_dv_dict.get(pid, []) 190 | tlist.append(dv) 191 | people_dv_dict[pid] = tlist[:] 192 | 193 | tlist = people_donation_dict.get(pid, []) 194 | tlist.append(donation) 195 | people_donation_dict[pid] = tlist[:] 196 | 197 | tlist = people_num_stall_dict.get(pid, []) 198 | tlist.append(num_stall) 199 | people_num_stall_dict[pid] = tlist[:] 200 | 201 | tlist = people_fscore_dict.get(pid, []) 202 | tlist.append(fscore) 203 | people_fscore_dict[pid] = tlist[:] 204 | 205 | tlist = people_cat1_dict.get(pid, []) 206 | tlist.append(cat1) 207 | people_cat1_dict[pid] = tlist[:] 208 | 209 | tlist = people_cdate_dict.get(pid, []) 210 | tlist.append(cdate) 211 | people_cdate_dict[pid] = tlist[:] 212 | 213 | print "Creating features now using dict for train.." 
214 | last_date_list = [] 215 | last_dv_list = [] 216 | last_cat1_list = [] 217 | mean_dv_list = [] 218 | last_cdate_list = [] 219 | last_donation_list = [] 220 | last_num_stall_list = [] 221 | last_fscore_list=[] 222 | for ind, row in train.iterrows(): 223 | pid = row['Patient_ID'] 224 | reg_date = row['Registration_Date'] 225 | cat1 = row['Category1'] 226 | cid = row['Health_Camp_ID'] 227 | cdate = row['Camp_Start_Date'] 228 | 229 | camp_list = people_camp_dict[pid] 230 | for ind, camp in enumerate(camp_list): 231 | if camp == cid: 232 | use_index = ind 233 | break 234 | 235 | tlist = people_date_dict[pid][:use_index] 236 | if len(tlist)>0: 237 | last_date_list.append(reg_date-tlist[-1]) 238 | else: 239 | last_date_list.append(-99) 240 | 241 | tlist = people_dv_dict[pid][:use_index] 242 | if len(tlist)>0: 243 | last_dv_list.append(tlist[-1]) 244 | mean_dv_list.append(np.mean(tlist)) 245 | else: 246 | last_dv_list.append(-99) 247 | mean_dv_list.append(-99) 248 | 249 | tlist = people_donation_dict[pid][:use_index] 250 | if len(tlist)>0: 251 | last_donation_list.append(np.sum(tlist)) 252 | else: 253 | last_donation_list.append(-99) 254 | 255 | tlist = people_num_stall_dict[pid][:use_index] 256 | if len(tlist)>0: 257 | last_num_stall_list.append(np.sum(tlist)) 258 | else: 259 | last_num_stall_list.append(-99) 260 | 261 | tlist = people_fscore_dict[pid][:use_index] 262 | if len(tlist)>0: 263 | last_fscore_list.append(np.mean([i for i in tlist if i!=0])) 264 | else: 265 | last_fscore_list.append(-99) 266 | 267 | tlist = people_cat1_dict[pid][:use_index] 268 | if len(tlist)>0: 269 | last_cat1_list.append(tlist[-1]) 270 | else: 271 | last_cat1_list.append(-99) 272 | 273 | tlist = people_date_dict[pid][use_index+1:] 274 | if len(tlist)>0: 275 | last_cdate_list.append(reg_date-tlist[0]) 276 | else: 277 | last_cdate_list.append(-99) 278 | 279 | print last_fscore_list[:50] 280 | 281 | train["Last_Reg_Date"] = last_date_list[:] 282 | train["Mean_Outcome"] = mean_dv_list[:] 
283 | train["Last_Cat1"] = last_cat1_list[:] 284 | train["Next_Reg_Date"] = last_cdate_list 285 | train["Sum_Donations"] = last_donation_list[:] 286 | train["Sum_NumStalls"] = last_num_stall_list[:] 287 | train["Mean_Fscore"] = last_fscore_list[:] 288 | 289 | print "Prepare dict using val.." 290 | for ind, row in test.iterrows(): 291 | pid = row['Patient_ID'] 292 | cid = row['Health_Camp_ID'] 293 | reg_date = row['Registration_Date'] 294 | cat1 = row['Category1'] 295 | cdate = row['Camp_Start_Date'] 296 | 297 | tlist = people_camp_dict.get(pid,[]) 298 | tlist.append(cid) 299 | people_camp_dict[pid] = tlist[:] 300 | 301 | tlist = people_date_dict.get(pid,[]) 302 | tlist.append(reg_date) 303 | people_date_dict[pid] = tlist[:] 304 | 305 | tlist = people_cat1_dict.get(pid, []) 306 | tlist.append(cat1) 307 | people_cat1_dict[pid] = tlist[:] 308 | 309 | tlist = people_cdate_dict.get(pid, []) 310 | tlist.append(cdate) 311 | people_cdate_dict[pid] = tlist[:] 312 | 313 | print "Creating features for val using dict.." 
314 | last_date_list = [] 315 | last_dv_list = [] 316 | last_cat1_list = [] 317 | mean_dv_list = [] 318 | last_cdate_list = [] 319 | last_donation_list = [] 320 | last_num_stall_list = [] 321 | last_fscore_list = [] 322 | for ind, row in test.iterrows(): 323 | pid = row['Patient_ID'] 324 | reg_date = row['Registration_Date'] 325 | cat1 = row['Category1'] 326 | cid = row['Health_Camp_ID'] 327 | cdate = row['Camp_Start_Date'] 328 | 329 | camp_list = people_camp_dict[pid] 330 | for ind, camp in enumerate(camp_list): 331 | if camp == cid: 332 | use_index = ind 333 | break 334 | 335 | tlist = people_date_dict[pid][:use_index] 336 | if len(tlist)>0: 337 | last_date_list.append(reg_date-tlist[-1]) 338 | else: 339 | last_date_list.append(-99) 340 | 341 | tlist = people_dv_dict.get(pid, []) 342 | if len(tlist)>0: 343 | last_dv_list.append(tlist[-1]) 344 | mean_dv_list.append(np.mean(tlist)) 345 | else: 346 | last_dv_list.append(-99) 347 | mean_dv_list.append(-99) 348 | 349 | tlist = people_donation_dict.get(pid, []) 350 | if len(tlist)>0: 351 | last_donation_list.append(np.sum(tlist)) 352 | else: 353 | last_donation_list.append(-99) 354 | 355 | tlist = people_num_stall_dict.get(pid, []) 356 | if len(tlist)>0: 357 | last_num_stall_list.append(np.sum(tlist)) 358 | else: 359 | last_num_stall_list.append(-99) 360 | 361 | tlist = people_fscore_dict.get(pid, []) 362 | if len(tlist)>0: 363 | last_fscore_list.append(np.mean([i for i in tlist if i!=0])) 364 | else: 365 | last_fscore_list.append(-99) 366 | 367 | tlist = people_cat1_dict[pid][:use_index] 368 | if len(tlist)>0: 369 | last_cat1_list.append(tlist[-1]) 370 | else: 371 | last_cat1_list.append(-99) 372 | 373 | tlist = people_date_dict[pid][use_index+1:] 374 | if len(tlist)>0: 375 | last_cdate_list.append(reg_date-tlist[0]) 376 | else: 377 | last_cdate_list.append(-99) 378 | 379 | test["Last_Reg_Date"] = last_date_list[:] 380 | test["Mean_Outcome"] = mean_dv_list[:] 381 | test["Last_Cat1"] = last_cat1_list[:] 382 | 
test["Next_Reg_Date"] = last_cdate_list[:] 383 | test["Sum_Donations"] = last_donation_list[:] 384 | test["Sum_NumStalls"] = last_num_stall_list[:] 385 | test["Mean_Fscore"] = last_fscore_list[:] 386 | 387 | train.fillna(-99, inplace=True) 388 | test.fillna(-99, inplace=True) 389 | 390 | print "Getting dv and id values" 391 | train_y = train.Outcome.values 392 | 393 | ## Columns to drop ## 394 | print "Dropping columns.." 395 | drop_cols = ["Camp_Start_Date", "Camp_End_Date", "Registration_Date"] #, "First_Interaction"] 396 | drop_cols = drop_cols + ["LinkedIn_Shared", "Facebook_Shared", "Twitter_Shared", "Online_Follower", "Var4"] 397 | train.drop(drop_cols, axis=1, inplace=True) 398 | test.drop(drop_cols, axis=1, inplace=True) 399 | print train.shape, test.shape 400 | 401 | # preparing train and test # 402 | print "Choose the columns to use.." 403 | xcols = [col for col in train.columns if col not in ["Outcome", "Health_Camp_ID", "Patient_ID", "Der_Var1", "Number_of_stall_visited","Last_Stall_Visited_Number", "Donation", "Health_Score", "Mean_Fscore"]] 404 | print xcols 405 | train_X = np.array(train[xcols]) 406 | test_X = np.array(test[xcols]) 407 | print train_X.shape, test_X.shape 408 | 409 | print "Final Model.." 410 | preds = 0 411 | for seed_val, num_rounds in [[0,200], [2016,250], [1323, 225]]: 412 | print seed_val, num_rounds 413 | temp_preds, loss = runXGB(train_X, train_y, test_X, feature_names=xcols, seed_val=seed_val, num_rounds=num_rounds) 414 | preds += temp_preds 415 | preds = preds/3. 
416 | 417 | out_df = pd.DataFrame({"Patient_ID":test.Patient_ID.values}) 418 | out_df["Health_Camp_ID"] = test.Health_Camp_ID.values 419 | out_df["Outcome"] = preds 420 | out_df.to_csv("sub35.csv", index=False) 421 | -------------------------------------------------------------------------------- /Rank 1: Rock n Rolla /DataExploration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Data Exploration " 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "from matplotlib import pyplot as plt\n", 21 | "import seaborn as sns\n", 22 | "%matplotlib inline" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "##### Loading all the files and checking the number of rows and columns " 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": { 36 | "collapsed": false 37 | }, 38 | "outputs": [ 39 | { 40 | "name": "stdout", 41 | "output_type": "stream", 42 | "text": [ 43 | "Rows and Columns in Train : (75278, 8)\n", 44 | "Rows and Columns in Test : (35249, 8)\n", 45 | "Rows and Columns in Health Camp Details : (65, 6)\n", 46 | "Rows and Columns in Patient Profile : (37633, 11)\n", 47 | "Rows and Columns in First Format Health Camp Attended : (6218, 5)\n", 48 | "Rows and Columns in Second Format Health Camp Attended : (7819, 3)\n", 49 | "Rows and Columns in Third Format Health Camp Attended : (6515, 4)\n" 50 | ] 51 | } 52 | ], 53 | "source": [ 54 | "train = pd.read_csv(\"Train.csv\")\n", 55 | "test = pd.read_csv(\"Test.csv\")\n", 56 | "health_camp = pd.read_csv(\"Health_Camp_Detail.csv\")\n", 57 | "patient = pd.read_csv(\"Patient_Profile.csv\")\n", 58 | "first_format_camp = 
pd.read_csv(\"First_Health_Camp_Attended.csv\")\n", 59 | "second_format_camp = pd.read_csv(\"Second_Health_Camp_Attended.csv\")\n", 60 | "third_format_camp = pd.read_csv(\"Third_Health_Camp_Attended.csv\")\n", 61 | "print \"Rows and Columns in Train : \", train.shape\n", 62 | "print \"Rows and Columns in Test : \", test.shape\n", 63 | "print \"Rows and Columns in Health Camp Details : \", health_camp.shape\n", 64 | "print \"Rows and Columns in Patient Profile : \", patient.shape\n", 65 | "print \"Rows and Columns in First Format Health Camp Attended : \", first_format_camp.shape\n", 66 | "print \"Rows and Columns in Second Format Health Camp Attended : \", second_format_camp.shape\n", 67 | "print \"Rows and Columns in Third Format Health Camp Attended : \", third_format_camp.shape" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "##### Train data exploration" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": [ 87 | "['Patient_ID',\n", 88 | " 'Health_Camp_ID',\n", 89 | " 'Registration_Date',\n", 90 | " 'Var1',\n", 91 | " 'Var2',\n", 92 | " 'Var3',\n", 93 | " 'Var4',\n", 94 | " 'Var5']" 95 | ] 96 | }, 97 | "execution_count": 3, 98 | "metadata": {}, 99 | "output_type": "execute_result" 100 | } 101 | ], 102 | "source": [ 103 | "# Names of the columns present in the train data\n", 104 | "list(train.columns)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 4, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "data": { 116 | "text/html": [ 117 | "
\n", 118 | "\n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | "
Patient_IDHealth_Camp_IDRegistration_DateVar1Var2Var3Var4Var5
0489652657810-Sep-0540002
1507246657818-Aug-05455007
2523729653429-Apr-0600000
3524931653507-Feb-0400000
4521364652928-Feb-06151007
\n", 190 | "
" 191 | ], 192 | "text/plain": [ 193 | " Patient_ID Health_Camp_ID Registration_Date Var1 Var2 Var3 Var4 Var5\n", 194 | "0 489652 6578 10-Sep-05 4 0 0 0 2\n", 195 | "1 507246 6578 18-Aug-05 45 5 0 0 7\n", 196 | "2 523729 6534 29-Apr-06 0 0 0 0 0\n", 197 | "3 524931 6535 07-Feb-04 0 0 0 0 0\n", 198 | "4 521364 6529 28-Feb-06 15 1 0 0 7" 199 | ] 200 | }, 201 | "execution_count": 4, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "# Let us take a look at the top few rows\n", 208 | "train.head()" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 5, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "Number of unique persons in the train : 29828\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "# Number of unique persons present in the train\n", 228 | "print \"Number of unique persons in the train : \", len(train.Patient_ID.unique())" 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "So 29,828 unique patients are present in the 75,278 rows of the training set. Now let us see how they are distributed." 
236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 6, 241 | "metadata": { 242 | "collapsed": false 243 | }, 244 | "outputs": [ 245 | { 246 | "data": { 247 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh0AAAFoCAYAAADzZ0kIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3Xu0nVV96P1vsvdJSDjsGu05JBYK2o7zEzgcG1AIlEuh\n9lDqqTCwF5HXDoFSFChVYDSvFaFvqQe5yKWeg4hgMcfLi7yireVYrJYBpibDBJRC1J89XogQTCUj\nNpbczM5+/5jPcj9d5rLXztpzZ+/9/Yyxh2s9vzmfNdevszs/5jOfZ88aGRlBkiRpos2e7AFIkqSZ\nwaJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSapicLwdI2IusBq4\nNDMfbY4dCnwAOBV4FnhnZt7f6nMucB2wEPgccFFmbmjF3wNcQCmG7snMpa3Yi4EPAr8G/AC4JjM/\nOt7xS5Kkusa10tEUHB8HjmwdGwD+N7AV+CXgZuAjEXFkEz8OuBu4FlgCLADubfW/EjgXOAt4PXBe\nRFzR+tgPAwcBxwPvBu6OiFeNZ/ySJKm+nlc6IuII4GO7CL0W+DlgSWa+APxTRPw6cCLwNeBS4L7O\n6kREvAl4OiIOy8yngcuBqzNzRRNfSlkVuSUifqE5/2GZ+T3g6xFxAnAJZWVEkiTt58az0nEq8AXg\nBGBW9/Gm4AAgM8/JzLubt0uAR1uxZ4C1wJKIWAQcCnyxdb7lwGERcTBwHLC2KTja8RPGMX5JkjQJ\nel7pyMw7O68joh16OfCdiLgeeBNl38WfZuZfNfFFwLqu060HDmliI13x9ZSi5pC99JUkSVNAP+9e\n+ffA+cCLgP8G/C/g/4uIY5r4fGBbV59twNwmRmZu74rRiu+uryRJmgLGfffKLuwAns/MtzbvvxoR\nJwN/ALyFssG0u0iYC2xuYkTEnFbh0Wm7eS99x2RkZGRk1qxZe28oSZK69eUf0H4WHc8BO7uOJXB0\n8/pZyq2ybQubfs9SvtBCyj6PTmykFd9d3zGZNWsWmzZtYXi4e4gzy8DAbIaG5s34XJiHUeaiMA+j\nzEVhHopOHvqhn0XHSuCdETErM0eaY0cA323FTwKWwU+e6XEIsCIzn4uItU28c2fMyZTNo+sjYiVl\nU+lLM7Ozt+Ok5pxjNjy8kx07Zu7EaTMXhXkYZS4K8zDKXBTmoX/6WXR8HHgXcEdE3AycAfw65c4T\ngPcDDzcFxGrgNuAzmbm2Fb8hIjqrHtcDNwFk5nci4iHKcz/+qDnnucApfRy/JEmaQPu6kbSzokFm\n/ojytNAjgCeBPwR+JzOfaOIrgYspDwdbDmzg3z5j4ybgPuAB4BPAhzPz9lb894BNlNWNdwDnZ+Zj\n+zh+SZJUyayRkZG9t5oeRjZufKGnJbLt27ezZs2TY2p71FFHM2fOnPGOrZrBwdksWHAgveZiujEP\no8xFYR5GmYvCPBRNHva7jaTTzpo1T7LujNM4am/tAB56mMWLj60wKkmSpiaLjr04Cnj1GNptnOiB\nSJI0xfmn7SVJUhUWHZIkqQqLDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6JElS\nFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5J\nklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhWD4+0YEXOB1cClmfloV2wI+Drw\njsxc1jp+L
nAdsBD4HHBRZm5oxd8DXEAphu7JzKWt2IuBDwK/BvwAuCYzPzre8UuSpLrGtdLRFBwf\nB47cTZMbKYVFu89xwN3AtcASYAFwbyt+JXAucBbweuC8iLiidYoPAwcBxwPvBu6OiFeNZ/ySJKm+\nnouOiDgCWAm8bDfxk4DTge93hS4F7svMj2bmU8CbgN+IiMOa+OXA1Zm5IjMfAZYClzXn/AXgtcCF\nmfn1zPwQ8BHgkl7HL0mSJsd4VjpOBb4AnADMagci4t8BH6AUA9u7+i0BfnIZJjOfAdYCSyJiEXAo\n8MVW++XAYRFxMHAcsDYzv9cVP2Ec45ckSZOg5z0dmXln53VEdIevBh7PzM/vIrYIWNd1bD1wSBMb\n6YqvpxQ1h+ylryRJmgLGvZG0W0QcCfwBcPRumswHtnUd2wbMbWJk5vauGK347vqO2cBAbws7vbQf\nGJjN4OD+fzNQ5zv1movpxjyMMheFeRhlLgrzUPTz+/et6ADuotxR8vxu4lv56SJhLrC5iRERc1qF\nR6ft5r30HbOhoXm9NO+p/dDQPBYsOLCn80+mXnMxXZmHUeaiMA+jzEVhHvqnL0VHRPw8cCLwXyLi\nlubwfOADEfG7mfla4Fm67mhp3j/XxGY179e2YiOt+O76jtmmTVsYHt7ZU/uhHtpu3PhCL8OZFAMD\nsxkamtdzLqYb8zDKXBTmYZS5KMxD0clDP/RrpeMZ4Be7jj0C3A50nqWxEjgJWAYQEYdS9mSsyMzn\nImJtE/9Y0/5kyubR9RGxkrKp9KWZ2dnbcVJzzjEbHt7Jjh1jnzi9TLJezz3Zptp4J4p5GGUuCvMw\nylwU5qF/+lJ0ZOZO4NvtYxGxA/jnzOysRrwfeLgpIFYDtwGfycy1rfgNEdFZ9bgeuKk5/3ci4iHg\nIxHxR5S7Wc4FTunH+CVJ0sTb16JjZKyxzFwZERdTnki6AHiIsvG04ybgPwAPAMPABzPz9lb89ygP\nF1tJuaxyfmY+to/jlyRJlexT0ZGZA3uIvXwXx5bRXF7ZRWwncFXzs6v488DZ4xupJEmabDP7PiBJ\nklSNRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVFhyRJqsKi\nQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6JElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKq\nsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSarCokOSJFUxON6OETEXWA1cmpmPNseW\nAO8F/gvwDHBzZt7T6vMa4Fbg5cAK4KLM/E4r/jbgKuAg4H7gsszc2vq8O4BzgM3AezPzlvGOX5Ik\n1TWulY6mAPg4cGTr2MHA/wb+Hvgl4E+B90XEmU3854FPAfcArwKeBz7d6v964BrgIuB0YAlwY+tj\nbwaOAX4FuAS4NiLOGc/4JUlSfT2vdETEEcDHdhE6G3guM9/VvP9WRJwGvBH4LPD7wKrMvK05z/nA\n9yPilGal5HLg1sz8bBO/GPhcRPwxpTi6EDgjM58AnoiIG4HLgAd6/Q6SJKm+8ax0nAp8ATgBmNU6\n/lng/F20/5nmf48HHu0czMwtwOPACRExG3g18MVWv5XAHOCVzc8g5ZJMx/LmnJIkaQroeaUjM+/s\nvI6I9vG1wNpW7D8Cb6BcMgFYBKzrOt164BDgRcAB7XhmDkfEhiY+AjyfmTu6+h4QES/JzA29fg9J\nklTXuDeS7klEHAB8klJE3NUcng9s62q6DZjbxNhDfPZuYjTxMRkY6G1hp5f2AwOzGRzc/28G6nyn\nXnMx3ZiHUeaiMA+jzEVhHop+fv++Fx0RcSDw18AvAr/cufsE2MpPFwhzgY1NjN3ENzfj3FWMJj4m\nQ0Pzxtq05/ZDQ/NYsODAns4/mXrNxXRlHkaZi8I8jDIXhXnon74WHRFxEPC
3lFtiT8vMb7fCzwIL\nu7osBL4CbKAUHguBbzbnGgBeAjxHWen42YiYnZk7W323ZOYPxzq+TZu2MDy8c+8NW+2Hemi7ceML\nYz73ZBkYmM3Q0LyeczHdmIdR5qIwD6PMRWEeik4e+qFvRUdEzKLcEns4cEpm/lNXk5XASa3284HF\nwDWZORIRq5p4Z7PpicB24AnKhtUfU26j/VITPxlY1csYh4d3smPH2CdOL5Os13NPtqk23oliHkaZ\ni8I8jDIXhXnon36udPw+5Rkavwlsap7bAbA9MzcCHwKuam6B/RvgWuDbnQeLUR78dWdErKHsBbkD\nuKv1cLBlTfwCyubSK4E393H8kiRpAu3r7pCR5gfKk0JnUQqKda2fTwJk5tNNmwuAL1PuWDm7c6LM\nvA+4HvgA8BDl9tilrc+6AniM8vCx9wHvysxPI0mSpoR9WunIzIHW6zPH0P4h4BV7iN/Iv30KaTu2\nhfIckF09C0SSJO3nZvZ9QJIkqRqLDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6\nJElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkK\niw5JklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVMXgeDtG\nxFxgNXBpZj7aHDsc+CBwAvBd4O2Z+XetPq8BbgVeDqwALsrM77TibwOuAg4C7gcuy8ytrc+7AzgH\n2Ay8NzNvGe/4JUlSXeNa6WgKgI8DR3aFPg2sA44FPgJ8KiIOafocCnwKuAd4FfB8075zztcD1wAX\nAacDS4AbW+e+GTgG+BXgEuDaiDhnPOOXJEn19Vx0RMQRwErgZV3HT6esYFycxXsoqxkXNE0uAlZl\n5m2Z+XXgfODwiDiliV8O3JqZn83Mx4CLgQsj4oCImA9cCFyemU9k5l9RCpLLeh2/JEmaHONZ6TgV\n+ALlEsqs1vHjgcc7l0May5t2nfijnUBmbgEeB06IiNnAq4EvtvquBOYAr2x+BilFTPvcx49j/JIk\naRL0vKcjM+/svI6IdmgR5dJK23rgkDHEXwQc0I5n5nBEbGjiI8Dzmbmjq+8BEfGSzNzQ6/eQJEl1\njXsj6S7MB7Z1HdsGzB1DfH7r/a7is3cTo3V+SZK0H+tn0bEVeHHXsbmUO0068e4CYS6wsYmxm/hm\nyjh3FaN1/r0aGOjtalIv7QcGZjM4uP/fgdz5Tr3mYroxD6PMRWEeRpmLwjwU/fz+/Sw6nuWn72ZZ\nCDzXii/cRfwrwAZK4bEQ+CZARAwAL2n6zwZ+NiJmZ+bOVt8tmfnDsQ5waGjemL9Mr+2HhuaxYMGB\nPZ1/MvWai+nKPIwyF4V5GGUuCvPQP/0sOlYCSyNibmZ2Ln2cxOjm0JXNewCaO1IWA9dk5khErGri\nnc2mJwLbgScoG1Z/TLmN9ktN/GRgVS8D3LRpC8PDO/fesNV+qIe2Gze+0MtwJsXAwGyGhub1nIvp\nxjyMMheFeRhlLgrzUHTy0A/9LDoeAb4H3BsR1wGvo9yR8uYm/iHgqoj4Y+BvgGuBb3ceLEZ58Ned\nEbGGsqH0DuCu1sPBljXxCyibS69snXtMhod3smPH2CdOL5Os13NPtqk23oliHkaZi8I8jDIXhXno\nn329UDPSedFc9jiLctljNfBG4OzMfKaJP015mugFwJcpd6yc3ep/H3A98AHgIcrtsUtbn3UF8Bjw\n98D7gHdl5qeRJElTwj6tdGTmQNf7bwOn7aH9Q8Ar9hC/kX/7FNJ2bAvlgWLnj2uwkiRpUs3sLbmS\nJKkaiw5JklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVF\nhyRJqsKiQ5IkVWHRIUmSqrDokCRJVVh
0SJKkKiw6JElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRV\nYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSapisJ8ni4hDgPcDpwAbgNsz\n8/YmtriJHQ08Bbw1Mx9v9T0XuA5YCHwOuCgzN7Ti7wEuoBRK92Tm0n6OXZIkTax+r3TcD/wIOAZ4\nG/DuiDgrIuYDDwKPNLEVwIMRMQ8gIo4D7gauBZYAC4B7OyeNiCuBc4GzgNcD50XEFX0euyRJmkB9\nW+mIiBcBxwMXZua3gG9FxN8Cvwq8GNjcWp14W0T8BvDbwDLgUuC+zPxoc643AU9HxGGZ+TRwOXB1\nZq5o4kspqyK39Gv8kiRpYvVzpWML8AJwfkQMRkQAJwJfoaxeLO9q/w/ACc3rJcCjnUBmPgOsBZZE\nxCLgUOCLrb7LgcMi4uA+jl+SJE2gvhUdmbkNuAx4C6UA+Trw2cz8S2ARsK6ry3rgkOb1nuKLgJGu\n+HpgVqu/JEnaz/V1IylwBPDXwM2UDaPvi4gvAPOBbV1ttwFzm9d7is8HyMztXTFa/cdkYKC3GquX\n9gMDsxkc3P9vBup8p15zMd2Yh1HmojAPo8xFYR6Kfn7/fu7p+FXgQuCQZtXjK83dLFcD3+KnC4S5\nwObm9dY9xLc255/TKjw6bTfTg6Gheb0076n90NA8Fiw4sKfzT6ZeczFdmYdR5qIwD6PMRWEe+qef\nKx3HAP/UFBwdXwHeSdmvsbCr/ULgueb1s3uIP0u5lLKQss+jExtp9R+TTZu2MDy8s6f2Qz203bjx\nhV6GMykGBmYzNDSv51xMN+ZhlLkozMMoc1GYh6KTh37oZ9GxDvjFiBjMzB3NsSOAbwMrgXd0tT8R\n+PPm9UrgJMqdLETEoZT9Gisy87mIWNvEP9a0PxlYm5nrexng8PBOduwY+8TpZZL1eu7JNtXGO1HM\nwyhzUZiHUeaiMA/908+i4zPAjcDdEfFu4BWUQuMdwCeBGyLiVuAuymbTAynP9YDy0LCHI2IlsBq4\nDfhMZq5txW+IiM6qx/XATX0cuyRJmmD9vHtlE+WZHIuALwPvBf4sM+/OzB8Br6U8qXQ1cBxwZmZu\nafquBC6mPBxsOeVpphe0Tn8TcB/wAPAJ4MOdJ51KkqSpoa93r2TmN4AzdhNbDRy7h77LaC6v7CK2\nE7iq+ZEkSVPQzL4PSJIkVWPRIUmSqrDokCRJVVh0SJKkKiw6JElSFRYdkiSpCosOSZJUhUWHJEmq\nwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSarCokOSJFVh0SFJ\nkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqrDokCRJVQz2\n82QRMQe4FTgX2AZ8KDPf2cQWA+8HjgaeAt6amY+3+p4LXAcsBD4HXJSZG1rx9wAXUAqlezJzaT/H\nLkmSJla/Vzr+AvhV4NeANwIXRcRFETEfeBB4BDgGWAE8GBHzACLiOOBu4FpgCbAAuLdz0oi4klLI\nnAW8HjgvIq7o89glSdIE6ttKR0QsoKxEnJ6ZjzXHbgaOB3YAm1urE2+LiN8AfhtYBlwK3JeZH236\nvQl4OiIOy8yngcuBqzNzRRNfSlkVuaVf45ckSROrnysdJwE/zMzlnQOZeWNm/j5l9WJ5V/t/AE5o\nXi8BHm31ewZYCyyJiEXAocAXW32XA4dFxMF9HL8kSZpA/dzT8XLgu80qxZ8Ac4C/BN4NLKLs42hb\nDxzVvF4ErNtF/JAmNtIVXw/MauLr+/cVJEnSROln0fHvgf8EXAS8mVIsfAB4AZhP2Vjatg2Y27ze\nU3w+QGZu74rR6j8mAwO9Lez00n5gYDaDg/v/zUCd79RrLqYb8zDKXBTmYZS5KMxD0c/v38+iYwdw\nEPD
G5vIIEXEYcAnwTX66QJgLbG5eb91DfGtzrjmtwqPTdjM9GBqa10vzntoPDc1jwYIDezr/ZOo1\nF9OVeRhlLgrzMMpcFOahf/pZdDwHbO0UHI2k7Md4mHIrbNvCpg/As3uIP0u5lLKQss+jExtp9R+T\nTZu2MDy8s6f2Qz203bjxBbZv385TTz055s/4z//5aObMmTPm9vtqYGA2Q0Pzes7FdGMeRpmLwjyM\nMheFeSg6eeiHfhYdK4ADIuIXM/P/NMeOBL4DrATe0dX+RODPm9crKRtRlwFExKGU/RorMvO5iFjb\nxD/WtD8ZWJuZPe3nGB7eyY4dY584vUyyzrmfeOIJ1p1x2k82q+zJGmD4oYdZvPjYMX9Ov/Sai+nK\nPIwyF4V5GGUuCvPQP30rOjLznyLiQeDeiLiEsqdjKfBnwCeBGyLiVuAu4C3AgcD9Tff3Aw9HxEpg\nNXAb8JnMXNuK3xARnVWP64Gb+jX2fjsKePUY226cyIFIkrQf6ffumPOA/0O5vfVe4H2Z+T8z80fA\na4FTKEXFccCZmbkFIDNXAhdTHg62HNhAeeZHx03AfcADwCeAD2fm7X0euyRJmkB9fQx6U1y8ufnp\njq0GdnsdITOX0Vxe2UVsJ3BV8yNJkqagmX0fkCRJqsaiQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6\nJElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkK\niw5JklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVFhyRJ\nqsKiQ5IkVWHRIUmSqhicqBNHxIPA+sy8oHm/GHg/cDTwFPDWzHy81f5c4DpgIfA54KLM3NCKvwe4\ngFIo3ZOZSydq7JIkqf8mZKUjIt4AnNl6Px94EHgEOAZYATwYEfOa+HHA3cC1wBJgAXBvq/+VwLnA\nWcDrgfMi4oqJGLskSZoYfS86ImIBcCPw5dbhNwCbM3NpFm8DfgT8dhO/FLgvMz+amU8BbwJ+IyIO\na+KXA1dn5orMfARYClzW77FLkqSJMxErHTcDy4Cvt44dDyzvavcPwAnN6yXAo51AZj4DrAWWRMQi\n4FDgi62+y4HDIuLg/g5dkiRNlL4WHRFxOnAyZW9G2yJgXdex9cAhY4gvAka64uuBWa3+kiRpP9e3\njaQRMZeyUfSSzNwWEe3wfGBbV5dtwNwxxOcDZOb2rhit/mMyMNBbjdVL+4GB2QwOzh7XZwwO1ruJ\nqDO+Xsc53ZiHUeaiMA+jzEVhHop+fv9+3r3yp8DqzPz8LmJb+ekCYS6weQzxrQARMadVeHTabqYH\nQ0PzemneU/uhoXksWHDguD5jwYIDe+rTD72Oc7oyD6PMRWEeRpmLwjz0Tz+Ljt8FDo6IHzXv5wJE\nxG8BH6PcCtu2EHiuef3sHuLPUi6lLKTs8+jERlr9x2TTpi0MD+/sqf1QD203bnyhpz7tfrUMDMxm\naGhez7mYbszDKHNRmIdR5qIwD0UnD/3Qz6LjVODftd7fSCkMljax7udqnAj8efN6JXASZQMqEXEo\nZb/Gisx8LiLWNvGPNe1PBtZm5vpeBjg8vJMdO8Y+cXqZZJ1z9zoxex1Tv0zW5+5vzMMoc1GYh1Hm\nojAP/dO3oiMzv9d+36x4jGTmtyPiB8D1EXErcBfwFuBA4P6m+fuBhyNiJbAauA34TGaubcVviIjO\nqsf1wE39GrskSZp4VXbHZOaPgP8GnEIpKo4DzszMLU18JXAx5eFgy4ENlKePdtwE3Ac8AHwC+HBm\n3l5j7JIkqT8m7DHomXl+1/vVwLF7aL+M5vLKLmI7gauaH0mSNAXN7PuAJElSNRYdkiSpCosOSZJU\nhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJ
KkKiw5JklSFRYckSarCokOS\nJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqYrByR6AYPv27axZ8+SY2x911NHMmTNnAkck\nSVL/WXTsB9aseZJ1Z5zGUWNpC/DQwyxefOwEj0qSpP6y6NhPHAW8eoxtN07kQCRJmiDu6ZAkSVVY\ndEiSpCosOiRJUhUWHZIkqQqLDkmSVEVf716JiJcCfwGcBmwGPgG8IzO3R8ThwAeBE4DvAm/PzL9r\n9X0NcCvwcmAFcFFmfqcVfxtwFXAQcD9wWWZu7ef4JUnSxOn3SscngQOAXwbeAPwmcF0T+ytgHXAs\n8BHgUxFxCEBEHAp8CrgHeBXwPPDpzkkj4vXANcBFwOnAEuDGPo9dkiRNoL4VHRERwHHAmzPzG5n5\nD5RC4Y0RcRrwMuDiLN5DWc24oOl+EbAqM2/LzK8D5wOHR8QpTfxy4NbM/GxmPgZcDFwYEQf0a/yS\nJGli9XOl4/vAmZn5fNfxn6GsTDzedTlkOeVSC8DxwKOdQGZuAR4HToiI2ZTnZn2x1XclMAd4ZR/H\nL0mSJlDfio7M/JfM/FznfUTMAi4DvgAsolxaaVsPHNK83lP8RZRLNj+JZ+YwsKHVX5Ik7ecm8u6V\nm4DFwDuB+cC2rvg2YG7zek/x+a33u+svSZL2cxPyt1ci4gbKPozfycyvRcRW4MVdzeZS7nAB2MpP\nFxBzKX9mZGvr/e76j8nAQG81Vi/tBwZmMzg4e0I/o/uzxqPzeeP53OnEPIwyF4V5GGUuCvNQ9PP7\n973oiIj3UTZ6npeZnTtQngWO7Gq6EHiuFV+4i/hXKJdRtjbvv9l8xgDwklb/MRkamtdL857aDw3N\nY8GCAyf0M7o/a1+M53OnI/MwylwU5mGUuSjMQ//0+zkd1wJ/APxuZn6qFVoJLI2IuZnZuUxyEqOb\nQ1c27zvnmU+5NHNNZo5ExKom3tlseiKwHXiil/Ft2rSF4eGdPbUf6qHtxo0v9NSn0w/ouc/GjS/0\n0GPUwMBshobm9ZyL6cY8jDIXhXkYZS4K81B08tAPfSs6IuII4GrgvwNfioiDW+FHgO8B90bEdcDr\nKHekvLmJfwi4KiL+GPgb4Frg25nZKTLuAO6MiDWUDaV3AHf1+nCw4eGd7Ngx9onTyyTrnLvXiTme\nidzr95ioc0wH5mGUuSjMwyhzUZiH/unnharXNee7mlIYrKNc/liXmTuBsymXSFYDbwTOzsxnADLz\naeAcynM7vky5Y+Xszokz8z7geuADwEOUZ3ws7ePYJUnSBOvbSkdm3gDcsIf4tyiPR99d/CHgFXuI\n34hPIZUkacqa2VtyJUlSNRYdkiSpCosOSZJUxYQ8HEx1bN++nTVrnhxT26OOOpo5c+ZM8IgkSdo9\ni44pbM2aJ1l3xmkctbd2AA89zOLFx1YYlSRJu2bRMcUdRXngyd5snOiBSJK0F+7pkCRJVVh0SJKk\nKiw6JEm9XatTAAAIu0lEQVRSFRYdkiSpCosOSZJUhUWHJEmqwltmZ5jt27ezatXX2LRpC8PDe/9T\nzT5UTJLULxYdM8xTTz3J915z6l4fKAY+VEyS1F8WHTPQWB8oBj5UTJLUP+7pkCRJVVh0SJKkKiw6\nJElSFRYdkiSpCjeSaq+2b9/OmjVPjrm9t9lKknbFokN7tWbNk6w74zRvs5Uk7ROLDo2Jt9lKkvaV\nezokSVIVrnRoQrgPRJLUzaJDE8J9IJKkbhYdmjDj2QfSywqJqyOSNLVYdGi/MtYVkvbqyHgu5QwO\nHrAvw5QkjcOUKjoiYi5wB3AOsBl4b2beMrmjUr+NdYWkszoynks5r371WNdgJEn9MqWKDuBm4Bjg\nV4DDgWUR8d3MfGAyB6XJ1+ulnO3bt7Nq1dfYtGkLw8M7937+5lKOl38kafymTNEREfOBC4EzMvMJ\n4ImIuBG4DLDoUE+eeupJvveaU
3ve6Frr8o/FiqTpaMoUHcArKeNd0Tq2HPiTyRmOprrxPvCsxuWf\no4462kJF0rQzlYqORcDzmbmjdWw9cEBEvCQzN0zSuKS96rXAqVmo9HKpaTyXmTr9gHGPz0ta0vQw\nlYqO+cC2rmOd93PHcoKBgd4ewDowMLv8Qt+LNcChA7MZHJw95j7tfp3XvfTp5bPafWbPnlVlfOPp\nM97vNJ6cjycPNcfXq4GB2XzjG2v4/BmncfgY2n8XGPj8IxxzzLF89atP8dnTT9lrv3aff/zH3j8L\nGNf4xvpZ7T4Ajz/+2Bg+qTjmmGMZGJjNqlWr+Nd/3crOnSNj6jOez+kYa7/x9Gn3G0+fXnLRHl8t\n4815rzr/ZvT6b8d4jWdO1NDP7z9rZGTv/8+1P4iI3wL+IjNf2jr2Csrv6Zdk5g8nbXCSJGmvptLf\nXnkW+NmIaI95IbDFgkOSpP3fVCo6vgr8GFjSOnYysGpyhiNJknoxZS6vAETE+4FfBi4ADgHuBd6c\nmZ+ezHFJkqS9m0obSQGuoDyR9O+BfwHeZcEhSdLUMKVWOiRJ0tQ1lfZ0SJKkKcyiQ5IkVWHRIUmS\nqrDokCRJVVh0SJKkKqbaLbM9i4i5lNtszwE2A+/NzFsmd1STIyLOBh4ARoBZzf9+MjN/Z1IHVkkz\nF1YDl2bmo82xw4EPAidQ/nzH2zPz7yZrjLXsJhe3A3/Iv50ff5iZd0zaQCdIRLwU+AvgNMrvhU8A\n78jM7TNpTuwlDzNmPgBExC8A/5PyLKgNwP/IzJub2OHMnDmxpzzs85yYCSsdNwPHAL8CXAJcGxHn\nTOqIJs+RwF9THh+/kPKXe39/UkdUSfOP7McpOWj7NLAOOBb4CPCpiDik8vCq2kMujgCWUuZFZ358\nqO7oqvkkcADlF+sbgN8Ermtif8XMmRN7ysOMmQ8RMQt4kPKXy38JeAtwdUS8oWkyI+bEGPKwz3Ni\nWq90RMR84ELgjMx8AngiIm4ELqP8F/9McwTwVGb+YLIHUlNEHAF8bBfHTwdeDizJzK3AeyLiVylP\nvP2zuqOsY3e5aBwB3JiZ/1xxSNVFRADHAQdn5vPNsWuAmyLib4GXAcdP9zmxpzxQ/mGZEfOhcTDw\nFeCSzHwB+FZEfAE4KSLWM0PmBHvIA/D/0oc5Ma2LDuCVlO+4onVsOfAnkzOcSXckMC2XBPfiVOAL\nwNWUJeSO44HHm18kHcspS6jT1S5zEREHAT8HfHOSxlXT94EzO//QtvwM5W87zZQ5sas8zAJ+ZobN\nBzLz+8C5nfcR8cuUv+11CTNoTuwmD6cAb+nXnJjuRcci4PnM3NE6th44ICJekpkbJmlckyWAX4+I\ndwIDwP3ANZn548kd1sTKzDs7r8t/3P3EIsqSadt6yt/1mZb2kIsjKNdnr46IMynXcm/JzGV1Rzjx\nMvNfgM913jdLypdRirEZMyf2kIfPM4PmQ7eI+C5wKPA3lBXx25ghc6JtF3k4jj7Miem+p2M+sK3r\nWOf93MpjmVQR8fPAPGAL8NvAlcB5wI2TOa5Jtrv5MaPmRuMVwE7ga8CZwN3AXRFx1qSOqo6bgMXA\nO5nZc+ImynX8q5nZ8+Ecyt6WXwJuZebOiU4eFlMKr6APc2K6r3Rs5acnRuf9ZmaQzFzbrO78sDn0\njxExAPyviLgiM2fiH+HZCry469hcZtjcAMjMZRHx16358VRE/CfgrZRNdNNSRNwAXA78TmZ+LSJm\n5JzozgPwtZk4HwAy83GAiLgC+ChwD7Cgq9m0nxOtPLydsnl2CNjnOTHdVzqeBX42ItrfcyGwpZW4\nGWMX3/nrlJ3r3b9kZ4pnKfOhbSHw3CSMZdLtZn783GSMpYaIeB/wduC81l+rnnFzYjd5mFHzISL+\n4y7+i/1rwBzK/+1nxJzYSx4O6secmO5Fx1eBH1M2AnWcDKyanOFMnoj4rxHxfEQc0Dq8GNgwA/e
2\ndKwEjmluIe04qTk+o0TE/xMR3ZuMFwPfmIzxTLSIuBb4A+B3M/P+VmhGzYnd5WGmzQfK3SkPRES7\nuHgV8M+UTaPHzpA5sbs8/AD4o37MiWl9eSUzt0TEMuDOiLiAsvHnSuDNkzqwyfElynLg3RHxZ8Av\nUPZz3DCpo5pcjwDfA+6NiOuA1wGvZmbOj88A/3ezpPxp4Azg/6I832ZaaW4bvhr478CXIuLgVnjG\nzIm95GHGzIfGKsrD8v6y+c4vo/xu/HPgUWbInGDPeVhJH+bEdF/pALgCeAz4e+B9wLvaS4gzRWb+\nK2WS/AfKxPogcGdmvndSB1bfT/auZOZO4CzKUulq4I3A2Zn5zCSNrbZ2LlYDvwX8HvAk5S6GczPz\ny5M0ton0OsrvvqspdyWsoyyVr2vmxNnMjDmxpzzMpPnQ/l3wAuU/0O4Cbs/M/9HEXscMmBN7yUNf\n5sSskZGZuH9QkiTVNhNWOiRJ0n7AokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqL\nDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqvj/Aa/2q0pDQpg1AAAAAElFTkSuQmCC\n", 248 | "text/plain": [ 249 | "" 250 | ] 251 | }, 252 | "metadata": {}, 253 | "output_type": "display_data" 254 | }, 255 | { 256 | "data": { 257 | "text/html": [ 258 | "
\n", 259 | "\n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | "
Number_of_entries_per_personCount
0115714
125878
232675
341527
451038
56670
67490
78388
89335
910207
1011195
1112175
1213124
131490
141573
151647
161756
171844
181934
192019
202116
212216
22235
23247
24251
25283
26321
\n", 405 | "
" 406 | ], 407 | "text/plain": [ 408 | " Number_of_entries_per_person Count\n", 409 | "0 1 15714\n", 410 | "1 2 5878\n", 411 | "2 3 2675\n", 412 | "3 4 1527\n", 413 | "4 5 1038\n", 414 | "5 6 670\n", 415 | "6 7 490\n", 416 | "7 8 388\n", 417 | "8 9 335\n", 418 | "9 10 207\n", 419 | "10 11 195\n", 420 | "11 12 175\n", 421 | "12 13 124\n", 422 | "13 14 90\n", 423 | "14 15 73\n", 424 | "15 16 47\n", 425 | "16 17 56\n", 426 | "17 18 44\n", 427 | "18 19 34\n", 428 | "19 20 19\n", 429 | "20 21 16\n", 430 | "21 22 16\n", 431 | "22 23 5\n", 432 | "23 24 7\n", 433 | "24 25 1\n", 434 | "25 28 3\n", 435 | "26 32 1" 436 | ] 437 | }, 438 | "execution_count": 6, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "grouped_df = train.groupby('Patient_ID').agg('size').reset_index()\n", 445 | "grouped_df.columns = [\"Patient_ID\", \"Number_of_entries_per_person\"]\n", 446 | "grouped_df = grouped_df.groupby('Number_of_entries_per_person').agg('size').reset_index()\n", 447 | "grouped_df.columns = [\"Number_of_entries_per_person\", \"Count\"]\n", 448 | "plt.bar(grouped_df.Number_of_entries_per_person, grouped_df.Count, color='r')\n", 449 | "plt.show()\n", 450 | "grouped_df" 451 | ] 452 | }, 453 | { 454 | "cell_type": "markdown", 455 | "metadata": {}, 456 | "source": [ 457 | "So 15,714 patients have 1 entry, 5878 have 2 entries and so on.\n", 458 | "\n", 459 | "Now let us look at the number of health camps in train data and their distribution" 460 | ] 461 | }, 462 | { 463 | "cell_type": "code", 464 | "execution_count": 7, 465 | "metadata": { 466 | "collapsed": false 467 | }, 468 | "outputs": [ 469 | { 470 | "name": "stdout", 471 | "output_type": "stream", 472 | "text": [ 473 | "Number of Health Camp ID in the train dataset : 44\n" 474 | ] 475 | } 476 | ], 477 | "source": [ 478 | "print \"Number of Health Camp ID in the train dataset : \", len(train.Health_Camp_ID.unique())" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | 
"execution_count": 8, 484 | "metadata": { 485 | "collapsed": false 486 | }, 487 | "outputs": [ 488 | { 489 | "data": { 490 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAg0AAAFoCAYAAADUycjgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3XucXVV58PFfZkJCAgQiKokQueoSAiIqlwCiIn0t2lfx\n1opoC9iIt9eiUKKtFRVbBKygrWBVFBCpSAWtoqK1CEaJBFEUxIUIJFxChHCZQG5kZt4/nrWZnZMz\nmT2TM8mQ/L6fTz45c/ba+9l77bXXfvb1jOvv70eSJGkoXRt7BiRJ0lODSYMkSWrEpEGSJDVi0iBJ\nkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1Mj44RROKf0N8BWgHxhX+78v5zw+pbQfcB6w\nD3Az8K6c84218Y8GTgOmAT8EZuecl9SGfxI4nkhmzs85z1mPZZMkSR003DMNXyd2+NPL/zsDtwPn\npJQmA1cC1wAvBK4DrkwpTQJIKR0AfAk4FTgImApcUE04pXQScDTwWuANwDEppQ+MdMEkSVJnjVuf\n355IKX0IOA6YCbwN+Iec8x614bcBn8g5X5RSuhDozTkfX4btBCwAdss5L0gpLQA+nHP+ahl+DHBa\nznm3Ec+gJEnqmBHf05BSmgqcAszJOT8BHAjMbSn2M2BW+XwQcG01IOd8D7AQOCilNB2YAfy0Nu5c\nYOeU0g4jnUdJktQ563Mj5LuBe3POV5S/pwP3tZRZDOzUYPh04v6I+1qGjauNL0mSNqL1SRreDny2\n9vdkYGVLmZXAxAbDJwPknFe1DKM2viRJ2oiG9fREJaW0P7AjcGnt6xWsvYOfCCxrMHxFme6EWuJQ\nlV1GQ/39/f3jxo1rWlySJA0Ycgc6oqQBeCVwbc750dp39xJPVNRNAxY1GH4vMbPTiPscqmH9tfGH\nNG7cOHp6ltPb2zdome7uLqZMmdSxcqMxTWMb29jGNraxOzHN4Zg6dashy4w0aWh30+M8oPW9CgcD\nn6gNPxS4CCClNIO4X+G6nPOilNLCMvySUv4lwMKc8+LhzFhvbx+rVw9diZ0uZ2xjG9vYxjb2WIzd\nSSNNGvYGvtry3X8Bp6eUzga+ALwT2Aq4rAw/D7g6pTQPuAE4B/hOznlhbfgZKaXqrMPpwFkjnD9J\nktRhI70R8pnAw/Uvcs5Lgb8ADiOSggOAI3POy8vwecAJxMud5gJLiLc/Vs4i7pG4HPgGcGHO+TMj\nnD9JktRhIzrTkHNue+Ej53wD8KJ1jHcR5fJEm2F9wMnlnyRJGmP8wSpJktSISYMkSWrEpEGSJDVi\n0iBJkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGjFpkCRJ\njZg0SJKkRkb009hj1fz58+npWU5vb1/b4TNn7sP48Vtu4LmSJGnTsEklDbcccAAzBxsGcNXV7L//\n/htwjiRJ2nRsUknDTGBdKcHDG2pGJEnaBHlPgyRJasSkQZIkNWLSIEmSGjFpkCRJjZg0SJKkRkwa\nJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBokSVIjJg2SJKmR\n8cMdIaU0ATgbOBpYCXw55/yPZdh+wHnAPsDNwLtyzjfWxj0aOA2YBvwQmJ1zXlIb/kngeCKZOT/n\nPGeEyyVJkjpsJGcaPgu8Avgz4C3A7JTS7JTSZOBK4BrghcB1wJUppUkAKaUDgC8BpwIHAVOBC6qJ\nppROIhKR1wJvAI5JKX1gZIslSZI6bVhnGlJKU4kzAYfnnH9ZvvsUcCCwGlhWOztwYkr
pVcCbgIuA\n9wCX5py/VsZ7G7AgpbRzznkB8D7gwznn68rwOcRZiU+v5zJKkqQOGO6ZhkOBR3LOc6svcs5n5pz/\nljh7MLel/M+AWeXzQcC1tfHuARYCB6WUpgMzgJ/Wxp0L7JxS2mGY8yhJkkbBcO9p2A24q5wl+Adg\nAvAV4J+B6cR9DHWLgZnl83TgvjbDdyrD+luGLwbGleGLhzmfkiSpw4abNGwNPBeYDRxL7Oz/A3gc\nmEzcGFm3EphYPq9r+GSAnPOqlmHUxpckSRvRcJOG1cA2wFvK5QVSSjsD7wZuY+0d/ERgWfm8Yh3D\nV5RpTaglDlXZZXRId3cX3d1dT34eqmyTcsMpa2xjG9vYxjb2hpxmpw03aVgErKgShiIT9yNcTTxK\nWTetjANw7zqG30tciphG3OdQDeuvjb/epkyZxJQpk5783HSc4Ux/Y5QztrGNbWxjG3tDGG7ScB2w\nZUppj5zz7eW7vYA7gXnAh1rKHwx8onyeR9xIeRFASmkGcb/CdTnnRSmlhWX4JaX8S4CFOeeO3c/Q\n07Ocnp7lTJkyiZ6e5fT29g1atru7q1G54ZTtdDljG9vYxja2sTtl6tSthiwzrKQh5/yHlNKVwAUp\npXcT9zTMAT4OfBM4I6V0NvAF4J3AVsBlZfTzgKtTSvOAG4BzgO/knBfWhp+RUqrOOpwOnDWc+RtK\nb2/fkxXc29vH6tVDV3bTcqMxTWMb29jGNraxOzHNThnJBZFjgNuJxyMvAP4t5/y5nPNS4NXAYURS\ncABwZM55OUDOeR5wAvFyp7nAEuKdD5WzgEuBy4FvABfmnD8zgvmTJEmjYNivkS7JwbHlX+uwG4AX\nrWPciyiXJ9oM6wNOLv8kSdIY4w9WSZKkRkwaJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIg\nSZIaMWmQJEmNmDRIkqRGTBokSVIjJg2SJKkRkwZJktSISYMkSWrEpEGSJDVi0iBJkhoxaZAkSY2Y\nNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGjFpkCRJjZg0SJKkRkwaJElS\nIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGxg93hJTSUcDlQD8wrvz/\nzZzzX6aU9gPOA/YBbgbelXO+sTbu0cBpwDTgh8DsnPOS2vBPAscTycz5Oec5I10wSZLUWSM507AX\n8N/Ejn8aMB3425TSZOBK4BrghcB1wJUppUkAKaUDgC8BpwIHAVOBC6qJppROAo4GXgu8ATgmpfSB\nES2VJEnquGGfaQD2BG7OOT9Q/zKldDywrHZ24MSU0quANwEXAe8BLs05f62UfxuwIKW0c855AfA+\n4MM55+vK8DnEWYlPj2AeJUlSh430TMNtbb4/EJjb8t3PgFnl80HAtdWAnPM9wELgoJTSdGAG8NPa\nuHOBnVNKO4xgHiVJUoeN5ExDAv48pfSPQDfwDeKSw3TiPoa6xcDM8nk6cF+b4TuVYf0twxcT90zs\nVD5LkqSNaFhJQ0rp2cAkYDlx2WFX4LPA5PJvZcsoK4GJ5fO6hk8GyDmvahlGbfz11t3dRXd315Of\nhyrbpNxwyhrb2MY2trGNvSGn2WnDShpyzgtTStvnnB8pX/0mpdQNXAxczdo7+InAsvJ5xTqGrwBI\nKU2oJQ5V2WV0yJQpk5gyZdKTn5uOM5zpb4xyxja2sY1tbGNvCMO+PFFLGCq3AlsC9xNPU9RNAxaV\nz/euY/i9xKWIacR9DtWw/tr4662nZzk9PcuZMmUSPT3L6e3tG7Rsd3dXo3LDKdvpcsY2trGNbWxj\nd8rUqVsNWWa4lyf+D3AJsFPOeUX5ej/gQeImxg+1jHIw8InyeR5wKPEkBSmlGcT9CtflnBellBaW\n4ZeU8i8BFuacO3Y/Q29v35MV3Nvbx+rVQ1d203K
jMU1jG9vYxja2sTsxzU4Z7pmGnxOXC76UUvo4\nsDtwJnAG8E3gjJTS2cAXgHcCWwGXlXHPA65OKc0DbgDOAb6Tc15YG35GSqk663A6cNZIF0ySJHXW\nsO6iyDk/BrwSeAYwH/gi8Pmc87/mnJcCrwYOI5KCA4Ajc87Ly7jzgBOIJy3mAkuItz9WzgIuJd42\n+Q3gwpzzZ0a+aJIkqZNGck/DrUTi0G7YDcCL1jHuRZTLE22G9QEnl3+SJGmM8QerJElSIyYNkiSp\nEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBokSVIjJg2SJKkRkwZJktSISYMk\nSWrEpEGSJDVi0iBJkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLS\nIEmSGjFpkCRJjZg0SJKkRkwaJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmN\njN/YM7ChrVq1ivnzf0dPz3J6e/valpk5cx/Gj99yA8+ZJElj24iThpTSlcDinPPx5e/9gPOAfYCb\ngXflnG+slT8aOA2YBvwQmJ1zXlIb/kngeOLsx/k55zkjnbd1ufnm33L3ES9l5iDDbwG46mr233//\n0QgvSdJT1oguT6SU3gwcWft7MnAlcA3wQuA64MqU0qQy/ADgS8CpwEHAVOCC2vgnAUcDrwXeAByT\nUvrASOatiZnA/oP8GyyZkCRpczfspCGlNBU4E7i+9vWbgWU55zk5nAgsBd5Uhr8HuDTn/LWc883A\n24BXpZR2LsPfB3w453xdzvkaYA7w3pEtkiRJGg0jOdPwKeAi4NbadwcCc1vK/QyYVT4fBFxbDcg5\n3wMsBA5KKU0HZgA/rY07F9g5pbTDCOZPkiSNgmElDSmlw4GXEPcm1E0H7mv5bjGwU4Ph04H+luGL\ngXG18SVJ0kbW+EbIlNJE4kbHd+ecV6aU6oMnAytbRlkJTGwwfDJAznlVyzBq43dEd3cXXV3jGpXr\n7u568nOT8k3KdrqcsY1tbGMb29gb0nCenvgocEPO+X/aDFvB2jv4icCyBsNXAKSUJtQSh6rsMjpo\nypRJjctVZZuOM9zpd7KcsY1tbGMb29gbwnCShr8CdkgpLS1/TwRIKb0RuIR4lLJuGrCofL53HcPv\nJS5FTCPuc6iG9dfG74ienuV0dY1j6wblHnzwUe688zYee2wFfX39g5bde+99mDRpS6ZMmbTOdz9A\nZIWdLDca0zS2sY1tbGNvGrGHa+rUrYYsM5yk4aXAFrW/zyR27HPKsNb3KhwMfKJ8ngccStxASUpp\nBnG/wnU550UppYVl+CWl/EuAhTnnxcOYvyFF5Q59Oqe3t4+bbrppne9zgHinQ2/tnQ69vX2sXj30\nCux0OWMb29jGNraxN4TGSUPO+e763+WMQ3/O+Y6U0gPA6Smls4EvAO8EtgIuK8XPA65OKc0DbgDO\nAb6Tc15YG35GSqk663A6cNbIF6szqvc5rMvDG2JGJEkaAzpyF0XOeSnwF8BhRFJwAHBkznl5GT4P\nOIF4udNcYAnx9sfKWcClwOXAN4ALc86f6cS8SZKkzhjxa6Rzzse1/H0D8KJ1lL+IcnmizbA+4OTy\n7ynH37OQJG0ONrsfrBoN/p6FJGlzYNLQIUPd//AwnpGQJD21mTRsQE3PSOy7775DJhdggiFJ2rBM\nGjawJmckhkouwEsekqQNz6RhjPJxT0nSWLPhX1wtSZKekkwaJElSIyYNkiSpEZMGSZLUiEmDJElq\nxKRBkiQ14iOXT3G+ZVKStKGYNDzFdfItkyYXkqR1MWnYBHTiLZO+YVKSNBSThs1Ik+RCkqTBmDRo\nDU3ukQAvZUjS5sikQWvwx7IkSYMxadBa/LEsSVI7vqdBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBok\nSVIjJg2SJKk
RkwZJktSISYMkSWrEpEGSJDVi0iBJkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqZFh\n/zR2Sml34HPAIcAS4N9zzp8qw3YBvgjMAu4C3p9z/lFt3COAs4HdgOuA2TnnO2vDTwROBrYBLgPe\nm3NeMZIFkyRJnTWsMw0ppXHAlcBi4AXAO4EPp5TeXIp8G7gPeBFwMXBFSmmnMu4M4ArgfODFwIPA\nt2rTfgPwEWA2cDhwEHDmSBdMkiR11nAvT+wA/Ap4d875jznnHwA/Bg5NKb0c2BU4IYdPEmcTji/j\nzgbm55zPyTnfChwH7JJSOqwMfx9wds75+znnXwInAG9PKW25XksoSZI6YlhJQ875/pzz0TnnxwFS\nSocALwF+QpwZuLHlcsJc4lIFwIHAtbVpLQduBGallLqA/YGf1sadB0wA9h3OPEqSpNEx4hshU0p3\nEUnAdcDlwHTi0kTdYmCn8nldw7cDtqwPzzn3EvdM7IQkSdrohn0jZM3rgWnAecTNjZOBlS1lVgIT\ny+d1DZ9c+3uw8ddbd3cXXV3jOlZuNKb5VIrd3d315Oehynay3GhM09jGNraxN7XYo2HESUPO+UaA\nlNIHgK8RNzhObSk2EVhWPq9g7QRgIvBwGcYgw5fRIVOmTOpoudGY5lMpdlV+Y8+HsY1tbGMbe8MY\nVtKQUnomMCvn/O3a178j7j1YBOzZMsq08j3AveXv1uG/Ii5DrCh/31ZidQPb18Zfbz09y+nqGsfW\nHSo3GtN8KsXu6VnOlCmT6OlZTm9v36Blu7u7OlpuNKZpbGMb29ibWuzhmjp1qyHLDPdMw67A5Sml\nHXPO95fvXgz8ibjp8e9TShNzztVlhkMZuLlxXvkbgJTSZGA/4CM55/6U0vwyvLpZ8mBgFXDTMOdx\nUFG5Q5/OaVpuNKb5VIpdNdbe3j5Wrx664Xa6nLGNbWxjG7tzSUMTw00a5gM3AF8plyV2Bc4APkHs\n7O8GLkgpnQa8hngi4tgy7peBk1NKpwDfBU4F7sg5V0nCucDnU0q3EDdEngt8wZc7SZI0Ngz3kcs+\n4LXA48DPgS8An8k5/3sZ9hriEsMNwFuAo3LO95RxFxA3Tx4PXE88MXFUbdqXAqcD/wFcRTyVMWd9\nFk6SJHXOsG+ELJcl3jjIsDuAl69j3KuA561j+Jn4FkhJksYkf7BKkiQ1sj7vadBmbtWqVcyf/7t1\n3sE7c+Y+jB/vm8AlaVNg0qARu/nm33L3ES9l5iDDbwG46mr233//DThXkqTRYtKg9TKTeERmMA9v\nqBmRJI0672mQJEmNmDRIkqRGTBokSVIjJg2SJKkRkwZJktSISYMkSWrERy416nwJlCRtGkwaNOp8\nCZQkbRpMGrRB+BIoSXrq854GSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBokSVIj\nJg2SJKkRX+6kMaPJ66bBV05L0sZi0qAxY6jXTYOvnJakjcmkQWPKUK+bhnjltD+CJUkbnkmDnpKa\n/gjWvvvua3IhSR1i0qCnrCY/guUvbEpS55g0aJPnL2xKUmf4yKUkSWrEpEGSJDVi0iBJkhoxaZAk\nSY2YNEiSpEaG9fRESulZwGeBlwPLgG8AH8o5r0op7QJ8EZgF3AW8P+f8o9q4RwBnA7sB1wGzc853\n1oafCJwMbANcBrw357xixEsmSZI6arhnGr4JbAkcArwZ+L/AaWXYt4H7gBcBFwNXpJR2AkgpzQCu\nAM4HXgw8CHyrmmhK6Q3AR4DZwOHAQcCZI1oiSZI0KhqfaUgpJeAAYIec84Plu48AZ6WUfgDsChxY\nzg58MqX0CuB44ONEMjA/53xOGe844P6U0mE552uB9wFn55y/X4afAPwwpXSKZxu0IQznx7L6+rp8\ny6SkzdJwLk/cDxxZJQw12xJnBm5s2cHPJS5VABwIXFsNyDkvTyndCMxKKc0l3
r1zam3cecAEYF/g\nF8OYR2lEhvNjWd3dXb5lUtJmqXHSkHN+FPhh9XdKaRzwXuDHwHTi0kTdYmCn8nldw7cjLnk8OTzn\n3JtSWlKGmzRog2j6Y1lNyvqjWpI2RevzGumzgP2IvvMDwMqW4SuBieXz5HUMn1z7e7Dxpaccf/dC\n0qZmRElDSukM4j6Ev8w5/y6ltAJ4WkuxicQTFgArWDsBmEgckK2o/T3Y+B3R3d1FV9e4jpUbjWka\ne9OKPdQZiZ7uLnp7VzN//nwee2wFfX39g5bde+996O3tGrLs3nvvQ3f3lk/Ox1Dz2aTccMp2upyx\njW3skU+z04adNKSU/g04ATgm51w9AXEvsFdL0WnAotrwaW2G/wpYQiQO04DbSoxuYPva+B0xZcqk\njpYbjWkae/OLfeedt3HLAQcMeT/FlOuvj8/rKFuVq85ePFXqwNjGNvboTrNThvuehlOBdwB/lXO+\nojZoHjAnpTQx51xdZjgU+Glt+KG16UwmLm18JOfcn1KaX4ZXN0seDKwCbhrm8qxTT89yurrGsXWH\nyo3GNI29ecZucj9F07I9Pcvp6VnOlCmThnwapLu7q1G54ZTtdDljG9vYI5/mcEydutWQZYbzyOWe\nwIeBfwF+nlLaoTb4GuBu4IKU0mnAa4h+7dgy/MvAySmlU4DvEk9K3FEetwQ4F/h8SukW4obIc4Ev\ndPpxy6jcoU/nNC03GtM0trE7Mc2qI+nt7WP16qE7lablRmOaxja2sUd/mp0ynAsirynlP0zs2O8j\nLh/cl3PuA44iLjHcALwFOCrnfA9AznkB8HrivQ3XE09MHFVNOOd8KXA68B/AVcQbI+esz4JJkqTO\nGs4jl2cAZ6xj+B+J10sPNvwq4HnrGH4mvgVS6ggf95Q0GtbnkUtJY5SPe0oaDSYN0iaqUy+gAs9K\nSAomDdJmbDivz/ashCSTBmkzN5zXZ0vavG3410lJkqSnJM80SGqk6RMZ/nS4tOkyaZDUSNMnMvzp\ncGnTZdIgqbEmT2QMp5ykpxbvaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasRHLiVt\nFP5YlvTUY9IgaaPwx7Kkpx6TBkkbjT+WJT21mDRIGvOa/u6FlzGk0WXSIGnMa/q7F17GkEaXSYOk\np4Qmv2fhGQlpdJk0SNpkND0jse+++/rkhjQCJg2SNilNzkj45IY0MiYNkjZLPrkhDZ9vhJQkSY2Y\nNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGjFpkCRJjZg0SJKkRkb8GumU\n0kTgBuA9Oedry3e7AF8EZgF3Ae/POf+oNs4RwNnAbsB1wOyc85214ScCJwPbAJcB7805rxjpPEqS\npM4Z0ZmGkjD8J7BXy6BvAfcBLwIuBq5IKe1UxpkBXAGcD7wYeLCUr6b5BuAjwGzgcOAg4MyRzJ8k\nSeq8YScNKaU9gXnAri3fH06cQTghh08SZxOOL0VmA/NzzufknG8FjgN2SSkdVoa/Dzg75/z9nPMv\ngROAt6eU/F1aSZLGgJGcaXgp8GPiEsS42vcHAje2XE6YW8pVw6+tBuSclwM3ArNSSl3ED879tDbu\nPGACsO8I5lGSJHXYsO9pyDl/vvqcUqoPmk5cmqhbDOzUYPh2wJb14Tnn3pTSkjL8F8OdT0mS1Fkj\nvhGyjcnAypbvVgITGwyfXPt7sPHXW3d3F11d4zpWbjSmaWxjG3tsxe7u7nry81BlO1luNKZp7M0r\n9mjoZNKwAnhay3cTgWW14a0JwETg4TKMQYYvo0OmTJnU0XKjMU1jG9vYYyt2VX5jz4exjb0+0+yU\nTiYN97L20xTTgEW14dPaDP8VsIRIHKYBtwGklLqB7Wvjr7eenuV0dY1j6w6VG41pGtvYxh5bsXt6\nljNlyiR6epbT29s3aNnu7q6OlhuNaRp78
4o9XFOnbjVkmU4mDfOAOSmliTnn6jLDoQzc3Div/A1A\nSmkysB/wkZxzf0ppfhle3Sx5MLAKuKlTMxiVO/TpnKblRmOaxja2scdW7KpT7u3tY/XqoTvoTpcz\ntrE7Mc1O6WTScA1wN3BBSuk04DXEExHHluFfBk5OKZ0CfBc4FbijejEUcC7w+ZTSLcQNkecCX/Dl\nTpIkjQ3rexdFf/Uh59wHvJa4xHAD8BbgqJzzPWX4AuD1xHsbrieemDiqNv6lwOnAfwBXEe94mLOe\n8ydJkjpkvc405Jy7W/6+A3j5OspfBTxvHcPPxLdASpI0JvmDVZIkqRGTBkmS1IhJgyRJasSkQZIk\nNWLSIEmSGjFpkCRJjZg0SJKkRkwaJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQ\nJEmNmDRIkqRGxm/sGZCksWzVqlXMn/87enqW09vb17bMzJn7MH78lht4zqQNz6RBktbh5pt/y91H\nvJSZgwy/BeCqq9l///034FxJG4dJgyQNYSawrpTg4Q01I9JG5j0NkiSpEc80SFIHeO+DNgcmDZLU\nAd77oM2BSYMkdYj3PmhT5z0NkiSpEZMGSZLUiEmDJElqxKRBkiQ14o2QkrQBNXk0E3w8U2OTSYMk\nbUBDPZoJPp6pscukQZI2sKEezYR4PNMXRmmsMWmQpDGq6Quj9t13X5MLbRAmDZI0hjV5YVQnkwsw\nwdDgTBokaRPQieQCPHuhdRtTSUNKaSJwLvB6YBnwrznnT2/cuZKkTUfT+yk6fWmkr6+r8VmOJmVN\nWDaOMZU0AJ8CXgi8DNgFuCildFfO+fKNOVOStDnq5KWR7u6uxmc5hio7mgmLici6jZmkIaU0GXg7\n8Mqc803ATSmlM4H3AiYNkjRGNf2hrqZnOZpOczQSlk4mIptiwjJmkgZgX2J+rqt9Nxf4h40zO5Kk\nsa7TCUunEpGxkLBMmDBhHVFHZiwlDdOBB3POq2vfLQa2TCltn3NespHmS5K0GelUIrKxE5b99nvR\nEFGHbywlDZOBlS3fVX9PbDKBW4YYNqO7i66ucR0pNxrTNLaxjW1sY2/esYfSPYxy48d3/uelxvX3\n93d8oiORUnoj8Nmc87Nq3z2PqM/tc86PbLSZkyRJY+pXLu8Fnp5Sqs/TNGC5CYMkSRvfWEoafg08\nARxU++4lwPyNMzuSJKluzFyeAEgpnQccAhwP7ARcABybc/7WxpwvSZI0tm6EBPgA8UbI/wUeBf7J\nhEGSpLFhTJ1pkCRJY9dYuqdBkiSNYSYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1MtYeuQQgpTQBOBs4\nmvj9iS/nnP+xDPs28H+BfqB6AffjwGMt5W4B9gL6Srl+4GfALKAbWAEsL8NWEG+frKZZf7F3Pc4T\nwBa1aQJbswyVAAAbs0lEQVQsY6AeVxGJ2IQ25QZ7WfiiltgAvWUeHwZ+AewD7NBmmo+XeF1lfMq8\n1Mv1A98jXpS1bS1uHwNJY32aVexHgeuBF5TxJg6y3NXvgtTnv/q7H/gp8e6NbmJdrmSg7nceZLmX\nAh/OOf9bm/VIGXd8GXdp+bx1+b++3NcCh5ZpVt9Rm05rXS4HntGmHLXvqrZ2KTC71NPTWbvOf0LU\n+Xii3awuy70UeHbLcq8u5R4HbgdmAFNo39a6y7h95XN3m9j/A7y8THNVib9qkNj1ZewDvkn8vk5r\nnS8rcfqBnvL9tm1i/y/w0hJ7XXXeuj30Af8KvLpN7NY6XwZsw5ptsmrnh5Rl3bpM583AqcAk2re1\nodr5qvJdfTmqbadd7JVl2OeI9b2u2I8BNwPPpf36fpyBtruyDNuGtdt5fbmnAg+Uco8RPwRYj13N\n+wPEun418KyWaUJsC1uUz0vL/9vQvm+plvsxYnv//SDLXa3HLuKlffsMErtq511luuPKOhgs9hPA\nM1m7b
dWn2c9Af7eu7bu+vtv12VW/Vu9bqm2s6kPq/VrrNPqJ9U+b5W4Su/653b7qwZzzMwFSSjOJ\ndvgi4B7gYznnr7eZ7rCM1TMNnwVeAfwZ8BZgdkppdhm2Z/luOnAR8EfgiHq5lNJupdyfSrlpwNeB\ng4GLge8QK2db4N3EBvvHUvZrwJ3AnwOvJ1bk6hJrEXAX8NVa7DuJn/CG6CS6gEdq5aaVaf6xLM/l\nREN5CPhKS+yLiI3kVqJz3KrMx7nEa7ZvJN6cWcX+EbCkxOxhYKdUj/1V4DmlPj9d5vP6Em8p8Adg\nxzLNZcAvgeOIHxD7M+LdGYuJRvfFWuxfAb8ty3ppme97icTsBqIz+Fqp868D/00kU1sDJ5Xh368t\n93LgdyU2wKdSSm9rWY8XAXeUuvsfYkPdjthweogd7rfKcv8n0aF8FziN2KAeA+aUabyDtdtQ9U6Q\nq8o07qgtz1dr5d4B/L9SR+9tWTdVnR9apvMTBnbuZ5bhP67FXlrGf1epn32I9T1YW/s1sb6XlnpZ\nztrr+yVEG5pLdEzdwCfaxH4IuAZ4HfAg8PelXLs6f4RoN/1lPrcq47Rb7h8DHy9lVwHHtqnzHuCy\nltgXtondrs7/mjXbZL2dv57YIW4FvAf4CLGzqfcZw2nn3weuINrOE2VZ7mHNtlbF/jOib3lmWQft\nYtfb+ZbEG3A/PMj6/j7RNnrL8kwkDiTaxa76qgml/mcBTwN+zkC/1kvs1I8jEoB3EX3tvbVpXkS0\ns5XAfxF9X3cpv6TN+q6W+4vAHsDzB1nuaj1W9fPnwN+1iV31LTcz0H62KLGr+qnHflWpO4jtptpu\nq7b2n0R/ezFxELq01MNf0H77rtZ3D7HO9yr1cQLRNlv7lu8ykCB/hTX7tUtYc1/yWK1cu+VuEntX\n4sWH04mD54fKvw+V4XvBkwfe3ynL9nyi77kwpfRC1tOYe09DSmkqsfEennOeW747hcjG301kc3sS\njWiwcs8mGug9Oednl2n+CfhRzvlVKaVFxE7nSOAsoqPsJjKyX7RMcwGwPdHQHwX+BXgTsYG8hsh2\n9yY2miVEx3JUmeYbiM7jyfkssRcBuxMr/tFa7OuJHcJuOecFKaWeMu6DwIvLtH5ObMivBb4NHA6c\nUWJfB/wzsZNcIzbxw183ERviCiIpuabU042l3Hhg55zz3Sml+8q8/AA4pkz3TcSG+lYiEdiZ2BCf\nS2wMx5Xl37vM85/KNLYp830nsVM5E/gkkbDMKuUA9ijLfQrwN0QnuRvRSe9b5vGDRCf7TKITuZ/Y\n0E4jOql/II50qx3q08q0PkzsTFYBB9LShsp0rinze1+Zr2Wlrg8pdXt4qb+LgF3KMj+HOLKq1k3V\nhq4p7eCMsmyvJjqFE4gO4sASexzw8tI2fkYkQb8A3saabe1viI5vh7LelxDJ2sXEDrpa31Wd70bs\niAEOIDrKA1pi/y9wQ875w+22nVqdfxw4meis7iPa6e1Ex3xwy3KPJ9b/rFLnAFcy0PFVdX4r8G85\n5y8NEbte5zOIHcgepc7/meiMX1rK/l2Zz57a+LcTbe45tdhN2/nhwOlEMvNRoo0cTPRBb2OgrbVr\nQ5OB37TEbm3nP2Ngh/7BlvV9JNHpv5LYSd1PJN4fZc12Xl/u6UQicinRXpaV5a+2sRXA9JzzYyml\nexhIMN5YluefyrweDxyRcz629EHXE9vRvsS2UF/frX3L04n+pLXO6/XzzDI/u9bqsopd71u+XsZ/\ngOhbWvu148pyP41oO1cS7aG+j3iy34cn9xGnEP3+P7P29l2t71eWOM+uppdzvr22L6n6lhuIfm1H\nom+ZxUC/1rp/eqzU0fZtlnvI2LRIKb2d2M/knPPLWoa9gEiMt8k5Lyvf3QBcknP+dOu0hmMsnmk4\nFHikqmiAnPOZOee/BRKRNd45WDnitNHTiE5ydW2aD5eEYRzROVxJ7JQ
WlGkCHFafZmkgU4iVu3sp\n93+IjvcR4GpipR5HbMB/X6bRX8pdXZ/PEvsEokM/tjbNKvYy4IHSoYwjdjofJDbQPuDuUvYxouN7\nJOc8N+d8SM75NUTW+0S72MCniI73f4ns+tOljn5Syi0HFuacqxi3EhnsS0vsw4hk5xEiyVmYc767\ntm7uIHaA/5Fz/kOZ5qOlLv6W2IBnEJ3aslL2F6XcCuDXOecFtfV4RolVrcdDy98Xl3qZRGT1f0N0\nVACX5ZyfXyv7Fznn1UT2fTVxlqc6jdnahj5PnFL9PbFTTsT28YUSpyr3SuBLxNEuZV3W103Vhv4s\n5/x4zvm9RKe/NdH5jQN+WGIvI9rl3LLch+ScZxKX5lrb2veAI3POD9bW944lZn19P9mGcs7vLfE/\nQrSheuxHiEsxt9XqvHXbqcqtLPXyeKn7NxId57jW5WZge5hJdFpbEjuLNeq8zPuQsVvq/IYS6/kM\ntMkba9OcXsq9sZRbSCQD1GI3bucl9p5lGau2VvVBa7S1ljb0cPnXGru1nR+Sc94N+AZrr+/qEtxv\na7EXljpvbefV0fyWJd6cMp+9ZT0cWtbhLSVh2KZM87wyrI846/PBsiwXl4Sh6oO+VJteazuv9y0L\niCP51uWu18+/lOW6vF6XVWzW7Fuqdn5Hmd4a/RpxMPJj4qAB4mCkdR/xcC1hGFfWd9Xvt9u+9yTa\n5UJim6tPD9buW5YQZ6y/RyT09X6tvi/ZjzhbdFy75W4Yu9UhRH/2gTbDHqL0vSmlcSmlWWV6Nw4y\nrcbG4pmGvyN26v9GZNMTiNM51RHA54jT8q8mGti5xGmaCQxk2J8CPsbAaftVxEbziTLN5xEVeh6R\n+V5ArOxVLdN8JtGBTSMa60uJHXZ1fexcIjOu35NwP7ERP0Z0oCtaYten+aaW2NW9ELeX/79CdM6f\nJxrBYqJhLSMa5sEMXF9eWv7ftU3sy4mGdQ/R4BcRHXEPcQr91USH82D5bgKx4b+xzC9lvK5SP9cT\np1UX1dbNBcSO9IoyrKrzX5flrF9bhIHr539OJAB3ENdjn1ZijSeOcKv1WF0HrtbN1sSO588ZOEV4\nV5lWde9EVefPLutlx9p6+h5xyaab2GG9rCzPuLIs/SX+FbVy9bZW1el7S7k/Ee3q7pbYe5Rx7yGO\n1PuJU5JHlDp/qNTFM8o6PLsMa9fWqthVnT9MdFj19d3FwCnaqg29ijjy6avF7i7TWl7q7LFS/mNE\nklGv82uIDnthLfY3iR3KrS3L/RMiSawOSB4o09iqVufjS8ye8v0jZV4+yuDrewKR7L6ttg4fYaCN\n1+t861LXRxBHnu8k+obhtvMLiEscy4gzBwvKfD+99rmKfQ3RFywmkuPzynTqsVvb+cPE2Zi3sPb6\n/hFxCr1qt68Azie27wXEjq++3B8rw/qJZG33Ms/fKeuuKn8fA9vOJcSZQso0t29T5zuXuv4RkXi2\ntvNqO1hE7Hy/Wz7Xl7u+jS1h4HeFZhFH2fXY7fqWm4jLdoP1qc8uy3orseOut7X68tT7/fuJ7fZb\nxFF+fX0vJdrJKgbOKNxdvmuNvTWRvGzLwL1GVb9Wjz2jzNfhJX7rcjeJvQA4Nef8A4CU0kIG2tSO\nxKWn9+ec7y/DP8jA9tQFfDTnfBrraSyeadiaOFU5mzgaP4m4lnkisdInEad5LyQq+2TitM5JwPuJ\nU2XbEA1+GbFzvL1M8/3A24lG3lOmP6dM86ttpvlEGXYicapxObECV9TK/ZJYWY8QR5TbEyv8bmKH\nUI99bMs0j2qJPa5M937iGtWJxCm5ql4SkVl3E42vj+gAu4nOelqZfrvYJ5V/exGnp6uMu6rLLmKj\nr+ryGCK56SvLu4zYaXQTN9nd1bJuDi7zeUVLnR9BHEldxMBR5A9KvGfUYqcy33OIDXAbYP/aemxd\nN58r9fXtMt2VJf6tbep8KdGh9hI
b9BbEjW8XEjuwI4mN6ySiQ5tF7GxWl+Vp19aOLfH3qK2bU9vE\nnkOccdmWaJv9ROdbLfe0Mn//WGLMKd+3a2tV7P9HnDGaXJa7vr5nlBiZgTb0XAaStSp2dcPsr4kz\nZNWNpK9vU+dHEm16fon9r8R17HFtlvu1DNxr01uW+9dt6nwcsSP5QCkzqUxzsPV9ErG9QLSnqk3O\nbVPn95RyhxEJ/DhG1s7fU76/vkxvu/KvupGt3taOJY4iFzDQLtrFrrfzicTB0WOsvb6PIpKKqp1f\nQuw8KX+3tvPTGUim/kDcj7AFcSbswjLeFOKM0Xml3NFlnGrbqfqSqs5PIdru10pdbkf7dn4S0R+M\nI3Zerctd38Y+V2K9kThz1Bq7Xd/yh1JmsD61ulHzFiKJqbe1annOZM1+/wRivV9eK1et7x+Wv5cT\n/VBv+bykTeyvl3pdRrTLLiIBrcf+NAM3y18yyHI3if094DsppRemlKrE+EHi8tRfUpK2cmZhPLG/\nPI/oRz8AfDCldBjraSwmDauJynpLzvkX5Qer/gU4Ief8cWDHnPNXiRtJJhD3JhxMrLTHiGuVHydW\n6mM5598SnRjE6bmf55zPJhr+E8B+xLXVd7RM87XEEcE/l9jvIzaGlxGJwQTiVM8RRCb/caJD/hiR\n+b2MaFzVtahbiEZRTfMkooOux94CmFfm6RIGGlrV4Z9T5m0CsSFNInbQHydOVX2I2OBeRmSyVexF\nOed/L3X5mzL/H2ypy24i8TmYSEy2LPGXMnBWZK8S+zbgafV1U8o8nnP+akuddxEJ4OvL/C8ry1Nd\nt3u0lFnNwA1QjxIb99Oq9cja6/uaMv2JZXmnETfVvYy4PlnV+Q3EWaJVxGn7t5a6mcHAky53AruW\n5VlA3D+yC3B0u7ZWylWd+a3l/3Nyzp9gzbb2i3L98KOlHvcodbhjmV53KTs/53wW0SaXEZ1Oa1ur\nx/5tGfaXxJHVy1izrf0PcV9B1YZ6S53vS3Q01Rmtq8v831rq/KFSpl2d/4To0J9OnF2pbhptXe5t\niW1nJ2Kn01PWzXOII8KuMk9XE0e9ucR+hNjBDba+f8zAkzqnMNAmX8ea63tbBp4YOIa4D2DBCNv5\nj4jO/4NlessY2MZeR5wdqGJnYie/E7H9v7hN7CfbeWkXHyGOnMez9vq+iTgyvZqBO/Y/RPQhx7B2\nO/9N+dwPHFD6lMOIvuRRYn3fyEB/QYn9UIn9OiKxrre1b5ayBxBnDbpZe30vIvqDnYjtN7VZ7ie3\nsbI8AP1lW2yN3a5v+WJZhpfR0qfmnH9BbKvVcs9mzbZWLc+BtX7/8RK3dfv+EXBhzvmNRBt7iDib\ncFepy0QkXVWd/55Iap4g+pY3s2a/VsV+U4nZv47lbhL7KiIReweRhP0JeEHOeV7O+edEIrYvcVD4\n18CLc84n5px/nXP+DJH8zWE9jcWkYRGwIud8T+27THTy5JwfrZcjGuqOxKNV2wLPTSktJbK7ncuN\nPFsTG+tzatOcVso/URKC1mnOJM4gbFWPnXP+EwN3Ui8lssBeYmOfUcadUcotITLK6i7/VxJHV9uV\nef58m9jXln/PIo4EphLr6fM555Nr5W4jOpD7a7EzkQj8iWh0VTY7I6W0tNRLdW376y11uYqBI4Xn\nEZ3MdkTC8WhZruoySiYSu2rd7Ex0fNX1zmqafcTG8jziqPgnRKKzM3GE2M3A44iPlnVSrcepxJHn\np0v5j5bYvyI6qI+UOE+UnfOjZV4o03uiTOMy4hTpPxJnNsg5V09gVI877URc+1tK7LieThw9/HfL\nuqnaGgxcZz23tm6qsquBlFIan1J6HZGIPI3oHB6srZsnyr/typHB9mXZZ7Rpa78Cdkwp/TvRea7M\nOX+rVq7e1vqITuZviaPfbYlHsW6ulVtBnDmobzvPKuupXZ335Zx3IHZYfUTbXNyy3H3A8pzzypzz\
nCuIpgW2IRP6OErtvkNjPKPPcLnbVJqskmlqb7KrXY5netLJedyeOtJ5d6weG085/Vearujt/KpFg\nTWfNtgZx2fDKEnMPog19uSX2k+28tr63GmR9/5bYVu6vxZ5G9DfVvR1VO9+uNo/9xHqEOAtalVtB\n7MB+TCQB/UTCdFfLtrOKuK/iBVVVEzvltxLbWn19Q/Q71XLvT6zLr7Qs95PbWIkPsE1K6bdtYucy\nrKqfGWW5Fw3Sp1LaGpRLTC1tbSWRzFXbbS7z2EX77fsZ5btbyjzfStw8+kBtmlXfcgXRJv+pNt5c\nBtpkNc1qh08ZjzbL3ST2juW7HYl9yXdyzlUfTyn3UBn+IgYSycqviG1rvYzFpOE6YMuU0h617/YC\n7kopfSWl9KV6OeJU8u+J7OxM4gj8B0SF3UtkXv9JrNz9UkoTUkqvIE7JLgN6BpnmeKIBnAI8VMVO\nKT2b2NC3IDqPiUQ9Vtnhx4BVpdz2RGa4BbHxH1jm6e8pN9C0if1O4M6ygbymjPsosfHVy+3AwF3y\nVeyPt4kNcapyXwbuKO8FrmqJvQWxMS0kjgDHERvc+FJuTwYet5sGTEopPausm+qGx+rIuZpmV5nX\nanovIzrFKcRRcH+Zx26iA/hjbT0+Uua1Wo+vKrFnEQ3/4DKtB2vtonq+/wdleapHTfuJjL5qQ5fU\n6qePOH3+5VI/dxPtYgmRENTrvGprEAkNRPJQrZuq7HjiSPOZxGWZ1zOQKHyuxP4eA5cIFpV5PIXo\naFbV2trWpcwsosOZXco+0NImq2luUWJvS5z6fl1ZnmUppery2ffL8ryf2F6qOr+/zGNrnb+CuKYM\ncXRdXUJY3LLcXcS2u11KaQlxTf6Rsjz1Op9EnK2ob7dLiM693fq+lYE2BPE4br1N1rexU4izf/3E\nUdrNxI626geG085fRxxxLyTOmDxEXMteQbS1fgZu+vsv4lr8y8r8P0Ls+Ouxq3Z+OwPrexUwpc36\n3pXY2e9Yi/3p8rm1nS8q81g9jXNPSukrDDxu+ANifb8DuDXnfGOZTl9ZX/VtZ0JZhheUx/ZeW757\niNjO6+u7dbl/T2yT1aXcarmrbex84ibWXuJg4ndtYk8j2sc01jzi7m7Tpy5KKW1T2hrA/W227y2J\n7e+2UmYvBt6r0Lp9vw44uNwo+k3iSP4TwIqU0o5EIlhtY88htrN+ypNcpc7r/VrVZ0wmkoYeavuS\nluVuEvv3RDJ3a5nu0fXLDaXc9qXcfWX6dXsy+E2VjY25pCHHnfdXAheklJ6fUnolcUrlXOL69VvL\n8/t9RId3CAM36cwGqtMw+xErrYs4bVOdor2MONVW3XB0cZnme8rf1TSrxKC3TCcTN2H9uMT7A9Fw\nlhM3//QTO8O9yv9Vuepxt33LPB1V/v4UceRdxa6uRU4B9kgpHUucmusjOouq3FSi03khcSnj8hJ7\nG6JRbNMm9u5EZ/QcItl4sNRRa11C7Iz/ooy3NXEj1duIzuE7pdyLiCPF7xFH8I9TbppKKb0tpbQ7\ncSTaW+b3Y0RHcmKZv+8RndE8olP4A9Ghvrh8/15iQzu/th4X1tbNTGInP65M709lHr9BXBN8ZVmm\n5xI3Ft5W6vJ/iZ38m8v83FZbnv8q9fMsYgf2yzb1cwjww5TSkcTNaxB3X781pfSelNIBxJF4LwPX\nOH9evtuW6Lz/ijhNfHutzmcRbZeyPNcw0NZ+WIt9INF+nyj1cy9xGrJa3weW2FPKfP2UOELcmjgq\nqmL/gUjQtiA68dcQ11S3JDr51jrfD9g6pXQxseNZUtbX89os93jilP4CIgGeUuatXud3lPnfmWhr\n1fr+4iDr+0fE5ZbqqP73rNkmjyyxX1DW365E2zi/1P9kBp4gGE473x2YkFK6jIEzEnuV/+ttrdrG\nnijz/kzips2ftIndTRyRH1vmcSviDEDr+p5FtJeLa7H3JI5GW
2O/IKX0fOLU/7gS+yZiW/olsb3f\nXqYxPaX018SOeUKZTn3b+QOx3TxM9GvvIJLabYgzYq3ru77cuxCJUrvlrm9jlHq4pU3sqm+p+tTt\niHayC2v3ay8g1nUuy/3zEr91+x4P7J1SOo6Bl079hrW3791LnXy5jFP1qduV2D8psau+5dNEMvR3\nZfidrN2vHVLG35aBG4LvbLPcTWK/oUzvc6UebwDOSSm9OMX7F/4T+H7O+Rai3eyeUjo9pbRbWc63\nE/vH9TLmnp4AKNlW/Sjpc+U6Giml44kkYgaRcS0mKrK1XPVY3Gqicf4D8TKT1zBwRFxdKxzHwBHH\nLWWaRxIr/rPEin4rsQKrpxyq2C8pf/cRCUQ3sZFX5arYxxBPdtTj0ib2UmJj62Lg7XP1chAbwx/L\ncm/BwLXSPgbeAtgau6rLycSZjBe0qcuHiCOG6rrvr4hOsnqUa2JtuQ8tMapTcVcRG3o1vSr2iUTn\n1sXAncfLy3RmMHAa7hHi2l03cURzSs75iy3r8VYi4352rR6ruukvdbdFiX0/cVmi/tRGVZ/XEclT\ndbqv3oaqdzP8V6njdm3tCeJyQzW9+rr5PfA+4lTs6xm4BLOylP0NcRRVLfeDDLy98XGiE9qdtdva\nVsRpztblhoHrp1Wdv5PovLrK8qws89Ea+zFi59ld6u6snPMn2tT54rJuJjJwiWCw5X43sTPuLvO/\nqpS9uaXOVxIdcBexg/rgIOu7vm4uIxKMB8ryUuaptZ2vJNrJLqVuP0W07ZG088NKrP5SX91lePXS\noXbbWHUj54I2sVvb+S+IxK1d31LfvleUup6wjtjVcj9A7BhvLn9X63sFkVh2ETeLfreMt11tmvV+\nbSIDb4VsbXPV+q4v931EsnJNm+Vu3cbuIBLBdrHrfUt1L0D1REG7fm05kahVT760trV6O3+4tuzt\ntu/6+q5O/Y9n4G3A6+pbesuy1/u1h4kzdQ8Q+5I+4gbPdsvdJHZ1Y+gi4iDrPcTTGROJ+4z+rrrs\nnFI6iLhpeW8iCT8t5/wN1tOYTBokSdLYM+YuT0iSpLHJpEGSJDVi0iBJkhoxaZAkSY2YNEiSpEZM\nGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGvn/4D/X1Vhu4eMAAAAASUVORK5CYII=\n", 491 | "text/plain": [ 492 | "" 493 | ] 494 | }, 495 | "metadata": {}, 496 | "output_type": "display_data" 497 | }, 498 | { 499 | "data": { 500 | "text/html": [ 501 | "
\n", 502 | "\n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | 
" \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | "
Health_Camp_IDCount
065436543
165274144
265383954
365373859
465293823
565263809
665343597
765703564
865803517
965782837
1065862624
1165422368
1265622338
1365542303
1465712086
1565232084
1665362037
1765321993
1865391992
1965351882
2065491835
2165281744
2265551738
2365411547
2465811485
2565401426
2665851398
276564514
286546403
296530259
306561200
316569177
326563171
336524149
346544128
356560123
366531120
37655394
38657590
39655282
40658779
41656566
42655752
43655844
\n", 733 | "
" 734 | ], 735 | "text/plain": [ 736 | " Health_Camp_ID Count\n", 737 | "0 6543 6543\n", 738 | "1 6527 4144\n", 739 | "2 6538 3954\n", 740 | "3 6537 3859\n", 741 | "4 6529 3823\n", 742 | "5 6526 3809\n", 743 | "6 6534 3597\n", 744 | "7 6570 3564\n", 745 | "8 6580 3517\n", 746 | "9 6578 2837\n", 747 | "10 6586 2624\n", 748 | "11 6542 2368\n", 749 | "12 6562 2338\n", 750 | "13 6554 2303\n", 751 | "14 6571 2086\n", 752 | "15 6523 2084\n", 753 | "16 6536 2037\n", 754 | "17 6532 1993\n", 755 | "18 6539 1992\n", 756 | "19 6535 1882\n", 757 | "20 6549 1835\n", 758 | "21 6528 1744\n", 759 | "22 6555 1738\n", 760 | "23 6541 1547\n", 761 | "24 6581 1485\n", 762 | "25 6540 1426\n", 763 | "26 6585 1398\n", 764 | "27 6564 514\n", 765 | "28 6546 403\n", 766 | "29 6530 259\n", 767 | "30 6561 200\n", 768 | "31 6569 177\n", 769 | "32 6563 171\n", 770 | "33 6524 149\n", 771 | "34 6544 128\n", 772 | "35 6560 123\n", 773 | "36 6531 120\n", 774 | "37 6553 94\n", 775 | "38 6575 90\n", 776 | "39 6552 82\n", 777 | "40 6587 79\n", 778 | "41 6565 66\n", 779 | "42 6557 52\n", 780 | "43 6558 44" 781 | ] 782 | }, 783 | "execution_count": 8, 784 | "metadata": {}, 785 | "output_type": "execute_result" 786 | } 787 | ], 788 | "source": [ 789 | "grouped_df = train.groupby(\"Health_Camp_ID\").agg('size').reset_index()\n", 790 | "grouped_df.columns = [\"Health_Camp_ID\", \"Count\"]\n", 791 | "grouped_df = grouped_df.sort_values('Count', ascending=False).reset_index(drop=True)\n", 792 | "plt.bar(range(len(grouped_df.Health_Camp_ID)), grouped_df.Count, tick_label=grouped_df.Health_Camp_ID, color='r')\n", 793 | "plt.show()\n", 794 | "grouped_df" 795 | ] 796 | }, 797 | { 798 | "cell_type": "markdown", 799 | "metadata": {}, 800 | "source": [ 801 | "So the number of patients per medical camp in train set ranges from 6543 for campid=6543 to 44 for campid=6558" 802 | ] 803 | }, 804 | { 805 | "cell_type": "markdown", 806 | "metadata": {}, 807 | "source": [ 808 | "##### Test data exploration" 809 | ] 810 | }, 
811 | { 812 | "cell_type": "code", 813 | "execution_count": 9, 814 | "metadata": { 815 | "collapsed": false 816 | }, 817 | "outputs": [ 818 | { 819 | "data": { 820 | "text/html": [ 821 | "
\n", 822 | "\n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | "
Patient_IDHealth_Camp_IDRegistration_DateVar1Var2Var3Var4Var5
0505701654821-May-0610002
1500633658402-Jun-0600000
2506945658210-Aug-0600000
3497447655127-Aug-0600000
4496446653319-Sep-0600000
\n", 894 | "
" 895 | ], 896 | "text/plain": [ 897 | " Patient_ID Health_Camp_ID Registration_Date Var1 Var2 Var3 Var4 Var5\n", 898 | "0 505701 6548 21-May-06 1 0 0 0 2\n", 899 | "1 500633 6584 02-Jun-06 0 0 0 0 0\n", 900 | "2 506945 6582 10-Aug-06 0 0 0 0 0\n", 901 | "3 497447 6551 27-Aug-06 0 0 0 0 0\n", 902 | "4 496446 6533 19-Sep-06 0 0 0 0 0" 903 | ] 904 | }, 905 | "execution_count": 9, 906 | "metadata": {}, 907 | "output_type": "execute_result" 908 | } 909 | ], 910 | "source": [ 911 | "# Let us take a look at the top few rows\n", 912 | "test.head()" 913 | ] 914 | }, 915 | { 916 | "cell_type": "code", 917 | "execution_count": 10, 918 | "metadata": { 919 | "collapsed": false 920 | }, 921 | "outputs": [ 922 | { 923 | "name": "stdout", 924 | "output_type": "stream", 925 | "text": [ 926 | "Number of unique patients in the test : 15324\n" 927 | ] 928 | } 929 | ], 930 | "source": [ 931 | "# Number of unique persons present in the test\n", 932 | "print \"Number of unique patients in the test : \", len(test.Patient_ID.unique())" 933 | ] 934 | }, 935 | { 936 | "cell_type": "markdown", 937 | "metadata": {}, 938 | "source": [ 939 | "So 15,324 patients are present in the 35,249 rows present in test set. 
Now let us see the distribution just like train set" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": 11, 945 | "metadata": { 946 | "collapsed": false 947 | }, 948 | "outputs": [ 949 | { 950 | "data": { 951 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhUAAAFoCAYAAADgsAn8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3X20XXV97/t3snMSEnAfUk4laUNB6uhXjFwLCAQEPKDn\nUGwLHNAqch0XoSmKXIrIMYNKoUOkQEABtaAIihy1F6jgE9fiw+UAsUlNDKJE+1VBiCSYSkZsgDw1\ne+f+MeeC5SLZ3XPtuR728v0aI4M1528+/L6slbU/+c3fnHvKjh07kCRJmqipve6AJEkaDIYKSZJU\nC0OFJEmqhaFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkW06ruEBG/DdwIvB74JXB5\nZn6mbNsP+CRwBPA48J7M/EbTvm8ArgX2B5YCCzPzZ03t5wMXAi8B7gTOzcwt7RQmSZK6q52Rii8C\nvwO8Djgf+HBEnFy2fQlYCxwCfBa4OyLmAUTEPsDdwC3Aa4Cny2NRtp8KXAIsBI4DFgCL2+ifJEnq\ngSlVfvdHRBwCfAfYPzOfKNe9DzgZeD9FqHhpY3QhIr4BPJiZH4iIDwBHZeZxZdtM4BfAn2bmAxFx\nP/DNzLysbH8t8HVgL0crJEnqf1VHKvYHftkIFKXvU4w8HA2sbAkASyguhQAcDjzQaMjMzcBK4IiI\nmAocCjzYtO8yYDrw6op9lCRJPVA1VKwD9oyI3ZrW/R7F3Iy9KS59tG4/r3w9d4z2PYHdmtszcwRY\n37S/JEnqY1Unav4z8BTwsYg4j2JuxXuAHRShYGvL9luBGeXrWWO0z2pa3tX+kiSpj1UKFZm5NSLe\nBNwBbKQYaVhMcUfHKDCzZZcZwKby9RZeHBBmABvKNnbRvolx2rFjx44pU6aMd3NJkvSCCf8ArXxL\naWZ+F/j9iHgpxR0cx1PcWvoo8N9bNp9DMbIBsKZcbm1/iOIyx5Zy+ccAETEE7NW0/39oypQpbNy4\nmZGR0Sol9Z2hoakMD88ciFrAevrZINUC1tPPBqkWGNx6JqpSqIiI2cCXgRMz81/LdX8C/G+KSyMX\nRcSMzGxcxjiKFyZfLiuXG8eaBRwEXJKZOyJiednemMx5JLANeLhKH0dGRtm+ffK/wTBYtYD19LNB\nqgWsp58NUi0wePVMVNXLHxsiYndgcUT8LcUDsM6guPPje8Bq4NaIuAw4keKOjjPK3T8FXFjegvpV\n4FLgscxshIgbgI9HxCqKCZs3ADd5O6kkSZNDOw+/egvwcopbSc8D3pSZKzNzFDiJ4hLGCuBtwMmZ\n+SRAeRvqKcCZFM+62JPi+RaU7bcDVwCfAO6leOLmovbKkiRJ3Vbp4VeTwI4NG56b9ENR06ZNZfbs\n3RmEWsB6+tkg1QLW088GqRYY2HomPFHTXygmSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJ\nklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWh\nQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFoYKSZJUi2lVd4iIecCNwDHAeuD6zLy+\nbDuobDsQeAR4V2aubNr3NOAyYA7wdWBhZq5var8SOJMi7NySmYvarEuSJHVZOyMVdwLPAAcD5wOX\nR8RJETELuAe4v2xbCtwTETMBIuIw4GbgU
mABMBu4tXHQiHgvcBpwEnAqcHpEXNBeWZIkqdsqjVRE\nxJ7A4cBZmfko8GhE/CPweuC3gE1NowvnR8QbgTcDtwHvBm7PzM+Vx3o78ERE7JuZTwDnARdn5tKy\nfRHFqMaHJ1qkJEnqvKojFZuB54B3RMS0iAjgSOAhitGHJS3bfxs4ony9AHig0ZCZTwKrgQURMRfY\nB3iwad8lwL4RsXfFPkqSpB6oFCoycytwLvBOioDxI+BrmflpYC6wtmWXdcC88vVY7XOBHS3t64Ap\nTftLkqQ+VnmiJnAA8GXgGooJmR+NiG8Bs4CtLdtuBWaUr8dqnwWQmdta2mjavy9s27aNVat+UOsx\n588/kOnTp9d6TEmSuq3qnIrXA2cB88pRi4fKu0EuBh7lxQFgBrCpfL1ljPYt5fGnNwWLxrabqGBo\nqLN3yX7/+6tYe/yxzK/peKuAoW/ez8EHH/L8ukYNna6lW6ynfw1SLWA9/WyQaoHBrWeiqo5UHAz8\npAwUDQ8B76eYLzGnZfs5wFPl6zVjtK+huNQxh2KeRaNtR9P+4zI8PLPK5pUND89kPnBorQedyezZ\nu+/0XIPEevrXINUC1tPPBqkWGLx6JqpqqFgLvDwipmXm9nLdAcBjwDLgopbtjwQ+WL5eBhxFcScI\nEbEPxXyJpZn5VESsLts/X25/NLA6M9dV6eDGjZsZGRmtVlXF4w934JgbNjz3/PLQ0FSGh2d2vJZu\nsZ7+NUi1gPX0s0GqBQa3nomqGiq+AiwGbo6Iy4FXUASJi4AvAFdFxLXATRSTOXeneK4FFA/Fui8i\nlgErgOuAr2Tm6qb2qyKiMWpxBXB11YJGRkbZvr1zb3AnPjy76nOna+k26+lfg1QLWE8/G6RaYPDq\nmaiqd39spHgmxVzgO8CHgA9k5s2Z+QzwxxRP2lwBHAackJmby32XAWdTPPxqCcXTOM9sOvzVwO3A\nXcAdwGcaT+qUJEn9r/LdH5n5L8Dxu2hbARyys7ay/TbKyx87aRsFLiz/SJKkSWYwpq1KkqSeM1RI\nkqRaGCokSVItDBWSJKkWhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0M\nFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFoYKSZJU\nC0OFJEmqhaFCkiTVYlqVjSPi/wI+DewApjT9dzQzp0XEQcCNwIHAI8C7MnNl0/6nAZcBc4CvAwsz\nc31T+5XAmRRh55bMXDSB2iRJUhdVHan4fygCwdzyv/sCPwWui4hZwD3A/cDBwFLgnoiYCRARhwE3\nA5cCC4DZwK2NA0fEe4HTgJOAU4HTI+KCdguTJEndVWmkIjO3Av/aWI6Ii8qXFwFvBzY1jS6cHxFv\nBN4M3Aa8G7g9Mz9X7vt24ImI2DcznwDOAy7OzKVl+yKKUY0Pt1ucJEnqnrbnVETEbOB9wKLM/Hfg\ncGBJy2bfBo4oXy8AHmg0ZOaTwGpgQUTMBfYBHmzadwmwb0Ts3W4fJUlS90xkouY5wJrMvLtcngus\nbdlmHTBvHO1zKeZnrG1pm9K0vyRJ6mOVLn+0OAu4sml5FrC1ZZutwIxxtM8CyMxtLW007T8uQ0Od\nvaGlE8cfGprKtGlTf225U+fqBevpX4NUC1hPPxukWmBw65motkJFRBwK/C5we9PqLbw4AMwANo2j\nfUt53OlNwaKx7SYqGB6eWWXzyjpx/OHhmcyevXtXztVL1tO/BqkWsJ5+Nki1wODVM1HtjlQcDzyQ\nmf/WtG4NxR0hzeYAT42jfQ3FpY45FPMsGm07mvYfl40bNzMyMlpll0o2btzMcAeOuWHDc88vDw1N\nZXh4Zsdr6Rbr6V+DVAtYTz8bpFpgcOuZqHZDxc4mZS4DWp8rcSTwwab2oyjuBCEi9qGYL7E0M5+K\niNVl+
+fL7Y8GVmfmuiodGxkZZfv2zr3Bnfjw7KrPna6l26ynfw1SLWA9/WyQaoHBq2ei2g0VrwL+\nV8u6fwCuiIhrgZuAdwK7A3eW7TcC90XEMmAFcB3wlcxc3dR+VUQ0Ri2uAK5us3+SJKnL2p2Z8VJg\nQ/OKzHwG+BPgGIrQcBhwQmZuLtuXAWdTPPxqCbCe4umZDVdTzNG4C7gD+ExmXt9m/yRJUpe1NVKR\nmS+eVVisXwEcMsZ+t1Fe/thJ2yhwYflHkiRNMoNxL4wkSeo5Q4UkSaqFoUKSJNXCUCFJkmphqJAk\nSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgq\nJElSLQwVkiSpFoYKSZJUC0OFJEmqhaFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaTKu6Q0RMB64FTgO2\nAp/KzPeXbQcBNwIHAo8A78rMlU37ngZcBswBvg4szMz1Te1XAmdShJ1bMnNRm3VJkqQua2ek4iPA\n64H/BrwNWBgRCyNiFnAPcD9wMLAUuCciZgJExGHAzcClwAJgNnBr46AR8V6KoHIScCpwekRc0F5Z\nkiSp2yqNVETEbIqRhOMy87vlumuAw4HtwKam0YXzI+KNwJuB24B3A7dn5ufK/d4OPBER+2bmE8B5\nwMWZubRsX0QxqvHhCdYoSZK6oOpIxVHArzJzSWNFZi7OzD+nGH1Y0rL9t4EjytcLgAea9nsSWA0s\niIi5wD7Ag037LgH2jYi9K/ZRkiT1QNU5FfsDj5ejDH8FTAc+DVwOzKWYR9FsHTC/fD0XWLuT9nll\n246W9nXAlLJ9XcV+SpKkLqsaKvYA/gBYCJxBEQY+ATwHzKKYuNlsKzCjfD1W+yyAzNzW0kbT/pIk\nqY9VDRXbgZcAbysvXxAR+wLnAD/mxQFgBrCpfL1ljPYt5bGmNwWLxrabqGBoqLN3yXbi+ENDU5k2\nbeqvLXfqXL1gPf1rkGoB6+lng1QLDG49E1U1VDwFbGkEilJSzIe4j+JW0WZzyn0A1ozRvobiUscc\ninkWjbYdTfuPy/DwzCqbV9aJ4w8Pz2T27N27cq5esp7+NUi1gPX0s0GqBQavnomqGiqWArtFxMsz\n86flulcCPwOWARe1bH8k8MHy9TKKiZ63AUTEPhTzJZZm5lMRsbps/3y5/dHA6sysNJ9i48bNjIyM\nVquq4vGHO3DMDRuee355aGgqw8MzO15Lt1hP/xqkWsB6+tkg1QKDW89EVQoVmfmTiLgHuDUizqGY\nU7EI+ADwBeCqiLgWuAl4J7A7cGe5+43AfRGxDFgBXAd8JTNXN7VfFRGNUYsrgKurFjQyMsr27Z17\ngzvx4dlVnztdS7dZT/8apFrAevrZINUCg1fPRLVzEeV04KcUt3/eCnw0M/8uM58B/hg4hiI0HAac\nkJmbATJzGXA2xcOvlgDrKZ550XA1cDtwF3AH8JnMvL6N/kmSpB6o/JjuMjycUf5pbVsBHDLGvrdR\nXv7YSdsocGH5R5IkTTKDMW1VkiT1nKFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkW\nhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJ\nqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFtOq7hARJwN3ATuAKeV/v5CZfxYR\nBwE3AgcCjwDvysyVTfueBlwGzAG+DizMzPVN7VcCZ1KEnVsyc1G7hUmSpO5qZ6TilcCXKYLBHGAu\n8OcRMQu4B7gfOBhYCtwTETMBIuIw4GbgUmABMBu4tXHQiHgvcBpwEnAqcHpEXNBWVZIkqesqj1QA\nBwCPZOYvm1dGxJnApqbRhfMj4o3Am4HbgHcDt2fm58rt3w48ERH7ZuY
TwHnAxZm5tGxfRDGq8eE2\n+ihJkrqs3ZGKH+9k/eHAkpZ13waOKF8vAB5oNGTmk8BqYEFEzAX2AR5s2ncJsG9E7N1GHyVJUpe1\nM1IRwB9FxPuBIeAOiksacynmUTRbB8wvX88F1u6kfV7ZtqOlfR3FnI155WtJktTHKoWKiPg9YCaw\nmeKyxsuAjwCzyj9bW3bZCswoX4/VPgsgM7e1tNG0/7gMDXX2hpZOHH9oaCrTpk39teVOnasXrKd/\nDVItYD39bJBqgcGtZ6IqhYrMXB0Re2Xmr8pV34+IIeCzwH28OADMADaVr7eM0b4FICKmNwWLxrab\nqGB4eGaVzSvrxPGHh2cye/buXTlXL1lP/xqkWsB6+tkg1QKDV89EVb780RQoGn4E7Ab8guJukGZz\ngKfK12vGaF9DcaljDsU8i0bbjqb9x2Xjxs2MjIxW2aWSjRs3M9yBY27Y8Nzzy0NDUxkentnxWrrF\nevrXINUC1tPPBqkWGNx6Jqrq5Y//DnwemJeZW8rVBwFPU0yyvKhllyOBD5avlwFHUdwJQkTsQzFf\nYmlmPhURq8v2z5fbHw2szsxK8ylGRkbZvr1zb3AnPjy76nOna+k26+lfg1QLWE8/G6RaYPDqmaiq\nIxX/RHE54uaI+ADw+8Bi4CrgC8BVEXEtcBPwTmB34M5y3xuB+yJiGbACuA74Smaubmq/KiIaoxZX\nAFe3W5gkSequSjMzMvNZ4Hjgt4HlwCeBj2fmhzLzGeCPgWMoQsNhwAmZubncdxlwNsWdIkuA9RRP\nz2y4Grid4mmddwCfyczr2y9NkiR1UztzKn5EESx21rYCOGSMfW+jvPyxk7ZR4MLyjyRJmmQG414Y\nSZLUc4YKSZJUC0OFJEmqhaFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJklQL\nQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk\n1cJQIUmSamGokCRJtZjW7o4RcQ+wLjPPLJcPAm4EDgQeAd6VmSubtj8NuAyYA3wdWJiZ65varwTO\npAg6t2Tmonb7JkmSuq+tkYqIeCtwQtPyLOAe4H7gYGApcE9EzCzbDwNuBi4FFgCzgVub9n8vcBpw\nEnAqcHpEXNBO3yRJUm9UDhURMRtYDHynafVbgU2ZuSgL5wPPAG8u298N3J6Zn8vMR4C3A2+MiH3L\n9vOAizNzaWbeDywCzm2vJEmS1AvtjFRcA9wG/Khp3eHAkpbtvg0cUb5eADzQaMjMJ4HVwIKImAvs\nAzzYtO8SYN+I2LuN/kmSpB6oFCoi4jjgaIq5Ec3mAmtb1q0D5o2jfS6wo6V9HTClaX9JktTnxh0q\nImIGxUTMczJza0vzLKB13VZgxjjaZwFk5raWNpr2lyRJfa7K3R9/A6zIzG/upG0LLw4AM4BN42jf\nAhAR05uCRWPbTVQ0NNTZu2Q7cfyhoalMmzb115Y7da5esJ7+NUi1gPX0s0GqBQa3nomqEireAuwd\nEc+UyzMAIuJNwOcpbhVtNgd4qny9Zoz2NRSXOuZQzLNotO1o2n/chodnVt2l58cfHp7J7Nm7d+Vc\nvWQ9/WuQagHr6WeDVAsMXj0TVSVUvA74T03Liyl+8C8q21qfK3Ek8MHy9TLgKIoJnkTEPhTzJZZm\n5lMRsbps/3y5/dHA6sxcV6F/AGzcuJmRkdGqu1U6/nAHjrlhw3PPLw8NTWV4eGbHa+kW6+lfg1QL\nWE8/G6RaYHDrmahxh4rM/HnzcjlisSMzH4uIXwJXRMS1wE3AO4HdgTvLzW8E7ouIZcAK4DrgK5m5\nuqn9qohojFpcAVzdTkEjI6Ns3965N7gTH55d9bnTtXSb9fSvQaoFrKefDVItMHj1TFQtF1Ey8xng\nT4BjKELDYcAJmbm5bF8GnE3x8Ks
lwHqKp2c2XA3cDtwF3AF8JjOvr6NvkiSpO9p+THdmvqNleQVw\nyBjb30Z5+WMnbaPAheUfSZI0CQ3GtFVJktRzhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUw\nVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElS\nLQwVkiSpFoYKSZJUC0OFJEmqhaFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaTKu6Q0T8PvB3wGuB9cDH\nMvOasm0/4JPAEcDjwHsy8xtN+74BuBbYH1gKLMzMnzW1nw9cCLwEuBM4NzO3tFOYJEnqrkojFREx\nBbgHWAf8IfBO4OKIeGu5yZeAtcAhwGeBuyNiXrnvPsDdwC3Aa4CngS82HftU4BJgIXAcsABY3G5h\nkiSpu6pe/tgbeAg4JzMfzcx/BL4FHBURxwIvA87OwpUUoxFnlvsuBJZn5nWZ+SPgHcB+EXFM2X4e\ncG1mfi0zvwucDZwVEbtNqEJJktQVlUJFZv4iM0/LzOcAIuK1wNHA/6YYWVjZcrliCcWlEIDDgQea\njrUZWAkcERFTgUOBB5v2XQZMB15dpY+SJKk32p6oGRGPU4SEpcBdwFyKSx/N1gHzytdjte8J7Nbc\nnpkjFHM25iFJkvpe5YmaTU4B5gA3Uky+nAVsbdlmKzCjfD1W+6ym5V3tPy5DQ529oaUTxx8amsq0\naVN/bblT5+oF6+lfg1QLWE8/G6RaYHDrmai2Q0VmrgSIiAuAz1FMwJzdstkMYFP5egsvDggzgA1l\nG7to30QFw8Mzq2xeWSeOPzw8k9mzd+/KuXrJevrXINUC1tPPBqkWGLx6JqpSqIiIlwJHZOaXmlb/\nkGLuw1PAAS27zCnXA6wpl1vbH6K4zLGlXP5xea4hYK+m/cdl48bNjIyMVtmlko0bNzPcgWNu2PDc\n88tDQ1MZHp7Z8Vq6xXr61yDVAtbTzwapFhjceiaq6kjFy4C7IuJ3M/MX5brXAP9KMSnzf0bEjMxs\nXMY4ihcmXy4rlwGIiFnAQcAlmbkjIpaX7Y3JnEcC24CHq3RwZGSU7ds79wZ34sOzqz53upZus57+\nNUi1gPX0s0GqBQavnomqGiqWAyuAT5eXPV4GXAV8kCIM/By4NSIuA06kuKPjjHLfTwEXRsT7gK8C\nlwKPZWYjRNwAfDwiVlFM2LwBuOk38eFX27ZtY/nyH9aegOfPP5Dp06fXdjxJkppVChWZORoRJwEf\nA/4JeA64PjM/BhARJ1LMrVgB/BQ4OTOfLPd9IiJOAa6neMjVt4GTm459e0TsC3yC4nLKPwCLJlbe\n5PTIIz/g5294HfNrPOYqgHvv46CDDqnxqJIkvaDyRM3yssebdtH2GHDsGPveC7xijPbF+BRNAOZT\nDPPUaUPNx5Mkqdlg3AsjSZJ6zlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJklQLQ4Uk\nSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQ\nIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFtOqbBwRvwN8BDgW2ATcAVyUmdsiYj/gk8ARwOPA\nezLzG037vgG4FtgfWAoszMyfNbWfD1wIvAS4Ezg3M7e0XZkkSeqqqiMVXwB2A14LvBX4U+Cysu1L\nwFrgEOCzwN0RMQ8gIvYB7gZuAV4DPA18sXHQiDgVuARYCBwHLAAWt1WRJEnqiXGHiogI4DDgjMz8\nl8z8NkUQeFtEHAu8DDg7C1dSjEacWe6+EFiemddl5o+AdwD7RcQxZft5wLWZ+bXM/C5wNnBWROxW\nR5GSJKnzqoxU/AI4ITOfbln/nylGFla2XK5YQnEpBOBw4IFGQ2ZuBlYCR0TEVOBQ4MGmfZcB04FX\
nV+ifJEnqoXGHisz8t8z8emM5IqYA5wLfAuZSXPpotg6YV74eq31Piksqz7dn5giwvml/SZLU5ypN\n1GxxNXAQxSjDBcDWlvatwIzy9awx2mc1Le9q/3EbGursDS2dOP7Q0FSmTXvhuFOnTqn9HDs7T7c0\n/p91+r3plkGqZ5BqAevpZ4NUCwxuPRPVVqiIiKso5kH8WWb+MCK2AL/VstkMijtEALbw4oAwA9hQ\ntrGL9k1UNDw8s+ouPT/+8PBMZs/e/fnlPfbozFSS1vN0W6ffm24bpHoGqRawnn42SLXA4NUzUZVD\nRUR8lGIi5emZ2biDYw3wypZN5wBPNbXP2Un7QxSXObaUyz8uzzEE7NW0/7ht3LiZkZHRqrtVOv5w\nB465YcNzzy8/++wW9qj5HDs7T7cMDU1leHhmx9+bbhmkegapFrCefjZItcDg1jNRVZ9TcSnwF8Bb\nMvPupqZlwKKImJGZjcsYR/HC5Mtl5XLjOLMoLp1ckpk7ImJ52d6YzHkksA14uGI9jIyMsn17597g\nTnx4Wvs8Orqj9nPs7Dzd1uvz122Q6hmkWsB6+tkg1QKDV89EjTtURMQBwMXA3wL/FBF7NzXfD/wc\nuDUiLgNOpJhrcUbZ/ingwoh4H/BV4FLgscxshIgbgI9HxCqKCZs3ADf58CtJkiaPKjMzTiy3v5ji\nB/9aissTazNzFDiZ4hLGCuBtwMmZ+SRAZj4BnELx3IrvUNzxcXLjwJl5O3AF8AngXopnXCyaSGGS\nJKm7xj1SkZlXAVeN0f4oxeO7d9V+L/CKMdoX41M0JUmatAbjXhhJktRzhgpJklQLQ4UkSaqFoUKS\nJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0m8qvPNclt27aNVat+UPtx588/kOnTp9d+XElSfzNU\n/AZbteoHrD3+WObXeUyAe+/joIMOqfGokqTJwFDxG24+xW9+q9OGmo8nSZocnFMhSZJqYaiQJEm1\nMFRIkqRaGCokSVItDBWSJKkWhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJ\nUi3a/oViETEDWAG8OzMfKNftB3wSOAJ4HHhPZn6jaZ83ANcC+wNLgYWZ+bOm9vOBC4GXAHcC52bm\nlnb7KEmSuqetkYoyUPw98MqWpi8Ca4FDgM8Cd0fEvHKffYC7gVuA1wBPl9s3jnkqcAmwEDgOWAAs\nbqd/kiSp+yqHiog4AFgGvKxl/XEUIxBnZ+FKitGIM8tNFgLLM/O6zPwR8A5gv4g4pmw/D7g2M7+W\nmd8FzgbOiojd2ilMkiR1VzsjFa8DvkVxiWNK0/rDgZUtlyuWlNs12h9oNGTmZmAlcERETAUOBR5s\n2ncZMB14dRt9lCRJXVZ5TkVmfrzxOiKam+ZSXPpotg6YN472PYHdmtszcyQi1pft/1y1n5Ikqbva\nnqi5E7OArS3rtgIzxtE+q2l5V/uPy9BQZ29o6cTxh4amMm3aC8edOnXKGFvXd55O/b9qPc/IyHaW\nL1/Os89O/X1QAAAMOklEQVRuYXR0R23nedWrDmT69Om1HW+8Gv/fOv1Z64ZBqgWsp58NUi0wuPVM\nVJ2hYgvwWy3rZgCbmtpbA8IMYEPZxi7aN1HB8PDMKptX1onjDw/PZPbs3Z9f3mOPzkwjaT1Pp/5f\ntZ5n+fLlrDrsMObXeI5VwPB3vsOhhx5a41Gr6fRnrZsGqRawnn42SLXA4NUzUXWGijW8+G6QOcBT\nTe1zdtL+ELCeIljMAX4MEBFDwF5N+4/Lxo2bGRkZrdTxqscf7sAxN2x47vnlZ5/dwh41n2Nn5+lE\nLTs7z7PPbmE+xaSZTp6nW4aGpjI8PLPjn7VuGKRawHr62SDVAoNbz0TVGSqWAYsiYkZmNi5jHMUL\nky+XlcsARMQs4CDgkszcERHLy/bGZM4jgW3Aw1U6MTIyyvbtn
XuDO/Hhae1znZcIxjpPp/4i9Kqe\nbuv1+es0SLWA9fSzQaoFBq+eiaozVNwP/By4NSIuA06k+MfpGWX7p4ALI+J9wFeBS4HHGg/OAm4A\nPh4RqygmbN4A3OTDryRJmhwmOjPj+X+CZuYocBLFJYwVwNuAkzPzybL9CeAUiudWfIfijo+Tm/a/\nHbgC+ARwL8UzLhZNsH+SJKlLJjRSkZlDLcuPAceOsf29wCvGaF+MT9GUJGlSGox7YSRJUs8ZKiRJ\nUi0MFZIkqRaGCkmSVAtDhSRJqkWdz6mQemrbtm2sWvWD2o87f35vfseIJE02hgoNjFWrfsDa44+t\n/XeMcO99HHTQITUeVZIGk6FCA6UTv2NkQ83Hk6RB5ZwKSZJUC0OFJEmqhaFCkiTVwlAhSZJqYaiQ\nJEm1MFRIkqRaeEupVNG2bdtYvvyHbNy4mZGR0VqO6QO2JA0CQ4VU0SOP/ICfv+F1tT1kywdsSRoU\nhgqpDXU/ZMsHbEkaBM6pkCRJtXCkQupTnfgFac7dkNRJhgqpT9X9C9KcuyGp0wwVUh9z7oakycRQ\nIf0G68TtseBlFuk3VV+FioiYAdwAnAJsAj6UmR/uba+kwVX37bGw88ssnZgfAoYXqd/0VagArgEO\nBv4rsB9wW0Q8npl39bJT0iCr+xILvPgyS93zQ8A5IlI/6ptQERGzgLOA4zPzYeDhiFgMnAsYKqRJ\nrhvhxcs5Um/1TagAXk3Rn6VN65YAf9Wb7kiabLycI/VWP4WKucDTmbm9ad06YLeI2Csz1/eoX5Im\nkUG6nNOt3zPjM1FUl34KFbOArS3rGsszxnuQoaHOPiR0aGhq8Ze/JquAfYamMm3aC/2eOnVKrefY\n1XnqrmVX57Ge6ucAP2vtnKeb9XTCUMt5vve9R/jaccewX03HfxwY+ub9HHzwr89D+f73V/HN44/t\n6HmGhqayfPlynn12C6OjO2o6Ey+qBWDlyu/WdvxdnafxGej0zxzobj0TNWXHjvre3ImIiDcBH8nM\n32la9wqKv9N7ZeavetY5SZL0H+qn3/2xBvgvEdHcpznAZgOFJEn9r59CxfeAfwcWNK07Gljem+5I\nkqQq+ubyB0BE3Ai8FjgTmAfcCpyRmV/sZb8kSdJ/rJ8magJcQPFEzf8P+Dfgrw0UkiRNDn01UiFJ\nkiavfppTIUmSJjFDhSRJqoWhQpIk1cJQIUmSamGokCRJtei3W0ori4gZFLehngJsAj6UmR/uba/a\nFxG/A3wEOJainjuAizJzW087NkERcQ+wLjPP7HVf2hUR04FrgdMofi/NpzLz/b3tVfsiYh5wI3AM\nsB64PjOv722vqiu/A1YA787MB8p1+wGfBI6g+FUU78nMb/Sqj1Xsop4FwIeA/wN4ErgmM2/pXS/H\nZ2e1NLUNAz+i+H67rRf9q2oX780+wCeA11E8Gfr9mXln73o5fruo52iK77lXAD8G/mdmfmu8xxyE\nkYprgIOB/wqcA1waEaf0tEcT8wVgN4qHgL0V+FPgsp72aIIi4q3ACb3uRw0+Arwe+G/A24CFEbGw\nt12akDuBZyj+/pwPXB4RJ/W2S9WUX4p/D7yypemLwFrgEOCzwN1liOprO6snIvYG/l+K5/f8IfA3\nwEcjoq//To3x3jQspvhVDJPCLt6bIYr3ZgvFe3MN8NmI2FXNfWMX9fw28GXg88CrKL4jvlT+Y3dc\nJvVIRUTMAs4Cjs/Mh4GHI2IxcC5wV08714aICOAwYO/MfLpcdwlwNbCol31rV0TMpvjy+E6v+zIR\nZR1nAsdl5nfLddcAh1P8i3hSiYg9Kfp+VmY+CjwaEf9IEZq+1NPOjVNEHEDx5de6/jhgf2BBZm4B\nroyI11O8fx/obi/Hb1f1ACcDT2XmX5fLj0bEsRTB9mvd6l8VY9TSaD8KOA74Rdc6NQFj1PPHwO9S\nfNaeA34SEX8EHAn8sItdr
GSMel4L/HvTaP8VEfFeil+fMa6fqZN9pOLVFMFoadO6JRRflpPRL4AT\nGoGiNAX4zz3qTx2uAW6jGOaczI4CfpWZSxorMnNxZv55D/s0EZuB54B3RMS0MtC+FljZ225V8jrg\nWxSXOKY0rT8cWFkGioYl5Xb9bFf1fA14x0627+fvhV3VQkT8J4rLBecAk+Wy7q7qeR3wrTJQAJCZ\np2TmzV3uX1W7qmc9sFdE/A+AiDgZ2AP4wXgPPKlHKoC5wNOZub1p3Tpgt4jYKzPX96hfbcnMfwO+\n3liOiCkUoy7f7FmnJqD8F+PRwIHAx3vcnYnaH3g8It4O/BUwHfg0cHlmTrrH0mbm1og4F/gYxaWP\nIeDTmXlrTztWQWY+/5kqMtHz5lJc+mi2juL3CfWtXdWTmauB1U1tL6W4NHpJN/tXxRjvDcDFFKHv\nmztp60tj1LM/8LOIuAJ4O/BL4G8ys69H+8b4rD0YETcA/xARoxQDD+/IzJ+M99iTfaRiFsWEuWaN\n5Rld7ksnXE1xnW7STQYsr9fdCJyTma3v0WS0B/AHwELgDOC9wHnAX/awTxN1AMX108MoanpTRJzW\n0x7VY1ffC5P+OyEidqOYd7UWuKnH3amsnGvwF8B7et2XmuxBMYq0J/AnwP+i+IF8cE971aaI2IMi\nKF0CHApcTjF/5w/Ge4zJPlKxhRd/UTSWN3W5L7WKiKsofmj9WWZOxksHfwOsyMxJOcqyE9uBlwBv\ny8wnASJiX+BdwHW97Fg7yjkGZwHzytD3UDmR8WKKyVuT2Rbgt1rWzWDyfyfsThECXw68tuXyzmRx\nE3BJyyXeyWw7xWj5u8rl75V3T/wF8M7edattiwAy8/Jy+XvlnUd/Cbx7PAeY7CMVa4D/EhHNdcwB\nNmfmr3rUpwmLiI9SJPnTJ/FvaX0LcHJEPBMRzwCnA/9nRGzscb/a9RSwpREoSgns06P+TNTBwE9a\nRpEeAvbtUX/qtIYX31Uwh+I9nJQi4iUUl0ZfCRybmY/1uEuVRcTvUUxg/FDT98LvAR8vbzmfjJ6i\nuO2y2WT/Xni4ZV2l74XJHiq+B/w7xczUhqOB5b3pzsRFxKUUKfctk+Ve5114HcVcileXf75McVfB\nq3vZqQlYSjFX5+VN615J8QyEyWgt8PKIaB6tPAD4WY/6U6dlwMHlJbiGo8r1k045t+puYD/gmMz8\nl972qG1PUoyy/CEvfC+sBf4amKwTnpcBryrfo4YDmNzfC623w76CCt8Lk/ryR2ZujojbKJLumRQT\nsd5LcX140ilv87kY+Fvgn8r70wHIzHU961gbMvPnzcvlv0p2ZOak/KGVmT8p/zV1a0ScQzEZcBF9\nfIvif+ArFLf63hwRl1N8cVxU/pns7gd+TvFeXQacSHF9+IxedmoC/pziOTx/Cmxs+l7Ylpkbetar\nijJzFPi1EZaI2A78MjMn6yjS31OEohvKW8yPB/6IYp7SZHQz8GBE/CXFPwRPoqjpD8d7gMk+UgFw\nAfBdigfDfBT460l8yeBEivfkYorEuJZieK11Jrt643Tgp8CDwK3ARzPz73raozZl5kaKZ1LMpXiG\nyIeAD0yCW+F25fk7cMofXidRXPJYQfE8h5NbLl31ux28UNMpFLf9fZUXvhfWUkzYnAzGujtq0t05\nxa9/1p6heBjeARS3Xf7fFPPgWi8h9LPmev6Z4vN2BsVlkNMpHnMw7tGxKTt2TMb3VJIk9ZtBGKmQ\nJEl9wFAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmph\nqJAkSbX4/wGFg+QD2iAF7wAAAABJRU5ErkJggg==\n", 952 | "text/plain": [ 953 | "" 954 | ] 955 | }, 956 | "metadata": {}, 957 | "output_type": 
"display_data" 958 | }, 959 | { 960 | "data": { 961 | "text/html": [ 962 | "
\n", 963 | "\n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | "
Number_of_entries_per_personCount
018023
122877
231570
34984
45640
56391
67272
78187
89163
91095
101159
111225
121326
13146
14155
15171
\n", 1054 | "
" 1055 | ], 1056 | "text/plain": [ 1057 | " Number_of_entries_per_person Count\n", 1058 | "0 1 8023\n", 1059 | "1 2 2877\n", 1060 | "2 3 1570\n", 1061 | "3 4 984\n", 1062 | "4 5 640\n", 1063 | "5 6 391\n", 1064 | "6 7 272\n", 1065 | "7 8 187\n", 1066 | "8 9 163\n", 1067 | "9 10 95\n", 1068 | "10 11 59\n", 1069 | "11 12 25\n", 1070 | "12 13 26\n", 1071 | "13 14 6\n", 1072 | "14 15 5\n", 1073 | "15 17 1" 1074 | ] 1075 | }, 1076 | "execution_count": 11, 1077 | "metadata": {}, 1078 | "output_type": "execute_result" 1079 | } 1080 | ], 1081 | "source": [ 1082 | "grouped_df = test.groupby('Patient_ID').agg('size').reset_index()\n", 1083 | "grouped_df.columns = [\"Patient_ID\", \"Number_of_entries_per_person\"]\n", 1084 | "grouped_df = grouped_df.groupby('Number_of_entries_per_person').agg('size').reset_index()\n", 1085 | "grouped_df.columns = [\"Number_of_entries_per_person\", \"Count\"]\n", 1086 | "plt.bar(grouped_df.Number_of_entries_per_person, grouped_df.Count, color='r')\n", 1087 | "plt.show()\n", 1088 | "grouped_df" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "markdown", 1093 | "metadata": {}, 1094 | "source": [ 1095 | "Now let us look at the number of health camps in test set and their distribution" 1096 | ] 1097 | }, 1098 | { 1099 | "cell_type": "code", 1100 | "execution_count": 54, 1101 | "metadata": { 1102 | "collapsed": false 1103 | }, 1104 | "outputs": [ 1105 | { 1106 | "name": "stdout", 1107 | "output_type": "stream", 1108 | "text": [ 1109 | "Number of Health Camp ID in the test dataset : 21\n" 1110 | ] 1111 | } 1112 | ], 1113 | "source": [ 1114 | "print \"Number of Health Camp ID in the test dataset : \", len(test.Health_Camp_ID.unique())" 1115 | ] 1116 | }, 1117 | { 1118 | "cell_type": "code", 1119 | "execution_count": 12, 1120 | "metadata": { 1121 | "collapsed": false 1122 | }, 1123 | "outputs": [ 1124 | { 1125 | "data": { 1126 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAg0AAAFoCAYAAADUycjgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3XucVXW9//HXXJoBxKnRSkhIrXP6iHM8NpqK9zTL7FSa\nWolmxywyr5ladOHIKTwieNcS8x5dTmapZFZoxS8lIUHNRPTjKRVQiJLQievEzPz++H43LLZ7hrX2\nXjMs5f18PHiw9/ru9ZnP/u611/7s7/qutet6enoQERER2Zz6LZ2AiIiIvDqoaBAREZFUVDSIiIhI\nKioaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpNFa7opndAyxz91Pi/enA\nh4AeoC7+/yF3/3lsPwc4H9gWuB04093XxrZm4FrgGGA1cJm7X15tbiIiIpK/qkYazOx44MiyxaOA\nE4DhwLD4/33x8ccCFwBjgcOA0cCUxLqXAnsC7wZOByaY2THV5CYiIiL9oy7rb0+YWSvwGLAEWODu\np5hZE7AKGOXuf6qwzm+BX7n7xHj/AOBeYHtC4fIicIS7PxDbvwa8x90Pq/qZiYiISK6qGWm4FJgG\nPJlYtivQDTxb/mAzqwf2Bh5ILJ4DNAF7xH+NwOxE+yxg3ypyExERkX6SqWgws8OAg4CJZU27Ah3A\nd81siZn93szeH9veAAwijEwA4O5dwHJgBOEwxovuvj4RbxkwyMy2z5KfiIiI9J/URUOcrDgVON3d\n15U17woMBn4BHAH8HLjbzPYEhsTHlK+zDmiO7ZXaiO0iIiJSAFnOnvhvYJ67/6q8wd2/YWZXufvL\ncdHjZrYX8FlgfFxWXgA0E86UaOyljdieWk9PT09dXV2WVURERCTY7AdolqLh48AOZvaPeL8ZwMyO\nc/eWRMFQ8iSwG+EwxFrCGRVPx3UaCJMglxJGO95oZvXu3h3XHQascfeXMuRHXV0dHR1r6Orq3vyD\ne9HQUE9Ly+Ca4xQ1lnIa+FjKaeBjKaeBj1XEnCSb1tZtNvuYLEXDIcDrEvenEK7FMM7MbgG63P0z\nifZ3Ao+5e4+ZzQUOBO6PbfsDnYSzMOqAfxJOw3wwth8EzM2Q2wZdXd2sX1/7RpZXnKLGUk4DH0s5\nDXws5TTwsYqYk+QnddHg7ouT9+OIQ4+7PxMv7PTDeGrlg8CJwAFAqYi4FrjOzJ4gTIi8Frg+cXGn\nabH9FMLkyPOAk2t5YiIiIpKvXC4j7e53ES7KNB54nHBlyCNKhYa73wZMAr4NzCCcXjkuEeJc4GHg\nN8A1wH/FmCIiIlIQVV9G2t0/VXb/ZuDmPh4/hU2vAplsWwN8Kv4TERGRAtIPVomIiEgqKhpEREQk\nFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIiIqmoaBAREZFUVDSIiIhIKioaREREJBUV\nDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0i\nIiKSiooGERERSUVFg4iIiKSiokFERERSaax2RTO7B1jm7qfE++3AVGB3YD5wmrs/knj8GGAiMAy4\nFxjr7ssT7RcDpxAKmZvcfVy1uYmIiEj+qhppMLPjgSMT94cA9wC/BfYEZgP3mNng2L4PcCMwARgN\ntAK3JtY/DxgDHAUcC5xoZudWk5uIiIj0j8wjDWbWCkwBHkosPh5YnRgdOMfMPgB8FJgGnAHc5u7f\njzFOAhaa2U7uvhA4Gxjv7rNj+zjCqMTlWXKbO3cuHR1r6Orqzvq0aGvbnaampszriYiIbC2qOTxx\nKaEQ2DGxbF9gVtnjfgfsFx87GphUanD3581sETDazDqBkcADiXVnATuZ2Q7uvixtYk/ssw9tWZ5J\naT2AGTNpb9+rirVFRES2DpmKBjM7DDiIMG/hukTTcMI8hqRls
OEzfDiwpEL7iNjWU9a+DKiL7amL\nhjZg77QPLrOiyvVERES2FqnnNJhZM2Gi4+nuvq6seQhQvmwd0JyifQiAu3eWtZFYX0RERLawLCMN\n/w3Mc/dfVWhbyys/4JuB1Sna1wKYWVOicCg9djUDpKGhnsbGehoa6jfczyNm0WIpp4GPpZwGPpZy\nGvhYRcxJ8pelaPg4sIOZ/SPebwYws+OAHxBOpUwaBiyNt1/oo/0FwqGIYcCiRFtPYv1+19IymNbW\nbTa5n2fsosVSTgMfSzkNfCzlNPCxipiT5CdL0XAI8LrE/SmED/Zxsa38ugr7AxfG23OAAwmTIjGz\nkYT5CrPdfWmcFHkgofiAMG9iUZZJkLXq6FjDihWraGiop6VlcNVnYSQVMZZyGvhYymngYymngY9V\nxJwkm+QX596kLhrcfXHyfhxx6HH3Z8zsb8AkM7sCuB74HLANcHt8+FRgppnNAeYBVwJ3u/uiRPtk\nMyuNOkwCLkmbWx66urpZv7671/t5xi5CLOU08LGU08DHUk4DH6uIOUl+cjlg5O7/AD4IHEwoCvYB\njnT3NbF9DnAq4eJOs4DlhKs/llwC3AbcAfwI+I67X5VHbiIiIpKPqi8j7e6fKrs/D+j1QgfuPo14\neKJCWzdwfvwnIiIiBaSpqSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0\niIiISCoqGkRERCQVFQ0iIiKSiooGERERSUVFg4iIiKSiokFERERSUdEgIiIiqahoEBERkVRUNIiI\niEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIiIqmoaBAREZFUVDSIiIhI\nKo1ZVzCztwPfAg4AlgPfdPdLY9tVwFlAD1AX/z/L3a+N7WOAicAw4F5grLsvT8S+GDiFUMzc5O7j\nqn9qIiIikqdMIw1mVgfcAywD3gl8DhhvZsfHh4wCxgHDCYXBcODmuO4+wI3ABGA00Arcmoh9HjAG\nOAo4FjjRzM6t8nmJiIhIzrKONOwAPAqc7u6rgD+b2a+BA4EfEoqGKe7+1wrrngHc5u7fBzCzk4CF\nZraTuy8EzgbGu/vs2D6OMCpxeRXPS0RERHKWaaTB3f/i7mNiwYCZHQAcDMw0s22BHYGne1l9NHB/\nItbzwCJgtJkNB0YCDyQePwvYycx2yJKjiIiI9I+qJ0Ka2XOEIuBB4A5gN8IchvFmttjM/mBmn0ys\nMhxYUhZmGTAitvWUtS8jzIsYUW2OIiIikp/MEyETjiHMW7gOuBJ4GOgGFgBXA+8Grjezl919OjAE\nWFcWYx3QHNtw986yNmJ7v2toqKexsZ6GhvoN9/OIWbRYymngYymngY+lnAY+VhFzkvxVXTS4+yMA\nZvYF4HtAC/BTd38pPmS+mb0DOA2YDqzllQVAM7A6tmFmTYnCofTY1dXmmEVLy2BaW7fZ5H6esYsW\nSzkNfCzlNPCxlNPAxypiTpKfTEWDmb0Z2C+OHJQsAJqAbd3972WrPAkcGm+/QBiZSBoGLI1tdfH+\nokRbT2zvdx0da1ixYhUNDfW0tAymo2MNXV3dNcUsYizlNPCxlNPAx1JOAx+riDlJNskvzr3JOtKw\nC3CHme3o7n+Jy94F/A34vJnt7+7vTTy+HXgq3p5DOMtiGoCZjSTMV5jt7kvNbFFs/0F8/EHAIndf\nljHHqnR1dbN+fXev9/OMXYRYymngYymngY+lnAY+VhFzkvxkLRrmAvOAW+I1FHYBJgMXEoqCL8fl\ndwFHAJ8gzG0AmEo4y2JOjHElcLe7L0q0Tzaz0qjDJOCSKp+XiIiI5CzrKZfdhIsvrSKcNXE9cJW7\nf9Pd5wHHAZ8EHgfOBMa4+0Nx3TnAqYSLO80iXE3ylET4S4DbCGdi/Aj4jrtfVf1TExERkTxlnggZ\nD0sc10vb3cDdfaw7jXh4o
kJbN3B+/CciIiIFo/NZREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKi\nQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0iIiKSiooGERERSUVFg4iIiKSiokFE\nRERSUdEgIiIiqahoEBERkVRUNIiIiEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBRERE\nUlHRICIiIqk0Zl3BzN4OfAs4AFgOfNPdL41tOwM3APsBzwFfcPf7EuseDlwBvA2YDYx192cT7ecA\n5wPbArcDZ7r72mqemIiIiOQr00iDmdUB9wDLgHcCnwPGm9nx8SHTgSXAXsD3gDvNbERcdyRwJ3AT\n8C7gReCuROxjgQuAscBhwGhgSrVPTERERPKV9fDEDsCjwOnu/md3/yXwa+BAMzsU2AU41YOLCaMJ\np8R1xwJz3f1Kd38S+BSws5kdHNvPBq5w91+4+8PAqcCnzWxQTc9QREREcpHp8IS7/wUYU7pvZgcA\nBwGnE0YGHik7nDCLcKgCYF/g/kSsNWb2CLCfmc0C9gYmJNadAzQBewC/z5JnLTo7O5k7dwEdHWvo\n6urOvH5b2+40NTX1Q2YiIiJbVuY5DSVm9hwwEvgZcAdwJeHQRNIyYES8PbyP9jcAg5Lt7t5lZstj\n+4AVDfPnP87iww+hrYp1nwCYMZP29r2AfAuQWmKpkBERkTxUXTQAxwDDgKmEyY1DgHVlj1kHNMfb\nfbUPSdzvbf0B00YY9qjGisTtPAuQamOVxxEREalW1UWDuz8CYGbnAt8nTHBsLXtYM7A63l7LKwuA\nZsLn7NrE/d7W71cNDfU0NtZTX1+XSxyA+vq6mgqQjpxiJeM0NGz6fy3yilXEnPKMpZwGPpZyGvhY\nRcxJ8pepaDCzNwP7ufv0xOIFhLkHS4FRZasMi8sBXoj3y9sfJZy6uTbefzr+rQZg+8T6/aqlZTCt\nrdswdGht8y5LcYDCxErGSS7LS16xiphTnrGU08DHUk4DH6uIOUl+so407ALcYWY7xkmREE6f/Cth\n0uMXzazZ3UuHGQ4EHoi358T7AJjZEKAduMDde8xsbmwvTZbcH+gEHsuYY1U6OtawYsUqVq5cy9Ac\n4gCFiZWM09BQT0vL4KrnWSTlFauIOeUZSzkNfCzlNPCxipiTZFP+5bKSrEXDXGAecEs8LLELMBm4\nkPBhvxi41cwmAh8mjKafHNe9GTjfzL5EmDw5AXjG3UtFwrXAdWb2BGFC5LXA9QN1caeurm7Wr++m\nu7snlzhAYWIl4/S1LM/4WzJOUWMpp4GPpZwGPlYRc5L8ZDpg5O7dwFHAKuBB4HrgKnf/Zmz7MOEQ\nwzzgBOBod38+rruQMHnyFOAhwhkTRydi3wZMAr4NzCBc42FcLU9ORERE8pN5ImQ8LHFcL23PAIf2\nse4MYNc+2qegq0CKiIgUkqamioiISCoqGkRERCQVFQ0iIiKSiooGERERSUVFg4iIiKSiokFERERS\nUdEgIiIiqahoEBERkVRUNIiIiEgqKhpEREQklcyXkZZXt87OTubOXVD1r8e1te1OU1NTP2QmIiJF\np6JhKzN//uMsPvwQ2qpY9wmAGTNpb98r56xEROTVQEXDVqiN8Jvl1ViRuK1RCxGRrYuKBqmaRi1E\nRLYuKhqkJnmNWoiISPHp7AkRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCoq\nGkRERCQVFQ0iIiKSSuaLO5nZW4CrgUOB1cCPgK+4e6eZXQWcBfQAdfH/s9z92rjuGGAiMAy4Fxjr\n7ssTsS8GTiEUMze5+7ganpuIiIjkqJqRhp8Ag4ADgOOBDxEKAYBRwDhgOKEwGA7cDGBm+wA3AhOA\n0UArcGspqJmdB4wBjgKOBU40s3OryE9ERET6QaaRBjMzYB9gB3d/MS67ALiEUCyMAqa4+18
rrH4G\ncJu7fz+udxKw0Mx2cveFwNnAeHefHdvHEYqRy6t6ZiIiIpKrrCMNfwGOLBUMUR3wejPbFtgReLqX\ndUcD95fuuPvzwCJgtJkNB0YCDyQePwvYycx2yJijiIiI9INMIw3u/jJhLgIAZlYHnAn8ijDK0AOM\nN7MjgeXA5e4+LT58OLCkLOQyYERs6ylrX0YoSEbE2yIiIrIF1forl5cA7yT80OG7gG5gAWGi5LuB\n683sZXefDgwB1pWtvw5ojm24e2dZG7G93zU01NPYWE99fV0ucYDCxCpiTslYDQ31G+7XqoixlNPA\nx1JOAx+riDlJ/qouGsxsMmEewsfcfQGwwMx+6u4vxYfMN7N3AKcB04G1vLIAaCacgbE2xmxKFA6l\nx66uNscsWloG09q6DUOHDsolDlCYWEXMqTxW6X5eihhLOQ18LOU08LGKmJPkp6qiwcyuAU4FTnT3\nu0rLEwVDyZOEUzMBXiCcUZE0DFga2+ri/UWJtp7Y3u86OtawYsUqVq5cy9Ac4gCFiVXEnJKxGhrq\naWkZTEfHGrq6umuISCFjKaeBj6WcBj5WEXOSbJJf4npTzXUaJgCfBT7u7ncmln8d2N/d35t4eDvw\nVLw9BzgQmBYfP5IwX2G2uy81s0Wx/Qfx8QcBi9x9QOYzdHV1s359N93dPbnEAQoTq4g5lceqdD/P\n2EWIpZwGPpZyGvhYRcxJ8pP1lMtRwHjgIuDBsjMb7ga+HK+tcBdwBPAJwtwGgKnATDObA8wDrgTu\ndvdFifbJZlYadZhEmDMhr3GdnZ3Mnbug6m8VbW2709TU1A+ZiYhIUtaRhg8TTtMcH/9BvPKjuzeY\n2XGEaytMBJ4Dxrj7QwDuPsfMTo1trcAMwohFySXAm4A7gC7gBne/qponJa8u8+c/zuLDD6GtinWf\nAJgxk/b2vXLOSkREymU95XIyMLmP9rsJIw69tU8jHp6o0NYNnB//yVamjXAKTjVWJG5r1EJEpP/U\nesqlSKFo1EJEpP+oaJDXnLxGLUREZFO6coaIiIikoqJBREREUlHRICIiIqmoaBAREZFUVDSIiIhI\nKioaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCr6\nlUuRXnR2djJ37gI6OtbQ1dWdad22tt1pamrqp8xERLYMFQ0ivZg//3EWH34IbRnXewJgxkza2/fq\nh6xERLYcFQ0ifWgD9q5ivRV5JyIiUgCa0yAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCSVTBMhzewt\nwNXAocBq4EfAV9y908x2Bm4A9gOeA77g7vcl1j0cuAJ4GzAbGOvuzybazwHOB7YFbgfOdPe1VT8z\nERERyVXWkYafAIOAA4DjgQ8BE2PbdGAJsBfwPeBOMxsBYGYjgTuBm4B3AS8Cd5WCmtmxwAXAWOAw\nYDQwpapnJCIiIv0iddFgZgbsA5zs7k+5++8IH/QnmNmhwC7AqR5cTBhNOCWuPhaY6+5XuvuTwKeA\nnc3s4Nh+NnCFu//C3R8GTgU+bWaD8niSIiIiUrssIw1/AY509xfLlr+eMDLwSNnhhFmEQxUA+wL3\nlxrcfQ3wCLCfmdUTToV/ILHuHKAJ2CNDfiIiItKPUhcN7v6yu99bum9mdcCZwK+B4YRDE0nLgBHx\ndl/tbyAc8tjQ7u5dwPLE+iIiIrKF1XL2xCVAO/A1YAiwrqx9HdAcb/fVPiRxv7f1RUREZAur6jLS\nZjaZMA/hY+6+wMzWAtuVPayZcIYFwFpeWQA0E662uzZxv7f1+11DQz2NjfXU19flEgcoTKwi5pSM\nVcScao2VjNPQsOn/teaXR6wi5pRnLOU08LGKmJPkL3PRYGbXECYqnujupTMgXgB2K3voMGBpon1Y\nhfZHCYch1sb7T8e/0QBsn1i/37W0DKa1dRuGDq1t7mU
pDlCYWEXMKRmriDnVGisZJ/xa5tyqY+2x\nxx6v+MXMlpbBVcfrjzhFjaWcBj5WEXOS/GS9TsME4LPAx939zkTTHGCcmTW7e+kww4FsnNw4J94v\nxRlCOLRxgbv3mNnc2F6aLLk/0Ak8lvH5VK2jYw0rVqxi5cq1DM0hDlCYWEXMKRmriDnVGisZ5w9/\neISFhx2c+dcyIfxiZsevfsuee4ZfzGxoqKelZXBVP9edlFecosZSTgMfq4g5STalLzp9SV00mNko\nYDxwEfCgme2QaP4tsBi41cwmAh8mnBFxcmy/GTjfzL4E/AyYADzj7qUi4VrgOjN7gjAh8lrg+oG8\nuFNXVzfr13fT3d2TSxygMLGKmFMyVhFzqjVWeZxqfy0TYEUiVqX4tcgrTlFjKaeBj1XEnCQ/WQ4Y\nfTg+fjzhg30J4fDBEnfvBo4mHGKYB5wAHO3uzwO4+0LgGMJ1Gx4inDFxdCmwu98GTAK+DcwgXONh\nXC1PTERERPKVeqTB3ScDk/to/zPh8tK9tc8Adu2jfQq6CqSIiEhhaWqqiIiIpKKiQURERFJR0SAi\nIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0iIiKSiooGERERSaWqX7kUkS0j/PjVgqqvyd/Wtvsr\nfvxKRCQtFQ0iryLz5z/O4sMPqfrHr5gxk/b2vXLOSkS2FioaRF5lavrxqzwTEZGtjuY0iIiISCoq\nGkRERCQVFQ0iIiKSiooGERERSUUTIUW2Qjp1U0SqoaJBZCukUzdFpBoqGkS2Ujp1U0Sy0pwGERER\nSUVFg4iIiKSiokFERERSUdEgIiIiqahoEBERkVSqPnvCzJqBecAZ7n5/XHYVcBbQA9TF/89y92tj\n+xhgIjAMuBcY6+7LEzEvBk4hFDM3ufu4avMTERGRfFU10hALhv8FditrGgWMA4YTCoPhwM1xnX2A\nG4EJwGigFbg1EfM8YAxwFHAscKKZnVtNfiIiIpK/zCMNZjYK+EEvzaOAKe7+1wptZwC3ufv3Y5yT\ngIVmtpO7LwTOBsa7++zYPo4wKnF51hxFREQkf9WMNBwC/BrYj3AIAgAz2xbYEXi6l/VGA/eX7rj7\n88AiYLSZDQdGAg8kHj8L2MnMdqgiRxEREclZ5pEGd7+udNvMkk2jCHMYxpvZkcBy4HJ3nxbbhwNL\nysItA0bEtp6y9mWEomREvC0iIiJbUJ6Xkd4V6AYWAFcD7wauN7OX3X06MARYV7bOOqA5tuHunWVt\nxPZ+19BQT2NjPfX1dZt/cIo4QGFiFTGnZKwi5lRrrCLmlIyVZ04NDZv+X2vcPGIpp4GPVcScJH+5\nFQ3uPs3MfuruL8VF883sHcBpwHRgLa8sAJqB1bENM2tKFA6lx67OK8e+tLQMprV1G4YOHZRLHKAw\nsYqYUzJWEXOqNVYRc0rGyjOn5LK85BVLOQ18rCLmJPnJ9QerEgVDyZPAofH2C4QzKpKGAUtjW128\nvyjR1hPb+11HxxpWrFjFypVrGZpDHKAwsYqYUzJWEXOqNVYRc0rGyjOnhoZ6WloGV/0z20l5xVJO\nAx+riDlJNuVfBCrJrWgws68D+7v7exOL24Gn4u05wIHAtPj4kYT5CrPdfamZLYrtpTMzDgIWufuA\nzGfo6upm/fpuurt7cokDFCZWEXNKxipiTrXGKmJOyVh55tTXsjzjb8k4ecYqYk55xipiTpKfPEca\n7ga+HK+tcBdwBPAJwtwGgKnATDObQ7go1JXA3e6+KNE+2cxKow6TgEtyzE9ERERqUGvRsOHrirvP\nM7PjCNdWmAg8B4xx94di+xwzOzW2tQIzgM8mYl0CvAm4A+gCbnD3q2rMT0T6WWdnJ3PnLqh6KLmt\nbXeampr6ITMRyVtNRYO7N5Tdv5sw4tDb46cRD09UaOsGzo//RORVYv78x1l8+CG0VbHuEwAzZtLe\nvlfOWYlIf8h1IqS
IbJ3agL2rXHdFnomISL/SSbAiIiKSiooGERERSUVFg4iIiKSiokFERERSUdEg\nIiIiqahoEBERkVRUNIiIiEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIi\nIqmoaBAREZFUVDSIiIhIKioaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKp\nNFa7opk1A/OAM9z9/rhsZ+AGYD/gOeAL7n5fYp3DgSuAtwGzgbHu/myi/RzgfGBb4HbgTHdfW22O\nIiIikp+qRhpiwfC/wG5lTXcBS4C9gO8Bd5rZiLjOSOBO4CbgXcCL8fGlmMcCFwBjgcOA0cCUavIT\nERGR/GUuGsxsFDAH2KVs+WGEEYRTPbiYMJpwSnzIWGCuu1/p7k8CnwJ2NrODY/vZwBXu/gt3fxg4\nFfi0mQ2q5omJiIhIvqoZaTgE+DXhEERdYvm+wCNlhxNmxceV2u8vNbj7GuARYD8zqwf2Bh5IrDsH\naAL2qCJHERERyVnmOQ3ufl3ptpklm4YTDk0kLQNGpGh/AzAo2e7uXWa2PLb/PmueIiIikq+qJ0JW\nMARYV7ZsHdCcon1I4n5v6/erhoZ6Ghvrqa+v2/yDU8QBChOriDklYxUxp1pjFTGnZKwi5lS6nfy/\nlph5xHmt55RnrCLmJPnLs2hYC2xXtqwZWJ1oLy8AmoEVsY1e2lczAFpaBtPaug1Dh9Y2haIUByhM\nrCLmlIxVxJxqjVXEnJKxiphT+bI85BUnz1hFzCnPWEXMSfKTZ9HwAq88m2IYsDTRPqxC+6PAckLh\nMAx4GsDMGoDtE+v3q46ONaxYsYqVK9cyNIc4QGFiFTGnZKwi5lRrrCLmlIxVxJwgfLNsaRlMR8ca\nurq6q46ZV5zXek55xipiTpJNefFeSZ5FwxxgnJk1u3vpMMOBbJzcOCfeB8DMhgDtwAXu3mNmc2N7\nabLk/kAn8FiOOfaqq6ub9eu76e7uySUOUJhYRcwpGauIOdUaq4g5JWMVMafNLcsr9paOVcSc8oxV\nxJwkP3kWDb8FFgO3mtlE4MOEMyJOju03A+eb2ZeAnwETgGdKF4YCrgWuM7MnCBMirwWu18WdRERE\niqHWWSYbvmK4ezdwFOEQwzzgBOBod38+ti8EjiFct+EhwhkTRyfWvw2YBHwbmEG4xsO4GvMTERGR\nnNQ00uDuDWX3nwEO7ePxM4Bd+2ifgq4CKSIiUkg6n0VERERSUdEgIiIiqahoEBERkVRUNIiIiEgq\nKhpERESveUOkAAAXe0lEQVQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIiIqmoaBAR\nEZFUVDSIiIhIKioaREREJBUVDSIiIpKKigYRERFJpXFLJyAiUtLZ2cncuQvo6FhDV1d3pnXb2nan\nqampnzITEVDRICIFMn/+4yw+/BDaMq73BMCMmbS379UPWYlIiYoGESmUNmDvKtZbkXciIvIKKhpE\n5DWnlsMcoEMdIr1R0SAirznVHuYAHeoQ6YuKBhF5Tar2MAfoUIdIb3TKpYiIiKSiokFERERSyfXw\nhJkdDdwB9AB18f+fuPvHzKwdmArsDswHTnP3RxLrjgEmAsOAe4Gx7r48z/xERESkenmPNOwG/JTw\nwT8MGA58xsyGAPcAvwX2BGYD95jZYAAz2we4EZgAjAZagVtzzk1ERERqkPdEyFHAfHf/W3KhmZ0C\nrHb3cXHROWb2AeCjwDTgDOA2d/9+fPxJwEIz28ndF+aco4iIiFShP0Yanq6wfF9gVtmy3wH7xduj\ngftLDe7+PLAoLhcREZECyHukwYD3m9nXgAbgR4RDDsMJ8xiSlsGG06iHA0sqtI/IOT8RERGpUm5F\ng5m9FRgMrCEcdtgFuBoYEv+tK1tlHdAcb2+uvd81NNTT2FhPfX1dLnGAwsQqYk7JW
EXMqdZYRcwp\nGauIOdUaq79yamjY9P9aYuYRp6ixipiT5C+3osHdF5nZ9u7+Ulz0RzNrAL4HzOSVBUAzsDreXruZ\n9n7X0jKY1tZtGDp0UC5xgMLEKmJOyVhFzKnWWEXMKRmriDnVGqu/ckouy0NecYoaq4g5SX5yPTyR\nKBhKngQGAX8hnE2RNAxYGm+/sJn2ftfRsYYVK1axcuVahuYQByhMrCLmlIxVxJxqjVXEnJKxiphT\nrbH6K6eGhnpaWgZX/TsWJXnFKWqsIuYk2ZQXypXkeXjifcAPgBHuvjYubgdeBB4AvlK2yv7AhfH2\nHOBAwpkUmNlIwnyGOXnltzldXd2sX99Nd3dPLnGAwsQqYk7JWEXMqdZYRcwpGauIOdUaq79y6mtZ\nXrFfS7GKmJPkJ8+RhgcJhxNuNLNvAG8HpgCTgZ8Ak83sCuB64HPANsDtcd2pwEwzmwPMA64E7tbp\nliIiIsWR2ywTd18JHAG8CZgL3ABc5+6Xufs/gP8ADiYUBfsAR7r7mrjuHOBUwpkWs4DlwCl55SYi\nIiK1y3tOw5OEwqFS2zyg19+adfdpxMMTIiIiUjw6n0VERERSyfviTiIirymdnZ3Mnbug6pn8bW27\n09TUlFsckS1JRYOISB/mz3+cxYcfsuHytVk8ATBjJu3te+UWR2RLUtEgIrIZbcDeVa67oh/iiGwp\nmtMgIiIiqahoEBERkVRUNIiIiEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHR\nICIiIqmoaBAREZFUVDSIiIhIKvrtCRGRVxn9YqZsKSoaREReZfL8xcxaChAVH1sfFQ0iIq9Cef1i\nZrUFiH6ue+ukokFEZCtXbQGin+ve+mgipIiIiKSikQYREalZnpMzNdGzuFQ0iIhIzfKcnJlnLMlX\noYoGM2sGrgWOAVYDl7n75Vs2KxERSSOvyZl5x5L8FG1Ow6XAnsC7gdOBCWZ2zBbNSERERIACFQ1m\nNgT4NHC2uz/m7tOBKcCZWzYzERERgQIVDcAehMMlsxPLZgH7bpl0REREJKlIRcNw4EV3X59YtgwY\nZGbbb6GcREREJCrSRMghwLqyZaX7zWkCPFHlH34CGNlQT2NjPfX1dbnEAQoRq4g5lccqYk61xCpi\nTuWxiphTLbGKmFN5rCLmVEusIuZUKZbkq66np2dL5wCAmR0HXO3ub0ks25WwDWzv7i9tseRERESk\nUIcnXgDeaGbJnIYBa1QwiIiIbHlFKhr+APwTGJ1YdhAwd8ukIyIiIkmFOTwBYGZTgQOAU4ARwK3A\nye5+15bMS0RERIo1ERLgXMIVIX8DvAz8lwoGERGRYijUSIOIiIgUV5HmNIiIiEiBqWgQERGRVFQ0\niIiISCoqGkRERCQVFQ0iIiKSStFOuQTAzJqAK4AxhN+fuNndvxbbpgMfAnqAuvj/h9z957H9DOBL\nwBuAGcAZwH/HWJ3Ai8BOwArgJeDfKsUys2bgUuBjcflPge54f0NOZtYA/B3YNmWcu2J+k2JO3cAa\nYDtgEbAaaO/t+SX66EvAacDPE/30U8CAveLDtqkyp/XAP4A3xv5aB+xaHotwie9ny5bXxb99F/Du\nLH1e4fW7L65zLPA6YGjsr7rE3yHGSD6/N8TXJJnPi8DtZN8OKsVaG/8lt4PtgCVAU5X9tB5YCWwP\nPEfYDvatMlaqbaqPWHWE138V6bfzNH3+EvAW4G/xdsXtPEWf3wH8Kxt/AbdiTrDZ/UFX7JvtgOfj\n892zin6qI/22OQS4CvhIfNyPgXHA/7Dpe+/NwHLCNrFbhpxg021qs+/jmFdTzOF4wn7jt4TT3j8c\n15tFeB/2sPHLZk/i/772UUbYn38KGJxYj0TOpb7rM5a775JYdg5wednzuczdv4T0m0IWDcDVhI3+\nvUALcJuZPefuNwCjgBMI13IoWQFgZh8HpgAnA
k8DNwMPEnYO7yVcA6IdmAgsIOzQJhHexJvEIuxY\nDgLeT3iT3BeXb5ITYSffEmOdmSLONMIbsAk4GphO2GlNJOzgvwdMAL5dIRbxeb4NuIDwRntPzGk7\n4JfAr4B/J/zE+KD4dx/PkNMH43N5C3Ah8DDws9iXXy3LaT3hUt9JV8ScdiNjn1d4/WbG53AY8Cbg\nu3Hd7wG/J/T9RTG35PPbjbCTbGPjTmlKoq+ybAflsaYQLkB2PBu3g78SCrBmwjZwe8Z++gDhg3BH\nQp8/S3hNxgM3ZoyVZZuqFOt3hAurvYfwQZh2O99cn19DKGYvJFwy/sZenl+lWMk+3x74BfAY4eqx\nvyFstx+OyzbE2cz+4EjgTsJ29Q3CL+reRNjGS9tT2n7anvTb5lWEwuTweP+WGKsh9tOdhPfeROAR\n4J7Yd/+TIqfkdpDlfUzsg6MIhcuLhH3JDsCBhNf9bkL/vg+YA0wm7GtWlsUBNtlH/ZVN9+dvBq6P\nfXUP4f33HeDLFXKqFCtpN+BbMffSNrcK6VeFKxrMrJVwRcjD3P3huOxSYF8z+w6wCzDP3cs3IIjf\nlksXhDKzrxM2zIMJl6huJ3zD3omw0fcA2/YS60jgend/NObUAjxXltN7CTszgCWbixPXuzX+7YMJ\nb47phDfOOwhV8/eBIb3EKpkK/BHYD/iMuz9sZu8kvHGWEHbK2wNPAXu4+68z5LQt4dvA14F/AS6J\ncffoJacNy8xsf8K3kXrgOLL3+YbXL/b5dsBC4BF37zGzSwg7ipeAtxK2g4srxBkFPO3uf4t5tRIK\nzcOqyGlDrGScxHZwO2EH/0x8/B+r6KcRhF95nQjsHHP6DrCvu1+UMVbWbSoZ6wjCaz7G3efGZWm3\n8776fAXhA34SG/v8RqA9a5/HH7brJGz/fyZ8i76NcOXYE8vi9LU/WEf4ULwgPufLCUVDb9vB5vop\n7ba5DjjT3f8Q1/tf4OKY0wuEAmI+8DbCNloHbFfFNpX1ffyfwFnuPiv2+VsInw8vxX73GPvl+Px+\n4+7PVIhTMpVQ9LyVjfvz38Vc/zX21aiY00Up9nePEN4nSaOA75S2ORkYhSsaCJXtS+4+q7TA3acA\nmNnuhG/Xz5avZGbbEj4MPplY3Ai8GN8IZwGPuftXE7HWA1/oJY/lwHFm9gPgiPh3N8nJzH5FqJon\nUFYd9xIH4BPAusTzOznmU0cYOoUwvFiRmX2SsDP4PeFDpRTn7zHHRwnDgT2ED6BHsuRkZjsAR7v7\n/fHv7R5jpfl58knAvcDorH1e4fU7EFjh7v9SekxiO/ggYQd2RnmcaDfCN8uSDdtUFdtBMtYrtk1C\nX04i/HbKz4HFvcRJKu+nLwJPufuFiZy6gd2zxorLTo5xUm1TCZcSfiDuh6UFGbbzvvp8DLAocYhx\nd8K3/X9LEau8z3chvH4nJ/rpd8AXkwFS7A/eStiu/wGMJRQmdUB5gV1JxX6Kf7fPbdPdN4zSmNlO\nhA/UtYnnNybRflLM6acpcirfpt5Myvdx3E5OZOO+4kCgg1C0vz7x0CsI+5aK++BEvNI+6iZCgfiK\n/bmZvYNQHK3PEGtCWfMoNt3mZAAU7oqQZvZ5wgZ8DWEYrYkwhPc/wEcJw1H3EYa7FgMT3P2XZrYH\nYaM/nPCNbRfCccpG4Mr4rxl4gLDhNxHeEHeXx4p57EkYMh5JeOOuAT4PnBfXfYwwtHkNYUjyKcK3\nnr7iACwF/kIYpiw9v+8AXyMct19HGKKslNMb4999b1z3OMIOrxTnT3G9hvjvjzHHanIq9fkJhKHy\np2N/bRIr8bodQBgqvpBwzPWaLH1e4fX7N8KowNcIH+jJnO4jHF75SS/99PPYly2EIf9lhB355Vly\nqhDLCB92EwgfCsmcTiNsm9OBfTL20+T4On2djdv5LTH+3zLGyrRNlcX6LeFQ1uWJOGm38776/NLY\nxxcmnt/1h
GPnr3h+m+nz7eLr18rG/cFfCR8gc0m/P/gqYaj89YQP0wbCe3x6lf2UattMxLmVUND8\nnXAI6YqyWJ8hfEv/Z4pYfW0Hmd7HMd7nCfMstmHjPJ0RiTx2IGwHreVxyvZRewOXEUbhynPaiXC4\naiQV9ue9xJrg7m+LbW8m7LO+E9ddA9zk7peVPx/JVxHPnhhKGFYdS/jGdB5wFnAOYRLPYMIxzSMI\n3+zujh+CQwkf7t8kVN3HEXZe/x5jPRDjH0r4wHworrOsQiwIE60WEjbIWwjH1ifGnErH/2bFnOqB\n/0sR532EN83uZc/vDMKb6sfx73T3EusK4BZ3XxAf97pEnC8ShjjnEHbInYQd7lerzKnU519O9Gul\nnErGxn7tZuPrl6XPy1+/HxJ2SheV5fQlwk65m8rbAfE12ZZQ5H2M8EFTen5Zt4NkrNsIO9xJFfrp\n7fHx/6+KfvoU4fX6MuH3Vw5j44SxrLFKeaXdppKxniB8uJbiZNnO++rzO+N6yfdxM+H1ztrnXyQU\nE78gjEhsQ3if/5Ns+4NPEz6Q1hGKiF8SDhE9U0U/Zdk2Sy4mHLLpSOSUjPVjwmGl1xEONVa7HWR9\nH0P4gB5O+FA+OfZfE+GQyZ2EbXUHwpyGvvZRELaZ8pzOBk4iFF697c8rxUoqTehcSpi/cREwPhY8\n0o+KWDSsJ+wwTnD338fjkRcBp7r7N4Ad3f277v64u3+dsMF9Nq4H4RjmPe4+m/DGayTsfJYSvrF8\nDfigux9DmBA3qjxWHNq8ETjP3R9g4xDYGwkz299PGMb8SMzpQcKQaZ9x4tyCHxPegGeXPb8PuvvH\nCJPD9q8Q632EOQwTYy7dhNfvBHf/PeED9iVgB3c/nfAtahrw0SpzmkSYMLkzcLi7f6tCnwNgYWb9\nUYTJYBtevyx9Xv76EYYs64jzGhL9dDZhiHxYL9sBhA+TQ9x9jrs/GPNqIBzXzZLTJrEIHyh1hB1d\nXXLbJEz26gbuyNpP7j6DcDy6m/Ct/LD4Or9URazU21SFWA8n45ByO0/R538iHAZIvo9PBFZW0efT\nCBM6D4+v4RLCZMeXM+4PRhHeI18lzNb/AOFXdduz9lPGbRMAd3/K3R8ijN40AFdVeO1OIsw1OBh4\nssrtIPX7OMY7Gvg44QP5PTHOjcBXgDe4+xmECZifJWyz36D3fVTpjIbynO4BGt39P+llf95LrGT/\n3Q+80d2/6u5PuPv34mNPQ/pVEYuGpYRjfM8nljlxKN3dXy57/JOEbxBLE48tKZ010Bjbnyd8SxiZ\naB+ZeHwp1q6Ebx1/TOZE6K+RhDfVQYCZ2T+A/YGvmdnjm4kDYYcD0GhmO8U3x4bnRxhSfWOFnI4n\nDBG+GP/m0bF9QRye3JMwZJjsp0cJw4CpcyI8qUbC0G8LcKS7J89UScYq2T+u+ys2ff2y9Hn561fq\ncxKPd8KO/u4+tgPcfa27r0u0/Sn+35Qxp/JYpZyWJ56/l61bMaeot34iDsl+DFjt7u8gfMt7Lmus\njNtUeawH2PS9l3Y731yfv0D4oEnm1MLG17yvWJX6fDphhGCEhzkvKxJ9lXZ/sCfhPZDM6SE2vl82\nySnqrZ9Kf6fPbdPMGs3sI2Y2NNFW6qceM3uzmR1VltM8Qh+2bCanV2xTWd/HZnY88CPCoY41Zc/v\nD2y6b3mSMIK1Hb3vo6YSvszUs2m/DieedtnH+7hSrJ3MrCPu73D38vk15X0j/aCIRcNsYJCZ/Uti\n2W7Ac2Z2i5ndWPb4dxKq8EWEbx17JNpejP/XEYbtdyZMjnrOzG4hDN09Vx4rxqmLf3dDTvH2s4TZ\nyJcRhlx/STguOZVw6lxfcZI5QRievI0wjFvKaWJcN5nTU4Shz93i89uDMHQH4VSzh+PfekdZP41i\n4ySjtDkB3EDYqT4LfLqXPn8qcX8fwmzxTjZ9/VL3eYXXr9Tn3YRDKcScuwg
fYhW3AzPb1syWm9nB\nibbSLO9VWXKqEKuU0xsTz3+3uG5pdnrmfjKzXeNkww3bOeGb3cyssci2TW0Si3AYIvneS7Wdp+zz\nnWJupZy+HJezmVib9LmZ7UoY1XkOuDhuB/+R6Ku0+4MlhP5O9vmZbDopL20/QYptk/BBOS3mW/Jc\nop92IRxm2DeR01Tgb+7+975yqrAdQIb3sZm9J+Z2deyHDXFiEfpjYGFi39IOLHf35fS+j7qAUPx2\ns+kE2j2AZX3sz3uL9UK8Pc/MPm1mT5at217WN9IPCjcREsDMfkqoYE8nVKXTCMNgLxCOdY8lDJWe\nSNi4Rrn7YjM7j3DM7JOECVbfJkwmeoZwjPfHhGp5MuHb9vmEneLUCrHuIUwA+xwbr9PwT8KwaHlO\nP2HjBZI2F2cq4VvXwpjrdMIx28mEYbxzCROsrk3E2i3uBJN99J/xMY/GftqDMCnoPsKO5xrCm3Us\nYWeUNqdpwHWE4eRJhG8v3yZMRry3Uk5x57YmHhZJvn5Z+7z89ZsZn8NhiT7fnvCBOrGP7eCu+Lp/\nNq5/JaGg+lMVOZXHKl2v49Cy7aCTMJfkC4RJlVn66RzCh/KgmNOuhMM7nycM5WaJlXmbSsbazHuv\nr+28rz4/nXBI7J2E4ur18XW4APhBxj5/a3x+M2Iu34p9/z7CNQrS7g8mEo7P98R+ao2vw2TCh201\n/ZRm27yUcGGnTxJGUK4hFLGlfdRthPfr5Pic/yvxGmTZDtK+j0cRCqhnCCMcJ8W/O41wQazTCO+V\nOwhfTm6Nr+3KGH9teU4lcR81gXCWRKmvdiTMXbgk9k+l/XmvsXzjRMi3EkaKboh57E14/37G3X+C\n9JsinnIJG8+eeIBwxbZr3P1bAGZ2OuEY7UjCEPMR7r4YwN0vs3C1w+8SJkhNJ7xBJwH3EzbwZwg7\nkhcJH7IfImzMm8Qi7LQvI+y0ewgXShncS05PA4cQhj83F+dOwjHGSwhv3nWE4fJzCTu2mwkTe05L\nxNrkDZTwV8JOuZTT9YRvmFNiWz1hR5Elp2sI38S2ZeMFZeoIZ1as7yWnNxOGL0tKr1+mPq/w+t1D\n+PZWen43E3YqdxGGqytuB4TjwKXn1xwfPyY+n6zbQXmsn8X+2GQ7sHD6XA9h1OKijP30a0JB+nJ8\nfvMJM+HPIEyYyxKrmm0qGauv915f23lfff4AYXb7nwgfzEsJO/pPED5ks/b59Wycmb+QMFnwvvI4\nm9kf/CjGWkXYDhbFnD5CmJuQtZ/SbptfJRRCtxMK1p8AxxBe59K2WZo0uopQ4I4u6/M020Ha9/Fi\nM9uXcChgBKGAKD2WmNM/CBMod419t5Lwvjynl5zKJftqLeF9coO7/18v+/O+YgHg7ovM7AOEfdZp\nhInMX1LB0P8KOdIgIiIixVPEOQ0iIiJSQCoaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURE\nRFJR0SAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0iIiKSyv8HWaNpi/E0QfYAAAAASUVORK5C\nYII=\n", 1127 | "text/plain": [ 1128 | "" 1129 | ] 1130 | }, 1131 | "metadata": {}, 1132 | "output_type": "display_data" 1133 | }, 1134 | { 1135 | "data": { 1136 | "text/html": [ 1137 | "
\n", 1138 | "\n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | "
Health_Camp_IDCount
065664061
165843041
265833026
365483020
465822763
565762739
665672441
765562419
865512180
965791981
1065731795
1165741691
1265501425
136559692
146568643
156577385
166533377
176572184
186525166
196547111
206545109
\n", 1254 | "
" 1255 | ], 1256 | "text/plain": [ 1257 | " Health_Camp_ID Count\n", 1258 | "0 6566 4061\n", 1259 | "1 6584 3041\n", 1260 | "2 6583 3026\n", 1261 | "3 6548 3020\n", 1262 | "4 6582 2763\n", 1263 | "5 6576 2739\n", 1264 | "6 6567 2441\n", 1265 | "7 6556 2419\n", 1266 | "8 6551 2180\n", 1267 | "9 6579 1981\n", 1268 | "10 6573 1795\n", 1269 | "11 6574 1691\n", 1270 | "12 6550 1425\n", 1271 | "13 6559 692\n", 1272 | "14 6568 643\n", 1273 | "15 6577 385\n", 1274 | "16 6533 377\n", 1275 | "17 6572 184\n", 1276 | "18 6525 166\n", 1277 | "19 6547 111\n", 1278 | "20 6545 109" 1279 | ] 1280 | }, 1281 | "execution_count": 12, 1282 | "metadata": {}, 1283 | "output_type": "execute_result" 1284 | } 1285 | ], 1286 | "source": [ 1287 | "grouped_df = test.Health_Camp_ID.value_counts().reset_index()\n", 1288 | "grouped_df.columns = [\"Health_Camp_ID\", \"Count\"]\n", 1289 | "plt.bar(range(len(grouped_df.Health_Camp_ID)), grouped_df.Count, tick_label=grouped_df.Health_Camp_ID, color='r')\n", 1290 | "plt.show()\n", 1291 | "grouped_df" 1292 | ] 1293 | }, 1294 | { 1295 | "cell_type": "markdown", 1296 | "metadata": {}, 1297 | "source": [ 1298 | "Now let us see what is the number of common patients and Health camps between train and test set." 
1299 | ] 1300 | }, 1301 | { 1302 | "cell_type": "code", 1303 | "execution_count": 13, 1304 | "metadata": { 1305 | "collapsed": false 1306 | }, 1307 | "outputs": [ 1308 | { 1309 | "name": "stdout", 1310 | "output_type": "stream", 1311 | "text": [ 1312 | "Number of common patients between train and test : 8460\n", 1313 | "Number of common health camps between train and test : 0\n" 1314 | ] 1315 | } 1316 | ], 1317 | "source": [ 1318 | "print \"Number of common patients between train and test : \",len(set(train.Patient_ID).intersection(test.Patient_ID))\n", 1319 | "print \"Number of common health camps between train and test :\", len(set(train.Health_Camp_ID).intersection(test.Health_Camp_ID))" 1320 | ] 1321 | }, 1322 | { 1323 | "cell_type": "markdown", 1324 | "metadata": {}, 1325 | "source": [ 1326 | "###### Points to ponder\n", 1327 | "* 8460 patients are common between train and test. So it might be helpful to use the behaviors of these patients as features.\n", 1328 | "\n", 1329 | "* There are no common health camps between train and test since the dates are disjoint. So it might be helpful to have a validation sample that displays the same behavior." 1330 | ] 1331 | }, 1332 | { 1333 | "cell_type": "markdown", 1334 | "metadata": {}, 1335 | "source": [ 1336 | "##### Getting the outcome variable\n", 1337 | "\n", 1338 | "We do not have an 'outcome' variable as such in the training set and we are told that (from data page)\n", 1339 | "\n", 1340 | "\"MedCamp runs 3 formats of these camps. The first and second format provides people with an instantaneous health score. 
The third format provides information about several health issues through various awareness stalls.\n", 1341 | "\n", 1342 | "&\n", 1343 | "\n", 1344 | "For the first 2 formats, a favourable outcome is defined as getting a health_score, while in the third format it is defined as visiting at least a stall\"\n", 1345 | "\n", 1346 | "So let us use this information to create a response variable and add it to the train dataset" 1347 | ] 1348 | }, 1349 | { 1350 | "cell_type": "code", 1351 | "execution_count": 14, 1352 | "metadata": { 1353 | "collapsed": false 1354 | }, 1355 | "outputs": [ 1356 | { 1357 | "name": "stdout", 1358 | "output_type": "stream", 1359 | "text": [ 1360 | "Number of favourable outcomes from all camps : 20534\n", 1361 | "(75278, 8)\n" 1362 | ] 1363 | }, 1364 | { 1365 | "data": { 1366 | "text/html": [ 1367 | "
\n", 1368 | "\n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | "
Patient_IDHealth_Camp_IDRegistration_DateVar1Var2Var3Var4Var5Outcome
0489652657810-Sep-05400021
1507246657818-Aug-054550070
2523729653429-Apr-06000001
3524931653507-Feb-04000000
4521364652928-Feb-061510071
\n", 1446 | "
" 1447 | ], 1448 | "text/plain": [ 1449 | " Patient_ID Health_Camp_ID Registration_Date Var1 Var2 Var3 Var4 Var5 \\\n", 1450 | "0 489652 6578 10-Sep-05 4 0 0 0 2 \n", 1451 | "1 507246 6578 18-Aug-05 45 5 0 0 7 \n", 1452 | "2 523729 6534 29-Apr-06 0 0 0 0 0 \n", 1453 | "3 524931 6535 07-Feb-04 0 0 0 0 0 \n", 1454 | "4 521364 6529 28-Feb-06 15 1 0 0 7 \n", 1455 | "\n", 1456 | " Outcome \n", 1457 | "0 1 \n", 1458 | "1 0 \n", 1459 | "2 1 \n", 1460 | "3 0 \n", 1461 | "4 1 " 1462 | ] 1463 | }, 1464 | "execution_count": 14, 1465 | "metadata": {}, 1466 | "output_type": "execute_result" 1467 | } 1468 | ], 1469 | "source": [ 1470 | "## Get only the necessary columns and rename them for concatenating ##\n", 1471 | "col_names = [['Patient_ID','Health_Camp_ID','Outcome']]\n", 1472 | "first_camp = first_format_camp[['Patient_ID','Health_Camp_ID','Health_Score']]\n", 1473 | "first_camp.columns = col_names\n", 1474 | "second_camp = second_format_camp[['Patient_ID','Health_Camp_ID','Health Score']]\n", 1475 | "second_camp.columns = col_names\n", 1476 | "third_camp = third_format_camp[['Patient_ID','Health_Camp_ID','Number_of_stall_visited']]\n", 1477 | "third_camp = third_camp[third_camp['Number_of_stall_visited']>0]\n", 1478 | "third_camp.columns = col_names\n", 1479 | "\n", 1480 | "## concat all the three camps ##\n", 1481 | "all_camps = pd.concat([first_camp, second_camp, third_camp])\n", 1482 | "all_camps['Outcome'] = 1\n", 1483 | "print \"Number of favourable outcomes from all camps : \", all_camps.shape[0]\n", 1484 | "\n", 1485 | "train = pd.read_csv(\"Train.csv\")\n", 1486 | "print train.shape\n", 1487 | "\n", 1488 | "## merging with train and create a new variable 'outcome' which can be used as\n", 1489 | "train = train.merge(all_camps, on=['Patient_ID','Health_Camp_ID'], how='left')\n", 1490 | "train['Outcome'] = train['Outcome'].fillna(0).astype('int')\n", 1491 | "train.head()" 1492 | ] 1493 | } 1494 | ], 1495 | "metadata": { 1496 | "kernelspec": { 1497 | "display_name": 
"Python 2", 1498 | "language": "python", 1499 | "name": "python2" 1500 | }, 1501 | "language_info": { 1502 | "codemirror_mode": { 1503 | "name": "ipython", 1504 | "version": 2 1505 | }, 1506 | "file_extension": ".py", 1507 | "mimetype": "text/x-python", 1508 | "name": "python", 1509 | "nbconvert_exporter": "python", 1510 | "pygments_lexer": "ipython2", 1511 | "version": "2.7.10" 1512 | } 1513 | }, 1514 | "nbformat": 4, 1515 | "nbformat_minor": 1 1516 | } 1517 | --------------------------------------------------------------------------------