├── README.md
├── Rank 1: Rock n Rolla
├── README.md
├── ensemble.py
├── getOutcome.py
├── model_10.R
├── finalModel_v3.py
└── DataExploration.ipynb
├── Rank 3: sonny
└── Final_Model_Sonny_Knoctober_Submited_to_AV.R
└── Rank 2: Nut Crackers
└── final_model.R
/README.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Rank 1: Rock n Rolla /README.md:
--------------------------------------------------------------------------------
1 | Code and files used for the Analytics Vidhya (AV) [Data hack - Knocktober](https://datahack.analyticsvidhya.com/contest/knocktober-2016/) competition.
2 |
3 | The leaderboard can be accessed [here](https://datahack.analyticsvidhya.com/contest/knocktober-2016/lb)
4 |
5 | The code file - vopani_final.R is written by Rohan Rao.
6 |
7 | The code files getOutcome.py, finalModel_v3.py, ensemble.py are written by Sudalai Rajkumar.
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Rank 1: Rock n Rolla /ensemble.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | path = "../input/Train/"
5 | s1 = pd.read_csv(path+"model_10.csv")
6 | s2 = pd.read_csv(path+"sub35.csv")
7 |
8 | s1 = s1.merge(s2, on=['Patient_ID','Health_Camp_ID'], how='left')
9 | print s1.columns
10 | print np.corrcoef(s1.Outcome_x.values, s1.Outcome_y.values)
11 |
12 | s1["Outcome"] = (0.48*s1.Outcome_x.values + 0.52*s1.Outcome_y.values)
13 | s1.drop(["Outcome_x", "Outcome_y"], axis=1, inplace=True)
14 | s1.to_csv("final.csv", index=False)
15 |
16 |
--------------------------------------------------------------------------------
/Rank 1: Rock n Rolla /getOutcome.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | data_path = "../input/Train/"
5 | first_camp = pd.read_csv( data_path + "First_Health_Camp_Attended.csv" )
6 | second_camp = pd.read_csv( data_path + "Second_Health_Camp_Attended.csv" )
7 | third_camp = pd.read_csv( data_path + "Third_Health_Camp_Attended.csv" )
8 | print first_camp.shape, second_camp.shape, third_camp.shape
9 |
10 | col_names = [['Patient_ID','Health_Camp_ID','Outcome']]
11 | first_camp = first_camp[['Patient_ID','Health_Camp_ID','Health_Score']]
12 | first_camp.columns = col_names
13 | second_camp = second_camp[['Patient_ID','Health_Camp_ID','Health Score']]
14 | second_camp.columns = col_names
15 | third_camp = third_camp[['Patient_ID','Health_Camp_ID','Number_of_stall_visited']]
16 | third_camp = third_camp[third_camp['Number_of_stall_visited']>0]
17 | third_camp.columns = col_names
18 | print third_camp.shape
19 |
20 | all_camps = pd.concat([first_camp, second_camp, third_camp])
21 | all_camps['Outcome'] = 1
22 | print all_camps.shape
23 |
24 | train = pd.read_csv(data_path + "Train.csv")
25 | print train.shape
26 |
27 | train = train.merge(all_camps, on=['Patient_ID','Health_Camp_ID'], how='left')
28 | train['Outcome'] = train['Outcome'].fillna(0).astype('int')
29 | train.to_csv(data_path+'train_with_outcome.csv', index=False)
30 | print train.Outcome.value_counts()
31 |
--------------------------------------------------------------------------------
/Rank 1: Rock n Rolla /model_10.R:
--------------------------------------------------------------------------------
1 |
2 |
3 |
# Fix the RNG seed for this session.
seed <- 235
set.seed(seed)


## loading libraries
library(data.table)
library(xgboost)


## loading data
# All inputs are read from the current working directory.
train <- fread("Train.csv")
test <- fread("Test.csv")

health_camp <- fread("Health_Camp_Detail.csv")

health_1 <- fread("First_Health_Camp_Attended.csv")
health_2 <- fread("Second_Health_Camp_Attended.csv")
health_3 <- fread("Third_Health_Camp_Attended.csv")

# Drop column V5 from the first attendance table
# (presumably an unnamed extra column auto-named by fread -- TODO confirm).
health_1[, V5 := NULL]
# Give the two score columns distinct names so both survive the merges below.
setnames(health_1, "Health_Score", "Health_Score_1")
setnames(health_2, "Health Score", "Health_Score_2")

patient <- fread("Patient_Profile.csv")

# Mark the origin of each row so train and test can be split again after
# they are stacked into one table.
train[, train_flag := 1]
test[, train_flag := 0]
31 |
32 |
## processing data
# Stack train and test so every feature is computed identically on both.
X_panel <- rbind(train, test)

# Left-join the three attendance tables on the (patient, camp) key.
X_panel <- merge(X_panel, health_1, all.x = TRUE, by = c("Patient_ID", "Health_Camp_ID"))
X_panel <- merge(X_panel, health_2, all.x = TRUE, by = c("Patient_ID", "Health_Camp_ID"))
X_panel <- merge(X_panel, health_3, all.x = TRUE, by = c("Patient_ID", "Health_Camp_ID"))

# Left-join camp details and patient profile.
X_panel <- merge(X_panel, health_camp, all.x = TRUE, by = "Health_Camp_ID")
X_panel <- merge(X_panel, patient, all.x = TRUE, by = "Patient_ID")

# Target defaults to 0 and is set to 1 below:
#  - camps whose Category1 is not "Third": a positive score in either score column
#  - camps whose Category1 is "Third": at least one stall visited
X_panel[, target := 0]

X_panel$target[X_panel$Category1 != "Third" & (X_panel$Health_Score_1 > 0 | X_panel$Health_Score_2 > 0)] <- 1
X_panel$target[X_panel$Category1 == "Third" & X_panel$Number_of_stall_visited > 0] <- 1

# Parse the date columns ("%d-%b-%y") and turn the categorical columns into
# integer codes via factor levels. This must run after the target is built,
# because the target logic compares Category1 against the string "Third".
X_panel[, ":="(Registration_Date = as.Date(Registration_Date, "%d-%b-%y"),
               Camp_Start_Date = as.Date(Camp_Start_Date, "%d-%b-%y"),
               Camp_End_Date = as.Date(Camp_End_Date, "%d-%b-%y"),
               First_Interaction = as.Date(First_Interaction, "%d-%b-%y"),
               Category1 = as.numeric(as.factor(Category1)),
               Category2 = as.numeric(as.factor(Category2)),
               City_Type = as.numeric(as.factor(City_Type)),
               Income = as.numeric(as.factor(Income)),
               Employer_Category = as.numeric(as.factor(Employer_Category)),
               Education_Score = as.numeric(Education_Score),
               Age = as.numeric(Age))]

# Sort by patient and registration date, then number the rows so the
# neighbouring registration of the same patient can be found by shifting
# the row number.
setorder(X_panel, Patient_ID, Registration_Date)
X_panel$order <- seq(1, nrow(X_panel))

# Shift row numbers by +1: after the merge, row k carries the registration
# date of row k-1 when that row belongs to the same patient (NA otherwise).
X_date <- X_panel[, c("Patient_ID", "Registration_Date", "order"), with = FALSE]
X_date$order <- X_date$order + 1
names(X_date)[2] <- "Prev_Date"

X_panel <- merge(X_panel, X_date, all.x = TRUE, by = c("Patient_ID", "order"))

# Net shift of -1 relative to the original numbering: row k now receives the
# registration date of row k+1 of the same patient.
X_date$order <- X_date$order - 2
names(X_date)[2] <- "Next_Date"

X_panel <- merge(X_panel, X_date, all.x = TRUE, by = c("Patient_ID", "order"))

# Day differences and calendar parts. Next_Date_Diff is
# Registration_Date - Next_Date, i.e. the registration date minus the later
# neighbour's date.
X_panel[, ":="(Start_Date_Diff = as.numeric(Registration_Date - Camp_Start_Date),
               End_Date_Diff = as.numeric(Camp_End_Date - Registration_Date),
               Interaction_Date_Diff = as.numeric(Registration_Date - First_Interaction),
               Prev_Date_Diff = as.numeric(Registration_Date - Prev_Date),
               Next_Date_Diff = as.numeric(Registration_Date - Next_Date),
               Camp_Start_Year = year(Camp_Start_Date),
               Registration_Year = year(Registration_Date),
               Registration_Month = month(Registration_Date),
               Registration_Day = wday(Registration_Date))]

# Row filters. They act on the stacked table, so test rows failing any of
# them are dropped as well.
# NOTE(review): confirm that no test row is removed here, otherwise the
# submission written at the end of the script will be missing rows.
X_panel <- X_panel[Camp_Start_Year >= 2005]
X_panel <- X_panel[!is.na(Registration_Date)]
X_panel <- X_panel[Category3 == 2]

# Number of remaining rows per patient.
X_patient <- X_panel[, .(Count_Patient = .N), .(Patient_ID)]
X_panel <- merge(X_panel, X_patient, by = "Patient_ID")

# Number of remaining rows per patient on the same registration date.
X_patient_date <- X_panel[, .(Count_Patient_Date = .N), .(Patient_ID, Registration_Date)]
X_panel <- merge(X_panel, X_patient_date, by = c("Patient_ID", "Registration_Date"))

# Earliest registration date on which the patient has a positive Donation.
X_donation <- X_panel[Donation > 0, .(Min_Date_Donation = min(Registration_Date)), .(Patient_ID)]
X_panel <- merge(X_panel, X_donation, all.x = T, by = "Patient_ID")

# 1 only for registrations strictly after the patient's first donation date.
X_panel[, Donation_Flag := ifelse(is.na(Min_Date_Donation), 0, ifelse(Registration_Date > Min_Date_Donation, 1, 0))]
98 |
# Split the stacked table back into its train and test parts.
X_train <- X_panel[train_flag == 1]
X_test <- X_panel[train_flag == 0]

# Columns fed to the model.
X_features <- c("Count_Patient", "Count_Patient_Date", "Donation_Flag",
                "City_Type", "Income", "Education_Score", "Age",
                "Category1", "Category2",
                "Start_Date_Diff", "End_Date_Diff", "Prev_Date_Diff", "Next_Date_Diff")
X_target <- X_train$target

# NA entries are passed to xgboost as missing values.
xgtrain <- xgb.DMatrix(data = as.matrix(X_train[, X_features, with = FALSE]), label = X_target, missing = NA)
xgtest <- xgb.DMatrix(data = as.matrix(X_test[, X_features, with = FALSE]), missing = NA)


## xgboost
params <- list()
params$objective <- "binary:logistic"
params$eta <- 0.1
params$max_depth <- 5
params$subsample <- 0.9
params$colsample_bytree <- 0.9
params$min_child_weight <- 2
params$eval_metric <- "auc"

# 5-fold cross-validation for diagnostics; its result is not used by the
# final fit below.
model_xgb_cv <- xgb.cv(params=params, xgtrain, nrounds = 100, nfold = 5, early.stop.round = 30, prediction = TRUE)

# Final model: fixed 100 rounds on the full training set.
model_xgb <- xgb.train(params = params, xgtrain, nrounds = 100)

# Feature importance table, opened in the data viewer.
vimp <- xgb.importance(model = model_xgb, feature_names = X_features)
View(vimp)


## submission
pred <- predict(model_xgb, xgtest)

submit <- data.table(Patient_ID = X_test$Patient_ID,
                     Health_Camp_ID = X_test$Health_Camp_ID,
                     Outcome = pred)

write.csv(submit, "model_10.csv", row.names = FALSE)
138 |
--------------------------------------------------------------------------------
/Rank 3: sonny/Final_Model_Sonny_Knoctober_Submited_to_AV.R:
--------------------------------------------------------------------------------
1 | library(readr)
2 | library(dplyr)
3 | library(caret)
4 | library(xgboost)
5 | library(randomForest)
6 | library(lubridate)
7 | library(gbm)
8 |
# Read all inputs from the current working directory.
train <- read_csv("Train.csv")
H1 <- read_csv("First_Health_Camp_Attended.csv")
# Remove the column literally named "NA" from the first attendance table
# (presumably how the unnamed trailing CSV column was named on read -- TODO confirm).
H1$"NA" <- NULL

H2 <- read_csv("Second_Health_Camp_Attended.csv")
H3 <- read_csv("Third_Health_Camp_Attended.csv")

Camp_details <- read_csv("Health_Camp_Detail.csv")
patient <- read_csv("Patient_Profile.csv")
test <- read_csv("Test.csv")

# Flag row origin so the stacked frame can be split again later.
train$isTrain <- T
test$isTrain <- F

df_all <- bind_rows(train, test)

# Attach attendance records, camp details and patient profile.
df_all <- left_join(df_all, H1, by = c("Patient_ID", "Health_Camp_ID"))
df_all <- left_join(df_all, H2, by = c("Patient_ID", "Health_Camp_ID"))
df_all <- left_join(df_all, H3, by = c("Patient_ID", "Health_Camp_ID"))
df_all <- left_join(df_all, Camp_details, by = c("Health_Camp_ID"))
df_all <- left_join(df_all, patient, by = c("Patient_ID"))

#Create the desired Target Column
# Outcome is 1 when a score exists in either of the first two attendance
# tables or at least one stall was visited. When both scores are NA and the
# stall count is NA the expression evaluates to NA, which the second line
# maps to 0.
df_all$Outcome <- ifelse(!is.na(df_all$Health_Score) | !is.na(df_all$"Health Score") | (df_all$Number_of_stall_visited > 0), 1 ,0)
df_all$Outcome <- ifelse(is.na(df_all$Outcome), 0, df_all$Outcome )
34 |
# Replace one column of `data` by its caret::dummyVars encoding.
#
# Args:
#   column: name (character scalar) of the column to encode.
#   data:   data frame containing `column`.
#
# Returns:
#   `data` without `column`, followed by the columns dummyVars produced for it.
#
# NOTE(review): dummyVars only expands factor/character columns; a numeric
# column appears to be passed through unchanged, so whether a column is really
# one-hot encoded depends on its type at call time -- confirm for each caller.
gen_feature_oneHot <- function(column, data) {

  # Select by name with base-R indexing. The previous
  # select(data, -get(column)) relied on get() being evaluated inside
  # select(), which is not a supported tidyselect expression.
  otherCols <- data[, setdiff(names(data), column), drop = FALSE]
  target <- data[, column, drop = FALSE]

  # "~ . -1" drops the intercept term from the encoding formula.
  dummies <- dummyVars(~ . -1, data = target)
  encoded <- predict(dummies, newdata = target)

  cbind(otherCols, encoded)
}
45 |
# Build the model feature set from the joined train+test frame.
#
# Args:
#   dat: data frame holding the registration rows joined with the attendance,
#        camp-detail and patient-profile columns.
#
# Returns:
#   `dat` with the post-event columns dropped, dates parsed, derived feat_*
#   columns added, the listed categorical columns passed through
#   gen_feature_oneHot(), and constant columns removed. Rows come back sorted
#   by Registration_Date as a side effect of the elapsed-days step.
featureEngg <- function(dat) {
  #Drop these
  # Columns removed before modelling: Donation plus the columns that come
  # from the attendance tables (the Outcome target was built from them).
  for (i in c("Donation",
              "Health_Score",
              "Health Score",
              "Number_of_stall_visited",
              "Last_Stall_Visited_Number")) {
    print(i)
    dat[[i]] <- NULL
  }

  #Convert the Date fields into date format
  for (i in c("Registration_Date",
              "Camp_Start_Date",
              "Camp_End_Date",
              "First_Interaction")) {
    print(i)
    dat[[i]] <- dmy(dat[[i]])
  }

  dat$feat_durationOfCamp <- as.numeric(difftime(dat$Camp_End_Date , dat$Camp_Start_Date, units = "days"))
  dat$feat_didUserRegisterBeforeEventStarts <- ifelse(dat$Registration_Date < dat$Camp_Start_Date, 1, 0)
  dat$feat_daysLeftForEventSinceRegistraion <- as.numeric(difftime(dat$Camp_End_Date, dat$Registration_Date, units = "days"))
  dat$feat_ratio_daysLeftForEventSinceRegistraion_by_durationOfCamp <- dat$feat_daysLeftForEventSinceRegistraion / dat$feat_durationOfCamp

  # Calendar parts of the camp start date.
  dat$feat_weekdayEvent <- wday(dat$Camp_Start_Date)
  dat$feat_weekOfEvent <- week(dat$Camp_Start_Date)
  dat$feat_monthOfEvent <- month(dat$Camp_Start_Date)
  dat$feat_quarterOfEvent <- quarter(dat$Camp_Start_Date)

  # Calendar parts of the registration date (week and quarter left out).
  dat$feat_weekdayRegistration <- wday(dat$Registration_Date)
  dat$feat_monthOfRegistration <- month(dat$Registration_Date)

  # Calendar parts of the camp end date (quarter left out).
  dat$feat_weekdayEndDate <- wday(dat$Camp_End_Date)
  dat$feat_weekOfEndDate <- week(dat$Camp_End_Date)
  dat$feat_monthOfEndDate <- month(dat$Camp_End_Date)

  dat$feat_sum_of_socialMediaShares <- rowSums(dat[, c("Online_Follower",
                                                       "LinkedIn_Shared",
                                                       "Twitter_Shared",
                                                       "Facebook_Shared")], na.rm = TRUE)

  # "None" marks a missing value in these columns; encode it as -9999.
  dat$Income <- as.numeric(ifelse(dat$Income == "None", -9999, dat$Income))

  dat$Education_Score <- as.numeric(ifelse(dat$Education_Score == "None", -9999, dat$Education_Score))

  dat$Age <- as.numeric(ifelse(dat$Age == "None", -9999, dat$Age))
  #Recalculate Age
  # Add the years between first interaction and registration to known ages.
  dat$Age <- ifelse(dat$Age != -9999,
                    dat$Age + as.numeric(difftime(dat$Registration_Date, dat$First_Interaction, units = "days"))/365,
                    dat$Age)

  # Age bins: 1 = over 39, 2 = (30, 39], 3 = (20, 30], 4 = 20 or below.
  # The last bound is inclusive so that an age of exactly 20 no longer falls
  # between bins and stays NA. The -9999 "unknown" code still lands in bin 4,
  # as it did before.
  dat$feat_agebin <- NA
  dat$feat_agebin <- ifelse(dat$Age > 39, 1, dat$feat_agebin)
  dat$feat_agebin <- ifelse(dat$Age > 30 & dat$Age <= 39, 2, dat$feat_agebin)
  dat$feat_agebin <- ifelse(dat$Age > 20 & dat$Age <= 30, 3, dat$feat_agebin)
  dat$feat_agebin <- ifelse(dat$Age <= 20, 4, dat$feat_agebin)

  dat$feat_daysBetweenFirstInteraction_and_registration <- as.numeric(difftime(dat$Registration_Date , dat$First_Interaction, units = "days"))
  dat$feat_daysBetweenFirstInteraction_and_EventStart <- as.numeric(difftime(dat$Camp_Start_Date , dat$First_Interaction, units = "days"))

  #Footfall of each event: number of rows per camp
  dat %>%
    group_by(Health_Camp_ID) %>%
    summarise(feat_CountOfEventsFootfall = n()) -> df_temp
  dat <- left_join(dat, df_temp, by = "Health_Camp_ID")

  #Number of events the patient has registered for: rows per patient
  dat %>%
    group_by(Patient_ID) %>%
    summarise(feat_CountOfPatientVisits = n()) -> df_temp
  dat <- left_join(dat, df_temp, by = "Patient_ID")

  #Days from this registration to the same patient's NEXT registration
  #(lead() looks forward); -9999 when the patient has no later registration
  dat %>%
    group_by(Patient_ID) %>%
    arrange(Registration_Date) %>%
    mutate(feat_elapseDays = as.numeric(difftime(lead(Registration_Date), Registration_Date, units = "days"))) %>%
    ungroup() -> dat
  dat$feat_elapseDays <- ifelse(is.na(dat$feat_elapseDays) , -9999, dat$feat_elapseDays)

  #OneHot Encode all Categorical variables
  OneHotList <- c("Category1",
                  "Category2",
                  "Category3",
                  "Income",
                  "City_Type",
                  "Var1",
                  "Employer_Category")
  for (i in OneHotList) {
    cat("One Hot Features ", i, "\n")
    dat <- gen_feature_oneHot(i, dat)
  }


  #Drop any columns with no variation
  for (i in names(dat)) {
    if (length(unique(dat[[i]])) <= 1) {
      cat("Dropping no variation column - ", i, "\n")
      dat[[i]] <- NULL
    }
  }

  return(dat)
}
154 |
# Run feature engineering on the stacked train+test frame.
df_all <- featureEngg(df_all)

#Split back to train and test
train <- df_all[df_all$isTrain == T, ]
test <- df_all[df_all$isTrain == F, ]

TARGET = "Outcome"
# Identifier, raw date and target columns that must not be used as predictors.
DropList = c("Patient_ID",
             "Health_Camp_ID",
             "isTrain",
             "Registration_Date",
             "Camp_Start_Date",
             "Camp_End_Date",
             "First_Interaction",
             TARGET)
# xgboost hyper-parameters.
ETA <- 0.01
MAX_DEPTH <- 2
SUB_SAMPLE <- 0.8
MIN_CHILD_WEIGHT <- 1
COL_SAMPLE <- 0.7
GAMMA <- 0
seed <- c(1000, 5000) #Any 2 random seeds
BOOSTER <- "gbtree" # "gblinear" "gbtree"
nrounds <- 3800

X_train <- train
Y_train <- train[[TARGET]]

# Keep the test identifiers for the submission before they are dropped below.
p <- test$Patient_ID
h <- test$Health_Camp_ID

for (i in DropList) {
  cat("Dropping", i, "\n")
  X_train[[i]] <- NULL
  test[[i]] <- NULL
}

EVAL_METRIC <- "auc"
OBJECTIVE <- "binary:logistic"
BOOSTER <- BOOSTER
nthread <- parallel::detectCores()
isMaximize <- T
EARLY_STOPPING <- 50
print.every.n <- 10
param <- list(
  objective = OBJECTIVE,
  booster = BOOSTER,
  eval_metric = EVAL_METRIC,
  eta = ETA,
  max_depth = MAX_DEPTH,
  subsample = SUB_SAMPLE,
  min_child_weight = MIN_CHILD_WEIGHT,
  colsample_bytree = COL_SAMPLE,
  gamma = GAMMA,
  nthread = nthread,
  num_parallel_tree = 1
)

dtrain <- xgb.DMatrix( data = data.matrix(X_train),
                       label = data.matrix(Y_train),
                       missing = NA)
# The watchlist holds only the training data, so the early-stopping setting
# below monitors AUC on the training set itself.
watchlist <- list(train = dtrain)

# Average the xgboost test predictions over the seeds.
test_target_xgb <- rep(0, nrow(test))
for (s in seed) {
  set.seed(s)
  cat("########## XGB Seed ", s, "\n")
  bst <- xgb.train( params = param,
                    data = dtrain,
                    nrounds = nrounds,
                    verbose = 1,
                    print.every.n = print.every.n,
                    early.stop.round = EARLY_STOPPING,
                    watchlist = watchlist,
                    maximize = isMaximize
  )
  tmp <- predict(bst, data.matrix(test), missing=NA)
  test_target_xgb <- tmp + test_target_xgb
}
test_target_xgb <- test_target_xgb / length(seed)
# One-column data frame; the averaged prediction ends up in column V1.
probs <- as.data.frame(matrix(test_target_xgb, nrow=nrow(test), byrow = TRUE))

#GBM
# Same seed-averaging scheme with gbm (bernoulli loss).
ntree <- 2200
test_target_gbm <- rep(0, nrow(test))
for (s in seed) {
  set.seed(s)
  cat("########## GBM Seed ", s, "\n")
  bst <- gbm.fit(x = X_train,
                 y = Y_train,
                 distribution = "bernoulli",
                 n.trees = ntree,
                 interaction.depth = 3,
                 n.minobsinnode = 10,
                 bag.fraction = 0.8,
                 shrinkage = 0.01)
  tmp <- predict(bst, test, n.trees = ntree, type = "response")
  test_target_gbm <- tmp + test_target_gbm
}
test_target_gbm <- test_target_gbm / length(seed)

final_test <- data.frame(Patient_ID = p,
                         Health_Camp_ID = h,
                         Outcome1 = probs$V1,
                         Outcome2 = test_target_gbm)

#Build a rank average ensemble
# Each model's predictions are turned into dense ranks, the two ranks are
# averaged with equal weight, and the result is divided by its maximum.
final_test <- mutate(final_test, rank1 = dense_rank((Outcome1)))
final_test <- mutate(final_test, rank2 = dense_rank((Outcome2)))
final_test$WeightedScore <- (final_test$rank1 * 0.5 + final_test$rank2 * 0.5)
final_test$WeightedScore <- final_test$WeightedScore / max(final_test$WeightedScore)

final_test$Outcome <- final_test$WeightedScore
write_csv(final_test[, c("Patient_ID", "Health_Camp_ID", "Outcome")], "Sub_final.csv")
270 |
--------------------------------------------------------------------------------
/Rank 2: Nut Crackers /final_model.R:
--------------------------------------------------------------------------------
1 | # Analytics Vidhya
2 | # Knocktober
3 | # Team : Nut Crackers
4 | # Members : Naveen Kumar Kaveti & Suprit Saha
5 |
6 | # Load Required packages
7 | # =========================================================================
8 | package_names <- c("lubridate","tidyr","Metrics","dplyr")
9 |
# Install any packages from `pkg` that are not yet installed, then attach
# every package named in `pkg`.
#
# Args:
#   pkg: character vector of package names.
#
# Stops with an error when `pkg` is missing or not a character vector.
loadPackage <- function(pkg)
{
  valid_input <- !missing(pkg) && is.character(pkg)
  if(!valid_input)
  {
    stop("Package not correctly entered !!!")
  }

  already_installed <- installed.packages()[, "Package"]
  to_install <- pkg[!(pkg %in% already_installed)]
  if(length(to_install))
  {
    install.packages(to_install, dependencies = TRUE)
  }

  # Attach each package in turn; require() reports a failure by returning
  # FALSE with a warning instead of raising an error.
  for(pkg_name in pkg)
  {
    require(pkg_name, character.only = TRUE)
  }
  cat("Packages Loaded !!!")
}
24 | suppressPackageStartupMessages(suppressWarnings(loadPackage(package_names)))
25 |
26 |
27 | # Reading datasets
28 | # =================================================================================
# Attendance tables (first / second / third camp format), camp details,
# patient profile and the training registrations. Empty strings are read as NA.
FHCA <- read.csv("First_Health_Camp_Attended.csv",header = TRUE,na.strings = "")
HCD <- read.csv("Health_Camp_Detail.csv",header = TRUE,na.strings = "")
PP <- read.csv("Patient_Profile.csv",header = TRUE,na.strings = "")
SHCA <- read.csv("Second_Health_Camp_Attended.csv",header = TRUE,na.strings = "")
THCA <- read.csv("Third_Health_Camp_Attended.csv",header = TRUE,na.strings = "")
Train <- read.csv("Train.csv",header = TRUE,na.strings = "")

# Reading test data
# =================================================================================
Test <- read.csv("Test.csv",header = TRUE,na.strings = "")

# Feature extraction from existing datasets
# =================================================================================
# Train gets profile, camp details and all three attendance tables; the
# attendance columns are needed to build the target.
Train <- left_join(Train, PP, by = "Patient_ID")
Train <- left_join(Train, HCD, by = "Health_Camp_ID")
Train <- left_join(Train, FHCA, by = c("Patient_ID", "Health_Camp_ID"))
Train <- left_join(Train, SHCA, by = c("Patient_ID", "Health_Camp_ID"))
Train <- left_join(Train, THCA, by = c("Patient_ID", "Health_Camp_ID"))

# Test only gets profile and camp details.
Test <- left_join(Test, PP, by = "Patient_ID")
Test <- left_join(Test, HCD, by = "Health_Camp_ID")

# Dropping variables
# =================================================================================
Train$Category3 <- NULL
Test$Category3 <- NULL
Train$Donation <- NULL

# Presumably the unnamed extra column of the first attendance file, which
# read.csv names "X" -- TODO confirm.
Train$X <- NULL
Train$Last_Stall_Visited_Number <- NULL


# Defining Target variable
# =================================================================================
# Y is 0 only when both score columns are NA and fewer than one stall was
# visited. When both scores are NA and the stall count is NA as well, the
# condition evaluates to NA; the second line turns that into 0.
Train$Y <- ifelse((is.na(Train$Health_Score) & is.na(Train$Health.Score) & Train$Number_of_stall_visited <1),0,1)
Train$Y[is.na(Train$Y)] <- 0

table(Train$Y)
67 |
68 |
69 | # Feature engineering
70 | # ==================================================================================
# Clean a joined registration frame and derive its date features.
#
# Args:
#   df: data frame (or a string that evaluates to one) holding the camp,
#       registration and patient-profile columns.
#
# Returns:
#   `df` with dates parsed, day/month/year parts added, missing registration
#   dates imputed, "None" values in Education_Score and Age replaced by the
#   median of the remaining values, and categorical levels relabelled as
#   integers.
#
# NOTE(review): medians and factor levels are computed from the frame passed
# in, so Train and Test are each treated with their own statistics.
Cleansing <- function(df)
{
  if(length(formals(Cleansing)) != nargs())
  {
    stop("Check for missing arguments !!!")
  }
  if(is.character(df))
  {
    # NOTE(review): eval(parse()) executes arbitrary R code; only pass
    # trusted object names here.
    df <- eval(parse(text = df))
  }
  # Scalar condition: use the short-circuit operator rather than `|`.
  if(missing(df) || !is.data.frame(df))
  {
    stop("Enter valid data frame !!!")
  }

  df$Camp_Start_Date <- as.Date(df$Camp_Start_Date, format = "%d-%b-%y")
  df$CSD_Day <- lubridate::day(df$Camp_Start_Date)
  df$CSD_Mon <- lubridate::month(df$Camp_Start_Date)
  df$CSD_Year <- lubridate::year(df$Camp_Start_Date)

  df$Camp_End_Date <- as.Date(df$Camp_End_Date, format = "%d-%b-%y")
  df$CED_Day <- lubridate::day(df$Camp_End_Date)
  df$CED_Mon <- lubridate::month(df$Camp_End_Date)
  df$CED_Year <- lubridate::year(df$Camp_End_Date)

  df$Camp_Duration <- difftime(df$Camp_End_Date, df$Camp_Start_Date, units = c("days"))
  df$Camp_Duration <- as.numeric(df$Camp_Duration)

  df$Registration_Date <- as.Date(df$Registration_Date, format = "%d-%b-%y")
  # Impute a missing registration date with the midpoint of the camp.
  if(sum(is.na(df$Registration_Date)) > 0){
    df$Registration_Date[is.na(df$Registration_Date)] <- df$Camp_Start_Date[is.na(df$Registration_Date)] + days(round(df$Camp_Duration[is.na(df$Registration_Date)]/2))
  }
  df$Reg_Day <- lubridate::day(df$Registration_Date)
  df$Reg_Mon <- lubridate::month(df$Registration_Date)
  df$Reg_Year <- lubridate::year(df$Registration_Date)

  df$Online_Follower <- as.factor(df$Online_Follower)
  df$LinkedIn_Shared <- as.factor(df$LinkedIn_Shared)
  df$Twitter_Shared <- as.factor(df$Twitter_Shared)
  df$Facebook_Shared <- as.factor(df$Facebook_Shared)

  # The levels()<- assignments below need factors. Where read.csv already
  # produced factors this is a no-op; where it produced character columns
  # (R >= 4.0 default) it restores the factor representation.
  for(col in c("Income", "City_Type", "Employer_Category", "Category1", "Category2"))
  {
    df[[col]] <- as.factor(df[[col]])
  }

  # Relabel the "None" income level as 7.
  levels(df$Income)[levels(df$Income) == "None"] <- 7

  # "None" -> 0 placeholder -> median of the non-zero values.
  df$Education_Score <- as.character(df$Education_Score)
  df$Education_Score[df$Education_Score == "None"] <- 0
  df$Education_Score <- as.numeric(df$Education_Score)
  df$Education_Score[df$Education_Score == 0] <- median(df$Education_Score[df$Education_Score != 0])

  df$Age <- as.character(df$Age)
  df$Age[df$Age == "None"] <- 0
  df$Age <- as.numeric(df$Age)
  df$Age[df$Age == 0] <- median(df$Age[df$Age != 0])

  df$First_Interaction <- as.Date(df$First_Interaction, format = "%d-%b-%y")
  df$FI_Day <- lubridate::day(df$First_Interaction)
  # Month of the first interaction (this used to be taken from
  # Registration_Date, which only duplicated Reg_Mon).
  df$FI_Mon <- lubridate::month(df$First_Interaction)
  # A year-of-first-interaction feature is deliberately left out: the original
  # author found it equal to another variable (perfect multi-collinearity).

  # Replace the category labels by consecutive integers, in level order.
  levels(df$City_Type) <- c(1:length(levels(df$City_Type)))

  levels(df$Employer_Category) <- c(1:length(levels(df$Employer_Category)))

  levels(df$Category1) <- c(1:length(levels(df$Category1)))

  levels(df$Category2) <- c(1:length(levels(df$Category2)))

  # Fixed level sets so every frame cleansed here gets identical year factors.
  df$Reg_Year <- factor(df$Reg_Year, levels = c(2003, 2004, 2005, 2006, 2007))

  df$CSD_Year <- factor(df$CSD_Year, levels = c(2003, 2004, 2005, 2006, 2007))

  return(df)
}
# Clean Train and Test separately with the same routine.
Train <- Cleansing(Train)
Test <- Cleansing(Test)

# Recency & Frequency variables
# ========================================================================
# Both are computed over the registrations of Train and Test together.
tab1 <- rbind(Train[, c("Patient_ID", "Registration_Date")], Test[, c("Patient_ID", "Registration_Date")])

# Recency: days between the latest camp end date in the data and the
# patient's most recent registration date (PRRD).
tab2 <- as.data.frame(tab1 %>% group_by(Patient_ID) %>% summarise(PRRD = max(Registration_Date)))
tab2$Recency <- as.integer(difftime(max(Train$Camp_End_Date, Test$Camp_End_Date), tab2$PRRD, units = "days"))
tab2$PRRD <- NULL


# Frequency: number of registrations per patient.
tab3 <- as.data.frame(table(tab1$Patient_ID))
colnames(tab3) <- c("Patient_ID", "Frequency")
# table() turns the ids into a factor; convert back to integer for the join.
tab3$Patient_ID <- as.integer(as.character(tab3$Patient_ID))

Train <- left_join(Train, tab2, by = c("Patient_ID"))
Train <- left_join(Train, tab3, by = c("Patient_ID"))
Test <- left_join(Test, tab2, by = c("Patient_ID"))
Test <- left_join(Test, tab3, by = c("Patient_ID"))

Train$Recency <- as.integer(Train$Recency)
Test$Recency <- as.integer(Test$Recency)

# Time Difference features
# =================================================================================
# Lag  : registration date minus camp start date (days)
# Lag2 : registration date minus camp end date (days)
# Lag3 : registration date minus first interaction date (days)
Train$Lag <- as.integer(difftime(Train$Registration_Date, Train$Camp_Start_Date, units = "days"))
Train$Lag2 <- as.integer(difftime(Train$Registration_Date, Train$Camp_End_Date, units = "days"))
Train$Lag3 <- as.integer(difftime(Train$Registration_Date, Train$First_Interaction, units = "days"))

Test$Lag <- as.integer(difftime(Test$Registration_Date, Test$Camp_Start_Date, units = "days"))
Test$Lag2 <- as.integer(difftime(Test$Registration_Date, Test$Camp_End_Date, units = "days"))
Test$Lag3 <- as.integer(difftime(Test$Registration_Date, Test$First_Interaction, units = "days"))


# Assigning probabilities for each Age group
# ==================================================================================
library(classInt)
# Bucket Age using the fixed breaks 30, 40, ..., 80.
# NOTE(review): how ages outside [30, 80] are bucketed depends on
# classIntervals/findCols -- confirm.
temp <- classIntervals(Train$Age, 10, style = "fixed", fixedBreaks = c(30, 40, 50, 60, 70, 80))
Train$Age_Bucket <- as.factor(findCols(temp))

temp2 <- classIntervals(Test$Age, 10, style = "fixed", fixedBreaks = c(30, 40, 50, 60, 70, 80))
Test$Age_Bucket <- as.factor(findCols(temp2))

# Age_of: mean of the target Y per age bucket, computed on Train only and
# then attached to both Train and Test.
Age_of <- as.data.frame(Train %>% group_by(Age_Bucket) %>% summarise(Age_of = mean(as.integer(as.character(Y)))))

Train <- left_join(Train, Age_of, by = "Age_Bucket")
Test <- left_join(Test, Age_of, by = "Age_Bucket")

# Feature set
# ===================================================================================
# 18 predictors followed by the target Y as the 19th column; the h2o.gbm call
# below refers to these columns by position.
Features <- Train[, c("LinkedIn_Shared", "Income", "Education_Score", "Age", "City_Type", "Employer_Category", "Category1", "Category2", "CSD_Mon", "CED_Year", "Camp_Duration", "Reg_Year", "Recency", "Frequency", "Age_of", "Lag", "Lag2", "Lag3", "Y")]

Train_XY <- data.frame(Features)
# The target is converted to a factor before it is handed to h2o.
Train_XY$Y <- as.factor(Train_XY$Y)


# Using GBM from h2o package
# =============================================================
library(h2o)
h2o.init()

train_h2o <- as.h2o(Train_XY) # Creating h2o dataframe

splits <- h2o.splitFrame(
  train_h2o,           ## splitting the H2O frame we read above
  c(0.6,0.2),          ## create splits of 60% and 20%; the remainder forms the third split
  seed=1234)           ## setting a seed will ensure reproducible results (not R's seed)

train <- h2o.assign(splits[[1]], "train.hex")
valid <- h2o.assign(splits[[2]], "valid.hex")
holdout <- h2o.assign(splits[[3]], "test.hex")

gbm <- h2o.gbm(
  training_frame = train,        ## the H2O frame for training
  validation_frame = valid,      ## the H2O frame for validation (not required)
  x=c(1:18),                     ## the predictor columns, by column index
  y=19,                          ## the target index (what we are predicting)
  model_id = "gbm_covType1",     ## name the model in H2O
  seed = 2000000)                ## Set the random seed for reproducibility

summary(gbm)                     ## View information about the model.

# holdout predictions
# ===================================================================
pred_test_gbm <- predict(gbm, holdout)
# Pair the actual outcome (column 19 of the holdout split) with the predicted
# probability in the p1 column.
pred_test_gbm <- data.frame(outcome = as.data.frame(holdout[19]), prob = as.data.frame(pred_test_gbm)[, "p1"])
# NOTE(review): auc() is presumably Metrics::auc (Metrics is loaded at the
# top of the script) -- confirm.
auc(pred_test_gbm[,1], pred_test_gbm[,2])

# Test Data predictions
# ==================================================================
Test_h2o <- as.h2o(Test)
pred_Test_gbm <- predict(gbm, Test_h2o)
result <- as.data.frame(pred_Test_gbm)
result <- data.frame(Patient_ID = Test$Patient_ID, Health_Camp_ID = Test$Health_Camp_ID, Outcome = result$p1)
write.csv(result, file = "Predictions_GBM.csv", row.names = FALSE)
239 |
240 |
--------------------------------------------------------------------------------
/Rank 1: Rock n Rolla /finalModel_v3.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import operator
3 | import pandas as pd
4 | import numpy as np
5 | from sklearn import preprocessing, model_selection, metrics, ensemble
6 | import xgboost as xgb
7 |
def getCountVar(compute_df, count_df, var_name, count_var="v1"):
    """Return, for each row of compute_df, how often its key occurs in count_df.

    Args:
        compute_df: DataFrame whose rows receive a count; must contain var_name.
        count_df: DataFrame in which occurrences of var_name are counted.
        var_name: Name of the key column shared by both frames.
        count_var: Unused; kept so existing callers keep working.

    Returns:
        A list with one entry per row of compute_df, in row order: the number
        of rows in count_df with the same var_name value, or -1 when the value
        does not occur in count_df.
    """
    # size() on a plain groupby gives a Series indexed by the key, so
    # reset_index(name=...) always yields exactly [var_name, "var_count"].
    counts = count_df.groupby(var_name).size().reset_index(name="var_count")
    # Only the key column is needed on the left side; a left merge against
    # unique keys preserves the row order and row count of compute_df.
    merged_df = pd.merge(compute_df[[var_name]], counts, how="left", on=var_name)
    # Fill just the count column instead of every column of the merged frame.
    return list(merged_df["var_count"].fillna(-1))
14 |
def create_feature_map(features):
    """Write an xgboost feature-map file named 'xgb.fmap' in the working directory.

    Each feature becomes one line "<index>\\t<name>\\tq", with the index
    counting from 0 in the order given.

    Args:
        features: Iterable of feature names.
    """
    # The context manager closes the file even if a write fails.
    with open('xgb.fmap', 'w') as outfile:
        for i, feat in enumerate(features):
            outfile.write('{0}\t{1}\tq\n'.format(i, feat))
20 |
def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, extra_X=None, seed_val=0, num_rounds=200):
    """Train a binary-logistic xgboost model and predict on test_X.

    Args:
        train_X: Training features.
        train_y: Training labels.
        test_X: Features to predict on.
        test_y: Optional labels for test_X. When given, test_X is used as a
            watchlist entry during training and the AUC on it is computed.
        feature_names: Optional feature names. When given, the model dump
            ('xgbmodel.txt'), the feature map ('xgb.fmap') and normalized
            feature importances ('imp_feat.txt') are written to the working
            directory.
        extra_X: Optional additional features to predict on.
        seed_val: Random seed passed to xgboost.
        num_rounds: Number of boosting rounds.

    Returns:
        (pred_test_y, loss) or, when extra_X is given,
        (pred_test_y, pred_extra_y, loss). loss is the ROC AUC on test_X when
        test_y is given and 0 otherwise.
    """
    params = {}
    params["objective"] = "binary:logistic"
    params['eval_metric'] = 'auc'
    params["eta"] = 0.02
    params["subsample"] = 0.8
    params["min_child_weight"] = 5
    params["colsample_bytree"] = 0.7
    params["max_depth"] = 6
    params["silent"] = 1
    params["seed"] = seed_val

    plst = list(params.items())
    xgtrain = xgb.DMatrix(train_X, label=train_y)

    if test_y is not None:
        xgtest = xgb.DMatrix(test_X, label=test_y)
        watchlist = [(xgtrain, 'train'), (xgtest, 'test')]
        model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=300)
    else:
        xgtest = xgb.DMatrix(test_X)
        model = xgb.train(plst, xgtrain, num_rounds)

    if feature_names is not None:
        create_feature_map(feature_names)
        model.dump_model('xgbmodel.txt', 'xgb.fmap', with_stats=True)
        importance = model.get_fscore(fmap='xgb.fmap')
        importance = sorted(importance.items(), key=operator.itemgetter(1), reverse=True)
        imp_df = pd.DataFrame(importance, columns=['feature', 'fscore'])
        # Normalize so the scores sum to 1.
        imp_df['fscore'] = imp_df['fscore'] / imp_df['fscore'].sum()
        imp_df.to_csv("imp_feat.txt", index=False)

    pred_test_y = model.predict(xgtest)

    # Score before branching on extra_X, so the three-value return also
    # carries the real AUC instead of a constant 0 when labels are available.
    loss = 0
    if test_y is not None:
        loss = metrics.roc_auc_score(test_y, pred_test_y)
        # Single-argument call form: prints the same on Python 2 and Python 3.
        print(loss)

    if extra_X is not None:
        pred_extra_y = model.predict(xgb.DMatrix(extra_X))
        return pred_test_y, pred_extra_y, loss

    return pred_test_y, loss
67 |
if __name__ == "__main__":
    ## End-to-end pipeline: load the raw files, build per-patient history
    ## features, train three seeded XGBoost models and write the averaged
    ## test predictions to sub35.csv.
    ## print(...) calls are written so that they produce the same output
    ## under Python 2 and Python 3.

    ## Reading the files and converting the dates ##
    data_path = "../input/Train/"
    train = pd.read_csv(data_path + "train_with_outcome.csv")
    test = pd.read_csv(data_path + "Test.csv")
    # Missing registration dates get a sentinel date. Assign the result back
    # instead of filling a column selection in place, which is not reliable
    # under pandas Copy-on-Write.
    train['Registration_Date'] = train['Registration_Date'].fillna('10-jan-90')
    test['Registration_Date'] = test['Registration_Date'].fillna('10-jan-90')
    train['Registration_Date'] = pd.to_datetime(train['Registration_Date'], format="%d-%b-%y")
    test['Registration_Date'] = pd.to_datetime(test['Registration_Date'], format="%d-%b-%y")
    # Dates are turned into ordinals so they can be subtracted as plain numbers.
    train['Registration_Date'] = train['Registration_Date'].apply(lambda x: x.toordinal())
    test['Registration_Date'] = test['Registration_Date'].apply(lambda x: x.toordinal())
    print("{0} {1}".format(train.shape, test.shape))

    ## Getting patient details and merging with train and test ##
    patient = pd.read_csv(data_path + "Patient_Profile.csv", na_values=['None', ''])
    patient['First_Interaction'] = pd.to_datetime(patient['First_Interaction'], format="%d-%b-%y")
    patient['First_Interaction'] = patient['First_Interaction'].apply(lambda x: x.toordinal())
    print(patient.shape)
    train = train.merge(patient, on=['Patient_ID'], how='left')
    test = test.merge(patient, on=['Patient_ID'], how='left')
    print("{0} {1}".format(train.shape, test.shape))

    ## Getting health camp details and merging with train and test ##
    hc_details = pd.read_csv(data_path + "Health_Camp_Detail.csv")
    hc_details['Camp_Start_Date'] = pd.to_datetime(hc_details['Camp_Start_Date'], format="%d-%b-%y")
    hc_details['Camp_End_Date'] = pd.to_datetime(hc_details['Camp_End_Date'], format="%d-%b-%y")
    hc_details['Camp_Start_Date'] = hc_details['Camp_Start_Date'].apply(lambda x: x.toordinal())
    hc_details['Camp_End_Date'] = hc_details['Camp_End_Date'].apply(lambda x: x.toordinal())
    hc_details['Camp_Duration_Days'] = hc_details['Camp_End_Date'] - hc_details['Camp_Start_Date']
    print(hc_details.head())
    train = train.merge(hc_details, on=['Health_Camp_ID'], how='left')
    test = test.merge(hc_details, on=['Health_Camp_ID'], how='left')
    print("{0} {1}".format(train.shape, test.shape))

    ## Reading the camp files (attendance details exist for train only) ##
    first_camp_details = pd.read_csv(data_path + "First_Health_Camp_Attended.csv")
    first_camp_details = first_camp_details[["Patient_ID", "Health_Camp_ID", "Donation", "Health_Score"]]
    train = train.merge(first_camp_details, on=["Patient_ID", "Health_Camp_ID"], how='left')
    third_camp_details = pd.read_csv(data_path + "Third_Health_Camp_Attended.csv")
    third_camp_details = third_camp_details[["Patient_ID", "Health_Camp_ID", "Number_of_stall_visited", "Last_Stall_Visited_Number"]]
    train = train.merge(third_camp_details, on=["Patient_ID", "Health_Camp_ID"], how='left')
    # Rows without an attendance record count as zero stalls / donation / score.
    train["Number_of_stall_visited"] = train["Number_of_stall_visited"].fillna(0)
    train["Donation"] = train["Donation"].fillna(0)
    train["Health_Score"] = train["Health_Score"].fillna(0)
    print("{0} {1}".format(train.shape, test.shape))

    ## Filling NA with -99 ##
    train.fillna(-99, inplace=True)
    test.fillna(-99, inplace=True)

    ## Create additional date-difference features ##
    print("Getting additional features.")
    train["Diff_CampStart_Registration"] = train["Camp_Start_Date"] - train["Registration_Date"]
    test["Diff_CampStart_Registration"] = test["Camp_Start_Date"] - test["Registration_Date"]

    train["Diff_CampEnd_Registration"] = train["Camp_End_Date"] - train["Registration_Date"]
    test["Diff_CampEnd_Registration"] = test["Camp_End_Date"] - test["Registration_Date"]

    train["Diff_Registration_FirstInteraction"] = train["Registration_Date"] - train["First_Interaction"]
    test["Diff_Registration_FirstInteraction"] = test["Registration_Date"] - test["First_Interaction"]

    train["Diff_CampStart_FirstInteraction"] = train["Camp_Start_Date"] - train["First_Interaction"]
    test["Diff_CampStart_FirstInteraction"] = test["Camp_Start_Date"] - test["First_Interaction"]
    print("{0} {1}".format(train.shape, test.shape))

    ## Getting the categorical columns and label encoding them ##
    for col in train.columns:
        if train[col].dtype == 'object':
            print(col)
            # Fit on train + test together so every value seen in test has a label.
            enc = preprocessing.LabelEncoder()
            enc.fit(list(train[col].values) + list(test[col].values))
            train[col] = enc.transform(list(train[col].values))
            test[col] = enc.transform(list(test[col].values))

    # getting count #
    for col in ["Patient_ID", "Health_Camp_ID"]:
        print("Count :  {0}".format(col))
        full_df = pd.concat([train, test])
        train["Count_" + col] = getCountVar(train, full_df, col)
        test["Count_" + col] = getCountVar(test, full_df, col)

    ## do sorting so as to compute the next variables ##
    # The history features below rely on rows being in camp-date order.
    train = train.sort_values(['Camp_Start_Date', 'Camp_End_Date', 'Patient_ID']).reset_index(drop=True)
    test = test.sort_values(['Camp_Start_Date', 'Camp_End_Date', 'Patient_ID']).reset_index(drop=True)
    print(train.head())

    print("First pass to get necessary details..")
    # Per-patient histories; position k in every list refers to the same row.
    people_camp_dict = {}
    people_date_dict = {}
    people_dv_dict = {}
    people_cat1_dict = {}
    people_donation_dict = {}
    people_num_stall_dict = {}
    people_fscore_dict = {}
    for _, row in train.iterrows():
        pid = row['Patient_ID']
        people_camp_dict.setdefault(pid, []).append(row['Health_Camp_ID'])
        people_date_dict.setdefault(pid, []).append(row['Registration_Date'])
        people_dv_dict.setdefault(pid, []).append(row['Outcome'])
        people_donation_dict.setdefault(pid, []).append(row['Donation'])
        people_num_stall_dict.setdefault(pid, []).append(row['Number_of_stall_visited'])
        people_fscore_dict.setdefault(pid, []).append(row['Health_Score'])
        people_cat1_dict.setdefault(pid, []).append(row['Category1'])

    print("Creating features now using dict for train..")
    last_date_list = []
    last_cat1_list = []
    mean_dv_list = []
    last_cdate_list = []
    last_donation_list = []
    last_num_stall_list = []
    last_fscore_list = []
    for _, row in train.iterrows():
        pid = row['Patient_ID']
        reg_date = row['Registration_Date']

        # Position of this camp in the patient's history (first match);
        # everything before it is "the past" for this row.
        use_index = people_camp_dict[pid].index(row['Health_Camp_ID'])

        tlist = people_date_dict[pid][:use_index]
        last_date_list.append(reg_date - tlist[-1] if tlist else -99)

        tlist = people_dv_dict[pid][:use_index]
        mean_dv_list.append(np.mean(tlist) if tlist else -99)

        tlist = people_donation_dict[pid][:use_index]
        last_donation_list.append(np.sum(tlist) if tlist else -99)

        tlist = people_num_stall_dict[pid][:use_index]
        last_num_stall_list.append(np.sum(tlist) if tlist else -99)

        # Mean of the non-zero health scores; this is NaN when every earlier
        # score is zero, and the later fillna(-99) replaces it.
        tlist = people_fscore_dict[pid][:use_index]
        last_fscore_list.append(np.mean([i for i in tlist if i != 0]) if tlist else -99)

        tlist = people_cat1_dict[pid][:use_index]
        last_cat1_list.append(tlist[-1] if tlist else -99)

        # Gap to the patient's next registration within train.
        tlist = people_date_dict[pid][use_index + 1:]
        last_cdate_list.append(reg_date - tlist[0] if tlist else -99)

    print(last_fscore_list[:50])

    # Column creation order is kept fixed because it determines the feature
    # order fed to the model.
    train["Last_Reg_Date"] = last_date_list
    train["Mean_Outcome"] = mean_dv_list
    train["Last_Cat1"] = last_cat1_list
    train["Next_Reg_Date"] = last_cdate_list
    train["Sum_Donations"] = last_donation_list
    train["Sum_NumStalls"] = last_num_stall_list
    train["Mean_Fscore"] = last_fscore_list

    print("Prepare dict using val..")
    # Extend the histories with the test rows (no outcome / attendance data there).
    for _, row in test.iterrows():
        pid = row['Patient_ID']
        people_camp_dict.setdefault(pid, []).append(row['Health_Camp_ID'])
        people_date_dict.setdefault(pid, []).append(row['Registration_Date'])
        people_cat1_dict.setdefault(pid, []).append(row['Category1'])

    print("Creating features for val using dict..")
    last_date_list = []
    last_cat1_list = []
    mean_dv_list = []
    last_cdate_list = []
    last_donation_list = []
    last_num_stall_list = []
    last_fscore_list = []
    for _, row in test.iterrows():
        pid = row['Patient_ID']
        reg_date = row['Registration_Date']

        use_index = people_camp_dict[pid].index(row['Health_Camp_ID'])

        tlist = people_date_dict[pid][:use_index]
        last_date_list.append(reg_date - tlist[-1] if tlist else -99)

        # Outcome / donation / stall / score histories only hold train rows,
        # so the patient's complete train history is used for test rows.
        tlist = people_dv_dict.get(pid, [])
        mean_dv_list.append(np.mean(tlist) if tlist else -99)

        tlist = people_donation_dict.get(pid, [])
        last_donation_list.append(np.sum(tlist) if tlist else -99)

        tlist = people_num_stall_dict.get(pid, [])
        last_num_stall_list.append(np.sum(tlist) if tlist else -99)

        tlist = people_fscore_dict.get(pid, [])
        last_fscore_list.append(np.mean([i for i in tlist if i != 0]) if tlist else -99)

        tlist = people_cat1_dict[pid][:use_index]
        last_cat1_list.append(tlist[-1] if tlist else -99)

        tlist = people_date_dict[pid][use_index + 1:]
        last_cdate_list.append(reg_date - tlist[0] if tlist else -99)

    test["Last_Reg_Date"] = last_date_list
    test["Mean_Outcome"] = mean_dv_list
    test["Last_Cat1"] = last_cat1_list
    test["Next_Reg_Date"] = last_cdate_list
    test["Sum_Donations"] = last_donation_list
    test["Sum_NumStalls"] = last_num_stall_list
    test["Mean_Fscore"] = last_fscore_list

    train.fillna(-99, inplace=True)
    test.fillna(-99, inplace=True)

    print("Getting dv and id values")
    train_y = train.Outcome.values

    ## Columns to drop ##
    print("Dropping columns..")
    drop_cols = ["Camp_Start_Date", "Camp_End_Date", "Registration_Date"]  # , "First_Interaction"]
    drop_cols = drop_cols + ["LinkedIn_Shared", "Facebook_Shared", "Twitter_Shared", "Online_Follower", "Var4"]
    train.drop(drop_cols, axis=1, inplace=True)
    test.drop(drop_cols, axis=1, inplace=True)
    print("{0} {1}".format(train.shape, test.shape))

    # preparing train and test #
    print("Choose the columns to use..")
    # Ids, the target and the train-only attendance columns are not features.
    excluded_cols = ["Outcome", "Health_Camp_ID", "Patient_ID", "Der_Var1", "Number_of_stall_visited", "Last_Stall_Visited_Number", "Donation", "Health_Score", "Mean_Fscore"]
    xcols = [col for col in train.columns if col not in excluded_cols]
    print(xcols)
    train_X = np.array(train[xcols])
    test_X = np.array(test[xcols])
    print("{0} {1}".format(train_X.shape, test_X.shape))

    print("Final Model..")
    # Average the predictions of several models that differ in seed and rounds.
    seed_rounds = [[0, 200], [2016, 250], [1323, 225]]
    preds = 0
    for seed_val, num_rounds in seed_rounds:
        print("{0} {1}".format(seed_val, num_rounds))
        temp_preds, loss = runXGB(train_X, train_y, test_X, feature_names=xcols, seed_val=seed_val, num_rounds=num_rounds)
        preds += temp_preds
    preds = preds / float(len(seed_rounds))

    out_df = pd.DataFrame({"Patient_ID": test.Patient_ID.values})
    out_df["Health_Camp_ID"] = test.Health_Camp_ID.values
    out_df["Outcome"] = preds
    out_df.to_csv("sub35.csv", index=False)
421 |
--------------------------------------------------------------------------------
/Rank 1: Rock n Rolla /DataExploration.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "### Data Exploration "
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {
14 | "collapsed": false
15 | },
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np\n",
19 | "import pandas as pd\n",
20 | "from matplotlib import pyplot as plt\n",
21 | "import seaborn as sns\n",
22 | "%matplotlib inline"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "##### Loading all the files and checking the number of rows and columns "
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {
36 | "collapsed": false
37 | },
38 | "outputs": [
39 | {
40 | "name": "stdout",
41 | "output_type": "stream",
42 | "text": [
43 | "Rows and Columns in Train : (75278, 8)\n",
44 | "Rows and Columns in Test : (35249, 8)\n",
45 | "Rows and Columns in Health Camp Details : (65, 6)\n",
46 | "Rows and Columns in Patient Profile : (37633, 11)\n",
47 | "Rows and Columns in First Format Health Camp Attended : (6218, 5)\n",
48 | "Rows and Columns in Second Format Health Camp Attended : (7819, 3)\n",
49 | "Rows and Columns in Third Format Health Camp Attended : (6515, 4)\n"
50 | ]
51 | }
52 | ],
53 | "source": [
54 | "train = pd.read_csv(\"Train.csv\")\n",
55 | "test = pd.read_csv(\"Test.csv\")\n",
56 | "health_camp = pd.read_csv(\"Health_Camp_Detail.csv\")\n",
57 | "patient = pd.read_csv(\"Patient_Profile.csv\")\n",
58 | "first_format_camp = pd.read_csv(\"First_Health_Camp_Attended.csv\")\n",
59 | "second_format_camp = pd.read_csv(\"Second_Health_Camp_Attended.csv\")\n",
60 | "third_format_camp = pd.read_csv(\"Third_Health_Camp_Attended.csv\")\n",
61 | "print \"Rows and Columns in Train : \", train.shape\n",
62 | "print \"Rows and Columns in Test : \", test.shape\n",
63 | "print \"Rows and Columns in Health Camp Details : \", health_camp.shape\n",
64 | "print \"Rows and Columns in Patient Profile : \", patient.shape\n",
65 | "print \"Rows and Columns in First Format Health Camp Attended : \", first_format_camp.shape\n",
66 | "print \"Rows and Columns in Second Format Health Camp Attended : \", second_format_camp.shape\n",
67 | "print \"Rows and Columns in Third Format Health Camp Attended : \", third_format_camp.shape"
68 | ]
69 | },
70 | {
71 | "cell_type": "markdown",
72 | "metadata": {},
73 | "source": [
74 | "##### Train data exploration"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 3,
80 | "metadata": {
81 | "collapsed": false
82 | },
83 | "outputs": [
84 | {
85 | "data": {
86 | "text/plain": [
87 | "['Patient_ID',\n",
88 | " 'Health_Camp_ID',\n",
89 | " 'Registration_Date',\n",
90 | " 'Var1',\n",
91 | " 'Var2',\n",
92 | " 'Var3',\n",
93 | " 'Var4',\n",
94 | " 'Var5']"
95 | ]
96 | },
97 | "execution_count": 3,
98 | "metadata": {},
99 | "output_type": "execute_result"
100 | }
101 | ],
102 | "source": [
103 | "# Names of the columns present in the train data\n",
104 | "list(train.columns)"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": 4,
110 | "metadata": {
111 | "collapsed": false
112 | },
113 | "outputs": [
114 | {
115 | "data": {
116 | "text/html": [
117 | "
\n",
118 | "
\n",
119 | " \n",
120 | " \n",
121 | " | \n",
122 | " Patient_ID | \n",
123 | " Health_Camp_ID | \n",
124 | " Registration_Date | \n",
125 | " Var1 | \n",
126 | " Var2 | \n",
127 | " Var3 | \n",
128 | " Var4 | \n",
129 | " Var5 | \n",
130 | "
\n",
131 | " \n",
132 | " \n",
133 | " \n",
134 | " | 0 | \n",
135 | " 489652 | \n",
136 | " 6578 | \n",
137 | " 10-Sep-05 | \n",
138 | " 4 | \n",
139 | " 0 | \n",
140 | " 0 | \n",
141 | " 0 | \n",
142 | " 2 | \n",
143 | "
\n",
144 | " \n",
145 | " | 1 | \n",
146 | " 507246 | \n",
147 | " 6578 | \n",
148 | " 18-Aug-05 | \n",
149 | " 45 | \n",
150 | " 5 | \n",
151 | " 0 | \n",
152 | " 0 | \n",
153 | " 7 | \n",
154 | "
\n",
155 | " \n",
156 | " | 2 | \n",
157 | " 523729 | \n",
158 | " 6534 | \n",
159 | " 29-Apr-06 | \n",
160 | " 0 | \n",
161 | " 0 | \n",
162 | " 0 | \n",
163 | " 0 | \n",
164 | " 0 | \n",
165 | "
\n",
166 | " \n",
167 | " | 3 | \n",
168 | " 524931 | \n",
169 | " 6535 | \n",
170 | " 07-Feb-04 | \n",
171 | " 0 | \n",
172 | " 0 | \n",
173 | " 0 | \n",
174 | " 0 | \n",
175 | " 0 | \n",
176 | "
\n",
177 | " \n",
178 | " | 4 | \n",
179 | " 521364 | \n",
180 | " 6529 | \n",
181 | " 28-Feb-06 | \n",
182 | " 15 | \n",
183 | " 1 | \n",
184 | " 0 | \n",
185 | " 0 | \n",
186 | " 7 | \n",
187 | "
\n",
188 | " \n",
189 | "
\n",
190 | "
"
191 | ],
192 | "text/plain": [
193 | " Patient_ID Health_Camp_ID Registration_Date Var1 Var2 Var3 Var4 Var5\n",
194 | "0 489652 6578 10-Sep-05 4 0 0 0 2\n",
195 | "1 507246 6578 18-Aug-05 45 5 0 0 7\n",
196 | "2 523729 6534 29-Apr-06 0 0 0 0 0\n",
197 | "3 524931 6535 07-Feb-04 0 0 0 0 0\n",
198 | "4 521364 6529 28-Feb-06 15 1 0 0 7"
199 | ]
200 | },
201 | "execution_count": 4,
202 | "metadata": {},
203 | "output_type": "execute_result"
204 | }
205 | ],
206 | "source": [
207 | "# Let us take a look at the top few rows\n",
208 | "train.head()"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 5,
214 | "metadata": {
215 | "collapsed": false
216 | },
217 | "outputs": [
218 | {
219 | "name": "stdout",
220 | "output_type": "stream",
221 | "text": [
222 | "Number of unique persons in the train : 29828\n"
223 | ]
224 | }
225 | ],
226 | "source": [
227 | "# Number of unique persons present in the train\n",
228 | "print \"Number of unique persons in the train : \", len(train.Patient_ID.unique())"
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "So 29,828 unique patients are present in the 75,278 rows of the training set. Now let us see how they are distributed."
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 6,
241 | "metadata": {
242 | "collapsed": false
243 | },
244 | "outputs": [
245 | {
246 | "data": {
247 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh0AAAFoCAYAAADzZ0kIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3Xu0nVV96P1vsvdJSDjsGu05JBYK2o7zEzgcG1AIlEuh\n9lDqqTCwF5HXDoFSFChVYDSvFaFvqQe5yKWeg4hgMcfLi7yireVYrJYBpibDBJRC1J89XogQTCUj\nNpbczM5+/5jPcj9d5rLXztpzZ+/9/Yyxh2s9vzmfNdevszs/5jOfZ88aGRlBkiRpos2e7AFIkqSZ\nwaJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSapicLwdI2IusBq4\nNDMfbY4dCnwAOBV4FnhnZt7f6nMucB2wEPgccFFmbmjF3wNcQCmG7snMpa3Yi4EPAr8G/AC4JjM/\nOt7xS5Kkusa10tEUHB8HjmwdGwD+N7AV+CXgZuAjEXFkEz8OuBu4FlgCLADubfW/EjgXOAt4PXBe\nRFzR+tgPAwcBxwPvBu6OiFeNZ/ySJKm+nlc6IuII4GO7CL0W+DlgSWa+APxTRPw6cCLwNeBS4L7O\n6kREvAl4OiIOy8yngcuBqzNzRRNfSlkVuSUifqE5/2GZ+T3g6xFxAnAJZWVEkiTt58az0nEq8AXg\nBGBW9/Gm4AAgM8/JzLubt0uAR1uxZ4C1wJKIWAQcCnyxdb7lwGERcTBwHLC2KTja8RPGMX5JkjQJ\nel7pyMw7O68joh16OfCdiLgeeBNl38WfZuZfNfFFwLqu060HDmliI13x9ZSi5pC99JUkSVNAP+9e\n+ffA+cCLgP8G/C/g/4uIY5r4fGBbV59twNwmRmZu74rRiu+uryRJmgLGfffKLuwAns/MtzbvvxoR\nJwN/ALyFssG0u0iYC2xuYkTEnFbh0Wm7eS99x2RkZGRk1qxZe28oSZK69eUf0H4WHc8BO7uOJXB0\n8/pZyq2ybQubfs9SvtBCyj6PTmykFd9d3zGZNWsWmzZtYXi4e4gzy8DAbIaG5s34XJiHUeaiMA+j\nzEVhHopOHvqhn0XHSuCdETErM0eaY0cA323FTwKWwU+e6XEIsCIzn4uItU28c2fMyZTNo+sjYiVl\nU+lLM7Ozt+Ok5pxjNjy8kx07Zu7EaTMXhXkYZS4K8zDKXBTmoX/6WXR8HHgXcEdE3AycAfw65c4T\ngPcDDzcFxGrgNuAzmbm2Fb8hIjqrHtcDNwFk5nci4iHKcz/+qDnnucApfRy/JEmaQPu6kbSzokFm\n/ojytNAjgCeBPwR+JzOfaOIrgYspDwdbDmzg3z5j4ybgPuAB4BPAhzPz9lb894BNlNWNdwDnZ+Zj\n+zh+SZJUyayRkZG9t5oeRjZufKGnJbLt27ezZs2TY2p71FFHM2fOnPGOrZrBwdksWHAgveZiujEP\no8xFYR5GmYvCPBRNHva7jaTTzpo1T7LujNM4am/tAB56mMWLj60wKkmSpiaLjr04Cnj1GNptnOiB\nSJI0xfmn7SVJUhUWHZIkqQqLDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6JElS\nFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5J\nklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhWD4+0YEXOB1cClmfloV2wI+Drw\njsxc1jp+LnAdsBD4HHBRZm5oxd8DXEAphu7JzKWt2IuBDwK/BvwAuCYzPzre8UuSpLrGtdLRFBwf\nB47cTZMbKYVFu89xwN3AtcASYAFwbyt+JXAucBbweuC8iLiidYoPAwcBxwPvBu6OiFeNZ/ySJKm+\nnouOiDgCWAm8bDfxk4DTge93hS4F7s
vMj2bmU8CbgN+IiMOa+OXA1Zm5IjMfAZYClzXn/AXgtcCF\nmfn1zPwQ8BHgkl7HL0mSJsd4VjpOBb4AnADMagci4t8BH6AUA9u7+i0BfnIZJjOfAdYCSyJiEXAo\n8MVW++XAYRFxMHAcsDYzv9cVP2Ec45ckSZOg5z0dmXln53VEdIevBh7PzM/vIrYIWNd1bD1wSBMb\n6YqvpxQ1h+ylryRJmgLGvZG0W0QcCfwBcPRumswHtnUd2wbMbWJk5vauGK347vqO2cBAbws7vbQf\nGJjN4OD+fzNQ5zv1movpxjyMMheFeRhlLgrzUPTz+/et6ADuotxR8vxu4lv56SJhLrC5iRERc1qF\nR6ft5r30HbOhoXm9NO+p/dDQPBYsOLCn80+mXnMxXZmHUeaiMA+jzEVhHvqnL0VHRPw8cCLwXyLi\nlubwfOADEfG7mfla4Fm67mhp3j/XxGY179e2YiOt+O76jtmmTVsYHt7ZU/uhHtpu3PhCL8OZFAMD\nsxkamtdzLqYb8zDKXBTmYZS5KMxD0clDP/RrpeMZ4Be7jj0C3A50nqWxEjgJWAYQEYdS9mSsyMzn\nImJtE/9Y0/5kyubR9RGxkrKp9KWZ2dnbcVJzzjEbHt7Jjh1jnzi9TLJezz3Zptp4J4p5GGUuCvMw\nylwU5qF/+lJ0ZOZO4NvtYxGxA/jnzOysRrwfeLgpIFYDtwGfycy1rfgNEdFZ9bgeuKk5/3ci4iHg\nIxHxR5S7Wc4FTunH+CVJ0sTb16JjZKyxzFwZERdTnki6AHiIsvG04ybgPwAPAMPABzPz9lb89ygP\nF1tJuaxyfmY+to/jlyRJlexT0ZGZA3uIvXwXx5bRXF7ZRWwncFXzs6v488DZ4xupJEmabDP7PiBJ\nklSNRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVFhyRJqsKi\nQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6JElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKq\nsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSarCokOSJFUxON6OETEXWA1cmpmPNseW\nAO8F/gvwDHBzZt7T6vMa4Fbg5cAK4KLM/E4r/jbgKuAg4H7gsszc2vq8O4BzgM3AezPzlvGOX5Ik\n1TWulY6mAPg4cGTr2MHA/wb+Hvgl4E+B90XEmU3854FPAfcArwKeBz7d6v964BrgIuB0YAlwY+tj\nbwaOAX4FuAS4NiLOGc/4JUlSfT2vdETEEcDHdhE6G3guM9/VvP9WRJwGvBH4LPD7wKrMvK05z/nA\n9yPilGal5HLg1sz8bBO/GPhcRPwxpTi6EDgjM58AnoiIG4HLgAd6/Q6SJKm+8ax0nAp8ATgBmNU6\n/lng/F20/5nmf48HHu0czMwtwOPACRExG3g18MVWv5XAHOCVzc8g5ZJMx/LmnJIkaQroeaUjM+/s\nvI6I9vG1wNpW7D8Cb6BcMgFYBKzrOt164BDgRcAB7XhmDkfEhiY+AjyfmTu6+h4QES/JzA29fg9J\nklTXuDeS7klEHAB8klJE3NUcng9s62q6DZjbxNhDfPZuYjTxMRkY6G1hp5f2AwOzGRzc/28G6nyn\nXnMx3ZiHUeaiMA+jzEVhHop+fv++Fx0RcSDw18AvAr/cufsE2MpPFwhzgY1NjN3ENzfj3FWMJj4m\nQ0Pzxtq05/ZDQ/NYsODAns4/mXrNxXRlHkaZi8I8jDIXhXnon74WHRFxEPC3lFtiT8vMb7fCzwIL\nu7osBL4CbKAUHguBbzbnGgBeAjxHWen42YiYnZk7W323ZOYPxzq+TZu2MDy8c+8NW+2Hemi7ceML\nYz73ZBkYmM3Q0LyeczHdmIdR5qIwD6PMRWEeik4e+qFvRUdEzKLcEns4cEpm/lNXk5XASa3284HF\nwD
WZORIRq5p4Z7PpicB24AnKhtUfU26j/VITPxlY1csYh4d3smPH2CdOL5Os13NPtqk23oliHkaZ\ni8I8jDIXhXnon36udPw+5Rkavwlsap7bAbA9MzcCHwKuam6B/RvgWuDbnQeLUR78dWdErKHsBbkD\nuKv1cLBlTfwCyubSK4E393H8kiRpAu3r7pCR5gfKk0JnUQqKda2fTwJk5tNNmwuAL1PuWDm7c6LM\nvA+4HvgA8BDl9tilrc+6AniM8vCx9wHvysxPI0mSpoR9WunIzIHW6zPH0P4h4BV7iN/Iv30KaTu2\nhfIckF09C0SSJO3nZvZ9QJIkqRqLDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6\nJElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkK\niw5JklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVMXgeDtG\nxFxgNXBpZj7aHDsc+CBwAvBd4O2Z+XetPq8BbgVeDqwALsrM77TibwOuAg4C7gcuy8ytrc+7AzgH\n2Ay8NzNvGe/4JUlSXeNa6WgKgI8DR3aFPg2sA44FPgJ8KiIOafocCnwKuAd4FfB8075zztcD1wAX\nAacDS4AbW+e+GTgG+BXgEuDaiDhnPOOXJEn19Vx0RMQRwErgZV3HT6esYFycxXsoqxkXNE0uAlZl\n5m2Z+XXgfODwiDiliV8O3JqZn83Mx4CLgQsj4oCImA9cCFyemU9k5l9RCpLLeh2/JEmaHONZ6TgV\n+ALlEsqs1vHjgcc7l0May5t2nfijnUBmbgEeB06IiNnAq4EvtvquBOYAr2x+BilFTPvcx49j/JIk\naRL0vKcjM+/svI6IdmgR5dJK23rgkDHEXwQc0I5n5nBEbGjiI8Dzmbmjq+8BEfGSzNzQ6/eQJEl1\njXsj6S7MB7Z1HdsGzB1DfH7r/a7is3cTo3V+SZK0H+tn0bEVeHHXsbmUO0068e4CYS6wsYmxm/hm\nyjh3FaN1/r0aGOjtalIv7QcGZjM4uP/fgdz5Tr3mYroxD6PMRWEeRpmLwjwU/fz+/Sw6nuWn72ZZ\nCDzXii/cRfwrwAZK4bEQ+CZARAwAL2n6zwZ+NiJmZ+bOVt8tmfnDsQ5waGjemL9Mr+2HhuaxYMGB\nPZ1/MvWai+nKPIwyF4V5GGUuCvPQP/0sOlYCSyNibmZ2Ln2cxOjm0JXNewCaO1IWA9dk5khErGri\nnc2mJwLbgScoG1Z/TLmN9ktN/GRgVS8D3LRpC8PDO/fesNV+qIe2Gze+0MtwJsXAwGyGhub1nIvp\nxjyMMheFeRhlLgrzUHTy0A/9LDoeAb4H3BsR1wGvo9yR8uYm/iHgqoj4Y+BvgGuBb3ceLEZ58Ned\nEbGGsqH0DuCu1sPBljXxCyibS69snXtMhod3smPH2CdOL5Os13NPtqk23oliHkaZi8I8jDIXhXno\nn329UDPSedFc9jiLctljNfBG4OzMfKaJP015mugFwJcpd6yc3ep/H3A98AHgIcrtsUtbn3UF8Bjw\n98D7gHdl5qeRJElTwj6tdGTmQNf7bwOn7aH9Q8Ar9hC/kX/7FNJ2bAvlgWLnj2uwkiRpUs3sLbmS\nJKkaiw5JklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVF\nhyRJqsKiQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6JElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRV\nYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSapisJ8ni4hDgPcDpwAbgNsz\n8/YmtriJHQ08Bbw1Mx9v9T0XuA5YCHwOuCgzN7Ti7wEuoBRK92Tm
0n6OXZIkTax+r3TcD/wIOAZ4\nG/DuiDgrIuYDDwKPNLEVwIMRMQ8gIo4D7gauBZYAC4B7OyeNiCuBc4GzgNcD50XEFX0euyRJmkB9\nW+mIiBcBxwMXZua3gG9FxN8Cvwq8GNjcWp14W0T8BvDbwDLgUuC+zPxoc643AU9HxGGZ+TRwOXB1\nZq5o4kspqyK39Gv8kiRpYvVzpWML8AJwfkQMRkQAJwJfoaxeLO9q/w/ACc3rJcCjnUBmPgOsBZZE\nxCLgUOCLrb7LgcMi4uA+jl+SJE2gvhUdmbkNuAx4C6UA+Trw2cz8S2ARsK6ry3rgkOb1nuKLgJGu\n+HpgVqu/JEnaz/V1IylwBPDXwM2UDaPvi4gvAPOBbV1ttwFzm9d7is8HyMztXTFa/cdkYKC3GquX\n9gMDsxkc3P9vBup8p15zMd2Yh1HmojAPo8xFYR6Kfn7/fu7p+FXgQuCQZtXjK83dLFcD3+KnC4S5\nwObm9dY9xLc255/TKjw6bTfTg6Gheb0076n90NA8Fiw4sKfzT6ZeczFdmYdR5qIwD6PMRWEe+qef\nKx3HAP/UFBwdXwHeSdmvsbCr/ULgueb1s3uIP0u5lLKQss+jExtp9R+TTZu2MDy8s6f2Qz203bjx\nhV6GMykGBmYzNDSv51xMN+ZhlLkozMMoc1GYh6KTh37oZ9GxDvjFiBjMzB3NsSOAbwMrgXd0tT8R\n+PPm9UrgJMqdLETEoZT9Gisy87mIWNvEP9a0PxlYm5nrexng8PBOduwY+8TpZZL1eu7JNtXGO1HM\nwyhzUZiHUeaiMA/908+i4zPAjcDdEfFu4BWUQuMdwCeBGyLiVuAuymbTAynP9YDy0LCHI2IlsBq4\nDfhMZq5txW+IiM6qx/XATX0cuyRJmmD9vHtlE+WZHIuALwPvBf4sM+/OzB8Br6U8qXQ1cBxwZmZu\nafquBC6mPBxsOeVpphe0Tn8TcB/wAPAJ4MOdJ51KkqSpoa93r2TmN4AzdhNbDRy7h77LaC6v7CK2\nE7iq+ZEkSVPQzL4PSJIkVWPRIUmSqrDokCRJVVh0SJKkKiw6JElSFRYdkiSpCosOSZJUhUWHJEmq\nwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSarCokOSJFVh0SFJ\nkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqrDokCRJVQz2\n82QRMQe4FTgX2AZ8KDPf2cQWA+8HjgaeAt6amY+3+p4LXAcsBD4HXJSZG1rx9wAXUAqlezJzaT/H\nLkmSJla/Vzr+AvhV4NeANwIXRcRFETEfeBB4BDgGWAE8GBHzACLiOOBu4FpgCbAAuLdz0oi4klLI\nnAW8HjgvIq7o89glSdIE6ttKR0QsoKxEnJ6ZjzXHbgaOB3YAm1urE2+LiN8AfhtYBlwK3JeZH236\nvQl4OiIOy8yngcuBqzNzRRNfSlkVuaVf45ckSROrnysdJwE/zMzlnQOZeWNm/j5l9WJ5V/t/AE5o\nXi8BHm31ewZYCyyJiEXAocAXW32XA4dFxMF9HL8kSZpA/dzT8XLgu80qxZ8Ac4C/BN4NLKLs42hb\nDxzVvF4ErNtF/JAmNtIVXw/MauLr+/cVJEnSROln0fHvgf8EXAS8mVIsfAB4AZhP2Vjatg2Y27ze\nU3w+QGZu74rR6j8mAwO9Lez00n5gYDaDg/v/zUCd79RrLqYb8zDKXBTmYZS5KMxD0c/v38+iYwdw\nEPDG5vIIEXEYcAnwTX66QJgLbG5eb91DfGtzrjmtwqPTdjM9GBqa10vzntoPDc1jwYIDezr/ZOo1\nF9OVeRhlLgrzMMpcFOahf/pZdDwHbO0UHI2k7Md4mHIrbNvCpg/As3uIP0u5lLKQss+jExtp9R+T\nTZu2MDy8s6f2Qz203bjxBbZv
385TTz055s/4z//5aObMmTPm9vtqYGA2Q0Pzes7FdGMeRpmLwjyM\nMheFeSg6eeiHfhYdK4ADIuIXM/P/NMeOBL4DrATe0dX+RODPm9crKRtRlwFExKGU/RorMvO5iFjb\nxD/WtD8ZWJuZPe3nGB7eyY4dY584vUyyzrmfeOIJ1p1x2k82q+zJGmD4oYdZvPjYMX9Ov/Sai+nK\nPIwyF4V5GGUuCvPQP30rOjLznyLiQeDeiLiEsqdjKfBnwCeBGyLiVuAu4C3AgcD9Tff3Aw9HxEpg\nNXAb8JnMXNuK3xARnVWP64Gb+jX2fjsKePUY226cyIFIkrQf6ffumPOA/0O5vfVe4H2Z+T8z80fA\na4FTKEXFccCZmbkFIDNXAhdTHg62HNhAeeZHx03AfcADwCeAD2fm7X0euyRJmkB9fQx6U1y8ufnp\njq0GdnsdITOX0Vxe2UVsJ3BV8yNJkqagmX0fkCRJqsaiQ5IkVWHRIUmSqrDokCRJVVh0SJKkKiw6\nJElSFRYdkiSpCosOSZJUhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkK\niw5JklSFRYckSarCokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqLDkmSVIVFhyRJ\nqsKiQ5IkVWHRIUmSqhicqBNHxIPA+sy8oHm/GHg/cDTwFPDWzHy81f5c4DpgIfA54KLM3NCKvwe4\ngFIo3ZOZSydq7JIkqf8mZKUjIt4AnNl6Px94EHgEOAZYATwYEfOa+HHA3cC1wBJgAXBvq/+VwLnA\nWcDrgfMi4oqJGLskSZoYfS86ImIBcCPw5dbhNwCbM3NpFm8DfgT8dhO/FLgvMz+amU8BbwJ+IyIO\na+KXA1dn5orMfARYClzW77FLkqSJMxErHTcDy4Cvt44dDyzvavcPwAnN6yXAo51AZj4DrAWWRMQi\n4FDgi62+y4HDIuLg/g5dkiRNlL4WHRFxOnAyZW9G2yJgXdex9cAhY4gvAka64uuBWa3+kiRpP9e3\njaQRMZeyUfSSzNwWEe3wfGBbV5dtwNwxxOcDZOb2rhit/mMyMNBbjdVL+4GB2QwOzh7XZwwO1ruJ\nqDO+Xsc53ZiHUeaiMA+jzEVhHop+fv9+3r3yp8DqzPz8LmJb+ekCYS6weQzxrQARMadVeHTabqYH\nQ0PzemneU/uhoXksWHDguD5jwYIDe+rTD72Oc7oyD6PMRWEeRpmLwjz0Tz+Ljt8FDo6IHzXv5wJE\nxG8BH6PcCtu2EHiuef3sHuLPUi6lLKTs8+jERlr9x2TTpi0MD+/sqf1QD203bnyhpz7tfrUMDMxm\naGhez7mYbszDKHNRmIdR5qIwD0UnD/3Qz6LjVODftd7fSCkMljax7udqnAj8efN6JXASZQMqEXEo\nZb/Gisx8LiLWNvGPNe1PBtZm5vpeBjg8vJMdO8Y+cXqZZJ1z9zoxex1Tv0zW5+5vzMMoc1GYh1Hm\nojAP/dO3oiMzv9d+36x4jGTmtyPiB8D1EXErcBfwFuBA4P6m+fuBhyNiJbAauA34TGaubcVviIjO\nqsf1wE39GrskSZp4VXbHZOaPgP8GnEIpKo4DzszMLU18JXAx5eFgy4ENlKePdtwE3Ac8AHwC+HBm\n3l5j7JIkqT8m7DHomXl+1/vVwLF7aL+M5vLKLmI7gauaH0mSNAXN7PuAJElSNRYdkiSpCosOSZJU\nhUWHJEmqwqJDkiRVYdEhSZKqsOiQJElVWHRIkqQqLDokSVIVFh2SJKkKiw5JklSFRYckSarCokOS\nJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqYrByR6AYPv27axZ8+SY2x911NHMmTNnAkck\nSVL/WXTsB9aseZJ1Z5zGUWNpC/DQwyxefOwEj0qSpP6y6NhPHAW8eoxtN07kQCRJmiDu6ZAkSV
VY\ndEiSpCosOiRJUhUWHZIkqQqLDkmSVEVf716JiJcCfwGcBmwGPgG8IzO3R8ThwAeBE4DvAm/PzL9r\n9X0NcCvwcmAFcFFmfqcVfxtwFXAQcD9wWWZu7ef4JUnSxOn3SscngQOAXwbeAPwmcF0T+ytgHXAs\n8BHgUxFxCEBEHAp8CrgHeBXwPPDpzkkj4vXANcBFwOnAEuDGPo9dkiRNoL4VHRERwHHAmzPzG5n5\nD5RC4Y0RcRrwMuDiLN5DWc24oOl+EbAqM2/LzK8D5wOHR8QpTfxy4NbM/GxmPgZcDFwYEQf0a/yS\nJGli9XOl4/vAmZn5fNfxn6GsTDzedTlkOeVSC8DxwKOdQGZuAR4HToiI2ZTnZn2x1XclMAd4ZR/H\nL0mSJlDfio7M/JfM/FznfUTMAi4DvgAsolxaaVsPHNK83lP8RZRLNj+JZ+YwsKHVX5Ik7ecm8u6V\nm4DFwDuB+cC2rvg2YG7zek/x+a33u+svSZL2cxPyt1ci4gbKPozfycyvRcRW4MVdzeZS7nAB2MpP\nFxBzKX9mZGvr/e76j8nAQG81Vi/tBwZmMzg4e0I/o/uzxqPzeeP53OnEPIwyF4V5GGUuCvNQ9PP7\n973oiIj3UTZ6npeZnTtQngWO7Gq6EHiuFV+4i/hXKJdRtjbvv9l8xgDwklb/MRkamtdL857aDw3N\nY8GCAyf0M7o/a1+M53OnI/MwylwU5mGUuSjMQ//0+zkd1wJ/APxuZn6qFVoJLI2IuZnZuUxyEqOb\nQ1c27zvnmU+5NHNNZo5ExKom3tlseiKwHXiil/Ft2rSF4eGdPbUf6qHtxo0v9NSn0w/ouc/GjS/0\n0GPUwMBshobm9ZyL6cY8jDIXhXkYZS4K81B08tAPfSs6IuII4GrgvwNfioiDW+FHgO8B90bEdcDr\nKHekvLmJfwi4KiL+GPgb4Frg25nZKTLuAO6MiDWUDaV3AHf1+nCw4eGd7Ngx9onTyyTrnLvXiTme\nidzr95ioc0wH5mGUuSjMwyhzUZiH/unnharXNee7mlIYrKNc/liXmTuBsymXSFYDbwTOzsxnADLz\naeAcynM7vky5Y+Xszokz8z7geuADwEOUZ3ws7ePYJUnSBOvbSkdm3gDcsIf4tyiPR99d/CHgFXuI\n34hPIZUkacqa2VtyJUlSNRYdkiSpCosOSZJUxYQ8HEx1bN++nTVrnhxT26OOOpo5c+ZM8IgkSdo9\ni44pbM2aJ1l3xmkctbd2AA89zOLFx1YYlSRJu2bRMcUdRXngyd5snOiBSJK0F+7pkCRJVVh0SJKk\nKiw6JEm9XatTAAAIu0lEQVRSFRYdkiSpCosOSZJUhUWHJEmqwltmZ5jt27ezatXX2LRpC8PDe/9T\nzT5UTJLULxYdM8xTTz3J915z6l4fKAY+VEyS1F8WHTPQWB8oBj5UTJLUP+7pkCRJVVh0SJKkKiw6\nJElSFRYdkiSpCjeSaq+2b9/OmjVPjrm9t9lKknbFokN7tWbNk6w74zRvs5Uk7ROLDo2Jt9lKkvaV\nezokSVIVrnRoQrgPRJLUzaJDE8J9IJKkbhYdmjDj2QfSywqJqyOSNLVYdGi/MtYVkvbqyHgu5QwO\nHrAvw5QkjcOUKjoiYi5wB3AOsBl4b2beMrmjUr+NdYWkszoynks5r371WNdgJEn9MqWKDuBm4Bjg\nV4DDgWUR8d3MfGAyB6XJ1+ulnO3bt7Nq1dfYtGkLw8M7937+5lKOl38kafymTNEREfOBC4EzMvMJ\n4ImIuBG4DLDoUE+eeupJvveaU3ve6Frr8o/FiqTpaMoUHcArKeNd0Tq2HPiTyRmOprrxPvCsxuWf\no4462kJF0rQzlYqORcDzmbmjdWw9cEBEvCQzN0zSuKS96rXAqVmo9HKpaTyXmTr9gHGPz0ta0vQw\nlYqO+cC2rmOd93PHcoKBgd4ewDowMLv8Qt+LNcChA7MZHJ
w95j7tfp3XvfTp5bPafWbPnlVlfOPp\nM97vNJ6cjycPNcfXq4GB2XzjG2v4/BmncfgY2n8XGPj8IxxzzLF89atP8dnTT9lrv3aff/zH3j8L\nGNf4xvpZ7T4Ajz/+2Bg+qTjmmGMZGJjNqlWr+Nd/3crOnSNj6jOez+kYa7/x9Gn3G0+fXnLRHl8t\n4815rzr/ZvT6b8d4jWdO1NDP7z9rZGTv/8+1P4iI3wL+IjNf2jr2Csrv6Zdk5g8nbXCSJGmvptLf\nXnkW+NmIaI95IbDFgkOSpP3fVCo6vgr8GFjSOnYysGpyhiNJknoxZS6vAETE+4FfBi4ADgHuBd6c\nmZ+ezHFJkqS9m0obSQGuoDyR9O+BfwHeZcEhSdLUMKVWOiRJ0tQ1lfZ0SJKkKcyiQ5IkVWHRIUmS\nqrDokCRJVVh0SJKkKqbaLbM9i4i5lNtszwE2A+/NzFsmd1STIyLOBh4ARoBZzf9+MjN/Z1IHVkkz\nF1YDl2bmo82xw4EPAidQ/nzH2zPz7yZrjLXsJhe3A3/Iv50ff5iZd0zaQCdIRLwU+AvgNMrvhU8A\n78jM7TNpTuwlDzNmPgBExC8A/5PyLKgNwP/IzJub2OHMnDmxpzzs85yYCSsdNwPHAL8CXAJcGxHn\nTOqIJs+RwF9THh+/kPKXe39/UkdUSfOP7McpOWj7NLAOOBb4CPCpiDik8vCq2kMujgCWUuZFZ358\nqO7oqvkkcADlF+sbgN8Ermtif8XMmRN7ysOMmQ8RMQt4kPKXy38JeAtwdUS8oWkyI+bEGPKwz3Ni\nWq90RMR84ELgjMx8AngiIm4ELqP8F/9McwTwVGb+YLIHUlNEHAF8bBfHTwdeDizJzK3AeyLiVylP\nvP2zuqOsY3e5aBwB3JiZ/1xxSNVFRADHAQdn5vPNsWuAmyLib4GXAcdP9zmxpzxQ/mGZEfOhcTDw\nFeCSzHwB+FZEfAE4KSLWM0PmBHvIA/D/0oc5Ma2LDuCVlO+4onVsOfAnkzOcSXckMC2XBPfiVOAL\nwNWUJeSO44HHm18kHcspS6jT1S5zEREHAT8HfHOSxlXT94EzO//QtvwM5W87zZQ5sas8zAJ+ZobN\nBzLz+8C5nfcR8cuUv+11CTNoTuwmD6cAb+nXnJjuRcci4PnM3NE6th44ICJekpkbJmlckyWAX4+I\ndwIDwP3ANZn548kd1sTKzDs7r8t/3P3EIsqSadt6yt/1mZb2kIsjKNdnr46IMynXcm/JzGV1Rzjx\nMvNfgM913jdLypdRirEZMyf2kIfPM4PmQ7eI+C5wKPA3lBXx25ghc6JtF3k4jj7Miem+p2M+sK3r\nWOf93MpjmVQR8fPAPGAL8NvAlcB5wI2TOa5Jtrv5MaPmRuMVwE7ga8CZwN3AXRFx1qSOqo6bgMXA\nO5nZc+ImynX8q5nZ8+Ecyt6WXwJuZebOiU4eFlMKr6APc2K6r3Rs5acnRuf9ZmaQzFzbrO78sDn0\njxExAPyviLgiM2fiH+HZCry469hcZtjcAMjMZRHx16358VRE/CfgrZRNdNNSRNwAXA78TmZ+LSJm\n5JzozgPwtZk4HwAy83GAiLgC+ChwD7Cgq9m0nxOtPLydsnl2CNjnOTHdVzqeBX42ItrfcyGwpZW4\nGWMX3/nrlJ3r3b9kZ4pnKfOhbSHw3CSMZdLtZn783GSMpYaIeB/wduC81l+rnnFzYjd5mFHzISL+\n4y7+i/1rwBzK/+1nxJzYSx4O6secmO5Fx1eBH1M2AnWcDKyanOFMnoj4rxHxfEQc0Dq8GNgwA/e2\ndKwEjmluIe04qTk+o0TE/xMR3ZuMFwPfmIzxTLSIuBb4A+B3M/P+VmhGzYnd5WGmzQfK3SkPRES7\nuHgV8M+UTaPHzpA5sbs8/AD4o37MiWl9eSUzt0TEMuDOiLiAsvHnSuDNkzqwyfElynLg3RHxZ8Av\nUPZz3DCpo5pcjwDfA+
6NiOuA1wGvZmbOj88A/3ezpPxp4Azg/6I832ZaaW4bvhr478CXIuLgVnjG\nzIm95GHGzIfGKsrD8v6y+c4vo/xu/HPgUWbInGDPeVhJH+bEdF/pALgCeAz4e+B9wLvaS4gzRWb+\nK2WS/AfKxPogcGdmvndSB1bfT/auZOZO4CzKUulq4I3A2Zn5zCSNrbZ2LlYDvwX8HvAk5S6GczPz\ny5M0ton0OsrvvqspdyWsoyyVr2vmxNnMjDmxpzzMpPnQ/l3wAuU/0O4Cbs/M/9HEXscMmBN7yUNf\n5sSskZGZuH9QkiTVNhNWOiRJ0n7AokOSJFVh0SFJkqqw6JAkSVVYdEiSpCosOiRJUhUWHZIkqQqL\nDkmSVIVFhyRJqsKiQ5IkVWHRIUmSqvj/Aa/2q0pDQpg1AAAAAElFTkSuQmCC\n",
248 | "text/plain": [
249 | ""
250 | ]
251 | },
252 | "metadata": {},
253 | "output_type": "display_data"
254 | },
255 | {
256 | "data": {
257 | "text/html": [
258 | "\n",
259 | "
\n",
260 | " \n",
261 | " \n",
262 | " | \n",
263 | " Number_of_entries_per_person | \n",
264 | " Count | \n",
265 | "
\n",
266 | " \n",
267 | " \n",
268 | " \n",
269 | " | 0 | \n",
270 | " 1 | \n",
271 | " 15714 | \n",
272 | "
\n",
273 | " \n",
274 | " | 1 | \n",
275 | " 2 | \n",
276 | " 5878 | \n",
277 | "
\n",
278 | " \n",
279 | " | 2 | \n",
280 | " 3 | \n",
281 | " 2675 | \n",
282 | "
\n",
283 | " \n",
284 | " | 3 | \n",
285 | " 4 | \n",
286 | " 1527 | \n",
287 | "
\n",
288 | " \n",
289 | " | 4 | \n",
290 | " 5 | \n",
291 | " 1038 | \n",
292 | "
\n",
293 | " \n",
294 | " | 5 | \n",
295 | " 6 | \n",
296 | " 670 | \n",
297 | "
\n",
298 | " \n",
299 | " | 6 | \n",
300 | " 7 | \n",
301 | " 490 | \n",
302 | "
\n",
303 | " \n",
304 | " | 7 | \n",
305 | " 8 | \n",
306 | " 388 | \n",
307 | "
\n",
308 | " \n",
309 | " | 8 | \n",
310 | " 9 | \n",
311 | " 335 | \n",
312 | "
\n",
313 | " \n",
314 | " | 9 | \n",
315 | " 10 | \n",
316 | " 207 | \n",
317 | "
\n",
318 | " \n",
319 | " | 10 | \n",
320 | " 11 | \n",
321 | " 195 | \n",
322 | "
\n",
323 | " \n",
324 | " | 11 | \n",
325 | " 12 | \n",
326 | " 175 | \n",
327 | "
\n",
328 | " \n",
329 | " | 12 | \n",
330 | " 13 | \n",
331 | " 124 | \n",
332 | "
\n",
333 | " \n",
334 | " | 13 | \n",
335 | " 14 | \n",
336 | " 90 | \n",
337 | "
\n",
338 | " \n",
339 | " | 14 | \n",
340 | " 15 | \n",
341 | " 73 | \n",
342 | "
\n",
343 | " \n",
344 | " | 15 | \n",
345 | " 16 | \n",
346 | " 47 | \n",
347 | "
\n",
348 | " \n",
349 | " | 16 | \n",
350 | " 17 | \n",
351 | " 56 | \n",
352 | "
\n",
353 | " \n",
354 | " | 17 | \n",
355 | " 18 | \n",
356 | " 44 | \n",
357 | "
\n",
358 | " \n",
359 | " | 18 | \n",
360 | " 19 | \n",
361 | " 34 | \n",
362 | "
\n",
363 | " \n",
364 | " | 19 | \n",
365 | " 20 | \n",
366 | " 19 | \n",
367 | "
\n",
368 | " \n",
369 | " | 20 | \n",
370 | " 21 | \n",
371 | " 16 | \n",
372 | "
\n",
373 | " \n",
374 | " | 21 | \n",
375 | " 22 | \n",
376 | " 16 | \n",
377 | "
\n",
378 | " \n",
379 | " | 22 | \n",
380 | " 23 | \n",
381 | " 5 | \n",
382 | "
\n",
383 | " \n",
384 | " | 23 | \n",
385 | " 24 | \n",
386 | " 7 | \n",
387 | "
\n",
388 | " \n",
389 | " | 24 | \n",
390 | " 25 | \n",
391 | " 1 | \n",
392 | "
\n",
393 | " \n",
394 | " | 25 | \n",
395 | " 28 | \n",
396 | " 3 | \n",
397 | "
\n",
398 | " \n",
399 | " | 26 | \n",
400 | " 32 | \n",
401 | " 1 | \n",
402 | "
\n",
403 | " \n",
404 | "
\n",
405 | "
"
406 | ],
407 | "text/plain": [
408 | " Number_of_entries_per_person Count\n",
409 | "0 1 15714\n",
410 | "1 2 5878\n",
411 | "2 3 2675\n",
412 | "3 4 1527\n",
413 | "4 5 1038\n",
414 | "5 6 670\n",
415 | "6 7 490\n",
416 | "7 8 388\n",
417 | "8 9 335\n",
418 | "9 10 207\n",
419 | "10 11 195\n",
420 | "11 12 175\n",
421 | "12 13 124\n",
422 | "13 14 90\n",
423 | "14 15 73\n",
424 | "15 16 47\n",
425 | "16 17 56\n",
426 | "17 18 44\n",
427 | "18 19 34\n",
428 | "19 20 19\n",
429 | "20 21 16\n",
430 | "21 22 16\n",
431 | "22 23 5\n",
432 | "23 24 7\n",
433 | "24 25 1\n",
434 | "25 28 3\n",
435 | "26 32 1"
436 | ]
437 | },
438 | "execution_count": 6,
439 | "metadata": {},
440 | "output_type": "execute_result"
441 | }
442 | ],
443 | "source": [
444 | "grouped_df = train.groupby('Patient_ID').agg('size').reset_index()\n",
445 | "grouped_df.columns = [\"Patient_ID\", \"Number_of_entries_per_person\"]\n",
446 | "grouped_df = grouped_df.groupby('Number_of_entries_per_person').agg('size').reset_index()\n",
447 | "grouped_df.columns = [\"Number_of_entries_per_person\", \"Count\"]\n",
448 | "plt.bar(grouped_df.Number_of_entries_per_person, grouped_df.Count, color='r')\n",
449 | "plt.show()\n",
450 | "grouped_df"
451 | ]
452 | },
453 | {
454 | "cell_type": "markdown",
455 | "metadata": {},
456 | "source": [
457 | "So 15,714 patients have 1 entry, 5,878 have 2 entries, and so on.\n",
458 | "\n",
459 | "Now let us look at the number of health camps in the train data and their distribution."
460 | ]
461 | },
462 | {
463 | "cell_type": "code",
464 | "execution_count": 7,
465 | "metadata": {
466 | "collapsed": false
467 | },
468 | "outputs": [
469 | {
470 | "name": "stdout",
471 | "output_type": "stream",
472 | "text": [
473 | "Number of Health Camp ID in the train dataset : 44\n"
474 | ]
475 | }
476 | ],
477 | "source": [
478 | "print \"Number of Health Camp ID in the train dataset : \", len(train.Health_Camp_ID.unique())"
479 | ]
480 | },
481 | {
482 | "cell_type": "code",
483 | "execution_count": 8,
484 | "metadata": {
485 | "collapsed": false
486 | },
487 | "outputs": [
488 | {
489 | "data": {
490 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAg0AAAFoCAYAAADUycjgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3XucXVV58PFfZkJCAgQiKokQueoSAiIqlwCiIn0t2lfx\n1opoC9iIt9eiUKKtFRVbBKygrWBVFBCpSAWtoqK1CEaJBFEUxIUIJFxChHCZQG5kZt4/nrWZnZMz\nmT2TM8mQ/L6fTz45c/ba+9l77bXXfvb1jOvv70eSJGkoXRt7BiRJ0lODSYMkSWrEpEGSJDVi0iBJ\nkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1Mj44RROKf0N8BWgHxhX+78v5zw+pbQfcB6w\nD3Az8K6c84218Y8GTgOmAT8EZuecl9SGfxI4nkhmzs85z1mPZZMkSR003DMNXyd2+NPL/zsDtwPn\npJQmA1cC1wAvBK4DrkwpTQJIKR0AfAk4FTgImApcUE04pXQScDTwWuANwDEppQ+MdMEkSVJnjVuf\n355IKX0IOA6YCbwN+Iec8x614bcBn8g5X5RSuhDozTkfX4btBCwAdss5L0gpLQA+nHP+ahl+DHBa\nznm3Ec+gJEnqmBHf05BSmgqcAszJOT8BHAjMbSn2M2BW+XwQcG01IOd8D7AQOCilNB2YAfy0Nu5c\nYOeU0g4jnUdJktQ563Mj5LuBe3POV5S/pwP3tZRZDOzUYPh04v6I+1qGjauNL0mSNqL1SRreDny2\n9vdkYGVLmZXAxAbDJwPknFe1DKM2viRJ2oiG9fREJaW0P7AjcGnt6xWsvYOfCCxrMHxFme6EWuJQ\nlV1GQ/39/f3jxo1rWlySJA0Ycgc6oqQBeCVwbc750dp39xJPVNRNAxY1GH4vMbPTiPscqmH9tfGH\nNG7cOHp6ltPb2zdome7uLqZMmdSxcqMxTWMb29jGNraxOzHN4Zg6dashy4w0aWh30+M8oPW9CgcD\nn6gNPxS4CCClNIO4X+G6nPOilNLCMvySUv4lwMKc8+LhzFhvbx+rVw9diZ0uZ2xjG9vYxjb2WIzd\nSSNNGvYGvtry3X8Bp6eUzga+ALwT2Aq4rAw/D7g6pTQPuAE4B/hOznlhbfgZKaXqrMPpwFkjnD9J\nktRhI70R8pnAw/Uvcs5Lgb8ADiOSggOAI3POy8vwecAJxMud5gJLiLc/Vs4i7pG4HPgGcGHO+TMj\nnD9JktRhIzrTkHNue+Ej53wD8KJ1jHcR5fJEm2F9wMnlnyRJGmP8wSpJktSISYMkSWrEpEGSJDVi\n0iBJkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGjFpkCRJ\njZg0SJKkRkb009hj1fz58+npWU5vb1/b4TNn7sP48Vtu4LmSJGnTsEklDbcccAAzBxsGcNXV7L//\n/htwjiRJ2nRsUknDTGBdKcHDG2pGJEnaBHlPgyRJasSkQZIkNWLSIEmSGjFpkCRJjZg0SJKkRkwa\nJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBokSVIjJg2SJKmR\n8cMdIaU0ATgbOBpYCXw55/yPZdh+wHnAPsDNwLtyzjfWxj0aOA2YBvwQmJ1zXlIb/kngeCKZOT/n\nPGeEyyVJkjpsJGcaPgu8Avgz4C3A7JTS7JTSZOBK4BrghcB1wJUppUkAKaUDgC8BpwIHAVOBC6qJ\nppROIhKR1wJvAI5JKX1gZIslSZI6bVhnGlJKU4kzAYfnnH9ZvvsUcCCwGlhWOztwYkrpVcCbgIuA\n9wCX5py/VsZ7G7AgpbRzznkB8D7gwznn68rwOcRZiU+v5zJKkqQOGO6ZhkOBR3LOc6svcs5n5pz/\nljh7MLel/M+AWeXzQcC1tfHuARYCB6
WUpgMzgJ/Wxp0L7JxS2mGY8yhJkkbBcO9p2A24q5wl+Adg\nAvAV4J+B6cR9DHWLgZnl83TgvjbDdyrD+luGLwbGleGLhzmfkiSpw4abNGwNPBeYDRxL7Oz/A3gc\nmEzcGFm3EphYPq9r+GSAnPOqlmHUxpckSRvRcJOG1cA2wFvK5QVSSjsD7wZuY+0d/ERgWfm8Yh3D\nV5RpTaglDlXZZXRId3cX3d1dT34eqmyTcsMpa2xjG9vYxjb2hpxmpw03aVgErKgShiIT9yNcTTxK\nWTetjANw7zqG30tciphG3OdQDeuvjb/epkyZxJQpk5783HSc4Ux/Y5QztrGNbWxjG3tDGG7ScB2w\nZUppj5zz7eW7vYA7gXnAh1rKHwx8onyeR9xIeRFASmkGcb/CdTnnRSmlhWX4JaX8S4CFOeeO3c/Q\n07Ocnp7lTJkyiZ6e5fT29g1atru7q1G54ZTtdDljG9vYxja2sTtl6tSthiwzrKQh5/yHlNKVwAUp\npXcT9zTMAT4OfBM4I6V0NvAF4J3AVsBlZfTzgKtTSvOAG4BzgO/knBfWhp+RUqrOOpwOnDWc+RtK\nb2/fkxXc29vH6tVDV3bTcqMxTWMb29jGNraxOzHNThnJBZFjgNuJxyMvAP4t5/y5nPNS4NXAYURS\ncABwZM55OUDOeR5wAvFyp7nAEuKdD5WzgEuBy4FvABfmnD8zgvmTJEmjYNivkS7JwbHlX+uwG4AX\nrWPciyiXJ9oM6wNOLv8kSdIY4w9WSZKkRkwaJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIg\nSZIaMWmQJEmNmDRIkqRGTBokSVIjJg2SJKkRkwZJktSISYMkSWrEpEGSJDVi0iBJkhoxaZAkSY2Y\nNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGjFpkCRJjZg0SJKkRkwaJElS\nIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGxg93hJTSUcDlQD8wrvz/\nzZzzX6aU9gPOA/YBbgbelXO+sTbu0cBpwDTgh8DsnPOS2vBPAscTycz5Oec5I10wSZLUWSM507AX\n8N/Ejn8aMB3425TSZOBK4BrghcB1wJUppUkAKaUDgC8BpwIHAVOBC6qJppROAo4GXgu8ATgmpfSB\nES2VJEnquGGfaQD2BG7OOT9Q/zKldDywrHZ24MSU0quANwEXAe8BLs05f62UfxuwIKW0c855AfA+\n4MM55+vK8DnEWYlPj2AeJUlSh430TMNtbb4/EJjb8t3PgFnl80HAtdWAnPM9wELgoJTSdGAG8NPa\nuHOBnVNKO4xgHiVJUoeN5ExDAv48pfSPQDfwDeKSw3TiPoa6xcDM8nk6cF+b4TuVYf0twxcT90zs\nVD5LkqSNaFhJQ0rp2cAkYDlx2WFX4LPA5PJvZcsoK4GJ5fO6hk8GyDmvahlGbfz11t3dRXd315Of\nhyrbpNxwyhrb2MY2trGNvSGn2WnDShpyzgtTStvnnB8pX/0mpdQNXAxczdo7+InAsvJ5xTqGrwBI\nKU2oJQ5V2WV0yJQpk5gyZdKTn5uOM5zpb4xyxja2sY1tbGNvCMO+PFFLGCq3AlsC9xNPU9RNAxaV\nz/euY/i9xKWIacR9DtWw/tr4662nZzk9PcuZMmUSPT3L6e3tG7Rsd3dXo3LDKdvpcsY2trGNbWxj\nd8rUqVsNWWa4lyf+D3AJsFPOeUX5ej/gQeImxg+1jHIw8InyeR5wKPEkBSmlGcT9CtflnBellBaW\n4ZeU8i8BFuacO3Y/Q29v35MV3Nvbx+rVQ1d203KjMU1jG9vYxja2sTsxzU4Z7pmGnxOXC76UUvo4\nsDtwJnAG8E3gjJTS2cAXgHcCWwGXlXHPA65OKc0DbgDOAb6Tc15YG35GSqk663A6cNZIF0ySJHXW\nsO
6iyDk/BrwSeAYwH/gi8Pmc87/mnJcCrwYOI5KCA4Ajc87Ly7jzgBOIJy3mAkuItz9WzgIuJd42\n+Q3gwpzzZ0a+aJIkqZNGck/DrUTi0G7YDcCL1jHuRZTLE22G9QEnl3+SJGmM8QerJElSIyYNkiSp\nEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBokSVIjJg2SJKkRkwZJktSISYMk\nSWrEpEGSJDVi0iBJkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLS\nIEmSGjFpkCRJjZg0SJKkRkwaJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmN\njN/YM7ChrVq1ivnzf0dPz3J6e/valpk5cx/Gj99yA8+ZJElj24iThpTSlcDinPPx5e/9gPOAfYCb\ngXflnG+slT8aOA2YBvwQmJ1zXlIb/kngeOLsx/k55zkjnbd1ufnm33L3ES9l5iDDbwG46mr233//\n0QgvSdJT1oguT6SU3gwcWft7MnAlcA3wQuA64MqU0qQy/ADgS8CpwEHAVOCC2vgnAUcDrwXeAByT\nUvrASOatiZnA/oP8GyyZkCRpczfspCGlNBU4E7i+9vWbgWU55zk5nAgsBd5Uhr8HuDTn/LWc883A\n24BXpZR2LsPfB3w453xdzvkaYA7w3pEtkiRJGg0jOdPwKeAi4NbadwcCc1vK/QyYVT4fBFxbDcg5\n3wMsBA5KKU0HZgA/rY07F9g5pbTDCOZPkiSNgmElDSmlw4GXEPcm1E0H7mv5bjGwU4Ph04H+luGL\ngXG18SVJ0kbW+EbIlNJE4kbHd+ecV6aU6oMnAytbRlkJTGwwfDJAznlVyzBq43dEd3cXXV3jGpXr\n7u568nOT8k3KdrqcsY1tbGMb29gb0nCenvgocEPO+X/aDFvB2jv4icCyBsNXAKSUJtQSh6rsMjpo\nypRJjctVZZuOM9zpd7KcsY1tbGMb29gbwnCShr8CdkgpLS1/TwRIKb0RuIR4lLJuGrCofL53HcPv\nJS5FTCPuc6iG9dfG74ienuV0dY1j6wblHnzwUe688zYee2wFfX39g5bde+99mDRpS6ZMmbTOdz9A\nZIWdLDca0zS2sY1tbGNvGrGHa+rUrYYsM5yk4aXAFrW/zyR27HPKsNb3KhwMfKJ8ngccStxASUpp\nBnG/wnU550UppYVl+CWl/EuAhTnnxcOYvyFF5Q59Oqe3t4+bbrppne9zgHinQ2/tnQ69vX2sXj30\nCux0OWMb29jGNraxN4TGSUPO+e763+WMQ3/O+Y6U0gPA6Smls4EvAO8EtgIuK8XPA65OKc0DbgDO\nAb6Tc15YG35GSqk663A6cNbIF6szqvc5rMvDG2JGJEkaAzpyF0XOeSnwF8BhRFJwAHBkznl5GT4P\nOIF4udNcYAnx9sfKWcClwOXAN4ALc86f6cS8SZKkzhjxa6Rzzse1/H0D8KJ1lL+IcnmizbA+4OTy\n7ynH37OQJG0ONrsfrBoN/p6FJGlzYNLQIUPd//AwnpGQJD21mTRsQE3PSOy7775DJhdggiFJ2rBM\nGjawJmckhkouwEsekqQNz6RhjPJxT0nSWLPhX1wtSZKekkwaJElSIyYNkiSpEZMGSZLUiEmDJElq\nxKRBkiQ14iOXT3G+ZVKStKGYNDzFdfItkyYXkqR1MWnYBHTiLZO+YVKSNBSThs1Ik+RCkqTBmDRo\nDU3ukQAvZUjS5sikQWvwx7IkSYMxadBa/LEsSVI7vqdBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBok\nSVIjJg2SJKkRkwZJktSISYMkSWrEpEGSJDVi0iBJkhoxaZAkSY2YNEiSpEZMGiRJUiMmDZIkqZFh\n/zR2Sml34HPAIcAS4N9zzp8qw3YBvgjMAu4C3p9z/lFt3COAs4Hd
gOuA2TnnO2vDTwROBrYBLgPe\nm3NeMZIFkyRJnTWsMw0ppXHAlcBi4AXAO4EPp5TeXIp8G7gPeBFwMXBFSmmnMu4M4ArgfODFwIPA\nt2rTfgPwEWA2cDhwEHDmSBdMkiR11nAvT+wA/Ap4d875jznnHwA/Bg5NKb0c2BU4IYdPEmcTji/j\nzgbm55zPyTnfChwH7JJSOqwMfx9wds75+znnXwInAG9PKW25XksoSZI6YlhJQ875/pzz0TnnxwFS\nSocALwF+QpwZuLHlcsJc4lIFwIHAtbVpLQduBGallLqA/YGf1sadB0wA9h3OPEqSpNEx4hshU0p3\nEUnAdcDlwHTi0kTdYmCn8nldw7cDtqwPzzn3EvdM7IQkSdrohn0jZM3rgWnAecTNjZOBlS1lVgIT\ny+d1DZ9c+3uw8ddbd3cXXV3jOlZuNKb5VIrd3d315Oehynay3GhM09jGNraxN7XYo2HESUPO+UaA\nlNIHgK8RNzhObSk2EVhWPq9g7QRgIvBwGcYgw5fRIVOmTOpoudGY5lMpdlV+Y8+HsY1tbGMbe8MY\nVtKQUnomMCvn/O3a178j7j1YBOzZMsq08j3AveXv1uG/Ii5DrCh/31ZidQPb18Zfbz09y+nqGsfW\nHSo3GtN8KsXu6VnOlCmT6OlZTm9v36Blu7u7OlpuNKZpbGMb29ibWuzhmjp1qyHLDPdMw67A5Sml\nHXPO95fvXgz8ibjp8e9TShNzztVlhkMZuLlxXvkbgJTSZGA/4CM55/6U0vwyvLpZ8mBgFXDTMOdx\nUFG5Q5/OaVpuNKb5VIpdNdbe3j5Wrx664Xa6nLGNbWxjG7tzSUMTw00a5gM3AF8plyV2Bc4APkHs\n7O8GLkgpnQa8hngi4tgy7peBk1NKpwDfBU4F7sg5V0nCucDnU0q3EDdEngt8wZc7SZI0Ngz3kcs+\n4LXA48DPgS8An8k5/3sZ9hriEsMNwFuAo3LO95RxFxA3Tx4PXE88MXFUbdqXAqcD/wFcRTyVMWd9\nFk6SJHXOsG+ELJcl3jjIsDuAl69j3KuA561j+Jn4FkhJksYkf7BKkiQ1sj7vadBmbtWqVcyf/7t1\n3sE7c+Y+jB/vm8AlaVNg0qARu/nm33L3ES9l5iDDbwG46mr233//DThXkqTRYtKg9TKTeERmMA9v\nqBmRJI0672mQJEmNmDRIkqRGTBokSVIjJg2SJKkRkwZJktSISYMkSWrERy416nwJlCRtGkwaNOp8\nCZQkbRpMGrRB+BIoSXrq854GSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQJEmNmDRIkqRGTBokSVIj\nJg2SJKkRX+6kMaPJ66bBV05L0sZi0qAxY6jXTYOvnJakjcmkQWPKUK+bhnjltD+CJUkbnkmDnpKa\n/gjWvvvua3IhSR1i0qCnrCY/guUvbEpS55g0aJPnL2xKUmf4yKUkSWrEpEGSJDVi0iBJkhoxaZAk\nSY2YNEiSpEaG9fRESulZwGeBlwPLgG8AH8o5r0op7QJ8EZgF3AW8P+f8o9q4RwBnA7sB1wGzc853\n1oafCJwMbANcBrw357xixEsmSZI6arhnGr4JbAkcArwZ+L/AaWXYt4H7gBcBFwNXpJR2AkgpzQCu\nAM4HXgw8CHyrmmhK6Q3AR4DZwOHAQcCZI1oiSZI0KhqfaUgpJeAAYIec84Plu48AZ6WUfgDsChxY\nzg58MqX0CuB44ONEMjA/53xOGe844P6U0mE552uB9wFn55y/X4afAPwwpXSKZxu0IQznx7L6+rp8\ny6SkzdJwLk/cDxxZJQw12xJnBm5s2cHPJS5VABwIXFsNyDkvTyndCMxKKc0l3r1zam3cecAEYF/g\nF8OYR2lEhvNjWd3dXb5lUtJmqXHSkHN+FPhh9XdKaRzwXuDHwHTi0kTdYmCn8nldw7cjLnk8OTzn\n3JtSWlKGmzRog2j6Y1lNyvqj
WpI2RevzGumzgP2IvvMDwMqW4SuBieXz5HUMn1z7e7Dxpaccf/dC\n0qZmRElDSukM4j6Ev8w5/y6ltAJ4WkuxicQTFgArWDsBmEgckK2o/T3Y+B3R3d1FV9e4jpUbjWka\ne9OKPdQZiZ7uLnp7VzN//nwee2wFfX39g5bde+996O3tGrLs3nvvQ3f3lk/Ox1Dz2aTccMp2upyx\njW3skU+z04adNKSU/g04ATgm51w9AXEvsFdL0WnAotrwaW2G/wpYQiQO04DbSoxuYPva+B0xZcqk\njpYbjWkae/OLfeedt3HLAQcMeT/FlOuvj8/rKFuVq85ePFXqwNjGNvboTrNThvuehlOBdwB/lXO+\nojZoHjAnpTQx51xdZjgU+Glt+KG16UwmLm18JOfcn1KaX4ZXN0seDKwCbhrm8qxTT89yurrGsXWH\nyo3GNI29ecZucj9F07I9Pcvp6VnOlCmThnwapLu7q1G54ZTtdDljG9vYI5/mcEydutWQZYbzyOWe\nwIeBfwF+nlLaoTb4GuBu4IKU0mnAa4h+7dgy/MvAySmlU4DvEk9K3FEetwQ4F/h8SukW4obIc4Ev\ndPpxy6jcoU/nNC03GtM0trE7Mc2qI+nt7WP16qE7lablRmOaxja2sUd/mp0ynAsirynlP0zs2O8j\nLh/cl3PuA44iLjHcALwFOCrnfA9AznkB8HrivQ3XE09MHFVNOOd8KXA68B/AVcQbI+esz4JJkqTO\nGs4jl2cAZ6xj+B+J10sPNvwq4HnrGH4mvgVS6ggf95Q0GtbnkUtJY5SPe0oaDSYN0iaqUy+gAs9K\nSAomDdJmbDivz/ashCSTBmkzN5zXZ0vavG3410lJkqSnJM80SGqk6RMZ/nS4tOkyaZDUSNMnMvzp\ncGnTZdIgqbEmT2QMp5ykpxbvaZAkSY2YNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasRHLiVt\nFP5YlvTUY9IgaaPwx7Kkpx6TBkkbjT+WJT21mDRIGvOa/u6FlzGk0WXSIGnMa/q7F17GkEaXSYOk\np4Qmv2fhGQlpdJk0SNpkND0jse+++/rkhjQCJg2SNilNzkj45IY0MiYNkjZLPrkhDZ9vhJQkSY2Y\nNEiSpEZMGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGjFpkCRJjZg0SJKkRkb8GumU\n0kTgBuA9Oedry3e7AF8EZgF3Ae/POf+oNs4RwNnAbsB1wOyc85214ScCJwPbAJcB7805rxjpPEqS\npM4Z0ZmGkjD8J7BXy6BvAfcBLwIuBq5IKe1UxpkBXAGcD7wYeLCUr6b5BuAjwGzgcOAg4MyRzJ8k\nSeq8YScNKaU9gXnAri3fH06cQTghh08SZxOOL0VmA/NzzufknG8FjgN2SSkdVoa/Dzg75/z9nPMv\ngROAt6eU/F1aSZLGgJGcaXgp8GPiEsS42vcHAje2XE6YW8pVw6+tBuSclwM3ArNSSl3ED879tDbu\nPGACsO8I5lGSJHXYsO9pyDl/vvqcUqoPmk5cmqhbDOzUYPh2wJb14Tnn3pTSkjL8F8OdT0mS1Fkj\nvhGyjcnAypbvVgITGwyfXPt7sPHXW3d3F11d4zpWbjSmaWxjG3tsxe7u7nry81BlO1luNKZp7M0r\n9mjoZNKwAnhay3cTgWW14a0JwETg4TKMQYYvo0OmTJnU0XKjMU1jG9vYYyt2VX5jz4exjb0+0+yU\nTiYN97L20xTTgEW14dPaDP8VsIRIHKYBtwGklLqB7Wvjr7eenuV0dY1j6w6VG41pGtvYxh5bsXt6\nljNlyiR6epbT29s3aNnu7q6OlhuNaRp784o9XFOnbjVkmU4mDfOAOSmliTnn6jLDoQzc3Div/A1A\nSmkysB/wkZxzf0ppfhle3Sx5MLAKuKlTMxiVO/TpnKblRmOaxja2scdW7KpT7u3tY/XqoTvoTp
cz\ntrE7Mc1O6WTScA1wN3BBSuk04DXEExHHluFfBk5OKZ0CfBc4FbijejEUcC7w+ZTSLcQNkecCX/Dl\nTpIkjQ3rexdFf/Uh59wHvJa4xHAD8BbgqJzzPWX4AuD1xHsbrieemDiqNv6lwOnAfwBXEe94mLOe\n8ydJkjpkvc405Jy7W/6+A3j5OspfBTxvHcPPxLdASpI0JvmDVZIkqRGTBkmS1IhJgyRJasSkQZIk\nNWLSIEmSGjFpkCRJjZg0SJKkRkwaJElSIyYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1YtIgSZIaMWmQ\nJEmNmDRIkqRGxm/sGZCksWzVqlXMn/87enqW09vb17bMzJn7MH78lht4zqQNz6RBktbh5pt/y91H\nvJSZgwy/BeCqq9l///034FxJG4dJgyQNYSawrpTg4Q01I9JG5j0NkiSpEc80SFIHeO+DNgcmDZLU\nAd77oM2BSYMkdYj3PmhT5z0NkiSpEZMGSZLUiEmDJElqxKRBkiQ14o2QkrQBNXk0E3w8U2OTSYMk\nbUBDPZoJPp6pscukQZI2sKEezYR4PNMXRmmsMWmQpDGq6Quj9t13X5MLbRAmDZI0hjV5YVQnkwsw\nwdDgTBokaRPQieQCPHuhdRtTSUNKaSJwLvB6YBnwrznnT2/cuZKkTUfT+yk6fWmkr6+r8VmOJmVN\nWDaOMZU0AJ8CXgi8DNgFuCildFfO+fKNOVOStDnq5KWR7u6uxmc5hio7mgmLici6jZmkIaU0GXg7\n8Mqc803ATSmlM4H3AiYNkjRGNf2hrqZnOZpOczQSlk4mIptiwjJmkgZgX2J+rqt9Nxf4h40zO5Kk\nsa7TCUunEpGxkLBMmDBhHVFHZiwlDdOBB3POq2vfLQa2TCltn3NespHmS5K0GelUIrKxE5b99nvR\nEFGHbywlDZOBlS3fVX9PbDKBW4YYNqO7i66ucR0pNxrTNLaxjW1sY2/esYfSPYxy48d3/uelxvX3\n93d8oiORUnoj8Nmc87Nq3z2PqM/tc86PbLSZkyRJY+pXLu8Fnp5Sqs/TNGC5CYMkSRvfWEoafg08\nARxU++4lwPyNMzuSJKluzFyeAEgpnQccAhwP7ARcABybc/7WxpwvSZI0tm6EBPgA8UbI/wUeBf7J\nhEGSpLFhTJ1pkCRJY9dYuqdBkiSNYSYNkiSpEZMGSZLUiEmDJElqxKRBkiQ1MtYeuQQgpTQBOBs4\nmvj9iS/nnP+xDPs28H+BfqB6AffjwGMt5W4B9gL6Srl+4GfALKAbWAEsL8NWEG+frKZZf7F3Pc4T\nwBa1aQJbswyVAAAbs0lEQVQsY6AeVxGJ2IQ25QZ7WfiiltgAvWUeHwZ+AewD7NBmmo+XeF1lfMq8\n1Mv1A98jXpS1bS1uHwNJY32aVexHgeuBF5TxJg6y3NXvgtTnv/q7H/gp8e6NbmJdrmSg7nceZLmX\nAh/OOf9bm/VIGXd8GXdp+bx1+b++3NcCh5ZpVt9Rm05rXS4HntGmHLXvqrZ2KTC71NPTWbvOf0LU\n+Xii3awuy70UeHbLcq8u5R4HbgdmAFNo39a6y7h95XN3m9j/A7y8THNVib9qkNj1ZewDvkn8vk5r\nnS8rcfqBnvL9tm1i/y/w0hJ7XXXeuj30Af8KvLpN7NY6XwZsw5ptsmrnh5Rl3bpM583AqcAk2re1\nodr5qvJdfTmqbadd7JVl2OeI9b2u2I8BNwPPpf36fpyBtruyDNuGtdt5fbmnAg+Uco8RPwRYj13N\n+wPEun418KyWaUJsC1uUz0vL/9vQvm+plvsxYnv//SDLXa3HLuKlffsMErtq511luuPKOhgs9hPA\nM1m7bdWn2c9Af7eu7bu+vtv12VW/Vu9bqm2s6kPq/VrrNPqJ9U+b5W4Su/653b7qwZzzMwFSSjOJ\ndvgi4B7gYznnr7eZ7rCM1TMNnwVeAfwZ8BZgdkppdhm2Z/
luOnAR8EfgiHq5lNJupdyfSrlpwNeB\ng4GLge8QK2db4N3EBvvHUvZrwJ3AnwOvJ1bk6hJrEXAX8NVa7DuJn/CG6CS6gEdq5aaVaf6xLM/l\nREN5CPhKS+yLiI3kVqJz3KrMx7nEa7ZvJN6cWcX+EbCkxOxhYKdUj/1V4DmlPj9d5vP6Em8p8Adg\nxzLNZcAvgeOIHxD7M+LdGYuJRvfFWuxfAb8ty3ppme97icTsBqIz+Fqp868D/00kU1sDJ5Xh368t\n93LgdyU2wKdSSm9rWY8XAXeUuvsfYkPdjthweogd7rfKcv8n0aF8FziN2KAeA+aUabyDtdtQ9U6Q\nq8o07qgtz1dr5d4B/L9SR+9tWTdVnR9apvMTBnbuZ5bhP67FXlrGf1epn32I9T1YW/s1sb6XlnpZ\nztrr+yVEG5pLdEzdwCfaxH4IuAZ4HfAg8PelXLs6f4RoN/1lPrcq47Rb7h8DHy9lVwHHtqnzHuCy\nltgXtondrs7/mjXbZL2dv57YIW4FvAf4CLGzqfcZw2nn3weuINrOE2VZ7mHNtlbF/jOib3lmWQft\nYtfb+ZbEG3A/PMj6/j7RNnrL8kwkDiTaxa76qgml/mcBTwN+zkC/1kvs1I8jEoB3EX3tvbVpXkS0\ns5XAfxF9X3cpv6TN+q6W+4vAHsDzB1nuaj1W9fPnwN+1iV31LTcz0H62KLGr+qnHflWpO4jtptpu\nq7b2n0R/ezFxELq01MNf0H77rtZ3D7HO9yr1cQLRNlv7lu8ykCB/hTX7tUtYc1/yWK1cu+VuEntX\n4sWH04mD54fKvw+V4XvBkwfe3ynL9nyi77kwpfRC1tOYe09DSmkqsfEennOeW747hcjG301kc3sS\njWiwcs8mGug9Oednl2n+CfhRzvlVKaVFxE7nSOAsoqPsJjKyX7RMcwGwPdHQHwX+BXgTsYG8hsh2\n9yY2miVEx3JUmeYbiM7jyfkssRcBuxMr/tFa7OuJHcJuOecFKaWeMu6DwIvLtH5ObMivBb4NHA6c\nUWJfB/wzsZNcIzbxw183ERviCiIpuabU042l3Hhg55zz3Sml+8q8/AA4pkz3TcSG+lYiEdiZ2BCf\nS2wMx5Xl37vM85/KNLYp830nsVM5E/gkkbDMKuUA9ijLfQrwN0QnuRvRSe9b5vGDRCf7TKITuZ/Y\n0E4jOql/II50qx3q08q0PkzsTFYBB9LShsp0rinze1+Zr2Wlrg8pdXt4qb+LgF3KMj+HOLKq1k3V\nhq4p7eCMsmyvJjqFE4gO4sASexzw8tI2fkYkQb8A3saabe1viI5vh7LelxDJ2sXEDrpa31Wd70bs\niAEOIDrKA1pi/y9wQ875w+22nVqdfxw4meis7iPa6e1Ex3xwy3KPJ9b/rFLnAFcy0PFVdX4r8G85\n5y8NEbte5zOIHcgepc7/meiMX1rK/l2Zz57a+LcTbe45tdhN2/nhwOlEMvNRoo0cTPRBb2OgrbVr\nQ5OB37TEbm3nP2Ngh/7BlvV9JNHpv5LYSd1PJN4fZc12Xl/u6UQicinRXpaV5a+2sRXA9JzzYyml\nexhIMN5YluefyrweDxyRcz629EHXE9vRvsS2UF/frX3L04n+pLXO6/XzzDI/u9bqsopd71u+XsZ/\ngOhbWvu148pyP41oO1cS7aG+j3iy34cn9xGnEP3+P7P29l2t71eWOM+uppdzvr22L6n6lhuIfm1H\nom+ZxUC/1rp/eqzU0fZtlnvI2LRIKb2d2M/knPPLWoa9gEiMt8k5Lyvf3QBcknP+dOu0hmMsnmk4\nFHikqmiAnPOZOee/BRKRNd45WDnitNHTiE5ydW2aD5eEYRzROVxJ7JQWlGkCHFafZmkgU4iVu3sp\n93+IjvcR4GpipR5HbMB/X6bRX8pdXZ/PEvsEokM/tjbNKvYy4IHSoYwjdjofJDbQPuDuUvYxouN7\nJOc8N+d8SM75NUTW+0
S72MCniI73f4ns+tOljn5Syi0HFuacqxi3EhnsS0vsw4hk5xEiyVmYc767\ntm7uIHaA/5Fz/kOZ5qOlLv6W2IBnEJ3aslL2F6XcCuDXOecFtfV4RolVrcdDy98Xl3qZRGT1f0N0\nVACX5ZyfXyv7Fznn1UT2fTVxlqc6jdnahj5PnFL9PbFTTsT28YUSpyr3SuBLxNEuZV3W103Vhv4s\n5/x4zvm9RKe/NdH5jQN+WGIvI9rl3LLch+ScZxKX5lrb2veAI3POD9bW944lZn19P9mGcs7vLfE/\nQrSheuxHiEsxt9XqvHXbqcqtLPXyeKn7NxId57jW5WZge5hJdFpbEjuLNeq8zPuQsVvq/IYS6/kM\ntMkba9OcXsq9sZRbSCQD1GI3bucl9p5lGau2VvVBa7S1ljb0cPnXGru1nR+Sc94N+AZrr+/qEtxv\na7EXljpvbefV0fyWJd6cMp+9ZT0cWtbhLSVh2KZM87wyrI846/PBsiwXl4Sh6oO+VJteazuv9y0L\niCP51uWu18+/lOW6vF6XVWzW7Fuqdn5Hmd4a/RpxMPJj4qAB4mCkdR/xcC1hGFfWd9Xvt9u+9yTa\n5UJim6tPD9buW5YQZ6y/RyT09X6tvi/ZjzhbdFy75W4Yu9UhRH/2gTbDHqL0vSmlcSmlWWV6Nw4y\nrcbG4pmGvyN26v9GZNMTiNM51RHA54jT8q8mGti5xGmaCQxk2J8CPsbAaftVxEbziTLN5xEVeh6R\n+V5ArOxVLdN8JtGBTSMa60uJHXZ1fexcIjOu35NwP7ERP0Z0oCtaYten+aaW2NW9ELeX/79CdM6f\nJxrBYqJhLSMa5sEMXF9eWv7ftU3sy4mGdQ/R4BcRHXEPcQr91USH82D5bgKx4b+xzC9lvK5SP9cT\np1UX1dbNBcSO9IoyrKrzX5flrF9bhIHr539OJAB3ENdjn1ZijSeOcKv1WF0HrtbN1sSO588ZOEV4\nV5lWde9EVefPLutlx9p6+h5xyaab2GG9rCzPuLIs/SX+FbVy9bZW1el7S7k/Ee3q7pbYe5Rx7yGO\n1PuJU5JHlDp/qNTFM8o6PLsMa9fWqthVnT9MdFj19d3FwCnaqg29ijjy6avF7i7TWl7q7LFS/mNE\nklGv82uIDnthLfY3iR3KrS3L/RMiSawOSB4o09iqVufjS8ye8v0jZV4+yuDrewKR7L6ttg4fYaCN\n1+t861LXRxBHnu8k+obhtvMLiEscy4gzBwvKfD+99rmKfQ3RFywmkuPzynTqsVvb+cPE2Zi3sPb6\n/hFxCr1qt68Azie27wXEjq++3B8rw/qJZG33Ms/fKeuuKn8fA9vOJcSZQso0t29T5zuXuv4RkXi2\ntvNqO1hE7Hy/Wz7Xl7u+jS1h4HeFZhFH2fXY7fqWm4jLdoP1qc8uy3orseOut7X68tT7/fuJ7fZb\nxFF+fX0vJdrJKgbOKNxdvmuNvTWRvGzLwL1GVb9Wjz2jzNfhJX7rcjeJvQA4Nef8A4CU0kIG2tSO\nxKWn9+ec7y/DP8jA9tQFfDTnfBrraSyeadiaOFU5mzgaP4m4lnkisdInEad5LyQq+2TitM5JwPuJ\nU2XbEA1+GbFzvL1M8/3A24lG3lOmP6dM86ttpvlEGXYicapxObECV9TK/ZJYWY8QR5TbEyv8bmKH\nUI99bMs0j2qJPa5M937iGtWJxCm5ql4SkVl3E42vj+gAu4nOelqZfrvYJ5V/exGnp6uMu6rLLmKj\nr+ryGCK56SvLu4zYaXQTN9nd1bJuDi7zeUVLnR9BHEldxMBR5A9KvGfUYqcy33OIDXAbYP/aemxd\nN58r9fXtMt2VJf6tbep8KdGh9hIb9BbEjW8XEjuwI4mN6ySiQ5tF7GxWl+Vp19aOLfH3qK2bU9vE\nnkOccdmWaJv9ROdbLfe0Mn//WGLMKd+3a2tV7P9HnDGaXJa7vr5nlBiZgTb0XAaStSp2
dcPsr4kz\nZNWNpK9vU+dHEm16fon9r8R17HFtlvu1DNxr01uW+9dt6nwcsSP5QCkzqUxzsPV9ErG9QLSnqk3O\nbVPn95RyhxEJ/DhG1s7fU76/vkxvu/KvupGt3taOJY4iFzDQLtrFrrfzicTB0WOsvb6PIpKKqp1f\nQuw8KX+3tvPTGUim/kDcj7AFcSbswjLeFOKM0Xml3NFlnGrbqfqSqs5PIdru10pdbkf7dn4S0R+M\nI3Zerctd38Y+V2K9kThz1Bq7Xd/yh1JmsD61ulHzFiKJqbe1annOZM1+/wRivV9eK1et7x+Wv5cT\n/VBv+bykTeyvl3pdRrTLLiIBrcf+NAM3y18yyHI3if094DsppRemlKrE+EHi8tRfUpK2cmZhPLG/\nPI/oRz8AfDCldBjraSwmDauJynpLzvkX5Qer/gU4Ief8cWDHnPNXiRtJJhD3JhxMrLTHiGuVHydW\n6mM5598SnRjE6bmf55zPJhr+E8B+xLXVd7RM87XEEcE/l9jvIzaGlxGJwQTiVM8RRCb/caJD/hiR\n+b2MaFzVtahbiEZRTfMkooOux94CmFfm6RIGGlrV4Z9T5m0CsSFNInbQHydOVX2I2OBeRmSyVexF\nOed/L3X5mzL/H2ypy24i8TmYSEy2LPGXMnBWZK8S+zbgafV1U8o8nnP+akuddxEJ4OvL/C8ry1Nd\nt3u0lFnNwA1QjxIb99Oq9cja6/uaMv2JZXmnETfVvYy4PlnV+Q3EWaJVxGn7t5a6mcHAky53AruW\n5VlA3D+yC3B0u7ZWylWd+a3l/3Nyzp9gzbb2i3L98KOlHvcodbhjmV53KTs/53wW0SaXEZ1Oa1ur\nx/5tGfaXxJHVy1izrf0PcV9B1YZ6S53vS3Q01Rmtq8v831rq/KFSpl2d/4To0J9OnF2pbhptXe5t\niW1nJ2Kn01PWzXOII8KuMk9XE0e9ucR+hNjBDba+f8zAkzqnMNAmX8ea63tbBp4YOIa4D2DBCNv5\nj4jO/4NlessY2MZeR5wdqGJnYie/E7H9v7hN7CfbeWkXHyGOnMez9vq+iTgyvZqBO/Y/RPQhx7B2\nO/9N+dwPHFD6lMOIvuRRYn3fyEB/QYn9UIn9OiKxrre1b5ayBxBnDbpZe30vIvqDnYjtN7VZ7ie3\nsbI8AP1lW2yN3a5v+WJZhpfR0qfmnH9BbKvVcs9mzbZWLc+BtX7/8RK3dfv+EXBhzvmNRBt7iDib\ncFepy0QkXVWd/55Iap4g+pY3s2a/VsV+U4nZv47lbhL7KiIReweRhP0JeEHOeV7O+edEIrYvcVD4\n18CLc84n5px/nXP+DJH8zWE9jcWkYRGwIud8T+27THTy5JwfrZcjGuqOxKNV2wLPTSktJbK7ncuN\nPFsTG+tzatOcVso/URKC1mnOJM4gbFWPnXP+EwN3Ui8lssBeYmOfUcadUcotITLK6i7/VxJHV9uV\nef58m9jXln/PIo4EphLr6fM555Nr5W4jOpD7a7EzkQj8iWh0VTY7I6W0tNRLdW376y11uYqBI4Xn\nEZ3MdkTC8WhZruoySiYSu2rd7Ex0fNX1zmqafcTG8jziqPgnRKKzM3GE2M3A44iPlnVSrcepxJHn\np0v5j5bYvyI6qI+UOE+UnfOjZV4o03uiTOMy4hTpPxJnNsg5V09gVI877URc+1tK7LieThw9/HfL\nuqnaGgxcZz23tm6qsquBlFIan1J6HZGIPI3oHB6srZsnyr/typHB9mXZZ7Rpa78Cdkwp/TvRea7M\nOX+rVq7e1vqITuZviaPfbYlHsW6ulVtBnDmobzvPKuupXZ335Zx3IHZYfUTbXNyy3H3A8pzzypzz\nCuIpgW2IRP6OErtvkNjPKPPcLnbVJqskmlqb7KrXY5netLJedyeOtJ5d6weG085/Vearujt/KpFg\nTWfNtgZx2fDKEnMPog19uSX2k+28tr63GmR9/5bY
Vu6vxZ5G9DfVvR1VO9+uNo/9xHqEOAtalVtB\n7MB+TCQB/UTCdFfLtrOKuK/iBVVVEzvltxLbWn19Q/Q71XLvT6zLr7Qs95PbWIkPsE1K6bdtYucy\nrKqfGWW5Fw3Sp1LaGpRLTC1tbSWRzFXbbS7z2EX77fsZ5btbyjzfStw8+kBtmlXfcgXRJv+pNt5c\nBtpkNc1qh08ZjzbL3ST2juW7HYl9yXdyzlUfTyn3UBn+IgYSycqviG1rvYzFpOE6YMuU0h617/YC\n7kopfSWl9KV6OeJU8u+J7OxM4gj8B0SF3UtkXv9JrNz9UkoTUkqvIE7JLgN6BpnmeKIBnAI8VMVO\nKT2b2NC3IDqPiUQ9Vtnhx4BVpdz2RGa4BbHxH1jm6e8pN9C0if1O4M6ygbymjPsosfHVy+3AwF3y\nVeyPt4kNcapyXwbuKO8FrmqJvQWxMS0kjgDHERvc+FJuTwYet5sGTEopPausm+qGx+rIuZpmV5nX\nanovIzrFKcRRcH+Zx26iA/hjbT0+Uua1Wo+vKrFnEQ3/4DKtB2vtonq+/wdleapHTfuJjL5qQ5fU\n6qePOH3+5VI/dxPtYgmRENTrvGprEAkNRPJQrZuq7HjiSPOZxGWZ1zOQKHyuxP4eA5cIFpV5PIXo\naFbV2trWpcwsosOZXco+0NImq2luUWJvS5z6fl1ZnmUppery2ffL8ryf2F6qOr+/zGNrnb+CuKYM\ncXRdXUJY3LLcXcS2u11KaQlxTf6Rsjz1Op9EnK2ob7dLiM693fq+lYE2BPE4br1N1rexU4izf/3E\nUdrNxI626geG085fRxxxLyTOmDxEXMteQbS1fgZu+vsv4lr8y8r8P0Ls+Ouxq3Z+OwPrexUwpc36\n3pXY2e9Yi/3p8rm1nS8q81g9jXNPSukrDDxu+ANifb8DuDXnfGOZTl9ZX/VtZ0JZhheUx/ZeW757\niNjO6+u7dbl/T2yT1aXcarmrbex84ibWXuJg4ndtYk8j2sc01jzi7m7Tpy5KKW1T2hrA/W227y2J\n7e+2UmYvBt6r0Lp9vw44uNwo+k3iSP4TwIqU0o5EIlhtY88htrN+ypNcpc7r/VrVZ0wmkoYeavuS\nluVuEvv3RDJ3a5nu0fXLDaXc9qXcfWX6dXsy+E2VjY25pCHHnfdXAheklJ6fUnolcUrlXOL69VvL\n8/t9RId3CAM36cwGqtMw+xErrYs4bVOdor2MONVW3XB0cZnme8rf1TSrxKC3TCcTN2H9uMT7A9Fw\nlhM3//QTO8O9yv9Vuepxt33LPB1V/v4UceRdxa6uRU4B9kgpHUucmusjOouq3FSi03khcSnj8hJ7\nG6JRbNMm9u5EZ/QcItl4sNRRa11C7Iz/ooy3NXEj1duIzuE7pdyLiCPF7xFH8I9TbppKKb0tpbQ7\ncSTaW+b3Y0RHcmKZv+8RndE8olP4A9Ghvrh8/15iQzu/th4X1tbNTGInP65M709lHr9BXBN8ZVmm\n5xI3Ft5W6vJ/iZ38m8v83FZbnv8q9fMsYgf2yzb1cwjww5TSkcTNaxB3X781pfSelNIBxJF4LwPX\nOH9evtuW6Lz/ijhNfHutzmcRbZeyPNcw0NZ+WIt9INF+nyj1cy9xGrJa3weW2FPKfP2UOELcmjgq\nqmL/gUjQtiA68dcQ11S3JDr51jrfD9g6pXQxseNZUtbX89os93jilP4CIgGeUuatXud3lPnfmWhr\n1fr+4iDr+0fE5ZbqqP73rNkmjyyxX1DW365E2zi/1P9kBp4gGE473x2YkFK6jIEzEnuV/+ttrdrG\nnijz/kzips2ftIndTRyRH1vmcSviDEDr+p5FtJeLa7H3JI5GW2O/IKX0fOLU/7gS+yZiW/olsb3f\nXqYxPaX018SOeUKZTn3b+QOx3TxM9GvvIJLabYgzYq3ru77cuxCJUrvlrm9jlHq4pU3sqm+p+tTt\niHayC2v3ay8g
1nUuy/3zEr91+x4P7J1SOo6Bl079hrW3791LnXy5jFP1qduV2D8psau+5dNEMvR3\nZfidrN2vHVLG35aBG4LvbLPcTWK/oUzvc6UebwDOSSm9OMX7F/4T+H7O+Rai3eyeUjo9pbRbWc63\nE/vH9TLmnp4AKNlW/Sjpc+U6Giml44kkYgaRcS0mKrK1XPVY3Gqicf4D8TKT1zBwRFxdKxzHwBHH\nLWWaRxIr/rPEin4rsQKrpxyq2C8pf/cRCUQ3sZFX5arYxxBPdtTj0ib2UmJj62Lg7XP1chAbwx/L\ncm/BwLXSPgbeAtgau6rLycSZjBe0qcuHiCOG6rrvr4hOsnqUa2JtuQ8tMapTcVcRG3o1vSr2iUTn\n1sXAncfLy3RmMHAa7hHi2l03cURzSs75iy3r8VYi4352rR6ruukvdbdFiX0/cVmi/tRGVZ/XEclT\ndbqv3oaqdzP8V6njdm3tCeJyQzW9+rr5PfA+4lTs6xm4BLOylP0NcRRVLfeDDLy98XGiE9qdtdva\nVsRpztblhoHrp1Wdv5PovLrK8qws89Ea+zFi59ld6u6snPMn2tT54rJuJjJwiWCw5X43sTPuLvO/\nqpS9uaXOVxIdcBexg/rgIOu7vm4uIxKMB8ryUuaptZ2vJNrJLqVuP0W07ZG088NKrP5SX91lePXS\noXbbWHUj54I2sVvb+S+IxK1d31LfvleUup6wjtjVcj9A7BhvLn9X63sFkVh2ETeLfreMt11tmvV+\nbSIDb4VsbXPV+q4v931EsnJNm+Vu3cbuIBLBdrHrfUt1L0D1REG7fm05kahVT760trV6O3+4tuzt\ntu/6+q5O/Y9n4G3A6+pbesuy1/u1h4kzdQ8Q+5I+4gbPdsvdJHZ1Y+gi4iDrPcTTGROJ+4z+rrrs\nnFI6iLhpeW8iCT8t5/wN1tOYTBokSdLYM+YuT0iSpLHJpEGSJDVi0iBJkhoxaZAkSY2YNEiSpEZM\nGiRJUiMmDZIkqRGTBkmS1IhJgyRJasSkQZIkNWLSIEmSGvn/4D/X1Vhu4eMAAAAASUVORK5CYII=\n",
491 | "text/plain": [
492 | ""
493 | ]
494 | },
495 | "metadata": {},
496 | "output_type": "display_data"
497 | },
498 | {
499 | "data": {
500 | "text/html": [
501 | "\n",
502 | "
\n",
503 | " \n",
504 | " \n",
505 | " | \n",
506 | " Health_Camp_ID | \n",
507 | " Count | \n",
508 | "
\n",
509 | " \n",
510 | " \n",
511 | " \n",
512 | " | 0 | \n",
513 | " 6543 | \n",
514 | " 6543 | \n",
515 | "
\n",
516 | " \n",
517 | " | 1 | \n",
518 | " 6527 | \n",
519 | " 4144 | \n",
520 | "
\n",
521 | " \n",
522 | " | 2 | \n",
523 | " 6538 | \n",
524 | " 3954 | \n",
525 | "
\n",
526 | " \n",
527 | " | 3 | \n",
528 | " 6537 | \n",
529 | " 3859 | \n",
530 | "
\n",
531 | " \n",
532 | " | 4 | \n",
533 | " 6529 | \n",
534 | " 3823 | \n",
535 | "
\n",
536 | " \n",
537 | " | 5 | \n",
538 | " 6526 | \n",
539 | " 3809 | \n",
540 | "
\n",
541 | " \n",
542 | " | 6 | \n",
543 | " 6534 | \n",
544 | " 3597 | \n",
545 | "
\n",
546 | " \n",
547 | " | 7 | \n",
548 | " 6570 | \n",
549 | " 3564 | \n",
550 | "
\n",
551 | " \n",
552 | " | 8 | \n",
553 | " 6580 | \n",
554 | " 3517 | \n",
555 | "
\n",
556 | " \n",
557 | " | 9 | \n",
558 | " 6578 | \n",
559 | " 2837 | \n",
560 | "
\n",
561 | " \n",
562 | " | 10 | \n",
563 | " 6586 | \n",
564 | " 2624 | \n",
565 | "
\n",
566 | " \n",
567 | " | 11 | \n",
568 | " 6542 | \n",
569 | " 2368 | \n",
570 | "
\n",
571 | " \n",
572 | " | 12 | \n",
573 | " 6562 | \n",
574 | " 2338 | \n",
575 | "
\n",
576 | " \n",
577 | " | 13 | \n",
578 | " 6554 | \n",
579 | " 2303 | \n",
580 | "
\n",
581 | " \n",
582 | " | 14 | \n",
583 | " 6571 | \n",
584 | " 2086 | \n",
585 | "
\n",
586 | " \n",
587 | " | 15 | \n",
588 | " 6523 | \n",
589 | " 2084 | \n",
590 | "
\n",
591 | " \n",
592 | " | 16 | \n",
593 | " 6536 | \n",
594 | " 2037 | \n",
595 | "
\n",
596 | " \n",
597 | " | 17 | \n",
598 | " 6532 | \n",
599 | " 1993 | \n",
600 | "
\n",
601 | " \n",
602 | " | 18 | \n",
603 | " 6539 | \n",
604 | " 1992 | \n",
605 | "
\n",
606 | " \n",
607 | " | 19 | \n",
608 | " 6535 | \n",
609 | " 1882 | \n",
610 | "
\n",
611 | " \n",
612 | " | 20 | \n",
613 | " 6549 | \n",
614 | " 1835 | \n",
615 | "
\n",
616 | " \n",
617 | " | 21 | \n",
618 | " 6528 | \n",
619 | " 1744 | \n",
620 | "
\n",
621 | " \n",
622 | " | 22 | \n",
623 | " 6555 | \n",
624 | " 1738 | \n",
625 | "
\n",
626 | " \n",
627 | " | 23 | \n",
628 | " 6541 | \n",
629 | " 1547 | \n",
630 | "
\n",
631 | " \n",
632 | " | 24 | \n",
633 | " 6581 | \n",
634 | " 1485 | \n",
635 | "
\n",
636 | " \n",
637 | " | 25 | \n",
638 | " 6540 | \n",
639 | " 1426 | \n",
640 | "
\n",
641 | " \n",
642 | " | 26 | \n",
643 | " 6585 | \n",
644 | " 1398 | \n",
645 | "
\n",
646 | " \n",
647 | " | 27 | \n",
648 | " 6564 | \n",
649 | " 514 | \n",
650 | "
\n",
651 | " \n",
652 | " | 28 | \n",
653 | " 6546 | \n",
654 | " 403 | \n",
655 | "
\n",
656 | " \n",
657 | " | 29 | \n",
658 | " 6530 | \n",
659 | " 259 | \n",
660 | "
\n",
661 | " \n",
662 | " | 30 | \n",
663 | " 6561 | \n",
664 | " 200 | \n",
665 | "
\n",
666 | " \n",
667 | " | 31 | \n",
668 | " 6569 | \n",
669 | " 177 | \n",
670 | "
\n",
671 | " \n",
672 | " | 32 | \n",
673 | " 6563 | \n",
674 | " 171 | \n",
675 | "
\n",
676 | " \n",
677 | " | 33 | \n",
678 | " 6524 | \n",
679 | " 149 | \n",
680 | "
\n",
681 | " \n",
682 | " | 34 | \n",
683 | " 6544 | \n",
684 | " 128 | \n",
685 | "
\n",
686 | " \n",
687 | " | 35 | \n",
688 | " 6560 | \n",
689 | " 123 | \n",
690 | "
\n",
691 | " \n",
692 | " | 36 | \n",
693 | " 6531 | \n",
694 | " 120 | \n",
695 | "
\n",
696 | " \n",
697 | " | 37 | \n",
698 | " 6553 | \n",
699 | " 94 | \n",
700 | "
\n",
701 | " \n",
702 | " | 38 | \n",
703 | " 6575 | \n",
704 | " 90 | \n",
705 | "
\n",
706 | " \n",
707 | " | 39 | \n",
708 | " 6552 | \n",
709 | " 82 | \n",
710 | "
\n",
711 | " \n",
712 | " | 40 | \n",
713 | " 6587 | \n",
714 | " 79 | \n",
715 | "
\n",
716 | " \n",
717 | " | 41 | \n",
718 | " 6565 | \n",
719 | " 66 | \n",
720 | "
\n",
721 | " \n",
722 | " | 42 | \n",
723 | " 6557 | \n",
724 | " 52 | \n",
725 | "
\n",
726 | " \n",
727 | " | 43 | \n",
728 | " 6558 | \n",
729 | " 44 | \n",
730 | "
\n",
731 | " \n",
732 | "
\n",
733 | "
"
734 | ],
735 | "text/plain": [
736 | " Health_Camp_ID Count\n",
737 | "0 6543 6543\n",
738 | "1 6527 4144\n",
739 | "2 6538 3954\n",
740 | "3 6537 3859\n",
741 | "4 6529 3823\n",
742 | "5 6526 3809\n",
743 | "6 6534 3597\n",
744 | "7 6570 3564\n",
745 | "8 6580 3517\n",
746 | "9 6578 2837\n",
747 | "10 6586 2624\n",
748 | "11 6542 2368\n",
749 | "12 6562 2338\n",
750 | "13 6554 2303\n",
751 | "14 6571 2086\n",
752 | "15 6523 2084\n",
753 | "16 6536 2037\n",
754 | "17 6532 1993\n",
755 | "18 6539 1992\n",
756 | "19 6535 1882\n",
757 | "20 6549 1835\n",
758 | "21 6528 1744\n",
759 | "22 6555 1738\n",
760 | "23 6541 1547\n",
761 | "24 6581 1485\n",
762 | "25 6540 1426\n",
763 | "26 6585 1398\n",
764 | "27 6564 514\n",
765 | "28 6546 403\n",
766 | "29 6530 259\n",
767 | "30 6561 200\n",
768 | "31 6569 177\n",
769 | "32 6563 171\n",
770 | "33 6524 149\n",
771 | "34 6544 128\n",
772 | "35 6560 123\n",
773 | "36 6531 120\n",
774 | "37 6553 94\n",
775 | "38 6575 90\n",
776 | "39 6552 82\n",
777 | "40 6587 79\n",
778 | "41 6565 66\n",
779 | "42 6557 52\n",
780 | "43 6558 44"
781 | ]
782 | },
783 | "execution_count": 8,
784 | "metadata": {},
785 | "output_type": "execute_result"
786 | }
787 | ],
788 | "source": [
789 | "grouped_df = train.groupby(\"Health_Camp_ID\").agg('size').reset_index()\n",
790 | "grouped_df.columns = [\"Health_Camp_ID\", \"Count\"]\n",
791 | "grouped_df = grouped_df.sort_values('Count', ascending=False).reset_index(drop=True)\n",
792 | "plt.bar(range(len(grouped_df.Health_Camp_ID)), grouped_df.Count, tick_label=grouped_df.Health_Camp_ID, color='r')\n",
793 | "plt.show()\n",
794 | "grouped_df"
795 | ]
796 | },
797 | {
798 | "cell_type": "markdown",
799 | "metadata": {},
800 | "source": [
801 | "So the number of patient registrations per health camp in the train set ranges from 6543 (Health_Camp_ID = 6543) down to 44 (Health_Camp_ID = 6558)."
802 | ]
803 | },
804 | {
805 | "cell_type": "markdown",
806 | "metadata": {},
807 | "source": [
808 | "##### Test data exploration"
809 | ]
810 | },
811 | {
812 | "cell_type": "code",
813 | "execution_count": 9,
814 | "metadata": {
815 | "collapsed": false
816 | },
817 | "outputs": [
818 | {
819 | "data": {
820 | "text/html": [
821 | "\n",
822 | "
\n",
823 | " \n",
824 | " \n",
825 | " | \n",
826 | " Patient_ID | \n",
827 | " Health_Camp_ID | \n",
828 | " Registration_Date | \n",
829 | " Var1 | \n",
830 | " Var2 | \n",
831 | " Var3 | \n",
832 | " Var4 | \n",
833 | " Var5 | \n",
834 | "
\n",
835 | " \n",
836 | " \n",
837 | " \n",
838 | " | 0 | \n",
839 | " 505701 | \n",
840 | " 6548 | \n",
841 | " 21-May-06 | \n",
842 | " 1 | \n",
843 | " 0 | \n",
844 | " 0 | \n",
845 | " 0 | \n",
846 | " 2 | \n",
847 | "
\n",
848 | " \n",
849 | " | 1 | \n",
850 | " 500633 | \n",
851 | " 6584 | \n",
852 | " 02-Jun-06 | \n",
853 | " 0 | \n",
854 | " 0 | \n",
855 | " 0 | \n",
856 | " 0 | \n",
857 | " 0 | \n",
858 | "
\n",
859 | " \n",
860 | " | 2 | \n",
861 | " 506945 | \n",
862 | " 6582 | \n",
863 | " 10-Aug-06 | \n",
864 | " 0 | \n",
865 | " 0 | \n",
866 | " 0 | \n",
867 | " 0 | \n",
868 | " 0 | \n",
869 | "
\n",
870 | " \n",
871 | " | 3 | \n",
872 | " 497447 | \n",
873 | " 6551 | \n",
874 | " 27-Aug-06 | \n",
875 | " 0 | \n",
876 | " 0 | \n",
877 | " 0 | \n",
878 | " 0 | \n",
879 | " 0 | \n",
880 | "
\n",
881 | " \n",
882 | " | 4 | \n",
883 | " 496446 | \n",
884 | " 6533 | \n",
885 | " 19-Sep-06 | \n",
886 | " 0 | \n",
887 | " 0 | \n",
888 | " 0 | \n",
889 | " 0 | \n",
890 | " 0 | \n",
891 | "
\n",
892 | " \n",
893 | "
\n",
894 | "
"
895 | ],
896 | "text/plain": [
897 | " Patient_ID Health_Camp_ID Registration_Date Var1 Var2 Var3 Var4 Var5\n",
898 | "0 505701 6548 21-May-06 1 0 0 0 2\n",
899 | "1 500633 6584 02-Jun-06 0 0 0 0 0\n",
900 | "2 506945 6582 10-Aug-06 0 0 0 0 0\n",
901 | "3 497447 6551 27-Aug-06 0 0 0 0 0\n",
902 | "4 496446 6533 19-Sep-06 0 0 0 0 0"
903 | ]
904 | },
905 | "execution_count": 9,
906 | "metadata": {},
907 | "output_type": "execute_result"
908 | }
909 | ],
910 | "source": [
911 | "# Let us take a look at the top few rows\n",
912 | "test.head()"
913 | ]
914 | },
915 | {
916 | "cell_type": "code",
917 | "execution_count": 10,
918 | "metadata": {
919 | "collapsed": false
920 | },
921 | "outputs": [
922 | {
923 | "name": "stdout",
924 | "output_type": "stream",
925 | "text": [
926 | "Number of unique patients in the test : 15324\n"
927 | ]
928 | }
929 | ],
930 | "source": [
931 | "# Number of unique persons present in the test\n",
932 | "print \"Number of unique patients in the test : \", len(test.Patient_ID.unique())"
933 | ]
934 | },
935 | {
936 | "cell_type": "markdown",
937 | "metadata": {},
938 | "source": [
939 | "So 15,324 unique patients account for the 35,249 rows in the test set. Now let us look at the distribution of the number of entries per patient, just as we did for the train set."
940 | ]
941 | },
942 | {
943 | "cell_type": "code",
944 | "execution_count": 11,
945 | "metadata": {
946 | "collapsed": false
947 | },
948 | "outputs": [
949 | {
950 | "data": {
951 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAhUAAAFoCAYAAADgsAn8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3X20XXV97/t3snMSEnAfUk4laUNB6uhXjFwLCAQEPKDn\nUGwLHNAqch0XoSmKXIrIMYNKoUOkQEABtaAIihy1F6jgE9fiw+UAsUlNDKJE+1VBiCSYSkZsgDw1\ne+f+MeeC5SLZ3XPtuR728v0aI4M1528+/L6slbU/+c3fnHvKjh07kCRJmqipve6AJEkaDIYKSZJU\nC0OFJEmqhaFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkW06ruEBG/DdwIvB74JXB5\nZn6mbNsP+CRwBPA48J7M/EbTvm8ArgX2B5YCCzPzZ03t5wMXAi8B7gTOzcwt7RQmSZK6q52Rii8C\nvwO8Djgf+HBEnFy2fQlYCxwCfBa4OyLmAUTEPsDdwC3Aa4Cny2NRtp8KXAIsBI4DFgCL2+ifJEnq\ngSlVfvdHRBwCfAfYPzOfKNe9DzgZeD9FqHhpY3QhIr4BPJiZH4iIDwBHZeZxZdtM4BfAn2bmAxFx\nP/DNzLysbH8t8HVgL0crJEnqf1VHKvYHftkIFKXvU4w8HA2sbAkASyguhQAcDjzQaMjMzcBK4IiI\nmAocCjzYtO8yYDrw6op9lCRJPVA1VKwD9oyI3ZrW/R7F3Iy9KS59tG4/r3w9d4z2PYHdmtszcwRY\n37S/JEnqY1Unav4z8BTwsYg4j2JuxXuAHRShYGvL9luBGeXrWWO0z2pa3tX+kiSpj1UKFZm5NSLe\nBNwBbKQYaVhMcUfHKDCzZZcZwKby9RZeHBBmABvKNnbRvolx2rFjx44pU6aMd3NJkvSCCf8ArXxL\naWZ+F/j9iHgpxR0cx1PcWvoo8N9bNp9DMbIBsKZcbm1/iOIyx5Zy+ccAETEE7NW0/39oypQpbNy4\nmZGR0Sol9Z2hoakMD88ciFrAevrZINUC1tPPBqkWGNx6JqpSqIiI2cCXgRMz81/LdX8C/G+KSyMX\nRcSMzGxcxjiKFyZfLiuXG8eaBRwEXJKZOyJiednemMx5JLANeLhKH0dGRtm+ffK/wTBYtYD19LNB\nqgWsp58NUi0wePVMVNXLHxsiYndgcUT8LcUDsM6guPPje8Bq4NaIuAw4keKOjjPK3T8FXFjegvpV\n4FLgscxshIgbgI9HxCqKCZs3ADd5O6kkSZNDOw+/egvwcopbSc8D3pSZKzNzFDiJ4hLGCuBtwMmZ\n+SRAeRvqKcCZFM+62JPi+RaU7bcDVwCfAO6leOLmovbKkiRJ3Vbp4VeTwI4NG56b9ENR06ZNZfbs\n3RmEWsB6+tkg1QLW088GqRYY2HomPFHTXygmSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJ\nklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWh\nQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFoYKSZJUi2lVd4iIecCNwDHAeuD6zLy+\nbDuobDsQeAR4V2aubNr3NOAyYA7wdWBhZq5var8SOJMi7NySmYvarEuSJHVZOyMVdwLPAAcD5wOX\nR8RJETELuAe4v2xbCtwTETMBIuIw4GbgUmABMBu4tXHQiHgvcBpwEnAqcHpEXNBeWZIkqdsqjVRE\nxJ7A4cBZmfko8GhE/CPweuC3gE1NowvnR8QbgTcDtwHvBm7PzM+Vx3o78ERE7JuZTwDnARdn5tKy\nfRHFqMaHJ1qkJEnqvKojFZuB54B3RMS0iAjgSOAhitGHJS3bfxs4ony9AHig0ZCZTwKrgQURMRfY\nB3iwad8lwL4RsXfFPkqSpB6oFCoycy
twLvBOioDxI+BrmflpYC6wtmWXdcC88vVY7XOBHS3t64Ap\nTftLkqQ+VnmiJnAA8GXgGooJmR+NiG8Bs4CtLdtuBWaUr8dqnwWQmdta2mjavy9s27aNVat+UOsx\n588/kOnTp9d6TEmSuq3qnIrXA2cB88pRi4fKu0EuBh7lxQFgBrCpfL1ljPYt5fGnNwWLxrabqGBo\nqLN3yX7/+6tYe/yxzK/peKuAoW/ez8EHH/L8ukYNna6lW6ynfw1SLWA9/WyQaoHBrWeiqo5UHAz8\npAwUDQ8B76eYLzGnZfs5wFPl6zVjtK+huNQxh2KeRaNtR9P+4zI8PLPK5pUND89kPnBorQedyezZ\nu+/0XIPEevrXINUC1tPPBqkWGLx6JqpqqFgLvDwipmXm9nLdAcBjwDLgopbtjwQ+WL5eBhxFcScI\nEbEPxXyJpZn5VESsLts/X25/NLA6M9dV6eDGjZsZGRmtVlXF4w934JgbNjz3/PLQ0FSGh2d2vJZu\nsZ7+NUi1gPX0s0GqBQa3nomqGiq+AiwGbo6Iy4FXUASJi4AvAFdFxLXATRSTOXeneK4FFA/Fui8i\nlgErgOuAr2Tm6qb2qyKiMWpxBXB11YJGRkbZvr1zb3AnPjy76nOna+k26+lfg1QLWE8/G6RaYPDq\nmaiqd39spHgmxVzgO8CHgA9k5s2Z+QzwxxRP2lwBHAackJmby32XAWdTPPxqCcXTOM9sOvzVwO3A\nXcAdwGcaT+qUJEn9r/LdH5n5L8Dxu2hbARyys7ay/TbKyx87aRsFLiz/SJKkSWYwpq1KkqSeM1RI\nkqRaGCokSVItDBWSJKkWhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0M\nFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFoYKSZJU\nC0OFJEmqhaFCkiTVYlqVjSPi/wI+DewApjT9dzQzp0XEQcCNwIHAI8C7MnNl0/6nAZcBc4CvAwsz\nc31T+5XAmRRh55bMXDSB2iRJUhdVHan4fygCwdzyv/sCPwWui4hZwD3A/cDBwFLgnoiYCRARhwE3\nA5cCC4DZwK2NA0fEe4HTgJOAU4HTI+KCdguTJEndVWmkIjO3Av/aWI6Ii8qXFwFvBzY1jS6cHxFv\nBN4M3Aa8G7g9Mz9X7vt24ImI2DcznwDOAy7OzKVl+yKKUY0Pt1ucJEnqnrbnVETEbOB9wKLM/Hfg\ncGBJy2bfBo4oXy8AHmg0ZOaTwGpgQUTMBfYBHmzadwmwb0Ts3W4fJUlS90xkouY5wJrMvLtcngus\nbdlmHTBvHO1zKeZnrG1pm9K0vyRJ6mOVLn+0OAu4sml5FrC1ZZutwIxxtM8CyMxtLW007T8uQ0Od\nvaGlE8cfGprKtGlTf225U+fqBevpX4NUC1hPPxukWmBw65motkJFRBwK/C5we9PqLbw4AMwANo2j\nfUt53OlNwaKx7SYqGB6eWWXzyjpx/OHhmcyevXtXztVL1tO/BqkWsJ5+Nki1wODVM1HtjlQcDzyQ\nmf/WtG4NxR0hzeYAT42jfQ3FpY45FPMsGm07mvYfl40bNzMyMlpll0o2btzMcAeOuWHDc88vDw1N\nZXh4Zsdr6Rbr6V+DVAtYTz8bpFpgcOuZqHZDxc4mZS4DWp8rcSTwwab2oyjuBCEi9qGYL7E0M5+K\niNVl++fL7Y8GVmfmuiodGxkZZfv2zr3Bnfjw7KrPna6l26ynfw1SLWA9/WyQaoHBq2ei2g0VrwL+\nV8u6fwCuiIhrgZuAdwK7A3eW7TcC90XEMmAFcB3wlcxc3dR+VUQ0Ri2uAK5us3+SJKnL2p2Z8VJg\nQ/OKzHwG+BPgGIrQcBhwQmZuLtuXAWdTPPxqCbCe4umZDVdTzNG4C7gD+ExmXt9m/yRJUpe1NVKR\nmS
+eVVisXwEcMsZ+t1Fe/thJ2yhwYflHkiRNMoNxL4wkSeo5Q4UkSaqFoUKSJNXCUCFJkmphqJAk\nSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgq\nJElSLQwVkiSpFoYKSZJUC0OFJEmqhaFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaTKu6Q0RMB64FTgO2\nAp/KzPeXbQcBNwIHAo8A78rMlU37ngZcBswBvg4szMz1Te1XAmdShJ1bMnNRm3VJkqQua2ek4iPA\n64H/BrwNWBgRCyNiFnAPcD9wMLAUuCciZgJExGHAzcClwAJgNnBr46AR8V6KoHIScCpwekRc0F5Z\nkiSp2yqNVETEbIqRhOMy87vlumuAw4HtwKam0YXzI+KNwJuB24B3A7dn5ufK/d4OPBER+2bmE8B5\nwMWZubRsX0QxqvHhCdYoSZK6oOpIxVHArzJzSWNFZi7OzD+nGH1Y0rL9t4EjytcLgAea9nsSWA0s\niIi5wD7Ag037LgH2jYi9K/ZRkiT1QNU5FfsDj5ejDH8FTAc+DVwOzKWYR9FsHTC/fD0XWLuT9nll\n246W9nXAlLJ9XcV+SpKkLqsaKvYA/gBYCJxBEQY+ATwHzKKYuNlsKzCjfD1W+yyAzNzW0kbT/pIk\nqY9VDRXbgZcAbysvXxAR+wLnAD/mxQFgBrCpfL1ljPYt5bGmNwWLxrabqGBoqLN3yXbi+ENDU5k2\nbeqvLXfqXL1gPf1rkGoB6+lng1QLDG49E1U1VDwFbGkEilJSzIe4j+JW0WZzyn0A1ozRvobiUscc\ninkWjbYdTfuPy/DwzCqbV9aJ4w8Pz2T27N27cq5esp7+NUi1gPX0s0GqBQavnomqGiqWArtFxMsz\n86flulcCPwOWARe1bH8k8MHy9TKKiZ63AUTEPhTzJZZm5lMRsbps/3y5/dHA6sysNJ9i48bNjIyM\nVquq4vGHO3DMDRuee355aGgqw8MzO15Lt1hP/xqkWsB6+tkg1QKDW89EVQoVmfmTiLgHuDUizqGY\nU7EI+ADwBeCqiLgWuAl4J7A7cGe5+43AfRGxDFgBXAd8JTNXN7VfFRGNUYsrgKurFjQyMsr27Z17\ngzvx4dlVnztdS7dZT/8apFrAevrZINUCg1fPRLVzEeV04KcUt3/eCnw0M/8uM58B/hg4hiI0HAac\nkJmbATJzGXA2xcOvlgDrKZ550XA1cDtwF3AH8JnMvL6N/kmSpB6o/JjuMjycUf5pbVsBHDLGvrdR\nXv7YSdsocGH5R5IkTTKDMW1VkiT1nKFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkW\nhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJ\nqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFtOq7hARJwN3ATuAKeV/v5CZfxYR\nBwE3AgcCjwDvysyVTfueBlwGzAG+DizMzPVN7VcCZ1KEnVsyc1G7hUmSpO5qZ6TilcCXKYLBHGAu\n8OcRMQu4B7gfOBhYCtwTETMBIuIw4GbgUmABMBu4tXHQiHgvcBpwEnAqcHpEXNBWVZIkqesqj1QA\nBwCPZOYvm1dGxJnApqbRhfMj4o3Am4HbgHcDt2fm58rt3w48ERH7ZuYTwHnAxZm5tGxfRDGq8eE2\n+ihJkrqs3ZGKH+9k/eHAkpZ13waOKF8vAB5oNGTmk8BqYEFEzAX2AR5s2ncJsG9E7N1GHyVJUpe1\nM1IRwB9FxPuBIeAOiksacynmUTRbB8wvX88F1u6kfV7ZtqOlfR3FnI155WtJktTHKoWKiPg9YCaw\nmeKyxsuAjwCzyj9bW3bZCswoX4/VPgsgM7e1tNG0/7gMDXX2hpZO
HH9oaCrTpk39teVOnasXrKd/\nDVItYD39bJBqgcGtZ6IqhYrMXB0Re2Xmr8pV34+IIeCzwH28OADMADaVr7eM0b4FICKmNwWLxrab\nqGB4eGaVzSvrxPGHh2cye/buXTlXL1lP/xqkWsB6+tkg1QKDV89EVb780RQoGn4E7Ab8guJukGZz\ngKfK12vGaF9DcaljDsU8i0bbjqb9x2Xjxs2MjIxW2aWSjRs3M9yBY27Y8Nzzy0NDUxkentnxWrrF\nevrXINUC1tPPBqkWGNx6Jqrq5Y//DnwemJeZW8rVBwFPU0yyvKhllyOBD5avlwFHUdwJQkTsQzFf\nYmlmPhURq8v2z5fbHw2szsxK8ylGRkbZvr1zb3AnPjy76nOna+k26+lfg1QLWE8/G6RaYPDqmaiq\nIxX/RHE54uaI+ADw+8Bi4CrgC8BVEXEtcBPwTmB34M5y3xuB+yJiGbACuA74Smaubmq/KiIaoxZX\nAFe3W5gkSequSjMzMvNZ4Hjgt4HlwCeBj2fmhzLzGeCPgWMoQsNhwAmZubncdxlwNsWdIkuA9RRP\nz2y4Grid4mmddwCfyczr2y9NkiR1UztzKn5EESx21rYCOGSMfW+jvPyxk7ZR4MLyjyRJmmQG414Y\nSZLUc4YKSZJUC0OFJEmqhaFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJklQL\nQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk\n1cJQIUmSamGokCRJtZjW7o4RcQ+wLjPPLJcPAm4EDgQeAd6VmSubtj8NuAyYA3wdWJiZ65varwTO\npAg6t2Tmonb7JkmSuq+tkYqIeCtwQtPyLOAe4H7gYGApcE9EzCzbDwNuBi4FFgCzgVub9n8vcBpw\nEnAqcHpEXNBO3yRJUm9UDhURMRtYDHynafVbgU2ZuSgL5wPPAG8u298N3J6Zn8vMR4C3A2+MiH3L\n9vOAizNzaWbeDywCzm2vJEmS1AvtjFRcA9wG/Khp3eHAkpbtvg0cUb5eADzQaMjMJ4HVwIKImAvs\nAzzYtO8SYN+I2LuN/kmSpB6oFCoi4jjgaIq5Ec3mAmtb1q0D5o2jfS6wo6V9HTClaX9JktTnxh0q\nImIGxUTMczJza0vzLKB13VZgxjjaZwFk5raWNpr2lyRJfa7K3R9/A6zIzG/upG0LLw4AM4BN42jf\nAhAR05uCRWPbTVQ0NNTZu2Q7cfyhoalMmzb115Y7da5esJ7+NUi1gPX0s0GqBQa3nomqEireAuwd\nEc+UyzMAIuJNwOcpbhVtNgd4qny9Zoz2NRSXOuZQzLNotO1o2n/chodnVt2l58cfHp7J7Nm7d+Vc\nvWQ9/WuQagHr6WeDVAsMXj0TVSVUvA74T03Liyl+8C8q21qfK3Ek8MHy9TLgKIoJnkTEPhTzJZZm\n5lMRsbps/3y5/dHA6sxcV6F/AGzcuJmRkdGqu1U6/nAHjrlhw3PPLw8NTWV4eGbHa+kW6+lfg1QL\nWE8/G6RaYHDrmahxh4rM/HnzcjlisSMzH4uIXwJXRMS1wE3AO4HdgTvLzW8E7ouIZcAK4DrgK5m5\nuqn9qohojFpcAVzdTkEjI6Ns3965N7gTH55d9bnTtXSb9fSvQaoFrKefDVItMHj1TFQtF1Ey8xng\nT4BjKELDYcAJmbm5bF8GnE3x8KslwHqKp2c2XA3cDtwF3AF8JjOvr6NvkiSpO9p+THdmvqNleQVw\nyBjb30Z5+WMnbaPAheUfSZI0CQ3GtFVJktRzhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUw\nVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQIUmSamGokCRJtTBUSJKkWhgqJElS\nLQwVkiSpFoYKSZJUC0OFJEmq
haFCkiTVwlAhSZJqYaiQJEm1MFRIkqRaTKu6Q0T8PvB3wGuB9cDH\nMvOasm0/4JPAEcDjwHsy8xtN+74BuBbYH1gKLMzMnzW1nw9cCLwEuBM4NzO3tFOYJEnqrkojFREx\nBbgHWAf8IfBO4OKIeGu5yZeAtcAhwGeBuyNiXrnvPsDdwC3Aa4CngS82HftU4BJgIXAcsABY3G5h\nkiSpu6pe/tgbeAg4JzMfzcx/BL4FHBURxwIvA87OwpUUoxFnlvsuBJZn5nWZ+SPgHcB+EXFM2X4e\ncG1mfi0zvwucDZwVEbtNqEJJktQVlUJFZv4iM0/LzOcAIuK1wNHA/6YYWVjZcrliCcWlEIDDgQea\njrUZWAkcERFTgUOBB5v2XQZMB15dpY+SJKk32p6oGRGPU4SEpcBdwFyKSx/N1gHzytdjte8J7Nbc\nnpkjFHM25iFJkvpe5YmaTU4B5gA3Uky+nAVsbdlmKzCjfD1W+6ym5V3tPy5DQ529oaUTxx8amsq0\naVN/bblT5+oF6+lfg1QLWE8/G6RaYHDrmai2Q0VmrgSIiAuAz1FMwJzdstkMYFP5egsvDggzgA1l\nG7to30QFw8Mzq2xeWSeOPzw8k9mzd+/KuXrJevrXINUC1tPPBqkWGLx6JqpSqIiIlwJHZOaXmlb/\nkGLuw1PAAS27zCnXA6wpl1vbH6K4zLGlXP5xea4hYK+m/cdl48bNjIyMVtmlko0bNzPcgWNu2PDc\n88tDQ1MZHp7Z8Vq6xXr61yDVAtbTzwapFhjceiaq6kjFy4C7IuJ3M/MX5brXAP9KMSnzf0bEjMxs\nXMY4ihcmXy4rlwGIiFnAQcAlmbkjIpaX7Y3JnEcC24CHq3RwZGSU7ds79wZ34sOzqz53upZus57+\nNUi1gPX0s0GqBQavnomqGiqWAyuAT5eXPV4GXAV8kCIM/By4NSIuA06kuKPjjHLfTwEXRsT7gK8C\nlwKPZWYjRNwAfDwiVlFM2LwBuOk38eFX27ZtY/nyH9aegOfPP5Dp06fXdjxJkppVChWZORoRJwEf\nA/4JeA64PjM/BhARJ1LMrVgB/BQ4OTOfLPd9IiJOAa6neMjVt4GTm459e0TsC3yC4nLKPwCLJlbe\n5PTIIz/g5294HfNrPOYqgHvv46CDDqnxqJIkvaDyRM3yssebdtH2GHDsGPveC7xijPbF+BRNAOZT\nDPPUaUPNx5Mkqdlg3AsjSZJ6zlAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJklQLQ4Uk\nSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0MFZIkqRaGCkmSVAtDhSRJqoWhQpIk1cJQ\nIUmSamGokCRJtTBUSJKkWhgqJElSLQwVkiSpFtOqbBwRvwN8BDgW2ATcAVyUmdsiYj/gk8ARwOPA\nezLzG037vgG4FtgfWAoszMyfNbWfD1wIvAS4Ezg3M7e0XZkkSeqqqiMVXwB2A14LvBX4U+Cysu1L\nwFrgEOCzwN0RMQ8gIvYB7gZuAV4DPA18sXHQiDgVuARYCBwHLAAWt1WRJEnqiXGHiogI4DDgjMz8\nl8z8NkUQeFtEHAu8DDg7C1dSjEacWe6+EFiemddl5o+AdwD7RcQxZft5wLWZ+bXM/C5wNnBWROxW\nR5GSJKnzqoxU/AI4ITOfbln/nylGFla2XK5YQnEpBOBw4IFGQ2ZuBlYCR0TEVOBQ4MGmfZcB04FX\nV+ifJEnqoXGHisz8t8z8emM5IqYA5wLfAuZSXPpotg6YV74eq31Piksqz7dn5giwvml/SZLU5ypN\n1GxxNXAQxSjDBcDWlvatwIzy9awx2mc1Le9q/3EbGursDS2dOP7Q0FSmTXvhuFOnTqn9HDs7T7c0\n/p91+r3plkGqZ5BqAevpZ4NUCwxuPRPVVqiIiKso5kH8WWb+MCK2AL/VstkMijtEALbw4oAwA9
hQ\ntrGL9k1UNDw8s+ouPT/+8PBMZs/e/fnlPfbozFSS1vN0W6ffm24bpHoGqRawnn42SLXA4NUzUZVD\nRUR8lGIi5emZ2biDYw3wypZN5wBPNbXP2Un7QxSXObaUyz8uzzEE7NW0/7ht3LiZkZHRqrtVOv5w\nB465YcNzzy8/++wW9qj5HDs7T7cMDU1leHhmx9+bbhmkegapFrCefjZItcDg1jNRVZ9TcSnwF8Bb\nMvPupqZlwKKImJGZjcsYR/HC5Mtl5XLjOLMoLp1ckpk7ImJ52d6YzHkksA14uGI9jIyMsn17597g\nTnx4Wvs8Orqj9nPs7Dzd1uvz122Q6hmkWsB6+tkg1QKDV89EjTtURMQBwMXA3wL/FBF7NzXfD/wc\nuDUiLgNOpJhrcUbZ/ingwoh4H/BV4FLgscxshIgbgI9HxCqKCZs3ADf58CtJkiaPKjMzTiy3v5ji\nB/9aissTazNzFDiZ4hLGCuBtwMmZ+SRAZj4BnELx3IrvUNzxcXLjwJl5O3AF8AngXopnXCyaSGGS\nJKm7xj1SkZlXAVeN0f4oxeO7d9V+L/CKMdoX41M0JUmatAbjXhhJktRzhgpJklQLQ4UkSaqFoUKS\nJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJUi0m8qvPNclt27aNVat+UPtx588/kOnTp9d+XElSfzNU\n/AZbteoHrD3+WObXeUyAe+/joIMOqfGokqTJwFDxG24+xW9+q9OGmo8nSZocnFMhSZJqYaiQJEm1\nMFRIkqRaGCokSVItDBWSJKkWhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmphqJAkSbUwVEiSpFoYKiRJ\nUi3a/oViETEDWAG8OzMfKNftB3wSOAJ4HHhPZn6jaZ83ANcC+wNLgYWZ+bOm9vOBC4GXAHcC52bm\nlnb7KEmSuqetkYoyUPw98MqWpi8Ca4FDgM8Cd0fEvHKffYC7gVuA1wBPl9s3jnkqcAmwEDgOWAAs\nbqd/kiSp+yqHiog4AFgGvKxl/XEUIxBnZ+FKitGIM8tNFgLLM/O6zPwR8A5gv4g4pmw/D7g2M7+W\nmd8FzgbOiojd2ilMkiR1VzsjFa8DvkVxiWNK0/rDgZUtlyuWlNs12h9oNGTmZmAlcERETAUOBR5s\n2ncZMB14dRt9lCRJXVZ5TkVmfrzxOiKam+ZSXPpotg6YN472PYHdmtszcyQi1pft/1y1n5Ikqbva\nnqi5E7OArS3rtgIzxtE+q2l5V/uPy9BQZ29o6cTxh4amMm3aC8edOnXKGFvXd55O/b9qPc/IyHaW\nL1/Os89O/X1QAAAMOklEQVRuYXR0R23nedWrDmT69Om1HW+8Gv/fOv1Z64ZBqgWsp58NUi0wuPVM\nVJ2hYgvwWy3rZgCbmtpbA8IMYEPZxi7aN1HB8PDMKptX1onjDw/PZPbs3Z9f3mOPzkwjaT1Pp/5f\ntZ5n+fLlrDrsMObXeI5VwPB3vsOhhx5a41Gr6fRnrZsGqRawnn42SLXA4NUzUXWGijW8+G6QOcBT\nTe1zdtL+ELCeIljMAX4MEBFDwF5N+4/Lxo2bGRkZrdTxqscf7sAxN2x47vnlZ5/dwh41n2Nn5+lE\nLTs7z7PPbmE+xaSZTp6nW4aGpjI8PLPjn7VuGKRawHr62SDVAoNbz0TVGSqWAYsiYkZmNi5jHMUL\nky+XlcsARMQs4CDgkszcERHLy/bGZM4jgW3Aw1U6MTIyyvbtnXuDO/Hhae1znZcIxjpPp/4i9Kqe\nbuv1+es0SLWA9fSzQaoFBq+eiaozVNwP/By4NSIuA06k+MfpGWX7p4ALI+J9wFeBS4HHGg/OAm4A\nPh4RqygmbN4A3OTDryRJmhwmOjPj+X+CZuYocBLFJYwVwNuAkzPzybL9CeAUiudWfIfijo+Tm/a/\nHbgC+ARwL8UzLhZNsH+SJKlLJjRSkZlDLcuPAceOsf29wC
vGaF+MT9GUJGlSGox7YSRJUs8ZKiRJ\nUi0MFZIkqRaGCkmSVAtDhSRJqkWdz6mQemrbtm2sWvWD2o87f35vfseIJE02hgoNjFWrfsDa44+t\n/XeMcO99HHTQITUeVZIGk6FCA6UTv2NkQ83Hk6RB5ZwKSZJUC0OFJEmqhaFCkiTVwlAhSZJqYaiQ\nJEm1MFRIkqRaeEupVNG2bdtYvvyHbNy4mZGR0VqO6QO2JA0CQ4VU0SOP/ICfv+F1tT1kywdsSRoU\nhgqpDXU/ZMsHbEkaBM6pkCRJtXCkQupTnfgFac7dkNRJhgqpT9X9C9KcuyGp0wwVUh9z7oakycRQ\nIf0G68TtseBlFuk3VV+FioiYAdwAnAJsAj6UmR/uba+kwVX37bGw88ssnZgfAoYXqd/0VagArgEO\nBv4rsB9wW0Q8npl39bJT0iCr+xILvPgyS93zQ8A5IlI/6ptQERGzgLOA4zPzYeDhiFgMnAsYKqRJ\nrhvhxcs5Um/1TagAXk3Rn6VN65YAf9Wb7kiabLycI/VWP4WKucDTmbm9ad06YLeI2Csz1/eoX5Im\nkUG6nNOt3zPjM1FUl34KFbOArS3rGsszxnuQoaHOPiR0aGhq8Ze/JquAfYamMm3aC/2eOnVKrefY\n1XnqrmVX57Ge6ucAP2vtnKeb9XTCUMt5vve9R/jaccewX03HfxwY+ub9HHzwr89D+f73V/HN44/t\n6HmGhqayfPlynn12C6OjO2o6Ey+qBWDlyu/WdvxdnafxGej0zxzobj0TNWXHjvre3ImIiDcBH8nM\n32la9wqKv9N7ZeavetY5SZL0H+qn3/2xBvgvEdHcpznAZgOFJEn9r59CxfeAfwcWNK07Gljem+5I\nkqQq+ubyB0BE3Ai8FjgTmAfcCpyRmV/sZb8kSdJ/rJ8magJcQPFEzf8P+Dfgrw0UkiRNDn01UiFJ\nkiavfppTIUmSJjFDhSRJqoWhQpIk1cJQIUmSamGokCRJtei3W0ori4gZFLehngJsAj6UmR/uba/a\nFxG/A3wEOJainjuAizJzW087NkERcQ+wLjPP7HVf2hUR04FrgdMofi/NpzLz/b3tVfsiYh5wI3AM\nsB64PjOv722vqiu/A1YA787MB8p1+wGfBI6g+FUU78nMb/Sqj1Xsop4FwIeA/wN4ErgmM2/pXS/H\nZ2e1NLUNAz+i+H67rRf9q2oX780+wCeA11E8Gfr9mXln73o5fruo52iK77lXAD8G/mdmfmu8xxyE\nkYprgIOB/wqcA1waEaf0tEcT8wVgN4qHgL0V+FPgsp72aIIi4q3ACb3uRw0+Arwe+G/A24CFEbGw\nt12akDuBZyj+/pwPXB4RJ/W2S9WUX4p/D7yypemLwFrgEOCzwN1liOprO6snIvYG/l+K5/f8IfA3\nwEcjoq//To3x3jQspvhVDJPCLt6bIYr3ZgvFe3MN8NmI2FXNfWMX9fw28GXg88CrKL4jvlT+Y3dc\nJvVIRUTMAs4Cjs/Mh4GHI2IxcC5wV08714aICOAwYO/MfLpcdwlwNbCol31rV0TMpvjy+E6v+zIR\nZR1nAsdl5nfLddcAh1P8i3hSiYg9Kfp+VmY+CjwaEf9IEZq+1NPOjVNEHEDx5de6/jhgf2BBZm4B\nroyI11O8fx/obi/Hb1f1ACcDT2XmX5fLj0bEsRTB9mvd6l8VY9TSaD8KOA74Rdc6NQFj1PPHwO9S\nfNaeA34SEX8EHAn8sItdrGSMel4L/HvTaP8VEfFeil+fMa6fqZN9pOLVFMFoadO6JRRflpPRL4AT\nGoGiNAX4zz3qTx2uAW6jGOaczI4CfpWZSxorMnNxZv55D/s0EZuB54B3RMS0MtC+FljZ225V8jrg\nWxSXOKY0rT8cWFkGioYl5Xb9bFf1fA14x0627+fvhV3VQkT8J4rLBecAk+Wy7q7qeR3wrTJQAJCZ\np2TmzV3uX1W7qmc9sF
dE/A+AiDgZ2AP4wXgPPKlHKoC5wNOZub1p3Tpgt4jYKzPX96hfbcnMfwO+\n3liOiCkUoy7f7FmnJqD8F+PRwIHAx3vcnYnaH3g8It4O/BUwHfg0cHlmTrrH0mbm1og4F/gYxaWP\nIeDTmXlrTztWQWY+/5kqMtHz5lJc+mi2juL3CfWtXdWTmauB1U1tL6W4NHpJN/tXxRjvDcDFFKHv\nmztp60tj1LM/8LOIuAJ4O/BL4G8ys69H+8b4rD0YETcA/xARoxQDD+/IzJ+M99iTfaRiFsWEuWaN\n5Rld7ksnXE1xnW7STQYsr9fdCJyTma3v0WS0B/AHwELgDOC9wHnAX/awTxN1AMX108MoanpTRJzW\n0x7VY1ffC5P+OyEidqOYd7UWuKnH3amsnGvwF8B7et2XmuxBMYq0J/AnwP+i+IF8cE971aaI2IMi\nKF0CHApcTjF/5w/Ge4zJPlKxhRd/UTSWN3W5L7WKiKsofmj9WWZOxksHfwOsyMxJOcqyE9uBlwBv\ny8wnASJiX+BdwHW97Fg7yjkGZwHzytD3UDmR8WKKyVuT2Rbgt1rWzWDyfyfsThECXw68tuXyzmRx\nE3BJyyXeyWw7xWj5u8rl75V3T/wF8M7edattiwAy8/Jy+XvlnUd/Cbx7PAeY7CMVa4D/EhHNdcwB\nNmfmr3rUpwmLiI9SJPnTJ/FvaX0LcHJEPBMRzwCnA/9nRGzscb/a9RSwpREoSgns06P+TNTBwE9a\nRpEeAvbtUX/qtIYX31Uwh+I9nJQi4iUUl0ZfCRybmY/1uEuVRcTvUUxg/FDT98LvAR8vbzmfjJ6i\nuO2y2WT/Xni4ZV2l74XJHiq+B/w7xczUhqOB5b3pzsRFxKUUKfctk+Ve5114HcVcileXf75McVfB\nq3vZqQlYSjFX5+VN615J8QyEyWgt8PKIaB6tPAD4WY/6U6dlwMHlJbiGo8r1k045t+puYD/gmMz8\nl972qG1PUoyy/CEvfC+sBf4amKwTnpcBryrfo4YDmNzfC623w76CCt8Lk/ryR2ZujojbKJLumRQT\nsd5LcX140ilv87kY+Fvgn8r70wHIzHU961gbMvPnzcvlv0p2ZOak/KGVmT8p/zV1a0ScQzEZcBF9\nfIvif+ArFLf63hwRl1N8cVxU/pns7gd+TvFeXQacSHF9+IxedmoC/pziOTx/Cmxs+l7Ylpkbetar\nijJzFPi1EZaI2A78MjMn6yjS31OEohvKW8yPB/6IYp7SZHQz8GBE/CXFPwRPoqjpD8d7gMk+UgFw\nAfBdigfDfBT460l8yeBEivfkYorEuJZieK11Jrt643Tgp8CDwK3ARzPz73raozZl5kaKZ1LMpXiG\nyIeAD0yCW+F25fk7cMofXidRXPJYQfE8h5NbLl31ux28UNMpFLf9fZUXvhfWUkzYnAzGujtq0t05\nxa9/1p6heBjeARS3Xf7fFPPgWi8h9LPmev6Z4vN2BsVlkNMpHnMw7tGxKTt2TMb3VJIk9ZtBGKmQ\nJEl9wFAhSZJqYaiQJEm1MFRIkqRaGCokSVItDBWSJKkWhgpJklQLQ4UkSaqFoUKSJNXCUCFJkmph\nqJAkSbX4/wGFg+QD2iAF7wAAAABJRU5ErkJggg==\n",
952 | "text/plain": [
953 | ""
954 | ]
955 | },
956 | "metadata": {},
957 | "output_type": "display_data"
958 | },
959 | {
960 | "data": {
961 | "text/html": [
962 | "\n",
963 | "
\n",
964 | " \n",
965 | " \n",
966 | " | \n",
967 | " Number_of_entries_per_person | \n",
968 | " Count | \n",
969 | "
\n",
970 | " \n",
971 | " \n",
972 | " \n",
973 | " | 0 | \n",
974 | " 1 | \n",
975 | " 8023 | \n",
976 | "
\n",
977 | " \n",
978 | " | 1 | \n",
979 | " 2 | \n",
980 | " 2877 | \n",
981 | "
\n",
982 | " \n",
983 | " | 2 | \n",
984 | " 3 | \n",
985 | " 1570 | \n",
986 | "
\n",
987 | " \n",
988 | " | 3 | \n",
989 | " 4 | \n",
990 | " 984 | \n",
991 | "
\n",
992 | " \n",
993 | " | 4 | \n",
994 | " 5 | \n",
995 | " 640 | \n",
996 | "
\n",
997 | " \n",
998 | " | 5 | \n",
999 | " 6 | \n",
1000 | " 391 | \n",
1001 | "
\n",
1002 | " \n",
1003 | " | 6 | \n",
1004 | " 7 | \n",
1005 | " 272 | \n",
1006 | "
\n",
1007 | " \n",
1008 | " | 7 | \n",
1009 | " 8 | \n",
1010 | " 187 | \n",
1011 | "
\n",
1012 | " \n",
1013 | " | 8 | \n",
1014 | " 9 | \n",
1015 | " 163 | \n",
1016 | "
\n",
1017 | " \n",
1018 | " | 9 | \n",
1019 | " 10 | \n",
1020 | " 95 | \n",
1021 | "
\n",
1022 | " \n",
1023 | " | 10 | \n",
1024 | " 11 | \n",
1025 | " 59 | \n",
1026 | "
\n",
1027 | " \n",
1028 | " | 11 | \n",
1029 | " 12 | \n",
1030 | " 25 | \n",
1031 | "
\n",
1032 | " \n",
1033 | " | 12 | \n",
1034 | " 13 | \n",
1035 | " 26 | \n",
1036 | "
\n",
1037 | " \n",
1038 | " | 13 | \n",
1039 | " 14 | \n",
1040 | " 6 | \n",
1041 | "
\n",
1042 | " \n",
1043 | " | 14 | \n",
1044 | " 15 | \n",
1045 | " 5 | \n",
1046 | "
\n",
1047 | " \n",
1048 | " | 15 | \n",
1049 | " 17 | \n",
1050 | " 1 | \n",
1051 | "
\n",
1052 | " \n",
1053 | "
\n",
1054 | "
"
1055 | ],
1056 | "text/plain": [
1057 | " Number_of_entries_per_person Count\n",
1058 | "0 1 8023\n",
1059 | "1 2 2877\n",
1060 | "2 3 1570\n",
1061 | "3 4 984\n",
1062 | "4 5 640\n",
1063 | "5 6 391\n",
1064 | "6 7 272\n",
1065 | "7 8 187\n",
1066 | "8 9 163\n",
1067 | "9 10 95\n",
1068 | "10 11 59\n",
1069 | "11 12 25\n",
1070 | "12 13 26\n",
1071 | "13 14 6\n",
1072 | "14 15 5\n",
1073 | "15 17 1"
1074 | ]
1075 | },
1076 | "execution_count": 11,
1077 | "metadata": {},
1078 | "output_type": "execute_result"
1079 | }
1080 | ],
1081 | "source": [
1082 | "grouped_df = test.groupby('Patient_ID').agg('size').reset_index()\n",
1083 | "grouped_df.columns = [\"Patient_ID\", \"Number_of_entries_per_person\"]\n",
1084 | "grouped_df = grouped_df.groupby('Number_of_entries_per_person').agg('size').reset_index()\n",
1085 | "grouped_df.columns = [\"Number_of_entries_per_person\", \"Count\"]\n",
1086 | "plt.bar(grouped_df.Number_of_entries_per_person, grouped_df.Count, color='r')\n",
1087 | "plt.show()\n",
1088 | "grouped_df"
1089 | ]
1090 | },
1091 | {
1092 | "cell_type": "markdown",
1093 | "metadata": {},
1094 | "source": [
1095 | "Now let us look at the number of health camps in the test set and their distribution."
1096 | ]
1097 | },
1098 | {
1099 | "cell_type": "code",
1100 | "execution_count": 54,
1101 | "metadata": {
1102 | "collapsed": false
1103 | },
1104 | "outputs": [
1105 | {
1106 | "name": "stdout",
1107 | "output_type": "stream",
1108 | "text": [
1109 | "Number of Health Camp ID in the test dataset : 21\n"
1110 | ]
1111 | }
1112 | ],
1113 | "source": [
1114 | "print \"Number of Health Camp ID in the test dataset : \", len(test.Health_Camp_ID.unique())"
1115 | ]
1116 | },
1117 | {
1118 | "cell_type": "code",
1119 | "execution_count": 12,
1120 | "metadata": {
1121 | "collapsed": false
1122 | },
1123 | "outputs": [
1124 | {
1125 | "data": {
1126 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAg0AAAFoCAYAAADUycjgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3XucVXW9//HXXJoBxKnRSkhIrXP6iHM8NpqK9zTL7FSa\nWolmxywyr5ladOHIKTwieNcS8x5dTmapZFZoxS8lIUHNRPTjKRVQiJLQievEzPz++H43LLZ7hrX2\nXjMs5f18PHiw9/ru9ZnP/u611/7s7/qutet6enoQERER2Zz6LZ2AiIiIvDqoaBAREZFUVDSIiIhI\nKioaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpNFa7opndAyxz91Pi/enA\nh4AeoC7+/yF3/3lsPwc4H9gWuB04093XxrZm4FrgGGA1cJm7X15tbiIiIpK/qkYazOx44MiyxaOA\nE4DhwLD4/33x8ccCFwBjgcOA0cCUxLqXAnsC7wZOByaY2THV5CYiIiL9oy7rb0+YWSvwGLAEWODu\np5hZE7AKGOXuf6qwzm+BX7n7xHj/AOBeYHtC4fIicIS7PxDbvwa8x90Pq/qZiYiISK6qGWm4FJgG\nPJlYtivQDTxb/mAzqwf2Bh5ILJ4DNAF7xH+NwOxE+yxg3ypyExERkX6SqWgws8OAg4CJZU27Ah3A\nd81siZn93szeH9veAAwijEwA4O5dwHJgBOEwxovuvj4RbxkwyMy2z5KfiIiI9J/URUOcrDgVON3d\n15U17woMBn4BHAH8HLjbzPYEhsTHlK+zDmiO7ZXaiO0iIiJSAFnOnvhvYJ67/6q8wd2/YWZXufvL\ncdHjZrYX8FlgfFxWXgA0E86UaOyljdieWk9PT09dXV2WVURERCTY7AdolqLh48AOZvaPeL8ZwMyO\nc/eWRMFQ8iSwG+EwxFrCGRVPx3UaCJMglxJGO95oZvXu3h3XHQascfeXMuRHXV0dHR1r6Orq3vyD\ne9HQUE9Ly+Ca4xQ1lnIa+FjKaeBjKaeBj1XEnCSb1tZtNvuYLEXDIcDrEvenEK7FMM7MbgG63P0z\nifZ3Ao+5e4+ZzQUOBO6PbfsDnYSzMOqAfxJOw3wwth8EzM2Q2wZdXd2sX1/7RpZXnKLGUk4DH0s5\nDXws5TTwsYqYk+QnddHg7ouT9+OIQ4+7PxMv7PTDeGrlg8CJwAFAqYi4FrjOzJ4gTIi8Frg+cXGn\nabH9FMLkyPOAk2t5YiIiIpKvXC4j7e53ES7KNB54nHBlyCNKhYa73wZMAr4NzCCcXjkuEeJc4GHg\nN8A1wH/FmCIiIlIQVV9G2t0/VXb/ZuDmPh4/hU2vAplsWwN8Kv4TERGRAtIPVomIiEgqKhpEREQk\nFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIiIqmoaBAREZFUVDSIiIhIKioaREREJBUV\nDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0i\nIiKSiooGERERSUVFg4iIiKSiokFERERSaax2RTO7B1jm7qfE++3AVGB3YD5wmrs/knj8GGAiMAy4\nFxjr7ssT7RcDpxAKmZvcfVy1uYmIiEj+qhppMLPjgSMT94cA9wC/BfYEZgP3mNng2L4PcCMwARgN\ntAK3JtY/DxgDHAUcC5xoZudWk5uIiIj0j8wjDWbWCkwBHkosPh5YnRgdOMfMPgB8FJgGnAHc5u7f\njzFOAhaa2U7uvhA4Gxjv7rNj+zjCqMTlWXKbO3cuHR1r6Orqzvq0aGvbnaampszriYiIbC2qOTxx\nKaEQ2DGxbF9gVtnjfgfsFx87GphUanD3581sETDazDqBkcADiXVnATuZ2Q7uvixtYk/ssw9tWZ5J\naT2AGTNpb9+rirVFRES2DpmKBjM7D
DiIMG/hukTTcMI8hqRlsOEzfDiwpEL7iNjWU9a+DKiL7amL\nhjZg77QPLrOiyvVERES2FqnnNJhZM2Gi4+nuvq6seQhQvmwd0JyifQiAu3eWtZFYX0RERLawLCMN\n/w3Mc/dfVWhbyys/4JuB1Sna1wKYWVOicCg9djUDpKGhnsbGehoa6jfczyNm0WIpp4GPpZwGPpZy\nGvhYRcxJ8pelaPg4sIOZ/SPebwYws+OAHxBOpUwaBiyNt1/oo/0FwqGIYcCiRFtPYv1+19IymNbW\nbTa5n2fsosVSTgMfSzkNfCzlNPCxipiT5CdL0XAI8LrE/SmED/Zxsa38ugr7AxfG23OAAwmTIjGz\nkYT5CrPdfWmcFHkgofiAMG9iUZZJkLXq6FjDihWraGiop6VlcNVnYSQVMZZyGvhYymngYymngY9V\nxJwkm+QX596kLhrcfXHyfhxx6HH3Z8zsb8AkM7sCuB74HLANcHt8+FRgppnNAeYBVwJ3u/uiRPtk\nMyuNOkwCLkmbWx66urpZv7671/t5xi5CLOU08LGU08DHUk4DH6uIOUl+cjlg5O7/AD4IHEwoCvYB\njnT3NbF9DnAq4eJOs4DlhKs/llwC3AbcAfwI+I67X5VHbiIiIpKPqi8j7e6fKrs/D+j1QgfuPo14\neKJCWzdwfvwnIiIiBaSpqSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0\niIiISCoqGkRERCQVFQ0iIiKSiooGERERSUVFg4iIiKSiokFERERSUdEgIiIiqahoEBERkVRUNIiI\niEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIiIqmoaBAREZFUVDSIiIhI\nKo1ZVzCztwPfAg4AlgPfdPdLY9tVwFlAD1AX/z/L3a+N7WOAicAw4F5grLsvT8S+GDiFUMzc5O7j\nqn9qIiIikqdMIw1mVgfcAywD3gl8DhhvZsfHh4wCxgHDCYXBcODmuO4+wI3ABGA00Arcmoh9HjAG\nOAo4FjjRzM6t8nmJiIhIzrKONOwAPAqc7u6rgD+b2a+BA4EfEoqGKe7+1wrrngHc5u7fBzCzk4CF\nZraTuy8EzgbGu/vs2D6OMCpxeRXPS0RERHKWaaTB3f/i7mNiwYCZHQAcDMw0s22BHYGne1l9NHB/\nItbzwCJgtJkNB0YCDyQePwvYycx2yJKjiIiI9I+qJ0Ka2XOEIuBB4A5gN8IchvFmttjM/mBmn0ys\nMhxYUhZmGTAitvWUtS8jzIsYUW2OIiIikp/MEyETjiHMW7gOuBJ4GOgGFgBXA+8Grjezl919OjAE\nWFcWYx3QHNtw986yNmJ7v2toqKexsZ6GhvoN9/OIWbRYymngYymngY+lnAY+VhFzkvxVXTS4+yMA\nZvYF4HtAC/BTd38pPmS+mb0DOA2YDqzllQVAM7A6tmFmTYnCofTY1dXmmEVLy2BaW7fZ5H6esYsW\nSzkNfCzlNPCxlNPAxypiTpKfTEWDmb0Z2C+OHJQsAJqAbd3972WrPAkcGm+/QBiZSBoGLI1tdfH+\nokRbT2zvdx0da1ixYhUNDfW0tAymo2MNXV3dNcUsYizlNPCxlNPAx1JOAx+riDlJNskvzr3JOtKw\nC3CHme3o7n+Jy94F/A34vJnt7+7vTTy+HXgq3p5DOMtiGoCZjSTMV5jt7kvNbFFs/0F8/EHAIndf\nljHHqnR1dbN+fXev9/OMXYRYymngYymngY+lnAY+VhFzkvxkLRrmAvOAW+I1FHYBJgMXEoqCL8fl\ndwFHAJ8gzG0AmEo4y2JOjHElcLe7L0q0Tzaz0qjDJOCSKp+XiIiI5CzrKZfdhIsvrSKcNXE9cJW7\nf9Pd5wHHAZ8EHgfOBMa4+0Nx3TnAqYSLO80iXE3ylET4S4DbCGdi/Aj4jrtfVf1TExERkTxlnggZ\nD
0sc10vb3cDdfaw7jXh4okJbN3B+/CciIiIFo/NZREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKi\nQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0iIiKSiooGERERSUVFg4iIiKSiokFE\nRERSUdEgIiIiqahoEBERkVRUNIiIiEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBRERE\nUlHRICIiIqk0Zl3BzN4OfAs4AFgOfNPdL41tOwM3APsBzwFfcPf7EuseDlwBvA2YDYx192cT7ecA\n5wPbArcDZ7r72mqemIiIiOQr00iDmdUB9wDLgHcCnwPGm9nx8SHTgSXAXsD3gDvNbERcdyRwJ3AT\n8C7gReCuROxjgQuAscBhwGhgSrVPTERERPKV9fDEDsCjwOnu/md3/yXwa+BAMzsU2AU41YOLCaMJ\np8R1xwJz3f1Kd38S+BSws5kdHNvPBq5w91+4+8PAqcCnzWxQTc9QREREcpHp8IS7/wUYU7pvZgcA\nBwGnE0YGHik7nDCLcKgCYF/g/kSsNWb2CLCfmc0C9gYmJNadAzQBewC/z5JnLTo7O5k7dwEdHWvo\n6urOvH5b2+40NTX1Q2YiIiJbVuY5DSVm9hwwEvgZcAdwJeHQRNIyYES8PbyP9jcAg5Lt7t5lZstj\n+4AVDfPnP87iww+hrYp1nwCYMZP29r2AfAuQWmKpkBERkTxUXTQAxwDDgKmEyY1DgHVlj1kHNMfb\nfbUPSdzvbf0B00YY9qjGisTtPAuQamOVxxEREalW1UWDuz8CYGbnAt8nTHBsLXtYM7A63l7LKwuA\nZsLn7NrE/d7W71cNDfU0NtZTX1+XSxyA+vq6mgqQjpxiJeM0NGz6fy3yilXEnPKMpZwGPpZyGvhY\nRcxJ8pepaDCzNwP7ufv0xOIFhLkHS4FRZasMi8sBXoj3y9sfJZy6uTbefzr+rQZg+8T6/aqlZTCt\nrdswdGht8y5LcYDCxErGSS7LS16xiphTnrGU08DHUk4DH6uIOUl+so407ALcYWY7xkmREE6f/Cth\n0uMXzazZ3UuHGQ4EHoi358T7AJjZEKAduMDde8xsbmwvTZbcH+gEHsuYY1U6OtawYsUqVq5cy9Ac\n4gCFiZWM09BQT0vL4KrnWSTlFauIOeUZSzkNfCzlNPCxipiTZFP+5bKSrEXDXGAecEs8LLELMBm4\nkPBhvxi41cwmAh8mjKafHNe9GTjfzL5EmDw5AXjG3UtFwrXAdWb2BGFC5LXA9QN1caeurm7Wr++m\nu7snlzhAYWIl4/S1LM/4WzJOUWMpp4GPpZwGPlYRc5L8ZDpg5O7dwFHAKuBB4HrgKnf/Zmz7MOEQ\nwzzgBOBod38+rruQMHnyFOAhwhkTRydi3wZMAr4NzCBc42FcLU9ORERE8pN5ImQ8LHFcL23PAIf2\nse4MYNc+2qegq0CKiIgUkqamioiISCoqGkRERCQVFQ0iIiKSiooGERERSUVFg4iIiKSiokFERERS\nUdEgIiIiqahoEBERkVRUNIiIiEgqKhpEREQklcyXkZZXt87OTubOXVD1r8e1te1OU1NTP2QmIiJF\np6JhKzN//uMsPvwQ2qpY9wmAGTNpb98r56xEROTVQEXDVqiN8Jvl1ViRuK1RCxGRrYuKBqmaRi1E\nRLYuKhqkJnmNWoiISPHp7AkRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCoq\nGkRERCQVFQ0iIiKSSuaLO5nZW4CrgUOB1cCPgK+4e6eZXQWcBfQAdfH/s9z92rjuGGAiMAy4Fxjr\n7ssTsS8GTiEUMze5+7ganpuIiIjkqJqRhp8Ag4ADgOOBDxEKAYBRwDhgOKEwGA7cDGBm+wA3AhOA\n0UArcGspqJmdB4wBjgKOBU40s3OryE9ERET6QaaRBjMzYB9gB3d
/MS67ALiEUCyMAqa4+18rrH4G\ncJu7fz+udxKw0Mx2cveFwNnAeHefHdvHEYqRy6t6ZiIiIpKrrCMNfwGOLBUMUR3wejPbFtgReLqX\ndUcD95fuuPvzwCJgtJkNB0YCDyQePwvYycx2yJijiIiI9INMIw3u/jJhLgIAZlYHnAn8ijDK0AOM\nN7MjgeXA5e4+LT58OLCkLOQyYERs6ylrX0YoSEbE2yIiIrIF1forl5cA7yT80OG7gG5gAWGi5LuB\n683sZXefDgwB1pWtvw5ojm24e2dZG7G93zU01NPYWE99fV0ucYDCxCpiTslYDQ31G+7XqoixlNPA\nx1JOAx+riDlJ/qouGsxsMmEewsfcfQGwwMx+6u4vxYfMN7N3AKcB04G1vLIAaCacgbE2xmxKFA6l\nx66uNscsWloG09q6DUOHDsolDlCYWEXMqTxW6X5eihhLOQ18LOU08LGKmJPkp6qiwcyuAU4FTnT3\nu0rLEwVDyZOEUzMBXiCcUZE0DFga2+ri/UWJtp7Y3u86OtawYsUqVq5cy9Ac4gCFiVXEnJKxGhrq\naWkZTEfHGrq6umuISCFjKaeBj6WcBj5WEXOSbJJf4npTzXUaJgCfBT7u7ncmln8d2N/d35t4eDvw\nVLw9BzgQmBYfP5IwX2G2uy81s0Wx/Qfx8QcBi9x9QOYzdHV1s359N93dPbnEAQoTq4g5lceqdD/P\n2EWIpZwGPpZyGvhYRcxJ8pP1lMtRwHjgIuDBsjMb7ga+HK+tcBdwBPAJwtwGgKnATDObA8wDrgTu\ndvdFifbJZlYadZhEmDMhr3GdnZ3Mnbug6m8VbW2709TU1A+ZiYhIUtaRhg8TTtMcH/9BvPKjuzeY\n2XGEaytMBJ4Dxrj7QwDuPsfMTo1trcAMwohFySXAm4A7gC7gBne/qponJa8u8+c/zuLDD6GtinWf\nAJgxk/b2vXLOSkREymU95XIyMLmP9rsJIw69tU8jHp6o0NYNnB//yVamjXAKTjVWJG5r1EJEpP/U\nesqlSKFo1EJEpP+oaJDXnLxGLUREZFO6coaIiIikoqJBREREUlHRICIiIqmoaBAREZFUVDSIiIhI\nKioaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKpqGgQERGRVFQ0iIiISCr6\nlUuRXnR2djJ37gI6OtbQ1dWdad22tt1pamrqp8xERLYMFQ0ivZg//3EWH34IbRnXewJgxkza2/fq\nh6xERLYcFQ0ifWgD9q5ivRV5JyIiUgCa0yAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCSVTBMhzewt\nwNXAocBq4EfAV9y908x2Bm4A9gOeA77g7vcl1j0cuAJ4GzAbGOvuzybazwHOB7YFbgfOdPe1VT8z\nERERyVXWkYafAIOAA4DjgQ8BE2PbdGAJsBfwPeBOMxsBYGYjgTuBm4B3AS8Cd5WCmtmxwAXAWOAw\nYDQwpapnJCIiIv0iddFgZgbsA5zs7k+5++8IH/QnmNmhwC7AqR5cTBhNOCWuPhaY6+5XuvuTwKeA\nnc3s4Nh+NnCFu//C3R8GTgU+bWaD8niSIiIiUrssIw1/AY509xfLlr+eMDLwSNnhhFmEQxUA+wL3\nlxrcfQ3wCLCfmdUTToV/ILHuHKAJ2CNDfiIiItKPUhcN7v6yu99bum9mdcCZwK+B4YRDE0nLgBHx\ndl/tbyAc8tjQ7u5dwPLE+iIiIrKF1XL2xCVAO/A1YAiwrqx9HdAcb/fVPiRxv7f1RUREZAur6jLS\nZjaZMA/hY+6+wMzWAtuVPayZcIYFwFpeWQA0E662uzZxv7f1+11DQz2NjfXU19flEgcoTKwi5pSM\nVcScao2VjNPQsOn/teaXR6wi5pRnLOU08LGKmJPkL3PRYGbXECYqnujupTMgXgB2K3voMGBpon1Y\nhfZHCYch1sb7T8e/0QBsn1i
/37W0DKa1dRuGDq1t7mUpDlCYWEXMKRmriDnVGisZJ/xa5tyqY+2x\nxx6v+MXMlpbBVcfrjzhFjaWcBj5WEXOS/GS9TsME4LPAx939zkTTHGCcmTW7e+kww4FsnNw4J94v\nxRlCOLRxgbv3mNnc2F6aLLk/0Ak8lvH5VK2jYw0rVqxi5cq1DM0hDlCYWEXMKRmriDnVGisZ5w9/\neISFhx2c+dcyIfxiZsevfsuee4ZfzGxoqKelZXBVP9edlFecosZSTgMfq4g5STalLzp9SV00mNko\nYDxwEfCgme2QaP4tsBi41cwmAh8mnBFxcmy/GTjfzL4E/AyYADzj7qUi4VrgOjN7gjAh8lrg+oG8\nuFNXVzfr13fT3d2TSxygMLGKmFMyVhFzqjVWeZxqfy0TYEUiVqX4tcgrTlFjKaeBj1XEnCQ/WQ4Y\nfTg+fjzhg30J4fDBEnfvBo4mHGKYB5wAHO3uzwO4+0LgGMJ1Gx4inDFxdCmwu98GTAK+DcwgXONh\nXC1PTERERPKVeqTB3ScDk/to/zPh8tK9tc8Adu2jfQq6CqSIiEhhaWqqiIiIpKKiQURERFJR0SAi\nIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0iIiKSiooGERERSaWqX7kUkS0j/PjVgqqvyd/Wtvsr\nfvxKRCQtFQ0iryLz5z/O4sMPqfrHr5gxk/b2vXLOSkS2FioaRF5lavrxqzwTEZGtjuY0iIiISCoq\nGkRERCQVFQ0iIiKSiooGERERSUUTIUW2Qjp1U0SqoaJBZCukUzdFpBoqGkS2Ujp1U0Sy0pwGERER\nSUVFg4iIiKSiokFERERSUdEgIiIiqahoEBERkVSqPnvCzJqBecAZ7n5/XHYVcBbQA9TF/89y92tj\n+xhgIjAMuBcY6+7LEzEvBk4hFDM3ufu4avMTERGRfFU10hALhv8FditrGgWMA4YTCoPhwM1xnX2A\nG4EJwGigFbg1EfM8YAxwFHAscKKZnVtNfiIiIpK/zCMNZjYK+EEvzaOAKe7+1wptZwC3ufv3Y5yT\ngIVmtpO7LwTOBsa7++zYPo4wKnF51hxFREQkf9WMNBwC/BrYj3AIAgAz2xbYEXi6l/VGA/eX7rj7\n88AiYLSZDQdGAg8kHj8L2MnMdqgiRxEREclZ5pEGd7+udNvMkk2jCHMYxpvZkcBy4HJ3nxbbhwNL\nysItA0bEtp6y9mWEomREvC0iIiJbUJ6Xkd4V6AYWAFcD7wauN7OX3X06MARYV7bOOqA5tuHunWVt\nxPZ+19BQT2NjPfX1dZt/cIo4QGFiFTGnZKwi5lRrrCLmlIyVZ04NDZv+X2vcPGIpp4GPVcScJH+5\nFQ3uPs3MfuruL8VF883sHcBpwHRgLa8sAJqB1bENM2tKFA6lx67OK8e+tLQMprV1G4YOHZRLHKAw\nsYqYUzJWEXOqNVYRc0rGyjOn5LK85BVLOQ18rCLmJPnJ9QerEgVDyZPAofH2C4QzKpKGAUtjW128\nvyjR1hPb+11HxxpWrFjFypVrGZpDHKAwsYqYUzJWEXOqNVYRc0rGyjOnhoZ6WloGV/0z20l5xVJO\nAx+riDlJNuVfBCrJrWgws68D+7v7exOL24Gn4u05wIHAtPj4kYT5CrPdfamZLYrtpTMzDgIWufuA\nzGfo6upm/fpuurt7cokDFCZWEXNKxipiTrXGKmJOyVh55tTXsjzjb8k4ecYqYk55xipiTpKfPEca\n7ga+HK+tcBdwBPAJwtwGgKnATDObQ7go1JXA3e6+KNE+2cxKow6TgEtyzE9ERERqUGvRsOHrirvP\nM7PjCNdWmAg8B4xx94di+xwzOzW2tQIzgM8mYl0CvAm4A+gCbnD3q2rMT0T6WWdnJ3PnLqh6KLmt\nbXeampr6ITMRyVtNRYO7N5Tdv5sw4tDb46cRD09UaOsGzo//RORVYv78x1l8+CG0VbHuEwAzZ
tLe\nvlfOWYlIf8h1IqSIbJ3agL2rXHdFnomISL/SSbAiIiKSiooGERERSUVFg4iIiKSiokFERERSUdEg\nIiIiqahoEBERkVRUNIiIiEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIi\nIqmoaBAREZFUVDSIiIhIKioaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURERFJR0SAiIiKp\nNFa7opk1A/OAM9z9/rhsZ+AGYD/gOeAL7n5fYp3DgSuAtwGzgbHu/myi/RzgfGBb4HbgTHdfW22O\nIiIikp+qRhpiwfC/wG5lTXcBS4C9gO8Bd5rZiLjOSOBO4CbgXcCL8fGlmMcCFwBjgcOA0cCUavIT\nERGR/GUuGsxsFDAH2KVs+WGEEYRTPbiYMJpwSnzIWGCuu1/p7k8CnwJ2NrODY/vZwBXu/gt3fxg4\nFfi0mQ2q5omJiIhIvqoZaTgE+DXhEERdYvm+wCNlhxNmxceV2u8vNbj7GuARYD8zqwf2Bh5IrDsH\naAL2qCJHERERyVnmOQ3ufl3ptpklm4YTDk0kLQNGpGh/AzAo2e7uXWa2PLb/PmueIiIikq+qJ0JW\nMARYV7ZsHdCcon1I4n5v6/erhoZ6Ghvrqa+v2/yDU8QBChOriDklYxUxp1pjFTGnZKwi5lS6nfy/\nlph5xHmt55RnrCLmJPnLs2hYC2xXtqwZWJ1oLy8AmoEVsY1e2lczAFpaBtPaug1Dh9Y2haIUByhM\nrCLmlIxVxJxqjVXEnJKxiphT+bI85BUnz1hFzCnPWEXMSfKTZ9HwAq88m2IYsDTRPqxC+6PAckLh\nMAx4GsDMGoDtE+v3q46ONaxYsYqVK9cyNIc4QGFiFTGnZKwi5lRrrCLmlIxVxJwgfLNsaRlMR8ca\nurq6q46ZV5zXek55xipiTpJNefFeSZ5FwxxgnJk1u3vpMMOBbJzcOCfeB8DMhgDtwAXu3mNmc2N7\nabLk/kAn8FiOOfaqq6ub9eu76e7uySUOUJhYRcwpGauIOdUaq4g5JWMVMafNLcsr9paOVcSc8oxV\nxJwkP3kWDb8FFgO3mtlE4MOEMyJOju03A+eb2ZeAnwETgGdKF4YCrgWuM7MnCBMirwWu18WdRERE\niqHWWSYbvmK4ezdwFOEQwzzgBOBod38+ti8EjiFct+EhwhkTRyfWvw2YBHwbmEG4xsO4GvMTERGR\nnNQ00uDuDWX3nwEO7ePxM4Bd+2ifgq4CKSIiUkg6n0VERERSUdEgIiIiqahoEBERkVRUNIiIiEgq\nKhpERESveUOkAAAXe0lEQVQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHRICIiIqmoaBAR\nEZFUVDSIiIhIKioaREREJBUVDSIiIpKKigYRERFJpXFLJyAiUtLZ2cncuQvo6FhDV1d3pnXb2nan\nqampnzITEVDRICIFMn/+4yw+/BDaMq73BMCMmbS379UPWYlIiYoGESmUNmDvKtZbkXciIvIKKhpE\n5DWnlsMcoEMdIr1R0SAirznVHuYAHeoQ6YuKBhF5Tar2MAfoUIdIb3TKpYiIiKSiokFERERSyfXw\nhJkdDdwB9AB18f+fuPvHzKwdmArsDswHTnP3RxLrjgEmAsOAe4Gx7r48z/xERESkenmPNOwG/JTw\nwT8MGA58xsyGAPcAvwX2BGYD95jZYAAz2we4EZgAjAZagVtzzk1ERERqkPdEyFHAfHf/W3KhmZ0C\nrHb3cXHROWb2AeCjwDTgDOA2d/9+fPxJwEIz28ndF+aco4iIiFShP0Yanq6wfF9gVtmy3wH7xduj\ngftLDe7+PLAoLhcREZECyHukwYD3m9nXgAbgR4RDDsMJ8xiSlsGG06iHA0sqtI/IOT8RERGpUm5F\ng5m9FRgMrCEcdtgFuBoYEv+tK1tlHdAcb2+uvd81NNTT2
FhPfX1dLnGAwsQqYk7JWEXMqdZYRcwp\nGauIOdUaq79yamjY9P9aYuYRp6ixipiT5C+3osHdF5nZ9u7+Ulz0RzNrAL4HzOSVBUAzsDreXruZ\n9n7X0jKY1tZtGDp0UC5xgMLEKmJOyVhFzKnWWEXMKRmriDnVGqu/ckouy0NecYoaq4g5SX5yPTyR\nKBhKngQGAX8hnE2RNAxYGm+/sJn2ftfRsYYVK1axcuVahuYQByhMrCLmlIxVxJxqjVXEnJKxiphT\nrbH6K6eGhnpaWgZX/TsWJXnFKWqsIuYk2ZQXypXkeXjifcAPgBHuvjYubgdeBB4AvlK2yv7AhfH2\nHOBAwpkUmNlIwnyGOXnltzldXd2sX99Nd3dPLnGAwsQqYk7JWEXMqdZYRcwpGauIOdUaq79y6mtZ\nXrFfS7GKmJPkJ8+RhgcJhxNuNLNvAG8HpgCTgZ8Ak83sCuB64HPANsDtcd2pwEwzmwPMA64E7tbp\nliIiIsWR2ywTd18JHAG8CZgL3ABc5+6Xufs/gP8ADiYUBfsAR7r7mrjuHOBUwpkWs4DlwCl55SYi\nIiK1y3tOw5OEwqFS2zyg19+adfdpxMMTIiIiUjw6n0VERERSyfviTiIirymdnZ3Mnbug6pn8bW27\n09TUlFsckS1JRYOISB/mz3+cxYcfsuHytVk8ATBjJu3te+UWR2RLUtEgIrIZbcDeVa67oh/iiGwp\nmtMgIiIiqahoEBERkVRUNIiIiEgqKhpEREQkFRUNIiIikoqKBhEREUlFRYOIiIikoqJBREREUlHR\nICIiIqmoaBAREZFUVDSIiIhIKvrtCRGRVxn9YqZsKSoaREReZfL8xcxaChAVH1sfFQ0iIq9Cef1i\nZrUFiH6ue+ukokFEZCtXbQGin+ve+mgipIiIiKSikQYREalZnpMzNdGzuFQ0iIhIzfKcnJlnLMlX\noYoGM2sGrgWOAVYDl7n75Vs2KxERSSOvyZl5x5L8FG1Ow6XAnsC7gdOBCWZ2zBbNSERERIACFQ1m\nNgT4NHC2uz/m7tOBKcCZWzYzERERgQIVDcAehMMlsxPLZgH7bpl0REREJKlIRcNw4EV3X59YtgwY\nZGbbb6GcREREJCrSRMghwLqyZaX7zWkCPFHlH34CGNlQT2NjPfX1dbnEAQoRq4g5lccqYk61xCpi\nTuWxiphTLbGKmFN5rCLmVEusIuZUKZbkq66np2dL5wCAmR0HXO3ub0ks25WwDWzv7i9tseRERESk\nUIcnXgDeaGbJnIYBa1QwiIiIbHlFKhr+APwTGJ1YdhAwd8ukIyIiIkmFOTwBYGZTgQOAU4ARwK3A\nye5+15bMS0RERIo1ERLgXMIVIX8DvAz8lwoGERGRYijUSIOIiIgUV5HmNIiIiEiBqWgQERGRVFQ0\niIiISCoqGkRERCQVFQ0iIiKSStFOuQTAzJqAK4AxhN+fuNndvxbbpgMfAnqAuvj/h9z957H9DOBL\nwBuAGcAZwH/HWJ3Ai8BOwArgJeDfKsUys2bgUuBjcflPge54f0NOZtYA/B3YNmWcu2J+k2JO3cAa\nYDtgEbAaaO/t+SX66EvAacDPE/30U8CAveLDtqkyp/XAP4A3xv5aB+xaHotwie9ny5bXxb99F/Du\nLH1e4fW7L65zLPA6YGjsr7rE3yHGSD6/N8TXJJnPi8DtZN8OKsVaG/8lt4PtgCVAU5X9tB5YCWwP\nPEfYDvatMlaqbaqPWHWE138V6bfzNH3+EvAW4G/xdsXtPEWf3wH8Kxt/AbdiTrDZ/UFX7JvtgOfj\n892zin6qI/22OQS4CvhIfNyPgXHA/7Dpe+/NwHLCNrFbhpxg021qs+/jmFdTzOF4wn7jt4TT3j8c\n15tFeB/2sPHLZk/i/772UUbYn38KGJxYj0TOpb7rM5a775JYdg5wednzuczdv4T0m0IWDcDVhI3+\nvUALcJuZPefuNwCjg
BMI13IoWQFgZh8HpgAnAk8DNwMPEnYO7yVcA6IdmAgsIOzQJhHexJvEIuxY\nDgLeT3iT3BeXb5ITYSffEmOdmSLONMIbsAk4GphO2GlNJOzgvwdMAL5dIRbxeb4NuIDwRntPzGk7\n4JfAr4B/J/zE+KD4dx/PkNMH43N5C3Ah8DDws9iXXy3LaT3hUt9JV8ScdiNjn1d4/WbG53AY8Cbg\nu3Hd7wG/J/T9RTG35PPbjbCTbGPjTmlKoq+ybAflsaYQLkB2PBu3g78SCrBmwjZwe8Z++gDhg3BH\nQp8/S3hNxgM3ZoyVZZuqFOt3hAurvYfwQZh2O99cn19DKGYvJFwy/sZenl+lWMk+3x74BfAY4eqx\nvyFstx+OyzbE2cz+4EjgTsJ29Q3CL+reRNjGS9tT2n7anvTb5lWEwuTweP+WGKsh9tOdhPfeROAR\n4J7Yd/+TIqfkdpDlfUzsg6MIhcuLhH3JDsCBhNf9bkL/vg+YA0wm7GtWlsUBNtlH/ZVN9+dvBq6P\nfXUP4f33HeDLFXKqFCtpN+BbMffSNrcK6VeFKxrMrJVwRcjD3P3huOxSYF8z+w6wCzDP3cs3IIjf\nlksXhDKzrxM2zIMJl6huJ3zD3omw0fcA2/YS60jgend/NObUAjxXltN7CTszgCWbixPXuzX+7YMJ\nb47phDfOOwhV8/eBIb3EKpkK/BHYD/iMuz9sZu8kvHGWEHbK2wNPAXu4+68z5LQt4dvA14F/AS6J\ncffoJacNy8xsf8K3kXrgOLL3+YbXL/b5dsBC4BF37zGzSwg7ipeAtxK2g4srxBkFPO3uf4t5tRIK\nzcOqyGlDrGScxHZwO2EH/0x8/B+r6KcRhF95nQjsHHP6DrCvu1+UMVbWbSoZ6wjCaz7G3efGZWm3\n8776fAXhA34SG/v8RqA9a5/HH7brJGz/fyZ8i76NcOXYE8vi9LU/WEf4ULwgPufLCUVDb9vB5vop\n7ba5DjjT3f8Q1/tf4OKY0wuEAmI+8DbCNloHbFfFNpX1ffyfwFnuPiv2+VsInw8vxX73GPvl+Px+\n4+7PVIhTMpVQ9LyVjfvz38Vc/zX21aiY00Up9nePEN4nSaOA75S2ORkYhSsaCJXtS+4+q7TA3acA\nmNnuhG/Xz5avZGbbEj4MPplY3Ai8GN8IZwGPuftXE7HWA1/oJY/lwHFm9gPgiPh3N8nJzH5FqJon\nUFYd9xIH4BPAusTzOznmU0cYOoUwvFiRmX2SsDP4PeFDpRTn7zHHRwnDgT2ED6BHsuRkZjsAR7v7\n/fHv7R5jpfl58knAvcDorH1e4fU7EFjh7v9SekxiO/ggYQd2RnmcaDfCN8uSDdtUFdtBMtYrtk1C\nX04i/HbKz4HFvcRJKu+nLwJPufuFiZy6gd2zxorLTo5xUm1TCZcSfiDuh6UFGbbzvvp8DLAocYhx\nd8K3/X9LEau8z3chvH4nJ/rpd8AXkwFS7A/eStiu/wGMJRQmdUB5gV1JxX6Kf7fPbdPdN4zSmNlO\nhA/UtYnnNybRflLM6acpcirfpt5Myvdx3E5OZOO+4kCgg1C0vz7x0CsI+5aK++BEvNI+6iZCgfiK\n/bmZvYNQHK3PEGtCWfMoNt3mZAAU7oqQZvZ5wgZ8DWEYrYkwhPc/wEcJw1H3EYa7FgMT3P2XZrYH\nYaM/nPCNbRfCccpG4Mr4rxl4gLDhNxHeEHeXx4p57EkYMh5JeOOuAT4PnBfXfYwwtHkNYUjyKcK3\nnr7iACwF/kIYpiw9v+8AXyMct19HGKKslNMb4999b1z3OMIOrxTnT3G9hvjvjzHHanIq9fkJhKHy\np2N/bRIr8bodQBgqvpBwzPWaLH1e4fX7N8KowNcIH+jJnO4jHF75SS/99PPYly2EIf9lhB355Vly\nqhDLCB92EwgfCsmcTiNsm9OBfTL20+T4On2djdv5LTH+3zLGyrRNlcX6LeFQ1uWJOGm
38776/NLY\nxxcmnt/1hGPnr3h+m+nz7eLr18rG/cFfCR8gc0m/P/gqYaj89YQP0wbCe3x6lf2UattMxLmVUND8\nnXAI6YqyWJ8hfEv/Z4pYfW0Hmd7HMd7nCfMstmHjPJ0RiTx2IGwHreVxyvZRewOXEUbhynPaiXC4\naiQV9ue9xJrg7m+LbW8m7LO+E9ddA9zk7peVPx/JVxHPnhhKGFYdS/jGdB5wFnAOYRLPYMIxzSMI\n3+zujh+CQwkf7t8kVN3HEXZe/x5jPRDjH0r4wHworrOsQiwIE60WEjbIWwjH1ifGnErH/2bFnOqB\n/0sR532EN83uZc/vDMKb6sfx73T3EusK4BZ3XxAf97pEnC8ShjjnEHbInYQd7lerzKnU519O9Gul\nnErGxn7tZuPrl6XPy1+/HxJ2SheV5fQlwk65m8rbAfE12ZZQ5H2M8EFTen5Zt4NkrNsIO9xJFfrp\n7fHx/6+KfvoU4fX6MuH3Vw5j44SxrLFKeaXdppKxniB8uJbiZNnO++rzO+N6yfdxM+H1ztrnXyQU\nE78gjEhsQ3if/5Ns+4NPEz6Q1hGKiF8SDhE9U0U/Zdk2Sy4mHLLpSOSUjPVjwmGl1xEONVa7HWR9\nH0P4gB5O+FA+OfZfE+GQyZ2EbXUHwpyGvvZRELaZ8pzOBk4iFF697c8rxUoqTehcSpi/cREwPhY8\n0o+KWDSsJ+wwTnD338fjkRcBp7r7N4Ad3f277v64u3+dsMF9Nq4H4RjmPe4+m/DGayTsfJYSvrF8\nDfigux9DmBA3qjxWHNq8ETjP3R9g4xDYGwkz299PGMb8SMzpQcKQaZ9x4tyCHxPegGeXPb8PuvvH\nCJPD9q8Q632EOQwTYy7dhNfvBHf/PeED9iVgB3c/nfAtahrw0SpzmkSYMLkzcLi7f6tCnwNgYWb9\nUYTJYBtevyx9Xv76EYYs64jzGhL9dDZhiHxYL9sBhA+TQ9x9jrs/GPNqIBzXzZLTJrEIHyh1hB1d\nXXLbJEz26gbuyNpP7j6DcDy6m/Ct/LD4Or9URazU21SFWA8n45ByO0/R538iHAZIvo9PBFZW0efT\nCBM6D4+v4RLCZMeXM+4PRhHeI18lzNb/AOFXdduz9lPGbRMAd3/K3R8ijN40AFdVeO1OIsw1OBh4\nssrtIPX7OMY7Gvg44QP5PTHOjcBXgDe4+xmECZifJWyz36D3fVTpjIbynO4BGt39P+llf95LrGT/\n3Q+80d2/6u5PuPv34mNPQ/pVEYuGpYRjfM8nljlxKN3dXy57/JOEbxBLE48tKZ010Bjbnyd8SxiZ\naB+ZeHwp1q6Ebx1/TOZE6K+RhDfVQYCZ2T+A/YGvmdnjm4kDYYcD0GhmO8U3x4bnRxhSfWOFnI4n\nDBG+GP/m0bF9QRye3JMwZJjsp0cJw4CpcyI8qUbC0G8LcKS7J89UScYq2T+u+ys2ff2y9Hn561fq\ncxKPd8KO/u4+tgPcfa27r0u0/Sn+35Qxp/JYpZyWJ56/l61bMaeot34iDsl+DFjt7u8gfMt7Lmus\njNtUeawH2PS9l3Y731yfv0D4oEnm1MLG17yvWJX6fDphhGCEhzkvKxJ9lXZ/sCfhPZDM6SE2vl82\nySnqrZ9Kf6fPbdPMGs3sI2Y2NNFW6qceM3uzmR1VltM8Qh+2bCanV2xTWd/HZnY88CPCoY41Zc/v\nD2y6b3mSMIK1Hb3vo6YSvszUs2m/DieedtnH+7hSrJ3MrCPu73D38vk15X0j/aCIRcNsYJCZ/Uti\n2W7Ac2Z2i5ndWPb4dxKq8EWEbx17JNpejP/XEYbtdyZMjnrOzG4hDN09Vx4rxqmLf3dDTvH2s4TZ\nyJcRhlx/STguOZVw6lxfcZI5QRievI0wjFvKaWJcN5nTU4Shz93i89uDMHQH4VSzh+PfekdZP41i\n4ySjtDkB3EDYqT4LfLqXPn8qcX8fwmzxTjZ9/VL
3eYXXr9Tn3YRDKcScuwgfYhW3AzPb1syWm9nB\nibbSLO9VWXKqEKuU0xsTz3+3uG5pdnrmfjKzXeNkww3bOeGb3cyssci2TW0Si3AYIvneS7Wdp+zz\nnWJupZy+HJezmVib9LmZ7UoY1XkOuDhuB/+R6Ku0+4MlhP5O9vmZbDopL20/QYptk/BBOS3mW/Jc\nop92IRxm2DeR01Tgb+7+975yqrAdQIb3sZm9J+Z2deyHDXFiEfpjYGFi39IOLHf35fS+j7qAUPx2\ns+kE2j2AZX3sz3uL9UK8Pc/MPm1mT5at217WN9IPCjcREsDMfkqoYE8nVKXTCMNgLxCOdY8lDJWe\nSNi4Rrn7YjM7j3DM7JOECVbfJkwmeoZwjPfHhGp5MuHb9vmEneLUCrHuIUwA+xwbr9PwT8KwaHlO\nP2HjBZI2F2cq4VvXwpjrdMIx28mEYbxzCROsrk3E2i3uBJN99J/xMY/GftqDMCnoPsKO5xrCm3Us\nYWeUNqdpwHWE4eRJhG8v3yZMRry3Uk5x57YmHhZJvn5Z+7z89ZsZn8NhiT7fnvCBOrGP7eCu+Lp/\nNq5/JaGg+lMVOZXHKl2v49Cy7aCTMJfkC4RJlVn66RzCh/KgmNOuhMM7nycM5WaJlXmbSsbazHuv\nr+28rz4/nXBI7J2E4ur18XW4APhBxj5/a3x+M2Iu34p9/z7CNQrS7g8mEo7P98R+ao2vw2TCh201\n/ZRm27yUcGGnTxJGUK4hFLGlfdRthPfr5Pic/yvxGmTZDtK+j0cRCqhnCCMcJ8W/O41wQazTCO+V\nOwhfTm6Nr+3KGH9teU4lcR81gXCWRKmvdiTMXbgk9k+l/XmvsXzjRMi3EkaKboh57E14/37G3X+C\n9JsinnIJG8+eeIBwxbZr3P1bAGZ2OuEY7UjCEPMR7r4YwN0vs3C1w+8SJkhNJ7xBJwH3EzbwZwg7\nkhcJH7IfImzMm8Qi7LQvI+y0ewgXShncS05PA4cQhj83F+dOwjHGSwhv3nWE4fJzCTu2mwkTe05L\nxNrkDZTwV8JOuZTT9YRvmFNiWz1hR5Elp2sI38S2ZeMFZeoIZ1as7yWnNxOGL0tKr1+mPq/w+t1D\n+PZWen43E3YqdxGGqytuB4TjwKXn1xwfPyY+n6zbQXmsn8X+2GQ7sHD6XA9h1OKijP30a0JB+nJ8\nfvMJM+HPIEyYyxKrmm0qGauv915f23lfff4AYXb7nwgfzEsJO/pPED5ks/b59Wycmb+QMFnwvvI4\nm9kf/CjGWkXYDhbFnD5CmJuQtZ/SbptfJRRCtxMK1p8AxxBe59K2WZo0uopQ4I4u6/M020Ha9/Fi\nM9uXcChgBKGAKD2WmNM/CBMod419t5Lwvjynl5zKJftqLeF9coO7/18v+/O+YgHg7ovM7AOEfdZp\nhInMX1LB0P8KOdIgIiIixVPEOQ0iIiJSQCoaREREJBUVDSIiIpKKigYRERFJRUWDiIiIpKKiQURE\nRFJR0SAiIiKpqGgQERGRVFQ0iIiISCoqGkRERCQVFQ0iIiKSyv8HWaNpi/E0QfYAAAAASUVORK5C\nYII=\n",
1127 | "text/plain": [
1128 | ""
1129 | ]
1130 | },
1131 | "metadata": {},
1132 | "output_type": "display_data"
1133 | },
1134 | {
1135 | "data": {
1136 | "text/html": [
1137 | "\n",
1138 | "
\n",
1139 | " \n",
1140 | " \n",
1141 | " | \n",
1142 | " Health_Camp_ID | \n",
1143 | " Count | \n",
1144 | "
\n",
1145 | " \n",
1146 | " \n",
1147 | " \n",
1148 | " | 0 | \n",
1149 | " 6566 | \n",
1150 | " 4061 | \n",
1151 | "
\n",
1152 | " \n",
1153 | " | 1 | \n",
1154 | " 6584 | \n",
1155 | " 3041 | \n",
1156 | "
\n",
1157 | " \n",
1158 | " | 2 | \n",
1159 | " 6583 | \n",
1160 | " 3026 | \n",
1161 | "
\n",
1162 | " \n",
1163 | " | 3 | \n",
1164 | " 6548 | \n",
1165 | " 3020 | \n",
1166 | "
\n",
1167 | " \n",
1168 | " | 4 | \n",
1169 | " 6582 | \n",
1170 | " 2763 | \n",
1171 | "
\n",
1172 | " \n",
1173 | " | 5 | \n",
1174 | " 6576 | \n",
1175 | " 2739 | \n",
1176 | "
\n",
1177 | " \n",
1178 | " | 6 | \n",
1179 | " 6567 | \n",
1180 | " 2441 | \n",
1181 | "
\n",
1182 | " \n",
1183 | " | 7 | \n",
1184 | " 6556 | \n",
1185 | " 2419 | \n",
1186 | "
\n",
1187 | " \n",
1188 | " | 8 | \n",
1189 | " 6551 | \n",
1190 | " 2180 | \n",
1191 | "
\n",
1192 | " \n",
1193 | " | 9 | \n",
1194 | " 6579 | \n",
1195 | " 1981 | \n",
1196 | "
\n",
1197 | " \n",
1198 | " | 10 | \n",
1199 | " 6573 | \n",
1200 | " 1795 | \n",
1201 | "
\n",
1202 | " \n",
1203 | " | 11 | \n",
1204 | " 6574 | \n",
1205 | " 1691 | \n",
1206 | "
\n",
1207 | " \n",
1208 | " | 12 | \n",
1209 | " 6550 | \n",
1210 | " 1425 | \n",
1211 | "
\n",
1212 | " \n",
1213 | " | 13 | \n",
1214 | " 6559 | \n",
1215 | " 692 | \n",
1216 | "
\n",
1217 | " \n",
1218 | " | 14 | \n",
1219 | " 6568 | \n",
1220 | " 643 | \n",
1221 | "
\n",
1222 | " \n",
1223 | " | 15 | \n",
1224 | " 6577 | \n",
1225 | " 385 | \n",
1226 | "
\n",
1227 | " \n",
1228 | " | 16 | \n",
1229 | " 6533 | \n",
1230 | " 377 | \n",
1231 | "
\n",
1232 | " \n",
1233 | " | 17 | \n",
1234 | " 6572 | \n",
1235 | " 184 | \n",
1236 | "
\n",
1237 | " \n",
1238 | " | 18 | \n",
1239 | " 6525 | \n",
1240 | " 166 | \n",
1241 | "
\n",
1242 | " \n",
1243 | " | 19 | \n",
1244 | " 6547 | \n",
1245 | " 111 | \n",
1246 | "
\n",
1247 | " \n",
1248 | " | 20 | \n",
1249 | " 6545 | \n",
1250 | " 109 | \n",
1251 | "
\n",
1252 | " \n",
1253 | "
\n",
1254 | "
"
1255 | ],
1256 | "text/plain": [
1257 | " Health_Camp_ID Count\n",
1258 | "0 6566 4061\n",
1259 | "1 6584 3041\n",
1260 | "2 6583 3026\n",
1261 | "3 6548 3020\n",
1262 | "4 6582 2763\n",
1263 | "5 6576 2739\n",
1264 | "6 6567 2441\n",
1265 | "7 6556 2419\n",
1266 | "8 6551 2180\n",
1267 | "9 6579 1981\n",
1268 | "10 6573 1795\n",
1269 | "11 6574 1691\n",
1270 | "12 6550 1425\n",
1271 | "13 6559 692\n",
1272 | "14 6568 643\n",
1273 | "15 6577 385\n",
1274 | "16 6533 377\n",
1275 | "17 6572 184\n",
1276 | "18 6525 166\n",
1277 | "19 6547 111\n",
1278 | "20 6545 109"
1279 | ]
1280 | },
1281 | "execution_count": 12,
1282 | "metadata": {},
1283 | "output_type": "execute_result"
1284 | }
1285 | ],
1286 | "source": [
1287 | "grouped_df = test.Health_Camp_ID.value_counts().reset_index()\n",
1288 | "grouped_df.columns = [\"Health_Camp_ID\", \"Count\"]\n",
1289 | "plt.bar(range(len(grouped_df.Health_Camp_ID)), grouped_df.Count, tick_label=grouped_df.Health_Camp_ID, color='r')\n",
1290 | "plt.show()\n",
1291 | "grouped_df"
1292 | ]
1293 | },
1294 | {
1295 | "cell_type": "markdown",
1296 | "metadata": {},
1297 | "source": [
1298 | "Now let us see how many patients and health camps are common to the train and test sets."
1299 | ]
1300 | },
1301 | {
1302 | "cell_type": "code",
1303 | "execution_count": 13,
1304 | "metadata": {
1305 | "collapsed": false
1306 | },
1307 | "outputs": [
1308 | {
1309 | "name": "stdout",
1310 | "output_type": "stream",
1311 | "text": [
1312 | "Number of common patients between train and test : 8460\n",
1313 | "Number of common health camps between train and test : 0\n"
1314 | ]
1315 | }
1316 | ],
1317 | "source": [
1318 | "print \"Number of common patients between train and test : \",len(set(train.Patient_ID).intersection(test.Patient_ID))\n",
1319 | "print \"Number of common health camps between train and test :\", len(set(train.Health_Camp_ID).intersection(test.Health_Camp_ID))"
1320 | ]
1321 | },
1322 | {
1323 | "cell_type": "markdown",
1324 | "metadata": {},
1325 | "source": [
1326 | "###### Points to ponder\n",
1327 | "* 8460 patients appear in both train and test, so it might be helpful to use the past behavior of these patients as features.\n",
1328 | "\n",
1329 | "* There are no common health camps between train and test since the dates are disjoint. So it might be helpful to build a validation sample that displays the same behavior, i.e. one whose health camps do not overlap with those used for training."
1330 | ]
1331 | },
1332 | {
1333 | "cell_type": "markdown",
1334 | "metadata": {},
1335 | "source": [
1336 | "##### Getting the outcome variable\n",
1337 | "\n",
1338 | "We do not have an 'outcome' variable as such in the training set, but the data page tells us the following:\n",
1339 | "\n",
1340 | "\"MedCamp runs 3 formats of these camps. The first and second format provides people with an instantaneous health score. The third format provides information about several health issues through various awareness stalls.\n",
1341 | "\n",
1342 | "&\n",
1343 | "\n",
1344 | "For the first 2 formats, a favourable outcome is defined as getting a health_score, while in the third format it is defined as visiting at least a stall\"\n",
1345 | "\n",
1346 | "So let us use this information to create a response variable and add it to the train dataset."
1347 | ]
1348 | },
1349 | {
1350 | "cell_type": "code",
1351 | "execution_count": 14,
1352 | "metadata": {
1353 | "collapsed": false
1354 | },
1355 | "outputs": [
1356 | {
1357 | "name": "stdout",
1358 | "output_type": "stream",
1359 | "text": [
1360 | "Number of favourable outcomes from all camps : 20534\n",
1361 | "(75278, 8)\n"
1362 | ]
1363 | },
1364 | {
1365 | "data": {
1366 | "text/html": [
1367 | "\n",
1368 | "
\n",
1369 | " \n",
1370 | " \n",
1371 | " | \n",
1372 | " Patient_ID | \n",
1373 | " Health_Camp_ID | \n",
1374 | " Registration_Date | \n",
1375 | " Var1 | \n",
1376 | " Var2 | \n",
1377 | " Var3 | \n",
1378 | " Var4 | \n",
1379 | " Var5 | \n",
1380 | " Outcome | \n",
1381 | "
\n",
1382 | " \n",
1383 | " \n",
1384 | " \n",
1385 | " | 0 | \n",
1386 | " 489652 | \n",
1387 | " 6578 | \n",
1388 | " 10-Sep-05 | \n",
1389 | " 4 | \n",
1390 | " 0 | \n",
1391 | " 0 | \n",
1392 | " 0 | \n",
1393 | " 2 | \n",
1394 | " 1 | \n",
1395 | "
\n",
1396 | " \n",
1397 | " | 1 | \n",
1398 | " 507246 | \n",
1399 | " 6578 | \n",
1400 | " 18-Aug-05 | \n",
1401 | " 45 | \n",
1402 | " 5 | \n",
1403 | " 0 | \n",
1404 | " 0 | \n",
1405 | " 7 | \n",
1406 | " 0 | \n",
1407 | "
\n",
1408 | " \n",
1409 | " | 2 | \n",
1410 | " 523729 | \n",
1411 | " 6534 | \n",
1412 | " 29-Apr-06 | \n",
1413 | " 0 | \n",
1414 | " 0 | \n",
1415 | " 0 | \n",
1416 | " 0 | \n",
1417 | " 0 | \n",
1418 | " 1 | \n",
1419 | "
\n",
1420 | " \n",
1421 | " | 3 | \n",
1422 | " 524931 | \n",
1423 | " 6535 | \n",
1424 | " 07-Feb-04 | \n",
1425 | " 0 | \n",
1426 | " 0 | \n",
1427 | " 0 | \n",
1428 | " 0 | \n",
1429 | " 0 | \n",
1430 | " 0 | \n",
1431 | "
\n",
1432 | " \n",
1433 | " | 4 | \n",
1434 | " 521364 | \n",
1435 | " 6529 | \n",
1436 | " 28-Feb-06 | \n",
1437 | " 15 | \n",
1438 | " 1 | \n",
1439 | " 0 | \n",
1440 | " 0 | \n",
1441 | " 7 | \n",
1442 | " 1 | \n",
1443 | "
\n",
1444 | " \n",
1445 | "
\n",
1446 | "
"
1447 | ],
1448 | "text/plain": [
1449 | " Patient_ID Health_Camp_ID Registration_Date Var1 Var2 Var3 Var4 Var5 \\\n",
1450 | "0 489652 6578 10-Sep-05 4 0 0 0 2 \n",
1451 | "1 507246 6578 18-Aug-05 45 5 0 0 7 \n",
1452 | "2 523729 6534 29-Apr-06 0 0 0 0 0 \n",
1453 | "3 524931 6535 07-Feb-04 0 0 0 0 0 \n",
1454 | "4 521364 6529 28-Feb-06 15 1 0 0 7 \n",
1455 | "\n",
1456 | " Outcome \n",
1457 | "0 1 \n",
1458 | "1 0 \n",
1459 | "2 1 \n",
1460 | "3 0 \n",
1461 | "4 1 "
1462 | ]
1463 | },
1464 | "execution_count": 14,
1465 | "metadata": {},
1466 | "output_type": "execute_result"
1467 | }
1468 | ],
1469 | "source": [
1470 | "## Get only the necessary columns and rename them for concatenating ##\n",
1471 | "col_names = [['Patient_ID','Health_Camp_ID','Outcome']]\n",
1472 | "first_camp = first_format_camp[['Patient_ID','Health_Camp_ID','Health_Score']]\n",
1473 | "first_camp.columns = col_names\n",
1474 | "second_camp = second_format_camp[['Patient_ID','Health_Camp_ID','Health Score']]\n",
1475 | "second_camp.columns = col_names\n",
1476 | "third_camp = third_format_camp[['Patient_ID','Health_Camp_ID','Number_of_stall_visited']]\n",
1477 | "third_camp = third_camp[third_camp['Number_of_stall_visited']>0]\n",
1478 | "third_camp.columns = col_names\n",
1479 | "\n",
1480 | "## concat all the three camps ##\n",
1481 | "all_camps = pd.concat([first_camp, second_camp, third_camp])\n",
1482 | "all_camps['Outcome'] = 1\n",
1483 | "print \"Number of favourable outcomes from all camps : \", all_camps.shape[0]\n",
1484 | "\n",
1485 | "train = pd.read_csv(\"Train.csv\")\n",
1486 | "print train.shape\n",
1487 | "\n",
1488 | "## merge with train and create a new variable 'Outcome' which can be used as the response variable ##\n",
1489 | "train = train.merge(all_camps, on=['Patient_ID','Health_Camp_ID'], how='left')\n",
1490 | "train['Outcome'] = train['Outcome'].fillna(0).astype('int')\n",
1491 | "train.head()"
1492 | ]
1493 | }
1494 | ],
1495 | "metadata": {
1496 | "kernelspec": {
1497 | "display_name": "Python 2",
1498 | "language": "python",
1499 | "name": "python2"
1500 | },
1501 | "language_info": {
1502 | "codemirror_mode": {
1503 | "name": "ipython",
1504 | "version": 2
1505 | },
1506 | "file_extension": ".py",
1507 | "mimetype": "text/x-python",
1508 | "name": "python",
1509 | "nbconvert_exporter": "python",
1510 | "pygments_lexer": "ipython2",
1511 | "version": "2.7.10"
1512 | }
1513 | },
1514 | "nbformat": 4,
1515 | "nbformat_minor": 1
1516 | }
1517 |
--------------------------------------------------------------------------------