├── Question 1 ├── Report.pdf ├── xgb_regressor_updated_v3.py └── Final_done.ipynb ├── Question 2 ├── Question2Report.pdf └── 2_final.R ├── Question 3 ├── Question3Report.pdf └── 3_final.R └── README.md /Question 1/Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arkadeep-sophoIITG/JP-Morgan-Quant-Challenge-2018/HEAD/Question 1/Report.pdf -------------------------------------------------------------------------------- /Question 2/Question2Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arkadeep-sophoIITG/JP-Morgan-Quant-Challenge-2018/HEAD/Question 2/Question2Report.pdf -------------------------------------------------------------------------------- /Question 3/Question3Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arkadeep-sophoIITG/JP-Morgan-Quant-Challenge-2018/HEAD/Question 3/Question3Report.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JP Morgan Quant Challenge - SiliconLobby [Arkadeep & Hiten] 2 | 36-hour competition that exposed participants to machine learning, derivative modelling, and how to apply their knowledge to the real-life problems faced by the Quantitative Research and Data Analytics teams on a day to day basis. 3 | ## Question 1 - Flight Price Predictor 4 | ### Predictive Modelling with Machine Learning 5 | Optimal timing for airline ticket purchasing from the consumer’s perspective is challenging principally because buyers have insufficient information for reasoning about future price movements. In this challenge we simulate various models for computing the best possible expected future prices by constructing new features from the existing features and also introducing new features to make our model more robust. 6 | 7 | ## Question 2 - Stock Price Simulation 8 | ### Monte Carlo Simulation 9 | Simulate Stock Price Movement using historical data 10 | 11 | ## Question 3 - Derivative Price Evaluation 12 | ### Probablity Theory and Monte Carlo Simulation 13 | Given Payoff States and Contingency Claims evaluate the fair price of the derivative designed 14 | -------------------------------------------------------------------------------- /Question 2/2_final.R: -------------------------------------------------------------------------------- 1 | 2 | # Read only region start 3 | 4 | solution <- function(a, theta, X0, sigma) { 5 | # Input parameters are : 6 | # input1 As Numeric eg:- 232323.4323 7 | # input2 As Numeric eg:- 232323.4323 8 | # input3 As Numeric eg:- 232323.4323 9 | # input4 As Numeric eg:- 232323.4323 10 | 11 | # Expected return type : 12 | # output1 As vector of type Numeric 13 | # Read only region end 14 | 15 | t <- 2 16 | #1 answer 17 | answer1 <- X0 * exp(-a*t) + theta*(1-exp(-a*t)) 18 | 19 | #2 answer 20 | sim = 10^5 21 | vec<- rep(X0,sim) 22 | breaks = 100 23 | for(i in seq(1,breaks)){ 24 | x<-rnorm(sim) 25 | vec<- vec+a*(theta - vec)*(t/breaks) + sigma*sqrt(vec)*x*sqrt(t/breaks); 26 | } 27 | for(i in seq(1,length(vec))){ 28 | vec[i] = max(vec[i]-100,0) 29 | } 30 | answer2<-mean(vec) 31 | 32 | 33 | #3 answer 34 | del<-10^-7 35 | sigma1<-sigma+del 36 | vec<- rep(X0,sim) 37 | vec1<-rep(X0,sim) 38 | for(i in seq(1,breaks)){ 39 | x<-rnorm(sim) 40 | vec<- vec+a*(theta - vec)*(t/breaks) + sigma*sqrt(vec)*x*sqrt(t/breaks); 41 | vec1<- vec1+ a*(theta - vec1)*(t/breaks) + sigma1*sqrt(vec1)*x*sqrt(t/breaks); 42 | } 43 | vega <- 0 44 | for(i in seq(1,sim)){ 45 | if(vec[i]>100&&vec1[i]>100){ 46 | vega <- vega+((vec1[i]-vec[i])/del) 47 | } 48 | } 49 | answer3<-vega/sim 50 | 51 | 52 | #OUTPUT 53 | output <- c(answer1,answer2,answer3); 54 | return (output); 55 | stop("UnsupportedOperation solution( input1, input2, input3, input4)") 56 | } 57 | -------------------------------------------------------------------------------- /Question 3/3_final.R: -------------------------------------------------------------------------------- 1 | 2 | X_SPOT <- c(2443.25,2447.83,2459.27,2459.14,2460.61,2473.83,2473.45,2472.54,2469.91,2477.13,2477.83,2475.42,2472.1,2470.3,2476.35,2477.57,2472.16,2476.83,2480.91,2474.92,2474.02,2438.21,2441.32,2465.84,2464.61,2468.11,2430.01,2425.55,2428.37,2452.51,2444.04,2438.97,2443.05,2444.24,2446.3,2457.59,2471.65,2476.55,2457.85,2457.85,2465.54,2465.1,2461.43,2488.11,2496.48,2498.37,2495.62,2500.23,2503.87,2506.65,2508.24,2500.6,2502.22,2496.66,2496.84,2507.04,2510.06,2519.36,2529.12,2534.58,2537.74,2552.07,2549.33,2544.73,2550.64,2555.24,2550.93,2553.17,2557.64,2559.36,2561.26,2562.1,2575.21,2564.98,2569.13,2557.15,2560.4,2581.07,2572.83,2575.26,2579.36,2579.85,2587.84,2591.13,2590.64,2594.38,2584.62,2582.3,2584.84,2578.87,2564.62,2585.64,2578.85,2582.14,2599.03,2597.08,2602.42,2602.42,2601.42,2627.04,2626.07,2647.58,2642.22,2639.44,2629.57,2629.27,2636.98,2651.5,2659.99,2664.11,2662.85,2652.01,2675.81,2690.16,2681.47,2679.25,2684.57,2683.34,2680.5,2680.5,2682.62,2687.54,2673.61,2695.81,2695.81,2713.06,2723.99,2743.15,2747.71,2751.29,2748.23,2767.56,2786.24,2776.42,2776.42,2802.56,2798.03,2810.3,2832.97,2839.13,2837.54,2839.25,2872.87,2853.53,2822.43,2823.81,2821.98,2762.13,2648.94,2695.14,2681.66,2581,2619.55,2656,2662.94,2698.63,2731.2,2732.22,2716.26,2716.26,2701.33,2703.96,2747.3,2779.6,2744.28,2713.83,2677.67,2691.25,2720.94,2728.12,2726.8,2738.97,2786.57,2783.02,2765.31,2749.48,2747.33,2752.01,2712.92,2716.94,2711.93,2643.69,2588.26,2658.55,2612.62,2605,2640.87,2581.88,2581.88,2614.45,2644.69,2662.84,2604.47,2613.16,2656.87,2642.19,2663.99,2656.3,2677.84,2706.39,2708.64,2693.13,2670.14,2670.29,2634.56,2639.4,2666.94,2669.91,2648.05,2654.8,2635.67,2629.73,2663.42,2672.63,2671.92,2697.79,2723.07,2727.72,2730.13,2711.45,2722.46,2720.13,2712.97,2733.01,2724.44,2733.29,2727.76,2721.33,2689.86,2689.86,2724.01,2705.27,2734.62,2746.87,2748.8,2772.35,2770.37,2779.03,2782,2786.85,2775.63,2782.49,2779.66,2773.75,2762.59,2767.32,2749.76,2754.88,2717.07,2723.06,2699.63,2716.31,2718.37,2726.71,2713.22,2736.61,2736.61,2759.82,2784.17,2793.84,2774.02,2798.29) 3 | Y_SPOT <- c(35.5,35.86,36.35,36.38,36.43,36.47,36.41,36.07,35.82,35.57,35.62,35.94,35.77,35.98,34.76,34.82,34.77,35.27,35.3,35.39,35.28,34.88,34.93,35.47,35.54,35.75,35,34.83,34.91,35.3,35.49,35.52,35.6,35.51,35.52,35.82,36.54,37.36,37.23,37.23,37.67,36.91,37,37.35,37.89,38.21,38.79,38.88,38.59,38.7,38.88,39.1,39.42,40.3,40.26,40.58,40.58,40.38,42.15,43.45,43.78,43.85,44.93,45.33,45.21,45.47,44.89,45.88,45.76,45.02,45.12,45.35,45.61,45.15,46.48,45.12,45.25,44.64,43.37,42.98,43.13,42.6,42.34,42.14,41.7,42.11,42.11,42.66,43.57,43,42.86,43.6,43.88,44.88,44.97,44.29,44.46,44.46,44.17,44.92,43.81,43.09,42.79,43.05,42.8,42.15,42.02,42.02,41.67,41.53,41.4,40.81,40.95,42.15,42.49,42.52,42.16,42.02,41.8,41.8,41.31,41.38,40.99,41.8,41.8,42.82,44.14,44.01,44.22,44.05,43,44.19,44.07,44.19,44.19,44.03,43.86,43.15,43.29,43.38,44.16,43.16,43.49,43.02,42.7,42.41,42.43,41,39.54,41.86,42.39,40.75,41.46,42,41.4,41.81,41.85,41.09,40.77,40.77,40.56,40.91,40.91,41.54,40.17,39.35,37.79,37.43,37.74,37.93,37.74,37.84,37.84,37.83,38.01,37.69,37.85,37.94,37.01,36.89,37.58,36.35,35.17,35.99,34.87,35.47,36.34,35.76,35.76,36.94,38.03,38,37.68,37.83,39.07,39,38.83,38.73,39.17,39.22,38.93,37.77,37.61,37.69,37.93,38.11,38.25,37.65,36.74,36.42,36.2,36.15,36.71,36.34,36.33,36.27,37.16,36.89,36.63,36.94,38.03,38.3,37.79,38.09,38.28,37.85,38.39,38.3,37.38,37.38,37.83,42.7,43.2,43.78,43.41,43.93,44.01,44.25,44.85,44.18,44.45,43.57,43.91,43.95,42.26,41.95,41.12,41.25,40.61,41.01,40.37,40.52,39.4,39.5,38.97,39.47,39.47,39.16,39.75,40.09,39.3,39.27) 4 | library(MASS) 5 | # Read only region start 6 | 7 | solution <- function(r, N) { 8 | # Input parameters are : 9 | # input1 As Numeric eg:- 232323.4323 10 | # input2 As Numeric eg:- 232323.4323 11 | 12 | # Expected return type : 13 | # output1 As Numeric eg:- 232323.4323 14 | # Read only region end 15 | # Write code here... 16 | 17 | #Get sigma from spot prices given 18 | sig_x <- sqrt(var(diff(log(X_SPOT)))); 19 | sig_y <- sqrt(var(diff(log(Y_SPOT)))); 20 | rho <- cor(diff(log(X_SPOT)),diff(log(Y_SPOT))) 21 | #Simulate path-dependent option 22 | sim <- 10000 23 | sigma <- matrix(c(252*0.25,rho*252*0.25,rho*252*0.25,252*0.25),2,2) 24 | payoff <- rep(1,sim); 25 | value <- matrix(rep(c(X_SPOT[1],Y_SPOT[1]),sim*2),sim,2,byrow=TRUE); 26 | for(i in seq(0,N)){ 27 | 28 | rand_gen <- mvrnorm(n=sim, rep(0,2),sigma) 29 | 30 | value_x <- value + r*value*(0.25*252) + sig_x*rand_gen[,1] 31 | value_y <- value + r*value*(0.25*252) + sig_y*rand_gen[,2] 32 | for(j in seq(1,nrow(value))){ 33 | if (((value_x[j,1]>1.25*X_SPOT[1]) || (value_x[j,1]<0.75*X_SPOT[1]))){ 34 | payoff[j] <- 0; 35 | } 36 | if (((value_y[j,2]>1.25*Y_SPOT[1]) || (value_y[j,2]<0.75*Y_SPOT[1]))){ 37 | payoff[j] <- 0; 38 | } 39 | } 40 | } 41 | payoff <- (payoff*(value_x[,1]/(2*X_SPOT[1])) + payoff*(value_y[,2]/(2*Y_SPOT[1])))-1 42 | sum = 0; 43 | #return mean(payoff[payoff>0])*exp(-r*(n/4)*252) 44 | for(i in payoff){ 45 | sum <- sum + max(payoff,0); 46 | } 47 | return (sum/sim)*exp(-r*(n/4)*252); 48 | 49 | stop("UnsupportedOperation solution( input1, input2)") 50 | } 51 | -------------------------------------------------------------------------------- /Question 1/xgb_regressor_updated_v3.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import log_loss,accuracy_score,roc_auc_score 2 | from sklearn import preprocessing 3 | from sklearn.metrics import mean_squared_error 4 | import numpy as np 5 | import pandas as pd 6 | import argparse 7 | import os 8 | from hyperopt import hp 9 | from hyperopt import fmin, tpe, hp, STATUS_OK, Trials 10 | import pickle 11 | import sys 12 | import xgboost 13 | from xgboost import XGBRegressor 14 | from sklearn.cross_validation import train_test_split 15 | import time 16 | 17 | def label_gender (row): 18 | if 'Dr' in row['Name']: 19 | return 4 20 | if 'Miss' in row['Name']: 21 | return 0 22 | if 'Mrs' in row['Name']: 23 | return 1 24 | if 'Mr' in row['Name']: 25 | return 3 26 | return None 27 | 28 | 29 | 30 | 31 | def label_race (row): 32 | if (row['From'] == 1 and row['To'] == 2) or (row['From'] == 2 and row['To'] == 1): 33 | return 1446 34 | if (row['From'] == 1 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 1): 35 | return 1654 36 | if (row['From'] == 1 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 1): 37 | return 1148 38 | if (row['From'] == 1 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 1): 39 | return 622 40 | if (row['From'] == 1 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 1): 41 | return 1190 42 | if (row['From'] == 1 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 1): 43 | return 1028 44 | if (row['From'] == 2 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 2): 45 | return 470 46 | if (row['From'] == 2 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 2): 47 | return 480 48 | if (row['From'] == 2 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 2): 49 | return 1140 50 | if (row['From'] == 2 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 2): 51 | return 437 52 | if (row['From'] == 2 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2): 53 | return 1485 54 | if (row['From'] == 3 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 3): 55 | return 1307 56 | if (row['From'] == 3 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 3): 57 | return 1180 58 | if (row['From'] == 3 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 3): 59 | return 886 60 | if (row['From'] == 3 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2): 61 | return 1366 62 | if (row['From'] == 4 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 4): 63 | return 1253 64 | if (row['From'] == 4 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 4): 65 | return 417 66 | if (row['From'] == 4 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 4): 67 | return 1760 68 | if (row['From'] == 5 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 5): 69 | return 1425 70 | if (row['From'] == 5 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 5): 71 | return 520 72 | if (row['From'] == 6 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 6): 73 | return 1534 74 | return None 75 | 76 | 77 | def label_race (row): 78 | return time_from[str(int(row['From']))][int(row['To'])-1] 79 | 80 | def dataframe_train(path): 81 | df = pd.read_csv(path,header=0,parse_dates=[1,4,6]) 82 | train_Y = [] 83 | train_Y = df['Fare'] 84 | return df,train_Y 85 | 86 | def dataframe_test(path): 87 | df = pd.read_csv(path,header=0,parse_dates=[1,4,6]) 88 | return df 89 | 90 | 91 | def objective(space): 92 | 93 | clf = XGBRegressor(n_estimators = space['n_estimators'], 94 | max_depth = space['max_depth'], 95 | min_child_weight = space['min_child_weight'], 96 | subsample = space['subsample'], 97 | learning_rate = space['learning_rate'], 98 | gamma = space['gamma'], 99 | colsample_bytree = space['colsample_bytree'], 100 | reg_alpha= space['alpha'], 101 | reg_lambda=space['lambda'], 102 | booster= 'gbtree', 103 | objective='reg:linear' 104 | ) 105 | 106 | eval_set = [( train, y_train)] 107 | 108 | clf.fit(train, 109 | y_train, 110 | eval_set=eval_set, 111 | eval_metric = 'rmse') 112 | 113 | pred = clf.predict(train) 114 | rmse = mean_squared_error((y_train), (pred)) 115 | 116 | # print "SCORE:", mae 117 | return{'loss':rmse, 'status': STATUS_OK } 118 | 119 | def optimize(cores,random_state): 120 | space ={ 121 | 'max_depth': hp.choice('max_depth', np.arange(10, 70, dtype=int)), 122 | 'min_child_weight': hp.quniform ('min_child_weight', 1, 20, 1), 123 | 'subsample': hp.uniform ('subsample', 0.8, 1), 124 | 'n_estimators' : hp.choice('n_estimators', np.arange(500, 5000, 100, dtype=int)), 125 | 'learning_rate' : hp.quniform('learning_rate', 0.025, 0.5, 0.025), 126 | 'gamma' : hp.quniform('gamma', 0.5, 1, 0.05), 127 | 'colsample_bytree' : hp.quniform('colsample_bytree', 0.5, 1, 0.05), 128 | 'alpha' : hp.quniform('alpha', 0, 10, 1), 129 | 'lambda': hp.quniform('lambda', 1, 2, 0.1), 130 | 'nthread': cores, 131 | 'objective': 'reg:linear', 132 | 'booster': 'gbtree', 133 | 'seed': random_state 134 | } 135 | 136 | 137 | trials = Trials() 138 | best = fmin(fn=objective, 139 | space=space, 140 | algo=tpe.suggest, 141 | max_evals=50, # change 142 | trials=trials) 143 | return best 144 | 145 | 146 | # Loaded the entire training and test dataset into pandas dataframe 147 | df_train,trainY = dataframe_train('train.csv') 148 | df_test = dataframe_test('test.csv') 149 | 150 | ###### Computing new features based on training data ####### 151 | 152 | # Added difference feature based on difference between Booking Date and Flight date 153 | df_train['diff'] = df_train['Flight Date'] - df_train['Booking Date'] 154 | df_test['diff'] = df_test['Flight Date'] - df_test['Booking Date'] 155 | df_train['diff'] = df_train['diff'] / np.timedelta64(1, 'D') 156 | df_test['diff'] = df_test['diff']/np.timedelta64(1,'D') 157 | 158 | 159 | # Added feature age based on the birthdate of the passengers 160 | df_train['age'] = df_train['Booking Date'] - df_train['Date of Birth'] 161 | df_train['age'] = df_train['age'] / np.timedelta64(1, 'D') 162 | df_test['age'] = df_test['Booking Date'] - df_test['Date of Birth'] 163 | df_test['age'] = df_test['age'] / np.timedelta64(1, 'D') 164 | 165 | # Added flight_month feature based on the month of the flight 166 | 167 | df_train['flight_month'] = df_train['Flight Date'].dt.month 168 | df_test['flight_month'] = df_test['Flight Date'].dt.month 169 | 170 | # Added feature WEEKEND based on whether the flight is on weekday or weekend 171 | df_train['WEEKEND'] = ((pd.DatetimeIndex(df_train['Flight Date']).dayofweek)//5+1).astype(float) 172 | df_test['WEEKEND'] = ((pd.DatetimeIndex(df_test['Flight Date']).dayofweek)//5+1).astype(float) 173 | #df_test['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float) 174 | #df_train['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float) 175 | 176 | # Added feature weekday based on flight date (Monday - 0 , Tuesday -1 , and so) 177 | df_train['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek 178 | df_test['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek 179 | 180 | # Added feature week of flight based on flight date. 181 | 182 | df_train['FLIGHTWEEK'] = df_train['Flight Date'].dt.week 183 | df_test['FLIGHTWEEK'] = df_test['Flight Date'].dt.week 184 | 185 | df_train = df_train.drop(columns=['Flight Date', 'Booking Date']) 186 | df_test = df_test.drop(columns=['Flight Date', 'Booking Date']) 187 | df_train = df_train.drop(columns='Date of Birth') 188 | df_test = df_test.drop(columns='Date of Birth') 189 | 190 | 191 | dict_cities = {'Mumbai':1 , 'Patna' : 2, 'Kolkata' : 3, 'Delhi' : 4, 'Hyderabad': 5, 'Lucknow': 6, 'Chennai':7} 192 | dict_class = {'Business':25, 'Economy': 10} 193 | df_train = df_train.replace({'From':dict_cities}) 194 | df_test = df_test.replace({'From':dict_cities}) 195 | df_train = df_train.replace({'To': dict_cities}) 196 | df_test = df_test.replace({'To': dict_cities}) 197 | df_train = df_train.replace({'Class':dict_class}) 198 | df_test = df_test.replace({'Class':dict_class}) 199 | 200 | df_train['Flight Time'] = df_train['Flight Time'].str.split(':').str[0] 201 | df_train['age'] = df_train['age']/365.0 202 | df_test['Flight Time'] = df_test['Flight Time'].str.split(':').str[0] 203 | df_test['age'] = df_test['age']/365.0 204 | 205 | time_from = {} 206 | time_from['1'] = [0,130,140,120,75,130,89] 207 | time_from['2'] = [205,0,60,95,104,39,134] 208 | time_from['3'] = [155,90,0,120,120,105,135] 209 | time_from['4'] = [115,80,115,0,120,55,155] 210 | time_from['5'] = [70,104,115,125,0,235,65] 211 | time_from['6'] = [130,39,100,60,245,0,138] 212 | time_from['7'] = [95,134,115,155,65,138,0] 213 | 214 | 215 | 216 | df_train['flight_duration'] = df_train.apply (lambda row: label_race (row),axis=1) 217 | df_test['flight_duration'] = df_test.apply (lambda row: label_race (row),axis=1) 218 | df_train['Flight Time'] = df_train['Flight Time'].astype(str).astype(int) 219 | df_test['Flight Time'] = df_test['Flight Time'].astype(str).astype(int) 220 | df_train['Name'] = df_train['Name'].str.split('.').str[0] 221 | df_test['Name'] = df_test['Name'].str.split('.').str[0] 222 | 223 | df_train['gender'] = df_train.apply (lambda row: label_gender (row),axis=1) 224 | df_test['gender'] = df_test.apply (lambda row: label_gender (row),axis=1) 225 | df_train= df_train.drop(columns='Name') 226 | df_test = df_test.drop(columns='Name') 227 | df_train =df_train.drop(columns='Fare') 228 | train = df_train 229 | y_train = trainY 230 | 231 | 232 | best = optimize(24,1234) 233 | print(best) 234 | xgb = XGBRegressor(n_estimators = int(best['n_estimators']), 235 | learning_rate= best['learning_rate'],objective="reg:linear",booster='gbtree', 236 | gamma = best['gamma'],max_depth=best['max_depth'], 237 | min_child_weight=int(best['min_child_weight']),subsample=best['subsample'], 238 | colsample_bytree=best['colsample_bytree'],reg_alpha=best['alpha'], 239 | reg_lambda=best['lambda'],nthread=24,random_state=1234) 240 | 241 | 242 | xgb.fit(df_train,trainY) 243 | print(xgb.score(df_train,trainY)) 244 | 245 | 246 | xgb_final_preds = xgb.predict(df_test) 247 | np.savetxt('/home/arkadeep/scratch/finall_please_finallll_donee.csv',xgb_final_preds,delimiter = ',') -------------------------------------------------------------------------------- /Question 1/Final_done.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 38, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import os\n", 12 | "train_Y = []\n", 13 | "def dataframe_train(path):\n", 14 | " df = pd.read_csv(path,header=0,parse_dates=[1,4,6])\n", 15 | " train_Y = df['Fare']\n", 16 | " return df,train_Y\n", 17 | "\n", 18 | "def dataframe_test(path):\n", 19 | " df = pd.read_csv(path,header=0,parse_dates=[1,4,6])\n", 20 | " return df" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 39, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "df_train,trainY = dataframe_train('/home/arkadeep/Desktop/JP Morgan/train.csv')\n", 30 | "df_test = dataframe_test('/home/arkadeep/Desktop/JP Morgan/test.csv')\n", 31 | "df_train['diff'] = df_train['Flight Date'] - df_train['Booking Date']\n", 32 | "df_test['diff'] = df_test['Flight Date'] - df_test['Booking Date']\n", 33 | "df_train['diff'] = df_train['diff'] / np.timedelta64(1, 'D')\n", 34 | "df_test['diff'] = df_test['diff']/np.timedelta64(1,'D')\n", 35 | "df_train['age'] = df_train['Booking Date'] - df_train['Date of Birth']\n", 36 | "df_train['age'] = df_train['age'] / np.timedelta64(1, 'D')\n", 37 | "df_test['age'] = df_test['Booking Date'] - df_test['Date of Birth']\n", 38 | "df_test['age'] = df_test['age'] / np.timedelta64(1, 'D')\n", 39 | "df_train['flight_month'] = df_train['Flight Date'].dt.month\n", 40 | "df_test['flight_month'] = df_test['Flight Date'].dt.month\n", 41 | "df_train['WEEKEND'] = ((pd.DatetimeIndex(df_train['Flight Date']).dayofweek)//5+1).astype(float)\n", 42 | "df_test['WEEKEND'] = ((pd.DatetimeIndex(df_test['Flight Date']).dayofweek)//5+1).astype(float)\n", 43 | "df_train['FLIGHTWEEK'] = df_train['Flight Date'].dt.week\n", 44 | "df_test['FLIGHTWEEK'] = df_test['Flight Date'].dt.week\n", 45 | "#df_test['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float)\n", 46 | "#df_train['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float)\n", 47 | "df_train['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek\n", 48 | "df_test['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 40, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "holidays = ['2016-01-01','2016-01-14','2016-01-15','2016-01-16','2016-01-26','2016-02-12','2016-03-07','2016-03-23','2016-03-25','2016-03-27','2016-04-13','2016-04-15','2016-04-20','2016-04-21','2016-05-21','2016-07-08','2016-08-15','2016-08-18','2016-08-25','2016-09-05','2016-09-12','2016-09-14','2016-10-02','2016-10-11','2016-10-12','2016-10-30','2016-10-31','2016-11-01','2016-11-06','2016-11-24','2016-12-13','2016-12-24','2016-12-25']" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 41, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "imp = {'Mumbai':1 , 'Patna' : 2, 'Kolkata' : 3, 'Delhi' : 4, 'Hyderabad': 5, 'Lucknow': 6, 'Chennai':7}" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 42, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "dict_cities = {'Mumbai':1 , 'Patna' : 2, 'Kolkata' : 3, 'Delhi' : 4, 'Hyderabad': 5, 'Lucknow': 6, 'Chennai':7}\n", 76 | "dict_class = {'Business':25, 'Economy': 10}\n", 77 | "df_train = df_train.replace({'From':dict_cities})\n", 78 | "df_test = df_test.replace({'From':dict_cities})\n", 79 | "df_train = df_train.replace({'To': dict_cities})\n", 80 | "df_test = df_test.replace({'To': dict_cities})\n", 81 | "df_train = df_train.replace({'Class':dict_class})\n", 82 | "df_test = df_test.replace({'Class':dict_class})" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 43, 88 | "metadata": { 89 | "code_folding": [ 90 | 3 91 | ] 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "def label_holiday (row):\n", 96 | " if str(row['Flight Date']) in holidays :\n", 97 | " return 3\n", 98 | " elif row['Flight Date'].dayofweek //5 ==1:\n", 99 | " return 2\n", 100 | " else:\n", 101 | " return 1\n", 102 | " return None\n", 103 | "df_train['WEEKEND'] = df_train.apply (lambda row: label_holiday(row),axis=1)\n", 104 | "df_test['WEEKEND'] = df_test.apply (lambda row: label_holiday(row),axis=1)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 44, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "df_train = df_train.drop(columns=['Flight Date', 'Booking Date'])\n", 114 | "df_test = df_test.drop(columns=['Flight Date', 'Booking Date'])\n", 115 | "df_train = df_train.drop(columns='Date of Birth')\n", 116 | "df_test = df_test.drop(columns='Date of Birth')" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 45, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "df_train['Flight Time'] = df_train['Flight Time'].str.split(':').str[0]\n", 126 | "df_train['age'] = df_train['age']/365.0\n", 127 | "df_test['Flight Time'] = df_test['Flight Time'].str.split(':').str[0]\n", 128 | "df_test['age'] = df_test['age']/365.0" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 46, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "time_from = {}\n", 138 | "time_from['1'] = [0,130,140,120,75,130,89]\n", 139 | "time_from['2'] = [205,0,60,95,104,39,134]\n", 140 | "time_from['3'] = [155,90,0,120,120,105,135]\n", 141 | "time_from['4'] = [115,80,115,0,120,55,155]\n", 142 | "time_from['5'] = [70,104,115,125,0,235,65]\n", 143 | "time_from['6'] = [130,39,100,60,245,0,138]\n", 144 | "time_from['7'] = [95,134,115,155,65,138,0]" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 47, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "def label_duration (row):\n", 154 | " return time_from[str(int(row['From']))][int(row['To'])-1]\n", 155 | "\n", 156 | "def label_distance (row):\n", 157 | " if (row['From'] == 1 and row['To'] == 2) or (row['From'] == 2 and row['To'] == 1):\n", 158 | " return 1446\n", 159 | " if (row['From'] == 1 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 1):\n", 160 | " return 1654\n", 161 | " if (row['From'] == 1 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 1):\n", 162 | " return 1148\n", 163 | " if (row['From'] == 1 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 1):\n", 164 | " return 622\n", 165 | " if (row['From'] == 1 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 1):\n", 166 | " return 1190\n", 167 | " if (row['From'] == 1 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 1):\n", 168 | " return 1028\n", 169 | " if (row['From'] == 2 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 2):\n", 170 | " return 470\n", 171 | " if (row['From'] == 2 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 2):\n", 172 | " return 480\n", 173 | " if (row['From'] == 2 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 2):\n", 174 | " return 1140\n", 175 | " if (row['From'] == 2 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 2):\n", 176 | " return 437\n", 177 | " if (row['From'] == 2 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2):\n", 178 | " return 1485\n", 179 | " if (row['From'] == 3 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 3):\n", 180 | " return 1307\n", 181 | " if (row['From'] == 3 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 3):\n", 182 | " return 1180\n", 183 | " if (row['From'] == 3 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 3):\n", 184 | " return 886\n", 185 | " if (row['From'] == 3 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2):\n", 186 | " return 1366\n", 187 | " if (row['From'] == 4 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 4):\n", 188 | " return 1253\n", 189 | " if (row['From'] == 4 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 4):\n", 190 | " return 417\n", 191 | " if (row['From'] == 4 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 4):\n", 192 | " return 1760\n", 193 | " if (row['From'] == 5 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 5):\n", 194 | " return 1425\n", 195 | " if (row['From'] == 5 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 5):\n", 196 | " return 520\n", 197 | " if (row['From'] == 6 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 6):\n", 198 | " return 1534\n", 199 | " return None" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 48, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "df_train['flight_duration'] = df_train.apply (lambda row: label_duration (row),axis=1)\n", 209 | "df_test['flight_duration'] = df_test.apply (lambda row: label_duration (row),axis=1)\n", 210 | "df_train['distance'] = df_train.apply (lambda row: label_distance (row),axis=1)\n", 211 | "df_test['distance'] = df_test.apply (lambda row: label_distance (row),axis=1)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 49, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "df_train['Flight Time'] = df_train['Flight Time'].astype(str).astype(int)\n", 221 | "df_test['Flight Time'] = df_test['Flight Time'].astype(str).astype(int)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 50, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "df_train['Name'] = df_train['Name'].str.split('.').str[0]\n", 231 | "df_test['Name'] = df_test['Name'].str.split('.').str[0]" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 51, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "def label_gender (row):\n", 241 | " if 'Dr' in row['Name']:\n", 242 | " return 4\n", 243 | " if 'Miss' in row['Name']:\n", 244 | " return 0\n", 245 | " if 'Mrs' in row['Name']:\n", 246 | " return 1\n", 247 | " if 'Mr' in row['Name']:\n", 248 | " return 3\n", 249 | " return None" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 52, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/html": [ 260 | "
\n", 261 | "\n", 274 | "\n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | "
FromToFlight TimeClassdiffageflight_monthWEEKENDFLIGHTWEEKWEEKDAYflight_durationdistancegender
013132516.053.0273971114441401654.04
165181050.034.832877913802451425.04
26210103.034.8986305121439437.04
31614108.027.871233722961301190.03
434101012.022.0739731215121201307.00
572142510.010.501370512011341485.00
61611255.067.734247512121301190.03
771181024.072.82191851204951028.04
84612100.044.56712311247655417.03
94112101.066.542466421461151148.03
1026212514.077.8931517129439437.04
1142151015.061.6328773110080480.04
1274111030.033.75890421901551760.03
133610252.026.3479451110105886.00
146421013.014.712329216060417.00
151719109.030.23835662235891028.00
1614141016.068.221918522151201148.04
1734212517.047.271233311001201307.03
181531016.040.56986312151375622.04
1947201042.041.25205512261551760.03
201392551.065.55890421841401654.04
2167152547.039.638356321061381534.04
2263161020.064.29315131124100886.01
237121251.054.986301112455951028.04
24419258.069.70411012351151148.04
2541212518.039.254795321161151148.03
2636101025.064.70137081314105886.03
271618107.048.356164913831301190.04
2864232531.047.3013703111060417.01
2932131030.072.77260310142490470.04
..........................................
747037201023.020.501370521951351366.00
74715342513.039.3589041114521151180.01
7472468101.023.0794526125055417.03
747346201017.061.2164385118055417.01
74743271021.022.66849311144390470.03
74753113250.056.3972601114621551654.04
74761311106.055.312329511811401654.03
747764191031.065.6821929237560417.04
74787617106.058.6328771124561381534.04
747941192515.062.0794521214831151148.03
748063161019.019.5178081144100886.00
748135131010.021.31780822561201180.00
748247201011.033.8328771214841551760.00
748313181022.070.6712331014111401654.03
748427171032.024.4328771224951341485.03
74855710105.030.2547956123365520.01
74866792546.033.435616712711381534.03
74872719259.045.6301371024351341485.04
748837181024.042.317808622261351366.04
748924141012.070.71232911146195480.04
74902441036.073.63013710242695480.01
74911719106.060.780822111461891028.01
74921714100.017.50411042165891028.00
74934221018.050.389041122580480.03
74945671063.019.301370712742351425.03
74956117259.037.18356231921301190.01
74961717101.063.457534121520891028.03
7497317254.027.60274011321551654.00
749862111020.014.6547953110339437.00
749942221042.035.29315110242680480.01
\n", 1272 | "

7500 rows × 13 columns

\n", 1273 | "
" 1274 | ], 1275 | "text/plain": [ 1276 | " From To Flight Time Class diff age flight_month WEEKEND \\\n", 1277 | "0 1 3 13 25 16.0 53.027397 11 1 \n", 1278 | "1 6 5 18 10 50.0 34.832877 9 1 \n", 1279 | "2 6 2 10 10 3.0 34.898630 5 1 \n", 1280 | "3 1 6 14 10 8.0 27.871233 7 2 \n", 1281 | "4 3 4 10 10 12.0 22.073973 12 1 \n", 1282 | "5 7 2 14 25 10.0 10.501370 5 1 \n", 1283 | "6 1 6 11 25 5.0 67.734247 5 1 \n", 1284 | "7 7 1 18 10 24.0 72.821918 5 1 \n", 1285 | "8 4 6 12 10 0.0 44.567123 11 2 \n", 1286 | "9 4 1 12 10 1.0 66.542466 4 2 \n", 1287 | "10 2 6 21 25 14.0 77.893151 7 1 \n", 1288 | "11 4 2 15 10 15.0 61.632877 3 1 \n", 1289 | "12 7 4 11 10 30.0 33.758904 2 1 \n", 1290 | "13 3 6 10 25 2.0 26.347945 1 1 \n", 1291 | "14 6 4 2 10 13.0 14.712329 2 1 \n", 1292 | "15 1 7 19 10 9.0 30.238356 6 2 \n", 1293 | "16 1 4 14 10 16.0 68.221918 5 2 \n", 1294 | "17 3 4 21 25 17.0 47.271233 3 1 \n", 1295 | "18 1 5 3 10 16.0 40.569863 12 1 \n", 1296 | "19 4 7 20 10 42.0 41.252055 1 2 \n", 1297 | "20 1 3 9 25 51.0 65.558904 2 1 \n", 1298 | "21 6 7 15 25 47.0 39.638356 3 2 \n", 1299 | "22 6 3 16 10 20.0 64.293151 3 1 \n", 1300 | "23 7 1 21 25 1.0 54.986301 11 2 \n", 1301 | "24 4 1 9 25 8.0 69.704110 1 2 \n", 1302 | "25 4 1 21 25 18.0 39.254795 3 2 \n", 1303 | "26 3 6 10 10 25.0 64.701370 8 1 \n", 1304 | "27 1 6 18 10 7.0 48.356164 9 1 \n", 1305 | "28 6 4 23 25 31.0 47.301370 3 1 \n", 1306 | "29 3 2 13 10 30.0 72.772603 10 1 \n", 1307 | "... ... .. ... ... ... ... ... ... \n", 1308 | "7470 3 7 20 10 23.0 20.501370 5 2 \n", 1309 | "7471 5 3 4 25 13.0 39.358904 11 1 \n", 1310 | "7472 4 6 8 10 1.0 23.079452 6 1 \n", 1311 | "7473 4 6 20 10 17.0 61.216438 5 1 \n", 1312 | "7474 3 2 7 10 21.0 22.668493 11 1 \n", 1313 | "7475 3 1 13 25 0.0 56.397260 11 1 \n", 1314 | "7476 1 3 11 10 6.0 55.312329 5 1 \n", 1315 | "7477 6 4 19 10 31.0 65.682192 9 2 \n", 1316 | "7478 7 6 17 10 6.0 58.632877 11 2 \n", 1317 | "7479 4 1 19 25 15.0 62.079452 12 1 \n", 1318 | "7480 6 3 16 10 19.0 19.517808 1 1 \n", 1319 | "7481 3 5 13 10 10.0 21.317808 2 2 \n", 1320 | "7482 4 7 20 10 11.0 33.832877 12 1 \n", 1321 | "7483 1 3 18 10 22.0 70.671233 10 1 \n", 1322 | "7484 2 7 17 10 32.0 24.432877 12 2 \n", 1323 | "7485 5 7 10 10 5.0 30.254795 6 1 \n", 1324 | "7486 6 7 9 25 46.0 33.435616 7 1 \n", 1325 | "7487 2 7 19 25 9.0 45.630137 10 2 \n", 1326 | "7488 3 7 18 10 24.0 42.317808 6 2 \n", 1327 | "7489 2 4 14 10 12.0 70.712329 11 1 \n", 1328 | "7490 2 4 4 10 36.0 73.630137 10 2 \n", 1329 | "7491 1 7 19 10 6.0 60.780822 11 1 \n", 1330 | "7492 1 7 14 10 0.0 17.504110 4 2 \n", 1331 | "7493 4 2 2 10 18.0 50.389041 1 2 \n", 1332 | "7494 5 6 7 10 63.0 19.301370 7 1 \n", 1333 | "7495 6 1 17 25 9.0 37.183562 3 1 \n", 1334 | "7496 1 7 17 10 1.0 63.457534 12 1 \n", 1335 | "7497 3 1 7 25 4.0 27.602740 1 1 \n", 1336 | "7498 6 2 11 10 20.0 14.654795 3 1 \n", 1337 | "7499 4 2 22 10 42.0 35.293151 10 2 \n", 1338 | "\n", 1339 | " FLIGHTWEEK WEEKDAY flight_duration distance gender \n", 1340 | "0 44 4 140 1654.0 4 \n", 1341 | "1 38 0 245 1425.0 4 \n", 1342 | "2 21 4 39 437.0 4 \n", 1343 | "3 29 6 130 1190.0 3 \n", 1344 | "4 51 2 120 1307.0 0 \n", 1345 | "5 20 1 134 1485.0 0 \n", 1346 | "6 21 2 130 1190.0 3 \n", 1347 | "7 20 4 95 1028.0 4 \n", 1348 | "8 47 6 55 417.0 3 \n", 1349 | "9 14 6 115 1148.0 3 \n", 1350 | "10 29 4 39 437.0 4 \n", 1351 | "11 10 0 80 480.0 4 \n", 1352 | "12 9 0 155 1760.0 3 \n", 1353 | "13 1 0 105 886.0 0 \n", 1354 | "14 6 0 60 417.0 0 \n", 1355 | "15 23 5 89 1028.0 0 \n", 1356 | "16 21 5 120 1148.0 4 \n", 1357 | "17 10 0 120 1307.0 3 \n", 1358 | "18 51 3 75 622.0 4 \n", 1359 | "19 2 6 155 1760.0 3 \n", 1360 | "20 8 4 140 1654.0 4 \n", 1361 | "21 10 6 138 1534.0 4 \n", 1362 | "22 12 4 100 886.0 1 \n", 1363 | "23 45 5 95 1028.0 4 \n", 1364 | "24 3 5 115 1148.0 4 \n", 1365 | "25 11 6 115 1148.0 3 \n", 1366 | "26 31 4 105 886.0 3 \n", 1367 | "27 38 3 130 1190.0 4 \n", 1368 | "28 11 0 60 417.0 1 \n", 1369 | "29 42 4 90 470.0 4 \n", 1370 | "... ... ... ... ... ... \n", 1371 | "7470 19 5 135 1366.0 0 \n", 1372 | "7471 45 2 115 1180.0 1 \n", 1373 | "7472 25 0 55 417.0 3 \n", 1374 | "7473 18 0 55 417.0 1 \n", 1375 | "7474 44 3 90 470.0 3 \n", 1376 | "7475 46 2 155 1654.0 4 \n", 1377 | "7476 18 1 140 1654.0 3 \n", 1378 | "7477 37 5 60 417.0 4 \n", 1379 | "7478 45 6 138 1534.0 4 \n", 1380 | "7479 48 3 115 1148.0 3 \n", 1381 | "7480 4 4 100 886.0 0 \n", 1382 | "7481 5 6 120 1180.0 0 \n", 1383 | "7482 48 4 155 1760.0 0 \n", 1384 | "7483 41 1 140 1654.0 3 \n", 1385 | "7484 49 5 134 1485.0 3 \n", 1386 | "7485 23 3 65 520.0 1 \n", 1387 | "7486 27 1 138 1534.0 3 \n", 1388 | "7487 43 5 134 1485.0 4 \n", 1389 | "7488 22 6 135 1366.0 4 \n", 1390 | "7489 46 1 95 480.0 4 \n", 1391 | "7490 42 6 95 480.0 1 \n", 1392 | "7491 46 1 89 1028.0 1 \n", 1393 | "7492 16 5 89 1028.0 0 \n", 1394 | "7493 2 5 80 480.0 3 \n", 1395 | "7494 27 4 235 1425.0 3 \n", 1396 | "7495 9 2 130 1190.0 1 \n", 1397 | "7496 52 0 89 1028.0 3 \n", 1398 | "7497 3 2 155 1654.0 0 \n", 1399 | "7498 10 3 39 437.0 0 \n", 1400 | "7499 42 6 80 480.0 1 \n", 1401 | "\n", 1402 | "[7500 rows x 13 columns]" 1403 | ] 1404 | }, 1405 | "execution_count": 52, 1406 | "metadata": {}, 1407 | "output_type": "execute_result" 1408 | } 1409 | ], 1410 | "source": [ 1411 | "df_train['gender'] = df_train.apply (lambda row: label_gender (row),axis=1)\n", 1412 | "df_test['gender'] = df_test.apply (lambda row: label_gender (row),axis=1)\n", 1413 | "df_train= df_train.drop(columns='Name')\n", 1414 | "df_test = df_test.drop(columns='Name')\n", 1415 | "df_train =df_train.drop(columns='Fare')\n", 1416 | "df_train" 1417 | ] 1418 | }, 1419 | { 1420 | "cell_type": "code", 1421 | "execution_count": 34, 1422 | "metadata": {}, 1423 | "outputs": [ 1424 | { 1425 | "data": { 1426 | "text/plain": [ 1427 | "0 6\n", 1428 | "1 2\n", 1429 | "2 2\n", 1430 | "3 6\n", 1431 | "4 3\n", 1432 | "5 5\n", 1433 | "6 6\n", 1434 | "7 5\n", 1435 | "8 7\n", 1436 | "9 7\n", 1437 | "10 1\n", 1438 | "11 7\n", 1439 | "12 5\n", 1440 | "13 3\n", 1441 | "14 2\n", 1442 | "15 6\n", 1443 | "16 6\n", 1444 | "17 3\n", 1445 | "18 6\n", 1446 | "19 7\n", 1447 | "20 6\n", 1448 | "21 2\n", 1449 | "22 2\n", 1450 | "23 5\n", 1451 | "24 7\n", 1452 | "25 7\n", 1453 | "26 3\n", 1454 | "27 6\n", 1455 | "28 2\n", 1456 | "29 3\n", 1457 | " ..\n", 1458 | "7470 3\n", 1459 | "7471 4\n", 1460 | "7472 7\n", 1461 | "7473 7\n", 1462 | "7474 3\n", 1463 | "7475 3\n", 1464 | "7476 6\n", 1465 | "7477 2\n", 1466 | "7478 5\n", 1467 | "7479 7\n", 1468 | "7480 2\n", 1469 | "7481 3\n", 1470 | "7482 7\n", 1471 | "7483 6\n", 1472 | "7484 1\n", 1473 | "7485 4\n", 1474 | "7486 2\n", 1475 | "7487 1\n", 1476 | "7488 3\n", 1477 | "7489 1\n", 1478 | "7490 1\n", 1479 | "7491 6\n", 1480 | "7492 6\n", 1481 | "7493 7\n", 1482 | "7494 4\n", 1483 | "7495 2\n", 1484 | "7496 6\n", 1485 | "7497 3\n", 1486 | "7498 2\n", 1487 | "7499 7\n", 1488 | "Name: priority, Length: 7500, dtype: int64" 1489 | ] 1490 | }, 1491 | "execution_count": 34, 1492 | "metadata": {}, 1493 | "output_type": "execute_result" 1494 | } 1495 | ], 1496 | "source": [ 1497 | "imp = {1:6 , 2 : 1, 3 : 3, 4 : 7, 5: 4, 6: 2, 7:5}\n", 1498 | "def label_imp (row):\n", 1499 | " return int(imp[int(row['From'])])\n", 1500 | " return None\n", 1501 | "df_train['priority'] = df_train.apply(lambda row: label_imp(row),axis=1)\n", 1502 | "df_test['priority'] = df_test.apply(lambda row: label_imp(row),axis=1)\n", 1503 | "df_train['priority']" 1504 | ] 1505 | }, 1506 | { 1507 | "cell_type": "code", 1508 | "execution_count": 35, 1509 | "metadata": {}, 1510 | "outputs": [ 1511 | { 1512 | "data": { 1513 | "text/plain": [ 1514 | "XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n", 1515 | " colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,\n", 1516 | " max_depth=20, min_child_weight=1, missing=None, n_estimators=500,\n", 1517 | " n_jobs=1, nthread=None, objective='reg:linear', random_state=0,\n", 1518 | " reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,\n", 1519 | " silent=True, subsample=1)" 1520 | ] 1521 | }, 1522 | "execution_count": 35, 1523 | "metadata": {}, 1524 | "output_type": "execute_result" 1525 | } 1526 | ], 1527 | "source": [ 1528 | "import xgboost as xgb\n", 1529 | "from xgboost.sklearn import XGBRegressor\n", 1530 | "xgb = XGBRegressor(max_depth = 20,objective='reg:linear',n_estimators=500)\n", 1531 | "xgb.fit(df_train,trainY)" 1532 | ] 1533 | }, 1534 | { 1535 | "cell_type": "code", 1536 | "execution_count": 36, 1537 | "metadata": {}, 1538 | "outputs": [ 1539 | { 1540 | "data": { 1541 | "text/plain": [ 1542 | "0.9999999999996952" 1543 | ] 1544 | }, 1545 | "execution_count": 36, 1546 | "metadata": {}, 1547 | "output_type": "execute_result" 1548 | } 1549 | ], 1550 | "source": [ 1551 | "xgb.score(df_train,trainY)" 1552 | ] 1553 | }, 1554 | { 1555 | "cell_type": "code", 1556 | "execution_count": 37, 1557 | "metadata": {}, 1558 | "outputs": [], 1559 | "source": [ 1560 | "xgb_final_preds = xgb.predict(df_test)\n", 1561 | "np.savetxt('finall_lasttttttttttt_946.csv',xgb_final_preds,delimiter = ',')" 1562 | ] 1563 | }, 1564 | { 1565 | "cell_type": "code", 1566 | "execution_count": null, 1567 | "metadata": {}, 1568 | "outputs": [], 1569 | "source": [] 1570 | } 1571 | ], 1572 | "metadata": { 1573 | "kernelspec": { 1574 | "display_name": "Python 2", 1575 | "language": "python", 1576 | "name": "python2" 1577 | }, 1578 | "language_info": { 1579 | "codemirror_mode": { 1580 | "name": "ipython", 1581 | "version": 3 1582 | }, 1583 | "file_extension": ".py", 1584 | "mimetype": "text/x-python", 1585 | "name": "python", 1586 | "nbconvert_exporter": "python", 1587 | "pygments_lexer": "ipython3", 1588 | "version": "3.5.2" 1589 | } 1590 | }, 1591 | "nbformat": 4, 1592 | "nbformat_minor": 2 1593 | } 1594 | --------------------------------------------------------------------------------