├── Question 1 ├── Report.pdf ├── xgb_regressor_updated_v3.py └── Final_done.ipynb ├── Question 2 ├── Question2Report.pdf └── 2_final.R ├── Question 3 ├── Question3Report.pdf └── 3_final.R └── README.md /Question 1/Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arkadeep-sophoIITG/JP-Morgan-Quant-Challenge-2018/HEAD/Question 1/Report.pdf -------------------------------------------------------------------------------- /Question 2/Question2Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arkadeep-sophoIITG/JP-Morgan-Quant-Challenge-2018/HEAD/Question 2/Question2Report.pdf -------------------------------------------------------------------------------- /Question 3/Question3Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Arkadeep-sophoIITG/JP-Morgan-Quant-Challenge-2018/HEAD/Question 3/Question3Report.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JP Morgan Quant Challenge - SiliconLobby [Arkadeep & Hiten] 2 | 36-hour competition that exposed participants to machine learning, derivative modelling, and how to apply their knowledge to the real-life problems faced by the Quantitative Research and Data Analytics teams on a day to day basis. 3 | ## Question 1 - Flight Price Predictor 4 | ### Predictive Modelling with Machine Learning 5 | Optimal timing for airline ticket purchasing from the consumer’s perspective is challenging principally because buyers have insufficient information for reasoning about future price movements. In this challenge we simulate various models for computing the best possible expected future prices by constructing new features from the existing features and also introducing new features to make our model more robust. 6 | 7 | ## Question 2 - Stock Price Simulation 8 | ### Monte Carlo Simulation 9 | Simulate Stock Price Movement using historical data 10 | 11 | ## Question 3 - Derivative Price Evaluation 12 | ### Probablity Theory and Monte Carlo Simulation 13 | Given Payoff States and Contingency Claims evaluate the fair price of the derivative designed 14 | -------------------------------------------------------------------------------- /Question 2/2_final.R: -------------------------------------------------------------------------------- 1 | 2 | # Read only region start 3 | 4 | solution <- function(a, theta, X0, sigma) { 5 | # Input parameters are : 6 | # input1 As Numeric eg:- 232323.4323 7 | # input2 As Numeric eg:- 232323.4323 8 | # input3 As Numeric eg:- 232323.4323 9 | # input4 As Numeric eg:- 232323.4323 10 | 11 | # Expected return type : 12 | # output1 As vector of type Numeric 13 | # Read only region end 14 | 15 | t <- 2 16 | #1 answer 17 | answer1 <- X0 * exp(-a*t) + theta*(1-exp(-a*t)) 18 | 19 | #2 answer 20 | sim = 10^5 21 | vec<- rep(X0,sim) 22 | breaks = 100 23 | for(i in seq(1,breaks)){ 24 | x<-rnorm(sim) 25 | vec<- vec+a*(theta - vec)*(t/breaks) + sigma*sqrt(vec)*x*sqrt(t/breaks); 26 | } 27 | for(i in seq(1,length(vec))){ 28 | vec[i] = max(vec[i]-100,0) 29 | } 30 | answer2<-mean(vec) 31 | 32 | 33 | #3 answer 34 | del<-10^-7 35 | sigma1<-sigma+del 36 | vec<- rep(X0,sim) 37 | vec1<-rep(X0,sim) 38 | for(i in seq(1,breaks)){ 39 | x<-rnorm(sim) 40 | vec<- vec+a*(theta - vec)*(t/breaks) + sigma*sqrt(vec)*x*sqrt(t/breaks); 41 | vec1<- vec1+ a*(theta - vec1)*(t/breaks) + sigma1*sqrt(vec1)*x*sqrt(t/breaks); 42 | } 43 | vega <- 0 44 | for(i in seq(1,sim)){ 45 | if(vec[i]>100&&vec1[i]>100){ 46 | vega <- vega+((vec1[i]-vec[i])/del) 47 | } 48 | } 49 | answer3<-vega/sim 50 | 51 | 52 | #OUTPUT 53 | output <- c(answer1,answer2,answer3); 54 | return (output); 55 | stop("UnsupportedOperation solution( input1, input2, input3, input4)") 56 | } 57 | -------------------------------------------------------------------------------- /Question 3/3_final.R: -------------------------------------------------------------------------------- 1 | 2 | X_SPOT <- c(2443.25,2447.83,2459.27,2459.14,2460.61,2473.83,2473.45,2472.54,2469.91,2477.13,2477.83,2475.42,2472.1,2470.3,2476.35,2477.57,2472.16,2476.83,2480.91,2474.92,2474.02,2438.21,2441.32,2465.84,2464.61,2468.11,2430.01,2425.55,2428.37,2452.51,2444.04,2438.97,2443.05,2444.24,2446.3,2457.59,2471.65,2476.55,2457.85,2457.85,2465.54,2465.1,2461.43,2488.11,2496.48,2498.37,2495.62,2500.23,2503.87,2506.65,2508.24,2500.6,2502.22,2496.66,2496.84,2507.04,2510.06,2519.36,2529.12,2534.58,2537.74,2552.07,2549.33,2544.73,2550.64,2555.24,2550.93,2553.17,2557.64,2559.36,2561.26,2562.1,2575.21,2564.98,2569.13,2557.15,2560.4,2581.07,2572.83,2575.26,2579.36,2579.85,2587.84,2591.13,2590.64,2594.38,2584.62,2582.3,2584.84,2578.87,2564.62,2585.64,2578.85,2582.14,2599.03,2597.08,2602.42,2602.42,2601.42,2627.04,2626.07,2647.58,2642.22,2639.44,2629.57,2629.27,2636.98,2651.5,2659.99,2664.11,2662.85,2652.01,2675.81,2690.16,2681.47,2679.25,2684.57,2683.34,2680.5,2680.5,2682.62,2687.54,2673.61,2695.81,2695.81,2713.06,2723.99,2743.15,2747.71,2751.29,2748.23,2767.56,2786.24,2776.42,2776.42,2802.56,2798.03,2810.3,2832.97,2839.13,2837.54,2839.25,2872.87,2853.53,2822.43,2823.81,2821.98,2762.13,2648.94,2695.14,2681.66,2581,2619.55,2656,2662.94,2698.63,2731.2,2732.22,2716.26,2716.26,2701.33,2703.96,2747.3,2779.6,2744.28,2713.83,2677.67,2691.25,2720.94,2728.12,2726.8,2738.97,2786.57,2783.02,2765.31,2749.48,2747.33,2752.01,2712.92,2716.94,2711.93,2643.69,2588.26,2658.55,2612.62,2605,2640.87,2581.88,2581.88,2614.45,2644.69,2662.84,2604.47,2613.16,2656.87,2642.19,2663.99,2656.3,2677.84,2706.39,2708.64,2693.13,2670.14,2670.29,2634.56,2639.4,2666.94,2669.91,2648.05,2654.8,2635.67,2629.73,2663.42,2672.63,2671.92,2697.79,2723.07,2727.72,2730.13,2711.45,2722.46,2720.13,2712.97,2733.01,2724.44,2733.29,2727.76,2721.33,2689.86,2689.86,2724.01,2705.27,2734.62,2746.87,2748.8,2772.35,2770.37,2779.03,2782,2786.85,2775.63,2782.49,2779.66,2773.75,2762.59,2767.32,2749.76,2754.88,2717.07,2723.06,2699.63,2716.31,2718.37,2726.71,2713.22,2736.61,2736.61,2759.82,2784.17,2793.84,2774.02,2798.29) 3 | Y_SPOT <- c(35.5,35.86,36.35,36.38,36.43,36.47,36.41,36.07,35.82,35.57,35.62,35.94,35.77,35.98,34.76,34.82,34.77,35.27,35.3,35.39,35.28,34.88,34.93,35.47,35.54,35.75,35,34.83,34.91,35.3,35.49,35.52,35.6,35.51,35.52,35.82,36.54,37.36,37.23,37.23,37.67,36.91,37,37.35,37.89,38.21,38.79,38.88,38.59,38.7,38.88,39.1,39.42,40.3,40.26,40.58,40.58,40.38,42.15,43.45,43.78,43.85,44.93,45.33,45.21,45.47,44.89,45.88,45.76,45.02,45.12,45.35,45.61,45.15,46.48,45.12,45.25,44.64,43.37,42.98,43.13,42.6,42.34,42.14,41.7,42.11,42.11,42.66,43.57,43,42.86,43.6,43.88,44.88,44.97,44.29,44.46,44.46,44.17,44.92,43.81,43.09,42.79,43.05,42.8,42.15,42.02,42.02,41.67,41.53,41.4,40.81,40.95,42.15,42.49,42.52,42.16,42.02,41.8,41.8,41.31,41.38,40.99,41.8,41.8,42.82,44.14,44.01,44.22,44.05,43,44.19,44.07,44.19,44.19,44.03,43.86,43.15,43.29,43.38,44.16,43.16,43.49,43.02,42.7,42.41,42.43,41,39.54,41.86,42.39,40.75,41.46,42,41.4,41.81,41.85,41.09,40.77,40.77,40.56,40.91,40.91,41.54,40.17,39.35,37.79,37.43,37.74,37.93,37.74,37.84,37.84,37.83,38.01,37.69,37.85,37.94,37.01,36.89,37.58,36.35,35.17,35.99,34.87,35.47,36.34,35.76,35.76,36.94,38.03,38,37.68,37.83,39.07,39,38.83,38.73,39.17,39.22,38.93,37.77,37.61,37.69,37.93,38.11,38.25,37.65,36.74,36.42,36.2,36.15,36.71,36.34,36.33,36.27,37.16,36.89,36.63,36.94,38.03,38.3,37.79,38.09,38.28,37.85,38.39,38.3,37.38,37.38,37.83,42.7,43.2,43.78,43.41,43.93,44.01,44.25,44.85,44.18,44.45,43.57,43.91,43.95,42.26,41.95,41.12,41.25,40.61,41.01,40.37,40.52,39.4,39.5,38.97,39.47,39.47,39.16,39.75,40.09,39.3,39.27) 4 | library(MASS) 5 | # Read only region start 6 | 7 | solution <- function(r, N) { 8 | # Input parameters are : 9 | # input1 As Numeric eg:- 232323.4323 10 | # input2 As Numeric eg:- 232323.4323 11 | 12 | # Expected return type : 13 | # output1 As Numeric eg:- 232323.4323 14 | # Read only region end 15 | # Write code here... 16 | 17 | #Get sigma from spot prices given 18 | sig_x <- sqrt(var(diff(log(X_SPOT)))); 19 | sig_y <- sqrt(var(diff(log(Y_SPOT)))); 20 | rho <- cor(diff(log(X_SPOT)),diff(log(Y_SPOT))) 21 | #Simulate path-dependent option 22 | sim <- 10000 23 | sigma <- matrix(c(252*0.25,rho*252*0.25,rho*252*0.25,252*0.25),2,2) 24 | payoff <- rep(1,sim); 25 | value <- matrix(rep(c(X_SPOT[1],Y_SPOT[1]),sim*2),sim,2,byrow=TRUE); 26 | for(i in seq(0,N)){ 27 | 28 | rand_gen <- mvrnorm(n=sim, rep(0,2),sigma) 29 | 30 | value_x <- value + r*value*(0.25*252) + sig_x*rand_gen[,1] 31 | value_y <- value + r*value*(0.25*252) + sig_y*rand_gen[,2] 32 | for(j in seq(1,nrow(value))){ 33 | if (((value_x[j,1]>1.25*X_SPOT[1]) || (value_x[j,1]<0.75*X_SPOT[1]))){ 34 | payoff[j] <- 0; 35 | } 36 | if (((value_y[j,2]>1.25*Y_SPOT[1]) || (value_y[j,2]<0.75*Y_SPOT[1]))){ 37 | payoff[j] <- 0; 38 | } 39 | } 40 | } 41 | payoff <- (payoff*(value_x[,1]/(2*X_SPOT[1])) + payoff*(value_y[,2]/(2*Y_SPOT[1])))-1 42 | sum = 0; 43 | #return mean(payoff[payoff>0])*exp(-r*(n/4)*252) 44 | for(i in payoff){ 45 | sum <- sum + max(payoff,0); 46 | } 47 | return (sum/sim)*exp(-r*(n/4)*252); 48 | 49 | stop("UnsupportedOperation solution( input1, input2)") 50 | } 51 | -------------------------------------------------------------------------------- /Question 1/xgb_regressor_updated_v3.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import log_loss,accuracy_score,roc_auc_score 2 | from sklearn import preprocessing 3 | from sklearn.metrics import mean_squared_error 4 | import numpy as np 5 | import pandas as pd 6 | import argparse 7 | import os 8 | from hyperopt import hp 9 | from hyperopt import fmin, tpe, hp, STATUS_OK, Trials 10 | import pickle 11 | import sys 12 | import xgboost 13 | from xgboost import XGBRegressor 14 | from sklearn.cross_validation import train_test_split 15 | import time 16 | 17 | def label_gender (row): 18 | if 'Dr' in row['Name']: 19 | return 4 20 | if 'Miss' in row['Name']: 21 | return 0 22 | if 'Mrs' in row['Name']: 23 | return 1 24 | if 'Mr' in row['Name']: 25 | return 3 26 | return None 27 | 28 | 29 | 30 | 31 | def label_race (row): 32 | if (row['From'] == 1 and row['To'] == 2) or (row['From'] == 2 and row['To'] == 1): 33 | return 1446 34 | if (row['From'] == 1 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 1): 35 | return 1654 36 | if (row['From'] == 1 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 1): 37 | return 1148 38 | if (row['From'] == 1 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 1): 39 | return 622 40 | if (row['From'] == 1 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 1): 41 | return 1190 42 | if (row['From'] == 1 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 1): 43 | return 1028 44 | if (row['From'] == 2 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 2): 45 | return 470 46 | if (row['From'] == 2 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 2): 47 | return 480 48 | if (row['From'] == 2 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 2): 49 | return 1140 50 | if (row['From'] == 2 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 2): 51 | return 437 52 | if (row['From'] == 2 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2): 53 | return 1485 54 | if (row['From'] == 3 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 3): 55 | return 1307 56 | if (row['From'] == 3 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 3): 57 | return 1180 58 | if (row['From'] == 3 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 3): 59 | return 886 60 | if (row['From'] == 3 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2): 61 | return 1366 62 | if (row['From'] == 4 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 4): 63 | return 1253 64 | if (row['From'] == 4 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 4): 65 | return 417 66 | if (row['From'] == 4 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 4): 67 | return 1760 68 | if (row['From'] == 5 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 5): 69 | return 1425 70 | if (row['From'] == 5 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 5): 71 | return 520 72 | if (row['From'] == 6 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 6): 73 | return 1534 74 | return None 75 | 76 | 77 | def label_race (row): 78 | return time_from[str(int(row['From']))][int(row['To'])-1] 79 | 80 | def dataframe_train(path): 81 | df = pd.read_csv(path,header=0,parse_dates=[1,4,6]) 82 | train_Y = [] 83 | train_Y = df['Fare'] 84 | return df,train_Y 85 | 86 | def dataframe_test(path): 87 | df = pd.read_csv(path,header=0,parse_dates=[1,4,6]) 88 | return df 89 | 90 | 91 | def objective(space): 92 | 93 | clf = XGBRegressor(n_estimators = space['n_estimators'], 94 | max_depth = space['max_depth'], 95 | min_child_weight = space['min_child_weight'], 96 | subsample = space['subsample'], 97 | learning_rate = space['learning_rate'], 98 | gamma = space['gamma'], 99 | colsample_bytree = space['colsample_bytree'], 100 | reg_alpha= space['alpha'], 101 | reg_lambda=space['lambda'], 102 | booster= 'gbtree', 103 | objective='reg:linear' 104 | ) 105 | 106 | eval_set = [( train, y_train)] 107 | 108 | clf.fit(train, 109 | y_train, 110 | eval_set=eval_set, 111 | eval_metric = 'rmse') 112 | 113 | pred = clf.predict(train) 114 | rmse = mean_squared_error((y_train), (pred)) 115 | 116 | # print "SCORE:", mae 117 | return{'loss':rmse, 'status': STATUS_OK } 118 | 119 | def optimize(cores,random_state): 120 | space ={ 121 | 'max_depth': hp.choice('max_depth', np.arange(10, 70, dtype=int)), 122 | 'min_child_weight': hp.quniform ('min_child_weight', 1, 20, 1), 123 | 'subsample': hp.uniform ('subsample', 0.8, 1), 124 | 'n_estimators' : hp.choice('n_estimators', np.arange(500, 5000, 100, dtype=int)), 125 | 'learning_rate' : hp.quniform('learning_rate', 0.025, 0.5, 0.025), 126 | 'gamma' : hp.quniform('gamma', 0.5, 1, 0.05), 127 | 'colsample_bytree' : hp.quniform('colsample_bytree', 0.5, 1, 0.05), 128 | 'alpha' : hp.quniform('alpha', 0, 10, 1), 129 | 'lambda': hp.quniform('lambda', 1, 2, 0.1), 130 | 'nthread': cores, 131 | 'objective': 'reg:linear', 132 | 'booster': 'gbtree', 133 | 'seed': random_state 134 | } 135 | 136 | 137 | trials = Trials() 138 | best = fmin(fn=objective, 139 | space=space, 140 | algo=tpe.suggest, 141 | max_evals=50, # change 142 | trials=trials) 143 | return best 144 | 145 | 146 | # Loaded the entire training and test dataset into pandas dataframe 147 | df_train,trainY = dataframe_train('train.csv') 148 | df_test = dataframe_test('test.csv') 149 | 150 | ###### Computing new features based on training data ####### 151 | 152 | # Added difference feature based on difference between Booking Date and Flight date 153 | df_train['diff'] = df_train['Flight Date'] - df_train['Booking Date'] 154 | df_test['diff'] = df_test['Flight Date'] - df_test['Booking Date'] 155 | df_train['diff'] = df_train['diff'] / np.timedelta64(1, 'D') 156 | df_test['diff'] = df_test['diff']/np.timedelta64(1,'D') 157 | 158 | 159 | # Added feature age based on the birthdate of the passengers 160 | df_train['age'] = df_train['Booking Date'] - df_train['Date of Birth'] 161 | df_train['age'] = df_train['age'] / np.timedelta64(1, 'D') 162 | df_test['age'] = df_test['Booking Date'] - df_test['Date of Birth'] 163 | df_test['age'] = df_test['age'] / np.timedelta64(1, 'D') 164 | 165 | # Added flight_month feature based on the month of the flight 166 | 167 | df_train['flight_month'] = df_train['Flight Date'].dt.month 168 | df_test['flight_month'] = df_test['Flight Date'].dt.month 169 | 170 | # Added feature WEEKEND based on whether the flight is on weekday or weekend 171 | df_train['WEEKEND'] = ((pd.DatetimeIndex(df_train['Flight Date']).dayofweek)//5+1).astype(float) 172 | df_test['WEEKEND'] = ((pd.DatetimeIndex(df_test['Flight Date']).dayofweek)//5+1).astype(float) 173 | #df_test['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float) 174 | #df_train['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float) 175 | 176 | # Added feature weekday based on flight date (Monday - 0 , Tuesday -1 , and so) 177 | df_train['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek 178 | df_test['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek 179 | 180 | # Added feature week of flight based on flight date. 181 | 182 | df_train['FLIGHTWEEK'] = df_train['Flight Date'].dt.week 183 | df_test['FLIGHTWEEK'] = df_test['Flight Date'].dt.week 184 | 185 | df_train = df_train.drop(columns=['Flight Date', 'Booking Date']) 186 | df_test = df_test.drop(columns=['Flight Date', 'Booking Date']) 187 | df_train = df_train.drop(columns='Date of Birth') 188 | df_test = df_test.drop(columns='Date of Birth') 189 | 190 | 191 | dict_cities = {'Mumbai':1 , 'Patna' : 2, 'Kolkata' : 3, 'Delhi' : 4, 'Hyderabad': 5, 'Lucknow': 6, 'Chennai':7} 192 | dict_class = {'Business':25, 'Economy': 10} 193 | df_train = df_train.replace({'From':dict_cities}) 194 | df_test = df_test.replace({'From':dict_cities}) 195 | df_train = df_train.replace({'To': dict_cities}) 196 | df_test = df_test.replace({'To': dict_cities}) 197 | df_train = df_train.replace({'Class':dict_class}) 198 | df_test = df_test.replace({'Class':dict_class}) 199 | 200 | df_train['Flight Time'] = df_train['Flight Time'].str.split(':').str[0] 201 | df_train['age'] = df_train['age']/365.0 202 | df_test['Flight Time'] = df_test['Flight Time'].str.split(':').str[0] 203 | df_test['age'] = df_test['age']/365.0 204 | 205 | time_from = {} 206 | time_from['1'] = [0,130,140,120,75,130,89] 207 | time_from['2'] = [205,0,60,95,104,39,134] 208 | time_from['3'] = [155,90,0,120,120,105,135] 209 | time_from['4'] = [115,80,115,0,120,55,155] 210 | time_from['5'] = [70,104,115,125,0,235,65] 211 | time_from['6'] = [130,39,100,60,245,0,138] 212 | time_from['7'] = [95,134,115,155,65,138,0] 213 | 214 | 215 | 216 | df_train['flight_duration'] = df_train.apply (lambda row: label_race (row),axis=1) 217 | df_test['flight_duration'] = df_test.apply (lambda row: label_race (row),axis=1) 218 | df_train['Flight Time'] = df_train['Flight Time'].astype(str).astype(int) 219 | df_test['Flight Time'] = df_test['Flight Time'].astype(str).astype(int) 220 | df_train['Name'] = df_train['Name'].str.split('.').str[0] 221 | df_test['Name'] = df_test['Name'].str.split('.').str[0] 222 | 223 | df_train['gender'] = df_train.apply (lambda row: label_gender (row),axis=1) 224 | df_test['gender'] = df_test.apply (lambda row: label_gender (row),axis=1) 225 | df_train= df_train.drop(columns='Name') 226 | df_test = df_test.drop(columns='Name') 227 | df_train =df_train.drop(columns='Fare') 228 | train = df_train 229 | y_train = trainY 230 | 231 | 232 | best = optimize(24,1234) 233 | print(best) 234 | xgb = XGBRegressor(n_estimators = int(best['n_estimators']), 235 | learning_rate= best['learning_rate'],objective="reg:linear",booster='gbtree', 236 | gamma = best['gamma'],max_depth=best['max_depth'], 237 | min_child_weight=int(best['min_child_weight']),subsample=best['subsample'], 238 | colsample_bytree=best['colsample_bytree'],reg_alpha=best['alpha'], 239 | reg_lambda=best['lambda'],nthread=24,random_state=1234) 240 | 241 | 242 | xgb.fit(df_train,trainY) 243 | print(xgb.score(df_train,trainY)) 244 | 245 | 246 | xgb_final_preds = xgb.predict(df_test) 247 | np.savetxt('/home/arkadeep/scratch/finall_please_finallll_donee.csv',xgb_final_preds,delimiter = ',') -------------------------------------------------------------------------------- /Question 1/Final_done.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 38, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import os\n", 12 | "train_Y = []\n", 13 | "def dataframe_train(path):\n", 14 | " df = pd.read_csv(path,header=0,parse_dates=[1,4,6])\n", 15 | " train_Y = df['Fare']\n", 16 | " return df,train_Y\n", 17 | "\n", 18 | "def dataframe_test(path):\n", 19 | " df = pd.read_csv(path,header=0,parse_dates=[1,4,6])\n", 20 | " return df" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 39, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "df_train,trainY = dataframe_train('/home/arkadeep/Desktop/JP Morgan/train.csv')\n", 30 | "df_test = dataframe_test('/home/arkadeep/Desktop/JP Morgan/test.csv')\n", 31 | "df_train['diff'] = df_train['Flight Date'] - df_train['Booking Date']\n", 32 | "df_test['diff'] = df_test['Flight Date'] - df_test['Booking Date']\n", 33 | "df_train['diff'] = df_train['diff'] / np.timedelta64(1, 'D')\n", 34 | "df_test['diff'] = df_test['diff']/np.timedelta64(1,'D')\n", 35 | "df_train['age'] = df_train['Booking Date'] - df_train['Date of Birth']\n", 36 | "df_train['age'] = df_train['age'] / np.timedelta64(1, 'D')\n", 37 | "df_test['age'] = df_test['Booking Date'] - df_test['Date of Birth']\n", 38 | "df_test['age'] = df_test['age'] / np.timedelta64(1, 'D')\n", 39 | "df_train['flight_month'] = df_train['Flight Date'].dt.month\n", 40 | "df_test['flight_month'] = df_test['Flight Date'].dt.month\n", 41 | "df_train['WEEKEND'] = ((pd.DatetimeIndex(df_train['Flight Date']).dayofweek)//5+1).astype(float)\n", 42 | "df_test['WEEKEND'] = ((pd.DatetimeIndex(df_test['Flight Date']).dayofweek)//5+1).astype(float)\n", 43 | "df_train['FLIGHTWEEK'] = df_train['Flight Date'].dt.week\n", 44 | "df_test['FLIGHTWEEK'] = df_test['Flight Date'].dt.week\n", 45 | "#df_test['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float)\n", 46 | "#df_train['WEEKDAY'] = (pd.DatetimeIndex(df_test['Booking Date']).dayofweek).astype(float)\n", 47 | "df_train['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek\n", 48 | "df_test['WEEKDAY'] = df_train['Flight Date'].dt.dayofweek" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 40, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "holidays = ['2016-01-01','2016-01-14','2016-01-15','2016-01-16','2016-01-26','2016-02-12','2016-03-07','2016-03-23','2016-03-25','2016-03-27','2016-04-13','2016-04-15','2016-04-20','2016-04-21','2016-05-21','2016-07-08','2016-08-15','2016-08-18','2016-08-25','2016-09-05','2016-09-12','2016-09-14','2016-10-02','2016-10-11','2016-10-12','2016-10-30','2016-10-31','2016-11-01','2016-11-06','2016-11-24','2016-12-13','2016-12-24','2016-12-25']" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 41, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "imp = {'Mumbai':1 , 'Patna' : 2, 'Kolkata' : 3, 'Delhi' : 4, 'Hyderabad': 5, 'Lucknow': 6, 'Chennai':7}" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 42, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "dict_cities = {'Mumbai':1 , 'Patna' : 2, 'Kolkata' : 3, 'Delhi' : 4, 'Hyderabad': 5, 'Lucknow': 6, 'Chennai':7}\n", 76 | "dict_class = {'Business':25, 'Economy': 10}\n", 77 | "df_train = df_train.replace({'From':dict_cities})\n", 78 | "df_test = df_test.replace({'From':dict_cities})\n", 79 | "df_train = df_train.replace({'To': dict_cities})\n", 80 | "df_test = df_test.replace({'To': dict_cities})\n", 81 | "df_train = df_train.replace({'Class':dict_class})\n", 82 | "df_test = df_test.replace({'Class':dict_class})" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 43, 88 | "metadata": { 89 | "code_folding": [ 90 | 3 91 | ] 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "def label_holiday (row):\n", 96 | " if str(row['Flight Date']) in holidays :\n", 97 | " return 3\n", 98 | " elif row['Flight Date'].dayofweek //5 ==1:\n", 99 | " return 2\n", 100 | " else:\n", 101 | " return 1\n", 102 | " return None\n", 103 | "df_train['WEEKEND'] = df_train.apply (lambda row: label_holiday(row),axis=1)\n", 104 | "df_test['WEEKEND'] = df_test.apply (lambda row: label_holiday(row),axis=1)" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 44, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "df_train = df_train.drop(columns=['Flight Date', 'Booking Date'])\n", 114 | "df_test = df_test.drop(columns=['Flight Date', 'Booking Date'])\n", 115 | "df_train = df_train.drop(columns='Date of Birth')\n", 116 | "df_test = df_test.drop(columns='Date of Birth')" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 45, 122 | "metadata": {}, 123 | "outputs": [], 124 | "source": [ 125 | "df_train['Flight Time'] = df_train['Flight Time'].str.split(':').str[0]\n", 126 | "df_train['age'] = df_train['age']/365.0\n", 127 | "df_test['Flight Time'] = df_test['Flight Time'].str.split(':').str[0]\n", 128 | "df_test['age'] = df_test['age']/365.0" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 46, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "time_from = {}\n", 138 | "time_from['1'] = [0,130,140,120,75,130,89]\n", 139 | "time_from['2'] = [205,0,60,95,104,39,134]\n", 140 | "time_from['3'] = [155,90,0,120,120,105,135]\n", 141 | "time_from['4'] = [115,80,115,0,120,55,155]\n", 142 | "time_from['5'] = [70,104,115,125,0,235,65]\n", 143 | "time_from['6'] = [130,39,100,60,245,0,138]\n", 144 | "time_from['7'] = [95,134,115,155,65,138,0]" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 47, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "def label_duration (row):\n", 154 | " return time_from[str(int(row['From']))][int(row['To'])-1]\n", 155 | "\n", 156 | "def label_distance (row):\n", 157 | " if (row['From'] == 1 and row['To'] == 2) or (row['From'] == 2 and row['To'] == 1):\n", 158 | " return 1446\n", 159 | " if (row['From'] == 1 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 1):\n", 160 | " return 1654\n", 161 | " if (row['From'] == 1 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 1):\n", 162 | " return 1148\n", 163 | " if (row['From'] == 1 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 1):\n", 164 | " return 622\n", 165 | " if (row['From'] == 1 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 1):\n", 166 | " return 1190\n", 167 | " if (row['From'] == 1 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 1):\n", 168 | " return 1028\n", 169 | " if (row['From'] == 2 and row['To'] == 3) or (row['From'] == 3 and row['To'] == 2):\n", 170 | " return 470\n", 171 | " if (row['From'] == 2 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 2):\n", 172 | " return 480\n", 173 | " if (row['From'] == 2 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 2):\n", 174 | " return 1140\n", 175 | " if (row['From'] == 2 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 2):\n", 176 | " return 437\n", 177 | " if (row['From'] == 2 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2):\n", 178 | " return 1485\n", 179 | " if (row['From'] == 3 and row['To'] == 4) or (row['From'] == 4 and row['To'] == 3):\n", 180 | " return 1307\n", 181 | " if (row['From'] == 3 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 3):\n", 182 | " return 1180\n", 183 | " if (row['From'] == 3 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 3):\n", 184 | " return 886\n", 185 | " if (row['From'] == 3 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 2):\n", 186 | " return 1366\n", 187 | " if (row['From'] == 4 and row['To'] == 5) or (row['From'] == 5 and row['To'] == 4):\n", 188 | " return 1253\n", 189 | " if (row['From'] == 4 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 4):\n", 190 | " return 417\n", 191 | " if (row['From'] == 4 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 4):\n", 192 | " return 1760\n", 193 | " if (row['From'] == 5 and row['To'] == 6) or (row['From'] == 6 and row['To'] == 5):\n", 194 | " return 1425\n", 195 | " if (row['From'] == 5 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 5):\n", 196 | " return 520\n", 197 | " if (row['From'] == 6 and row['To'] == 7) or (row['From'] == 7 and row['To'] == 6):\n", 198 | " return 1534\n", 199 | " return None" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 48, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "df_train['flight_duration'] = df_train.apply (lambda row: label_duration (row),axis=1)\n", 209 | "df_test['flight_duration'] = df_test.apply (lambda row: label_duration (row),axis=1)\n", 210 | "df_train['distance'] = df_train.apply (lambda row: label_distance (row),axis=1)\n", 211 | "df_test['distance'] = df_test.apply (lambda row: label_distance (row),axis=1)" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 49, 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "df_train['Flight Time'] = df_train['Flight Time'].astype(str).astype(int)\n", 221 | "df_test['Flight Time'] = df_test['Flight Time'].astype(str).astype(int)" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 50, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "df_train['Name'] = df_train['Name'].str.split('.').str[0]\n", 231 | "df_test['Name'] = df_test['Name'].str.split('.').str[0]" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 51, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [ 240 | "def label_gender (row):\n", 241 | " if 'Dr' in row['Name']:\n", 242 | " return 4\n", 243 | " if 'Miss' in row['Name']:\n", 244 | " return 0\n", 245 | " if 'Mrs' in row['Name']:\n", 246 | " return 1\n", 247 | " if 'Mr' in row['Name']:\n", 248 | " return 3\n", 249 | " return None" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 52, 255 | "metadata": {}, 256 | "outputs": [ 257 | { 258 | "data": { 259 | "text/html": [ 260 | "
| \n", 278 | " | From | \n", 279 | "To | \n", 280 | "Flight Time | \n", 281 | "Class | \n", 282 | "diff | \n", 283 | "age | \n", 284 | "flight_month | \n", 285 | "WEEKEND | \n", 286 | "FLIGHTWEEK | \n", 287 | "WEEKDAY | \n", 288 | "flight_duration | \n", 289 | "distance | \n", 290 | "gender | \n", 291 | "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", 296 | "1 | \n", 297 | "3 | \n", 298 | "13 | \n", 299 | "25 | \n", 300 | "16.0 | \n", 301 | "53.027397 | \n", 302 | "11 | \n", 303 | "1 | \n", 304 | "44 | \n", 305 | "4 | \n", 306 | "140 | \n", 307 | "1654.0 | \n", 308 | "4 | \n", 309 | "
| 1 | \n", 312 | "6 | \n", 313 | "5 | \n", 314 | "18 | \n", 315 | "10 | \n", 316 | "50.0 | \n", 317 | "34.832877 | \n", 318 | "9 | \n", 319 | "1 | \n", 320 | "38 | \n", 321 | "0 | \n", 322 | "245 | \n", 323 | "1425.0 | \n", 324 | "4 | \n", 325 | "
| 2 | \n", 328 | "6 | \n", 329 | "2 | \n", 330 | "10 | \n", 331 | "10 | \n", 332 | "3.0 | \n", 333 | "34.898630 | \n", 334 | "5 | \n", 335 | "1 | \n", 336 | "21 | \n", 337 | "4 | \n", 338 | "39 | \n", 339 | "437.0 | \n", 340 | "4 | \n", 341 | "
| 3 | \n", 344 | "1 | \n", 345 | "6 | \n", 346 | "14 | \n", 347 | "10 | \n", 348 | "8.0 | \n", 349 | "27.871233 | \n", 350 | "7 | \n", 351 | "2 | \n", 352 | "29 | \n", 353 | "6 | \n", 354 | "130 | \n", 355 | "1190.0 | \n", 356 | "3 | \n", 357 | "
| 4 | \n", 360 | "3 | \n", 361 | "4 | \n", 362 | "10 | \n", 363 | "10 | \n", 364 | "12.0 | \n", 365 | "22.073973 | \n", 366 | "12 | \n", 367 | "1 | \n", 368 | "51 | \n", 369 | "2 | \n", 370 | "120 | \n", 371 | "1307.0 | \n", 372 | "0 | \n", 373 | "
| 5 | \n", 376 | "7 | \n", 377 | "2 | \n", 378 | "14 | \n", 379 | "25 | \n", 380 | "10.0 | \n", 381 | "10.501370 | \n", 382 | "5 | \n", 383 | "1 | \n", 384 | "20 | \n", 385 | "1 | \n", 386 | "134 | \n", 387 | "1485.0 | \n", 388 | "0 | \n", 389 | "
| 6 | \n", 392 | "1 | \n", 393 | "6 | \n", 394 | "11 | \n", 395 | "25 | \n", 396 | "5.0 | \n", 397 | "67.734247 | \n", 398 | "5 | \n", 399 | "1 | \n", 400 | "21 | \n", 401 | "2 | \n", 402 | "130 | \n", 403 | "1190.0 | \n", 404 | "3 | \n", 405 | "
| 7 | \n", 408 | "7 | \n", 409 | "1 | \n", 410 | "18 | \n", 411 | "10 | \n", 412 | "24.0 | \n", 413 | "72.821918 | \n", 414 | "5 | \n", 415 | "1 | \n", 416 | "20 | \n", 417 | "4 | \n", 418 | "95 | \n", 419 | "1028.0 | \n", 420 | "4 | \n", 421 | "
| 8 | \n", 424 | "4 | \n", 425 | "6 | \n", 426 | "12 | \n", 427 | "10 | \n", 428 | "0.0 | \n", 429 | "44.567123 | \n", 430 | "11 | \n", 431 | "2 | \n", 432 | "47 | \n", 433 | "6 | \n", 434 | "55 | \n", 435 | "417.0 | \n", 436 | "3 | \n", 437 | "
| 9 | \n", 440 | "4 | \n", 441 | "1 | \n", 442 | "12 | \n", 443 | "10 | \n", 444 | "1.0 | \n", 445 | "66.542466 | \n", 446 | "4 | \n", 447 | "2 | \n", 448 | "14 | \n", 449 | "6 | \n", 450 | "115 | \n", 451 | "1148.0 | \n", 452 | "3 | \n", 453 | "
| 10 | \n", 456 | "2 | \n", 457 | "6 | \n", 458 | "21 | \n", 459 | "25 | \n", 460 | "14.0 | \n", 461 | "77.893151 | \n", 462 | "7 | \n", 463 | "1 | \n", 464 | "29 | \n", 465 | "4 | \n", 466 | "39 | \n", 467 | "437.0 | \n", 468 | "4 | \n", 469 | "
| 11 | \n", 472 | "4 | \n", 473 | "2 | \n", 474 | "15 | \n", 475 | "10 | \n", 476 | "15.0 | \n", 477 | "61.632877 | \n", 478 | "3 | \n", 479 | "1 | \n", 480 | "10 | \n", 481 | "0 | \n", 482 | "80 | \n", 483 | "480.0 | \n", 484 | "4 | \n", 485 | "
| 12 | \n", 488 | "7 | \n", 489 | "4 | \n", 490 | "11 | \n", 491 | "10 | \n", 492 | "30.0 | \n", 493 | "33.758904 | \n", 494 | "2 | \n", 495 | "1 | \n", 496 | "9 | \n", 497 | "0 | \n", 498 | "155 | \n", 499 | "1760.0 | \n", 500 | "3 | \n", 501 | "
| 13 | \n", 504 | "3 | \n", 505 | "6 | \n", 506 | "10 | \n", 507 | "25 | \n", 508 | "2.0 | \n", 509 | "26.347945 | \n", 510 | "1 | \n", 511 | "1 | \n", 512 | "1 | \n", 513 | "0 | \n", 514 | "105 | \n", 515 | "886.0 | \n", 516 | "0 | \n", 517 | "
| 14 | \n", 520 | "6 | \n", 521 | "4 | \n", 522 | "2 | \n", 523 | "10 | \n", 524 | "13.0 | \n", 525 | "14.712329 | \n", 526 | "2 | \n", 527 | "1 | \n", 528 | "6 | \n", 529 | "0 | \n", 530 | "60 | \n", 531 | "417.0 | \n", 532 | "0 | \n", 533 | "
| 15 | \n", 536 | "1 | \n", 537 | "7 | \n", 538 | "19 | \n", 539 | "10 | \n", 540 | "9.0 | \n", 541 | "30.238356 | \n", 542 | "6 | \n", 543 | "2 | \n", 544 | "23 | \n", 545 | "5 | \n", 546 | "89 | \n", 547 | "1028.0 | \n", 548 | "0 | \n", 549 | "
| 16 | \n", 552 | "1 | \n", 553 | "4 | \n", 554 | "14 | \n", 555 | "10 | \n", 556 | "16.0 | \n", 557 | "68.221918 | \n", 558 | "5 | \n", 559 | "2 | \n", 560 | "21 | \n", 561 | "5 | \n", 562 | "120 | \n", 563 | "1148.0 | \n", 564 | "4 | \n", 565 | "
| 17 | \n", 568 | "3 | \n", 569 | "4 | \n", 570 | "21 | \n", 571 | "25 | \n", 572 | "17.0 | \n", 573 | "47.271233 | \n", 574 | "3 | \n", 575 | "1 | \n", 576 | "10 | \n", 577 | "0 | \n", 578 | "120 | \n", 579 | "1307.0 | \n", 580 | "3 | \n", 581 | "
| 18 | \n", 584 | "1 | \n", 585 | "5 | \n", 586 | "3 | \n", 587 | "10 | \n", 588 | "16.0 | \n", 589 | "40.569863 | \n", 590 | "12 | \n", 591 | "1 | \n", 592 | "51 | \n", 593 | "3 | \n", 594 | "75 | \n", 595 | "622.0 | \n", 596 | "4 | \n", 597 | "
| 19 | \n", 600 | "4 | \n", 601 | "7 | \n", 602 | "20 | \n", 603 | "10 | \n", 604 | "42.0 | \n", 605 | "41.252055 | \n", 606 | "1 | \n", 607 | "2 | \n", 608 | "2 | \n", 609 | "6 | \n", 610 | "155 | \n", 611 | "1760.0 | \n", 612 | "3 | \n", 613 | "
| 20 | \n", 616 | "1 | \n", 617 | "3 | \n", 618 | "9 | \n", 619 | "25 | \n", 620 | "51.0 | \n", 621 | "65.558904 | \n", 622 | "2 | \n", 623 | "1 | \n", 624 | "8 | \n", 625 | "4 | \n", 626 | "140 | \n", 627 | "1654.0 | \n", 628 | "4 | \n", 629 | "
| 21 | \n", 632 | "6 | \n", 633 | "7 | \n", 634 | "15 | \n", 635 | "25 | \n", 636 | "47.0 | \n", 637 | "39.638356 | \n", 638 | "3 | \n", 639 | "2 | \n", 640 | "10 | \n", 641 | "6 | \n", 642 | "138 | \n", 643 | "1534.0 | \n", 644 | "4 | \n", 645 | "
| 22 | \n", 648 | "6 | \n", 649 | "3 | \n", 650 | "16 | \n", 651 | "10 | \n", 652 | "20.0 | \n", 653 | "64.293151 | \n", 654 | "3 | \n", 655 | "1 | \n", 656 | "12 | \n", 657 | "4 | \n", 658 | "100 | \n", 659 | "886.0 | \n", 660 | "1 | \n", 661 | "
| 23 | \n", 664 | "7 | \n", 665 | "1 | \n", 666 | "21 | \n", 667 | "25 | \n", 668 | "1.0 | \n", 669 | "54.986301 | \n", 670 | "11 | \n", 671 | "2 | \n", 672 | "45 | \n", 673 | "5 | \n", 674 | "95 | \n", 675 | "1028.0 | \n", 676 | "4 | \n", 677 | "
| 24 | \n", 680 | "4 | \n", 681 | "1 | \n", 682 | "9 | \n", 683 | "25 | \n", 684 | "8.0 | \n", 685 | "69.704110 | \n", 686 | "1 | \n", 687 | "2 | \n", 688 | "3 | \n", 689 | "5 | \n", 690 | "115 | \n", 691 | "1148.0 | \n", 692 | "4 | \n", 693 | "
| 25 | \n", 696 | "4 | \n", 697 | "1 | \n", 698 | "21 | \n", 699 | "25 | \n", 700 | "18.0 | \n", 701 | "39.254795 | \n", 702 | "3 | \n", 703 | "2 | \n", 704 | "11 | \n", 705 | "6 | \n", 706 | "115 | \n", 707 | "1148.0 | \n", 708 | "3 | \n", 709 | "
| 26 | \n", 712 | "3 | \n", 713 | "6 | \n", 714 | "10 | \n", 715 | "10 | \n", 716 | "25.0 | \n", 717 | "64.701370 | \n", 718 | "8 | \n", 719 | "1 | \n", 720 | "31 | \n", 721 | "4 | \n", 722 | "105 | \n", 723 | "886.0 | \n", 724 | "3 | \n", 725 | "
| 27 | \n", 728 | "1 | \n", 729 | "6 | \n", 730 | "18 | \n", 731 | "10 | \n", 732 | "7.0 | \n", 733 | "48.356164 | \n", 734 | "9 | \n", 735 | "1 | \n", 736 | "38 | \n", 737 | "3 | \n", 738 | "130 | \n", 739 | "1190.0 | \n", 740 | "4 | \n", 741 | "
| 28 | \n", 744 | "6 | \n", 745 | "4 | \n", 746 | "23 | \n", 747 | "25 | \n", 748 | "31.0 | \n", 749 | "47.301370 | \n", 750 | "3 | \n", 751 | "1 | \n", 752 | "11 | \n", 753 | "0 | \n", 754 | "60 | \n", 755 | "417.0 | \n", 756 | "1 | \n", 757 | "
| 29 | \n", 760 | "3 | \n", 761 | "2 | \n", 762 | "13 | \n", 763 | "10 | \n", 764 | "30.0 | \n", 765 | "72.772603 | \n", 766 | "10 | \n", 767 | "1 | \n", 768 | "42 | \n", 769 | "4 | \n", 770 | "90 | \n", 771 | "470.0 | \n", 772 | "4 | \n", 773 | "
| ... | \n", 776 | "... | \n", 777 | "... | \n", 778 | "... | \n", 779 | "... | \n", 780 | "... | \n", 781 | "... | \n", 782 | "... | \n", 783 | "... | \n", 784 | "... | \n", 785 | "... | \n", 786 | "... | \n", 787 | "... | \n", 788 | "... | \n", 789 | "
| 7470 | \n", 792 | "3 | \n", 793 | "7 | \n", 794 | "20 | \n", 795 | "10 | \n", 796 | "23.0 | \n", 797 | "20.501370 | \n", 798 | "5 | \n", 799 | "2 | \n", 800 | "19 | \n", 801 | "5 | \n", 802 | "135 | \n", 803 | "1366.0 | \n", 804 | "0 | \n", 805 | "
| 7471 | \n", 808 | "5 | \n", 809 | "3 | \n", 810 | "4 | \n", 811 | "25 | \n", 812 | "13.0 | \n", 813 | "39.358904 | \n", 814 | "11 | \n", 815 | "1 | \n", 816 | "45 | \n", 817 | "2 | \n", 818 | "115 | \n", 819 | "1180.0 | \n", 820 | "1 | \n", 821 | "
| 7472 | \n", 824 | "4 | \n", 825 | "6 | \n", 826 | "8 | \n", 827 | "10 | \n", 828 | "1.0 | \n", 829 | "23.079452 | \n", 830 | "6 | \n", 831 | "1 | \n", 832 | "25 | \n", 833 | "0 | \n", 834 | "55 | \n", 835 | "417.0 | \n", 836 | "3 | \n", 837 | "
| 7473 | \n", 840 | "4 | \n", 841 | "6 | \n", 842 | "20 | \n", 843 | "10 | \n", 844 | "17.0 | \n", 845 | "61.216438 | \n", 846 | "5 | \n", 847 | "1 | \n", 848 | "18 | \n", 849 | "0 | \n", 850 | "55 | \n", 851 | "417.0 | \n", 852 | "1 | \n", 853 | "
| 7474 | \n", 856 | "3 | \n", 857 | "2 | \n", 858 | "7 | \n", 859 | "10 | \n", 860 | "21.0 | \n", 861 | "22.668493 | \n", 862 | "11 | \n", 863 | "1 | \n", 864 | "44 | \n", 865 | "3 | \n", 866 | "90 | \n", 867 | "470.0 | \n", 868 | "3 | \n", 869 | "
| 7475 | \n", 872 | "3 | \n", 873 | "1 | \n", 874 | "13 | \n", 875 | "25 | \n", 876 | "0.0 | \n", 877 | "56.397260 | \n", 878 | "11 | \n", 879 | "1 | \n", 880 | "46 | \n", 881 | "2 | \n", 882 | "155 | \n", 883 | "1654.0 | \n", 884 | "4 | \n", 885 | "
| 7476 | \n", 888 | "1 | \n", 889 | "3 | \n", 890 | "11 | \n", 891 | "10 | \n", 892 | "6.0 | \n", 893 | "55.312329 | \n", 894 | "5 | \n", 895 | "1 | \n", 896 | "18 | \n", 897 | "1 | \n", 898 | "140 | \n", 899 | "1654.0 | \n", 900 | "3 | \n", 901 | "
| 7477 | \n", 904 | "6 | \n", 905 | "4 | \n", 906 | "19 | \n", 907 | "10 | \n", 908 | "31.0 | \n", 909 | "65.682192 | \n", 910 | "9 | \n", 911 | "2 | \n", 912 | "37 | \n", 913 | "5 | \n", 914 | "60 | \n", 915 | "417.0 | \n", 916 | "4 | \n", 917 | "
| 7478 | \n", 920 | "7 | \n", 921 | "6 | \n", 922 | "17 | \n", 923 | "10 | \n", 924 | "6.0 | \n", 925 | "58.632877 | \n", 926 | "11 | \n", 927 | "2 | \n", 928 | "45 | \n", 929 | "6 | \n", 930 | "138 | \n", 931 | "1534.0 | \n", 932 | "4 | \n", 933 | "
| 7479 | \n", 936 | "4 | \n", 937 | "1 | \n", 938 | "19 | \n", 939 | "25 | \n", 940 | "15.0 | \n", 941 | "62.079452 | \n", 942 | "12 | \n", 943 | "1 | \n", 944 | "48 | \n", 945 | "3 | \n", 946 | "115 | \n", 947 | "1148.0 | \n", 948 | "3 | \n", 949 | "
| 7480 | \n", 952 | "6 | \n", 953 | "3 | \n", 954 | "16 | \n", 955 | "10 | \n", 956 | "19.0 | \n", 957 | "19.517808 | \n", 958 | "1 | \n", 959 | "1 | \n", 960 | "4 | \n", 961 | "4 | \n", 962 | "100 | \n", 963 | "886.0 | \n", 964 | "0 | \n", 965 | "
| 7481 | \n", 968 | "3 | \n", 969 | "5 | \n", 970 | "13 | \n", 971 | "10 | \n", 972 | "10.0 | \n", 973 | "21.317808 | \n", 974 | "2 | \n", 975 | "2 | \n", 976 | "5 | \n", 977 | "6 | \n", 978 | "120 | \n", 979 | "1180.0 | \n", 980 | "0 | \n", 981 | "
| 7482 | \n", 984 | "4 | \n", 985 | "7 | \n", 986 | "20 | \n", 987 | "10 | \n", 988 | "11.0 | \n", 989 | "33.832877 | \n", 990 | "12 | \n", 991 | "1 | \n", 992 | "48 | \n", 993 | "4 | \n", 994 | "155 | \n", 995 | "1760.0 | \n", 996 | "0 | \n", 997 | "
| 7483 | \n", 1000 | "1 | \n", 1001 | "3 | \n", 1002 | "18 | \n", 1003 | "10 | \n", 1004 | "22.0 | \n", 1005 | "70.671233 | \n", 1006 | "10 | \n", 1007 | "1 | \n", 1008 | "41 | \n", 1009 | "1 | \n", 1010 | "140 | \n", 1011 | "1654.0 | \n", 1012 | "3 | \n", 1013 | "
| 7484 | \n", 1016 | "2 | \n", 1017 | "7 | \n", 1018 | "17 | \n", 1019 | "10 | \n", 1020 | "32.0 | \n", 1021 | "24.432877 | \n", 1022 | "12 | \n", 1023 | "2 | \n", 1024 | "49 | \n", 1025 | "5 | \n", 1026 | "134 | \n", 1027 | "1485.0 | \n", 1028 | "3 | \n", 1029 | "
| 7485 | \n", 1032 | "5 | \n", 1033 | "7 | \n", 1034 | "10 | \n", 1035 | "10 | \n", 1036 | "5.0 | \n", 1037 | "30.254795 | \n", 1038 | "6 | \n", 1039 | "1 | \n", 1040 | "23 | \n", 1041 | "3 | \n", 1042 | "65 | \n", 1043 | "520.0 | \n", 1044 | "1 | \n", 1045 | "
| 7486 | \n", 1048 | "6 | \n", 1049 | "7 | \n", 1050 | "9 | \n", 1051 | "25 | \n", 1052 | "46.0 | \n", 1053 | "33.435616 | \n", 1054 | "7 | \n", 1055 | "1 | \n", 1056 | "27 | \n", 1057 | "1 | \n", 1058 | "138 | \n", 1059 | "1534.0 | \n", 1060 | "3 | \n", 1061 | "
| 7487 | \n", 1064 | "2 | \n", 1065 | "7 | \n", 1066 | "19 | \n", 1067 | "25 | \n", 1068 | "9.0 | \n", 1069 | "45.630137 | \n", 1070 | "10 | \n", 1071 | "2 | \n", 1072 | "43 | \n", 1073 | "5 | \n", 1074 | "134 | \n", 1075 | "1485.0 | \n", 1076 | "4 | \n", 1077 | "
| 7488 | \n", 1080 | "3 | \n", 1081 | "7 | \n", 1082 | "18 | \n", 1083 | "10 | \n", 1084 | "24.0 | \n", 1085 | "42.317808 | \n", 1086 | "6 | \n", 1087 | "2 | \n", 1088 | "22 | \n", 1089 | "6 | \n", 1090 | "135 | \n", 1091 | "1366.0 | \n", 1092 | "4 | \n", 1093 | "
| 7489 | \n", 1096 | "2 | \n", 1097 | "4 | \n", 1098 | "14 | \n", 1099 | "10 | \n", 1100 | "12.0 | \n", 1101 | "70.712329 | \n", 1102 | "11 | \n", 1103 | "1 | \n", 1104 | "46 | \n", 1105 | "1 | \n", 1106 | "95 | \n", 1107 | "480.0 | \n", 1108 | "4 | \n", 1109 | "
| 7490 | \n", 1112 | "2 | \n", 1113 | "4 | \n", 1114 | "4 | \n", 1115 | "10 | \n", 1116 | "36.0 | \n", 1117 | "73.630137 | \n", 1118 | "10 | \n", 1119 | "2 | \n", 1120 | "42 | \n", 1121 | "6 | \n", 1122 | "95 | \n", 1123 | "480.0 | \n", 1124 | "1 | \n", 1125 | "
| 7491 | \n", 1128 | "1 | \n", 1129 | "7 | \n", 1130 | "19 | \n", 1131 | "10 | \n", 1132 | "6.0 | \n", 1133 | "60.780822 | \n", 1134 | "11 | \n", 1135 | "1 | \n", 1136 | "46 | \n", 1137 | "1 | \n", 1138 | "89 | \n", 1139 | "1028.0 | \n", 1140 | "1 | \n", 1141 | "
| 7492 | \n", 1144 | "1 | \n", 1145 | "7 | \n", 1146 | "14 | \n", 1147 | "10 | \n", 1148 | "0.0 | \n", 1149 | "17.504110 | \n", 1150 | "4 | \n", 1151 | "2 | \n", 1152 | "16 | \n", 1153 | "5 | \n", 1154 | "89 | \n", 1155 | "1028.0 | \n", 1156 | "0 | \n", 1157 | "
| 7493 | \n", 1160 | "4 | \n", 1161 | "2 | \n", 1162 | "2 | \n", 1163 | "10 | \n", 1164 | "18.0 | \n", 1165 | "50.389041 | \n", 1166 | "1 | \n", 1167 | "2 | \n", 1168 | "2 | \n", 1169 | "5 | \n", 1170 | "80 | \n", 1171 | "480.0 | \n", 1172 | "3 | \n", 1173 | "
| 7494 | \n", 1176 | "5 | \n", 1177 | "6 | \n", 1178 | "7 | \n", 1179 | "10 | \n", 1180 | "63.0 | \n", 1181 | "19.301370 | \n", 1182 | "7 | \n", 1183 | "1 | \n", 1184 | "27 | \n", 1185 | "4 | \n", 1186 | "235 | \n", 1187 | "1425.0 | \n", 1188 | "3 | \n", 1189 | "
| 7495 | \n", 1192 | "6 | \n", 1193 | "1 | \n", 1194 | "17 | \n", 1195 | "25 | \n", 1196 | "9.0 | \n", 1197 | "37.183562 | \n", 1198 | "3 | \n", 1199 | "1 | \n", 1200 | "9 | \n", 1201 | "2 | \n", 1202 | "130 | \n", 1203 | "1190.0 | \n", 1204 | "1 | \n", 1205 | "
| 7496 | \n", 1208 | "1 | \n", 1209 | "7 | \n", 1210 | "17 | \n", 1211 | "10 | \n", 1212 | "1.0 | \n", 1213 | "63.457534 | \n", 1214 | "12 | \n", 1215 | "1 | \n", 1216 | "52 | \n", 1217 | "0 | \n", 1218 | "89 | \n", 1219 | "1028.0 | \n", 1220 | "3 | \n", 1221 | "
| 7497 | \n", 1224 | "3 | \n", 1225 | "1 | \n", 1226 | "7 | \n", 1227 | "25 | \n", 1228 | "4.0 | \n", 1229 | "27.602740 | \n", 1230 | "1 | \n", 1231 | "1 | \n", 1232 | "3 | \n", 1233 | "2 | \n", 1234 | "155 | \n", 1235 | "1654.0 | \n", 1236 | "0 | \n", 1237 | "
| 7498 | \n", 1240 | "6 | \n", 1241 | "2 | \n", 1242 | "11 | \n", 1243 | "10 | \n", 1244 | "20.0 | \n", 1245 | "14.654795 | \n", 1246 | "3 | \n", 1247 | "1 | \n", 1248 | "10 | \n", 1249 | "3 | \n", 1250 | "39 | \n", 1251 | "437.0 | \n", 1252 | "0 | \n", 1253 | "
| 7499 | \n", 1256 | "4 | \n", 1257 | "2 | \n", 1258 | "22 | \n", 1259 | "10 | \n", 1260 | "42.0 | \n", 1261 | "35.293151 | \n", 1262 | "10 | \n", 1263 | "2 | \n", 1264 | "42 | \n", 1265 | "6 | \n", 1266 | "80 | \n", 1267 | "480.0 | \n", 1268 | "1 | \n", 1269 | "
7500 rows × 13 columns
\n", 1273 | "