├── bestbuy ├── bestbuy.py └── readme ├── emc ├── data │ ├── SiteLocations.csv │ ├── SiteLocations_with_more_sites.csv │ ├── SubmissionZerosExceptNAs.csv │ ├── TrainingData (copy).csv │ ├── TrainingData.csv │ └── result.csv ├── feature_extraction.py ├── log ├── main.py ├── preprocess.py ├── readme ├── regression.py └── utilities.py ├── fb_suggest_missing_link ├── candidate.py ├── main.py ├── rank.py ├── readme ├── utilities.py └── validation.py ├── insult_detect ├── insult_detect.py └── readme ├── kicked_car ├── classification.py ├── data │ ├── .~lock.pos.csv# │ ├── .~lock.test.csv# │ ├── .~lock.training.csv# │ ├── feature_idx.csv │ ├── idx │ ├── log │ ├── log2 │ ├── res.csv │ ├── test.csv │ └── training.csv ├── feature_extraction.py ├── main.py ├── preprocess.py ├── readme └── utilities.py ├── music_rating ├── music_rating.py └── readme ├── photo_quality_prediction ├── classification.py ├── data │ ├── result.csv │ ├── statistics │ ├── test.csv │ └── training.csv ├── feature_selection.py ├── main.py ├── readme └── utilities.py ├── readme.md └── titanic ├── data ├── data.csv ├── data2.csv ├── error.csv ├── result.csv ├── result2.csv ├── result3.csv ├── result4.csv ├── test.csv ├── test2.csv └── train.csv ├── logistic_regression.py ├── readme └── titanic.py /bestbuy/bestbuy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | import re 4 | import operator 5 | import Levenshtein 6 | 7 | __train__ = './data/train.csv' 8 | __test__ = './data/test.csv' 9 | 10 | def read_data(path, cols,ignore_header=True): 11 | csv_file_object = csv.reader(open(path, 'rb')) 12 | if ignore_header: 13 | header = csv_file_object.next() 14 | x = [] 15 | for row in csv_file_object: 16 | r = [] 17 | for col in cols: 18 | r.append(row[col]) 19 | x.append(r) 20 | return x 21 | 22 | def string_normalize(s): 23 | res = s.lower() 24 | res = res.replace(' ', '') 25 | res = ''.join(c for c in res if c.isalnum()) 26 | return res 27 | 28 | def preprocess_data(raw_data, col): 29 | for row in raw_data: 30 | row[col] = string_normalize(row[col]) 31 | 32 | def best_match_key(keys, query): 33 | similarity = 0 34 | best_key = None 35 | for key in keys: 36 | sim = Levenshtein.ratio(key, query) 37 | if sim > similarity: 38 | similarity = sim 39 | best_key = key 40 | return (best_key, similarity) 41 | 42 | def create_match(x, thresh=.85): 43 | match = {} 44 | for row in x: 45 | matched_key = None 46 | sku, query = row 47 | # Fuzzy matching. 48 | best_key, similarity = best_match_key(match.keys(), query) 49 | if similarity > thresh: 50 | matched_key = best_key 51 | else: 52 | match[query] = {sku : 1} 53 | if matched_key is None: 54 | continue 55 | if not match[matched_key].has_key(sku): 56 | match[matched_key][sku] = 1 57 | else: 58 | match[matched_key][sku] += 1 59 | 60 | # Sorts the dictionary. 
61 |     for key in match.keys():
62 |         tmp_dict = match[key]
63 |         tmp_dict = sorted(tmp_dict.iteritems(), key=operator.itemgetter(1))
64 |         tmp_dict.reverse()
65 |         match[key] = tmp_dict
66 |     return match
67 | 
68 | def get_top(x):
69 |     sku_count_dict = {}
70 |     for row in x:
71 |         if not sku_count_dict.has_key(row[0]):
72 |             sku_count_dict[row[0]] = 1
73 |         else:
74 |             sku_count_dict[row[0]] += 1
75 |     sorted_dict = sorted(sku_count_dict.iteritems(), key=operator.itemgetter(1))
76 |     sorted_dict.reverse()
77 | 
78 |     res = []
79 |     for i in range(len(sorted_dict)):
80 |         res.append(sorted_dict[i][0])
81 |     return res
82 | 
83 | def predict(match, top, query, k, thresh=.7):
84 |     res = []
85 |     matched_key, similarity = best_match_key(match.keys(), query)
86 |     # if similarity < 0.8:
87 |     #     print 'matched_key = %s, query = %s, sim = %s' \
88 |     #         % (matched_key, query, similarity)
89 |     if similarity > thresh:
90 |         for i in range(min(k, len(match[matched_key]))):
91 |             res.append(match[matched_key][i][0])
92 |     if len(res) < k:
93 |         for i in range(len(top)):
94 |             if top[i] not in res:
95 |                 res.append(top[i])
96 |             if len(res) == k:
97 |                 break
98 |     return res
99 | 
100 | if __name__ == '__main__':
101 |     # Reads training data.
102 |     print 'Reading and preprocessing data...'
103 |     x = read_data(__train__, [1, 3])
104 |     preprocess_data(x, 1)
105 | 
106 |     # Divides into training and cv.
107 |     l = int(len(x) * 1)
108 |     x_cv = x[l - 10 : :]
109 |     x = x[0 : l]
110 | 
111 |     top = get_top(x)
112 | 
113 |     # Predicts on cv.
114 |     print 'Predicting...'
115 |     k = 5
116 |     # thresh_match = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
117 |     # thresh_predict = [0, 0.2, 0.4, 0.6, 0.7, 0.8]
118 |     thresh_match = [0.75]
119 |     thresh_predict = [0]
120 |     best_match = None
121 |     thresh1 = 0
122 |     thresh2 = 0
123 |     accuracy = -1
124 |     for t1 in thresh_match:
125 |         for t2 in thresh_predict:
126 |             match = create_match(x, t1)
127 |             p_cv = []
128 |             for row in x_cv:
129 |                 q = row[1]
130 |                 p_cv.append(predict(match, top, q, k, t2))
131 |             correct = 0.
132 |             for i in range(len(x_cv)):
133 |                 if x_cv[i][0] in p_cv[i]:
134 |                     correct += 1.
135 |             ac = correct / len(x_cv)
136 |             print 't1 = %f, t2 = %f, accuracy = %f' % (t1, t2, ac)
137 |             if ac > accuracy:
138 |                 accuracy = ac
139 |                 thresh1 = t1
140 |                 thresh2 = t2
141 |                 best_match = match
142 |     print 'thresh1 = %f, thresh2 = %f, accuracy = %f' \
143 |         % (thresh1, thresh2, accuracy)
144 | 
145 |     # Reads test set.
146 |     x_test = read_data(__test__, [2])
147 |     preprocess_data(x_test, 0)
148 | 
149 |     # Predicts.
150 |     res = []
151 |     k = 5
152 |     for row in x_test:
153 |         q = row[0]
154 |         res.append(predict(best_match, top, q, k, thresh2))
155 | 
156 |     open_file_object = csv.writer(open("./data/result.csv", "wb"))
157 |     open_file_object.writerow(['sku'])
158 |     for p in res:
159 |         open_file_object.writerow([' '.join(p)])
160 | 
--------------------------------------------------------------------------------
/bestbuy/readme:
--------------------------------------------------------------------------------
1 | This code is for Bestbuy - Predict which Xbox game a visitor will be most interested in based on their search query (https://www.kaggle.com/c/acm-sf-chapter-hackathon-small).
2 | The approach is pretty straightforward: the basic idea is to match a query with a game (so I ignore all other features, like time and game name). The initial approach is to create a map whose key is the user's query and whose value is another map, recording which games users clicked for that query and how frequently; a minimal sketch of this structure is shown below.
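The sketch assumes the python-Levenshtein package (already used by bestbuy.py) and made-up click data:

import Levenshtein

clicks = [('2078113', 'halo reach'), ('2078113', 'halo reech'),
          ('9854804', 'gears of war')]  # (sku, query) pairs, illustrative only
match = {}
for sku, query in clicks:
    best_key, best_sim = None, 0.0
    for key in match:
        sim = Levenshtein.ratio(key, query)
        if sim > best_sim:
            best_key, best_sim = key, sim
    if best_sim > 0.85:
        # close enough to an existing query: count the click there
        match[best_key][sku] = match[best_key].get(sku, 0) + 1
    else:
        # otherwise start a new canonical query
        match[query] = {sku: 1}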
After that, when predicting the game, we simply choose the most frequently clicked games for this query, and if there's less than 5 games, I use the most popular games among all queries to fill the gap. 3 | One optimization I used is to correct users' queries. Since there are lots of typos or short forms, I use Levenshtein to calculate two queries similarity, and if it is above a threshold, the algorithm believe they are the same query. The threshold is chosen by testing on cross validation set. 4 | Finally, I got around 74.3% and the leader is 78.9%. 5 | -------------------------------------------------------------------------------- /emc/data/SiteLocations.csv: -------------------------------------------------------------------------------- 1 | "SITE_ID","LATITUDE","LONGITUDE" 2 | 1,41.6709918952829,-87.7324568962847 3 | 32,41.755832412403,-87.545349670582 4 | 50,41.7075695897648,-87.5685738570845 5 | 57,41.9128621248178,-87.7227234452095 6 | 64,41.7907868783739,-87.6016464917605 7 | 1003,41.9843323270383,-87.7920016971163 8 | 1018,41.773889,-87.815278 9 | 1601,41.6681203371799,-87.9905696935943 10 | 2001,41.6621094347378,-87.6964665157993 11 | 4002,41.8552431328191,-87.7524696987103 12 | 4101,42.053333,-88.108056 13 | 6004,41.8721168410596,-87.8290802510295 14 | 6006,41.8728971999587,-87.8258724913966 15 | 8003,41.631389,-87.568056 16 | -------------------------------------------------------------------------------- /emc/data/SiteLocations_with_more_sites.csv: -------------------------------------------------------------------------------- 1 | "SITE_ID","LATITUDE","LONGITUDE" 2 | 1,41.6709918952829,-87.7324568962847 3 | 14,41.834243,-87.6238 4 | 22,41.6871654376343,-87.5393154841479 5 | 32,41.755832412403,-87.545349670582 6 | 50,41.7075695897648,-87.5685738570845 7 | 52,41.9654848301767,-87.7499280553202 8 | 57,41.9128621248178,-87.7227234452095 9 | 64,41.7907868783739,-87.6016464917605 10 | 76,41.7513999786378,-87.7134881520007 11 | 1003,41.9843323270383,-87.7920016971163 12 | 1018,41.773889,-87.815278 13 | 1601,41.6681203371799,-87.9905696935943 14 | 2001,41.6621094347378,-87.6964665157993 15 | 3301,41.7827660079251,-87.8053767946675 16 | 4002,41.8552431328191,-87.7524696987103 17 | 4101,42.053333,-88.108056 18 | 6004,41.8721168410596,-87.8290802510295 19 | 6005,41.8644264230095,-87.7489023825124 20 | 6006,41.8728971999587,-87.8258724913966 21 | 8003,41.631389,-87.568056 22 | -------------------------------------------------------------------------------- /emc/feature_extraction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import preprocess 3 | import utilities 4 | 5 | def create_x_y(data, target, pos): 6 | span = 24 7 | x = [] 8 | y = [] 9 | for i in range(len(data)): 10 | if i % 192 < pos + span: 11 | continue 12 | chunk_id = data[i][1] 13 | hour = data[i][5] 14 | t = len(data[0]) - 39 + target 15 | 16 | features = [] 17 | prev_hour = 0 18 | for j in range(i - pos - span, i - pos): 19 | features.append(float(data[j][t])) 20 | if data[j][5] == hour: 21 | prev_hour = float(data[j][t]) 22 | 23 | features.append(prev_hour) 24 | 25 | # Binary hour features. 26 | for h in range(24): 27 | if h == int(hour): 28 | features.append(1) 29 | else: 30 | features.append(0) 31 | 32 | # Binary month features. 33 | month = int(data[i][3]) 34 | for m in range(1, 13): 35 | if m == month: 36 | features.append(1) 37 | else: 38 | features.append(0) 39 | 40 | # Weather features. 
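        # Columns 6-55 appear to hold the raw weather measurements; the two
        # loops below append them for the most recent available hour (i - pos)
        # and for the hour before that (i - pos - 1).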
41 | for k in range(6, 56): 42 | features.append(float(data[i - pos][k])) 43 | for k in range(6, 56): 44 | features.append(float(data[i - pos - 1][k])) 45 | 46 | x.append(features) 47 | 48 | y.append(float(data[i][t])) 49 | 50 | return x, y 51 | 52 | def get_features(chunk_id, weekday, hour, chunk_avg, hour_avg_by_chunk, 53 | weekday_avg_by_chunk, hour_avg, weekday_avg): 54 | avg = [0.0] * 39 55 | for chunk_id in chunk_avg.keys(): 56 | for i in range(len(avg)): 57 | avg[i] += chunk_avg[chunk_id][i] 58 | 59 | for i in range(len(avg)): 60 | avg[i] /= float(len(chunk_avg)) 61 | 62 | tmp = [] 63 | if chunk_id in chunk_avg: 64 | tmp.append(chunk_avg[chunk_id]) 65 | else: 66 | tmp.append(avg) 67 | # if weekday in weekday_avg_by_chunk[chunk_id]: 68 | # tmp.append(weekday_avg_by_chunk[chunk_id][weekday]) 69 | # else: 70 | # tmp.append(weekday_avg[weekday]) 71 | if chunk_id in chunk_avg and hour in hour_avg_by_chunk[chunk_id]: 72 | tmp.append(hour_avg_by_chunk[chunk_id][hour]) 73 | else: 74 | tmp.append(hour_avg[hour]) 75 | return tmp 76 | 77 | def get_avgs(data, chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk, 78 | hour_avg, weekday_avg): 79 | res = [] 80 | for line in data: 81 | chunk_id = line[1] 82 | weekday = line[4] 83 | hour = line[5] 84 | 85 | tmp = get_features(chunk_id, weekday, hour, chunk_avg, 86 | hour_avg_by_chunk, weekday_avg_by_chunk, 87 | hour_avg, weekday_avg) 88 | res.append(tmp) 89 | return res 90 | 91 | def get_avg_maps(train_data): 92 | chunk_avg = utilities.get_chunk_avg(train_data) 93 | hour_avg = utilities.get_hour_avg(train_data) 94 | hour_avg_by_chunk = utilities.get_hour_avg_by_chunk(train_data) 95 | weekday_avg = utilities.get_weekday_avg(train_data) 96 | weekday_avg_by_chunk = utilities.get_weekday_avg_by_chunk(train_data) 97 | 98 | return (chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk, 99 | hour_avg, weekday_avg) 100 | 101 | def get_x_by_avg(train_data, cv_data, chunk_avg, hour_avg_by_chunk, 102 | weekday_avg_by_chunk, hour_avg, weekday_avg): 103 | x_train = get_avgs(train_data, chunk_avg, hour_avg_by_chunk, 104 | weekday_avg_by_chunk, hour_avg, weekday_avg) 105 | x_cv = get_avgs(cv_data, chunk_avg, hour_avg_by_chunk, 106 | weekday_avg_by_chunk, hour_avg, weekday_avg) 107 | return x_train, x_cv 108 | 109 | def get_x_y_by_target(x_train_all, x_cv_all, targets_train, targets_cv, index): 110 | x_train = [] 111 | y_train = [] 112 | for i in range(len(targets_train)): 113 | if not targets_train[i][index] == 'NA': 114 | tmp = [] 115 | for features in x_train_all[i]: 116 | tmp.append(features[index]) 117 | x_train.append(tmp) 118 | y_train.append(float(targets_train[i][index])) 119 | 120 | x_cv = [] 121 | y_cv = [] 122 | for i in range(len(targets_cv)): 123 | if not targets_cv[i][index] == 'NA': 124 | tmp = [] 125 | for features in x_cv_all[i]: 126 | tmp.append(features[index]) 127 | x_cv.append(tmp) 128 | y_cv.append(float(targets_cv[i][index])) 129 | 130 | return x_train, y_train, x_cv, y_cv 131 | 132 | if __name__ == '__main__': 133 | pass 134 | -------------------------------------------------------------------------------- /emc/log: -------------------------------------------------------------------------------- 1 | average by chunk 0.28652 2 | average by hour in chunk 0.27529 3 | 4 | LR with hour_avg and chunk_avg 0.29876 5 | 6 | LR with previous 24h for each pos,target 0.22850 7 | LR with previous 48h for each pos,target 0.22686 8 | 24h prev, hour mean 0.23569 9 | 10 | 24h prev, prev_our 0.22792 11 | 24h prev, prev_our, binary_hour 0.22166 12 | 24h prev, 
prev_our, binary_hour, in chunk 0.22147 13 | 24h prev, prev_our, binary_hour, binary_weekday, in chunk 0.22252 14 | 24h prev, prev_our, binary_hour, binary_month, in chunk 0.22105 15 | 48h prev, prev_our, binary_hour, binary_month, in chunk 0.22098 16 | 24h prev, prev_our, binary_hour, binary_month, binary_site, in chunk 0.22105 17 | 24h prev, prev_our, binary_hour, binary_month, in chunk, last_weather 0.21862 18 | 24h prev, prev_our, binary_hour, binary_month, in chunk, last_2_weather 0.21795 19 | 24h prev, prev_our, binary_hour, binary_month, in chunk, last_3_weather 0.21827 20 | 48h prev, prev_our, binary_hour, binary_month, in chunk, last_2_weather 0.21820 21 | -------------------------------------------------------------------------------- /emc/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | import utilities 4 | import preprocess 5 | import feature_extraction 6 | import regression 7 | 8 | def time_series(training_file, submission_file, output_file): 9 | data = utilities.read_file(training_file, True) 10 | first_line = data[0] 11 | data = data[1 : :] 12 | data = preprocess.fill_NAs(data) 13 | 14 | (chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk, 15 | hour_avg, weekday_avg) = feature_extraction.get_avg_maps(data) 16 | 17 | clf_map = regression.linear_regression_2(data) 18 | 19 | print 'Filling submission file...' 20 | chunk_map = utilities.get_chunk_map(data, 1) 21 | sub_data = utilities.read_file(submission_file, True) 22 | 23 | positions = [1, 2, 3, 4, 5, 10, 17, 24, 48, 72] 24 | for i in range(1, len(sub_data)): 25 | chunk_id = sub_data[i][1] 26 | hour = sub_data[i][3] 27 | pos = positions[(i - 1) % 10] 28 | for j in range(5, len(sub_data[i])): 29 | target = j - 5 30 | if sub_data[i][j] == '0': 31 | if not chunk_id in chunk_map: 32 | sub_data[i][j] = hour_avg[hour][target] 33 | else: 34 | data_in_chunk = chunk_map[chunk_id] 35 | start = len(data_in_chunk) - 24 36 | t = len(data_in_chunk[0]) - 39 + target 37 | features = [] 38 | prev_hour = 0 39 | for k in range(start, len(data_in_chunk)): 40 | features.append(float(data_in_chunk[k][t])) 41 | if data_in_chunk[k][5] == hour: 42 | prev_hour = float(data_in_chunk[k][t]) 43 | 44 | features.append(prev_hour) 45 | 46 | # Binary hour features. 47 | for h in range(24): 48 | if h == int(hour): 49 | features.append(1) 50 | else: 51 | features.append(0) 52 | 53 | # Binary month features. 54 | month = int(sub_data[i][4]) 55 | for m in range(1, 13): 56 | if m == month: 57 | features.append(1) 58 | else: 59 | features.append(0) 60 | 61 | # Weather features. 
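                    # Mirrors create_x_y in feature_extraction.py: append the
                    # weather columns (6-55) from the last two observed rows of
                    # the chunk.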
62 | tmp_length = len(data_in_chunk) 63 | for k in range(6, 56): 64 | features.append(float(data_in_chunk[tmp_length - 1][k])) 65 | for k in range(6, 56): 66 | features.append(float(data_in_chunk[tmp_length - 2][k])) 67 | 68 | sub_data[i][j] = \ 69 | clf_map[(target, pos)].predict([features])[0] 70 | 71 | utilities.write_file(output_file, sub_data) 72 | 73 | def avg(training_file, submission_file, output_file): 74 | data = utilities.read_file(training_file) 75 | 76 | train_data, cv_data = preprocess.get_train_cv_data_by_chunk(data) 77 | targets_train, targets_cv = preprocess.get_train_cv_targets( 78 | train_data, cv_data) 79 | 80 | (chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk, 81 | hour_avg, weekday_avg) = feature_extraction.get_avg_maps(train_data) 82 | 83 | x_train_all, x_cv_all = feature_extraction.get_x_by_avg( 84 | train_data, cv_data, chunk_avg, hour_avg_by_chunk, 85 | weekday_avg_by_chunk, hour_avg, weekday_avg) 86 | 87 | clfs = regression.linear_regression( 88 | x_train_all, x_cv_all, targets_train, targets_cv) 89 | clfs = regression.random_forest( 90 | x_train_all, x_cv_all, targets_train, targets_cv) 91 | 92 | print 'Filling submission file...' 93 | sub_data = utilities.read_file(submission_file, True) 94 | for i in range(1, len(sub_data)): 95 | chunk_id = sub_data[i][1] 96 | hour = sub_data[i][3] 97 | weekday = '' 98 | all_features = feature_extraction.get_features( 99 | chunk_id, weekday, hour, chunk_avg, hour_avg_by_chunk, 100 | weekday_avg_by_chunk, hour_avg, weekday_avg) 101 | 102 | for j in range(5, len(sub_data[i])): 103 | if sub_data[i][j] == '0': 104 | feature = [] 105 | for f in all_features: 106 | feature.append(f[j - 5]) 107 | sub_data[i][j] = clfs[j - 5].predict([feature])[0] 108 | 109 | utilities.write_file(output_file, sub_data) 110 | 111 | def baseline(training_file, submission_file, output_file): 112 | data = utilities.read_file(training_file) 113 | sub_data = utilities.read_file(submission_file, True) 114 | 115 | print 'Calculating hour averages...' 116 | hour_avg_by_chunk = utilities.get_hour_avg_by_chunk(data) 117 | hour_avg = utilities.get_hour_avg(data) 118 | 119 | print 'Filling submission file...' 120 | for i in range(1, len(sub_data)): 121 | chunk_id = sub_data[i][1] 122 | hour = sub_data[i][3] 123 | for j in range(5, len(sub_data[i])): 124 | if sub_data[i][j] == '0': 125 | if chunk_id in hour_avg_by_chunk: 126 | sub_data[i][j] = hour_avg_by_chunk[chunk_id][hour][j - 5] 127 | else: 128 | sub_data[i][j] = hour_avg[hour][j - 5] 129 | 130 | utilities.write_file(output_file, sub_data) 131 | 132 | if __name__ == '__main__': 133 | start_time = time.time() 134 | time_series('./data/TrainingData.csv', 135 | './data/SubmissionZerosExceptNAs.csv', 136 | './data/result.csv') 137 | print (time.time() - start_time) / 60.0, 'minutes' 138 | -------------------------------------------------------------------------------- /emc/preprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import utilities 3 | 4 | def translate_weekday(data): 5 | print 'Translating weekdays...' 6 | weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 7 | 'Friday', 'Saturday', 'Sunday'] 8 | for i in range(len(data)): 9 | for j in range(len(weekdays)): 10 | if data[i][4] == weekdays[j]: 11 | data[i][4] = j + 1 12 | return data 13 | 14 | def fill_NAs(data): 15 | print 'Filling NAs...' 
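    # Back-fill: if a column starts with NAs, copy the first observed value
    # into every row before it; then forward-fill any remaining NAs from the
    # previous row.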
16 |     for target in range(6, len(data[0])):
17 |         if data[0][target] == 'NA':
18 |             for i in range(len(data)):
19 |                 if not data[i][target] == 'NA':
20 |                     for j in range(0, i):
21 |                         data[j][target] = data[i][target]
22 |                     break
23 | 
24 |     for i in range(len(data)):
25 |         for j in range(6, len(data[0])):
26 |             if data[i][j] == 'NA':
27 |                 if i > 0 and not data[i - 1][j] == 'NA':
28 |                     data[i][j] = data[i - 1][j]
29 | 
30 |     return data
31 | 
32 | def get_train_cv_data_by_chunk(data):
33 |     chunk_map = utilities.get_chunk_map(data, 1)
34 | 
35 |     train_data = []
36 |     cv_data = []
37 |     for chunk_id in chunk_map.keys():
38 |         num = len(chunk_map[chunk_id])
39 |         train_num = 147
40 |         train_data += chunk_map[chunk_id][0 : train_num]
41 |         cv_data += chunk_map[chunk_id][train_num : :]
42 |     return train_data, cv_data
43 | 
44 | def get_train_cv_targets(train_data, cv_data):
45 |     return get_targets(train_data), get_targets(cv_data)
46 | 
47 | def get_targets(data):
48 |     targets = []
49 |     for line in data:
50 |         n = len(line)
51 |         targets.append(line[n - 39 : :])
52 |     return targets
53 | 
54 | if __name__ == '__main__':
55 |     pass
56 | 
--------------------------------------------------------------------------------
/emc/readme:
--------------------------------------------------------------------------------
1 | This is the code for the EMC Data Science Global Hackathon (Air Quality Prediction)
2 | (http://www.kaggle.com/c/dsg-hackathon). The problem is to predict future air quality
3 | based on past air quality and some other weather info (some of the data may be missing).
4 | My best approach builds a linear regression model for each predicted target and for each position
5 | within the chunk, so there are 390 models in total. Features are mainly past target information.
6 | Specifically, I include the past 24 hours of target data in the features, which seem to be the
7 | most effective ones. Hour and month information is also important; I binarized both into the features.
8 | Additionally, the most recent 2 days of weather information and the most recent hour of target
9 | data also improve the result. One thing I learned is that for this kind of time-series problem,
10 | past target data is very important: even using only that data, the result is already good enough.
11 | Finally, after about 68 minutes of training, I achieved an MAE of 0.21795, ranking 4th.
12 | 
--------------------------------------------------------------------------------
/emc/regression.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from sklearn import linear_model
3 | from sklearn.ensemble import RandomForestClassifier
4 | import feature_extraction
5 | import utilities
6 | 
7 | def linear_regression_2(data):
8 |     print 'Training with linear regression 2...'
9 |     clf_map = {}
10 |     positions = [1, 2, 3, 4, 5, 10, 17, 24, 48, 72]
11 |     mae = 0.0
12 |     num = 0
13 |     for target in range(0, 39):
14 |         for pos in positions:
15 |             t = len(data[0]) - 39 + target
16 |             key = (target, pos)
17 |             x, y = feature_extraction.create_x_y(data, target, pos)
18 | 
19 |             clf = linear_model.LinearRegression()
20 |             clf.fit(x, y)
21 |             clf_map[key] = clf
22 | 
23 |             p = clf.predict(x)
24 |             mae += utilities.ae(y, p)
25 |             num += len(y)
26 | 
27 |             print '(%s, %s) completed.' % (target, pos)
28 |     mae /= float(num)
29 |     print 'MAE = %s' % mae
30 |     return clf_map
31 | 
32 | def linear_regression(x_train_all, x_cv_all, targets_train, targets_cv):
33 |     print 'Training with linear regression...'
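    # Delegates to regression() below, which fits one model per target column
    # and reports MAE on both the training and cv splits.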
34 | clfs = regression(x_train_all, x_cv_all, targets_train, targets_cv, 35 | linear_model.LinearRegression) 36 | return clfs 37 | 38 | def random_forest(x_train_all, x_cv_all, targets_train, targets_cv): 39 | print 'Training with random forest...' 40 | clfs = regression(x_train_all, x_cv_all, targets_train, targets_cv, 41 | m_random_forest) 42 | return clfs 43 | 44 | def m_random_forest(): 45 | return RandomForestClassifier(n_estimators=10, max_depth=None, 46 | min_samples_split=1, random_state=0) 47 | 48 | def regression(x_train_all, x_cv_all, targets_train, targets_cv, classifier): 49 | clfs = [] 50 | mae_train = 0 51 | mae_cv = 0 52 | num_train = 0 53 | num_cv = 0 54 | for i in range(len(targets_train[0])): 55 | x_train, y_train, x_cv, y_cv = feature_extraction.get_x_y_by_target( 56 | x_train_all, x_cv_all, targets_train, targets_cv, i) 57 | 58 | clf = classifier() 59 | clf.fit(x_train, y_train) 60 | clfs.append(clf) 61 | 62 | p = clf.predict(x_cv) 63 | mae_cv += utilities.ae(y_cv, p) 64 | num_cv += len(y_cv) 65 | 66 | p = clf.predict(x_train) 67 | mae_train += utilities.ae(y_train, p) 68 | num_train += len(y_train) 69 | 70 | print 'Round %s completed.' % i 71 | 72 | mae_train /= float(num_train) 73 | mae_cv /= float(num_cv) 74 | 75 | print 'MAE in training set: %s' % mae_train 76 | print 'MAE in cv set: %s' % mae_cv 77 | return clfs 78 | 79 | if __name__ == '__main__': 80 | pass 81 | -------------------------------------------------------------------------------- /emc/utilities.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | 4 | def read_file(file_name, header=False): 5 | print 'Reading file...' 6 | f = open(file_name) 7 | reader = csv.reader(f) 8 | res = [] 9 | if not header: 10 | reader.next() 11 | for line in reader: 12 | res.append(line) 13 | 14 | f.close() 15 | return res 16 | 17 | def write_file(file_name, data): 18 | print 'Writing submission file...' 
19 | f = open(file_name, 'w') 20 | writer = csv.writer(f) 21 | for line in data: 22 | writer.writerow(line) 23 | f.close() 24 | 25 | def get_site_map(first_line): 26 | res = [] 27 | site_map = {} 28 | index = 0 29 | start = len(first_line) - 39 30 | for i in range(start, len(first_line)): 31 | site = first_line[i] 32 | idx = site.rfind('_', 0, len(site)) 33 | site_num = int(site[idx + 1 : :]) 34 | if site_num not in site_map: 35 | site_map[site_num] = index 36 | index += 1 37 | 38 | res.append(site_map[site_num]) 39 | return res 40 | 41 | def get_chunk_map(data, index): 42 | chunk_map = {} 43 | for line in data: 44 | key = line[index] 45 | if key not in chunk_map: 46 | chunk_map[key] = [] 47 | chunk_map[key].append(line) 48 | return chunk_map 49 | 50 | def get_avg_by_index(data, index): 51 | avg = {} 52 | num = {} 53 | for line in data: 54 | key = line[index] 55 | if key not in avg: 56 | avg[key] = [0.0] * 39 57 | num[key] = [0] * 39 58 | for i in range(56, len(line)): 59 | if not line[i] == 'NA': 60 | num[key][i - 56] += 1 61 | avg[key][i - 56] += float(line[i]) 62 | 63 | for key in avg.keys(): 64 | for i in range(len(avg[key])): 65 | if num[key][i] > 0: 66 | avg[key][i] /= float(num[key][i]) 67 | return avg 68 | 69 | def get_chunk_avg(data): 70 | return get_avg_by_index(data, 1) 71 | 72 | def get_hour_avg(data): 73 | return get_avg_by_index(data, 5) 74 | 75 | def get_weekday_avg(data): 76 | return get_avg_by_index(data, 4) 77 | 78 | def get_hour_avg_by_chunk(data): 79 | chunk_map = get_chunk_map(data, 1) 80 | 81 | hour_avg_by_chunk = {} 82 | for chunk_id in chunk_map.keys(): 83 | hour_avg_by_chunk[chunk_id] = get_hour_avg(chunk_map[chunk_id]) 84 | return hour_avg_by_chunk 85 | 86 | def get_weekday_avg_by_chunk(data): 87 | chunk_map = get_chunk_map(data, 1) 88 | 89 | weekday_avg_by_chunk = {} 90 | for chunk_id in chunk_map.keys(): 91 | weekday_avg_by_chunk[chunk_id] = get_weekday_avg(chunk_map[chunk_id]) 92 | return weekday_avg_by_chunk 93 | 94 | def get_weekday_in_sub(chunk_id, pos_in_chunk, chunk_map): 95 | chunk = chunk_map[chunk_id] 96 | last = chunk[len(chunk) - 1] 97 | last_weekday = last[4] 98 | last_hour = int(last[5]) 99 | last_pos_in_chunk = int(last[2]) 100 | 101 | hour_diff = last_pos_in_chunk - pos_in_chunk 102 | if last_hour + hour_diff < 24: 103 | return last_weekday 104 | else: 105 | hour_diff -= 23 - last_hour 106 | day_diff = int(hour_diff / 24) 107 | weekday = last_weekday + day_diff + 1 108 | if weekday > 7: 109 | weekday -= 7 110 | return weekday 111 | 112 | def ae(y, p): 113 | ae = 0.0 114 | for i in range(len(y)): 115 | ae += abs(float(y[i]) - p[i]) 116 | return ae 117 | 118 | 119 | if __name__ == '__main__': 120 | pass 121 | # res = read_file('./data/TrainingData.csv') 122 | # print res[0] 123 | -------------------------------------------------------------------------------- /fb_suggest_missing_link/candidate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Responsible for selecting candidates. 3 | """ 4 | 5 | #!/usr/bin/env python 6 | 7 | def get_surroundings(follow, followed, nodes): 8 | """ Gets all followers and followees of a given sets of nodes. """ 9 | 10 | followers_and_followees = set() 11 | for node in nodes: 12 | followers_and_followees.update(follow[node]) 13 | followers_and_followees.update(followed[node]) 14 | return followers_and_followees 15 | 16 | def get_candidates(follow, followed, node): 17 | """ Gets candidates for node to suggest follow. 
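    Candidates are every node within three hops of the given node (following
    or being followed at each hop), excluding the node itself and everyone it
    already follows.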
""" 18 | 19 | nodes_exclude = follow[node].copy() 20 | nodes_exclude.add(node) 21 | 22 | l1_candidates = get_surroundings(follow, followed, [node]) 23 | l2_candidates = get_surroundings(follow, followed, l1_candidates) 24 | l3_candidates = get_surroundings(follow, followed, l2_candidates) 25 | 26 | candidates = set() 27 | candidates.update(l1_candidates) 28 | candidates.update(l2_candidates) 29 | candidates.update(l3_candidates) 30 | 31 | candidates.difference_update(nodes_exclude) 32 | return candidates 33 | 34 | if __name__ == '__main__': 35 | pass 36 | 37 | -------------------------------------------------------------------------------- /fb_suggest_missing_link/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Based on number of mutual friends - 15% 3 | Based on number of mutual follows - 30% 4 | Remove non-followed suggestion - 66.3% 5 | """ 6 | 7 | #!/usr/bin/env python 8 | from collections import deque 9 | import utilities 10 | import time 11 | import rank 12 | import candidate 13 | import validation 14 | 15 | def get_popular_people(followed, num): 16 | """ Gets people with most followers. """ 17 | 18 | dict_num_followers = {} 19 | for node in followed.keys(): 20 | dict_num_followers[node] = len(followed[node]) 21 | 22 | popular_people = sorted(dict_num_followers, 23 | key=dict_num_followers.__getitem__, 24 | reverse=True) 25 | 26 | return popular_people[0 : num] 27 | 28 | def suggest_friends(follow, followed, clf, node, popular_people, 29 | max_suggestion): 30 | """ Suggests friends for a given node. """ 31 | 32 | if not follow.has_key(node): 33 | return [] 34 | 35 | candidates = candidate.get_candidates(follow, followed, node) 36 | suggested = rank.rank_candidates(follow, followed, clf, node, candidates) 37 | 38 | # Suggests most popular people when candidates are less than 10. 39 | if len(suggested) < max_suggestion: 40 | for star in popular_people: 41 | if star not in suggested: 42 | suggested.append(star) 43 | if len(suggested) >= max_suggestion: 44 | break 45 | else: 46 | suggested = suggested[0 : max_suggestion] 47 | 48 | return suggested 49 | 50 | def main(follow, followed, test_file, submission_file, data_file, 51 | validation_file, max_suggestion): 52 | """ The main method for the problem. """ 53 | 54 | print 'Reading graph...' 55 | test_nodes = utilities.read_nodes_list(test_file) 56 | 57 | print 'Training with logistic regression...' 58 | clf = rank.train(data_file, validation_file) 59 | 60 | print 'Getting popular people...' 61 | popular_people = get_popular_people(followed, max_suggestion) 62 | 63 | print 'Predicting...' 64 | predictions = [] 65 | count = 0 66 | for node in test_nodes: 67 | suggested = suggest_friends(follow, followed, clf, node, 68 | popular_people, max_suggestion) 69 | predictions.append(suggested) 70 | 71 | count += 1 72 | if count % 100 == 0: 73 | print 'Suggested %d friends.' % count 74 | 75 | print 'Writing submission files...' 
76 | utilities.write_submission_file(submission_file, test_nodes, predictions) 77 | 78 | if __name__ == '__main__': 79 | start_time = time.time() 80 | follow, followed = utilities.read_graph('./data/train.csv') 81 | 82 | validation.generate_test_set(follow, followed, 83 | './data/test.csv', 84 | './data/validation.csv', 85 | './data/solution.csv', 86 | 2000, 10) 87 | 88 | main(follow, followed, 89 | './data/validation.csv', 90 | './data/result.csv', 91 | './data/data.csv', 92 | './data/data_test.csv', 93 | 10) 94 | 95 | # main(follow, followed, 96 | # './data/test.csv', 97 | # './data/result.csv', 98 | # './data/data.csv', 99 | # 10) 100 | 101 | print (time.time() - start_time) / 60.0, 'minutes' 102 | 103 | -------------------------------------------------------------------------------- /fb_suggest_missing_link/rank.py: -------------------------------------------------------------------------------- 1 | """ 2 | Responsible for extracting features, classification and ranking. 3 | """ 4 | 5 | #!/usr/bin/env python 6 | import utilities 7 | from numpy import * 8 | from sklearn import cross_validation 9 | from sklearn import linear_model 10 | from sklearn.metrics import precision_recall_fscore_support 11 | 12 | def get_features(follow, followed, n1, n2): 13 | """ Creates features for a given pair of nodes. """ 14 | 15 | # Level 1 features. 16 | does_follow = 0 17 | if n1 in follow[n2]: 18 | does_follow = 1 19 | 20 | # Level 2 features. 21 | followees_follow = set.intersection(follow[n1], followed[n2]) 22 | percent_followees_follow = 0.0 23 | if len(follow[n1]) > 0: 24 | percent_followees_follow = 1.0 * len(followees_follow) / len(follow[n1]) 25 | 26 | followees_followed = set.intersection(follow[n1], follow[n2]) 27 | percent_followees_followed = 0.0 28 | if len(follow[n1]) > 0: 29 | percent_followees_followed = 1.0 * len(followees_followed) \ 30 | / len(follow[n1]) 31 | 32 | followers_follow = set.intersection(followed[n1], followed[n2]) 33 | percent_followers_follow = 0.0 34 | if len(followed[n1]) > 0: 35 | percent_followers_follow = 1.0 * len(followers_follow) \ 36 | / len(followed[n1]) 37 | 38 | followers_followed = set.intersection(followed[n1], follow[n2]) 39 | percent_followers_followed = 0.0 40 | if len(followed[n1]) > 0: 41 | percent_followers_followed = 1.0 * len(followers_followed) \ 42 | / len(followed[n1]) 43 | 44 | return [does_follow, percent_followees_follow, percent_followees_followed, 45 | percent_followers_follow, percent_followers_followed] 46 | 47 | def rank_candidates(follow, followed, clf, node, candidates): 48 | """ Ranks the candidates based on the chance they will be followed. """ 49 | 50 | if not candidates: 51 | return [] 52 | 53 | # Generates feature matrix. 54 | candidates = list(candidates) 55 | x_candidates = [] 56 | for candidate in candidates: 57 | features = get_features(follow, followed, node, candidate) 58 | x_candidates.append(features) 59 | 60 | # Uses classifier to estimate probability. 61 | candidate_score = {} 62 | prob = clf.predict_proba(x_candidates) 63 | for i in range(len(candidates)): 64 | candidate_score[candidates[i]] = prob[i][1] 65 | 66 | # Ranks candidates based on the score 67 | return sorted(candidate_score, key=candidate_score.__getitem__, 68 | reverse=True) 69 | 70 | def get_data(data_file, test_file): 71 | """ Produces training set, cross validation set and test set. 
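    The labelled data file is split 70/30 into train and cv; the test file is
    read separately and keeps the original class distribution.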
""" 72 | 73 | raw_data = utilities.read_file(data_file, True) 74 | test_data = utilities.read_file(test_file, True) 75 | x = array(raw_data, float64) 76 | y = x[:, 0] 77 | x = x[:, 1 : :] 78 | x_train, x_cv, y_train, y_cv = cross_validation.train_test_split( 79 | x, y, test_size=0.3, random_state=None) 80 | x = array(test_data, float64) 81 | y_test = x[:, 0] 82 | x_test = x[:, 1 : :] 83 | 84 | return (x_train, y_train, x_cv, y_cv, x_test, y_test) 85 | 86 | def train(data_file, test_file): 87 | """ Uses random forest to train the model. """ 88 | 89 | x_train, y_train, x_cv, y_cv, x_test, y_test = get_data(data_file, 90 | test_file) 91 | 92 | clf = linear_model.LogisticRegression(penalty='l1', C=1) 93 | clf.fit(x_train, y_train) 94 | print clf.coef_ 95 | 96 | print 'Accuracy in training set: %f'% clf.score(x_train, y_train) 97 | print 'Accuracy in cv: %f' % clf.score(x_cv, y_cv) 98 | print 'Accuracy in test: %f' % clf.score(x_test, y_test) 99 | 100 | precision, recall, f1, support = precision_recall_fscore_support( 101 | y_test, clf.predict(x_test)) 102 | print precision, recall, f1 103 | 104 | return clf 105 | 106 | if __name__ == '__main__': 107 | train('./data/data.csv', 108 | './data/data_test.csv') 109 | 110 | -------------------------------------------------------------------------------- /fb_suggest_missing_link/readme: -------------------------------------------------------------------------------- 1 | This is the code for Kaggle - Facebook Recruiting Competion. The task is about predicting missing links in asymmetric social network. (http://www.kaggle.com/c/FacebookRecruiting) 2 | My approach can be divided into two phases. The first phase is selecting candidates and the second is ranking the candidates. 3 | In the first phase, for each predicting node, I select all surrounding nodes up to 3 levels as the candidates, and statistics show that this method usually get only 8% missing rate and is quite efficient in time. 4 | In the second phase, all candidates are ranked based on the probability to be potentially followed by the given node. This turns out to be a classification problem. Features I use includes: whether it follows the given node, percent of followers of the node follow it, percent of followers of the node followed by it, percent of followees of the node follow it, percent of followees of the node followed by it. 5 | One thing that bothers me for a whole week is that this is a skewed classification, in which the fraction of postive training examples are less than 1%. So the classifier might get very very low recall, thus to be terrible. To mitigate this issue, I under-sampled the negative examples with about 1:10 (this ratio is achieved by experiments) and test the classifier in the original distribution. Logistic regression got a good result (about 73% recall and 23% precision). 6 | My best score is about 71.4% of mean average precision, and the leader is 72.98%. 
7 | -------------------------------------------------------------------------------- /fb_suggest_missing_link/utilities.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from numpy import * 3 | 4 | def edges_generator(file_name): 5 | """ 6 | Generator that returns edges given a 2-column csv graph file 7 | """ 8 | 9 | f = open(file_name) 10 | reader = csv.reader(f) 11 | # Ignore the header 12 | reader.next() 13 | 14 | for edges in reader: 15 | nodes = [int(node) for node in edges] 16 | yield nodes 17 | 18 | f.close() 19 | 20 | def read_graph(file_name): 21 | """ 22 | Reads a sparsely represented directed graph into a dictionary 23 | """ 24 | 25 | # Store the graph as a dictionary of edges 26 | follow = {} 27 | followed = {} 28 | 29 | def initialize_node(node): 30 | if node not in follow: 31 | follow[node] = set() 32 | if node not in followed: 33 | followed[node] = set() 34 | 35 | count = 0 36 | for nodes in edges_generator(file_name): 37 | for node in nodes: 38 | initialize_node(node) 39 | follow[nodes[0]].add(nodes[1]) 40 | followed[nodes[1]].add(nodes[0]) 41 | count += 1 42 | if count % 1000000 == 0: 43 | print 'Already read %d nodes.' % count 44 | 45 | return (follow, followed) 46 | 47 | def read_file(data_file, ignore_header=True): 48 | """ Reads data from the file. """ 49 | 50 | f = open(data_file) 51 | reader = csv.reader(f) 52 | if ignore_header: 53 | reader.next() 54 | 55 | data = [] 56 | for row in reader: 57 | data.append(row) 58 | 59 | f.close() 60 | return data 61 | 62 | def read_nodes_list(test_file): 63 | """ 64 | Reads of single-column list of nodes 65 | """ 66 | 67 | f = open(test_file) 68 | reader = csv.reader(f) 69 | reader.next() 70 | 71 | nodes = [] 72 | for row in reader: 73 | nodes.append(int(row[0])) 74 | return nodes 75 | f.close() 76 | 77 | def write_file(data_file, data): 78 | """ Writes the data to the data_file. """ 79 | 80 | f = open(data_file, 'w') 81 | writer = csv.writer(f) 82 | for row in data: 83 | writer.writerow(row) 84 | f.close() 85 | 86 | def write_submission_file(submission_file, test_nodes, test_predictions): 87 | """ 88 | Writes the submission file 89 | """ 90 | 91 | f = open(submission_file, "w") 92 | writer = csv.writer(f) 93 | writer.writerow(["source_node", "destination_nodes"]) 94 | 95 | for source_node, dest_nodes in zip(test_nodes, test_predictions): 96 | writer.writerow([str(source_node), 97 | " ".join([str(n) for n in dest_nodes])]) 98 | f.close() 99 | 100 | -------------------------------------------------------------------------------- /fb_suggest_missing_link/validation.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script is responsible for generating test data and analyzing the 3 | prediction result. 4 | """ 5 | 6 | #!/usr/bin/env python 7 | import utilities 8 | import candidate 9 | from numpy import * 10 | from random import randint 11 | import rank 12 | 13 | def generate_test_nodes(follow, nodes_exclude, num): 14 | """ Generates nodes for test. """ 15 | 16 | test_nodes = [] 17 | nodes_all = list(follow.keys()) 18 | perm = random.permutation(len(nodes_all)) 19 | for index in perm: 20 | node = nodes_all[index] 21 | if len(follow[node]) > 4 and node not in nodes_exclude: 22 | test_nodes.append(node) 23 | if len(test_nodes) >= num: 24 | break 25 | return test_nodes 26 | 27 | def remove_edges(follow, followed, node, max_remove_num): 28 | """ Randomly remove edges for the given node. 
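    Between 4 and max_remove_num followees are removed at random.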
The dict follow
29 |     and followed are modified in this method. """
30 | 
31 |     followees = list(follow[node])
32 |     num_followees = len(followees)
33 |     r = randint(4, min(num_followees, max_remove_num))
34 |     perm = random.permutation(num_followees)
35 |     perm = perm[0 : r]
36 | 
37 |     edges_removed = []
38 |     for index in perm:
39 |         n = followees[index]
40 |         follow[node].remove(n)
41 |         followed[n].remove(node)
42 |         edges_removed.append(n)
43 |     return edges_removed
44 | 
45 | def generate_solution(follow, followed, nodes_test, max_remove_num):
46 |     """ Generates the solution for suggesting missing links. """
47 | 
48 |     solution = []
49 |     for node in nodes_test:
50 |         s = [node]
51 |         edges_removed = remove_edges(follow, followed, node, max_remove_num)
52 |         s += edges_removed
53 |         solution.append(s)
54 |     return solution
55 | 
56 | def generate_test_set(follow, followed, test_file, validation_file,
57 |                       solution_file, num, max_remove_num):
58 |     """ Generates the test set for analysis. """
59 | 
60 |     nodes_exclude = utilities.read_nodes_list(test_file)
61 | 
62 |     print 'Generating test nodes...'
63 |     nodes_test = generate_test_nodes(follow, nodes_exclude, num)
64 |     writable_nodes_test = [[n] for n in nodes_test]
65 |     solution = generate_solution(follow, followed, nodes_test, max_remove_num)
66 | 
67 |     utilities.write_file(validation_file, writable_nodes_test)
68 |     utilities.write_file(solution_file, solution)
69 | 
70 | def generate_training_set(follow, followed, ratio, solution_file, data_file):
71 |     """ Uses the solution file to generate a training set for the model,
72 |     in the hope that this gets a better result.
73 |     Ratio controls the fraction of pos and neg examples; if ratio is -1,
74 |     the original fraction is kept. """
75 | 
76 |     raw_solution = utilities.read_file(solution_file, False)
77 |     dict_solution = {}
78 |     for i in range(len(raw_solution)):
79 |         row = raw_solution[i]
80 |         dict_solution[int(row[0])] = set(int(n) for n in row[1 : :])
81 | 
82 |     x_train = [['spring brother is a true man']]  # placeholder header row, skipped on read
83 |     for node in dict_solution.keys():
84 |         nodes_pos = dict_solution[node]
85 |         for n in nodes_pos:
86 |             features = rank.get_features(follow, followed, node, n)
87 |             x_train.append([1] + features)
88 | 
89 |         nodes_neg = candidate.get_candidates(follow, followed, node)
90 |         nodes_neg.difference_update(nodes_pos)
91 |         nodes_neg = list(nodes_neg)
92 |         perm = random.permutation(len(nodes_neg))
93 |         if ratio != -1:
94 |             num = min(int(len(nodes_pos) * ratio), len(nodes_neg))
95 |         else:
96 |             num = len(nodes_neg)
97 |         for i in range(num):
98 |             neg = nodes_neg[perm[i]]
99 |             features = rank.get_features(follow, followed, node, neg)
100 |             x_train.append([0] + features)
101 | 
102 |     utilities.write_file(data_file, x_train)
103 | 
104 | def analyze_candidates(solution_file, follow, followed):
105 |     """ Analyzes the method get_candidates. """
106 | 
107 |     raw_solution = utilities.read_file(solution_file, False)
108 |     dict_solution = {}
109 |     for row in raw_solution:
110 |         dict_solution[int(row[0])] = set(int(n) for n in row[1 : :])
111 | 
112 |     count_total = 0
113 |     count_miss = 0
114 |     for node in dict_solution:
115 |         candidates = candidate.get_candidates(follow, followed, node)
116 |         for n in dict_solution[node]:
117 |             if n not in candidates:
118 |                 count_miss += 1
119 |         count_total += len(dict_solution[node])
120 | 
121 |     print 'count_total = %d, count_miss = %d' % (
122 |         count_total, count_miss)
123 | 
124 | def ap(ground_truth, prediction):
125 |     """ Calculates the average precision.
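    Each correct suggestion at (1-based) rank i contributes hits_so_far / i;
    the sum is divided by the number of true links.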
""" 126 | 127 | ap = 0.0 128 | already_hit = 0 129 | for i in range(len(prediction)): 130 | if prediction[i] in ground_truth: 131 | already_hit += 1 132 | ap += 1.0 * already_hit / (i + 1) 133 | ap /= len(ground_truth) 134 | return ap 135 | 136 | def mean_average_precision(result_file, solution_file): 137 | """ Calculates the mean average precision. """ 138 | 139 | raw_result = utilities.read_file(result_file, True) 140 | raw_solution = utilities.read_file(solution_file, False) 141 | dict_result = {} 142 | for row in raw_result: 143 | dict_result[row[0]] = row[1 : :] 144 | dict_solution = {} 145 | for row in raw_solution: 146 | dict_solution[row[0]] = set(row[1 : :]) 147 | 148 | res = 0.0 149 | for key in dict_result.keys(): 150 | prediction = dict_result[key][0].split() 151 | ground_truth = dict_solution[key] 152 | res += ap(ground_truth, prediction) 153 | res /= len(dict_result) 154 | print 'mean average precision = %f' % res 155 | 156 | if __name__ == '__main__': 157 | mean_average_precision('./data/result.csv', 158 | './data/solution.csv') 159 | 160 | # follow, followed = utilities.read_graph('./data/train.csv') 161 | # generate_test_set(follow, followed, 162 | # './data/test.csv', 163 | # './data/validation.csv', 164 | # './data/solution.csv', 165 | # 10000, 10) 166 | # print 'Generating training set...' 167 | # generate_training_set(follow, followed, 10, 168 | # './data/solution.csv', 169 | # './data/data.csv') 170 | 171 | # print 'Generating test set...' 172 | # generate_training_set2(follow, followed, -1, 173 | # './data/solution.csv', 174 | # './data/data_test.csv') 175 | 176 | # analyze_candidates('./data/solution.csv', follow, followed) 177 | 178 | 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /insult_detect/insult_detect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | from numpy import * 4 | from sklearn import preprocessing 5 | from sklearn import cross_validation 6 | from sklearn.feature_extraction.text import TfidfVectorizer 7 | from sklearn.naive_bayes import GaussianNB 8 | import string 9 | import re 10 | 11 | """ The solution is based on tf-idf text vectorization and Gaussian 12 | Naive Bayes classification, achieving accuracy 93%. """ 13 | 14 | __train__ = './data/train.csv' 15 | __test__ = './data/test.csv' 16 | 17 | def read_data(path, ignore_header=True): 18 | csv_file_object = csv.reader(open(path, 'rb')) 19 | if ignore_header: 20 | header = csv_file_object.next() 21 | x = [] 22 | for row in csv_file_object: 23 | x.append(row) 24 | return x 25 | 26 | def feature_extract(raw_data): 27 | y = [] 28 | x = [] 29 | for row in raw_data: 30 | y.append(row[0]) 31 | x.append(row[2]) 32 | y = array(y, dtype=int32) 33 | return (y, x) 34 | 35 | def comment_filter(comment): 36 | comment = comment.translate(string.maketrans('\n\t\r', ' ')) 37 | comment = comment.lower() 38 | comment = comment.replace('\\', '') 39 | comment = comment.replace('\'s', '') 40 | comment = comment.replace('\'re', '') 41 | comment = re.sub(r'([^\s\w]|_)+', '', comment) 42 | comment = re.sub('[%s]' % string.digits, '9', comment) 43 | return comment 44 | 45 | if __name__ == '__main__': 46 | print 'Preprocessing...' 47 | raw_data = read_data(__train__) 48 | test_data = read_data(__test__) 49 | y, x = feature_extract(raw_data + test_data) 50 | for i in range(len(x)): 51 | x[i] = comment_filter(x[i]) 52 | 53 | print 'Vectorizing...' 
54 |     vectorizer = TfidfVectorizer(min_df=1, norm='l2', smooth_idf=True)
55 |     x = vectorizer.fit_transform(x)
56 |     x = x.toarray()
57 | 
58 |     print 'Dividing into training set and cv set...'
59 |     num_train = len(raw_data)
60 |     x_test = x[num_train : :, :]
61 |     y_test = y[num_train : :]
62 |     x = x[0 : num_train, :]
63 |     y = y[0 : num_train]
64 | 
65 |     x_train, x_cv, y_train, y_cv = cross_validation.train_test_split(
66 |         x, y, test_size=0.3, random_state=None)
67 |     print 'Training set size: %d, cv set size: %d' % (
68 |         y_train.shape[0], y_cv.shape[0])
69 | 
70 |     print 'Fitting Naive Bayes model...'
71 |     clf = GaussianNB()
72 |     clf.fit(x_train, y_train)  # fit on the training split only, so the cv score is meaningful
73 | 
74 |     print 'Predicting...'
75 |     print 'Accuracy in training set: %f' % clf.score(x_train, y_train)
76 |     print 'Accuracy in cv set: %f' % clf.score(x_cv, y_cv)
77 | 
78 |     print 'Predicting the test set...'
79 |     clf.fit(x, y)  # refit on all labelled data before predicting the test set
80 |     p_test = clf.predict(x_test)
81 |     open_file_object = csv.writer(open("./data/result.csv", "wb"))
82 |     for i in range(len(test_data)):
83 |         test_data[i][0] = p_test[i] * 1.0
84 |         open_file_object.writerow(test_data[i])
85 | 
--------------------------------------------------------------------------------
/insult_detect/readme:
--------------------------------------------------------------------------------
1 | The task is to predict whether a comment posted during a public discussion is considered insulting to one of the participants (https://www.kaggle.com/c/detecting-insults-in-social-commentary).
2 | The problem is a straightforward two-class text classification task. After trying several methods, my best approach uses classic tf-idf feature extraction (with normalization and idf smoothing) and a Gaussian Naive Bayes classifier.
3 | One optimization is the string preprocessing: the raw comments contain lots of meaningless characters, so I filter them out and also replace all digits with 9. This part could be optimized further in a great many ways.
4 | Finally, I achieved 93% classification accuracy.
5 | 
--------------------------------------------------------------------------------
/kicked_car/classification.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | from sklearn.ensemble import RandomForestClassifier
4 | from sklearn.metrics import precision_recall_fscore_support
5 | 
6 | def random_forest(x_train, y_train, x_cv, y_cv):
7 |     """ Uses Random Forest to classify the data. """
8 | 
9 |     print 'Training with RF...'
10 |     clf = RandomForestClassifier(n_estimators = 10)
11 |     clf.fit(x_train, y_train)
12 | 
13 |     print 'Predicting...'
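    # clf.score reports mean accuracy; precision_recall_fscore_support below
    # returns per-class arrays, where index 1 corresponds to the kicked cars.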
14 | print 'Accuracy in training set: %f' % clf.score(x_train, y_train) 15 | if y_cv != None: 16 | print 'Accuracy in cv set: %f' % clf.score(x_cv, y_cv) 17 | precision, recall, f1, support = precision_recall_fscore_support( 18 | y_cv, clf.predict(x_cv)) 19 | print precision, recall, f1 20 | 21 | return clf 22 | 23 | if __name__ == '__main__': 24 | pass 25 | -------------------------------------------------------------------------------- /kicked_car/data/.~lock.pos.csv#: -------------------------------------------------------------------------------- 1 | hang ,hang,hang,10.01.2013 20:29,file:///home/hang/.config/libreoffice/3; -------------------------------------------------------------------------------- /kicked_car/data/.~lock.test.csv#: -------------------------------------------------------------------------------- 1 | hang ,hang,hang,02.01.2013 11:32,file:///home/hang/.config/libreoffice/3; -------------------------------------------------------------------------------- /kicked_car/data/.~lock.training.csv#: -------------------------------------------------------------------------------- 1 | hang ,hang,hang,02.01.2013 11:31,file:///home/hang/.config/libreoffice/3; -------------------------------------------------------------------------------- /kicked_car/data/feature_idx.csv: -------------------------------------------------------------------------------- 1 | 5 2 | 48 3 | 71 4 | 110 5 | 122 6 | 128 7 | 320 8 | 359 9 | 364 10 | 414 11 | 419 12 | 463 13 | 470 14 | 501 15 | 517 16 | 520 17 | 521 18 | 523 19 | 524 20 | 525 21 | 526 22 | 527 23 | 528 24 | 530 25 | 532 26 | 533 27 | 538 28 | 541 29 | 553 30 | 558 31 | 559 32 | 567 33 | 568 34 | 577 35 | 584 36 | 590 37 | 600 38 | 601 39 | 616 40 | 623 41 | 624 42 | 633 43 | 634 44 | 639 45 | 649 46 | 657 47 | 668 48 | 674 49 | 687 50 | 690 51 | 692 52 | 694 53 | 696 54 | 700 55 | 721 56 | 726 57 | 745 58 | 746 59 | 769 60 | 771 61 | 776 62 | 777 63 | 810 64 | 832 65 | 857 66 | 878 67 | 889 68 | 895 69 | 915 70 | 923 71 | 928 72 | 933 73 | 953 74 | 956 75 | 975 76 | 984 77 | 986 78 | 988 79 | 992 80 | 998 81 | 1031 82 | 1038 83 | 1039 84 | 1045 85 | 1050 86 | 1054 87 | 1087 88 | 1101 89 | 1119 90 | 1120 91 | 1122 92 | 1167 93 | 1174 94 | 1179 95 | 1190 96 | 1195 97 | 1200 98 | 1218 99 | 1222 100 | 1223 101 | 1226 102 | 1227 103 | 1231 104 | 1232 105 | 1237 106 | 1239 107 | 1244 108 | 1252 109 | 1269 110 | 1287 111 | 1301 112 | 1310 113 | 1311 114 | 1324 115 | 1329 116 | 1343 117 | 1344 118 | 1370 119 | 1373 120 | 1393 121 | 1396 122 | 1409 123 | 1433 124 | 1435 125 | 1442 126 | 1445 127 | 1464 128 | 1474 129 | 1501 130 | 1502 131 | 1503 132 | 1504 133 | 1516 134 | 1522 135 | 1527 136 | 1530 137 | 1537 138 | 1539 139 | 1573 140 | 1601 141 | 1610 142 | 1618 143 | 1629 144 | 1634 145 | 1638 146 | 1652 147 | 1654 148 | 1655 149 | 1656 150 | 1657 151 | 1662 152 | 1663 153 | 1683 154 | 1700 155 | 1705 156 | 1717 157 | 1726 158 | 1729 159 | 1730 160 | 1734 161 | 1735 162 | 1736 163 | 1741 164 | 1743 165 | 1750 166 | 1755 167 | 1759 168 | 1764 169 | 1770 170 | 1782 171 | 1804 172 | 1809 173 | 1810 174 | 1812 175 | 1855 176 | 1870 177 | 1883 178 | 1907 179 | 1911 180 | 1938 181 | 1975 182 | 1991 183 | 1993 184 | 1998 185 | 2002 186 | 2020 187 | 2037 188 | 2041 189 | 2051 190 | 2101 191 | 2102 192 | 2112 193 | 2117 194 | 2127 195 | 2141 196 | 2175 197 | 2183 198 | 2207 199 | 2210 200 | 2216 201 | 2231 202 | 2234 203 | 2240 204 | 2241 205 | 2248 206 | 2253 207 | 2254 208 | 2267 209 | 2275 210 | 2282 211 | 2286 212 | 2297 213 | 2301 214 | 2306 215 | 
2323 216 | 2329 217 | 2342 218 | 2352 219 | 2370 220 | 2376 221 | 2379 222 | 2383 223 | 2405 224 | 2411 225 | 2417 226 | 2424 227 | 2467 228 | 2477 229 | 2481 230 | 2488 231 | 2496 232 | 2516 233 | 2519 234 | 2526 235 | 2540 236 | 2544 237 | 2545 238 | 2549 239 | 2597 240 | 2598 241 | 2600 242 | 2635 243 | 2640 244 | 2648 245 | 2649 246 | 2650 247 | 2653 248 | 2654 249 | 2655 250 | 2657 251 | 2665 252 | 2666 253 | 2669 254 | 2676 255 | 2678 256 | 2679 257 | 2681 258 | 2682 259 | 2683 260 | 2684 261 | 2685 262 | 2686 263 | 2687 264 | 2688 265 | 2690 266 | 2693 267 | 2697 268 | 2710 269 | 2714 270 | 2730 271 | 2733 272 | 2734 273 | 2740 274 | 2741 275 | 2747 276 | 2751 277 | 2765 278 | 2784 279 | 2800 280 | 2802 281 | 2805 282 | 2826 283 | 2827 284 | 2829 285 | 2840 286 | 2843 287 | 2889 288 | 2900 289 | 2904 290 | 2905 291 | 2906 292 | 2911 293 | 2914 294 | 2915 295 | 2920 296 | 2932 297 | 2939 298 | 2948 299 | 2959 300 | 2962 301 | -------------------------------------------------------------------------------- /kicked_car/data/idx: -------------------------------------------------------------------------------- 1 | Field Name Definition 2 | RefID Unique (sequential) number assigned to vehicles 3 | IsBadBuy Identifies if the kicked vehicle was an avoidable purchase 4 | 0 PurchDate The Date the vehicle was Purchased at Auction 5 | 1 Auction (3) Auction provider at which the vehicle was purchased 6 | 2 VehYear (10) The manufacturer's year of the vehicle 7 | *3 VehicleAge The Years elapsed since the manufacturer's year 8 | 4 Make (33) Vehicle Manufacturer 9 | 5 Model (1063) Vehicle Model 10 | 6 Trim (135) Vehicle Trim Level 11 | 7 SubModel (864) Vehicle Submodel 12 | 8 Color (17) Vehicle Color 13 | 9 Transmission (5) Vehicles transmission type (Automatic, Manual) 14 | 10 WheelTypeID (5) The type id of the vehicle wheel 15 | 11 WheelType (4) The vehicle wheel type description (Alloy, Covers) 16 | *12 VehOdo The vehicles odometer reading 17 | 13 Nationality (5) The Manufacturer's country 18 | 14 Size (13) The size category of the vehicle (Compact, SUV, etc.) 
19 | 15 TopThreeAmericanName (5) Identifies if the manufacturer is one of the top three American manufacturers 20 | *16 MMRAcquisitionAuctionAveragePrice Acquisition price for this vehicle in average condition at time of purchase 21 | *17 MMRAcquisitionAuctionCleanPrice Acquisition price for this vehicle in the above Average condition at time of purchase 22 | *18 MMRAcquisitionRetailAveragePrice Acquisition price for this vehicle in the retail market in average condition at time of purchase 23 | *19 MMRAcquisitonRetailCleanPrice Acquisition price for this vehicle in the retail market in above average condition at time of purchase 24 | *20 MMRCurrentAuctionAveragePrice Acquisition price for this vehicle in average condition as of current day 25 | *21 MMRCurrentAuctionCleanPrice Acquisition price for this vehicle in the above condition as of current day 26 | *22 MMRCurrentRetailAveragePrice Acquisition price for this vehicle in the retail market in average condition as of current day 27 | *23 MMRCurrentRetailCleanPrice Acquisition price for this vehicle in the retail market in above average condition as of current day 28 | 24 PRIMEUNIT (3) Identifies if the vehicle would have a higher demand than a standard purchase 29 | 25 AUCGUART (3) The level guarntee provided by auction for the vehicle (Green light - Guaranteed/arbitratable, Yellow Light - caution/issue, red light - sold as is) 30 | 26 BYRNO (74) Unique number assigned to the buyer that purchased the vehicle 31 | 27 VNZIP (163) Zipcode where the car was purchased 32 | 28 VNST (37) State where the the car was purchased 33 | *29 VehBCost Acquisition cost paid for the vehicle at time of purchase 34 | 30 IsOnlineSale (2) Identifies if the vehicle was originally purchased online 35 | *31 WarrantyCost Warranty price (term=36month and millage=36K) 36 | 37 | 38 | 39 | [ 0.11074237 0.10457275 0.04262179 0.04206746 0.03988719 0.0396514 40 | 0.03864223 0.03826018 0.03824352 0.03791842 0.03714719 0.0364236 41 | 0.03529239 0.03211145 0.03013207 0.02963083 0.02820693 0.0277857 42 | 0.02660546 0.0241205 0.02401912 0.02379755 0.02328335 0.0194586 43 | 0.01837794 0.01630656 0.01116595 0.00631357 0.00553677 0.0055280 44 | 0.0038861 0.00226295] 45 | [10, 11, 29, 17, 12, 16, 20, 21, 23, 19, 22, 18, 0, 31, 26, 27, 5, 3, 7, 28, 6, 2, 8, 4, 1, 14, 15, 13, 24, 25, 9, 30] 46 | 47 | [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 24, 25, 26, 27, 28, 30] 48 | 49 | 50 | [36, 41, 0, 12, 32, 29, 16, 1, 8, 20, 18, 5, 22, 38, 4, 27, 49, 26, 21, 51 | 48, 17, 6, 7, 31, 3, 2, 23, 39, 19, 50, 28, 37, 10, 14, 44, 35, 9, 24, 52 | 13, 47, 30, 15, 34, 45, 46, 33, 40, 25, 42, 43, 11, 51] 53 | 54 | 55 | 36, 41, 32, 38, 49, 48, 39, 50, 37, 44, 35, 47, 34, 45, 46, 33, 40, 42, 43, 51 56 | 5, 10, 0, 7, 27, 26, 8, 28, 6, 14, 4, 25, 2, 15, 24, 1, 9, 11, 13, 30 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /kicked_car/data/log: -------------------------------------------------------------------------------- 1 | RF 30, all features .902855 .845 .245 0.3804 0.23019 2 | RF 300, all features .888102 .948 .083 0.1534 0.22366 3 | RF 30, all features, ds 5 .890340 .865 .115 0.2036 0.22477 4 | RF 300, all features, ds 5 .900754 .783 .253 0.3828 0.23201 5 | RF 30, all features, ds 3 .896186 .736 .227 0.3466 0.22719 6 | RF 300, all features, ds 3 .893126 .624 .303 0.4082 0.23433 7 | RF 300, all features, ds 3, scale .893674 .632 .299 0.4058 0.23378 8 | RF 30, all features, ds 1 .688787 .231 .674 0.3447 0.21998 9 | RF 
300, all features, ds 1 .724549 .253 .648 0.3636 0.23506 10 | 11 | LR, 5 binary features, ds 1 .746609 .265 .612 0.3701 0.23383 12 | LR, 10 binary features, ds 0.8 .646852 .222 .717 0.3392 0.23412 13 | LR, 10 binary features, ds 1 .749516 .277 .608 0.3804 0.23606 14 | LR, 10 binary features, ds 1, scale .676251 .226 .646 0.3353 0.21143 15 | LR, 10 binary features, ds 3 0.11535 16 | LR, all binary features, ds 1 .720096 .255 .633 0.3637 0.23073 17 | 18 | RF 300, avg cate .904681 .858 .258 0.3969 0.23040 19 | RF 300, avg cate, ds 3 .896278 .631 .352 0.4518 0.23541 20 | RF 300, all + avg cate, ds 3 .897008 .643 .342 0.4466 0.23622 21 | RF 300, all + avg cate, ds 2 .876364 .490 .434 0.4602 0.23950 22 | RF 300, all + avg cate, ds 2, chi 40 .874629 .483 .455 0.4685 0.23477 23 | RF 300, all + mean std cate, ds 2 .870701 .467 .453 0.4596 0.23232 24 | RF 300, all + avg cate, ds 1 .749258 .279 .672 0.3943 0.23399 25 | RF 300, all + avg cate .904362 .858 .255 0.3930 0.23313 26 | RF 300, avg cate, ds 1 .746792 .278 .678 0.3941 0.22876 27 | RF 300, 10 avg cate, ds 3 .894953 .622 .345 0.4440 0.23053 28 | 29 | RF 300, 5 binary, ds 1 .725655 .256 .617 0.3623 0.23070 30 | 31 | RF 300, 5 binary, chi 200, ds 2 .877004 .492 .385 0.4319 0.23977 32 | RF 300, 5 binary, chi 300, ds 2 .877753 .496 .384 0.4330 0.24076 33 | RF 100, 5 binary, chi 300, ds 2 .879013 .503 .376 0.4302 0.23772 34 | RF 300, 5 binary, chi 200 (ds 2), ds 2 .876364 .489 .383 0.4293 0.23961 35 | RF 300, 5 binary, chi 100, ds 2 .872071 .468 .388 0.4240 0.23696 36 | RF 300, 5 binary, RF_select 28, ds 2 .867367 .447 .386 0.4143 0.23044 37 | RF 300, 5 binary, LR_select 31, ds 2 .862480 .429 .399 0.4136 0.22871 38 | RF 300, 5 bin, LR_select 87(ds 2), ds 2 .869422 .457 .398 0.4253 0.23783 39 | RF 300, 5 bin, LR_select 268(ds 2), ds 2 .880201 .509 .385 0.4382 0.23926 40 | RF 300, 5 bin, LR_select 256, ds 2 .878831 .502 .379 0.4320 0.23911 41 | 42 | RF 300, 5 binary + avg, chi 300, ds 2 .879333 .504 .380 0.4335 0.24221 43 | RF 300, 5 binary + avg, chi 400, ds 2 .881799 .520 .354 0.4213 0.24150 44 | RF 300, 5 binary + avg, pca 300, ds 2 .880749 .514 .329 0.4018 0.21122 45 | RF 300, 5 binary + avg, tree 300, ds 2 .880566 .512 .374 0.4324 0.24024 46 | RF 300, all binary + avg, LR 350, ds 2 0.24222 47 | RF 300, all binary + avg, LR 400, ds 2 .876684 .490 .385 0.4311 0.23812 48 | RF 300, all binary + avg, LR 300, ds 2 .876501 .490 .393 0.4362 0.24153 49 | RF 300, all binary + avg, chi2 300, ds 2 .876639 .490 .370 0.4215 0.24025 50 | 51 | RF 300, all num .876958 .369 .018 0.0344 0.14464 52 | -------------------------------------------------------------------------------- /kicked_car/data/log2: -------------------------------------------------------------------------------- 1 | RF 300, num .876958 .369 .018 0.0344 0.14464 2 | RF 300, num, ds 2 .813428 .262 .296 0.278 0.15263 3 | RF 300, num+diff, ds 2 .814524 .257 .279 0.268 0.14948 4 | RF 300, num+diff2, ds 2 .813 .262 .296 0.278 0.15370 5 | RF 300, num+diff2+avg_meter, ds 2 .814 .258 .282 0.260 0.15031 6 | RF 300, num+diff2-price, ds 2 .823 .273 .278 0.275 0.15144 7 | RF 300, num+diff2, ds 1.5 .755 .230 .433 0.300 0.15396 8 | RF 300, num+diff3, ds 1.5 .772 .253 .448 0.323 0.17862 9 | 10 | 11 | RF 300, num+diff2, ds 1.5 .755 .230 .433 0.300 0.15396 12 | RF 300, num+diff2, ds 2 .813 .262 .296 0.278 0.15370 13 | RF 300, num+diff2+auction, ds 2 .820 .276 .297 0.286 0.15909 6 (1) 14 | RF 300, num+diff2+year, ds 2 .816 .260 .278 0.268 0.15254 (2) 15 | RF 300, num+diff2+make, ds 2 .823 .272 .274 0.273 0.15659 9 
(4) 16 | RF 300, num+diff2+trim, ds 2 .839 .292 .239 0.257 0.15980 5 (6) 17 | RF 300, num+diff2+color, ds 2 .823 .270 .268 0.270 0.15550 10 (8) 18 | RF 300, num+diff2+trans, ds 2 .820 .272 .286 0.279 0.15452 11 (9) 19 | RF 300, num+diff2+w_type_id, ds 2 .864 .432 .378 0.403 0.22412 2 (10) 20 | RF 300, num+diff2+w_type, ds 2 .864 .429 .367 0.396 0.22440 1 (11) 21 | RF 300, num+diff2+nation, ds 2 .814 .263 .294 0.278 0.15165 (13) 22 | RF 300, num+diff2+size, ds 2 .819 .266 .279 0.272 0.15121 (14) 23 | RF 300, num+diff2+top3, ds 2 .819 .266 .277 0.271 0.15377 12 (15) 24 | RF 300, num+diff2+prim, ds 2 .820 .284 .316 0.300 0.16330 3 (24) 25 | RF 300, num+diff2+byrno, ds 2 .832 .285 .252 0.268 0.15859 8 (26) 26 | RF 300, num+diff2+zip, ds 2 .837 .292 .237 0.262 0.16025 4 (27) 27 | RF 300, num+diff2+state, ds 2 .833 .291 .258 0.273 0.15901 7 (28) 28 | RF 300, num+diff2+online, ds 2 .820 .270 .288 0.279 0.15365 (30) 29 | 30 | RF 300, num+diff2+top2, ds 2 .862 .422 .373 0.396 0.22514 31 | RF 300, num+diff2+top3, ds 2 .862 .423 .381 0.401 0.22804 32 | RF 300, num+diff2+top4, ds 2 .875 .483 .361 0.413 0.23637 33 | RF 300, num+diff2+top5, ds 2 .874 .476 .364 0.412 0.23707 34 | RF 300, num+diff2+top4+6, ds 2 .878 .499 .361 0.419 0.23448 35 | RF 300, num+diff2+top6, ds 2 .878 .499 .362 0.420 0.23848 36 | RF 300, num+diff2+top12, ds 2 .881 .512 .362 0.424 0.23776 37 | RF 300, num+diff2+top12, ds 1.5 .838 .370 .477 0.417 0.23885 38 | RF 300, num+diff2+top6, ds 1.5 .841 .373 .457 0.410 0.23785 39 | RF 300, num+diff2+top12, chi 300 ds 1.5 .840 .373 .465 0.414 0.24088 40 | RF 300, num+diff2+top12, chi 300 ds 2 .880 .505 .360 0.420 0.23887 41 | RF 300, num+diff2+top12, chi 350 ds 2 .879 .503 .365 0.423 0.23774 42 | RF 300, num+diff2+top12, chi 350 ds 1.5 .843 .383 .476 0.424 0.24114 43 | RF 1000, num+diff2+top12, chi 350 ds 1.5 -- -- -- -- 0.24327 44 | RF 300, num+diff2+top12, chi 400 ds 1.5 .845 .384 .464 0.420 0.24021 45 | RF 300, num+diff2+top12+log, chi 350 ds 1.5 .846 .388 .468 0.425 0.24037 46 | RF 300, num+diff2+all_cate, chi 350 ds 1.5 .836 .367 .476 0.415 0.24048 47 | RF 300, num+diff2+top12+tree, chi 350 ds 1.5 .840 .374 .469 0.416 0.24097 48 | RF 300, num+diff2+top12+tree, chi 300 ds 1.5 .844 .383 .462 0.419 0.23820 49 | RF 300, num+diff2+top12+tree, chi 300 ds 2 .882 .517 .368 0.430 0.23821 50 | RF 300, num+diff2+top12+tree, chi 350 ds 2 .800 .508 .371 0.429 0.23855 51 | RF 300, num+diff2+top12+tree, chi 400 ds 1.5 .844 .383 .464 0.419 0.24075 52 | 53 | RF 1000, num+diff2+top12+avg, chi 350 ds 1.5 -- -- -- -- 0.24221 54 | RF 300, num+diff2+top12+avg, chi 350 ds 1.5 .845 .386 .459 0.419 0.23697 55 | RF 300, num+diff2+top12+avg, chi 300 ds 1.5 .843 .382 .467 0.420 0.23885 56 | 57 | 58 | RF 300, num+diff3+top12, chi 350 ds 1.5 .843 .383 .476 0.424 0.24166 59 | RF 300, num+diff3+tree, ds 1.5 .837 .368 .474 0.415 0.23893 60 | RF 300, num+diff3+top2+tree, ds 1.5 .837 .366 .468 0.411 0.23570 61 | RF 300, num+diff3+tree2, ds 1.5 .822 .334 .469 0.390 0.22707 62 | RF 1000, (11, 15, 27, 24, 1) 0.24151 63 | RF 300, (11, 15, 27, 24, 1), ds 1.5 .834 .365 .476 .413 0.23842 64 | 65 | RF 300, num+diff3+top_all, chi 350 ds 1.5 .837 .368 .473 0.414 0.24085 66 | RF 300, num+diff2+top12, chi 350 ds 1.5 .843 .383 .476 0.424 0.24114 67 | RF 300, num+diff2+top12, tree 100 ds 1.5 .836 .364 .470 0.410 0.23528 68 | RF 300, num+diff2+top12, tree 200 ds 1.5 .839 .369 .462 0.410 0.23745 69 | RF 300, num+diff2+top12, tree 300 ds 1.5 .832 .354 .470 0.404 0.23866 70 | RF 300, num+diff2+top12, tree 300 ds 2 .880 .508 .373 0.430 
0.23965 71 | RF 300, num+diff2+top12, tree 350 ds 2 .880 .507 .368 0.427 0.23686 72 | RF 1000, [10, 11, 24, 27, 6, 1, 28, 4, 9, 15], ds 1.5 0.24364 73 | RF 300, [10, 11, 24, 27, 6, 1, 28, 4, 9, 15], ds 1.5 .834 .363 .488 .416 .23936 74 | 75 | RF 300, num+diff3+all, chi 300 ds 1.5 .837 .370 .480 0.417 0.24263 76 | RF 300, num+diff3+all, chi 320 ds 1.5 .838 .371 .481 0.419 0.24126 77 | RF 300, num+diff3+all, chi 350 ds 1.5 .837 .365 .467 0.410 0.23921 78 | RF 300, num+diff3+all, chi 280 ds 1.5 .829 .353 .487 0.410 0.24036 79 | RF 300, num+diff3+all, chi 250 ds 1.5 .832 .358 .480 0.410 0.24113 80 | 81 | RF 300, num+diff3+all+avg, chi 300 ds 1.5 .838 .372 .483 0.421 0.24431 82 | RF 1000, num+diff3+all+ratio, chi 300 ds 1.5 0.24644 83 | RF 300, num+diff3+all+ratio, chi 300 ds 1.5 .838 .372 .483 0.421 0.24005 84 | RF 300, num+diff3+all+ratio, chi 300 ds 1.5 .838 .372 .483 0.421 0.24462 85 | 86 | RF 300, num+diff3+all+ratio, tree 300 ds 1.5 0.24421 87 | RF 1000, num+diff3+all+ratio, tree 300 ds 1.5 0.24981 88 | RF 300, num+diff3+all+ratio, tree 300 ds 1.7 0.24270 89 | RF 300, num+diff3+all+ratio, tree 300 ds 1.3 0.24393 90 | RF 300, num+diff3+all+avg, tree 300 ds 1.5 0.24289 91 | RF 300, num+diff3+all+tree, tree 300 ds 1.5 0.24263 92 | RF 300, num+diff3+all+ratio+year, tree 300 ds 1.5 0.24565 93 | RF 300, num+diff3+all+ratio+year,month, tree 300 ds 1.5 0.24696 94 | RF 1000, num+diff3+all+ratio+year,month, tree 300 ds 1.5 0.25147 (31) 95 | RF 300, num+diff3+all+all_ratio+year,month, tree 300 ds 1.5 0.24627 96 | RF 1000, num+diff3+all+all_ratio+year,month, tree 300 ds 1.5 0.24958 97 | ------------------------------------------------------------------------------------------------------------ 98 | 99 | -------------------------------------------------------------------------------- /kicked_car/feature_extraction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from sklearn.tree import DecisionTreeClassifier 4 | from collections import defaultdict 5 | import math 6 | 7 | _NUM_FEATURE_INDICES = [3, 12, 16, 17, 18, 19, 20, 21, 22, 23, 29, 31] 8 | _CATE_FEATURE_INDICES = [0, 1, 2, 4, 6, 8, 9, 10, 11, 13, 14, 15, 24, 26, 9 | 27, 28, 30, 32] 10 | 11 | def get_loglikelihood_ratio(x, y, idx, cate_range): 12 | pos_num = 0 13 | neg_num = 0 14 | pos_map = defaultdict(int) 15 | neg_map = defaultdict(int) 16 | for i in range(len(x)): 17 | cate = x[i][idx] 18 | if y[i] == 1: 19 | pos_num += 1 20 | pos_map[cate] += 1 21 | else: 22 | neg_num += 1 23 | neg_map[cate] += 1 24 | 25 | ratio_map = defaultdict(lambda: 0) 26 | for cate in range(cate_range + 1): 27 | p_pos = -100 28 | if cate in pos_map: 29 | p_pos = math.log10(pos_map[cate] / float(pos_num)) 30 | p_neg = -100 31 | if cate in neg_map: 32 | p_neg = math.log10(neg_map[cate] / float(neg_num)) 33 | ratio_map[cate] = p_pos - p_neg 34 | return ratio_map 35 | 36 | def get_feature(x, range_map, ratio_map): 37 | x_new = [] 38 | for line in x: 39 | # Numerical features. 40 | features = [line[idx] for idx in _NUM_FEATURE_INDICES] 41 | # Cur - avg. 42 | features.append(line[17] - line[16]) 43 | features.append(line[19] - line[18]) 44 | features.append(line[21] - line[20]) 45 | features.append(line[23] - line[22]) 46 | 47 | # Diff cur. 
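# (Added note, not in the original: per data/idx, columns 16-23 are the eight MMR prices -- acquisition/current x auction/retail x average/clean -- so the appends below are pairwise spreads between those price columns, the hand-built price-difference features the readme credits as informative.)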
48 | features.append(line[19] - line[17]) 49 | features.append(line[21] - line[19]) 50 | features.append(line[23] - line[21]) 51 | features.append(line[21] - line[17]) 52 | features.append(line[23] - line[17]) 53 | features.append(line[23] - line[19]) 54 | 55 | # Diff avg. 56 | features.append(line[18] - line[16]) 57 | features.append(line[20] - line[18]) 58 | features.append(line[22] - line[20]) 59 | features.append(line[22] - line[18]) 60 | 61 | # Categorical features. 62 | for idx in _CATE_FEATURE_INDICES: 63 | for i in range(range_map[idx] + 1): 64 | if i == line[idx]: 65 | features.append(1) 66 | else: 67 | features.append(0) 68 | 69 | # Log likelihood ratio. 70 | for idx in _CATE_FEATURE_INDICES: 71 | cate = line[idx] 72 | cur_ratio_map = ratio_map[idx] 73 | features.append(cur_ratio_map[cate]) 74 | 75 | x_new.append(features) 76 | return x_new 77 | 78 | def create_feature(x, y, x_test): 79 | range_map = {} 80 | ratio_map = {} 81 | for idx in _CATE_FEATURE_INDICES: 82 | range_map[idx] = max(x, key=lambda s: s[idx])[idx] 83 | ratio_map[idx] = get_loglikelihood_ratio(x, y, idx, range_map[idx]) 84 | 85 | x_new = get_feature(x, range_map, ratio_map) 86 | x_test_new = get_feature(x_test, range_map, ratio_map) 87 | return (x_new, x_test_new) 88 | 89 | def get_best_k_feature_indices(x, y, k): 90 | print 'Getting best k features...' 91 | clf = DecisionTreeClassifier(random_state=0, compute_importances=True) 92 | clf.fit(x, y) 93 | importance_pairs = [(i, clf.feature_importances_[i]) 94 | for i in range(len(clf.feature_importances_))] 95 | importance_pairs = sorted(importance_pairs, key=lambda s: s[1], reverse=True)  # bug fix: sort descending so the top-k really are the most important features 96 | return [importance_pairs[i][0] for i in range(k)] 97 | 98 | def get_best_k_features(x, indices): 99 | x_important = [] 100 | for line in x: 101 | features = [line[idx] for idx in indices] 102 | x_important.append(features) 103 | return x_important 104 | 105 | if __name__ == '__main__': 106 | pass 107 | -------------------------------------------------------------------------------- /kicked_car/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import time 4 | import utilities 5 | import preprocess 6 | import classification 7 | import feature_extraction 8 | from sklearn import cross_validation 9 | 10 | def main(training_file, test_file, submission_file, ratio): 11 | data = utilities.read_file(training_file) 12 | test_data = utilities.read_file(test_file) 13 | 14 | print 'Preparing data...' 15 | x, y = preprocess.prepare_data(data) 16 | refid, x_test = preprocess.prepare_test_data(test_data) 17 | x, x_test = preprocess.preprocess_features(x, x_test) 18 | 19 | print 'Feature extracting...' 20 | x, x_test = feature_extraction.create_feature(x, y, x_test) 21 | 22 | indices = feature_extraction.get_best_k_feature_indices(x, y, 300) 23 | x = feature_extraction.get_best_k_features(x, indices) 24 | x_test = feature_extraction.get_best_k_features(x_test, indices) 25 | print 'Got %s features.' % len(x[0]) 26 | 27 | x_train, x_cv, y_train, y_cv = cross_validation.train_test_split( 28 | x, y, test_size=.3, random_state=0) 29 | x_train, y_train = preprocess.down_sample(x_train, y_train, ratio) 30 | 31 | clf = classification.random_forest(x_train, y_train, x_cv, y_cv) 32 | 33 | print 'Predicting...'
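# (Added note: sklearn's predict_proba returns one column per class in the order of clf.classes_, so with 0/1 labels column 1 is P(IsBadBuy = 1); utilities.write_submission_file below writes predict[i][1].)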
34 | predict = clf.predict_proba(x_test) 35 | utilities.write_submission_file(submission_file, refid, predict) 36 | 37 | if __name__ == '__main__': 38 | start_time = time.time() 39 | 40 | main('./data/training.csv', 41 | './data/test.csv', 42 | './data/res.csv', 43 | 1.5) 44 | 45 | print (time.time() - start_time) / 60.0, 'minutes' 46 | -------------------------------------------------------------------------------- /kicked_car/preprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from random import shuffle 3 | 4 | def extract_year_month(x): 5 | idx = 0 6 | for i in range(len(x)): 7 | pos = x[i][idx].rfind('/', 0, len(x[i][idx])) 8 | pos0 = x[i][idx].find('/', 0, len(x[i][idx])) 9 | x[i].append(x[i][idx][0 : pos0]) 10 | x[i][idx] = x[i][idx][pos + 1 : :] 11 | 12 | def create_category_map(x, idx): 13 | category_map = {} 14 | cur = 0 15 | for line in x: 16 | cate = line[idx] 17 | if not cate in category_map: 18 | category_map[cate] = cur 19 | cur += 1 20 | return category_map 21 | 22 | def convert_category_to_int(x, idx, category_map): 23 | cur = max(category_map.values()) + 1 24 | for i in range(len(x)): 25 | cate = x[i][idx] 26 | if cate in category_map: 27 | cate_num = category_map[cate] 28 | else: 29 | cate_num = category_map[cate] = cur  # bug fix: cate_num was never assigned for unseen categories 30 | cur += 1 31 | x[i][idx] = cate_num 32 | return x 33 | 34 | def convert_categories(x, x_test): 35 | cate_feature_indices = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 36 | 24, 25, 26, 27, 28, 30, 32] 37 | for idx in cate_feature_indices: 38 | cate_map = create_category_map(x, idx) 39 | x = convert_category_to_int(x, idx, cate_map) 40 | x_test = convert_category_to_int(x_test, idx, cate_map) 41 | return (x, x_test) 42 | 43 | def get_single_numerical_median(x, idx): 44 | all_values = [float(line[idx]) for line in x 45 | if not line[idx] == '' and not line[idx] == 'NULL'] 46 | all_values.sort() 47 | return all_values[len(all_values) / 2] 48 | 49 | def fill_missing_numerical_feature(x, idx, median): 50 | for i in range(len(x)): 51 | if x[i][idx] == '' or x[i][idx] == 'NULL': 52 | x[i][idx] = median 53 | else: 54 | x[i][idx] = float(x[i][idx]) 55 | return x 56 | 57 | def fill_numerical_features(x, x_test): 58 | num_feature_indices = [3, 12, 16, 17, 18, 19, 20, 21, 22, 23, 29, 31] 59 | for idx in num_feature_indices: 60 | median = get_single_numerical_median(x, idx) 61 | x = fill_missing_numerical_feature(x, idx, median) 62 | x_test = fill_missing_numerical_feature(x_test, idx, median) 63 | return (x, x_test) 64 | 65 | def preprocess_features(x, x_test): 66 | extract_year_month(x) 67 | extract_year_month(x_test) 68 | x, x_test = fill_numerical_features(x, x_test) 69 | x, x_test = convert_categories(x, x_test) 70 | return (x, x_test) 71 | 72 | def down_sample(x, y, ratio): 73 | print 'Down sampling...' 74 | pos_indices = [i for i in range(len(y)) if y[i] == 1] 75 | neg_indices = [i for i in range(len(y)) if y[i] == 0] 76 | 77 | neg_num = min(int(len(pos_indices) * ratio), len(neg_indices)) 78 | shuffle(neg_indices) 79 | sample_indices = pos_indices + neg_indices[0 : neg_num] 80 | shuffle(sample_indices) 81 | 82 | # Down sampling.
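# (Added note: sample_indices keeps every positive index plus at most ratio times as many shuffled negative indices, so the ratio=1.5 passed from main.py yields the pos/neg mix of roughly 1:1.5 described in the readme.)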
83 | x_ds = [x[idx] for idx in sample_indices] 84 | y_ds = [y[idx] for idx in sample_indices] 85 | return (x_ds, y_ds) 86 | 87 | def prepare_data(data): 88 | x = [line[2 : :] for line in data] 89 | y = [int(line[1]) for line in data] 90 | return (x, y) 91 | 92 | def prepare_test_data(data): 93 | x = [line[1 : :] for line in data] 94 | refid = [line[0] for line in data] 95 | return (refid, x) 96 | 97 | if __name__ == '__main__': 98 | pass 99 | -------------------------------------------------------------------------------- /kicked_car/readme: -------------------------------------------------------------------------------- 1 | This is the code for Kaggle - Don't Get Kicked! The task is to predict whether a car purchased at an auto auction is a "kick", i.e. a car bought by an auto dealership that has issues serious enough to prevent it from being resold to customers. About 30 features are given for each car, most of them categorical, such as the car's model, the country that produced it, etc. Also, this is a 2 | skewed-class problem, as only about 1/7 of the cars are marked as kicks. For more details, check Kaggle's official description: http://www.kaggle.com/c/DontGetKicked. 3 | My approach is based on random forests, and most of my time was spent on feature engineering. For numerical features, I found differences between prices quite informative and added about 14 features based on them. For categorical features, I binarized all of them except "model" and "submodel"; I also added the log-likelihood ratio of each categorical feature, which boosted the result a little. In total I had 500+ features, which turned out to be redundant, so I trained a decision tree on these 500+ features and selected the best 300 by Gini importance. Since the classes are skewed, I down-sampled the training set to a pos/neg ratio of 1:1.5. 4 | With about 1h of training, my best score was 0.25147, ranking 31st among 571 teams; the leader scored 0.26720. 5 | -------------------------------------------------------------------------------- /kicked_car/utilities.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | from numpy import * 4 | 5 | def read_file(file_name, header=False): 6 | print 'Reading file...' 7 | f = open(file_name) 8 | reader = csv.reader(f) 9 | if not header: 10 | reader.next() 11 | res = [line for line in reader] 12 | f.close() 13 | return res 14 | 15 | def write_submission_file(file_name, refid, predict): 16 | print 'Writing submission file...' 17 | f = open(file_name, 'w') 18 | writer = csv.writer(f) 19 | for i in range(len(refid)): 20 | writer.writerow([refid[i], predict[i][1]]) 21 | f.close() 22 | 23 | if __name__ == '__main__': 24 | pass 25 | -------------------------------------------------------------------------------- /music_rating/music_rating.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | import math  # added: used by get_std() and rmse() below; star-importing numpy does not reliably provide math 4 | from numpy import * 5 | from sklearn import preprocessing 6 | from sklearn import cross_validation, linear_model 7 | 8 | __train__ = './data/train.csv' 9 | __test__ = './data/test.csv' 10 | __users__ = './data/users.csv' 11 | __words__ = './data/words.csv' 12 | 13 | __music_clf_map__ = {} 14 | 15 | def read_data(path, ignore_header=True, max_line=-1): 16 | """ Reads data from file.
""" 17 | csv_file_object = csv.reader(open(path, 'rb')) 18 | if ignore_header: 19 | header = csv_file_object.next() 20 | x = [] 21 | for row in csv_file_object: 22 | if max_line >= 0 and len(row) >= max_line: 23 | break 24 | x.append(row) 25 | return x 26 | 27 | def generate_user_features(): 28 | """ Generate user features from users.csv. 29 | Features include sex, age, questions.""" 30 | profiles = read_data(__users__) 31 | indices_features = [0, 1, 2] + range(8, 27) 32 | user_map = {} 33 | for row in profiles: 34 | features = [] 35 | for index in indices_features: 36 | features.append(row[index]) 37 | if features[1] == 'Male': 38 | features[1] = 0 39 | else: 40 | features[1] = 1 41 | user_map[features[0]] = features[1 : :] 42 | return user_map 43 | 44 | def get_mean(user_map, num_features): 45 | mean = [0.0] * num_features 46 | count = [0] * num_features 47 | for key in user_map.keys(): 48 | features = user_map[key] 49 | for i in range(num_features): 50 | if features[i] != '': 51 | mean[i] += float(features[i]) 52 | count[i] += 1 53 | for i in range(num_features): 54 | mean[i] /= count[i] 55 | return mean 56 | 57 | def get_std(user_map, num_features, mean): 58 | std = [0.0] * num_features 59 | count = [0] * num_features 60 | for key in user_map.keys(): 61 | features = user_map[key] 62 | for i in range(num_features): 63 | if features[i] != '': 64 | std[i] += (float(features[i]) - mean[i]) ** 2 65 | count[i] += 1 66 | for i in range(num_features): 67 | std[i] = math.sqrt(std[i] / count[i]) 68 | return std 69 | 70 | def preprocess_feature(user_map): 71 | """ Fills empty features with averages and scales the data.""" 72 | num_features = 21 73 | mean = get_mean(user_map, num_features) 74 | std = get_std(user_map, num_features, mean) 75 | # Scaling. 76 | for key in user_map.keys(): 77 | features = user_map[key] 78 | for i in range(len(features)): 79 | if features[i] == '': 80 | features[i] = 0.0 81 | else: 82 | features[i] = (float(features[i]) - mean[i]) / std[i] 83 | 84 | def extract_rating(data, artist): 85 | """ Extracts all the data includes rating, track, user etc. given 86 | an artist id. """ 87 | ratings = [] 88 | for row in data: 89 | if row[0] == artist: 90 | ratings.append(row) 91 | return ratings 92 | 93 | def generate_train_set(user_map, ratings, artist_user_pref): 94 | """ Generates training set based on all ratings of a particular artist, 95 | features combine both user profile and features from word.csv. """ 96 | x = [] 97 | y = [] 98 | cnt = 0 99 | for row in ratings: 100 | if user_map.has_key(row[2]): 101 | artist_user = (row[0], row[2]) 102 | if artist_user_pref.has_key(artist_user): 103 | y.append(row[3]) 104 | x.append(user_map[row[2]] + artist_user_pref[artist_user]) 105 | else: 106 | cnt += 1 107 | print cnt 108 | x = array(x, float64) 109 | y = array(y, float64) 110 | return (x, y) 111 | 112 | def rmse(real_value, predict_value): 113 | """ Calculating RMSE error. """ 114 | rmse = 0.0 115 | for i in range(real_value.shape[0]): 116 | rmse += (real_value[i] - predict_value[i]) ** 2 117 | rmse = math.sqrt(rmse / real_value.shape[0]) 118 | return rmse 119 | 120 | def generate_music_clf_map(data, artist_user_pref): 121 | """ Generates classifiers for each artist. 
""" 122 | for row in data: 123 | artist = row[0] 124 | if __music_clf_map__.has_key(artist): 125 | continue 126 | ratings = extract_rating(data, artist) 127 | x, y = generate_train_set(user_map, ratings, artist_user_pref) 128 | clf = linear_model.Lasso(alpha=.5) 129 | clf.fit(x, y) 130 | __music_clf_map__[artist] = clf 131 | print 'RMSE for %s: %f' % (artist, rmse(y, clf.predict(x))) 132 | 133 | def generate_artist_user_pref(): 134 | """ Generates features for each (artist, user) pair from word.csv. """ 135 | words = read_data(__words__) 136 | artist_user_pref = {} 137 | for row in words: 138 | artist_user = (row[0], row[1]) 139 | pref = row[4 : :] 140 | for i in range(len(pref)): 141 | if pref[i] == '': 142 | pref[i] = 0.0 143 | else: 144 | pref[i] = float(pref[i]) 145 | if len(pref) == 82: 146 | pref.append(0) 147 | artist_user_pref[artist_user] = pref 148 | return artist_user_pref 149 | 150 | def generate_artist_mean(data): 151 | """ Calculate average rating for each artist. """ 152 | artist_mean = {} 153 | artist_rate = {} 154 | for row in data: 155 | artist = row[0] 156 | rate = row[3] 157 | if artist_rate.has_key(artist): 158 | artist_rate[artist].append(float(rate)) 159 | else: 160 | artist_rate[artist] = [float(rate)] 161 | for key in artist_rate.keys(): 162 | artist_mean[key] = sum(artist_rate[key]) / len(artist_rate[key]) 163 | return artist_mean 164 | 165 | if __name__ == '__main__': 166 | print 'Generating user features...' 167 | user_map = generate_user_features() 168 | preprocess_feature(user_map) 169 | data = read_data(__train__) 170 | artist_mean = generate_artist_mean(data) 171 | artist_user_pref = generate_artist_user_pref() 172 | 173 | print 'Generating classifiers for each artist...' 174 | generate_music_clf_map(data, artist_user_pref) 175 | test_data = read_data(__test__) 176 | p_test = [] 177 | for row in test_data: 178 | miss = False 179 | feature = None 180 | artist = row[0] 181 | uid = row[2] 182 | clf = __music_clf_map__[artist] 183 | if user_map.has_key(uid): 184 | feature = list(user_map[uid]) 185 | if artist_user_pref.has_key((artist, uid)): 186 | feature += artist_user_pref[(artist, uid)] 187 | else: 188 | miss = True 189 | else: 190 | miss = True 191 | if not miss: 192 | p_test.append(clf.predict(feature)) 193 | else: 194 | # Uses average ratings when user cannot be found. 195 | p_test.append(artist_mean[artist]) 196 | 197 | open_file_object = csv.writer(open("./data/result.csv", "wb")) 198 | for p in p_test: 199 | open_file_object.writerow([p]) 200 | -------------------------------------------------------------------------------- /music_rating/readme: -------------------------------------------------------------------------------- 1 | The task is to predict the rating a user will give to a song (https://www.kaggle.com/c/MusicHackathon). 2 | The interesting part is that this problem provides us with tremendous amount of data, including users's rating, profile, preferences etc.. And they are in various format, ratings, words, binary... So the big challange here is how to select features, which turns out to be the key to this problem. 3 | The basic idea of my approach is to create models for each artist (rather than each artist, track pair). For a particular artist, we extract all its ratings from train.csv, and the features for each user we create from both users.csv and words.csv. I first extract features from users.csv (the file contains users' profiles) for each user, the feature includes age, sex, and the answer for their habbit questions. 
Then, from words.csv (a user survey), I use the word scores this user gave for the artist as additional features. Basically I combine these two feature sets and use Lasso regression (L1 regularization) to build the model. 4 | Due to time constraints I did not fully optimize the algorithm, and a lot of work remains to be done. I finally got an RMSE of 16.68; the leader got 13.24. 5 | -------------------------------------------------------------------------------- /photo_quality_prediction/classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from numpy import * 4 | from sklearn import cross_validation 5 | from sklearn.naive_bayes import MultinomialNB, BernoulliNB 6 | from sklearn.ensemble import RandomForestClassifier 7 | from sklearn import linear_model 8 | from sklearn.neighbors import KNeighborsClassifier 9 | 10 | def prepare_data(x, y, size=0.3, state=0): 11 | """ Divides data into training set and cross validation set. """ 12 | 13 | x_train, x_cv, y_train, y_cv = cross_validation.train_test_split( 14 | x, y, test_size=size, random_state=state) 15 | 16 | return (x_train, y_train, x_cv, y_cv) 17 | 18 | def knn(x_train, y_train, x_cv, y_cv, k=3): 19 | """ Using KNN to classify the data. """ 20 | 21 | print 'Training with KNN...' 22 | clf = KNeighborsClassifier(n_neighbors=k) 23 | clf.fit(x_train, y_train) 24 | 25 | print 'Accuracy in training set: %f' % clf.score(x_train, y_train) 26 | print 'Accuracy in cv set: %f' % clf.score(x_cv, y_cv) 27 | return clf 28 | 29 | def bernoulli_naive_bayes(x_train, y_train, x_cv, y_cv): 30 | """ Using Bernoulli Naive Bayes to classify the data. """ 31 | 32 | print 'Training with NB...' 33 | clf = BernoulliNB() 34 | clf.fit(x_train, y_train) 35 | 36 | print 'Accuracy in training set: %f' % clf.score(x_train, y_train) 37 | print 'Accuracy in cv set: %f' % clf.score(x_cv, y_cv) 38 | return clf 39 | 40 | def naive_bayes(x_train, y_train, x_cv, y_cv): 41 | """ Using Multinomial Naive Bayes to classify the data. """ 42 | 43 | print 'Training with NB...' 44 | clf = MultinomialNB() 45 | clf.fit(x_train, y_train) 46 | 47 | print 'Accuracy in training set: %f' % clf.score(x_train, y_train) 48 | print 'Accuracy in cv set: %f' % clf.score(x_cv, y_cv) 49 | return clf 50 | 51 | def random_forest(x_train, y_train, x_cv, y_cv): 52 | """ Using Random Forest to classify the data. """ 53 | 54 | print 'Training with RF...' 55 | clf = RandomForestClassifier(n_estimators=2000, max_features=2) 56 | clf.fit(x_train, y_train) 57 | 58 | print 'Predicting...' 59 | print 'Accuracy in training set: %f' % clf.score(x_train, y_train) 60 | if y_cv is not None: 61 | print 'Accuracy in cv set: %f' % clf.score(x_cv, y_cv) 62 | return clf 63 | 64 | def logistic_regression(x_train, y_train, x_cv, y_cv): 65 | """ Using Logistic Regression to classify the data. """ 66 | 67 | print 'Training with LR...' 68 | clf = linear_model.LogisticRegression(penalty='l2', C=.03) 69 | clf.fit(x_train, y_train) 70 | 71 | print 'Accuracy in training set: %f' % clf.score(x_train, y_train) 72 | if y_cv is not None: 73 | print 'Accuracy in cv set: %f' % clf.score(x_cv, y_cv) 74 | return clf 75 | 76 | def get_prob(clf, x): 77 | """ Gets the probability of being good.
""" 78 | 79 | prob = array(clf.predict_proba(x)) 80 | return prob[:, 1] 81 | 82 | if __name__ == '__main__': 83 | pass 84 | -------------------------------------------------------------------------------- /photo_quality_prediction/data/statistics: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FindBoat/Kaggle/870cb9884d67725a8fe9bad90fceb6ac286f9fe1/photo_quality_prediction/data/statistics -------------------------------------------------------------------------------- /photo_quality_prediction/feature_selection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from numpy import * 3 | import utilities 4 | import classification 5 | 6 | def generate_features(meta_data_train, y_train, meta_data_test): 7 | """ Generates features for classifier. """ 8 | 9 | # Generate maps. 10 | name_score_map, desc_score_map, caption_score_map, word_score_map = \ 11 | generate_text_score_map(meta_data_train, y_train) 12 | geo_score_map, lat_score_map, lon_score_map = generate_geo_score_map( 13 | meta_data_train, y_train) 14 | shape_score_map, size_score_map, width_score_map, height_score_map = \ 15 | generate_size_score_map(meta_data_train, y_train) 16 | 17 | # Genearte text features. 18 | text_features_train = generate_text_features(meta_data_train, 19 | name_score_map, desc_score_map, caption_score_map, word_score_map) 20 | text_features_test = generate_text_features(meta_data_test, 21 | name_score_map, desc_score_map, caption_score_map, word_score_map) 22 | 23 | # Generates geo features. 24 | geo_features_train = generate_geo_features(meta_data_train, geo_score_map, 25 | lat_score_map, lon_score_map) 26 | geo_features_test = generate_geo_features(meta_data_test, geo_score_map, 27 | lat_score_map, lon_score_map) 28 | 29 | # Generates size features 30 | size_features_train = generate_size_features(meta_data_train, 31 | shape_score_map, size_score_map, width_score_map, height_score_map) 32 | size_features_test = generate_size_features(meta_data_test, 33 | shape_score_map, size_score_map, width_score_map, height_score_map) 34 | 35 | # Combines all features. 36 | x_train = [] 37 | for i in range(len(text_features_train)): 38 | x_train.append(text_features_train[i] + size_features_train[i] \ 39 | + geo_features_train[i]) 40 | 41 | x_test = [] 42 | for i in range(len(text_features_test)): 43 | x_test.append(text_features_test[i] + size_features_test[i] \ 44 | + geo_features_test[i]) 45 | 46 | return (x_train, x_test) 47 | 48 | def generate_geo_features(meta_data, geo_score_map, lat_score_map, 49 | lon_score_map): 50 | """ Generates features for geo information. """ 51 | 52 | geo_avg_score = get_map_avg(geo_score_map) 53 | lat_avg_score = get_map_avg(lat_score_map) 54 | lon_avg_score = get_map_avg(lon_score_map) 55 | 56 | geo_score_features = [] 57 | for line in meta_data: 58 | lat = line[0] 59 | lon = line[1] 60 | geo = (lat, lon) 61 | 62 | geo_score = geo_avg_score 63 | if geo in geo_score_map: 64 | geo_score = geo_score_map[geo] 65 | 66 | lat_score = lat_avg_score 67 | if lat in lat_score_map: 68 | lat_score = lat_score_map[lat] 69 | 70 | lon_score = lon_avg_score 71 | if lon in lon_score_map: 72 | lon_score = lon_score_map[lon] 73 | 74 | geo_score_features.append([geo_score, lat_score, lon_score]) 75 | return geo_score_features 76 | 77 | def generate_geo_score_map(meta_data, y): 78 | """ Generates score map for geo information. """ 79 | 80 | print 'Extracting geo features...' 
81 | geo_score_pairs = [] 82 | lat_score_pairs = [] 83 | lon_score_pairs = [] 84 | for i in range(len(y)): 85 | lat = meta_data[i][0] 86 | lon = meta_data[i][1] 87 | geo = (lat, lon) 88 | 89 | geo_score_pairs.append((geo, y[i])) 90 | lat_score_pairs.append((lat, y[i])) 91 | lon_score_pairs.append((lon, y[i])) 92 | 93 | geo_score_map = create_key_avg_map(geo_score_pairs) 94 | lat_score_map = create_key_avg_map(lat_score_pairs) 95 | lon_score_map = create_key_avg_map(lon_score_pairs) 96 | return (geo_score_map, lat_score_map, lon_score_map) 97 | 98 | def generate_size_features(meta_data, shape_score_map, size_score_map, 99 | width_score_map, height_score_map): 100 | """ Generates features for shape, size. """ 101 | 102 | avg_shape_score = get_map_avg(shape_score_map) 103 | avg_size_score = get_map_avg(size_score_map) 104 | avg_width_score = get_map_avg(width_score_map) 105 | avg_height_score = get_map_avg(height_score_map) 106 | 107 | size_score_features = [] 108 | for line in meta_data: 109 | width = line[2] 110 | height = line[3] 111 | shape = (width, height) 112 | size = line[4] 113 | 114 | shape_score = avg_shape_score 115 | if shape in shape_score_map: 116 | shape_score = shape_score_map[shape] 117 | 118 | size_score = avg_size_score 119 | if size in size_score_map: 120 | size_score = size_score_map[size] 121 | 122 | width_score = avg_width_score 123 | if width in width_score_map: 124 | width_score = width_score_map[width] 125 | 126 | height_score = avg_height_score 127 | if height in height_score_map: 128 | height_score = height_score_map[height] 129 | 130 | size_score_features.append( 131 | [shape_score, size_score, width_score, height_score]) 132 | return size_score_features 133 | 134 | def generate_size_score_map(meta_data, y): 135 | """ Generates score map for width, heigth, size. """ 136 | 137 | print 'Extracting size features...' 138 | shape_score_pairs = [] 139 | size_score_pairs = [] 140 | width_score_pairs = [] 141 | height_score_pairs = [] 142 | for i in range(len(y)): 143 | width = meta_data[i][2] 144 | height = meta_data[i][3] 145 | shape = (width, height) 146 | size = meta_data[i][4] 147 | 148 | shape_score_pairs.append((shape, y[i])) 149 | size_score_pairs.append((size, y[i])) 150 | width_score_pairs.append((width, y[i])) 151 | height_score_pairs.append((height, y[i])) 152 | 153 | shape_score_map = create_key_avg_map(shape_score_pairs) 154 | size_score_map = create_key_avg_map(size_score_pairs) 155 | width_score_map = create_key_avg_map(width_score_pairs) 156 | height_score_map = create_key_avg_map(height_score_pairs) 157 | return (shape_score_map, size_score_map, width_score_map, height_score_map) 158 | 159 | def generate_text_features(meta_data, name_score_map, desc_score_map, 160 | caption_score_map, word_score_map): 161 | """ Generates features from name, desc, caption. 
""" 162 | 163 | avg_name_score = get_map_avg(name_score_map) 164 | avg_desc_score = get_map_avg(desc_score_map) 165 | avg_caption_score = get_map_avg(caption_score_map) 166 | 167 | text_score_features = [] 168 | for i in range(len(meta_data)): 169 | name = meta_data[i][5].split(' ') 170 | desc = meta_data[i][6].split(' ') 171 | caption = meta_data[i][7].split(' ') 172 | 173 | name_scores = [] 174 | for s in name: 175 | if s in name_score_map: 176 | name_scores.append(name_score_map[s]) 177 | elif s in word_score_map: 178 | name_scores.append(word_score_map[s]) 179 | else: 180 | name_scores.append(avg_name_score) 181 | 182 | desc_scores = [] 183 | for s in desc: 184 | if s in desc_score_map: 185 | desc_scores.append(desc_score_map[s]) 186 | elif s in word_score_map: 187 | desc_scores.append(word_score_map[s]) 188 | else: 189 | desc_scores.append(avg_desc_score) 190 | 191 | caption_scores = [] 192 | for s in caption: 193 | if s in caption_score_map: 194 | caption_scores.append(caption_score_map[s]) 195 | elif s in word_score_map: 196 | caption_scores.append(word_score_map[s]) 197 | else: 198 | caption_scores.append(avg_caption_score) 199 | 200 | # Generates features. 201 | name_avg_score = float(sum(name_scores)) / len(name_scores) 202 | desc_avg_score = float(sum(desc_scores)) / len(desc_scores) 203 | caption_avg_score = float(sum(caption_scores)) / len(caption_scores) 204 | 205 | all_scores = name_scores + desc_scores + caption_scores 206 | total_avg_score = float(sum(all_scores)) / len(all_scores) 207 | 208 | name_std = std(name_scores, name_avg_score) 209 | desc_std = std(desc_scores, desc_avg_score) 210 | caption_std = std(caption_scores, caption_avg_score) 211 | total_std = std(all_scores, total_avg_score) 212 | 213 | name_len = 0 214 | if name[0] != '': 215 | name_len = len(name) 216 | desc_len = 0 217 | if desc[0] != '': 218 | desc_len = len(desc) 219 | caption_len = 0 220 | if caption[0] != '': 221 | caption_len = len(caption) 222 | 223 | text_score_features.append([name_avg_score, desc_avg_score, 224 | caption_avg_score, total_avg_score, name_len, desc_len, 225 | caption_len, name_std, desc_std, caption_std, total_std]) 226 | return text_score_features 227 | 228 | def generate_text_score_map(meta_data, y): 229 | """ Generates the text score map for text features. """ 230 | 231 | print 'Extracting text features...' 232 | name_y_pairs = [] 233 | desc_y_pairs = [] 234 | caption_y_pairs = [] 235 | for i in range(len(y)): 236 | name = meta_data[i][5].split(' ') 237 | desc = meta_data[i][6].split(' ') 238 | caption = meta_data[i][7].split(' ') 239 | 240 | for s in name: 241 | name_y_pairs.append((s, y[i])) 242 | for s in desc: 243 | desc_y_pairs.append((s, y[i])) 244 | for s in caption: 245 | caption_y_pairs.append((s, y[i])) 246 | word_y_pairs = name_y_pairs + desc_y_pairs + caption_y_pairs 247 | 248 | name_score_map = create_key_avg_map(name_y_pairs) 249 | desc_score_map = create_key_avg_map(desc_y_pairs) 250 | caption_score_map = create_key_avg_map(caption_y_pairs) 251 | word_score_map = create_key_avg_map(word_y_pairs) 252 | return (name_score_map, desc_score_map, caption_score_map, word_score_map) 253 | 254 | def std(iterable, avg): 255 | """ Calculate the standard deviation. """ 256 | 257 | std = 0.0 258 | for n in iterable: 259 | std += (n - avg) ** 2 260 | return math.sqrt(std) 261 | 262 | def get_map_avg(k_v_map): 263 | """ Calculates the average value of a map. 
""" 264 | 265 | avg = 0.0 266 | for key in k_v_map.keys(): 267 | avg += k_v_map[key] 268 | return float(avg) / len(k_v_map) 269 | 270 | def create_key_avg_map(k_v_pairs): 271 | """ Creates a map which maps a key to its average value. """ 272 | 273 | key_avg_map = {} 274 | for pair in k_v_pairs: 275 | k = pair[0] 276 | v = pair[1] 277 | if k not in key_avg_map: 278 | key_avg_map[k] = [v, 1] 279 | else: 280 | key_avg_map[k][0] += v 281 | key_avg_map[k][1] += 1 282 | 283 | for key in key_avg_map.keys(): 284 | key_avg_map[key] = float(key_avg_map[key][0]) / key_avg_map[key][1] 285 | 286 | return key_avg_map 287 | 288 | if __name__ == '__main__': 289 | pass 290 | -------------------------------------------------------------------------------- /photo_quality_prediction/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | import utilities 4 | import classification 5 | import feature_selection 6 | 7 | def all_feature_classify(training_file, num): 8 | """ Classifier using all features. """ 9 | 10 | y, meta_data = utilities.read_training_file(training_file) 11 | y, meta_data = utilities.sample(y, meta_data, num) 12 | 13 | meta_data_train, y_train, meta_data_cv, y_cv = \ 14 | classification.prepare_data(meta_data, y) 15 | 16 | x_train, x_cv = feature_selection.generate_features(meta_data_train, 17 | y_train, meta_data_cv) 18 | 19 | clf = classification.random_forest(x_train, y_train, x_cv, y_cv) 20 | print utilities.binomial_deviance(y_train, 21 | classification.get_prob(clf, x_train)) 22 | print utilities.binomial_deviance(y_cv, classification.get_prob(clf, x_cv)) 23 | 24 | def spring_brother(training_file, test_file, submission_file): 25 | """ Running on the test file. """ 26 | 27 | y, meta_data = utilities.read_training_file(training_file) 28 | ids, meta_data_test = utilities.read_test_file(test_file) 29 | 30 | x_train, x_test = feature_selection.generate_features(meta_data, 31 | y, meta_data_test) 32 | 33 | clf = classification.random_forest(x_train, y, None, None) 34 | 35 | p = classification.get_prob(clf, x_test) 36 | utilities.write_submission_file(submission_file, ids, p) 37 | 38 | if __name__ == '__main__': 39 | start_time = time.time() 40 | 41 | spring_brother('./data/training.csv', 42 | './data/test.csv', 43 | './data/result.csv') 44 | 45 | # all_feature_classify('./data/training.csv', 40000) 46 | 47 | print (time.time() - start_time) / 60.0, 'minutes' 48 | -------------------------------------------------------------------------------- /photo_quality_prediction/readme: -------------------------------------------------------------------------------- 1 | This is the code for Kaggle competition Photo Qaulity Prediction (http://www.kaggle.com/c/PhotoQualityPrediction). 2 | The problem is to predict whether a given photo is of good quality or not based on its meta data rather than the image file. The meta data contains: location of this photo(latitude, longitude), width, heigth, size, and the name, description and caption. 3 | The approach is based on Random Forest. The key is choosing features from the meta data. Since the name, description and caption usually have few words, text classification method does not get very good result (Naive Bayes with tf-idf only gets around 0.22). Features finally I chose include: avg score of locations, avg score of shape and size, avg score for name, description and caption based on the score of each word etc.. 
Random Forest with max_features = 2 turned out to be the most effective. 4 | In the end the approach got a binomial deviance of 0.19131 (ranking 28th/200); the leader got 0.18434. 5 | -------------------------------------------------------------------------------- /photo_quality_prediction/utilities.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | import math  # added: math.log10 is used in binomial_deviance() below 4 | from numpy import * 5 | def write_submission_file(file_name, ids, p): 6 | """ Writes the submission file. """ 7 | print 'Writing submission file...' 8 | f = open(file_name, 'w') 9 | writer = csv.writer(f) 10 | for i in range(len(p)): 11 | writer.writerow([ids[i], p[i]]) 12 | f.close() 13 | 14 | def write_file(file_name, data, single=True): 15 | """ Writes data to the file. """ 16 | 17 | print 'Writing output file...' 18 | f = open(file_name, 'w') 19 | writer = csv.writer(f) 20 | for line in data: 21 | if single: 22 | writer.writerow([line]) 23 | else: 24 | writer.writerow(line) 25 | f.close() 26 | 27 | def read_file(file_name, single=True): 28 | """ Reads file. """ 29 | 30 | print 'Reading file...' 31 | f = open(file_name) 32 | reader = csv.reader(f) 33 | res = [] 34 | for line in reader: 35 | if single: 36 | res.append(line[0]) 37 | else: 38 | res.append(line) 39 | 40 | f.close() 41 | return res 42 | 43 | def read_test_file(file_name): 44 | print 'Reading test file...' 45 | f = open(file_name) 46 | reader = csv.reader(f) 47 | reader.next() 48 | 49 | ids = [] 50 | meta_data = [] 51 | for line in reader: 52 | latitude = int(line[1]) 53 | longitude = int(line[2]) 54 | width = int(line[3]) 55 | height = int(line[4]) 56 | size = int(line[5]) 57 | name = line[6] 58 | description = line[7] 59 | caption = line[8] 60 | 61 | ids.append(line[0]) 62 | meta_data.append([latitude, longitude, width, height, size, name, 63 | description, caption]) 64 | 65 | f.close() 66 | return (ids, meta_data) 67 | 68 | def read_training_file(file_name): 69 | """ Reads training file and generates data. """ 70 | 71 | print 'Reading training file...' 72 | f = open(file_name) 73 | reader = csv.reader(f) 74 | reader.next() 75 | 76 | y = [] 77 | meta_data = [] 78 | for line in reader: 79 | latitude = int(line[1]) 80 | longitude = int(line[2]) 81 | width = int(line[3]) 82 | height = int(line[4]) 83 | size = int(line[5]) 84 | name = line[6] 85 | description = line[7] 86 | caption = line[8] 87 | good = int(line[9]) 88 | 89 | y.append(good) 90 | meta_data.append([latitude, longitude, width, height, size, name, 91 | description, caption]) 92 | 93 | f.close() 94 | return (y, meta_data) 95 | 96 | def sample(y, meta_data, num, randomly=True): 97 | """ Randomly samples num data from the whole data set. """ 98 | 99 | if num == -1: 100 | num = len(y) 101 | y_sample = [] 102 | meta_data_sample = [] 103 | perm = range(len(y)) 104 | if randomly: 105 | perm = random.permutation(len(y)) 106 | perm = perm[0 : min(num, len(y))] 107 | for index in perm: 108 | y_sample.append(y[index]) 109 | meta_data_sample.append(meta_data[index]) 110 | return (y_sample, meta_data_sample) 111 | 112 | def binomial_deviance(y, prediction): 113 | """ Calculates the binomial deviance for the prediction.
""" 114 | 115 | binomial_deviance = 0.0 116 | for i in range(len(prediction)): 117 | if prediction[i] > .99: 118 | prediction[i] = .99 119 | elif prediction[i] < .1: 120 | prediction[i] = .1 121 | tmp = y[i] * math.log10(prediction[i]) 122 | tmp += (1 - y[i]) * math.log10(1 - prediction[i]) 123 | binomial_deviance -= tmp 124 | binomial_deviance /= float(len(prediction)) 125 | return binomial_deviance 126 | 127 | if __name__ == '__main__': 128 | pass 129 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | This repository contains codes for several competitions in Kaggle. 2 | 3 | **If you like this repo, you can tweet me @sgzhaohang, why thank you!** 4 | -------------------------------------------------------------------------------- /titanic/data/data.csv: -------------------------------------------------------------------------------- 1 | 0,3,0,22,7.25 2 | 1,1,1,38,71.2833 3 | 1,3,1,26,7.925 4 | 1,1,1,35,53.1 5 | 0,3,0,35,8.05 6 | 0,1,0,54,51.8625 7 | 0,3,0,2,21.075 8 | 1,3,1,27,11.1333 9 | 1,2,1,14,30.0708 10 | 1,3,1,4,16.7 11 | 1,1,1,58,26.55 12 | 0,3,0,20,8.05 13 | 0,3,0,39,31.275 14 | 0,3,1,14,7.8542 15 | 1,2,1,55,16 16 | 0,3,0,2,29.125 17 | 0,3,1,31,18 18 | 0,2,0,35,26 19 | 1,2,0,34,13 20 | 1,3,1,15,8.0292 21 | 1,1,0,28,35.5 22 | 0,3,1,8,21.075 23 | 1,3,1,38,31.3875 24 | 0,1,0,19,263 25 | 0,1,0,40,27.7208 26 | 0,2,0,66,10.5 27 | 0,1,0,28,82.1708 28 | 0,1,0,42,52 29 | 0,3,0,21,8.05 30 | 0,3,1,18,18 31 | 1,3,1,14,11.2417 32 | 0,3,1,40,9.475 33 | 0,2,1,27,21 34 | 1,2,1,3,41.5792 35 | 1,3,1,19,7.8792 36 | 0,3,1,18,17.8 37 | 0,3,0,7,39.6875 38 | 0,3,0,21,7.8 39 | 1,1,1,49,76.7292 40 | 1,2,1,29,26 41 | 0,1,0,65,61.9792 42 | 1,2,1,21,10.5 43 | 0,3,0,28.5,7.2292 44 | 1,2,1,5,27.75 45 | 0,3,0,11,46.9 46 | 0,3,0,22,7.2292 47 | 1,1,1,38,80 48 | 0,1,0,45,83.475 49 | 0,3,0,4,27.9 50 | 1,2,1,29,10.5 51 | 0,3,0,19,8.1583 52 | 1,3,1,17,7.925 53 | 0,3,0,26,8.6625 54 | 0,2,0,32,10.5 55 | 0,3,1,16,46.9 56 | 0,2,0,21,73.5 57 | 0,3,0,26,14.4542 58 | 1,3,0,32,56.4958 59 | 0,3,0,25,7.65 60 | 1,2,0,0.83,29 61 | 1,3,1,30,12.475 62 | 0,3,0,22,9 63 | 1,3,0,29,9.5 64 | 0,1,0,28,47.1 65 | 1,2,1,17,10.5 66 | 1,3,1,33,15.85 67 | 0,3,0,16,34.375 68 | 1,1,1,23,263 69 | 0,3,0,24,8.05 70 | 0,3,0,29,8.05 71 | 0,3,0,20,7.8542 72 | 0,1,0,46,61.175 73 | 0,3,0,26,20.575 74 | 0,3,0,59,7.25 75 | 0,1,0,71,34.6542 76 | 1,1,0,23,63.3583 77 | 1,2,1,34,23 78 | 0,2,0,34,26 79 | 0,3,1,28,7.8958 80 | 0,1,0,21,77.2875 81 | 0,3,0,33,8.6542 82 | 0,3,0,37,7.925 83 | 0,3,0,28,7.8958 84 | 1,3,1,21,7.65 85 | 0,3,0,38,7.8958 86 | 0,1,0,47,52 87 | 0,3,1,14.5,14.4542 88 | 0,3,0,22,8.05 89 | 0,3,1,20,9.825 90 | 0,3,1,17,14.4583 91 | 0,3,0,21,7.925 92 | 0,3,0,70.5,7.75 93 | 0,2,0,29,21 94 | 0,1,0,24,247.5208 95 | 0,3,1,2,31.275 96 | 0,2,0,21,73.5 97 | 0,2,0,32.5,30.0708 98 | 1,2,1,32.5,13 99 | 0,1,0,54,77.2875 100 | 1,3,0,12,11.2417 101 | 1,3,0,24,7.1417 102 | 0,3,0,45,6.975 103 | 0,3,0,33,7.8958 104 | 0,3,0,20,7.05 105 | 0,3,1,47,14.5 106 | 1,2,1,29,26 107 | 0,2,0,25,13 108 | 0,2,0,23,15.0458 109 | 1,1,1,19,26.2833 110 | 0,1,0,37,53.1 111 | 0,3,0,16,9.2167 112 | 0,1,0,24,79.2 113 | 1,3,1,22,7.75 114 | 1,3,1,24,15.85 115 | 0,3,0,19,6.75 116 | 0,2,0,18,11.5 117 | 0,2,0,19,36.75 118 | 1,3,0,27,7.7958 119 | 0,3,1,9,34.375 120 | 0,2,0,36.5,26 121 | 0,2,0,42,13 122 | 0,2,0,51,12.525 123 | 1,1,1,22,66.6 124 | 0,3,0,55.5,8.05 125 | 0,3,0,40.5,14.5 126 | 0,1,0,51,61.3792 127 | 1,3,1,16,7.7333 128 | 0,3,0,30,8.05 129 | 
0,3,0,44,16.1 130 | 1,2,1,40,15.75 131 | 0,3,0,26,7.775 132 | 0,3,0,17,8.6625 133 | 0,3,0,1,39.6875 134 | 1,3,0,9,20.525 135 | 0,3,1,45,27.9 136 | 0,3,0,28,56.4958 137 | 0,1,0,61,33.5 138 | 0,3,0,4,29.125 139 | 1,3,1,1,11.1333 140 | 0,3,0,21,7.925 141 | 0,1,0,56,30.6958 142 | 0,3,0,18,7.8542 143 | 0,1,1,50,28.7125 144 | 0,2,0,30,13 145 | 0,3,0,36,0 146 | 0,3,0,9,31.3875 147 | 1,2,0,1,39 148 | 1,3,1,4,22.025 149 | 1,1,0,45,26.55 150 | 0,3,0,40,15.5 151 | 0,3,0,36,7.8958 152 | 1,2,1,32,13 153 | 0,2,0,19,13 154 | 1,3,1,19,7.8542 155 | 1,2,0,3,26 156 | 1,1,1,44,27.7208 157 | 1,1,1,58,146.5208 158 | 0,3,0,42,8.4042 159 | 0,2,1,24,13 160 | 0,3,0,28,9.5 161 | 0,3,0,34,6.4958 162 | 0,3,0,45.5,7.225 163 | 1,3,0,18,8.05 164 | 0,3,1,2,10.4625 165 | 0,3,0,32,15.85 166 | 1,3,0,26,18.7875 167 | 1,3,1,16,7.75 168 | 1,1,0,40,31 169 | 0,3,0,24,7.05 170 | 1,2,1,35,21 171 | 0,3,0,22,7.25 172 | 0,2,0,30,13 173 | 1,1,1,31,113.275 174 | 1,3,1,27,7.925 175 | 0,2,0,42,27 176 | 1,1,1,32,76.2917 177 | 0,2,0,30,10.5 178 | 1,3,0,16,8.05 179 | 0,2,0,27,13 180 | 0,3,0,51,8.05 181 | 1,1,0,38,90 182 | 0,3,0,22,9.35 183 | 1,2,0,19,10.5 184 | 0,3,0,20.5,7.25 185 | 0,2,0,18,13 186 | 1,1,1,35,83.475 187 | 0,3,0,29,7.775 188 | 0,2,0,59,13.5 189 | 1,3,1,5,31.3875 190 | 0,2,0,24,10.5 191 | 0,2,0,44,26 192 | 1,2,1,8,26.25 193 | 0,2,0,19,10.5 194 | 0,2,0,33,12.275 195 | 0,2,0,29,10.5 196 | 0,3,0,22,7.125 197 | 0,3,0,30,7.225 198 | 0,1,0,44,90 199 | 0,3,1,25,7.775 200 | 1,2,1,24,14.5 201 | 1,1,0,37,52.5542 202 | 0,2,0,54,26 203 | 0,3,1,29,10.4625 204 | 0,1,0,62,26.55 205 | 0,3,0,30,16.1 206 | 0,3,1,41,20.2125 207 | 1,3,1,29,15.2458 208 | 1,1,1,30,86.5 209 | 1,1,1,35,512.3292 210 | 1,2,1,50,26 211 | 1,3,0,3,31.3875 212 | 0,1,0,52,79.65 213 | 0,1,0,40,0 214 | 0,2,0,36,10.5 215 | 0,3,0,16,39.6875 216 | 1,3,0,25,7.775 217 | 1,1,1,58,153.4625 218 | 1,1,1,35,135.6333 219 | 1,3,0,25,0 220 | 1,2,1,41,19.5 221 | 0,1,0,37,29.7 222 | 1,1,1,63,77.9583 223 | 0,3,1,45,7.75 224 | 0,3,0,7,29.125 225 | 1,3,1,35,20.25 226 | 0,3,0,65,7.75 227 | 0,3,0,28,7.8542 228 | 0,3,0,16,9.5 229 | 1,3,0,19,8.05 230 | 0,3,0,33,8.6625 231 | 1,3,0,30,9.5 232 | 0,3,0,22,7.8958 233 | 1,2,0,42,13 234 | 1,3,1,22,7.75 235 | 1,1,1,26,78.85 236 | 1,1,1,19,91.0792 237 | 0,2,0,36,12.875 238 | 0,3,1,24,8.85 239 | 0,3,0,24,7.8958 240 | 0,3,0,23.5,7.2292 241 | 0,1,1,2,151.55 242 | 1,1,1,50,247.5208 243 | 0,3,0,19,0 244 | 1,1,0,0.92,151.55 245 | 1,1,1,17,108.9 246 | 0,2,0,30,24 247 | 1,1,1,30,56.9292 248 | 1,1,1,24,83.1583 249 | 1,1,1,18,262.375 250 | 0,2,1,26,26 251 | 0,3,0,28,7.8958 252 | 0,2,0,43,26.25 253 | 1,3,1,26,7.8542 254 | 1,2,1,24,26 255 | 0,2,0,54,14 256 | 1,1,1,31,164.8667 257 | 1,1,1,40,134.5 258 | 0,3,0,22,7.25 259 | 0,3,0,27,7.8958 260 | 1,2,1,30,12.35 261 | 1,2,1,22,29 262 | 1,1,1,36,135.6333 263 | 0,3,0,61,6.2375 264 | 1,2,1,36,13 265 | 1,3,1,31,20.525 266 | 1,1,1,16,57.9792 267 | 0,1,0,45.5,28.5 268 | 0,1,0,38,153.4625 269 | 0,3,0,16,18 270 | 0,1,0,29,66.6 271 | 1,1,1,41,134.5 272 | 1,3,0,45,8.05 273 | 0,1,0,45,35.5 274 | 1,2,0,2,26 275 | 1,1,1,24,263 276 | 0,2,0,28,13 277 | 0,2,0,25,13 278 | 0,2,0,36,13 279 | 1,2,1,24,13 280 | 1,2,1,40,13 281 | 1,3,0,3,15.9 282 | 0,3,0,42,8.6625 283 | 0,3,0,23,9.225 284 | 0,3,0,15,7.2292 285 | 0,3,0,25,17.8 286 | 0,3,0,28,9.5 287 | 1,1,1,22,55 288 | 0,2,1,38,13 289 | 0,3,0,40,27.9 290 | 0,2,0,29,27.7208 291 | 0,3,1,45,14.4542 292 | 0,3,0,35,7.05 293 | 0,3,0,30,7.25 294 | 1,1,1,60,75.25 295 | 1,1,1,24,69.3 296 | 1,1,0,25,55.4417 297 | 0,3,0,18,6.4958 298 | 0,3,0,19,8.05 299 | 0,1,0,22,135.6333 300 | 0,3,1,3,21.075 301 | 
1,3,1,22,7.25 302 | 0,1,0,27,211.5 303 | 0,3,0,20,4.0125 304 | 0,3,0,19,7.775 305 | 1,1,1,42,227.525 306 | 1,3,1,1,15.7417 307 | 0,3,0,32,7.925 308 | 1,1,1,35,52 309 | 0,2,0,18,73.5 310 | 0,3,0,1,46.9 311 | 1,2,1,36,13 312 | 1,2,1,17,12 313 | 1,1,0,36,120 314 | 1,3,0,21,7.7958 315 | 0,3,0,28,7.925 316 | 1,1,1,23,113.275 317 | 1,3,1,24,16.7 318 | 0,3,0,22,7.7958 319 | 0,3,1,31,7.8542 320 | 0,2,0,46,26 321 | 0,2,0,23,10.5 322 | 1,2,1,28,12.65 323 | 1,3,0,39,7.925 324 | 0,3,0,26,8.05 325 | 0,3,1,21,9.825 326 | 0,3,0,28,15.85 327 | 0,3,1,20,8.6625 328 | 0,2,0,34,21 329 | 0,3,0,51,7.75 330 | 1,2,0,3,18.75 331 | 0,3,0,21,7.775 332 | 1,1,1,33,90 333 | 1,3,0,44,7.925 334 | 1,2,1,34,32.5 335 | 1,2,1,18,13 336 | 0,2,0,30,13 337 | 0,3,1,10,24.15 338 | 0,3,0,21,7.7333 339 | 0,3,0,29,7.875 340 | 0,3,1,28,14.4 341 | 0,3,0,18,20.2125 342 | 1,2,1,28,26 343 | 1,2,1,19,26 344 | 1,3,0,32,8.05 345 | 1,1,0,28,26.55 346 | 1,2,1,42,26 347 | 0,3,0,17,7.125 348 | 0,1,0,50,55.9 349 | 1,1,1,14,120 350 | 0,3,1,21,34.375 351 | 1,2,1,24,18.75 352 | 0,1,0,64,263 353 | 0,2,0,31,10.5 354 | 1,2,1,45,26.25 355 | 0,3,0,20,9.5 356 | 0,3,0,25,7.775 357 | 1,2,1,28,13 358 | 1,1,0,4,81.8583 359 | 1,2,1,13,19.5 360 | 1,1,0,34,26.55 361 | 1,3,1,5,19.2583 362 | 1,1,0,52,30.5 363 | 0,2,0,36,27.75 364 | 0,1,0,30,27.75 365 | 1,1,0,49,89.1042 366 | 1,3,0,29,7.8958 367 | 0,1,0,65,26.55 368 | 1,2,1,50,10.5 369 | 1,1,0,48,26.55 370 | 0,3,0,34,8.05 371 | 0,1,0,47,38.5 372 | 0,2,0,48,13 373 | 0,3,0,38,7.05 374 | 0,1,0,56,26.55 375 | 1,3,1,0.75,19.2583 376 | 0,3,0,38,8.6625 377 | 1,2,1,33,27.75 378 | 1,2,1,23,13.7917 379 | 0,3,1,22,9.8375 380 | 0,2,0,34,21 381 | 0,3,0,29,7.0458 382 | 0,3,0,22,7.5208 383 | 1,3,1,2,12.2875 384 | 0,3,0,9,46.9 385 | 0,3,0,50,8.05 386 | 1,3,1,63,9.5875 387 | 1,1,0,25,91.0792 388 | 1,1,1,35,90 389 | 0,1,0,58,29.7 390 | 0,3,0,30,8.05 391 | 1,3,0,9,15.9 392 | 0,3,0,21,7.25 393 | 0,1,0,55,30.5 394 | 0,1,0,71,49.5042 395 | 0,3,0,21,8.05 396 | 1,1,1,54,78.2667 397 | 0,1,1,25,151.55 398 | 0,3,0,24,7.7958 399 | 0,3,0,17,8.6625 400 | 0,3,1,21,7.75 401 | 0,3,1,37,9.5875 402 | 1,1,1,16,86.5 403 | 0,1,0,18,108.9 404 | 1,2,1,33,26 405 | 0,3,0,28,22.525 406 | 1,3,0,26,56.4958 407 | 1,3,0,29,7.75 408 | 1,1,0,36,26.2875 409 | 1,1,1,54,59.4 410 | 0,3,0,24,7.4958 411 | 0,1,0,47,34.0208 412 | 1,2,1,34,10.5 413 | 1,2,1,36,26 414 | 0,3,0,32,7.8958 415 | 1,1,1,30,93.5 416 | 0,3,0,22,7.8958 417 | 1,1,1,44,57.9792 418 | 0,3,0,40.5,7.75 419 | 1,2,1,50,10.5 420 | 0,3,0,39,7.925 421 | 0,2,0,23,11.5 422 | 1,2,1,2,26 423 | 0,3,0,17,7.2292 424 | 0,3,1,30,8.6625 425 | 1,2,1,7,26.25 426 | 0,1,0,45,26.55 427 | 1,1,1,30,106.425 428 | 1,1,1,22,49.5 429 | 1,1,1,36,71 430 | 0,3,1,9,31.275 431 | 0,3,1,11,31.275 432 | 1,2,0,32,26 433 | 0,1,0,50,106.425 434 | 0,1,0,64,26 435 | 1,2,1,19,26 436 | 0,3,0,33,20.525 437 | 1,2,0,8,36.75 438 | 1,1,0,17,110.8833 439 | 0,2,0,27,26 440 | 1,3,0,22,7.225 441 | 1,3,1,22,7.775 442 | 0,1,0,62,26.55 443 | 1,1,1,48,39.6 444 | 1,1,1,39,79.65 445 | 1,3,1,36,17.4 446 | 0,3,0,40,7.8958 447 | 0,2,0,28,13.5 448 | 0,3,0,24,24.15 449 | 0,3,0,19,7.8958 450 | 0,3,1,29,21.075 451 | 1,3,0,32,7.8542 452 | 1,2,0,62,10.5 453 | 1,1,1,53,51.4792 454 | 1,1,0,36,26.3875 455 | 0,3,0,16,8.05 456 | 0,3,0,19,14.5 457 | 1,2,1,34,13 458 | 1,1,1,39,55.9 459 | 1,3,0,32,7.925 460 | 1,2,1,25,30 461 | 1,1,1,39,110.8833 462 | 0,2,0,54,26 463 | 0,1,0,36,40.125 464 | 1,1,1,18,79.65 465 | 0,2,0,47,15 466 | 1,1,0,60,79.2 467 | 0,3,0,22,8.05 468 | 0,3,0,35,7.125 469 | 1,1,1,52,78.2667 470 | 0,3,0,47,7.25 471 | 0,2,0,37,26 472 | 0,3,0,36,24.15 473 | 
0,3,0,49,0 474 | 1,1,0,49,56.9292 475 | 1,2,1,24,27 476 | 0,3,0,44,8.05 477 | 1,1,0,35,26.55 478 | 0,3,0,36,15.55 479 | 0,3,0,30,7.8958 480 | 1,1,0,27,30.5 481 | 1,2,1,22,41.5792 482 | 1,1,1,40,153.4625 483 | 0,3,1,39,31.275 484 | 0,3,0,35,8.05 485 | 1,2,1,24,65 486 | 0,3,0,34,14.4 487 | 0,3,1,26,16.1 488 | 1,2,1,4,39 489 | 0,2,0,26,10.5 490 | 0,3,0,27,14.4542 491 | 1,1,0,42,52.5542 492 | 1,3,0,20,15.7417 493 | 0,3,0,21,7.8542 494 | 0,3,0,21,16.1 495 | 0,1,0,61,32.3208 496 | 0,2,0,57,12.35 497 | 1,1,1,21,77.9583 498 | 0,3,0,26,7.8958 499 | 1,1,0,80,30 500 | 0,3,0,51,7.0542 501 | 1,1,0,32,30.5 502 | 0,3,1,9,27.9 503 | 1,2,1,28,13 504 | 0,3,0,32,7.925 505 | 0,2,0,31,26.25 506 | 0,3,1,41,39.6875 507 | 0,3,0,20,7.8542 508 | 1,1,1,24,69.3 509 | 0,3,1,2,27.9 510 | 1,3,1,0.75,19.2583 511 | 1,1,0,48,76.7292 512 | 0,3,0,19,7.8958 513 | 1,1,0,56,35.5 514 | 1,3,1,23,7.55 515 | 1,2,1,18,23 516 | 0,3,0,21,8.4333 517 | 0,3,1,18,6.75 518 | 0,2,0,24,73.5 519 | 0,3,1,32,15.5 520 | 0,2,0,23,13 521 | 0,1,0,58,113.275 522 | 1,1,0,50,133.65 523 | 0,3,0,40,7.225 524 | 0,1,0,47,25.5875 525 | 0,3,0,36,7.4958 526 | 1,3,0,20,7.925 527 | 0,2,0,32,73.5 528 | 0,2,0,25,13 529 | 0,3,0,43,8.05 530 | 1,2,1,40,39 531 | 0,1,0,31,52 532 | 0,2,0,70,10.5 533 | 1,2,0,31,13 534 | 0,3,0,18,7.775 535 | 0,3,0,24.5,8.05 536 | 1,3,1,18,9.8417 537 | 0,3,1,43,46.9 538 | 1,1,0,36,512.3292 539 | 1,1,0,27,76.7292 540 | 0,3,0,20,9.225 541 | 0,3,0,14,46.9 542 | 0,2,0,60,39 543 | 0,2,0,25,41.5792 544 | 0,3,0,14,39.6875 545 | 0,3,0,19,10.1708 546 | 0,3,0,18,7.7958 547 | 1,1,1,15,211.3375 548 | 1,1,0,31,57 549 | 1,3,1,4,13.4167 550 | 0,3,0,25,7.225 551 | 0,1,0,60,26.55 552 | 0,2,0,52,13.5 553 | 0,3,0,44,8.05 554 | 0,1,0,49,110.8833 555 | 0,3,0,42,7.65 556 | 1,1,1,18,227.525 557 | 1,1,0,35,26.2875 558 | 0,3,1,18,14.4542 559 | 0,3,0,25,7.7417 560 | 0,3,0,26,7.8542 561 | 0,2,0,39,26 562 | 1,2,1,45,13.5 563 | 1,1,0,42,26.2875 564 | 1,1,1,22,151.55 565 | 1,1,1,24,49.5042 566 | 1,1,0,48,52 567 | 0,3,0,29,9.4833 568 | 0,2,0,52,13 569 | 0,3,0,19,7.65 570 | 1,1,1,38,227.525 571 | 1,2,1,27,10.5 572 | 0,3,0,33,7.775 573 | 1,2,1,6,33 574 | 0,3,0,17,7.0542 575 | 0,2,0,34,13 576 | 0,2,0,50,13 577 | 1,1,0,27,53.1 578 | 0,3,0,20,8.6625 579 | 1,2,1,30,21 580 | 0,2,0,25,26 581 | 0,3,1,25,7.925 582 | 1,1,1,29,211.3375 583 | 0,3,0,11,18.7875 584 | 0,2,0,23,13 585 | 0,2,0,23,13 586 | 0,3,0,28.5,16.1 587 | 0,3,1,48,34.375 588 | 1,1,0,35,512.3292 589 | 0,1,0,36,78.85 590 | 1,1,1,21,262.375 591 | 0,3,0,24,16.1 592 | 1,3,0,31,7.925 593 | 0,1,0,70,71 594 | 0,3,0,16,20.25 595 | 1,2,1,30,13 596 | 0,1,0,19,53.1 597 | 0,3,0,31,7.75 598 | 1,2,1,4,23 599 | 1,3,0,6,12.475 600 | 0,3,0,33,9.5 601 | 0,3,0,23,7.8958 602 | 1,2,1,48,65 603 | 1,2,0,0.67,14.5 604 | 0,3,0,28,7.7958 605 | 0,2,0,18,11.5 606 | 0,3,0,34,8.05 607 | 1,1,1,33,86.5 608 | 0,3,0,41,7.125 609 | 1,3,0,20,7.2292 610 | 1,1,1,36,120 611 | 0,3,0,16,7.775 612 | 1,1,1,51,77.9583 613 | 0,3,1,30.5,7.75 614 | 0,3,0,32,8.3625 615 | 0,3,0,24,9.5 616 | 0,3,0,48,7.8542 617 | 0,2,1,57,10.5 618 | 1,2,1,54,23 619 | 0,3,0,18,7.75 620 | 1,3,1,5,12.475 621 | 1,1,1,43,211.3375 622 | 1,3,1,13,7.2292 623 | 1,1,1,17,57 624 | 0,1,0,29,30 625 | 0,3,0,25,7.05 626 | 0,3,0,25,7.25 627 | 1,3,1,18,7.4958 628 | 0,3,0,8,29.125 629 | 1,3,0,1,20.575 630 | 0,1,0,46,79.2 631 | 0,2,0,16,26 632 | 0,3,0,25,7.8958 633 | 0,2,0,39,13 634 | 1,1,1,49,25.9292 635 | 1,3,1,31,8.6833 636 | 0,3,0,30,7.2292 637 | 0,3,1,30,24.15 638 | 0,2,0,34,13 639 | 1,2,1,31,26.25 640 | 1,1,0,11,120 641 | 1,3,0,0.42,8.5167 642 | 1,3,0,27,6.975 643 | 0,3,0,31,7.775 644 | 
0,1,0,39,0 645 | 0,3,1,18,7.775 646 | 0,2,0,39,13 647 | 1,1,1,33,53.1 648 | 0,3,0,26,7.8875 649 | 0,3,0,39,24.15 650 | 0,2,0,35,10.5 651 | 0,3,1,6,31.275 652 | 0,3,0,30.5,8.05 653 | 0,3,1,23,7.925 654 | 0,2,0,31,37.0042 655 | 0,3,0,43,6.45 656 | 0,3,0,10,27.9 657 | 1,1,1,52,93.5 658 | 1,3,0,27,8.6625 659 | 0,1,0,38,0 660 | 1,3,1,27,12.475 661 | 0,3,0,2,39.6875 662 | 1,2,0,1,37.0042 663 | 1,1,1,62,80 664 | 1,3,1,15,14.4542 665 | 1,2,0,0.83,18.75 666 | 0,3,0,23,7.8542 667 | 0,3,0,18,8.3 668 | 1,1,1,39,83.1583 669 | 0,3,0,21,8.6625 670 | 1,3,0,32,56.4958 671 | 0,3,0,20,7.925 672 | 0,2,0,16,10.5 673 | 1,1,1,30,31 674 | 0,3,0,34.5,6.4375 675 | 0,3,0,17,8.6625 676 | 0,3,0,42,7.55 677 | 0,3,0,35,7.8958 678 | 0,2,0,28,33 679 | 0,3,0,4,31.275 680 | 0,3,0,74,7.775 681 | 0,3,1,9,15.2458 682 | 1,1,1,16,39.4 683 | 0,2,1,44,26 684 | 1,3,1,18,9.35 685 | 1,1,1,45,164.8667 686 | 1,1,0,51,26.55 687 | 1,3,1,24,19.2583 688 | 0,3,0,41,14.1083 689 | 0,2,0,21,11.5 690 | 1,1,1,48,25.9292 691 | 0,2,0,24,13 692 | 1,2,1,42,13 693 | 1,2,1,27,13.8583 694 | 0,1,0,31,50.4958 695 | 1,3,0,4,11.1333 696 | 0,3,0,26,7.8958 697 | 1,1,1,47,52.5542 698 | 0,1,0,33,5 699 | 0,3,0,47,9 700 | 1,2,1,28,24 701 | 1,3,1,15,7.225 702 | 0,3,0,20,9.8458 703 | 0,3,0,19,7.8958 704 | 1,1,1,56,83.1583 705 | 1,2,1,25,26 706 | 0,3,0,33,7.8958 707 | 0,3,1,22,10.5167 708 | 0,2,0,28,10.5 709 | 0,3,0,25,7.05 710 | 0,3,1,39,29.125 711 | 0,2,0,27,13 712 | 1,1,1,19,30 713 | 1,1,0,26,30 714 | 0,3,0,32,7.75 715 | -------------------------------------------------------------------------------- /titanic/data/data2.csv: -------------------------------------------------------------------------------- 1 | 0,3,0,22,7.25 2 | 1,1,1,38,71.2833 3 | 1,3,1,26,7.925 4 | 1,1,1,35,53.1 5 | 0,3,0,35,8.05 6 | 0,3,0,30,8.4583 7 | 0,1,0,54,51.8625 8 | 0,3,0,2,21.075 9 | 1,3,1,27,11.1333 10 | 1,2,1,14,30.0708 11 | 1,3,1,4,16.7 12 | 1,1,1,58,26.55 13 | 0,3,0,20,8.05 14 | 0,3,0,39,31.275 15 | 0,3,1,14,7.8542 16 | 1,2,1,55,16 17 | 0,3,0,2,29.125 18 | 1,2,0,30,13 19 | 0,3,1,31,18 20 | 1,3,1,30,7.225 21 | 0,2,0,35,26 22 | 1,2,0,34,13 23 | 1,3,1,15,8.0292 24 | 1,1,0,28,35.5 25 | 0,3,1,8,21.075 26 | 1,3,1,38,31.3875 27 | 0,3,0,30,7.225 28 | 0,1,0,19,263 29 | 1,3,1,30,7.8792 30 | 0,3,0,30,7.8958 31 | 0,1,0,40,27.7208 32 | 1,1,1,30,146.5208 33 | 1,3,1,30,7.75 34 | 0,2,0,66,10.5 35 | 0,1,0,28,82.1708 36 | 0,1,0,42,52 37 | 1,3,0,30,7.2292 38 | 0,3,0,21,8.05 39 | 0,3,1,18,18 40 | 1,3,1,14,11.2417 41 | 0,3,1,40,9.475 42 | 0,2,1,27,21 43 | 0,3,0,30,7.8958 44 | 1,2,1,3,41.5792 45 | 1,3,1,19,7.8792 46 | 0,3,0,30,8.05 47 | 0,3,0,30,15.5 48 | 1,3,1,30,7.75 49 | 0,3,0,30,21.6792 50 | 0,3,1,18,17.8 51 | 0,3,0,7,39.6875 52 | 0,3,0,21,7.8 53 | 1,1,1,49,76.7292 54 | 1,2,1,29,26 55 | 0,1,0,65,61.9792 56 | 1,1,0,30,35.5 57 | 1,2,1,21,10.5 58 | 0,3,0,28.5,7.2292 59 | 1,2,1,5,27.75 60 | 0,3,0,11,46.9 61 | 0,3,0,22,7.2292 62 | 1,1,1,38,80 63 | 0,1,0,45,83.475 64 | 0,3,0,4,27.9 65 | 0,1,0,30,27.7208 66 | 1,3,0,30,15.2458 67 | 1,2,1,29,10.5 68 | 0,3,0,19,8.1583 69 | 1,3,1,17,7.925 70 | 0,3,0,26,8.6625 71 | 0,2,0,32,10.5 72 | 0,3,1,16,46.9 73 | 0,2,0,21,73.5 74 | 0,3,0,26,14.4542 75 | 1,3,0,32,56.4958 76 | 0,3,0,25,7.65 77 | 0,3,0,30,7.8958 78 | 0,3,0,30,8.05 79 | 1,2,0,0.83,29 80 | 1,3,1,30,12.475 81 | 0,3,0,22,9 82 | 1,3,0,29,9.5 83 | 1,3,1,30,7.7875 84 | 0,1,0,28,47.1 85 | 1,2,1,17,10.5 86 | 1,3,1,33,15.85 87 | 0,3,0,16,34.375 88 | 0,3,0,30,8.05 89 | 1,1,1,23,263 90 | 0,3,0,24,8.05 91 | 0,3,0,29,8.05 92 | 0,3,0,20,7.8542 93 | 0,1,0,46,61.175 94 | 0,3,0,26,20.575 95 | 0,3,0,59,7.25 96 | 0,3,0,30,8.05 
97 | 0,1,0,71,34.6542 98 | 1,1,0,23,63.3583 99 | 1,2,1,34,23 100 | 0,2,0,34,26 101 | 0,3,1,28,7.8958 102 | 0,3,0,30,7.8958 103 | 0,1,0,21,77.2875 104 | 0,3,0,33,8.6542 105 | 0,3,0,37,7.925 106 | 0,3,0,28,7.8958 107 | 1,3,1,21,7.65 108 | 1,3,0,30,7.775 109 | 0,3,0,38,7.8958 110 | 1,3,1,30,24.15 111 | 0,1,0,47,52 112 | 0,3,1,14.5,14.4542 113 | 0,3,0,22,8.05 114 | 0,3,1,20,9.825 115 | 0,3,1,17,14.4583 116 | 0,3,0,21,7.925 117 | 0,3,0,70.5,7.75 118 | 0,2,0,29,21 119 | 0,1,0,24,247.5208 120 | 0,3,1,2,31.275 121 | 0,2,0,21,73.5 122 | 0,3,0,30,8.05 123 | 0,2,0,32.5,30.0708 124 | 1,2,1,32.5,13 125 | 0,1,0,54,77.2875 126 | 1,3,0,12,11.2417 127 | 0,3,0,30,7.75 128 | 1,3,0,24,7.1417 129 | 1,3,1,30,22.3583 130 | 0,3,0,45,6.975 131 | 0,3,0,33,7.8958 132 | 0,3,0,20,7.05 133 | 0,3,1,47,14.5 134 | 1,2,1,29,26 135 | 0,2,0,25,13 136 | 0,2,0,23,15.0458 137 | 1,1,1,19,26.2833 138 | 0,1,0,37,53.1 139 | 0,3,0,16,9.2167 140 | 0,1,0,24,79.2 141 | 0,3,1,30,15.2458 142 | 1,3,1,22,7.75 143 | 1,3,1,24,15.85 144 | 0,3,0,19,6.75 145 | 0,2,0,18,11.5 146 | 0,2,0,19,36.75 147 | 1,3,0,27,7.7958 148 | 0,3,1,9,34.375 149 | 0,2,0,36.5,26 150 | 0,2,0,42,13 151 | 0,2,0,51,12.525 152 | 1,1,1,22,66.6 153 | 0,3,0,55.5,8.05 154 | 0,3,0,40.5,14.5 155 | 0,3,0,30,7.3125 156 | 0,1,0,51,61.3792 157 | 1,3,1,16,7.7333 158 | 0,3,0,30,8.05 159 | 0,3,0,30,8.6625 160 | 0,3,0,30,69.55 161 | 0,3,0,44,16.1 162 | 1,2,1,40,15.75 163 | 0,3,0,26,7.775 164 | 0,3,0,17,8.6625 165 | 0,3,0,1,39.6875 166 | 1,3,0,9,20.525 167 | 1,1,1,30,55 168 | 0,3,1,45,27.9 169 | 0,1,0,30,25.925 170 | 0,3,0,28,56.4958 171 | 0,1,0,61,33.5 172 | 0,3,0,4,29.125 173 | 1,3,1,1,11.1333 174 | 0,3,0,21,7.925 175 | 0,1,0,56,30.6958 176 | 0,3,0,18,7.8542 177 | 0,3,0,30,25.4667 178 | 0,1,1,50,28.7125 179 | 0,2,0,30,13 180 | 0,3,0,36,0 181 | 0,3,1,30,69.55 182 | 0,2,0,30,15.05 183 | 0,3,0,9,31.3875 184 | 1,2,0,1,39 185 | 1,3,1,4,22.025 186 | 0,1,0,30,50 187 | 1,3,1,30,15.5 188 | 1,1,0,45,26.55 189 | 0,3,0,40,15.5 190 | 0,3,0,36,7.8958 191 | 1,2,1,32,13 192 | 0,2,0,19,13 193 | 1,3,1,19,7.8542 194 | 1,2,0,3,26 195 | 1,1,1,44,27.7208 196 | 1,1,1,58,146.5208 197 | 0,3,0,30,7.75 198 | 0,3,0,42,8.4042 199 | 1,3,1,30,7.75 200 | 0,2,1,24,13 201 | 0,3,0,28,9.5 202 | 0,3,0,30,69.55 203 | 0,3,0,34,6.4958 204 | 0,3,0,45.5,7.225 205 | 1,3,0,18,8.05 206 | 0,3,1,2,10.4625 207 | 0,3,0,32,15.85 208 | 1,3,0,26,18.7875 209 | 1,3,1,16,7.75 210 | 1,1,0,40,31 211 | 0,3,0,24,7.05 212 | 1,2,1,35,21 213 | 0,3,0,22,7.25 214 | 0,2,0,30,13 215 | 0,3,0,30,7.75 216 | 1,1,1,31,113.275 217 | 1,3,1,27,7.925 218 | 0,2,0,42,27 219 | 1,1,1,32,76.2917 220 | 0,2,0,30,10.5 221 | 1,3,0,16,8.05 222 | 0,2,0,27,13 223 | 0,3,0,51,8.05 224 | 0,3,0,30,7.8958 225 | 1,1,0,38,90 226 | 0,3,0,22,9.35 227 | 1,2,0,19,10.5 228 | 0,3,0,20.5,7.25 229 | 0,2,0,18,13 230 | 0,3,1,30,25.4667 231 | 1,1,1,35,83.475 232 | 0,3,0,29,7.775 233 | 0,2,0,59,13.5 234 | 1,3,1,5,31.3875 235 | 0,2,0,24,10.5 236 | 0,3,1,30,7.55 237 | 0,2,0,44,26 238 | 1,2,1,8,26.25 239 | 0,2,0,19,10.5 240 | 0,2,0,33,12.275 241 | 0,3,1,30,14.4542 242 | 1,3,1,30,15.5 243 | 0,2,0,29,10.5 244 | 0,3,0,22,7.125 245 | 0,3,0,30,7.225 246 | 0,1,0,44,90 247 | 0,3,1,25,7.775 248 | 1,2,1,24,14.5 249 | 1,1,0,37,52.5542 250 | 0,2,0,54,26 251 | 0,3,0,30,7.25 252 | 0,3,1,29,10.4625 253 | 0,1,0,62,26.55 254 | 0,3,0,30,16.1 255 | 0,3,1,41,20.2125 256 | 1,3,1,29,15.2458 257 | 1,1,1,30,79.2 258 | 1,1,1,30,86.5 259 | 1,1,1,35,512.3292 260 | 1,2,1,50,26 261 | 0,3,0,30,7.75 262 | 1,3,0,3,31.3875 263 | 0,1,0,52,79.65 264 | 0,1,0,40,0 265 | 0,3,1,30,7.75 266 | 0,2,0,36,10.5 267 | 0,3,0,16,39.6875 
268 | 1,3,0,25,7.775 269 | 1,1,1,58,153.4625 270 | 1,1,1,35,135.6333 271 | 0,1,0,30,31 272 | 1,3,0,25,0 273 | 1,2,1,41,19.5 274 | 0,1,0,37,29.7 275 | 1,3,1,30,7.75 276 | 1,1,1,63,77.9583 277 | 0,3,1,45,7.75 278 | 0,2,0,30,0 279 | 0,3,0,7,29.125 280 | 1,3,1,35,20.25 281 | 0,3,0,65,7.75 282 | 0,3,0,28,7.8542 283 | 0,3,0,16,9.5 284 | 1,3,0,19,8.05 285 | 0,1,0,30,26 286 | 0,3,0,33,8.6625 287 | 1,3,0,30,9.5 288 | 0,3,0,22,7.8958 289 | 1,2,0,42,13 290 | 1,3,1,22,7.75 291 | 1,1,1,26,78.85 292 | 1,1,1,19,91.0792 293 | 0,2,0,36,12.875 294 | 0,3,1,24,8.85 295 | 0,3,0,24,7.8958 296 | 0,1,0,30,27.7208 297 | 0,3,0,23.5,7.2292 298 | 0,1,1,2,151.55 299 | 1,1,0,30,30.5 300 | 1,1,1,50,247.5208 301 | 1,3,1,30,7.75 302 | 1,3,0,30,23.25 303 | 0,3,0,19,0 304 | 1,2,1,30,12.35 305 | 0,3,0,30,8.05 306 | 1,1,0,0.92,151.55 307 | 1,1,1,30,110.8833 308 | 1,1,1,17,108.9 309 | 0,2,0,30,24 310 | 1,1,1,30,56.9292 311 | 1,1,1,24,83.1583 312 | 1,1,1,18,262.375 313 | 0,2,1,26,26 314 | 0,3,0,28,7.8958 315 | 0,2,0,43,26.25 316 | 1,3,1,26,7.8542 317 | 1,2,1,24,26 318 | 0,2,0,54,14 319 | 1,1,1,31,164.8667 320 | 1,1,1,40,134.5 321 | 0,3,0,22,7.25 322 | 0,3,0,27,7.8958 323 | 1,2,1,30,12.35 324 | 1,2,1,22,29 325 | 0,3,0,30,69.55 326 | 1,1,1,36,135.6333 327 | 0,3,0,61,6.2375 328 | 1,2,1,36,13 329 | 1,3,1,31,20.525 330 | 1,1,1,16,57.9792 331 | 1,3,1,30,23.25 332 | 0,1,0,45.5,28.5 333 | 0,1,0,38,153.4625 334 | 0,3,0,16,18 335 | 1,1,1,30,133.65 336 | 0,3,0,30,7.8958 337 | 0,1,0,29,66.6 338 | 1,1,1,41,134.5 339 | 1,3,0,45,8.05 340 | 0,1,0,45,35.5 341 | 1,2,0,2,26 342 | 1,1,1,24,263 343 | 0,2,0,28,13 344 | 0,2,0,25,13 345 | 0,2,0,36,13 346 | 1,2,1,24,13 347 | 1,2,1,40,13 348 | 1,3,1,30,16.1 349 | 1,3,0,3,15.9 350 | 0,3,0,42,8.6625 351 | 0,3,0,23,9.225 352 | 0,1,0,30,35 353 | 0,3,0,15,7.2292 354 | 0,3,0,25,17.8 355 | 0,3,0,30,7.225 356 | 0,3,0,28,9.5 357 | 1,1,1,22,55 358 | 0,2,1,38,13 359 | 1,3,1,30,7.8792 360 | 1,3,1,30,7.8792 361 | 0,3,0,40,27.9 362 | 0,2,0,29,27.7208 363 | 0,3,1,45,14.4542 364 | 0,3,0,35,7.05 365 | 0,3,0,30,15.5 366 | 0,3,0,30,7.25 367 | 1,1,1,60,75.25 368 | 1,3,1,30,7.2292 369 | 1,3,1,30,7.75 370 | 1,1,1,24,69.3 371 | 1,1,0,25,55.4417 372 | 0,3,0,18,6.4958 373 | 0,3,0,19,8.05 374 | 0,1,0,22,135.6333 375 | 0,3,1,3,21.075 376 | 1,1,1,30,82.1708 377 | 1,3,1,22,7.25 378 | 0,1,0,27,211.5 379 | 0,3,0,20,4.0125 380 | 0,3,0,19,7.775 381 | 1,1,1,42,227.525 382 | 1,3,1,1,15.7417 383 | 0,3,0,32,7.925 384 | 1,1,1,35,52 385 | 0,3,0,30,7.8958 386 | 0,2,0,18,73.5 387 | 0,3,0,1,46.9 388 | 1,2,1,36,13 389 | 0,3,0,30,7.7292 390 | 1,2,1,17,12 391 | 1,1,0,36,120 392 | 1,3,0,21,7.7958 393 | 0,3,0,28,7.925 394 | 1,1,1,23,113.275 395 | 1,3,1,24,16.7 396 | 0,3,0,22,7.7958 397 | 0,3,1,31,7.8542 398 | 0,2,0,46,26 399 | 0,2,0,23,10.5 400 | 1,2,1,28,12.65 401 | 1,3,0,39,7.925 402 | 0,3,0,26,8.05 403 | 0,3,1,21,9.825 404 | 0,3,0,28,15.85 405 | 0,3,1,20,8.6625 406 | 0,2,0,34,21 407 | 0,3,0,51,7.75 408 | 1,2,0,3,18.75 409 | 0,3,0,21,7.775 410 | 0,3,1,30,25.4667 411 | 0,3,0,30,7.8958 412 | 0,3,0,30,6.8583 413 | 1,1,1,33,90 414 | 0,2,0,30,0 415 | 1,3,0,44,7.925 416 | 0,3,1,30,8.05 417 | 1,2,1,34,32.5 418 | 1,2,1,18,13 419 | 0,2,0,30,13 420 | 0,3,1,10,24.15 421 | 0,3,0,30,7.8958 422 | 0,3,0,21,7.7333 423 | 0,3,0,29,7.875 424 | 0,3,1,28,14.4 425 | 0,3,0,18,20.2125 426 | 0,3,0,30,7.25 427 | 1,2,1,28,26 428 | 1,2,1,19,26 429 | 0,3,0,30,7.75 430 | 1,3,0,32,8.05 431 | 1,1,0,28,26.55 432 | 1,3,1,30,16.1 433 | 1,2,1,42,26 434 | 0,3,0,17,7.125 435 | 0,1,0,50,55.9 436 | 1,1,1,14,120 437 | 0,3,1,21,34.375 438 | 1,2,1,24,18.75 439 | 0,1,0,64,263 440 | 
0,2,0,31,10.5 441 | 1,2,1,45,26.25 442 | 0,3,0,20,9.5 443 | 0,3,0,25,7.775 444 | 1,2,1,28,13 445 | 1,3,0,30,8.1125 446 | 1,1,0,4,81.8583 447 | 1,2,1,13,19.5 448 | 1,1,0,34,26.55 449 | 1,3,1,5,19.2583 450 | 1,1,0,52,30.5 451 | 0,2,0,36,27.75 452 | 0,3,0,30,19.9667 453 | 0,1,0,30,27.75 454 | 1,1,0,49,89.1042 455 | 0,3,0,30,8.05 456 | 1,3,0,29,7.8958 457 | 0,1,0,65,26.55 458 | 1,1,1,30,51.8625 459 | 1,2,1,50,10.5 460 | 0,3,0,30,7.75 461 | 1,1,0,48,26.55 462 | 0,3,0,34,8.05 463 | 0,1,0,47,38.5 464 | 0,2,0,48,13 465 | 0,3,0,30,8.05 466 | 0,3,0,38,7.05 467 | 0,2,0,30,0 468 | 0,1,0,56,26.55 469 | 0,3,0,30,7.725 470 | 1,3,1,0.75,19.2583 471 | 0,3,0,30,7.25 472 | 0,3,0,38,8.6625 473 | 1,2,1,33,27.75 474 | 1,2,1,23,13.7917 475 | 0,3,1,22,9.8375 476 | 0,1,0,30,52 477 | 0,2,0,34,21 478 | 0,3,0,29,7.0458 479 | 0,3,0,22,7.5208 480 | 1,3,1,2,12.2875 481 | 0,3,0,9,46.9 482 | 0,2,0,30,0 483 | 0,3,0,50,8.05 484 | 1,3,1,63,9.5875 485 | 1,1,0,25,91.0792 486 | 0,3,1,30,25.4667 487 | 1,1,1,35,90 488 | 0,1,0,58,29.7 489 | 0,3,0,30,8.05 490 | 1,3,0,9,15.9 491 | 0,3,0,30,19.9667 492 | 0,3,0,21,7.25 493 | 0,1,0,55,30.5 494 | 0,1,0,71,49.5042 495 | 0,3,0,21,8.05 496 | 0,3,0,30,14.4583 497 | 1,1,1,54,78.2667 498 | 0,3,0,30,15.1 499 | 0,1,1,25,151.55 500 | 0,3,0,24,7.7958 501 | 0,3,0,17,8.6625 502 | 0,3,1,21,7.75 503 | 0,3,1,30,7.6292 504 | 0,3,1,37,9.5875 505 | 1,1,1,16,86.5 506 | 0,1,0,18,108.9 507 | 1,2,1,33,26 508 | 1,1,0,30,26.55 509 | 0,3,0,28,22.525 510 | 1,3,0,26,56.4958 511 | 1,3,0,29,7.75 512 | 0,3,0,30,8.05 513 | 1,1,0,36,26.2875 514 | 1,1,1,54,59.4 515 | 0,3,0,24,7.4958 516 | 0,1,0,47,34.0208 517 | 1,2,1,34,10.5 518 | 0,3,0,30,24.15 519 | 1,2,1,36,26 520 | 0,3,0,32,7.8958 521 | 1,1,1,30,93.5 522 | 0,3,0,22,7.8958 523 | 0,3,0,30,7.225 524 | 1,1,1,44,57.9792 525 | 0,3,0,30,7.2292 526 | 0,3,0,40.5,7.75 527 | 1,2,1,50,10.5 528 | 0,1,0,30,221.7792 529 | 0,3,0,39,7.925 530 | 0,2,0,23,11.5 531 | 1,2,1,2,26 532 | 0,3,0,30,7.2292 533 | 0,3,0,17,7.2292 534 | 1,3,1,30,22.3583 535 | 0,3,1,30,8.6625 536 | 1,2,1,7,26.25 537 | 0,1,0,45,26.55 538 | 1,1,1,30,106.425 539 | 0,3,0,30,14.5 540 | 1,1,1,22,49.5 541 | 1,1,1,36,71 542 | 0,3,1,9,31.275 543 | 0,3,1,11,31.275 544 | 1,2,0,32,26 545 | 0,1,0,50,106.425 546 | 0,1,0,64,26 547 | 1,2,1,19,26 548 | 1,2,0,30,13.8625 549 | 0,3,0,33,20.525 550 | 1,2,0,8,36.75 551 | 1,1,0,17,110.8833 552 | 0,2,0,27,26 553 | 0,3,0,30,7.8292 554 | 1,3,0,22,7.225 555 | 1,3,1,22,7.775 556 | 0,1,0,62,26.55 557 | 1,1,1,48,39.6 558 | 0,1,0,30,227.525 559 | 1,1,1,39,79.65 560 | 1,3,1,36,17.4 561 | 0,3,0,30,7.75 562 | 0,3,0,40,7.8958 563 | 0,2,0,28,13.5 564 | 0,3,0,30,8.05 565 | 0,3,1,30,8.05 566 | 0,3,0,24,24.15 567 | 0,3,0,19,7.8958 568 | 0,3,1,29,21.075 569 | 0,3,0,30,7.2292 570 | 1,3,0,32,7.8542 571 | 1,2,0,62,10.5 572 | 1,1,1,53,51.4792 573 | 1,1,0,36,26.3875 574 | 1,3,1,30,7.75 575 | 0,3,0,16,8.05 576 | 0,3,0,19,14.5 577 | 1,2,1,34,13 578 | 1,1,1,39,55.9 579 | 0,3,1,30,14.4583 580 | 1,3,0,32,7.925 581 | 1,2,1,25,30 582 | 1,1,1,39,110.8833 583 | 0,2,0,54,26 584 | 0,1,0,36,40.125 585 | 0,3,0,30,8.7125 586 | 1,1,1,18,79.65 587 | 0,2,0,47,15 588 | 1,1,0,60,79.2 589 | 0,3,0,22,8.05 590 | 0,3,0,30,8.05 591 | 0,3,0,35,7.125 592 | 1,1,1,52,78.2667 593 | 0,3,0,47,7.25 594 | 0,3,1,30,7.75 595 | 0,2,0,37,26 596 | 0,3,0,36,24.15 597 | 1,2,1,30,33 598 | 0,3,0,49,0 599 | 0,3,0,30,7.225 600 | 1,1,0,49,56.9292 601 | 1,2,1,24,27 602 | 0,3,0,30,7.8958 603 | 0,1,0,30,42.4 604 | 0,3,0,44,8.05 605 | 1,1,0,35,26.55 606 | 0,3,0,36,15.55 607 | 0,3,0,30,7.8958 608 | 1,1,0,27,30.5 609 | 1,2,1,22,41.5792 610 | 
1,1,1,40,153.4625 611 | 0,3,1,39,31.275 612 | 0,3,0,30,7.05 613 | 1,3,1,30,15.5 614 | 0,3,0,30,7.75 615 | 0,3,0,35,8.05 616 | 1,2,1,24,65 617 | 0,3,0,34,14.4 618 | 0,3,1,26,16.1 619 | 1,2,1,4,39 620 | 0,2,0,26,10.5 621 | 0,3,0,27,14.4542 622 | 1,1,0,42,52.5542 623 | 1,3,0,20,15.7417 624 | 0,3,0,21,7.8542 625 | 0,3,0,21,16.1 626 | 0,1,0,61,32.3208 627 | 0,2,0,57,12.35 628 | 1,1,1,21,77.9583 629 | 0,3,0,26,7.8958 630 | 0,3,0,30,7.7333 631 | 1,1,0,80,30 632 | 0,3,0,51,7.0542 633 | 1,1,0,32,30.5 634 | 0,1,0,30,0 635 | 0,3,1,9,27.9 636 | 1,2,1,28,13 637 | 0,3,0,32,7.925 638 | 0,2,0,31,26.25 639 | 0,3,1,41,39.6875 640 | 0,3,0,30,16.1 641 | 0,3,0,20,7.8542 642 | 1,1,1,24,69.3 643 | 0,3,1,2,27.9 644 | 1,3,0,30,56.4958 645 | 1,3,1,0.75,19.2583 646 | 1,1,0,48,76.7292 647 | 0,3,0,19,7.8958 648 | 1,1,0,56,35.5 649 | 0,3,0,30,7.55 650 | 1,3,1,23,7.55 651 | 0,3,0,30,7.8958 652 | 1,2,1,18,23 653 | 0,3,0,21,8.4333 654 | 1,3,1,30,7.8292 655 | 0,3,1,18,6.75 656 | 0,2,0,24,73.5 657 | 0,3,0,30,7.8958 658 | 0,3,1,32,15.5 659 | 0,2,0,23,13 660 | 0,1,0,58,113.275 661 | 1,1,0,50,133.65 662 | 0,3,0,40,7.225 663 | 0,1,0,47,25.5875 664 | 0,3,0,36,7.4958 665 | 1,3,0,20,7.925 666 | 0,2,0,32,73.5 667 | 0,2,0,25,13 668 | 0,3,0,30,7.775 669 | 0,3,0,43,8.05 670 | 1,1,1,30,52 671 | 1,2,1,40,39 672 | 0,1,0,31,52 673 | 0,2,0,70,10.5 674 | 1,2,0,31,13 675 | 0,2,0,30,0 676 | 0,3,0,18,7.775 677 | 0,3,0,24.5,8.05 678 | 1,3,1,18,9.8417 679 | 0,3,1,43,46.9 680 | 1,1,0,36,512.3292 681 | 0,3,1,30,8.1375 682 | 1,1,0,27,76.7292 683 | 0,3,0,20,9.225 684 | 0,3,0,14,46.9 685 | 0,2,0,60,39 686 | 0,2,0,25,41.5792 687 | 0,3,0,14,39.6875 688 | 0,3,0,19,10.1708 689 | 0,3,0,18,7.7958 690 | 1,1,1,15,211.3375 691 | 1,1,0,31,57 692 | 1,3,1,4,13.4167 693 | 1,3,0,30,56.4958 694 | 0,3,0,25,7.225 695 | 0,1,0,60,26.55 696 | 0,2,0,52,13.5 697 | 0,3,0,44,8.05 698 | 1,3,1,30,7.7333 699 | 0,1,0,49,110.8833 700 | 0,3,0,42,7.65 701 | 1,1,1,18,227.525 702 | 1,1,0,35,26.2875 703 | 0,3,1,18,14.4542 704 | 0,3,0,25,7.7417 705 | 0,3,0,26,7.8542 706 | 0,2,0,39,26 707 | 1,2,1,45,13.5 708 | 1,1,0,42,26.2875 709 | 1,1,1,22,151.55 710 | 1,3,0,30,15.2458 711 | 1,1,1,24,49.5042 712 | 0,1,0,30,26.55 713 | 1,1,0,48,52 714 | 0,3,0,29,9.4833 715 | 0,2,0,52,13 716 | 0,3,0,19,7.65 717 | 1,1,1,38,227.525 718 | 1,2,1,27,10.5 719 | 0,3,0,30,15.5 720 | 0,3,0,33,7.775 721 | 1,2,1,6,33 722 | 0,3,0,17,7.0542 723 | 0,2,0,34,13 724 | 0,2,0,50,13 725 | 1,1,0,27,53.1 726 | 0,3,0,20,8.6625 727 | 1,2,1,30,21 728 | 1,3,1,30,7.7375 729 | 0,2,0,25,26 730 | 0,3,1,25,7.925 731 | 1,1,1,29,211.3375 732 | 0,3,0,11,18.7875 733 | 0,2,0,30,0 734 | 0,2,0,23,13 735 | 0,2,0,23,13 736 | 0,3,0,28.5,16.1 737 | 0,3,1,48,34.375 738 | 1,1,0,35,512.3292 739 | 0,3,0,30,7.8958 740 | 0,3,0,30,7.8958 741 | 1,1,0,30,30 742 | 0,1,0,36,78.85 743 | 1,1,1,21,262.375 744 | 0,3,0,24,16.1 745 | 1,3,0,31,7.925 746 | 0,1,0,70,71 747 | 0,3,0,16,20.25 748 | 1,2,1,30,13 749 | 0,1,0,19,53.1 750 | 0,3,0,31,7.75 751 | 1,2,1,4,23 752 | 1,3,0,6,12.475 753 | 0,3,0,33,9.5 754 | 0,3,0,23,7.8958 755 | 1,2,1,48,65 756 | 1,2,0,0.67,14.5 757 | 0,3,0,28,7.7958 758 | 0,2,0,18,11.5 759 | 0,3,0,34,8.05 760 | 1,1,1,33,86.5 761 | 0,3,0,30,14.5 762 | 0,3,0,41,7.125 763 | 1,3,0,20,7.2292 764 | 1,1,1,36,120 765 | 0,3,0,16,7.775 766 | 1,1,1,51,77.9583 767 | 0,1,0,30,39.6 768 | 0,3,1,30.5,7.75 769 | 0,3,0,30,24.15 770 | 0,3,0,32,8.3625 771 | 0,3,0,24,9.5 772 | 0,3,0,48,7.8542 773 | 0,2,1,57,10.5 774 | 0,3,0,30,7.225 775 | 1,2,1,54,23 776 | 0,3,0,18,7.75 777 | 0,3,0,30,7.75 778 | 1,3,1,5,12.475 779 | 0,3,0,30,7.7375 780 | 1,1,1,43,211.3375 781 | 
1,3,1,13,7.2292 782 | 1,1,1,17,57 783 | 0,1,0,29,30 784 | 0,3,0,30,23.45 785 | 0,3,0,25,7.05 786 | 0,3,0,25,7.25 787 | 1,3,1,18,7.4958 788 | 0,3,0,8,29.125 789 | 1,3,0,1,20.575 790 | 0,1,0,46,79.2 791 | 0,3,0,30,7.75 792 | 0,2,0,16,26 793 | 0,3,1,30,69.55 794 | 0,1,0,30,30.6958 795 | 0,3,0,25,7.8958 796 | 0,2,0,39,13 797 | 1,1,1,49,25.9292 798 | 1,3,1,31,8.6833 799 | 0,3,0,30,7.2292 800 | 0,3,1,30,24.15 801 | 0,2,0,34,13 802 | 1,2,1,31,26.25 803 | 1,1,0,11,120 804 | 1,3,0,0.42,8.5167 805 | 1,3,0,27,6.975 806 | 0,3,0,31,7.775 807 | 0,1,0,39,0 808 | 0,3,1,18,7.775 809 | 0,2,0,39,13 810 | 1,1,1,33,53.1 811 | 0,3,0,26,7.8875 812 | 0,3,0,39,24.15 813 | 0,2,0,35,10.5 814 | 0,3,1,6,31.275 815 | 0,3,0,30.5,8.05 816 | 0,1,0,30,0 817 | 0,3,1,23,7.925 818 | 0,2,0,31,37.0042 819 | 0,3,0,43,6.45 820 | 0,3,0,10,27.9 821 | 1,1,1,52,93.5 822 | 1,3,0,27,8.6625 823 | 0,1,0,38,0 824 | 1,3,1,27,12.475 825 | 0,3,0,2,39.6875 826 | 0,3,0,30,6.95 827 | 0,3,0,30,56.4958 828 | 1,2,0,1,37.0042 829 | 1,3,0,30,7.75 830 | 1,1,1,62,80 831 | 1,3,1,15,14.4542 832 | 1,2,0,0.83,18.75 833 | 0,3,0,30,7.2292 834 | 0,3,0,23,7.8542 835 | 0,3,0,18,8.3 836 | 1,1,1,39,83.1583 837 | 0,3,0,21,8.6625 838 | 0,3,0,30,8.05 839 | 1,3,0,32,56.4958 840 | 1,1,0,30,29.7 841 | 0,3,0,20,7.925 842 | 0,2,0,16,10.5 843 | 1,1,1,30,31 844 | 0,3,0,34.5,6.4375 845 | 0,3,0,17,8.6625 846 | 0,3,0,42,7.55 847 | 0,3,0,30,69.55 848 | 0,3,0,35,7.8958 849 | 0,2,0,28,33 850 | 1,1,1,30,89.1042 851 | 0,3,0,4,31.275 852 | 0,3,0,74,7.775 853 | 0,3,1,9,15.2458 854 | 1,1,1,16,39.4 855 | 0,2,1,44,26 856 | 1,3,1,18,9.35 857 | 1,1,1,45,164.8667 858 | 1,1,0,51,26.55 859 | 1,3,1,24,19.2583 860 | 0,3,0,30,7.2292 861 | 0,3,0,41,14.1083 862 | 0,2,0,21,11.5 863 | 1,1,1,48,25.9292 864 | 0,3,1,30,69.55 865 | 0,2,0,24,13 866 | 1,2,1,42,13 867 | 1,2,1,27,13.8583 868 | 0,1,0,31,50.4958 869 | 0,3,0,30,9.5 870 | 1,3,0,4,11.1333 871 | 0,3,0,26,7.8958 872 | 1,1,1,47,52.5542 873 | 0,1,0,33,5 874 | 0,3,0,47,9 875 | 1,2,1,28,24 876 | 1,3,1,15,7.225 877 | 0,3,0,20,9.8458 878 | 0,3,0,19,7.8958 879 | 0,3,0,30,7.8958 880 | 1,1,1,56,83.1583 881 | 1,2,1,25,26 882 | 0,3,0,33,7.8958 883 | 0,3,1,22,10.5167 884 | 0,2,0,28,10.5 885 | 0,3,0,25,7.05 886 | 0,3,1,39,29.125 887 | 0,2,0,27,13 888 | 1,1,1,19,30 889 | 0,3,1,30,23.45 890 | 1,1,0,26,30 891 | 0,3,0,32,7.75 892 | -------------------------------------------------------------------------------- /titanic/data/error.csv: -------------------------------------------------------------------------------- 1 | 0,2,1,57,10.5,0.7302221251009093 2 | 0,1,0,29,30,0.675536885938038 3 | 0,3,1,18,7.774999999999999,0.6282204455573266 4 | 0,3,1,23,7.925000000000001,0.5079028840062008 5 | 0,3,1,9,15.2458,0.6488754889618661 6 | 0,2,1,44,26,0.8607264852516288 7 | 0,1,0,31,50.4958,0.5351885699944017 8 | 0,1,0,33,5,0.6211402657508864 9 | 0,3,1,39,29.125,0.5269026514727263 10 | 1,3,0,20,7.229199999999999,0.1977638232454237 11 | 1,3,0,1,20.575,0.440943917205827 12 | 1,3,1,31,8.683299999999999,0.3704616329942221 13 | 1,3,0,27,6.975000000000001,0.1233117481969939 14 | 1,3,0,27,8.662500000000001,0.1267544048066883 15 | 1,3,1,27,12.475,0.3964072664612249 16 | 1,3,0,32,56.4958,0.3914447790949062 17 | 1,1,0,51,26.55,0.2160881298374403 18 | 1,3,1,24,19.2583,0.3570362276990257 19 | -------------------------------------------------------------------------------- /titanic/data/result.csv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 0 4 | 0 5 | 0 6 | 0 7 | 0 8 | 0 9 | 1 10 | 0 11 | 0 12 | 0 13 | 1 14 | 0 15 | 1 16 | 1 17 | 0 18 
| 0 19 | 0 20 | 0 21 | 0 22 | 0 23 | 1 24 | 1 25 | 1 26 | 0 27 | 1 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 0 34 | 0 35 | 1 36 | 0 37 | 0 38 | 1 39 | 0 40 | 0 41 | 0 42 | 1 43 | 0 44 | 1 45 | 1 46 | 0 47 | 0 48 | 0 49 | 1 50 | 0 51 | 1 52 | 0 53 | 1 54 | 1 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 1 61 | 0 62 | 0 63 | 0 64 | 1 65 | 0 66 | 1 67 | 1 68 | 0 69 | 1 70 | 1 71 | 1 72 | 0 73 | 0 74 | 1 75 | 1 76 | 0 77 | 0 78 | 1 79 | 0 80 | 1 81 | 0 82 | 0 83 | 0 84 | 0 85 | 0 86 | 0 87 | 0 88 | 1 89 | 0 90 | 1 91 | 0 92 | 0 93 | 1 94 | 0 95 | 1 96 | 0 97 | 1 98 | 0 99 | 1 100 | 0 101 | 1 102 | 0 103 | 0 104 | 0 105 | 1 106 | 0 107 | 0 108 | 0 109 | 0 110 | 0 111 | 0 112 | 0 113 | 1 114 | 1 115 | 1 116 | 0 117 | 0 118 | 1 119 | 1 120 | 1 121 | 1 122 | 0 123 | 1 124 | 0 125 | 0 126 | 0 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 0 135 | 0 136 | 0 137 | 0 138 | 0 139 | 1 140 | 0 141 | 0 142 | 1 143 | 0 144 | 0 145 | 0 146 | 0 147 | 1 148 | 0 149 | 1 150 | 0 151 | 1 152 | 0 153 | 0 154 | 0 155 | 0 156 | 0 157 | 1 158 | 1 159 | 0 160 | 0 161 | 0 162 | 0 163 | 1 164 | 0 165 | 0 166 | 0 167 | 0 168 | 0 169 | 1 170 | 1 171 | 0 172 | 0 173 | 0 174 | 0 175 | 0 176 | 1 177 | 1 178 | 0 179 | 1 180 | 1 181 | 0 182 | 0 183 | 1 184 | 0 185 | 1 186 | 0 187 | 1 188 | 0 189 | 0 190 | 0 191 | 0 192 | 1 193 | 0 194 | 0 195 | 1 196 | 0 197 | 1 198 | 1 199 | 0 200 | 0 201 | 0 202 | 1 203 | 0 204 | 1 205 | 0 206 | 1 207 | 0 208 | 0 209 | 1 210 | 0 211 | 0 212 | 0 213 | 0 214 | 1 215 | 0 216 | 0 217 | 0 218 | 0 219 | 1 220 | 0 221 | 1 222 | 0 223 | 1 224 | 0 225 | 1 226 | 0 227 | 0 228 | 0 229 | 0 230 | 0 231 | 1 232 | 1 233 | 0 234 | 0 235 | 0 236 | 0 237 | 0 238 | 0 239 | 1 240 | 1 241 | 1 242 | 1 243 | 0 244 | 0 245 | 0 246 | 0 247 | 1 248 | 0 249 | 1 250 | 0 251 | 1 252 | 0 253 | 0 254 | 0 255 | 0 256 | 0 257 | 0 258 | 0 259 | 1 260 | 0 261 | 0 262 | 0 263 | 1 264 | 1 265 | 0 266 | 0 267 | 0 268 | 0 269 | 0 270 | 0 271 | 0 272 | 0 273 | 1 274 | 0 275 | 0 276 | 1 277 | 0 278 | 0 279 | 0 280 | 0 281 | 1 282 | 1 283 | 0 284 | 1 285 | 0 286 | 0 287 | 0 288 | 1 289 | 0 290 | 0 291 | 1 292 | 0 293 | 0 294 | 0 295 | 0 296 | 0 297 | 1 298 | 0 299 | 1 300 | 0 301 | 0 302 | 0 303 | 0 304 | 0 305 | 0 306 | 1 307 | 0 308 | 1 309 | 0 310 | 0 311 | 0 312 | 0 313 | 0 314 | 0 315 | 1 316 | 1 317 | 0 318 | 0 319 | 0 320 | 0 321 | 0 322 | 0 323 | 0 324 | 0 325 | 1 326 | 0 327 | 1 328 | 0 329 | 0 330 | 0 331 | 1 332 | 0 333 | 0 334 | 0 335 | 0 336 | 1 337 | 0 338 | 0 339 | 0 340 | 0 341 | 0 342 | 0 343 | 0 344 | 1 345 | 0 346 | 1 347 | 0 348 | 0 349 | 0 350 | 1 351 | 1 352 | 0 353 | 0 354 | 0 355 | 0 356 | 0 357 | 1 358 | 0 359 | 0 360 | 0 361 | 0 362 | 1 363 | 1 364 | 0 365 | 1 366 | 0 367 | 0 368 | 0 369 | 1 370 | 0 371 | 0 372 | 1 373 | 0 374 | 0 375 | 1 376 | 1 377 | 1 378 | 0 379 | 0 380 | 0 381 | 0 382 | 0 383 | 0 384 | 0 385 | 0 386 | 1 387 | 0 388 | 0 389 | 0 390 | 0 391 | 1 392 | 1 393 | 0 394 | 0 395 | 0 396 | 1 397 | 0 398 | 1 399 | 0 400 | 0 401 | 1 402 | 0 403 | 1 404 | 1 405 | 0 406 | 0 407 | 0 408 | 0 409 | 0 410 | 1 411 | 0 412 | 1 413 | 0 414 | 0 415 | 1 416 | 0 417 | 0 418 | 0 419 | -------------------------------------------------------------------------------- /titanic/data/result2.csv: -------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 0 4 | 0 5 | 1 6 | 0 7 | 1 8 | 0 9 | 1 10 | 0 11 | 0 12 | 0 13 | 1 14 | 0 15 | 1 16 | 1 17 | 0 18 | 0 19 | 1 20 | 0 21 | 0 22 | 0 23 | 1 24 | 1 25 | 1 26 | 0 27 | 1 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 0 34 | 0 35 | 0 
36 | 0 37 | 1 38 | 1 39 | 0 40 | 0 41 | 0 42 | 0 43 | 0 44 | 1 45 | 1 46 | 0 47 | 0 48 | 0 49 | 1 50 | 0 51 | 1 52 | 0 53 | 1 54 | 1 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 1 61 | 0 62 | 0 63 | 0 64 | 1 65 | 0 66 | 1 67 | 1 68 | 0 69 | 0 70 | 1 71 | 1 72 | 0 73 | 1 74 | 0 75 | 1 76 | 0 77 | 0 78 | 1 79 | 0 80 | 1 81 | 0 82 | 0 83 | 0 84 | 0 85 | 0 86 | 0 87 | 1 88 | 1 89 | 1 90 | 1 91 | 1 92 | 0 93 | 1 94 | 0 95 | 0 96 | 0 97 | 1 98 | 0 99 | 1 100 | 0 101 | 1 102 | 0 103 | 0 104 | 0 105 | 1 106 | 0 107 | 0 108 | 0 109 | 0 110 | 0 111 | 0 112 | 1 113 | 1 114 | 1 115 | 1 116 | 0 117 | 0 118 | 1 119 | 0 120 | 1 121 | 1 122 | 0 123 | 1 124 | 0 125 | 0 126 | 1 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 0 135 | 0 136 | 0 137 | 0 138 | 0 139 | 1 140 | 0 141 | 0 142 | 1 143 | 0 144 | 0 145 | 0 146 | 0 147 | 0 148 | 0 149 | 0 150 | 0 151 | 1 152 | 0 153 | 0 154 | 0 155 | 0 156 | 0 157 | 1 158 | 1 159 | 0 160 | 0 161 | 1 162 | 0 163 | 1 164 | 0 165 | 0 166 | 0 167 | 0 168 | 0 169 | 1 170 | 1 171 | 0 172 | 0 173 | 0 174 | 0 175 | 0 176 | 1 177 | 1 178 | 0 179 | 1 180 | 1 181 | 0 182 | 0 183 | 1 184 | 0 185 | 1 186 | 0 187 | 1 188 | 0 189 | 0 190 | 0 191 | 0 192 | 0 193 | 0 194 | 0 195 | 1 196 | 0 197 | 1 198 | 1 199 | 0 200 | 1 201 | 0 202 | 0 203 | 0 204 | 1 205 | 0 206 | 0 207 | 0 208 | 0 209 | 1 210 | 0 211 | 0 212 | 0 213 | 0 214 | 1 215 | 0 216 | 0 217 | 1 218 | 0 219 | 1 220 | 0 221 | 1 222 | 0 223 | 1 224 | 0 225 | 1 226 | 0 227 | 0 228 | 1 229 | 0 230 | 0 231 | 0 232 | 1 233 | 0 234 | 0 235 | 0 236 | 0 237 | 0 238 | 0 239 | 1 240 | 1 241 | 1 242 | 1 243 | 0 244 | 0 245 | 0 246 | 0 247 | 1 248 | 0 249 | 1 250 | 0 251 | 1 252 | 0 253 | 0 254 | 0 255 | 0 256 | 0 257 | 0 258 | 0 259 | 1 260 | 0 261 | 0 262 | 0 263 | 1 264 | 1 265 | 0 266 | 0 267 | 0 268 | 0 269 | 1 270 | 0 271 | 0 272 | 0 273 | 1 274 | 0 275 | 0 276 | 1 277 | 0 278 | 0 279 | 0 280 | 0 281 | 1 282 | 0 283 | 1 284 | 1 285 | 1 286 | 0 287 | 0 288 | 1 289 | 0 290 | 0 291 | 0 292 | 1 293 | 0 294 | 0 295 | 0 296 | 0 297 | 1 298 | 0 299 | 0 300 | 0 301 | 0 302 | 0 303 | 0 304 | 0 305 | 1 306 | 1 307 | 0 308 | 1 309 | 0 310 | 0 311 | 0 312 | 0 313 | 0 314 | 0 315 | 1 316 | 1 317 | 0 318 | 0 319 | 0 320 | 0 321 | 0 322 | 0 323 | 0 324 | 0 325 | 1 326 | 0 327 | 1 328 | 0 329 | 0 330 | 0 331 | 1 332 | 0 333 | 0 334 | 1 335 | 0 336 | 0 337 | 0 338 | 0 339 | 0 340 | 0 341 | 0 342 | 0 343 | 0 344 | 1 345 | 0 346 | 1 347 | 0 348 | 0 349 | 0 350 | 1 351 | 1 352 | 0 353 | 0 354 | 0 355 | 1 356 | 0 357 | 1 358 | 0 359 | 0 360 | 0 361 | 0 362 | 1 363 | 1 364 | 0 365 | 1 366 | 0 367 | 0 368 | 0 369 | 1 370 | 0 371 | 0 372 | 1 373 | 0 374 | 0 375 | 1 376 | 1 377 | 1 378 | 0 379 | 0 380 | 0 381 | 0 382 | 0 383 | 0 384 | 1 385 | 0 386 | 1 387 | 0 388 | 0 389 | 0 390 | 0 391 | 1 392 | 1 393 | 0 394 | 0 395 | 0 396 | 1 397 | 0 398 | 1 399 | 0 400 | 0 401 | 1 402 | 0 403 | 1 404 | 1 405 | 0 406 | 0 407 | 0 408 | 0 409 | 1 410 | 1 411 | 1 412 | 1 413 | 1 414 | 0 415 | 1 416 | 0 417 | 0 418 | 0 419 | -------------------------------------------------------------------------------- /titanic/data/result3.csv: -------------------------------------------------------------------------------- 1 | 0.0 2 | 0.0 3 | 0.0 4 | 0.0 5 | 0.0 6 | 0.0 7 | 0.0 8 | 0.0 9 | 0.0 10 | 0.0 11 | 0.0 12 | 0.0 13 | 1.0 14 | 0.0 15 | 1.0 16 | 0.0 17 | 0.0 18 | 0.0 19 | 0.0 20 | 0.0 21 | 0.0 22 | 0.0 23 | 0.0 24 | 0.0 25 | 1.0 26 | 0.0 27 | 1.0 28 | 0.0 29 | 0.0 30 | 1.0 31 | 0.0 32 | 0.0 33 | 0.0 34 | 1.0 35 | 0.0 36 | 0.0 37 | 1.0 38 | 0.0 39 | 0.0 40 | 0.0 41 | 0.0 
42 | 0.0 43 | 0.0 44 | 0.0 45 | 1.0 46 | 0.0 47 | 0.0 48 | 0.0 49 | 1.0 50 | 0.0 51 | 0.0 52 | 0.0 53 | 0.0 54 | 1.0 55 | 1.0 56 | 0.0 57 | 0.0 58 | 0.0 59 | 1.0 60 | 1.0 61 | 0.0 62 | 0.0 63 | 0.0 64 | 0.0 65 | 0.0 66 | 0.0 67 | 0.0 68 | 0.0 69 | 0.0 70 | 1.0 71 | 0.0 72 | 0.0 73 | 0.0 74 | 0.0 75 | 1.0 76 | 0.0 77 | 0.0 78 | 1.0 79 | 0.0 80 | 0.0 81 | 0.0 82 | 0.0 83 | 0.0 84 | 0.0 85 | 0.0 86 | 0.0 87 | 0.0 88 | 0.0 89 | 1.0 90 | 0.0 91 | 0.0 92 | 0.0 93 | 1.0 94 | 0.0 95 | 0.0 96 | 0.0 97 | 1.0 98 | 0.0 99 | 0.0 100 | 0.0 101 | 1.0 102 | 0.0 103 | 0.0 104 | 0.0 105 | 0.0 106 | 0.0 107 | 0.0 108 | 0.0 109 | 0.0 110 | 0.0 111 | 0.0 112 | 0.0 113 | 1.0 114 | 0.0 115 | 1.0 116 | 0.0 117 | 1.0 118 | 0.0 119 | 0.0 120 | 0.0 121 | 0.0 122 | 0.0 123 | 1.0 124 | 0.0 125 | 0.0 126 | 0.0 127 | 0.0 128 | 1.0 129 | 0.0 130 | 0.0 131 | 0.0 132 | 0.0 133 | 1.0 134 | 1.0 135 | 0.0 136 | 0.0 137 | 0.0 138 | 0.0 139 | 0.0 140 | 0.0 141 | 0.0 142 | 1.0 143 | 0.0 144 | 0.0 145 | 0.0 146 | 0.0 147 | 0.0 148 | 0.0 149 | 0.0 150 | 0.0 151 | 1.0 152 | 0.0 153 | 0.0 154 | 0.0 155 | 0.0 156 | 0.0 157 | 1.0 158 | 0.0 159 | 0.0 160 | 0.0 161 | 0.0 162 | 0.0 163 | 0.0 164 | 0.0 165 | 0.0 166 | 0.0 167 | 0.0 168 | 0.0 169 | 0.0 170 | 0.0 171 | 0.0 172 | 0.0 173 | 0.0 174 | 0.0 175 | 0.0 176 | 0.0 177 | 0.0 178 | 0.0 179 | 0.0 180 | 1.0 181 | 0.0 182 | 0.0 183 | 1.0 184 | 0.0 185 | 1.0 186 | 0.0 187 | 0.0 188 | 0.0 189 | 0.0 190 | 0.0 191 | 0.0 192 | 0.0 193 | 0.0 194 | 0.0 195 | 0.0 196 | 0.0 197 | 0.0 198 | 0.0 199 | 0.0 200 | 1.0 201 | 0.0 202 | 1.0 203 | 0.0 204 | 0.0 205 | 0.0 206 | 0.0 207 | 0.0 208 | 0.0 209 | 1.0 210 | 0.0 211 | 0.0 212 | 0.0 213 | 0.0 214 | 0.0 215 | 0.0 216 | 0.0 217 | 1.0 218 | 0.0 219 | 1.0 220 | 0.0 221 | 0.0 222 | 0.0 223 | 0.0 224 | 0.0 225 | 1.0 226 | 0.0 227 | 0.0 228 | 1.0 229 | 0.0 230 | 0.0 231 | 0.0 232 | 1.0 233 | 0.0 234 | 0.0 235 | 0.0 236 | 0.0 237 | 0.0 238 | 0.0 239 | 0.0 240 | 1.0 241 | 1.0 242 | 0.0 243 | 0.0 244 | 0.0 245 | 1.0 246 | 0.0 247 | 0.0 248 | 0.0 249 | 0.0 250 | 0.0 251 | 0.0 252 | 0.0 253 | 0.0 254 | 0.0 255 | 0.0 256 | 0.0 257 | 0.0 258 | 0.0 259 | 0.0 260 | 0.0 261 | 0.0 262 | 0.0 263 | 0.0 264 | 0.0 265 | 0.0 266 | 0.0 267 | 1.0 268 | 0.0 269 | 1.0 270 | 0.0 271 | 0.0 272 | 0.0 273 | 1.0 274 | 0.0 275 | 1.0 276 | 0.0 277 | 0.0 278 | 0.0 279 | 0.0 280 | 0.0 281 | 0.0 282 | 0.0 283 | 1.0 284 | 0.0 285 | 0.0 286 | 0.0 287 | 1.0 288 | 0.0 289 | 0.0 290 | 0.0 291 | 0.0 292 | 0.0 293 | 0.0 294 | 0.0 295 | 0.0 296 | 0.0 297 | 0.0 298 | 1.0 299 | 0.0 300 | 0.0 301 | 0.0 302 | 1.0 303 | 0.0 304 | 0.0 305 | 1.0 306 | 1.0 307 | 0.0 308 | 0.0 309 | 0.0 310 | 0.0 311 | 0.0 312 | 0.0 313 | 0.0 314 | 0.0 315 | 1.0 316 | 0.0 317 | 0.0 318 | 0.0 319 | 0.0 320 | 0.0 321 | 0.0 322 | 0.0 323 | 0.0 324 | 0.0 325 | 1.0 326 | 0.0 327 | 0.0 328 | 0.0 329 | 0.0 330 | 0.0 331 | 0.0 332 | 0.0 333 | 1.0 334 | 0.0 335 | 0.0 336 | 0.0 337 | 0.0 338 | 0.0 339 | 0.0 340 | 0.0 341 | 0.0 342 | 0.0 343 | 0.0 344 | 1.0 345 | 0.0 346 | 0.0 347 | 0.0 348 | 0.0 349 | 0.0 350 | 0.0 351 | 1.0 352 | 0.0 353 | 0.0 354 | 0.0 355 | 0.0 356 | 0.0 357 | 1.0 358 | 0.0 359 | 0.0 360 | 0.0 361 | 0.0 362 | 0.0 363 | 0.0 364 | 0.0 365 | 1.0 366 | 0.0 367 | 0.0 368 | 0.0 369 | 1.0 370 | 0.0 371 | 0.0 372 | 1.0 373 | 0.0 374 | 0.0 375 | 1.0 376 | 1.0 377 | 0.0 378 | 0.0 379 | 0.0 380 | 0.0 381 | 0.0 382 | 0.0 383 | 0.0 384 | 0.0 385 | 0.0 386 | 0.0 387 | 0.0 388 | 0.0 389 | 0.0 390 | 0.0 391 | 0.0 392 | 1.0 393 | 0.0 394 | 0.0 395 | 0.0 396 | 1.0 397 | 0.0 398 | 1.0 399 | 0.0 400 | 0.0 401 | 1.0 402 | 0.0 
403 | 1.0 404 | 0.0 405 | 0.0 406 | 0.0 407 | 0.0 408 | 0.0 409 | 0.0 410 | 0.0 411 | 1.0 412 | 1.0 413 | 0.0 414 | 0.0 415 | 1.0 416 | 0.0 417 | 0.0 418 | 0.0 419 | -------------------------------------------------------------------------------- /titanic/data/result4.csv: -------------------------------------------------------------------------------- 1 | 1.0 2 | 0.0 3 | 1.0 4 | 1.0 5 | 0.0 6 | 1.0 7 | 0.0 8 | 1.0 9 | 0.0 10 | 1.0 11 | 1.0 12 | 1.0 13 | 1.0 14 | 1.0 15 | 1.0 16 | 0.0 17 | 1.0 18 | 1.0 19 | 0.0 20 | 0.0 21 | 1.0 22 | 1.0 23 | 1.0 24 | 1.0 25 | 1.0 26 | 1.0 27 | 1.0 28 | 1.0 29 | 1.0 30 | 1.0 31 | 1.0 32 | 1.0 33 | 0.0 34 | 0.0 35 | 1.0 36 | 1.0 37 | 1.0 38 | 0.0 39 | 1.0 40 | 1.0 41 | 1.0 42 | 0.0 43 | 1.0 44 | 0.0 45 | 1.0 46 | 1.0 47 | 1.0 48 | 0.0 49 | 1.0 50 | 0.0 51 | 1.0 52 | 1.0 53 | 0.0 54 | 1.0 55 | 1.0 56 | 1.0 57 | 1.0 58 | 1.0 59 | 0.0 60 | 1.0 61 | 1.0 62 | 1.0 63 | 1.0 64 | 0.0 65 | 1.0 66 | 0.0 67 | 0.0 68 | 1.0 69 | 1.0 70 | 1.0 71 | 0.0 72 | 1.0 73 | 0.0 74 | 1.0 75 | 1.0 76 | 1.0 77 | 0.0 78 | 1.0 79 | 1.0 80 | 0.0 81 | 1.0 82 | 1.0 83 | 1.0 84 | 1.0 85 | 0.0 86 | 0.0 87 | 0.0 88 | 0.0 89 | 1.0 90 | 1.0 91 | 0.0 92 | 1.0 93 | 1.0 94 | 0.0 95 | 1.0 96 | 1.0 97 | 1.0 98 | 1.0 99 | 0.0 100 | 1.0 101 | 1.0 102 | 1.0 103 | 0.0 104 | 1.0 105 | 0.0 106 | 1.0 107 | 1.0 108 | 0.0 109 | 0.0 110 | 1.0 111 | 1.0 112 | 0.0 113 | 1.0 114 | 0.0 115 | 1.0 116 | 1.0 117 | 0.0 118 | 0.0 119 | 1.0 120 | 0.0 121 | 0.0 122 | 0.0 123 | 1.0 124 | 1.0 125 | 0.0 126 | 0.0 127 | 1.0 128 | 0.0 129 | 1.0 130 | 1.0 131 | 1.0 132 | 1.0 133 | 0.0 134 | 0.0 135 | 1.0 136 | 1.0 137 | 1.0 138 | 1.0 139 | 0.0 140 | 1.0 141 | 0.0 142 | 1.0 143 | 1.0 144 | 1.0 145 | 1.0 146 | 1.0 147 | 0.0 148 | 1.0 149 | 0.0 150 | 1.0 151 | 1.0 152 | 1.0 153 | 1.0 154 | 0.0 155 | 1.0 156 | 1.0 157 | 1.0 158 | 0.0 159 | 1.0 160 | 0.0 161 | 1.0 162 | 1.0 163 | 0.0 164 | 1.0 165 | 1.0 166 | 0.0 167 | 1.0 168 | 1.0 169 | 0.0 170 | 0.0 171 | 1.0 172 | 1.0 173 | 1.0 174 | 0.0 175 | 1.0 176 | 0.0 177 | 0.0 178 | 1.0 179 | 0.0 180 | 1.0 181 | 1.0 182 | 1.0 183 | 1.0 184 | 0.0 185 | 1.0 186 | 1.0 187 | 0.0 188 | 1.0 189 | 0.0 190 | 1.0 191 | 1.0 192 | 0.0 193 | 1.0 194 | 1.0 195 | 1.0 196 | 1.0 197 | 1.0 198 | 0.0 199 | 1.0 200 | 1.0 201 | 1.0 202 | 1.0 203 | 1.0 204 | 0.0 205 | 1.0 206 | 0.0 207 | 0.0 208 | 1.0 209 | 1.0 210 | 1.0 211 | 1.0 212 | 0.0 213 | 1.0 214 | 0.0 215 | 0.0 216 | 1.0 217 | 0.0 218 | 1.0 219 | 1.0 220 | 0.0 221 | 0.0 222 | 1.0 223 | 0.0 224 | 1.0 225 | 1.0 226 | 0.0 227 | 1.0 228 | 1.0 229 | 1.0 230 | 1.0 231 | 1.0 232 | 1.0 233 | 1.0 234 | 1.0 235 | 1.0 236 | 1.0 237 | 1.0 238 | 1.0 239 | 0.0 240 | 1.0 241 | 1.0 242 | 0.0 243 | 1.0 244 | 1.0 245 | 1.0 246 | 1.0 247 | 0.0 248 | 1.0 249 | 0.0 250 | 0.0 251 | 0.0 252 | 1.0 253 | 1.0 254 | 1.0 255 | 1.0 256 | 1.0 257 | 0.0 258 | 1.0 259 | 0.0 260 | 1.0 261 | 1.0 262 | 1.0 263 | 0.0 264 | 0.0 265 | 1.0 266 | 1.0 267 | 0.0 268 | 1.0 269 | 1.0 270 | 1.0 271 | 1.0 272 | 0.0 273 | 1.0 274 | 1.0 275 | 0.0 276 | 0.0 277 | 1.0 278 | 1.0 279 | 1.0 280 | 1.0 281 | 0.0 282 | 0.0 283 | 1.0 284 | 0.0 285 | 0.0 286 | 1.0 287 | 0.0 288 | 1.0 289 | 0.0 290 | 0.0 291 | 0.0 292 | 0.0 293 | 0.0 294 | 1.0 295 | 1.0 296 | 1.0 297 | 0.0 298 | 1.0 299 | 1.0 300 | 1.0 301 | 1.0 302 | 1.0 303 | 1.0 304 | 1.0 305 | 1.0 306 | 1.0 307 | 1.0 308 | 0.0 309 | 1.0 310 | 0.0 311 | 1.0 312 | 1.0 313 | 1.0 314 | 0.0 315 | 1.0 316 | 0.0 317 | 1.0 318 | 1.0 319 | 1.0 320 | 1.0 321 | 1.0 322 | 1.0 323 | 1.0 324 | 1.0 325 | 1.0 326 | 1.0 327 | 0.0 328 | 1.0 329 | 1.0 330 | 1.0 331 | 
0.0 332 | 1.0 333 | 0.0 334 | 0.0 335 | 1.0 336 | 1.0 337 | 1.0 338 | 1.0 339 | 1.0 340 | 0.0 341 | 1.0 342 | 1.0 343 | 0.0 344 | 1.0 345 | 0.0 346 | 0.0 347 | 1.0 348 | 0.0 349 | 1.0 350 | 0.0 351 | 1.0 352 | 1.0 353 | 1.0 354 | 1.0 355 | 0.0 356 | 1.0 357 | 1.0 358 | 1.0 359 | 0.0 360 | 0.0 361 | 1.0 362 | 0.0 363 | 0.0 364 | 1.0 365 | 1.0 366 | 0.0 367 | 0.0 368 | 0.0 369 | 1.0 370 | 1.0 371 | 1.0 372 | 1.0 373 | 0.0 374 | 1.0 375 | 1.0 376 | 1.0 377 | 0.0 378 | 1.0 379 | 1.0 380 | 1.0 381 | 0.0 382 | 1.0 383 | 0.0 384 | 0.0 385 | 1.0 386 | 0.0 387 | 1.0 388 | 1.0 389 | 1.0 390 | 1.0 391 | 1.0 392 | 1.0 393 | 1.0 394 | 1.0 395 | 1.0 396 | 1.0 397 | 1.0 398 | 1.0 399 | 1.0 400 | 1.0 401 | 1.0 402 | 1.0 403 | 1.0 404 | 1.0 405 | 1.0 406 | 1.0 407 | 1.0 408 | 1.0 409 | 1.0 410 | 0.0 411 | 1.0 412 | 1.0 413 | 0.0 414 | 0.0 415 | 1.0 416 | 1.0 417 | 0.0 418 | 0.0 419 | -------------------------------------------------------------------------------- /titanic/data/test.csv: -------------------------------------------------------------------------------- 1 | pclass,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked 2 | 3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q 3 | 3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S 4 | 2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q 5 | 3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S 6 | 3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S 7 | 3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S 8 | 3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q 9 | 2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S 10 | 3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C 11 | 3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S 12 | 3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S 13 | 1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S 14 | 1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S 15 | 2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S 16 | 1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S 17 | 2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C 18 | 2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q 19 | 3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C 20 | 3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S 21 | 3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C 22 | 1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C 23 | 3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S 24 | 1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S 25 | 1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,,C 26 | 1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C 27 | 3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S 28 | 1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C 29 | 3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,,C 30 | 1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S 31 | 3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C 32 | 2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S 33 | 2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S 34 | 3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S 35 | 3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S 36 | 1,"Mock, Mr. 
Philipp Edmund",male,30,1,0,13236,57.75,C78,C 37 | 3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C 38 | 3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S 39 | 3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S 40 | 3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,,S 41 | 3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S 42 | 3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C 43 | 1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,D34,S 44 | 3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,,S 45 | 2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S 46 | 1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S 47 | 3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S 48 | 1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C 49 | 3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q 50 | 1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C 51 | 3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,,S 52 | 1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S 53 | 2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C 54 | 2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S 55 | 1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S 56 | 2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C 57 | 3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q 58 | 3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S 59 | 3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S 60 | 3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S 61 | 1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C 62 | 3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S 63 | 2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S 64 | 3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S 65 | 3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q 66 | 1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C 67 | 2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S 68 | 3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q 69 | 1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,,S 70 | 1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C 71 | 1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S 72 | 3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q 73 | 3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S 74 | 3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S 75 | 1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C 76 | 1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C130,C 77 | 1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C 78 | 3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S 79 | 1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S 80 | 2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S 81 | 3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,,Q 82 | 3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C 83 | 1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S 84 | 1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S 85 | 3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S 86 | 2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q 87 | 3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C 88 | 3,"Barry, Miss. 
Julia",female,27,0,0,330844,7.8792,,Q 89 | 3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S 90 | 3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q 91 | 2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S 92 | 3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S 93 | 3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S 94 | 1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S 95 | 3,"Guest, Mr. Robert",male,,0,0,376563,8.05,,S 96 | 1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C 97 | 3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,,S 98 | 1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S 99 | 3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,,S 100 | 3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S 101 | 3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S 102 | 1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C 103 | 2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,,S 104 | 3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q 105 | 3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S 106 | 3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C 107 | 3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S 108 | 3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,,Q 109 | 3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q 110 | 3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S 111 | 2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S 112 | 2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C 113 | 3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q 114 | 1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C 115 | 3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q 116 | 1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S 117 | 3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C 118 | 3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C 119 | 3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S 120 | 1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C 121 | 2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S 122 | 2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,,S 123 | 3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q 124 | 1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C 125 | 3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S 126 | 3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q 127 | 3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,,S 128 | 3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S 129 | 3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q 130 | 2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,,S 131 | 3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S 132 | 3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S 133 | 1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C 134 | 3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,,S 135 | 3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,,C 136 | 3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S 137 | 3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S 138 | 3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C 139 | 2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S 140 | 3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S 141 | 3,"Goodwin, Mr. 
Charles Frederick",male,40,1,6,CA 2144,46.9,,S 142 | 3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S 143 | 1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S 144 | 1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C 145 | 2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S 146 | 1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S 147 | 3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S 148 | 1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S 149 | 3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S 150 | 1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,,S 151 | 2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S 152 | 1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C 153 | 3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C 154 | 3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S 155 | 3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S 156 | 3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S 157 | 3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S 158 | 1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S 159 | 3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S 160 | 1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S 161 | 3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,,S 162 | 3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q 163 | 3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C 164 | 2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S 165 | 3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S 166 | 2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S 167 | 3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S 168 | 1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C 169 | 3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S 170 | 1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C 171 | 3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S 172 | 3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S 173 | 3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C 174 | 3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S 175 | 3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C 176 | 3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S 177 | 2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,,S 178 | 2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S 179 | 1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C 180 | 2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S 181 | 1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C 182 | 2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S 183 | 1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C 184 | 1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S 185 | 3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q 186 | 1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C 187 | 2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,,S 188 | 2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S 189 | 3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S 190 | 3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S 191 | 2,"Veal, Mr. James",male,40,0,0,28221,13,,S 192 | 2,"Angle, Mr. 
William A",male,34,1,0,226875,26,,S 193 | 1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S 194 | 3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S 195 | 2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q 196 | 2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S 197 | 3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S 198 | 1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C 199 | 3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S 200 | 2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S 201 | 3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S 202 | 3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q 203 | 3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S 204 | 1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C 205 | 2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,,S 206 | 2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S 207 | 1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,,C 208 | 3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q 209 | 2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,,S 210 | 1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C 211 | 3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S 212 | 3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S 213 | 3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S 214 | 2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S 215 | 2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S 216 | 3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S 217 | 1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S 218 | 3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q 219 | 1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S 220 | 1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C 221 | 3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S 222 | 2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C 223 | 3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S 224 | 2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S 225 | 3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S 226 | 1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C 227 | 3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C 228 | 3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S 229 | 3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,,Q 230 | 3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S 231 | 2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,,S 232 | 2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S 233 | 1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,,S 234 | 3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,,S 235 | 3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q 236 | 1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C 237 | 3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S 238 | 1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,D37,C 239 | 3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C 240 | 2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S 241 | 1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C 242 | 1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C 243 | 2,"Christy, Mrs. 
(Alice Frances)",female,45,0,2,237789,30,,S 244 | 1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C 245 | 3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S 246 | 3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S 247 | 1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S 248 | 2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S 249 | 2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S 250 | 2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S 251 | 3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C 252 | 2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S 253 | 3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S 254 | 1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C 255 | 3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S 256 | 3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,,S 257 | 3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S 258 | 3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,,Q 259 | 3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S 260 | 2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,,S 261 | 3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S 262 | 3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S 263 | 3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S 264 | 2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S 265 | 3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S 266 | 2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C 267 | 3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,,S 268 | 1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S 269 | 3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S 270 | 3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S 271 | 3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S 272 | 1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C 273 | 3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q 274 | 1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C 275 | 3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q 276 | 3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C 277 | 2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S 278 | 2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S 279 | 2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S 280 | 2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S 281 | 2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S 282 | 3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S 283 | 3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,,S 284 | 3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q 285 | 3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,,C 286 | 3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,,S 287 | 3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,,S 288 | 3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S 289 | 1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S 290 | 3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,F E46,C 291 | 3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S 292 | 1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S 293 | 3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q 294 | 3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C 295 | 1,"Dodge, Dr. 
Washington",male,53,1,1,33638,81.8583,A34,S 296 | 3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S 297 | 3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S 298 | 2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C 299 | 3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C 300 | 1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S 301 | 3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S 302 | 3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S 303 | 2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C 304 | 2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S 305 | 3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S 306 | 3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q 307 | 1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S 308 | 1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S 309 | 3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S 310 | 1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S 311 | 3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S 312 | 3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,,S 313 | 3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C 314 | 3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,,S 315 | 3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q 316 | 1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C 317 | 3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q 318 | 1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C 319 | 2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S 320 | 3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S 321 | 2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S 322 | 3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,,S 323 | 3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C 324 | 2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S 325 | 1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S 326 | 1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S 327 | 3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S 328 | 2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S 329 | 1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C 330 | 2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S 331 | 2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S 332 | 2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S 333 | 1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C 334 | 3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C 335 | 3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C 336 | 3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S 337 | 1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S 338 | 2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,,S 339 | 3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,,C 340 | 2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,,S 341 | 3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C 342 | 2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S 343 | 3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S 344 | 3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,,S 345 | 1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C 346 | 3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S 347 | 3,"Abelseth, Miss. 
Karen Marie",female,16,0,0,348125,7.65,,S 348 | 2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S 349 | 3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C 350 | 2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S 351 | 2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S 352 | 1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C 353 | 2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S 354 | 2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S 355 | 2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S 356 | 3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S 357 | 1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S 358 | 1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S 359 | 3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S 360 | 3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q 361 | 3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S 362 | 3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S 363 | 2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C 364 | 2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S 365 | 3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,,S 366 | 1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C 367 | 3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S 368 | 3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C 369 | 3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S 370 | 1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C 371 | 2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C 372 | 2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S 373 | 1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C 374 | 1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S 375 | 2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S 376 | 1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S 377 | 1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C 378 | 3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S 379 | 2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S 380 | 1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S 381 | 3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S 382 | 3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q 383 | 3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q 384 | 3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S 385 | 3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S 386 | 2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S 387 | 2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S 388 | 3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S 389 | 2,"Ashby, Mr. John",male,57,0,0,244346,13,,S 390 | 3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,,Q 391 | 3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,,S 392 | 1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S 393 | 1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S 394 | 3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,,S 395 | 2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,,S 396 | 3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S 397 | 1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S 398 | 3,"Colbert, Mr. 
Patrick",male,24,0,0,371109,7.25,,Q 399 | 1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C 400 | 3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S 401 | 3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,,Q 402 | 1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S 403 | 2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S 404 | 1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C 405 | 1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S 406 | 1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C 407 | 2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C 408 | 2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S 409 | 1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C 410 | 3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q 411 | 3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S 412 | 3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q 413 | 1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q 414 | 3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S 415 | 3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S 416 | 1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C 417 | 3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S 418 | 3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S 419 | 3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,,C 420 | -------------------------------------------------------------------------------- /titanic/data/test2.csv: -------------------------------------------------------------------------------- 1 | 3,0,34.5,7.8292 2 | 3,1,47,7 3 | 2,0,62,9.6875 4 | 3,0,27,8.6625 5 | 3,1,22,12.2875 6 | 3,0,14,9.225 7 | 3,1,30,7.6292 8 | 2,0,26,29 9 | 3,1,18,7.2292 10 | 3,0,21,24.15 11 | 3,0,-1,7.8958 12 | 1,0,46,26 13 | 1,1,23,82.2667 14 | 2,0,63,26 15 | 1,1,47,61.175 16 | 2,1,24,27.7208 17 | 2,0,35,12.35 18 | 3,0,21,7.225 19 | 3,1,27,7.925 20 | 3,1,45,7.225 21 | 1,0,55,59.4 22 | 3,0,9,3.1708 23 | 1,1,-1,31.6833 24 | 1,0,21,61.3792 25 | 1,1,48,262.375 26 | 3,0,50,14.5 27 | 1,1,22,61.9792 28 | 3,0,22.5,7.225 29 | 1,0,41,30.5 30 | 3,0,-1,21.6792 31 | 2,0,50,26 32 | 2,0,24,31.5 33 | 3,1,33,20.575 34 | 3,1,-1,23.45 35 | 1,0,30,57.75 36 | 3,0,18.5,7.2292 37 | 3,1,-1,8.05 38 | 3,1,21,8.6625 39 | 3,0,25,9.5 40 | 3,0,-1,56.4958 41 | 3,0,39,13.4167 42 | 1,0,-1,26.55 43 | 3,0,41,7.85 44 | 2,1,30,13 45 | 1,1,45,52.5542 46 | 3,0,25,7.925 47 | 1,0,45,29.7 48 | 3,0,-1,7.75 49 | 1,1,60,76.2917 50 | 3,1,36,15.9 51 | 1,0,24,60 52 | 2,0,27,15.0333 53 | 2,1,20,23 54 | 1,1,28,263 55 | 2,0,-1,15.5792 56 | 3,0,10,29.125 57 | 3,0,35,7.8958 58 | 3,0,25,7.65 59 | 3,0,-1,16.1 60 | 1,1,36,262.375 61 | 3,0,17,7.8958 62 | 2,0,32,13.5 63 | 3,0,18,7.75 64 | 3,1,22,7.725 65 | 1,0,13,262.375 66 | 2,1,-1,21 67 | 3,1,18,7.8792 68 | 1,0,47,42.4 69 | 1,0,31,28.5375 70 | 1,1,60,263 71 | 3,1,24,7.75 72 | 3,0,21,7.8958 73 | 3,1,29,7.925 74 | 1,0,28.5,27.7208 75 | 1,1,35,211.5 76 | 1,0,32.5,211.5 77 | 3,0,-1,8.05 78 | 1,1,55,25.7 79 | 2,0,30,13 80 | 3,1,24,7.75 81 | 3,0,6,15.2458 82 | 1,0,67,221.7792 83 | 1,0,49,26 84 | 3,0,-1,7.8958 85 | 2,0,-1,10.7083 86 | 3,0,-1,14.4542 87 | 3,1,27,7.8792 88 | 3,1,18,8.05 89 | 3,1,-1,7.75 90 | 2,0,2,23 91 | 3,1,22,13.9 92 | 3,0,-1,7.775 93 | 1,1,27,52 94 | 3,0,-1,8.05 95 | 1,0,25,26 96 | 3,0,25,7.7958 97 | 1,1,76,78.85 98 | 3,0,29,7.925 99 | 3,1,20,7.8542 100 | 3,0,33,8.05 101 | 1,1,43,55.4417 102 | 2,0,27,26 103 | 
3,0,-1,7.75 104 | 3,0,26,7.775 105 | 3,1,16,8.5167 106 | 3,0,28,22.525 107 | 3,0,21,7.8208 108 | 3,0,-1,7.75 109 | 3,0,-1,8.7125 110 | 2,0,18.5,13 111 | 2,0,41,15.0458 112 | 3,1,-1,7.7792 113 | 1,1,36,31.6792 114 | 3,1,18.5,7.2833 115 | 1,1,63,221.7792 116 | 3,0,18,14.4542 117 | 3,0,-1,6.4375 118 | 3,1,1,16.7 119 | 1,0,36,75.2417 120 | 2,1,29,26 121 | 2,1,12,15.75 122 | 3,0,-1,7.75 123 | 1,1,35,57.75 124 | 3,0,28,7.25 125 | 3,0,-1,7.75 126 | 3,1,17,16.1 127 | 3,0,22,7.7958 128 | 3,1,-1,23.25 129 | 2,0,42,13 130 | 3,0,24,8.05 131 | 3,0,32,8.05 132 | 1,0,53,28.5 133 | 3,1,-1,25.4667 134 | 3,0,-1,6.4375 135 | 3,0,43,7.8958 136 | 3,0,24,7.8542 137 | 3,0,26.5,7.225 138 | 2,0,26,13 139 | 3,1,23,8.05 140 | 3,0,40,46.9 141 | 3,1,10,46.9 142 | 1,1,33,151.55 143 | 1,0,61,262.375 144 | 2,0,28,26 145 | 1,0,42,26.55 146 | 3,0,31,18 147 | 1,0,-1,51.8625 148 | 3,0,22,8.05 149 | 1,0,-1,26.55 150 | 2,0,30,26 151 | 1,1,23,83.1583 152 | 3,0,-1,7.8958 153 | 3,0,60.5,35 154 | 3,1,36,12.1833 155 | 3,0,13,31.3875 156 | 3,0,24,7.55 157 | 1,1,29,221.7792 158 | 3,1,23,7.8542 159 | 1,0,42,26.55 160 | 3,1,26,13.775 161 | 3,1,-1,7.7333 162 | 3,0,7,15.2458 163 | 2,1,26,13.5 164 | 3,0,-1,7 165 | 2,0,41,13 166 | 3,1,26,22.025 167 | 1,0,48,50.4958 168 | 3,0,18,34.375 169 | 1,1,-1,27.7208 170 | 3,1,22,8.9625 171 | 3,0,-1,7.55 172 | 3,0,27,7.225 173 | 3,0,23,13.9 174 | 3,0,-1,7.2292 175 | 3,0,40,31.3875 176 | 2,1,15,39 177 | 2,1,20,36.75 178 | 1,0,54,55.4417 179 | 2,1,36,39 180 | 1,1,64,83.1583 181 | 2,0,30,13 182 | 1,0,37,83.1583 183 | 1,1,18,53.1 184 | 3,0,-1,7.75 185 | 1,1,27,247.5208 186 | 2,0,40,16 187 | 2,1,21,21 188 | 3,0,17,8.05 189 | 3,1,-1,69.55 190 | 2,0,40,13 191 | 2,0,34,26 192 | 1,0,-1,26 193 | 3,0,11.5,14.5 194 | 2,0,61,12.35 195 | 2,0,8,32.5 196 | 3,0,33,7.8542 197 | 1,0,6,134.5 198 | 3,1,18,7.775 199 | 2,0,23,10.5 200 | 3,1,-1,8.1125 201 | 3,1,-1,15.5 202 | 3,0,0.33,14.4 203 | 1,0,47,227.525 204 | 2,1,8,26 205 | 2,0,25,10.5 206 | 1,0,-1,25.7417 207 | 3,1,35,7.75 208 | 2,0,24,10.5 209 | 1,1,33,27.7208 210 | 3,0,25,7.8958 211 | 3,0,32,22.525 212 | 3,0,-1,7.05 213 | 2,0,17,73.5 214 | 2,1,60,26 215 | 3,1,38,7.775 216 | 1,0,42,42.5 217 | 3,1,-1,7.8792 218 | 1,0,57,164.8667 219 | 1,1,50,211.5 220 | 3,0,-1,8.05 221 | 2,1,30,13.8583 222 | 3,0,21,8.05 223 | 2,1,22,10.5 224 | 3,0,21,7.7958 225 | 1,1,53,27.4458 226 | 3,1,-1,15.2458 227 | 3,0,23,7.7958 228 | 3,1,-1,7.75 229 | 3,0,40.5,15.1 230 | 2,0,36,13 231 | 2,0,14,65 232 | 1,1,21,26.55 233 | 3,0,21,6.4958 234 | 3,0,-1,7.8792 235 | 1,0,39,71.2833 236 | 3,0,20,7.8542 237 | 1,0,64,75.25 238 | 3,0,20,7.225 239 | 2,1,18,13 240 | 1,1,48,106.425 241 | 1,1,55,27.7208 242 | 2,1,45,30 243 | 1,0,45,134.5 244 | 3,0,-1,7.8875 245 | 3,0,-1,23.45 246 | 1,0,41,51.8625 247 | 2,1,22,21 248 | 2,0,42,32.5 249 | 2,1,29,26 250 | 3,1,-1,14.4542 251 | 2,1,0.92,27.75 252 | 3,0,20,7.925 253 | 1,0,27,136.7792 254 | 3,0,24,9.325 255 | 3,0,32.5,9.5 256 | 3,0,-1,7.55 257 | 3,0,-1,7.75 258 | 3,0,28,8.05 259 | 2,1,19,13 260 | 3,0,21,7.775 261 | 3,0,36.5,17.4 262 | 3,0,21,7.8542 263 | 2,1,29,23 264 | 3,1,1,12.1833 265 | 2,0,30,12.7375 266 | 3,0,-1,7.8958 267 | 1,0,-1,0 268 | 3,0,-1,7.55 269 | 3,1,-1,8.05 270 | 3,0,17,8.6625 271 | 1,0,46,75.2417 272 | 3,0,-1,7.75 273 | 1,1,26,136.7792 274 | 3,1,-1,15.5 275 | 3,0,-1,7.225 276 | 2,1,20,26 277 | 2,0,28,10.5 278 | 2,0,40,26 279 | 2,0,30,21 280 | 2,0,22,10.5 281 | 3,1,23,8.6625 282 | 3,0,0.75,13.775 283 | 3,1,-1,7.75 284 | 3,1,9,15.2458 285 | 3,1,2,20.2125 286 | 3,0,36,7.25 287 | 3,0,-1,7.25 288 | 1,0,24,82.2667 289 | 3,0,-1,7.2292 290 | 3,0,-1,8.05 291 
| 1,0,-1,39.6 292 | 3,1,30,6.95 293 | 3,0,-1,7.2292 294 | 1,0,53,81.8583 295 | 3,0,36,9.5 296 | 3,0,26,7.8958 297 | 2,1,1,41.5792 298 | 3,0,-1,21.6792 299 | 1,0,30,45.5 300 | 3,0,29,7.8542 301 | 3,0,32,7.775 302 | 2,0,-1,15.0458 303 | 2,0,43,21 304 | 3,0,24,8.6625 305 | 3,1,-1,7.75 306 | 1,1,64,26.55 307 | 1,0,30,151.55 308 | 3,0,0.83,9.35 309 | 1,0,55,93.5 310 | 3,1,45,14.1083 311 | 3,0,18,8.6625 312 | 3,0,22,7.225 313 | 3,0,-1,7.575 314 | 3,1,37,7.75 315 | 1,1,55,135.6333 316 | 3,1,17,7.7333 317 | 1,0,57,146.5208 318 | 2,0,19,10.5 319 | 3,0,27,7.8542 320 | 2,0,22,31.5 321 | 3,0,26,7.775 322 | 3,0,25,7.2292 323 | 2,0,26,13 324 | 1,0,33,26.55 325 | 1,1,39,211.3375 326 | 3,0,23,7.05 327 | 2,1,12,39 328 | 1,0,46,79.2 329 | 2,0,29,26 330 | 2,0,21,13 331 | 2,1,48,36.75 332 | 1,0,39,29.7 333 | 3,0,-1,7.225 334 | 3,1,19,15.7417 335 | 3,0,27,7.8958 336 | 1,0,30,26 337 | 2,0,32,13 338 | 3,0,39,7.2292 339 | 2,0,25,31.5 340 | 3,0,-1,7.2292 341 | 2,0,18,10.5 342 | 3,0,32,7.5792 343 | 3,0,-1,69.55 344 | 1,1,58,512.3292 345 | 3,0,-1,14.5 346 | 3,1,16,7.65 347 | 2,0,26,13 348 | 3,1,38,7.2292 349 | 2,0,24,13.5 350 | 2,1,31,21 351 | 1,1,45,63.3583 352 | 2,0,25,10.5 353 | 2,0,18,73.5 354 | 2,0,49,65 355 | 3,1,0.17,20.575 356 | 1,0,50,26 357 | 1,1,59,51.4792 358 | 3,0,-1,7.8792 359 | 3,0,-1,7.75 360 | 3,1,30,15.55 361 | 3,0,14.5,69.55 362 | 2,1,24,37.0042 363 | 2,1,31,21 364 | 3,0,27,8.6625 365 | 1,1,25,55.4417 366 | 3,1,-1,69.55 367 | 3,0,-1,14.4583 368 | 3,1,22,39.6875 369 | 1,1,45,59.4 370 | 2,0,29,13.8583 371 | 2,0,21,11.5 372 | 1,1,31,134.5 373 | 1,0,49,0 374 | 2,0,44,13 375 | 1,1,54,81.8583 376 | 1,1,45,262.375 377 | 3,1,22,8.6625 378 | 2,0,21,11.5 379 | 1,0,55,50 380 | 3,0,5,31.3875 381 | 3,0,-1,7.75 382 | 3,0,26,7.8792 383 | 3,1,-1,14.5 384 | 3,1,19,16.1 385 | 2,0,-1,12.875 386 | 2,1,24,65 387 | 3,0,24,7.775 388 | 2,0,57,13 389 | 3,0,21,7.75 390 | 3,0,6,21.075 391 | 1,0,23,93.5 392 | 1,1,51,39.4 393 | 3,0,13,20.25 394 | 2,0,47,10.5 395 | 3,0,29,22.025 396 | 1,1,18,60 397 | 3,0,24,7.25 398 | 1,1,48,79.2 399 | 3,0,22,7.775 400 | 3,0,31,7.7333 401 | 1,1,30,164.8667 402 | 2,0,38,21 403 | 1,1,22,59.4 404 | 1,0,17,47.1 405 | 1,0,43,27.7208 406 | 2,0,20,13.8625 407 | 2,0,23,10.5 408 | 1,0,50,211.5 409 | 3,1,-1,7.7208 410 | 3,1,3,13.775 411 | 3,1,-1,7.75 412 | 1,1,37,90 413 | 3,1,28,7.775 414 | 3,0,-1,8.05 415 | 1,1,39,108.9 416 | 3,0,38.5,7.25 417 | 3,0,-1,8.05 418 | 3,0,-1,22.3583 419 | -------------------------------------------------------------------------------- /titanic/logistic_regression.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from numpy import * 3 | from scipy.optimize import fmin_bfgs 4 | 5 | class LogisticRegression: 6 | """ An implementation of logistic regression. 
""" 7 | def __init__ (self, x, y, lambda_=0): 8 | self.x = x 9 | self.y = atleast_2d(y).transpose() 10 | self._lambda = lambda_ 11 | 12 | def _sigmoid(self, x): 13 | res = 1 / (1 + exp(-x)) 14 | idx = res == 1 15 | res[idx] = .99 16 | return res 17 | 18 | def _compute_cost(self, theta): 19 | """ Calculate the cost function: 20 | J = -1 / m * (y' * log(sigmoid(X * theta)) + (1 .- y') * log(1 .- sigmoid(X * theta))) 21 | J += lambda / (2 * m) * theta(2 : end)' * theta(2 : end) 22 | """ 23 | m = self.x.shape[0] 24 | x_bias = hstack((ones((m, 1)), self.x)) 25 | theta = atleast_2d(theta).transpose() 26 | J = -1.0 / m * (dot(self.y.transpose(), log(self._sigmoid(dot(x_bias, theta)))) 27 | + dot(1 - self.y.transpose(), log(1 - self._sigmoid(dot(x_bias, theta))))) 28 | J += self._lambda / (2 * m) * sum(theta[1 : :] ** 2) 29 | return J[0, 0] 30 | 31 | def _compute_grad(self, theta): 32 | """ Calculate the gradient of J: 33 | grad = 1 / m * (X' * (sigmoid(X * theta) - y)) 34 | grad(2 : end) += lambda / m * theta(2 : end) 35 | """ 36 | m = self.x.shape[0] 37 | x_bias = hstack((ones((m, 1)), self.x)) 38 | theta = atleast_2d(theta).transpose() 39 | grad = 1.0 / m * (dot(x_bias.transpose(), self._sigmoid(dot(x_bias, theta)) - self.y)) 40 | grad[1 : :] += self._lambda / m * theta[1 : :] 41 | return grad.ravel() 42 | 43 | def learn(self, max_iter=300): 44 | """ Train theta from the dataset, return value is a 1-D array. 45 | """ 46 | initial_theta = [0] * (self.x.shape[1] + 1) 47 | args_ = () 48 | theta = fmin_bfgs(f=self._compute_cost, x0=initial_theta, 49 | fprime=self._compute_grad, args=args_, maxiter=max_iter) 50 | self._theta = atleast_2d(theta).transpose() 51 | 52 | def predict(self, x): 53 | m = x.shape[0] 54 | x_bias = hstack((ones((m, 1)), x)) 55 | p = zeros((m, 1)) 56 | prob = self._sigmoid(dot(x_bias, self._theta)) 57 | idx = prob >= 0.5 58 | p[idx] = 1 59 | return p.ravel() 60 | 61 | if __name__ == '__main__': 62 | pass 63 | -------------------------------------------------------------------------------- /titanic/readme: -------------------------------------------------------------------------------- 1 | Kaggle - Titanic 2 | Problem link: https://www.kaggle.com/c/titanic-gettingStarted 3 | This is a logistic regression solution for Kaggle Titanic prediction, achieving accuracy about 83%. The code contains a implementation of logistic regression with L2 regularization. 4 | The data preprocessing part is left out, we construct the training and testing data with only four features: pclass, sex, age and fare. -------------------------------------------------------------------------------- /titanic/titanic.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import csv 3 | from numpy import * 4 | from logistic_regression import LogisticRegression 5 | 6 | def map_feature(x): 7 | """ Add polynomial features to x in order to reduce high bias. 8 | """ 9 | m, n = x.shape 10 | out = x 11 | 12 | # Add quodratic features. 13 | for i in range(n): 14 | for j in range(i, n): 15 | out = hstack((out, x[:, i].reshape(m, 1) * x[:, j].reshape(m, 1))) 16 | 17 | # Add cubic features. 18 | for i in range(n): 19 | for j in range(i, n): 20 | for k in range(j, n): 21 | out = hstack( 22 | (out, x[:, i].reshape(m, 1) * x[:, j].reshape(m, 1) * x[:, k].reshape(m, 1))) 23 | return out 24 | 25 | def scale_data(x): 26 | """ Scale data with zero mean and unit variance. 
27 | """ 28 | mu = x.mean(axis=0) 29 | sigma = x.std(axis=0) 30 | x = (x - mu) / sigma 31 | return (x, mu, sigma) 32 | 33 | def read_data(): 34 | # Data in the file has been preprocessed by eliminating rows with missing values. 35 | csv_file_object = csv.reader(open('./data/data.csv', 'rb')) 36 | header = csv_file_object.next() 37 | x = [] 38 | for row in csv_file_object: 39 | x.append(row) 40 | return array(x, dtype=float64) 41 | 42 | if __name__ == '__main__': 43 | x = read_data() 44 | 45 | # Generates training set and cross validation set. 46 | y = x[:, 0] 47 | x = x[:, 1 : :] 48 | x = map_feature(x) 49 | num = int(x.shape[0] * .7) 50 | x_cv = x[num : :, :] 51 | y_cv = y[num : :] 52 | x = x[0 : num, :] 53 | y = y[0 : num] 54 | 55 | # Feature scaling. 56 | x, mu, sigma = scale_data(x) 57 | x_cv = (x_cv - mu) / sigma 58 | 59 | # Use cross validation set to find the best lambda for regularization. 60 | C_candidates = [0, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30] 61 | lambda_ = 0 62 | best_accuracy = 0 63 | for C in C_candidates: 64 | clf = LogisticRegression(x, y, C) 65 | clf.learn() 66 | p_cv = clf.predict(x_cv) 67 | accuracy = (p_cv == y_cv).mean() 68 | if accuracy > best_accuracy: 69 | best_accuracy = accuracy 70 | lambda_ = C 71 | print 'Best regularization parameter lambda: %f' % lambda_ 72 | 73 | clf = LogisticRegression(x, y, lambda_) 74 | clf.learn() 75 | p = clf.predict(x) 76 | p_cv = clf.predict(x_cv) 77 | print 'Accuracy in training set: %f'% (p == y).mean() 78 | print 'Accuracy in cv: %f' % (p_cv == y_cv).mean() 79 | --------------------------------------------------------------------------------