├── .gitignore ├── .ipynb_checkpoints └── speedmodel-checkpoint.ipynb ├── README.md ├── speedmodel.ipynb ├── kmean_vectors.ipynb ├── titanic ├── gender_submission.csv ├── test.csv └── train.csv └── gen_features.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /.ipynb_checkpoints/speedmodel-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # data_analysis 2 | #### get_feature_importance 机器学习数据预处理:包括浮点型数据转化降低内存、画数据分布图、多种特征筛选、多种调参技巧 3 | #### gen_features 各种数据衍生方式 4 | #### 树模型部署:treelite模型加速 5 | #### 文本聚类打标签 6 | -------------------------------------------------------------------------------- /speedmodel.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# https://treelite.readthedocs.io/en/latest/tutorials/first.html" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [] 18 | } 19 | ], 20 | "metadata": { 21 | "kernelspec": { 22 | "display_name": "Python 3", 23 | "language": "python", 24 | "name": "python3" 25 | }, 26 | "language_info": { 27 | "codemirror_mode": { 28 | "name": "ipython", 29 | "version": 3 30 | }, 31 | "file_extension": ".py", 32 | "mimetype": "text/x-python", 33 | "name": "python", 34 | "nbconvert_exporter": "python", 35 | "pygments_lexer": "ipython3", 36 | "version": "3.7.4" 37 | }, 38 | "toc": { 39 | "base_numbering": 1, 40 | "nav_menu": {}, 41 | "number_sections": true, 42 | "sideBar": true, 43 | "skip_h1_title": false, 44 | "title_cell": "Table of Contents", 45 | "title_sidebar": "Contents", 46 | "toc_cell": false, 47 | "toc_position": {}, 48 | "toc_section_display": true, 49 | "toc_window_display": false 50 | } 51 | }, 52 | "nbformat": 4, 53 | "nbformat_minor": 2 54 | } 55 | -------------------------------------------------------------------------------- /kmean_vectors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "139c210d", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "from sklearn.cluster import KMeans\n", 12 | "import numpy as np\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "import torch\n", 15 | "from sklearn.metrics import silhouette_score\n", 16 | "from transformers import AutoTokenizer, AutoModelForMaskedLM" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "bfbcf944", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "def cosine_similarity(vector1, vector2):\n", 27 | " dot_product = np.dot(vector1, vector2)\n", 28 | " norm_vector1 = np.linalg.norm(vector1)\n", 29 | " norm_vector2 = np.linalg.norm(vector2)\n", 30 | " cosine_similarity = dot_product / (norm_vector1 * norm_vector2)\n", 31 | " return cosine_similarity\n", 32 | "\n", 33 | "\n", 34 | "def get_emb(text):\n", 35 | " inputs = tokenizer(text, return_tensors=\"pt\")\n", 36 | " outputs = model(**inputs, output_hidden_states=True)\n", 37 | " text_embedding = outputs.hidden_states[-1][:, 0, :]\n", 38 | " vector = torch.Tensor(text_embedding).tolist()[0]\n", 39 | " return vector" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "f4466060", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# 文本向量化\n", 50 | "model = AutoModelForMaskedLM.from_pretrained(r'./Erlangshen-SimCSE-110M-Chinese')\n", 51 | "tokenizer = AutoTokenizer.from_pretrained(r'./Erlangshen-SimCSE-110M-Chinese')\n", 52 | "data = pd.read_csv(r\"./kmean_test.csv\", encoding='gbk')\n", 53 | "data['emb'] = data['text'].apply(lambda x: get_emb(x))\n", 54 | "vector_list = data['emb'].values.tolist()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "id": "7faf0b3e", 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "# 寻找最佳聚类k值的两种方法\n", 65 | "# 手肘法 利用SSE选择k,取拐点时的k\n", 66 | "SSE = [] # 存放每次结果的误差平方和\n", 67 | "for k in range(2, 9):\n", 68 | " estimator = KMeans(n_clusters=k) # 构造聚类器\n", 69 | " estimator.fit(vector_list)\n", 70 | " SSE.append(estimator.inertia_)\n", 71 | "X = range(2, 9)\n", 72 | "plt.xlabel('k')\n", 73 | "plt.ylabel('SSE')\n", 74 | "plt.plot(X, SSE, 'o-')\n", 75 | "plt.show()\n", 76 | "\n", 77 | "# 轮廓系数法 取系数最大时的k\n", 78 | "Scores = [] # 存放轮廓系数\n", 79 | "for k in range(2, 9):\n", 80 | " estimator = KMeans(n_clusters=k) # 构造聚类器\n", 81 | " estimator.fit(vector_list)\n", 82 | " Scores.append(silhouette_score(vector_list, estimator.labels_, metric='euclidean'))\n", 83 | "X = range(2, 9)\n", 84 | "plt.xlabel('k')\n", 85 | "plt.ylabel('轮廓系数')\n", 86 | "plt.plot(X, Scores, 'o-')\n", 87 | "plt.show()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "cbe7596a", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# 确定k后开始聚类\n", 98 | "clf = KMeans(n_clusters=6)\n", 99 | "clf.fit(vector_list)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "6dcafd2a", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# 获取所有训练样本的簇标签[0,2,1,0,2,2,1,..]\n", 110 | "labels = clf.labels_\n", 111 | "data['label'] = labels\n", 112 | "\n", 113 | "# 也可以推理新的样本\n", 114 | "# clf.predict()\n", 115 | "\n", 116 | "# 获取中心点,并构建簇中心点与簇标签的映射\n", 117 | "centers = clf.cluster_centers_\n", 118 | "center_map = dict(zip([i for i in range(len(centers))], centers))\n", 119 | "data['center'] = data['label'].apply(lambda x: center_map.get(x))\n", 120 | "\n", 121 | "# 计算样本到簇心的距离\n", 122 | "data['cosine'] = data.apply(lambda x: cosine_similarity(x['center'], x['emb']), axis=1)\n", 123 | "data.to_csv(r\"./kmean_test_res.csv\", index=False)\n", 124 | "\n", 125 | "# todo 挑选每个簇最靠近簇心的样本,人为观察打标或丢给大模型打标" 126 | ] 127 | } 128 | ], 129 | "metadata": { 130 | "kernelspec": { 131 | "display_name": "Python 3", 132 | "language": "python", 133 | "name": "python3" 134 | }, 135 | "language_info": { 136 | "codemirror_mode": { 137 | "name": "ipython", 138 | "version": 3 139 | }, 140 | "file_extension": ".py", 141 | "mimetype": "text/x-python", 142 | "name": "python", 143 | "nbconvert_exporter": "python", 144 | "pygments_lexer": "ipython3", 145 | "version": "3.8.5" 146 | } 147 | }, 148 | "nbformat": 4, 149 | "nbformat_minor": 5 150 | } 151 | -------------------------------------------------------------------------------- /titanic/gender_submission.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived 2 | 892,0 3 | 893,1 4 | 894,0 5 | 895,0 6 | 896,1 7 | 897,0 8 | 898,1 9 | 899,0 10 | 900,1 11 | 901,0 12 | 902,0 13 | 903,0 14 | 904,1 15 | 905,0 16 | 906,1 17 | 907,1 18 | 908,0 19 | 909,0 20 | 910,1 21 | 911,1 22 | 912,0 23 | 913,0 24 | 914,1 25 | 915,0 26 | 916,1 27 | 917,0 28 | 918,1 29 | 919,0 30 | 920,0 31 | 921,0 32 | 922,0 33 | 923,0 34 | 924,1 35 | 925,1 36 | 926,0 37 | 927,0 38 | 928,1 39 | 929,1 40 | 930,0 41 | 931,0 42 | 932,0 43 | 933,0 44 | 934,0 45 | 935,1 46 | 936,1 47 | 937,0 48 | 938,0 49 | 939,0 50 | 940,1 51 | 941,1 52 | 942,0 53 | 943,0 54 | 944,1 55 | 945,1 56 | 946,0 57 | 947,0 58 | 948,0 59 | 949,0 60 | 950,0 61 | 951,1 62 | 952,0 63 | 953,0 64 | 954,0 65 | 955,1 66 | 956,0 67 | 957,1 68 | 958,1 69 | 959,0 70 | 960,0 71 | 961,1 72 | 962,1 73 | 963,0 74 | 964,1 75 | 965,0 76 | 966,1 77 | 967,0 78 | 968,0 79 | 969,1 80 | 970,0 81 | 971,1 82 | 972,0 83 | 973,0 84 | 974,0 85 | 975,0 86 | 976,0 87 | 977,0 88 | 978,1 89 | 979,1 90 | 980,1 91 | 981,0 92 | 982,1 93 | 983,0 94 | 984,1 95 | 985,0 96 | 986,0 97 | 987,0 98 | 988,1 99 | 989,0 100 | 990,1 101 | 991,0 102 | 992,1 103 | 993,0 104 | 994,0 105 | 995,0 106 | 996,1 107 | 997,0 108 | 998,0 109 | 999,0 110 | 1000,0 111 | 1001,0 112 | 1002,0 113 | 1003,1 114 | 1004,1 115 | 1005,1 116 | 1006,1 117 | 1007,0 118 | 1008,0 119 | 1009,1 120 | 1010,0 121 | 1011,1 122 | 1012,1 123 | 1013,0 124 | 1014,1 125 | 1015,0 126 | 1016,0 127 | 1017,1 128 | 1018,0 129 | 1019,1 130 | 1020,0 131 | 1021,0 132 | 1022,0 133 | 1023,0 134 | 1024,1 135 | 1025,0 136 | 1026,0 137 | 1027,0 138 | 1028,0 139 | 1029,0 140 | 1030,1 141 | 1031,0 142 | 1032,1 143 | 1033,1 144 | 1034,0 145 | 1035,0 146 | 1036,0 147 | 1037,0 148 | 1038,0 149 | 1039,0 150 | 1040,0 151 | 1041,0 152 | 1042,1 153 | 1043,0 154 | 1044,0 155 | 1045,1 156 | 1046,0 157 | 1047,0 158 | 1048,1 159 | 1049,1 160 | 1050,0 161 | 1051,1 162 | 1052,1 163 | 1053,0 164 | 1054,1 165 | 1055,0 166 | 1056,0 167 | 1057,1 168 | 1058,0 169 | 1059,0 170 | 1060,1 171 | 1061,1 172 | 1062,0 173 | 1063,0 174 | 1064,0 175 | 1065,0 176 | 1066,0 177 | 1067,1 178 | 1068,1 179 | 1069,0 180 | 1070,1 181 | 1071,1 182 | 1072,0 183 | 1073,0 184 | 1074,1 185 | 1075,0 186 | 1076,1 187 | 1077,0 188 | 1078,1 189 | 1079,0 190 | 1080,1 191 | 1081,0 192 | 1082,0 193 | 1083,0 194 | 1084,0 195 | 1085,0 196 | 1086,0 197 | 1087,0 198 | 1088,0 199 | 1089,1 200 | 1090,0 201 | 1091,1 202 | 1092,1 203 | 1093,0 204 | 1094,0 205 | 1095,1 206 | 1096,0 207 | 1097,0 208 | 1098,1 209 | 1099,0 210 | 1100,1 211 | 1101,0 212 | 1102,0 213 | 1103,0 214 | 1104,0 215 | 1105,1 216 | 1106,1 217 | 1107,0 218 | 1108,1 219 | 1109,0 220 | 1110,1 221 | 1111,0 222 | 1112,1 223 | 1113,0 224 | 1114,1 225 | 1115,0 226 | 1116,1 227 | 1117,1 228 | 1118,0 229 | 1119,1 230 | 1120,0 231 | 1121,0 232 | 1122,0 233 | 1123,1 234 | 1124,0 235 | 1125,0 236 | 1126,0 237 | 1127,0 238 | 1128,0 239 | 1129,0 240 | 1130,1 241 | 1131,1 242 | 1132,1 243 | 1133,1 244 | 1134,0 245 | 1135,0 246 | 1136,0 247 | 1137,0 248 | 1138,1 249 | 1139,0 250 | 1140,1 251 | 1141,1 252 | 1142,1 253 | 1143,0 254 | 1144,0 255 | 1145,0 256 | 1146,0 257 | 1147,0 258 | 1148,0 259 | 1149,0 260 | 1150,1 261 | 1151,0 262 | 1152,0 263 | 1153,0 264 | 1154,1 265 | 1155,1 266 | 1156,0 267 | 1157,0 268 | 1158,0 269 | 1159,0 270 | 1160,1 271 | 1161,0 272 | 1162,0 273 | 1163,0 274 | 1164,1 275 | 1165,1 276 | 1166,0 277 | 1167,1 278 | 1168,0 279 | 1169,0 280 | 1170,0 281 | 1171,0 282 | 1172,1 283 | 1173,0 284 | 1174,1 285 | 1175,1 286 | 1176,1 287 | 1177,0 288 | 1178,0 289 | 1179,0 290 | 1180,0 291 | 1181,0 292 | 1182,0 293 | 1183,1 294 | 1184,0 295 | 1185,0 296 | 1186,0 297 | 1187,0 298 | 1188,1 299 | 1189,0 300 | 1190,0 301 | 1191,0 302 | 1192,0 303 | 1193,0 304 | 1194,0 305 | 1195,0 306 | 1196,1 307 | 1197,1 308 | 1198,0 309 | 1199,0 310 | 1200,0 311 | 1201,1 312 | 1202,0 313 | 1203,0 314 | 1204,0 315 | 1205,1 316 | 1206,1 317 | 1207,1 318 | 1208,0 319 | 1209,0 320 | 1210,0 321 | 1211,0 322 | 1212,0 323 | 1213,0 324 | 1214,0 325 | 1215,0 326 | 1216,1 327 | 1217,0 328 | 1218,1 329 | 1219,0 330 | 1220,0 331 | 1221,0 332 | 1222,1 333 | 1223,0 334 | 1224,0 335 | 1225,1 336 | 1226,0 337 | 1227,0 338 | 1228,0 339 | 1229,0 340 | 1230,0 341 | 1231,0 342 | 1232,0 343 | 1233,0 344 | 1234,0 345 | 1235,1 346 | 1236,0 347 | 1237,1 348 | 1238,0 349 | 1239,1 350 | 1240,0 351 | 1241,1 352 | 1242,1 353 | 1243,0 354 | 1244,0 355 | 1245,0 356 | 1246,1 357 | 1247,0 358 | 1248,1 359 | 1249,0 360 | 1250,0 361 | 1251,1 362 | 1252,0 363 | 1253,1 364 | 1254,1 365 | 1255,0 366 | 1256,1 367 | 1257,1 368 | 1258,0 369 | 1259,1 370 | 1260,1 371 | 1261,0 372 | 1262,0 373 | 1263,1 374 | 1264,0 375 | 1265,0 376 | 1266,1 377 | 1267,1 378 | 1268,1 379 | 1269,0 380 | 1270,0 381 | 1271,0 382 | 1272,0 383 | 1273,0 384 | 1274,1 385 | 1275,1 386 | 1276,0 387 | 1277,1 388 | 1278,0 389 | 1279,0 390 | 1280,0 391 | 1281,0 392 | 1282,0 393 | 1283,1 394 | 1284,0 395 | 1285,0 396 | 1286,0 397 | 1287,1 398 | 1288,0 399 | 1289,1 400 | 1290,0 401 | 1291,0 402 | 1292,1 403 | 1293,0 404 | 1294,1 405 | 1295,0 406 | 1296,0 407 | 1297,0 408 | 1298,0 409 | 1299,0 410 | 1300,1 411 | 1301,1 412 | 1302,1 413 | 1303,1 414 | 1304,1 415 | 1305,0 416 | 1306,1 417 | 1307,0 418 | 1308,0 419 | 1309,0 420 | -------------------------------------------------------------------------------- /titanic/test.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked 2 | 892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,Q 3 | 893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,S 4 | 894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,Q 5 | 895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,S 6 | 896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,S 7 | 897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,S 8 | 898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,Q 9 | 899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,S 10 | 900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,C 11 | 901,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,S 12 | 902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,S 13 | 903,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,S 14 | 904,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,S 15 | 905,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,S 16 | 906,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,S 17 | 907,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,C 18 | 908,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,Q 19 | 909,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,C 20 | 910,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,S 21 | 911,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,C 22 | 912,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,C 23 | 913,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,S 24 | 914,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,S 25 | 915,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,C 26 | 916,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,C 27 | 917,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,S 28 | 918,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,C 29 | 919,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,C 30 | 920,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,S 31 | 921,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,C 32 | 922,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,S 33 | 923,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,S 34 | 924,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,S 35 | 925,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,S 36 | 926,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C 37 | 927,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,C 38 | 928,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,S 39 | 929,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,S 40 | 930,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,S 41 | 931,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,S 42 | 932,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,C 43 | 933,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,S 44 | 934,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,S 45 | 935,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,S 46 | 936,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,S 47 | 937,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,S 48 | 938,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,C 49 | 939,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,Q 50 | 940,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,C 51 | 941,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,S 52 | 942,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,S 53 | 943,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,C 54 | 944,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,S 55 | 945,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,S 56 | 946,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,C 57 | 947,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,Q 58 | 948,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,S 59 | 949,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,S 60 | 950,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,S 61 | 951,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,C 62 | 952,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,S 63 | 953,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,S 64 | 954,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,S 65 | 955,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,Q 66 | 956,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,C 67 | 957,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,S 68 | 958,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,Q 69 | 959,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,S 70 | 960,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C 71 | 961,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,S 72 | 962,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,Q 73 | 963,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,S 74 | 964,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,S 75 | 965,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,C 76 | 966,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C 77 | 967,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C 78 | 968,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,S 79 | 969,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,S 80 | 970,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,S 81 | 971,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,Q 82 | 972,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,C 83 | 973,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,S 84 | 974,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,S 85 | 975,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,S 86 | 976,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,Q 87 | 977,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,C 88 | 978,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,Q 89 | 979,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,S 90 | 980,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,Q 91 | 981,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,S 92 | 982,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,S 93 | 983,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,S 94 | 984,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,S 95 | 985,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,S 96 | 986,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,C 97 | 987,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,S 98 | 988,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,S 99 | 989,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,S 100 | 990,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,S 101 | 991,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,S 102 | 992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C 103 | 993,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,S 104 | 994,3,"Foley, Mr. William",male,,0,0,365235,7.75,Q 105 | 995,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,S 106 | 996,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,C 107 | 997,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,S 108 | 998,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,Q 109 | 999,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,Q 110 | 1000,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,S 111 | 1001,2,"Swane, Mr. George",male,18.5,0,0,248734,13,S 112 | 1002,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,C 113 | 1003,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,Q 114 | 1004,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,C 115 | 1005,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,Q 116 | 1006,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,S 117 | 1007,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,C 118 | 1008,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,C 119 | 1009,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,S 120 | 1010,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C 121 | 1011,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,S 122 | 1012,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,S 123 | 1013,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,Q 124 | 1014,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C 125 | 1015,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,S 126 | 1016,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,Q 127 | 1017,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,S 128 | 1018,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,S 129 | 1019,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,Q 130 | 1020,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,S 131 | 1021,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,S 132 | 1022,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,S 133 | 1023,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C 134 | 1024,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,S 135 | 1025,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,C 136 | 1026,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,S 137 | 1027,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,S 138 | 1028,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,C 139 | 1029,2,"Schmidt, Mr. August",male,26,0,0,248659,13,S 140 | 1030,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,S 141 | 1031,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,S 142 | 1032,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,S 143 | 1033,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,S 144 | 1034,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,C 145 | 1035,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,S 146 | 1036,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,S 147 | 1037,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,S 148 | 1038,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,S 149 | 1039,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,S 150 | 1040,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,S 151 | 1041,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,S 152 | 1042,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C 153 | 1043,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,C 154 | 1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,S 155 | 1045,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,S 156 | 1046,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,S 157 | 1047,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,S 158 | 1048,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,S 159 | 1049,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,S 160 | 1050,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,S 161 | 1051,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,S 162 | 1052,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,Q 163 | 1053,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,C 164 | 1054,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,S 165 | 1055,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,S 166 | 1056,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,S 167 | 1057,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,S 168 | 1058,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,C 169 | 1059,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,S 170 | 1060,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,C 171 | 1061,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,S 172 | 1062,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,S 173 | 1063,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,C 174 | 1064,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,S 175 | 1065,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,C 176 | 1066,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,S 177 | 1067,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,S 178 | 1068,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,S 179 | 1069,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C 180 | 1070,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,S 181 | 1071,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,C 182 | 1072,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,S 183 | 1073,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,C 184 | 1074,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,S 185 | 1075,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,Q 186 | 1076,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,C 187 | 1077,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,S 188 | 1078,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,S 189 | 1079,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,S 190 | 1080,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,S 191 | 1081,2,"Veal, Mr. James",male,40,0,0,28221,13,S 192 | 1082,2,"Angle, Mr. William A",male,34,1,0,226875,26,S 193 | 1083,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,S 194 | 1084,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,S 195 | 1085,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,Q 196 | 1086,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,S 197 | 1087,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,S 198 | 1088,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,C 199 | 1089,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,S 200 | 1090,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,S 201 | 1091,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,S 202 | 1092,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,Q 203 | 1093,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,S 204 | 1094,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C 205 | 1095,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,S 206 | 1096,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,S 207 | 1097,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,C 208 | 1098,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,Q 209 | 1099,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,S 210 | 1100,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,C 211 | 1101,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,S 212 | 1102,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,S 213 | 1103,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,S 214 | 1104,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,S 215 | 1105,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,S 216 | 1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,S 217 | 1107,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,S 218 | 1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,Q 219 | 1109,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,S 220 | 1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C 221 | 1111,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,S 222 | 1112,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,C 223 | 1113,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,S 224 | 1114,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,S 225 | 1115,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,S 226 | 1116,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,C 227 | 1117,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,C 228 | 1118,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,S 229 | 1119,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,Q 230 | 1120,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,S 231 | 1121,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,S 232 | 1122,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,S 233 | 1123,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,S 234 | 1124,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,S 235 | 1125,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,Q 236 | 1126,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C 237 | 1127,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,S 238 | 1128,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,C 239 | 1129,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,C 240 | 1130,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,S 241 | 1131,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C 242 | 1132,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,C 243 | 1133,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,S 244 | 1134,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,C 245 | 1135,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,S 246 | 1136,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,S 247 | 1137,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,S 248 | 1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,S 249 | 1139,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,S 250 | 1140,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,S 251 | 1141,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,C 252 | 1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,S 253 | 1143,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,S 254 | 1144,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C 255 | 1145,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,S 256 | 1146,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,S 257 | 1147,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,S 258 | 1148,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,Q 259 | 1149,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,S 260 | 1150,2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,S 261 | 1151,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,S 262 | 1152,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,S 263 | 1153,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,S 264 | 1154,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,S 265 | 1155,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,S 266 | 1156,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,C 267 | 1157,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,S 268 | 1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,S 269 | 1159,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,S 270 | 1160,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,S 271 | 1161,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,S 272 | 1162,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C 273 | 1163,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,Q 274 | 1164,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C 275 | 1165,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,Q 276 | 1166,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,C 277 | 1167,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,S 278 | 1168,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,S 279 | 1169,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,S 280 | 1170,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,S 281 | 1171,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,S 282 | 1172,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,S 283 | 1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,S 284 | 1174,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,Q 285 | 1175,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,C 286 | 1176,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,S 287 | 1177,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,S 288 | 1178,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,S 289 | 1179,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,S 290 | 1180,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,C 291 | 1181,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,S 292 | 1182,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,S 293 | 1183,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,Q 294 | 1184,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,C 295 | 1185,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,S 296 | 1186,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,S 297 | 1187,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,S 298 | 1188,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,C 299 | 1189,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,C 300 | 1190,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,S 301 | 1191,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,S 302 | 1192,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,S 303 | 1193,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,C 304 | 1194,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,S 305 | 1195,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,S 306 | 1196,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,Q 307 | 1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,S 308 | 1198,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,S 309 | 1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,S 310 | 1200,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,S 311 | 1201,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,S 312 | 1202,3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,S 313 | 1203,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,C 314 | 1204,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,S 315 | 1205,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,Q 316 | 1206,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C 317 | 1207,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,Q 318 | 1208,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,C 319 | 1209,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,S 320 | 1210,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,S 321 | 1211,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,S 322 | 1212,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,S 323 | 1213,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,C 324 | 1214,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,S 325 | 1215,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,S 326 | 1216,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,S 327 | 1217,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,S 328 | 1218,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,S 329 | 1219,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,C 330 | 1220,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,S 331 | 1221,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,S 332 | 1222,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,S 333 | 1223,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,C 334 | 1224,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,C 335 | 1225,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,C 336 | 1226,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,S 337 | 1227,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,S 338 | 1228,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,S 339 | 1229,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,C 340 | 1230,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,S 341 | 1231,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,C 342 | 1232,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,S 343 | 1233,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,S 344 | 1234,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,S 345 | 1235,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,C 346 | 1236,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,S 347 | 1237,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,S 348 | 1238,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,S 349 | 1239,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,C 350 | 1240,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,S 351 | 1241,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,S 352 | 1242,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,C 353 | 1243,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,S 354 | 1244,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,S 355 | 1245,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,S 356 | 1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,S 357 | 1247,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,S 358 | 1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,S 359 | 1249,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,S 360 | 1250,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,Q 361 | 1251,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,S 362 | 1252,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,S 363 | 1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,C 364 | 1254,2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,S 365 | 1255,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,S 366 | 1256,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,C 367 | 1257,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,S 368 | 1258,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,C 369 | 1259,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,S 370 | 1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,C 371 | 1261,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,C 372 | 1262,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,S 373 | 1263,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,C 374 | 1264,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,S 375 | 1265,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,S 376 | 1266,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,S 377 | 1267,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,C 378 | 1268,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,S 379 | 1269,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,S 380 | 1270,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,S 381 | 1271,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,S 382 | 1272,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,Q 383 | 1273,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,Q 384 | 1274,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,S 385 | 1275,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,S 386 | 1276,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,S 387 | 1277,2,"Herman, Miss. Kate",female,24,1,2,220845,65,S 388 | 1278,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,S 389 | 1279,2,"Ashby, Mr. John",male,57,0,0,244346,13,S 390 | 1280,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,Q 391 | 1281,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,S 392 | 1282,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,S 393 | 1283,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,S 394 | 1284,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,S 395 | 1285,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,S 396 | 1286,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,S 397 | 1287,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,S 398 | 1288,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,Q 399 | 1289,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,C 400 | 1290,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,S 401 | 1291,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,Q 402 | 1292,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,S 403 | 1293,2,"Gale, Mr. Harry",male,38,1,0,28664,21,S 404 | 1294,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,C 405 | 1295,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,S 406 | 1296,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,C 407 | 1297,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,C 408 | 1298,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,S 409 | 1299,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C 410 | 1300,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,Q 411 | 1301,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,S 412 | 1302,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,Q 413 | 1303,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,Q 414 | 1304,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,S 415 | 1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,S 416 | 1306,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C 417 | 1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,S 418 | 1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,S 419 | 1309,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,C 420 | -------------------------------------------------------------------------------- /titanic/train.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked 2 | 1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,S 3 | 2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C 4 | 3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,S 5 | 4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,S 6 | 5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,S 7 | 6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,Q 8 | 7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,S 9 | 8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,S 10 | 9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,S 11 | 10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,C 12 | 11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,S 13 | 12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,S 14 | 13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,S 15 | 14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,S 16 | 15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,S 17 | 16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,S 18 | 17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,Q 19 | 18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,S 20 | 19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,S 21 | 20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,C 22 | 21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,S 23 | 22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,S 24 | 23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,Q 25 | 24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,S 26 | 25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,S 27 | 26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,S 28 | 27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,C 29 | 28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,S 30 | 29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,Q 31 | 30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,S 32 | 31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,C 33 | 32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,C 34 | 33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,Q 35 | 34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,S 36 | 35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,C 37 | 36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,S 38 | 37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,C 39 | 38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,S 40 | 39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,S 41 | 40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,C 42 | 41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,S 43 | 42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,S 44 | 43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,C 45 | 44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,C 46 | 45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,Q 47 | 46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,S 48 | 47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,Q 49 | 48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,Q 50 | 49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,C 51 | 50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,S 52 | 51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,S 53 | 52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,S 54 | 53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,C 55 | 54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,S 56 | 55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,C 57 | 56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,S 58 | 57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,S 59 | 58,0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,C 60 | 59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,S 61 | 60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,S 62 | 61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,C 63 | 62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80, 64 | 63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,S 65 | 64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,S 66 | 65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,C 67 | 66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,C 68 | 67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,S 69 | 68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,S 70 | 69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,S 71 | 70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,S 72 | 71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,S 73 | 72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,S 74 | 73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,S 75 | 74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,C 76 | 75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,S 77 | 76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,S 78 | 77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,S 79 | 78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,S 80 | 79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,S 81 | 80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,S 82 | 81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,S 83 | 82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,S 84 | 83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,Q 85 | 84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,S 86 | 85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,S 87 | 86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,S 88 | 87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,S 89 | 88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,S 90 | 89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,S 91 | 90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,S 92 | 91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,S 93 | 92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,S 94 | 93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,S 95 | 94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,S 96 | 95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,S 97 | 96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,S 98 | 97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,C 99 | 98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,C 100 | 99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,S 101 | 100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,S 102 | 101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,S 103 | 102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,S 104 | 103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,S 105 | 104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,S 106 | 105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,S 107 | 106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,S 108 | 107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,S 109 | 108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,S 110 | 109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,S 111 | 110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,Q 112 | 111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,S 113 | 112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,C 114 | 113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,S 115 | 114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,S 116 | 115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,C 117 | 116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,S 118 | 117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,Q 119 | 118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,S 120 | 119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,C 121 | 120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,S 122 | 121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,S 123 | 122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,S 124 | 123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,C 125 | 124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,S 126 | 125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,S 127 | 126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,C 128 | 127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,Q 129 | 128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,S 130 | 129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,C 131 | 130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,S 132 | 131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,C 133 | 132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,S 134 | 133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,S 135 | 134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,S 136 | 135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,S 137 | 136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,C 138 | 137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,S 139 | 138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,S 140 | 139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,S 141 | 140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,C 142 | 141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,C 143 | 142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,S 144 | 143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,S 145 | 144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,Q 146 | 145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,S 147 | 146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,S 148 | 147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,S 149 | 148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,S 150 | 149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,S 151 | 150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,S 152 | 151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,S 153 | 152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,S 154 | 153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,S 155 | 154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,S 156 | 155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,S 157 | 156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,C 158 | 157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,Q 159 | 158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,S 160 | 159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,S 161 | 160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,S 162 | 161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,S 163 | 162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,S 164 | 163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,S 165 | 164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,S 166 | 165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,S 167 | 166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,S 168 | 167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,S 169 | 168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,S 170 | 169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,S 171 | 170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,S 172 | 171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,S 173 | 172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,Q 174 | 173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,S 175 | 174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,S 176 | 175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,C 177 | 176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,S 178 | 177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,S 179 | 178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C 180 | 179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,S 181 | 180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,S 182 | 181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,S 183 | 182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,C 184 | 183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,S 185 | 184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,S 186 | 185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,S 187 | 186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,S 188 | 187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,Q 189 | 188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,S 190 | 189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,Q 191 | 190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,S 192 | 191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,S 193 | 192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,S 194 | 193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,S 195 | 194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,S 196 | 195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,C 197 | 196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,C 198 | 197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,Q 199 | 198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,S 200 | 199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,Q 201 | 200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,S 202 | 201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,S 203 | 202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,S 204 | 203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,S 205 | 204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,C 206 | 205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,S 207 | 206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,S 208 | 207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,S 209 | 208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,C 210 | 209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,Q 211 | 210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,C 212 | 211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,S 213 | 212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,S 214 | 213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,S 215 | 214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,S 216 | 215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,Q 217 | 216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,C 218 | 217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,S 219 | 218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,S 220 | 219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,C 221 | 220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,S 222 | 221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,S 223 | 222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,S 224 | 223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,S 225 | 224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,S 226 | 225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,S 227 | 226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,S 228 | 227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,S 229 | 228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,S 230 | 229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,S 231 | 230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,S 232 | 231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,S 233 | 232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,S 234 | 233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,S 235 | 234,1,3,"Asplund, Miss. Lillian Gertrud",female,5,4,2,347077,31.3875,S 236 | 235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,S 237 | 236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,S 238 | 237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,S 239 | 238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,S 240 | 239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,S 241 | 240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,S 242 | 241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,C 243 | 242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,Q 244 | 243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,S 245 | 244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,S 246 | 245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,C 247 | 246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,Q 248 | 247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,S 249 | 248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,S 250 | 249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,S 251 | 250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,S 252 | 251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,S 253 | 252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,S 254 | 253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,S 255 | 254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,S 256 | 255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,S 257 | 256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,C 258 | 257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,C 259 | 258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,S 260 | 259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,C 261 | 260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,S 262 | 261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,Q 263 | 262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,S 264 | 263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,S 265 | 264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,S 266 | 265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,Q 267 | 266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,S 268 | 267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,S 269 | 268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,S 270 | 269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,S 271 | 270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,S 272 | 271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,S 273 | 272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,S 274 | 273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,S 275 | 274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C 276 | 275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,Q 277 | 276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,S 278 | 277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,S 279 | 278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,S 280 | 279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,Q 281 | 280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,S 282 | 281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,Q 283 | 282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,S 284 | 283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,S 285 | 284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,S 286 | 285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,S 287 | 286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,C 288 | 287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,S 289 | 288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,S 290 | 289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,S 291 | 290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,Q 292 | 291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,S 293 | 292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,C 294 | 293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,C 295 | 294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,S 296 | 295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,S 297 | 296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,C 298 | 297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,C 299 | 298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,S 300 | 299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,S 301 | 300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,C 302 | 301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,Q 303 | 302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,Q 304 | 303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,S 305 | 304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,Q 306 | 305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,S 307 | 306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,S 308 | 307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,C 309 | 308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C 310 | 309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,C 311 | 310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,C 312 | 311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C 313 | 312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,C 314 | 313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,S 315 | 314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,S 316 | 315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,S 317 | 316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,S 318 | 317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,S 319 | 318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,S 320 | 319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,S 321 | 320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,C 322 | 321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,S 323 | 322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,S 324 | 323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,Q 325 | 324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,S 326 | 325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,S 327 | 326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C 328 | 327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,S 329 | 328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,S 330 | 329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,S 331 | 330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,C 332 | 331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,Q 333 | 332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,S 334 | 333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,S 335 | 334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,S 336 | 335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,S 337 | 336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,S 338 | 337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,S 339 | 338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,C 340 | 339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,S 341 | 340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,S 342 | 341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,S 343 | 342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,S 344 | 343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,S 345 | 344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,S 346 | 345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,S 347 | 346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,S 348 | 347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,S 349 | 348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,S 350 | 349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,S 351 | 350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,S 352 | 351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,S 353 | 352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,S 354 | 353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,C 355 | 354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,S 356 | 355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,C 357 | 356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,S 358 | 357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,S 359 | 358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,S 360 | 359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,Q 361 | 360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,Q 362 | 361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,S 363 | 362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,C 364 | 363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,C 365 | 364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,S 366 | 365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,Q 367 | 366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,S 368 | 367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,C 369 | 368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,C 370 | 369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,Q 371 | 370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,C 372 | 371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,C 373 | 372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,S 374 | 373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,S 375 | 374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,C 376 | 375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,S 377 | 376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,C 378 | 377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,S 379 | 378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C 380 | 379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,C 381 | 380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,S 382 | 381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,C 383 | 382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,C 384 | 383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,S 385 | 384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,S 386 | 385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,S 387 | 386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,S 388 | 387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,S 389 | 388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,S 390 | 389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,Q 391 | 390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,C 392 | 391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,S 393 | 392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,S 394 | 393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,S 395 | 394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,C 396 | 395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,S 397 | 396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,S 398 | 397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,S 399 | 398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,S 400 | 399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,S 401 | 400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,S 402 | 401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,S 403 | 402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,S 404 | 403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,S 405 | 404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,S 406 | 405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,S 407 | 406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,S 408 | 407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,S 409 | 408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,S 410 | 409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,S 411 | 410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,S 412 | 411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,S 413 | 412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,Q 414 | 413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,Q 415 | 414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,S 416 | 415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,S 417 | 416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,S 418 | 417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,S 419 | 418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,S 420 | 419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,S 421 | 420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,S 422 | 421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,C 423 | 422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,Q 424 | 423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,S 425 | 424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,S 426 | 425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,S 427 | 426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,S 428 | 427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,S 429 | 428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,S 430 | 429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,Q 431 | 430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,S 432 | 431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,S 433 | 432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,S 434 | 433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,S 435 | 434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,S 436 | 435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,S 437 | 436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,S 438 | 437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,S 439 | 438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,S 440 | 439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,S 441 | 440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,S 442 | 441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,S 443 | 442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,S 444 | 443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,S 445 | 444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,S 446 | 445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,S 447 | 446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,S 448 | 447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,S 449 | 448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,S 450 | 449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,C 451 | 450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,S 452 | 451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,S 453 | 452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,S 454 | 453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C 455 | 454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C 456 | 455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,S 457 | 456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,C 458 | 457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,S 459 | 458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,S 460 | 459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,S 461 | 460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,Q 462 | 461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,S 463 | 462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,S 464 | 463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,S 465 | 464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,S 466 | 465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,S 467 | 466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,S 468 | 467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,S 469 | 468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,S 470 | 469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,Q 471 | 470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,C 472 | 471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,S 473 | 472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,S 474 | 473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,S 475 | 474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,C 476 | 475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,S 477 | 476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,S 478 | 477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,S 479 | 478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,S 480 | 479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,S 481 | 480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,S 482 | 481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,S 483 | 482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,S 484 | 483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,S 485 | 484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,S 486 | 485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,C 487 | 486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,S 488 | 487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,S 489 | 488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,C 490 | 489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,S 491 | 490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,S 492 | 491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,S 493 | 492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,S 494 | 493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,S 495 | 494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,C 496 | 495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,S 497 | 496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,C 498 | 497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,C 499 | 498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,S 500 | 499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,S 501 | 500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,S 502 | 501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,S 503 | 502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,Q 504 | 503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,Q 505 | 504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,S 506 | 505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,S 507 | 506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C 508 | 507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,S 509 | 508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,S 510 | 509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,S 511 | 510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,S 512 | 511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,Q 513 | 512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,S 514 | 513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,S 515 | 514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,C 516 | 515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,S 517 | 516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,S 518 | 517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,S 519 | 518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,Q 520 | 519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,S 521 | 520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,S 522 | 521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,S 523 | 522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,S 524 | 523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,C 525 | 524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,C 526 | 525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,C 527 | 526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,Q 528 | 527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,S 529 | 528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,S 530 | 529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,S 531 | 530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,S 532 | 531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,S 533 | 532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,C 534 | 533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,C 535 | 534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,C 536 | 535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,S 537 | 536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,S 538 | 537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,S 539 | 538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,C 540 | 539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,S 541 | 540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22,0,2,13568,49.5,C 542 | 541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,S 543 | 542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,S 544 | 543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,S 545 | 544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,S 546 | 545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C 547 | 546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,S 548 | 547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,S 549 | 548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,C 550 | 549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,S 551 | 550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,S 552 | 551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C 553 | 552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,S 554 | 553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,Q 555 | 554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,C 556 | 555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,S 557 | 556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,S 558 | 557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,C 559 | 558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,C 560 | 559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,S 561 | 560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,S 562 | 561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,Q 563 | 562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,S 564 | 563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,S 565 | 564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,S 566 | 565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,S 567 | 566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,S 568 | 567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,S 569 | 568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,S 570 | 569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,C 571 | 570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,S 572 | 571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,S 573 | 572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,S 574 | 573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,S 575 | 574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,Q 576 | 575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,S 577 | 576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,S 578 | 577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,S 579 | 578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,S 580 | 579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,C 581 | 580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,S 582 | 581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,S 583 | 582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C 584 | 583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,S 585 | 584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,C 586 | 585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,C 587 | 586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,S 588 | 587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,S 589 | 588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,C 590 | 589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,S 591 | 590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,S 592 | 591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,S 593 | 592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,C 594 | 593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,S 595 | 594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,Q 596 | 595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,S 597 | 596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,S 598 | 597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,S 599 | 598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,S 600 | 599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,C 601 | 600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,C 602 | 601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,S 603 | 602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,S 604 | 603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,S 605 | 604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,S 606 | 605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,C 607 | 606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,S 608 | 607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,S 609 | 608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,S 610 | 609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,C 611 | 610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,S 612 | 611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,S 613 | 612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,S 614 | 613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,Q 615 | 614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,Q 616 | 615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,S 617 | 616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,S 618 | 617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,S 619 | 618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,S 620 | 619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,S 621 | 620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,S 622 | 621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,C 623 | 622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,S 624 | 623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,C 625 | 624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,S 626 | 625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,S 627 | 626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,S 628 | 627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,Q 629 | 628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,S 630 | 629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,S 631 | 630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,Q 632 | 631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,S 633 | 632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,S 634 | 633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,C 635 | 634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,S 636 | 635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,S 637 | 636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,S 638 | 637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,S 639 | 638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,S 640 | 639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,S 641 | 640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,S 642 | 641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,S 643 | 642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,C 644 | 643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,S 645 | 644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,S 646 | 645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,C 647 | 646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,C 648 | 647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,S 649 | 648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,C 650 | 649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,S 651 | 650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,S 652 | 651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,S 653 | 652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,S 654 | 653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,S 655 | 654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,Q 656 | 655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,Q 657 | 656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,S 658 | 657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,S 659 | 658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,Q 660 | 659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,S 661 | 660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,C 662 | 661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,S 663 | 662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,C 664 | 663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,S 665 | 664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,S 666 | 665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,S 667 | 666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,S 668 | 667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,S 669 | 668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,S 670 | 669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,S 671 | 670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,S 672 | 671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,S 673 | 672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,S 674 | 673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,S 675 | 674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,S 676 | 675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,S 677 | 676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,S 678 | 677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,S 679 | 678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,S 680 | 679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,S 681 | 680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,C 682 | 681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,Q 683 | 682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,C 684 | 683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,S 685 | 684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,S 686 | 685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,S 687 | 686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,C 688 | 687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,S 689 | 688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,S 690 | 689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,S 691 | 690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,S 692 | 691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,S 693 | 692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,C 694 | 693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,S 695 | 694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,C 696 | 695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,S 697 | 696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,S 698 | 697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,S 699 | 698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,Q 700 | 699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C 701 | 700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,S 702 | 701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C 703 | 702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,S 704 | 703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,C 705 | 704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,Q 706 | 705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,S 707 | 706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,S 708 | 707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,S 709 | 708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,S 710 | 709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,S 711 | 710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,C 712 | 711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C 713 | 712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,S 714 | 713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,S 715 | 714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,S 716 | 715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,S 717 | 716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,S 718 | 717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C 719 | 718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,S 720 | 719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,Q 721 | 720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,S 722 | 721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,S 723 | 722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,S 724 | 723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,S 725 | 724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,S 726 | 725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,S 727 | 726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,S 728 | 727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,S 729 | 728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,Q 730 | 729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,S 731 | 730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,S 732 | 731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,S 733 | 732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,C 734 | 733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,S 735 | 734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,S 736 | 735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,S 737 | 736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,S 738 | 737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,S 739 | 738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,C 740 | 739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,S 741 | 740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,S 742 | 741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,S 743 | 742,0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,S 744 | 743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,C 745 | 744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,S 746 | 745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,S 747 | 746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,S 748 | 747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,S 749 | 748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,S 750 | 749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,S 751 | 750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,Q 752 | 751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,S 753 | 752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,S 754 | 753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,S 755 | 754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,S 756 | 755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,S 757 | 756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,S 758 | 757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,S 759 | 758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,S 760 | 759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,S 761 | 760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,S 762 | 761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,S 763 | 762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,S 764 | 763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,C 765 | 764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,S 766 | 765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,S 767 | 766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,S 768 | 767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,C 769 | 768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,Q 770 | 769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,Q 771 | 770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,S 772 | 771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,S 773 | 772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,S 774 | 773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,S 775 | 774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,C 776 | 775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,S 777 | 776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,S 778 | 777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,Q 779 | 778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,S 780 | 779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,Q 781 | 780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,S 782 | 781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,C 783 | 782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,S 784 | 783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,S 785 | 784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,S 786 | 785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,S 787 | 786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,S 788 | 787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,S 789 | 788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,Q 790 | 789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,S 791 | 790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,C 792 | 791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,Q 793 | 792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,S 794 | 793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,S 795 | 794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,C 796 | 795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,S 797 | 796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,S 798 | 797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,S 799 | 798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,S 800 | 799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,C 801 | 800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,S 802 | 801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,S 803 | 802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,S 804 | 803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,S 805 | 804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,C 806 | 805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,S 807 | 806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,S 808 | 807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,S 809 | 808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,S 810 | 809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,S 811 | 810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,S 812 | 811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,S 813 | 812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,S 814 | 813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,S 815 | 814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,S 816 | 815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,S 817 | 816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,S 818 | 817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,S 819 | 818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,C 820 | 819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,S 821 | 820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,S 822 | 821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,S 823 | 822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,S 824 | 823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,S 825 | 824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,S 826 | 825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,S 827 | 826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,Q 828 | 827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,S 829 | 828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,C 830 | 829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,Q 831 | 830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80, 832 | 831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,C 833 | 832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,S 834 | 833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,C 835 | 834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,S 836 | 835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,S 837 | 836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,C 838 | 837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,S 839 | 838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,S 840 | 839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,S 841 | 840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C 842 | 841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,S 843 | 842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,S 844 | 843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,C 845 | 844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,C 846 | 845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,S 847 | 846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,S 848 | 847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,S 849 | 848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,C 850 | 849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,S 851 | 850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C 852 | 851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,S 853 | 852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,S 854 | 853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,C 855 | 854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,S 856 | 855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,S 857 | 856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,S 858 | 857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,S 859 | 858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,S 860 | 859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,C 861 | 860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,C 862 | 861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,S 863 | 862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,S 864 | 863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,S 865 | 864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,S 866 | 865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,S 867 | 866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,S 868 | 867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,C 869 | 868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,S 870 | 869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,S 871 | 870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,S 872 | 871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,S 873 | 872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,S 874 | 873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,S 875 | 874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,S 876 | 875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,C 877 | 876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,C 878 | 877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,S 879 | 878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,S 880 | 879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,S 881 | 880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C 882 | 881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,S 883 | 882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,S 884 | 883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,S 885 | 884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,S 886 | 885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,S 887 | 886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,Q 888 | 887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,S 889 | 888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,S 890 | 889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,S 891 | 890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C 892 | 891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,Q 893 | -------------------------------------------------------------------------------- /gen_features.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from sklearn.model_selection import KFold,StratifiedKFold\n", 10 | "import pandas as pd\n", 11 | "import numpy as np\n", 12 | "from tqdm import tqdm\n", 13 | "import warnings\n", 14 | "warnings.filterwarnings(\"ignore\")" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "train = pd.read_csv('./titanic/train.csv')\n", 24 | "test = pd.read_csv('./titanic/test.csv')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "train['Age'] = train['Age'].fillna(train['Age'].mode())\n", 34 | "test['Age'] = test['Age'].fillna(test['Age'].mode())\n", 35 | "\n", 36 | "train['Embarked'] = train['Embarked'].fillna(train['Embarked'].mode())\n", 37 | "\n", 38 | "train.rename(columns={'Survived':'label'},inplace=True)\n", 39 | "test.rename(columns={'Survived':'label'},inplace=True)\n", 40 | "\n", 41 | "train = train.drop(['Name','Ticket'],axis=1)\n", 42 | "test = test.drop(['Name','Ticket'],axis=1)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/html": [ 53 | "
\n", 54 | "\n", 67 | "\n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarked
0103male22.0107.2500S
1211female38.01071.2833C
2313female26.0007.9250S
3411female35.01053.1000S
4503male35.0008.0500S
\n", 145 | "
" 146 | ], 147 | "text/plain": [ 148 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked\n", 149 | "0 1 0 3 male 22.0 1 0 7.2500 S\n", 150 | "1 2 1 1 female 38.0 1 0 71.2833 C\n", 151 | "2 3 1 3 female 26.0 0 0 7.9250 S\n", 152 | "3 4 1 1 female 35.0 1 0 53.1000 S\n", 153 | "4 5 0 3 male 35.0 0 0 8.0500 S" 154 | ] 155 | }, 156 | "execution_count": 4, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "train.head()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 5, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "Index(['PassengerId', 'label', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch',\n", 174 | " 'Fare', 'Embarked'],\n", 175 | " dtype='object')" 176 | ] 177 | }, 178 | "execution_count": 5, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "train.columns" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 6, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "#疑问:分组统计特征,要在train和test中单独求吗?" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 7, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "name": "stderr", 203 | "output_type": "stream", 204 | "text": [ 205 | "5it [00:00, 333.45it/s]\n" 206 | ] 207 | }, 208 | { 209 | "data": { 210 | "text/html": [ 211 | "
\n", 212 | "\n", 225 | "\n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_encEmbarked_target_enc
010.03male22.0107.2500S0.1952790.335271
121.01female38.01071.2833C0.7398370.589552
231.03female26.0007.9250S0.7398370.335271
341.01female35.01053.1000S0.7398370.335271
450.03male35.0008.0500S0.1952790.335271
\n", 315 | "
" 316 | ], 317 | "text/plain": [ 318 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 319 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 320 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 321 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 322 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 323 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 324 | "\n", 325 | " Sex_target_enc Embarked_target_enc \n", 326 | "0 0.195279 0.335271 \n", 327 | "1 0.739837 0.589552 \n", 328 | "2 0.739837 0.335271 \n", 329 | "3 0.739837 0.335271 \n", 330 | "4 0.195279 0.335271 " 331 | ] 332 | }, 333 | "execution_count": 7, 334 | "metadata": {}, 335 | "output_type": "execute_result" 336 | } 337 | ], 338 | "source": [ 339 | "# 目标编码\n", 340 | "# https://mp.weixin.qq.com/s/taMj-x-qLz5sw-7zld5BmA\n", 341 | "# target encoding,可以理解为转化率,就是聚合id后对label求mean、sum、count(lable=1)/count(all),\n", 342 | "# 这里需要考虑到数据穿越造成线下严重过拟合的情况,一般需要通过交叉平滑处理,如划分为5折,用其中4折去统计剩下的一折,\n", 343 | "# 然后对过大或过小的进行平滑,对测试集直接用训练集来做统计,若数据集有时间因素存在,则需要根据时间顺序来处理;\n", 344 | "def kfold_mean(df_train, df_test, target, target_mean_list):\n", 345 | " folds = StratifiedKFold(n_splits=5)\n", 346 | "\n", 347 | " mean_of_target = df_train[target].mean()\n", 348 | "\n", 349 | " for fold_, (trn_idx, val_idx) in tqdm(enumerate(folds.split(df_train, y=df_train[target]))):\n", 350 | " tr_x = df_train.iloc[trn_idx, :]\n", 351 | " vl_x = df_train.iloc[val_idx, :]\n", 352 | "\n", 353 | " for col in target_mean_list:\n", 354 | " df_train.loc[vl_x.index, f'{col}_target_enc'] = vl_x[col].map(tr_x.groupby(col)[target].mean())\n", 355 | "\n", 356 | " for col in target_mean_list:\n", 357 | " df_train[f'{col}_target_enc'].fillna(mean_of_target, inplace=True)\n", 358 | "\n", 359 | " df_test[f'{col}_target_enc'] = df_test[col].map(df_train.groupby(col)[f'{col}_target_enc'].mean())\n", 360 | "\n", 361 | " df_test[f'{col}_target_enc'].fillna(mean_of_target, inplace=True)\n", 362 | " return pd.concat([df_train, df_test], ignore_index=True)\n", 363 | "\n", 364 | "feature_list = ['Sex','Embarked']\n", 365 | "data = pd.concat([train, test], ignore_index=True)\n", 366 | "data = kfold_mean(data[~data['label'].isna()], data[data['label'].isna()],'label',feature_list)\n", 367 | "\n", 368 | "data.head()" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 8, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stderr", 378 | "output_type": "stream", 379 | "text": [ 380 | "count_feas 基本交叉特征: 100%|██████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 499.26it/s]\n" 381 | ] 382 | }, 383 | { 384 | "data": { 385 | "text/html": [ 386 | "
\n", 387 | "\n", 400 | "\n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_enc...Pclass_Age_addPclass_Age_diffAge_Fare_ratioAge_Fare_multiAge_Fare_addAge_Fare_diffAge_Pclass_ratioAge_Pclass_multiAge_Pclass_addAge_Pclass_diff
010.03male22.0107.2500S0.195279...25.0-19.03.034483159.500029.250014.75007.33333366.025.019.0
121.01female38.01071.2833C0.739837...39.0-37.00.5330842708.7654109.2833-33.283338.00000038.039.037.0
231.03female26.0007.9250S0.739837...29.0-23.03.280757206.050033.925018.07508.66666778.029.023.0
341.01female35.01053.1000S0.739837...36.0-34.00.6591341858.500088.1000-18.100035.00000035.036.034.0
450.03male35.0008.0500S0.195279...38.0-32.04.347826281.750043.050026.950011.666667105.038.032.0
\n", 550 | "

5 rows × 35 columns

\n", 551 | "
" 552 | ], 553 | "text/plain": [ 554 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 555 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 556 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 557 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 558 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 559 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 560 | "\n", 561 | " Sex_target_enc ... Pclass_Age_add Pclass_Age_diff Age_Fare_ratio \\\n", 562 | "0 0.195279 ... 25.0 -19.0 3.034483 \n", 563 | "1 0.739837 ... 39.0 -37.0 0.533084 \n", 564 | "2 0.739837 ... 29.0 -23.0 3.280757 \n", 565 | "3 0.739837 ... 36.0 -34.0 0.659134 \n", 566 | "4 0.195279 ... 38.0 -32.0 4.347826 \n", 567 | "\n", 568 | " Age_Fare_multi Age_Fare_add Age_Fare_diff Age_Pclass_ratio \\\n", 569 | "0 159.5000 29.2500 14.7500 7.333333 \n", 570 | "1 2708.7654 109.2833 -33.2833 38.000000 \n", 571 | "2 206.0500 33.9250 18.0750 8.666667 \n", 572 | "3 1858.5000 88.1000 -18.1000 35.000000 \n", 573 | "4 281.7500 43.0500 26.9500 11.666667 \n", 574 | "\n", 575 | " Age_Pclass_multi Age_Pclass_add Age_Pclass_diff \n", 576 | "0 66.0 25.0 19.0 \n", 577 | "1 38.0 39.0 37.0 \n", 578 | "2 78.0 29.0 23.0 \n", 579 | "3 35.0 36.0 34.0 \n", 580 | "4 105.0 38.0 32.0 \n", 581 | "\n", 582 | "[5 rows x 35 columns]" 583 | ] 584 | }, 585 | "execution_count": 8, 586 | "metadata": {}, 587 | "output_type": "execute_result" 588 | } 589 | ], 590 | "source": [ 591 | "#数值交叉特征\n", 592 | "countfea = ['Fare','Pclass','Age']\n", 593 | "#数值特征与数值特征之间做加减乘除\n", 594 | "for f1 in tqdm(countfea, desc=\"count_feas 基本交叉特征\"):\n", 595 | " for f2 in countfea:\n", 596 | " if f1 != f2:\n", 597 | " data['{}_{}_ratio'.format(f1, f2)] = data[f1].values / data[f2].values\n", 598 | " data['{}_{}_multi'.format(f1, f2)] = data[f1].values * data[f2].values\n", 599 | " data['{}_{}_add'.format(f1, f2)] = data[f1].values + data[f2].values\n", 600 | " data['{}_{}_diff'.format(f1, f2)] = data[f1].values - data[f2].values\n", 601 | "data.head()" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 9, 607 | "metadata": {}, 608 | "outputs": [ 609 | { 610 | "name": "stderr", 611 | "output_type": "stream", 612 | "text": [ 613 | "count_feas 基本聚合特征: 100%|██████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 111.11it/s]\n" 614 | ] 615 | }, 616 | { 617 | "data": { 618 | "text/html": [ 619 | "
\n", 620 | "\n", 633 | "\n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_enc...Age_Sex_meanAge_Sex_medianAge_Sex_maxAge_Sex_minAge_Sex_stdAge_Embarked_meanAge_Embarked_medianAge_Embarked_maxAge_Embarked_minAge_Embarked_std
010.03male22.0107.2500S0.195279...30.58522828.080.00.3314.28058129.24520528.080.00.1714.047507
121.01female38.01071.2833C0.739837...28.68708827.076.00.1714.57696232.33217030.071.00.4215.258092
231.03female26.0007.9250S0.739837...28.68708827.076.00.1714.57696229.24520528.080.00.1714.047507
341.01female35.01053.1000S0.739837...28.68708827.076.00.1714.57696229.24520528.080.00.1714.047507
450.03male35.0008.0500S0.195279...30.58522828.080.00.3314.28058129.24520528.080.00.1714.047507
\n", 783 | "

5 rows × 55 columns

\n", 784 | "
" 785 | ], 786 | "text/plain": [ 787 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 788 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 789 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 790 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 791 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 792 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 793 | "\n", 794 | " Sex_target_enc ... Age_Sex_mean Age_Sex_median Age_Sex_max \\\n", 795 | "0 0.195279 ... 30.585228 28.0 80.0 \n", 796 | "1 0.739837 ... 28.687088 27.0 76.0 \n", 797 | "2 0.739837 ... 28.687088 27.0 76.0 \n", 798 | "3 0.739837 ... 28.687088 27.0 76.0 \n", 799 | "4 0.195279 ... 30.585228 28.0 80.0 \n", 800 | "\n", 801 | " Age_Sex_min Age_Sex_std Age_Embarked_mean Age_Embarked_median \\\n", 802 | "0 0.33 14.280581 29.245205 28.0 \n", 803 | "1 0.17 14.576962 32.332170 30.0 \n", 804 | "2 0.17 14.576962 29.245205 28.0 \n", 805 | "3 0.17 14.576962 29.245205 28.0 \n", 806 | "4 0.33 14.280581 29.245205 28.0 \n", 807 | "\n", 808 | " Age_Embarked_max Age_Embarked_min Age_Embarked_std \n", 809 | "0 80.0 0.17 14.047507 \n", 810 | "1 71.0 0.42 15.258092 \n", 811 | "2 80.0 0.17 14.047507 \n", 812 | "3 80.0 0.17 14.047507 \n", 813 | "4 80.0 0.17 14.047507 \n", 814 | "\n", 815 | "[5 rows x 55 columns]" 816 | ] 817 | }, 818 | "execution_count": 9, 819 | "metadata": {}, 820 | "output_type": "execute_result" 821 | } 822 | ], 823 | "source": [ 824 | "#类别特征与数值特征之间\n", 825 | "catefea = ['Sex','Embarked']\n", 826 | "countfea = ['Fare','Age']\n", 827 | "#例如男性购买的船票票价均值、最大值、最小值中位数。\n", 828 | "for i in tqdm(countfea,desc=\"count_feas 基本聚合特征\"):\n", 829 | " for j in catefea:\n", 830 | " data['{}_{}_mean'.format(i,j)] = data.groupby(j)[i].transform('mean')\n", 831 | " data['{}_{}_median'.format(i,j)] = data.groupby(j)[i].transform('median')\n", 832 | " data['{}_{}_max'.format(i,j)] = data.groupby(j)[i].transform('max')\n", 833 | " data['{}_{}_min'.format(i,j)] = data.groupby(j)[i].transform('min')\n", 834 | " data['{}_{}_std'.format(i,j)] = data.groupby(j)[i].transform('std')\n", 835 | " \n", 836 | "data.head()" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": 10, 842 | "metadata": {}, 843 | "outputs": [ 844 | { 845 | "data": { 846 | "text/html": [ 847 | "
\n", 848 | "\n", 861 | "\n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_enc...Age-max_gb_SexAge/sum_gb_SexFare-mean_gb_EmbarkedFare-min_gb_EmbarkedFare-max_gb_EmbarkedFare/sum_gb_EmbarkedAge-mean_gb_EmbarkedAge-min_gb_EmbarkedAge-max_gb_EmbarkedAge/sum_gb_Embarked
010.03male22.0107.2500S0.195279...-58.00.001093-20.1688247.2500-255.75000.000290-7.24520521.83-58.00.000962
121.01female38.01071.2833C0.739837...-38.00.0034148.94703367.2708-441.04590.0042355.66783037.58-33.00.005544
231.03female26.0007.9250S0.739837...-50.00.002336-19.4938247.9250-255.07500.000317-3.24520525.83-54.00.001137
341.01female35.01053.1000S0.739837...-41.00.00314425.68117653.1000-209.90000.0021215.75479534.83-45.00.001530
450.03male35.0008.0500S0.195279...-45.00.001739-19.3688248.0500-254.95000.0003225.75479534.83-45.00.001530
\n", 1011 | "

5 rows × 71 columns

\n", 1012 | "
" 1013 | ], 1014 | "text/plain": [ 1015 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 1016 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 1017 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 1018 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 1019 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 1020 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 1021 | "\n", 1022 | " Sex_target_enc ... Age-max_gb_Sex Age/sum_gb_Sex Fare-mean_gb_Embarked \\\n", 1023 | "0 0.195279 ... -58.0 0.001093 -20.168824 \n", 1024 | "1 0.739837 ... -38.0 0.003414 8.947033 \n", 1025 | "2 0.739837 ... -50.0 0.002336 -19.493824 \n", 1026 | "3 0.739837 ... -41.0 0.003144 25.681176 \n", 1027 | "4 0.195279 ... -45.0 0.001739 -19.368824 \n", 1028 | "\n", 1029 | " Fare-min_gb_Embarked Fare-max_gb_Embarked Fare/sum_gb_Embarked \\\n", 1030 | "0 7.2500 -255.7500 0.000290 \n", 1031 | "1 67.2708 -441.0459 0.004235 \n", 1032 | "2 7.9250 -255.0750 0.000317 \n", 1033 | "3 53.1000 -209.9000 0.002121 \n", 1034 | "4 8.0500 -254.9500 0.000322 \n", 1035 | "\n", 1036 | " Age-mean_gb_Embarked Age-min_gb_Embarked Age-max_gb_Embarked \\\n", 1037 | "0 -7.245205 21.83 -58.0 \n", 1038 | "1 5.667830 37.58 -33.0 \n", 1039 | "2 -3.245205 25.83 -54.0 \n", 1040 | "3 5.754795 34.83 -45.0 \n", 1041 | "4 5.754795 34.83 -45.0 \n", 1042 | "\n", 1043 | " Age/sum_gb_Embarked \n", 1044 | "0 0.000962 \n", 1045 | "1 0.005544 \n", 1046 | "2 0.001137 \n", 1047 | "3 0.001530 \n", 1048 | "4 0.001530 \n", 1049 | "\n", 1050 | "[5 rows x 71 columns]" 1051 | ] 1052 | }, 1053 | "execution_count": 10, 1054 | "metadata": {}, 1055 | "output_type": "execute_result" 1056 | } 1057 | ], 1058 | "source": [ 1059 | "#偏离值特征,顾名思义,偏离均值、最大值、最小值的误差值\n", 1060 | "catefea = ['Sex','Embarked']\n", 1061 | "countfea = ['Fare','Age'] \n", 1062 | "for group in catefea:\n", 1063 | " for feature in countfea:\n", 1064 | " tmp = data.groupby(group)[feature].agg([sum, min, max, np.mean]).reset_index()\n", 1065 | " tmp = pd.merge(data, tmp, on=group, how='left')\n", 1066 | " data['{}-mean_gb_{}'.format(feature, group)] = data[feature] - tmp['mean']\n", 1067 | " data['{}-min_gb_{}'.format(feature, group)] = data[feature] - tmp['min']\n", 1068 | " data['{}-max_gb_{}'.format(feature, group)] = data[feature] - tmp['max']\n", 1069 | " data['{}/sum_gb_{}'.format(feature, group)] = data[feature] / tmp['sum']\n", 1070 | "data.head()" 1071 | ] 1072 | }, 1073 | { 1074 | "cell_type": "code", 1075 | "execution_count": 11, 1076 | "metadata": {}, 1077 | "outputs": [], 1078 | "source": [ 1079 | "# 特征unique count特征,针对类别型特征\n", 1080 | "# for index, col1 in enumerate(['age', 'province', 'city', 'model']):\n", 1081 | "# for col2 in ['age', 'province', 'city', 'model'][index:]:\n", 1082 | "# data['{}_in_{}_count'.format(col1, col2)] = data.groupby(col1)[col2].transform('count')\n", 1083 | "# data['{}_in_{}_nunique'.format(col1, col2)] = data.groupby(col1)[col2].transform('nunique')\n", 1084 | "# data['{}_in_{}_nunique/{}_in_{}_count'.format(col1, col2, col1, col2)] = data['{}_in_{}_nunique'.format(col1,col2)] /data['{}_in_{}_count'.format(col1,col2)]\n", 1085 | "\n", 1086 | "# data['{}_in_{}_count'.format(col2, col1)] = data.groupby(col2)[col1].transform('count')\n", 1087 | "# data['{}_in_{}_nunique'.format(col2, col1)] = data.groupby(col2)[col1].transform('nunique')\n", 1088 | "# data['{}_in_{}_nunique/{}_in_{}_count'.format(col2, col1, col2, col1)] = data['{}_in_{}_nunique'.format(col2,col1)] / data['{}_in_{}_count'.format(col2, col1)]\n" 1089 | ] 1090 | }, 1091 | { 1092 | "cell_type": "code", 1093 | "execution_count": 12, 1094 | "metadata": {}, 1095 | "outputs": [ 1096 | { 1097 | "name": "stderr", 1098 | "output_type": "stream", 1099 | "text": [ 1100 | "100%|██████████████████████████████████████████████████████████████████████████| 1309/1309 [00:00<00:00, 163549.12it/s]\n" 1101 | ] 1102 | }, 1103 | { 1104 | "data": { 1105 | "text/html": [ 1106 | "
\n", 1107 | "\n", 1120 | "\n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_enc...Age/sum_gb_SexFare-mean_gb_EmbarkedFare-min_gb_EmbarkedFare-max_gb_EmbarkedFare/sum_gb_EmbarkedAge-mean_gb_EmbarkedAge-min_gb_EmbarkedAge-max_gb_EmbarkedAge/sum_gb_EmbarkedPclass_Sex
010.03male22.0107.2500S0.195279...0.001093-20.1688247.2500-255.75000.000290-7.24520521.83-58.00.0009620
121.01female38.01071.2833C0.739837...0.0034148.94703367.2708-441.04590.0042355.66783037.58-33.00.0055441
231.03female26.0007.9250S0.739837...0.002336-19.4938247.9250-255.07500.000317-3.24520525.83-54.00.0011372
341.01female35.01053.1000S0.739837...0.00314425.68117653.1000-209.90000.0021215.75479534.83-45.00.0015301
450.03male35.0008.0500S0.195279...0.001739-19.3688248.0500-254.95000.0003225.75479534.83-45.00.0015300
\n", 1270 | "

5 rows × 72 columns

\n", 1271 | "
" 1272 | ], 1273 | "text/plain": [ 1274 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 1275 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 1276 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 1277 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 1278 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 1279 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 1280 | "\n", 1281 | " Sex_target_enc ... Age/sum_gb_Sex Fare-mean_gb_Embarked \\\n", 1282 | "0 0.195279 ... 0.001093 -20.168824 \n", 1283 | "1 0.739837 ... 0.003414 8.947033 \n", 1284 | "2 0.739837 ... 0.002336 -19.493824 \n", 1285 | "3 0.739837 ... 0.003144 25.681176 \n", 1286 | "4 0.195279 ... 0.001739 -19.368824 \n", 1287 | "\n", 1288 | " Fare-min_gb_Embarked Fare-max_gb_Embarked Fare/sum_gb_Embarked \\\n", 1289 | "0 7.2500 -255.7500 0.000290 \n", 1290 | "1 67.2708 -441.0459 0.004235 \n", 1291 | "2 7.9250 -255.0750 0.000317 \n", 1292 | "3 53.1000 -209.9000 0.002121 \n", 1293 | "4 8.0500 -254.9500 0.000322 \n", 1294 | "\n", 1295 | " Age-mean_gb_Embarked Age-min_gb_Embarked Age-max_gb_Embarked \\\n", 1296 | "0 -7.245205 21.83 -58.0 \n", 1297 | "1 5.667830 37.58 -33.0 \n", 1298 | "2 -3.245205 25.83 -54.0 \n", 1299 | "3 5.754795 34.83 -45.0 \n", 1300 | "4 5.754795 34.83 -45.0 \n", 1301 | "\n", 1302 | " Age/sum_gb_Embarked Pclass_Sex \n", 1303 | "0 0.000962 0 \n", 1304 | "1 0.005544 1 \n", 1305 | "2 0.001137 2 \n", 1306 | "3 0.001530 1 \n", 1307 | "4 0.001530 0 \n", 1308 | "\n", 1309 | "[5 rows x 72 columns]" 1310 | ] 1311 | }, 1312 | "execution_count": 12, 1313 | "metadata": {}, 1314 | "output_type": "execute_result" 1315 | } 1316 | ], 1317 | "source": [ 1318 | "#类别与类别特征逻辑上的交叉,特征值的排列组合\n", 1319 | "#例如:即是男性,pclass又是1的样本。\n", 1320 | "def cross_two(base_info,name_1,name_2):\n", 1321 | " new_col=[]\n", 1322 | " encode=0\n", 1323 | " dic={}\n", 1324 | " val_1=base_info[name_1]\n", 1325 | " val_2=base_info[name_2]\n", 1326 | " for i in tqdm(range(len(val_1))):\n", 1327 | " tmp=str(val_1[i])+'_'+str(val_2[i])\n", 1328 | " if tmp in dic:\n", 1329 | " new_col.append(dic[tmp])\n", 1330 | " else:\n", 1331 | " dic[tmp]=encode\n", 1332 | " new_col.append(encode)\n", 1333 | " encode+=1\n", 1334 | " return new_col\n", 1335 | "\n", 1336 | "new_col=cross_two(data,'Pclass','Sex')#作企业类型-小类的交叉特征\n", 1337 | "data['Pclass_Sex']=new_col\n", 1338 | "data.head()" 1339 | ] 1340 | }, 1341 | { 1342 | "cell_type": "code", 1343 | "execution_count": 13, 1344 | "metadata": {}, 1345 | "outputs": [ 1346 | { 1347 | "data": { 1348 | "text/html": [ 1349 | "
\n", 1350 | "\n", 1363 | "\n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_enc...Fare-max_gb_EmbarkedFare/sum_gb_EmbarkedAge-mean_gb_EmbarkedAge-min_gb_EmbarkedAge-max_gb_EmbarkedAge/sum_gb_EmbarkedPclass_SexSex_COUNTEmbarked_COUNTPclass_COUNT
010.03male22.0107.2500S0.195279...-255.75000.000290-7.24520521.83-58.00.0009620843914.0709
121.01female38.01071.2833C0.739837...-441.04590.0042355.66783037.58-33.00.0055441466270.0323
231.03female26.0007.9250S0.739837...-255.07500.000317-3.24520525.83-54.00.0011372466914.0709
341.01female35.01053.1000S0.739837...-209.90000.0021215.75479534.83-45.00.0015301466914.0323
450.03male35.0008.0500S0.195279...-254.95000.0003225.75479534.83-45.00.0015300843914.0709
\n", 1513 | "

5 rows × 75 columns

\n", 1514 | "
" 1515 | ], 1516 | "text/plain": [ 1517 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 1518 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 1519 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 1520 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 1521 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 1522 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 1523 | "\n", 1524 | " Sex_target_enc ... Fare-max_gb_Embarked Fare/sum_gb_Embarked \\\n", 1525 | "0 0.195279 ... -255.7500 0.000290 \n", 1526 | "1 0.739837 ... -441.0459 0.004235 \n", 1527 | "2 0.739837 ... -255.0750 0.000317 \n", 1528 | "3 0.739837 ... -209.9000 0.002121 \n", 1529 | "4 0.195279 ... -254.9500 0.000322 \n", 1530 | "\n", 1531 | " Age-mean_gb_Embarked Age-min_gb_Embarked Age-max_gb_Embarked \\\n", 1532 | "0 -7.245205 21.83 -58.0 \n", 1533 | "1 5.667830 37.58 -33.0 \n", 1534 | "2 -3.245205 25.83 -54.0 \n", 1535 | "3 5.754795 34.83 -45.0 \n", 1536 | "4 5.754795 34.83 -45.0 \n", 1537 | "\n", 1538 | " Age/sum_gb_Embarked Pclass_Sex Sex_COUNT Embarked_COUNT Pclass_COUNT \n", 1539 | "0 0.000962 0 843 914.0 709 \n", 1540 | "1 0.005544 1 466 270.0 323 \n", 1541 | "2 0.001137 2 466 914.0 709 \n", 1542 | "3 0.001530 1 466 914.0 323 \n", 1543 | "4 0.001530 0 843 914.0 709 \n", 1544 | "\n", 1545 | "[5 rows x 75 columns]" 1546 | ] 1547 | }, 1548 | "execution_count": 13, 1549 | "metadata": {}, 1550 | "output_type": "execute_result" 1551 | } 1552 | ], 1553 | "source": [ 1554 | "# 频数统计,计算类别特征每一类的个数,例如 男性的count是843,那男性对应的衍生特征值是843\n", 1555 | "cat_col = ['Sex','Embarked','Pclass']\n", 1556 | "for col in cat_col:\n", 1557 | " data[col + '_COUNT'] = data[col].map(data[col].value_counts())\n", 1558 | " col_idx = data[col].value_counts()\n", 1559 | "\n", 1560 | "data.head()" 1561 | ] 1562 | }, 1563 | { 1564 | "cell_type": "code", 1565 | "execution_count": 14, 1566 | "metadata": {}, 1567 | "outputs": [ 1568 | { 1569 | "name": "stderr", 1570 | "output_type": "stream", 1571 | "text": [ 1572 | "分箱特征: 100%|█████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 996.75it/s]\n" 1573 | ] 1574 | }, 1575 | { 1576 | "data": { 1577 | "text/html": [ 1578 | "
\n", 1579 | "\n", 1592 | "\n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_enc...Fare/sum_gb_EmbarkedAge-mean_gb_EmbarkedAge-min_gb_EmbarkedAge-max_gb_EmbarkedAge/sum_gb_EmbarkedPclass_SexSex_COUNTEmbarked_COUNTPclass_COUNTAge_bin
010.03male22.0107.2500S0.195279...0.000290-7.24520521.83-58.00.0009620843914.07094.0
121.01female38.01071.2833C0.739837...0.0042355.66783037.58-33.00.0055441466270.03237.0
231.03female26.0007.9250S0.739837...0.000317-3.24520525.83-54.00.0011372466914.07095.0
341.01female35.01053.1000S0.739837...0.0021215.75479534.83-45.00.0015301466914.03237.0
450.03male35.0008.0500S0.195279...0.0003225.75479534.83-45.00.0015300843914.07097.0
\n", 1742 | "

5 rows × 76 columns

\n", 1743 | "
" 1744 | ], 1745 | "text/plain": [ 1746 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 1747 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 1748 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 1749 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 1750 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 1751 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 1752 | "\n", 1753 | " Sex_target_enc ... Fare/sum_gb_Embarked Age-mean_gb_Embarked \\\n", 1754 | "0 0.195279 ... 0.000290 -7.245205 \n", 1755 | "1 0.739837 ... 0.004235 5.667830 \n", 1756 | "2 0.739837 ... 0.000317 -3.245205 \n", 1757 | "3 0.739837 ... 0.002121 5.754795 \n", 1758 | "4 0.195279 ... 0.000322 5.754795 \n", 1759 | "\n", 1760 | " Age-min_gb_Embarked Age-max_gb_Embarked Age/sum_gb_Embarked Pclass_Sex \\\n", 1761 | "0 21.83 -58.0 0.000962 0 \n", 1762 | "1 37.58 -33.0 0.005544 1 \n", 1763 | "2 25.83 -54.0 0.001137 2 \n", 1764 | "3 34.83 -45.0 0.001530 1 \n", 1765 | "4 34.83 -45.0 0.001530 0 \n", 1766 | "\n", 1767 | " Sex_COUNT Embarked_COUNT Pclass_COUNT Age_bin \n", 1768 | "0 843 914.0 709 4.0 \n", 1769 | "1 466 270.0 323 7.0 \n", 1770 | "2 466 914.0 709 5.0 \n", 1771 | "3 466 914.0 323 7.0 \n", 1772 | "4 843 914.0 709 7.0 \n", 1773 | "\n", 1774 | "[5 rows x 76 columns]" 1775 | ] 1776 | }, 1777 | "execution_count": 14, 1778 | "metadata": {}, 1779 | "output_type": "execute_result" 1780 | } 1781 | ], 1782 | "source": [ 1783 | "#分箱,等频、等宽\n", 1784 | "def bucket(data,count_feas,bucket_len):\n", 1785 | " #通过除法映射到间隔均匀的分箱中,每个分箱的取值范围都是Age/10,相当于宽度10等宽分箱\n", 1786 | " for fea in tqdm(count_feas,desc='分箱特征'):\n", 1787 | " data['{}_bin'.format(fea)] = np.floor_divide(data[fea], bucket_len)\n", 1788 | "\n", 1789 | "count_feas = ['Age']\n", 1790 | "bucket(data,count_feas,5)\n", 1791 | "data.head()" 1792 | ] 1793 | }, 1794 | { 1795 | "cell_type": "code", 1796 | "execution_count": 15, 1797 | "metadata": {}, 1798 | "outputs": [ 1799 | { 1800 | "name": "stderr", 1801 | "output_type": "stream", 1802 | "text": [ 1803 | "100%|██████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1001.27it/s]\n" 1804 | ] 1805 | }, 1806 | { 1807 | "data": { 1808 | "text/html": [ 1809 | "
\n", 1810 | "\n", 1823 | "\n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | " \n", 1947 | " \n", 1948 | " \n", 1949 | " \n", 1950 | " \n", 1951 | " \n", 1952 | " \n", 1953 | " \n", 1954 | " \n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | " \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | " \n", 1972 | "
PassengerIdlabelPclassSexAgeSibSpParchFareEmbarkedSex_target_enc...Age-mean_gb_EmbarkedAge-min_gb_EmbarkedAge-max_gb_EmbarkedAge/sum_gb_EmbarkedPclass_SexSex_COUNTEmbarked_COUNTPclass_COUNTAge_binSex_label_encoder
010.03male22.0107.2500S0.195279...-7.24520521.83-58.00.0009620843914.07094.01
121.01female38.01071.2833C0.739837...5.66783037.58-33.00.0055441466270.03237.00
231.03female26.0007.9250S0.739837...-3.24520525.83-54.00.0011372466914.07095.00
341.01female35.01053.1000S0.739837...5.75479534.83-45.00.0015301466914.03237.00
450.03male35.0008.0500S0.195279...5.75479534.83-45.00.0015300843914.07097.01
\n", 1973 | "

5 rows × 77 columns

\n", 1974 | "
" 1975 | ], 1976 | "text/plain": [ 1977 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n", 1978 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n", 1979 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n", 1980 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n", 1981 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n", 1982 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n", 1983 | "\n", 1984 | " Sex_target_enc ... Age-mean_gb_Embarked Age-min_gb_Embarked \\\n", 1985 | "0 0.195279 ... -7.245205 21.83 \n", 1986 | "1 0.739837 ... 5.667830 37.58 \n", 1987 | "2 0.739837 ... -3.245205 25.83 \n", 1988 | "3 0.739837 ... 5.754795 34.83 \n", 1989 | "4 0.195279 ... 5.754795 34.83 \n", 1990 | "\n", 1991 | " Age-max_gb_Embarked Age/sum_gb_Embarked Pclass_Sex Sex_COUNT \\\n", 1992 | "0 -58.0 0.000962 0 843 \n", 1993 | "1 -33.0 0.005544 1 466 \n", 1994 | "2 -54.0 0.001137 2 466 \n", 1995 | "3 -45.0 0.001530 1 466 \n", 1996 | "4 -45.0 0.001530 0 843 \n", 1997 | "\n", 1998 | " Embarked_COUNT Pclass_COUNT Age_bin Sex_label_encoder \n", 1999 | "0 914.0 709 4.0 1 \n", 2000 | "1 270.0 323 7.0 0 \n", 2001 | "2 914.0 709 5.0 0 \n", 2002 | "3 914.0 323 7.0 0 \n", 2003 | "4 914.0 709 7.0 1 \n", 2004 | "\n", 2005 | "[5 rows x 77 columns]" 2006 | ] 2007 | }, 2008 | "execution_count": 15, 2009 | "metadata": {}, 2010 | "output_type": "execute_result" 2011 | } 2012 | ], 2013 | "source": [ 2014 | "# 基本的类别特征转换方法label_encode\n", 2015 | "from sklearn.preprocessing import LabelEncoder\n", 2016 | "for col in tqdm(['Sex']):\n", 2017 | " le = LabelEncoder()\n", 2018 | " le.fit(data[col])\n", 2019 | " data[col+'_label_encoder'] = le.transform(data[col])\n", 2020 | " # test[col] = le.transform(test[col])\n", 2021 | "data.head()\n" 2022 | ] 2023 | }, 2024 | { 2025 | "cell_type": "code", 2026 | "execution_count": null, 2027 | "metadata": {}, 2028 | "outputs": [], 2029 | "source": [ 2030 | "#构建序列特征,例如构建每个用户的登录行为序列\n", 2031 | "#例如一个用户会存在多个样本,对用户groupby获取登录序列,拼起来作为新特征\n", 2032 | "launch_grp = pd.DataFrame()\n", 2033 | "\n", 2034 | "user_id = []\n", 2035 | "launch_date_str = []\n", 2036 | "for i in launch.groupby('user_id'):\n", 2037 | " launch_date = []\n", 2038 | " user_id.append(i[0])\n", 2039 | " for j in i[1]['date']:\n", 2040 | " launch_date.append(j)\n", 2041 | " launch_date_str.append(str(launch_date))\n", 2042 | "launch_grp['user_id'] = list(user_id)\n", 2043 | "launch_grp['launch_date_str'] = list(launch_date_str)\n", 2044 | "launch_grp.head()" 2045 | ] 2046 | }, 2047 | { 2048 | "cell_type": "code", 2049 | "execution_count": null, 2050 | "metadata": {}, 2051 | "outputs": [], 2052 | "source": [ 2053 | "#对上述序列或文本特征进行w2v,构建embedding特征\n", 2054 | "from gensim.models.word2vec import Word2Vec\n", 2055 | "\n", 2056 | "data['tagid'] = data['tagid'].apply(lambda x: eval(x))\n", 2057 | "sentences = data['tagid'].values.tolist()\n", 2058 | "for i in range(len(sentences)):\n", 2059 | " sentences[i] = [str(x) for x in sentences[i]] #将每个tagid转化成str格式\n", 2060 | "\n", 2061 | "#训练数据格式如下\n", 2062 | "#sentences=[['外形', '外观', '好看', '屏幕', '特别'], ['手机', '好看', '段时间'], ['手机', '很漂亮', '评价']]\n", 2063 | "\n", 2064 | "emb_size = 32\n", 2065 | "#model = Word2Vec(sentences,vector_size=emb_size, window=6, min_count=5, sg=0, hs=0, seed=1,epochs=5)\n", 2066 | "model = Word2Vec.load('./w2vmodel/w2vmodel.model')\n", 2067 | "emb_matrix = []\n", 2068 | "for seq in sentences:\n", 2069 | " vec = []\n", 2070 | " for w in seq:\n", 2071 | "# if w in model.wv.vocab:\n", 2072 | "# vec.append(model.wv[w])\n", 2073 | " try:\n", 2074 | " vec.append(model.wv[w])\n", 2075 | " except KeyError:\n", 2076 | " continue\n", 2077 | " \n", 2078 | " if len(vec) > 0:\n", 2079 | " emb_matrix.append(np.mean(vec, axis=0))\n", 2080 | " else:\n", 2081 | " emb_matrix.append([0] * emb_size)\n", 2082 | "emb_matrix = np.array(emb_matrix)\n", 2083 | "for i in range(emb_size):\n", 2084 | " data['tag_emb_{}'.format(i)] = emb_matrix[:, i]" 2085 | ] 2086 | }, 2087 | { 2088 | "cell_type": "code", 2089 | "execution_count": null, 2090 | "metadata": {}, 2091 | "outputs": [], 2092 | "source": [ 2093 | "#想象力特征" 2094 | ] 2095 | } 2096 | ], 2097 | "metadata": { 2098 | "kernelspec": { 2099 | "display_name": "Python 3", 2100 | "language": "python", 2101 | "name": "python3" 2102 | }, 2103 | "language_info": { 2104 | "codemirror_mode": { 2105 | "name": "ipython", 2106 | "version": 3 2107 | }, 2108 | "file_extension": ".py", 2109 | "mimetype": "text/x-python", 2110 | "name": "python", 2111 | "nbconvert_exporter": "python", 2112 | "pygments_lexer": "ipython3", 2113 | "version": "3.8.5" 2114 | }, 2115 | "toc": { 2116 | "base_numbering": 1, 2117 | "nav_menu": {}, 2118 | "number_sections": true, 2119 | "sideBar": true, 2120 | "skip_h1_title": false, 2121 | "title_cell": "Table of Contents", 2122 | "title_sidebar": "Contents", 2123 | "toc_cell": false, 2124 | "toc_position": {}, 2125 | "toc_section_display": true, 2126 | "toc_window_display": false 2127 | } 2128 | }, 2129 | "nbformat": 4, 2130 | "nbformat_minor": 2 2131 | } 2132 | --------------------------------------------------------------------------------