├── .gitignore
├── .ipynb_checkpoints
└── speedmodel-checkpoint.ipynb
├── README.md
├── speedmodel.ipynb
├── kmean_vectors.ipynb
├── titanic
├── gender_submission.csv
├── test.csv
└── train.csv
└── gen_features.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
--------------------------------------------------------------------------------
/.ipynb_checkpoints/speedmodel-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # data_analysis
2 | #### get_feature_importance 机器学习数据预处理:包括浮点型数据转化降低内存、画数据分布图、多种特征筛选、多种调参技巧
3 | #### gen_features 各种数据衍生方式
4 | #### 树模型部署:treelite模型加速
5 | #### 文本聚类打标签
6 |
--------------------------------------------------------------------------------
/speedmodel.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "# https://treelite.readthedocs.io/en/latest/tutorials/first.html"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": null,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": []
18 | }
19 | ],
20 | "metadata": {
21 | "kernelspec": {
22 | "display_name": "Python 3",
23 | "language": "python",
24 | "name": "python3"
25 | },
26 | "language_info": {
27 | "codemirror_mode": {
28 | "name": "ipython",
29 | "version": 3
30 | },
31 | "file_extension": ".py",
32 | "mimetype": "text/x-python",
33 | "name": "python",
34 | "nbconvert_exporter": "python",
35 | "pygments_lexer": "ipython3",
36 | "version": "3.7.4"
37 | },
38 | "toc": {
39 | "base_numbering": 1,
40 | "nav_menu": {},
41 | "number_sections": true,
42 | "sideBar": true,
43 | "skip_h1_title": false,
44 | "title_cell": "Table of Contents",
45 | "title_sidebar": "Contents",
46 | "toc_cell": false,
47 | "toc_position": {},
48 | "toc_section_display": true,
49 | "toc_window_display": false
50 | }
51 | },
52 | "nbformat": 4,
53 | "nbformat_minor": 2
54 | }
55 |
--------------------------------------------------------------------------------
/kmean_vectors.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "139c210d",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "from sklearn.cluster import KMeans\n",
12 | "import numpy as np\n",
13 | "import matplotlib.pyplot as plt\n",
14 | "import torch\n",
15 | "from sklearn.metrics import silhouette_score\n",
16 | "from transformers import AutoTokenizer, AutoModelForMaskedLM"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": null,
22 | "id": "bfbcf944",
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "def cosine_similarity(vector1, vector2):\n",
27 | " dot_product = np.dot(vector1, vector2)\n",
28 | " norm_vector1 = np.linalg.norm(vector1)\n",
29 | " norm_vector2 = np.linalg.norm(vector2)\n",
30 | " cosine_similarity = dot_product / (norm_vector1 * norm_vector2)\n",
31 | " return cosine_similarity\n",
32 | "\n",
33 | "\n",
34 | "def get_emb(text):\n",
35 | " inputs = tokenizer(text, return_tensors=\"pt\")\n",
36 | " outputs = model(**inputs, output_hidden_states=True)\n",
37 | " text_embedding = outputs.hidden_states[-1][:, 0, :]\n",
38 | " vector = torch.Tensor(text_embedding).tolist()[0]\n",
39 | " return vector"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "id": "f4466060",
46 | "metadata": {},
47 | "outputs": [],
48 | "source": [
49 | "# 文本向量化\n",
50 | "model = AutoModelForMaskedLM.from_pretrained(r'./Erlangshen-SimCSE-110M-Chinese')\n",
51 | "tokenizer = AutoTokenizer.from_pretrained(r'./Erlangshen-SimCSE-110M-Chinese')\n",
52 | "data = pd.read_csv(r\"./kmean_test.csv\", encoding='gbk')\n",
53 | "data['emb'] = data['text'].apply(lambda x: get_emb(x))\n",
54 | "vector_list = data['emb'].values.tolist()"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": null,
60 | "id": "7faf0b3e",
61 | "metadata": {},
62 | "outputs": [],
63 | "source": [
64 | "# 寻找最佳聚类k值的两种方法\n",
65 | "# 手肘法 利用SSE选择k,取拐点时的k\n",
66 | "SSE = [] # 存放每次结果的误差平方和\n",
67 | "for k in range(2, 9):\n",
68 | " estimator = KMeans(n_clusters=k) # 构造聚类器\n",
69 | " estimator.fit(vector_list)\n",
70 | " SSE.append(estimator.inertia_)\n",
71 | "X = range(2, 9)\n",
72 | "plt.xlabel('k')\n",
73 | "plt.ylabel('SSE')\n",
74 | "plt.plot(X, SSE, 'o-')\n",
75 | "plt.show()\n",
76 | "\n",
77 | "# 轮廓系数法 取系数最大时的k\n",
78 | "Scores = [] # 存放轮廓系数\n",
79 | "for k in range(2, 9):\n",
80 | " estimator = KMeans(n_clusters=k) # 构造聚类器\n",
81 | " estimator.fit(vector_list)\n",
82 | " Scores.append(silhouette_score(vector_list, estimator.labels_, metric='euclidean'))\n",
83 | "X = range(2, 9)\n",
84 | "plt.xlabel('k')\n",
85 | "plt.ylabel('轮廓系数')\n",
86 | "plt.plot(X, Scores, 'o-')\n",
87 | "plt.show()"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "id": "cbe7596a",
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "# 确定k后开始聚类\n",
98 | "clf = KMeans(n_clusters=6)\n",
99 | "clf.fit(vector_list)"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "id": "6dcafd2a",
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 | "# 获取所有训练样本的簇标签[0,2,1,0,2,2,1,..]\n",
110 | "labels = clf.labels_\n",
111 | "data['label'] = labels\n",
112 | "\n",
113 | "# 也可以推理新的样本\n",
114 | "# clf.predict()\n",
115 | "\n",
116 | "# 获取中心点,并构建簇中心点与簇标签的映射\n",
117 | "centers = clf.cluster_centers_\n",
118 | "center_map = dict(zip([i for i in range(len(centers))], centers))\n",
119 | "data['center'] = data['label'].apply(lambda x: center_map.get(x))\n",
120 | "\n",
121 | "# 计算样本到簇心的距离\n",
122 | "data['cosine'] = data.apply(lambda x: cosine_similarity(x['center'], x['emb']), axis=1)\n",
123 | "data.to_csv(r\"./kmean_test_res.csv\", index=False)\n",
124 | "\n",
125 | "# todo 挑选每个簇最靠近簇心的样本,人为观察打标或丢给大模型打标"
126 | ]
127 | }
128 | ],
129 | "metadata": {
130 | "kernelspec": {
131 | "display_name": "Python 3",
132 | "language": "python",
133 | "name": "python3"
134 | },
135 | "language_info": {
136 | "codemirror_mode": {
137 | "name": "ipython",
138 | "version": 3
139 | },
140 | "file_extension": ".py",
141 | "mimetype": "text/x-python",
142 | "name": "python",
143 | "nbconvert_exporter": "python",
144 | "pygments_lexer": "ipython3",
145 | "version": "3.8.5"
146 | }
147 | },
148 | "nbformat": 4,
149 | "nbformat_minor": 5
150 | }
151 |
--------------------------------------------------------------------------------
/titanic/gender_submission.csv:
--------------------------------------------------------------------------------
1 | PassengerId,Survived
2 | 892,0
3 | 893,1
4 | 894,0
5 | 895,0
6 | 896,1
7 | 897,0
8 | 898,1
9 | 899,0
10 | 900,1
11 | 901,0
12 | 902,0
13 | 903,0
14 | 904,1
15 | 905,0
16 | 906,1
17 | 907,1
18 | 908,0
19 | 909,0
20 | 910,1
21 | 911,1
22 | 912,0
23 | 913,0
24 | 914,1
25 | 915,0
26 | 916,1
27 | 917,0
28 | 918,1
29 | 919,0
30 | 920,0
31 | 921,0
32 | 922,0
33 | 923,0
34 | 924,1
35 | 925,1
36 | 926,0
37 | 927,0
38 | 928,1
39 | 929,1
40 | 930,0
41 | 931,0
42 | 932,0
43 | 933,0
44 | 934,0
45 | 935,1
46 | 936,1
47 | 937,0
48 | 938,0
49 | 939,0
50 | 940,1
51 | 941,1
52 | 942,0
53 | 943,0
54 | 944,1
55 | 945,1
56 | 946,0
57 | 947,0
58 | 948,0
59 | 949,0
60 | 950,0
61 | 951,1
62 | 952,0
63 | 953,0
64 | 954,0
65 | 955,1
66 | 956,0
67 | 957,1
68 | 958,1
69 | 959,0
70 | 960,0
71 | 961,1
72 | 962,1
73 | 963,0
74 | 964,1
75 | 965,0
76 | 966,1
77 | 967,0
78 | 968,0
79 | 969,1
80 | 970,0
81 | 971,1
82 | 972,0
83 | 973,0
84 | 974,0
85 | 975,0
86 | 976,0
87 | 977,0
88 | 978,1
89 | 979,1
90 | 980,1
91 | 981,0
92 | 982,1
93 | 983,0
94 | 984,1
95 | 985,0
96 | 986,0
97 | 987,0
98 | 988,1
99 | 989,0
100 | 990,1
101 | 991,0
102 | 992,1
103 | 993,0
104 | 994,0
105 | 995,0
106 | 996,1
107 | 997,0
108 | 998,0
109 | 999,0
110 | 1000,0
111 | 1001,0
112 | 1002,0
113 | 1003,1
114 | 1004,1
115 | 1005,1
116 | 1006,1
117 | 1007,0
118 | 1008,0
119 | 1009,1
120 | 1010,0
121 | 1011,1
122 | 1012,1
123 | 1013,0
124 | 1014,1
125 | 1015,0
126 | 1016,0
127 | 1017,1
128 | 1018,0
129 | 1019,1
130 | 1020,0
131 | 1021,0
132 | 1022,0
133 | 1023,0
134 | 1024,1
135 | 1025,0
136 | 1026,0
137 | 1027,0
138 | 1028,0
139 | 1029,0
140 | 1030,1
141 | 1031,0
142 | 1032,1
143 | 1033,1
144 | 1034,0
145 | 1035,0
146 | 1036,0
147 | 1037,0
148 | 1038,0
149 | 1039,0
150 | 1040,0
151 | 1041,0
152 | 1042,1
153 | 1043,0
154 | 1044,0
155 | 1045,1
156 | 1046,0
157 | 1047,0
158 | 1048,1
159 | 1049,1
160 | 1050,0
161 | 1051,1
162 | 1052,1
163 | 1053,0
164 | 1054,1
165 | 1055,0
166 | 1056,0
167 | 1057,1
168 | 1058,0
169 | 1059,0
170 | 1060,1
171 | 1061,1
172 | 1062,0
173 | 1063,0
174 | 1064,0
175 | 1065,0
176 | 1066,0
177 | 1067,1
178 | 1068,1
179 | 1069,0
180 | 1070,1
181 | 1071,1
182 | 1072,0
183 | 1073,0
184 | 1074,1
185 | 1075,0
186 | 1076,1
187 | 1077,0
188 | 1078,1
189 | 1079,0
190 | 1080,1
191 | 1081,0
192 | 1082,0
193 | 1083,0
194 | 1084,0
195 | 1085,0
196 | 1086,0
197 | 1087,0
198 | 1088,0
199 | 1089,1
200 | 1090,0
201 | 1091,1
202 | 1092,1
203 | 1093,0
204 | 1094,0
205 | 1095,1
206 | 1096,0
207 | 1097,0
208 | 1098,1
209 | 1099,0
210 | 1100,1
211 | 1101,0
212 | 1102,0
213 | 1103,0
214 | 1104,0
215 | 1105,1
216 | 1106,1
217 | 1107,0
218 | 1108,1
219 | 1109,0
220 | 1110,1
221 | 1111,0
222 | 1112,1
223 | 1113,0
224 | 1114,1
225 | 1115,0
226 | 1116,1
227 | 1117,1
228 | 1118,0
229 | 1119,1
230 | 1120,0
231 | 1121,0
232 | 1122,0
233 | 1123,1
234 | 1124,0
235 | 1125,0
236 | 1126,0
237 | 1127,0
238 | 1128,0
239 | 1129,0
240 | 1130,1
241 | 1131,1
242 | 1132,1
243 | 1133,1
244 | 1134,0
245 | 1135,0
246 | 1136,0
247 | 1137,0
248 | 1138,1
249 | 1139,0
250 | 1140,1
251 | 1141,1
252 | 1142,1
253 | 1143,0
254 | 1144,0
255 | 1145,0
256 | 1146,0
257 | 1147,0
258 | 1148,0
259 | 1149,0
260 | 1150,1
261 | 1151,0
262 | 1152,0
263 | 1153,0
264 | 1154,1
265 | 1155,1
266 | 1156,0
267 | 1157,0
268 | 1158,0
269 | 1159,0
270 | 1160,1
271 | 1161,0
272 | 1162,0
273 | 1163,0
274 | 1164,1
275 | 1165,1
276 | 1166,0
277 | 1167,1
278 | 1168,0
279 | 1169,0
280 | 1170,0
281 | 1171,0
282 | 1172,1
283 | 1173,0
284 | 1174,1
285 | 1175,1
286 | 1176,1
287 | 1177,0
288 | 1178,0
289 | 1179,0
290 | 1180,0
291 | 1181,0
292 | 1182,0
293 | 1183,1
294 | 1184,0
295 | 1185,0
296 | 1186,0
297 | 1187,0
298 | 1188,1
299 | 1189,0
300 | 1190,0
301 | 1191,0
302 | 1192,0
303 | 1193,0
304 | 1194,0
305 | 1195,0
306 | 1196,1
307 | 1197,1
308 | 1198,0
309 | 1199,0
310 | 1200,0
311 | 1201,1
312 | 1202,0
313 | 1203,0
314 | 1204,0
315 | 1205,1
316 | 1206,1
317 | 1207,1
318 | 1208,0
319 | 1209,0
320 | 1210,0
321 | 1211,0
322 | 1212,0
323 | 1213,0
324 | 1214,0
325 | 1215,0
326 | 1216,1
327 | 1217,0
328 | 1218,1
329 | 1219,0
330 | 1220,0
331 | 1221,0
332 | 1222,1
333 | 1223,0
334 | 1224,0
335 | 1225,1
336 | 1226,0
337 | 1227,0
338 | 1228,0
339 | 1229,0
340 | 1230,0
341 | 1231,0
342 | 1232,0
343 | 1233,0
344 | 1234,0
345 | 1235,1
346 | 1236,0
347 | 1237,1
348 | 1238,0
349 | 1239,1
350 | 1240,0
351 | 1241,1
352 | 1242,1
353 | 1243,0
354 | 1244,0
355 | 1245,0
356 | 1246,1
357 | 1247,0
358 | 1248,1
359 | 1249,0
360 | 1250,0
361 | 1251,1
362 | 1252,0
363 | 1253,1
364 | 1254,1
365 | 1255,0
366 | 1256,1
367 | 1257,1
368 | 1258,0
369 | 1259,1
370 | 1260,1
371 | 1261,0
372 | 1262,0
373 | 1263,1
374 | 1264,0
375 | 1265,0
376 | 1266,1
377 | 1267,1
378 | 1268,1
379 | 1269,0
380 | 1270,0
381 | 1271,0
382 | 1272,0
383 | 1273,0
384 | 1274,1
385 | 1275,1
386 | 1276,0
387 | 1277,1
388 | 1278,0
389 | 1279,0
390 | 1280,0
391 | 1281,0
392 | 1282,0
393 | 1283,1
394 | 1284,0
395 | 1285,0
396 | 1286,0
397 | 1287,1
398 | 1288,0
399 | 1289,1
400 | 1290,0
401 | 1291,0
402 | 1292,1
403 | 1293,0
404 | 1294,1
405 | 1295,0
406 | 1296,0
407 | 1297,0
408 | 1298,0
409 | 1299,0
410 | 1300,1
411 | 1301,1
412 | 1302,1
413 | 1303,1
414 | 1304,1
415 | 1305,0
416 | 1306,1
417 | 1307,0
418 | 1308,0
419 | 1309,0
420 |
--------------------------------------------------------------------------------
/titanic/test.csv:
--------------------------------------------------------------------------------
1 | PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
2 | 892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,Q
3 | 893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,S
4 | 894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,Q
5 | 895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,S
6 | 896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,S
7 | 897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,S
8 | 898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,Q
9 | 899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,S
10 | 900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,C
11 | 901,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,S
12 | 902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,S
13 | 903,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,S
14 | 904,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,S
15 | 905,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,S
16 | 906,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,S
17 | 907,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,C
18 | 908,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,Q
19 | 909,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,C
20 | 910,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,S
21 | 911,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,C
22 | 912,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,C
23 | 913,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,S
24 | 914,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,S
25 | 915,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,C
26 | 916,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,C
27 | 917,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,S
28 | 918,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,C
29 | 919,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,C
30 | 920,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,S
31 | 921,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,C
32 | 922,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,S
33 | 923,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,S
34 | 924,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,S
35 | 925,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,S
36 | 926,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C
37 | 927,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,C
38 | 928,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,S
39 | 929,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,S
40 | 930,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,S
41 | 931,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,S
42 | 932,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,C
43 | 933,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,S
44 | 934,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,S
45 | 935,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,S
46 | 936,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,S
47 | 937,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,S
48 | 938,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,C
49 | 939,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,Q
50 | 940,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,C
51 | 941,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,S
52 | 942,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,S
53 | 943,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,C
54 | 944,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,S
55 | 945,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,S
56 | 946,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,C
57 | 947,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,Q
58 | 948,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,S
59 | 949,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,S
60 | 950,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,S
61 | 951,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,C
62 | 952,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,S
63 | 953,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,S
64 | 954,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,S
65 | 955,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,Q
66 | 956,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,C
67 | 957,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,S
68 | 958,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,Q
69 | 959,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,S
70 | 960,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C
71 | 961,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,S
72 | 962,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,Q
73 | 963,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,S
74 | 964,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,S
75 | 965,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,C
76 | 966,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C
77 | 967,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C
78 | 968,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,S
79 | 969,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,S
80 | 970,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,S
81 | 971,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,Q
82 | 972,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,C
83 | 973,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,S
84 | 974,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,S
85 | 975,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,S
86 | 976,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,Q
87 | 977,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,C
88 | 978,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,Q
89 | 979,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,S
90 | 980,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,Q
91 | 981,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,S
92 | 982,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,S
93 | 983,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,S
94 | 984,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,S
95 | 985,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,S
96 | 986,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,C
97 | 987,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,S
98 | 988,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,S
99 | 989,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,S
100 | 990,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,S
101 | 991,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,S
102 | 992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C
103 | 993,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,S
104 | 994,3,"Foley, Mr. William",male,,0,0,365235,7.75,Q
105 | 995,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,S
106 | 996,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,C
107 | 997,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,S
108 | 998,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,Q
109 | 999,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,Q
110 | 1000,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,S
111 | 1001,2,"Swane, Mr. George",male,18.5,0,0,248734,13,S
112 | 1002,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,C
113 | 1003,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,Q
114 | 1004,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,C
115 | 1005,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,Q
116 | 1006,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,S
117 | 1007,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,C
118 | 1008,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,C
119 | 1009,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,S
120 | 1010,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C
121 | 1011,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,S
122 | 1012,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,S
123 | 1013,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,Q
124 | 1014,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C
125 | 1015,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,S
126 | 1016,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,Q
127 | 1017,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,S
128 | 1018,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,S
129 | 1019,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,Q
130 | 1020,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,S
131 | 1021,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,S
132 | 1022,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,S
133 | 1023,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C
134 | 1024,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,S
135 | 1025,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,C
136 | 1026,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,S
137 | 1027,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,S
138 | 1028,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,C
139 | 1029,2,"Schmidt, Mr. August",male,26,0,0,248659,13,S
140 | 1030,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,S
141 | 1031,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,S
142 | 1032,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,S
143 | 1033,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,S
144 | 1034,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,C
145 | 1035,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,S
146 | 1036,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,S
147 | 1037,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,S
148 | 1038,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,S
149 | 1039,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,S
150 | 1040,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,S
151 | 1041,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,S
152 | 1042,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C
153 | 1043,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,C
154 | 1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,S
155 | 1045,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,S
156 | 1046,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,S
157 | 1047,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,S
158 | 1048,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,S
159 | 1049,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,S
160 | 1050,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,S
161 | 1051,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,S
162 | 1052,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,Q
163 | 1053,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,C
164 | 1054,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,S
165 | 1055,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,S
166 | 1056,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,S
167 | 1057,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,S
168 | 1058,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,C
169 | 1059,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,S
170 | 1060,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,C
171 | 1061,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,S
172 | 1062,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,S
173 | 1063,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,C
174 | 1064,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,S
175 | 1065,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,C
176 | 1066,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,S
177 | 1067,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,S
178 | 1068,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,S
179 | 1069,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C
180 | 1070,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,S
181 | 1071,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,C
182 | 1072,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,S
183 | 1073,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,C
184 | 1074,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,S
185 | 1075,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,Q
186 | 1076,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,C
187 | 1077,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,S
188 | 1078,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,S
189 | 1079,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,S
190 | 1080,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,S
191 | 1081,2,"Veal, Mr. James",male,40,0,0,28221,13,S
192 | 1082,2,"Angle, Mr. William A",male,34,1,0,226875,26,S
193 | 1083,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,S
194 | 1084,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,S
195 | 1085,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,Q
196 | 1086,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,S
197 | 1087,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,S
198 | 1088,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,C
199 | 1089,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,S
200 | 1090,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,S
201 | 1091,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,S
202 | 1092,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,Q
203 | 1093,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,S
204 | 1094,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C
205 | 1095,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,S
206 | 1096,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,S
207 | 1097,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,C
208 | 1098,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,Q
209 | 1099,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,S
210 | 1100,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,C
211 | 1101,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,S
212 | 1102,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,S
213 | 1103,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,S
214 | 1104,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,S
215 | 1105,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,S
216 | 1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,S
217 | 1107,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,S
218 | 1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,Q
219 | 1109,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,S
220 | 1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C
221 | 1111,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,S
222 | 1112,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,C
223 | 1113,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,S
224 | 1114,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,S
225 | 1115,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,S
226 | 1116,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,C
227 | 1117,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,C
228 | 1118,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,S
229 | 1119,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,Q
230 | 1120,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,S
231 | 1121,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,S
232 | 1122,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,S
233 | 1123,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,S
234 | 1124,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,S
235 | 1125,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,Q
236 | 1126,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C
237 | 1127,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,S
238 | 1128,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,C
239 | 1129,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,C
240 | 1130,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,S
241 | 1131,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C
242 | 1132,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,C
243 | 1133,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,S
244 | 1134,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,C
245 | 1135,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,S
246 | 1136,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,S
247 | 1137,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,S
248 | 1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,S
249 | 1139,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,S
250 | 1140,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,S
251 | 1141,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,C
252 | 1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,S
253 | 1143,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,S
254 | 1144,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C
255 | 1145,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,S
256 | 1146,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,S
257 | 1147,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,S
258 | 1148,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,Q
259 | 1149,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,S
260 | 1150,2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,S
261 | 1151,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,S
262 | 1152,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,S
263 | 1153,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,S
264 | 1154,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,S
265 | 1155,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,S
266 | 1156,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,C
267 | 1157,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,S
268 | 1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,S
269 | 1159,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,S
270 | 1160,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,S
271 | 1161,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,S
272 | 1162,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C
273 | 1163,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,Q
274 | 1164,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C
275 | 1165,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,Q
276 | 1166,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,C
277 | 1167,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,S
278 | 1168,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,S
279 | 1169,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,S
280 | 1170,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,S
281 | 1171,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,S
282 | 1172,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,S
283 | 1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,S
284 | 1174,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,Q
285 | 1175,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,C
286 | 1176,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,S
287 | 1177,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,S
288 | 1178,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,S
289 | 1179,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,S
290 | 1180,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,C
291 | 1181,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,S
292 | 1182,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,S
293 | 1183,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,Q
294 | 1184,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,C
295 | 1185,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,S
296 | 1186,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,S
297 | 1187,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,S
298 | 1188,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,C
299 | 1189,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,C
300 | 1190,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,S
301 | 1191,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,S
302 | 1192,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,S
303 | 1193,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,C
304 | 1194,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,S
305 | 1195,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,S
306 | 1196,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,Q
307 | 1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,S
308 | 1198,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,S
309 | 1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,S
310 | 1200,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,S
311 | 1201,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,S
312 | 1202,3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,S
313 | 1203,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,C
314 | 1204,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,S
315 | 1205,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,Q
316 | 1206,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C
317 | 1207,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,Q
318 | 1208,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,C
319 | 1209,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,S
320 | 1210,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,S
321 | 1211,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,S
322 | 1212,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,S
323 | 1213,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,C
324 | 1214,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,S
325 | 1215,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,S
326 | 1216,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,S
327 | 1217,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,S
328 | 1218,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,S
329 | 1219,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,C
330 | 1220,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,S
331 | 1221,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,S
332 | 1222,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,S
333 | 1223,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,C
334 | 1224,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,C
335 | 1225,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,C
336 | 1226,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,S
337 | 1227,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,S
338 | 1228,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,S
339 | 1229,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,C
340 | 1230,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,S
341 | 1231,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,C
342 | 1232,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,S
343 | 1233,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,S
344 | 1234,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,S
345 | 1235,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,C
346 | 1236,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,S
347 | 1237,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,S
348 | 1238,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,S
349 | 1239,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,C
350 | 1240,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,S
351 | 1241,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,S
352 | 1242,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,C
353 | 1243,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,S
354 | 1244,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,S
355 | 1245,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,S
356 | 1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,S
357 | 1247,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,S
358 | 1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,S
359 | 1249,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,S
360 | 1250,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,Q
361 | 1251,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,S
362 | 1252,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,S
363 | 1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,C
364 | 1254,2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,S
365 | 1255,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,S
366 | 1256,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,C
367 | 1257,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,S
368 | 1258,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,C
369 | 1259,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,S
370 | 1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,C
371 | 1261,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,C
372 | 1262,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,S
373 | 1263,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,C
374 | 1264,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,S
375 | 1265,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,S
376 | 1266,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,S
377 | 1267,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,C
378 | 1268,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,S
379 | 1269,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,S
380 | 1270,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,S
381 | 1271,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,S
382 | 1272,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,Q
383 | 1273,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,Q
384 | 1274,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,S
385 | 1275,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,S
386 | 1276,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,S
387 | 1277,2,"Herman, Miss. Kate",female,24,1,2,220845,65,S
388 | 1278,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,S
389 | 1279,2,"Ashby, Mr. John",male,57,0,0,244346,13,S
390 | 1280,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,Q
391 | 1281,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,S
392 | 1282,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,S
393 | 1283,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,S
394 | 1284,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,S
395 | 1285,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,S
396 | 1286,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,S
397 | 1287,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,S
398 | 1288,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,Q
399 | 1289,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,C
400 | 1290,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,S
401 | 1291,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,Q
402 | 1292,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,S
403 | 1293,2,"Gale, Mr. Harry",male,38,1,0,28664,21,S
404 | 1294,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,C
405 | 1295,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,S
406 | 1296,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,C
407 | 1297,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,C
408 | 1298,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,S
409 | 1299,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C
410 | 1300,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,Q
411 | 1301,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,S
412 | 1302,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,Q
413 | 1303,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,Q
414 | 1304,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,S
415 | 1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,S
416 | 1306,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C
417 | 1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,S
418 | 1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,S
419 | 1309,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,C
420 |
--------------------------------------------------------------------------------
/titanic/train.csv:
--------------------------------------------------------------------------------
1 | PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
2 | 1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,S
3 | 2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C
4 | 3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,S
5 | 4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,S
6 | 5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,S
7 | 6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,Q
8 | 7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,S
9 | 8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,S
10 | 9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,S
11 | 10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,C
12 | 11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,S
13 | 12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,S
14 | 13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,S
15 | 14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,S
16 | 15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,S
17 | 16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,S
18 | 17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,Q
19 | 18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,S
20 | 19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,S
21 | 20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,C
22 | 21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,S
23 | 22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,S
24 | 23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,Q
25 | 24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,S
26 | 25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,S
27 | 26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,S
28 | 27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,C
29 | 28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,S
30 | 29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,Q
31 | 30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,S
32 | 31,0,1,"Uruchurtu, Don. Manuel E",male,40,0,0,PC 17601,27.7208,C
33 | 32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,C
34 | 33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,Q
35 | 34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,S
36 | 35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,C
37 | 36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,S
38 | 37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,C
39 | 38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,S
40 | 39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,S
41 | 40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,C
42 | 41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,S
43 | 42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,S
44 | 43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,C
45 | 44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,C
46 | 45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,Q
47 | 46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,S
48 | 47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,Q
49 | 48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,Q
50 | 49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,C
51 | 50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,S
52 | 51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,S
53 | 52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,S
54 | 53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,C
55 | 54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,S
56 | 55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,C
57 | 56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,S
58 | 57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,S
59 | 58,0,3,"Novel, Mr. Mansouer",male,28.5,0,0,2697,7.2292,C
60 | 59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,S
61 | 60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,S
62 | 61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,C
63 | 62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,
64 | 63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,S
65 | 64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,S
66 | 65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,C
67 | 66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,C
68 | 67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,S
69 | 68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,S
70 | 69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,S
71 | 70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,S
72 | 71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,S
73 | 72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,S
74 | 73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,S
75 | 74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,C
76 | 75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,S
77 | 76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,S
78 | 77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,S
79 | 78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,S
80 | 79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,S
81 | 80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,S
82 | 81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,S
83 | 82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,S
84 | 83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,Q
85 | 84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,S
86 | 85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,S
87 | 86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,S
88 | 87,0,3,"Ford, Mr. William Neal",male,16,1,3,W./C. 6608,34.375,S
89 | 88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,S
90 | 89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,S
91 | 90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,S
92 | 91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,S
93 | 92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,S
94 | 93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,S
95 | 94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,S
96 | 95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,S
97 | 96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,S
98 | 97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,C
99 | 98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,C
100 | 99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,S
101 | 100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,S
102 | 101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,S
103 | 102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,S
104 | 103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,S
105 | 104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,S
106 | 105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,S
107 | 106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,S
108 | 107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,S
109 | 108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,S
110 | 109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,S
111 | 110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,Q
112 | 111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,S
113 | 112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,C
114 | 113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,S
115 | 114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,S
116 | 115,0,3,"Attalah, Miss. Malake",female,17,0,0,2627,14.4583,C
117 | 116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,S
118 | 117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,Q
119 | 118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,S
120 | 119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,C
121 | 120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,S
122 | 121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,S
123 | 122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,S
124 | 123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,C
125 | 124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,S
126 | 125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,S
127 | 126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,C
128 | 127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,Q
129 | 128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,S
130 | 129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,C
131 | 130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,S
132 | 131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,C
133 | 132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,S
134 | 133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,S
135 | 134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,S
136 | 135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,S
137 | 136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,C
138 | 137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,S
139 | 138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,S
140 | 139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,S
141 | 140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,C
142 | 141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,C
143 | 142,1,3,"Nysten, Miss. Anna Sofia",female,22,0,0,347081,7.75,S
144 | 143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,S
145 | 144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,Q
146 | 145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,S
147 | 146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,S
148 | 147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,S
149 | 148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,S
150 | 149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,S
151 | 150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,S
152 | 151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,S
153 | 152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,S
154 | 153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,S
155 | 154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,S
156 | 155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,S
157 | 156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,C
158 | 157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,Q
159 | 158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,S
160 | 159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,S
161 | 160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,S
162 | 161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,S
163 | 162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,S
164 | 163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,S
165 | 164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,S
166 | 165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,S
167 | 166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,S
168 | 167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",female,,0,1,113505,55,S
169 | 168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,S
170 | 169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,S
171 | 170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,S
172 | 171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,S
173 | 172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,Q
174 | 173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,S
175 | 174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,S
176 | 175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,C
177 | 176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,S
178 | 177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,S
179 | 178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C
180 | 179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,S
181 | 180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,S
182 | 181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,S
183 | 182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,C
184 | 183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,S
185 | 184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,S
186 | 185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,S
187 | 186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,S
188 | 187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,Q
189 | 188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,S
190 | 189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,Q
191 | 190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,S
192 | 191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,S
193 | 192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,S
194 | 193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,S
195 | 194,1,2,"Navratil, Master. Michel M",male,3,1,1,230080,26,S
196 | 195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,C
197 | 196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,C
198 | 197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,Q
199 | 198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,S
200 | 199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,Q
201 | 200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,S
202 | 201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,S
203 | 202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,S
204 | 203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,S
205 | 204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,C
206 | 205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,S
207 | 206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,S
208 | 207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,S
209 | 208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,C
210 | 209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,Q
211 | 210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,C
212 | 211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,S
213 | 212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,S
214 | 213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,S
215 | 214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,S
216 | 215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,Q
217 | 216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,C
218 | 217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,S
219 | 218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,S
220 | 219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,C
221 | 220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,S
222 | 221,1,3,"Sunderland, Mr. Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,S
223 | 222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,S
224 | 223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,S
225 | 224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,S
226 | 225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,S
227 | 226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,S
228 | 227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,S
229 | 228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,S
230 | 229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,S
231 | 230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,S
232 | 231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,S
233 | 232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,S
234 | 233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,S
235 | 234,1,3,"Asplund, Miss. Lillian Gertrud",female,5,4,2,347077,31.3875,S
236 | 235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,S
237 | 236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,S
238 | 237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,S
239 | 238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,S
240 | 239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,S
241 | 240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,S
242 | 241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,C
243 | 242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,Q
244 | 243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,S
245 | 244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,S
246 | 245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,C
247 | 246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,Q
248 | 247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,S
249 | 248,1,2,"Hamalainen, Mrs. William (Anna)",female,24,0,2,250649,14.5,S
250 | 249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,S
251 | 250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,S
252 | 251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,S
253 | 252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,S
254 | 253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,S
255 | 254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,S
256 | 255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,S
257 | 256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,C
258 | 257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,C
259 | 258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,S
260 | 259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,C
261 | 260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,S
262 | 261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,Q
263 | 262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,S
264 | 263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,S
265 | 264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,S
266 | 265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,Q
267 | 266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,S
268 | 267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,S
269 | 268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,S
270 | 269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,S
271 | 270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,S
272 | 271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,S
273 | 272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,S
274 | 273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,S
275 | 274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C
276 | 275,1,3,"Healy, Miss. Hanora ""Nora""",female,,0,0,370375,7.75,Q
277 | 276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,S
278 | 277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,S
279 | 278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,S
280 | 279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,Q
281 | 280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,S
282 | 281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,Q
283 | 282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,S
284 | 283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,S
285 | 284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,S
286 | 285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,S
287 | 286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,C
288 | 287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,S
289 | 288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,S
290 | 289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,S
291 | 290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,Q
292 | 291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,S
293 | 292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,C
294 | 293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,C
295 | 294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,S
296 | 295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,S
297 | 296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,C
298 | 297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,C
299 | 298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,S
300 | 299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,S
301 | 300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,C
302 | 301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,Q
303 | 302,1,3,"McCoy, Mr. Bernard",male,,2,0,367226,23.25,Q
304 | 303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,S
305 | 304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,Q
306 | 305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,S
307 | 306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,S
308 | 307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,C
309 | 308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C
310 | 309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,C
311 | 310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,C
312 | 311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C
313 | 312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,C
314 | 313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,S
315 | 314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,S
316 | 315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,S
317 | 316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,S
318 | 317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,S
319 | 318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,S
320 | 319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,S
321 | 320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,C
322 | 321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,S
323 | 322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,S
324 | 323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,Q
325 | 324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,S
326 | 325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,S
327 | 326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C
328 | 327,0,3,"Nysveen, Mr. Johan Hansen",male,61,0,0,345364,6.2375,S
329 | 328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,S
330 | 329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,S
331 | 330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,C
332 | 331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,Q
333 | 332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,S
334 | 333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,S
335 | 334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,S
336 | 335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,S
337 | 336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,S
338 | 337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,S
339 | 338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,C
340 | 339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,S
341 | 340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,S
342 | 341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,S
343 | 342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,S
344 | 343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,S
345 | 344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,S
346 | 345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,S
347 | 346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,S
348 | 347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,S
349 | 348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,S
350 | 349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,S
351 | 350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,S
352 | 351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,S
353 | 352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,S
354 | 353,0,3,"Elias, Mr. Tannous",male,15,1,1,2695,7.2292,C
355 | 354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,S
356 | 355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,C
357 | 356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,S
358 | 357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,S
359 | 358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,S
360 | 359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,Q
361 | 360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,Q
362 | 361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,S
363 | 362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,C
364 | 363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,C
365 | 364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,S
366 | 365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,Q
367 | 366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,S
368 | 367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,C
369 | 368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,C
370 | 369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,Q
371 | 370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,C
372 | 371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,C
373 | 372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,S
374 | 373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,S
375 | 374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,C
376 | 375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,S
377 | 376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,C
378 | 377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,S
379 | 378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C
380 | 379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,C
381 | 380,0,3,"Gustafsson, Mr. Karl Gideon",male,19,0,0,347069,7.775,S
382 | 381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,C
383 | 382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,C
384 | 383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,S
385 | 384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,S
386 | 385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,S
387 | 386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,S
388 | 387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,S
389 | 388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,S
390 | 389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,Q
391 | 390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,C
392 | 391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,S
393 | 392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,S
394 | 393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,S
395 | 394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,C
396 | 395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,S
397 | 396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,S
398 | 397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,S
399 | 398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,S
400 | 399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,S
401 | 400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,S
402 | 401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,S
403 | 402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,S
404 | 403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,S
405 | 404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,S
406 | 405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,S
407 | 406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,S
408 | 407,0,3,"Widegren, Mr. Carl/Charles Peter",male,51,0,0,347064,7.75,S
409 | 408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,S
410 | 409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,S
411 | 410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,S
412 | 411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,S
413 | 412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,Q
414 | 413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,Q
415 | 414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,S
416 | 415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,S
417 | 416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,S
418 | 417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,S
419 | 418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,S
420 | 419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,S
421 | 420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,S
422 | 421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,C
423 | 422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,Q
424 | 423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,S
425 | 424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,S
426 | 425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,S
427 | 426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,S
428 | 427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,S
429 | 428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,S
430 | 429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,Q
431 | 430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,S
432 | 431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,S
433 | 432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,S
434 | 433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,S
435 | 434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,S
436 | 435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,S
437 | 436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,S
438 | 437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,S
439 | 438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,S
440 | 439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,S
441 | 440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,S
442 | 441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,S
443 | 442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,S
444 | 443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,S
445 | 444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,S
446 | 445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,S
447 | 446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,S
448 | 447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,S
449 | 448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,S
450 | 449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,C
451 | 450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,S
452 | 451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,S
453 | 452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,S
454 | 453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C
455 | 454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C
456 | 455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,S
457 | 456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,C
458 | 457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,S
459 | 458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,S
460 | 459,1,2,"Toomey, Miss. Ellen",female,50,0,0,F.C.C. 13531,10.5,S
461 | 460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,Q
462 | 461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,S
463 | 462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,S
464 | 463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,S
465 | 464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,S
466 | 465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,S
467 | 466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,S
468 | 467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,S
469 | 468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,S
470 | 469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,Q
471 | 470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,C
472 | 471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,S
473 | 472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,S
474 | 473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,S
475 | 474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,C
476 | 475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,S
477 | 476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,S
478 | 477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,S
479 | 478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,S
480 | 479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,S
481 | 480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,S
482 | 481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,S
483 | 482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,S
484 | 483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,S
485 | 484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,S
486 | 485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,C
487 | 486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,S
488 | 487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,S
489 | 488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,C
490 | 489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,S
491 | 490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,S
492 | 491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,S
493 | 492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,S
494 | 493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,S
495 | 494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,C
496 | 495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,S
497 | 496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,C
498 | 497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,C
499 | 498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,S
500 | 499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,S
501 | 500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,S
502 | 501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,S
503 | 502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,Q
504 | 503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,Q
505 | 504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,S
506 | 505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,S
507 | 506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C
508 | 507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,S
509 | 508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,S
510 | 509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,S
511 | 510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,S
512 | 511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,Q
513 | 512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,S
514 | 513,1,1,"McGough, Mr. James Robert",male,36,0,0,PC 17473,26.2875,S
515 | 514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,C
516 | 515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,S
517 | 516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,S
518 | 517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,S
519 | 518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,Q
520 | 519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,S
521 | 520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,S
522 | 521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,S
523 | 522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,S
524 | 523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,C
525 | 524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,C
526 | 525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,C
527 | 526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,Q
528 | 527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,S
529 | 528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,S
530 | 529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,S
531 | 530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,S
532 | 531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,S
533 | 532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,C
534 | 533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,C
535 | 534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,C
536 | 535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,S
537 | 536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,S
538 | 537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,S
539 | 538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,C
540 | 539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,S
541 | 540,1,1,"Frolicher, Miss. Hedwig Margaritha",female,22,0,2,13568,49.5,C
542 | 541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,S
543 | 542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,S
544 | 543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,S
545 | 544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,S
546 | 545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C
547 | 546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,S
548 | 547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,S
549 | 548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,C
550 | 549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,S
551 | 550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,S
552 | 551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C
553 | 552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,S
554 | 553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,Q
555 | 554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,C
556 | 555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,S
557 | 556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,S
558 | 557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,C
559 | 558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,C
560 | 559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,S
561 | 560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,S
562 | 561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,Q
563 | 562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,S
564 | 563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,S
565 | 564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,S
566 | 565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,S
567 | 566,0,3,"Davies, Mr. Alfred J",male,24,2,0,A/4 48871,24.15,S
568 | 567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,S
569 | 568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,S
570 | 569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,C
571 | 570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,S
572 | 571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,S
573 | 572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,S
574 | 573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,S
575 | 574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,Q
576 | 575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,S
577 | 576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,S
578 | 577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,S
579 | 578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,S
580 | 579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,C
581 | 580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,S
582 | 581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,S
583 | 582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C
584 | 583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,S
585 | 584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,C
586 | 585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,C
587 | 586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,S
588 | 587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,S
589 | 588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,C
590 | 589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,S
591 | 590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,S
592 | 591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,S
593 | 592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,C
594 | 593,0,3,"Elsbury, Mr. William James",male,47,0,0,A/5 3902,7.25,S
595 | 594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,Q
596 | 595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,S
597 | 596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,S
598 | 597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,S
599 | 598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,S
600 | 599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,C
601 | 600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,C
602 | 601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,S
603 | 602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,S
604 | 603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,S
605 | 604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,S
606 | 605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,C
607 | 606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,S
608 | 607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,S
609 | 608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,S
610 | 609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,C
611 | 610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,S
612 | 611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,S
613 | 612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,S
614 | 613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,Q
615 | 614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,Q
616 | 615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,S
617 | 616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,S
618 | 617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,S
619 | 618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,S
620 | 619,1,2,"Becker, Miss. Marion Louise",female,4,2,1,230136,39,S
621 | 620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,S
622 | 621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,C
623 | 622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,S
624 | 623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,C
625 | 624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,S
626 | 625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,S
627 | 626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,S
628 | 627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,Q
629 | 628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,S
630 | 629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,S
631 | 630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,Q
632 | 631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,S
633 | 632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,S
634 | 633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,C
635 | 634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,S
636 | 635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,S
637 | 636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,S
638 | 637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,S
639 | 638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,S
640 | 639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,S
641 | 640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,S
642 | 641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,S
643 | 642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,C
644 | 643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,S
645 | 644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,S
646 | 645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,C
647 | 646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,C
648 | 647,0,3,"Cor, Mr. Liudevit",male,19,0,0,349231,7.8958,S
649 | 648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,C
650 | 649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,S
651 | 650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,S
652 | 651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,S
653 | 652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,S
654 | 653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,S
655 | 654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,Q
656 | 655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,Q
657 | 656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,S
658 | 657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,S
659 | 658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,Q
660 | 659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,S
661 | 660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,C
662 | 661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,S
663 | 662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,C
664 | 663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,S
665 | 664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,S
666 | 665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,S
667 | 666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,S
668 | 667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,S
669 | 668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,S
670 | 669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,S
671 | 670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,S
672 | 671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,S
673 | 672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,S
674 | 673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,S
675 | 674,1,2,"Wilhelms, Mr. Charles",male,31,0,0,244270,13,S
676 | 675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,S
677 | 676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,S
678 | 677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,S
679 | 678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,S
680 | 679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,S
681 | 680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,C
682 | 681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,Q
683 | 682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,C
684 | 683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,S
685 | 684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,S
686 | 685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,S
687 | 686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,C
688 | 687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,S
689 | 688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,S
690 | 689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,S
691 | 690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,S
692 | 691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,S
693 | 692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,C
694 | 693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,S
695 | 694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,C
696 | 695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,S
697 | 696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,S
698 | 697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,S
699 | 698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,Q
700 | 699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C
701 | 700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,S
702 | 701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C
703 | 702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,S
704 | 703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,C
705 | 704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,Q
706 | 705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,S
707 | 706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,S
708 | 707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,S
709 | 708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,S
710 | 709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,S
711 | 710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,C
712 | 711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C
713 | 712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,S
714 | 713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,S
715 | 714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,S
716 | 715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,S
717 | 716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,S
718 | 717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C
719 | 718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,S
720 | 719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,Q
721 | 720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,S
722 | 721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,S
723 | 722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,S
724 | 723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,S
725 | 724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,S
726 | 725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,S
727 | 726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,S
728 | 727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,S
729 | 728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,Q
730 | 729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,S
731 | 730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,S
732 | 731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,S
733 | 732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,C
734 | 733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,S
735 | 734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,S
736 | 735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,S
737 | 736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,S
738 | 737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,S
739 | 738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,C
740 | 739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,S
741 | 740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,S
742 | 741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,S
743 | 742,0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,S
744 | 743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,C
745 | 744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,S
746 | 745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,S
747 | 746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,S
748 | 747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,S
749 | 748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,S
750 | 749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,S
751 | 750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,Q
752 | 751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,S
753 | 752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,S
754 | 753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,S
755 | 754,0,3,"Jonkoff, Mr. Lalio",male,23,0,0,349204,7.8958,S
756 | 755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,S
757 | 756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,S
758 | 757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,S
759 | 758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,S
760 | 759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,S
761 | 760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,S
762 | 761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,S
763 | 762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,S
764 | 763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,C
765 | 764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,S
766 | 765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,S
767 | 766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,S
768 | 767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,C
769 | 768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,Q
770 | 769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,Q
771 | 770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,S
772 | 771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,S
773 | 772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,S
774 | 773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,S
775 | 774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,C
776 | 775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,S
777 | 776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,S
778 | 777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,Q
779 | 778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,S
780 | 779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,Q
781 | 780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,S
782 | 781,1,3,"Ayoub, Miss. Banoura",female,13,0,0,2687,7.2292,C
783 | 782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,S
784 | 783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,S
785 | 784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,S
786 | 785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,S
787 | 786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,S
788 | 787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,S
789 | 788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,Q
790 | 789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,S
791 | 790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,C
792 | 791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,Q
793 | 792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,S
794 | 793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,S
795 | 794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,C
796 | 795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,S
797 | 796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,S
798 | 797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,S
799 | 798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,S
800 | 799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,C
801 | 800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,S
802 | 801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,S
803 | 802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,S
804 | 803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,S
805 | 804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,C
806 | 805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,S
807 | 806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,S
808 | 807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,S
809 | 808,0,3,"Pettersson, Miss. Ellen Natalia",female,18,0,0,347087,7.775,S
810 | 809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,S
811 | 810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,S
812 | 811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,S
813 | 812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,S
814 | 813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,S
815 | 814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,S
816 | 815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,S
817 | 816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,S
818 | 817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,S
819 | 818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,C
820 | 819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,S
821 | 820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,S
822 | 821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,S
823 | 822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,S
824 | 823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,S
825 | 824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,S
826 | 825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,S
827 | 826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,Q
828 | 827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,S
829 | 828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,C
830 | 829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,Q
831 | 830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,
832 | 831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,C
833 | 832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,S
834 | 833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,C
835 | 834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,S
836 | 835,0,3,"Allum, Mr. Owen George",male,18,0,0,2223,8.3,S
837 | 836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,C
838 | 837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,S
839 | 838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,S
840 | 839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,S
841 | 840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C
842 | 841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,S
843 | 842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,S
844 | 843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,C
845 | 844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,C
846 | 845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,S
847 | 846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,S
848 | 847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,S
849 | 848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,C
850 | 849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,S
851 | 850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C
852 | 851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,S
853 | 852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,S
854 | 853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,C
855 | 854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,S
856 | 855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,S
857 | 856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,S
858 | 857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,S
859 | 858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,S
860 | 859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,C
861 | 860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,C
862 | 861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,S
863 | 862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,S
864 | 863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,S
865 | 864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,S
866 | 865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,S
867 | 866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,S
868 | 867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,C
869 | 868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,S
870 | 869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,S
871 | 870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,S
872 | 871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,S
873 | 872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,S
874 | 873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,S
875 | 874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,S
876 | 875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,C
877 | 876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,C
878 | 877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,S
879 | 878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,S
880 | 879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,S
881 | 880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C
882 | 881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,S
883 | 882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,S
884 | 883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,S
885 | 884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,S
886 | 885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,S
887 | 886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,Q
888 | 887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,S
889 | 888,1,1,"Graham, Miss. Margaret Edith",female,19,0,0,112053,30,S
890 | 889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,S
891 | 890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C
892 | 891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,Q
893 |
--------------------------------------------------------------------------------
/gen_features.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "from sklearn.model_selection import KFold,StratifiedKFold\n",
10 | "import pandas as pd\n",
11 | "import numpy as np\n",
12 | "from tqdm import tqdm\n",
13 | "import warnings\n",
14 | "warnings.filterwarnings(\"ignore\")"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 2,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "train = pd.read_csv('./titanic/train.csv')\n",
24 | "test = pd.read_csv('./titanic/test.csv')"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 3,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "train['Age'] = train['Age'].fillna(train['Age'].mode())\n",
34 | "test['Age'] = test['Age'].fillna(test['Age'].mode())\n",
35 | "\n",
36 | "train['Embarked'] = train['Embarked'].fillna(train['Embarked'].mode())\n",
37 | "\n",
38 | "train.rename(columns={'Survived':'label'},inplace=True)\n",
39 | "test.rename(columns={'Survived':'label'},inplace=True)\n",
40 | "\n",
41 | "train = train.drop(['Name','Ticket'],axis=1)\n",
42 | "test = test.drop(['Name','Ticket'],axis=1)"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 4,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "text/html": [
53 | "
\n",
54 | "\n",
67 | "
\n",
68 | " \n",
69 | " \n",
70 | " | \n",
71 | " PassengerId | \n",
72 | " label | \n",
73 | " Pclass | \n",
74 | " Sex | \n",
75 | " Age | \n",
76 | " SibSp | \n",
77 | " Parch | \n",
78 | " Fare | \n",
79 | " Embarked | \n",
80 | "
\n",
81 | " \n",
82 | " \n",
83 | " \n",
84 | " | 0 | \n",
85 | " 1 | \n",
86 | " 0 | \n",
87 | " 3 | \n",
88 | " male | \n",
89 | " 22.0 | \n",
90 | " 1 | \n",
91 | " 0 | \n",
92 | " 7.2500 | \n",
93 | " S | \n",
94 | "
\n",
95 | " \n",
96 | " | 1 | \n",
97 | " 2 | \n",
98 | " 1 | \n",
99 | " 1 | \n",
100 | " female | \n",
101 | " 38.0 | \n",
102 | " 1 | \n",
103 | " 0 | \n",
104 | " 71.2833 | \n",
105 | " C | \n",
106 | "
\n",
107 | " \n",
108 | " | 2 | \n",
109 | " 3 | \n",
110 | " 1 | \n",
111 | " 3 | \n",
112 | " female | \n",
113 | " 26.0 | \n",
114 | " 0 | \n",
115 | " 0 | \n",
116 | " 7.9250 | \n",
117 | " S | \n",
118 | "
\n",
119 | " \n",
120 | " | 3 | \n",
121 | " 4 | \n",
122 | " 1 | \n",
123 | " 1 | \n",
124 | " female | \n",
125 | " 35.0 | \n",
126 | " 1 | \n",
127 | " 0 | \n",
128 | " 53.1000 | \n",
129 | " S | \n",
130 | "
\n",
131 | " \n",
132 | " | 4 | \n",
133 | " 5 | \n",
134 | " 0 | \n",
135 | " 3 | \n",
136 | " male | \n",
137 | " 35.0 | \n",
138 | " 0 | \n",
139 | " 0 | \n",
140 | " 8.0500 | \n",
141 | " S | \n",
142 | "
\n",
143 | " \n",
144 | "
\n",
145 | "
"
146 | ],
147 | "text/plain": [
148 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked\n",
149 | "0 1 0 3 male 22.0 1 0 7.2500 S\n",
150 | "1 2 1 1 female 38.0 1 0 71.2833 C\n",
151 | "2 3 1 3 female 26.0 0 0 7.9250 S\n",
152 | "3 4 1 1 female 35.0 1 0 53.1000 S\n",
153 | "4 5 0 3 male 35.0 0 0 8.0500 S"
154 | ]
155 | },
156 | "execution_count": 4,
157 | "metadata": {},
158 | "output_type": "execute_result"
159 | }
160 | ],
161 | "source": [
162 | "train.head()"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 5,
168 | "metadata": {},
169 | "outputs": [
170 | {
171 | "data": {
172 | "text/plain": [
173 | "Index(['PassengerId', 'label', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch',\n",
174 | " 'Fare', 'Embarked'],\n",
175 | " dtype='object')"
176 | ]
177 | },
178 | "execution_count": 5,
179 | "metadata": {},
180 | "output_type": "execute_result"
181 | }
182 | ],
183 | "source": [
184 | "train.columns"
185 | ]
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 6,
190 | "metadata": {},
191 | "outputs": [],
192 | "source": [
193 | "#疑问:分组统计特征,要在train和test中单独求吗?"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 7,
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "name": "stderr",
203 | "output_type": "stream",
204 | "text": [
205 | "5it [00:00, 333.45it/s]\n"
206 | ]
207 | },
208 | {
209 | "data": {
210 | "text/html": [
211 | "\n",
212 | "\n",
225 | "
\n",
226 | " \n",
227 | " \n",
228 | " | \n",
229 | " PassengerId | \n",
230 | " label | \n",
231 | " Pclass | \n",
232 | " Sex | \n",
233 | " Age | \n",
234 | " SibSp | \n",
235 | " Parch | \n",
236 | " Fare | \n",
237 | " Embarked | \n",
238 | " Sex_target_enc | \n",
239 | " Embarked_target_enc | \n",
240 | "
\n",
241 | " \n",
242 | " \n",
243 | " \n",
244 | " | 0 | \n",
245 | " 1 | \n",
246 | " 0.0 | \n",
247 | " 3 | \n",
248 | " male | \n",
249 | " 22.0 | \n",
250 | " 1 | \n",
251 | " 0 | \n",
252 | " 7.2500 | \n",
253 | " S | \n",
254 | " 0.195279 | \n",
255 | " 0.335271 | \n",
256 | "
\n",
257 | " \n",
258 | " | 1 | \n",
259 | " 2 | \n",
260 | " 1.0 | \n",
261 | " 1 | \n",
262 | " female | \n",
263 | " 38.0 | \n",
264 | " 1 | \n",
265 | " 0 | \n",
266 | " 71.2833 | \n",
267 | " C | \n",
268 | " 0.739837 | \n",
269 | " 0.589552 | \n",
270 | "
\n",
271 | " \n",
272 | " | 2 | \n",
273 | " 3 | \n",
274 | " 1.0 | \n",
275 | " 3 | \n",
276 | " female | \n",
277 | " 26.0 | \n",
278 | " 0 | \n",
279 | " 0 | \n",
280 | " 7.9250 | \n",
281 | " S | \n",
282 | " 0.739837 | \n",
283 | " 0.335271 | \n",
284 | "
\n",
285 | " \n",
286 | " | 3 | \n",
287 | " 4 | \n",
288 | " 1.0 | \n",
289 | " 1 | \n",
290 | " female | \n",
291 | " 35.0 | \n",
292 | " 1 | \n",
293 | " 0 | \n",
294 | " 53.1000 | \n",
295 | " S | \n",
296 | " 0.739837 | \n",
297 | " 0.335271 | \n",
298 | "
\n",
299 | " \n",
300 | " | 4 | \n",
301 | " 5 | \n",
302 | " 0.0 | \n",
303 | " 3 | \n",
304 | " male | \n",
305 | " 35.0 | \n",
306 | " 0 | \n",
307 | " 0 | \n",
308 | " 8.0500 | \n",
309 | " S | \n",
310 | " 0.195279 | \n",
311 | " 0.335271 | \n",
312 | "
\n",
313 | " \n",
314 | "
\n",
315 | "
"
316 | ],
317 | "text/plain": [
318 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
319 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
320 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
321 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
322 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
323 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
324 | "\n",
325 | " Sex_target_enc Embarked_target_enc \n",
326 | "0 0.195279 0.335271 \n",
327 | "1 0.739837 0.589552 \n",
328 | "2 0.739837 0.335271 \n",
329 | "3 0.739837 0.335271 \n",
330 | "4 0.195279 0.335271 "
331 | ]
332 | },
333 | "execution_count": 7,
334 | "metadata": {},
335 | "output_type": "execute_result"
336 | }
337 | ],
338 | "source": [
339 | "# 目标编码\n",
340 | "# https://mp.weixin.qq.com/s/taMj-x-qLz5sw-7zld5BmA\n",
341 | "# target encoding,可以理解为转化率,就是聚合id后对label求mean、sum、count(lable=1)/count(all),\n",
342 | "# 这里需要考虑到数据穿越造成线下严重过拟合的情况,一般需要通过交叉平滑处理,如划分为5折,用其中4折去统计剩下的一折,\n",
343 | "# 然后对过大或过小的进行平滑,对测试集直接用训练集来做统计,若数据集有时间因素存在,则需要根据时间顺序来处理;\n",
344 | "def kfold_mean(df_train, df_test, target, target_mean_list):\n",
345 | " folds = StratifiedKFold(n_splits=5)\n",
346 | "\n",
347 | " mean_of_target = df_train[target].mean()\n",
348 | "\n",
349 | " for fold_, (trn_idx, val_idx) in tqdm(enumerate(folds.split(df_train, y=df_train[target]))):\n",
350 | " tr_x = df_train.iloc[trn_idx, :]\n",
351 | " vl_x = df_train.iloc[val_idx, :]\n",
352 | "\n",
353 | " for col in target_mean_list:\n",
354 | " df_train.loc[vl_x.index, f'{col}_target_enc'] = vl_x[col].map(tr_x.groupby(col)[target].mean())\n",
355 | "\n",
356 | " for col in target_mean_list:\n",
357 | " df_train[f'{col}_target_enc'].fillna(mean_of_target, inplace=True)\n",
358 | "\n",
359 | " df_test[f'{col}_target_enc'] = df_test[col].map(df_train.groupby(col)[f'{col}_target_enc'].mean())\n",
360 | "\n",
361 | " df_test[f'{col}_target_enc'].fillna(mean_of_target, inplace=True)\n",
362 | " return pd.concat([df_train, df_test], ignore_index=True)\n",
363 | "\n",
364 | "feature_list = ['Sex','Embarked']\n",
365 | "data = pd.concat([train, test], ignore_index=True)\n",
366 | "data = kfold_mean(data[~data['label'].isna()], data[data['label'].isna()],'label',feature_list)\n",
367 | "\n",
368 | "data.head()"
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": 8,
374 | "metadata": {},
375 | "outputs": [
376 | {
377 | "name": "stderr",
378 | "output_type": "stream",
379 | "text": [
380 | "count_feas 基本交叉特征: 100%|██████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 499.26it/s]\n"
381 | ]
382 | },
383 | {
384 | "data": {
385 | "text/html": [
386 | "\n",
387 | "\n",
400 | "
\n",
401 | " \n",
402 | " \n",
403 | " | \n",
404 | " PassengerId | \n",
405 | " label | \n",
406 | " Pclass | \n",
407 | " Sex | \n",
408 | " Age | \n",
409 | " SibSp | \n",
410 | " Parch | \n",
411 | " Fare | \n",
412 | " Embarked | \n",
413 | " Sex_target_enc | \n",
414 | " ... | \n",
415 | " Pclass_Age_add | \n",
416 | " Pclass_Age_diff | \n",
417 | " Age_Fare_ratio | \n",
418 | " Age_Fare_multi | \n",
419 | " Age_Fare_add | \n",
420 | " Age_Fare_diff | \n",
421 | " Age_Pclass_ratio | \n",
422 | " Age_Pclass_multi | \n",
423 | " Age_Pclass_add | \n",
424 | " Age_Pclass_diff | \n",
425 | "
\n",
426 | " \n",
427 | " \n",
428 | " \n",
429 | " | 0 | \n",
430 | " 1 | \n",
431 | " 0.0 | \n",
432 | " 3 | \n",
433 | " male | \n",
434 | " 22.0 | \n",
435 | " 1 | \n",
436 | " 0 | \n",
437 | " 7.2500 | \n",
438 | " S | \n",
439 | " 0.195279 | \n",
440 | " ... | \n",
441 | " 25.0 | \n",
442 | " -19.0 | \n",
443 | " 3.034483 | \n",
444 | " 159.5000 | \n",
445 | " 29.2500 | \n",
446 | " 14.7500 | \n",
447 | " 7.333333 | \n",
448 | " 66.0 | \n",
449 | " 25.0 | \n",
450 | " 19.0 | \n",
451 | "
\n",
452 | " \n",
453 | " | 1 | \n",
454 | " 2 | \n",
455 | " 1.0 | \n",
456 | " 1 | \n",
457 | " female | \n",
458 | " 38.0 | \n",
459 | " 1 | \n",
460 | " 0 | \n",
461 | " 71.2833 | \n",
462 | " C | \n",
463 | " 0.739837 | \n",
464 | " ... | \n",
465 | " 39.0 | \n",
466 | " -37.0 | \n",
467 | " 0.533084 | \n",
468 | " 2708.7654 | \n",
469 | " 109.2833 | \n",
470 | " -33.2833 | \n",
471 | " 38.000000 | \n",
472 | " 38.0 | \n",
473 | " 39.0 | \n",
474 | " 37.0 | \n",
475 | "
\n",
476 | " \n",
477 | " | 2 | \n",
478 | " 3 | \n",
479 | " 1.0 | \n",
480 | " 3 | \n",
481 | " female | \n",
482 | " 26.0 | \n",
483 | " 0 | \n",
484 | " 0 | \n",
485 | " 7.9250 | \n",
486 | " S | \n",
487 | " 0.739837 | \n",
488 | " ... | \n",
489 | " 29.0 | \n",
490 | " -23.0 | \n",
491 | " 3.280757 | \n",
492 | " 206.0500 | \n",
493 | " 33.9250 | \n",
494 | " 18.0750 | \n",
495 | " 8.666667 | \n",
496 | " 78.0 | \n",
497 | " 29.0 | \n",
498 | " 23.0 | \n",
499 | "
\n",
500 | " \n",
501 | " | 3 | \n",
502 | " 4 | \n",
503 | " 1.0 | \n",
504 | " 1 | \n",
505 | " female | \n",
506 | " 35.0 | \n",
507 | " 1 | \n",
508 | " 0 | \n",
509 | " 53.1000 | \n",
510 | " S | \n",
511 | " 0.739837 | \n",
512 | " ... | \n",
513 | " 36.0 | \n",
514 | " -34.0 | \n",
515 | " 0.659134 | \n",
516 | " 1858.5000 | \n",
517 | " 88.1000 | \n",
518 | " -18.1000 | \n",
519 | " 35.000000 | \n",
520 | " 35.0 | \n",
521 | " 36.0 | \n",
522 | " 34.0 | \n",
523 | "
\n",
524 | " \n",
525 | " | 4 | \n",
526 | " 5 | \n",
527 | " 0.0 | \n",
528 | " 3 | \n",
529 | " male | \n",
530 | " 35.0 | \n",
531 | " 0 | \n",
532 | " 0 | \n",
533 | " 8.0500 | \n",
534 | " S | \n",
535 | " 0.195279 | \n",
536 | " ... | \n",
537 | " 38.0 | \n",
538 | " -32.0 | \n",
539 | " 4.347826 | \n",
540 | " 281.7500 | \n",
541 | " 43.0500 | \n",
542 | " 26.9500 | \n",
543 | " 11.666667 | \n",
544 | " 105.0 | \n",
545 | " 38.0 | \n",
546 | " 32.0 | \n",
547 | "
\n",
548 | " \n",
549 | "
\n",
550 | "
5 rows × 35 columns
\n",
551 | "
"
552 | ],
553 | "text/plain": [
554 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
555 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
556 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
557 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
558 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
559 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
560 | "\n",
561 | " Sex_target_enc ... Pclass_Age_add Pclass_Age_diff Age_Fare_ratio \\\n",
562 | "0 0.195279 ... 25.0 -19.0 3.034483 \n",
563 | "1 0.739837 ... 39.0 -37.0 0.533084 \n",
564 | "2 0.739837 ... 29.0 -23.0 3.280757 \n",
565 | "3 0.739837 ... 36.0 -34.0 0.659134 \n",
566 | "4 0.195279 ... 38.0 -32.0 4.347826 \n",
567 | "\n",
568 | " Age_Fare_multi Age_Fare_add Age_Fare_diff Age_Pclass_ratio \\\n",
569 | "0 159.5000 29.2500 14.7500 7.333333 \n",
570 | "1 2708.7654 109.2833 -33.2833 38.000000 \n",
571 | "2 206.0500 33.9250 18.0750 8.666667 \n",
572 | "3 1858.5000 88.1000 -18.1000 35.000000 \n",
573 | "4 281.7500 43.0500 26.9500 11.666667 \n",
574 | "\n",
575 | " Age_Pclass_multi Age_Pclass_add Age_Pclass_diff \n",
576 | "0 66.0 25.0 19.0 \n",
577 | "1 38.0 39.0 37.0 \n",
578 | "2 78.0 29.0 23.0 \n",
579 | "3 35.0 36.0 34.0 \n",
580 | "4 105.0 38.0 32.0 \n",
581 | "\n",
582 | "[5 rows x 35 columns]"
583 | ]
584 | },
585 | "execution_count": 8,
586 | "metadata": {},
587 | "output_type": "execute_result"
588 | }
589 | ],
590 | "source": [
591 | "#数值交叉特征\n",
592 | "countfea = ['Fare','Pclass','Age']\n",
593 | "#数值特征与数值特征之间做加减乘除\n",
594 | "for f1 in tqdm(countfea, desc=\"count_feas 基本交叉特征\"):\n",
595 | " for f2 in countfea:\n",
596 | " if f1 != f2:\n",
597 | " data['{}_{}_ratio'.format(f1, f2)] = data[f1].values / data[f2].values\n",
598 | " data['{}_{}_multi'.format(f1, f2)] = data[f1].values * data[f2].values\n",
599 | " data['{}_{}_add'.format(f1, f2)] = data[f1].values + data[f2].values\n",
600 | " data['{}_{}_diff'.format(f1, f2)] = data[f1].values - data[f2].values\n",
601 | "data.head()"
602 | ]
603 | },
604 | {
605 | "cell_type": "code",
606 | "execution_count": 9,
607 | "metadata": {},
608 | "outputs": [
609 | {
610 | "name": "stderr",
611 | "output_type": "stream",
612 | "text": [
613 | "count_feas 基本聚合特征: 100%|██████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 111.11it/s]\n"
614 | ]
615 | },
616 | {
617 | "data": {
618 | "text/html": [
619 | "\n",
620 | "\n",
633 | "
\n",
634 | " \n",
635 | " \n",
636 | " | \n",
637 | " PassengerId | \n",
638 | " label | \n",
639 | " Pclass | \n",
640 | " Sex | \n",
641 | " Age | \n",
642 | " SibSp | \n",
643 | " Parch | \n",
644 | " Fare | \n",
645 | " Embarked | \n",
646 | " Sex_target_enc | \n",
647 | " ... | \n",
648 | " Age_Sex_mean | \n",
649 | " Age_Sex_median | \n",
650 | " Age_Sex_max | \n",
651 | " Age_Sex_min | \n",
652 | " Age_Sex_std | \n",
653 | " Age_Embarked_mean | \n",
654 | " Age_Embarked_median | \n",
655 | " Age_Embarked_max | \n",
656 | " Age_Embarked_min | \n",
657 | " Age_Embarked_std | \n",
658 | "
\n",
659 | " \n",
660 | " \n",
661 | " \n",
662 | " | 0 | \n",
663 | " 1 | \n",
664 | " 0.0 | \n",
665 | " 3 | \n",
666 | " male | \n",
667 | " 22.0 | \n",
668 | " 1 | \n",
669 | " 0 | \n",
670 | " 7.2500 | \n",
671 | " S | \n",
672 | " 0.195279 | \n",
673 | " ... | \n",
674 | " 30.585228 | \n",
675 | " 28.0 | \n",
676 | " 80.0 | \n",
677 | " 0.33 | \n",
678 | " 14.280581 | \n",
679 | " 29.245205 | \n",
680 | " 28.0 | \n",
681 | " 80.0 | \n",
682 | " 0.17 | \n",
683 | " 14.047507 | \n",
684 | "
\n",
685 | " \n",
686 | " | 1 | \n",
687 | " 2 | \n",
688 | " 1.0 | \n",
689 | " 1 | \n",
690 | " female | \n",
691 | " 38.0 | \n",
692 | " 1 | \n",
693 | " 0 | \n",
694 | " 71.2833 | \n",
695 | " C | \n",
696 | " 0.739837 | \n",
697 | " ... | \n",
698 | " 28.687088 | \n",
699 | " 27.0 | \n",
700 | " 76.0 | \n",
701 | " 0.17 | \n",
702 | " 14.576962 | \n",
703 | " 32.332170 | \n",
704 | " 30.0 | \n",
705 | " 71.0 | \n",
706 | " 0.42 | \n",
707 | " 15.258092 | \n",
708 | "
\n",
709 | " \n",
710 | " | 2 | \n",
711 | " 3 | \n",
712 | " 1.0 | \n",
713 | " 3 | \n",
714 | " female | \n",
715 | " 26.0 | \n",
716 | " 0 | \n",
717 | " 0 | \n",
718 | " 7.9250 | \n",
719 | " S | \n",
720 | " 0.739837 | \n",
721 | " ... | \n",
722 | " 28.687088 | \n",
723 | " 27.0 | \n",
724 | " 76.0 | \n",
725 | " 0.17 | \n",
726 | " 14.576962 | \n",
727 | " 29.245205 | \n",
728 | " 28.0 | \n",
729 | " 80.0 | \n",
730 | " 0.17 | \n",
731 | " 14.047507 | \n",
732 | "
\n",
733 | " \n",
734 | " | 3 | \n",
735 | " 4 | \n",
736 | " 1.0 | \n",
737 | " 1 | \n",
738 | " female | \n",
739 | " 35.0 | \n",
740 | " 1 | \n",
741 | " 0 | \n",
742 | " 53.1000 | \n",
743 | " S | \n",
744 | " 0.739837 | \n",
745 | " ... | \n",
746 | " 28.687088 | \n",
747 | " 27.0 | \n",
748 | " 76.0 | \n",
749 | " 0.17 | \n",
750 | " 14.576962 | \n",
751 | " 29.245205 | \n",
752 | " 28.0 | \n",
753 | " 80.0 | \n",
754 | " 0.17 | \n",
755 | " 14.047507 | \n",
756 | "
\n",
757 | " \n",
758 | " | 4 | \n",
759 | " 5 | \n",
760 | " 0.0 | \n",
761 | " 3 | \n",
762 | " male | \n",
763 | " 35.0 | \n",
764 | " 0 | \n",
765 | " 0 | \n",
766 | " 8.0500 | \n",
767 | " S | \n",
768 | " 0.195279 | \n",
769 | " ... | \n",
770 | " 30.585228 | \n",
771 | " 28.0 | \n",
772 | " 80.0 | \n",
773 | " 0.33 | \n",
774 | " 14.280581 | \n",
775 | " 29.245205 | \n",
776 | " 28.0 | \n",
777 | " 80.0 | \n",
778 | " 0.17 | \n",
779 | " 14.047507 | \n",
780 | "
\n",
781 | " \n",
782 | "
\n",
783 | "
5 rows × 55 columns
\n",
784 | "
"
785 | ],
786 | "text/plain": [
787 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
788 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
789 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
790 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
791 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
792 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
793 | "\n",
794 | " Sex_target_enc ... Age_Sex_mean Age_Sex_median Age_Sex_max \\\n",
795 | "0 0.195279 ... 30.585228 28.0 80.0 \n",
796 | "1 0.739837 ... 28.687088 27.0 76.0 \n",
797 | "2 0.739837 ... 28.687088 27.0 76.0 \n",
798 | "3 0.739837 ... 28.687088 27.0 76.0 \n",
799 | "4 0.195279 ... 30.585228 28.0 80.0 \n",
800 | "\n",
801 | " Age_Sex_min Age_Sex_std Age_Embarked_mean Age_Embarked_median \\\n",
802 | "0 0.33 14.280581 29.245205 28.0 \n",
803 | "1 0.17 14.576962 32.332170 30.0 \n",
804 | "2 0.17 14.576962 29.245205 28.0 \n",
805 | "3 0.17 14.576962 29.245205 28.0 \n",
806 | "4 0.33 14.280581 29.245205 28.0 \n",
807 | "\n",
808 | " Age_Embarked_max Age_Embarked_min Age_Embarked_std \n",
809 | "0 80.0 0.17 14.047507 \n",
810 | "1 71.0 0.42 15.258092 \n",
811 | "2 80.0 0.17 14.047507 \n",
812 | "3 80.0 0.17 14.047507 \n",
813 | "4 80.0 0.17 14.047507 \n",
814 | "\n",
815 | "[5 rows x 55 columns]"
816 | ]
817 | },
818 | "execution_count": 9,
819 | "metadata": {},
820 | "output_type": "execute_result"
821 | }
822 | ],
823 | "source": [
824 | "#类别特征与数值特征之间\n",
825 | "catefea = ['Sex','Embarked']\n",
826 | "countfea = ['Fare','Age']\n",
827 | "#例如男性购买的船票票价均值、最大值、最小值中位数。\n",
828 | "for i in tqdm(countfea,desc=\"count_feas 基本聚合特征\"):\n",
829 | " for j in catefea:\n",
830 | " data['{}_{}_mean'.format(i,j)] = data.groupby(j)[i].transform('mean')\n",
831 | " data['{}_{}_median'.format(i,j)] = data.groupby(j)[i].transform('median')\n",
832 | " data['{}_{}_max'.format(i,j)] = data.groupby(j)[i].transform('max')\n",
833 | " data['{}_{}_min'.format(i,j)] = data.groupby(j)[i].transform('min')\n",
834 | " data['{}_{}_std'.format(i,j)] = data.groupby(j)[i].transform('std')\n",
835 | " \n",
836 | "data.head()"
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": 10,
842 | "metadata": {},
843 | "outputs": [
844 | {
845 | "data": {
846 | "text/html": [
847 | "\n",
848 | "\n",
861 | "
\n",
862 | " \n",
863 | " \n",
864 | " | \n",
865 | " PassengerId | \n",
866 | " label | \n",
867 | " Pclass | \n",
868 | " Sex | \n",
869 | " Age | \n",
870 | " SibSp | \n",
871 | " Parch | \n",
872 | " Fare | \n",
873 | " Embarked | \n",
874 | " Sex_target_enc | \n",
875 | " ... | \n",
876 | " Age-max_gb_Sex | \n",
877 | " Age/sum_gb_Sex | \n",
878 | " Fare-mean_gb_Embarked | \n",
879 | " Fare-min_gb_Embarked | \n",
880 | " Fare-max_gb_Embarked | \n",
881 | " Fare/sum_gb_Embarked | \n",
882 | " Age-mean_gb_Embarked | \n",
883 | " Age-min_gb_Embarked | \n",
884 | " Age-max_gb_Embarked | \n",
885 | " Age/sum_gb_Embarked | \n",
886 | "
\n",
887 | " \n",
888 | " \n",
889 | " \n",
890 | " | 0 | \n",
891 | " 1 | \n",
892 | " 0.0 | \n",
893 | " 3 | \n",
894 | " male | \n",
895 | " 22.0 | \n",
896 | " 1 | \n",
897 | " 0 | \n",
898 | " 7.2500 | \n",
899 | " S | \n",
900 | " 0.195279 | \n",
901 | " ... | \n",
902 | " -58.0 | \n",
903 | " 0.001093 | \n",
904 | " -20.168824 | \n",
905 | " 7.2500 | \n",
906 | " -255.7500 | \n",
907 | " 0.000290 | \n",
908 | " -7.245205 | \n",
909 | " 21.83 | \n",
910 | " -58.0 | \n",
911 | " 0.000962 | \n",
912 | "
\n",
913 | " \n",
914 | " | 1 | \n",
915 | " 2 | \n",
916 | " 1.0 | \n",
917 | " 1 | \n",
918 | " female | \n",
919 | " 38.0 | \n",
920 | " 1 | \n",
921 | " 0 | \n",
922 | " 71.2833 | \n",
923 | " C | \n",
924 | " 0.739837 | \n",
925 | " ... | \n",
926 | " -38.0 | \n",
927 | " 0.003414 | \n",
928 | " 8.947033 | \n",
929 | " 67.2708 | \n",
930 | " -441.0459 | \n",
931 | " 0.004235 | \n",
932 | " 5.667830 | \n",
933 | " 37.58 | \n",
934 | " -33.0 | \n",
935 | " 0.005544 | \n",
936 | "
\n",
937 | " \n",
938 | " | 2 | \n",
939 | " 3 | \n",
940 | " 1.0 | \n",
941 | " 3 | \n",
942 | " female | \n",
943 | " 26.0 | \n",
944 | " 0 | \n",
945 | " 0 | \n",
946 | " 7.9250 | \n",
947 | " S | \n",
948 | " 0.739837 | \n",
949 | " ... | \n",
950 | " -50.0 | \n",
951 | " 0.002336 | \n",
952 | " -19.493824 | \n",
953 | " 7.9250 | \n",
954 | " -255.0750 | \n",
955 | " 0.000317 | \n",
956 | " -3.245205 | \n",
957 | " 25.83 | \n",
958 | " -54.0 | \n",
959 | " 0.001137 | \n",
960 | "
\n",
961 | " \n",
962 | " | 3 | \n",
963 | " 4 | \n",
964 | " 1.0 | \n",
965 | " 1 | \n",
966 | " female | \n",
967 | " 35.0 | \n",
968 | " 1 | \n",
969 | " 0 | \n",
970 | " 53.1000 | \n",
971 | " S | \n",
972 | " 0.739837 | \n",
973 | " ... | \n",
974 | " -41.0 | \n",
975 | " 0.003144 | \n",
976 | " 25.681176 | \n",
977 | " 53.1000 | \n",
978 | " -209.9000 | \n",
979 | " 0.002121 | \n",
980 | " 5.754795 | \n",
981 | " 34.83 | \n",
982 | " -45.0 | \n",
983 | " 0.001530 | \n",
984 | "
\n",
985 | " \n",
986 | " | 4 | \n",
987 | " 5 | \n",
988 | " 0.0 | \n",
989 | " 3 | \n",
990 | " male | \n",
991 | " 35.0 | \n",
992 | " 0 | \n",
993 | " 0 | \n",
994 | " 8.0500 | \n",
995 | " S | \n",
996 | " 0.195279 | \n",
997 | " ... | \n",
998 | " -45.0 | \n",
999 | " 0.001739 | \n",
1000 | " -19.368824 | \n",
1001 | " 8.0500 | \n",
1002 | " -254.9500 | \n",
1003 | " 0.000322 | \n",
1004 | " 5.754795 | \n",
1005 | " 34.83 | \n",
1006 | " -45.0 | \n",
1007 | " 0.001530 | \n",
1008 | "
\n",
1009 | " \n",
1010 | "
\n",
1011 | "
5 rows × 71 columns
\n",
1012 | "
"
1013 | ],
1014 | "text/plain": [
1015 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
1016 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
1017 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
1018 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
1019 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
1020 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
1021 | "\n",
1022 | " Sex_target_enc ... Age-max_gb_Sex Age/sum_gb_Sex Fare-mean_gb_Embarked \\\n",
1023 | "0 0.195279 ... -58.0 0.001093 -20.168824 \n",
1024 | "1 0.739837 ... -38.0 0.003414 8.947033 \n",
1025 | "2 0.739837 ... -50.0 0.002336 -19.493824 \n",
1026 | "3 0.739837 ... -41.0 0.003144 25.681176 \n",
1027 | "4 0.195279 ... -45.0 0.001739 -19.368824 \n",
1028 | "\n",
1029 | " Fare-min_gb_Embarked Fare-max_gb_Embarked Fare/sum_gb_Embarked \\\n",
1030 | "0 7.2500 -255.7500 0.000290 \n",
1031 | "1 67.2708 -441.0459 0.004235 \n",
1032 | "2 7.9250 -255.0750 0.000317 \n",
1033 | "3 53.1000 -209.9000 0.002121 \n",
1034 | "4 8.0500 -254.9500 0.000322 \n",
1035 | "\n",
1036 | " Age-mean_gb_Embarked Age-min_gb_Embarked Age-max_gb_Embarked \\\n",
1037 | "0 -7.245205 21.83 -58.0 \n",
1038 | "1 5.667830 37.58 -33.0 \n",
1039 | "2 -3.245205 25.83 -54.0 \n",
1040 | "3 5.754795 34.83 -45.0 \n",
1041 | "4 5.754795 34.83 -45.0 \n",
1042 | "\n",
1043 | " Age/sum_gb_Embarked \n",
1044 | "0 0.000962 \n",
1045 | "1 0.005544 \n",
1046 | "2 0.001137 \n",
1047 | "3 0.001530 \n",
1048 | "4 0.001530 \n",
1049 | "\n",
1050 | "[5 rows x 71 columns]"
1051 | ]
1052 | },
1053 | "execution_count": 10,
1054 | "metadata": {},
1055 | "output_type": "execute_result"
1056 | }
1057 | ],
1058 | "source": [
1059 | "#偏离值特征,顾名思义,偏离均值、最大值、最小值的误差值\n",
1060 | "catefea = ['Sex','Embarked']\n",
1061 | "countfea = ['Fare','Age'] \n",
1062 | "for group in catefea:\n",
1063 | " for feature in countfea:\n",
1064 | " tmp = data.groupby(group)[feature].agg([sum, min, max, np.mean]).reset_index()\n",
1065 | " tmp = pd.merge(data, tmp, on=group, how='left')\n",
1066 | " data['{}-mean_gb_{}'.format(feature, group)] = data[feature] - tmp['mean']\n",
1067 | " data['{}-min_gb_{}'.format(feature, group)] = data[feature] - tmp['min']\n",
1068 | " data['{}-max_gb_{}'.format(feature, group)] = data[feature] - tmp['max']\n",
1069 | " data['{}/sum_gb_{}'.format(feature, group)] = data[feature] / tmp['sum']\n",
1070 | "data.head()"
1071 | ]
1072 | },
1073 | {
1074 | "cell_type": "code",
1075 | "execution_count": 11,
1076 | "metadata": {},
1077 | "outputs": [],
1078 | "source": [
1079 | "# 特征unique count特征,针对类别型特征\n",
1080 | "# for index, col1 in enumerate(['age', 'province', 'city', 'model']):\n",
1081 | "# for col2 in ['age', 'province', 'city', 'model'][index:]:\n",
1082 | "# data['{}_in_{}_count'.format(col1, col2)] = data.groupby(col1)[col2].transform('count')\n",
1083 | "# data['{}_in_{}_nunique'.format(col1, col2)] = data.groupby(col1)[col2].transform('nunique')\n",
1084 | "# data['{}_in_{}_nunique/{}_in_{}_count'.format(col1, col2, col1, col2)] = data['{}_in_{}_nunique'.format(col1,col2)] /data['{}_in_{}_count'.format(col1,col2)]\n",
1085 | "\n",
1086 | "# data['{}_in_{}_count'.format(col2, col1)] = data.groupby(col2)[col1].transform('count')\n",
1087 | "# data['{}_in_{}_nunique'.format(col2, col1)] = data.groupby(col2)[col1].transform('nunique')\n",
1088 | "# data['{}_in_{}_nunique/{}_in_{}_count'.format(col2, col1, col2, col1)] = data['{}_in_{}_nunique'.format(col2,col1)] / data['{}_in_{}_count'.format(col2, col1)]\n"
1089 | ]
1090 | },
1091 | {
1092 | "cell_type": "code",
1093 | "execution_count": 12,
1094 | "metadata": {},
1095 | "outputs": [
1096 | {
1097 | "name": "stderr",
1098 | "output_type": "stream",
1099 | "text": [
1100 | "100%|██████████████████████████████████████████████████████████████████████████| 1309/1309 [00:00<00:00, 163549.12it/s]\n"
1101 | ]
1102 | },
1103 | {
1104 | "data": {
1105 | "text/html": [
1106 | "\n",
1107 | "\n",
1120 | "
\n",
1121 | " \n",
1122 | " \n",
1123 | " | \n",
1124 | " PassengerId | \n",
1125 | " label | \n",
1126 | " Pclass | \n",
1127 | " Sex | \n",
1128 | " Age | \n",
1129 | " SibSp | \n",
1130 | " Parch | \n",
1131 | " Fare | \n",
1132 | " Embarked | \n",
1133 | " Sex_target_enc | \n",
1134 | " ... | \n",
1135 | " Age/sum_gb_Sex | \n",
1136 | " Fare-mean_gb_Embarked | \n",
1137 | " Fare-min_gb_Embarked | \n",
1138 | " Fare-max_gb_Embarked | \n",
1139 | " Fare/sum_gb_Embarked | \n",
1140 | " Age-mean_gb_Embarked | \n",
1141 | " Age-min_gb_Embarked | \n",
1142 | " Age-max_gb_Embarked | \n",
1143 | " Age/sum_gb_Embarked | \n",
1144 | " Pclass_Sex | \n",
1145 | "
\n",
1146 | " \n",
1147 | " \n",
1148 | " \n",
1149 | " | 0 | \n",
1150 | " 1 | \n",
1151 | " 0.0 | \n",
1152 | " 3 | \n",
1153 | " male | \n",
1154 | " 22.0 | \n",
1155 | " 1 | \n",
1156 | " 0 | \n",
1157 | " 7.2500 | \n",
1158 | " S | \n",
1159 | " 0.195279 | \n",
1160 | " ... | \n",
1161 | " 0.001093 | \n",
1162 | " -20.168824 | \n",
1163 | " 7.2500 | \n",
1164 | " -255.7500 | \n",
1165 | " 0.000290 | \n",
1166 | " -7.245205 | \n",
1167 | " 21.83 | \n",
1168 | " -58.0 | \n",
1169 | " 0.000962 | \n",
1170 | " 0 | \n",
1171 | "
\n",
1172 | " \n",
1173 | " | 1 | \n",
1174 | " 2 | \n",
1175 | " 1.0 | \n",
1176 | " 1 | \n",
1177 | " female | \n",
1178 | " 38.0 | \n",
1179 | " 1 | \n",
1180 | " 0 | \n",
1181 | " 71.2833 | \n",
1182 | " C | \n",
1183 | " 0.739837 | \n",
1184 | " ... | \n",
1185 | " 0.003414 | \n",
1186 | " 8.947033 | \n",
1187 | " 67.2708 | \n",
1188 | " -441.0459 | \n",
1189 | " 0.004235 | \n",
1190 | " 5.667830 | \n",
1191 | " 37.58 | \n",
1192 | " -33.0 | \n",
1193 | " 0.005544 | \n",
1194 | " 1 | \n",
1195 | "
\n",
1196 | " \n",
1197 | " | 2 | \n",
1198 | " 3 | \n",
1199 | " 1.0 | \n",
1200 | " 3 | \n",
1201 | " female | \n",
1202 | " 26.0 | \n",
1203 | " 0 | \n",
1204 | " 0 | \n",
1205 | " 7.9250 | \n",
1206 | " S | \n",
1207 | " 0.739837 | \n",
1208 | " ... | \n",
1209 | " 0.002336 | \n",
1210 | " -19.493824 | \n",
1211 | " 7.9250 | \n",
1212 | " -255.0750 | \n",
1213 | " 0.000317 | \n",
1214 | " -3.245205 | \n",
1215 | " 25.83 | \n",
1216 | " -54.0 | \n",
1217 | " 0.001137 | \n",
1218 | " 2 | \n",
1219 | "
\n",
1220 | " \n",
1221 | " | 3 | \n",
1222 | " 4 | \n",
1223 | " 1.0 | \n",
1224 | " 1 | \n",
1225 | " female | \n",
1226 | " 35.0 | \n",
1227 | " 1 | \n",
1228 | " 0 | \n",
1229 | " 53.1000 | \n",
1230 | " S | \n",
1231 | " 0.739837 | \n",
1232 | " ... | \n",
1233 | " 0.003144 | \n",
1234 | " 25.681176 | \n",
1235 | " 53.1000 | \n",
1236 | " -209.9000 | \n",
1237 | " 0.002121 | \n",
1238 | " 5.754795 | \n",
1239 | " 34.83 | \n",
1240 | " -45.0 | \n",
1241 | " 0.001530 | \n",
1242 | " 1 | \n",
1243 | "
\n",
1244 | " \n",
1245 | " | 4 | \n",
1246 | " 5 | \n",
1247 | " 0.0 | \n",
1248 | " 3 | \n",
1249 | " male | \n",
1250 | " 35.0 | \n",
1251 | " 0 | \n",
1252 | " 0 | \n",
1253 | " 8.0500 | \n",
1254 | " S | \n",
1255 | " 0.195279 | \n",
1256 | " ... | \n",
1257 | " 0.001739 | \n",
1258 | " -19.368824 | \n",
1259 | " 8.0500 | \n",
1260 | " -254.9500 | \n",
1261 | " 0.000322 | \n",
1262 | " 5.754795 | \n",
1263 | " 34.83 | \n",
1264 | " -45.0 | \n",
1265 | " 0.001530 | \n",
1266 | " 0 | \n",
1267 | "
\n",
1268 | " \n",
1269 | "
\n",
1270 | "
5 rows × 72 columns
\n",
1271 | "
"
1272 | ],
1273 | "text/plain": [
1274 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
1275 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
1276 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
1277 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
1278 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
1279 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
1280 | "\n",
1281 | " Sex_target_enc ... Age/sum_gb_Sex Fare-mean_gb_Embarked \\\n",
1282 | "0 0.195279 ... 0.001093 -20.168824 \n",
1283 | "1 0.739837 ... 0.003414 8.947033 \n",
1284 | "2 0.739837 ... 0.002336 -19.493824 \n",
1285 | "3 0.739837 ... 0.003144 25.681176 \n",
1286 | "4 0.195279 ... 0.001739 -19.368824 \n",
1287 | "\n",
1288 | " Fare-min_gb_Embarked Fare-max_gb_Embarked Fare/sum_gb_Embarked \\\n",
1289 | "0 7.2500 -255.7500 0.000290 \n",
1290 | "1 67.2708 -441.0459 0.004235 \n",
1291 | "2 7.9250 -255.0750 0.000317 \n",
1292 | "3 53.1000 -209.9000 0.002121 \n",
1293 | "4 8.0500 -254.9500 0.000322 \n",
1294 | "\n",
1295 | " Age-mean_gb_Embarked Age-min_gb_Embarked Age-max_gb_Embarked \\\n",
1296 | "0 -7.245205 21.83 -58.0 \n",
1297 | "1 5.667830 37.58 -33.0 \n",
1298 | "2 -3.245205 25.83 -54.0 \n",
1299 | "3 5.754795 34.83 -45.0 \n",
1300 | "4 5.754795 34.83 -45.0 \n",
1301 | "\n",
1302 | " Age/sum_gb_Embarked Pclass_Sex \n",
1303 | "0 0.000962 0 \n",
1304 | "1 0.005544 1 \n",
1305 | "2 0.001137 2 \n",
1306 | "3 0.001530 1 \n",
1307 | "4 0.001530 0 \n",
1308 | "\n",
1309 | "[5 rows x 72 columns]"
1310 | ]
1311 | },
1312 | "execution_count": 12,
1313 | "metadata": {},
1314 | "output_type": "execute_result"
1315 | }
1316 | ],
1317 | "source": [
1318 | "#类别与类别特征逻辑上的交叉,特征值的排列组合\n",
1319 | "#例如:即是男性,pclass又是1的样本。\n",
1320 | "def cross_two(base_info,name_1,name_2):\n",
1321 | " new_col=[]\n",
1322 | " encode=0\n",
1323 | " dic={}\n",
1324 | " val_1=base_info[name_1]\n",
1325 | " val_2=base_info[name_2]\n",
1326 | " for i in tqdm(range(len(val_1))):\n",
1327 | " tmp=str(val_1[i])+'_'+str(val_2[i])\n",
1328 | " if tmp in dic:\n",
1329 | " new_col.append(dic[tmp])\n",
1330 | " else:\n",
1331 | " dic[tmp]=encode\n",
1332 | " new_col.append(encode)\n",
1333 | " encode+=1\n",
1334 | " return new_col\n",
1335 | "\n",
1336 | "new_col=cross_two(data,'Pclass','Sex')#作企业类型-小类的交叉特征\n",
1337 | "data['Pclass_Sex']=new_col\n",
1338 | "data.head()"
1339 | ]
1340 | },
1341 | {
1342 | "cell_type": "code",
1343 | "execution_count": 13,
1344 | "metadata": {},
1345 | "outputs": [
1346 | {
1347 | "data": {
1348 | "text/html": [
1349 | "\n",
1350 | "\n",
1363 | "
\n",
1364 | " \n",
1365 | " \n",
1366 | " | \n",
1367 | " PassengerId | \n",
1368 | " label | \n",
1369 | " Pclass | \n",
1370 | " Sex | \n",
1371 | " Age | \n",
1372 | " SibSp | \n",
1373 | " Parch | \n",
1374 | " Fare | \n",
1375 | " Embarked | \n",
1376 | " Sex_target_enc | \n",
1377 | " ... | \n",
1378 | " Fare-max_gb_Embarked | \n",
1379 | " Fare/sum_gb_Embarked | \n",
1380 | " Age-mean_gb_Embarked | \n",
1381 | " Age-min_gb_Embarked | \n",
1382 | " Age-max_gb_Embarked | \n",
1383 | " Age/sum_gb_Embarked | \n",
1384 | " Pclass_Sex | \n",
1385 | " Sex_COUNT | \n",
1386 | " Embarked_COUNT | \n",
1387 | " Pclass_COUNT | \n",
1388 | "
\n",
1389 | " \n",
1390 | " \n",
1391 | " \n",
1392 | " | 0 | \n",
1393 | " 1 | \n",
1394 | " 0.0 | \n",
1395 | " 3 | \n",
1396 | " male | \n",
1397 | " 22.0 | \n",
1398 | " 1 | \n",
1399 | " 0 | \n",
1400 | " 7.2500 | \n",
1401 | " S | \n",
1402 | " 0.195279 | \n",
1403 | " ... | \n",
1404 | " -255.7500 | \n",
1405 | " 0.000290 | \n",
1406 | " -7.245205 | \n",
1407 | " 21.83 | \n",
1408 | " -58.0 | \n",
1409 | " 0.000962 | \n",
1410 | " 0 | \n",
1411 | " 843 | \n",
1412 | " 914.0 | \n",
1413 | " 709 | \n",
1414 | "
\n",
1415 | " \n",
1416 | " | 1 | \n",
1417 | " 2 | \n",
1418 | " 1.0 | \n",
1419 | " 1 | \n",
1420 | " female | \n",
1421 | " 38.0 | \n",
1422 | " 1 | \n",
1423 | " 0 | \n",
1424 | " 71.2833 | \n",
1425 | " C | \n",
1426 | " 0.739837 | \n",
1427 | " ... | \n",
1428 | " -441.0459 | \n",
1429 | " 0.004235 | \n",
1430 | " 5.667830 | \n",
1431 | " 37.58 | \n",
1432 | " -33.0 | \n",
1433 | " 0.005544 | \n",
1434 | " 1 | \n",
1435 | " 466 | \n",
1436 | " 270.0 | \n",
1437 | " 323 | \n",
1438 | "
\n",
1439 | " \n",
1440 | " | 2 | \n",
1441 | " 3 | \n",
1442 | " 1.0 | \n",
1443 | " 3 | \n",
1444 | " female | \n",
1445 | " 26.0 | \n",
1446 | " 0 | \n",
1447 | " 0 | \n",
1448 | " 7.9250 | \n",
1449 | " S | \n",
1450 | " 0.739837 | \n",
1451 | " ... | \n",
1452 | " -255.0750 | \n",
1453 | " 0.000317 | \n",
1454 | " -3.245205 | \n",
1455 | " 25.83 | \n",
1456 | " -54.0 | \n",
1457 | " 0.001137 | \n",
1458 | " 2 | \n",
1459 | " 466 | \n",
1460 | " 914.0 | \n",
1461 | " 709 | \n",
1462 | "
\n",
1463 | " \n",
1464 | " | 3 | \n",
1465 | " 4 | \n",
1466 | " 1.0 | \n",
1467 | " 1 | \n",
1468 | " female | \n",
1469 | " 35.0 | \n",
1470 | " 1 | \n",
1471 | " 0 | \n",
1472 | " 53.1000 | \n",
1473 | " S | \n",
1474 | " 0.739837 | \n",
1475 | " ... | \n",
1476 | " -209.9000 | \n",
1477 | " 0.002121 | \n",
1478 | " 5.754795 | \n",
1479 | " 34.83 | \n",
1480 | " -45.0 | \n",
1481 | " 0.001530 | \n",
1482 | " 1 | \n",
1483 | " 466 | \n",
1484 | " 914.0 | \n",
1485 | " 323 | \n",
1486 | "
\n",
1487 | " \n",
1488 | " | 4 | \n",
1489 | " 5 | \n",
1490 | " 0.0 | \n",
1491 | " 3 | \n",
1492 | " male | \n",
1493 | " 35.0 | \n",
1494 | " 0 | \n",
1495 | " 0 | \n",
1496 | " 8.0500 | \n",
1497 | " S | \n",
1498 | " 0.195279 | \n",
1499 | " ... | \n",
1500 | " -254.9500 | \n",
1501 | " 0.000322 | \n",
1502 | " 5.754795 | \n",
1503 | " 34.83 | \n",
1504 | " -45.0 | \n",
1505 | " 0.001530 | \n",
1506 | " 0 | \n",
1507 | " 843 | \n",
1508 | " 914.0 | \n",
1509 | " 709 | \n",
1510 | "
\n",
1511 | " \n",
1512 | "
\n",
1513 | "
5 rows × 75 columns
\n",
1514 | "
"
1515 | ],
1516 | "text/plain": [
1517 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
1518 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
1519 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
1520 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
1521 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
1522 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
1523 | "\n",
1524 | " Sex_target_enc ... Fare-max_gb_Embarked Fare/sum_gb_Embarked \\\n",
1525 | "0 0.195279 ... -255.7500 0.000290 \n",
1526 | "1 0.739837 ... -441.0459 0.004235 \n",
1527 | "2 0.739837 ... -255.0750 0.000317 \n",
1528 | "3 0.739837 ... -209.9000 0.002121 \n",
1529 | "4 0.195279 ... -254.9500 0.000322 \n",
1530 | "\n",
1531 | " Age-mean_gb_Embarked Age-min_gb_Embarked Age-max_gb_Embarked \\\n",
1532 | "0 -7.245205 21.83 -58.0 \n",
1533 | "1 5.667830 37.58 -33.0 \n",
1534 | "2 -3.245205 25.83 -54.0 \n",
1535 | "3 5.754795 34.83 -45.0 \n",
1536 | "4 5.754795 34.83 -45.0 \n",
1537 | "\n",
1538 | " Age/sum_gb_Embarked Pclass_Sex Sex_COUNT Embarked_COUNT Pclass_COUNT \n",
1539 | "0 0.000962 0 843 914.0 709 \n",
1540 | "1 0.005544 1 466 270.0 323 \n",
1541 | "2 0.001137 2 466 914.0 709 \n",
1542 | "3 0.001530 1 466 914.0 323 \n",
1543 | "4 0.001530 0 843 914.0 709 \n",
1544 | "\n",
1545 | "[5 rows x 75 columns]"
1546 | ]
1547 | },
1548 | "execution_count": 13,
1549 | "metadata": {},
1550 | "output_type": "execute_result"
1551 | }
1552 | ],
1553 | "source": [
1554 | "# 频数统计,计算类别特征每一类的个数,例如 男性的count是843,那男性对应的衍生特征值是843\n",
1555 | "cat_col = ['Sex','Embarked','Pclass']\n",
1556 | "for col in cat_col:\n",
1557 | " data[col + '_COUNT'] = data[col].map(data[col].value_counts())\n",
1558 | " col_idx = data[col].value_counts()\n",
1559 | "\n",
1560 | "data.head()"
1561 | ]
1562 | },
1563 | {
1564 | "cell_type": "code",
1565 | "execution_count": 14,
1566 | "metadata": {},
1567 | "outputs": [
1568 | {
1569 | "name": "stderr",
1570 | "output_type": "stream",
1571 | "text": [
1572 | "分箱特征: 100%|█████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 996.75it/s]\n"
1573 | ]
1574 | },
1575 | {
1576 | "data": {
1577 | "text/html": [
1578 | "\n",
1579 | "\n",
1592 | "
\n",
1593 | " \n",
1594 | " \n",
1595 | " | \n",
1596 | " PassengerId | \n",
1597 | " label | \n",
1598 | " Pclass | \n",
1599 | " Sex | \n",
1600 | " Age | \n",
1601 | " SibSp | \n",
1602 | " Parch | \n",
1603 | " Fare | \n",
1604 | " Embarked | \n",
1605 | " Sex_target_enc | \n",
1606 | " ... | \n",
1607 | " Fare/sum_gb_Embarked | \n",
1608 | " Age-mean_gb_Embarked | \n",
1609 | " Age-min_gb_Embarked | \n",
1610 | " Age-max_gb_Embarked | \n",
1611 | " Age/sum_gb_Embarked | \n",
1612 | " Pclass_Sex | \n",
1613 | " Sex_COUNT | \n",
1614 | " Embarked_COUNT | \n",
1615 | " Pclass_COUNT | \n",
1616 | " Age_bin | \n",
1617 | "
\n",
1618 | " \n",
1619 | " \n",
1620 | " \n",
1621 | " | 0 | \n",
1622 | " 1 | \n",
1623 | " 0.0 | \n",
1624 | " 3 | \n",
1625 | " male | \n",
1626 | " 22.0 | \n",
1627 | " 1 | \n",
1628 | " 0 | \n",
1629 | " 7.2500 | \n",
1630 | " S | \n",
1631 | " 0.195279 | \n",
1632 | " ... | \n",
1633 | " 0.000290 | \n",
1634 | " -7.245205 | \n",
1635 | " 21.83 | \n",
1636 | " -58.0 | \n",
1637 | " 0.000962 | \n",
1638 | " 0 | \n",
1639 | " 843 | \n",
1640 | " 914.0 | \n",
1641 | " 709 | \n",
1642 | " 4.0 | \n",
1643 | "
\n",
1644 | " \n",
1645 | " | 1 | \n",
1646 | " 2 | \n",
1647 | " 1.0 | \n",
1648 | " 1 | \n",
1649 | " female | \n",
1650 | " 38.0 | \n",
1651 | " 1 | \n",
1652 | " 0 | \n",
1653 | " 71.2833 | \n",
1654 | " C | \n",
1655 | " 0.739837 | \n",
1656 | " ... | \n",
1657 | " 0.004235 | \n",
1658 | " 5.667830 | \n",
1659 | " 37.58 | \n",
1660 | " -33.0 | \n",
1661 | " 0.005544 | \n",
1662 | " 1 | \n",
1663 | " 466 | \n",
1664 | " 270.0 | \n",
1665 | " 323 | \n",
1666 | " 7.0 | \n",
1667 | "
\n",
1668 | " \n",
1669 | " | 2 | \n",
1670 | " 3 | \n",
1671 | " 1.0 | \n",
1672 | " 3 | \n",
1673 | " female | \n",
1674 | " 26.0 | \n",
1675 | " 0 | \n",
1676 | " 0 | \n",
1677 | " 7.9250 | \n",
1678 | " S | \n",
1679 | " 0.739837 | \n",
1680 | " ... | \n",
1681 | " 0.000317 | \n",
1682 | " -3.245205 | \n",
1683 | " 25.83 | \n",
1684 | " -54.0 | \n",
1685 | " 0.001137 | \n",
1686 | " 2 | \n",
1687 | " 466 | \n",
1688 | " 914.0 | \n",
1689 | " 709 | \n",
1690 | " 5.0 | \n",
1691 | "
\n",
1692 | " \n",
1693 | " | 3 | \n",
1694 | " 4 | \n",
1695 | " 1.0 | \n",
1696 | " 1 | \n",
1697 | " female | \n",
1698 | " 35.0 | \n",
1699 | " 1 | \n",
1700 | " 0 | \n",
1701 | " 53.1000 | \n",
1702 | " S | \n",
1703 | " 0.739837 | \n",
1704 | " ... | \n",
1705 | " 0.002121 | \n",
1706 | " 5.754795 | \n",
1707 | " 34.83 | \n",
1708 | " -45.0 | \n",
1709 | " 0.001530 | \n",
1710 | " 1 | \n",
1711 | " 466 | \n",
1712 | " 914.0 | \n",
1713 | " 323 | \n",
1714 | " 7.0 | \n",
1715 | "
\n",
1716 | " \n",
1717 | " | 4 | \n",
1718 | " 5 | \n",
1719 | " 0.0 | \n",
1720 | " 3 | \n",
1721 | " male | \n",
1722 | " 35.0 | \n",
1723 | " 0 | \n",
1724 | " 0 | \n",
1725 | " 8.0500 | \n",
1726 | " S | \n",
1727 | " 0.195279 | \n",
1728 | " ... | \n",
1729 | " 0.000322 | \n",
1730 | " 5.754795 | \n",
1731 | " 34.83 | \n",
1732 | " -45.0 | \n",
1733 | " 0.001530 | \n",
1734 | " 0 | \n",
1735 | " 843 | \n",
1736 | " 914.0 | \n",
1737 | " 709 | \n",
1738 | " 7.0 | \n",
1739 | "
\n",
1740 | " \n",
1741 | "
\n",
1742 | "
5 rows × 76 columns
\n",
1743 | "
"
1744 | ],
1745 | "text/plain": [
1746 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
1747 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
1748 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
1749 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
1750 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
1751 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
1752 | "\n",
1753 | " Sex_target_enc ... Fare/sum_gb_Embarked Age-mean_gb_Embarked \\\n",
1754 | "0 0.195279 ... 0.000290 -7.245205 \n",
1755 | "1 0.739837 ... 0.004235 5.667830 \n",
1756 | "2 0.739837 ... 0.000317 -3.245205 \n",
1757 | "3 0.739837 ... 0.002121 5.754795 \n",
1758 | "4 0.195279 ... 0.000322 5.754795 \n",
1759 | "\n",
1760 | " Age-min_gb_Embarked Age-max_gb_Embarked Age/sum_gb_Embarked Pclass_Sex \\\n",
1761 | "0 21.83 -58.0 0.000962 0 \n",
1762 | "1 37.58 -33.0 0.005544 1 \n",
1763 | "2 25.83 -54.0 0.001137 2 \n",
1764 | "3 34.83 -45.0 0.001530 1 \n",
1765 | "4 34.83 -45.0 0.001530 0 \n",
1766 | "\n",
1767 | " Sex_COUNT Embarked_COUNT Pclass_COUNT Age_bin \n",
1768 | "0 843 914.0 709 4.0 \n",
1769 | "1 466 270.0 323 7.0 \n",
1770 | "2 466 914.0 709 5.0 \n",
1771 | "3 466 914.0 323 7.0 \n",
1772 | "4 843 914.0 709 7.0 \n",
1773 | "\n",
1774 | "[5 rows x 76 columns]"
1775 | ]
1776 | },
1777 | "execution_count": 14,
1778 | "metadata": {},
1779 | "output_type": "execute_result"
1780 | }
1781 | ],
1782 | "source": [
1783 | "#分箱,等频、等宽\n",
1784 | "def bucket(data,count_feas,bucket_len):\n",
1785 | " #通过除法映射到间隔均匀的分箱中,每个分箱的取值范围都是Age/10,相当于宽度10等宽分箱\n",
1786 | " for fea in tqdm(count_feas,desc='分箱特征'):\n",
1787 | " data['{}_bin'.format(fea)] = np.floor_divide(data[fea], bucket_len)\n",
1788 | "\n",
1789 | "count_feas = ['Age']\n",
1790 | "bucket(data,count_feas,5)\n",
1791 | "data.head()"
1792 | ]
1793 | },
1794 | {
1795 | "cell_type": "code",
1796 | "execution_count": 15,
1797 | "metadata": {},
1798 | "outputs": [
1799 | {
1800 | "name": "stderr",
1801 | "output_type": "stream",
1802 | "text": [
1803 | "100%|██████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1001.27it/s]\n"
1804 | ]
1805 | },
1806 | {
1807 | "data": {
1808 | "text/html": [
1809 | "\n",
1810 | "\n",
1823 | "
\n",
1824 | " \n",
1825 | " \n",
1826 | " | \n",
1827 | " PassengerId | \n",
1828 | " label | \n",
1829 | " Pclass | \n",
1830 | " Sex | \n",
1831 | " Age | \n",
1832 | " SibSp | \n",
1833 | " Parch | \n",
1834 | " Fare | \n",
1835 | " Embarked | \n",
1836 | " Sex_target_enc | \n",
1837 | " ... | \n",
1838 | " Age-mean_gb_Embarked | \n",
1839 | " Age-min_gb_Embarked | \n",
1840 | " Age-max_gb_Embarked | \n",
1841 | " Age/sum_gb_Embarked | \n",
1842 | " Pclass_Sex | \n",
1843 | " Sex_COUNT | \n",
1844 | " Embarked_COUNT | \n",
1845 | " Pclass_COUNT | \n",
1846 | " Age_bin | \n",
1847 | " Sex_label_encoder | \n",
1848 | "
\n",
1849 | " \n",
1850 | " \n",
1851 | " \n",
1852 | " | 0 | \n",
1853 | " 1 | \n",
1854 | " 0.0 | \n",
1855 | " 3 | \n",
1856 | " male | \n",
1857 | " 22.0 | \n",
1858 | " 1 | \n",
1859 | " 0 | \n",
1860 | " 7.2500 | \n",
1861 | " S | \n",
1862 | " 0.195279 | \n",
1863 | " ... | \n",
1864 | " -7.245205 | \n",
1865 | " 21.83 | \n",
1866 | " -58.0 | \n",
1867 | " 0.000962 | \n",
1868 | " 0 | \n",
1869 | " 843 | \n",
1870 | " 914.0 | \n",
1871 | " 709 | \n",
1872 | " 4.0 | \n",
1873 | " 1 | \n",
1874 | "
\n",
1875 | " \n",
1876 | " | 1 | \n",
1877 | " 2 | \n",
1878 | " 1.0 | \n",
1879 | " 1 | \n",
1880 | " female | \n",
1881 | " 38.0 | \n",
1882 | " 1 | \n",
1883 | " 0 | \n",
1884 | " 71.2833 | \n",
1885 | " C | \n",
1886 | " 0.739837 | \n",
1887 | " ... | \n",
1888 | " 5.667830 | \n",
1889 | " 37.58 | \n",
1890 | " -33.0 | \n",
1891 | " 0.005544 | \n",
1892 | " 1 | \n",
1893 | " 466 | \n",
1894 | " 270.0 | \n",
1895 | " 323 | \n",
1896 | " 7.0 | \n",
1897 | " 0 | \n",
1898 | "
\n",
1899 | " \n",
1900 | " | 2 | \n",
1901 | " 3 | \n",
1902 | " 1.0 | \n",
1903 | " 3 | \n",
1904 | " female | \n",
1905 | " 26.0 | \n",
1906 | " 0 | \n",
1907 | " 0 | \n",
1908 | " 7.9250 | \n",
1909 | " S | \n",
1910 | " 0.739837 | \n",
1911 | " ... | \n",
1912 | " -3.245205 | \n",
1913 | " 25.83 | \n",
1914 | " -54.0 | \n",
1915 | " 0.001137 | \n",
1916 | " 2 | \n",
1917 | " 466 | \n",
1918 | " 914.0 | \n",
1919 | " 709 | \n",
1920 | " 5.0 | \n",
1921 | " 0 | \n",
1922 | "
\n",
1923 | " \n",
1924 | " | 3 | \n",
1925 | " 4 | \n",
1926 | " 1.0 | \n",
1927 | " 1 | \n",
1928 | " female | \n",
1929 | " 35.0 | \n",
1930 | " 1 | \n",
1931 | " 0 | \n",
1932 | " 53.1000 | \n",
1933 | " S | \n",
1934 | " 0.739837 | \n",
1935 | " ... | \n",
1936 | " 5.754795 | \n",
1937 | " 34.83 | \n",
1938 | " -45.0 | \n",
1939 | " 0.001530 | \n",
1940 | " 1 | \n",
1941 | " 466 | \n",
1942 | " 914.0 | \n",
1943 | " 323 | \n",
1944 | " 7.0 | \n",
1945 | " 0 | \n",
1946 | "
\n",
1947 | " \n",
1948 | " | 4 | \n",
1949 | " 5 | \n",
1950 | " 0.0 | \n",
1951 | " 3 | \n",
1952 | " male | \n",
1953 | " 35.0 | \n",
1954 | " 0 | \n",
1955 | " 0 | \n",
1956 | " 8.0500 | \n",
1957 | " S | \n",
1958 | " 0.195279 | \n",
1959 | " ... | \n",
1960 | " 5.754795 | \n",
1961 | " 34.83 | \n",
1962 | " -45.0 | \n",
1963 | " 0.001530 | \n",
1964 | " 0 | \n",
1965 | " 843 | \n",
1966 | " 914.0 | \n",
1967 | " 709 | \n",
1968 | " 7.0 | \n",
1969 | " 1 | \n",
1970 | "
\n",
1971 | " \n",
1972 | "
\n",
1973 | "
5 rows × 77 columns
\n",
1974 | "
"
1975 | ],
1976 | "text/plain": [
1977 | " PassengerId label Pclass Sex Age SibSp Parch Fare Embarked \\\n",
1978 | "0 1 0.0 3 male 22.0 1 0 7.2500 S \n",
1979 | "1 2 1.0 1 female 38.0 1 0 71.2833 C \n",
1980 | "2 3 1.0 3 female 26.0 0 0 7.9250 S \n",
1981 | "3 4 1.0 1 female 35.0 1 0 53.1000 S \n",
1982 | "4 5 0.0 3 male 35.0 0 0 8.0500 S \n",
1983 | "\n",
1984 | " Sex_target_enc ... Age-mean_gb_Embarked Age-min_gb_Embarked \\\n",
1985 | "0 0.195279 ... -7.245205 21.83 \n",
1986 | "1 0.739837 ... 5.667830 37.58 \n",
1987 | "2 0.739837 ... -3.245205 25.83 \n",
1988 | "3 0.739837 ... 5.754795 34.83 \n",
1989 | "4 0.195279 ... 5.754795 34.83 \n",
1990 | "\n",
1991 | " Age-max_gb_Embarked Age/sum_gb_Embarked Pclass_Sex Sex_COUNT \\\n",
1992 | "0 -58.0 0.000962 0 843 \n",
1993 | "1 -33.0 0.005544 1 466 \n",
1994 | "2 -54.0 0.001137 2 466 \n",
1995 | "3 -45.0 0.001530 1 466 \n",
1996 | "4 -45.0 0.001530 0 843 \n",
1997 | "\n",
1998 | " Embarked_COUNT Pclass_COUNT Age_bin Sex_label_encoder \n",
1999 | "0 914.0 709 4.0 1 \n",
2000 | "1 270.0 323 7.0 0 \n",
2001 | "2 914.0 709 5.0 0 \n",
2002 | "3 914.0 323 7.0 0 \n",
2003 | "4 914.0 709 7.0 1 \n",
2004 | "\n",
2005 | "[5 rows x 77 columns]"
2006 | ]
2007 | },
2008 | "execution_count": 15,
2009 | "metadata": {},
2010 | "output_type": "execute_result"
2011 | }
2012 | ],
2013 | "source": [
2014 | "# 基本的类别特征转换方法label_encode\n",
2015 | "from sklearn.preprocessing import LabelEncoder\n",
2016 | "for col in tqdm(['Sex']):\n",
2017 | " le = LabelEncoder()\n",
2018 | " le.fit(data[col])\n",
2019 | " data[col+'_label_encoder'] = le.transform(data[col])\n",
2020 | " # test[col] = le.transform(test[col])\n",
2021 | "data.head()\n"
2022 | ]
2023 | },
2024 | {
2025 | "cell_type": "code",
2026 | "execution_count": null,
2027 | "metadata": {},
2028 | "outputs": [],
2029 | "source": [
2030 | "#构建序列特征,例如构建每个用户的登录行为序列\n",
2031 | "#例如一个用户会存在多个样本,对用户groupby获取登录序列,拼起来作为新特征\n",
2032 | "launch_grp = pd.DataFrame()\n",
2033 | "\n",
2034 | "user_id = []\n",
2035 | "launch_date_str = []\n",
2036 | "for i in launch.groupby('user_id'):\n",
2037 | " launch_date = []\n",
2038 | " user_id.append(i[0])\n",
2039 | " for j in i[1]['date']:\n",
2040 | " launch_date.append(j)\n",
2041 | " launch_date_str.append(str(launch_date))\n",
2042 | "launch_grp['user_id'] = list(user_id)\n",
2043 | "launch_grp['launch_date_str'] = list(launch_date_str)\n",
2044 | "launch_grp.head()"
2045 | ]
2046 | },
2047 | {
2048 | "cell_type": "code",
2049 | "execution_count": null,
2050 | "metadata": {},
2051 | "outputs": [],
2052 | "source": [
2053 | "#对上述序列或文本特征进行w2v,构建embedding特征\n",
2054 | "from gensim.models.word2vec import Word2Vec\n",
2055 | "\n",
2056 | "data['tagid'] = data['tagid'].apply(lambda x: eval(x))\n",
2057 | "sentences = data['tagid'].values.tolist()\n",
2058 | "for i in range(len(sentences)):\n",
2059 | " sentences[i] = [str(x) for x in sentences[i]] #将每个tagid转化成str格式\n",
2060 | "\n",
2061 | "#训练数据格式如下\n",
2062 | "#sentences=[['外形', '外观', '好看', '屏幕', '特别'], ['手机', '好看', '段时间'], ['手机', '很漂亮', '评价']]\n",
2063 | "\n",
2064 | "emb_size = 32\n",
2065 | "#model = Word2Vec(sentences,vector_size=emb_size, window=6, min_count=5, sg=0, hs=0, seed=1,epochs=5)\n",
2066 | "model = Word2Vec.load('./w2vmodel/w2vmodel.model')\n",
2067 | "emb_matrix = []\n",
2068 | "for seq in sentences:\n",
2069 | " vec = []\n",
2070 | " for w in seq:\n",
2071 | "# if w in model.wv.vocab:\n",
2072 | "# vec.append(model.wv[w])\n",
2073 | " try:\n",
2074 | " vec.append(model.wv[w])\n",
2075 | " except KeyError:\n",
2076 | " continue\n",
2077 | " \n",
2078 | " if len(vec) > 0:\n",
2079 | " emb_matrix.append(np.mean(vec, axis=0))\n",
2080 | " else:\n",
2081 | " emb_matrix.append([0] * emb_size)\n",
2082 | "emb_matrix = np.array(emb_matrix)\n",
2083 | "for i in range(emb_size):\n",
2084 | " data['tag_emb_{}'.format(i)] = emb_matrix[:, i]"
2085 | ]
2086 | },
2087 | {
2088 | "cell_type": "code",
2089 | "execution_count": null,
2090 | "metadata": {},
2091 | "outputs": [],
2092 | "source": [
2093 | "#想象力特征"
2094 | ]
2095 | }
2096 | ],
2097 | "metadata": {
2098 | "kernelspec": {
2099 | "display_name": "Python 3",
2100 | "language": "python",
2101 | "name": "python3"
2102 | },
2103 | "language_info": {
2104 | "codemirror_mode": {
2105 | "name": "ipython",
2106 | "version": 3
2107 | },
2108 | "file_extension": ".py",
2109 | "mimetype": "text/x-python",
2110 | "name": "python",
2111 | "nbconvert_exporter": "python",
2112 | "pygments_lexer": "ipython3",
2113 | "version": "3.8.5"
2114 | },
2115 | "toc": {
2116 | "base_numbering": 1,
2117 | "nav_menu": {},
2118 | "number_sections": true,
2119 | "sideBar": true,
2120 | "skip_h1_title": false,
2121 | "title_cell": "Table of Contents",
2122 | "title_sidebar": "Contents",
2123 | "toc_cell": false,
2124 | "toc_position": {},
2125 | "toc_section_display": true,
2126 | "toc_window_display": false
2127 | }
2128 | },
2129 | "nbformat": 4,
2130 | "nbformat_minor": 2
2131 | }
2132 |
--------------------------------------------------------------------------------