├── dataset ├── alsd.png ├── artist_alias_small.txt ├── test.csv └── train.csv ├── .gitignore ├── README.md └── notebooks ├── spark-ml-recommendation-implicit.ipynb ├── spark-ml-gbt-pipeline.ipynb ├── spark-ml-recommendation-explicit.ipynb ├── spark-ml-clustering.ipynb └── spark-ml-starter.ipynb /dataset/alsd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Swalloow/pyspark-ml-examples/HEAD/dataset/alsd.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | spark-warehouse/ 3 | *.crc 4 | *.gzip 5 | .ipynb_checkpoints/ 6 | .cache/ 7 | .ipython/ 8 | .local/ 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyspark-ml-examples 2 | Spark ML Tutorial and Examples for Beginners 3 | 4 | ## How to start 5 | Use the Docker image: https://hub.docker.com/r/jupyter/pyspark-notebook/ 6 | 7 | ``` 8 | docker run -it --rm -p 8888:8888 --name jupyter \ 9 | -v /YOUR_DOWNLOAD_PATH/pyspark-ml-examples:/home/jovyan jupyter/pyspark-notebook start-notebook.sh 10 | ``` 11 | 12 | ## Index 13 | - **spark-ml-starter**: EDA, Preprocessing, Modeling, Evaluation, Tuning 14 | - **spark-ml-gbt-pipeline**: GBTClassifier, Pipeline 15 | - **spark-ml-recommendation-explicit**: Movie recommendation with Explicit Collaborative Filtering 16 | - **spark-ml-recommendation-implicit**: Music recommendation with Implicit Collaborative Filtering 17 | - **spark-ml-clustering**: Anomaly Detection in Network Traffic with K-means Clustering 18 | 19 | ## Dataset 20 | - Kaggle Titanic Dataset: https://www.kaggle.com/c/titanic/data 21 | - MovieLens Dataset: https://grouplens.org/datasets/movielens/100k/ 22 | - Last.fm Music Dataset: 
http://www.dtic.upf.edu/~ocelma/MusicRecommendationDataset/lastfm-1K.html 23 | - KDD Cup 1999 Dataset: http://www.kdd.org/kdd-cup/view/kdd-cup-1999/Data 24 | -------------------------------------------------------------------------------- /notebooks/spark-ml-recommendation-implicit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Spark ML Music Recommendation (Implicit)\n", 8 | "\n", 9 | "- dataset: http://www.dtic.upf.edu/~ocelma/MusicRecommendationDataset/lastfm-1K.html" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from pyspark.conf import SparkConf\n", 19 | "from pyspark import StorageLevel\n", 20 | "\n", 21 | "from pyspark.sql import SparkSession\n", 22 | "from pyspark.sql.functions import *\n", 23 | "from pyspark.sql.types import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "spark = SparkSession.builder \\\n", 33 | " .master(\"local\") \\\n", 34 | " .appName(\"Spark ML\") \\\n", 35 | " .getOrCreate()\n", 36 | "\n", 37 | "sc = spark.sparkContext\n", 38 | "sc.setLogLevel(\"INFO\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "schema = StructType([\n", 48 | " StructField(\"userId\", IntegerType()),\n", 49 | " StructField(\"artistId\", IntegerType()),\n", 50 | " StructField(\"count\", IntegerType())\n", 51 | "])" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "+-------+--------+-----+\n", 64 | "| userId|artistId|count|\n", 65 | "+-------+--------+-----+\n", 66 | "|1059637| 1000010| 238|\n", 67 | 
"|1059637| 1000049| 1|\n", 68 | "|1059637| 1000056| 1|\n", 69 | "|1059637| 1000062| 11|\n", 70 | "|1059637| 1000094| 1|\n", 71 | "|1059637| 1000112| 423|\n", 72 | "|1059637| 1000113| 5|\n", 73 | "|1059637| 1000114| 2|\n", 74 | "|1059637| 1000123| 2|\n", 75 | "|1059637| 1000130|19129|\n", 76 | "+-------+--------+-----+\n", 77 | "only showing top 10 rows\n", 78 | "\n" 79 | ] 80 | } 81 | ], 82 | "source": [ 83 | "df = spark.read.csv(\"../dataset/user_artist_data_small.txt\", schema=schema, sep=\" \").cache()\n", 84 | "df.show(10)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "+-------+------------------+------------------+------------------+\n", 97 | "|summary| userId| artistId| count|\n", 98 | "+-------+------------------+------------------+------------------+\n", 99 | "| count| 49481| 49481| 49481|\n", 100 | "| mean|1328420.1949435137|2003155.0297285826|130.57579677047755|\n", 101 | "| stddev| 452991.3131262286|2489609.3644763026|3034.3847545693047|\n", 102 | "| min| 1000647| 1| 1|\n", 103 | "| max| 2288164| 10788218| 439771|\n", 104 | "+-------+------------------+------------------+------------------+\n", 105 | "\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "df.describe().show()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Alternating Least Square" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "from pyspark.ml.recommendation import ALS\n", 127 | "from pyspark.ml.tuning import TrainValidationSplit, ParamGridBuilder" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 7, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "(train, test) = df.randomSplit([0.8, 0.2])" 137 | ] 138 | }, 139 | { 140 | "cell_type": 
"code", 141 | "execution_count": 8, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "+-------+--------------------+\n", 149 | "| id| features|\n", 150 | "+-------+--------------------+\n", 151 | "|1001440|[0.0, 0.0, 0.0, 0...|\n", 152 | "|1017610|[0.0, 0.0, 0.0, 0...|\n", 153 | "|1021940|[0.0, 0.0, 0.0, 0...|\n", 154 | "|1058890|[0.0, 0.0, 0.0, 0...|\n", 155 | "|2005710|[0.0, 0.0, 0.0, 0...|\n", 156 | "+-------+--------------------+\n", 157 | "only showing top 5 rows\n", 158 | "\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "als = ALS(userCol=\"userId\", itemCol=\"artistId\", ratingCol=\"count\") \\\n", 164 | " .setColdStartStrategy(\"drop\") \\\n", 165 | " .setNonnegative(True) \\\n", 166 | " .setRank(14) \\\n", 167 | " .setMaxIter(20) \\\n", 168 | " .setRegParam(.17) \\\n", 169 | " .setAlpha(3.0)\n", 170 | "\n", 171 | "alsModel = als.fit(train)\n", 172 | "alsModel.userFactors.show(5)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 9, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "name": "stdout", 182 | "output_type": "stream", 183 | "text": [ 184 | "+-------+--------+-----+----------+\n", 185 | "| userId|artistId|count|prediction|\n", 186 | "+-------+--------+-----+----------+\n", 187 | "|1000647| 1035554| 1| 0.0|\n", 188 | "|1000647| 1598| 1| 0.0|\n", 189 | "|1000647| 1004088| 1| 0.0|\n", 190 | "|1000647| 4037| 2| 0.0|\n", 191 | "|1000647| 1002220| 5| 0.0|\n", 192 | "+-------+--------+-----+----------+\n", 193 | "only showing top 5 rows\n", 194 | "\n" 195 | ] 196 | } 197 | ], 198 | "source": [ 199 | "predictions = alsModel.transform(test)\n", 200 | "predictions.sort(\"userId\", \"count\").show(5)" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 10, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "spark.stop()" 210 | ] 211 | } 212 | ], 213 | "metadata": { 214 | "kernelspec": { 215 | 
"display_name": "Python 3", 216 | "language": "python", 217 | "name": "python3" 218 | }, 219 | "language_info": { 220 | "codemirror_mode": { 221 | "name": "ipython", 222 | "version": 3 223 | }, 224 | "file_extension": ".py", 225 | "mimetype": "text/x-python", 226 | "name": "python", 227 | "nbconvert_exporter": "python", 228 | "pygments_lexer": "ipython3", 229 | "version": "3.6.3" 230 | } 231 | }, 232 | "nbformat": 4, 233 | "nbformat_minor": 2 234 | } 235 | -------------------------------------------------------------------------------- /notebooks/spark-ml-gbt-pipeline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Spark ML GBTClassifier + Pipeline" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "from pyspark.sql import SparkSession\n", 17 | "from pyspark.conf import SparkConf\n", 18 | "from pyspark import StorageLevel" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "spark = SparkSession.builder \\\n", 28 | " .master(\"local\") \\\n", 29 | " .appName(\"Spark ML\") \\\n", 30 | " .getOrCreate()\n", 31 | "\n", 32 | "sc = spark.sparkContext\n", 33 | "sc.setLogLevel(\"INFO\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "---\n", 41 | "## Pipelines\n", 42 | "\n", 43 | "- http://spark.apache.org/docs/latest/api/python/pyspark.ml.html#module-pyspark.ml\n", 44 | "- 복잡한 ML 과정들을 파이프라인으로 모듈화 시킬 수 있도록 도와주는 패키지\n", 45 | "\n", 46 | "#### Transformer\n", 47 | "\n", 48 | "- DataFrame을 **lazily** 하게 또 다른 DataFrame으로 변형, `transform()` 메서드 구현\n", 49 | "- Feature Engineering에 필요한 알고리즘들뿐만 아니라, 이미 학습이 끝난 Model도 이에 해당\n", 50 | "\n", 51 | "---\n", 52 | "#### Estimator\n", 53 | "\n", 54 | "- DataFrame을 model에 fitting 시키는 단계, 
학습시키는 알고리즘이 모두 이에 해당\n", 55 | "- 예를 들면 `LogisticRegression`은 `Estimator`에 해당\n", 56 | "- `fit()` 함수를 호출하여 생성된 `LogisticRegressionModel`은 `Model`이자 `Transformer`\n", 57 | "\n", 58 | "---\n", 59 | "#### Pipeline\n", 60 | "\n", 61 | "- ML을 돌리기 위해 필요한 stage를 연결시킨 구현체\n", 62 | "- `Transformer`, `Estimator`가 Pipeline의 각 stage에 해당\n", 63 | "- `Pipeline.fit()`을 호출하면 각 단계에서 지정한 함수를 순서대로 호출\n", 64 | "- 마찬가지로 `PipelineModel`은 `fit()` 함수를 호출하여 생성된 `Model`" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "from pyspark.sql.functions import *\n", 74 | "from pyspark.sql.types import IntegerType\n", 75 | "\n", 76 | "from pyspark.ml.pipeline import Pipeline\n", 77 | "from pyspark.ml.feature import StandardScaler, VectorAssembler, Imputer, StringIndexer\n", 78 | "from pyspark.ml.classification import GBTClassifier\n", 79 | "from pyspark.ml.tuning import CrossValidator\n", 80 | "from pyspark.ml.tuning import ParamGridBuilder\n", 81 | "from pyspark.ml.evaluation import MulticlassClassificationEvaluator" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 4, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# K-Fold value\n", 91 | "fold = 3\n", 92 | "\n", 93 | "# Read train, test dataset\n", 94 | "inputCols = ['Pclass', 'age_im', 'SibSp', 'Parch', 'Fare', 'embarked_ix', 'sex_ix', 'len_name']\n", 95 | "str_length = udf(lambda x: len(x), IntegerType())\n", 96 | "\n", 97 | "train = spark.read \\\n", 98 | " .csv(\"../dataset/train.csv\", header=True, inferSchema=True) \\\n", 99 | " .withColumnRenamed(\"Survived\", \"label\") \\\n", 100 | " .withColumn('len_name', str_length(col('name'))) \\\n", 101 | " .na.drop(subset=[\"Embarked\", \"Fare\"]) \\\n", 102 | " .cache()\n", 103 | "\n", 104 | "test = spark.read \\\n", 105 | " .csv(\"../dataset/test.csv\", header=True, inferSchema=True) \\\n", 106 | " .withColumnRenamed(\"Survived\", \"label\") \\\n", 107 | " 
.withColumn('len_name', str_length(col('name'))) \\\n", 108 | " .na.drop(subset=[\"Embarked\", \"Fare\"]) \\\n", 109 | " .cache()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "Make pipeline, model fitting...\n", 122 | "Model training finished!\n", 123 | "Cross-validation average score : 0.8117291437383991\n", 124 | "Best maxDepth parameters : 5\n", 125 | "Best maxIter parameters : 40\n", 126 | "Best maxBins parameters : 25\n" 127 | ] 128 | } 129 | ], 130 | "source": [ 131 | "# Define operators\n", 132 | "imputer = Imputer(inputCols=['Age'], outputCols=['age_im'], strategy='mean')\n", 133 | "sex_ix = StringIndexer(inputCol='Sex', outputCol='sex_ix')\n", 134 | "embarked_ix = StringIndexer(inputCol='Embarked', outputCol='embarked_ix')\n", 135 | "assembler = VectorAssembler(inputCols=inputCols, outputCol='features')\n", 136 | "scaler = StandardScaler(inputCol='features', outputCol='scaled_features', withStd=True, withMean=False)\n", 137 | "model = GBTClassifier(labelCol='label', featuresCol='scaled_features', cacheNodeIds=True)\n", 138 | "evaluator = MulticlassClassificationEvaluator(predictionCol='prediction', labelCol='label', metricName='accuracy')\n", 139 | "\n", 140 | "# Pipeline\n", 141 | "print(\"Make pipeline, model fitting...\")\n", 142 | "pipeline = Pipeline(\n", 143 | " stages=[imputer, sex_ix, embarked_ix, assembler, scaler, model])\n", 144 | "\n", 145 | "# K-Fold Cross-validation with Parameter tuning\n", 146 | "paramGrid = ParamGridBuilder() \\\n", 147 | " .addGrid(model.maxDepth, [5, 7]) \\\n", 148 | " .addGrid(model.maxIter, [20, 40]) \\\n", 149 | " .addGrid(model.maxBins, [25]) \\\n", 150 | " .addGrid(model.stepSize, [0.025]) \\\n", 151 | " .addGrid(model.subsamplingRate, [0.7]) \\\n", 152 | " .build()\n", 153 | "\n", 154 | "# Fold 3 * Param 4 = 12\n", 155 | "cv = 
CrossValidator(estimator=pipeline,\n", 156 | " evaluator=evaluator,\n", 157 | " estimatorParamMaps=paramGrid,\n", 158 | " numFolds=fold)\n", 159 | "\n", 160 | "# Model training\n", 161 | "cvModel = cv.fit(train)\n", 162 | "bestModel = cvModel.bestModel\n", 163 | "\n", 164 | "print(\"Model training finished!\")\n", 165 | "print(\"Cross-validation average score : {}\".format(cvModel.avgMetrics[0]))\n", 166 | "print(\"Best maxDepth parameters : {}\".format(bestModel.stages[5]._java_obj.getMaxDepth()))\n", 167 | "print(\"Best maxIter parameters : {}\".format(bestModel.stages[5]._java_obj.getMaxIter()))\n", 168 | "print(\"Best maxBins parameters : {}\".format(bestModel.stages[5]._java_obj.getMaxBins()))" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 6, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "name": "stdout", 178 | "output_type": "stream", 179 | "text": [ 180 | "Save to csv finished!\n" 181 | ] 182 | } 183 | ], 184 | "source": [ 185 | "# Save prediction result\n", 186 | "predict = bestModel.transform(test)\n", 187 | "predict.select(\"PassengerId\", \"prediction\") \\\n", 188 | " .coalesce(1).write.mode(\"overwrite\") \\\n", 189 | " .option(\"compression\", \"gzip\") \\\n", 190 | " .csv(\"../dataset/pred_titanic.csv.gzip\", sep=\",\", header=True)\n", 191 | "\n", 192 | "print(\"Save to csv finished!\")" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 7, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "Feature importance:\n", 205 | "Pclass = 0.0635219203849\n", 206 | "age_im = 0.196579680469\n", 207 | "SibSp = 0.0553845897311\n", 208 | "Parch = 0.0267508377674\n", 209 | "Fare = 0.240002914139\n", 210 | "embarked_ix = 0.064995764243\n", 211 | "sex_ix = 0.088147713686\n", 212 | "len_name = 0.264616579579\n", 213 | "\n" 214 | ] 215 | } 216 | ], 217 | "source": [ 218 | "# Feature Importance\n", 219 | "featureImportance = 
bestModel.stages[-1].featureImportances.toArray()\n", 220 | "print(\"Feature importance:\\n{}\\n\".format(\n", 221 | " \"\\n\".join(map(lambda x: \"{} = {}\".format(str(x[0]), str(x[1])), zip(inputCols, featureImportance)))))" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 8, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "spark.stop()" 231 | ] 232 | } 233 | ], 234 | "metadata": { 235 | "kernelspec": { 236 | "display_name": "Python 3", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.6.3" 251 | } 252 | }, 253 | "nbformat": 4, 254 | "nbformat_minor": 2 255 | } 256 | -------------------------------------------------------------------------------- /notebooks/spark-ml-recommendation-explicit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Spark ML Movie Recommendation (Explicit)\n", 8 | "\n", 9 | "- dataset: https://grouplens.org/datasets/movielens/100k/" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "from pyspark.conf import SparkConf\n", 19 | "from pyspark import StorageLevel\n", 20 | "\n", 21 | "from pyspark.sql import SparkSession\n", 22 | "from pyspark.sql.functions import *\n", 23 | "from pyspark.sql.types import *" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "spark = SparkSession.builder \\\n", 33 | " .master(\"local\") \\\n", 34 | " .appName(\"Spark ML\") \\\n", 35 | " .getOrCreate()\n", 36 | 
"\n", 37 | "sc = spark.sparkContext\n", 38 | "sc.setLogLevel(\"INFO\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "+------+-------+------+\n", 51 | "|userId|movieId|rating|\n", 52 | "+------+-------+------+\n", 53 | "| 1| 2| 3.5|\n", 54 | "| 1| 29| 3.5|\n", 55 | "| 1| 32| 3.5|\n", 56 | "| 1| 47| 3.5|\n", 57 | "| 1| 50| 3.5|\n", 58 | "| 1| 112| 3.5|\n", 59 | "| 1| 151| 4.0|\n", 60 | "| 1| 223| 4.0|\n", 61 | "| 1| 253| 4.0|\n", 62 | "| 1| 260| 4.0|\n", 63 | "+------+-------+------+\n", 64 | "only showing top 10 rows\n", 65 | "\n" 66 | ] 67 | } 68 | ], 69 | "source": [ 70 | "df = spark.read.csv(\"../dataset/ml-ratings.csv\", inferSchema=True, header=True).cache()\n", 71 | "df.show(10)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "name": "stdout", 81 | "output_type": "stream", 82 | "text": [ 83 | "+-------+-----------------+------------------+------------------+\n", 84 | "|summary| userId| movieId| rating|\n", 85 | "+-------+-----------------+------------------+------------------+\n", 86 | "| count| 100000| 100000| 100000|\n", 87 | "| mean| 362.8304| 8572.4658| 3.507605|\n", 88 | "| stddev|196.8029033568026|19056.086005583176|1.0629280136183334|\n", 89 | "| min| 1| 1| 0.5|\n", 90 | "| max| 702| 128594| 5.0|\n", 91 | "+-------+-----------------+------------------+------------------+\n", 92 | "\n" 93 | ] 94 | } 95 | ], 96 | "source": [ 97 | "df.describe().show()" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "## Alternating Least Square\n", 105 | "\n", 106 | "![](../dataset/alsd.png)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 5, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "from pyspark.ml.evaluation import RegressionEvaluator\n", 
116 | "from pyspark.ml.recommendation import ALS\n", 117 | "from pyspark.ml.tuning import TrainValidationSplit, ParamGridBuilder" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 6, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "(train, test) = df.randomSplit([0.8, 0.2])\n", 127 | "als = ALS(userCol=\"userId\", itemCol=\"movieId\", ratingCol=\"rating\",\n", 128 | " coldStartStrategy=\"drop\", nonnegative=True)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 7, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# Hyperparameter\n", 138 | "paramGrid = ParamGridBuilder() \\\n", 139 | " .addGrid(als.rank, [12, 14]) \\\n", 140 | " .addGrid(als.maxIter, [18, 20]) \\\n", 141 | " .addGrid(als.regParam, [.17, .19]) \\\n", 142 | " .build()\n", 143 | "\n", 144 | "# Evaluator RMSE\n", 145 | "evaluator = RegressionEvaluator(\n", 146 | " metricName=\"rmse\", labelCol=\"rating\", \n", 147 | " predictionCol=\"prediction\")" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 8, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# Cross-Validation\n", 157 | "tvs = TrainValidationSplit(\n", 158 | " estimator=als, \n", 159 | " estimatorParamMaps=paramGrid, \n", 160 | " evaluator=evaluator)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 9, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "# Fit ALS model to training data\n", 170 | "model = tvs.fit(train)\n", 171 | "bestModel = model.bestModel" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 10, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "name": "stdout", 181 | "output_type": "stream", 182 | "text": [ 183 | "RMSE : 0.8976724722515121\n", 184 | "Best rank : 12\n", 185 | "Best maxIter : 20\n", 186 | "Best regParam : 0.17\n" 187 | ] 188 | } 189 | ], 190 | "source": [ 191 | "predictions = 
bestModel.transform(test)\n", 192 | "rmse = evaluator.evaluate(predictions)\n", 193 | "\n", 194 | "# Evaluation results\n", 195 | "print(\"RMSE : {}\".format(str(rmse)))\n", 196 | "print(\"Best rank : {}\".format(bestModel.rank))\n", 197 | "print(\"Best maxIter : {}\".format(bestModel._java_obj.parent().getMaxIter()))\n", 198 | "print(\"Best regParam : {}\".format(bestModel._java_obj.parent().getRegParam()))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 11, 204 | "metadata": { 205 | "scrolled": true 206 | }, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "+------+-------+------+----------+\n", 213 | "|userId|movieId|rating|prediction|\n", 214 | "+------+-------+------+----------+\n", 215 | "| 1| 1370| 3.0| 3.4125395|\n", 216 | "| 1| 1750| 3.5| 2.4918456|\n", 217 | "| 1| 924| 3.5| 3.4901712|\n", 218 | "| 1| 6755| 3.5| 3.5711613|\n", 219 | "| 1| 1208| 3.5| 3.7742562|\n", 220 | "+------+-------+------+----------+\n", 221 | "only showing top 5 rows\n", 222 | "\n" 223 | ] 224 | } 225 | ], 226 | "source": [ 227 | "predictions.sort(\"userId\", \"rating\").show(5)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 12, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "# Recommendation for all user (matrix R)\n", 237 | "recs = bestModel.recommendForAllUsers(10)" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 13, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "# UDF function for mapping result\n", 247 | "zip_ = udf(\n", 248 | " lambda x, y: list(zip(x, y)),\n", 249 | " ArrayType(StructType([\n", 250 | " StructField(\"movieId\", IntegerType()),\n", 251 | " StructField(\"rating\", DoubleType())\n", 252 | " ]))\n", 253 | ")" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 14, 259 | "metadata": {}, 260 | "outputs": [], 261 | "source": [ 262 | "# Recommendation for 
specific user\n", 263 | "def get_recs_for_user(recs, userId):\n", 264 | " recs = recs.filter(recs[\"userId\"]==userId)\n", 265 | " recs = recs.select(\"userId\", \"recommendations.movieId\", \"recommendations.rating\") \\\n", 266 | " .withColumn(\"tmp\", explode(zip_(\"movieId\", \"rating\"))) \\\n", 267 | " .select(\"userId\", \"tmp.movieId\", \"tmp.rating\")\n", 268 | " return recs" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 15, 274 | "metadata": { 275 | "scrolled": false 276 | }, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "+------+-------+------------------+\n", 283 | "|userId|movieId| rating|\n", 284 | "+------+-------+------------------+\n", 285 | "| 11| 449| 5.564600467681885|\n", 286 | "| 11| 31545| 5.513560771942139|\n", 287 | "| 11| 26413| 5.472157001495361|\n", 288 | "| 11| 5004| 5.369581699371338|\n", 289 | "| 11| 7560| 5.316649436950684|\n", 290 | "| 11| 48326| 5.310154438018799|\n", 291 | "| 11| 91529|5.1881818771362305|\n", 292 | "| 11| 751| 5.152843475341797|\n", 293 | "| 11| 66934| 5.130558967590332|\n", 294 | "| 11| 4798| 5.120045185089111|\n", 295 | "+------+-------+------------------+\n", 296 | "\n" 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "recs = get_recs_for_user(recs, \"11\")\n", 302 | "recs.show()" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 16, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "spark.stop()" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.6.3" 332 | } 333 | }, 334 | "nbformat": 
4, 335 | "nbformat_minor": 2 336 | } 337 | -------------------------------------------------------------------------------- /dataset/artist_alias_small.txt: -------------------------------------------------------------------------------- 1 | 1027859 1252408 2 | 1017615 668 3 | 6745885 1268522 4 | 1018110 1018110 5 | 1014609 1014609 6 | 6713071 2976 7 | 1014175 1014175 8 | 1008798 1008798 9 | 1013851 1013851 10 | 6696814 1030672 11 | 1036747 1239516 12 | 1278781 1021980 13 | 2035175 1007565 14 | 1327067 1308328 15 | 2006482 1140837 16 | 1314530 1237371 17 | 1160800 1345290 18 | 1255401 1055061 19 | 1307351 1055061 20 | 1234249 1005225 21 | 6622310 1094137 22 | 1261919 6977528 23 | 2103190 1002909 24 | 9929875 1009048 25 | 2118737 1011363 26 | 9929864 1000699 27 | 6666813 1305683 28 | 1172822 1127113 29 | 2026635 1001597 30 | 6726078 1018408 31 | 1039896 1277013 32 | 1239168 1266817 33 | 6819291 1277876 34 | 2030690 2060894 35 | 6786886 166 36 | 1051692 1307569 37 | 1239193 1012079 38 | 1291581 78 39 | 6642817 1010969 40 | 1293171 1007614 41 | 1070350 1034635 42 | 6603691 1279932 43 | 1027851 1063053 44 | 2060513 2029258 45 | 1277348 668 46 | 1253023 1033862 47 | 1002892 1002451 48 | 2060435 1256876 49 | 6612396 1301739 50 | 1280154 1021970 51 | 6617155 1039381 52 | 1006102 1034635 53 | 6697417 2013670 54 | 1059007 2653 55 | 2101386 2013670 56 | 1098456 1254644 57 | 6633276 1013675 58 | 162 1332522 59 | 1246265 1010669 60 | 6708991 1009773 61 | 1000110 1034635 62 | 1002566 1034635 63 | 1001864 1001864 64 | 9929533 1000088 65 | 1289246 1023527 66 | 1261152 1007206 67 | 2113342 1134530 68 | 1016805 3195 69 | 1325227 1246524 70 | 1245064 1264 71 | 1015753 1261449 72 | 2164287 10076841 73 | 1044186 10076841 74 | 1006661 1172842 75 | 6639087 974 76 | 1028218 1349406 77 | 9928967 15 78 | 1269139 1003505 79 | 2150015 1018408 80 | 6611952 1269012 81 | 2134206 1062330 82 | 6893915 1017065 83 | 10345702 1017065 84 | 6880926 1017065 85 | 6873763 1259700 86 | 1231677 1294194 87 
| 1333467 1156425 88 | 1169681 1651 89 | 1106289 2093800 90 | 6634844 1018408 91 | 2111668 2085 92 | 1038666 1295935 93 | 10112808 3437 94 | 9928973 1000113 95 | 10203303 6618355 96 | 1279723 1007263 97 | 1022552 1249851 98 | 2279441 6834637 99 | 1214254 1262045 100 | 1011272 1246839 101 | 10021668 1250233 102 | 6648707 1088328 103 | 1002139 1018807 104 | 1040536 2073100 105 | 1050544 1002332 106 | 6852428 1035970 107 | 1318457 1002152 108 | 1010410 1013654 109 | 1273591 1598 110 | 2144935 1066433 111 | 1000935 6747938 112 | 6603035 1314538 113 | 2073427 1006475 114 | 1305679 1034635 115 | 6723001 2039323 116 | 6612338 1257158 117 | 15 15 118 | 6843840 1326 119 | 1140506 1271441 120 | 1097968 1004831 121 | 9929045 153 122 | 1265244 3122 123 | 1010155 1252957 124 | 1246508 1013471 125 | 6666470 1349406 126 | 10328673 1956 127 | 3630 6951848 128 | 9919424 234 129 | 10013648 733 130 | 1185593 1028908 131 | 1030955 5452 132 | 1101433 1755 133 | 6979261 1008583 134 | 1199139 166 135 | 9929269 1238242 136 | 1323083 1029530 137 | 6652651 1009454 138 | 2684 1002480 139 | 1266264 1286358 140 | 1299041 1034635 141 | 10107676 118 142 | 6843503 1257158 143 | 10140618 28 144 | 1210088 1104179 145 | 6640761 1254011 146 | 1010284 1018807 147 | 1260442 2060894 148 | 1027472 2036 149 | 2085035 1000236 150 | 1156068 1001859 151 | 1211487 1014738 152 | 1123801 1034202 153 | 9929763 1003778 154 | 1327730 6705745 155 | 1016673 1298111 156 | 9910959 1034635 157 | 6644630 1065358 158 | 1146111 1000123 159 | 9929062 1010646 160 | 6666050 1294194 161 | 1104667 1012935 162 | 6747304 4538 163 | 1262727 2176737 164 | 9931068 1003361 165 | 1024502 1009571 166 | 6777696 1047693 167 | 1047140 1277286 168 | 1329111 1023485 169 | 1010145 1249657 170 | 1321574 71 171 | 1004857 1034635 172 | 2112240 1246983 173 | 1304801 1307569 174 | 10328618 2814 175 | 10227482 1000200 176 | 1341684 71 177 | 2036732 71 178 | 2034497 71 179 | 1338466 71 180 | 1351048 71 181 | 1339315 71 182 | 1009443 1020059 183 | 
6927588 1107395 184 | 6755702 1014604 185 | 1037848 1007201 186 | 1321035 1007201 187 | 1051861 1056268 188 | 2066585 1178346 189 | 1003979 1247540 190 | 6606624 1034635 191 | 1210850 2101375 192 | 2154067 1279924 193 | 1292006 1279924 194 | 1100499 1003448 195 | 1159075 1002152 196 | 1016988 1009571 197 | 1300745 5841 198 | 6868142 6866886 199 | 1018155 1015852 200 | 6638483 1195889 201 | 1011730 1239504 202 | 1009499 6730533 203 | 1014145 1009646 204 | 1212985 1301739 205 | 9929600 1004129 206 | 1280087 1295531 207 | 6704224 9964755 208 | 1071257 1236897 209 | 1060739 1263049 210 | 6645431 1013510 211 | 1126370 2114258 212 | 10328567 1000689 213 | 9997128 4303 214 | 1214221 1021115 215 | 6752624 684 216 | 6843863 1326 217 | 10163001 4775 218 | 1244701 1249401 219 | 1330987 1056296 220 | 1038051 6684730 221 | 1007834 1237371 222 | 1293474 1006885 223 | 2099786 2048617 224 | 1302130 1291109 225 | 6738758 2106357 226 | 9929441 1307 227 | 1013011 1276641 228 | 6623536 9916985 229 | 6606825 1014175 230 | 2017616 1007864 231 | 1291230 1236346 232 | 1286507 1137423 233 | 6935408 4349 234 | 6689505 1001655 235 | 1023449 1310185 236 | 2009180 6751847 237 | 1109974 1007063 238 | 10079136 1002328 239 | 1099602 2966 240 | 1015298 1247152 241 | 9931148 1006896 242 | 6666533 1253307 243 | 6667192 1086117 244 | 1080914 1274829 245 | 1003801 1241757 246 | 1049704 1261464 247 | 10092575 1000028 248 | 1334929 1246709 249 | 1291110 1030060 250 | 1055562 1276662 251 | 1090594 1009633 252 | 1252764 1003014 253 | 2058402 1024619 254 | 1029677 9983203 255 | 6671271 1033631 256 | 1327919 9983203 257 | 6827946 9983203 258 | 1270553 1327696 259 | 1000945 1018807 260 | 6786145 300 261 | 6614668 7006467 262 | 10331634 1000048 263 | 9912102 1034635 264 | 1065198 2061677 265 | 1351750 1233610 266 | 1307528 2036704 267 | 1012315 1238836 268 | 1314904 6977528 269 | 1053693 1170206 270 | 1287055 1020615 271 | 7023179 5696 272 | 6963887 1013095 273 | 1252485 1010725 274 | 1079065 1236703 275 | 
1027126 1255783 276 | 1274317 1234387 277 | 1012803 2161899 278 | 6666213 3554 279 | 1298276 6875510 280 | 1234344 1012125 281 | 10055114 2051723 282 | 10377598 1010055 283 | 1033104 1027610 284 | 2179213 1111915 285 | 6730134 1271216 286 | 1301746 1056258 287 | 1017322 1277866 288 | 1045804 1247516 289 | 1152469 1009402 290 | 2140188 10334513 291 | 1291960 1266817 292 | 2059804 1008487 293 | 6708740 1089337 294 | 1101793 1044253 295 | 1047491 1003342 296 | 1049384 1008336 297 | 1059884 1288727 298 | 6873850 1300642 299 | 2067429 1034635 300 | 2069589 1234503 301 | 10237528 1235384 302 | 1027009 2004228 303 | 6751850 2070071 304 | 6607841 1015122 305 | 6606625 1034635 306 | 1052722 2797 307 | 6688903 6706174 308 | 6892355 6785079 309 | 6618608 6785079 310 | 1019819 1034635 311 | 9929669 1004347 312 | 6606757 1003888 313 | 2140558 2114264 314 | 6730231 2161931 315 | 1075482 1264703 316 | 2064333 1076507 317 | 1022108 1035334 318 | 6759209 1241695 319 | 1008416 242 320 | 10263339 1008093 321 | 1276810 420 322 | 6622876 2161595 323 | 6670816 2051861 324 | 1254235 1254644 325 | 1305341 1010658 326 | 1039314 1203762 327 | 9919711 9956508 328 | 2082135 1259297 329 | 6635073 1259297 330 | 1300796 2036 331 | 6619918 2140107 332 | 1258892 1244746 333 | 6662497 1327588 334 | 6882695 1013167 335 | 1245000 1028445 336 | 5702 1066440 337 | 1007480 1012243 338 | 1244982 1028445 339 | 1122437 1254299 340 | 2075188 10150610 341 | 1073470 1327647 342 | 10270142 10096874 343 | 9954151 779 344 | 1275001 1028445 345 | 1244994 1028445 346 | 1122824 2148043 347 | 1084265 1065358 348 | 2140580 1002672 349 | 2070673 1169482 350 | 1010636 1239101 351 | 1031417 1265996 352 | 1033536 1008824 353 | 1006162 1048788 354 | 1179492 1246136 355 | 2113453 1003052 356 | 2052613 1088572 357 | 1279110 2035089 358 | 10314684 6716462 359 | 1042508 1008824 360 | 10176206 1014716 361 | 6657341 10361613 362 | 1283015 1001230 363 | 1289264 1013714 364 | 1033391 1239278 365 | 2082602 1043147 366 | 1052982 
1011231 367 | 2036251 2043827 368 | 2020235 1246136 369 | 1271924 1018769 370 | 1039828 1018807 371 | 1039174 1075543 372 | 9918754 4497 373 | 1235697 1233982 374 | 1244970 1028445 375 | 6843877 1326 376 | 1115653 1249252 377 | 1127341 1252111 378 | 1012852 6696725 379 | 1130370 2235 380 | 1245134 1007263 381 | 1036143 1063053 382 | 6790420 2129177 383 | 1263808 2070227 384 | 6620802 4377 385 | 1181082 1009583 386 | 1203592 1156425 387 | 1024571 1029592 388 | 1029799 1156425 389 | 1016343 1014340 390 | 754 754 391 | 1016631 1006736 392 | 1138014 1034635 393 | 1002778 393 394 | 1253088 1238269 395 | 6933178 809 396 | 1011293 1489 397 | 6965760 1027610 398 | 1010760 1239516 399 | 1006322 1006322 400 | 1006347 1006347 401 | 1058622 1251812 402 | 6742353 2104058 403 | 6718488 1059264 404 | 1281865 1011316 405 | 1006140 1246817 406 | 1015584 1007658 407 | 9919044 2007 408 | 9937566 809 409 | 9937520 1147975 410 | 1275359 1287322 411 | 2061602 6748393 412 | 6642370 1049114 413 | 1010872 2439 414 | 2126687 1023928 415 | 9930422 1002619 416 | 1000129 5810 417 | 1092585 1002559 418 | 1303182 6837566 419 | 10586534 1008953 420 | 1017671 1015311 421 | 1025954 2040456 422 | 1271505 1003694 423 | 1322886 2105178 424 | 6895260 1308328 425 | 1278418 1255555 426 | 1340193 2797 427 | 2155515 1003105 428 | 1022439 4609 429 | 6632852 1001835 430 | 2100392 1012457 431 | 1108824 1007415 432 | 9929214 1233948 433 | 6703817 1006812 434 | 1007631 1235753 435 | 1039492 1269417 436 | 6926874 1014485 437 | 1002498 3066 438 | 1097119 1016561 439 | 1008455 1020 440 | 1286284 5630 441 | 1266371 1236703 442 | 6779861 1250104 443 | 2025676 1001141 444 | 1023217 2076786 445 | 1011160 1238478 446 | 1340980 1000602 447 | 1006268 1002392 448 | 1197558 1001943 449 | 1340959 1000602 450 | 6677859 670 451 | 2066701 1063426 452 | 1327070 1341919 453 | 10050528 831 454 | 1015209 1167955 455 | 1192222 1025647 456 | 1020004 1025647 457 | 6814996 1002912 458 | 1086572 1233677 459 | 1138325 2023771 460 | 
1139033 1011219 461 | 1050349 1043653 462 | 4421 1252779 463 | 6703227 1210979 464 | 1070459 2064199 465 | 709 2003588 466 | 6843892 1012077 467 | 1012388 1254487 468 | 6625714 1060179 469 | 6901494 1006485 470 | 6806131 1002061 471 | 1284393 887 472 | 1023197 1269447 473 | 6604494 6834637 474 | 2131963 1020 475 | 10504380 1070177 476 | 2279509 1322366 477 | 1002670 1034635 478 | 1143149 1093353 479 | 6934479 1256115 480 | 9947015 1292207 481 | 2147002 10077841 482 | 1278109 1036654 483 | 9929574 1008086 484 | 1061384 1252912 485 | 6935936 3016 486 | 2159997 1003313 487 | 1044729 1046148 488 | 1067417 1003694 489 | 1270359 1019163 490 | 1102968 1233982 491 | 1030009 1791 492 | 6766073 1006837 493 | 1026989 1033119 494 | 1267504 1007885 495 | 1134651 659 496 | 1006146 1031646 497 | 1157380 1018266 498 | 1047433 1054273 499 | 6895364 2153903 500 | 6812143 1017092 501 | 1104448 1008824 502 | 1246003 4185 503 | 1255551 1266817 504 | 1326264 1235663 505 | 2164170 1247118 506 | 6923988 2140107 507 | 6606823 1267774 508 | 10198357 1025721 509 | 10014536 1009583 510 | 1038011 718 511 | 9952803 1156794 512 | 1248351 1007614 513 | 1293301 1001307 514 | 1033265 1136358 515 | 9967716 1015426 516 | 1106946 1006039 517 | 1233934 1234700 518 | 6704508 1006919 519 | 6804942 6676351 520 | 2025457 951 521 | 1112427 1080556 522 | 1089486 1090808 523 | 1139806 1039249 524 | 1006788 1003221 525 | 6677618 1028295 526 | 2166168 2025147 527 | 1059441 1039249 528 | 2164352 6922812 529 | 6607809 1034635 530 | 2008447 1034635 531 | 1028259 1237408 532 | 1257324 1240486 533 | 1129917 1039249 534 | 2162103 3195 535 | 6827283 3195 536 | 6835354 1039381 537 | 2031207 2031502 538 | 1022183 938 539 | 1067365 1003694 540 | 1344818 1331984 541 | 2008155 1007801 542 | 1011370 1246174 543 | 1089639 1003694 544 | 1023269 1253188 545 | 1033184 1006320 546 | 6801236 1013362 547 | 1130780 1319532 548 | 6793156 1319489 549 | 1240531 1078983 550 | 1005489 2003588 551 | 1116245 1003556 552 | 6621334 1012031 
553 | 1059744 1254973 554 | 1079120 1000655 555 | 6972279 1000985 556 | 6985668 6992655 557 | 1078613 2017 558 | 1234308 59 559 | 9929753 1007347 560 | 1009440 1003272 561 | 10383606 1319489 562 | 10713370 1084235 563 | 10713436 6611448 564 | 1022947 6951566 565 | 1197153 1018406 566 | 1145243 2115937 567 | 1008020 1277876 568 | 1006577 1261516 569 | 1007949 1045811 570 | 1241896 1045811 571 | 6818610 1233623 572 | 6703268 1260159 573 | 6843530 1260159 574 | 9974891 1260159 575 | 1033242 1000655 576 | 2159316 1034635 577 | 1289361 1262825 578 | 6679381 1027349 579 | 6827288 1238056 580 | 1312285 1039017 581 | 1012932 1234727 582 | 1071225 1027595 583 | 1070722 930 584 | 1110471 1007075 585 | 10052696 1116214 586 | 1063100 1057539 587 | 1208053 1060179 588 | -------------------------------------------------------------------------------- /notebooks/spark-ml-clustering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Anomaly Detection in Network Traffic with K-means Clustering" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "- KDD Cup 1999: Computer network intrusion detection\n", 15 | "- http://www.kdd.org/kdd-cup/view/kdd-cup-1999/Data" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "---\n", 23 | "- kddcup.names A list of features.\n", 24 | "- kddcup.data.zip The full data set (18M; 743M Uncompressed)\n", 25 | "- kddcup.testdata.unlabeled.zip (11.2M; 430M Uncompressed)\n", 26 | "- kddcup.data_10_percent.zip A 10% subset. (2.1M; 75M Uncompressed)\n", 27 | "- kddcup.newtestdata_10_percent_unlabeled.zip (1.4M; 45M Uncompressed)\n", 28 | "- kddcup.testdata.unlabeled_10_percent.zip (1.4M;45M Uncompressed)\n", 29 | "- corrected.zip Test data with corrected labels.\n", 30 | "- training_attack_types A list of intrusion types." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 1, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "from pyspark.conf import SparkConf\n", 40 | "from pyspark import StorageLevel\n", 41 | "\n", 42 | "from pyspark.sql import SparkSession\n", 43 | "from pyspark.sql.functions import *\n", 44 | "from pyspark.sql.types import *" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "spark = SparkSession.builder \\\n", 54 | " .master(\"local\") \\\n", 55 | " .appName(\"Spark ML\") \\\n", 56 | " .getOrCreate()\n", 57 | "\n", 58 | "sc = spark.sparkContext\n", 59 | "sc.setLogLevel(\"INFO\")" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stdout", 69 | "output_type": "stream", 70 | "text": [ 71 | "Number of training data: 494021\n", 72 | "root\n", 73 | " |-- duration: integer (nullable = true)\n", 74 | " |-- protocol_type: string (nullable = true)\n", 75 | " |-- service: string (nullable = true)\n", 76 | " |-- flag: string (nullable = true)\n", 77 | " |-- src_bytes: integer (nullable = true)\n", 78 | " |-- dst_bytes: integer (nullable = true)\n", 79 | " |-- land: integer (nullable = true)\n", 80 | " |-- wrong_fragment: integer (nullable = true)\n", 81 | " |-- urgent: integer (nullable = true)\n", 82 | " |-- hot: integer (nullable = true)\n", 83 | " |-- num_failed_logins: integer (nullable = true)\n", 84 | " |-- logged_in: integer (nullable = true)\n", 85 | " |-- num_compromised: integer (nullable = true)\n", 86 | " |-- root_shell: integer (nullable = true)\n", 87 | " |-- su_attempted: integer (nullable = true)\n", 88 | " |-- num_root: string (nullable = true)\n", 89 | " |-- num_file_creations: string (nullable = true)\n", 90 | " |-- num_shells: string (nullable = true)\n", 91 | " |-- num_access_files: integer (nullable = true)\n", 92 | " |-- num_outbound_cmds: integer 
(nullable = true)\n", 93 | " |-- is_host_login: integer (nullable = true)\n", 94 | " |-- is_guest_login: integer (nullable = true)\n", 95 | " |-- count: integer (nullable = true)\n", 96 | " |-- srv_count: integer (nullable = true)\n", 97 | " |-- serror_rate: double (nullable = true)\n", 98 | " |-- srv_serror_rate: double (nullable = true)\n", 99 | " |-- rerror_rate: double (nullable = true)\n", 100 | " |-- srv_rerror_rate: double (nullable = true)\n", 101 | " |-- same_srv_rate: double (nullable = true)\n", 102 | " |-- diff_srv_rate: double (nullable = true)\n", 103 | " |-- srv_diff_host_rate: double (nullable = true)\n", 104 | " |-- dst_host_count: integer (nullable = true)\n", 105 | " |-- dst_host_srv_count: integer (nullable = true)\n", 106 | " |-- dst_host_same_srv_rate: double (nullable = true)\n", 107 | " |-- dst_host_diff_srv_rate: double (nullable = true)\n", 108 | " |-- dst_host_same_src_port_rate: double (nullable = true)\n", 109 | " |-- dst_host_srv_diff_host_rate: double (nullable = true)\n", 110 | " |-- dst_host_serror_rate: double (nullable = true)\n", 111 | " |-- dst_host_srv_serror_rate: double (nullable = true)\n", 112 | " |-- dst_host_rerror_rate: double (nullable = true)\n", 113 | " |-- dst_host_srv_rerror_rate: double (nullable = true)\n", 114 | " |-- label: string (nullable = true)\n", 115 | "\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "df = spark.read.csv(\"../dataset/kddcup.data_10_percent.txt\", inferSchema=True, sep=\",\").toDF(\n", 121 | " \"duration\", \"protocol_type\", \"service\", \"flag\", \"src_bytes\",\n", 122 | " \"dst_bytes\", \"land\", \"wrong_fragment\", \"urgent\",\n", 123 | " \"hot\", \"num_failed_logins\", \"logged_in\", \"num_compromised\",\n", 124 | " \"root_shell\", \"su_attempted\", \"num_root\", \"num_file_creations\",\n", 125 | " \"num_shells\", \"num_access_files\", \"num_outbound_cmds\",\n", 126 | " \"is_host_login\", \"is_guest_login\", \"count\", \"srv_count\",\n", 127 | " \"serror_rate\", 
\"srv_serror_rate\", \"rerror_rate\", \"srv_rerror_rate\",\n", 128 | " \"same_srv_rate\", \"diff_srv_rate\", \"srv_diff_host_rate\",\n", 129 | " \"dst_host_count\", \"dst_host_srv_count\",\n", 130 | " \"dst_host_same_srv_rate\", \"dst_host_diff_srv_rate\",\n", 131 | " \"dst_host_same_src_port_rate\", \"dst_host_srv_diff_host_rate\",\n", 132 | " \"dst_host_serror_rate\", \"dst_host_srv_serror_rate\",\n", 133 | " \"dst_host_rerror_rate\", \"dst_host_srv_rerror_rate\", \"label\")\n", 134 | "\n", 135 | "print(\"Number of training data: {}\".format(df.count()))\n", 136 | "df.printSchema()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 4, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "+----------------+------+\n", 149 | "| label| count|\n", 150 | "+----------------+------+\n", 151 | "| smurf.|280790|\n", 152 | "| neptune.|107201|\n", 153 | "| normal.| 97277|\n", 154 | "| back.| 2203|\n", 155 | "| satan.| 1589|\n", 156 | "| ipsweep.| 1247|\n", 157 | "| portsweep.| 1040|\n", 158 | "| warezclient.| 1020|\n", 159 | "| teardrop.| 979|\n", 160 | "| pod.| 264|\n", 161 | "| nmap.| 231|\n", 162 | "| guess_passwd.| 53|\n", 163 | "|buffer_overflow.| 30|\n", 164 | "| land.| 21|\n", 165 | "| warezmaster.| 20|\n", 166 | "| imap.| 12|\n", 167 | "| rootkit.| 10|\n", 168 | "| loadmodule.| 9|\n", 169 | "| ftp_write.| 8|\n", 170 | "| multihop.| 7|\n", 171 | "| phf.| 4|\n", 172 | "| perl.| 3|\n", 173 | "| spy.| 2|\n", 174 | "| 0.00| 1|\n", 175 | "+----------------+------+\n", 176 | "\n" 177 | ] 178 | } 179 | ], 180 | "source": [ 181 | "df.select(\"label\").groupBy(\"label\").count().orderBy(desc(\"count\")).show(25)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 5, 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "from pyspark.ml.feature import VectorAssembler\n", 191 | "from pyspark.ml.feature import StandardScaler\n", 192 | "from 
pyspark.ml.clustering import KMeans, KMeansModel\n", 193 | "from pyspark.ml import Pipeline\n", 194 | "\n", 195 | "import random" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 6, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "# Only numeric: cast each string-inferred column from its own values\n", 205 | "df = df.withColumn(\"num_root\", df.num_root.cast(\"int\"))\n", 206 | "df = df.withColumn(\"num_file_creations\", df.num_file_creations.cast(\"int\"))\n", 207 | "df = df.withColumn(\"num_shells\", df.num_shells.cast(\"int\"))\n", 208 | "\n", 209 | "# Drop null\n", 210 | "train = df.drop(\"protocol_type\", \"service\", \"flag\").dropna().cache()\n", 211 | "columns = train.columns\n", 212 | "columns.remove('label')" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 7, 218 | "metadata": {}, 219 | "outputs": [ 220 | { 221 | "data": { 222 | "text/plain": [ 223 | "KMeans_4ca6977062d576f928ad" 224 | ] 225 | }, 226 | "execution_count": 7, 227 | "metadata": {}, 228 | "output_type": "execute_result" 229 | } 230 | ], 231 | "source": [ 232 | "# VectorAssembler\n", 233 | "assembler = VectorAssembler(\n", 234 | " inputCols=columns,\n", 235 | " outputCol='features')\n", 236 | "\n", 237 | "# StandardScaler\n", 238 | "scaler = StandardScaler(\n", 239 | " inputCol='features',\n", 240 | " outputCol='scaled_features',\n", 241 | " withStd=True,\n", 242 | " withMean=True)\n", 243 | "\n", 244 | "# KMeans\n", 245 | "kmeans = KMeans(\n", 246 | " featuresCol='scaled_features',\n", 247 | " predictionCol='cluster',\n", 248 | " maxIter=30,\n", 249 | " seed=random.randrange(1,10))\n", 250 | "kmeans.setK(3)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "### KMeans.explainParams()\n", 258 | "- featuresCol: features column name (default: features)\n", 259 | "- initMode: initialization algorithm (default: k-means||)\n", 260 | "- initSteps: number of steps for k-means|| (default: 5)\n", 261 | "- k: number of clusters to create 
(default: 2)\n", 262 | "- maxIter: maximum number of iterations (>= 0) (default: 20)\n", 263 | "- predictionCol: prediction column name (default: prediction)\n", 264 | "- seed: random seed (default: -1689246527)\n", 265 | "- tol: the convergence tolerance for iterative algorithms (default: 1.0E-4)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 8, 271 | "metadata": {}, 272 | "outputs": [], 273 | "source": [ 274 | "# Pipeline\n", 275 | "pipeline = Pipeline().setStages([assembler, scaler, kmeans])\n", 276 | "pipelineModel = pipeline.fit(train)\n", 277 | "kmeansModel = pipelineModel.stages[-1]" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 9, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "[-0.06768475 -0.00234773 -0.02619924 -0.00667342 -0.04772019 -0.00257147\n", 290 | " -0.04412916 -0.00961196 -0.41718843 -0.00567868 -0.01055195 -0.00467567\n", 291 | " -0.00564001 -0.00564001 -0.00564001 -0.02763182 0. 0.\n", 292 | " -0.03726266 0.41183858 0.35085558 0.13839435 0.13854274 -0.24789685\n", 293 | " -0.2486132 -0.01231898 -0.07526366 -0.20035199 0.3453196 0.08954367\n", 294 | " 0.05754689 -0.14071868 0.2934586 -0.15847693 0.13831224 0.13898245\n", 295 | " -0.25188338 -0.24946264]\n", 296 | "[ 0.22147364 0.00769472 0.08573617 0.0218723 0.15640414 0.00842805\n", 297 | " 0.14463444 0.03150345 1.36729709 0.01066278 0.03294298 0.01088012\n", 298 | " 0.01052669 0.01052669 0.01052669 0.08724121 0. 
0.\n", 299 | " 0.12212933 -1.3497775 -1.1499116 -0.45358059 -0.45406697 0.81248829\n", 300 | " 0.81483617 0.04036626 0.2466817 0.65665886 -1.13173936 -0.29345095\n", 301 | " -0.18858729 0.46119809 -0.96179208 0.51941222 -0.45331195 -0.45551016\n", 302 | " 0.82555426 0.81762023]\n", 303 | "[ 2.10018788e+01 -1.72190129e-03 7.62630765e+00 -6.67341765e-03\n", 304 | " -4.77201855e-02 -2.57146810e-03 -4.41359117e-02 -9.78218463e-03\n", 305 | " 2.39699353e+00 4.59031807e+02 9.47690015e+01 2.56648086e+02\n", 306 | " 4.59571367e+02 4.59571367e+02 4.59571367e+02 1.91849250e+02\n", 307 | " 0.00000000e+00 0.00000000e+00 -3.72626624e-02 -1.55426159e+00\n", 308 | " -1.18505964e+00 -4.64089853e-01 -4.63520572e-01 -2.47960492e-01\n", 309 | " -2.48631297e-01 5.36987560e-01 -2.55243406e-01 -2.03629298e-01\n", 310 | " -2.80287846e+00 -1.68488973e+00 -1.33594925e+00 4.95099724e-01\n", 311 | " -1.20906944e+00 -1.58629297e-01 -4.38142968e-01 -3.45067374e-01\n", 312 | " -2.52039538e-01 -2.49464020e-01]\n" 313 | ] 314 | } 315 | ], 316 | "source": [ 317 | "print(*kmeansModel.clusterCenters(), sep='\\n')" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 10, 323 | "metadata": { 324 | "scrolled": false 325 | }, 326 | "outputs": [ 327 | { 328 | "name": "stdout", 329 | "output_type": "stream", 330 | "text": [ 331 | "+-------+----------------+------+\n", 332 | "|cluster| label| count|\n", 333 | "+-------+----------------+------+\n", 334 | "| 0| guess_passwd.| 1|\n", 335 | "| 0| spy.| 1|\n", 336 | "| 0| imap.| 4|\n", 337 | "| 0| portsweep.| 13|\n", 338 | "| 0| satan.| 71|\n", 339 | "| 0| nmap.| 105|\n", 340 | "| 0| normal.| 10822|\n", 341 | "| 0| neptune.| 86723|\n", 342 | "| 0| smurf.|280786|\n", 343 | "| 1| spy.| 1|\n", 344 | "| 1| perl.| 3|\n", 345 | "| 1| phf.| 4|\n", 346 | "| 1| smurf.| 4|\n", 347 | "| 1| multihop.| 7|\n", 348 | "| 1| ftp_write.| 8|\n", 349 | "| 1| imap.| 8|\n", 350 | "| 1| loadmodule.| 9|\n", 351 | "| 1| rootkit.| 10|\n", 352 | "| 1| 
warezmaster.| 20|\n", 353 | "| 1| land.| 21|\n", 354 | "| 1|buffer_overflow.| 30|\n", 355 | "| 1| guess_passwd.| 52|\n", 356 | "| 1| nmap.| 126|\n", 357 | "| 1| pod.| 264|\n", 358 | "| 1| teardrop.| 979|\n", 359 | "| 1| warezclient.| 1020|\n", 360 | "| 1| portsweep.| 1027|\n", 361 | "| 1| ipsweep.| 1247|\n", 362 | "| 1| satan.| 1518|\n", 363 | "| 1| back.| 2203|\n", 364 | "| 1| neptune.| 20478|\n", 365 | "| 1| normal.| 86453|\n", 366 | "| 2| normal.| 2|\n", 367 | "+-------+----------------+------+\n", 368 | "\n" 369 | ] 370 | } 371 | ], 372 | "source": [ 373 | "withCluster = pipelineModel.transform(train)\n", 374 | "clusterLabel = withCluster.select(\"cluster\", \"label\") \\\n", 375 | " .groupBy(\"cluster\", \"label\").count() \\\n", 376 | " .orderBy(\"cluster\", \"count\")\n", 377 | "clusterLabel.show(100)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 11, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "name": "stdout", 387 | "output_type": "stream", 388 | "text": [ 389 | "+-------+----------+\n", 390 | "|cluster|sum(count)|\n", 391 | "+-------+----------+\n", 392 | "| 0| 378526|\n", 393 | "| 1| 115492|\n", 394 | "| 2| 2|\n", 395 | "+-------+----------+\n", 396 | "\n" 397 | ] 398 | } 399 | ], 400 | "source": [ 401 | "clusterLabel.groupBy('cluster').sum('count').orderBy('cluster').show()" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": 12, 407 | "metadata": {}, 408 | "outputs": [ 409 | { 410 | "data": { 411 | "text/plain": [ 412 | "[378526, 115492, 2]" 413 | ] 414 | }, 415 | "execution_count": 12, 416 | "metadata": {}, 417 | "output_type": "execute_result" 418 | } 419 | ], 420 | "source": [ 421 | "kmeansModel.summary.clusterSizes" 422 | ] 423 | }, 424 | { 425 | "cell_type": "code", 426 | "execution_count": 13, 427 | "metadata": {}, 428 | "outputs": [ 429 | { 430 | "name": "stdout", 431 | "output_type": "stream", 432 | "text": [ 433 | "+-------+--------------------+\n", 434 | "|cluster| 
scaled_features|\n", 435 | "+-------+--------------------+\n", 436 | "| 1|[-0.0677917208490...|\n", 437 | "| 1|[-0.0677917208490...|\n", 438 | "| 1|[-0.0677917208490...|\n", 439 | "+-------+--------------------+\n", 440 | "only showing top 3 rows\n", 441 | "\n" 442 | ] 443 | } 444 | ], 445 | "source": [ 446 | "scaled_features = withCluster.select('cluster', 'scaled_features')\n", 447 | "scaled_features.show(3)" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": 14, 453 | "metadata": {}, 454 | "outputs": [], 455 | "source": [ 456 | "spark.stop()" 457 | ] 458 | } 459 | ], 460 | "metadata": { 461 | "kernelspec": { 462 | "display_name": "Python 3", 463 | "language": "python", 464 | "name": "python3" 465 | }, 466 | "language_info": { 467 | "codemirror_mode": { 468 | "name": "ipython", 469 | "version": 3 470 | }, 471 | "file_extension": ".py", 472 | "mimetype": "text/x-python", 473 | "name": "python", 474 | "nbconvert_exporter": "python", 475 | "pygments_lexer": "ipython3", 476 | "version": "3.6.3" 477 | } 478 | }, 479 | "nbformat": 4, 480 | "nbformat_minor": 2 481 | } 482 | -------------------------------------------------------------------------------- /notebooks/spark-ml-starter.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Spark ML\n", 8 | "\n", 9 | "- RDD 기반의 MLlib이 아닌 DataFrame 기반의 ML 패키지를 설명할 예정\n", 10 | "- 사용한 버전: spark 2.2+, python 3.5+" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from pyspark.sql import SparkSession\n", 20 | "from pyspark.conf import SparkConf\n", 21 | "from pyspark import StorageLevel" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "spark = SparkSession.builder \\\n", 31 | " .master(\"local\") 
\\\n", 32 | " .appName(\"Spark ML\") \\\n", 33 | " .getOrCreate()\n", 34 | "\n", 35 | "sc = spark.sparkContext\n", 36 | "sc.setLogLevel(\"INFO\")" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Dataset - Kaggle Titanic" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 3, 49 | "metadata": {}, 50 | "outputs": [ 51 | { 52 | "name": "stdout", 53 | "output_type": "stream", 54 | "text": [ 55 | "+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n", 56 | "|PassengerId|Survived|Pclass| Name| Sex| Age|SibSp|Parch| Ticket| Fare|Cabin|Embarked|\n", 57 | "+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n", 58 | "| 1| 0| 3|Braund, Mr. Owen ...| male|22.0| 1| 0| A/5 21171| 7.25| null| S|\n", 59 | "| 2| 1| 1|Cumings, Mrs. Joh...|female|38.0| 1| 0| PC 17599|71.2833| C85| C|\n", 60 | "| 3| 1| 3|Heikkinen, Miss. ...|female|26.0| 0| 0|STON/O2. 3101282| 7.925| null| S|\n", 61 | "| 4| 1| 1|Futrelle, Mrs. Ja...|female|35.0| 1| 0| 113803| 53.1| C123| S|\n", 62 | "| 5| 0| 3|Allen, Mr. Willia...| male|35.0| 0| 0| 373450| 8.05| null| S|\n", 63 | "| 6| 0| 3| Moran, Mr. James| male|null| 0| 0| 330877| 8.4583| null| Q|\n", 64 | "| 7| 0| 1|McCarthy, Mr. Tim...| male|54.0| 0| 0| 17463|51.8625| E46| S|\n", 65 | "| 8| 0| 3|Palsson, Master. ...| male| 2.0| 3| 1| 349909| 21.075| null| S|\n", 66 | "| 9| 1| 3|Johnson, Mrs. Osc...|female|27.0| 0| 2| 347742|11.1333| null| S|\n", 67 | "| 10| 1| 2|Nasser, Mrs. 
Nich...|female|14.0| 1| 0| 237736|30.0708| null| C|\n", 68 | "+-----------+--------+------+--------------------+------+----+-----+-----+----------------+-------+-----+--------+\n", 69 | "only showing top 10 rows\n", 70 | "\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "df = spark.read.csv(\"../dataset/train.csv\", header=True, inferSchema=True).cache()\n", 76 | "df.createOrReplaceTempView(\"train\")\n", 77 | "df.show(10)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "---\n", 85 | "## EDA: Spark SQL + Zeppelin\n", 86 | "\n", 87 | "- EDA는 분산쿼리를 통해 빠르게 수행\n", 88 | "- `Zeppelin` 환경을 구축해서 쿼리에 대한 그래프를 바로 확인 가능\n", 89 | "- `printSchema()`, `describe()`, `isNull()`, `select()` 함수를 통해 데이터 상태 확인" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 4, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "from pyspark.sql import Row\n", 99 | "from pyspark.sql.functions import *" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 5, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "root\n", 112 | " |-- PassengerId: integer (nullable = true)\n", 113 | " |-- Survived: integer (nullable = true)\n", 114 | " |-- Pclass: integer (nullable = true)\n", 115 | " |-- Name: string (nullable = true)\n", 116 | " |-- Sex: string (nullable = true)\n", 117 | " |-- Age: double (nullable = true)\n", 118 | " |-- SibSp: integer (nullable = true)\n", 119 | " |-- Parch: integer (nullable = true)\n", 120 | " |-- Ticket: string (nullable = true)\n", 121 | " |-- Fare: double (nullable = true)\n", 122 | " |-- Cabin: string (nullable = true)\n", 123 | " |-- Embarked: string (nullable = true)\n", 124 | "\n" 125 | ] 126 | } 127 | ], 128 | "source": [ 129 | "df.printSchema()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 6, 135 | "metadata": { 136 | "scrolled": false 137 | }, 138 | "outputs": [ 
139 | { 140 | "name": "stdout", 141 | "output_type": "stream", 142 | "text": [ 143 | "+-------+------------------+\n", 144 | "|summary| age|\n", 145 | "+-------+------------------+\n", 146 | "| count| 714|\n", 147 | "| mean| 29.69911764705882|\n", 148 | "| stddev|14.526497332334035|\n", 149 | "| min| 0.42|\n", 150 | "| max| 80.0|\n", 151 | "+-------+------------------+\n", 152 | "\n" 153 | ] 154 | } 155 | ], 156 | "source": [ 157 | "df.describe(['age']).show()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 7, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "+-----------+--------+------+----+---+---+-----+-----+------+----+-----+--------+\n", 170 | "|PassengerId|Survived|Pclass|Name|Sex|Age|SibSp|Parch|Ticket|Fare|Cabin|Embarked|\n", 171 | "+-----------+--------+------+----+---+---+-----+-----+------+----+-----+--------+\n", 172 | "| 0| 0| 0| 0| 0|177| 0| 0| 0| 0| 687| 2|\n", 173 | "+-----------+--------+------+----+---+---+-----+-----+------+----+-----+--------+\n", 174 | "\n" 175 | ] 176 | } 177 | ], 178 | "source": [ 179 | "# column들에 대한 null 체크\n", 180 | "df.select(*(\n", 181 | " sum(col(c).isNull().cast(\"int\")).alias(c)\n", 182 | " for c in df.columns)).show()" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 8, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "+--------+-----+\n", 195 | "|Embarked|count|\n", 196 | "+--------+-----+\n", 197 | "| Q| 30|\n", 198 | "| null| 2|\n", 199 | "| C| 93|\n", 200 | "| S| 217|\n", 201 | "+--------+-----+\n", 202 | "\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "query = \"\"\"\n", 208 | "SELECT Embarked, count(PassengerId) as count\n", 209 | "FROM train\n", 210 | "WHERE Survived = 1\n", 211 | "GROUP BY Embarked\n", 212 | "\"\"\"\n", 213 | "\n", 214 | "spark.sql(query).show()" 215 | ] 216 | }, 217 | { 218 | 
"cell_type": "code", 219 | "execution_count": 9, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "name": "stdout", 224 | "output_type": "stream", 225 | "text": [ 226 | "+------+-----+\n", 227 | "|PClass|count|\n", 228 | "+------+-----+\n", 229 | "| 1| 136|\n", 230 | "| 3| 119|\n", 231 | "| 2| 87|\n", 232 | "+------+-----+\n", 233 | "\n" 234 | ] 235 | } 236 | ], 237 | "source": [ 238 | "query = \"\"\"\n", 239 | "SELECT PClass, count(PassengerId) as count\n", 240 | "FROM train\n", 241 | "WHERE Survived = 1\n", 242 | "GROUP BY PClass\n", 243 | "\"\"\"\n", 244 | "\n", 245 | "spark.sql(query).show()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "---\n", 253 | "## Preprocessing: Spark DataFrame Function + UDF\n", 254 | "\n", 255 | "#### Missing Value\n", 256 | "\n", 257 | "- `pyspark.sql.DataFrameNaFunctions`에서 확인\n", 258 | "- Spark ML의 `Imputer`로도 처리 가능 (Pipeline과의 연계)\n", 259 | "- http://spark.apache.org/docs/latest/api/python/pyspark.sql.html#pyspark.sql.DataFrameNaFunctions\n", 260 | "\n", 261 | "---\n", 262 | "#### Feature Engineering\n", 263 | "\n", 264 | "- udf를 만들어서 내가 원하는 형태로 전처리 가능\n", 265 | "- approxQuantile, correlation, covariance, stratified sampling 등이 필요한 경우\n", 266 | "- `pyspark.sql.DataFrameStatFunctions`에서 확인\n", 267 | "- http://spark.apache.org/docs/latest/api/python/pyspark.sql.html#pyspark.sql.DataFrameStatFunctions" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 10, 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "from pyspark.sql.functions import udf\n", 277 | "from pyspark.sql.types import StringType, IntegerType" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 11, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "Before: 177\n", 290 | "After 0\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "# column에서 null 값을 drop 시키는 경우\n", 
296 | "df = df.drop('cabin')\n", 297 | "before = df.select('age').where('age is null').count()\n", 298 | "print(\"Before: {}\".format(before))\n", 299 | "\n", 300 | "test = df.na.drop(subset=[\"age\"])\n", 301 | "after = test.select('age').where('age is null').count()\n", 302 | "print(\"After {}\".format(after))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 12, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "+----+\n", 315 | "| age|\n", 316 | "+----+\n", 317 | "|22.0|\n", 318 | "|38.0|\n", 319 | "|26.0|\n", 320 | "|35.0|\n", 321 | "|35.0|\n", 322 | "+----+\n", 323 | "only showing top 5 rows\n", 324 | "\n" 325 | ] 326 | } 327 | ], 328 | "source": [ 329 | "# column에서 null 값을 mean으로 채우는 경우\n", 330 | "avg_age = df.where('age is not null').groupBy().avg('age').collect()[0][0]\n", 331 | "df = df.na.fill({'age': avg_age})\n", 332 | "df.select('age').show(5)" 333 | ] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "execution_count": 13, 338 | "metadata": {}, 339 | "outputs": [ 340 | { 341 | "name": "stdout", 342 | "output_type": "stream", 343 | "text": [ 344 | "Before:\n", 345 | "+--------+-----+\n", 346 | "|survived|count|\n", 347 | "+--------+-----+\n", 348 | "| 1| 342|\n", 349 | "| 0| 549|\n", 350 | "+--------+-----+\n", 351 | "\n", 352 | "After:\n", 353 | "+--------+-----+\n", 354 | "|survived|count|\n", 355 | "+--------+-----+\n", 356 | "| 1| 168|\n", 357 | "| 0| 57|\n", 358 | "+--------+-----+\n", 359 | "\n" 360 | ] 361 | } 362 | ], 363 | "source": [ 364 | "# label을 기준으로 Stratified Sampling 예시\n", 365 | "sample_df = df.sampleBy('survived', fractions={0: 0.1, 1: 0.5}, seed=0)\n", 366 | "print(\"Before:\")\n", 367 | "df.groupBy('survived').count().show()\n", 368 | "print(\"After:\")\n", 369 | "sample_df.groupBy('survived').count().show()" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 14, 375 | "metadata": {}, 376 | "outputs": [ 
377 | { 378 | "name": "stdout", 379 | "output_type": "stream", 380 | "text": [ 381 | "+--------------------+--------+\n", 382 | "| name|len_name|\n", 383 | "+--------------------+--------+\n", 384 | "|Braund, Mr. Owen ...| 23|\n", 385 | "|Cumings, Mrs. Joh...| 51|\n", 386 | "|Heikkinen, Miss. ...| 22|\n", 387 | "|Futrelle, Mrs. Ja...| 44|\n", 388 | "|Allen, Mr. Willia...| 24|\n", 389 | "+--------------------+--------+\n", 390 | "only showing top 5 rows\n", 391 | "\n" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "# 승객 이름의 길이를 새로운 feature로 추가하는 예시\n", 397 | "str_length = udf(lambda x: len(x), IntegerType())\n", 398 | "df = df.withColumn('len_name', str_length(df['name']))\n", 399 | "df.select('name', 'len_name').show(5)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 15, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "+--------+-----------+\n", 412 | "|embarked|embarked_ix|\n", 413 | "+--------+-----------+\n", 414 | "| S| 3|\n", 415 | "| C| 1|\n", 416 | "| S| 3|\n", 417 | "| S| 3|\n", 418 | "| S| 3|\n", 419 | "+--------+-----------+\n", 420 | "only showing top 5 rows\n", 421 | "\n" 422 | ] 423 | } 424 | ], 425 | "source": [ 426 | "# udf를 사용해서 categorical feature를 전처리하는 예시\n", 427 | "# Spark ML의 StringIndexer를 사용해도 결과는 동일\n", 428 | "\n", 429 | "def embarked_to_int(embarked):\n", 430 | " if embarked == 'C': return 1\n", 431 | " elif embarked == 'Q': return 2\n", 432 | " elif embarked == 'S': return 3 \n", 433 | " else: return 0\n", 434 | "\n", 435 | "embarked_to_int = udf(embarked_to_int, IntegerType())\n", 436 | "df = df.withColumn('embarked_ix', embarked_to_int(df['embarked']))\n", 437 | "df.select('embarked', 'embarked_ix').show(5)" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 16, 443 | "metadata": {}, 444 | "outputs": [ 445 | { 446 | "name": "stdout", 447 | "output_type": "stream", 448 | "text": [ 449 | 
"+------+------+\n", 450 | "| sex|sex_ix|\n", 451 | "+------+------+\n", 452 | "| male| 0|\n", 453 | "|female| 1|\n", 454 | "|female| 1|\n", 455 | "|female| 1|\n", 456 | "| male| 0|\n", 457 | "+------+------+\n", 458 | "only showing top 5 rows\n", 459 | "\n" 460 | ] 461 | } 462 | ], 463 | "source": [ 464 | "# Spark SQL Function의 when-otherwise 절을 사용하는 방법\n", 465 | "# categorical feature를 전처리하는 예시\n", 466 | "df.select('sex', \n", 467 | " when(df['sex'] == 'male', 0).otherwise(1).alias('sex_ix')).show(5)" 468 | ] 469 | }, 470 | { 471 | "cell_type": "markdown", 472 | "metadata": {}, 473 | "source": [ 474 | "---\n", 475 | "## Extracting, transforming and selecting features\n", 476 | "http://spark.apache.org/docs/latest/api/python/pyspark.ml.html#module-pyspark.ml.feature\n", 477 | "\n", 478 | "#### Extraction\n", 479 | "\n", 480 | "- raw 데이터로부터 feature를 추출하기 위한 패키지\n", 481 | "- `TF-IDF`, `Word2Vec`, `CountVectorizer`, `FeatureHasher`\n", 482 | "\n", 483 | "---\n", 484 | "#### Transformation\n", 485 | "\n", 486 | "- feature를 변형시키기 위한 패키지 (scaling, converting)\n", 487 | "- `Tokenizer`, `StopWordsRemover`, `n-gram`, `PCA`, `StringIndexer`, `OneHotEncoder`\n", 488 | "- `StandardScaler`, `MinMaxScaler` 등\n", 489 | "\n", 490 | "---\n", 491 | "#### Selection\n", 492 | "\n", 493 | "- feature selection을 지원하는 패키지 (feature가 정말 많은 경우 유용)\n", 494 | "- `VectorSlicer`, `RFormula`, `ChiSqSelector`" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 17, 500 | "metadata": {}, 501 | "outputs": [], 502 | "source": [ 503 | "from pyspark.ml.feature import StringIndexer\n", 504 | "from pyspark.ml.feature import VectorAssembler" 505 | ] 506 | }, 507 | { 508 | "cell_type": "code", 509 | "execution_count": 18, 510 | "metadata": {}, 511 | "outputs": [ 512 | { 513 | "name": "stdout", 514 | "output_type": "stream", 515 | "text": [ 516 | "+------+------+\n", 517 | "| Sex|sex_ix|\n", 518 | "+------+------+\n", 519 | "| male| 0.0|\n", 520 | "|female| 1.0|\n", 521 | "|female|
1.0|\n", 522 | "|female| 1.0|\n", 523 | "| male| 0.0|\n", 524 | "+------+------+\n", 525 | "only showing top 5 rows\n", 526 | "\n" 527 | ] 528 | } 529 | ], 530 | "source": [ 531 | "# StringIndexer를 사용해서 categorical feature를 전처리하는 예시\n", 532 | "df = StringIndexer(inputCol='Sex', outputCol='sex_ix').fit(df).transform(df)\n", 533 | "df.select('Sex', 'sex_ix').show(5)" 534 | ] 535 | }, 536 | { 537 | "cell_type": "code", 538 | "execution_count": 19, 539 | "metadata": {}, 540 | "outputs": [ 541 | { 542 | "name": "stdout", 543 | "output_type": "stream", 544 | "text": [ 545 | "+-----------+-----+--------------------+\n", 546 | "|PassengerId|label| features|\n", 547 | "+-----------+-----+--------------------+\n", 548 | "| 1| 0|[3.0,22.0,1.0,0.0...|\n", 549 | "| 2| 1|[1.0,38.0,1.0,0.0...|\n", 550 | "| 3| 1|[3.0,26.0,0.0,0.0...|\n", 551 | "| 4| 1|[1.0,35.0,1.0,0.0...|\n", 552 | "| 5| 0|[3.0,35.0,0.0,0.0...|\n", 553 | "+-----------+-----+--------------------+\n", 554 | "only showing top 5 rows\n", 555 | "\n" 556 | ] 557 | } 558 | ], 559 | "source": [ 560 | "# VectorAssembler를 사용해서 feature를 vector 형태로 변환\n", 561 | "inputCols = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'embarked_ix', 'sex_ix', 'len_name']\n", 562 | "assembler = VectorAssembler(inputCols=inputCols, outputCol='features')\n", 563 | "train = assembler.transform(df).select('PassengerId', col('Survived').alias('label'), 'features')\n", 564 | "train.show(5)" 565 | ] 566 | }, 567 | { 568 | "cell_type": "markdown", 569 | "metadata": {}, 570 | "source": [ 571 | "---\n", 572 | "## Model\n", 573 | "- http://spark.apache.org/docs/latest/api/python/pyspark.ml.html#module-pyspark.ml.classification\n", 574 | "- 대부분 **Data parallelism**을 통해 분산학습하는 방식\n", 575 | "- Spark 2.3 버전부터 **Model parallelism**을 지원\n", 576 | "\n", 577 | "#### Classification, Regression\n", 578 | "\n", 579 | "- 트리 모델: `DecisionTree`, `RandomForest`, `GBTClassifier`\n", 580 | "- SVM 모델: `LinearSVC`, `OneVsRest`\n", 581 | "- `MultilayerPerceptronClassifier`: 
hidden layer를 쌓을 수 있는 feedforward 신경망 (출력층은 Softmax)\n", 582 | "- `LinearRegression`, `SurvivalRegression`, `NaiveBayes`\n", 583 | "\n", 584 | "---\n", 585 | "#### Clustering\n", 586 | "\n", 587 | "- 다양한 클러스터링 알고리즘을 지원\n", 588 | "- `KMeans`, `LDA`, `GMM`\n", 589 | "- 이전에는 computeCost 함수를 통해 SSE로 모델을 평가\n", 590 | "- 2.3 버전부터 `ClusteringEvaluator` 사용 가능\n", 591 | "\n", 592 | "---\n", 593 | "#### Recommendation\n", 594 | "\n", 595 | "- CF 방식의 `Alternating Least Squares(ALS)` 추천 알고리즘을 지원\n", 596 | "- \"Large-Scale Parallel Collaborative Filtering for the Netflix Prize\" 논문을 참고\n", 597 | "- Production에 쉽게 연동할 수 있게 만든 **Apache PredictionIO**도 참고 (MLlib)" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": 20, 603 | "metadata": {}, 604 | "outputs": [], 605 | "source": [ 606 | "from pyspark.ml.classification import RandomForestClassifier" 607 | ] 608 | }, 609 | { 610 | "cell_type": "code", 611 | "execution_count": 21, 612 | "metadata": {}, 613 | "outputs": [ 614 | { 615 | "name": "stdout", 616 | "output_type": "stream", 617 | "text": [ 618 | "+-----------+-----+--------------------+--------------------+--------------------+----------+\n", 619 | "|PassengerId|label| features| rawPrediction| probability|prediction|\n", 620 | "+-----------+-----+--------------------+--------------------+--------------------+----------+\n", 621 | "| 27| 0|[3.0,29.699117647...|[16.4071621231905...|[0.82035810615952...| 0.0|\n", 622 | "| 34| 0|[2.0,66.0,0.0,0.0...|[17.1275400788451...|[0.85637700394225...| 0.0|\n", 623 | "| 44| 1|[2.0,3.0,1.0,2.0,...|[0.35197754315401...|[0.01759887715770...| 1.0|\n", 624 | "| 49| 0|[3.0,29.699117647...|[14.2343850263881...|[0.71171925131940...| 0.0|\n", 625 | "| 50| 0|[3.0,18.0,1.0,0.0...|[7.40981147634526...|[0.37049057381726...| 1.0|\n", 626 | "+-----------+-----+--------------------+--------------------+--------------------+----------+\n", 627 | "only showing top 5 rows\n", 628 | "\n" 629 | ] 630 | } 631 | ], 632 | "source": [ 633 | "# RandomForestClassifier
예제\n", 634 | "# training set을 row 단위로 partitioning\n", 635 | "splits = train.randomSplit([0.8, 0.2])\n", 636 | "train = splits[0].cache()\n", 637 | "test = splits[1].cache()\n", 638 | "\n", 639 | "# cacheNodeIds: 인스턴스 마다 노드의 Id를 캐싱, 트리가 깊어진다면 성능 향상 팁\n", 640 | "model = RandomForestClassifier(\n", 641 | " labelCol=\"label\",\n", 642 | " featuresCol=\"features\",\n", 643 | " cacheNodeIds=True)\n", 644 | "\n", 645 | "predict = model.fit(train).transform(test)\n", 646 | "predict.show(5)" 647 | ] 648 | }, 649 | { 650 | "cell_type": "markdown", 651 | "metadata": {}, 652 | "source": [ 653 | "---\n", 654 | "## Evaluation\n", 655 | "http://spark.apache.org/docs/latest/api/python/pyspark.ml.html#module-pyspark.ml.evaluation\n", 656 | "\n", 657 | "- 모델을 평가하기 위한 패키지, 사용할 수 있는 metric을 확인할 필요가 있음\n", 658 | "- BinaryClassificationEvaluator: `areaUnderROC`, `areaUnderPR` 사용 가능\n", 659 | "- MulticlassClassificationEvaluator: `f1`, `weightedPrecision`, `weightedRecall`, `accuracy`\n", 660 | "- RegressionEvaluator: `rmse`, `mse`, `mae`\n", 661 | "- ClusteringEvaluator: 2.3 버전에 새롭게 추가, metric으로 `silhouette` 사용 가능\n", 662 | "- `confusionMatrix()` 등 몇 가지는 아직 Spark MLlib에만 존재함" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 22, 668 | "metadata": {}, 669 | "outputs": [ 670 | { 671 | "data": { 672 | "text/plain": [ 673 | "0.8469387755102041" 674 | ] 675 | }, 676 | "execution_count": 22, 677 | "metadata": {}, 678 | "output_type": "execute_result" 679 | } 680 | ], 681 | "source": [ 682 | "from pyspark.ml.evaluation import MulticlassClassificationEvaluator\n", 683 | "\n", 684 | "evaluator = MulticlassClassificationEvaluator(\n", 685 | " predictionCol=\"prediction\", \n", 686 | " labelCol=\"label\", \n", 687 | " metricName=\"accuracy\")\n", 688 | "\n", 689 | "evaluator.evaluate(predict)" 690 | ] 691 | }, 692 | { 693 | "cell_type": "markdown", 694 | "metadata": {}, 695 | "source": [ 696 | "---\n", 697 | "## Tuning: model selection and hyperparameter tuning\n", 698 | "-
http://spark.apache.org/docs/latest/api/python/pyspark.ml.html#module-pyspark.ml.tuning\n", 699 | "- 지정한 parameter의 조합에 대하여 반복 학습하는 형태\n", 700 | "- 원래 `data parallelism` 만 지원했지만, 2.3버전부터 `model parallelism`도 지원하기 시작\n", 701 | "- CrossValidator와 TrainValidationSplit에 `parallelism` 파라메터 지정\n", 702 | "\n", 703 | "#### ParamGridBuilder\n", 704 | "\n", 705 | "- 파라메터를 자동으로 튜닝하기 위한 빌더 패키지 (Grid Search)\n", 706 | "- 각 모델에 대한 파라메터는 `spark.ml.param` module\n", 707 | "\n", 708 | "---\n", 709 | "#### CrossValidator\n", 710 | "\n", 711 | "- K-Fold CrossValidation 그 자체 (위키 참고)\n", 712 | "- 지정한 Fold 만큼 반복 학습\n", 713 | "\n", 714 | "---\n", 715 | "#### TrainValidationSplit (Experimental)\n", 716 | "\n", 717 | "- 지정한 비율에 따라 훈련/검증 셋을 나누어 학습에 반영\n", 718 | "- CrossValidator에 비해 금방 끝나겠지만, 주어진 학습 데이터가 적다면 결과가 부정확할 수 있음" 719 | ] 720 | }, 721 | { 722 | "cell_type": "code", 723 | "execution_count": 23, 724 | "metadata": {}, 725 | "outputs": [], 726 | "source": [ 727 | "from pyspark.ml.tuning import TrainValidationSplit\n", 728 | "from pyspark.ml.tuning import ParamGridBuilder\n", 729 | "from pyspark.ml.evaluation import MulticlassClassificationEvaluator" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 24, 735 | "metadata": {}, 736 | "outputs": [ 737 | { 738 | "data": { 739 | "text/plain": [ 740 | "0.8622448979591837" 741 | ] 742 | }, 743 | "execution_count": 24, 744 | "metadata": {}, 745 | "output_type": "execute_result" 746 | } 747 | ], 748 | "source": [ 749 | "# Modeling\n", 750 | "model = RandomForestClassifier(\n", 751 | " labelCol=\"label\",\n", 752 | " featuresCol=\"features\",\n", 753 | " cacheNodeIds=True)\n", 754 | "\n", 755 | "# Parameter tuning\n", 756 | "paramGrid = ParamGridBuilder() \\\n", 757 | " .addGrid(model.numTrees, [500, 700]) \\\n", 758 | " .addGrid(model.maxDepth, [5, 7]) \\\n", 759 | " .addGrid(model.impurity, [\"gini\"]) \\\n", 760 | " .addGrid(model.maxBins, [31]) \\\n", 761 | " .addGrid(model.subsamplingRate, [0.7]) \\\n", 762 | " 
.build()\n", 763 | "\n", 764 | "# Evaluator: accuracy\n", 765 | "evaluator = MulticlassClassificationEvaluator(\n", 766 | " predictionCol=\"prediction\", \n", 767 | " labelCol=\"label\", \n", 768 | " metricName=\"accuracy\")\n", 769 | "\n", 770 | "# train:validation = 7:3\n", 771 | "tvs = TrainValidationSplit(\n", 772 | " estimator=model,\n", 773 | " estimatorParamMaps=paramGrid,\n", 774 | " evaluator=evaluator,\n", 775 | " trainRatio=0.7)\n", 776 | "\n", 777 | "tvsModel = tvs.fit(train)\n", 778 | "predict = tvsModel.transform(test)\n", 779 | "evaluator.evaluate(predict)" 780 | ] 781 | }, 782 | { 783 | "cell_type": "code", 784 | "execution_count": 25, 785 | "metadata": { 786 | "scrolled": true 787 | }, 788 | "outputs": [ 789 | { 790 | "data": { 791 | "text/plain": [ 792 | "DataFrame[PassengerId: int, Survived: int, Pclass: int, Name: string, Sex: string, Age: double, SibSp: int, Parch: int, Ticket: string, Fare: double, Embarked: string, len_name: int, embarked_ix: int, sex_ix: double]" 793 | ] 794 | }, 795 | "execution_count": 25, 796 | "metadata": {}, 797 | "output_type": "execute_result" 798 | } 799 | ], 800 | "source": [ 801 | "train.unpersist()\n", 802 | "test.unpersist()\n", 803 | "df.unpersist()" 804 | ] 805 | }, 806 | { 807 | "cell_type": "code", 808 | "execution_count": 30, 809 | "metadata": {}, 810 | "outputs": [], 811 | "source": [ 812 | "spark.stop()" 813 | ] 814 | } 815 | ], 816 | "metadata": { 817 | "kernelspec": { 818 | "display_name": "Python 3", 819 | "language": "python", 820 | "name": "python3" 821 | }, 822 | "language_info": { 823 | "codemirror_mode": { 824 | "name": "ipython", 825 | "version": 3 826 | }, 827 | "file_extension": ".py", 828 | "mimetype": "text/x-python", 829 | "name": "python", 830 | "nbconvert_exporter": "python", 831 | "pygments_lexer": "ipython3", 832 | "version": "3.6.3" 833 | } 834 | }, 835 | "nbformat": 4, 836 | "nbformat_minor": 2 837 | } 838 | 
-------------------------------------------------------------------------------- /dataset/test.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q 3 | 893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S 4 | 894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q 5 | 895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S 6 | 896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S 7 | 897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S 8 | 898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q 9 | 899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S 10 | 900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C 11 | 901,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S 12 | 902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S 13 | 903,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S 14 | 904,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S 15 | 905,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S 16 | 906,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S 17 | 907,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C 18 | 908,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q 19 | 909,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C 20 | 910,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S 21 | 911,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C 22 | 912,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C 23 | 913,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S 24 | 914,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S 25 | 915,1,"Williams, Mr. 
Richard Norris II",male,21,0,1,PC 17597,61.3792,,C 26 | 916,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C 27 | 917,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S 28 | 918,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C 29 | 919,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,,C 30 | 920,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S 31 | 921,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C 32 | 922,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S 33 | 923,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S 34 | 924,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S 35 | 925,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S 36 | 926,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C78,C 37 | 927,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C 38 | 928,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S 39 | 929,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S 40 | 930,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,,S 41 | 931,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S 42 | 932,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C 43 | 933,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,D34,S 44 | 934,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,,S 45 | 935,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S 46 | 936,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S 47 | 937,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S 48 | 938,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C 49 | 939,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q 50 | 940,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C 51 | 941,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 
37671,15.9,,S 52 | 942,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S 53 | 943,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C 54 | 944,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S 55 | 945,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S 56 | 946,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C 57 | 947,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q 58 | 948,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S 59 | 949,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S 60 | 950,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S 61 | 951,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C 62 | 952,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S 63 | 953,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S 64 | 954,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S 65 | 955,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q 66 | 956,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C 67 | 957,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S 68 | 958,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q 69 | 959,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,,S 70 | 960,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C 71 | 961,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S 72 | 962,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q 73 | 963,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S 74 | 964,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S 75 | 965,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C 76 | 966,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C130,C 77 | 967,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C 78 | 968,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S 79 | 969,1,"Cornell, Mrs. 
Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S 80 | 970,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S 81 | 971,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,,Q 82 | 972,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C 83 | 973,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S 84 | 974,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S 85 | 975,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S 86 | 976,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q 87 | 977,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C 88 | 978,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,,Q 89 | 979,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S 90 | 980,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q 91 | 981,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S 92 | 982,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S 93 | 983,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S 94 | 984,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S 95 | 985,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,,S 96 | 986,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C 97 | 987,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,,S 98 | 988,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S 99 | 989,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,,S 100 | 990,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S 101 | 991,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S 102 | 992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C 103 | 993,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,,S 104 | 994,3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q 105 | 995,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S 106 | 996,3,"Thomas, Mrs. 
Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C 107 | 997,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S 108 | 998,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,,Q 109 | 999,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q 110 | 1000,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S 111 | 1001,2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S 112 | 1002,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C 113 | 1003,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q 114 | 1004,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C 115 | 1005,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q 116 | 1006,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S 117 | 1007,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C 118 | 1008,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C 119 | 1009,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S 120 | 1010,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C 121 | 1011,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S 122 | 1012,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,,S 123 | 1013,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q 124 | 1014,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C 125 | 1015,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S 126 | 1016,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q 127 | 1017,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,,S 128 | 1018,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S 129 | 1019,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q 130 | 1020,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,,S 131 | 1021,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S 132 | 1022,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S 133 | 1023,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C 134 | 1024,3,"Lefebre, Mrs. 
Frank (Frances)",female,,0,4,4133,25.4667,,S 135 | 1025,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,,C 136 | 1026,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S 137 | 1027,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S 138 | 1028,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C 139 | 1029,2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S 140 | 1030,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S 141 | 1031,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,,S 142 | 1032,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S 143 | 1033,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S 144 | 1034,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C 145 | 1035,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S 146 | 1036,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S 147 | 1037,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S 148 | 1038,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S 149 | 1039,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S 150 | 1040,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,,S 151 | 1041,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S 152 | 1042,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C 153 | 1043,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C 154 | 1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S 155 | 1045,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S 156 | 1046,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S 157 | 1047,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S 158 | 1048,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S 159 | 1049,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S 160 | 1050,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S 161 | 1051,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 
3101315,13.775,,S 162 | 1052,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q 163 | 1053,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C 164 | 1054,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S 165 | 1055,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S 166 | 1056,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S 167 | 1057,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S 168 | 1058,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C 169 | 1059,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S 170 | 1060,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C 171 | 1061,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S 172 | 1062,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S 173 | 1063,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C 174 | 1064,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S 175 | 1065,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C 176 | 1066,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S 177 | 1067,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,,S 178 | 1068,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S 179 | 1069,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C 180 | 1070,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S 181 | 1071,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C 182 | 1072,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S 183 | 1073,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C 184 | 1074,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S 185 | 1075,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q 186 | 1076,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C 187 | 1077,2,"Maybery, Mr. 
Frank Hubert",male,40,0,0,239059,16,,S 188 | 1078,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S 189 | 1079,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S 190 | 1080,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S 191 | 1081,2,"Veal, Mr. James",male,40,0,0,28221,13,,S 192 | 1082,2,"Angle, Mr. William A",male,34,1,0,226875,26,,S 193 | 1083,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S 194 | 1084,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S 195 | 1085,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q 196 | 1086,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S 197 | 1087,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S 198 | 1088,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C 199 | 1089,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S 200 | 1090,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S 201 | 1091,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S 202 | 1092,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q 203 | 1093,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S 204 | 1094,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C 205 | 1095,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,,S 206 | 1096,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S 207 | 1097,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,,C 208 | 1098,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q 209 | 1099,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,,S 210 | 1100,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C 211 | 1101,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S 212 | 1102,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S 213 | 1103,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S 214 | 1104,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S 215 | 1105,2,"Howard, Mrs. 
Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S 216 | 1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S 217 | 1107,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S 218 | 1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q 219 | 1109,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S 220 | 1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C 221 | 1111,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S 222 | 1112,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C 223 | 1113,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S 224 | 1114,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S 225 | 1115,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S 226 | 1116,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C 227 | 1117,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C 228 | 1118,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S 229 | 1119,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,,Q 230 | 1120,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S 231 | 1121,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,,S 232 | 1122,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S 233 | 1123,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,,S 234 | 1124,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,,S 235 | 1125,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q 236 | 1126,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C 237 | 1127,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S 238 | 1128,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,D37,C 239 | 1129,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C 240 | 1130,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S 241 | 1131,1,"Douglas, Mrs. 
Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C 242 | 1132,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C 243 | 1133,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,,S 244 | 1134,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C 245 | 1135,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S 246 | 1136,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S 247 | 1137,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S 248 | 1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S 249 | 1139,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S 250 | 1140,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S 251 | 1141,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C 252 | 1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S 253 | 1143,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S 254 | 1144,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C 255 | 1145,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S 256 | 1146,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,,S 257 | 1147,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S 258 | 1148,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,,Q 259 | 1149,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S 260 | 1150,2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,,S 261 | 1151,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S 262 | 1152,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S 263 | 1153,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S 264 | 1154,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S 265 | 1155,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S 266 | 1156,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C 267 | 1157,3,"Lyntakoff, Mr. 
Stanko",male,,0,0,349235,7.8958,,S 268 | 1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S 269 | 1159,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S 270 | 1160,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S 271 | 1161,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S 272 | 1162,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C 273 | 1163,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q 274 | 1164,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C 275 | 1165,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q 276 | 1166,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C 277 | 1167,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S 278 | 1168,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S 279 | 1169,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S 280 | 1170,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S 281 | 1171,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S 282 | 1172,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S 283 | 1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,,S 284 | 1174,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q 285 | 1175,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,,C 286 | 1176,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,,S 287 | 1177,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,,S 288 | 1178,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S 289 | 1179,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S 290 | 1180,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,F E46,C 291 | 1181,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S 292 | 1182,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S 293 | 1183,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q 294 | 1184,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C 295 | 1185,1,"Dodge, Dr. 
Washington",male,53,1,1,33638,81.8583,A34,S 296 | 1186,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S 297 | 1187,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S 298 | 1188,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C 299 | 1189,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C 300 | 1190,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S 301 | 1191,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S 302 | 1192,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S 303 | 1193,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C 304 | 1194,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S 305 | 1195,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S 306 | 1196,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q 307 | 1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S 308 | 1198,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S 309 | 1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S 310 | 1200,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S 311 | 1201,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S 312 | 1202,3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,,S 313 | 1203,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C 314 | 1204,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,,S 315 | 1205,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q 316 | 1206,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C 317 | 1207,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q 318 | 1208,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C 319 | 1209,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S 320 | 1210,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S 321 | 1211,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S 322 | 1212,3,"Andersson, Mr. 
Johan Samuel",male,26,0,0,347075,7.775,,S 323 | 1213,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C 324 | 1214,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S 325 | 1215,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S 326 | 1216,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S 327 | 1217,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S 328 | 1218,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S 329 | 1219,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C 330 | 1220,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S 331 | 1221,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S 332 | 1222,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S 333 | 1223,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C 334 | 1224,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C 335 | 1225,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C 336 | 1226,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S 337 | 1227,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S 338 | 1228,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,,S 339 | 1229,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,,C 340 | 1230,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,,S 341 | 1231,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C 342 | 1232,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S 343 | 1233,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S 344 | 1234,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,,S 345 | 1235,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C 346 | 1236,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S 347 | 1237,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,,S 348 | 1238,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S 349 | 1239,3,"Whabee, Mrs. 
George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C 350 | 1240,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S 351 | 1241,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S 352 | 1242,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C 353 | 1243,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S 354 | 1244,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S 355 | 1245,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S 356 | 1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S 357 | 1247,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S 358 | 1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S 359 | 1249,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S 360 | 1250,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q 361 | 1251,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S 362 | 1252,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S 363 | 1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C 364 | 1254,2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S 365 | 1255,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,,S 366 | 1256,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C 367 | 1257,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S 368 | 1258,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C 369 | 1259,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S 370 | 1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C 371 | 1261,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C 372 | 1262,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S 373 | 1263,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C 374 | 1264,1,"Ismay, Mr. 
Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S 375 | 1265,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S 376 | 1266,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S 377 | 1267,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C 378 | 1268,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S 379 | 1269,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S 380 | 1270,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S 381 | 1271,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S 382 | 1272,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q 383 | 1273,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q 384 | 1274,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S 385 | 1275,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S 386 | 1276,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S 387 | 1277,2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S 388 | 1278,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S 389 | 1279,2,"Ashby, Mr. John",male,57,0,0,244346,13,,S 390 | 1280,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,,Q 391 | 1281,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,,S 392 | 1282,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S 393 | 1283,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S 394 | 1284,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,,S 395 | 1285,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,,S 396 | 1286,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S 397 | 1287,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S 398 | 1288,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,,Q 399 | 1289,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C 400 | 1290,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S 401 | 1291,3,"Conlon, Mr. 
Thomas Henry",male,31,0,0,21332,7.7333,,Q 402 | 1292,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S 403 | 1293,2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S 404 | 1294,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C 405 | 1295,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S 406 | 1296,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C 407 | 1297,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C 408 | 1298,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S 409 | 1299,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C 410 | 1300,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q 411 | 1301,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S 412 | 1302,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q 413 | 1303,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q 414 | 1304,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S 415 | 1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S 416 | 1306,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C 417 | 1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S 418 | 1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S 419 | 1309,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,,C 420 | -------------------------------------------------------------------------------- /dataset/train.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 1,0,3,"Braund, Mr. Owen Harris",male,22,1,0,A/5 21171,7.25,,S 3 | 2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38,1,0,PC 17599,71.2833,C85,C 4 | 3,1,3,"Heikkinen, Miss. Laina",female,26,0,0,STON/O2. 3101282,7.925,,S 5 | 4,1,1,"Futrelle, Mrs. 
Jacques Heath (Lily May Peel)",female,35,1,0,113803,53.1,C123,S 6 | 5,0,3,"Allen, Mr. William Henry",male,35,0,0,373450,8.05,,S 7 | 6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q 8 | 7,0,1,"McCarthy, Mr. Timothy J",male,54,0,0,17463,51.8625,E46,S 9 | 8,0,3,"Palsson, Master. Gosta Leonard",male,2,3,1,349909,21.075,,S 10 | 9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27,0,2,347742,11.1333,,S 11 | 10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14,1,0,237736,30.0708,,C 12 | 11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4,1,1,PP 9549,16.7,G6,S 13 | 12,1,1,"Bonnell, Miss. Elizabeth",female,58,0,0,113783,26.55,C103,S 14 | 13,0,3,"Saundercock, Mr. William Henry",male,20,0,0,A/5. 2151,8.05,,S 15 | 14,0,3,"Andersson, Mr. Anders Johan",male,39,1,5,347082,31.275,,S 16 | 15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",female,14,0,0,350406,7.8542,,S 17 | 16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",female,55,0,0,248706,16,,S 18 | 17,0,3,"Rice, Master. Eugene",male,2,4,1,382652,29.125,,Q 19 | 18,1,2,"Williams, Mr. Charles Eugene",male,,0,0,244373,13,,S 20 | 19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",female,31,1,0,345763,18,,S 21 | 20,1,3,"Masselmani, Mrs. Fatima",female,,0,0,2649,7.225,,C 22 | 21,0,2,"Fynney, Mr. Joseph J",male,35,0,0,239865,26,,S 23 | 22,1,2,"Beesley, Mr. Lawrence",male,34,0,0,248698,13,D56,S 24 | 23,1,3,"McGowan, Miss. Anna ""Annie""",female,15,0,0,330923,8.0292,,Q 25 | 24,1,1,"Sloper, Mr. William Thompson",male,28,0,0,113788,35.5,A6,S 26 | 25,0,3,"Palsson, Miss. Torborg Danira",female,8,3,1,349909,21.075,,S 27 | 26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",female,38,1,5,347077,31.3875,,S 28 | 27,0,3,"Emir, Mr. Farred Chehab",male,,0,0,2631,7.225,,C 29 | 28,0,1,"Fortune, Mr. Charles Alexander",male,19,3,2,19950,263,C23 C25 C27,S 30 | 29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",female,,0,0,330959,7.8792,,Q 31 | 30,0,3,"Todoroff, Mr. Lalio",male,,0,0,349216,7.8958,,S 32 | 31,0,1,"Uruchurtu, Don. 
Manuel E",male,40,0,0,PC 17601,27.7208,,C 33 | 32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",female,,1,0,PC 17569,146.5208,B78,C 34 | 33,1,3,"Glynn, Miss. Mary Agatha",female,,0,0,335677,7.75,,Q 35 | 34,0,2,"Wheadon, Mr. Edward H",male,66,0,0,C.A. 24579,10.5,,S 36 | 35,0,1,"Meyer, Mr. Edgar Joseph",male,28,1,0,PC 17604,82.1708,,C 37 | 36,0,1,"Holverson, Mr. Alexander Oskar",male,42,1,0,113789,52,,S 38 | 37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C 39 | 38,0,3,"Cann, Mr. Ernest Charles",male,21,0,0,A./5. 2152,8.05,,S 40 | 39,0,3,"Vander Planke, Miss. Augusta Maria",female,18,2,0,345764,18,,S 41 | 40,1,3,"Nicola-Yarred, Miss. Jamila",female,14,1,0,2651,11.2417,,C 42 | 41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40,1,0,7546,9.475,,S 43 | 42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",female,27,1,0,11668,21,,S 44 | 43,0,3,"Kraeff, Mr. Theodor",male,,0,0,349253,7.8958,,C 45 | 44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",female,3,1,2,SC/Paris 2123,41.5792,,C 46 | 45,1,3,"Devaney, Miss. Margaret Delia",female,19,0,0,330958,7.8792,,Q 47 | 46,0,3,"Rogers, Mr. William John",male,,0,0,S.C./A.4. 23567,8.05,,S 48 | 47,0,3,"Lennon, Mr. Denis",male,,1,0,370371,15.5,,Q 49 | 48,1,3,"O'Driscoll, Miss. Bridget",female,,0,0,14311,7.75,,Q 50 | 49,0,3,"Samaan, Mr. Youssef",male,,2,0,2662,21.6792,,C 51 | 50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",female,18,1,0,349237,17.8,,S 52 | 51,0,3,"Panula, Master. Juha Niilo",male,7,4,1,3101295,39.6875,,S 53 | 52,0,3,"Nosworthy, Mr. Richard Cater",male,21,0,0,A/4. 39886,7.8,,S 54 | 53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",female,49,1,0,PC 17572,76.7292,D33,C 55 | 54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",female,29,1,0,2926,26,,S 56 | 55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65,0,1,113509,61.9792,B30,C 57 | 56,1,1,"Woolner, Mr. Hugh",male,,0,0,19947,35.5,C52,S 58 | 57,1,2,"Rugg, Miss. Emily",female,21,0,0,C.A. 31026,10.5,,S 59 | 58,0,3,"Novel, Mr. 
Mansouer",male,28.5,0,0,2697,7.2292,,C 60 | 59,1,2,"West, Miss. Constance Mirium",female,5,1,2,C.A. 34651,27.75,,S 61 | 60,0,3,"Goodwin, Master. William Frederick",male,11,5,2,CA 2144,46.9,,S 62 | 61,0,3,"Sirayanian, Mr. Orsen",male,22,0,0,2669,7.2292,,C 63 | 62,1,1,"Icard, Miss. Amelie",female,38,0,0,113572,80,B28, 64 | 63,0,1,"Harris, Mr. Henry Birkhardt",male,45,1,0,36973,83.475,C83,S 65 | 64,0,3,"Skoog, Master. Harald",male,4,3,2,347088,27.9,,S 66 | 65,0,1,"Stewart, Mr. Albert A",male,,0,0,PC 17605,27.7208,,C 67 | 66,1,3,"Moubarek, Master. Gerios",male,,1,1,2661,15.2458,,C 68 | 67,1,2,"Nye, Mrs. (Elizabeth Ramell)",female,29,0,0,C.A. 29395,10.5,F33,S 69 | 68,0,3,"Crease, Mr. Ernest James",male,19,0,0,S.P. 3464,8.1583,,S 70 | 69,1,3,"Andersson, Miss. Erna Alexandra",female,17,4,2,3101281,7.925,,S 71 | 70,0,3,"Kink, Mr. Vincenz",male,26,2,0,315151,8.6625,,S 72 | 71,0,2,"Jenkin, Mr. Stephen Curnow",male,32,0,0,C.A. 33111,10.5,,S 73 | 72,0,3,"Goodwin, Miss. Lillian Amy",female,16,5,2,CA 2144,46.9,,S 74 | 73,0,2,"Hood, Mr. Ambrose Jr",male,21,0,0,S.O.C. 14879,73.5,,S 75 | 74,0,3,"Chronopoulos, Mr. Apostolos",male,26,1,0,2680,14.4542,,C 76 | 75,1,3,"Bing, Mr. Lee",male,32,0,0,1601,56.4958,,S 77 | 76,0,3,"Moen, Mr. Sigurd Hansen",male,25,0,0,348123,7.65,F G73,S 78 | 77,0,3,"Staneff, Mr. Ivan",male,,0,0,349208,7.8958,,S 79 | 78,0,3,"Moutal, Mr. Rahamin Haim",male,,0,0,374746,8.05,,S 80 | 79,1,2,"Caldwell, Master. Alden Gates",male,0.83,0,2,248738,29,,S 81 | 80,1,3,"Dowdell, Miss. Elizabeth",female,30,0,0,364516,12.475,,S 82 | 81,0,3,"Waelens, Mr. Achille",male,22,0,0,345767,9,,S 83 | 82,1,3,"Sheerlinck, Mr. Jan Baptist",male,29,0,0,345779,9.5,,S 84 | 83,1,3,"McDermott, Miss. Brigdet Delia",female,,0,0,330932,7.7875,,Q 85 | 84,0,1,"Carrau, Mr. Francisco M",male,28,0,0,113059,47.1,,S 86 | 85,1,2,"Ilett, Miss. Bertha",female,17,0,0,SO/C 14885,10.5,,S 87 | 86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",female,33,3,0,3101278,15.85,,S 88 | 87,0,3,"Ford, Mr. 
William Neal",male,16,1,3,W./C. 6608,34.375,,S 89 | 88,0,3,"Slocovski, Mr. Selman Francis",male,,0,0,SOTON/OQ 392086,8.05,,S 90 | 89,1,1,"Fortune, Miss. Mabel Helen",female,23,3,2,19950,263,C23 C25 C27,S 91 | 90,0,3,"Celotti, Mr. Francesco",male,24,0,0,343275,8.05,,S 92 | 91,0,3,"Christmann, Mr. Emil",male,29,0,0,343276,8.05,,S 93 | 92,0,3,"Andreasson, Mr. Paul Edvin",male,20,0,0,347466,7.8542,,S 94 | 93,0,1,"Chaffee, Mr. Herbert Fuller",male,46,1,0,W.E.P. 5734,61.175,E31,S 95 | 94,0,3,"Dean, Mr. Bertram Frank",male,26,1,2,C.A. 2315,20.575,,S 96 | 95,0,3,"Coxon, Mr. Daniel",male,59,0,0,364500,7.25,,S 97 | 96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S 98 | 97,0,1,"Goldschmidt, Mr. George B",male,71,0,0,PC 17754,34.6542,A5,C 99 | 98,1,1,"Greenfield, Mr. William Bertram",male,23,0,1,PC 17759,63.3583,D10 D12,C 100 | 99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",female,34,0,1,231919,23,,S 101 | 100,0,2,"Kantor, Mr. Sinai",male,34,1,0,244367,26,,S 102 | 101,0,3,"Petranec, Miss. Matilda",female,28,0,0,349245,7.8958,,S 103 | 102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,,0,0,349215,7.8958,,S 104 | 103,0,1,"White, Mr. Richard Frasar",male,21,0,1,35281,77.2875,D26,S 105 | 104,0,3,"Johansson, Mr. Gustaf Joel",male,33,0,0,7540,8.6542,,S 106 | 105,0,3,"Gustafsson, Mr. Anders Vilhelm",male,37,2,0,3101276,7.925,,S 107 | 106,0,3,"Mionoff, Mr. Stoytcho",male,28,0,0,349207,7.8958,,S 108 | 107,1,3,"Salkjelsvik, Miss. Anna Kristine",female,21,0,0,343120,7.65,,S 109 | 108,1,3,"Moss, Mr. Albert Johan",male,,0,0,312991,7.775,,S 110 | 109,0,3,"Rekic, Mr. Tido",male,38,0,0,349249,7.8958,,S 111 | 110,1,3,"Moran, Miss. Bertha",female,,1,0,371110,24.15,,Q 112 | 111,0,1,"Porter, Mr. Walter Chamberlain",male,47,0,0,110465,52,C110,S 113 | 112,0,3,"Zabour, Miss. Hileni",female,14.5,1,0,2665,14.4542,,C 114 | 113,0,3,"Barton, Mr. David John",male,22,0,0,324669,8.05,,S 115 | 114,0,3,"Jussila, Miss. Katriina",female,20,1,0,4136,9.825,,S 116 | 115,0,3,"Attalah, Miss. 
Malake",female,17,0,0,2627,14.4583,,C 117 | 116,0,3,"Pekoniemi, Mr. Edvard",male,21,0,0,STON/O 2. 3101294,7.925,,S 118 | 117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.75,,Q 119 | 118,0,2,"Turpin, Mr. William John Robert",male,29,1,0,11668,21,,S 120 | 119,0,1,"Baxter, Mr. Quigg Edmond",male,24,0,1,PC 17558,247.5208,B58 B60,C 121 | 120,0,3,"Andersson, Miss. Ellis Anna Maria",female,2,4,2,347082,31.275,,S 122 | 121,0,2,"Hickman, Mr. Stanley George",male,21,2,0,S.O.C. 14879,73.5,,S 123 | 122,0,3,"Moore, Mr. Leonard Charles",male,,0,0,A4. 54510,8.05,,S 124 | 123,0,2,"Nasser, Mr. Nicholas",male,32.5,1,0,237736,30.0708,,C 125 | 124,1,2,"Webber, Miss. Susan",female,32.5,0,0,27267,13,E101,S 126 | 125,0,1,"White, Mr. Percival Wayland",male,54,0,1,35281,77.2875,D26,S 127 | 126,1,3,"Nicola-Yarred, Master. Elias",male,12,1,0,2651,11.2417,,C 128 | 127,0,3,"McMahon, Mr. Martin",male,,0,0,370372,7.75,,Q 129 | 128,1,3,"Madsen, Mr. Fridtjof Arne",male,24,0,0,C 17369,7.1417,,S 130 | 129,1,3,"Peter, Miss. Anna",female,,1,1,2668,22.3583,F E69,C 131 | 130,0,3,"Ekstrom, Mr. Johan",male,45,0,0,347061,6.975,,S 132 | 131,0,3,"Drazenoic, Mr. Jozef",male,33,0,0,349241,7.8958,,C 133 | 132,0,3,"Coelho, Mr. Domingos Fernandeo",male,20,0,0,SOTON/O.Q. 3101307,7.05,,S 134 | 133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",female,47,1,0,A/5. 3337,14.5,,S 135 | 134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29,1,0,228414,26,,S 136 | 135,0,2,"Sobey, Mr. Samuel James Hayden",male,25,0,0,C.A. 29178,13,,S 137 | 136,0,2,"Richard, Mr. Emile",male,23,0,0,SC/PARIS 2133,15.0458,,C 138 | 137,1,1,"Newsom, Miss. Helen Monypeny",female,19,0,2,11752,26.2833,D47,S 139 | 138,0,1,"Futrelle, Mr. Jacques Heath",male,37,1,0,113803,53.1,C123,S 140 | 139,0,3,"Osen, Mr. Olaf Elon",male,16,0,0,7534,9.2167,,S 141 | 140,0,1,"Giglio, Mr. Victor",male,24,0,0,PC 17593,79.2,B86,C 142 | 141,0,3,"Boulos, Mrs. Joseph (Sultana)",female,,0,2,2678,15.2458,,C 143 | 142,1,3,"Nysten, Miss. 
Anna Sofia",female,22,0,0,347081,7.75,,S 144 | 143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",female,24,1,0,STON/O2. 3101279,15.85,,S 145 | 144,0,3,"Burke, Mr. Jeremiah",male,19,0,0,365222,6.75,,Q 146 | 145,0,2,"Andrew, Mr. Edgardo Samuel",male,18,0,0,231945,11.5,,S 147 | 146,0,2,"Nicholls, Mr. Joseph Charles",male,19,1,1,C.A. 33112,36.75,,S 148 | 147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",male,27,0,0,350043,7.7958,,S 149 | 148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",female,9,2,2,W./C. 6608,34.375,,S 150 | 149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",male,36.5,0,2,230080,26,F2,S 151 | 150,0,2,"Byles, Rev. Thomas Roussel Davids",male,42,0,0,244310,13,,S 152 | 151,0,2,"Bateman, Rev. Robert James",male,51,0,0,S.O.P. 1166,12.525,,S 153 | 152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",female,22,1,0,113776,66.6,C2,S 154 | 153,0,3,"Meo, Mr. Alfonzo",male,55.5,0,0,A.5. 11206,8.05,,S 155 | 154,0,3,"van Billiard, Mr. Austin Blyler",male,40.5,0,2,A/5. 851,14.5,,S 156 | 155,0,3,"Olsen, Mr. Ole Martin",male,,0,0,Fa 265302,7.3125,,S 157 | 156,0,1,"Williams, Mr. Charles Duane",male,51,0,1,PC 17597,61.3792,,C 158 | 157,1,3,"Gilnagh, Miss. Katherine ""Katie""",female,16,0,0,35851,7.7333,,Q 159 | 158,0,3,"Corn, Mr. Harry",male,30,0,0,SOTON/OQ 392090,8.05,,S 160 | 159,0,3,"Smiljanic, Mr. Mile",male,,0,0,315037,8.6625,,S 161 | 160,0,3,"Sage, Master. Thomas Henry",male,,8,2,CA. 2343,69.55,,S 162 | 161,0,3,"Cribb, Mr. John Hatfield",male,44,0,1,371362,16.1,,S 163 | 162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",female,40,0,0,C.A. 33595,15.75,,S 164 | 163,0,3,"Bengtsson, Mr. John Viktor",male,26,0,0,347068,7.775,,S 165 | 164,0,3,"Calic, Mr. Jovo",male,17,0,0,315093,8.6625,,S 166 | 165,0,3,"Panula, Master. Eino Viljami",male,1,4,1,3101295,39.6875,,S 167 | 166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",male,9,0,2,363291,20.525,,S 168 | 167,1,1,"Chibnall, Mrs. 
(Edith Martha Bowerman)",female,,0,1,113505,55,E33,S 169 | 168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",female,45,1,4,347088,27.9,,S 170 | 169,0,1,"Baumann, Mr. John D",male,,0,0,PC 17318,25.925,,S 171 | 170,0,3,"Ling, Mr. Lee",male,28,0,0,1601,56.4958,,S 172 | 171,0,1,"Van der hoef, Mr. Wyckoff",male,61,0,0,111240,33.5,B19,S 173 | 172,0,3,"Rice, Master. Arthur",male,4,4,1,382652,29.125,,Q 174 | 173,1,3,"Johnson, Miss. Eleanor Ileen",female,1,1,1,347742,11.1333,,S 175 | 174,0,3,"Sivola, Mr. Antti Wilhelm",male,21,0,0,STON/O 2. 3101280,7.925,,S 176 | 175,0,1,"Smith, Mr. James Clinch",male,56,0,0,17764,30.6958,A7,C 177 | 176,0,3,"Klasen, Mr. Klas Albin",male,18,1,1,350404,7.8542,,S 178 | 177,0,3,"Lefebre, Master. Henry Forbes",male,,3,1,4133,25.4667,,S 179 | 178,0,1,"Isham, Miss. Ann Elizabeth",female,50,0,0,PC 17595,28.7125,C49,C 180 | 179,0,2,"Hale, Mr. Reginald",male,30,0,0,250653,13,,S 181 | 180,0,3,"Leonard, Mr. Lionel",male,36,0,0,LINE,0,,S 182 | 181,0,3,"Sage, Miss. Constance Gladys",female,,8,2,CA. 2343,69.55,,S 183 | 182,0,2,"Pernot, Mr. Rene",male,,0,0,SC/PARIS 2131,15.05,,C 184 | 183,0,3,"Asplund, Master. Clarence Gustaf Hugo",male,9,4,2,347077,31.3875,,S 185 | 184,1,2,"Becker, Master. Richard F",male,1,2,1,230136,39,F4,S 186 | 185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",female,4,0,2,315153,22.025,,S 187 | 186,0,1,"Rood, Mr. Hugh Roscoe",male,,0,0,113767,50,A32,S 188 | 187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",female,,1,0,370365,15.5,,Q 189 | 188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",male,45,0,0,111428,26.55,,S 190 | 189,0,3,"Bourke, Mr. John",male,40,1,1,364849,15.5,,Q 191 | 190,0,3,"Turcin, Mr. Stjepan",male,36,0,0,349247,7.8958,,S 192 | 191,1,2,"Pinsky, Mrs. (Rosa)",female,32,0,0,234604,13,,S 193 | 192,0,2,"Carbines, Mr. William",male,19,0,0,28424,13,,S 194 | 193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",female,19,1,0,350046,7.8542,,S 195 | 194,1,2,"Navratil, Master. 
Michel M",male,3,1,1,230080,26,F2,S 196 | 195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",female,44,0,0,PC 17610,27.7208,B4,C 197 | 196,1,1,"Lurette, Miss. Elise",female,58,0,0,PC 17569,146.5208,B80,C 198 | 197,0,3,"Mernagh, Mr. Robert",male,,0,0,368703,7.75,,Q 199 | 198,0,3,"Olsen, Mr. Karl Siegwart Andreas",male,42,0,1,4579,8.4042,,S 200 | 199,1,3,"Madigan, Miss. Margaret ""Maggie""",female,,0,0,370370,7.75,,Q 201 | 200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24,0,0,248747,13,,S 202 | 201,0,3,"Vande Walle, Mr. Nestor Cyriel",male,28,0,0,345770,9.5,,S 203 | 202,0,3,"Sage, Mr. Frederick",male,,8,2,CA. 2343,69.55,,S 204 | 203,0,3,"Johanson, Mr. Jakob Alfred",male,34,0,0,3101264,6.4958,,S 205 | 204,0,3,"Youseff, Mr. Gerious",male,45.5,0,0,2628,7.225,,C 206 | 205,1,3,"Cohen, Mr. Gurshon ""Gus""",male,18,0,0,A/5 3540,8.05,,S 207 | 206,0,3,"Strom, Miss. Telma Matilda",female,2,0,1,347054,10.4625,G6,S 208 | 207,0,3,"Backstrom, Mr. Karl Alfred",male,32,1,0,3101278,15.85,,S 209 | 208,1,3,"Albimona, Mr. Nassef Cassem",male,26,0,0,2699,18.7875,,C 210 | 209,1,3,"Carr, Miss. Helen ""Ellen""",female,16,0,0,367231,7.75,,Q 211 | 210,1,1,"Blank, Mr. Henry",male,40,0,0,112277,31,A31,C 212 | 211,0,3,"Ali, Mr. Ahmed",male,24,0,0,SOTON/O.Q. 3101311,7.05,,S 213 | 212,1,2,"Cameron, Miss. Clear Annie",female,35,0,0,F.C.C. 13528,21,,S 214 | 213,0,3,"Perkin, Mr. John Henry",male,22,0,0,A/5 21174,7.25,,S 215 | 214,0,2,"Givard, Mr. Hans Kristensen",male,30,0,0,250646,13,,S 216 | 215,0,3,"Kiernan, Mr. Philip",male,,1,0,367229,7.75,,Q 217 | 216,1,1,"Newell, Miss. Madeleine",female,31,1,0,35273,113.275,D36,C 218 | 217,1,3,"Honkanen, Miss. Eliina",female,27,0,0,STON/O2. 3101283,7.925,,S 219 | 218,0,2,"Jacobsohn, Mr. Sidney Samuel",male,42,1,0,243847,27,,S 220 | 219,1,1,"Bazzani, Miss. Albina",female,32,0,0,11813,76.2917,D15,C 221 | 220,0,2,"Harris, Mr. Walter",male,30,0,0,W/C 14208,10.5,,S 222 | 221,1,3,"Sunderland, Mr. 
Victor Francis",male,16,0,0,SOTON/OQ 392089,8.05,,S 223 | 222,0,2,"Bracken, Mr. James H",male,27,0,0,220367,13,,S 224 | 223,0,3,"Green, Mr. George Henry",male,51,0,0,21440,8.05,,S 225 | 224,0,3,"Nenkoff, Mr. Christo",male,,0,0,349234,7.8958,,S 226 | 225,1,1,"Hoyt, Mr. Frederick Maxfield",male,38,1,0,19943,90,C93,S 227 | 226,0,3,"Berglund, Mr. Karl Ivar Sven",male,22,0,0,PP 4348,9.35,,S 228 | 227,1,2,"Mellors, Mr. William John",male,19,0,0,SW/PP 751,10.5,,S 229 | 228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,,S 230 | 229,0,2,"Fahlstrom, Mr. Arne Jonas",male,18,0,0,236171,13,,S 231 | 230,0,3,"Lefebre, Miss. Mathilde",female,,3,1,4133,25.4667,,S 232 | 231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",female,35,1,0,36973,83.475,C83,S 233 | 232,0,3,"Larsson, Mr. Bengt Edvin",male,29,0,0,347067,7.775,,S 234 | 233,0,2,"Sjostedt, Mr. Ernst Adolf",male,59,0,0,237442,13.5,,S 235 | 234,1,3,"Asplund, Miss. Lillian Gertrud",female,5,4,2,347077,31.3875,,S 236 | 235,0,2,"Leyson, Mr. Robert William Norman",male,24,0,0,C.A. 29566,10.5,,S 237 | 236,0,3,"Harknett, Miss. Alice Phoebe",female,,0,0,W./C. 6609,7.55,,S 238 | 237,0,2,"Hold, Mr. Stephen",male,44,1,0,26707,26,,S 239 | 238,1,2,"Collyer, Miss. Marjorie ""Lottie""",female,8,0,2,C.A. 31921,26.25,,S 240 | 239,0,2,"Pengelly, Mr. Frederick William",male,19,0,0,28665,10.5,,S 241 | 240,0,2,"Hunt, Mr. George Henry",male,33,0,0,SCO/W 1585,12.275,,S 242 | 241,0,3,"Zabour, Miss. Thamine",female,,1,0,2665,14.4542,,C 243 | 242,1,3,"Murphy, Miss. Katherine ""Kate""",female,,1,0,367230,15.5,,Q 244 | 243,0,2,"Coleridge, Mr. Reginald Charles",male,29,0,0,W./C. 14263,10.5,,S 245 | 244,0,3,"Maenpaa, Mr. Matti Alexanteri",male,22,0,0,STON/O 2. 3101275,7.125,,S 246 | 245,0,3,"Attalah, Mr. Sleiman",male,30,0,0,2694,7.225,,C 247 | 246,0,1,"Minahan, Dr. William Edward",male,44,2,0,19928,90,C78,Q 248 | 247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",female,25,0,0,347071,7.775,,S 249 | 248,1,2,"Hamalainen, Mrs. 
William (Anna)",female,24,0,2,250649,14.5,,S 250 | 249,1,1,"Beckwith, Mr. Richard Leonard",male,37,1,1,11751,52.5542,D35,S 251 | 250,0,2,"Carter, Rev. Ernest Courtenay",male,54,1,0,244252,26,,S 252 | 251,0,3,"Reed, Mr. James George",male,,0,0,362316,7.25,,S 253 | 252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",female,29,1,1,347054,10.4625,G6,S 254 | 253,0,1,"Stead, Mr. William Thomas",male,62,0,0,113514,26.55,C87,S 255 | 254,0,3,"Lobb, Mr. William Arthur",male,30,1,0,A/5. 3336,16.1,,S 256 | 255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",female,41,0,2,370129,20.2125,,S 257 | 256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",female,29,0,2,2650,15.2458,,C 258 | 257,1,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C 259 | 258,1,1,"Cherry, Miss. Gladys",female,30,0,0,110152,86.5,B77,S 260 | 259,1,1,"Ward, Miss. Anna",female,35,0,0,PC 17755,512.3292,,C 261 | 260,1,2,"Parrish, Mrs. (Lutie Davis)",female,50,0,1,230433,26,,S 262 | 261,0,3,"Smith, Mr. Thomas",male,,0,0,384461,7.75,,Q 263 | 262,1,3,"Asplund, Master. Edvin Rojj Felix",male,3,4,2,347077,31.3875,,S 264 | 263,0,1,"Taussig, Mr. Emil",male,52,1,1,110413,79.65,E67,S 265 | 264,0,1,"Harrison, Mr. William",male,40,0,0,112059,0,B94,S 266 | 265,0,3,"Henry, Miss. Delia",female,,0,0,382649,7.75,,Q 267 | 266,0,2,"Reeves, Mr. David",male,36,0,0,C.A. 17248,10.5,,S 268 | 267,0,3,"Panula, Mr. Ernesti Arvid",male,16,4,1,3101295,39.6875,,S 269 | 268,1,3,"Persson, Mr. Ernst Ulrik",male,25,1,0,347083,7.775,,S 270 | 269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",female,58,0,1,PC 17582,153.4625,C125,S 271 | 270,1,1,"Bissette, Miss. Amelia",female,35,0,0,PC 17760,135.6333,C99,S 272 | 271,0,1,"Cairns, Mr. Alexander",male,,0,0,113798,31,,S 273 | 272,1,3,"Tornquist, Mr. William Henry",male,25,0,0,LINE,0,,S 274 | 273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",female,41,0,1,250644,19.5,,S 275 | 274,0,1,"Natsch, Mr. Charles H",male,37,0,1,PC 17596,29.7,C118,C 276 | 275,1,3,"Healy, Miss. 
Hanora ""Nora""",female,,0,0,370375,7.75,,Q 277 | 276,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S 278 | 277,0,3,"Lindblom, Miss. Augusta Charlotta",female,45,0,0,347073,7.75,,S 279 | 278,0,2,"Parkes, Mr. Francis ""Frank""",male,,0,0,239853,0,,S 280 | 279,0,3,"Rice, Master. Eric",male,7,4,1,382652,29.125,,Q 281 | 280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",female,35,1,1,C.A. 2673,20.25,,S 282 | 281,0,3,"Duane, Mr. Frank",male,65,0,0,336439,7.75,,Q 283 | 282,0,3,"Olsson, Mr. Nils Johan Goransson",male,28,0,0,347464,7.8542,,S 284 | 283,0,3,"de Pelsmaeker, Mr. Alfons",male,16,0,0,345778,9.5,,S 285 | 284,1,3,"Dorking, Mr. Edward Arthur",male,19,0,0,A/5. 10482,8.05,,S 286 | 285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26,A19,S 287 | 286,0,3,"Stankovic, Mr. Ivan",male,33,0,0,349239,8.6625,,C 288 | 287,1,3,"de Mulder, Mr. Theodore",male,30,0,0,345774,9.5,,S 289 | 288,0,3,"Naidenoff, Mr. Penko",male,22,0,0,349206,7.8958,,S 290 | 289,1,2,"Hosono, Mr. Masabumi",male,42,0,0,237798,13,,S 291 | 290,1,3,"Connolly, Miss. Kate",female,22,0,0,370373,7.75,,Q 292 | 291,1,1,"Barber, Miss. Ellen ""Nellie""",female,26,0,0,19877,78.85,,S 293 | 292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",female,19,1,0,11967,91.0792,B49,C 294 | 293,0,2,"Levy, Mr. Rene Jacques",male,36,0,0,SC/Paris 2163,12.875,D,C 295 | 294,0,3,"Haas, Miss. Aloisia",female,24,0,0,349236,8.85,,S 296 | 295,0,3,"Mineff, Mr. Ivan",male,24,0,0,349233,7.8958,,S 297 | 296,0,1,"Lewy, Mr. Ervin G",male,,0,0,PC 17612,27.7208,,C 298 | 297,0,3,"Hanna, Mr. Mansour",male,23.5,0,0,2693,7.2292,,C 299 | 298,0,1,"Allison, Miss. Helen Loraine",female,2,1,2,113781,151.55,C22 C26,S 300 | 299,1,1,"Saalfeld, Mr. Adolphe",male,,0,0,19988,30.5,C106,S 301 | 300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",female,50,0,1,PC 17558,247.5208,B58 B60,C 302 | 301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",female,,0,0,9234,7.75,,Q 303 | 302,1,3,"McCoy, Mr. 
Bernard",male,,2,0,367226,23.25,,Q 304 | 303,0,3,"Johnson, Mr. William Cahoone Jr",male,19,0,0,LINE,0,,S 305 | 304,1,2,"Keane, Miss. Nora A",female,,0,0,226593,12.35,E101,Q 306 | 305,0,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.05,,S 307 | 306,1,1,"Allison, Master. Hudson Trevor",male,0.92,1,2,113781,151.55,C22 C26,S 308 | 307,1,1,"Fleming, Miss. Margaret",female,,0,0,17421,110.8833,,C 309 | 308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",female,17,1,0,PC 17758,108.9,C65,C 310 | 309,0,2,"Abelson, Mr. Samuel",male,30,1,0,P/PP 3381,24,,C 311 | 310,1,1,"Francatelli, Miss. Laura Mabel",female,30,0,0,PC 17485,56.9292,E36,C 312 | 311,1,1,"Hays, Miss. Margaret Bechstein",female,24,0,0,11767,83.1583,C54,C 313 | 312,1,1,"Ryerson, Miss. Emily Borie",female,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C 314 | 313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",female,26,1,1,250651,26,,S 315 | 314,0,3,"Hendekovic, Mr. Ignjac",male,28,0,0,349243,7.8958,,S 316 | 315,0,2,"Hart, Mr. Benjamin",male,43,1,1,F.C.C. 13529,26.25,,S 317 | 316,1,3,"Nilsson, Miss. Helmina Josefina",female,26,0,0,347470,7.8542,,S 318 | 317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",female,24,1,0,244367,26,,S 319 | 318,0,2,"Moraweck, Dr. Ernest",male,54,0,0,29011,14,,S 320 | 319,1,1,"Wick, Miss. Mary Natalie",female,31,0,2,36928,164.8667,C7,S 321 | 320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",female,40,1,1,16966,134.5,E34,C 322 | 321,0,3,"Dennis, Mr. Samuel",male,22,0,0,A/5 21172,7.25,,S 323 | 322,0,3,"Danoff, Mr. Yoto",male,27,0,0,349219,7.8958,,S 324 | 323,1,2,"Slayter, Miss. Hilda Mary",female,30,0,0,234818,12.35,,Q 325 | 324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",female,22,1,1,248738,29,,S 326 | 325,0,3,"Sage, Mr. George John Jr",male,,8,2,CA. 2343,69.55,,S 327 | 326,1,1,"Young, Miss. Marie Grice",female,36,0,0,PC 17760,135.6333,C32,C 328 | 327,0,3,"Nysveen, Mr. 
Johan Hansen",male,61,0,0,345364,6.2375,,S 329 | 328,1,2,"Ball, Mrs. (Ada E Hall)",female,36,0,0,28551,13,D,S 330 | 329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",female,31,1,1,363291,20.525,,S 331 | 330,1,1,"Hippach, Miss. Jean Gertrude",female,16,0,1,111361,57.9792,B18,C 332 | 331,1,3,"McCoy, Miss. Agnes",female,,2,0,367226,23.25,,Q 333 | 332,0,1,"Partner, Mr. Austen",male,45.5,0,0,113043,28.5,C124,S 334 | 333,0,1,"Graham, Mr. George Edward",male,38,0,1,PC 17582,153.4625,C91,S 335 | 334,0,3,"Vander Planke, Mr. Leo Edmondus",male,16,2,0,345764,18,,S 336 | 335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",female,,1,0,PC 17611,133.65,,S 337 | 336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S 338 | 337,0,1,"Pears, Mr. Thomas Clinton",male,29,1,0,113776,66.6,C2,S 339 | 338,1,1,"Burns, Miss. Elizabeth Margaret",female,41,0,0,16966,134.5,E40,C 340 | 339,1,3,"Dahl, Mr. Karl Edwart",male,45,0,0,7598,8.05,,S 341 | 340,0,1,"Blackwell, Mr. Stephen Weart",male,45,0,0,113784,35.5,T,S 342 | 341,1,2,"Navratil, Master. Edmond Roger",male,2,1,1,230080,26,F2,S 343 | 342,1,1,"Fortune, Miss. Alice Elizabeth",female,24,3,2,19950,263,C23 C25 C27,S 344 | 343,0,2,"Collander, Mr. Erik Gustaf",male,28,0,0,248740,13,,S 345 | 344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",male,25,0,0,244361,13,,S 346 | 345,0,2,"Fox, Mr. Stanley Hubert",male,36,0,0,229236,13,,S 347 | 346,1,2,"Brown, Miss. Amelia ""Mildred""",female,24,0,0,248733,13,F33,S 348 | 347,1,2,"Smith, Miss. Marion Elsie",female,40,0,0,31418,13,,S 349 | 348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",female,,1,0,386525,16.1,,S 350 | 349,1,3,"Coutts, Master. William Loch ""William""",male,3,1,1,C.A. 37671,15.9,,S 351 | 350,0,3,"Dimic, Mr. Jovan",male,42,0,0,315088,8.6625,,S 352 | 351,0,3,"Odahl, Mr. Nils Martin",male,23,0,0,7267,9.225,,S 353 | 352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",male,,0,0,113510,35,C128,S 354 | 353,0,3,"Elias, Mr. 
Tannous",male,15,1,1,2695,7.2292,,C 355 | 354,0,3,"Arnold-Franchi, Mr. Josef",male,25,1,0,349237,17.8,,S 356 | 355,0,3,"Yousif, Mr. Wazli",male,,0,0,2647,7.225,,C 357 | 356,0,3,"Vanden Steen, Mr. Leo Peter",male,28,0,0,345783,9.5,,S 358 | 357,1,1,"Bowerman, Miss. Elsie Edith",female,22,0,1,113505,55,E33,S 359 | 358,0,2,"Funk, Miss. Annie Clemmer",female,38,0,0,237671,13,,S 360 | 359,1,3,"McGovern, Miss. Mary",female,,0,0,330931,7.8792,,Q 361 | 360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",female,,0,0,330980,7.8792,,Q 362 | 361,0,3,"Skoog, Mr. Wilhelm",male,40,1,4,347088,27.9,,S 363 | 362,0,2,"del Carlo, Mr. Sebastiano",male,29,1,0,SC/PARIS 2167,27.7208,,C 364 | 363,0,3,"Barbara, Mrs. (Catherine David)",female,45,0,1,2691,14.4542,,C 365 | 364,0,3,"Asim, Mr. Adola",male,35,0,0,SOTON/O.Q. 3101310,7.05,,S 366 | 365,0,3,"O'Brien, Mr. Thomas",male,,1,0,370365,15.5,,Q 367 | 366,0,3,"Adahl, Mr. Mauritz Nils Martin",male,30,0,0,C 7076,7.25,,S 368 | 367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",female,60,1,0,110813,75.25,D37,C 369 | 368,1,3,"Moussa, Mrs. (Mantoura Boulos)",female,,0,0,2626,7.2292,,C 370 | 369,1,3,"Jermyn, Miss. Annie",female,,0,0,14313,7.75,,Q 371 | 370,1,1,"Aubart, Mme. Leontine Pauline",female,24,0,0,PC 17477,69.3,B35,C 372 | 371,1,1,"Harder, Mr. George Achilles",male,25,1,0,11765,55.4417,E50,C 373 | 372,0,3,"Wiklund, Mr. Jakob Alfred",male,18,1,0,3101267,6.4958,,S 374 | 373,0,3,"Beavan, Mr. William Thomas",male,19,0,0,323951,8.05,,S 375 | 374,0,1,"Ringhini, Mr. Sante",male,22,0,0,PC 17760,135.6333,,C 376 | 375,0,3,"Palsson, Miss. Stina Viola",female,3,3,1,349909,21.075,,S 377 | 376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",female,,1,0,PC 17604,82.1708,,C 378 | 377,1,3,"Landergren, Miss. Aurora Adelia",female,22,0,0,C 7077,7.25,,S 379 | 378,0,1,"Widener, Mr. Harry Elkins",male,27,0,2,113503,211.5,C82,C 380 | 379,0,3,"Betros, Mr. Tannous",male,20,0,0,2648,4.0125,,C 381 | 380,0,3,"Gustafsson, Mr. 
Karl Gideon",male,19,0,0,347069,7.775,,S 382 | 381,1,1,"Bidois, Miss. Rosalie",female,42,0,0,PC 17757,227.525,,C 383 | 382,1,3,"Nakid, Miss. Maria (""Mary"")",female,1,0,2,2653,15.7417,,C 384 | 383,0,3,"Tikkanen, Mr. Juho",male,32,0,0,STON/O 2. 3101293,7.925,,S 385 | 384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",female,35,1,0,113789,52,,S 386 | 385,0,3,"Plotcharsky, Mr. Vasil",male,,0,0,349227,7.8958,,S 387 | 386,0,2,"Davies, Mr. Charles Henry",male,18,0,0,S.O.C. 14879,73.5,,S 388 | 387,0,3,"Goodwin, Master. Sidney Leonard",male,1,5,2,CA 2144,46.9,,S 389 | 388,1,2,"Buss, Miss. Kate",female,36,0,0,27849,13,,S 390 | 389,0,3,"Sadlier, Mr. Matthew",male,,0,0,367655,7.7292,,Q 391 | 390,1,2,"Lehmann, Miss. Bertha",female,17,0,0,SC 1748,12,,C 392 | 391,1,1,"Carter, Mr. William Ernest",male,36,1,2,113760,120,B96 B98,S 393 | 392,1,3,"Jansson, Mr. Carl Olof",male,21,0,0,350034,7.7958,,S 394 | 393,0,3,"Gustafsson, Mr. Johan Birger",male,28,2,0,3101277,7.925,,S 395 | 394,1,1,"Newell, Miss. Marjorie",female,23,1,0,35273,113.275,D36,C 396 | 395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",female,24,0,2,PP 9549,16.7,G6,S 397 | 396,0,3,"Johansson, Mr. Erik",male,22,0,0,350052,7.7958,,S 398 | 397,0,3,"Olsson, Miss. Elina",female,31,0,0,350407,7.8542,,S 399 | 398,0,2,"McKane, Mr. Peter David",male,46,0,0,28403,26,,S 400 | 399,0,2,"Pain, Dr. Alfred",male,23,0,0,244278,10.5,,S 401 | 400,1,2,"Trout, Mrs. William H (Jessie L)",female,28,0,0,240929,12.65,,S 402 | 401,1,3,"Niskanen, Mr. Juha",male,39,0,0,STON/O 2. 3101289,7.925,,S 403 | 402,0,3,"Adams, Mr. John",male,26,0,0,341826,8.05,,S 404 | 403,0,3,"Jussila, Miss. Mari Aina",female,21,1,0,4137,9.825,,S 405 | 404,0,3,"Hakkarainen, Mr. Pekka Pietari",male,28,1,0,STON/O2. 3101279,15.85,,S 406 | 405,0,3,"Oreskovic, Miss. Marija",female,20,0,0,315096,8.6625,,S 407 | 406,0,2,"Gale, Mr. Shadrach",male,34,1,0,28664,21,,S 408 | 407,0,3,"Widegren, Mr. 
Carl/Charles Peter",male,51,0,0,347064,7.75,,S 409 | 408,1,2,"Richards, Master. William Rowe",male,3,1,1,29106,18.75,,S 410 | 409,0,3,"Birkeland, Mr. Hans Martin Monsen",male,21,0,0,312992,7.775,,S 411 | 410,0,3,"Lefebre, Miss. Ida",female,,3,1,4133,25.4667,,S 412 | 411,0,3,"Sdycoff, Mr. Todor",male,,0,0,349222,7.8958,,S 413 | 412,0,3,"Hart, Mr. Henry",male,,0,0,394140,6.8583,,Q 414 | 413,1,1,"Minahan, Miss. Daisy E",female,33,1,0,19928,90,C78,Q 415 | 414,0,2,"Cunningham, Mr. Alfred Fleming",male,,0,0,239853,0,,S 416 | 415,1,3,"Sundman, Mr. Johan Julian",male,44,0,0,STON/O 2. 3101269,7.925,,S 417 | 416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",female,,0,0,343095,8.05,,S 418 | 417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",female,34,1,1,28220,32.5,,S 419 | 418,1,2,"Silven, Miss. Lyyli Karoliina",female,18,0,2,250652,13,,S 420 | 419,0,2,"Matthews, Mr. William John",male,30,0,0,28228,13,,S 421 | 420,0,3,"Van Impe, Miss. Catharina",female,10,0,2,345773,24.15,,S 422 | 421,0,3,"Gheorgheff, Mr. Stanio",male,,0,0,349254,7.8958,,C 423 | 422,0,3,"Charters, Mr. David",male,21,0,0,A/5. 13032,7.7333,,Q 424 | 423,0,3,"Zimmerman, Mr. Leo",male,29,0,0,315082,7.875,,S 425 | 424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",female,28,1,1,347080,14.4,,S 426 | 425,0,3,"Rosblom, Mr. Viktor Richard",male,18,1,1,370129,20.2125,,S 427 | 426,0,3,"Wiseman, Mr. Phillippe",male,,0,0,A/4. 34244,7.25,,S 428 | 427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",female,28,1,0,2003,26,,S 429 | 428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",female,19,0,0,250655,26,,S 430 | 429,0,3,"Flynn, Mr. James",male,,0,0,364851,7.75,,Q 431 | 430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",male,32,0,0,SOTON/O.Q. 392078,8.05,E10,S 432 | 431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",male,28,0,0,110564,26.55,C52,S 433 | 432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",female,,1,0,376564,16.1,,S 434 | 433,1,2,"Louch, Mrs. 
Charles Alexander (Alice Adelaide Slow)",female,42,1,0,SC/AH 3085,26,,S 435 | 434,0,3,"Kallio, Mr. Nikolai Erland",male,17,0,0,STON/O 2. 3101274,7.125,,S 436 | 435,0,1,"Silvey, Mr. William Baird",male,50,1,0,13507,55.9,E44,S 437 | 436,1,1,"Carter, Miss. Lucile Polk",female,14,1,2,113760,120,B96 B98,S 438 | 437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",female,21,2,2,W./C. 6608,34.375,,S 439 | 438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",female,24,2,3,29106,18.75,,S 440 | 439,0,1,"Fortune, Mr. Mark",male,64,1,4,19950,263,C23 C25 C27,S 441 | 440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",male,31,0,0,C.A. 18723,10.5,,S 442 | 441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",female,45,1,1,F.C.C. 13529,26.25,,S 443 | 442,0,3,"Hampe, Mr. Leon",male,20,0,0,345769,9.5,,S 444 | 443,0,3,"Petterson, Mr. Johan Emil",male,25,1,0,347076,7.775,,S 445 | 444,1,2,"Reynaldo, Ms. Encarnacion",female,28,0,0,230434,13,,S 446 | 445,1,3,"Johannesen-Bratthammer, Mr. Bernt",male,,0,0,65306,8.1125,,S 447 | 446,1,1,"Dodge, Master. Washington",male,4,0,2,33638,81.8583,A34,S 448 | 447,1,2,"Mellinger, Miss. Madeleine Violet",female,13,0,1,250644,19.5,,S 449 | 448,1,1,"Seward, Mr. Frederic Kimber",male,34,0,0,113794,26.55,,S 450 | 449,1,3,"Baclini, Miss. Marie Catherine",female,5,2,1,2666,19.2583,,C 451 | 450,1,1,"Peuchen, Major. Arthur Godfrey",male,52,0,0,113786,30.5,C104,S 452 | 451,0,2,"West, Mr. Edwy Arthur",male,36,1,2,C.A. 34651,27.75,,S 453 | 452,0,3,"Hagland, Mr. Ingvald Olai Olsen",male,,1,0,65303,19.9667,,S 454 | 453,0,1,"Foreman, Mr. Benjamin Laventall",male,30,0,0,113051,27.75,C111,C 455 | 454,1,1,"Goldenberg, Mr. Samuel L",male,49,1,0,17453,89.1042,C92,C 456 | 455,0,3,"Peduzzi, Mr. Joseph",male,,0,0,A/5 2817,8.05,,S 457 | 456,1,3,"Jalsevac, Mr. Ivan",male,29,0,0,349240,7.8958,,C 458 | 457,0,1,"Millet, Mr. Francis Davis",male,65,0,0,13509,26.55,E38,S 459 | 458,1,1,"Kenyon, Mrs. Frederick R (Marion)",female,,1,0,17464,51.8625,D21,S 460 | 459,1,2,"Toomey, Miss. 
Ellen",female,50,0,0,F.C.C. 13531,10.5,,S 461 | 460,0,3,"O'Connor, Mr. Maurice",male,,0,0,371060,7.75,,Q 462 | 461,1,1,"Anderson, Mr. Harry",male,48,0,0,19952,26.55,E12,S 463 | 462,0,3,"Morley, Mr. William",male,34,0,0,364506,8.05,,S 464 | 463,0,1,"Gee, Mr. Arthur H",male,47,0,0,111320,38.5,E63,S 465 | 464,0,2,"Milling, Mr. Jacob Christian",male,48,0,0,234360,13,,S 466 | 465,0,3,"Maisner, Mr. Simon",male,,0,0,A/S 2816,8.05,,S 467 | 466,0,3,"Goncalves, Mr. Manuel Estanslas",male,38,0,0,SOTON/O.Q. 3101306,7.05,,S 468 | 467,0,2,"Campbell, Mr. William",male,,0,0,239853,0,,S 469 | 468,0,1,"Smart, Mr. John Montgomery",male,56,0,0,113792,26.55,,S 470 | 469,0,3,"Scanlan, Mr. James",male,,0,0,36209,7.725,,Q 471 | 470,1,3,"Baclini, Miss. Helene Barbara",female,0.75,2,1,2666,19.2583,,C 472 | 471,0,3,"Keefe, Mr. Arthur",male,,0,0,323592,7.25,,S 473 | 472,0,3,"Cacic, Mr. Luka",male,38,0,0,315089,8.6625,,S 474 | 473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",female,33,1,2,C.A. 34651,27.75,,S 475 | 474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",female,23,0,0,SC/AH Basle 541,13.7917,D,C 476 | 475,0,3,"Strandberg, Miss. Ida Sofia",female,22,0,0,7553,9.8375,,S 477 | 476,0,1,"Clifford, Mr. George Quincy",male,,0,0,110465,52,A14,S 478 | 477,0,2,"Renouf, Mr. Peter Henry",male,34,1,0,31027,21,,S 479 | 478,0,3,"Braund, Mr. Lewis Richard",male,29,1,0,3460,7.0458,,S 480 | 479,0,3,"Karlsson, Mr. Nils August",male,22,0,0,350060,7.5208,,S 481 | 480,1,3,"Hirvonen, Miss. Hildur E",female,2,0,1,3101298,12.2875,,S 482 | 481,0,3,"Goodwin, Master. Harold Victor",male,9,5,2,CA 2144,46.9,,S 483 | 482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0,,S 484 | 483,0,3,"Rouse, Mr. Richard Henry",male,50,0,0,A/5 3594,8.05,,S 485 | 484,1,3,"Turkula, Mrs. (Hedwig)",female,63,0,0,4134,9.5875,,S 486 | 485,1,1,"Bishop, Mr. Dickinson H",male,25,1,0,11967,91.0792,B49,C 487 | 486,0,3,"Lefebre, Miss. Jeannie",female,,3,1,4133,25.4667,,S 488 | 487,1,1,"Hoyt, Mrs. 
Frederick Maxfield (Jane Anne Forby)",female,35,1,0,19943,90,C93,S 489 | 488,0,1,"Kent, Mr. Edward Austin",male,58,0,0,11771,29.7,B37,C 490 | 489,0,3,"Somerton, Mr. Francis William",male,30,0,0,A.5. 18509,8.05,,S 491 | 490,1,3,"Coutts, Master. Eden Leslie ""Neville""",male,9,1,1,C.A. 37671,15.9,,S 492 | 491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",male,,1,0,65304,19.9667,,S 493 | 492,0,3,"Windelov, Mr. Einar",male,21,0,0,SOTON/OQ 3101317,7.25,,S 494 | 493,0,1,"Molson, Mr. Harry Markland",male,55,0,0,113787,30.5,C30,S 495 | 494,0,1,"Artagaveytia, Mr. Ramon",male,71,0,0,PC 17609,49.5042,,C 496 | 495,0,3,"Stanley, Mr. Edward Roland",male,21,0,0,A/4 45380,8.05,,S 497 | 496,0,3,"Yousseff, Mr. Gerious",male,,0,0,2627,14.4583,,C 498 | 497,1,1,"Eustis, Miss. Elizabeth Mussey",female,54,1,0,36947,78.2667,D20,C 499 | 498,0,3,"Shellard, Mr. Frederick William",male,,0,0,C.A. 6212,15.1,,S 500 | 499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25,1,2,113781,151.55,C22 C26,S 501 | 500,0,3,"Svensson, Mr. Olof",male,24,0,0,350035,7.7958,,S 502 | 501,0,3,"Calic, Mr. Petar",male,17,0,0,315086,8.6625,,S 503 | 502,0,3,"Canavan, Miss. Mary",female,21,0,0,364846,7.75,,Q 504 | 503,0,3,"O'Sullivan, Miss. Bridget Mary",female,,0,0,330909,7.6292,,Q 505 | 504,0,3,"Laitinen, Miss. Kristina Sofia",female,37,0,0,4135,9.5875,,S 506 | 505,1,1,"Maioni, Miss. Roberta",female,16,0,0,110152,86.5,B79,S 507 | 506,0,1,"Penasco y Castellana, Mr. Victor de Satode",male,18,1,0,PC 17758,108.9,C65,C 508 | 507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",female,33,0,2,26360,26,,S 509 | 508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",male,,0,0,111427,26.55,,S 510 | 509,0,3,"Olsen, Mr. Henry Margido",male,28,0,0,C 4001,22.525,,S 511 | 510,1,3,"Lang, Mr. Fang",male,26,0,0,1601,56.4958,,S 512 | 511,1,3,"Daly, Mr. Eugene Patrick",male,29,0,0,382651,7.75,,Q 513 | 512,0,3,"Webber, Mr. James",male,,0,0,SOTON/OQ 3101316,8.05,,S 514 | 513,1,1,"McGough, Mr. 
James Robert",male,36,0,0,PC 17473,26.2875,E25,S 515 | 514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",female,54,1,0,PC 17603,59.4,,C 516 | 515,0,3,"Coleff, Mr. Satio",male,24,0,0,349209,7.4958,,S 517 | 516,0,1,"Walker, Mr. William Anderson",male,47,0,0,36967,34.0208,D46,S 518 | 517,1,2,"Lemore, Mrs. (Amelia Milley)",female,34,0,0,C.A. 34260,10.5,F33,S 519 | 518,0,3,"Ryan, Mr. Patrick",male,,0,0,371110,24.15,,Q 520 | 519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",female,36,1,0,226875,26,,S 521 | 520,0,3,"Pavlovic, Mr. Stefo",male,32,0,0,349242,7.8958,,S 522 | 521,1,1,"Perreault, Miss. Anne",female,30,0,0,12749,93.5,B73,S 523 | 522,0,3,"Vovk, Mr. Janko",male,22,0,0,349252,7.8958,,S 524 | 523,0,3,"Lahoud, Mr. Sarkis",male,,0,0,2624,7.225,,C 525 | 524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",female,44,0,1,111361,57.9792,B18,C 526 | 525,0,3,"Kassem, Mr. Fared",male,,0,0,2700,7.2292,,C 527 | 526,0,3,"Farrell, Mr. James",male,40.5,0,0,367232,7.75,,Q 528 | 527,1,2,"Ridsdale, Miss. Lucy",female,50,0,0,W./C. 14258,10.5,,S 529 | 528,0,1,"Farthing, Mr. John",male,,0,0,PC 17483,221.7792,C95,S 530 | 529,0,3,"Salonen, Mr. Johan Werner",male,39,0,0,3101296,7.925,,S 531 | 530,0,2,"Hocking, Mr. Richard George",male,23,2,1,29104,11.5,,S 532 | 531,1,2,"Quick, Miss. Phyllis May",female,2,1,1,26360,26,,S 533 | 532,0,3,"Toufik, Mr. Nakli",male,,0,0,2641,7.2292,,C 534 | 533,0,3,"Elias, Mr. Joseph Jr",male,17,1,1,2690,7.2292,,C 535 | 534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",female,,0,2,2668,22.3583,,C 536 | 535,0,3,"Cacic, Miss. Marija",female,30,0,0,315084,8.6625,,S 537 | 536,1,2,"Hart, Miss. Eva Miriam",female,7,0,2,F.C.C. 13529,26.25,,S 538 | 537,0,1,"Butt, Major. Archibald Willingham",male,45,0,0,113050,26.55,B38,S 539 | 538,1,1,"LeRoy, Miss. Bertha",female,30,0,0,PC 17761,106.425,,C 540 | 539,0,3,"Risien, Mr. Samuel Beard",male,,0,0,364498,14.5,,S 541 | 540,1,1,"Frolicher, Miss. 
Hedwig Margaritha",female,22,0,2,13568,49.5,B39,C 542 | 541,1,1,"Crosby, Miss. Harriet R",female,36,0,2,WE/P 5735,71,B22,S 543 | 542,0,3,"Andersson, Miss. Ingeborg Constanzia",female,9,4,2,347082,31.275,,S 544 | 543,0,3,"Andersson, Miss. Sigrid Elisabeth",female,11,4,2,347082,31.275,,S 545 | 544,1,2,"Beane, Mr. Edward",male,32,1,0,2908,26,,S 546 | 545,0,1,"Douglas, Mr. Walter Donald",male,50,1,0,PC 17761,106.425,C86,C 547 | 546,0,1,"Nicholson, Mr. Arthur Ernest",male,64,0,0,693,26,,S 548 | 547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",female,19,1,0,2908,26,,S 549 | 548,1,2,"Padro y Manent, Mr. Julian",male,,0,0,SC/PARIS 2146,13.8625,,C 550 | 549,0,3,"Goldsmith, Mr. Frank John",male,33,1,1,363291,20.525,,S 551 | 550,1,2,"Davies, Master. John Morgan Jr",male,8,1,1,C.A. 33112,36.75,,S 552 | 551,1,1,"Thayer, Mr. John Borland Jr",male,17,0,2,17421,110.8833,C70,C 553 | 552,0,2,"Sharp, Mr. Percival James R",male,27,0,0,244358,26,,S 554 | 553,0,3,"O'Brien, Mr. Timothy",male,,0,0,330979,7.8292,,Q 555 | 554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",male,22,0,0,2620,7.225,,C 556 | 555,1,3,"Ohman, Miss. Velin",female,22,0,0,347085,7.775,,S 557 | 556,0,1,"Wright, Mr. George",male,62,0,0,113807,26.55,,S 558 | 557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",female,48,1,0,11755,39.6,A16,C 559 | 558,0,1,"Robbins, Mr. Victor",male,,0,0,PC 17757,227.525,,C 560 | 559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",female,39,1,1,110413,79.65,E67,S 561 | 560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",female,36,1,0,345572,17.4,,S 562 | 561,0,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.75,,Q 563 | 562,0,3,"Sivic, Mr. Husein",male,40,0,0,349251,7.8958,,S 564 | 563,0,2,"Norman, Mr. Robert Douglas",male,28,0,0,218629,13.5,,S 565 | 564,0,3,"Simmons, Mr. John",male,,0,0,SOTON/OQ 392082,8.05,,S 566 | 565,0,3,"Meanwell, Miss. (Marion Ogden)",female,,0,0,SOTON/O.Q. 392087,8.05,,S 567 | 566,0,3,"Davies, Mr. 
Alfred J",male,24,2,0,A/4 48871,24.15,,S 568 | 567,0,3,"Stoytcheff, Mr. Ilia",male,19,0,0,349205,7.8958,,S 569 | 568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29,0,4,349909,21.075,,S 570 | 569,0,3,"Doharr, Mr. Tannous",male,,0,0,2686,7.2292,,C 571 | 570,1,3,"Jonsson, Mr. Carl",male,32,0,0,350417,7.8542,,S 572 | 571,1,2,"Harris, Mr. George",male,62,0,0,S.W./PP 752,10.5,,S 573 | 572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S 574 | 573,1,1,"Flynn, Mr. John Irwin (""Irving"")",male,36,0,0,PC 17474,26.3875,E25,S 575 | 574,1,3,"Kelly, Miss. Mary",female,,0,0,14312,7.75,,Q 576 | 575,0,3,"Rush, Mr. Alfred George John",male,16,0,0,A/4. 20589,8.05,,S 577 | 576,0,3,"Patchett, Mr. George",male,19,0,0,358585,14.5,,S 578 | 577,1,2,"Garside, Miss. Ethel",female,34,0,0,243880,13,,S 579 | 578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",female,39,1,0,13507,55.9,E44,S 580 | 579,0,3,"Caram, Mrs. Joseph (Maria Elias)",female,,1,0,2689,14.4583,,C 581 | 580,1,3,"Jussila, Mr. Eiriik",male,32,0,0,STON/O 2. 3101286,7.925,,S 582 | 581,1,2,"Christy, Miss. Julie Rachel",female,25,1,1,237789,30,,S 583 | 582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",female,39,1,1,17421,110.8833,C68,C 584 | 583,0,2,"Downton, Mr. William James",male,54,0,0,28403,26,,S 585 | 584,0,1,"Ross, Mr. John Hugo",male,36,0,0,13049,40.125,A10,C 586 | 585,0,3,"Paulner, Mr. Uscher",male,,0,0,3411,8.7125,,C 587 | 586,1,1,"Taussig, Miss. Ruth",female,18,0,2,110413,79.65,E68,S 588 | 587,0,2,"Jarvis, Mr. John Denzil",male,47,0,0,237565,15,,S 589 | 588,1,1,"Frolicher-Stehli, Mr. Maxmillian",male,60,1,1,13567,79.2,B41,C 590 | 589,0,3,"Gilinski, Mr. Eliezer",male,22,0,0,14973,8.05,,S 591 | 590,0,3,"Murdlin, Mr. Joseph",male,,0,0,A./5. 3235,8.05,,S 592 | 591,0,3,"Rintamaki, Mr. Matti",male,35,0,0,STON/O 2. 3101273,7.125,,S 593 | 592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",female,52,1,0,36947,78.2667,D20,C 594 | 593,0,3,"Elsbury, Mr. 
William James",male,47,0,0,A/5 3902,7.25,,S 595 | 594,0,3,"Bourke, Miss. Mary",female,,0,2,364848,7.75,,Q 596 | 595,0,2,"Chapman, Mr. John Henry",male,37,1,0,SC/AH 29037,26,,S 597 | 596,0,3,"Van Impe, Mr. Jean Baptiste",male,36,1,1,345773,24.15,,S 598 | 597,1,2,"Leitch, Miss. Jessie Wills",female,,0,0,248727,33,,S 599 | 598,0,3,"Johnson, Mr. Alfred",male,49,0,0,LINE,0,,S 600 | 599,0,3,"Boulos, Mr. Hanna",male,,0,0,2664,7.225,,C 601 | 600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49,1,0,PC 17485,56.9292,A20,C 602 | 601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",female,24,2,1,243847,27,,S 603 | 602,0,3,"Slabenoff, Mr. Petco",male,,0,0,349214,7.8958,,S 604 | 603,0,1,"Harrington, Mr. Charles H",male,,0,0,113796,42.4,,S 605 | 604,0,3,"Torber, Mr. Ernst William",male,44,0,0,364511,8.05,,S 606 | 605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",male,35,0,0,111426,26.55,,C 607 | 606,0,3,"Lindell, Mr. Edvard Bengtsson",male,36,1,0,349910,15.55,,S 608 | 607,0,3,"Karaic, Mr. Milan",male,30,0,0,349246,7.8958,,S 609 | 608,1,1,"Daniel, Mr. Robert Williams",male,27,0,0,113804,30.5,,S 610 | 609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",female,22,1,2,SC/Paris 2123,41.5792,,C 611 | 610,1,1,"Shutes, Miss. Elizabeth W",female,40,0,0,PC 17582,153.4625,C125,S 612 | 611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",female,39,1,5,347082,31.275,,S 613 | 612,0,3,"Jardin, Mr. Jose Neto",male,,0,0,SOTON/O.Q. 3101305,7.05,,S 614 | 613,1,3,"Murphy, Miss. Margaret Jane",female,,1,0,367230,15.5,,Q 615 | 614,0,3,"Horgan, Mr. John",male,,0,0,370377,7.75,,Q 616 | 615,0,3,"Brocklebank, Mr. William Alfred",male,35,0,0,364512,8.05,,S 617 | 616,1,2,"Herman, Miss. Alice",female,24,1,2,220845,65,,S 618 | 617,0,3,"Danbom, Mr. Ernst Gilbert",male,34,1,1,347080,14.4,,S 619 | 618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",female,26,1,0,A/5. 3336,16.1,,S 620 | 619,1,2,"Becker, Miss. 
Marion Louise",female,4,2,1,230136,39,F4,S 621 | 620,0,2,"Gavey, Mr. Lawrence",male,26,0,0,31028,10.5,,S 622 | 621,0,3,"Yasbeck, Mr. Antoni",male,27,1,0,2659,14.4542,,C 623 | 622,1,1,"Kimball, Mr. Edwin Nelson Jr",male,42,1,0,11753,52.5542,D19,S 624 | 623,1,3,"Nakid, Mr. Sahid",male,20,1,1,2653,15.7417,,C 625 | 624,0,3,"Hansen, Mr. Henry Damsgaard",male,21,0,0,350029,7.8542,,S 626 | 625,0,3,"Bowen, Mr. David John ""Dai""",male,21,0,0,54636,16.1,,S 627 | 626,0,1,"Sutton, Mr. Frederick",male,61,0,0,36963,32.3208,D50,S 628 | 627,0,2,"Kirkland, Rev. Charles Leonard",male,57,0,0,219533,12.35,,Q 629 | 628,1,1,"Longley, Miss. Gretchen Fiske",female,21,0,0,13502,77.9583,D9,S 630 | 629,0,3,"Bostandyeff, Mr. Guentcho",male,26,0,0,349224,7.8958,,S 631 | 630,0,3,"O'Connell, Mr. Patrick D",male,,0,0,334912,7.7333,,Q 632 | 631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80,0,0,27042,30,A23,S 633 | 632,0,3,"Lundahl, Mr. Johan Svensson",male,51,0,0,347743,7.0542,,S 634 | 633,1,1,"Stahelin-Maeglin, Dr. Max",male,32,0,0,13214,30.5,B50,C 635 | 634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0,,S 636 | 635,0,3,"Skoog, Miss. Mabel",female,9,3,2,347088,27.9,,S 637 | 636,1,2,"Davis, Miss. Mary",female,28,0,0,237668,13,,S 638 | 637,0,3,"Leinonen, Mr. Antti Gustaf",male,32,0,0,STON/O 2. 3101292,7.925,,S 639 | 638,0,2,"Collyer, Mr. Harvey",male,31,1,1,C.A. 31921,26.25,,S 640 | 639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",female,41,0,5,3101295,39.6875,,S 641 | 640,0,3,"Thorneycroft, Mr. Percival",male,,1,0,376564,16.1,,S 642 | 641,0,3,"Jensen, Mr. Hans Peder",male,20,0,0,350050,7.8542,,S 643 | 642,1,1,"Sagesser, Mlle. Emma",female,24,0,0,PC 17477,69.3,B35,C 644 | 643,0,3,"Skoog, Miss. Margit Elizabeth",female,2,3,2,347088,27.9,,S 645 | 644,1,3,"Foo, Mr. Choong",male,,0,0,1601,56.4958,,S 646 | 645,1,3,"Baclini, Miss. Eugenie",female,0.75,2,1,2666,19.2583,,C 647 | 646,1,1,"Harper, Mr. Henry Sleeper",male,48,1,0,PC 17572,76.7292,D33,C 648 | 647,0,3,"Cor, Mr. 
Liudevit",male,19,0,0,349231,7.8958,,S 649 | 648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56,0,0,13213,35.5,A26,C 650 | 649,0,3,"Willey, Mr. Edward",male,,0,0,S.O./P.P. 751,7.55,,S 651 | 650,1,3,"Stanley, Miss. Amy Zillah Elsie",female,23,0,0,CA. 2314,7.55,,S 652 | 651,0,3,"Mitkoff, Mr. Mito",male,,0,0,349221,7.8958,,S 653 | 652,1,2,"Doling, Miss. Elsie",female,18,0,1,231919,23,,S 654 | 653,0,3,"Kalvik, Mr. Johannes Halvorsen",male,21,0,0,8475,8.4333,,S 655 | 654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,,0,0,330919,7.8292,,Q 656 | 655,0,3,"Hegarty, Miss. Hanora ""Nora""",female,18,0,0,365226,6.75,,Q 657 | 656,0,2,"Hickman, Mr. Leonard Mark",male,24,2,0,S.O.C. 14879,73.5,,S 658 | 657,0,3,"Radeff, Mr. Alexander",male,,0,0,349223,7.8958,,S 659 | 658,0,3,"Bourke, Mrs. John (Catherine)",female,32,1,1,364849,15.5,,Q 660 | 659,0,2,"Eitemiller, Mr. George Floyd",male,23,0,0,29751,13,,S 661 | 660,0,1,"Newell, Mr. Arthur Webster",male,58,0,2,35273,113.275,D48,C 662 | 661,1,1,"Frauenthal, Dr. Henry William",male,50,2,0,PC 17611,133.65,,S 663 | 662,0,3,"Badt, Mr. Mohamed",male,40,0,0,2623,7.225,,C 664 | 663,0,1,"Colley, Mr. Edward Pomeroy",male,47,0,0,5727,25.5875,E58,S 665 | 664,0,3,"Coleff, Mr. Peju",male,36,0,0,349210,7.4958,,S 666 | 665,1,3,"Lindqvist, Mr. Eino William",male,20,1,0,STON/O 2. 3101285,7.925,,S 667 | 666,0,2,"Hickman, Mr. Lewis",male,32,2,0,S.O.C. 14879,73.5,,S 668 | 667,0,2,"Butler, Mr. Reginald Fenton",male,25,0,0,234686,13,,S 669 | 668,0,3,"Rommetvedt, Mr. Knud Paust",male,,0,0,312993,7.775,,S 670 | 669,0,3,"Cook, Mr. Jacob",male,43,0,0,A/5 3536,8.05,,S 671 | 670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",female,,1,0,19996,52,C126,S 672 | 671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",female,40,1,1,29750,39,,S 673 | 672,0,1,"Davidson, Mr. Thornton",male,31,1,0,F.C. 12750,52,B71,S 674 | 673,0,2,"Mitchell, Mr. Henry Michael",male,70,0,0,C.A. 24580,10.5,,S 675 | 674,1,2,"Wilhelms, Mr. 
Charles",male,31,0,0,244270,13,,S 676 | 675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0,,S 677 | 676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",male,18,0,0,349912,7.775,,S 678 | 677,0,3,"Sawyer, Mr. Frederick Charles",male,24.5,0,0,342826,8.05,,S 679 | 678,1,3,"Turja, Miss. Anna Sofia",female,18,0,0,4138,9.8417,,S 680 | 679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",female,43,1,6,CA 2144,46.9,,S 681 | 680,1,1,"Cardeza, Mr. Thomas Drake Martinez",male,36,0,1,PC 17755,512.3292,B51 B53 B55,C 682 | 681,0,3,"Peters, Miss. Katie",female,,0,0,330935,8.1375,,Q 683 | 682,1,1,"Hassab, Mr. Hammad",male,27,0,0,PC 17572,76.7292,D49,C 684 | 683,0,3,"Olsvigen, Mr. Thor Anderson",male,20,0,0,6563,9.225,,S 685 | 684,0,3,"Goodwin, Mr. Charles Edward",male,14,5,2,CA 2144,46.9,,S 686 | 685,0,2,"Brown, Mr. Thomas William Solomon",male,60,1,1,29750,39,,S 687 | 686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",male,25,1,2,SC/Paris 2123,41.5792,,C 688 | 687,0,3,"Panula, Mr. Jaako Arnold",male,14,4,1,3101295,39.6875,,S 689 | 688,0,3,"Dakic, Mr. Branko",male,19,0,0,349228,10.1708,,S 690 | 689,0,3,"Fischer, Mr. Eberhard Thelander",male,18,0,0,350036,7.7958,,S 691 | 690,1,1,"Madill, Miss. Georgette Alexandra",female,15,0,1,24160,211.3375,B5,S 692 | 691,1,1,"Dick, Mr. Albert Adrian",male,31,1,0,17474,57,B20,S 693 | 692,1,3,"Karun, Miss. Manca",female,4,0,1,349256,13.4167,,C 694 | 693,1,3,"Lam, Mr. Ali",male,,0,0,1601,56.4958,,S 695 | 694,0,3,"Saad, Mr. Khalil",male,25,0,0,2672,7.225,,C 696 | 695,0,1,"Weir, Col. John",male,60,0,0,113800,26.55,,S 697 | 696,0,2,"Chapman, Mr. Charles Henry",male,52,0,0,248731,13.5,,S 698 | 697,0,3,"Kelly, Mr. James",male,44,0,0,363592,8.05,,S 699 | 698,1,3,"Mullens, Miss. Katherine ""Katie""",female,,0,0,35852,7.7333,,Q 700 | 699,0,1,"Thayer, Mr. John Borland",male,49,1,1,17421,110.8833,C68,C 701 | 700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42,0,0,348121,7.65,F G63,S 702 | 701,1,1,"Astor, Mrs. 
John Jacob (Madeleine Talmadge Force)",female,18,1,0,PC 17757,227.525,C62 C64,C 703 | 702,1,1,"Silverthorne, Mr. Spencer Victor",male,35,0,0,PC 17475,26.2875,E24,S 704 | 703,0,3,"Barbara, Miss. Saiide",female,18,0,1,2691,14.4542,,C 705 | 704,0,3,"Gallagher, Mr. Martin",male,25,0,0,36864,7.7417,,Q 706 | 705,0,3,"Hansen, Mr. Henrik Juul",male,26,1,0,350025,7.8542,,S 707 | 706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39,0,0,250655,26,,S 708 | 707,1,2,"Kelly, Mrs. Florence ""Fannie""",female,45,0,0,223596,13.5,,S 709 | 708,1,1,"Calderhead, Mr. Edward Pennington",male,42,0,0,PC 17476,26.2875,E24,S 710 | 709,1,1,"Cleaver, Miss. Alice",female,22,0,0,113781,151.55,,S 711 | 710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",male,,1,1,2661,15.2458,,C 712 | 711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",female,24,0,0,PC 17482,49.5042,C90,C 713 | 712,0,1,"Klaber, Mr. Herman",male,,0,0,113028,26.55,C124,S 714 | 713,1,1,"Taylor, Mr. Elmer Zebley",male,48,1,0,19996,52,C126,S 715 | 714,0,3,"Larsson, Mr. August Viktor",male,29,0,0,7545,9.4833,,S 716 | 715,0,2,"Greenberg, Mr. Samuel",male,52,0,0,250647,13,,S 717 | 716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",male,19,0,0,348124,7.65,F G73,S 718 | 717,1,1,"Endres, Miss. Caroline Louise",female,38,0,0,PC 17757,227.525,C45,C 719 | 718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",female,27,0,0,34218,10.5,E101,S 720 | 719,0,3,"McEvoy, Mr. Michael",male,,0,0,36568,15.5,,Q 721 | 720,0,3,"Johnson, Mr. Malkolm Joackim",male,33,0,0,347062,7.775,,S 722 | 721,1,2,"Harper, Miss. Annie Jessie ""Nina""",female,6,0,1,248727,33,,S 723 | 722,0,3,"Jensen, Mr. Svend Lauritz",male,17,1,0,350048,7.0542,,S 724 | 723,0,2,"Gillespie, Mr. William Henry",male,34,0,0,12233,13,,S 725 | 724,0,2,"Hodges, Mr. Henry Price",male,50,0,0,250643,13,,S 726 | 725,1,1,"Chambers, Mr. Norman Campbell",male,27,1,0,113806,53.1,E8,S 727 | 726,0,3,"Oreskovic, Mr. Luka",male,20,0,0,315094,8.6625,,S 728 | 727,1,2,"Renouf, Mrs. 
Peter Henry (Lillian Jefferys)",female,30,3,0,31027,21,,S 729 | 728,1,3,"Mannion, Miss. Margareth",female,,0,0,36866,7.7375,,Q 730 | 729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",male,25,1,0,236853,26,,S 731 | 730,0,3,"Ilmakangas, Miss. Pieta Sofia",female,25,1,0,STON/O2. 3101271,7.925,,S 732 | 731,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S 733 | 732,0,3,"Hassan, Mr. Houssein G N",male,11,0,0,2699,18.7875,,C 734 | 733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0,,S 735 | 734,0,2,"Berriman, Mr. William John",male,23,0,0,28425,13,,S 736 | 735,0,2,"Troupiansky, Mr. Moses Aaron",male,23,0,0,233639,13,,S 737 | 736,0,3,"Williams, Mr. Leslie",male,28.5,0,0,54636,16.1,,S 738 | 737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",female,48,1,3,W./C. 6608,34.375,,S 739 | 738,1,1,"Lesurer, Mr. Gustave J",male,35,0,0,PC 17755,512.3292,B101,C 740 | 739,0,3,"Ivanoff, Mr. Kanio",male,,0,0,349201,7.8958,,S 741 | 740,0,3,"Nankoff, Mr. Minko",male,,0,0,349218,7.8958,,S 742 | 741,1,1,"Hawksford, Mr. Walter James",male,,0,0,16988,30,D45,S 743 | 742,0,1,"Cavendish, Mr. Tyrell William",male,36,1,0,19877,78.85,C46,S 744 | 743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",female,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C 745 | 744,0,3,"McNamee, Mr. Neal",male,24,1,0,376566,16.1,,S 746 | 745,1,3,"Stranden, Mr. Juho",male,31,0,0,STON/O 2. 3101288,7.925,,S 747 | 746,0,1,"Crosby, Capt. Edward Gifford",male,70,1,1,WE/P 5735,71,B22,S 748 | 747,0,3,"Abbott, Mr. Rossmore Edward",male,16,1,1,C.A. 2673,20.25,,S 749 | 748,1,2,"Sinkkonen, Miss. Anna",female,30,0,0,250648,13,,S 750 | 749,0,1,"Marvin, Mr. Daniel Warner",male,19,1,0,113773,53.1,D30,S 751 | 750,0,3,"Connaghton, Mr. Michael",male,31,0,0,335097,7.75,,Q 752 | 751,1,2,"Wells, Miss. Joan",female,4,1,1,29103,23,,S 753 | 752,1,3,"Moor, Master. Meier",male,6,0,1,392096,12.475,E121,S 754 | 753,0,3,"Vande Velde, Mr. Johannes Joseph",male,33,0,0,345780,9.5,,S 755 | 754,0,3,"Jonkoff, Mr. 
Lalio",male,23,0,0,349204,7.8958,,S 756 | 755,1,2,"Herman, Mrs. Samuel (Jane Laver)",female,48,1,2,220845,65,,S 757 | 756,1,2,"Hamalainen, Master. Viljo",male,0.67,1,1,250649,14.5,,S 758 | 757,0,3,"Carlsson, Mr. August Sigfrid",male,28,0,0,350042,7.7958,,S 759 | 758,0,2,"Bailey, Mr. Percy Andrew",male,18,0,0,29108,11.5,,S 760 | 759,0,3,"Theobald, Mr. Thomas Leonard",male,34,0,0,363294,8.05,,S 761 | 760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",female,33,0,0,110152,86.5,B77,S 762 | 761,0,3,"Garfirth, Mr. John",male,,0,0,358585,14.5,,S 763 | 762,0,3,"Nirva, Mr. Iisakki Antino Aijo",male,41,0,0,SOTON/O2 3101272,7.125,,S 764 | 763,1,3,"Barah, Mr. Hanna Assi",male,20,0,0,2663,7.2292,,C 765 | 764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",female,36,1,2,113760,120,B96 B98,S 766 | 765,0,3,"Eklund, Mr. Hans Linus",male,16,0,0,347074,7.775,,S 767 | 766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",female,51,1,0,13502,77.9583,D11,S 768 | 767,0,1,"Brewe, Dr. Arthur Jackson",male,,0,0,112379,39.6,,C 769 | 768,0,3,"Mangan, Miss. Mary",female,30.5,0,0,364850,7.75,,Q 770 | 769,0,3,"Moran, Mr. Daniel J",male,,1,0,371110,24.15,,Q 771 | 770,0,3,"Gronnestad, Mr. Daniel Danielsen",male,32,0,0,8471,8.3625,,S 772 | 771,0,3,"Lievens, Mr. Rene Aime",male,24,0,0,345781,9.5,,S 773 | 772,0,3,"Jensen, Mr. Niels Peder",male,48,0,0,350047,7.8542,,S 774 | 773,0,2,"Mack, Mrs. (Mary)",female,57,0,0,S.O./P.P. 3,10.5,E77,S 775 | 774,0,3,"Elias, Mr. Dibo",male,,0,0,2674,7.225,,C 776 | 775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",female,54,1,3,29105,23,,S 777 | 776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",male,18,0,0,347078,7.75,,S 778 | 777,0,3,"Tobin, Mr. Roger",male,,0,0,383121,7.75,F38,Q 779 | 778,1,3,"Emanuel, Miss. Virginia Ethel",female,5,0,0,364516,12.475,,S 780 | 779,0,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q 781 | 780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",female,43,0,1,24160,211.3375,B3,S 782 | 781,1,3,"Ayoub, Miss. 
Banoura",female,13,0,0,2687,7.2292,,C 783 | 782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",female,17,1,0,17474,57,B20,S 784 | 783,0,1,"Long, Mr. Milton Clyde",male,29,0,0,113501,30,D6,S 785 | 784,0,3,"Johnston, Mr. Andrew G",male,,1,2,W./C. 6607,23.45,,S 786 | 785,0,3,"Ali, Mr. William",male,25,0,0,SOTON/O.Q. 3101312,7.05,,S 787 | 786,0,3,"Harmer, Mr. Abraham (David Lishin)",male,25,0,0,374887,7.25,,S 788 | 787,1,3,"Sjoblom, Miss. Anna Sofia",female,18,0,0,3101265,7.4958,,S 789 | 788,0,3,"Rice, Master. George Hugh",male,8,4,1,382652,29.125,,Q 790 | 789,1,3,"Dean, Master. Bertram Vere",male,1,1,2,C.A. 2315,20.575,,S 791 | 790,0,1,"Guggenheim, Mr. Benjamin",male,46,0,0,PC 17593,79.2,B82 B84,C 792 | 791,0,3,"Keane, Mr. Andrew ""Andy""",male,,0,0,12460,7.75,,Q 793 | 792,0,2,"Gaskell, Mr. Alfred",male,16,0,0,239865,26,,S 794 | 793,0,3,"Sage, Miss. Stella Anna",female,,8,2,CA. 2343,69.55,,S 795 | 794,0,1,"Hoyt, Mr. William Fisher",male,,0,0,PC 17600,30.6958,,C 796 | 795,0,3,"Dantcheff, Mr. Ristiu",male,25,0,0,349203,7.8958,,S 797 | 796,0,2,"Otter, Mr. Richard",male,39,0,0,28213,13,,S 798 | 797,1,1,"Leader, Dr. Alice (Farnham)",female,49,0,0,17465,25.9292,D17,S 799 | 798,1,3,"Osman, Mrs. Mara",female,31,0,0,349244,8.6833,,S 800 | 799,0,3,"Ibrahim Shawah, Mr. Yousseff",male,30,0,0,2685,7.2292,,C 801 | 800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",female,30,1,1,345773,24.15,,S 802 | 801,0,2,"Ponesell, Mr. Martin",male,34,0,0,250647,13,,S 803 | 802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",female,31,1,1,C.A. 31921,26.25,,S 804 | 803,1,1,"Carter, Master. William Thornton II",male,11,1,2,113760,120,B96 B98,S 805 | 804,1,3,"Thomas, Master. Assad Alexander",male,0.42,0,1,2625,8.5167,,C 806 | 805,1,3,"Hedman, Mr. Oskar Arvid",male,27,0,0,347089,6.975,,S 807 | 806,0,3,"Johansson, Mr. Karl Johan",male,31,0,0,347063,7.775,,S 808 | 807,0,1,"Andrews, Mr. Thomas Jr",male,39,0,0,112050,0,A36,S 809 | 808,0,3,"Pettersson, Miss. 
Ellen Natalia",female,18,0,0,347087,7.775,,S 810 | 809,0,2,"Meyer, Mr. August",male,39,0,0,248723,13,,S 811 | 810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",female,33,1,0,113806,53.1,E8,S 812 | 811,0,3,"Alexander, Mr. William",male,26,0,0,3474,7.8875,,S 813 | 812,0,3,"Lester, Mr. James",male,39,0,0,A/4 48871,24.15,,S 814 | 813,0,2,"Slemen, Mr. Richard James",male,35,0,0,28206,10.5,,S 815 | 814,0,3,"Andersson, Miss. Ebba Iris Alfrida",female,6,4,2,347082,31.275,,S 816 | 815,0,3,"Tomlin, Mr. Ernest Portage",male,30.5,0,0,364499,8.05,,S 817 | 816,0,1,"Fry, Mr. Richard",male,,0,0,112058,0,B102,S 818 | 817,0,3,"Heininen, Miss. Wendla Maria",female,23,0,0,STON/O2. 3101290,7.925,,S 819 | 818,0,2,"Mallet, Mr. Albert",male,31,1,1,S.C./PARIS 2079,37.0042,,C 820 | 819,0,3,"Holm, Mr. John Fredrik Alexander",male,43,0,0,C 7075,6.45,,S 821 | 820,0,3,"Skoog, Master. Karl Thorsten",male,10,3,2,347088,27.9,,S 822 | 821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",female,52,1,1,12749,93.5,B69,S 823 | 822,1,3,"Lulic, Mr. Nikola",male,27,0,0,315098,8.6625,,S 824 | 823,0,1,"Reuchlin, Jonkheer. John George",male,38,0,0,19972,0,,S 825 | 824,1,3,"Moor, Mrs. (Beila)",female,27,0,1,392096,12.475,E121,S 826 | 825,0,3,"Panula, Master. Urho Abraham",male,2,4,1,3101295,39.6875,,S 827 | 826,0,3,"Flynn, Mr. John",male,,0,0,368323,6.95,,Q 828 | 827,0,3,"Lam, Mr. Len",male,,0,0,1601,56.4958,,S 829 | 828,1,2,"Mallet, Master. Andre",male,1,0,2,S.C./PARIS 2079,37.0042,,C 830 | 829,1,3,"McCormack, Mr. Thomas Joseph",male,,0,0,367228,7.75,,Q 831 | 830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62,0,0,113572,80,B28, 832 | 831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",female,15,1,0,2659,14.4542,,C 833 | 832,1,2,"Richards, Master. George Sibley",male,0.83,1,1,29106,18.75,,S 834 | 833,0,3,"Saad, Mr. Amin",male,,0,0,2671,7.2292,,C 835 | 834,0,3,"Augustsson, Mr. Albert",male,23,0,0,347468,7.8542,,S 836 | 835,0,3,"Allum, Mr. 
Owen George",male,18,0,0,2223,8.3,,S 837 | 836,1,1,"Compton, Miss. Sara Rebecca",female,39,1,1,PC 17756,83.1583,E49,C 838 | 837,0,3,"Pasic, Mr. Jakob",male,21,0,0,315097,8.6625,,S 839 | 838,0,3,"Sirota, Mr. Maurice",male,,0,0,392092,8.05,,S 840 | 839,1,3,"Chip, Mr. Chang",male,32,0,0,1601,56.4958,,S 841 | 840,1,1,"Marechal, Mr. Pierre",male,,0,0,11774,29.7,C47,C 842 | 841,0,3,"Alhomaki, Mr. Ilmari Rudolf",male,20,0,0,SOTON/O2 3101287,7.925,,S 843 | 842,0,2,"Mudd, Mr. Thomas Charles",male,16,0,0,S.O./P.P. 3,10.5,,S 844 | 843,1,1,"Serepeca, Miss. Augusta",female,30,0,0,113798,31,,C 845 | 844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C 846 | 845,0,3,"Culumovic, Mr. Jeso",male,17,0,0,315090,8.6625,,S 847 | 846,0,3,"Abbing, Mr. Anthony",male,42,0,0,C.A. 5547,7.55,,S 848 | 847,0,3,"Sage, Mr. Douglas Bullen",male,,8,2,CA. 2343,69.55,,S 849 | 848,0,3,"Markoff, Mr. Marin",male,35,0,0,349213,7.8958,,C 850 | 849,0,2,"Harper, Rev. John",male,28,0,1,248727,33,,S 851 | 850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",female,,1,0,17453,89.1042,C92,C 852 | 851,0,3,"Andersson, Master. Sigvard Harald Elias",male,4,4,2,347082,31.275,,S 853 | 852,0,3,"Svensson, Mr. Johan",male,74,0,0,347060,7.775,,S 854 | 853,0,3,"Boulos, Miss. Nourelain",female,9,1,1,2678,15.2458,,C 855 | 854,1,1,"Lines, Miss. Mary Conover",female,16,0,1,PC 17592,39.4,D28,S 856 | 855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",female,44,1,0,244252,26,,S 857 | 856,1,3,"Aks, Mrs. Sam (Leah Rosen)",female,18,0,1,392091,9.35,,S 858 | 857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",female,45,1,1,36928,164.8667,,S 859 | 858,1,1,"Daly, Mr. Peter Denis ",male,51,0,0,113055,26.55,E17,S 860 | 859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",female,24,0,3,2666,19.2583,,C 861 | 860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C 862 | 861,0,3,"Hansen, Mr. Claus Peter",male,41,2,0,350026,14.1083,,S 863 | 862,0,2,"Giles, Mr. Frederick Edward",male,21,1,0,28134,11.5,,S 864 | 863,1,1,"Swift, Mrs. 
Frederick Joel (Margaret Welles Barron)",female,48,0,0,17466,25.9292,D17,S 865 | 864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.55,,S 866 | 865,0,2,"Gill, Mr. John William",male,24,0,0,233866,13,,S 867 | 866,1,2,"Bystrom, Mrs. (Karolina)",female,42,0,0,236852,13,,S 868 | 867,1,2,"Duran y More, Miss. Asuncion",female,27,1,0,SC/PARIS 2149,13.8583,,C 869 | 868,0,1,"Roebling, Mr. Washington Augustus II",male,31,0,0,PC 17590,50.4958,A24,S 870 | 869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5,,S 871 | 870,1,3,"Johnson, Master. Harold Theodor",male,4,1,1,347742,11.1333,,S 872 | 871,0,3,"Balkic, Mr. Cerin",male,26,0,0,349248,7.8958,,S 873 | 872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",female,47,1,1,11751,52.5542,D35,S 874 | 873,0,1,"Carlsson, Mr. Frans Olof",male,33,0,0,695,5,B51 B53 B55,S 875 | 874,0,3,"Vander Cruyssen, Mr. Victor",male,47,0,0,345765,9,,S 876 | 875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",female,28,1,0,P/PP 3381,24,,C 877 | 876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",female,15,0,0,2667,7.225,,C 878 | 877,0,3,"Gustafsson, Mr. Alfred Ossian",male,20,0,0,7534,9.8458,,S 879 | 878,0,3,"Petroff, Mr. Nedelio",male,19,0,0,349212,7.8958,,S 880 | 879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S 881 | 880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",female,56,0,1,11767,83.1583,C50,C 882 | 881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",female,25,0,1,230433,26,,S 883 | 882,0,3,"Markun, Mr. Johann",male,33,0,0,349257,7.8958,,S 884 | 883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22,0,0,7552,10.5167,,S 885 | 884,0,2,"Banfield, Mr. Frederick James",male,28,0,0,C.A./SOTON 34068,10.5,,S 886 | 885,0,3,"Sutehall, Mr. Henry Jr",male,25,0,0,SOTON/OQ 392076,7.05,,S 887 | 886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39,0,5,382652,29.125,,Q 888 | 887,0,2,"Montvila, Rev. Juozas",male,27,0,0,211536,13,,S 889 | 888,1,1,"Graham, Miss. 
Margaret Edith",female,19,0,0,112053,30,B42,S 890 | 889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S 891 | 890,1,1,"Behr, Mr. Karl Howell",male,26,0,0,111369,30,C148,C 892 | 891,0,3,"Dooley, Mr. Patrick",male,32,0,0,370376,7.75,,Q 893 | --------------------------------------------------------------------------------