├── ML.png ├── Reducing Traffic Mortality in the USA └── datasets │ ├── miles-driven.csv │ └── road-accidents.csv ├── Future Sales ├── shops.csv ├── item_categories.csv └── Future Sales.ipynb ├── Deeplearning.ipynb ├── README.md ├── Future Sales 1.1.ipynb ├── Give Life_ Predict Blood Donations ├── datasets │ └── transfusion.data └── notebook.ipynb ├── Predicting Car Prices └── imports-85.data ├── Predicting Bike Rentals └── Basics.ipynb ├── Prediciting Titanic Survival └── test.csv └── Overfit.ipynb /ML.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ammarshaikh123/Projects-on-Machine-Learning/master/ML.png -------------------------------------------------------------------------------- /Reducing Traffic Mortality in the USA/datasets/miles-driven.csv: -------------------------------------------------------------------------------- 1 | state|million_miles_annually 2 | Alabama|64914 3 | Alaska|4593 4 | Arizona|59575 5 | Arkansas|32953 6 | California|320784 7 | Colorado|46606 8 | Connecticut|31197 9 | Delaware|9028 10 | District of Columbia|3568 11 | Florida|191855 12 | Georgia|108454 13 | Hawaii|10066 14 | Idaho|15937 15 | Illinois|103234 16 | Indiana|76485 17 | Iowa|31274 18 | Kansas|30021 19 | Kentucky|48061 20 | Louisiana|46513 21 | Maine|14248 22 | Maryland|56221 23 | Massachusetts|54792 24 | Michigan|94754 25 | Minnesota|56685 26 | Mississippi|38851 27 | Missouri|68789 28 | Montana|11660 29 | Nebraska|19093 30 | Nevada|24189 31 | New Hampshire|12720 32 | New Jersey|73094 33 | New Mexico|25650 34 | New York|127726 35 | North Carolina|103772 36 | North Dakota|9131 37 | Ohio|111990 38 | Oklahoma|47464 39 | Oregon|33373 40 | Pennsylvania|99204 41 | Rhode Island|7901 42 | South Carolina|48730 43 | South Dakota|9002 44 | Tennessee|70751 45 | Texas|237440 46 | Utah|26222 47 | Vermont|7141 48 | Virginia|80974 49 | Washington|56955 50 | West Virginia|18963 51 | Wisconsin|58554 52 | Wyoming|9245 53 | 
-------------------------------------------------------------------------------- /Future Sales/shops.csv: -------------------------------------------------------------------------------- 1 | shop_name,shop_id 2 | "!Якутск Орджоникидзе, 56 фран",0 3 | "!Якутск ТЦ ""Центральный"" фран",1 4 | "Адыгея ТЦ ""Мега""",2 5 | "Балашиха ТРК ""Октябрь-Киномир""",3 6 | "Волжский ТЦ ""Волга Молл""",4 7 | "Вологда ТРЦ ""Мармелад""",5 8 | "Воронеж (Плехановская, 13)",6 9 | "Воронеж ТРЦ ""Максимир""",7 10 | "Воронеж ТРЦ Сити-Парк ""Град""",8 11 | Выездная Торговля,9 12 | Жуковский ул. Чкалова 39м?,10 13 | Жуковский ул. Чкалова 39м²,11 14 | Интернет-магазин ЧС,12 15 | "Казань ТЦ ""Бехетле""",13 16 | "Казань ТЦ ""ПаркХаус"" II",14 17 | "Калуга ТРЦ ""XXI век""",15 18 | "Коломна ТЦ ""Рио""",16 19 | "Красноярск ТЦ ""Взлетка Плаза""",17 20 | "Красноярск ТЦ ""Июнь""",18 21 | "Курск ТЦ ""Пушкинский""",19 22 | "Москва ""Распродажа""",20 23 | "Москва МТРЦ ""Афи Молл""",21 24 | Москва Магазин С21,22 25 | "Москва ТК ""Буденовский"" (пав.А2)",23 26 | "Москва ТК ""Буденовский"" (пав.К7)",24 27 | "Москва ТРК ""Атриум""",25 28 | "Москва ТЦ ""Ареал"" (Беляево)",26 29 | "Москва ТЦ ""МЕГА Белая Дача II""",27 30 | "Москва ТЦ ""МЕГА Теплый Стан"" II",28 31 | "Москва ТЦ ""Новый век"" (Новокосино)",29 32 | "Москва ТЦ ""Перловский""",30 33 | "Москва ТЦ ""Семеновский""",31 34 | "Москва ТЦ ""Серебряный Дом""",32 35 | "Мытищи ТРК ""XL-3""",33 36 | "Н.Новгород ТРЦ ""РИО""",34 37 | "Н.Новгород ТРЦ ""Фантастика""",35 38 | "Новосибирск ТРЦ ""Галерея Новосибирск""",36 39 | "Новосибирск ТЦ ""Мега""",37 40 | "Омск ТЦ ""Мега""",38 41 | "РостовНаДону ТРК ""Мегацентр Горизонт""",39 42 | "РостовНаДону ТРК ""Мегацентр Горизонт"" Островной",40 43 | "РостовНаДону ТЦ ""Мега""",41 44 | "СПб ТК ""Невский Центр""",42 45 | "СПб ТК ""Сенная""",43 46 | "Самара ТЦ ""Мелодия""",44 47 | "Самара ТЦ ""ПаркХаус""",45 48 | "Сергиев Посад ТЦ ""7Я""",46 49 | "Сургут ТРЦ ""Сити Молл""",47 50 | "Томск ТРЦ ""Изумрудный Город""",48 51 | 
"Тюмень ТРЦ ""Кристалл""",49 52 | "Тюмень ТЦ ""Гудвин""",50 53 | "Тюмень ТЦ ""Зеленый Берег""",51 54 | "Уфа ТК ""Центральный""",52 55 | "Уфа ТЦ ""Семья"" 2",53 56 | "Химки ТЦ ""Мега""",54 57 | Цифровой склад 1С-Онлайн,55 58 | "Чехов ТРЦ ""Карнавал""",56 59 | "Якутск Орджоникидзе, 56",57 60 | "Якутск ТЦ ""Центральный""",58 61 | "Ярославль ТЦ ""Альтаир""",59 62 | -------------------------------------------------------------------------------- /Reducing Traffic Mortality in the USA/datasets/road-accidents.csv: -------------------------------------------------------------------------------- 1 | ##### LICENSE ##### 2 | # This data set is modified from the original at fivethirtyeight (https://github.com/fivethirtyeight/data/tree/master/bad-drivers) 3 | # and it is released under CC BY 4.0 (https://creativecommons.org/licenses/by/4.0/) 4 | ##### COLUMN ABBREVIATIONS ##### 5 | # drvr_fatl_col_bmiles = Number of drivers involved in fatal collisions per billion miles (2011) 6 | # perc_fatl_speed = Percentage Of Drivers Involved In Fatal Collisions Who Were Speeding (2009) 7 | # perc_fatl_alcohol = Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired (2011) 8 | # perc_fatl_1st_time = Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents (2011) 9 | ##### DATA BEGIN ##### 10 | state|drvr_fatl_col_bmiles|perc_fatl_speed|perc_fatl_alcohol|perc_fatl_1st_time 11 | Alabama|18.8|39|30|80 12 | Alaska|18.1|41|25|94 13 | Arizona|18.6|35|28|96 14 | Arkansas|22.4|18|26|95 15 | California|12|35|28|89 16 | Colorado|13.6|37|28|95 17 | Connecticut|10.8|46|36|82 18 | Delaware|16.2|38|30|99 19 | District of Columbia|5.9|34|27|100 20 | Florida|17.9|21|29|94 21 | Georgia|15.6|19|25|93 22 | Hawaii|17.5|54|41|87 23 | Idaho|15.3|36|29|98 24 | Illinois|12.8|36|34|96 25 | Indiana|14.5|25|29|95 26 | Iowa|15.7|17|25|87 27 | Kansas|17.8|27|24|85 28 | Kentucky|21.4|19|23|76 29 | Louisiana|20.5|35|33|98 30 | Maine|15.1|38|30|84 31 | 
Maryland|12.5|34|32|99 32 | Massachusetts|8.2|23|35|80 33 | Michigan|14.1|24|28|77 34 | Minnesota|9.6|23|29|88 35 | Mississippi|17.6|15|31|100 36 | Missouri|16.1|43|34|84 37 | Montana|21.4|39|44|85 38 | Nebraska|14.9|13|35|90 39 | Nevada|14.7|37|32|99 40 | New Hampshire|11.6|35|30|83 41 | New Jersey|11.2|16|28|78 42 | New Mexico|18.4|19|27|98 43 | New York|12.3|32|29|80 44 | North Carolina|16.8|39|31|81 45 | North Dakota|23.9|23|42|86 46 | Ohio|14.1|28|34|82 47 | Oklahoma|19.9|32|29|94 48 | Oregon|12.8|33|26|90 49 | Pennsylvania|18.2|50|31|88 50 | Rhode Island|11.1|34|38|79 51 | South Carolina|23.9|38|41|81 52 | South Dakota|19.4|31|33|86 53 | Tennessee|19.5|21|29|81 54 | Texas|19.4|40|38|87 55 | Utah|11.3|43|16|96 56 | Vermont|13.6|30|30|95 57 | Virginia|12.7|19|27|88 58 | Washington|10.6|42|33|86 59 | West Virginia|23.8|34|28|87 60 | Wisconsin|13.8|36|33|84 61 | Wyoming|17.4|42|32|90 62 | -------------------------------------------------------------------------------- /Future Sales/item_categories.csv: -------------------------------------------------------------------------------- 1 | item_category_name,item_category_id 2 | PC - Гарнитуры/Наушники,0 3 | Аксессуары - PS2,1 4 | Аксессуары - PS3,2 5 | Аксессуары - PS4,3 6 | Аксессуары - PSP,4 7 | Аксессуары - PSVita,5 8 | Аксессуары - XBOX 360,6 9 | Аксессуары - XBOX ONE,7 10 | Билеты (Цифра),8 11 | Доставка товара,9 12 | Игровые консоли - PS2,10 13 | Игровые консоли - PS3,11 14 | Игровые консоли - PS4,12 15 | Игровые консоли - PSP,13 16 | Игровые консоли - PSVita,14 17 | Игровые консоли - XBOX 360,15 18 | Игровые консоли - XBOX ONE,16 19 | Игровые консоли - Прочие,17 20 | Игры - PS2,18 21 | Игры - PS3,19 22 | Игры - PS4,20 23 | Игры - PSP,21 24 | Игры - PSVita,22 25 | Игры - XBOX 360,23 26 | Игры - XBOX ONE,24 27 | Игры - Аксессуары для игр,25 28 | Игры Android - Цифра,26 29 | Игры MAC - Цифра,27 30 | Игры PC - Дополнительные издания,28 31 | Игры PC - Коллекционные издания,29 32 | Игры PC - Стандартные издания,30 
33 | Игры PC - Цифра,31 34 | "Карты оплаты (Кино, Музыка, Игры)",32 35 | Карты оплаты - Live!,33 36 | Карты оплаты - Live! (Цифра),34 37 | Карты оплаты - PSN,35 38 | Карты оплаты - Windows (Цифра),36 39 | Кино - Blu-Ray,37 40 | Кино - Blu-Ray 3D,38 41 | Кино - Blu-Ray 4K,39 42 | Кино - DVD,40 43 | Кино - Коллекционное,41 44 | "Книги - Артбуки, энциклопедии",42 45 | Книги - Аудиокниги,43 46 | Книги - Аудиокниги (Цифра),44 47 | Книги - Аудиокниги 1С,45 48 | Книги - Бизнес литература,46 49 | "Книги - Комиксы, манга",47 50 | Книги - Компьютерная литература,48 51 | Книги - Методические материалы 1С,49 52 | Книги - Открытки,50 53 | Книги - Познавательная литература,51 54 | Книги - Путеводители,52 55 | Книги - Художественная литература,53 56 | Книги - Цифра,54 57 | Музыка - CD локального производства,55 58 | Музыка - CD фирменного производства,56 59 | Музыка - MP3,57 60 | Музыка - Винил,58 61 | Музыка - Музыкальное видео,59 62 | Музыка - Подарочные издания,60 63 | Подарки - Атрибутика,61 64 | "Подарки - Гаджеты, роботы, спорт",62 65 | Подарки - Мягкие игрушки,63 66 | Подарки - Настольные игры,64 67 | Подарки - Настольные игры (компактные),65 68 | "Подарки - Открытки, наклейки",66 69 | Подарки - Развитие,67 70 | "Подарки - Сертификаты, услуги",68 71 | Подарки - Сувениры,69 72 | Подарки - Сувениры (в навеску),70 73 | "Подарки - Сумки, Альбомы, Коврики д/мыши",71 74 | Подарки - Фигурки,72 75 | Программы - 1С:Предприятие 8,73 76 | Программы - MAC (Цифра),74 77 | Программы - Для дома и офиса,75 78 | Программы - Для дома и офиса (Цифра),76 79 | Программы - Обучающие,77 80 | Программы - Обучающие (Цифра),78 81 | Служебные,79 82 | Служебные - Билеты,80 83 | Чистые носители (шпиль),81 84 | Чистые носители (штучные),82 85 | Элементы питания,83 86 | -------------------------------------------------------------------------------- /Deeplearning.ipynb: -------------------------------------------------------------------------------- 1 | 
{"cells":[{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load in \n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the \"../input/\" directory.\n# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n\nimport os\nprint(os.listdir(\"../input\"))\n\n# Any results you write to the current directory are saved as output.","execution_count":null,"outputs":[]},{"metadata":{"_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","trusted":true},"cell_type":"code","source":"%reload_ext autoreload\n%autoreload 2\n%matplotlib inline\n\nfrom fastai import *\nfrom fastai.vision import *","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"bs = 64","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"train_df = pd.read_csv(\"../input/train.csv\")\ntest_df = pd.read_csv(\"../input/sample_submission.csv\")\n\ntest_img = ImageList.from_df(test_df, path='../input/test', folder='test')\ntrain_img = (ImageList.from_df(train_df, path='../input/train', folder='train')\n .random_split_by_pct(0.2)\n .label_from_df()\n .add_test(test_img)\n .transform(get_transforms(flip_vert=True), size=128)\n .databunch(path='.', bs=64)\n .normalize(imagenet_stats)\n )","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"train_img.show_batch(rows=3, 
figsize=(3,4))","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"learn = create_cnn(train_img,models.resnet34, metrics=[error_rate, accuracy])","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"learn.unfreeze()\nlearn.lr_find()\nlearn.recorder.plot()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"learn.fit(epochs=5,lr=2e-6)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"preds,_ = learn.get_preds(ds_type=DatasetType.Test)\npreds.shape","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"classes = preds.argmax(1)\n\ntest_df.has_cactus = classes\ntest_df.head()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"test_df.to_csv('submission_new.csv', index=False)","execution_count":null,"outputs":[]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.6.4","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":1} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine-Learning 2 | 3 | This repo contains my projects on Machine Learning, I have covered diverse topics under each project, You can see the description for each project below. 4 | 5 | 1)[Classify Song Genres from Audio Data](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Classify%20Song%20Genres%20from%20Audio%20Data) 6 | 7 | This project is based on recommending new music to the users by analysing various components. 
I have used PCA in this project for dimensionality reduction. For modelling I have tried different models like Decision Tree and Logistic Regression. I have further used Corss validation to evaluate the models performance. 8 | 9 | 2)[Give Life_ Predict Blood Donations](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Give%20Life_%20Predict%20Blood%20Donations) 10 | 11 | This project is based on binnary classification where I predict if a donor who has donated blood in previous 6 months will dontate blood again or not. It was fun as I got to implement pipelines in this project which was something new for me. 12 | 13 | 3)[Prediciting Titanic Survival](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Prediciting%20Titanic%20Survival) 14 | 15 | 16 | In this project I worked on a Kaggle data set (Titanic Data Set). Based on all the features available I tried predicting whether a passanger will survive or not. I was able to land in top 14% of the competetion. I have used XGBoost along with hyper parameter tuning to get my results. 17 | 18 | 4)[Predicting Bike Rentals](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20Bike%20Rentals) 19 | 20 | Many American cities have communal bike sharing stations where you can rent bicycles by the hour or day. Washington, D.C. is one of these cities. In this project, I tried to predict the total number of bikes people rented in a given hour by using variables like monthly rental, weekly rental, temperature, humidity etc. We used Linear Regression as well as Decesion trees and compared which model provided us better results. 21 | 22 | 23 | 5)[Predicting Car Prices](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20Car%20Prices) 24 | 25 | In this project I tried predicting price of car based on training my model with attributes like fuel-type,engine-type,compression-rate,horsepower etc. 
I have used KNeighbors as my model to predict the car price. 26 | 27 | 6)[Predicting Credit Card Approvals](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20Credit%20Card%20Approvals) 28 | 29 | I worked on this project to get a feel of fraud detection analysis. From the given dataset I tried to predict whether a loan should be approved or not based on various features which were available. I used logistic regression as my model and I also used hyper parameter tuning to enhance the performance of my model. 30 | 31 | 7)[Predicting board game reviews](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20board%20game%20reviews) 32 | 33 | In this project I worked with a data set that contains 80000 board games and their associated review scores. We used various parameters of review and tried to predict the average rating of a board game. We used co-relation to find out relavant variables for my analysis and Linear regression for modelling. 34 | 35 | 8)[Reducing Traffic Mortality in the USA](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Reducing%20Traffic%20Mortality%20in%20the%20USA) 36 | 37 | This project was based on the increasing rate of road accidents. I used unsupervised learning for training my model and PCA for dimensionality reduction. I also tried to display a concept called as masking by using multivariate regression. 38 | 39 | 9)[Predicting TMDB Box Office Collections ](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/blob/master/IMDB.ipynb) 40 | 41 | In this project, I tried to predict box office collections of movies based on various features provided in the data set. It was a very intersting project to work on as the data format was something new to me and I got to learn and explore new dimensions of Data analytics. 
42 | 43 | 10)[Don't Overfit ](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/blob/master/Overfit.ipynb) 44 | 45 | This is a kaggle competition I worked on which had only 250 rows in train set and around 1000+ rows in test. The task was to predict the test set without overfitting the training set. I have used various models like xgboost, linear regression and later combined them inorder to achieve better performance. 46 | 47 | ### Few snapshots of visualization I have performed in the mentioned projects. 48 | 49 | ![alt text](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/blob/master/ML.png) 50 | -------------------------------------------------------------------------------- /Future Sales 1.1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "\n", 14 | "from datetime import datetime, date\n", 15 | "\n", 16 | "\n", 17 | "from sklearn.preprocessing import StandardScaler\n", 18 | "\n" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "%matplotlib inline\n", 28 | "\n", 29 | "test = pd.read_csv('test.csv')\n", 30 | "item_categories = pd.read_csv('item_categories.csv')\n", 31 | "shops = pd.read_csv('shops.csv')\n", 32 | "train = pd.read_csv('sales_train.csv', parse_dates=['date'])" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "items=pd.read_csv('items.csv')" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 4, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "import os\n", 51 | "\n", 52 | "os.makedirs('tmp', 
exist_ok=True)\n", 53 | "train.to_feather('train-raw')\n", 54 | "test.to_feather('test-raw')\n", 55 | "items.to_feather('items-raw')\n" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
\n", 67 | "\n", 80 | "\n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | "
datedate_block_numshop_iditem_iditem_priceitem_cnt_day
02013-02-0105922154999.001.0
12013-03-010252552899.001.0
22013-05-010252552899.00-1.0
32013-06-0102525541709.051.0
42013-01-1502525551099.001.0
\n", 140 | "
" 141 | ], 142 | "text/plain": [ 143 | " date date_block_num shop_id item_id item_price item_cnt_day\n", 144 | "0 2013-02-01 0 59 22154 999.00 1.0\n", 145 | "1 2013-03-01 0 25 2552 899.00 1.0\n", 146 | "2 2013-05-01 0 25 2552 899.00 -1.0\n", 147 | "3 2013-06-01 0 25 2554 1709.05 1.0\n", 148 | "4 2013-01-15 0 25 2555 1099.00 1.0" 149 | ] 150 | }, 151 | "execution_count": 5, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "train.head()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 6, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/html": [ 168 | "
\n", 169 | "\n", 182 | "\n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | "
IDshop_iditem_id
0055037
1155320
2255233
3355232
4455268
\n", 224 | "
" 225 | ], 226 | "text/plain": [ 227 | " ID shop_id item_id\n", 228 | "0 0 5 5037\n", 229 | "1 1 5 5320\n", 230 | "2 2 5 5233\n", 231 | "3 3 5 5232\n", 232 | "4 4 5 5268" 233 | ] 234 | }, 235 | "execution_count": 6, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "test.head()" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 7, 247 | "metadata": {}, 248 | "outputs": [], 249 | "source": [ 250 | "train['year']=train.date.dt.year" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": 9, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "train['month']=train.date.dt.month\n", 260 | "train['week']=train.date.dt.week\n", 261 | "train['dow']=train.date.dt.dayofweek" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "train['quarter']=train.date.dt.quarter\n", 271 | "train['is_month_start']=train.date.dt.month\n" 272 | ] 273 | } 274 | ], 275 | "metadata": { 276 | "kernelspec": { 277 | "display_name": "Python 3", 278 | "language": "python", 279 | "name": "python3" 280 | }, 281 | "language_info": { 282 | "codemirror_mode": { 283 | "name": "ipython", 284 | "version": 3 285 | }, 286 | "file_extension": ".py", 287 | "mimetype": "text/x-python", 288 | "name": "python", 289 | "nbconvert_exporter": "python", 290 | "pygments_lexer": "ipython3", 291 | "version": "3.7.1" 292 | } 293 | }, 294 | "nbformat": 4, 295 | "nbformat_minor": 2 296 | } 297 | -------------------------------------------------------------------------------- /Give Life_ Predict Blood Donations/datasets/transfusion.data: -------------------------------------------------------------------------------- 1 | Recency (months),Frequency (times),Monetary (c.c. 
blood),Time (months),"whether he/she donated blood in March 2007" 2 | 2 ,50,12500,98 ,1 3 | 0 ,13,3250,28 ,1 4 | 1 ,16,4000,35 ,1 5 | 2 ,20,5000,45 ,1 6 | 1 ,24,6000,77 ,0 7 | 4 ,4,1000,4 ,0 8 | 2 ,7,1750,14 ,1 9 | 1 ,12,3000,35 ,0 10 | 2 ,9,2250,22 ,1 11 | 5 ,46,11500,98 ,1 12 | 4 ,23,5750,58 ,0 13 | 0 ,3,750,4 ,0 14 | 2 ,10,2500,28 ,1 15 | 1 ,13,3250,47 ,0 16 | 2 ,6,1500,15 ,1 17 | 2 ,5,1250,11 ,1 18 | 2 ,14,3500,48 ,1 19 | 2 ,15,3750,49 ,1 20 | 2 ,6,1500,15 ,1 21 | 2 ,3,750,4 ,1 22 | 2 ,3,750,4 ,1 23 | 4 ,11,2750,28 ,0 24 | 2 ,6,1500,16 ,1 25 | 2 ,6,1500,16 ,1 26 | 9 ,9,2250,16 ,0 27 | 4 ,14,3500,40 ,0 28 | 4 ,6,1500,14 ,0 29 | 4 ,12,3000,34 ,1 30 | 4 ,5,1250,11 ,1 31 | 4 ,8,2000,21 ,0 32 | 1 ,14,3500,58 ,0 33 | 4 ,10,2500,28 ,1 34 | 4 ,10,2500,28 ,1 35 | 4 ,9,2250,26 ,1 36 | 2 ,16,4000,64 ,0 37 | 2 ,8,2000,28 ,1 38 | 2 ,12,3000,47 ,1 39 | 4 ,6,1500,16 ,1 40 | 2 ,14,3500,57 ,1 41 | 4 ,7,1750,22 ,1 42 | 2 ,13,3250,53 ,1 43 | 2 ,5,1250,16 ,0 44 | 2 ,5,1250,16 ,1 45 | 2 ,5,1250,16 ,0 46 | 4 ,20,5000,69 ,1 47 | 4 ,9,2250,28 ,1 48 | 2 ,9,2250,36 ,0 49 | 2 ,2,500,2 ,0 50 | 2 ,2,500,2 ,0 51 | 2 ,2,500,2 ,0 52 | 2 ,11,2750,46 ,0 53 | 2 ,11,2750,46 ,1 54 | 2 ,6,1500,22 ,0 55 | 2 ,12,3000,52 ,0 56 | 4 ,5,1250,14 ,1 57 | 4 ,19,4750,69 ,1 58 | 4 ,8,2000,26 ,1 59 | 2 ,7,1750,28 ,1 60 | 2 ,16,4000,81 ,0 61 | 3 ,6,1500,21 ,0 62 | 2 ,7,1750,29 ,0 63 | 2 ,8,2000,35 ,1 64 | 2 ,10,2500,49 ,0 65 | 4 ,5,1250,16 ,1 66 | 2 ,3,750,9 ,1 67 | 3 ,16,4000,74 ,0 68 | 2 ,4,1000,14 ,1 69 | 0 ,2,500,4 ,0 70 | 4 ,7,1750,25 ,0 71 | 1 ,9,2250,51 ,0 72 | 2 ,4,1000,16 ,0 73 | 2 ,4,1000,16 ,0 74 | 4 ,17,4250,71 ,1 75 | 2 ,2,500,4 ,0 76 | 2 ,2,500,4 ,1 77 | 2 ,2,500,4 ,1 78 | 2 ,4,1000,16 ,1 79 | 2 ,2,500,4 ,0 80 | 2 ,2,500,4 ,0 81 | 2 ,2,500,4 ,0 82 | 4 ,6,1500,23 ,1 83 | 2 ,4,1000,16 ,0 84 | 2 ,4,1000,16 ,0 85 | 2 ,4,1000,16 ,0 86 | 2 ,6,1500,28 ,1 87 | 2 ,6,1500,28 ,0 88 | 4 ,2,500,4 ,0 89 | 4 ,2,500,4 ,0 90 | 4 ,2,500,4 ,0 91 | 2 ,7,1750,35 ,1 92 | 4 ,2,500,4 ,1 93 | 4 ,2,500,4 ,0 94 | 4 ,2,500,4 
,0 95 | 4 ,2,500,4 ,0 96 | 12 ,11,2750,23 ,0 97 | 4 ,7,1750,28 ,0 98 | 3 ,17,4250,86 ,0 99 | 4 ,9,2250,38 ,1 100 | 4 ,4,1000,14 ,1 101 | 5 ,7,1750,26 ,1 102 | 4 ,8,2000,34 ,1 103 | 2 ,13,3250,76 ,1 104 | 4 ,9,2250,40 ,0 105 | 2 ,5,1250,26 ,0 106 | 2 ,5,1250,26 ,0 107 | 6 ,17,4250,70 ,0 108 | 0 ,8,2000,59 ,0 109 | 3 ,5,1250,26 ,0 110 | 2 ,3,750,14 ,0 111 | 2 ,10,2500,64 ,0 112 | 4 ,5,1250,23 ,1 113 | 4 ,9,2250,46 ,0 114 | 4 ,5,1250,23 ,0 115 | 4 ,8,2000,40 ,1 116 | 2 ,12,3000,82 ,0 117 | 11 ,24,6000,64 ,0 118 | 2 ,7,1750,46 ,1 119 | 4 ,11,2750,61 ,0 120 | 1 ,7,1750,57 ,0 121 | 2 ,11,2750,79 ,1 122 | 2 ,3,750,16 ,1 123 | 4 ,5,1250,26 ,1 124 | 2 ,6,1500,41 ,1 125 | 2 ,5,1250,33 ,1 126 | 2 ,4,1000,26 ,0 127 | 2 ,5,1250,34 ,0 128 | 4 ,8,2000,46 ,1 129 | 2 ,4,1000,26 ,0 130 | 4 ,8,2000,48 ,1 131 | 2 ,2,500,10 ,1 132 | 4 ,5,1250,28 ,0 133 | 2 ,12,3000,95 ,0 134 | 2 ,2,500,10 ,0 135 | 4 ,6,1500,35 ,0 136 | 2 ,11,2750,88 ,0 137 | 2 ,3,750,19 ,0 138 | 2 ,5,1250,37 ,0 139 | 2 ,12,3000,98 ,0 140 | 9 ,5,1250,19 ,0 141 | 2 ,2,500,11 ,0 142 | 2 ,9,2250,74 ,0 143 | 5 ,14,3500,86 ,0 144 | 4 ,3,750,16 ,0 145 | 4 ,3,750,16 ,0 146 | 4 ,2,500,9 ,1 147 | 4 ,3,750,16 ,1 148 | 6 ,3,750,14 ,0 149 | 2 ,2,500,11 ,0 150 | 2 ,2,500,11 ,1 151 | 2 ,2,500,11 ,0 152 | 2 ,7,1750,58 ,1 153 | 4 ,6,1500,39 ,0 154 | 4 ,11,2750,78 ,0 155 | 2 ,1,250,2 ,1 156 | 2 ,1,250,2 ,0 157 | 2 ,1,250,2 ,0 158 | 2 ,1,250,2 ,0 159 | 2 ,1,250,2 ,0 160 | 2 ,1,250,2 ,0 161 | 2 ,1,250,2 ,0 162 | 2 ,1,250,2 ,0 163 | 2 ,1,250,2 ,0 164 | 2 ,1,250,2 ,0 165 | 2 ,1,250,2 ,1 166 | 2 ,1,250,2 ,1 167 | 2 ,1,250,2 ,1 168 | 2 ,1,250,2 ,0 169 | 2 ,1,250,2 ,0 170 | 2 ,1,250,2 ,0 171 | 2 ,1,250,2 ,0 172 | 2 ,1,250,2 ,0 173 | 2 ,1,250,2 ,0 174 | 2 ,1,250,2 ,0 175 | 2 ,1,250,2 ,0 176 | 2 ,1,250,2 ,0 177 | 11 ,10,2500,35 ,0 178 | 11 ,4,1000,16 ,1 179 | 4 ,5,1250,33 ,1 180 | 4 ,6,1500,41 ,1 181 | 2 ,3,750,22 ,0 182 | 4 ,4,1000,26 ,1 183 | 10 ,4,1000,16 ,0 184 | 2 ,4,1000,35 ,0 185 | 4 ,12,3000,88 ,0 186 | 13 ,8,2000,26 ,0 187 | 11 
,9,2250,33 ,0 188 | 4 ,5,1250,34 ,0 189 | 4 ,4,1000,26 ,0 190 | 8 ,15,3750,77 ,0 191 | 4 ,5,1250,35 ,1 192 | 4 ,7,1750,52 ,0 193 | 4 ,7,1750,52 ,0 194 | 2 ,4,1000,35 ,0 195 | 11 ,11,2750,42 ,0 196 | 2 ,2,500,14 ,0 197 | 2 ,5,1250,47 ,1 198 | 9 ,8,2000,38 ,1 199 | 4 ,6,1500,47 ,0 200 | 11 ,7,1750,29 ,0 201 | 9 ,9,2250,45 ,0 202 | 4 ,6,1500,52 ,0 203 | 4 ,7,1750,58 ,0 204 | 6 ,2,500,11 ,1 205 | 4 ,7,1750,58 ,0 206 | 11 ,9,2250,38 ,0 207 | 11 ,6,1500,26 ,0 208 | 2 ,2,500,16 ,0 209 | 2 ,7,1750,76 ,0 210 | 11 ,6,1500,27 ,0 211 | 11 ,3,750,14 ,0 212 | 4 ,1,250,4 ,0 213 | 4 ,1,250,4 ,0 214 | 4 ,1,250,4 ,0 215 | 4 ,1,250,4 ,0 216 | 4 ,1,250,4 ,0 217 | 4 ,1,250,4 ,1 218 | 4 ,1,250,4 ,0 219 | 4 ,1,250,4 ,0 220 | 4 ,1,250,4 ,0 221 | 4 ,1,250,4 ,0 222 | 4 ,1,250,4 ,0 223 | 4 ,1,250,4 ,1 224 | 4 ,1,250,4 ,1 225 | 4 ,1,250,4 ,0 226 | 4 ,1,250,4 ,1 227 | 4 ,1,250,4 ,1 228 | 4 ,1,250,4 ,0 229 | 4 ,3,750,24 ,0 230 | 4 ,1,250,4 ,0 231 | 4 ,1,250,4 ,0 232 | 4 ,1,250,4 ,0 233 | 4 ,1,250,4 ,1 234 | 4 ,1,250,4 ,0 235 | 10 ,8,2000,39 ,0 236 | 14 ,7,1750,26 ,0 237 | 8 ,10,2500,63 ,0 238 | 11 ,3,750,15 ,0 239 | 4 ,2,500,14 ,0 240 | 2 ,4,1000,43 ,0 241 | 8 ,9,2250,58 ,0 242 | 8 ,8,2000,52 ,1 243 | 11 ,22,5500,98 ,0 244 | 4 ,3,750,25 ,1 245 | 11 ,17,4250,79 ,1 246 | 9 ,2,500,11 ,0 247 | 4 ,5,1250,46 ,0 248 | 11 ,12,3000,58 ,0 249 | 7 ,12,3000,86 ,0 250 | 11 ,2,500,11 ,0 251 | 11 ,2,500,11 ,0 252 | 11 ,2,500,11 ,0 253 | 2 ,6,1500,75 ,0 254 | 11 ,8,2000,41 ,1 255 | 11 ,3,750,16 ,1 256 | 12 ,13,3250,59 ,0 257 | 2 ,3,750,35 ,0 258 | 16 ,8,2000,28 ,0 259 | 11 ,7,1750,37 ,0 260 | 4 ,3,750,28 ,0 261 | 12 ,12,3000,58 ,0 262 | 4 ,4,1000,41 ,0 263 | 11 ,14,3500,73 ,1 264 | 2 ,2,500,23 ,0 265 | 2 ,3,750,38 ,1 266 | 4 ,5,1250,58 ,0 267 | 4 ,4,1000,43 ,1 268 | 3 ,2,500,23 ,0 269 | 11 ,8,2000,46 ,0 270 | 4 ,7,1750,82 ,0 271 | 13 ,4,1000,21 ,0 272 | 16 ,11,2750,40 ,0 273 | 16 ,7,1750,28 ,0 274 | 7 ,2,500,16 ,0 275 | 4 ,5,1250,58 ,0 276 | 4 ,5,1250,58 ,0 277 | 4 ,4,1000,46 ,0 278 | 14 ,13,3250,57 ,0 279 | 4 
,3,750,34 ,0 280 | 14 ,18,4500,78 ,0 281 | 11 ,8,2000,48 ,0 282 | 14 ,16,4000,70 ,0 283 | 14 ,4,1000,22 ,1 284 | 14 ,5,1250,26 ,0 285 | 8 ,2,500,16 ,0 286 | 11 ,5,1250,33 ,0 287 | 11 ,2,500,14 ,0 288 | 4 ,2,500,23 ,0 289 | 9 ,2,500,16 ,1 290 | 14 ,5,1250,28 ,1 291 | 14 ,3,750,19 ,1 292 | 14 ,4,1000,23 ,1 293 | 16 ,12,3000,50 ,0 294 | 11 ,4,1000,28 ,0 295 | 11 ,5,1250,35 ,0 296 | 11 ,5,1250,35 ,0 297 | 2 ,4,1000,70 ,0 298 | 14 ,5,1250,28 ,0 299 | 14 ,2,500,14 ,0 300 | 14 ,2,500,14 ,0 301 | 14 ,2,500,14 ,0 302 | 14 ,2,500,14 ,0 303 | 14 ,2,500,14 ,0 304 | 14 ,2,500,14 ,0 305 | 2 ,3,750,52 ,0 306 | 14 ,6,1500,34 ,0 307 | 11 ,5,1250,37 ,1 308 | 4 ,5,1250,74 ,0 309 | 11 ,3,750,23 ,0 310 | 16 ,4,1000,23 ,0 311 | 16 ,3,750,19 ,0 312 | 11 ,5,1250,38 ,0 313 | 11 ,2,500,16 ,0 314 | 12 ,9,2250,60 ,0 315 | 9 ,1,250,9 ,0 316 | 9 ,1,250,9 ,0 317 | 4 ,2,500,29 ,0 318 | 11 ,2,500,17 ,0 319 | 14 ,4,1000,26 ,0 320 | 11 ,9,2250,72 ,1 321 | 11 ,5,1250,41 ,0 322 | 15 ,16,4000,82 ,0 323 | 9 ,5,1250,51 ,1 324 | 11 ,4,1000,34 ,0 325 | 14 ,8,2000,50 ,1 326 | 16 ,7,1750,38 ,0 327 | 14 ,2,500,16 ,0 328 | 2 ,2,500,41 ,0 329 | 14 ,16,4000,98 ,0 330 | 14 ,4,1000,28 ,1 331 | 16 ,7,1750,39 ,0 332 | 14 ,7,1750,47 ,0 333 | 16 ,6,1500,35 ,0 334 | 16 ,6,1500,35 ,1 335 | 11 ,7,1750,62 ,1 336 | 16 ,2,500,16 ,0 337 | 16 ,3,750,21 ,1 338 | 11 ,3,750,28 ,0 339 | 11 ,7,1750,64 ,0 340 | 11 ,1,250,11 ,1 341 | 9 ,3,750,34 ,0 342 | 14 ,4,1000,30 ,0 343 | 23 ,38,9500,98 ,0 344 | 11 ,6,1500,58 ,0 345 | 11 ,1,250,11 ,0 346 | 11 ,1,250,11 ,0 347 | 11 ,1,250,11 ,0 348 | 11 ,1,250,11 ,0 349 | 11 ,1,250,11 ,0 350 | 11 ,1,250,11 ,0 351 | 11 ,1,250,11 ,0 352 | 11 ,1,250,11 ,0 353 | 11 ,2,500,21 ,0 354 | 11 ,5,1250,50 ,0 355 | 11 ,2,500,21 ,0 356 | 16 ,4,1000,28 ,0 357 | 4 ,2,500,41 ,0 358 | 16 ,6,1500,40 ,0 359 | 14 ,3,750,26 ,0 360 | 9 ,2,500,26 ,0 361 | 21 ,16,4000,64 ,0 362 | 14 ,6,1500,51 ,0 363 | 11 ,2,500,24 ,0 364 | 4 ,3,750,71 ,0 365 | 21 ,13,3250,57 ,0 366 | 11 ,6,1500,71 ,0 367 | 14 ,2,500,21 ,1 368 | 23 
,15,3750,57 ,0 369 | 14 ,4,1000,38 ,0 370 | 11 ,2,500,26 ,0 371 | 16 ,5,1250,40 ,1 372 | 4 ,2,500,51 ,1 373 | 14 ,3,750,31 ,0 374 | 4 ,2,500,52 ,0 375 | 9 ,4,1000,65 ,0 376 | 14 ,4,1000,40 ,0 377 | 11 ,3,750,40 ,1 378 | 14 ,5,1250,50 ,0 379 | 14 ,1,250,14 ,0 380 | 14 ,1,250,14 ,0 381 | 14 ,1,250,14 ,0 382 | 14 ,1,250,14 ,0 383 | 14 ,1,250,14 ,0 384 | 14 ,1,250,14 ,0 385 | 14 ,1,250,14 ,0 386 | 14 ,1,250,14 ,0 387 | 14 ,7,1750,72 ,0 388 | 14 ,1,250,14 ,0 389 | 14 ,1,250,14 ,0 390 | 9 ,3,750,52 ,0 391 | 14 ,7,1750,73 ,0 392 | 11 ,4,1000,58 ,0 393 | 11 ,4,1000,59 ,0 394 | 4 ,2,500,59 ,0 395 | 11 ,4,1000,61 ,0 396 | 16 ,4,1000,40 ,0 397 | 16 ,10,2500,89 ,0 398 | 21 ,2,500,21 ,1 399 | 21 ,3,750,26 ,0 400 | 16 ,8,2000,76 ,0 401 | 21 ,3,750,26 ,1 402 | 18 ,2,500,23 ,0 403 | 23 ,5,1250,33 ,0 404 | 23 ,8,2000,46 ,0 405 | 16 ,3,750,34 ,0 406 | 14 ,5,1250,64 ,0 407 | 14 ,3,750,41 ,0 408 | 16 ,1,250,16 ,0 409 | 16 ,1,250,16 ,0 410 | 16 ,1,250,16 ,0 411 | 16 ,1,250,16 ,0 412 | 16 ,1,250,16 ,0 413 | 16 ,1,250,16 ,0 414 | 16 ,1,250,16 ,0 415 | 16 ,4,1000,45 ,0 416 | 16 ,1,250,16 ,0 417 | 16 ,1,250,16 ,0 418 | 16 ,1,250,16 ,0 419 | 16 ,1,250,16 ,0 420 | 16 ,1,250,16 ,0 421 | 16 ,2,500,26 ,0 422 | 21 ,2,500,23 ,0 423 | 16 ,2,500,27 ,0 424 | 21 ,2,500,23 ,0 425 | 21 ,2,500,23 ,0 426 | 14 ,4,1000,57 ,0 427 | 16 ,5,1250,60 ,0 428 | 23 ,2,500,23 ,0 429 | 14 ,5,1250,74 ,0 430 | 23 ,3,750,28 ,0 431 | 16 ,3,750,40 ,0 432 | 9 ,2,500,52 ,0 433 | 9 ,2,500,52 ,0 434 | 16 ,7,1750,87 ,1 435 | 14 ,4,1000,64 ,0 436 | 14 ,2,500,35 ,0 437 | 16 ,7,1750,93 ,0 438 | 21 ,2,500,25 ,0 439 | 14 ,3,750,52 ,0 440 | 23 ,14,3500,93 ,0 441 | 18 ,8,2000,95 ,0 442 | 16 ,3,750,46 ,0 443 | 11 ,3,750,76 ,0 444 | 11 ,2,500,52 ,0 445 | 11 ,3,750,76 ,0 446 | 23 ,12,3000,86 ,0 447 | 21 ,3,750,35 ,0 448 | 23 ,2,500,26 ,0 449 | 23 ,2,500,26 ,0 450 | 23 ,8,2000,64 ,0 451 | 16 ,3,750,50 ,0 452 | 23 ,3,750,33 ,0 453 | 21 ,3,750,38 ,0 454 | 23 ,2,500,28 ,0 455 | 21 ,1,250,21 ,0 456 | 21 ,1,250,21 ,0 457 | 21 ,1,250,21 ,0 458 
| 21 ,1,250,21 ,0 459 | 21 ,1,250,21 ,0 460 | 21 ,1,250,21 ,0 461 | 21 ,1,250,21 ,0 462 | 21 ,1,250,21 ,0 463 | 21 ,1,250,21 ,0 464 | 21 ,1,250,21 ,1 465 | 21 ,1,250,21 ,0 466 | 21 ,1,250,21 ,0 467 | 21 ,5,1250,60 ,0 468 | 23 ,4,1000,45 ,0 469 | 21 ,4,1000,52 ,0 470 | 22 ,1,250,22 ,1 471 | 11 ,2,500,70 ,0 472 | 23 ,5,1250,58 ,0 473 | 23 ,3,750,40 ,0 474 | 23 ,3,750,41 ,0 475 | 14 ,3,750,83 ,0 476 | 21 ,2,500,35 ,0 477 | 26 ,5,1250,49 ,1 478 | 23 ,6,1500,70 ,0 479 | 23 ,1,250,23 ,0 480 | 23 ,1,250,23 ,0 481 | 23 ,1,250,23 ,0 482 | 23 ,1,250,23 ,0 483 | 23 ,1,250,23 ,0 484 | 23 ,1,250,23 ,0 485 | 23 ,1,250,23 ,0 486 | 23 ,1,250,23 ,0 487 | 23 ,4,1000,53 ,0 488 | 21 ,6,1500,86 ,0 489 | 23 ,3,750,48 ,0 490 | 21 ,2,500,41 ,0 491 | 21 ,3,750,64 ,0 492 | 16 ,2,500,70 ,0 493 | 21 ,3,750,70 ,0 494 | 23 ,4,1000,87 ,0 495 | 23 ,3,750,89 ,0 496 | 23 ,2,500,87 ,0 497 | 35 ,3,750,64 ,0 498 | 38 ,1,250,38 ,0 499 | 38 ,1,250,38 ,0 500 | 40 ,1,250,40 ,0 501 | 74 ,1,250,74 ,0 502 | 2 ,43,10750,86 ,1 503 | 6 ,22,5500,28 ,1 504 | 2 ,34,8500,77 ,1 505 | 2 ,44,11000,98 ,0 506 | 0 ,26,6500,76 ,1 507 | 2 ,41,10250,98 ,1 508 | 3 ,21,5250,42 ,1 509 | 2 ,11,2750,23 ,0 510 | 2 ,21,5250,52 ,1 511 | 2 ,13,3250,32 ,1 512 | 4 ,4,1000,4 ,1 513 | 2 ,11,2750,26 ,0 514 | 2 ,11,2750,28 ,0 515 | 3 ,14,3500,35 ,0 516 | 4 ,16,4000,38 ,1 517 | 4 ,6,1500,14 ,0 518 | 3 ,5,1250,12 ,1 519 | 4 ,33,8250,98 ,1 520 | 3 ,10,2500,33 ,1 521 | 4 ,10,2500,28 ,1 522 | 2 ,11,2750,40 ,1 523 | 2 ,11,2750,41 ,1 524 | 4 ,13,3250,39 ,1 525 | 1 ,10,2500,43 ,1 526 | 4 ,9,2250,28 ,0 527 | 2 ,4,1000,11 ,0 528 | 2 ,5,1250,16 ,1 529 | 2 ,15,3750,64 ,0 530 | 5 ,24,6000,79 ,0 531 | 2 ,6,1500,22 ,1 532 | 4 ,5,1250,16 ,1 533 | 2 ,4,1000,14 ,1 534 | 4 ,8,2000,28 ,0 535 | 2 ,4,1000,14 ,0 536 | 2 ,6,1500,26 ,0 537 | 4 ,5,1250,16 ,1 538 | 2 ,7,1750,32 ,1 539 | 2 ,6,1500,26 ,1 540 | 2 ,8,2000,38 ,1 541 | 2 ,2,500,4 ,1 542 | 2 ,6,1500,28 ,1 543 | 2 ,10,2500,52 ,0 544 | 4 ,16,4000,70 ,1 545 | 4 ,2,500,4 ,1 546 | 1 ,14,3500,95 ,0 547 | 4 
,2,500,4 ,1 548 | 7 ,14,3500,48 ,0 549 | 2 ,3,750,11 ,0 550 | 2 ,12,3000,70 ,1 551 | 4 ,7,1750,32 ,1 552 | 4 ,4,1000,16 ,0 553 | 2 ,6,1500,35 ,1 554 | 4 ,6,1500,28 ,1 555 | 2 ,3,750,14 ,0 556 | 2 ,4,1000,23 ,0 557 | 4 ,4,1000,18 ,0 558 | 5 ,6,1500,28 ,0 559 | 4 ,6,1500,30 ,0 560 | 14 ,5,1250,14 ,0 561 | 3 ,8,2000,50 ,0 562 | 4 ,11,2750,64 ,1 563 | 4 ,9,2250,52 ,0 564 | 4 ,16,4000,98 ,1 565 | 7 ,10,2500,47 ,0 566 | 4 ,14,3500,86 ,0 567 | 2 ,9,2250,75 ,0 568 | 4 ,6,1500,35 ,0 569 | 4 ,9,2250,55 ,0 570 | 4 ,6,1500,35 ,1 571 | 2 ,6,1500,45 ,0 572 | 2 ,6,1500,47 ,0 573 | 4 ,2,500,9 ,0 574 | 2 ,2,500,11 ,1 575 | 2 ,2,500,11 ,0 576 | 2 ,2,500,11 ,1 577 | 4 ,6,1500,38 ,1 578 | 3 ,4,1000,29 ,1 579 | 9 ,9,2250,38 ,0 580 | 11 ,5,1250,18 ,0 581 | 2 ,3,750,21 ,0 582 | 2 ,1,250,2 ,0 583 | 2 ,1,250,2 ,1 584 | 2 ,1,250,2 ,0 585 | 2 ,1,250,2 ,0 586 | 2 ,1,250,2 ,0 587 | 2 ,1,250,2 ,0 588 | 2 ,1,250,2 ,1 589 | 2 ,1,250,2 ,0 590 | 2 ,1,250,2 ,0 591 | 2 ,1,250,2 ,0 592 | 2 ,1,250,2 ,0 593 | 11 ,11,2750,38 ,0 594 | 2 ,3,750,22 ,0 595 | 9 ,11,2750,49 ,1 596 | 5 ,11,2750,75 ,0 597 | 3 ,5,1250,38 ,0 598 | 3 ,1,250,3 ,1 599 | 4 ,6,1500,43 ,0 600 | 2 ,3,750,24 ,0 601 | 12 ,11,2750,39 ,0 602 | 2 ,2,500,14 ,0 603 | 4 ,6,1500,46 ,0 604 | 9 ,3,750,14 ,0 605 | 14 ,8,2000,26 ,0 606 | 4 ,2,500,13 ,0 607 | 4 ,11,2750,95 ,0 608 | 2 ,7,1750,77 ,0 609 | 2 ,7,1750,77 ,0 610 | 4 ,1,250,4 ,0 611 | 4 ,1,250,4 ,0 612 | 4 ,1,250,4 ,0 613 | 4 ,1,250,4 ,0 614 | 4 ,1,250,4 ,1 615 | 4 ,1,250,4 ,0 616 | 4 ,1,250,4 ,0 617 | 4 ,1,250,4 ,0 618 | 4 ,1,250,4 ,0 619 | 4 ,1,250,4 ,0 620 | 4 ,1,250,4 ,1 621 | 4 ,1,250,4 ,0 622 | 4 ,7,1750,62 ,0 623 | 4 ,1,250,4 ,0 624 | 4 ,4,1000,34 ,1 625 | 11 ,6,1500,28 ,0 626 | 13 ,3,750,14 ,1 627 | 7 ,5,1250,35 ,0 628 | 9 ,9,2250,54 ,0 629 | 11 ,2,500,11 ,0 630 | 2 ,5,1250,63 ,0 631 | 7 ,11,2750,89 ,0 632 | 8 ,9,2250,64 ,0 633 | 2 ,2,500,22 ,0 634 | 6 ,3,750,26 ,0 635 | 12 ,15,3750,71 ,0 636 | 13 ,3,750,16 ,0 637 | 11 ,16,4000,89 ,0 638 | 4 ,5,1250,58 ,0 639 | 14 ,7,1750,35 ,0 640 | 
11 ,4,1000,27 ,0 641 | 7 ,9,2250,89 ,1 642 | 11 ,8,2000,52 ,1 643 | 7 ,5,1250,52 ,0 644 | 11 ,6,1500,41 ,0 645 | 10 ,5,1250,38 ,0 646 | 14 ,2,500,14 ,1 647 | 14 ,2,500,14 ,0 648 | 14 ,2,500,14 ,0 649 | 2 ,2,500,33 ,0 650 | 11 ,3,750,23 ,0 651 | 14 ,8,2000,46 ,0 652 | 9 ,1,250,9 ,0 653 | 16 ,5,1250,27 ,0 654 | 14 ,4,1000,26 ,0 655 | 4 ,2,500,30 ,0 656 | 14 ,3,750,21 ,0 657 | 16 ,16,4000,77 ,0 658 | 4 ,2,500,31 ,0 659 | 14 ,8,2000,50 ,0 660 | 11 ,3,750,26 ,0 661 | 14 ,7,1750,45 ,0 662 | 15 ,5,1250,33 ,0 663 | 16 ,2,500,16 ,0 664 | 16 ,3,750,21 ,0 665 | 11 ,8,2000,72 ,0 666 | 11 ,1,250,11 ,0 667 | 11 ,1,250,11 ,0 668 | 11 ,1,250,11 ,0 669 | 11 ,1,250,11 ,1 670 | 11 ,1,250,11 ,0 671 | 2 ,3,750,75 ,1 672 | 2 ,3,750,77 ,0 673 | 16 ,4,1000,28 ,0 674 | 16 ,15,3750,87 ,0 675 | 16 ,14,3500,83 ,0 676 | 16 ,10,2500,62 ,0 677 | 16 ,3,750,23 ,0 678 | 14 ,3,750,26 ,0 679 | 23 ,19,4750,62 ,0 680 | 11 ,7,1750,75 ,0 681 | 14 ,3,750,28 ,0 682 | 20 ,14,3500,69 ,1 683 | 4 ,2,500,46 ,0 684 | 11 ,2,500,25 ,0 685 | 11 ,3,750,37 ,0 686 | 16 ,4,1000,33 ,0 687 | 21 ,7,1750,38 ,0 688 | 13 ,7,1750,76 ,0 689 | 16 ,6,1500,50 ,0 690 | 14 ,3,750,33 ,0 691 | 14 ,1,250,14 ,0 692 | 14 ,1,250,14 ,0 693 | 14 ,1,250,14 ,0 694 | 14 ,1,250,14 ,0 695 | 14 ,1,250,14 ,0 696 | 14 ,1,250,14 ,0 697 | 17 ,7,1750,58 ,1 698 | 14 ,3,750,35 ,0 699 | 14 ,3,750,35 ,0 700 | 16 ,7,1750,64 ,0 701 | 21 ,2,500,21 ,0 702 | 16 ,3,750,35 ,0 703 | 16 ,1,250,16 ,0 704 | 16 ,1,250,16 ,0 705 | 16 ,1,250,16 ,0 706 | 16 ,1,250,16 ,0 707 | 16 ,1,250,16 ,0 708 | 14 ,2,500,29 ,0 709 | 11 ,4,1000,74 ,0 710 | 11 ,2,500,38 ,1 711 | 21 ,6,1500,48 ,0 712 | 23 ,2,500,23 ,0 713 | 23 ,6,1500,45 ,0 714 | 14 ,2,500,35 ,1 715 | 16 ,6,1500,81 ,0 716 | 16 ,4,1000,58 ,0 717 | 16 ,5,1250,71 ,0 718 | 21 ,2,500,26 ,0 719 | 21 ,3,750,35 ,0 720 | 21 ,3,750,35 ,0 721 | 23 ,8,2000,69 ,0 722 | 21 ,3,750,38 ,0 723 | 23 ,3,750,35 ,0 724 | 21 ,3,750,40 ,0 725 | 23 ,2,500,28 ,0 726 | 21 ,1,250,21 ,0 727 | 21 ,1,250,21 ,0 728 | 25 ,6,1500,50 ,0 729 | 21 
,1,250,21 ,0 730 | 21 ,1,250,21 ,0 731 | 23 ,3,750,39 ,0 732 | 21 ,2,500,33 ,0 733 | 14 ,3,750,79 ,0 734 | 23 ,1,250,23 ,1 735 | 23 ,1,250,23 ,0 736 | 23 ,1,250,23 ,0 737 | 23 ,1,250,23 ,0 738 | 23 ,1,250,23 ,0 739 | 23 ,1,250,23 ,0 740 | 23 ,1,250,23 ,0 741 | 23 ,4,1000,52 ,0 742 | 23 ,1,250,23 ,0 743 | 23 ,7,1750,88 ,0 744 | 16 ,3,750,86 ,0 745 | 23 ,2,500,38 ,0 746 | 21 ,2,500,52 ,0 747 | 23 ,3,750,62 ,0 748 | 39 ,1,250,39 ,0 749 | 72 ,1,250,72 ,0 -------------------------------------------------------------------------------- /Give Life_ Predict Blood Donations/notebook.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"metadata":{"dc":{"key":"3"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 1. Inspecting transfusion.data file\n

\"A

\n

Blood transfusion saves lives - from replacing lost blood during major surgery or a serious injury to treating various illnesses and blood disorders. Ensuring that there's enough blood in supply whenever needed is a serious challenge for the health professionals. According to WebMD, \"about 5 million Americans need a blood transfusion every year\".

\n

Our dataset is from a mobile blood donation vehicle in Taiwan. The Blood Transfusion Service Center drives to different universities and collects blood as part of a blood drive. We want to predict whether or not a donor will give blood the next time the vehicle comes to campus.

\n

The data is stored in datasets/transfusion.data and it is structured according to RFMTC marketing model (a variation of RFM). We'll explore what that means later in this notebook. First, let's inspect the data.

"},{"metadata":{"dc":{"key":"3"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Print out the first 5 lines from the transfusion.data file\n!head -n 5 datasets/transfusion.data","execution_count":18,"outputs":[{"output_type":"stream","text":"Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),\"whether he/she donated blood in March 2007\"\r\r\n2 ,50,12500,98 ,1\r\r\n0 ,13,3250,28 ,1\r\r\n1 ,16,4000,35 ,1\r\r\n2 ,20,5000,45 ,1\r\r\n","name":"stdout"}]},{"metadata":{"dc":{"key":"10"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 2. Loading the blood donations data\n

We now know that we are working with a typical CSV file (i.e., the delimiter is a comma, etc.). We proceed to loading the data into memory.

"},{"metadata":{"dc":{"key":"10"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import pandas\nimport pandas as pd\n\n# Read in dataset\ntransfusion = pd.read_csv('datasets/transfusion.data')\n\n# Print out the first rows of our dataset\ntransfusion.head()","execution_count":20,"outputs":[{"output_type":"execute_result","execution_count":20,"data":{"text/plain":" Recency (months) Frequency (times) Monetary (c.c. blood) Time (months) \\\n0 2 50 12500 98 \n1 0 13 3250 28 \n2 1 16 4000 35 \n3 2 20 5000 45 \n4 1 24 6000 77 \n\n whether he/she donated blood in March 2007 \n0 1 \n1 1 \n2 1 \n3 1 \n4 0 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Recency (months)Frequency (times)Monetary (c.c. blood)Time (months)whether he/she donated blood in March 2007
025012500981
10133250281
21164000351
32205000451
41246000770
\n
"},"metadata":{}}]},{"metadata":{"dc":{"key":"17"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 3. Inspecting transfusion DataFrame\n

Let's briefly return to our discussion of RFM model. RFM stands for Recency, Frequency and Monetary Value and it is commonly used in marketing for identifying your best customers. In our case, our customers are blood donors.

\n

RFMTC is a variation of the RFM model. Below is a description of what each column means in our dataset:

\n\n

It looks like every column in our DataFrame has the numeric type, which is exactly what we want when building a machine learning model. Let's verify our hypothesis.

"},{"metadata":{"dc":{"key":"17"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Print a concise summary of transfusion DataFrame\ntransfusion.info()","execution_count":22,"outputs":[{"output_type":"stream","text":"\nRangeIndex: 748 entries, 0 to 747\nData columns (total 5 columns):\nRecency (months) 748 non-null int64\nFrequency (times) 748 non-null int64\nMonetary (c.c. blood) 748 non-null int64\nTime (months) 748 non-null int64\nwhether he/she donated blood in March 2007 748 non-null int64\ndtypes: int64(5)\nmemory usage: 29.3 KB\n","name":"stdout"}]},{"metadata":{"dc":{"key":"24"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 4. Creating target column\n

We are aiming to predict the value in whether he/she donated blood in March 2007 column. Let's rename it to target so that it's more convenient to work with.

"},{"metadata":{"dc":{"key":"24"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Rename target column as 'target' for brevity \ntransfusion.rename(\n columns={'whether he/she donated blood in March 2007': 'target'},\n inplace=True\n)\n\n# Print out the first 2 rows\ntransfusion.head(2)","execution_count":24,"outputs":[{"output_type":"execute_result","execution_count":24,"data":{"text/plain":" Recency (months) Frequency (times) Monetary (c.c. blood) Time (months) \\\n0 2 50 12500 98 \n1 0 13 3250 28 \n\n target \n0 1 \n1 1 ","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Recency (months)Frequency (times)Monetary (c.c. blood)Time (months)target
025012500981
10133250281
\n
"},"metadata":{}}]},{"metadata":{"dc":{"key":"31"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 5. Checking target incidence\n

We want to predict whether or not the same donor will give blood the next time the vehicle comes to campus. The model for this is a binary classifier, meaning that there are only 2 possible outcomes:

\n\n

Target incidence is defined as the number of cases of each individual target value in a dataset. That is, how many 0s are in the target column compared to how many 1s? Target incidence gives us an idea of how balanced (or imbalanced) our dataset is.

"},{"metadata":{"dc":{"key":"31"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Print target incidence proportions, rounding output to 3 decimal places\ntransfusion.target.value_counts(normalize=True).round(3)","execution_count":26,"outputs":[{"output_type":"execute_result","execution_count":26,"data":{"text/plain":"0 0.762\n1 0.238\nName: target, dtype: float64"},"metadata":{}}]},{"metadata":{"dc":{"key":"38"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 6. Splitting transfusion into train and test datasets\n

We'll now use train_test_split() method to split transfusion DataFrame.

\n

Target incidence informed us that in our dataset 0s appear 76% of the time. We want to keep the same structure in train and test datasets, i.e., both datasets must have 0 target incidence of 76%. This is very easy to do using the train_test_split() method from the scikit learn library - all we need to do is specify the stratify parameter. In our case, we'll stratify on the target column.

"},{"metadata":{"dc":{"key":"38"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import train_test_split method\nfrom sklearn.model_selection import train_test_split\n\n# Split transfusion DataFrame into\n# X_train, X_test, y_train and y_test datasets,\n# stratifying on the `target` column\nX_train, X_test, y_train , y_test = train_test_split(\n transfusion.drop(columns='target'),\n transfusion.target,\n test_size=0.25,\n random_state=42,\n stratify=transfusion.target\n)\n\n# Print out the first 2 rows of X_train\nX_train.head(2)","execution_count":28,"outputs":[{"output_type":"execute_result","execution_count":28,"data":{"text/plain":" Recency (months) Frequency (times) Monetary (c.c. blood) Time (months)\n334 16 2 500 16\n99 5 7 1750 26","text/html":"
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Recency (months)Frequency (times)Monetary (c.c. blood)Time (months)
33416250016
9957175026
\n
"},"metadata":{}}]},{"metadata":{"dc":{"key":"45"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 7. Selecting model using TPOT\n

TPOT is a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming.

\n

\"TPOT

\n

TPOT will automatically explore hundreds of possible pipelines to find the best one for our dataset. Note, the outcome of this search will be a scikit-learn pipeline, meaning it will include any pre-processing steps as well as the model.

\n

We are using TPOT to help us zero in on one model that we can then explore and optimize further.

"},{"metadata":{"dc":{"key":"45"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import TPOTClassifier and roc_auc_score\nfrom tpot import TPOTClassifier\nfrom sklearn.metrics import roc_auc_score\n\n# Instantiate TPOTClassifier\ntpot = TPOTClassifier(\n generations=5,\n population_size=20,\n verbosity=2,\n scoring='roc_auc',\n random_state=42,\n disable_update_check=True,\n config_dict='TPOT light'\n)\ntpot.fit(X_train, y_train)\n\n# AUC score for tpot model\ntpot_auc_score = roc_auc_score(y_test, tpot.predict_proba(X_test)[:, 1])\nprint(f'\\nAUC score: {tpot_auc_score:.4f}')\n\n# Print best pipeline steps\nprint('\\nBest pipeline steps:', end='\\n')\nfor idx, (name, transform) in enumerate(tpot.fitted_pipeline_.steps, start=1):\n # Print idx and transform\n print(f'{idx}.{transform}')","execution_count":30,"outputs":[{"output_type":"display_data","data":{"text/plain":"HBox(children=(IntProgress(value=0, description='Optimization Progress', max=120, style=ProgressStyle(descript…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f2062ead0d694ea3bd78bb4921f6c8d5"}},"metadata":{}},{"output_type":"stream","text":"Generation 1 - Current best internal CV score: 0.7433977184592779\nGeneration 2 - Current best internal CV score: 0.7433977184592779\nGeneration 3 - Current best internal CV score: 0.7433977184592779\nGeneration 4 - Current best internal CV score: 0.7433977184592779\nGeneration 5 - Current best internal CV score: 0.7433977184592779\n\nBest pipeline: LogisticRegression(input_matrix, C=0.5, dual=False, penalty=l2)\n\nAUC score: 0.7850\n\nBest pipeline steps:\n1.LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True,\n intercept_scaling=1, max_iter=100, multi_class='warn',\n n_jobs=None, penalty='l2', random_state=None, solver='warn',\n tol=0.0001, verbose=0, 
warm_start=False)\n","name":"stdout"}]},{"metadata":{"dc":{"key":"52"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 8. Checking the variance\n

TPOT picked LogisticRegression as the best model for our dataset with no pre-processing steps, giving us the AUC score of 0.7850. This is a great starting point. Let's see if we can make it better.

\n

One of the assumptions for linear regression models is that the data and the features we are giving it are related in a linear fashion, or can be measured with a linear distance metric. If a feature in our dataset has a high variance that's an order of magnitude or more greater than the other features, this could impact the model's ability to learn from other features in the dataset.

\n

Correcting for high variance is called normalization. It is one of the possible transformations you do before training a model. Let's check the variance to see if such transformation is needed.

"},{"metadata":{"dc":{"key":"52"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# X_train's variance, rounding the output to 3 decimal places\n\nX_train.var().round(3)","execution_count":32,"outputs":[{"output_type":"execute_result","execution_count":32,"data":{"text/plain":"Recency (months) 66.929\nFrequency (times) 33.830\nMonetary (c.c. blood) 2114363.700\nTime (months) 611.147\ndtype: float64"},"metadata":{}}]},{"metadata":{"dc":{"key":"59"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 9. Log normalization\n

Monetary (c.c. blood)'s variance is very high in comparison to any other column in the dataset. This means that, unless accounted for, this feature may get more weight by the model (i.e., be seen as more important) than any other feature.

\n

One way to correct for high variance is to use log normalization.

"},{"metadata":{"dc":{"key":"59"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import numpy\nimport numpy as np\n\n# Copy X_train and X_test into X_train_normed and X_test_normed\nX_train_normed , X_test_normed = X_train.copy(), X_test.copy()\n\n# Specify which column to normalize\ncol_to_normalize = 'Monetary (c.c. blood)'\n\n# Log normalization\nfor df_ in [X_train_normed, X_test_normed]:\n # Add log normalized column\n df_['monetary_log'] = np.log(df_[col_to_normalize])\n # Drop the original column\n df_.drop(columns=col_to_normalize, inplace=True)\n\n# Check the variance for X_train_normed\nround(X_train_normed.var(),3)","execution_count":34,"outputs":[{"output_type":"execute_result","execution_count":34,"data":{"text/plain":"Recency (months) 66.929\nFrequency (times) 33.830\nTime (months) 611.147\nmonetary_log 0.837\ndtype: float64"},"metadata":{}}]},{"metadata":{"dc":{"key":"66"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 10. Training the linear regression model\n

The variance looks much better now. Notice that now Time (months) has the largest variance, but it's not orders of magnitude higher than the rest of the variables, so we'll leave it as is.

\n

We are now ready to train the logistic regression model.

"},{"metadata":{"dc":{"key":"66"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Importing modules\nfrom sklearn import linear_model\n\n# Instantiate LogisticRegression\nlogreg = linear_model.LogisticRegression(\n solver='liblinear',\n random_state=42\n)\n\n# Train the model\nlogreg.fit(X_train_normed, y_train)\n\n# AUC score for tpot model\nlogreg_auc_score = roc_auc_score(y_test, logreg.predict_proba(X_test_normed)[:, 1])\nprint(f'\\nAUC score: {logreg_auc_score:.4f}')","execution_count":36,"outputs":[{"output_type":"stream","text":"\nAUC score: 0.7891\n","name":"stdout"}]},{"metadata":{"dc":{"key":"73"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 11. Conclusion\n

The demand for blood fluctuates throughout the year. As one prominent example, blood donations slow down during busy holiday seasons. An accurate forecast for the future supply of blood allows for an appropriate action to be taken ahead of time and therefore saving more lives.

\n

In this notebook, we explored automatic model selection using TPOT and the results are not too far off. Furthermore, both of our models are doing slightly better than simply choosing 0 all the time (the target incidence suggests that such a model would have 76% success rate).

\n

Another benefit of using logistic regression model is that it is interpretable. We can analyze how much of the variance in the response variable (target) can be explained by other variables in our dataset.

"},{"metadata":{"dc":{"key":"73"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Importing itemgetter\nfrom operator import itemgetter\n\n# Sort models based on their AUC score from highest to lowest\nsorted(\n [('tpot', tpot_auc_score), ('logreg', logreg_auc_score)],\n key=itemgetter(1),\n reverse=True\n)","execution_count":38,"outputs":[{"output_type":"execute_result","execution_count":38,"data":{"text/plain":"[('logreg', 0.7890972663699937), ('tpot', 0.7849650349650349)]"},"metadata":{}}]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.6.7","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":2} -------------------------------------------------------------------------------- /Future Sales/Future Sales.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import pandas as pd\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import seaborn as sns\n", 13 | "\n", 14 | "from datetime import datetime, date\n", 15 | "from dateutil.relativedelta import relativedelta\n", 16 | "\n", 17 | "from sklearn.preprocessing import StandardScaler\n", 18 | "\n", 19 | "from math import ceil\n", 20 | "\n", 21 | "\n", 22 | "%matplotlib inline\n", 23 | "\n", 24 | "train = pd.read_csv('sales_train.csv')\n", 25 | "test = pd.read_csv('test.csv')\n", 26 | "submission = pd.read_csv('sample_submission.csv')\n", 27 | "items = pd.read_csv('items.csv')\n", 28 | "item_cats = pd.read_csv('item_categories.csv')\n", 29 | "shops = pd.read_csv('shops.csv')" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | 
"outputs": [], 37 | "source": [ 38 | "test_shops = test.shop_id.unique()\n", 39 | "train = train[train.shop_id.isin(test_shops)]\n", 40 | "test_items = test.item_id.unique()\n", 41 | "train = train[train.item_id.isin(test_items)]" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "MAX_BLOCK_NUM = train.date_block_num.max()\n", 51 | "MAX_ITEM = len(test_items)\n", 52 | "MAX_CAT = len(item_cats)\n", 53 | "MAX_YEAR = 3\n", 54 | "MAX_MONTH = 4 # 7 8 9 10\n", 55 | "MAX_SHOP = len(test_shops)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/html": [ 66 | "
\n", 67 | "\n", 80 | "\n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | "
datedate_block_numshop_iditem_iditem_priceitem_cnt_day
002.01.201305922154999.01.0
1003.01.20130252574399.02.0
1105.01.20130252574399.01.0
1207.01.20130252574399.01.0
1308.01.20130252574399.02.0
\n", 140 | "
" 141 | ], 142 | "text/plain": [ 143 | " date date_block_num shop_id item_id item_price item_cnt_day\n", 144 | "0 02.01.2013 0 59 22154 999.0 1.0\n", 145 | "10 03.01.2013 0 25 2574 399.0 2.0\n", 146 | "11 05.01.2013 0 25 2574 399.0 1.0\n", 147 | "12 07.01.2013 0 25 2574 399.0 1.0\n", 148 | "13 08.01.2013 0 25 2574 399.0 2.0" 149 | ] 150 | }, 151 | "execution_count": 4, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "train.head()" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "train = train.set_index('item_id').join(items.set_index('item_id')).drop('item_name', axis=1).reset_index()\n", 167 | "\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 6, 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "train['month'] = train.date.apply(lambda x: datetime.strptime(x, '%d.%m.%Y').strftime('%m'))\n", 177 | "train['year'] = train.date.apply(lambda x: datetime.strptime(x, '%d.%m.%Y').strftime('%Y'))\n", 178 | "\n" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 7, 184 | "metadata": {}, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/html": [ 189 | "
\n", 190 | "\n", 203 | "\n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | "
item_iddatedate_block_numshop_iditem_priceitem_cnt_dayitem_category_idmonthyear
03028.02.2013150399.01.040022013
13026.02.2013150399.01.040022013
23012.02.2013150399.01.040022013
33014.02.2013150399.02.040022013
43015.02.2013150399.03.040022013
\n", 281 | "
" 282 | ], 283 | "text/plain": [ 284 | " item_id date date_block_num shop_id item_price item_cnt_day \\\n", 285 | "0 30 28.02.2013 1 50 399.0 1.0 \n", 286 | "1 30 26.02.2013 1 50 399.0 1.0 \n", 287 | "2 30 12.02.2013 1 50 399.0 1.0 \n", 288 | "3 30 14.02.2013 1 50 399.0 2.0 \n", 289 | "4 30 15.02.2013 1 50 399.0 3.0 \n", 290 | "\n", 291 | " item_category_id month year \n", 292 | "0 40 02 2013 \n", 293 | "1 40 02 2013 \n", 294 | "2 40 02 2013 \n", 295 | "3 40 02 2013 \n", 296 | "4 40 02 2013 " 297 | ] 298 | }, 299 | "execution_count": 7, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "train.head()" 306 | ] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "execution_count": 8, 311 | "metadata": {}, 312 | "outputs": [ 313 | { 314 | "name": "stderr", 315 | "output_type": "stream", 316 | "text": [ 317 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", 318 | " after removing the cwd from sys.path.\n", 319 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:5: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", 320 | " \"\"\"\n", 321 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:7: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n", 322 | " import sys\n", 323 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:8: FutureWarning: Method .as_matrix will be removed in a future version. 
Use .values instead.\n", 324 | " \n" 325 | ] 326 | } 327 | ], 328 | "source": [ 329 | "scaler = StandardScaler()\n", 330 | "cnt_scaler = StandardScaler()\n", 331 | "\n", 332 | "scaler.fit(train.item_price.as_matrix().reshape(-1, 1))\n", 333 | "cnt_scaler.fit(train.item_cnt_day.as_matrix().reshape(-1, 1))\n", 334 | "\n", 335 | "train.item_price = scaler.transform(train.item_price.as_matrix().reshape(-1, 1))\n", 336 | "train.item_cnt_day = cnt_scaler.transform(train.item_cnt_day.as_matrix().reshape(-1, 1))" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": 9, 342 | "metadata": {}, 343 | "outputs": [ 344 | { 345 | "data": { 346 | "text/html": [ 347 | "
\n", 348 | "\n", 361 | "\n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | "
item_iddatedate_block_numshop_iditem_priceitem_cnt_dayitem_category_idmonthyear
03028.02.2013150-0.345667-0.09696240022013
13026.02.2013150-0.345667-0.09696240022013
23012.02.2013150-0.345667-0.09696240022013
33014.02.2013150-0.3456670.20488040022013
43015.02.2013150-0.3456670.50672140022013
\n", 439 | "
" 440 | ], 441 | "text/plain": [ 442 | " item_id date date_block_num shop_id item_price item_cnt_day \\\n", 443 | "0 30 28.02.2013 1 50 -0.345667 -0.096962 \n", 444 | "1 30 26.02.2013 1 50 -0.345667 -0.096962 \n", 445 | "2 30 12.02.2013 1 50 -0.345667 -0.096962 \n", 446 | "3 30 14.02.2013 1 50 -0.345667 0.204880 \n", 447 | "4 30 15.02.2013 1 50 -0.345667 0.506721 \n", 448 | "\n", 449 | " item_category_id month year \n", 450 | "0 40 02 2013 \n", 451 | "1 40 02 2013 \n", 452 | "2 40 02 2013 \n", 453 | "3 40 02 2013 \n", 454 | "4 40 02 2013 " 455 | ] 456 | }, 457 | "execution_count": 9, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "train.head()" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 10, 469 | "metadata": {}, 470 | "outputs": [ 471 | { 472 | "data": { 473 | "text/html": [ 474 | "
\n", 475 | "\n", 488 | "\n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | "
item_priceitem_cnt_day
shop_iditem_iddate_block_nummonthyear
2302032013-0.367556-0.096962
5062013-0.345667-0.096962
15042014-0.471530-0.096962
16052014-0.471530-0.096962
311022013-0.725991-0.387848
\n", 550 | "
" 551 | ], 552 | "text/plain": [ 553 | " item_price item_cnt_day\n", 554 | "shop_id item_id date_block_num month year \n", 555 | "2 30 2 03 2013 -0.367556 -0.096962\n", 556 | " 5 06 2013 -0.345667 -0.096962\n", 557 | " 15 04 2014 -0.471530 -0.096962\n", 558 | " 16 05 2014 -0.471530 -0.096962\n", 559 | " 31 1 02 2013 -0.725991 -0.387848" 560 | ] 561 | }, 562 | "execution_count": 10, 563 | "metadata": {}, 564 | "output_type": "execute_result" 565 | } 566 | ], 567 | "source": [ 568 | "train = train.drop('date', axis=1)\n", 569 | "train = train.drop('item_category_id', axis=1)\n", 570 | "train = train.groupby(['shop_id', 'item_id', 'date_block_num', 'month', 'year']).sum()\n", 571 | "train = train.sort_index()\n", 572 | "train.head()" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 12, 578 | "metadata": {}, 579 | "outputs": [], 580 | "source": [ 581 | "price = train.reset_index().set_index(['item_id', 'shop_id', 'date_block_num'])\n", 582 | "price = price.sort_index()" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": 13, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [ 591 | "def convert(date_block):\n", 592 | " date = datetime(2013, 1, 1)\n", 593 | " date += relativedelta(months = date_block)\n", 594 | " return (date.month, date.year)\n", 595 | "\n", 596 | "def closest_date_block(current_day, item_id, shop_id):\n", 597 | " \"\"\"Find the block_date which is closest to the current_day, given item_id and shop_id. 
Returns index integer\"\"\"\n", 598 | " if (item_id, shop_id) in price.index:\n", 599 | " search_lst = np.array(price.loc[(item_id, shop_id)].index) \n", 600 | " return search_lst[np.abs(current_day - search_lst).argmin()]\n", 601 | " return -1\n", 602 | " \n", 603 | "def closest_price(current_day, item_id, shop_id):\n", 604 | " closest_date = closest_date_block(current_day, item_id, shop_id)\n", 605 | " if closest_date != -1:\n", 606 | " return price.loc[( item_id, shop_id, closest_date )]['item_price']\n", 607 | " return np.nan\n", 608 | "\n", 609 | "def closest_price_lambda(x):\n", 610 | " return closest_price(34, x.item_id, x.shop_id)\n", 611 | "assert closest_date_block(18, 30, 5) == 18" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 14, 617 | "metadata": {}, 618 | "outputs": [], 619 | "source": [ 620 | "maxlen = 4 # 4 months\n", 621 | "step = 1\n", 622 | "# 0: train, 1: val, 2:test\n", 623 | "sentences = [[],[],[]]\n", 624 | "next_chars = [[], []]\n", 625 | "BLOCKS = [6, 18, 30]\n", 626 | "\n", 627 | "for s in test_shops:\n", 628 | " shop_items = list(train.loc[s].index.get_level_values(0).unique())\n", 629 | " for it in shop_items: \n", 630 | " for i_index, i in enumerate(BLOCKS):\n", 631 | " sentence = []\n", 632 | " closest_pc = closest_price(i, it, s) \n", 633 | " for j in range(maxlen+1):\n", 634 | " if j < maxlen:\n", 635 | " if (s, it, i+j) in train.index:\n", 636 | " r = train.loc[(s, it, i + j)].to_dict(orient='list') \n", 637 | " closest_pc = r['item_price'][0]\n", 638 | " item_cnt_day = r['item_cnt_day'][0]\n", 639 | " row = {'shop_id': s, 'date_block_num': i+j, 'item_cnt_day': item_cnt_day, \n", 640 | " 'month': month, 'item_id': it, 'item_price': closest_pc, 'year': year}\n", 641 | " else:\n", 642 | " month, year = convert(i+j) \n", 643 | " row = {'shop_id': s, 'date_block_num': i+j, 'item_cnt_day': 0, \n", 644 | " 'month': month, 'item_id': it, 'item_price': closest_pc, 'year': year}\n", 645 | " 
sentence.append(row)\n", 646 | " elif i_index < 2: # not in test set\n", 647 | " next_chars[i_index].append(row)\n", 648 | " sentences[i_index].append(sentence)" 649 | ] 650 | }, 651 | { 652 | "cell_type": "code", 653 | "execution_count": null, 654 | "metadata": {}, 655 | "outputs": [], 656 | "source": [ 657 | "train.head()" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": null, 670 | "metadata": {}, 671 | "outputs": [], 672 | "source": [ 673 | " " 674 | ] 675 | } 676 | ], 677 | "metadata": { 678 | "kernelspec": { 679 | "display_name": "Python 3", 680 | "language": "python", 681 | "name": "python3" 682 | }, 683 | "language_info": { 684 | "codemirror_mode": { 685 | "name": "ipython", 686 | "version": 3 687 | }, 688 | "file_extension": ".py", 689 | "mimetype": "text/x-python", 690 | "name": "python", 691 | "nbconvert_exporter": "python", 692 | "pygments_lexer": "ipython3", 693 | "version": "3.7.1" 694 | } 695 | }, 696 | "nbformat": 4, 697 | "nbformat_minor": 2 698 | } 699 | -------------------------------------------------------------------------------- /Predicting Car Prices/imports-85.data: -------------------------------------------------------------------------------- 1 | 3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.60,168.80,64.10,48.80,2548,dohc,four,130,mpfi,3.47,2.68,9.00,111,5000,21,27,13495 2 | 3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.60,168.80,64.10,48.80,2548,dohc,four,130,mpfi,3.47,2.68,9.00,111,5000,21,27,16500 3 | 1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.50,171.20,65.50,52.40,2823,ohcv,six,152,mpfi,2.68,3.47,9.00,154,5000,19,26,16500 4 | 2,164,audi,gas,std,four,sedan,fwd,front,99.80,176.60,66.20,54.30,2337,ohc,four,109,mpfi,3.19,3.40,10.00,102,5500,24,30,13950 5 | 
2,164,audi,gas,std,four,sedan,4wd,front,99.40,176.60,66.40,54.30,2824,ohc,five,136,mpfi,3.19,3.40,8.00,115,5500,18,22,17450 6 | 2,?,audi,gas,std,two,sedan,fwd,front,99.80,177.30,66.30,53.10,2507,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,15250 7 | 1,158,audi,gas,std,four,sedan,fwd,front,105.80,192.70,71.40,55.70,2844,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,17710 8 | 1,?,audi,gas,std,four,wagon,fwd,front,105.80,192.70,71.40,55.70,2954,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,18920 9 | 1,158,audi,gas,turbo,four,sedan,fwd,front,105.80,192.70,71.40,55.90,3086,ohc,five,131,mpfi,3.13,3.40,8.30,140,5500,17,20,23875 10 | 0,?,audi,gas,turbo,two,hatchback,4wd,front,99.50,178.20,67.90,52.00,3053,ohc,five,131,mpfi,3.13,3.40,7.00,160,5500,16,22,? 11 | 2,192,bmw,gas,std,two,sedan,rwd,front,101.20,176.80,64.80,54.30,2395,ohc,four,108,mpfi,3.50,2.80,8.80,101,5800,23,29,16430 12 | 0,192,bmw,gas,std,four,sedan,rwd,front,101.20,176.80,64.80,54.30,2395,ohc,four,108,mpfi,3.50,2.80,8.80,101,5800,23,29,16925 13 | 0,188,bmw,gas,std,two,sedan,rwd,front,101.20,176.80,64.80,54.30,2710,ohc,six,164,mpfi,3.31,3.19,9.00,121,4250,21,28,20970 14 | 0,188,bmw,gas,std,four,sedan,rwd,front,101.20,176.80,64.80,54.30,2765,ohc,six,164,mpfi,3.31,3.19,9.00,121,4250,21,28,21105 15 | 1,?,bmw,gas,std,four,sedan,rwd,front,103.50,189.00,66.90,55.70,3055,ohc,six,164,mpfi,3.31,3.19,9.00,121,4250,20,25,24565 16 | 0,?,bmw,gas,std,four,sedan,rwd,front,103.50,189.00,66.90,55.70,3230,ohc,six,209,mpfi,3.62,3.39,8.00,182,5400,16,22,30760 17 | 0,?,bmw,gas,std,two,sedan,rwd,front,103.50,193.80,67.90,53.70,3380,ohc,six,209,mpfi,3.62,3.39,8.00,182,5400,16,22,41315 18 | 0,?,bmw,gas,std,four,sedan,rwd,front,110.00,197.00,70.90,56.30,3505,ohc,six,209,mpfi,3.62,3.39,8.00,182,5400,15,20,36880 19 | 2,121,chevrolet,gas,std,two,hatchback,fwd,front,88.40,141.10,60.30,53.20,1488,l,three,61,2bbl,2.91,3.03,9.50,48,5100,47,53,5151 20 | 
1,98,chevrolet,gas,std,two,hatchback,fwd,front,94.50,155.90,63.60,52.00,1874,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,6295 21 | 0,81,chevrolet,gas,std,four,sedan,fwd,front,94.50,158.80,63.60,52.00,1909,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,6575 22 | 1,118,dodge,gas,std,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,1876,ohc,four,90,2bbl,2.97,3.23,9.41,68,5500,37,41,5572 23 | 1,118,dodge,gas,std,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,1876,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6377 24 | 1,118,dodge,gas,turbo,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,2128,ohc,four,98,mpfi,3.03,3.39,7.60,102,5500,24,30,7957 25 | 1,148,dodge,gas,std,four,hatchback,fwd,front,93.70,157.30,63.80,50.60,1967,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6229 26 | 1,148,dodge,gas,std,four,sedan,fwd,front,93.70,157.30,63.80,50.60,1989,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6692 27 | 1,148,dodge,gas,std,four,sedan,fwd,front,93.70,157.30,63.80,50.60,1989,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,7609 28 | 1,148,dodge,gas,turbo,?,sedan,fwd,front,93.70,157.30,63.80,50.60,2191,ohc,four,98,mpfi,3.03,3.39,7.60,102,5500,24,30,8558 29 | -1,110,dodge,gas,std,four,wagon,fwd,front,103.30,174.60,64.60,59.80,2535,ohc,four,122,2bbl,3.34,3.46,8.50,88,5000,24,30,8921 30 | 3,145,dodge,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2811,ohc,four,156,mfi,3.60,3.90,7.00,145,5000,19,24,12964 31 | 2,137,honda,gas,std,two,hatchback,fwd,front,86.60,144.60,63.90,50.80,1713,ohc,four,92,1bbl,2.91,3.41,9.60,58,4800,49,54,6479 32 | 2,137,honda,gas,std,two,hatchback,fwd,front,86.60,144.60,63.90,50.80,1819,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,31,38,6855 33 | 1,101,honda,gas,std,two,hatchback,fwd,front,93.70,150.00,64.00,52.60,1837,ohc,four,79,1bbl,2.91,3.07,10.10,60,5500,38,42,5399 34 | 1,101,honda,gas,std,two,hatchback,fwd,front,93.70,150.00,64.00,52.60,1940,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,30,34,6529 35 | 
1,101,honda,gas,std,two,hatchback,fwd,front,93.70,150.00,64.00,52.60,1956,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,30,34,7129 36 | 0,110,honda,gas,std,four,sedan,fwd,front,96.50,163.40,64.00,54.50,2010,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,30,34,7295 37 | 0,78,honda,gas,std,four,wagon,fwd,front,96.50,157.10,63.90,58.30,2024,ohc,four,92,1bbl,2.92,3.41,9.20,76,6000,30,34,7295 38 | 0,106,honda,gas,std,two,hatchback,fwd,front,96.50,167.50,65.20,53.30,2236,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,7895 39 | 0,106,honda,gas,std,two,hatchback,fwd,front,96.50,167.50,65.20,53.30,2289,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,9095 40 | 0,85,honda,gas,std,four,sedan,fwd,front,96.50,175.40,65.20,54.10,2304,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,8845 41 | 0,85,honda,gas,std,four,sedan,fwd,front,96.50,175.40,62.50,54.10,2372,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,10295 42 | 0,85,honda,gas,std,four,sedan,fwd,front,96.50,175.40,65.20,54.10,2465,ohc,four,110,mpfi,3.15,3.58,9.00,101,5800,24,28,12945 43 | 1,107,honda,gas,std,two,sedan,fwd,front,96.50,169.10,66.00,51.00,2293,ohc,four,110,2bbl,3.15,3.58,9.10,100,5500,25,31,10345 44 | 0,?,isuzu,gas,std,four,sedan,rwd,front,94.30,170.70,61.80,53.50,2337,ohc,four,111,2bbl,3.31,3.23,8.50,78,4800,24,29,6785 45 | 1,?,isuzu,gas,std,two,sedan,fwd,front,94.50,155.90,63.60,52.00,1874,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,? 46 | 0,?,isuzu,gas,std,four,sedan,fwd,front,94.50,155.90,63.60,52.00,1909,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,? 
47 | 2,?,isuzu,gas,std,two,hatchback,rwd,front,96.00,172.60,65.20,51.40,2734,ohc,four,119,spfi,3.43,3.23,9.20,90,5000,24,29,11048 48 | 0,145,jaguar,gas,std,four,sedan,rwd,front,113.00,199.60,69.60,52.80,4066,dohc,six,258,mpfi,3.63,4.17,8.10,176,4750,15,19,32250 49 | 0,?,jaguar,gas,std,four,sedan,rwd,front,113.00,199.60,69.60,52.80,4066,dohc,six,258,mpfi,3.63,4.17,8.10,176,4750,15,19,35550 50 | 0,?,jaguar,gas,std,two,sedan,rwd,front,102.00,191.70,70.60,47.80,3950,ohcv,twelve,326,mpfi,3.54,2.76,11.50,262,5000,13,17,36000 51 | 1,104,mazda,gas,std,two,hatchback,fwd,front,93.10,159.10,64.20,54.10,1890,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,30,31,5195 52 | 1,104,mazda,gas,std,two,hatchback,fwd,front,93.10,159.10,64.20,54.10,1900,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,31,38,6095 53 | 1,104,mazda,gas,std,two,hatchback,fwd,front,93.10,159.10,64.20,54.10,1905,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,31,38,6795 54 | 1,113,mazda,gas,std,four,sedan,fwd,front,93.10,166.80,64.20,54.10,1945,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,31,38,6695 55 | 1,113,mazda,gas,std,four,sedan,fwd,front,93.10,166.80,64.20,54.10,1950,ohc,four,91,2bbl,3.08,3.15,9.00,68,5000,31,38,7395 56 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2380,rotor,two,70,4bbl,?,?,9.40,101,6000,17,23,10945 57 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2380,rotor,two,70,4bbl,?,?,9.40,101,6000,17,23,11845 58 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2385,rotor,two,70,4bbl,?,?,9.40,101,6000,17,23,13645 59 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2500,rotor,two,80,mpfi,?,?,9.40,135,6000,16,23,15645 60 | 1,129,mazda,gas,std,two,hatchback,fwd,front,98.80,177.80,66.50,53.70,2385,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,8845 61 | 0,115,mazda,gas,std,four,sedan,fwd,front,98.80,177.80,66.50,55.50,2410,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,8495 62 | 
1,129,mazda,gas,std,two,hatchback,fwd,front,98.80,177.80,66.50,53.70,2385,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,10595 63 | 0,115,mazda,gas,std,four,sedan,fwd,front,98.80,177.80,66.50,55.50,2410,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,10245 64 | 0,?,mazda,diesel,std,?,sedan,fwd,front,98.80,177.80,66.50,55.50,2443,ohc,four,122,idi,3.39,3.39,22.70,64,4650,36,42,10795 65 | 0,115,mazda,gas,std,four,hatchback,fwd,front,98.80,177.80,66.50,55.50,2425,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,11245 66 | 0,118,mazda,gas,std,four,sedan,rwd,front,104.90,175.00,66.10,54.40,2670,ohc,four,140,mpfi,3.76,3.16,8.00,120,5000,19,27,18280 67 | 0,?,mazda,diesel,std,four,sedan,rwd,front,104.90,175.00,66.10,54.40,2700,ohc,four,134,idi,3.43,3.64,22.00,72,4200,31,39,18344 68 | -1,93,mercedes-benz,diesel,turbo,four,sedan,rwd,front,110.00,190.90,70.30,56.50,3515,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,25552 69 | -1,93,mercedes-benz,diesel,turbo,four,wagon,rwd,front,110.00,190.90,70.30,58.70,3750,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,28248 70 | 0,93,mercedes-benz,diesel,turbo,two,hardtop,rwd,front,106.70,187.50,70.30,54.90,3495,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,28176 71 | -1,93,mercedes-benz,diesel,turbo,four,sedan,rwd,front,115.60,202.60,71.70,56.30,3770,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,31600 72 | -1,?,mercedes-benz,gas,std,four,sedan,rwd,front,115.60,202.60,71.70,56.50,3740,ohcv,eight,234,mpfi,3.46,3.10,8.30,155,4750,16,18,34184 73 | 3,142,mercedes-benz,gas,std,two,convertible,rwd,front,96.60,180.30,70.50,50.80,3685,ohcv,eight,234,mpfi,3.46,3.10,8.30,155,4750,16,18,35056 74 | 0,?,mercedes-benz,gas,std,four,sedan,rwd,front,120.90,208.10,71.70,56.70,3900,ohcv,eight,308,mpfi,3.80,3.35,8.00,184,4500,14,16,40960 75 | 1,?,mercedes-benz,gas,std,two,hardtop,rwd,front,112.00,199.20,72.00,55.40,3715,ohcv,eight,304,mpfi,3.80,3.35,8.00,184,4500,14,16,45400 76 | 
1,?,mercury,gas,turbo,two,hatchback,rwd,front,102.70,178.40,68.00,54.80,2910,ohc,four,140,mpfi,3.78,3.12,8.00,175,5000,19,24,16503 77 | 2,161,mitsubishi,gas,std,two,hatchback,fwd,front,93.70,157.30,64.40,50.80,1918,ohc,four,92,2bbl,2.97,3.23,9.40,68,5500,37,41,5389 78 | 2,161,mitsubishi,gas,std,two,hatchback,fwd,front,93.70,157.30,64.40,50.80,1944,ohc,four,92,2bbl,2.97,3.23,9.40,68,5500,31,38,6189 79 | 2,161,mitsubishi,gas,std,two,hatchback,fwd,front,93.70,157.30,64.40,50.80,2004,ohc,four,92,2bbl,2.97,3.23,9.40,68,5500,31,38,6669 80 | 1,161,mitsubishi,gas,turbo,two,hatchback,fwd,front,93,157.30,63.80,50.80,2145,ohc,four,98,spdi,3.03,3.39,7.60,102,5500,24,30,7689 81 | 3,153,mitsubishi,gas,turbo,two,hatchback,fwd,front,96.30,173.00,65.40,49.40,2370,ohc,four,110,spdi,3.17,3.46,7.50,116,5500,23,30,9959 82 | 3,153,mitsubishi,gas,std,two,hatchback,fwd,front,96.30,173.00,65.40,49.40,2328,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,25,32,8499 83 | 3,?,mitsubishi,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2833,ohc,four,156,spdi,3.58,3.86,7.00,145,5000,19,24,12629 84 | 3,?,mitsubishi,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2921,ohc,four,156,spdi,3.59,3.86,7.00,145,5000,19,24,14869 85 | 3,?,mitsubishi,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2926,ohc,four,156,spdi,3.59,3.86,7.00,145,5000,19,24,14489 86 | 1,125,mitsubishi,gas,std,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2365,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,25,32,6989 87 | 1,125,mitsubishi,gas,std,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2405,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,25,32,8189 88 | 1,125,mitsubishi,gas,turbo,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2403,ohc,four,110,spdi,3.17,3.46,7.50,116,5500,23,30,9279 89 | -1,137,mitsubishi,gas,std,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2403,ohc,four,110,spdi,3.17,3.46,7.50,116,5500,23,30,9279 90 | 
1,128,nissan,gas,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,1889,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,5499 91 | 1,128,nissan,diesel,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,2017,ohc,four,103,idi,2.99,3.47,21.90,55,4800,45,50,7099 92 | 1,128,nissan,gas,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,1918,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,6649 93 | 1,122,nissan,gas,std,four,sedan,fwd,front,94.50,165.30,63.80,54.50,1938,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,6849 94 | 1,103,nissan,gas,std,four,wagon,fwd,front,94.50,170.20,63.80,53.50,2024,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7349 95 | 1,128,nissan,gas,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,1951,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7299 96 | 1,128,nissan,gas,std,two,hatchback,fwd,front,94.50,165.60,63.80,53.30,2028,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7799 97 | 1,122,nissan,gas,std,four,sedan,fwd,front,94.50,165.30,63.80,54.50,1971,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7499 98 | 1,103,nissan,gas,std,four,wagon,fwd,front,94.50,170.20,63.80,53.50,2037,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7999 99 | 2,168,nissan,gas,std,two,hardtop,fwd,front,95.10,162.40,63.80,53.30,2008,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,8249 100 | 0,106,nissan,gas,std,four,hatchback,fwd,front,97.20,173.40,65.20,54.70,2324,ohc,four,120,2bbl,3.33,3.47,8.50,97,5200,27,34,8949 101 | 0,106,nissan,gas,std,four,sedan,fwd,front,97.20,173.40,65.20,54.70,2302,ohc,four,120,2bbl,3.33,3.47,8.50,97,5200,27,34,9549 102 | 0,128,nissan,gas,std,four,sedan,fwd,front,100.40,181.70,66.50,55.10,3095,ohcv,six,181,mpfi,3.43,3.27,9.00,152,5200,17,22,13499 103 | 0,108,nissan,gas,std,four,wagon,fwd,front,100.40,184.60,66.50,56.10,3296,ohcv,six,181,mpfi,3.43,3.27,9.00,152,5200,17,22,14399 104 | 0,108,nissan,gas,std,four,sedan,fwd,front,100.40,184.60,66.50,55.10,3060,ohcv,six,181,mpfi,3.43,3.27,9.00,152,5200,19,25,13499 105 | 
3,194,nissan,gas,std,two,hatchback,rwd,front,91.30,170.70,67.90,49.70,3071,ohcv,six,181,mpfi,3.43,3.27,9.00,160,5200,19,25,17199 106 | 3,194,nissan,gas,turbo,two,hatchback,rwd,front,91.30,170.70,67.90,49.70,3139,ohcv,six,181,mpfi,3.43,3.27,7.80,200,5200,17,23,19699 107 | 1,231,nissan,gas,std,two,hatchback,rwd,front,99.20,178.50,67.90,49.70,3139,ohcv,six,181,mpfi,3.43,3.27,9.00,160,5200,19,25,18399 108 | 0,161,peugot,gas,std,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3020,l,four,120,mpfi,3.46,3.19,8.40,97,5000,19,24,11900 109 | 0,161,peugot,diesel,turbo,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3197,l,four,152,idi,3.70,3.52,21.00,95,4150,28,33,13200 110 | 0,?,peugot,gas,std,four,wagon,rwd,front,114.20,198.90,68.40,58.70,3230,l,four,120,mpfi,3.46,3.19,8.40,97,5000,19,24,12440 111 | 0,?,peugot,diesel,turbo,four,wagon,rwd,front,114.20,198.90,68.40,58.70,3430,l,four,152,idi,3.70,3.52,21.00,95,4150,25,25,13860 112 | 0,161,peugot,gas,std,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3075,l,four,120,mpfi,3.46,2.19,8.40,95,5000,19,24,15580 113 | 0,161,peugot,diesel,turbo,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3252,l,four,152,idi,3.70,3.52,21.00,95,4150,28,33,16900 114 | 0,?,peugot,gas,std,four,wagon,rwd,front,114.20,198.90,68.40,56.70,3285,l,four,120,mpfi,3.46,2.19,8.40,95,5000,19,24,16695 115 | 0,?,peugot,diesel,turbo,four,wagon,rwd,front,114.20,198.90,68.40,58.70,3485,l,four,152,idi,3.70,3.52,21.00,95,4150,25,25,17075 116 | 0,161,peugot,gas,std,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3075,l,four,120,mpfi,3.46,3.19,8.40,97,5000,19,24,16630 117 | 0,161,peugot,diesel,turbo,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3252,l,four,152,idi,3.70,3.52,21.00,95,4150,28,33,17950 118 | 0,161,peugot,gas,turbo,four,sedan,rwd,front,108.00,186.70,68.30,56.00,3130,l,four,134,mpfi,3.61,3.21,7.00,142,5600,18,24,18150 119 | 1,119,plymouth,gas,std,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,1918,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,37,41,5572 120 | 
1,119,plymouth,gas,turbo,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,2128,ohc,four,98,spdi,3.03,3.39,7.60,102,5500,24,30,7957 121 | 1,154,plymouth,gas,std,four,hatchback,fwd,front,93.70,157.30,63.80,50.60,1967,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6229 122 | 1,154,plymouth,gas,std,four,sedan,fwd,front,93.70,167.30,63.80,50.80,1989,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6692 123 | 1,154,plymouth,gas,std,four,sedan,fwd,front,93.70,167.30,63.80,50.80,2191,ohc,four,98,2bbl,2.97,3.23,9.40,68,5500,31,38,7609 124 | -1,74,plymouth,gas,std,four,wagon,fwd,front,103.30,174.60,64.60,59.80,2535,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,24,30,8921 125 | 3,?,plymouth,gas,turbo,two,hatchback,rwd,front,95.90,173.20,66.30,50.20,2818,ohc,four,156,spdi,3.59,3.86,7.00,145,5000,19,24,12764 126 | 3,186,porsche,gas,std,two,hatchback,rwd,front,94.50,168.90,68.30,50.20,2778,ohc,four,151,mpfi,3.94,3.11,9.50,143,5500,19,27,22018 127 | 3,?,porsche,gas,std,two,hardtop,rwd,rear,89.50,168.90,65.00,51.60,2756,ohcf,six,194,mpfi,3.74,2.90,9.50,207,5900,17,25,32528 128 | 3,?,porsche,gas,std,two,hardtop,rwd,rear,89.50,168.90,65.00,51.60,2756,ohcf,six,194,mpfi,3.74,2.90,9.50,207,5900,17,25,34028 129 | 3,?,porsche,gas,std,two,convertible,rwd,rear,89.50,168.90,65.00,51.60,2800,ohcf,six,194,mpfi,3.74,2.90,9.50,207,5900,17,25,37028 130 | 1,?,porsche,gas,std,two,hatchback,rwd,front,98.40,175.70,72.30,50.50,3366,dohcv,eight,203,mpfi,3.94,3.11,10.00,288,5750,17,28,? 
131 | 0,?,renault,gas,std,four,wagon,fwd,front,96.10,181.50,66.50,55.20,2579,ohc,four,132,mpfi,3.46,3.90,8.70,?,?,23,31,9295 132 | 2,?,renault,gas,std,two,hatchback,fwd,front,96.10,176.80,66.60,50.50,2460,ohc,four,132,mpfi,3.46,3.90,8.70,?,?,23,31,9895 133 | 3,150,saab,gas,std,two,hatchback,fwd,front,99.10,186.60,66.50,56.10,2658,ohc,four,121,mpfi,3.54,3.07,9.31,110,5250,21,28,11850 134 | 2,104,saab,gas,std,four,sedan,fwd,front,99.10,186.60,66.50,56.10,2695,ohc,four,121,mpfi,3.54,3.07,9.30,110,5250,21,28,12170 135 | 3,150,saab,gas,std,two,hatchback,fwd,front,99.10,186.60,66.50,56.10,2707,ohc,four,121,mpfi,2.54,2.07,9.30,110,5250,21,28,15040 136 | 2,104,saab,gas,std,four,sedan,fwd,front,99.10,186.60,66.50,56.10,2758,ohc,four,121,mpfi,3.54,3.07,9.30,110,5250,21,28,15510 137 | 3,150,saab,gas,turbo,two,hatchback,fwd,front,99.10,186.60,66.50,56.10,2808,dohc,four,121,mpfi,3.54,3.07,9.00,160,5500,19,26,18150 138 | 2,104,saab,gas,turbo,four,sedan,fwd,front,99.10,186.60,66.50,56.10,2847,dohc,four,121,mpfi,3.54,3.07,9.00,160,5500,19,26,18620 139 | 2,83,subaru,gas,std,two,hatchback,fwd,front,93.70,156.90,63.40,53.70,2050,ohcf,four,97,2bbl,3.62,2.36,9.00,69,4900,31,36,5118 140 | 2,83,subaru,gas,std,two,hatchback,fwd,front,93.70,157.90,63.60,53.70,2120,ohcf,four,108,2bbl,3.62,2.64,8.70,73,4400,26,31,7053 141 | 2,83,subaru,gas,std,two,hatchback,4wd,front,93.30,157.30,63.80,55.70,2240,ohcf,four,108,2bbl,3.62,2.64,8.70,73,4400,26,31,7603 142 | 0,102,subaru,gas,std,four,sedan,fwd,front,97.20,172.00,65.40,52.50,2145,ohcf,four,108,2bbl,3.62,2.64,9.50,82,4800,32,37,7126 143 | 0,102,subaru,gas,std,four,sedan,fwd,front,97.20,172.00,65.40,52.50,2190,ohcf,four,108,2bbl,3.62,2.64,9.50,82,4400,28,33,7775 144 | 0,102,subaru,gas,std,four,sedan,fwd,front,97.20,172.00,65.40,52.50,2340,ohcf,four,108,mpfi,3.62,2.64,9.00,94,5200,26,32,9960 145 | 0,102,subaru,gas,std,four,sedan,4wd,front,97.00,172.00,65.40,54.30,2385,ohcf,four,108,2bbl,3.62,2.64,9.00,82,4800,24,25,9233 146 | 
0,102,subaru,gas,turbo,four,sedan,4wd,front,97.00,172.00,65.40,54.30,2510,ohcf,four,108,mpfi,3.62,2.64,7.70,111,4800,24,29,11259 147 | 0,89,subaru,gas,std,four,wagon,fwd,front,97.00,173.50,65.40,53.00,2290,ohcf,four,108,2bbl,3.62,2.64,9.00,82,4800,28,32,7463 148 | 0,89,subaru,gas,std,four,wagon,fwd,front,97.00,173.50,65.40,53.00,2455,ohcf,four,108,mpfi,3.62,2.64,9.00,94,5200,25,31,10198 149 | 0,85,subaru,gas,std,four,wagon,4wd,front,96.90,173.60,65.40,54.90,2420,ohcf,four,108,2bbl,3.62,2.64,9.00,82,4800,23,29,8013 150 | 0,85,subaru,gas,turbo,four,wagon,4wd,front,96.90,173.60,65.40,54.90,2650,ohcf,four,108,mpfi,3.62,2.64,7.70,111,4800,23,23,11694 151 | 1,87,toyota,gas,std,two,hatchback,fwd,front,95.70,158.70,63.60,54.50,1985,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,35,39,5348 152 | 1,87,toyota,gas,std,two,hatchback,fwd,front,95.70,158.70,63.60,54.50,2040,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,31,38,6338 153 | 1,74,toyota,gas,std,four,hatchback,fwd,front,95.70,158.70,63.60,54.50,2015,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,31,38,6488 154 | 0,77,toyota,gas,std,four,wagon,fwd,front,95.70,169.70,63.60,59.10,2280,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,31,37,6918 155 | 0,81,toyota,gas,std,four,wagon,4wd,front,95.70,169.70,63.60,59.10,2290,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,27,32,7898 156 | 0,91,toyota,gas,std,four,wagon,4wd,front,95.70,169.70,63.60,59.10,3110,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,27,32,8778 157 | 0,91,toyota,gas,std,four,sedan,fwd,front,95.70,166.30,64.40,53.00,2081,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,30,37,6938 158 | 0,91,toyota,gas,std,four,hatchback,fwd,front,95.70,166.30,64.40,52.80,2109,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,30,37,7198 159 | 0,91,toyota,diesel,std,four,sedan,fwd,front,95.70,166.30,64.40,53.00,2275,ohc,four,110,idi,3.27,3.35,22.50,56,4500,34,36,7898 160 | 0,91,toyota,diesel,std,four,hatchback,fwd,front,95.70,166.30,64.40,52.80,2275,ohc,four,110,idi,3.27,3.35,22.50,56,4500,38,47,7788 161 | 
0,91,toyota,gas,std,four,sedan,fwd,front,95.70,166.30,64.40,53.00,2094,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,38,47,7738 162 | 0,91,toyota,gas,std,four,hatchback,fwd,front,95.70,166.30,64.40,52.80,2122,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,28,34,8358 163 | 0,91,toyota,gas,std,four,sedan,fwd,front,95.70,166.30,64.40,52.80,2140,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,28,34,9258 164 | 1,168,toyota,gas,std,two,sedan,rwd,front,94.50,168.70,64.00,52.60,2169,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,29,34,8058 165 | 1,168,toyota,gas,std,two,hatchback,rwd,front,94.50,168.70,64.00,52.60,2204,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,29,34,8238 166 | 1,168,toyota,gas,std,two,sedan,rwd,front,94.50,168.70,64.00,52.60,2265,dohc,four,98,mpfi,3.24,3.08,9.40,112,6600,26,29,9298 167 | 1,168,toyota,gas,std,two,hatchback,rwd,front,94.50,168.70,64.00,52.60,2300,dohc,four,98,mpfi,3.24,3.08,9.40,112,6600,26,29,9538 168 | 2,134,toyota,gas,std,two,hardtop,rwd,front,98.40,176.20,65.60,52.00,2540,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,8449 169 | 2,134,toyota,gas,std,two,hardtop,rwd,front,98.40,176.20,65.60,52.00,2536,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,9639 170 | 2,134,toyota,gas,std,two,hatchback,rwd,front,98.40,176.20,65.60,52.00,2551,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,9989 171 | 2,134,toyota,gas,std,two,hardtop,rwd,front,98.40,176.20,65.60,52.00,2679,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,11199 172 | 2,134,toyota,gas,std,two,hatchback,rwd,front,98.40,176.20,65.60,52.00,2714,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,11549 173 | 2,134,toyota,gas,std,two,convertible,rwd,front,98.40,176.20,65.60,53.00,2975,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,17669 174 | -1,65,toyota,gas,std,four,sedan,fwd,front,102.40,175.60,66.50,54.90,2326,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,29,34,8948 175 | -1,65,toyota,diesel,turbo,four,sedan,fwd,front,102.40,175.60,66.50,54.90,2480,ohc,four,110,idi,3.27,3.35,22.50,73,4500,30,33,10698 176 | 
-1,65,toyota,gas,std,four,hatchback,fwd,front,102.40,175.60,66.50,53.90,2414,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,27,32,9988 177 | -1,65,toyota,gas,std,four,sedan,fwd,front,102.40,175.60,66.50,54.90,2414,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,27,32,10898 178 | -1,65,toyota,gas,std,four,hatchback,fwd,front,102.40,175.60,66.50,53.90,2458,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,27,32,11248 179 | 3,197,toyota,gas,std,two,hatchback,rwd,front,102.90,183.50,67.70,52.00,2976,dohc,six,171,mpfi,3.27,3.35,9.30,161,5200,20,24,16558 180 | 3,197,toyota,gas,std,two,hatchback,rwd,front,102.90,183.50,67.70,52.00,3016,dohc,six,171,mpfi,3.27,3.35,9.30,161,5200,19,24,15998 181 | -1,90,toyota,gas,std,four,sedan,rwd,front,104.50,187.80,66.50,54.10,3131,dohc,six,171,mpfi,3.27,3.35,9.20,156,5200,20,24,15690 182 | -1,?,toyota,gas,std,four,wagon,rwd,front,104.50,187.80,66.50,54.10,3151,dohc,six,161,mpfi,3.27,3.35,9.20,156,5200,19,24,15750 183 | 2,122,volkswagen,diesel,std,two,sedan,fwd,front,97.30,171.70,65.50,55.70,2261,ohc,four,97,idi,3.01,3.40,23.00,52,4800,37,46,7775 184 | 2,122,volkswagen,gas,std,two,sedan,fwd,front,97.30,171.70,65.50,55.70,2209,ohc,four,109,mpfi,3.19,3.40,9.00,85,5250,27,34,7975 185 | 2,94,volkswagen,diesel,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2264,ohc,four,97,idi,3.01,3.40,23.00,52,4800,37,46,7995 186 | 2,94,volkswagen,gas,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2212,ohc,four,109,mpfi,3.19,3.40,9.00,85,5250,27,34,8195 187 | 2,94,volkswagen,gas,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2275,ohc,four,109,mpfi,3.19,3.40,9.00,85,5250,27,34,8495 188 | 2,94,volkswagen,diesel,turbo,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2319,ohc,four,97,idi,3.01,3.40,23.00,68,4500,37,42,9495 189 | 2,94,volkswagen,gas,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2300,ohc,four,109,mpfi,3.19,3.40,10.00,100,5500,26,32,9995 190 | 
3,?,volkswagen,gas,std,two,convertible,fwd,front,94.50,159.30,64.20,55.60,2254,ohc,four,109,mpfi,3.19,3.40,8.50,90,5500,24,29,11595 191 | 3,256,volkswagen,gas,std,two,hatchback,fwd,front,94.50,165.70,64.00,51.40,2221,ohc,four,109,mpfi,3.19,3.40,8.50,90,5500,24,29,9980 192 | 0,?,volkswagen,gas,std,four,sedan,fwd,front,100.40,180.20,66.90,55.10,2661,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,24,13295 193 | 0,?,volkswagen,diesel,turbo,four,sedan,fwd,front,100.40,180.20,66.90,55.10,2579,ohc,four,97,idi,3.01,3.40,23.00,68,4500,33,38,13845 194 | 0,?,volkswagen,gas,std,four,wagon,fwd,front,100.40,183.10,66.90,55.10,2563,ohc,four,109,mpfi,3.19,3.40,9.00,88,5500,25,31,12290 195 | -2,103,volvo,gas,std,four,sedan,rwd,front,104.30,188.80,67.20,56.20,2912,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,23,28,12940 196 | -1,74,volvo,gas,std,four,wagon,rwd,front,104.30,188.80,67.20,57.50,3034,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,23,28,13415 197 | -2,103,volvo,gas,std,four,sedan,rwd,front,104.30,188.80,67.20,56.20,2935,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,24,28,15985 198 | -1,74,volvo,gas,std,four,wagon,rwd,front,104.30,188.80,67.20,57.50,3042,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,24,28,16515 199 | -2,103,volvo,gas,turbo,four,sedan,rwd,front,104.30,188.80,67.20,56.20,3045,ohc,four,130,mpfi,3.62,3.15,7.50,162,5100,17,22,18420 200 | -1,74,volvo,gas,turbo,four,wagon,rwd,front,104.30,188.80,67.20,57.50,3157,ohc,four,130,mpfi,3.62,3.15,7.50,162,5100,17,22,18950 201 | -1,95,volvo,gas,std,four,sedan,rwd,front,109.10,188.80,68.90,55.50,2952,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,23,28,16845 202 | -1,95,volvo,gas,turbo,four,sedan,rwd,front,109.10,188.80,68.80,55.50,3049,ohc,four,141,mpfi,3.78,3.15,8.70,160,5300,19,25,19045 203 | -1,95,volvo,gas,std,four,sedan,rwd,front,109.10,188.80,68.90,55.50,3012,ohcv,six,173,mpfi,3.58,2.87,8.80,134,5500,18,23,21485 204 | 
-1,95,volvo,diesel,turbo,four,sedan,rwd,front,109.10,188.80,68.90,55.50,3217,ohc,six,145,idi,3.01,3.40,23.00,106,4800,26,27,22470 205 | -1,95,volvo,gas,turbo,four,sedan,rwd,front,109.10,188.80,68.90,55.50,3062,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,19,25,22625 206 | -------------------------------------------------------------------------------- /Predicting Bike Rentals/Basics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Many American cities have communal bike sharing stations where you can rent bicycles by the hour or day. Washington, D.C. is one of these cities. The District collects detailed data on the number of bicycles people rent by the hour and day.\n", 8 | "\n", 9 | "Hadi Fanaee-T at the University of Porto compiled this data into a CSV file, which you'll be working with in this project. The file contains 17380 rows, with each row representing the number of bike rentals for a single hour of a single day. You can download the data from the University of California, Irvine's website." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Here are the descriptions for the relevant columns:\n", 17 | "\n", 18 | "instant - A unique sequential ID number for each row\n", 19 | "dteday - The date of the rentals\n", 20 | "season - The season in which the rentals occurred\n", 21 | "yr - The year the rentals occurred\n", 22 | "mnth - The month the rentals occurred\n", 23 | "hr - The hour the rentals occurred\n", 24 | "holiday - Whether or not the day was a holiday\n", 25 | "weekday - The day of the week (as a number, 0 to 7)\n", 26 | "workingday - Whether or not the day was a working day\n", 27 | "weathersit - The weather (as a categorical variable)\n", 28 | "temp - The temperature, on a 0-1 scale\n", 29 | "atemp - The adjusted temperature\n", 30 | "hum - The humidity, on a 0-1 scale\n", 31 | "windspeed - The wind speed, on a 0-1 scale\n", 32 | "casual - The number of casual riders (people who hadn't previously signed up with the bike sharing program)\n", 33 | "registered - The number of registered riders (people who had already signed up)\n", 34 | "cnt - The total number of bike rentals (casual + registered)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 5, 40 | "metadata": { 41 | "collapsed": false 42 | }, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/html": [ 47 | "
\n", 48 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | "
instantdtedayseasonyrmnthhrholidayweekdayworkingdayweathersittempatemphumwindspeedcasualregisteredcnt
012011-01-01101006010.240.28790.810.031316
122011-01-01101106010.220.27270.800.083240
232011-01-01101206010.220.27270.800.052732
342011-01-01101306010.240.28790.750.031013
452011-01-01101406010.240.28790.750.0011
\n", 187 | "
" 188 | ], 189 | "text/plain": [ 190 | " instant dteday season yr mnth hr holiday weekday workingday \\\n", 191 | "0 1 2011-01-01 1 0 1 0 0 6 0 \n", 192 | "1 2 2011-01-01 1 0 1 1 0 6 0 \n", 193 | "2 3 2011-01-01 1 0 1 2 0 6 0 \n", 194 | "3 4 2011-01-01 1 0 1 3 0 6 0 \n", 195 | "4 5 2011-01-01 1 0 1 4 0 6 0 \n", 196 | "\n", 197 | " weathersit temp atemp hum windspeed casual registered cnt \n", 198 | "0 1 0.24 0.2879 0.81 0.0 3 13 16 \n", 199 | "1 1 0.22 0.2727 0.80 0.0 8 32 40 \n", 200 | "2 1 0.22 0.2727 0.80 0.0 5 27 32 \n", 201 | "3 1 0.24 0.2879 0.75 0.0 3 10 13 \n", 202 | "4 1 0.24 0.2879 0.75 0.0 0 1 1 " 203 | ] 204 | }, 205 | "execution_count": 5, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "import pandas as pd\n", 212 | "\n", 213 | "bike_rentals = pd.read_csv('bike_rental_hour.csv')\n", 214 | "\n", 215 | "bike_rentals.head()" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "(array([6972., 3705., 2659., 1660., 987., 663., 369., 188., 139.,\n", 229 | " 37.]),\n", 230 | " array([ 1. , 98.6, 196.2, 293.8, 391.4, 489. , 586.6, 684.2, 781.8,\n", 231 | " 879.4, 977. 
]),\n", 232 | " )" 233 | ] 234 | }, 235 | "execution_count": 6, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | }, 239 | { 240 | "data": { 241 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEACAYAAABYq7oeAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGx9JREFUeJzt3WFsW9Xh/vHHkILE0DwYi13ZXjKKk8ahaVMag7RpskJI\nukh1hihRuqpxCxNau4kGTYKxN2vf1GFITK3avpjW/JKgqVleNdEfQgIFS6yDmi5lm2ioF5FCcocT\ntoYQKCUtOf8XoXd0lzZpcWJDvh/pSr6n99x7zqnjR+f63muXMcYIAIDPuSbbDQAA5B7CAQDgQDgA\nABwIBwCAA+EAAHAgHAAADrOGQyqVUnl5uVavXq3y8nK53W7t2bNH4+Pjqq6uVnFxsWpqajQxMWHX\nicfjCgaDKikpUV9fn13e39+vsrIyFRUVqampaX56BAD40lxXcp/D9PS0/H6/jh49qr179+rb3/62\nHn30UT3xxBMaHx9Xc3OzTpw4oY0bN+q1117TyMiIqqqq9M9//lMul0t33nmn9u7dq4qKCtXW1mr7\n9u2qqamZz/4BAK7CFZ1WeuGFF7Rs2TIFAgF1dXUpFotJkmKxmA4dOiRJ6u7uVkNDg/Ly8lRYWKhg\nMKhkMql0Oq3JyUlVVFRIkhobG+06AIDcckXh8Kc//Uk/+clPJEmjo6PyeDySJK/Xq7GxMUmSZVkK\nBAJ2HZ/PJ8uyZFmW/H6/Xe73+2VZ1pfuAAAg8+YcDufOnVN3d7fuv/9+SZLL5bro3/93HQDw1ZU3\n1w17enp0xx136JZbbpEkeTwee/aQTqeVn58vaWamMDw8bNcbGRmRz+e7ZPkXIWgA4Opk6nF5c545\nHDx4UBs2bLDXo9GoWltbJUltbW2qq6uzyzs6OjQ1NaWhoSENDg4qHA7L6/XK7XYrmUzKGKP29na7\nzhczWVvc7godPXpUxpisL7/5zW+y3oZcWRgLxoKxuPySSXOaOZw5c0YvvPCCfv/739tljz32mOrr\n69XS0qKCggJ1dnZKkkKhkOrr6xUKhbRkyRLt37/fngns27dPmzdv1tmzZ1VbW6u1a9dmtDMAgMyY\nUzjccMMNeu+99y4qu/nmm/XCCy984faPP/64Hn/8cUf5HXfcoX/84x9X0UwAwELiDukcF4lEst2E\nnMFY/Bdj8V+Mxfy4opvgFsrMaajsNcvtDquvb6/C4XDW2gAAV8rlcmXsuwdmDgAAB8IBAOBAOAAA\nHAgHAIAD4QAAcCAcAAAOhAMAwIFwAAA4EA4AAAfCAQDgQDgAABwIBwCAA+EAAHAgHAAADoQDAMCB\ncAAAOBAOAAAHwgEA4EA4AAAcCAcAgAPhAABwmFM4TExM6P7771dJSYlKS0t19OhRjY+Pq7q6WsXF\nxaqpqdHExIS9fTweVzAYVElJifr6+uzy/v5+lZWVqaioSE1NTZnvDQAgI+YUDtu3b1dtba0GBgb0\nt7/9TcuXL1dzc7Oqqqp08uRJVVZWKh6PS5JOnDihzs5ODQwMqKenR9u2bZMxRpK0detWHThwQKlU\nSqlUSr29vfPXMwDAVZs1HD744AO9/PLL2rJliyQpLy9PbrdbXV1disVikqRYLKZDhw5Jkrq7u9XQ\n0KC8vDwVFhYqGAwqmUwqnU5rcnJSFRUVkqTGxka7DgAgt8waDkNDQ7rlllu0ZcsWrV69Wg899JDO\nnDmj0dFReTweSZLX69XY2JgkybIsBQIBu77P55NlWbIsS36/3y73+/2yLCvT/QEAZEDebBucP39e\n/f392rdvn9asWaNHHnlEzc3NcrlcF
233v+tf3o7PvY58tgAALkgkEkokEvOy71nDwe/3KxAIaM2a\nNZKk++67T83NzfJ4PPbsIZ1OKz8/X9LMTGF4eNiuPzIyIp/Pd8nyS9txdT0CgEUiEokoEonY6zt3\n7szYvmc9reTxeBQIBJRKpSRJhw8fVmlpqaLRqFpbWyVJbW1tqqurkyRFo1F1dHRoampKQ0NDGhwc\nVDgcltfrldvtVjKZlDFG7e3tdh0AQG6ZdeYgSXv27NHGjRt17tw53Xrrrfq///s/ffrpp6qvr1dL\nS4sKCgrU2dkpSQqFQqqvr1coFNKSJUu0f/9++5TTvn37tHnzZp09e1a1tbVau3bt/PUMAHDVXObC\ndaY5ZCZMstcstzusvr69CofDWWsDAFwpl8ulTH2kc4c0AMCBcAAAOBAOAAAHwgEA4EA4AAAcCAcA\ngAPhAABwIBwAAA6EAwDAgXAAADgQDgAAB8IBAOBAOAAAHAgHAIAD4QAAcCAcAAAOhAMAwIFwAAA4\nEA4AAAfCAQDgQDgAABwIBwCAA+EAAHCYUzgUFhZq5cqVKi8vVzgcliSNj4+rurpaxcXFqqmp0cTE\nhL19PB5XMBhUSUmJ+vr67PL+/n6VlZWpqKhITU1NGe4KACBT5hQO11xzjRKJhI4fP65kMilJam5u\nVlVVlU6ePKnKykrF43FJ0okTJ9TZ2amBgQH19PRo27ZtMsZIkrZu3aoDBw4olUoplUqpt7d3nroF\nAPgy5hQOxhhNT09fVNbV1aVYLCZJisViOnTokCSpu7tbDQ0NysvLU2FhoYLBoJLJpNLptCYnJ1VR\nUSFJamxstOsAAHLLnMLB5XLpnnvuUUVFhf7whz9IkkZHR+XxeCRJXq9XY2NjkiTLshQIBOy6Pp9P\nlmXJsiz5/X673O/3y7KsjHUEAJA5eXPZ6MiRI1q6dKnee+89+3sGl8t10Tb/u/7l7fjc68hnCwDg\ngkQioUQiMS/7nlM4LF26VJL0ne98Rz/+8Y+VTCbl8Xjs2UM6nVZ+fr6kmZnC8PCwXXdkZEQ+n++S\n5Ze248p7AwCLSCQSUSQSsdd37tyZsX3PelrpzJkz+vDDDyVJH330kfr6+rRixQpFo1G1trZKktra\n2lRXVydJikaj6ujo0NTUlIaGhjQ4OKhwOCyv1yu3261kMiljjNrb2+06AIDcMuvMYXR0VPfee69c\nLpfOnz+vjRs3qrq6WmvWrFF9fb1aWlpUUFCgzs5OSVIoFFJ9fb1CoZCWLFmi/fv326ec9u3bp82b\nN+vs2bOqra3V2rVr57d3AICr4jIXrjPNITNhkr1mud1h9fXtte/pAICvApfLpUx9pHOHNADAgXAA\nADgQDgAAB8IBAOBAOAAAHAgHAIAD4QAAcCAcAAAOhAMAwIFwAAA4EA4AAAfCAQDgQDgAABwIBwCA\nA+EAAHAgHAAADoQDAMCBcAAAOBAOAAAHwgEA4EA4AAAcCAcAgAPhAABwmHM4TE9Pa/Xq1YpGo5Kk\n8fFxVVdXq7i4WDU1NZqYmLC3jcfjCgaDKikpUV9fn13e39+vsrIyFRUVqampKYPdAABk0pzDYffu\n3QqFQvZ6c3OzqqqqdPLkSVVWVioej0uSTpw4oc7OTg0MDKinp0fbtm2TMUaStHXrVh04cECpVEqp\nVEq9vb0Z7g4AIBPmFA4jIyN69tln9dOf/tQu6+rqUiwWkyTFYjEdOnRIktTd3a2Ghgbl5eWpsLBQ\nwWBQyWRS6XRak5OTqqiokCQ1NjbadQAAuWVO4fDII4/oySeflMvlsstGR0fl8XgkSV6vV2NjY5Ik\ny7IUCATs7Xw+nyzLkmVZ8vv9drnf75dlWRnpBAAgs/Jm2+CZZ56Rx+PRqlWrlEgkLrnd54MjM3Z8\n7nXkswUAcEEikbjs5/KXMWs4HDlyRN3d3Xr22Wf18ccfa3JyUps2bZLX67VnD+l0Wvn5+ZJmZgrD\nw
8N2/ZGREfl8vkuWX9qOq+4UACwGkUhEkUjEXt+5c2fG9j3raaVdu3bpnXfe0VtvvaWOjg5VVlbq\n6aef1rp169Ta2ipJamtrU11dnSQpGo2qo6NDU1NTGhoa0uDgoMLhsLxer9xut5LJpIwxam9vt+sA\nAHLLrDOHS/nVr36l+vp6tbS0qKCgQJ2dnZKkUCik+vp6hUIhLVmyRPv377dPOe3bt0+bN2/W2bNn\nVVtbq7Vr12amFwCAjHKZC9eZ5pCZMMles9zusPr69iocDmetDQBwpVwulzL1kc4d0gAAB8IBAOBA\nOAAAHAgHAIAD4QAAcOBqpS/gdod17bWWTp/+V9ba4PEUKJ0+lbXjA/jqyeTVSld9n8PX3UwwZC+g\nRkcz/TgSAJg7TisBABwIBwCAA+EAAHAgHAAADoQDAMCBcAAAOBAOAAAHwgEA4EA4AAAcCAcAgAPh\nAABwIBwAAA6EAwDAgXAAADgQDgAAB8IBAOAwazh88sknuvPOO1VeXq7S0lL9+te/liSNj4+rurpa\nxcXFqqmp0cTEhF0nHo8rGAyqpKREfX19dnl/f7/KyspUVFSkpqameegOACATZg2H66+/Xi+99JKO\nHz+uv//973rxxRd15MgRNTc3q6qqSidPnlRlZaXi8bgk6cSJE+rs7NTAwIB6enq0bds2+2frtm7d\nqgMHDiiVSimVSqm3t3d+ewcAuCpzOq10ww03SJqZRUxPT+umm25SV1eXYrGYJCkWi+nQoUOSpO7u\nbjU0NCgvL0+FhYUKBoNKJpNKp9OanJxURUWFJKmxsdGuAwDILXMKh+npaZWXl8vr9SoSiSgUCml0\ndFQej0eS5PV6NTY2JkmyLEuBQMCu6/P5ZFmWLMuS3++3y/1+vyzLymRfAAAZkjeXja655hodP35c\nH3zwgWpqapRIJORyuS7a5n/Xv7wdn3sd+WwBAFyQSCSUSCTmZd9zCocLvvnNb6q2tlbHjh2Tx+Ox\nZw/pdFr5+fmSZmYKw8PDdp2RkRH5fL5Lll/ajivqCAAsNpFIRJFIxF7fuXNnxvY962mlf//73/aV\nSB9//LGef/55lZeXKxqNqrW1VZLU1tamuro6SVI0GlVHR4empqY0NDSkwcFBhcNheb1eud1uJZNJ\nGWPU3t5u1wEA5JZZZw7vvvuuYrGYjDGanp7Wpk2bdPfdd6u8vFz19fVqaWlRQUGBOjs7JUmhUEj1\n9fUKhUJasmSJ9u/fb59y2rdvnzZv3qyzZ8+qtrZWa9eund/eAQCuistcuM40h8yESfaa5XaHNTHx\nWlbbILmUg/81AHKYy5W5zw3ukAYAOBAOAAAHwgEA4EA4AAAcCAcAgAPhAABwuKI7pLGQrp+HR5Jc\nGY+nQOn0qay2AUB2EA456xNl9z4LaXQ0u+EEIHs4rQQAcCAcAAAOhAMAwIFwAAA4EA4AAAfCAQDg\nQDgAABwIBwCAA+EAAHAgHAAADoQDAMCBcAAAOBAOAAAHwgEA4EA4AAAcZg2HkZERVVZWqrS0VCtW\nrNCePXskSePj46qurlZxcbFqamo0MTFh14nH4woGgyopKVFfX59d3t/fr7KyMhUVFampqWkeugMA\nyIRZwyEvL09PPfWU3njjDb3yyivat2+f3nzzTTU3N6uqqkonT55UZWWl4vG4JOnEiRPq7OzUwMCA\nenp6tG3bNhkz86M1W7du1YEDB5RKpZRKpdTb2zu/vQMAXJVZw8Hr9WrVqlWSpBtvvFElJSUaGRlR\nV1eXYrGYJCkWi+nQoUOSpO7ubjU0NCgvL0+FhYUKBoNKJpNKp9OanJxURUWFJKmxsdGuAwDILVf0\nncOpU6f0+uuv66677tLo6Kg8Ho+kmQAZGxuTJFmWpUAgYNfx+XyyLEuWZcnv99vlfr9flmVlog8A\ngAyb829If/jhh1q/fr12796tG2+8US7Xxb8v/L/rX96Oz72OfLY
AAC5IJBJKJBLzsu85hcP58+e1\nfv16bdq0SXV1dZIkj8djzx7S6bTy8/MlzcwUhoeH7bojIyPy+XyXLL+0HVfeGwBYRCKRiCKRiL2+\nc+fOjO17TqeVHnjgAYVCIW3fvt0ui0ajam1tlSS1tbXZoRGNRtXR0aGpqSkNDQ1pcHBQ4XBYXq9X\nbrdbyWRSxhi1t7fbdQAAucVlLlxKdAlHjhzRD3/4Q61YsUIul0sul0u7du1SOBxWfX29hoeHVVBQ\noM7OTn3rW9+SNHMp64EDB7RkyRLt3r1b1dXVkqS//vWv2rx5s86ePava2lrt3r37ixvlckm6bLPm\nldsd1sTEa1ltg5TdMbjQhlneHgByiMuVub/ZWcMhGwgHiXAAcKUyGQ7cIQ0AcCAcAAAOhAMAwIFw\nAAA4EA4AAAfCAQDgMOfHZ2Axun4eHotyZTyeAqXTp7LaBmAxIhxwGZ8o2/dajI5mN5yAxYrTSgAA\nB8IBAOBAOAAAHAgHAIAD4QAAcCAcAAAOhAMAwIFwAAA4EA4AAAfCAQDgQDgAABwIBwCAA+EAAHAg\nHAAADoQDAMBh1nB48MEH5fF4VFZWZpeNj4+rurpaxcXFqqmp0cTEhP1v8XhcwWBQJSUl6uvrs8v7\n+/tVVlamoqIiNTU1ZbgbAIBMmjUctmzZot7e3ovKmpubVVVVpZMnT6qyslLxeFySdOLECXV2dmpg\nYEA9PT3atm2bjJn5sZitW7fqwIEDSqVSSqVSjn0CAHLHrOHwgx/8QDfddNNFZV1dXYrFYpKkWCym\nQ4cOSZK6u7vV0NCgvLw8FRYWKhgMKplMKp1Oa3JyUhUVFZKkxsZGuw4AIPdc1XcOY2Nj8ng8kiSv\n16uxsTFJkmVZCgQC9nY+n0+WZcmyLPn9frvc7/fLsqwv024AwDzKyG9Iz8+P0O/43OvIZwsWn+vn\n6f01Nx5PgdLpU1k7PnA5iURCiURiXvZ9VeHg8Xg0Ojoqj8ejdDqt/Px8STMzheHhYXu7kZER+Xy+\nS5Zf3o6raRq+dj6RZLJ29NHR7AUTMJtIJKJIJGKv79y5M2P7ntNpJWOM/cWyJEWjUbW2tkqS2tra\nVFdXZ5d3dHRoampKQ0NDGhwcVDgcltfrldvtVjKZlDFG7e3tdh0AQA4ys9iwYYNZunSpue6660wg\nEDAtLS3m9OnT5u677zZFRUXmnnvuMePj4/b2u3btMsuWLTPLly83vb29dvmxY8fM7bffbm677Tbz\n8MMPX/aYkoxksra43RVZb0P2j08bLhwf+KrI5PvV9dkOc8rMOebsNcvtDmti4rWstkHK7hjQhv8e\nPwf/RIAv5HJl7v3KHdIAAAfCAQDgQDgAABwIBwCAA+EAAHAgHAAADhl5fAbw9ZXdx3dIPMID2UE4\nAJeV3cd3SDzCA9nBaSUAgAPhAABwIBwAAA6EAwDAgXAAADgQDgAAB8IBAODAfQ5AzuNGPCw8wgHI\nedyIh4XHaSUAgAPhAABwIBwAAA6EAwDAgS+kAcxBdq+Y4mqphbfgM4fnnntOy5cvV1FRkZ544omF\nPjyAq3LhiqnsLKOjablcrqwuXm/h/A9zDlnQcJientYvfvEL9fb26o033tDBgwf15ptvLmQTvoIS\n2W5ADklkuwE5JJHtBiywy4XTS5f5t0wG1Nvz380csqDhkEwmFQwGVVBQoCVLlqihoUFdXV0L2YSv\noES2G5BDEtluQA5JZLsBOSSR7QZ8LS1oOFiWpUAgYK/7/X5ZlrWQTQAAzEHOfiH9zW+uy9qxP/44\nlbVjA8hVi+sxJgsaDj6fT++88469PjIyIp/P94XbfvDB/1uoZl1Gth8ZcOH4O3OgDdn0+TZkYyxy\nbQwuWOixyPY4XO742fwbWTijo28vWEC5jDEL9tCWTz/9VMXFxTp8+LCWLl2qcDisgwcPqqSkZKGa\nAACYgwWdOVx77bXau3evqqu
rNT09rQcffJBgAIActKAzBwDAV0NOPT5jsd0gNzIyosrKSpWWlmrF\nihXas2ePJGl8fFzV1dUqLi5WTU2NJiYm7DrxeFzBYFAlJSXq6+vLVtPnxfT0tFavXq1oNCpp8Y6D\nJE1MTOj+++9XSUmJSktLdfTo0UU7HvF4XKWlpSorK9PGjRs1NTW1aMbiwQcflMfjUVlZmV12NX3v\n7+9XWVmZioqK1NTUNLeDmxzx6aefmmXLlplTp06Zqakps3LlSjMwMJDtZs2rd9991xw/ftwYY8zk\n5KQpKioyAwMD5tFHHzVPPPGEMcaY5uZm89hjjxljjHnjjTfMqlWrzLlz58zQ0JBZtmyZmZ6ezlr7\nM+2pp54yGzduNOvWrTPGmEU7DsYYE4vFTEtLizHGmHPnzpn3339/UY7HqVOnzPe+9z3zySefGGOM\nqa+vN62trYtmLF5++WVz/Phxs2LFCrvsavoeDodNMpk0xhjzox/9yDz33HOzHjtnwuGVV14xa9eu\ntdfj8bhpbm7OYosWXl1dnXn++edNcXGxSafTxpiZACkuLjbGOMdk7dq15tVXX81KWzNteHjYVFVV\nmZdeeskOh8U4DsYYMzExYW699VZH+WIcj9OnT5vi4mJz+vRpc+7cObNu3bpF9zdy6tSpi8LhSvv+\n7rvvmpKSErv84MGD5mc/+9msx82Z00qL/Qa5U6dO6fXXX9ddd92l0dFReTweSZLX69XY2Jgk5xj5\nfL6vzRg98sgjevLJJy+6TG8xjoMkDQ0N6ZZbbtGWLVu0evVqPfTQQzpz5syiHI+bbrpJv/zlL/Xd\n735XPp9PbrdbVVVVi3IsLhgbG7uivluWJb/fb5fP9bM1Z8JhMfvwww+1fv167d69WzfeeKPjOuZs\n33gz35555hl5PB6tWrVK5jLXR3zdx+GC8+fPq7+/Xz//+c/V39+vb3zjG2publ507wtJeuutt/S7\n3/1Ob7/9tv71r3/po48+0h//+MdFORaXMl99z5lwuJIb5L5Ozp8/r/Xr12vTpk2qq6uTJHk8Ho2O\njkqS0um08vPzJc2M0fDwsF336zJGR44cUXd3t2699VZt2LBBL774ojZt2iSv17uoxuECv9+vQCCg\nNWvWSJLuu+8+9ff3L7r3hSQdO3ZM3//+93XzzTfr2muv1b333qu//OUvi3IsLrjSvl/tmORMOFRU\nVGhwcFBvv/22pqam1NHRYV+18nX2wAMPKBQKafv27XZZNBpVa2urJKmtrc0OjWg0qo6ODk1NTWlo\naEiDg4MKh8PZaHZG7dq1S++8847eeustdXR0qLKyUk8//bTWrVu3qMbhAo/Ho0AgoFRq5jEuhw8f\nVmlp6aJ7X0hScXGxXn31VZ09e1bGGB0+fFihUGhRjYWZ+W7YXr/Svnu9XrndbiWTSRlj1N7ebteZ\n7cA5o6enxxQVFZnbbrvNxOPxbDdn3v35z38211xzjVm5cqVZtWqVKS8vNz09PeY///mPufvuu01R\nUZG55557zPj4uF1n165dZtmyZWb58uWmt7c3i62fH4lEwv5CejGPw+uvv27WrFljVq5cae69917z\n/vvvL9rx+O1vf2tCoZBZsWKFaWxsNFNTU4tmLDZs2GCWLl1qrrvuOhMIBExLS4s5ffr0Fff92LFj\n5vbbbze33Xabefjhh+d0bG6CAwA45MxpJQBA7iAcAAAOhAMAwIFwAAA4EA4AAAfCAQDgQDgAABwI\nBwCAw/8HkCb6D0gnLtwAAAAASUVORK5CYII=\n", 242 | "text/plain": [ 243 | "" 244 | ] 245 | }, 246 | "metadata": {}, 247 | "output_type": "display_data" 248 | } 249 | ], 250 | "source": [ 251 | "%matplotlib inline\n", 252 | "\n", 253 
| "import matplotlib.pyplot as plt\n", 254 | "\n", 255 | "plt.hist(bike_rentals[\"cnt\"])" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 8, 261 | "metadata": { 262 | "collapsed": false 263 | }, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/plain": [ 268 | "instant 0.278379\n", 269 | "season 0.178056\n", 270 | "yr 0.250495\n", 271 | "mnth 0.120638\n", 272 | "hr 0.394071\n", 273 | "holiday -0.030927\n", 274 | "weekday 0.026900\n", 275 | "workingday 0.030284\n", 276 | "weathersit -0.142426\n", 277 | "temp 0.404772\n", 278 | "atemp 0.400929\n", 279 | "hum -0.322911\n", 280 | "windspeed 0.093234\n", 281 | "casual 0.694564\n", 282 | "registered 0.972151\n", 283 | "cnt 1.000000\n", 284 | "Name: cnt, dtype: float64" 285 | ] 286 | }, 287 | "execution_count": 8, 288 | "metadata": {}, 289 | "output_type": "execute_result" 290 | } 291 | ], 292 | "source": [ 293 | "bike_rentals.corr()['cnt']" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "As we already know from description that cnt is a sum of casual and registered column it will obviously show a high correlation, what is interesting is to see how temperature contributes to the value of cnt and we would have assumed that a holiday might be a contributing factor in deciding the number of registration, by the above table we can clearly see that thats not the case" 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "It can often be helpful to calculate features before applying machine learning models. Features can enhance the accuracy of models by introducing new information, or distilling existing information.\n", 308 | "\n", 309 | "For example, the hr column in bike_rentals contains the hours during which bikes are rented, from 1 to 24. A machine will treat each hour differently, without understanding that certain hours are related. 
We can introduce some order into the process by creating a new column with labels for morning, afternoon, evening, and night. This will bundle similar times together, enabling the model to make better decisions" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 13, 315 | "metadata": { 316 | "collapsed": false 317 | }, 318 | "outputs": [], 319 | "source": [ 320 | "def assign_label(hour):\n", 321 | " if hour >=0 and hour < 6:\n", 322 | " return 4\n", 323 | " elif hour >=6 and hour < 12:\n", 324 | " return 1\n", 325 | " elif hour >= 12 and hour < 18:\n", 326 | " return 2\n", 327 | " elif hour >= 18 and hour <=24:\n", 328 | " return 3\n", 329 | "\n", 330 | "bike_rentals[\"time_label\"] = bike_rentals[\"hr\"].apply(assign_label)\n" 331 | ] 332 | }, 333 | { 334 | "cell_type": "markdown", 335 | "metadata": {}, 336 | "source": [ 337 | "# Error metric¶\n", 338 | "The mean squared error metric makes the most sense to evaluate our error. MSE works on continuous numeric data, which fits our data quite well." 339 | ] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "metadata": {}, 344 | "source": [ 345 | "Before we begin applying machine learning algorithms, we'll need to split the data into training and testing sets. This will enable us to train an algorithm using the training set, and evaluate its accuracy on the testing set. If we train an algorithm on the training data, then evaluate its performance on the same data, we can get an unrealistically low error value, due to overfitting." 
346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": 16, 351 | "metadata": { 352 | "collapsed": true 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "#let split 80% of our data to train set and remaning 20% to test set\n", 357 | "\n", 358 | "train = bike_rentals.sample(frac=.8)\n", 359 | "\n", 360 | "test = bike_rentals.loc[~bike_rentals.index.isin(train.index)]" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "Now that we've done some exploration and manipulation, we're ready to apply linear regression to the data. Linear regression will probably work fairly well on this data, given that many of the columns are highly correlated with cnt." 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": 17, 373 | "metadata": { 374 | "collapsed": false 375 | }, 376 | "outputs": [ 377 | { 378 | "data": { 379 | "text/plain": [ 380 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" 381 | ] 382 | }, 383 | "execution_count": 17, 384 | "metadata": {}, 385 | "output_type": "execute_result" 386 | } 387 | ], 388 | "source": [ 389 | "\n", 390 | "from sklearn.linear_model import LinearRegression\n", 391 | "\n", 392 | "predictors = list(train.columns)\n", 393 | "predictors.remove(\"cnt\")\n", 394 | "predictors.remove(\"casual\")\n", 395 | "predictors.remove(\"registered\")\n", 396 | "predictors.remove(\"dteday\")\n", 397 | "\n", 398 | "reg = LinearRegression()\n", 399 | "\n", 400 | "reg.fit(train[predictors], train[\"cnt\"])" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 18, 406 | "metadata": { 407 | "collapsed": false 408 | }, 409 | "outputs": [ 410 | { 411 | "data": { 412 | "text/plain": [ 413 | "17468.19856186649" 414 | ] 415 | }, 416 | "execution_count": 18, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "\n", 423 | "import numpy\n", 424 | "predictions = 
reg.predict(test[predictors])\n", 425 | "\n", 426 | "numpy.mean((predictions - test[\"cnt\"]) ** 2)" 427 | ] 428 | }, 429 | { 430 | "cell_type": "markdown", 431 | "metadata": {}, 432 | "source": [ 433 | "# Error\n", 434 | "The error is very high, which may be due to the fact that the data has a few extremely high rental counts, but otherwise mostly low counts. Larger errors are penalized more with MSE, which leads to a higher total error." 435 | ] 436 | }, 437 | { 438 | "cell_type": "markdown", 439 | "metadata": {}, 440 | "source": [ 441 | "Now lets apply the decision tree algorithm. You'll be able to compare its error with the error from linear regression, which will enable you to pick the right algorithm for this data set." 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 19, 447 | "metadata": { 448 | "collapsed": false 449 | }, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n", 455 | " max_leaf_nodes=None, min_impurity_split=1e-07,\n", 456 | " min_samples_leaf=5, min_samples_split=2,\n", 457 | " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", 458 | " splitter='best')" 459 | ] 460 | }, 461 | "execution_count": 19, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "from sklearn.tree import DecisionTreeRegressor\n", 468 | "\n", 469 | "reg = DecisionTreeRegressor(min_samples_leaf=5)\n", 470 | "\n", 471 | "reg.fit(train[predictors], train[\"cnt\"])" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": 20, 477 | "metadata": { 478 | "collapsed": false 479 | }, 480 | "outputs": [ 481 | { 482 | "data": { 483 | "text/plain": [ 484 | "2633.0851704678644" 485 | ] 486 | }, 487 | "execution_count": 20, 488 | "metadata": {}, 489 | "output_type": "execute_result" 490 | } 491 | ], 492 | "source": [ 493 | "predictions = reg.predict(test[predictors])\n", 494 | 
"\n", 495 | "numpy.mean((predictions - test[\"cnt\"]) ** 2)" 496 | ] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "execution_count": 21, 501 | "metadata": { 502 | "collapsed": false 503 | }, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "2855.7447177470913" 509 | ] 510 | }, 511 | "execution_count": 21, 512 | "metadata": {}, 513 | "output_type": "execute_result" 514 | } 515 | ], 516 | "source": [ 517 | "# lets try and see if we get better result with a different sample value \n", 518 | "\n", 519 | "reg = DecisionTreeRegressor(min_samples_leaf=2)\n", 520 | "\n", 521 | "reg.fit(train[predictors], train[\"cnt\"])\n", 522 | "\n", 523 | "predictions = reg.predict(test[predictors])\n", 524 | "\n", 525 | "numpy.mean((predictions - test[\"cnt\"]) ** 2)" 526 | ] 527 | }, 528 | { 529 | "cell_type": "markdown", 530 | "metadata": {}, 531 | "source": [ 532 | "# Decision tree error\n", 533 | "By taking the nonlinear predictors into account, the decision tree regressor appears to have much higher accuracy than linear regression." 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "We can now apply the random forest algorithm, which improves on the decision tree algorithm. Random forests tend to be much more accurate than simple models like linear regression. Due to the way random forests are constructed, they tend to overfit much less than decision trees. Random forests can still be prone to overfitting, though, so it's important to tune parameters like maximum depth and minimum samples per leaf." 
541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": null, 546 | "metadata": { 547 | "collapsed": true 548 | }, 549 | "outputs": [], 550 | "source": [] 551 | } 552 | ], 553 | "metadata": { 554 | "kernelspec": { 555 | "display_name": "Python 3", 556 | "language": "python", 557 | "name": "python3" 558 | }, 559 | "language_info": { 560 | "codemirror_mode": { 561 | "name": "ipython", 562 | "version": 3 563 | }, 564 | "file_extension": ".py", 565 | "mimetype": "text/x-python", 566 | "name": "python", 567 | "nbconvert_exporter": "python", 568 | "pygments_lexer": "ipython3", 569 | "version": "3.4.3" 570 | } 571 | }, 572 | "nbformat": 4, 573 | "nbformat_minor": 0 574 | } 575 | -------------------------------------------------------------------------------- /Prediciting Titanic Survival/test.csv: -------------------------------------------------------------------------------- 1 | PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked 2 | 892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q 3 | 893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S 4 | 894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q 5 | 895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S 6 | 896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S 7 | 897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S 8 | 898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q 9 | 899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S 10 | 900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C 11 | 901,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S 12 | 902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S 13 | 903,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S 14 | 904,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S 15 | 905,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S 16 | 906,1,"Chaffee, Mrs. 
Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S 17 | 907,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C 18 | 908,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q 19 | 909,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C 20 | 910,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S 21 | 911,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C 22 | 912,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C 23 | 913,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S 24 | 914,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S 25 | 915,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,,C 26 | 916,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C 27 | 917,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S 28 | 918,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C 29 | 919,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,,C 30 | 920,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S 31 | 921,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C 32 | 922,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S 33 | 923,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S 34 | 924,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S 35 | 925,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S 36 | 926,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C78,C 37 | 927,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C 38 | 928,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S 39 | 929,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S 40 | 930,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,,S 41 | 931,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S 42 | 932,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C 43 | 933,1,"Franklin, Mr. 
Thomas Parham",male,,0,0,113778,26.55,D34,S 44 | 934,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,,S 45 | 935,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S 46 | 936,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S 47 | 937,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S 48 | 938,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C 49 | 939,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q 50 | 940,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C 51 | 941,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,,S 52 | 942,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S 53 | 943,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C 54 | 944,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S 55 | 945,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S 56 | 946,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C 57 | 947,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q 58 | 948,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S 59 | 949,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S 60 | 950,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S 61 | 951,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C 62 | 952,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S 63 | 953,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S 64 | 954,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S 65 | 955,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q 66 | 956,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C 67 | 957,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S 68 | 958,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q 69 | 959,1,"Moore, Mr. 
Clarence Bloomfield",male,47,0,0,113796,42.4,,S 70 | 960,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C 71 | 961,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S 72 | 962,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q 73 | 963,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S 74 | 964,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S 75 | 965,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C 76 | 966,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C130,C 77 | 967,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C 78 | 968,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S 79 | 969,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S 80 | 970,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S 81 | 971,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,,Q 82 | 972,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C 83 | 973,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S 84 | 974,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S 85 | 975,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S 86 | 976,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q 87 | 977,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C 88 | 978,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,,Q 89 | 979,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S 90 | 980,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q 91 | 981,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S 92 | 982,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S 93 | 983,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S 94 | 984,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S 95 | 985,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,,S 96 | 986,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C 97 | 987,3,"Tenglin, Mr. 
Gunnar Isidor",male,25,0,0,350033,7.7958,,S 98 | 988,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S 99 | 989,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,,S 100 | 990,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S 101 | 991,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S 102 | 992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C 103 | 993,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,,S 104 | 994,3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q 105 | 995,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S 106 | 996,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C 107 | 997,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S 108 | 998,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,,Q 109 | 999,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q 110 | 1000,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S 111 | 1001,2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S 112 | 1002,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C 113 | 1003,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q 114 | 1004,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C 115 | 1005,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q 116 | 1006,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S 117 | 1007,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C 118 | 1008,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C 119 | 1009,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S 120 | 1010,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C 121 | 1011,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S 122 | 1012,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,,S 123 | 1013,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q 124 | 1014,1,"Schabert, Mrs. 
Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C 125 | 1015,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S 126 | 1016,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q 127 | 1017,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,,S 128 | 1018,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S 129 | 1019,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q 130 | 1020,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,,S 131 | 1021,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S 132 | 1022,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S 133 | 1023,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C 134 | 1024,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,,S 135 | 1025,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,,C 136 | 1026,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S 137 | 1027,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S 138 | 1028,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C 139 | 1029,2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S 140 | 1030,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S 141 | 1031,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,,S 142 | 1032,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S 143 | 1033,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S 144 | 1034,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C 145 | 1035,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S 146 | 1036,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S 147 | 1037,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S 148 | 1038,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S 149 | 1039,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S 150 | 1040,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,,S 151 | 1041,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S 152 | 1042,1,"Earnshaw, Mrs. 
Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C 153 | 1043,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C 154 | 1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S 155 | 1045,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S 156 | 1046,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S 157 | 1047,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S 158 | 1048,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S 159 | 1049,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S 160 | 1050,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S 161 | 1051,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,,S 162 | 1052,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q 163 | 1053,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C 164 | 1054,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S 165 | 1055,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S 166 | 1056,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S 167 | 1057,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S 168 | 1058,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C 169 | 1059,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S 170 | 1060,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C 171 | 1061,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S 172 | 1062,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S 173 | 1063,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C 174 | 1064,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S 175 | 1065,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C 176 | 1066,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S 177 | 1067,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,,S 178 | 1068,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S 179 | 1069,1,"Stengel, Mr. 
Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C 180 | 1070,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S 181 | 1071,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C 182 | 1072,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S 183 | 1073,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C 184 | 1074,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S 185 | 1075,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q 186 | 1076,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C 187 | 1077,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,,S 188 | 1078,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S 189 | 1079,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S 190 | 1080,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S 191 | 1081,2,"Veal, Mr. James",male,40,0,0,28221,13,,S 192 | 1082,2,"Angle, Mr. William A",male,34,1,0,226875,26,,S 193 | 1083,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S 194 | 1084,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S 195 | 1085,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q 196 | 1086,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S 197 | 1087,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S 198 | 1088,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C 199 | 1089,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S 200 | 1090,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S 201 | 1091,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S 202 | 1092,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q 203 | 1093,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S 204 | 1094,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C 205 | 1095,2,"Quick, Miss. 
Winifred Vera",female,8,1,1,26360,26,,S 206 | 1096,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S 207 | 1097,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,,C 208 | 1098,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q 209 | 1099,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,,S 210 | 1100,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C 211 | 1101,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S 212 | 1102,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S 213 | 1103,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S 214 | 1104,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S 215 | 1105,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S 216 | 1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S 217 | 1107,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S 218 | 1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q 219 | 1109,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S 220 | 1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C 221 | 1111,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S 222 | 1112,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C 223 | 1113,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S 224 | 1114,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S 225 | 1115,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S 226 | 1116,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C 227 | 1117,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C 228 | 1118,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S 229 | 1119,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,,Q 230 | 1120,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S 231 | 1121,2,"Hocking, Mr. 
Samuel James Metcalfe",male,36,0,0,242963,13,,S 232 | 1122,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S 233 | 1123,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,,S 234 | 1124,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,,S 235 | 1125,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q 236 | 1126,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C 237 | 1127,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S 238 | 1128,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,D37,C 239 | 1129,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C 240 | 1130,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S 241 | 1131,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C 242 | 1132,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C 243 | 1133,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,,S 244 | 1134,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C 245 | 1135,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S 246 | 1136,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S 247 | 1137,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S 248 | 1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S 249 | 1139,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S 250 | 1140,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S 251 | 1141,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C 252 | 1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S 253 | 1143,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S 254 | 1144,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C 255 | 1145,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S 256 | 1146,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,,S 257 | 1147,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S 258 | 1148,3,"Mahon, Mr. 
John",male,,0,0,AQ/4 3130,7.75,,Q 259 | 1149,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S 260 | 1150,2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,,S 261 | 1151,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S 262 | 1152,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S 263 | 1153,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S 264 | 1154,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S 265 | 1155,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S 266 | 1156,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C 267 | 1157,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,,S 268 | 1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S 269 | 1159,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S 270 | 1160,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S 271 | 1161,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S 272 | 1162,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C 273 | 1163,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q 274 | 1164,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C 275 | 1165,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q 276 | 1166,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C 277 | 1167,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S 278 | 1168,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S 279 | 1169,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S 280 | 1170,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S 281 | 1171,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S 282 | 1172,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S 283 | 1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,,S 284 | 1174,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q 285 | 1175,3,"Touma, Miss. 
Maria Youssef",female,9,1,1,2650,15.2458,,C 286 | 1176,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,,S 287 | 1177,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,,S 288 | 1178,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S 289 | 1179,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S 290 | 1180,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,F E46,C 291 | 1181,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S 292 | 1182,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S 293 | 1183,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q 294 | 1184,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C 295 | 1185,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,A34,S 296 | 1186,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S 297 | 1187,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S 298 | 1188,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C 299 | 1189,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C 300 | 1190,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S 301 | 1191,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S 302 | 1192,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S 303 | 1193,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C 304 | 1194,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S 305 | 1195,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S 306 | 1196,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q 307 | 1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S 308 | 1198,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S 309 | 1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S 310 | 1200,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S 311 | 1201,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S 312 | 1202,3,"Cacic, Mr. 
Jego Grga",male,18,0,0,315091,8.6625,,S 313 | 1203,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C 314 | 1204,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,,S 315 | 1205,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q 316 | 1206,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C 317 | 1207,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q 318 | 1208,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C 319 | 1209,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S 320 | 1210,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S 321 | 1211,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S 322 | 1212,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,,S 323 | 1213,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C 324 | 1214,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S 325 | 1215,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S 326 | 1216,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S 327 | 1217,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S 328 | 1218,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S 329 | 1219,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C 330 | 1220,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S 331 | 1221,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S 332 | 1222,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S 333 | 1223,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C 334 | 1224,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C 335 | 1225,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C 336 | 1226,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S 337 | 1227,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S 338 | 1228,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,,S 339 | 1229,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,,C 340 | 1230,2,"Denbury, Mr. 
Herbert",male,25,0,0,C.A. 31029,31.5,,S 341 | 1231,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C 342 | 1232,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S 343 | 1233,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S 344 | 1234,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,,S 345 | 1235,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C 346 | 1236,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S 347 | 1237,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,,S 348 | 1238,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S 349 | 1239,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C 350 | 1240,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S 351 | 1241,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S 352 | 1242,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C 353 | 1243,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S 354 | 1244,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S 355 | 1245,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S 356 | 1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S 357 | 1247,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S 358 | 1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S 359 | 1249,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S 360 | 1250,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q 361 | 1251,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S 362 | 1252,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S 363 | 1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C 364 | 1254,2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S 365 | 1255,3,"Strilic, Mr. 
Ivan",male,27,0,0,315083,8.6625,,S 366 | 1256,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C 367 | 1257,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S 368 | 1258,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C 369 | 1259,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S 370 | 1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C 371 | 1261,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C 372 | 1262,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S 373 | 1263,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C 374 | 1264,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S 375 | 1265,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S 376 | 1266,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S 377 | 1267,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C 378 | 1268,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S 379 | 1269,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S 380 | 1270,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S 381 | 1271,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S 382 | 1272,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q 383 | 1273,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q 384 | 1274,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S 385 | 1275,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S 386 | 1276,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S 387 | 1277,2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S 388 | 1278,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S 389 | 1279,2,"Ashby, Mr. John",male,57,0,0,244346,13,,S 390 | 1280,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,,Q 391 | 1281,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,,S 392 | 1282,1,"Payne, Mr. 
Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S 393 | 1283,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S 394 | 1284,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,,S 395 | 1285,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,,S 396 | 1286,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S 397 | 1287,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S 398 | 1288,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,,Q 399 | 1289,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C 400 | 1290,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S 401 | 1291,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,,Q 402 | 1292,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S 403 | 1293,2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S 404 | 1294,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C 405 | 1295,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S 406 | 1296,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C 407 | 1297,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C 408 | 1298,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S 409 | 1299,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C 410 | 1300,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q 411 | 1301,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S 412 | 1302,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q 413 | 1303,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q 414 | 1304,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S 415 | 1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S 416 | 1306,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C 417 | 1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S 418 | 1308,3,"Ware, Mr. 
Frederick",male,,0,0,359309,8.05,,S 419 | 1309,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,,C 420 | -------------------------------------------------------------------------------- /Overfit.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 170, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from sklearn.preprocessing import StandardScaler\n", 12 | "from sklearn.preprocessing import Imputer\n", 13 | "from sklearn.model_selection import GridSearchCV , train_test_split , cross_val_score\n", 14 | "from sklearn.metrics import classification_report , confusion_matrix\n", 15 | "\n", 16 | "\n", 17 | "from sklearn.linear_model import LogisticRegression\n", 18 | "from sklearn.naive_bayes import GaussianNB\n", 19 | "from sklearn.neighbors import KNeighborsClassifier\n", 20 | "from sklearn.tree import DecisionTreeClassifier\n", 21 | "from sklearn.ensemble import RandomForestClassifier\n", 22 | "from sklearn.ensemble import GradientBoostingClassifier\n", 23 | "from sklearn.svm import SVC\n", 24 | "from sklearn.metrics import roc_curve, auc\n", 25 | "import os\n", 26 | "import warnings" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 264, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "train=pd.read_csv('train.csv')\n", 36 | "test=pd.read_csv('test.csv')" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 60, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/html": [ 47 | "
\n", 48 | "\n", 61 | "\n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " 
\n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | "
idtarget01234567...290291292293294295296297298299
count250.000000250.000000250.000000250.000000250.000000250.000000250.000000250.000000250.000000250.000000...250.000000250.000000250.000000250.000000250.000000250.000000250.000000250.000000250.000000250.000000
mean124.5000000.6400000.023292-0.0268720.1674040.0019040.001588-0.0073040.0320520.078412...0.0446520.1263440.018436-0.012092-0.065720-0.1061120.0464720.0064520.009372-0.128952
std72.3129770.4809630.9983541.0093141.0217091.0117511.0354110.9557001.0066570.939731...1.0114160.9725670.9542290.9606301.0574141.0383890.9676610.9989841.0080990.971219
min0.0000000.000000-2.319000-2.931000-2.477000-2.359000-2.566000-2.845000-2.976000-3.444000...-2.804000-2.443000-2.757000-2.466000-3.287000-3.072000-2.634000-2.776000-3.211000-3.500000
25%62.2500000.000000-0.644750-0.739750-0.425250-0.686500-0.659000-0.643750-0.675000-0.550750...-0.617000-0.510500-0.535750-0.657000-0.818500-0.821000-0.605500-0.751250-0.550000-0.754250
50%124.5000001.000000-0.0155000.0570000.184000-0.016500-0.0230000.0375000.0605000.183500...0.0675000.0910000.057500-0.021000-0.009000-0.0795000.0095000.005500-0.009000-0.132500
75%186.7500001.0000000.6770000.6207500.8050000.7200000.7350000.6605000.7832500.766250...0.7972500.8042500.6315000.6502500.7395000.4930000.6830000.7942500.6542500.503250
max249.0000001.0000002.5670002.4190003.3920002.7710002.9010002.7930002.5460002.846000...2.8650002.8010002.7360002.5960002.2260003.1310003.2360002.6260003.5300002.771000
\n", 283 | "

8 rows × 302 columns

\n", 284 | "
" 285 | ], 286 | "text/plain": [ 287 | " id target 0 1 2 3 \\\n", 288 | "count 250.000000 250.000000 250.000000 250.000000 250.000000 250.000000 \n", 289 | "mean 124.500000 0.640000 0.023292 -0.026872 0.167404 0.001904 \n", 290 | "std 72.312977 0.480963 0.998354 1.009314 1.021709 1.011751 \n", 291 | "min 0.000000 0.000000 -2.319000 -2.931000 -2.477000 -2.359000 \n", 292 | "25% 62.250000 0.000000 -0.644750 -0.739750 -0.425250 -0.686500 \n", 293 | "50% 124.500000 1.000000 -0.015500 0.057000 0.184000 -0.016500 \n", 294 | "75% 186.750000 1.000000 0.677000 0.620750 0.805000 0.720000 \n", 295 | "max 249.000000 1.000000 2.567000 2.419000 3.392000 2.771000 \n", 296 | "\n", 297 | " 4 5 6 7 ... 290 \\\n", 298 | "count 250.000000 250.000000 250.000000 250.000000 ... 250.000000 \n", 299 | "mean 0.001588 -0.007304 0.032052 0.078412 ... 0.044652 \n", 300 | "std 1.035411 0.955700 1.006657 0.939731 ... 1.011416 \n", 301 | "min -2.566000 -2.845000 -2.976000 -3.444000 ... -2.804000 \n", 302 | "25% -0.659000 -0.643750 -0.675000 -0.550750 ... -0.617000 \n", 303 | "50% -0.023000 0.037500 0.060500 0.183500 ... 0.067500 \n", 304 | "75% 0.735000 0.660500 0.783250 0.766250 ... 0.797250 \n", 305 | "max 2.901000 2.793000 2.546000 2.846000 ... 
2.865000 \n", 306 | "\n", 307 | " 291 292 293 294 295 296 \\\n", 308 | "count 250.000000 250.000000 250.000000 250.000000 250.000000 250.000000 \n", 309 | "mean 0.126344 0.018436 -0.012092 -0.065720 -0.106112 0.046472 \n", 310 | "std 0.972567 0.954229 0.960630 1.057414 1.038389 0.967661 \n", 311 | "min -2.443000 -2.757000 -2.466000 -3.287000 -3.072000 -2.634000 \n", 312 | "25% -0.510500 -0.535750 -0.657000 -0.818500 -0.821000 -0.605500 \n", 313 | "50% 0.091000 0.057500 -0.021000 -0.009000 -0.079500 0.009500 \n", 314 | "75% 0.804250 0.631500 0.650250 0.739500 0.493000 0.683000 \n", 315 | "max 2.801000 2.736000 2.596000 2.226000 3.131000 3.236000 \n", 316 | "\n", 317 | " 297 298 299 \n", 318 | "count 250.000000 250.000000 250.000000 \n", 319 | "mean 0.006452 0.009372 -0.128952 \n", 320 | "std 0.998984 1.008099 0.971219 \n", 321 | "min -2.776000 -3.211000 -3.500000 \n", 322 | "25% -0.751250 -0.550000 -0.754250 \n", 323 | "50% 0.005500 -0.009000 -0.132500 \n", 324 | "75% 0.794250 0.654250 0.503250 \n", 325 | "max 2.626000 3.530000 2.771000 \n", 326 | "\n", 327 | "[8 rows x 302 columns]" 328 | ] 329 | }, 330 | "execution_count": 60, 331 | "metadata": {}, 332 | "output_type": "execute_result" 333 | } 334 | ], 335 | "source": [] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 59, 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/html": [ 345 | "
\n", 346 | "\n", 359 | "\n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | "
id012345678...290291292293294295296297298299
02500.500-1.033-1.5950.309-0.7140.5020.535-0.129-0.687...-0.088-2.628-0.8452.078-0.2772.1320.609-0.1040.3120.979
12510.7760.914-0.4941.347-0.8670.4800.578-0.3130.203...-0.683-0.0660.0250.606-0.353-1.133-3.1380.281-0.625-0.761
22521.7500.509-0.0570.835-0.4761.428-0.701-2.009-1.378...-0.0940.351-0.607-0.737-0.0310.7010.9760.135-1.3272.463
3253-0.556-1.855-0.6820.5781.5920.512-1.4190.7220.511...-0.336-0.7870.255-0.031-0.8360.9162.4111.053-1.601-1.529
42540.754-0.2451.173-1.6230.0090.3700.781-1.763-1.432...2.184-1.0900.2161.186-0.1430.322-0.068-0.156-1.1530.825
\n", 509 | "

5 rows × 301 columns

\n", 510 | "
" 511 | ], 512 | "text/plain": [ 513 | " id 0 1 2 3 4 5 6 7 8 ... \\\n", 514 | "0 250 0.500 -1.033 -1.595 0.309 -0.714 0.502 0.535 -0.129 -0.687 ... \n", 515 | "1 251 0.776 0.914 -0.494 1.347 -0.867 0.480 0.578 -0.313 0.203 ... \n", 516 | "2 252 1.750 0.509 -0.057 0.835 -0.476 1.428 -0.701 -2.009 -1.378 ... \n", 517 | "3 253 -0.556 -1.855 -0.682 0.578 1.592 0.512 -1.419 0.722 0.511 ... \n", 518 | "4 254 0.754 -0.245 1.173 -1.623 0.009 0.370 0.781 -1.763 -1.432 ... \n", 519 | "\n", 520 | " 290 291 292 293 294 295 296 297 298 299 \n", 521 | "0 -0.088 -2.628 -0.845 2.078 -0.277 2.132 0.609 -0.104 0.312 0.979 \n", 522 | "1 -0.683 -0.066 0.025 0.606 -0.353 -1.133 -3.138 0.281 -0.625 -0.761 \n", 523 | "2 -0.094 0.351 -0.607 -0.737 -0.031 0.701 0.976 0.135 -1.327 2.463 \n", 524 | "3 -0.336 -0.787 0.255 -0.031 -0.836 0.916 2.411 1.053 -1.601 -1.529 \n", 525 | "4 2.184 -1.090 0.216 1.186 -0.143 0.322 -0.068 -0.156 -1.153 0.825 \n", 526 | "\n", 527 | "[5 rows x 301 columns]" 528 | ] 529 | }, 530 | "execution_count": 59, 531 | "metadata": {}, 532 | "output_type": "execute_result" 533 | } 534 | ], 535 | "source": [ 536 | "test.head()" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": 172, 542 | "metadata": {}, 543 | "outputs": [ 544 | { 545 | "data": { 546 | "text/plain": [ 547 | "array([0.7 , 0.72, 0.68, 0.8 , 0.74])" 548 | ] 549 | }, 550 | "execution_count": 172, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "from sklearn.linear_model import LogisticRegression\n", 557 | "\n", 558 | "X=train.drop(['id','target'],axis=1)\n", 559 | "y=train.target\n", 560 | "\n", 561 | "clf2 = LogisticRegression(class_weight='balanced', solver='liblinear', penalty ='l1',C=0.8).fit(X, y)\n", 562 | "scores = cross_val_score(clf2, X, y, cv=5)\n", 563 | "scores" 564 | ] 565 | }, 566 | { 567 | "cell_type": "code", 568 | "execution_count": 191, 569 | "metadata": {}, 570 | "outputs": [ 571 | { 572 | "data": { 573 | 
"text/plain": [ 574 | "array([0.72, 0.7 , 0.64, 0.76, 0.64])" 575 | ] 576 | }, 577 | "execution_count": 191, 578 | "metadata": {}, 579 | "output_type": "execute_result" 580 | } 581 | ], 582 | "source": [ 583 | "from sklearn.linear_model import SGDClassifier\n", 584 | "\n", 585 | "X=train.drop(['id','target'],axis=1)\n", 586 | "y=train.target\n", 587 | "\n", 588 | "clf3 = SGDClassifier(alpha=0.008, average=False, class_weight=None,\n", 589 | " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n", 590 | " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=1000,\n", 591 | " n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n", 592 | " power_t=0.5, random_state=None, shuffle=True, tol=0.001,\n", 593 | " validation_fraction=0.1, verbose=0, warm_start=False).fit(X, y)\n", 594 | "scores = cross_val_score(clf3, X, y, cv=5)\n", 595 | "scores" 596 | ] 597 | }, 598 | { 599 | "cell_type": "code", 600 | "execution_count": 210, 601 | "metadata": {}, 602 | "outputs": [ 603 | { 604 | "data": { 605 | "text/plain": [ 606 | "array([0.6 , 0.68, 0.7 , 0.7 , 0.7 ])" 607 | ] 608 | }, 609 | "execution_count": 210, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "from sklearn.ensemble import GradientBoostingClassifier\n", 616 | "\n", 617 | "X=train.drop(['id','target'],axis=1)\n", 618 | "y=train.target\n", 619 | "\n", 620 | "clf4 = GradientBoostingClassifier(loss='deviance', learning_rate=0.007, n_estimators=200,min_samples_split=2,\n", 621 | " validation_fraction=0.1,tol=0.0001).fit(X, y)\n", 622 | "scores = cross_val_score(clf4, X, y, cv=5)\n", 623 | "scores\n" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 174, 629 | "metadata": {}, 630 | "outputs": [ 631 | { 632 | "name": "stdout", 633 | "output_type": "stream", 634 | "text": [ 635 | " precision recall f1-score support\n", 636 | "\n", 637 | " 0.0 1.00 1.00 1.00 90\n", 638 | " 1.0 1.00 1.00 1.00 160\n", 639 | "\n", 640 | 
" micro avg 1.00 1.00 1.00 250\n", 641 | " macro avg 1.00 1.00 1.00 250\n", 642 | "weighted avg 1.00 1.00 1.00 250\n", 643 | "\n" 644 | ] 645 | } 646 | ], 647 | "source": [ 648 | "test_predict= clf3.predict(X)\n", 649 | "\n", 650 | "\n", 651 | "from sklearn.metrics import classification_report\n", 652 | "\n", 653 | "print(classification_report(y,test_predict))" 654 | ] 655 | }, 656 | { 657 | "cell_type": "code", 658 | "execution_count": 211, 659 | "metadata": {}, 660 | "outputs": [], 661 | "source": [ 662 | "test_predict= 0.5*clf2.predict_proba(test.drop('id',axis=1))[:,1] + 0.2*clf3.predict_proba(test.drop('id',axis=1))[:,1] +0.3*clf4.predict_proba(test.drop('id',axis=1))[:,1]" 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": 216, 668 | "metadata": {}, 669 | "outputs": [], 670 | "source": [ 671 | "test_predict=clf3.predict_proba(test.drop('id',axis=1))[:,1]" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 217, 677 | "metadata": {}, 678 | "outputs": [], 679 | "source": [ 680 | "ss=pd.read_csv('test.csv')\n", 681 | "ids = ss['id']\n", 682 | "\n", 683 | "submission_file = open(\"overfit.csv\", \"w\")\n", 684 | "\n", 685 | "import csv as csv\n", 686 | "\n", 687 | "open_file_object = csv.writer(submission_file)\n", 688 | "\n", 689 | "# Write the header of the csv\n", 690 | "open_file_object.writerow([\"id\",\"target\"])\n", 691 | "\n", 692 | "# Write the rows of the csv\n", 693 | "open_file_object.writerows(zip(ids, test_predict))\n", 694 | "\n", 695 | "# Close the file\n", 696 | "submission_file.close()" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 237, 702 | "metadata": {}, 703 | "outputs": [], 704 | "source": [ 705 | "a=[]\n", 706 | "for i in train.drop(['id','target'],axis=1).columns:\n", 707 | " \n", 708 | " a.append(train[i].corr(train['target']))\n", 709 | " \n" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 243, 715 | "metadata": {}, 716 | 
"outputs": [], 717 | "source": [ 718 | "d={'col1':a}\n", 719 | "d=pd.DataFrame(data=d)\n" 720 | ] 721 | }, 722 | { 723 | "cell_type": "code", 724 | "execution_count": 249, 725 | "metadata": {}, 726 | "outputs": [ 727 | { 728 | "data": { 729 | "text/plain": [ 730 | "Int64Index([ 4, 16, 39, 43, 63, 73, 80, 82, 90, 91, 98, 108, 117,\n", 731 | " 127, 129, 133, 134, 150, 165, 189, 194, 209, 217, 220, 230, 237,\n", 732 | " 239, 252, 258, 276, 295, 298],\n", 733 | " dtype='int64')" 734 | ] 735 | }, 736 | "execution_count": 249, 737 | "metadata": {}, 738 | "output_type": "execute_result" 739 | } 740 | ], 741 | "source": [ 742 | "d[d.values<-0.1].index" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 265, 748 | "metadata": {}, 749 | "outputs": [], 750 | "source": [ 751 | "X=train.drop(['4', '16', '39', '43', '63', '73', '80', '82', '90', '91', '98', '108', '117',\n", 752 | " '127', '129', '133', '134', '150', '165', '189', '194', '209', '217', '220', '230', '237',\n", 753 | " '239', '252', '258', '276', '295', '298'],axis=1)\n", 754 | "\n", 755 | "N_test=test.drop(['4', '16', '39', '43', '63', '73', '80', '82', '90', '91', '98', '108', '117',\n", 756 | " '127', '129', '133', '134', '150', '165', '189', '194', '209', '217', '220', '230', '237',\n", 757 | " '239', '252', '258', '276', '295', '298'],axis=1)\n", 758 | "y=X.target" 759 | ] 760 | }, 761 | { 762 | "cell_type": "code", 763 | "execution_count": 307, 764 | "metadata": {}, 765 | "outputs": [ 766 | { 767 | "data": { 768 | "text/plain": [ 769 | "array([0.68, 0.66, 0.66, 0.6 , 0.6 ])" 770 | ] 771 | }, 772 | "execution_count": 307, 773 | "metadata": {}, 774 | "output_type": "execute_result" 775 | } 776 | ], 777 | "source": [ 778 | "from sklearn.linear_model import LogisticRegression\n", 779 | "\n", 780 | "\n", 781 | "#X=X.drop(['id','target'],axis=1)\n", 782 | "\n", 783 | "\n", 784 | "clf2 = LogisticRegression(class_weight='balanced', solver='liblinear', penalty ='l1',C=0.122).fit(X, y)\n", 
785 | "scores = cross_val_score(clf2, X, y, cv=5)\n", 786 | "scores" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": 276, 792 | "metadata": {}, 793 | "outputs": [ 794 | { 795 | "data": { 796 | "text/plain": [ 797 | "array([0.64, 0.66, 0.68, 0.72, 0.7 ])" 798 | ] 799 | }, 800 | "execution_count": 276, 801 | "metadata": {}, 802 | "output_type": "execute_result" 803 | } 804 | ], 805 | "source": [ 806 | "from sklearn.ensemble import GradientBoostingClassifier\n", 807 | "\n", 808 | "\n", 809 | "\n", 810 | "clf4 = GradientBoostingClassifier(loss='deviance', learning_rate=0.0042, n_estimators=200,min_samples_split=2,\n", 811 | " validation_fraction=0.1,tol=0.0001).fit(X, y)\n", 812 | "scores = cross_val_score(clf4, X, y, cv=5)\n", 813 | "scores" 814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": 295, 819 | "metadata": {}, 820 | "outputs": [ 821 | { 822 | "data": { 823 | "text/plain": [ 824 | "array([0.62, 0.6 , 0.58, 0.72, 0.6 ])" 825 | ] 826 | }, 827 | "execution_count": 295, 828 | "metadata": {}, 829 | "output_type": "execute_result" 830 | } 831 | ], 832 | "source": [ 833 | "from sklearn.linear_model import SGDClassifier\n", 834 | "\n", 835 | "\n", 836 | "\n", 837 | "clf3 = SGDClassifier(alpha=0.0092, average=False, class_weight=None,\n", 838 | " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n", 839 | " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=1000,\n", 840 | " n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n", 841 | " power_t=0.5, random_state=None, shuffle=True, tol=0.001,\n", 842 | " validation_fraction=0.1, verbose=0, warm_start=False).fit(X, y)\n", 843 | "scores = cross_val_score(clf3, X, y, cv=5)\n", 844 | "scores" 845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": 299, 850 | "metadata": {}, 851 | "outputs": [], 852 | "source": [ 853 | "test_predict= clf2.predict_proba(N_test.drop('id',axis=1))[:,1] " 854 | ] 855 | }, 856 | { 857 | 
"cell_type": "code", 858 | "execution_count": 300, 859 | "metadata": {}, 860 | "outputs": [], 861 | "source": [ 862 | "ss=pd.read_csv('test.csv')\n", 863 | "ids = ss['id']\n", 864 | "\n", 865 | "submission_file = open(\"overfit.csv\", \"w\")\n", 866 | "\n", 867 | "import csv as csv\n", 868 | "\n", 869 | "open_file_object = csv.writer(submission_file)\n", 870 | "\n", 871 | "# Write the header of the csv\n", 872 | "open_file_object.writerow([\"id\",\"target\"])\n", 873 | "\n", 874 | "# Write the rows of the csv\n", 875 | "open_file_object.writerows(zip(ids, test_predict))\n", 876 | "\n", 877 | "# Close the file\n", 878 | "submission_file.close()" 879 | ] 880 | }, 881 | { 882 | "cell_type": "code", 883 | "execution_count": null, 884 | "metadata": {}, 885 | "outputs": [], 886 | "source": [] 887 | } 888 | ], 889 | "metadata": { 890 | "kernelspec": { 891 | "display_name": "Python 3", 892 | "language": "python", 893 | "name": "python3" 894 | }, 895 | "language_info": { 896 | "codemirror_mode": { 897 | "name": "ipython", 898 | "version": 3 899 | }, 900 | "file_extension": ".py", 901 | "mimetype": "text/x-python", 902 | "name": "python", 903 | "nbconvert_exporter": "python", 904 | "pygments_lexer": "ipython3", 905 | "version": "3.7.1" 906 | } 907 | }, 908 | "nbformat": 4, 909 | "nbformat_minor": 2 910 | } 911 | --------------------------------------------------------------------------------