├── ML.png
├── Reducing Traffic Mortality in the USA
│   └── datasets
│       ├── miles-driven.csv
│       └── road-accidents.csv
├── Future Sales
│   ├── shops.csv
│   ├── item_categories.csv
│   └── Future Sales.ipynb
├── Deeplearning.ipynb
├── README.md
├── Future Sales 1.1.ipynb
├── Give Life_ Predict Blood Donations
│   ├── datasets
│   │   └── transfusion.data
│   └── notebook.ipynb
├── Predicting Car Prices
│   └── imports-85.data
├── Predicting Bike Rentals
│   └── Basics.ipynb
├── Prediciting Titanic Survival
│   └── test.csv
└── Overfit.ipynb
/ML.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ammarshaikh123/Projects-on-Machine-Learning/master/ML.png
--------------------------------------------------------------------------------
/Reducing Traffic Mortality in the USA/datasets/miles-driven.csv:
--------------------------------------------------------------------------------
1 | state|million_miles_annually
2 | Alabama|64914
3 | Alaska|4593
4 | Arizona|59575
5 | Arkansas|32953
6 | California|320784
7 | Colorado|46606
8 | Connecticut|31197
9 | Delaware|9028
10 | District of Columbia|3568
11 | Florida|191855
12 | Georgia|108454
13 | Hawaii|10066
14 | Idaho|15937
15 | Illinois|103234
16 | Indiana|76485
17 | Iowa|31274
18 | Kansas|30021
19 | Kentucky|48061
20 | Louisiana|46513
21 | Maine|14248
22 | Maryland|56221
23 | Massachusetts|54792
24 | Michigan|94754
25 | Minnesota|56685
26 | Mississippi|38851
27 | Missouri|68789
28 | Montana|11660
29 | Nebraska|19093
30 | Nevada|24189
31 | New Hampshire|12720
32 | New Jersey|73094
33 | New Mexico|25650
34 | New York|127726
35 | North Carolina|103772
36 | North Dakota|9131
37 | Ohio|111990
38 | Oklahoma|47464
39 | Oregon|33373
40 | Pennsylvania|99204
41 | Rhode Island|7901
42 | South Carolina|48730
43 | South Dakota|9002
44 | Tennessee|70751
45 | Texas|237440
46 | Utah|26222
47 | Vermont|7141
48 | Virginia|80974
49 | Washington|56955
50 | West Virginia|18963
51 | Wisconsin|58554
52 | Wyoming|9245
53 |
--------------------------------------------------------------------------------
/Future Sales/shops.csv:
--------------------------------------------------------------------------------
1 | shop_name,shop_id
2 | "!Якутск Орджоникидзе, 56 фран",0
3 | "!Якутск ТЦ ""Центральный"" фран",1
4 | "Адыгея ТЦ ""Мега""",2
5 | "Балашиха ТРК ""Октябрь-Киномир""",3
6 | "Волжский ТЦ ""Волга Молл""",4
7 | "Вологда ТРЦ ""Мармелад""",5
8 | "Воронеж (Плехановская, 13)",6
9 | "Воронеж ТРЦ ""Максимир""",7
10 | "Воронеж ТРЦ Сити-Парк ""Град""",8
11 | Выездная Торговля,9
12 | Жуковский ул. Чкалова 39м?,10
13 | Жуковский ул. Чкалова 39м²,11
14 | Интернет-магазин ЧС,12
15 | "Казань ТЦ ""Бехетле""",13
16 | "Казань ТЦ ""ПаркХаус"" II",14
17 | "Калуга ТРЦ ""XXI век""",15
18 | "Коломна ТЦ ""Рио""",16
19 | "Красноярск ТЦ ""Взлетка Плаза""",17
20 | "Красноярск ТЦ ""Июнь""",18
21 | "Курск ТЦ ""Пушкинский""",19
22 | "Москва ""Распродажа""",20
23 | "Москва МТРЦ ""Афи Молл""",21
24 | Москва Магазин С21,22
25 | "Москва ТК ""Буденовский"" (пав.А2)",23
26 | "Москва ТК ""Буденовский"" (пав.К7)",24
27 | "Москва ТРК ""Атриум""",25
28 | "Москва ТЦ ""Ареал"" (Беляево)",26
29 | "Москва ТЦ ""МЕГА Белая Дача II""",27
30 | "Москва ТЦ ""МЕГА Теплый Стан"" II",28
31 | "Москва ТЦ ""Новый век"" (Новокосино)",29
32 | "Москва ТЦ ""Перловский""",30
33 | "Москва ТЦ ""Семеновский""",31
34 | "Москва ТЦ ""Серебряный Дом""",32
35 | "Мытищи ТРК ""XL-3""",33
36 | "Н.Новгород ТРЦ ""РИО""",34
37 | "Н.Новгород ТРЦ ""Фантастика""",35
38 | "Новосибирск ТРЦ ""Галерея Новосибирск""",36
39 | "Новосибирск ТЦ ""Мега""",37
40 | "Омск ТЦ ""Мега""",38
41 | "РостовНаДону ТРК ""Мегацентр Горизонт""",39
42 | "РостовНаДону ТРК ""Мегацентр Горизонт"" Островной",40
43 | "РостовНаДону ТЦ ""Мега""",41
44 | "СПб ТК ""Невский Центр""",42
45 | "СПб ТК ""Сенная""",43
46 | "Самара ТЦ ""Мелодия""",44
47 | "Самара ТЦ ""ПаркХаус""",45
48 | "Сергиев Посад ТЦ ""7Я""",46
49 | "Сургут ТРЦ ""Сити Молл""",47
50 | "Томск ТРЦ ""Изумрудный Город""",48
51 | "Тюмень ТРЦ ""Кристалл""",49
52 | "Тюмень ТЦ ""Гудвин""",50
53 | "Тюмень ТЦ ""Зеленый Берег""",51
54 | "Уфа ТК ""Центральный""",52
55 | "Уфа ТЦ ""Семья"" 2",53
56 | "Химки ТЦ ""Мега""",54
57 | Цифровой склад 1С-Онлайн,55
58 | "Чехов ТРЦ ""Карнавал""",56
59 | "Якутск Орджоникидзе, 56",57
60 | "Якутск ТЦ ""Центральный""",58
61 | "Ярославль ТЦ ""Альтаир""",59
62 |
--------------------------------------------------------------------------------
/Reducing Traffic Mortality in the USA/datasets/road-accidents.csv:
--------------------------------------------------------------------------------
1 | ##### LICENSE #####
2 | # This data set is modified from the original at fivethirtyeight (https://github.com/fivethirtyeight/data/tree/master/bad-drivers)
3 | # and it is released under CC BY 4.0 (https://creativecommons.org/licenses/by/4.0/)
4 | ##### COLUMN ABBREVIATIONS #####
5 | # drvr_fatl_col_bmiles = Number of drivers involved in fatal collisions per billion miles (2011)
6 | # perc_fatl_speed = Percentage Of Drivers Involved In Fatal Collisions Who Were Speeding (2009)
7 | # perc_fatl_alcohol = Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired (2011)
8 | # perc_fatl_1st_time = Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents (2011)
9 | ##### DATA BEGIN #####
10 | state|drvr_fatl_col_bmiles|perc_fatl_speed|perc_fatl_alcohol|perc_fatl_1st_time
11 | Alabama|18.8|39|30|80
12 | Alaska|18.1|41|25|94
13 | Arizona|18.6|35|28|96
14 | Arkansas|22.4|18|26|95
15 | California|12|35|28|89
16 | Colorado|13.6|37|28|95
17 | Connecticut|10.8|46|36|82
18 | Delaware|16.2|38|30|99
19 | District of Columbia|5.9|34|27|100
20 | Florida|17.9|21|29|94
21 | Georgia|15.6|19|25|93
22 | Hawaii|17.5|54|41|87
23 | Idaho|15.3|36|29|98
24 | Illinois|12.8|36|34|96
25 | Indiana|14.5|25|29|95
26 | Iowa|15.7|17|25|87
27 | Kansas|17.8|27|24|85
28 | Kentucky|21.4|19|23|76
29 | Louisiana|20.5|35|33|98
30 | Maine|15.1|38|30|84
31 | Maryland|12.5|34|32|99
32 | Massachusetts|8.2|23|35|80
33 | Michigan|14.1|24|28|77
34 | Minnesota|9.6|23|29|88
35 | Mississippi|17.6|15|31|100
36 | Missouri|16.1|43|34|84
37 | Montana|21.4|39|44|85
38 | Nebraska|14.9|13|35|90
39 | Nevada|14.7|37|32|99
40 | New Hampshire|11.6|35|30|83
41 | New Jersey|11.2|16|28|78
42 | New Mexico|18.4|19|27|98
43 | New York|12.3|32|29|80
44 | North Carolina|16.8|39|31|81
45 | North Dakota|23.9|23|42|86
46 | Ohio|14.1|28|34|82
47 | Oklahoma|19.9|32|29|94
48 | Oregon|12.8|33|26|90
49 | Pennsylvania|18.2|50|31|88
50 | Rhode Island|11.1|34|38|79
51 | South Carolina|23.9|38|41|81
52 | South Dakota|19.4|31|33|86
53 | Tennessee|19.5|21|29|81
54 | Texas|19.4|40|38|87
55 | Utah|11.3|43|16|96
56 | Vermont|13.6|30|30|95
57 | Virginia|12.7|19|27|88
58 | Washington|10.6|42|33|86
59 | West Virginia|23.8|34|28|87
60 | Wisconsin|13.8|36|33|84
61 | Wyoming|17.4|42|32|90
62 |
--------------------------------------------------------------------------------
/Future Sales/item_categories.csv:
--------------------------------------------------------------------------------
1 | item_category_name,item_category_id
2 | PC - Гарнитуры/Наушники,0
3 | Аксессуары - PS2,1
4 | Аксессуары - PS3,2
5 | Аксессуары - PS4,3
6 | Аксессуары - PSP,4
7 | Аксессуары - PSVita,5
8 | Аксессуары - XBOX 360,6
9 | Аксессуары - XBOX ONE,7
10 | Билеты (Цифра),8
11 | Доставка товара,9
12 | Игровые консоли - PS2,10
13 | Игровые консоли - PS3,11
14 | Игровые консоли - PS4,12
15 | Игровые консоли - PSP,13
16 | Игровые консоли - PSVita,14
17 | Игровые консоли - XBOX 360,15
18 | Игровые консоли - XBOX ONE,16
19 | Игровые консоли - Прочие,17
20 | Игры - PS2,18
21 | Игры - PS3,19
22 | Игры - PS4,20
23 | Игры - PSP,21
24 | Игры - PSVita,22
25 | Игры - XBOX 360,23
26 | Игры - XBOX ONE,24
27 | Игры - Аксессуары для игр,25
28 | Игры Android - Цифра,26
29 | Игры MAC - Цифра,27
30 | Игры PC - Дополнительные издания,28
31 | Игры PC - Коллекционные издания,29
32 | Игры PC - Стандартные издания,30
33 | Игры PC - Цифра,31
34 | "Карты оплаты (Кино, Музыка, Игры)",32
35 | Карты оплаты - Live!,33
36 | Карты оплаты - Live! (Цифра),34
37 | Карты оплаты - PSN,35
38 | Карты оплаты - Windows (Цифра),36
39 | Кино - Blu-Ray,37
40 | Кино - Blu-Ray 3D,38
41 | Кино - Blu-Ray 4K,39
42 | Кино - DVD,40
43 | Кино - Коллекционное,41
44 | "Книги - Артбуки, энциклопедии",42
45 | Книги - Аудиокниги,43
46 | Книги - Аудиокниги (Цифра),44
47 | Книги - Аудиокниги 1С,45
48 | Книги - Бизнес литература,46
49 | "Книги - Комиксы, манга",47
50 | Книги - Компьютерная литература,48
51 | Книги - Методические материалы 1С,49
52 | Книги - Открытки,50
53 | Книги - Познавательная литература,51
54 | Книги - Путеводители,52
55 | Книги - Художественная литература,53
56 | Книги - Цифра,54
57 | Музыка - CD локального производства,55
58 | Музыка - CD фирменного производства,56
59 | Музыка - MP3,57
60 | Музыка - Винил,58
61 | Музыка - Музыкальное видео,59
62 | Музыка - Подарочные издания,60
63 | Подарки - Атрибутика,61
64 | "Подарки - Гаджеты, роботы, спорт",62
65 | Подарки - Мягкие игрушки,63
66 | Подарки - Настольные игры,64
67 | Подарки - Настольные игры (компактные),65
68 | "Подарки - Открытки, наклейки",66
69 | Подарки - Развитие,67
70 | "Подарки - Сертификаты, услуги",68
71 | Подарки - Сувениры,69
72 | Подарки - Сувениры (в навеску),70
73 | "Подарки - Сумки, Альбомы, Коврики д/мыши",71
74 | Подарки - Фигурки,72
75 | Программы - 1С:Предприятие 8,73
76 | Программы - MAC (Цифра),74
77 | Программы - Для дома и офиса,75
78 | Программы - Для дома и офиса (Цифра),76
79 | Программы - Обучающие,77
80 | Программы - Обучающие (Цифра),78
81 | Служебные,79
82 | Служебные - Билеты,80
83 | Чистые носители (шпиль),81
84 | Чистые носители (штучные),82
85 | Элементы питания,83
86 |
--------------------------------------------------------------------------------
/Deeplearning.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true},"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load in \n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the \"../input/\" directory.\n# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory\n\nimport os\nprint(os.listdir(\"../input\"))\n\n# Any results you write to the current directory are saved as output.","execution_count":null,"outputs":[]},{"metadata":{"_cell_guid":"79c7e3d0-c299-4dcb-8224-4455121ee9b0","_uuid":"d629ff2d2480ee46fbb7e2d37f6b5fab8052498a","trusted":true},"cell_type":"code","source":"%reload_ext autoreload\n%autoreload 2\n%matplotlib inline\n\nfrom fastai import *\nfrom fastai.vision import *","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"bs = 64","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"train_df = pd.read_csv(\"../input/train.csv\")\ntest_df = pd.read_csv(\"../input/sample_submission.csv\")\n\ntest_img = ImageList.from_df(test_df, path='../input/test', folder='test')\ntrain_img = (ImageList.from_df(train_df, path='../input/train', folder='train')\n .random_split_by_pct(0.2)\n .label_from_df()\n .add_test(test_img)\n .transform(get_transforms(flip_vert=True), size=128)\n .databunch(path='.', bs=64)\n .normalize(imagenet_stats)\n )","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"train_img.show_batch(rows=3, figsize=(3,4))","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"learn = create_cnn(train_img,models.resnet34, metrics=[error_rate, accuracy])","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"learn.unfreeze()\nlearn.lr_find()\nlearn.recorder.plot()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"learn.fit(epochs=5,lr=2e-6)","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"preds,_ = learn.get_preds(ds_type=DatasetType.Test)\npreds.shape","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"classes = preds.argmax(1)\n\ntest_df.has_cactus = classes\ntest_df.head()","execution_count":null,"outputs":[]},{"metadata":{"trusted":true},"cell_type":"code","source":"test_df.to_csv('submission_new.csv', index=False)","execution_count":null,"outputs":[]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.6.4","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":1}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Machine-Learning
2 |
3 | This repo contains my machine learning projects. Each project covers a different set of topics; you can find a description of each one below.
4 |
5 | 1)[Classify Song Genres from Audio Data](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Classify%20Song%20Genres%20from%20Audio%20Data)
6 |
7 | This project is about recommending new music to users by analysing audio features. I used PCA for dimensionality reduction, tried different models such as a Decision Tree and Logistic Regression, and then used cross-validation to evaluate each model's performance. A minimal sketch of that flow is shown below.
8 |
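A minimal sketch of the PCA-then-classify-then-cross-validate flow, assuming generic scikit-learn tools and made-up data in place of the project's real audio features:

```python
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Stand-in feature matrix and binary genre labels (hypothetical data)
rng = np.random.default_rng(42)
X = rng.normal(size=(500, 20))
y = rng.integers(0, 2, size=500)

for model in (DecisionTreeClassifier(random_state=42),
              LogisticRegression(max_iter=1000, random_state=42)):
    # Scale, project onto principal components, then classify
    pipe = make_pipeline(StandardScaler(), PCA(n_components=6), model)
    scores = cross_val_score(pipe, X, y, cv=5)
    print(type(model).__name__, round(scores.mean(), 3))
```
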
9 | 2)[Give Life_ Predict Blood Donations](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Give%20Life_%20Predict%20Blood%20Donations)
10 |
11 | This project is a binary classification task where I predict whether a donor who has donated blood in the previous 6 months will donate again. It was fun, as I got to implement pipelines, which was something new for me; a small sketch of the idea follows.
12 |
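A tiny sketch of the pipeline idea, assuming scikit-learn and stand-in data (the actual notebook works on datasets/transfusion.data and log-normalizes its high-variance monetary column):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

# Chain preprocessing and the classifier so both are fit in one step
pipe = Pipeline([
    ('log', FunctionTransformer(np.log1p)),  # tame high-variance columns
    ('clf', LogisticRegression(solver='liblinear', random_state=42)),
])

rng = np.random.default_rng(0)
X = np.abs(rng.normal(size=(100, 4))) * 1000  # positive, high-variance features
y = rng.integers(0, 2, size=100)
pipe.fit(X, y)
print(pipe.predict_proba(X[:2]).round(3))
```
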
13 | 3)[Prediciting Titanic Survival](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Prediciting%20Titanic%20Survival)
14 |
15 |
16 | In this project I worked on the Kaggle Titanic data set. Based on the available features, I tried to predict whether a passenger would survive, and landed in the top 14% of the competition. I used XGBoost along with hyperparameter tuning to get my results (sketched below).
17 |
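Roughly the recipe, sketched with a made-up feature matrix in place of the engineered Titanic features (the grid values are illustrative, not the ones from the notebook):

```python
import numpy as np
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 8))      # stand-in for engineered features
y = rng.integers(0, 2, size=300)   # stand-in for the Survived column

param_grid = {
    'max_depth': [3, 5],
    'n_estimators': [100, 300],
    'learning_rate': [0.05, 0.1],
}
search = GridSearchCV(XGBClassifier(eval_metric='logloss'), param_grid, cv=3)
search.fit(X, y)
print(search.best_params_)
```
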
18 | 4)[Predicting Bike Rentals](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20Bike%20Rentals)
19 |
20 | Many American cities have communal bike-sharing stations where you can rent bicycles by the hour or day; Washington, D.C. is one of them. In this project, I tried to predict the total number of bikes rented in a given hour using variables like monthly rentals, weekly rentals, temperature, and humidity. I used Linear Regression as well as Decision Trees and compared which model gave better results; see the sketch below.
21 |
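A compact sketch of that comparison on synthetic data (the real project uses the hour-level bike-sharing dataset, not these made-up features):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Synthetic stand-in for hourly features (temperature, humidity, ...) and counts
rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 5))
y = 50 + 10 * X[:, 0] - 5 * X[:, 1] + rng.normal(scale=5, size=1000)

X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=42)
for model in (LinearRegression(), DecisionTreeRegressor(random_state=42)):
    model.fit(X_tr, y_tr)
    mse = mean_squared_error(y_te, model.predict(X_te))
    print(type(model).__name__, round(mse, 1))
```
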
22 |
23 | 5)[Predicting Car Prices](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20Car%20Prices)
24 |
25 | In this project I tried to predict the price of a car by training a model on attributes like fuel type, engine type, compression ratio, and horsepower. I used a k-nearest neighbors (KNeighbors) regressor to predict the price; a miniature version follows.
26 |
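The k-nearest-neighbors idea in miniature, with stand-in numeric columns (the real notebook works on imports-85.data):

```python
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Stand-in numeric attributes (horsepower, engine size, ...) and prices
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 4))
y = 15000 + 3000 * X[:, 0] + rng.normal(scale=500, size=200)

# Scaling matters for KNN: otherwise large-unit columns dominate the distance
knn = make_pipeline(StandardScaler(), KNeighborsRegressor(n_neighbors=5))
knn.fit(X, y)
print(knn.predict(X[:3]).round(0))
```
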
27 | 6)[Predicting Credit Card Approvals](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20Credit%20Card%20Approvals)
28 |
29 | I worked on this project to get a feel for credit-risk analysis. From the given dataset I tried to predict whether a credit card application should be approved based on the available features. I used logistic regression as my model, with hyperparameter tuning to improve its performance.
30 |
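A hedged sketch of logistic regression with hyperparameter tuning over the regularization strength (random stand-in data; the actual features come from the credit approvals dataset):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = rng.normal(size=(400, 10))
y = rng.integers(0, 2, size=400)

pipe = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
grid = {'logisticregression__C': [0.01, 0.1, 1, 10]}  # regularization strength
search = GridSearchCV(pipe, grid, cv=5)
search.fit(X, y)
print(search.best_params_)
```
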
31 | 7)[Predicting board game reviews](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Predicting%20board%20game%20reviews)
32 |
33 | In this project I worked with a data set of 80,000 board games and their associated review scores. I used various review attributes to predict a board game's average rating, relying on correlation to find the relevant variables and on linear regression for modelling; a sketch of that approach follows.
34 |
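The correlation-then-regression approach, sketched on a synthetic frame (the column names are made up to mimic review statistics, not the project's real schema):

```python
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(1000, 4)),
                  columns=['users_rated', 'total_owners',
                           'total_comments', 'average_rating'])
df['average_rating'] += 0.5 * df['users_rated']  # plant one real relationship

# Keep only features reasonably correlated with the target
corr = df.corr()['average_rating'].drop('average_rating')
features = corr[corr.abs() > 0.2].index.tolist()

model = LinearRegression().fit(df[features], df['average_rating'])
print(features, model.coef_.round(2))
```
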
35 | 8)[Reducing Traffic Mortality in the USA](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/tree/master/Reducing%20Traffic%20Mortality%20in%20the%20USA)
36 |
37 | This project was motivated by the rate of fatal road accidents in the US. I trained an unsupervised learning model, used PCA for dimensionality reduction, and also demonstrated a concept called masking using multivariate regression.
38 |
39 | 9)[Predicting TMDB Box Office Collections ](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/blob/master/IMDB.ipynb)
40 |
41 | In this project, I tried to predict the box office collections of movies based on various features provided in the data set. It was a very interesting project to work on, as the data format was new to me and I got to explore new areas of data analytics.
42 |
43 | 10)[Don't Overfit ](https://github.com/ammarshaikh123/Projects-on-Machine-Learning/blob/master/Overfit.ipynb)
44 |
45 | This is a Kaggle competition I worked on, with only 250 rows in the training set and over 1,000 rows in the test set. The task was to predict the test set without overfitting the training set. I used several models, such as XGBoost and linear regression, and later combined them to achieve better performance; the blending idea is sketched below.
46 |
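A minimal sketch of the blending idea on random stand-in arrays shaped loosely like the competition data; logistic regression stands in here for the linear model, since the target is binary:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier

# Random stand-ins shaped loosely like the competition data
rng = np.random.default_rng(0)
X_train = rng.normal(size=(250, 300))
y_train = rng.integers(0, 2, size=250)
X_test = rng.normal(size=(1000, 300))

# Averaging two very different models' probabilities is a simple blend that
# often generalizes better than either model alone on a tiny training set
logreg = LogisticRegression(C=0.1, solver='liblinear').fit(X_train, y_train)
xgb = XGBClassifier(max_depth=2, n_estimators=100).fit(X_train, y_train)
blend = 0.5 * logreg.predict_proba(X_test)[:, 1] \
      + 0.5 * xgb.predict_proba(X_test)[:, 1]
print(blend[:5].round(3))
```
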
47 | ### A few snapshots of visualizations from the projects above
48 |
49 | 
50 |
--------------------------------------------------------------------------------
/Future Sales 1.1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "import seaborn as sns\n",
13 | "\n",
14 | "from datetime import datetime, date\n",
15 | "\n",
16 | "\n",
17 | "from sklearn.preprocessing import StandardScaler\n",
18 | "\n"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "%matplotlib inline\n",
28 | "\n",
29 | "test = pd.read_csv('test.csv')\n",
30 | "item_categories = pd.read_csv('item_categories.csv')\n",
31 | "shops = pd.read_csv('shops.csv')\n",
32 | "train = pd.read_csv('sales_train.csv', parse_dates=['date'], dayfirst=True)  # dates are dd.mm.yyyy"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 3,
38 | "metadata": {},
39 | "outputs": [],
40 | "source": [
41 | "items=pd.read_csv('items.csv')"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 4,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "import os\n",
51 | "\n",
52 | "os.makedirs('tmp', exist_ok=True)\n",
53 | "# Cache the raw frames under tmp/ for fast reloads later\n",
54 | "train.to_feather('tmp/train-raw')\n",
55 | "test.to_feather('tmp/test-raw')\n",
56 | "items.to_feather('tmp/items-raw')\n"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 5,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
142 | "text/plain": [
143 | " date date_block_num shop_id item_id item_price item_cnt_day\n",
144 | "0 2013-02-01 0 59 22154 999.00 1.0\n",
145 | "1 2013-03-01 0 25 2552 899.00 1.0\n",
146 | "2 2013-05-01 0 25 2552 899.00 -1.0\n",
147 | "3 2013-06-01 0 25 2554 1709.05 1.0\n",
148 | "4 2013-01-15 0 25 2555 1099.00 1.0"
149 | ]
150 | },
151 | "execution_count": 5,
152 | "metadata": {},
153 | "output_type": "execute_result"
154 | }
155 | ],
156 | "source": [
157 | "train.head()"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 6,
163 | "metadata": {},
164 | "outputs": [
165 | {
166 | "data": {
226 | "text/plain": [
227 | " ID shop_id item_id\n",
228 | "0 0 5 5037\n",
229 | "1 1 5 5320\n",
230 | "2 2 5 5233\n",
231 | "3 3 5 5232\n",
232 | "4 4 5 5268"
233 | ]
234 | },
235 | "execution_count": 6,
236 | "metadata": {},
237 | "output_type": "execute_result"
238 | }
239 | ],
240 | "source": [
241 | "test.head()"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 7,
247 | "metadata": {},
248 | "outputs": [],
249 | "source": [
250 | "train['year']=train.date.dt.year"
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": 9,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "train['month']=train.date.dt.month\n",
260 | "train['week']=train.date.dt.week\n",
261 | "train['dow']=train.date.dt.dayofweek"
262 | ]
263 | },
264 | {
265 | "cell_type": "code",
266 | "execution_count": null,
267 | "metadata": {},
268 | "outputs": [],
269 | "source": [
270 | "train['quarter']=train.date.dt.quarter\n",
271 | "train['is_month_start']=train.date.dt.is_month_start\n"
272 | ]
273 | }
274 | ],
275 | "metadata": {
276 | "kernelspec": {
277 | "display_name": "Python 3",
278 | "language": "python",
279 | "name": "python3"
280 | },
281 | "language_info": {
282 | "codemirror_mode": {
283 | "name": "ipython",
284 | "version": 3
285 | },
286 | "file_extension": ".py",
287 | "mimetype": "text/x-python",
288 | "name": "python",
289 | "nbconvert_exporter": "python",
290 | "pygments_lexer": "ipython3",
291 | "version": "3.7.1"
292 | }
293 | },
294 | "nbformat": 4,
295 | "nbformat_minor": 2
296 | }
297 |
--------------------------------------------------------------------------------
/Give Life_ Predict Blood Donations/datasets/transfusion.data:
--------------------------------------------------------------------------------
1 | Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),"whether he/she donated blood in March 2007"
2 | 2 ,50,12500,98 ,1
3 | 0 ,13,3250,28 ,1
4 | 1 ,16,4000,35 ,1
5 | 2 ,20,5000,45 ,1
6 | 1 ,24,6000,77 ,0
7 | 4 ,4,1000,4 ,0
8 | 2 ,7,1750,14 ,1
9 | 1 ,12,3000,35 ,0
10 | 2 ,9,2250,22 ,1
11 | 5 ,46,11500,98 ,1
12 | 4 ,23,5750,58 ,0
13 | 0 ,3,750,4 ,0
14 | 2 ,10,2500,28 ,1
15 | 1 ,13,3250,47 ,0
16 | 2 ,6,1500,15 ,1
17 | 2 ,5,1250,11 ,1
18 | 2 ,14,3500,48 ,1
19 | 2 ,15,3750,49 ,1
20 | 2 ,6,1500,15 ,1
21 | 2 ,3,750,4 ,1
22 | 2 ,3,750,4 ,1
23 | 4 ,11,2750,28 ,0
24 | 2 ,6,1500,16 ,1
25 | 2 ,6,1500,16 ,1
26 | 9 ,9,2250,16 ,0
27 | 4 ,14,3500,40 ,0
28 | 4 ,6,1500,14 ,0
29 | 4 ,12,3000,34 ,1
30 | 4 ,5,1250,11 ,1
31 | 4 ,8,2000,21 ,0
32 | 1 ,14,3500,58 ,0
33 | 4 ,10,2500,28 ,1
34 | 4 ,10,2500,28 ,1
35 | 4 ,9,2250,26 ,1
36 | 2 ,16,4000,64 ,0
37 | 2 ,8,2000,28 ,1
38 | 2 ,12,3000,47 ,1
39 | 4 ,6,1500,16 ,1
40 | 2 ,14,3500,57 ,1
41 | 4 ,7,1750,22 ,1
42 | 2 ,13,3250,53 ,1
43 | 2 ,5,1250,16 ,0
44 | 2 ,5,1250,16 ,1
45 | 2 ,5,1250,16 ,0
46 | 4 ,20,5000,69 ,1
47 | 4 ,9,2250,28 ,1
48 | 2 ,9,2250,36 ,0
49 | 2 ,2,500,2 ,0
50 | 2 ,2,500,2 ,0
51 | 2 ,2,500,2 ,0
52 | 2 ,11,2750,46 ,0
53 | 2 ,11,2750,46 ,1
54 | 2 ,6,1500,22 ,0
55 | 2 ,12,3000,52 ,0
56 | 4 ,5,1250,14 ,1
57 | 4 ,19,4750,69 ,1
58 | 4 ,8,2000,26 ,1
59 | 2 ,7,1750,28 ,1
60 | 2 ,16,4000,81 ,0
61 | 3 ,6,1500,21 ,0
62 | 2 ,7,1750,29 ,0
63 | 2 ,8,2000,35 ,1
64 | 2 ,10,2500,49 ,0
65 | 4 ,5,1250,16 ,1
66 | 2 ,3,750,9 ,1
67 | 3 ,16,4000,74 ,0
68 | 2 ,4,1000,14 ,1
69 | 0 ,2,500,4 ,0
70 | 4 ,7,1750,25 ,0
71 | 1 ,9,2250,51 ,0
72 | 2 ,4,1000,16 ,0
73 | 2 ,4,1000,16 ,0
74 | 4 ,17,4250,71 ,1
75 | 2 ,2,500,4 ,0
76 | 2 ,2,500,4 ,1
77 | 2 ,2,500,4 ,1
78 | 2 ,4,1000,16 ,1
79 | 2 ,2,500,4 ,0
80 | 2 ,2,500,4 ,0
81 | 2 ,2,500,4 ,0
82 | 4 ,6,1500,23 ,1
83 | 2 ,4,1000,16 ,0
84 | 2 ,4,1000,16 ,0
85 | 2 ,4,1000,16 ,0
86 | 2 ,6,1500,28 ,1
87 | 2 ,6,1500,28 ,0
88 | 4 ,2,500,4 ,0
89 | 4 ,2,500,4 ,0
90 | 4 ,2,500,4 ,0
91 | 2 ,7,1750,35 ,1
92 | 4 ,2,500,4 ,1
93 | 4 ,2,500,4 ,0
94 | 4 ,2,500,4 ,0
95 | 4 ,2,500,4 ,0
96 | 12 ,11,2750,23 ,0
97 | 4 ,7,1750,28 ,0
98 | 3 ,17,4250,86 ,0
99 | 4 ,9,2250,38 ,1
100 | 4 ,4,1000,14 ,1
101 | 5 ,7,1750,26 ,1
102 | 4 ,8,2000,34 ,1
103 | 2 ,13,3250,76 ,1
104 | 4 ,9,2250,40 ,0
105 | 2 ,5,1250,26 ,0
106 | 2 ,5,1250,26 ,0
107 | 6 ,17,4250,70 ,0
108 | 0 ,8,2000,59 ,0
109 | 3 ,5,1250,26 ,0
110 | 2 ,3,750,14 ,0
111 | 2 ,10,2500,64 ,0
112 | 4 ,5,1250,23 ,1
113 | 4 ,9,2250,46 ,0
114 | 4 ,5,1250,23 ,0
115 | 4 ,8,2000,40 ,1
116 | 2 ,12,3000,82 ,0
117 | 11 ,24,6000,64 ,0
118 | 2 ,7,1750,46 ,1
119 | 4 ,11,2750,61 ,0
120 | 1 ,7,1750,57 ,0
121 | 2 ,11,2750,79 ,1
122 | 2 ,3,750,16 ,1
123 | 4 ,5,1250,26 ,1
124 | 2 ,6,1500,41 ,1
125 | 2 ,5,1250,33 ,1
126 | 2 ,4,1000,26 ,0
127 | 2 ,5,1250,34 ,0
128 | 4 ,8,2000,46 ,1
129 | 2 ,4,1000,26 ,0
130 | 4 ,8,2000,48 ,1
131 | 2 ,2,500,10 ,1
132 | 4 ,5,1250,28 ,0
133 | 2 ,12,3000,95 ,0
134 | 2 ,2,500,10 ,0
135 | 4 ,6,1500,35 ,0
136 | 2 ,11,2750,88 ,0
137 | 2 ,3,750,19 ,0
138 | 2 ,5,1250,37 ,0
139 | 2 ,12,3000,98 ,0
140 | 9 ,5,1250,19 ,0
141 | 2 ,2,500,11 ,0
142 | 2 ,9,2250,74 ,0
143 | 5 ,14,3500,86 ,0
144 | 4 ,3,750,16 ,0
145 | 4 ,3,750,16 ,0
146 | 4 ,2,500,9 ,1
147 | 4 ,3,750,16 ,1
148 | 6 ,3,750,14 ,0
149 | 2 ,2,500,11 ,0
150 | 2 ,2,500,11 ,1
151 | 2 ,2,500,11 ,0
152 | 2 ,7,1750,58 ,1
153 | 4 ,6,1500,39 ,0
154 | 4 ,11,2750,78 ,0
155 | 2 ,1,250,2 ,1
156 | 2 ,1,250,2 ,0
157 | 2 ,1,250,2 ,0
158 | 2 ,1,250,2 ,0
159 | 2 ,1,250,2 ,0
160 | 2 ,1,250,2 ,0
161 | 2 ,1,250,2 ,0
162 | 2 ,1,250,2 ,0
163 | 2 ,1,250,2 ,0
164 | 2 ,1,250,2 ,0
165 | 2 ,1,250,2 ,1
166 | 2 ,1,250,2 ,1
167 | 2 ,1,250,2 ,1
168 | 2 ,1,250,2 ,0
169 | 2 ,1,250,2 ,0
170 | 2 ,1,250,2 ,0
171 | 2 ,1,250,2 ,0
172 | 2 ,1,250,2 ,0
173 | 2 ,1,250,2 ,0
174 | 2 ,1,250,2 ,0
175 | 2 ,1,250,2 ,0
176 | 2 ,1,250,2 ,0
177 | 11 ,10,2500,35 ,0
178 | 11 ,4,1000,16 ,1
179 | 4 ,5,1250,33 ,1
180 | 4 ,6,1500,41 ,1
181 | 2 ,3,750,22 ,0
182 | 4 ,4,1000,26 ,1
183 | 10 ,4,1000,16 ,0
184 | 2 ,4,1000,35 ,0
185 | 4 ,12,3000,88 ,0
186 | 13 ,8,2000,26 ,0
187 | 11 ,9,2250,33 ,0
188 | 4 ,5,1250,34 ,0
189 | 4 ,4,1000,26 ,0
190 | 8 ,15,3750,77 ,0
191 | 4 ,5,1250,35 ,1
192 | 4 ,7,1750,52 ,0
193 | 4 ,7,1750,52 ,0
194 | 2 ,4,1000,35 ,0
195 | 11 ,11,2750,42 ,0
196 | 2 ,2,500,14 ,0
197 | 2 ,5,1250,47 ,1
198 | 9 ,8,2000,38 ,1
199 | 4 ,6,1500,47 ,0
200 | 11 ,7,1750,29 ,0
201 | 9 ,9,2250,45 ,0
202 | 4 ,6,1500,52 ,0
203 | 4 ,7,1750,58 ,0
204 | 6 ,2,500,11 ,1
205 | 4 ,7,1750,58 ,0
206 | 11 ,9,2250,38 ,0
207 | 11 ,6,1500,26 ,0
208 | 2 ,2,500,16 ,0
209 | 2 ,7,1750,76 ,0
210 | 11 ,6,1500,27 ,0
211 | 11 ,3,750,14 ,0
212 | 4 ,1,250,4 ,0
213 | 4 ,1,250,4 ,0
214 | 4 ,1,250,4 ,0
215 | 4 ,1,250,4 ,0
216 | 4 ,1,250,4 ,0
217 | 4 ,1,250,4 ,1
218 | 4 ,1,250,4 ,0
219 | 4 ,1,250,4 ,0
220 | 4 ,1,250,4 ,0
221 | 4 ,1,250,4 ,0
222 | 4 ,1,250,4 ,0
223 | 4 ,1,250,4 ,1
224 | 4 ,1,250,4 ,1
225 | 4 ,1,250,4 ,0
226 | 4 ,1,250,4 ,1
227 | 4 ,1,250,4 ,1
228 | 4 ,1,250,4 ,0
229 | 4 ,3,750,24 ,0
230 | 4 ,1,250,4 ,0
231 | 4 ,1,250,4 ,0
232 | 4 ,1,250,4 ,0
233 | 4 ,1,250,4 ,1
234 | 4 ,1,250,4 ,0
235 | 10 ,8,2000,39 ,0
236 | 14 ,7,1750,26 ,0
237 | 8 ,10,2500,63 ,0
238 | 11 ,3,750,15 ,0
239 | 4 ,2,500,14 ,0
240 | 2 ,4,1000,43 ,0
241 | 8 ,9,2250,58 ,0
242 | 8 ,8,2000,52 ,1
243 | 11 ,22,5500,98 ,0
244 | 4 ,3,750,25 ,1
245 | 11 ,17,4250,79 ,1
246 | 9 ,2,500,11 ,0
247 | 4 ,5,1250,46 ,0
248 | 11 ,12,3000,58 ,0
249 | 7 ,12,3000,86 ,0
250 | 11 ,2,500,11 ,0
251 | 11 ,2,500,11 ,0
252 | 11 ,2,500,11 ,0
253 | 2 ,6,1500,75 ,0
254 | 11 ,8,2000,41 ,1
255 | 11 ,3,750,16 ,1
256 | 12 ,13,3250,59 ,0
257 | 2 ,3,750,35 ,0
258 | 16 ,8,2000,28 ,0
259 | 11 ,7,1750,37 ,0
260 | 4 ,3,750,28 ,0
261 | 12 ,12,3000,58 ,0
262 | 4 ,4,1000,41 ,0
263 | 11 ,14,3500,73 ,1
264 | 2 ,2,500,23 ,0
265 | 2 ,3,750,38 ,1
266 | 4 ,5,1250,58 ,0
267 | 4 ,4,1000,43 ,1
268 | 3 ,2,500,23 ,0
269 | 11 ,8,2000,46 ,0
270 | 4 ,7,1750,82 ,0
271 | 13 ,4,1000,21 ,0
272 | 16 ,11,2750,40 ,0
273 | 16 ,7,1750,28 ,0
274 | 7 ,2,500,16 ,0
275 | 4 ,5,1250,58 ,0
276 | 4 ,5,1250,58 ,0
277 | 4 ,4,1000,46 ,0
278 | 14 ,13,3250,57 ,0
279 | 4 ,3,750,34 ,0
280 | 14 ,18,4500,78 ,0
281 | 11 ,8,2000,48 ,0
282 | 14 ,16,4000,70 ,0
283 | 14 ,4,1000,22 ,1
284 | 14 ,5,1250,26 ,0
285 | 8 ,2,500,16 ,0
286 | 11 ,5,1250,33 ,0
287 | 11 ,2,500,14 ,0
288 | 4 ,2,500,23 ,0
289 | 9 ,2,500,16 ,1
290 | 14 ,5,1250,28 ,1
291 | 14 ,3,750,19 ,1
292 | 14 ,4,1000,23 ,1
293 | 16 ,12,3000,50 ,0
294 | 11 ,4,1000,28 ,0
295 | 11 ,5,1250,35 ,0
296 | 11 ,5,1250,35 ,0
297 | 2 ,4,1000,70 ,0
298 | 14 ,5,1250,28 ,0
299 | 14 ,2,500,14 ,0
300 | 14 ,2,500,14 ,0
301 | 14 ,2,500,14 ,0
302 | 14 ,2,500,14 ,0
303 | 14 ,2,500,14 ,0
304 | 14 ,2,500,14 ,0
305 | 2 ,3,750,52 ,0
306 | 14 ,6,1500,34 ,0
307 | 11 ,5,1250,37 ,1
308 | 4 ,5,1250,74 ,0
309 | 11 ,3,750,23 ,0
310 | 16 ,4,1000,23 ,0
311 | 16 ,3,750,19 ,0
312 | 11 ,5,1250,38 ,0
313 | 11 ,2,500,16 ,0
314 | 12 ,9,2250,60 ,0
315 | 9 ,1,250,9 ,0
316 | 9 ,1,250,9 ,0
317 | 4 ,2,500,29 ,0
318 | 11 ,2,500,17 ,0
319 | 14 ,4,1000,26 ,0
320 | 11 ,9,2250,72 ,1
321 | 11 ,5,1250,41 ,0
322 | 15 ,16,4000,82 ,0
323 | 9 ,5,1250,51 ,1
324 | 11 ,4,1000,34 ,0
325 | 14 ,8,2000,50 ,1
326 | 16 ,7,1750,38 ,0
327 | 14 ,2,500,16 ,0
328 | 2 ,2,500,41 ,0
329 | 14 ,16,4000,98 ,0
330 | 14 ,4,1000,28 ,1
331 | 16 ,7,1750,39 ,0
332 | 14 ,7,1750,47 ,0
333 | 16 ,6,1500,35 ,0
334 | 16 ,6,1500,35 ,1
335 | 11 ,7,1750,62 ,1
336 | 16 ,2,500,16 ,0
337 | 16 ,3,750,21 ,1
338 | 11 ,3,750,28 ,0
339 | 11 ,7,1750,64 ,0
340 | 11 ,1,250,11 ,1
341 | 9 ,3,750,34 ,0
342 | 14 ,4,1000,30 ,0
343 | 23 ,38,9500,98 ,0
344 | 11 ,6,1500,58 ,0
345 | 11 ,1,250,11 ,0
346 | 11 ,1,250,11 ,0
347 | 11 ,1,250,11 ,0
348 | 11 ,1,250,11 ,0
349 | 11 ,1,250,11 ,0
350 | 11 ,1,250,11 ,0
351 | 11 ,1,250,11 ,0
352 | 11 ,1,250,11 ,0
353 | 11 ,2,500,21 ,0
354 | 11 ,5,1250,50 ,0
355 | 11 ,2,500,21 ,0
356 | 16 ,4,1000,28 ,0
357 | 4 ,2,500,41 ,0
358 | 16 ,6,1500,40 ,0
359 | 14 ,3,750,26 ,0
360 | 9 ,2,500,26 ,0
361 | 21 ,16,4000,64 ,0
362 | 14 ,6,1500,51 ,0
363 | 11 ,2,500,24 ,0
364 | 4 ,3,750,71 ,0
365 | 21 ,13,3250,57 ,0
366 | 11 ,6,1500,71 ,0
367 | 14 ,2,500,21 ,1
368 | 23 ,15,3750,57 ,0
369 | 14 ,4,1000,38 ,0
370 | 11 ,2,500,26 ,0
371 | 16 ,5,1250,40 ,1
372 | 4 ,2,500,51 ,1
373 | 14 ,3,750,31 ,0
374 | 4 ,2,500,52 ,0
375 | 9 ,4,1000,65 ,0
376 | 14 ,4,1000,40 ,0
377 | 11 ,3,750,40 ,1
378 | 14 ,5,1250,50 ,0
379 | 14 ,1,250,14 ,0
380 | 14 ,1,250,14 ,0
381 | 14 ,1,250,14 ,0
382 | 14 ,1,250,14 ,0
383 | 14 ,1,250,14 ,0
384 | 14 ,1,250,14 ,0
385 | 14 ,1,250,14 ,0
386 | 14 ,1,250,14 ,0
387 | 14 ,7,1750,72 ,0
388 | 14 ,1,250,14 ,0
389 | 14 ,1,250,14 ,0
390 | 9 ,3,750,52 ,0
391 | 14 ,7,1750,73 ,0
392 | 11 ,4,1000,58 ,0
393 | 11 ,4,1000,59 ,0
394 | 4 ,2,500,59 ,0
395 | 11 ,4,1000,61 ,0
396 | 16 ,4,1000,40 ,0
397 | 16 ,10,2500,89 ,0
398 | 21 ,2,500,21 ,1
399 | 21 ,3,750,26 ,0
400 | 16 ,8,2000,76 ,0
401 | 21 ,3,750,26 ,1
402 | 18 ,2,500,23 ,0
403 | 23 ,5,1250,33 ,0
404 | 23 ,8,2000,46 ,0
405 | 16 ,3,750,34 ,0
406 | 14 ,5,1250,64 ,0
407 | 14 ,3,750,41 ,0
408 | 16 ,1,250,16 ,0
409 | 16 ,1,250,16 ,0
410 | 16 ,1,250,16 ,0
411 | 16 ,1,250,16 ,0
412 | 16 ,1,250,16 ,0
413 | 16 ,1,250,16 ,0
414 | 16 ,1,250,16 ,0
415 | 16 ,4,1000,45 ,0
416 | 16 ,1,250,16 ,0
417 | 16 ,1,250,16 ,0
418 | 16 ,1,250,16 ,0
419 | 16 ,1,250,16 ,0
420 | 16 ,1,250,16 ,0
421 | 16 ,2,500,26 ,0
422 | 21 ,2,500,23 ,0
423 | 16 ,2,500,27 ,0
424 | 21 ,2,500,23 ,0
425 | 21 ,2,500,23 ,0
426 | 14 ,4,1000,57 ,0
427 | 16 ,5,1250,60 ,0
428 | 23 ,2,500,23 ,0
429 | 14 ,5,1250,74 ,0
430 | 23 ,3,750,28 ,0
431 | 16 ,3,750,40 ,0
432 | 9 ,2,500,52 ,0
433 | 9 ,2,500,52 ,0
434 | 16 ,7,1750,87 ,1
435 | 14 ,4,1000,64 ,0
436 | 14 ,2,500,35 ,0
437 | 16 ,7,1750,93 ,0
438 | 21 ,2,500,25 ,0
439 | 14 ,3,750,52 ,0
440 | 23 ,14,3500,93 ,0
441 | 18 ,8,2000,95 ,0
442 | 16 ,3,750,46 ,0
443 | 11 ,3,750,76 ,0
444 | 11 ,2,500,52 ,0
445 | 11 ,3,750,76 ,0
446 | 23 ,12,3000,86 ,0
447 | 21 ,3,750,35 ,0
448 | 23 ,2,500,26 ,0
449 | 23 ,2,500,26 ,0
450 | 23 ,8,2000,64 ,0
451 | 16 ,3,750,50 ,0
452 | 23 ,3,750,33 ,0
453 | 21 ,3,750,38 ,0
454 | 23 ,2,500,28 ,0
455 | 21 ,1,250,21 ,0
456 | 21 ,1,250,21 ,0
457 | 21 ,1,250,21 ,0
458 | 21 ,1,250,21 ,0
459 | 21 ,1,250,21 ,0
460 | 21 ,1,250,21 ,0
461 | 21 ,1,250,21 ,0
462 | 21 ,1,250,21 ,0
463 | 21 ,1,250,21 ,0
464 | 21 ,1,250,21 ,1
465 | 21 ,1,250,21 ,0
466 | 21 ,1,250,21 ,0
467 | 21 ,5,1250,60 ,0
468 | 23 ,4,1000,45 ,0
469 | 21 ,4,1000,52 ,0
470 | 22 ,1,250,22 ,1
471 | 11 ,2,500,70 ,0
472 | 23 ,5,1250,58 ,0
473 | 23 ,3,750,40 ,0
474 | 23 ,3,750,41 ,0
475 | 14 ,3,750,83 ,0
476 | 21 ,2,500,35 ,0
477 | 26 ,5,1250,49 ,1
478 | 23 ,6,1500,70 ,0
479 | 23 ,1,250,23 ,0
480 | 23 ,1,250,23 ,0
481 | 23 ,1,250,23 ,0
482 | 23 ,1,250,23 ,0
483 | 23 ,1,250,23 ,0
484 | 23 ,1,250,23 ,0
485 | 23 ,1,250,23 ,0
486 | 23 ,1,250,23 ,0
487 | 23 ,4,1000,53 ,0
488 | 21 ,6,1500,86 ,0
489 | 23 ,3,750,48 ,0
490 | 21 ,2,500,41 ,0
491 | 21 ,3,750,64 ,0
492 | 16 ,2,500,70 ,0
493 | 21 ,3,750,70 ,0
494 | 23 ,4,1000,87 ,0
495 | 23 ,3,750,89 ,0
496 | 23 ,2,500,87 ,0
497 | 35 ,3,750,64 ,0
498 | 38 ,1,250,38 ,0
499 | 38 ,1,250,38 ,0
500 | 40 ,1,250,40 ,0
501 | 74 ,1,250,74 ,0
502 | 2 ,43,10750,86 ,1
503 | 6 ,22,5500,28 ,1
504 | 2 ,34,8500,77 ,1
505 | 2 ,44,11000,98 ,0
506 | 0 ,26,6500,76 ,1
507 | 2 ,41,10250,98 ,1
508 | 3 ,21,5250,42 ,1
509 | 2 ,11,2750,23 ,0
510 | 2 ,21,5250,52 ,1
511 | 2 ,13,3250,32 ,1
512 | 4 ,4,1000,4 ,1
513 | 2 ,11,2750,26 ,0
514 | 2 ,11,2750,28 ,0
515 | 3 ,14,3500,35 ,0
516 | 4 ,16,4000,38 ,1
517 | 4 ,6,1500,14 ,0
518 | 3 ,5,1250,12 ,1
519 | 4 ,33,8250,98 ,1
520 | 3 ,10,2500,33 ,1
521 | 4 ,10,2500,28 ,1
522 | 2 ,11,2750,40 ,1
523 | 2 ,11,2750,41 ,1
524 | 4 ,13,3250,39 ,1
525 | 1 ,10,2500,43 ,1
526 | 4 ,9,2250,28 ,0
527 | 2 ,4,1000,11 ,0
528 | 2 ,5,1250,16 ,1
529 | 2 ,15,3750,64 ,0
530 | 5 ,24,6000,79 ,0
531 | 2 ,6,1500,22 ,1
532 | 4 ,5,1250,16 ,1
533 | 2 ,4,1000,14 ,1
534 | 4 ,8,2000,28 ,0
535 | 2 ,4,1000,14 ,0
536 | 2 ,6,1500,26 ,0
537 | 4 ,5,1250,16 ,1
538 | 2 ,7,1750,32 ,1
539 | 2 ,6,1500,26 ,1
540 | 2 ,8,2000,38 ,1
541 | 2 ,2,500,4 ,1
542 | 2 ,6,1500,28 ,1
543 | 2 ,10,2500,52 ,0
544 | 4 ,16,4000,70 ,1
545 | 4 ,2,500,4 ,1
546 | 1 ,14,3500,95 ,0
547 | 4 ,2,500,4 ,1
548 | 7 ,14,3500,48 ,0
549 | 2 ,3,750,11 ,0
550 | 2 ,12,3000,70 ,1
551 | 4 ,7,1750,32 ,1
552 | 4 ,4,1000,16 ,0
553 | 2 ,6,1500,35 ,1
554 | 4 ,6,1500,28 ,1
555 | 2 ,3,750,14 ,0
556 | 2 ,4,1000,23 ,0
557 | 4 ,4,1000,18 ,0
558 | 5 ,6,1500,28 ,0
559 | 4 ,6,1500,30 ,0
560 | 14 ,5,1250,14 ,0
561 | 3 ,8,2000,50 ,0
562 | 4 ,11,2750,64 ,1
563 | 4 ,9,2250,52 ,0
564 | 4 ,16,4000,98 ,1
565 | 7 ,10,2500,47 ,0
566 | 4 ,14,3500,86 ,0
567 | 2 ,9,2250,75 ,0
568 | 4 ,6,1500,35 ,0
569 | 4 ,9,2250,55 ,0
570 | 4 ,6,1500,35 ,1
571 | 2 ,6,1500,45 ,0
572 | 2 ,6,1500,47 ,0
573 | 4 ,2,500,9 ,0
574 | 2 ,2,500,11 ,1
575 | 2 ,2,500,11 ,0
576 | 2 ,2,500,11 ,1
577 | 4 ,6,1500,38 ,1
578 | 3 ,4,1000,29 ,1
579 | 9 ,9,2250,38 ,0
580 | 11 ,5,1250,18 ,0
581 | 2 ,3,750,21 ,0
582 | 2 ,1,250,2 ,0
583 | 2 ,1,250,2 ,1
584 | 2 ,1,250,2 ,0
585 | 2 ,1,250,2 ,0
586 | 2 ,1,250,2 ,0
587 | 2 ,1,250,2 ,0
588 | 2 ,1,250,2 ,1
589 | 2 ,1,250,2 ,0
590 | 2 ,1,250,2 ,0
591 | 2 ,1,250,2 ,0
592 | 2 ,1,250,2 ,0
593 | 11 ,11,2750,38 ,0
594 | 2 ,3,750,22 ,0
595 | 9 ,11,2750,49 ,1
596 | 5 ,11,2750,75 ,0
597 | 3 ,5,1250,38 ,0
598 | 3 ,1,250,3 ,1
599 | 4 ,6,1500,43 ,0
600 | 2 ,3,750,24 ,0
601 | 12 ,11,2750,39 ,0
602 | 2 ,2,500,14 ,0
603 | 4 ,6,1500,46 ,0
604 | 9 ,3,750,14 ,0
605 | 14 ,8,2000,26 ,0
606 | 4 ,2,500,13 ,0
607 | 4 ,11,2750,95 ,0
608 | 2 ,7,1750,77 ,0
609 | 2 ,7,1750,77 ,0
610 | 4 ,1,250,4 ,0
611 | 4 ,1,250,4 ,0
612 | 4 ,1,250,4 ,0
613 | 4 ,1,250,4 ,0
614 | 4 ,1,250,4 ,1
615 | 4 ,1,250,4 ,0
616 | 4 ,1,250,4 ,0
617 | 4 ,1,250,4 ,0
618 | 4 ,1,250,4 ,0
619 | 4 ,1,250,4 ,0
620 | 4 ,1,250,4 ,1
621 | 4 ,1,250,4 ,0
622 | 4 ,7,1750,62 ,0
623 | 4 ,1,250,4 ,0
624 | 4 ,4,1000,34 ,1
625 | 11 ,6,1500,28 ,0
626 | 13 ,3,750,14 ,1
627 | 7 ,5,1250,35 ,0
628 | 9 ,9,2250,54 ,0
629 | 11 ,2,500,11 ,0
630 | 2 ,5,1250,63 ,0
631 | 7 ,11,2750,89 ,0
632 | 8 ,9,2250,64 ,0
633 | 2 ,2,500,22 ,0
634 | 6 ,3,750,26 ,0
635 | 12 ,15,3750,71 ,0
636 | 13 ,3,750,16 ,0
637 | 11 ,16,4000,89 ,0
638 | 4 ,5,1250,58 ,0
639 | 14 ,7,1750,35 ,0
640 | 11 ,4,1000,27 ,0
641 | 7 ,9,2250,89 ,1
642 | 11 ,8,2000,52 ,1
643 | 7 ,5,1250,52 ,0
644 | 11 ,6,1500,41 ,0
645 | 10 ,5,1250,38 ,0
646 | 14 ,2,500,14 ,1
647 | 14 ,2,500,14 ,0
648 | 14 ,2,500,14 ,0
649 | 2 ,2,500,33 ,0
650 | 11 ,3,750,23 ,0
651 | 14 ,8,2000,46 ,0
652 | 9 ,1,250,9 ,0
653 | 16 ,5,1250,27 ,0
654 | 14 ,4,1000,26 ,0
655 | 4 ,2,500,30 ,0
656 | 14 ,3,750,21 ,0
657 | 16 ,16,4000,77 ,0
658 | 4 ,2,500,31 ,0
659 | 14 ,8,2000,50 ,0
660 | 11 ,3,750,26 ,0
661 | 14 ,7,1750,45 ,0
662 | 15 ,5,1250,33 ,0
663 | 16 ,2,500,16 ,0
664 | 16 ,3,750,21 ,0
665 | 11 ,8,2000,72 ,0
666 | 11 ,1,250,11 ,0
667 | 11 ,1,250,11 ,0
668 | 11 ,1,250,11 ,0
669 | 11 ,1,250,11 ,1
670 | 11 ,1,250,11 ,0
671 | 2 ,3,750,75 ,1
672 | 2 ,3,750,77 ,0
673 | 16 ,4,1000,28 ,0
674 | 16 ,15,3750,87 ,0
675 | 16 ,14,3500,83 ,0
676 | 16 ,10,2500,62 ,0
677 | 16 ,3,750,23 ,0
678 | 14 ,3,750,26 ,0
679 | 23 ,19,4750,62 ,0
680 | 11 ,7,1750,75 ,0
681 | 14 ,3,750,28 ,0
682 | 20 ,14,3500,69 ,1
683 | 4 ,2,500,46 ,0
684 | 11 ,2,500,25 ,0
685 | 11 ,3,750,37 ,0
686 | 16 ,4,1000,33 ,0
687 | 21 ,7,1750,38 ,0
688 | 13 ,7,1750,76 ,0
689 | 16 ,6,1500,50 ,0
690 | 14 ,3,750,33 ,0
691 | 14 ,1,250,14 ,0
692 | 14 ,1,250,14 ,0
693 | 14 ,1,250,14 ,0
694 | 14 ,1,250,14 ,0
695 | 14 ,1,250,14 ,0
696 | 14 ,1,250,14 ,0
697 | 17 ,7,1750,58 ,1
698 | 14 ,3,750,35 ,0
699 | 14 ,3,750,35 ,0
700 | 16 ,7,1750,64 ,0
701 | 21 ,2,500,21 ,0
702 | 16 ,3,750,35 ,0
703 | 16 ,1,250,16 ,0
704 | 16 ,1,250,16 ,0
705 | 16 ,1,250,16 ,0
706 | 16 ,1,250,16 ,0
707 | 16 ,1,250,16 ,0
708 | 14 ,2,500,29 ,0
709 | 11 ,4,1000,74 ,0
710 | 11 ,2,500,38 ,1
711 | 21 ,6,1500,48 ,0
712 | 23 ,2,500,23 ,0
713 | 23 ,6,1500,45 ,0
714 | 14 ,2,500,35 ,1
715 | 16 ,6,1500,81 ,0
716 | 16 ,4,1000,58 ,0
717 | 16 ,5,1250,71 ,0
718 | 21 ,2,500,26 ,0
719 | 21 ,3,750,35 ,0
720 | 21 ,3,750,35 ,0
721 | 23 ,8,2000,69 ,0
722 | 21 ,3,750,38 ,0
723 | 23 ,3,750,35 ,0
724 | 21 ,3,750,40 ,0
725 | 23 ,2,500,28 ,0
726 | 21 ,1,250,21 ,0
727 | 21 ,1,250,21 ,0
728 | 25 ,6,1500,50 ,0
729 | 21 ,1,250,21 ,0
730 | 21 ,1,250,21 ,0
731 | 23 ,3,750,39 ,0
732 | 21 ,2,500,33 ,0
733 | 14 ,3,750,79 ,0
734 | 23 ,1,250,23 ,1
735 | 23 ,1,250,23 ,0
736 | 23 ,1,250,23 ,0
737 | 23 ,1,250,23 ,0
738 | 23 ,1,250,23 ,0
739 | 23 ,1,250,23 ,0
740 | 23 ,1,250,23 ,0
741 | 23 ,4,1000,52 ,0
742 | 23 ,1,250,23 ,0
743 | 23 ,7,1750,88 ,0
744 | 16 ,3,750,86 ,0
745 | 23 ,2,500,38 ,0
746 | 21 ,2,500,52 ,0
747 | 23 ,3,750,62 ,0
748 | 39 ,1,250,39 ,0
749 | 72 ,1,250,72 ,0
--------------------------------------------------------------------------------
/Give Life_ Predict Blood Donations/notebook.ipynb:
--------------------------------------------------------------------------------
1 | {"cells":[{"metadata":{"dc":{"key":"3"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 1. Inspecting transfusion.data file\n
\nBlood transfusion saves lives - from replacing lost blood during major surgery or a serious injury to treating various illnesses and blood disorders. Ensuring that there's enough blood in supply whenever needed is a serious challenge for health professionals. According to WebMD, \"about 5 million Americans need a blood transfusion every year\".
\nOur dataset is from a mobile blood donation vehicle in Taiwan. The Blood Transfusion Service Center drives to different universities and collects blood as part of a blood drive. We want to predict whether or not a donor will give blood the next time the vehicle comes to campus.
\nThe data is stored in datasets/transfusion.data and it is structured according to the RFMTC marketing model (a variation of RFM). We'll explore what that means later in this notebook. First, let's inspect the data.
"},{"metadata":{"dc":{"key":"3"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Print out the first 5 lines from the transfusion.data file\n!head -n 5 datasets/transfusion.data","execution_count":18,"outputs":[{"output_type":"stream","text":"Recency (months),Frequency (times),Monetary (c.c. blood),Time (months),\"whether he/she donated blood in March 2007\"\r\r\n2 ,50,12500,98 ,1\r\r\n0 ,13,3250,28 ,1\r\r\n1 ,16,4000,35 ,1\r\r\n2 ,20,5000,45 ,1\r\r\n","name":"stdout"}]},{"metadata":{"dc":{"key":"10"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 2. Loading the blood donations data\nWe now know that we are working with a typical CSV file (i.e., the delimiter is ,, etc.). We proceed to loading the data into memory.
"},{"metadata":{"dc":{"key":"10"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import pandas\nimport pandas as pd\n\n# Read in dataset\ntransfusion = pd.read_csv('datasets/transfusion.data')\n\n# Print out the first rows of our dataset\ntransfusion.head()","execution_count":20,"outputs":[{"output_type":"execute_result","execution_count":20,"data":{"text/plain":" Recency (months) Frequency (times) Monetary (c.c. blood) Time (months) \\\n0 2 50 12500 98 \n1 0 13 3250 28 \n2 1 16 4000 35 \n3 2 20 5000 45 \n4 1 24 6000 77 \n\n whether he/she donated blood in March 2007 \n0 1 \n1 1 \n2 1 \n3 1 \n4 0 ","text/html":"\n\n
"},"metadata":{}}]},{"metadata":{"dc":{"key":"17"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 3. Inspecting transfusion DataFrame\nLet's briefly return to our discussion of RFM model. RFM stands for Recency, Frequency and Monetary Value and it is commonly used in marketing for identifying your best customers. In our case, our customers are blood donors.
\nRFMTC is a variation of the RFM model. Below is a description of what each column means in our dataset:
\n\n- R (Recency - months since the last donation)
\n- F (Frequency - total number of donation)
\n- M (Monetary - total blood donated in c.c.)
\n- T (Time - months since the first donation)
\n- a binary variable representing whether he/she donated blood in March 2007 (1 stands for donating blood; 0 stands for not donating blood)
\n
\nIt looks like every column in our DataFrame has the numeric type, which is exactly what we want when building a machine learning model. Let's verify our hypothesis.
"},{"metadata":{"dc":{"key":"17"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Print a concise summary of transfusion DataFrame\ntransfusion.info()","execution_count":22,"outputs":[{"output_type":"stream","text":"\nRangeIndex: 748 entries, 0 to 747\nData columns (total 5 columns):\nRecency (months) 748 non-null int64\nFrequency (times) 748 non-null int64\nMonetary (c.c. blood) 748 non-null int64\nTime (months) 748 non-null int64\nwhether he/she donated blood in March 2007 748 non-null int64\ndtypes: int64(5)\nmemory usage: 29.3 KB\n","name":"stdout"}]},{"metadata":{"dc":{"key":"24"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 4. Creating target column\nWe are aiming to predict the value in whether he/she donated blood in March 2007 column. Let's rename this it to target so that it's more convenient to work with.
"},{"metadata":{"dc":{"key":"24"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Rename target column as 'target' for brevity \ntransfusion.rename(\n columns={'whether he/she donated blood in March 2007': 'target'},\n inplace=True\n)\n\n# Print out the first 2 rows\ntransfusion.head(2)","execution_count":24,"outputs":[{"output_type":"execute_result","execution_count":24,"data":{"text/plain":" Recency (months) Frequency (times) Monetary (c.c. blood) Time (months) \\\n0 2 50 12500 98 \n1 0 13 3250 28 \n\n target \n0 1 \n1 1 ","text/html":"\n\n
"},"metadata":{}}]},{"metadata":{"dc":{"key":"31"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 5. Checking target incidence\nWe want to predict whether or not the same donor will give blood the next time the vehicle comes to campus. The model for this is a binary classifier, meaning that there are only 2 possible outcomes:
\n\n0 - the donor will not give blood \n1 - the donor will give blood \n
\nTarget incidence is defined as the number of cases of each individual target value in a dataset. That is, how many 0s are in the target column compared to how many 1s? Target incidence gives us an idea of how balanced (or imbalanced) our dataset is.
"},{"metadata":{"dc":{"key":"31"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Print target incidence proportions, rounding output to 3 decimal places\ntransfusion.target.value_counts(normalize=True).round(3)","execution_count":26,"outputs":[{"output_type":"execute_result","execution_count":26,"data":{"text/plain":"0 0.762\n1 0.238\nName: target, dtype: float64"},"metadata":{}}]},{"metadata":{"dc":{"key":"38"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 6. Splitting transfusion into train and test datasets\nWe'll now use train_test_split() method to split transfusion DataFrame.
\nTarget incidence informed us that in our dataset 0s appear 76% of the time. We want to keep the same structure in the train and test datasets, i.e., both datasets must have a 0-target incidence of 76%. This is very easy to do using the train_test_split() method from the scikit-learn library - all we need to do is specify the stratify parameter. In our case, we'll stratify on the target column.
"},{"metadata":{"dc":{"key":"38"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import train_test_split method\nfrom sklearn.model_selection import train_test_split\n\n# Split transfusion DataFrame into\n# X_train, X_test, y_train and y_test datasets,\n# stratifying on the `target` column\nX_train, X_test, y_train , y_test = train_test_split(\n transfusion.drop(columns='target'),\n transfusion.target,\n test_size=0.25,\n random_state=42,\n stratify=transfusion.target\n)\n\n# Print out the first 2 rows of X_train\nX_train.head(2)","execution_count":28,"outputs":[{"output_type":"execute_result","execution_count":28,"data":{"text/plain":" Recency (months) Frequency (times) Monetary (c.c. blood) Time (months)\n334 16 2 500 16\n99 5 7 1750 26","text/html":"\n\n
"},"metadata":{}}]},{"metadata":{"dc":{"key":"45"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 7. Selecting model using TPOT\nTPOT is a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming.
\n
\nTPOT will automatically explore hundreds of possible pipelines to find the best one for our dataset. Note, the outcome of this search will be a scikit-learn pipeline, meaning it will include any pre-processing steps as well as the model.
\nWe are using TPOT to help us zero in on one model that we can then explore and optimize further.
"},{"metadata":{"dc":{"key":"45"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import TPOTClassifier and roc_auc_score\nfrom tpot import TPOTClassifier\nfrom sklearn.metrics import roc_auc_score\n\n# Instantiate TPOTClassifier\ntpot = TPOTClassifier(\n generations=5,\n population_size=20,\n verbosity=2,\n scoring='roc_auc',\n random_state=42,\n disable_update_check=True,\n config_dict='TPOT light'\n)\ntpot.fit(X_train, y_train)\n\n# AUC score for tpot model\ntpot_auc_score = roc_auc_score(y_test, tpot.predict_proba(X_test)[:, 1])\nprint(f'\\nAUC score: {tpot_auc_score:.4f}')\n\n# Print best pipeline steps\nprint('\\nBest pipeline steps:', end='\\n')\nfor idx, (name, transform) in enumerate(tpot.fitted_pipeline_.steps, start=1):\n # Print idx and transform\n print(f'{idx}.{transform}')","execution_count":30,"outputs":[{"output_type":"display_data","data":{"text/plain":"HBox(children=(IntProgress(value=0, description='Optimization Progress', max=120, style=ProgressStyle(descript…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"f2062ead0d694ea3bd78bb4921f6c8d5"}},"metadata":{}},{"output_type":"stream","text":"Generation 1 - Current best internal CV score: 0.7433977184592779\nGeneration 2 - Current best internal CV score: 0.7433977184592779\nGeneration 3 - Current best internal CV score: 0.7433977184592779\nGeneration 4 - Current best internal CV score: 0.7433977184592779\nGeneration 5 - Current best internal CV score: 0.7433977184592779\n\nBest pipeline: LogisticRegression(input_matrix, C=0.5, dual=False, penalty=l2)\n\nAUC score: 0.7850\n\nBest pipeline steps:\n1.LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True,\n intercept_scaling=1, max_iter=100, multi_class='warn',\n n_jobs=None, penalty='l2', random_state=None, solver='warn',\n tol=0.0001, verbose=0, warm_start=False)\n","name":"stdout"}]},{"metadata":{"dc":{"key":"52"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 8. Checking the variance\nTPOT picked LogisticRegression as the best model for our dataset with no pre-processing steps, giving us the AUC score of 0.7850. This is a great starting point. Let's see if we can make it better.
\nOne of the assumptions for linear regression models is that the data and the features we are giving it are related in a linear fashion, or can be measured with a linear distance metric. If a feature in our dataset has a high variance that's an order of magnitude or more greater than the other features, this could impact the model's ability to learn from other features in the dataset.
\nCorrecting for high variance is called normalization. It is one of the possible transformations you do before training a model. Let's check the variance to see if such transformation is needed.
"},{"metadata":{"dc":{"key":"52"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# X_train's variance, rounding the output to 3 decimal places\n\nX_train.var().round(3)","execution_count":32,"outputs":[{"output_type":"execute_result","execution_count":32,"data":{"text/plain":"Recency (months) 66.929\nFrequency (times) 33.830\nMonetary (c.c. blood) 2114363.700\nTime (months) 611.147\ndtype: float64"},"metadata":{}}]},{"metadata":{"dc":{"key":"59"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 9. Log normalization\nMonetary (c.c. blood)'s variance is very high in comparison to any other column in the dataset. This means that, unless accounted for, this feature may get more weight by the model (i.e., be seen as more important) than any other feature.
\nOne way to correct for high variance is to use log normalization.
"},{"metadata":{"dc":{"key":"59"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Import numpy\nimport numpy as np\n\n# Copy X_train and X_test into X_train_normed and X_test_normed\nX_train_normed , X_test_normed = X_train.copy(), X_test.copy()\n\n# Specify which column to normalize\ncol_to_normalize = 'Monetary (c.c. blood)'\n\n# Log normalization\nfor df_ in [X_train_normed, X_test_normed]:\n # Add log normalized column\n df_['monetary_log'] = np.log(df_[col_to_normalize])\n # Drop the original column\n df_.drop(columns=col_to_normalize, inplace=True)\n\n# Check the variance for X_train_normed\nround(X_train_normed.var(),3)","execution_count":34,"outputs":[{"output_type":"execute_result","execution_count":34,"data":{"text/plain":"Recency (months) 66.929\nFrequency (times) 33.830\nTime (months) 611.147\nmonetary_log 0.837\ndtype: float64"},"metadata":{}}]},{"metadata":{"dc":{"key":"66"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 10. Training the linear regression model\nThe variance looks much better now. Notice that now Time (months) has the largest variance, but it's not the orders of magnitude higher than the rest of the variables, so we'll leave it as is.
\nWe are now ready to train the linear regression model.
"},{"metadata":{"dc":{"key":"66"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Importing modules\nfrom sklearn import linear_model\n\n# Instantiate LogisticRegression\nlogreg = linear_model.LogisticRegression(\n solver='liblinear',\n random_state=42\n)\n\n# Train the model\nlogreg.fit(X_train_normed, y_train)\n\n# AUC score for tpot model\nlogreg_auc_score = roc_auc_score(y_test, logreg.predict_proba(X_test_normed)[:, 1])\nprint(f'\\nAUC score: {logreg_auc_score:.4f}')","execution_count":36,"outputs":[{"output_type":"stream","text":"\nAUC score: 0.7891\n","name":"stdout"}]},{"metadata":{"dc":{"key":"73"},"deletable":false,"editable":false,"run_control":{"frozen":true},"tags":["context"]},"cell_type":"markdown","source":"## 11. Conclusion\nThe demand for blood fluctuates throughout the year. As one prominent example, blood donations slow down during busy holiday seasons. An accurate forecast for the future supply of blood allows for an appropriate action to be taken ahead of time and therefore saving more lives.
\nIn this notebook, we explored automatic model selection using TPOT and the results are not too far off. Furthermore, both of our models are doing slightly better than simply choosing 0 all the time (the target incidence suggests that such a model would have 76% success rate).
\nAnother benefit of using logistic regression model is that it is interpretable. We can analyze how much of the variance in the response variable (target) can be explained by other variables in our dataset.
"},{"metadata":{"dc":{"key":"73"},"tags":["sample_code"],"trusted":true},"cell_type":"code","source":"# Importing itemgetter\nfrom operator import itemgetter\n\n# Sort models based on their AUC score from highest to lowest\nsorted(\n [('tpot', tpot_auc_score), ('logreg', logreg_auc_score)],\n key=itemgetter(1),\n reverse=True\n)","execution_count":38,"outputs":[{"output_type":"execute_result","execution_count":38,"data":{"text/plain":"[('logreg', 0.7890972663699937), ('tpot', 0.7849650349650349)]"},"metadata":{}}]}],"metadata":{"kernelspec":{"name":"python3","display_name":"Python 3","language":"python"},"language_info":{"name":"python","version":"3.6.7","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat":4,"nbformat_minor":2}
--------------------------------------------------------------------------------
/Future Sales/Future Sales.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "import seaborn as sns\n",
13 | "\n",
14 | "from datetime import datetime, date\n",
15 | "from dateutil.relativedelta import relativedelta\n",
16 | "\n",
17 | "from sklearn.preprocessing import StandardScaler\n",
18 | "\n",
19 | "from math import ceil\n",
20 | "\n",
21 | "\n",
22 | "%matplotlib inline\n",
23 | "\n",
24 | "train = pd.read_csv('sales_train.csv')\n",
25 | "test = pd.read_csv('test.csv')\n",
26 | "submission = pd.read_csv('sample_submission.csv')\n",
27 | "items = pd.read_csv('items.csv')\n",
28 | "item_cats = pd.read_csv('item_categories.csv')\n",
29 | "shops = pd.read_csv('shops.csv')"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "test_shops = test.shop_id.unique()\n",
39 | "train = train[train.shop_id.isin(test_shops)]\n",
40 | "test_items = test.item_id.unique()\n",
41 | "train = train[train.item_id.isin(test_items)]"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 3,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "MAX_BLOCK_NUM = train.date_block_num.max()\n",
51 | "MAX_ITEM = len(test_items)\n",
52 | "MAX_CAT = len(item_cats)\n",
53 | "MAX_YEAR = 3\n",
54 | "MAX_MONTH = 4 # the four months used: 7, 8, 9, 10\n",
55 | "MAX_SHOP = len(test_shops)"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 4,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
142 | "text/plain": [
143 | " date date_block_num shop_id item_id item_price item_cnt_day\n",
144 | "0 02.01.2013 0 59 22154 999.0 1.0\n",
145 | "10 03.01.2013 0 25 2574 399.0 2.0\n",
146 | "11 05.01.2013 0 25 2574 399.0 1.0\n",
147 | "12 07.01.2013 0 25 2574 399.0 1.0\n",
148 | "13 08.01.2013 0 25 2574 399.0 2.0"
149 | ]
150 | },
151 | "execution_count": 4,
152 | "metadata": {},
153 | "output_type": "execute_result"
154 | }
155 | ],
156 | "source": [
157 | "train.head()"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 5,
163 | "metadata": {},
164 | "outputs": [],
165 | "source": [
166 | "train = train.set_index('item_id').join(items.set_index('item_id')).drop('item_name', axis=1).reset_index()\n",
167 | "\n"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 6,
173 | "metadata": {},
174 | "outputs": [],
175 | "source": [
176 | "train['month'] = train.date.apply(lambda x: datetime.strptime(x, '%d.%m.%Y').strftime('%m'))\n",
177 | "train['year'] = train.date.apply(lambda x: datetime.strptime(x, '%d.%m.%Y').strftime('%Y'))\n",
178 | "\n"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": 7,
184 | "metadata": {},
185 | "outputs": [
186 | {
187 | "data": {
283 | "text/plain": [
284 | " item_id date date_block_num shop_id item_price item_cnt_day \\\n",
285 | "0 30 28.02.2013 1 50 399.0 1.0 \n",
286 | "1 30 26.02.2013 1 50 399.0 1.0 \n",
287 | "2 30 12.02.2013 1 50 399.0 1.0 \n",
288 | "3 30 14.02.2013 1 50 399.0 2.0 \n",
289 | "4 30 15.02.2013 1 50 399.0 3.0 \n",
290 | "\n",
291 | " item_category_id month year \n",
292 | "0 40 02 2013 \n",
293 | "1 40 02 2013 \n",
294 | "2 40 02 2013 \n",
295 | "3 40 02 2013 \n",
296 | "4 40 02 2013 "
297 | ]
298 | },
299 | "execution_count": 7,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "train.head()"
306 | ]
307 | },
308 | {
309 | "cell_type": "code",
310 | "execution_count": 8,
311 | "metadata": {},
312 | "outputs": [
313 | {
314 | "name": "stderr",
315 | "output_type": "stream",
316 | "text": [
317 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
318 | " after removing the cwd from sys.path.\n",
319 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:5: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
320 | " \"\"\"\n",
321 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:7: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
322 | " import sys\n",
323 | "C:\\Users\\salee\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:8: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.\n",
324 | " \n"
325 | ]
326 | }
327 | ],
328 | "source": [
329 | "scaler = StandardScaler()\n",
330 | "cnt_scaler = StandardScaler()\n",
331 | "\n",
332 | "scaler.fit(train.item_price.as_matrix().reshape(-1, 1))\n",
333 | "cnt_scaler.fit(train.item_cnt_day.as_matrix().reshape(-1, 1))\n",
334 | "\n",
335 | "train.item_price = scaler.transform(train.item_price.as_matrix().reshape(-1, 1))\n",
336 | "train.item_cnt_day = cnt_scaler.transform(train.item_cnt_day.as_matrix().reshape(-1, 1))"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": 9,
342 | "metadata": {},
343 | "outputs": [
344 | {
345 | "data": {
441 | "text/plain": [
442 | " item_id date date_block_num shop_id item_price item_cnt_day \\\n",
443 | "0 30 28.02.2013 1 50 -0.345667 -0.096962 \n",
444 | "1 30 26.02.2013 1 50 -0.345667 -0.096962 \n",
445 | "2 30 12.02.2013 1 50 -0.345667 -0.096962 \n",
446 | "3 30 14.02.2013 1 50 -0.345667 0.204880 \n",
447 | "4 30 15.02.2013 1 50 -0.345667 0.506721 \n",
448 | "\n",
449 | " item_category_id month year \n",
450 | "0 40 02 2013 \n",
451 | "1 40 02 2013 \n",
452 | "2 40 02 2013 \n",
453 | "3 40 02 2013 \n",
454 | "4 40 02 2013 "
455 | ]
456 | },
457 | "execution_count": 9,
458 | "metadata": {},
459 | "output_type": "execute_result"
460 | }
461 | ],
462 | "source": [
463 | "train.head()"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": 10,
469 | "metadata": {},
470 | "outputs": [
471 | {
472 | "data": {
552 | "text/plain": [
553 | " item_price item_cnt_day\n",
554 | "shop_id item_id date_block_num month year \n",
555 | "2 30 2 03 2013 -0.367556 -0.096962\n",
556 | " 5 06 2013 -0.345667 -0.096962\n",
557 | " 15 04 2014 -0.471530 -0.096962\n",
558 | " 16 05 2014 -0.471530 -0.096962\n",
559 | " 31 1 02 2013 -0.725991 -0.387848"
560 | ]
561 | },
562 | "execution_count": 10,
563 | "metadata": {},
564 | "output_type": "execute_result"
565 | }
566 | ],
567 | "source": [
568 | "train = train.drop('date', axis=1)\n",
569 | "train = train.drop('item_category_id', axis=1)\n",
570 | "train = train.groupby(['shop_id', 'item_id', 'date_block_num', 'month', 'year']).sum()\n",
571 | "train = train.sort_index()\n",
572 | "train.head()"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 12,
578 | "metadata": {},
579 | "outputs": [],
580 | "source": [
581 | "price = train.reset_index().set_index(['item_id', 'shop_id', 'date_block_num'])\n",
582 | "price = price.sort_index()"
583 | ]
584 | },
585 | {
586 | "cell_type": "code",
587 | "execution_count": 13,
588 | "metadata": {},
589 | "outputs": [],
590 | "source": [
591 | "def convert(date_block):\n",
592 | " date = datetime(2013, 1, 1)\n",
593 | " date += relativedelta(months = date_block)\n",
594 | " return (date.month, date.year)\n",
595 | "\n",
596 | "def closest_date_block(current_day, item_id, shop_id):\n",
597 | "    \"\"\"Find the date_block closest to current_day for the given item_id and shop_id. Returns an index integer.\"\"\"\n",
598 | " if (item_id, shop_id) in price.index:\n",
599 | " search_lst = np.array(price.loc[(item_id, shop_id)].index) \n",
600 | " return search_lst[np.abs(current_day - search_lst).argmin()]\n",
601 | " return -1\n",
602 | " \n",
603 | "def closest_price(current_day, item_id, shop_id):\n",
604 | " closest_date = closest_date_block(current_day, item_id, shop_id)\n",
605 | " if closest_date != -1:\n",
606 | " return price.loc[( item_id, shop_id, closest_date )]['item_price']\n",
607 | " return np.nan\n",
608 | "\n",
609 | "def closest_price_lambda(x):\n",
610 | " return closest_price(34, x.item_id, x.shop_id)\n",
611 | "assert closest_date_block(18, 30, 5) == 18"
612 | ]
613 | },
614 | {
615 | "cell_type": "code",
616 | "execution_count": 14,
617 | "metadata": {},
618 | "outputs": [],
619 | "source": [
620 | "maxlen = 4 # 4 months\n",
621 | "step = 1\n",
622 | "# 0: train, 1: val, 2: test\n",
623 | "sentences = [[],[],[]]\n",
624 | "next_chars = [[], []]\n",
625 | "BLOCKS = [6, 18, 30]\n",
626 | "\n",
627 | "for s in test_shops:\n",
628 | " shop_items = list(train.loc[s].index.get_level_values(0).unique())\n",
629 | " for it in shop_items: \n",
630 | " for i_index, i in enumerate(BLOCKS):\n",
631 | " sentence = []\n",
632 | " closest_pc = closest_price(i, it, s) \n",
633 | "    for j in range(maxlen+1):\n",
634 | "        if j < maxlen:\n",
635 | "            # Derive month/year for every block up front, so 'row' never reuses stale values\n",
636 | "            month, year = convert(i+j)\n",
637 | "            if (s, it, i+j) in train.index:\n",
638 | "                r = train.loc[(s, it, i + j)].to_dict(orient='list')\n",
639 | "                closest_pc = r['item_price'][0]\n",
640 | "                item_cnt_day = r['item_cnt_day'][0]\n",
641 | "            else:\n",
642 | "                item_cnt_day = 0\n",
643 | "            row = {'shop_id': s, 'date_block_num': i+j, 'item_cnt_day': item_cnt_day,\n",
644 | "                   'month': month, 'item_id': it, 'item_price': closest_pc, 'year': year}\n",
645 | " sentence.append(row)\n",
646 | " elif i_index < 2: # not in test set\n",
647 | " next_chars[i_index].append(row)\n",
648 | " sentences[i_index].append(sentence)"
649 | ]
650 | },
651 | {
652 | "cell_type": "code",
653 | "execution_count": null,
654 | "metadata": {},
655 | "outputs": [],
656 | "source": [
657 | "train.head()"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": null,
663 | "metadata": {},
664 | "outputs": [],
665 | "source": []
666 | },
667 | {
668 | "cell_type": "code",
669 | "execution_count": null,
670 | "metadata": {},
671 | "outputs": [],
672 | "source": [
673 | " "
674 | ]
675 | }
676 | ],
677 | "metadata": {
678 | "kernelspec": {
679 | "display_name": "Python 3",
680 | "language": "python",
681 | "name": "python3"
682 | },
683 | "language_info": {
684 | "codemirror_mode": {
685 | "name": "ipython",
686 | "version": 3
687 | },
688 | "file_extension": ".py",
689 | "mimetype": "text/x-python",
690 | "name": "python",
691 | "nbconvert_exporter": "python",
692 | "pygments_lexer": "ipython3",
693 | "version": "3.7.1"
694 | }
695 | },
696 | "nbformat": 4,
697 | "nbformat_minor": 2
698 | }
699 |
--------------------------------------------------------------------------------
/Predicting Car Prices/imports-85.data:
--------------------------------------------------------------------------------
1 | 3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.60,168.80,64.10,48.80,2548,dohc,four,130,mpfi,3.47,2.68,9.00,111,5000,21,27,13495
2 | 3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.60,168.80,64.10,48.80,2548,dohc,four,130,mpfi,3.47,2.68,9.00,111,5000,21,27,16500
3 | 1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.50,171.20,65.50,52.40,2823,ohcv,six,152,mpfi,2.68,3.47,9.00,154,5000,19,26,16500
4 | 2,164,audi,gas,std,four,sedan,fwd,front,99.80,176.60,66.20,54.30,2337,ohc,four,109,mpfi,3.19,3.40,10.00,102,5500,24,30,13950
5 | 2,164,audi,gas,std,four,sedan,4wd,front,99.40,176.60,66.40,54.30,2824,ohc,five,136,mpfi,3.19,3.40,8.00,115,5500,18,22,17450
6 | 2,?,audi,gas,std,two,sedan,fwd,front,99.80,177.30,66.30,53.10,2507,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,15250
7 | 1,158,audi,gas,std,four,sedan,fwd,front,105.80,192.70,71.40,55.70,2844,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,17710
8 | 1,?,audi,gas,std,four,wagon,fwd,front,105.80,192.70,71.40,55.70,2954,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,25,18920
9 | 1,158,audi,gas,turbo,four,sedan,fwd,front,105.80,192.70,71.40,55.90,3086,ohc,five,131,mpfi,3.13,3.40,8.30,140,5500,17,20,23875
10 | 0,?,audi,gas,turbo,two,hatchback,4wd,front,99.50,178.20,67.90,52.00,3053,ohc,five,131,mpfi,3.13,3.40,7.00,160,5500,16,22,?
11 | 2,192,bmw,gas,std,two,sedan,rwd,front,101.20,176.80,64.80,54.30,2395,ohc,four,108,mpfi,3.50,2.80,8.80,101,5800,23,29,16430
12 | 0,192,bmw,gas,std,four,sedan,rwd,front,101.20,176.80,64.80,54.30,2395,ohc,four,108,mpfi,3.50,2.80,8.80,101,5800,23,29,16925
13 | 0,188,bmw,gas,std,two,sedan,rwd,front,101.20,176.80,64.80,54.30,2710,ohc,six,164,mpfi,3.31,3.19,9.00,121,4250,21,28,20970
14 | 0,188,bmw,gas,std,four,sedan,rwd,front,101.20,176.80,64.80,54.30,2765,ohc,six,164,mpfi,3.31,3.19,9.00,121,4250,21,28,21105
15 | 1,?,bmw,gas,std,four,sedan,rwd,front,103.50,189.00,66.90,55.70,3055,ohc,six,164,mpfi,3.31,3.19,9.00,121,4250,20,25,24565
16 | 0,?,bmw,gas,std,four,sedan,rwd,front,103.50,189.00,66.90,55.70,3230,ohc,six,209,mpfi,3.62,3.39,8.00,182,5400,16,22,30760
17 | 0,?,bmw,gas,std,two,sedan,rwd,front,103.50,193.80,67.90,53.70,3380,ohc,six,209,mpfi,3.62,3.39,8.00,182,5400,16,22,41315
18 | 0,?,bmw,gas,std,four,sedan,rwd,front,110.00,197.00,70.90,56.30,3505,ohc,six,209,mpfi,3.62,3.39,8.00,182,5400,15,20,36880
19 | 2,121,chevrolet,gas,std,two,hatchback,fwd,front,88.40,141.10,60.30,53.20,1488,l,three,61,2bbl,2.91,3.03,9.50,48,5100,47,53,5151
20 | 1,98,chevrolet,gas,std,two,hatchback,fwd,front,94.50,155.90,63.60,52.00,1874,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,6295
21 | 0,81,chevrolet,gas,std,four,sedan,fwd,front,94.50,158.80,63.60,52.00,1909,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,6575
22 | 1,118,dodge,gas,std,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,1876,ohc,four,90,2bbl,2.97,3.23,9.41,68,5500,37,41,5572
23 | 1,118,dodge,gas,std,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,1876,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6377
24 | 1,118,dodge,gas,turbo,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,2128,ohc,four,98,mpfi,3.03,3.39,7.60,102,5500,24,30,7957
25 | 1,148,dodge,gas,std,four,hatchback,fwd,front,93.70,157.30,63.80,50.60,1967,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6229
26 | 1,148,dodge,gas,std,four,sedan,fwd,front,93.70,157.30,63.80,50.60,1989,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6692
27 | 1,148,dodge,gas,std,four,sedan,fwd,front,93.70,157.30,63.80,50.60,1989,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,7609
28 | 1,148,dodge,gas,turbo,?,sedan,fwd,front,93.70,157.30,63.80,50.60,2191,ohc,four,98,mpfi,3.03,3.39,7.60,102,5500,24,30,8558
29 | -1,110,dodge,gas,std,four,wagon,fwd,front,103.30,174.60,64.60,59.80,2535,ohc,four,122,2bbl,3.34,3.46,8.50,88,5000,24,30,8921
30 | 3,145,dodge,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2811,ohc,four,156,mfi,3.60,3.90,7.00,145,5000,19,24,12964
31 | 2,137,honda,gas,std,two,hatchback,fwd,front,86.60,144.60,63.90,50.80,1713,ohc,four,92,1bbl,2.91,3.41,9.60,58,4800,49,54,6479
32 | 2,137,honda,gas,std,two,hatchback,fwd,front,86.60,144.60,63.90,50.80,1819,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,31,38,6855
33 | 1,101,honda,gas,std,two,hatchback,fwd,front,93.70,150.00,64.00,52.60,1837,ohc,four,79,1bbl,2.91,3.07,10.10,60,5500,38,42,5399
34 | 1,101,honda,gas,std,two,hatchback,fwd,front,93.70,150.00,64.00,52.60,1940,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,30,34,6529
35 | 1,101,honda,gas,std,two,hatchback,fwd,front,93.70,150.00,64.00,52.60,1956,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,30,34,7129
36 | 0,110,honda,gas,std,four,sedan,fwd,front,96.50,163.40,64.00,54.50,2010,ohc,four,92,1bbl,2.91,3.41,9.20,76,6000,30,34,7295
37 | 0,78,honda,gas,std,four,wagon,fwd,front,96.50,157.10,63.90,58.30,2024,ohc,four,92,1bbl,2.92,3.41,9.20,76,6000,30,34,7295
38 | 0,106,honda,gas,std,two,hatchback,fwd,front,96.50,167.50,65.20,53.30,2236,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,7895
39 | 0,106,honda,gas,std,two,hatchback,fwd,front,96.50,167.50,65.20,53.30,2289,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,9095
40 | 0,85,honda,gas,std,four,sedan,fwd,front,96.50,175.40,65.20,54.10,2304,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,8845
41 | 0,85,honda,gas,std,four,sedan,fwd,front,96.50,175.40,62.50,54.10,2372,ohc,four,110,1bbl,3.15,3.58,9.00,86,5800,27,33,10295
42 | 0,85,honda,gas,std,four,sedan,fwd,front,96.50,175.40,65.20,54.10,2465,ohc,four,110,mpfi,3.15,3.58,9.00,101,5800,24,28,12945
43 | 1,107,honda,gas,std,two,sedan,fwd,front,96.50,169.10,66.00,51.00,2293,ohc,four,110,2bbl,3.15,3.58,9.10,100,5500,25,31,10345
44 | 0,?,isuzu,gas,std,four,sedan,rwd,front,94.30,170.70,61.80,53.50,2337,ohc,four,111,2bbl,3.31,3.23,8.50,78,4800,24,29,6785
45 | 1,?,isuzu,gas,std,two,sedan,fwd,front,94.50,155.90,63.60,52.00,1874,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,?
46 | 0,?,isuzu,gas,std,four,sedan,fwd,front,94.50,155.90,63.60,52.00,1909,ohc,four,90,2bbl,3.03,3.11,9.60,70,5400,38,43,?
47 | 2,?,isuzu,gas,std,two,hatchback,rwd,front,96.00,172.60,65.20,51.40,2734,ohc,four,119,spfi,3.43,3.23,9.20,90,5000,24,29,11048
48 | 0,145,jaguar,gas,std,four,sedan,rwd,front,113.00,199.60,69.60,52.80,4066,dohc,six,258,mpfi,3.63,4.17,8.10,176,4750,15,19,32250
49 | 0,?,jaguar,gas,std,four,sedan,rwd,front,113.00,199.60,69.60,52.80,4066,dohc,six,258,mpfi,3.63,4.17,8.10,176,4750,15,19,35550
50 | 0,?,jaguar,gas,std,two,sedan,rwd,front,102.00,191.70,70.60,47.80,3950,ohcv,twelve,326,mpfi,3.54,2.76,11.50,262,5000,13,17,36000
51 | 1,104,mazda,gas,std,two,hatchback,fwd,front,93.10,159.10,64.20,54.10,1890,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,30,31,5195
52 | 1,104,mazda,gas,std,two,hatchback,fwd,front,93.10,159.10,64.20,54.10,1900,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,31,38,6095
53 | 1,104,mazda,gas,std,two,hatchback,fwd,front,93.10,159.10,64.20,54.10,1905,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,31,38,6795
54 | 1,113,mazda,gas,std,four,sedan,fwd,front,93.10,166.80,64.20,54.10,1945,ohc,four,91,2bbl,3.03,3.15,9.00,68,5000,31,38,6695
55 | 1,113,mazda,gas,std,four,sedan,fwd,front,93.10,166.80,64.20,54.10,1950,ohc,four,91,2bbl,3.08,3.15,9.00,68,5000,31,38,7395
56 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2380,rotor,two,70,4bbl,?,?,9.40,101,6000,17,23,10945
57 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2380,rotor,two,70,4bbl,?,?,9.40,101,6000,17,23,11845
58 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2385,rotor,two,70,4bbl,?,?,9.40,101,6000,17,23,13645
59 | 3,150,mazda,gas,std,two,hatchback,rwd,front,95.30,169.00,65.70,49.60,2500,rotor,two,80,mpfi,?,?,9.40,135,6000,16,23,15645
60 | 1,129,mazda,gas,std,two,hatchback,fwd,front,98.80,177.80,66.50,53.70,2385,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,8845
61 | 0,115,mazda,gas,std,four,sedan,fwd,front,98.80,177.80,66.50,55.50,2410,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,8495
62 | 1,129,mazda,gas,std,two,hatchback,fwd,front,98.80,177.80,66.50,53.70,2385,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,10595
63 | 0,115,mazda,gas,std,four,sedan,fwd,front,98.80,177.80,66.50,55.50,2410,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,10245
64 | 0,?,mazda,diesel,std,?,sedan,fwd,front,98.80,177.80,66.50,55.50,2443,ohc,four,122,idi,3.39,3.39,22.70,64,4650,36,42,10795
65 | 0,115,mazda,gas,std,four,hatchback,fwd,front,98.80,177.80,66.50,55.50,2425,ohc,four,122,2bbl,3.39,3.39,8.60,84,4800,26,32,11245
66 | 0,118,mazda,gas,std,four,sedan,rwd,front,104.90,175.00,66.10,54.40,2670,ohc,four,140,mpfi,3.76,3.16,8.00,120,5000,19,27,18280
67 | 0,?,mazda,diesel,std,four,sedan,rwd,front,104.90,175.00,66.10,54.40,2700,ohc,four,134,idi,3.43,3.64,22.00,72,4200,31,39,18344
68 | -1,93,mercedes-benz,diesel,turbo,four,sedan,rwd,front,110.00,190.90,70.30,56.50,3515,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,25552
69 | -1,93,mercedes-benz,diesel,turbo,four,wagon,rwd,front,110.00,190.90,70.30,58.70,3750,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,28248
70 | 0,93,mercedes-benz,diesel,turbo,two,hardtop,rwd,front,106.70,187.50,70.30,54.90,3495,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,28176
71 | -1,93,mercedes-benz,diesel,turbo,four,sedan,rwd,front,115.60,202.60,71.70,56.30,3770,ohc,five,183,idi,3.58,3.64,21.50,123,4350,22,25,31600
72 | -1,?,mercedes-benz,gas,std,four,sedan,rwd,front,115.60,202.60,71.70,56.50,3740,ohcv,eight,234,mpfi,3.46,3.10,8.30,155,4750,16,18,34184
73 | 3,142,mercedes-benz,gas,std,two,convertible,rwd,front,96.60,180.30,70.50,50.80,3685,ohcv,eight,234,mpfi,3.46,3.10,8.30,155,4750,16,18,35056
74 | 0,?,mercedes-benz,gas,std,four,sedan,rwd,front,120.90,208.10,71.70,56.70,3900,ohcv,eight,308,mpfi,3.80,3.35,8.00,184,4500,14,16,40960
75 | 1,?,mercedes-benz,gas,std,two,hardtop,rwd,front,112.00,199.20,72.00,55.40,3715,ohcv,eight,304,mpfi,3.80,3.35,8.00,184,4500,14,16,45400
76 | 1,?,mercury,gas,turbo,two,hatchback,rwd,front,102.70,178.40,68.00,54.80,2910,ohc,four,140,mpfi,3.78,3.12,8.00,175,5000,19,24,16503
77 | 2,161,mitsubishi,gas,std,two,hatchback,fwd,front,93.70,157.30,64.40,50.80,1918,ohc,four,92,2bbl,2.97,3.23,9.40,68,5500,37,41,5389
78 | 2,161,mitsubishi,gas,std,two,hatchback,fwd,front,93.70,157.30,64.40,50.80,1944,ohc,four,92,2bbl,2.97,3.23,9.40,68,5500,31,38,6189
79 | 2,161,mitsubishi,gas,std,two,hatchback,fwd,front,93.70,157.30,64.40,50.80,2004,ohc,four,92,2bbl,2.97,3.23,9.40,68,5500,31,38,6669
80 | 1,161,mitsubishi,gas,turbo,two,hatchback,fwd,front,93,157.30,63.80,50.80,2145,ohc,four,98,spdi,3.03,3.39,7.60,102,5500,24,30,7689
81 | 3,153,mitsubishi,gas,turbo,two,hatchback,fwd,front,96.30,173.00,65.40,49.40,2370,ohc,four,110,spdi,3.17,3.46,7.50,116,5500,23,30,9959
82 | 3,153,mitsubishi,gas,std,two,hatchback,fwd,front,96.30,173.00,65.40,49.40,2328,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,25,32,8499
83 | 3,?,mitsubishi,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2833,ohc,four,156,spdi,3.58,3.86,7.00,145,5000,19,24,12629
84 | 3,?,mitsubishi,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2921,ohc,four,156,spdi,3.59,3.86,7.00,145,5000,19,24,14869
85 | 3,?,mitsubishi,gas,turbo,two,hatchback,fwd,front,95.90,173.20,66.30,50.20,2926,ohc,four,156,spdi,3.59,3.86,7.00,145,5000,19,24,14489
86 | 1,125,mitsubishi,gas,std,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2365,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,25,32,6989
87 | 1,125,mitsubishi,gas,std,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2405,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,25,32,8189
88 | 1,125,mitsubishi,gas,turbo,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2403,ohc,four,110,spdi,3.17,3.46,7.50,116,5500,23,30,9279
89 | -1,137,mitsubishi,gas,std,four,sedan,fwd,front,96.30,172.40,65.40,51.60,2403,ohc,four,110,spdi,3.17,3.46,7.50,116,5500,23,30,9279
90 | 1,128,nissan,gas,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,1889,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,5499
91 | 1,128,nissan,diesel,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,2017,ohc,four,103,idi,2.99,3.47,21.90,55,4800,45,50,7099
92 | 1,128,nissan,gas,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,1918,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,6649
93 | 1,122,nissan,gas,std,four,sedan,fwd,front,94.50,165.30,63.80,54.50,1938,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,6849
94 | 1,103,nissan,gas,std,four,wagon,fwd,front,94.50,170.20,63.80,53.50,2024,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7349
95 | 1,128,nissan,gas,std,two,sedan,fwd,front,94.50,165.30,63.80,54.50,1951,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7299
96 | 1,128,nissan,gas,std,two,hatchback,fwd,front,94.50,165.60,63.80,53.30,2028,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7799
97 | 1,122,nissan,gas,std,four,sedan,fwd,front,94.50,165.30,63.80,54.50,1971,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7499
98 | 1,103,nissan,gas,std,four,wagon,fwd,front,94.50,170.20,63.80,53.50,2037,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,7999
99 | 2,168,nissan,gas,std,two,hardtop,fwd,front,95.10,162.40,63.80,53.30,2008,ohc,four,97,2bbl,3.15,3.29,9.40,69,5200,31,37,8249
100 | 0,106,nissan,gas,std,four,hatchback,fwd,front,97.20,173.40,65.20,54.70,2324,ohc,four,120,2bbl,3.33,3.47,8.50,97,5200,27,34,8949
101 | 0,106,nissan,gas,std,four,sedan,fwd,front,97.20,173.40,65.20,54.70,2302,ohc,four,120,2bbl,3.33,3.47,8.50,97,5200,27,34,9549
102 | 0,128,nissan,gas,std,four,sedan,fwd,front,100.40,181.70,66.50,55.10,3095,ohcv,six,181,mpfi,3.43,3.27,9.00,152,5200,17,22,13499
103 | 0,108,nissan,gas,std,four,wagon,fwd,front,100.40,184.60,66.50,56.10,3296,ohcv,six,181,mpfi,3.43,3.27,9.00,152,5200,17,22,14399
104 | 0,108,nissan,gas,std,four,sedan,fwd,front,100.40,184.60,66.50,55.10,3060,ohcv,six,181,mpfi,3.43,3.27,9.00,152,5200,19,25,13499
105 | 3,194,nissan,gas,std,two,hatchback,rwd,front,91.30,170.70,67.90,49.70,3071,ohcv,six,181,mpfi,3.43,3.27,9.00,160,5200,19,25,17199
106 | 3,194,nissan,gas,turbo,two,hatchback,rwd,front,91.30,170.70,67.90,49.70,3139,ohcv,six,181,mpfi,3.43,3.27,7.80,200,5200,17,23,19699
107 | 1,231,nissan,gas,std,two,hatchback,rwd,front,99.20,178.50,67.90,49.70,3139,ohcv,six,181,mpfi,3.43,3.27,9.00,160,5200,19,25,18399
108 | 0,161,peugot,gas,std,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3020,l,four,120,mpfi,3.46,3.19,8.40,97,5000,19,24,11900
109 | 0,161,peugot,diesel,turbo,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3197,l,four,152,idi,3.70,3.52,21.00,95,4150,28,33,13200
110 | 0,?,peugot,gas,std,four,wagon,rwd,front,114.20,198.90,68.40,58.70,3230,l,four,120,mpfi,3.46,3.19,8.40,97,5000,19,24,12440
111 | 0,?,peugot,diesel,turbo,four,wagon,rwd,front,114.20,198.90,68.40,58.70,3430,l,four,152,idi,3.70,3.52,21.00,95,4150,25,25,13860
112 | 0,161,peugot,gas,std,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3075,l,four,120,mpfi,3.46,2.19,8.40,95,5000,19,24,15580
113 | 0,161,peugot,diesel,turbo,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3252,l,four,152,idi,3.70,3.52,21.00,95,4150,28,33,16900
114 | 0,?,peugot,gas,std,four,wagon,rwd,front,114.20,198.90,68.40,56.70,3285,l,four,120,mpfi,3.46,2.19,8.40,95,5000,19,24,16695
115 | 0,?,peugot,diesel,turbo,four,wagon,rwd,front,114.20,198.90,68.40,58.70,3485,l,four,152,idi,3.70,3.52,21.00,95,4150,25,25,17075
116 | 0,161,peugot,gas,std,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3075,l,four,120,mpfi,3.46,3.19,8.40,97,5000,19,24,16630
117 | 0,161,peugot,diesel,turbo,four,sedan,rwd,front,107.90,186.70,68.40,56.70,3252,l,four,152,idi,3.70,3.52,21.00,95,4150,28,33,17950
118 | 0,161,peugot,gas,turbo,four,sedan,rwd,front,108.00,186.70,68.30,56.00,3130,l,four,134,mpfi,3.61,3.21,7.00,142,5600,18,24,18150
119 | 1,119,plymouth,gas,std,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,1918,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,37,41,5572
120 | 1,119,plymouth,gas,turbo,two,hatchback,fwd,front,93.70,157.30,63.80,50.80,2128,ohc,four,98,spdi,3.03,3.39,7.60,102,5500,24,30,7957
121 | 1,154,plymouth,gas,std,four,hatchback,fwd,front,93.70,157.30,63.80,50.60,1967,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6229
122 | 1,154,plymouth,gas,std,four,sedan,fwd,front,93.70,167.30,63.80,50.80,1989,ohc,four,90,2bbl,2.97,3.23,9.40,68,5500,31,38,6692
123 | 1,154,plymouth,gas,std,four,sedan,fwd,front,93.70,167.30,63.80,50.80,2191,ohc,four,98,2bbl,2.97,3.23,9.40,68,5500,31,38,7609
124 | -1,74,plymouth,gas,std,four,wagon,fwd,front,103.30,174.60,64.60,59.80,2535,ohc,four,122,2bbl,3.35,3.46,8.50,88,5000,24,30,8921
125 | 3,?,plymouth,gas,turbo,two,hatchback,rwd,front,95.90,173.20,66.30,50.20,2818,ohc,four,156,spdi,3.59,3.86,7.00,145,5000,19,24,12764
126 | 3,186,porsche,gas,std,two,hatchback,rwd,front,94.50,168.90,68.30,50.20,2778,ohc,four,151,mpfi,3.94,3.11,9.50,143,5500,19,27,22018
127 | 3,?,porsche,gas,std,two,hardtop,rwd,rear,89.50,168.90,65.00,51.60,2756,ohcf,six,194,mpfi,3.74,2.90,9.50,207,5900,17,25,32528
128 | 3,?,porsche,gas,std,two,hardtop,rwd,rear,89.50,168.90,65.00,51.60,2756,ohcf,six,194,mpfi,3.74,2.90,9.50,207,5900,17,25,34028
129 | 3,?,porsche,gas,std,two,convertible,rwd,rear,89.50,168.90,65.00,51.60,2800,ohcf,six,194,mpfi,3.74,2.90,9.50,207,5900,17,25,37028
130 | 1,?,porsche,gas,std,two,hatchback,rwd,front,98.40,175.70,72.30,50.50,3366,dohcv,eight,203,mpfi,3.94,3.11,10.00,288,5750,17,28,?
131 | 0,?,renault,gas,std,four,wagon,fwd,front,96.10,181.50,66.50,55.20,2579,ohc,four,132,mpfi,3.46,3.90,8.70,?,?,23,31,9295
132 | 2,?,renault,gas,std,two,hatchback,fwd,front,96.10,176.80,66.60,50.50,2460,ohc,four,132,mpfi,3.46,3.90,8.70,?,?,23,31,9895
133 | 3,150,saab,gas,std,two,hatchback,fwd,front,99.10,186.60,66.50,56.10,2658,ohc,four,121,mpfi,3.54,3.07,9.31,110,5250,21,28,11850
134 | 2,104,saab,gas,std,four,sedan,fwd,front,99.10,186.60,66.50,56.10,2695,ohc,four,121,mpfi,3.54,3.07,9.30,110,5250,21,28,12170
135 | 3,150,saab,gas,std,two,hatchback,fwd,front,99.10,186.60,66.50,56.10,2707,ohc,four,121,mpfi,2.54,2.07,9.30,110,5250,21,28,15040
136 | 2,104,saab,gas,std,four,sedan,fwd,front,99.10,186.60,66.50,56.10,2758,ohc,four,121,mpfi,3.54,3.07,9.30,110,5250,21,28,15510
137 | 3,150,saab,gas,turbo,two,hatchback,fwd,front,99.10,186.60,66.50,56.10,2808,dohc,four,121,mpfi,3.54,3.07,9.00,160,5500,19,26,18150
138 | 2,104,saab,gas,turbo,four,sedan,fwd,front,99.10,186.60,66.50,56.10,2847,dohc,four,121,mpfi,3.54,3.07,9.00,160,5500,19,26,18620
139 | 2,83,subaru,gas,std,two,hatchback,fwd,front,93.70,156.90,63.40,53.70,2050,ohcf,four,97,2bbl,3.62,2.36,9.00,69,4900,31,36,5118
140 | 2,83,subaru,gas,std,two,hatchback,fwd,front,93.70,157.90,63.60,53.70,2120,ohcf,four,108,2bbl,3.62,2.64,8.70,73,4400,26,31,7053
141 | 2,83,subaru,gas,std,two,hatchback,4wd,front,93.30,157.30,63.80,55.70,2240,ohcf,four,108,2bbl,3.62,2.64,8.70,73,4400,26,31,7603
142 | 0,102,subaru,gas,std,four,sedan,fwd,front,97.20,172.00,65.40,52.50,2145,ohcf,four,108,2bbl,3.62,2.64,9.50,82,4800,32,37,7126
143 | 0,102,subaru,gas,std,four,sedan,fwd,front,97.20,172.00,65.40,52.50,2190,ohcf,four,108,2bbl,3.62,2.64,9.50,82,4400,28,33,7775
144 | 0,102,subaru,gas,std,four,sedan,fwd,front,97.20,172.00,65.40,52.50,2340,ohcf,four,108,mpfi,3.62,2.64,9.00,94,5200,26,32,9960
145 | 0,102,subaru,gas,std,four,sedan,4wd,front,97.00,172.00,65.40,54.30,2385,ohcf,four,108,2bbl,3.62,2.64,9.00,82,4800,24,25,9233
146 | 0,102,subaru,gas,turbo,four,sedan,4wd,front,97.00,172.00,65.40,54.30,2510,ohcf,four,108,mpfi,3.62,2.64,7.70,111,4800,24,29,11259
147 | 0,89,subaru,gas,std,four,wagon,fwd,front,97.00,173.50,65.40,53.00,2290,ohcf,four,108,2bbl,3.62,2.64,9.00,82,4800,28,32,7463
148 | 0,89,subaru,gas,std,four,wagon,fwd,front,97.00,173.50,65.40,53.00,2455,ohcf,four,108,mpfi,3.62,2.64,9.00,94,5200,25,31,10198
149 | 0,85,subaru,gas,std,four,wagon,4wd,front,96.90,173.60,65.40,54.90,2420,ohcf,four,108,2bbl,3.62,2.64,9.00,82,4800,23,29,8013
150 | 0,85,subaru,gas,turbo,four,wagon,4wd,front,96.90,173.60,65.40,54.90,2650,ohcf,four,108,mpfi,3.62,2.64,7.70,111,4800,23,23,11694
151 | 1,87,toyota,gas,std,two,hatchback,fwd,front,95.70,158.70,63.60,54.50,1985,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,35,39,5348
152 | 1,87,toyota,gas,std,two,hatchback,fwd,front,95.70,158.70,63.60,54.50,2040,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,31,38,6338
153 | 1,74,toyota,gas,std,four,hatchback,fwd,front,95.70,158.70,63.60,54.50,2015,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,31,38,6488
154 | 0,77,toyota,gas,std,four,wagon,fwd,front,95.70,169.70,63.60,59.10,2280,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,31,37,6918
155 | 0,81,toyota,gas,std,four,wagon,4wd,front,95.70,169.70,63.60,59.10,2290,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,27,32,7898
156 | 0,91,toyota,gas,std,four,wagon,4wd,front,95.70,169.70,63.60,59.10,3110,ohc,four,92,2bbl,3.05,3.03,9.00,62,4800,27,32,8778
157 | 0,91,toyota,gas,std,four,sedan,fwd,front,95.70,166.30,64.40,53.00,2081,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,30,37,6938
158 | 0,91,toyota,gas,std,four,hatchback,fwd,front,95.70,166.30,64.40,52.80,2109,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,30,37,7198
159 | 0,91,toyota,diesel,std,four,sedan,fwd,front,95.70,166.30,64.40,53.00,2275,ohc,four,110,idi,3.27,3.35,22.50,56,4500,34,36,7898
160 | 0,91,toyota,diesel,std,four,hatchback,fwd,front,95.70,166.30,64.40,52.80,2275,ohc,four,110,idi,3.27,3.35,22.50,56,4500,38,47,7788
161 | 0,91,toyota,gas,std,four,sedan,fwd,front,95.70,166.30,64.40,53.00,2094,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,38,47,7738
162 | 0,91,toyota,gas,std,four,hatchback,fwd,front,95.70,166.30,64.40,52.80,2122,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,28,34,8358
163 | 0,91,toyota,gas,std,four,sedan,fwd,front,95.70,166.30,64.40,52.80,2140,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,28,34,9258
164 | 1,168,toyota,gas,std,two,sedan,rwd,front,94.50,168.70,64.00,52.60,2169,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,29,34,8058
165 | 1,168,toyota,gas,std,two,hatchback,rwd,front,94.50,168.70,64.00,52.60,2204,ohc,four,98,2bbl,3.19,3.03,9.00,70,4800,29,34,8238
166 | 1,168,toyota,gas,std,two,sedan,rwd,front,94.50,168.70,64.00,52.60,2265,dohc,four,98,mpfi,3.24,3.08,9.40,112,6600,26,29,9298
167 | 1,168,toyota,gas,std,two,hatchback,rwd,front,94.50,168.70,64.00,52.60,2300,dohc,four,98,mpfi,3.24,3.08,9.40,112,6600,26,29,9538
168 | 2,134,toyota,gas,std,two,hardtop,rwd,front,98.40,176.20,65.60,52.00,2540,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,8449
169 | 2,134,toyota,gas,std,two,hardtop,rwd,front,98.40,176.20,65.60,52.00,2536,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,9639
170 | 2,134,toyota,gas,std,two,hatchback,rwd,front,98.40,176.20,65.60,52.00,2551,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,9989
171 | 2,134,toyota,gas,std,two,hardtop,rwd,front,98.40,176.20,65.60,52.00,2679,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,11199
172 | 2,134,toyota,gas,std,two,hatchback,rwd,front,98.40,176.20,65.60,52.00,2714,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,11549
173 | 2,134,toyota,gas,std,two,convertible,rwd,front,98.40,176.20,65.60,53.00,2975,ohc,four,146,mpfi,3.62,3.50,9.30,116,4800,24,30,17669
174 | -1,65,toyota,gas,std,four,sedan,fwd,front,102.40,175.60,66.50,54.90,2326,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,29,34,8948
175 | -1,65,toyota,diesel,turbo,four,sedan,fwd,front,102.40,175.60,66.50,54.90,2480,ohc,four,110,idi,3.27,3.35,22.50,73,4500,30,33,10698
176 | -1,65,toyota,gas,std,four,hatchback,fwd,front,102.40,175.60,66.50,53.90,2414,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,27,32,9988
177 | -1,65,toyota,gas,std,four,sedan,fwd,front,102.40,175.60,66.50,54.90,2414,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,27,32,10898
178 | -1,65,toyota,gas,std,four,hatchback,fwd,front,102.40,175.60,66.50,53.90,2458,ohc,four,122,mpfi,3.31,3.54,8.70,92,4200,27,32,11248
179 | 3,197,toyota,gas,std,two,hatchback,rwd,front,102.90,183.50,67.70,52.00,2976,dohc,six,171,mpfi,3.27,3.35,9.30,161,5200,20,24,16558
180 | 3,197,toyota,gas,std,two,hatchback,rwd,front,102.90,183.50,67.70,52.00,3016,dohc,six,171,mpfi,3.27,3.35,9.30,161,5200,19,24,15998
181 | -1,90,toyota,gas,std,four,sedan,rwd,front,104.50,187.80,66.50,54.10,3131,dohc,six,171,mpfi,3.27,3.35,9.20,156,5200,20,24,15690
182 | -1,?,toyota,gas,std,four,wagon,rwd,front,104.50,187.80,66.50,54.10,3151,dohc,six,161,mpfi,3.27,3.35,9.20,156,5200,19,24,15750
183 | 2,122,volkswagen,diesel,std,two,sedan,fwd,front,97.30,171.70,65.50,55.70,2261,ohc,four,97,idi,3.01,3.40,23.00,52,4800,37,46,7775
184 | 2,122,volkswagen,gas,std,two,sedan,fwd,front,97.30,171.70,65.50,55.70,2209,ohc,four,109,mpfi,3.19,3.40,9.00,85,5250,27,34,7975
185 | 2,94,volkswagen,diesel,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2264,ohc,four,97,idi,3.01,3.40,23.00,52,4800,37,46,7995
186 | 2,94,volkswagen,gas,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2212,ohc,four,109,mpfi,3.19,3.40,9.00,85,5250,27,34,8195
187 | 2,94,volkswagen,gas,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2275,ohc,four,109,mpfi,3.19,3.40,9.00,85,5250,27,34,8495
188 | 2,94,volkswagen,diesel,turbo,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2319,ohc,four,97,idi,3.01,3.40,23.00,68,4500,37,42,9495
189 | 2,94,volkswagen,gas,std,four,sedan,fwd,front,97.30,171.70,65.50,55.70,2300,ohc,four,109,mpfi,3.19,3.40,10.00,100,5500,26,32,9995
190 | 3,?,volkswagen,gas,std,two,convertible,fwd,front,94.50,159.30,64.20,55.60,2254,ohc,four,109,mpfi,3.19,3.40,8.50,90,5500,24,29,11595
191 | 3,256,volkswagen,gas,std,two,hatchback,fwd,front,94.50,165.70,64.00,51.40,2221,ohc,four,109,mpfi,3.19,3.40,8.50,90,5500,24,29,9980
192 | 0,?,volkswagen,gas,std,four,sedan,fwd,front,100.40,180.20,66.90,55.10,2661,ohc,five,136,mpfi,3.19,3.40,8.50,110,5500,19,24,13295
193 | 0,?,volkswagen,diesel,turbo,four,sedan,fwd,front,100.40,180.20,66.90,55.10,2579,ohc,four,97,idi,3.01,3.40,23.00,68,4500,33,38,13845
194 | 0,?,volkswagen,gas,std,four,wagon,fwd,front,100.40,183.10,66.90,55.10,2563,ohc,four,109,mpfi,3.19,3.40,9.00,88,5500,25,31,12290
195 | -2,103,volvo,gas,std,four,sedan,rwd,front,104.30,188.80,67.20,56.20,2912,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,23,28,12940
196 | -1,74,volvo,gas,std,four,wagon,rwd,front,104.30,188.80,67.20,57.50,3034,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,23,28,13415
197 | -2,103,volvo,gas,std,four,sedan,rwd,front,104.30,188.80,67.20,56.20,2935,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,24,28,15985
198 | -1,74,volvo,gas,std,four,wagon,rwd,front,104.30,188.80,67.20,57.50,3042,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,24,28,16515
199 | -2,103,volvo,gas,turbo,four,sedan,rwd,front,104.30,188.80,67.20,56.20,3045,ohc,four,130,mpfi,3.62,3.15,7.50,162,5100,17,22,18420
200 | -1,74,volvo,gas,turbo,four,wagon,rwd,front,104.30,188.80,67.20,57.50,3157,ohc,four,130,mpfi,3.62,3.15,7.50,162,5100,17,22,18950
201 | -1,95,volvo,gas,std,four,sedan,rwd,front,109.10,188.80,68.90,55.50,2952,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,23,28,16845
202 | -1,95,volvo,gas,turbo,four,sedan,rwd,front,109.10,188.80,68.80,55.50,3049,ohc,four,141,mpfi,3.78,3.15,8.70,160,5300,19,25,19045
203 | -1,95,volvo,gas,std,four,sedan,rwd,front,109.10,188.80,68.90,55.50,3012,ohcv,six,173,mpfi,3.58,2.87,8.80,134,5500,18,23,21485
204 | -1,95,volvo,diesel,turbo,four,sedan,rwd,front,109.10,188.80,68.90,55.50,3217,ohc,six,145,idi,3.01,3.40,23.00,106,4800,26,27,22470
205 | -1,95,volvo,gas,turbo,four,sedan,rwd,front,109.10,188.80,68.90,55.50,3062,ohc,four,141,mpfi,3.78,3.15,9.50,114,5400,19,25,22625
206 |
--------------------------------------------------------------------------------
/Predicting Bike Rentals/Basics.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Many American cities have communal bike sharing stations where you can rent bicycles by the hour or day. Washington, D.C. is one of these cities. The District collects detailed data on the number of bicycles people rent by the hour and day.\n",
8 | "\n",
9 | "Hadi Fanaee-T at the University of Porto compiled this data into a CSV file, which you'll be working with in this project. The file contains 17380 rows, with each row representing the number of bike rentals for a single hour of a single day. You can download the data from the University of California, Irvine's website."
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "Here are the descriptions for the relevant columns:\n",
17 | "\n",
18 | "- instant - A unique sequential ID number for each row\n",
19 | "- dteday - The date of the rentals\n",
20 | "- season - The season in which the rentals occurred\n",
21 | "- yr - The year the rentals occurred\n",
22 | "- mnth - The month the rentals occurred\n",
23 | "- hr - The hour the rentals occurred\n",
24 | "- holiday - Whether or not the day was a holiday\n",
25 | "- weekday - The day of the week (as a number, 0 to 6)\n",
26 | "- workingday - Whether or not the day was a working day\n",
27 | "- weathersit - The weather (as a categorical variable)\n",
28 | "- temp - The temperature, on a 0-1 scale\n",
29 | "- atemp - The adjusted (feels-like) temperature\n",
30 | "- hum - The humidity, on a 0-1 scale\n",
31 | "- windspeed - The wind speed, on a 0-1 scale\n",
32 | "- casual - The number of casual riders (people who hadn't previously signed up with the bike sharing program)\n",
33 | "- registered - The number of registered riders (people who had already signed up)\n",
34 | "- cnt - The total number of bike rentals (casual + registered)"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 5,
40 | "metadata": {
41 | "collapsed": false
42 | },
43 | "outputs": [
44 | {
45 | "data": {
189 | "text/plain": [
190 | " instant dteday season yr mnth hr holiday weekday workingday \\\n",
191 | "0 1 2011-01-01 1 0 1 0 0 6 0 \n",
192 | "1 2 2011-01-01 1 0 1 1 0 6 0 \n",
193 | "2 3 2011-01-01 1 0 1 2 0 6 0 \n",
194 | "3 4 2011-01-01 1 0 1 3 0 6 0 \n",
195 | "4 5 2011-01-01 1 0 1 4 0 6 0 \n",
196 | "\n",
197 | " weathersit temp atemp hum windspeed casual registered cnt \n",
198 | "0 1 0.24 0.2879 0.81 0.0 3 13 16 \n",
199 | "1 1 0.22 0.2727 0.80 0.0 8 32 40 \n",
200 | "2 1 0.22 0.2727 0.80 0.0 5 27 32 \n",
201 | "3 1 0.24 0.2879 0.75 0.0 3 10 13 \n",
202 | "4 1 0.24 0.2879 0.75 0.0 0 1 1 "
203 | ]
204 | },
205 | "execution_count": 5,
206 | "metadata": {},
207 | "output_type": "execute_result"
208 | }
209 | ],
210 | "source": [
211 | "import pandas as pd\n",
212 | "\n",
213 | "bike_rentals = pd.read_csv('bike_rental_hour.csv')\n",
214 | "\n",
215 | "bike_rentals.head()"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 6,
221 | "metadata": {
222 | "collapsed": false
223 | },
224 | "outputs": [
225 | {
226 | "data": {
227 | "text/plain": [
228 | "(array([6972., 3705., 2659., 1660., 987., 663., 369., 188., 139.,\n",
229 | " 37.]),\n",
230 | " array([ 1. , 98.6, 196.2, 293.8, 391.4, 489. , 586.6, 684.2, 781.8,\n",
231 | " 879.4, 977. ]),\n",
232 | "  <a list of 10 Patch objects>)"
233 | ]
234 | },
235 | "execution_count": 6,
236 | "metadata": {},
237 | "output_type": "execute_result"
238 | },
239 | {
240 | "data": {
241 | "image/png": "<base64 PNG data omitted: histogram of bike_rentals['cnt'], 10 bins>",
242 | "text/plain": [
243 | ""
244 | ]
245 | },
246 | "metadata": {},
247 | "output_type": "display_data"
248 | }
249 | ],
250 | "source": [
251 | "%matplotlib inline\n",
252 | "\n",
253 | "import matplotlib.pyplot as plt\n",
254 | "\n",
255 | "plt.hist(bike_rentals[\"cnt\"])"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 8,
261 | "metadata": {
262 | "collapsed": false
263 | },
264 | "outputs": [
265 | {
266 | "data": {
267 | "text/plain": [
268 | "instant 0.278379\n",
269 | "season 0.178056\n",
270 | "yr 0.250495\n",
271 | "mnth 0.120638\n",
272 | "hr 0.394071\n",
273 | "holiday -0.030927\n",
274 | "weekday 0.026900\n",
275 | "workingday 0.030284\n",
276 | "weathersit -0.142426\n",
277 | "temp 0.404772\n",
278 | "atemp 0.400929\n",
279 | "hum -0.322911\n",
280 | "windspeed 0.093234\n",
281 | "casual 0.694564\n",
282 | "registered 0.972151\n",
283 | "cnt 1.000000\n",
284 | "Name: cnt, dtype: float64"
285 | ]
286 | },
287 | "execution_count": 8,
288 | "metadata": {},
289 | "output_type": "execute_result"
290 | }
291 | ],
292 | "source": [
293 | "bike_rentals.corr()['cnt']"
294 | ]
295 | },
296 | {
297 | "cell_type": "markdown",
298 | "metadata": {},
299 | "source": [
300 | "As we already know from description that cnt is a sum of casual and registered column it will obviously show a high correlation, what is interesting is to see how temperature contributes to the value of cnt and we would have assumed that a holiday might be a contributing factor in deciding the number of registration, by the above table we can clearly see that thats not the case"
301 | ]
302 | },
303 | {
304 | "cell_type": "markdown",
305 | "metadata": {},
306 | "source": [
307 | "It can often be helpful to calculate features before applying machine learning models. Features can enhance the accuracy of models by introducing new information, or distilling existing information.\n",
308 | "\n",
309 | "For example, the hr column in bike_rentals contains the hours during which bikes are rented, from 1 to 24. A machine will treat each hour differently, without understanding that certain hours are related. We can introduce some order into the process by creating a new column with labels for morning, afternoon, evening, and night. This will bundle similar times together, enabling the model to make better decisions"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 13,
315 | "metadata": {
316 | "collapsed": false
317 | },
318 | "outputs": [],
319 | "source": [
320 | "def assign_label(hour):\n",
321 | " if hour >=0 and hour < 6:\n",
322 | " return 4\n",
323 | " elif hour >=6 and hour < 12:\n",
324 | " return 1\n",
325 | " elif hour >= 12 and hour < 18:\n",
326 | " return 2\n",
327 | " elif hour >= 18 and hour <=24:\n",
328 | " return 3\n",
329 | "\n",
330 | "bike_rentals[\"time_label\"] = bike_rentals[\"hr\"].apply(assign_label)\n"
331 | ]
332 | },
333 | {
334 | "cell_type": "markdown",
335 | "metadata": {},
336 | "source": [
337 | "# Error metric¶\n",
338 | "The mean squared error metric makes the most sense to evaluate our error. MSE works on continuous numeric data, which fits our data quite well."
339 | ]
340 | },
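{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["# added sketch: MSE is just the mean of the squared differences; sklearn's helper gives the same number\n", "from sklearn.metrics import mean_squared_error\n", "mean_squared_error([1, 2, 3], [1, 2, 5])  # (0**2 + 0**2 + 2**2) / 3 = 1.33...\n"] },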
341 | {
342 | "cell_type": "markdown",
343 | "metadata": {},
344 | "source": [
345 | "Before we begin applying machine learning algorithms, we'll need to split the data into training and testing sets. This will enable us to train an algorithm using the training set, and evaluate its accuracy on the testing set. If we train an algorithm on the training data, then evaluate its performance on the same data, we can get an unrealistically low error value, due to overfitting."
346 | ]
347 | },
348 | {
349 | "cell_type": "code",
350 | "execution_count": 16,
351 | "metadata": {
352 | "collapsed": true
353 | },
354 | "outputs": [],
355 | "source": [
356 | "#let split 80% of our data to train set and remaning 20% to test set\n",
357 | "\n",
358 | "train = bike_rentals.sample(frac=.8)\n",
359 | "\n",
360 | "test = bike_rentals.loc[~bike_rentals.index.isin(train.index)]"
361 | ]
362 | },
363 | {
364 | "cell_type": "markdown",
365 | "metadata": {},
366 | "source": [
367 | "Now that we've done some exploration and manipulation, we're ready to apply linear regression to the data. Linear regression will probably work fairly well on this data, given that many of the columns are highly correlated with cnt."
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": 17,
373 | "metadata": {
374 | "collapsed": false
375 | },
376 | "outputs": [
377 | {
378 | "data": {
379 | "text/plain": [
380 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
381 | ]
382 | },
383 | "execution_count": 17,
384 | "metadata": {},
385 | "output_type": "execute_result"
386 | }
387 | ],
388 | "source": [
389 | "\n",
390 | "from sklearn.linear_model import LinearRegression\n",
391 | "\n",
392 | "predictors = list(train.columns)\n",
393 | "predictors.remove(\"cnt\")\n",
394 | "predictors.remove(\"casual\")\n",
395 | "predictors.remove(\"registered\")\n",
396 | "predictors.remove(\"dteday\")\n",
397 | "\n",
398 | "reg = LinearRegression()\n",
399 | "\n",
400 | "reg.fit(train[predictors], train[\"cnt\"])"
401 | ]
402 | },
403 | {
404 | "cell_type": "code",
405 | "execution_count": 18,
406 | "metadata": {
407 | "collapsed": false
408 | },
409 | "outputs": [
410 | {
411 | "data": {
412 | "text/plain": [
413 | "17468.19856186649"
414 | ]
415 | },
416 | "execution_count": 18,
417 | "metadata": {},
418 | "output_type": "execute_result"
419 | }
420 | ],
421 | "source": [
422 | "\n",
423 | "import numpy\n",
424 | "predictions = reg.predict(test[predictors])\n",
425 | "\n",
426 | "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
427 | ]
428 | },
429 | {
430 | "cell_type": "markdown",
431 | "metadata": {},
432 | "source": [
433 | "# Error\n",
434 | "The error is very high, which may be due to the fact that the data has a few extremely high rental counts, but otherwise mostly low counts. Larger errors are penalized more with MSE, which leads to a higher total error."
435 | ]
436 | },
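{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["# added sketch: mean absolute error penalizes the outlying high counts less than MSE,\n", "# so it is a useful cross-check on the linear regression predictions above\n", "numpy.mean(numpy.abs(predictions - test[\"cnt\"]))\n"] },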
437 | {
438 | "cell_type": "markdown",
439 | "metadata": {},
440 | "source": [
441 | "Now lets apply the decision tree algorithm. You'll be able to compare its error with the error from linear regression, which will enable you to pick the right algorithm for this data set."
442 | ]
443 | },
444 | {
445 | "cell_type": "code",
446 | "execution_count": 19,
447 | "metadata": {
448 | "collapsed": false
449 | },
450 | "outputs": [
451 | {
452 | "data": {
453 | "text/plain": [
454 | "DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,\n",
455 | " max_leaf_nodes=None, min_impurity_split=1e-07,\n",
456 | " min_samples_leaf=5, min_samples_split=2,\n",
457 | " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
458 | " splitter='best')"
459 | ]
460 | },
461 | "execution_count": 19,
462 | "metadata": {},
463 | "output_type": "execute_result"
464 | }
465 | ],
466 | "source": [
467 | "from sklearn.tree import DecisionTreeRegressor\n",
468 | "\n",
469 | "reg = DecisionTreeRegressor(min_samples_leaf=5)\n",
470 | "\n",
471 | "reg.fit(train[predictors], train[\"cnt\"])"
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": 20,
477 | "metadata": {
478 | "collapsed": false
479 | },
480 | "outputs": [
481 | {
482 | "data": {
483 | "text/plain": [
484 | "2633.0851704678644"
485 | ]
486 | },
487 | "execution_count": 20,
488 | "metadata": {},
489 | "output_type": "execute_result"
490 | }
491 | ],
492 | "source": [
493 | "predictions = reg.predict(test[predictors])\n",
494 | "\n",
495 | "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
496 | ]
497 | },
498 | {
499 | "cell_type": "code",
500 | "execution_count": 21,
501 | "metadata": {
502 | "collapsed": false
503 | },
504 | "outputs": [
505 | {
506 | "data": {
507 | "text/plain": [
508 | "2855.7447177470913"
509 | ]
510 | },
511 | "execution_count": 21,
512 | "metadata": {},
513 | "output_type": "execute_result"
514 | }
515 | ],
516 | "source": [
517 | "# lets try and see if we get better result with a different sample value \n",
518 | "\n",
519 | "reg = DecisionTreeRegressor(min_samples_leaf=2)\n",
520 | "\n",
521 | "reg.fit(train[predictors], train[\"cnt\"])\n",
522 | "\n",
523 | "predictions = reg.predict(test[predictors])\n",
524 | "\n",
525 | "numpy.mean((predictions - test[\"cnt\"]) ** 2)"
526 | ]
527 | },
528 | {
529 | "cell_type": "markdown",
530 | "metadata": {},
531 | "source": [
532 | "# Decision tree error\n",
533 | "By taking the nonlinear predictors into account, the decision tree regressor appears to have much higher accuracy than linear regression."
534 | ]
535 | },
536 | {
537 | "cell_type": "markdown",
538 | "metadata": {},
539 | "source": [
540 | "We can now apply the random forest algorithm, which improves on the decision tree algorithm. Random forests tend to be much more accurate than simple models like linear regression. Due to the way random forests are constructed, they tend to overfit much less than decision trees. Random forests can still be prone to overfitting, though, so it's important to tune parameters like maximum depth and minimum samples per leaf."
541 | ]
542 | },
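{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": ["# a minimal sketch of the random forest step described above; the hyperparameter\n", "# values here are illustrative assumptions, not tuned results\n", "from sklearn.ensemble import RandomForestRegressor\n", "\n", "reg = RandomForestRegressor(n_estimators=100, min_samples_leaf=5, random_state=1)\n", "reg.fit(train[predictors], train[\"cnt\"])\n", "\n", "predictions = reg.predict(test[predictors])\n", "numpy.mean((predictions - test[\"cnt\"]) ** 2)\n"] },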
543 | {
544 | "cell_type": "code",
545 | "execution_count": null,
546 | "metadata": {
547 | "collapsed": true
548 | },
549 | "outputs": [],
550 | "source": []
551 | }
552 | ],
553 | "metadata": {
554 | "kernelspec": {
555 | "display_name": "Python 3",
556 | "language": "python",
557 | "name": "python3"
558 | },
559 | "language_info": {
560 | "codemirror_mode": {
561 | "name": "ipython",
562 | "version": 3
563 | },
564 | "file_extension": ".py",
565 | "mimetype": "text/x-python",
566 | "name": "python",
567 | "nbconvert_exporter": "python",
568 | "pygments_lexer": "ipython3",
569 | "version": "3.4.3"
570 | }
571 | },
572 | "nbformat": 4,
573 | "nbformat_minor": 0
574 | }
575 |
--------------------------------------------------------------------------------
/Prediciting Titanic Survival/test.csv:
--------------------------------------------------------------------------------
1 | PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
2 | 892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
3 | 893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47,1,0,363272,7,,S
4 | 894,2,"Myles, Mr. Thomas Francis",male,62,0,0,240276,9.6875,,Q
5 | 895,3,"Wirz, Mr. Albert",male,27,0,0,315154,8.6625,,S
6 | 896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22,1,1,3101298,12.2875,,S
7 | 897,3,"Svensson, Mr. Johan Cervin",male,14,0,0,7538,9.225,,S
8 | 898,3,"Connolly, Miss. Kate",female,30,0,0,330972,7.6292,,Q
9 | 899,2,"Caldwell, Mr. Albert Francis",male,26,1,1,248738,29,,S
10 | 900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18,0,0,2657,7.2292,,C
11 | 901,3,"Davies, Mr. John Samuel",male,21,2,0,A/4 48871,24.15,,S
12 | 902,3,"Ilieff, Mr. Ylio",male,,0,0,349220,7.8958,,S
13 | 903,1,"Jones, Mr. Charles Cresson",male,46,0,0,694,26,,S
14 | 904,1,"Snyder, Mrs. John Pillsbury (Nelle Stevenson)",female,23,1,0,21228,82.2667,B45,S
15 | 905,2,"Howard, Mr. Benjamin",male,63,1,0,24065,26,,S
16 | 906,1,"Chaffee, Mrs. Herbert Fuller (Carrie Constance Toogood)",female,47,1,0,W.E.P. 5734,61.175,E31,S
17 | 907,2,"del Carlo, Mrs. Sebastiano (Argenia Genovesi)",female,24,1,0,SC/PARIS 2167,27.7208,,C
18 | 908,2,"Keane, Mr. Daniel",male,35,0,0,233734,12.35,,Q
19 | 909,3,"Assaf, Mr. Gerios",male,21,0,0,2692,7.225,,C
20 | 910,3,"Ilmakangas, Miss. Ida Livija",female,27,1,0,STON/O2. 3101270,7.925,,S
21 | 911,3,"Assaf Khalil, Mrs. Mariana (Miriam"")""",female,45,0,0,2696,7.225,,C
22 | 912,1,"Rothschild, Mr. Martin",male,55,1,0,PC 17603,59.4,,C
23 | 913,3,"Olsen, Master. Artur Karl",male,9,0,1,C 17368,3.1708,,S
24 | 914,1,"Flegenheim, Mrs. Alfred (Antoinette)",female,,0,0,PC 17598,31.6833,,S
25 | 915,1,"Williams, Mr. Richard Norris II",male,21,0,1,PC 17597,61.3792,,C
26 | 916,1,"Ryerson, Mrs. Arthur Larned (Emily Maria Borie)",female,48,1,3,PC 17608,262.375,B57 B59 B63 B66,C
27 | 917,3,"Robins, Mr. Alexander A",male,50,1,0,A/5. 3337,14.5,,S
28 | 918,1,"Ostby, Miss. Helene Ragnhild",female,22,0,1,113509,61.9792,B36,C
29 | 919,3,"Daher, Mr. Shedid",male,22.5,0,0,2698,7.225,,C
30 | 920,1,"Brady, Mr. John Bertram",male,41,0,0,113054,30.5,A21,S
31 | 921,3,"Samaan, Mr. Elias",male,,2,0,2662,21.6792,,C
32 | 922,2,"Louch, Mr. Charles Alexander",male,50,1,0,SC/AH 3085,26,,S
33 | 923,2,"Jefferys, Mr. Clifford Thomas",male,24,2,0,C.A. 31029,31.5,,S
34 | 924,3,"Dean, Mrs. Bertram (Eva Georgetta Light)",female,33,1,2,C.A. 2315,20.575,,S
35 | 925,3,"Johnston, Mrs. Andrew G (Elizabeth Lily"" Watson)""",female,,1,2,W./C. 6607,23.45,,S
36 | 926,1,"Mock, Mr. Philipp Edmund",male,30,1,0,13236,57.75,C78,C
37 | 927,3,"Katavelas, Mr. Vassilios (Catavelas Vassilios"")""",male,18.5,0,0,2682,7.2292,,C
38 | 928,3,"Roth, Miss. Sarah A",female,,0,0,342712,8.05,,S
39 | 929,3,"Cacic, Miss. Manda",female,21,0,0,315087,8.6625,,S
40 | 930,3,"Sap, Mr. Julius",male,25,0,0,345768,9.5,,S
41 | 931,3,"Hee, Mr. Ling",male,,0,0,1601,56.4958,,S
42 | 932,3,"Karun, Mr. Franz",male,39,0,1,349256,13.4167,,C
43 | 933,1,"Franklin, Mr. Thomas Parham",male,,0,0,113778,26.55,D34,S
44 | 934,3,"Goldsmith, Mr. Nathan",male,41,0,0,SOTON/O.Q. 3101263,7.85,,S
45 | 935,2,"Corbett, Mrs. Walter H (Irene Colvin)",female,30,0,0,237249,13,,S
46 | 936,1,"Kimball, Mrs. Edwin Nelson Jr (Gertrude Parsons)",female,45,1,0,11753,52.5542,D19,S
47 | 937,3,"Peltomaki, Mr. Nikolai Johannes",male,25,0,0,STON/O 2. 3101291,7.925,,S
48 | 938,1,"Chevre, Mr. Paul Romaine",male,45,0,0,PC 17594,29.7,A9,C
49 | 939,3,"Shaughnessy, Mr. Patrick",male,,0,0,370374,7.75,,Q
50 | 940,1,"Bucknell, Mrs. William Robert (Emma Eliza Ward)",female,60,0,0,11813,76.2917,D15,C
51 | 941,3,"Coutts, Mrs. William (Winnie Minnie"" Treanor)""",female,36,0,2,C.A. 37671,15.9,,S
52 | 942,1,"Smith, Mr. Lucien Philip",male,24,1,0,13695,60,C31,S
53 | 943,2,"Pulbaum, Mr. Franz",male,27,0,0,SC/PARIS 2168,15.0333,,C
54 | 944,2,"Hocking, Miss. Ellen Nellie""""",female,20,2,1,29105,23,,S
55 | 945,1,"Fortune, Miss. Ethel Flora",female,28,3,2,19950,263,C23 C25 C27,S
56 | 946,2,"Mangiavacchi, Mr. Serafino Emilio",male,,0,0,SC/A.3 2861,15.5792,,C
57 | 947,3,"Rice, Master. Albert",male,10,4,1,382652,29.125,,Q
58 | 948,3,"Cor, Mr. Bartol",male,35,0,0,349230,7.8958,,S
59 | 949,3,"Abelseth, Mr. Olaus Jorgensen",male,25,0,0,348122,7.65,F G63,S
60 | 950,3,"Davison, Mr. Thomas Henry",male,,1,0,386525,16.1,,S
61 | 951,1,"Chaudanson, Miss. Victorine",female,36,0,0,PC 17608,262.375,B61,C
62 | 952,3,"Dika, Mr. Mirko",male,17,0,0,349232,7.8958,,S
63 | 953,2,"McCrae, Mr. Arthur Gordon",male,32,0,0,237216,13.5,,S
64 | 954,3,"Bjorklund, Mr. Ernst Herbert",male,18,0,0,347090,7.75,,S
65 | 955,3,"Bradley, Miss. Bridget Delia",female,22,0,0,334914,7.725,,Q
66 | 956,1,"Ryerson, Master. John Borie",male,13,2,2,PC 17608,262.375,B57 B59 B63 B66,C
67 | 957,2,"Corey, Mrs. Percy C (Mary Phyllis Elizabeth Miller)",female,,0,0,F.C.C. 13534,21,,S
68 | 958,3,"Burns, Miss. Mary Delia",female,18,0,0,330963,7.8792,,Q
69 | 959,1,"Moore, Mr. Clarence Bloomfield",male,47,0,0,113796,42.4,,S
70 | 960,1,"Tucker, Mr. Gilbert Milligan Jr",male,31,0,0,2543,28.5375,C53,C
71 | 961,1,"Fortune, Mrs. Mark (Mary McDougald)",female,60,1,4,19950,263,C23 C25 C27,S
72 | 962,3,"Mulvihill, Miss. Bertha E",female,24,0,0,382653,7.75,,Q
73 | 963,3,"Minkoff, Mr. Lazar",male,21,0,0,349211,7.8958,,S
74 | 964,3,"Nieminen, Miss. Manta Josefina",female,29,0,0,3101297,7.925,,S
75 | 965,1,"Ovies y Rodriguez, Mr. Servando",male,28.5,0,0,PC 17562,27.7208,D43,C
76 | 966,1,"Geiger, Miss. Amalie",female,35,0,0,113503,211.5,C130,C
77 | 967,1,"Keeping, Mr. Edwin",male,32.5,0,0,113503,211.5,C132,C
78 | 968,3,"Miles, Mr. Frank",male,,0,0,359306,8.05,,S
79 | 969,1,"Cornell, Mrs. Robert Clifford (Malvina Helen Lamson)",female,55,2,0,11770,25.7,C101,S
80 | 970,2,"Aldworth, Mr. Charles Augustus",male,30,0,0,248744,13,,S
81 | 971,3,"Doyle, Miss. Elizabeth",female,24,0,0,368702,7.75,,Q
82 | 972,3,"Boulos, Master. Akar",male,6,1,1,2678,15.2458,,C
83 | 973,1,"Straus, Mr. Isidor",male,67,1,0,PC 17483,221.7792,C55 C57,S
84 | 974,1,"Case, Mr. Howard Brown",male,49,0,0,19924,26,,S
85 | 975,3,"Demetri, Mr. Marinko",male,,0,0,349238,7.8958,,S
86 | 976,2,"Lamb, Mr. John Joseph",male,,0,0,240261,10.7083,,Q
87 | 977,3,"Khalil, Mr. Betros",male,,1,0,2660,14.4542,,C
88 | 978,3,"Barry, Miss. Julia",female,27,0,0,330844,7.8792,,Q
89 | 979,3,"Badman, Miss. Emily Louisa",female,18,0,0,A/4 31416,8.05,,S
90 | 980,3,"O'Donoghue, Ms. Bridget",female,,0,0,364856,7.75,,Q
91 | 981,2,"Wells, Master. Ralph Lester",male,2,1,1,29103,23,,S
92 | 982,3,"Dyker, Mrs. Adolf Fredrik (Anna Elisabeth Judith Andersson)",female,22,1,0,347072,13.9,,S
93 | 983,3,"Pedersen, Mr. Olaf",male,,0,0,345498,7.775,,S
94 | 984,1,"Davidson, Mrs. Thornton (Orian Hays)",female,27,1,2,F.C. 12750,52,B71,S
95 | 985,3,"Guest, Mr. Robert",male,,0,0,376563,8.05,,S
96 | 986,1,"Birnbaum, Mr. Jakob",male,25,0,0,13905,26,,C
97 | 987,3,"Tenglin, Mr. Gunnar Isidor",male,25,0,0,350033,7.7958,,S
98 | 988,1,"Cavendish, Mrs. Tyrell William (Julia Florence Siegel)",female,76,1,0,19877,78.85,C46,S
99 | 989,3,"Makinen, Mr. Kalle Edvard",male,29,0,0,STON/O 2. 3101268,7.925,,S
100 | 990,3,"Braf, Miss. Elin Ester Maria",female,20,0,0,347471,7.8542,,S
101 | 991,3,"Nancarrow, Mr. William Henry",male,33,0,0,A./5. 3338,8.05,,S
102 | 992,1,"Stengel, Mrs. Charles Emil Henry (Annie May Morris)",female,43,1,0,11778,55.4417,C116,C
103 | 993,2,"Weisz, Mr. Leopold",male,27,1,0,228414,26,,S
104 | 994,3,"Foley, Mr. William",male,,0,0,365235,7.75,,Q
105 | 995,3,"Johansson Palmquist, Mr. Oskar Leander",male,26,0,0,347070,7.775,,S
106 | 996,3,"Thomas, Mrs. Alexander (Thamine Thelma"")""",female,16,1,1,2625,8.5167,,C
107 | 997,3,"Holthen, Mr. Johan Martin",male,28,0,0,C 4001,22.525,,S
108 | 998,3,"Buckley, Mr. Daniel",male,21,0,0,330920,7.8208,,Q
109 | 999,3,"Ryan, Mr. Edward",male,,0,0,383162,7.75,,Q
110 | 1000,3,"Willer, Mr. Aaron (Abi Weller"")""",male,,0,0,3410,8.7125,,S
111 | 1001,2,"Swane, Mr. George",male,18.5,0,0,248734,13,F,S
112 | 1002,2,"Stanton, Mr. Samuel Ward",male,41,0,0,237734,15.0458,,C
113 | 1003,3,"Shine, Miss. Ellen Natalia",female,,0,0,330968,7.7792,,Q
114 | 1004,1,"Evans, Miss. Edith Corse",female,36,0,0,PC 17531,31.6792,A29,C
115 | 1005,3,"Buckley, Miss. Katherine",female,18.5,0,0,329944,7.2833,,Q
116 | 1006,1,"Straus, Mrs. Isidor (Rosalie Ida Blun)",female,63,1,0,PC 17483,221.7792,C55 C57,S
117 | 1007,3,"Chronopoulos, Mr. Demetrios",male,18,1,0,2680,14.4542,,C
118 | 1008,3,"Thomas, Mr. John",male,,0,0,2681,6.4375,,C
119 | 1009,3,"Sandstrom, Miss. Beatrice Irene",female,1,1,1,PP 9549,16.7,G6,S
120 | 1010,1,"Beattie, Mr. Thomson",male,36,0,0,13050,75.2417,C6,C
121 | 1011,2,"Chapman, Mrs. John Henry (Sara Elizabeth Lawry)",female,29,1,0,SC/AH 29037,26,,S
122 | 1012,2,"Watt, Miss. Bertha J",female,12,0,0,C.A. 33595,15.75,,S
123 | 1013,3,"Kiernan, Mr. John",male,,1,0,367227,7.75,,Q
124 | 1014,1,"Schabert, Mrs. Paul (Emma Mock)",female,35,1,0,13236,57.75,C28,C
125 | 1015,3,"Carver, Mr. Alfred John",male,28,0,0,392095,7.25,,S
126 | 1016,3,"Kennedy, Mr. John",male,,0,0,368783,7.75,,Q
127 | 1017,3,"Cribb, Miss. Laura Alice",female,17,0,1,371362,16.1,,S
128 | 1018,3,"Brobeck, Mr. Karl Rudolf",male,22,0,0,350045,7.7958,,S
129 | 1019,3,"McCoy, Miss. Alicia",female,,2,0,367226,23.25,,Q
130 | 1020,2,"Bowenur, Mr. Solomon",male,42,0,0,211535,13,,S
131 | 1021,3,"Petersen, Mr. Marius",male,24,0,0,342441,8.05,,S
132 | 1022,3,"Spinner, Mr. Henry John",male,32,0,0,STON/OQ. 369943,8.05,,S
133 | 1023,1,"Gracie, Col. Archibald IV",male,53,0,0,113780,28.5,C51,C
134 | 1024,3,"Lefebre, Mrs. Frank (Frances)",female,,0,4,4133,25.4667,,S
135 | 1025,3,"Thomas, Mr. Charles P",male,,1,0,2621,6.4375,,C
136 | 1026,3,"Dintcheff, Mr. Valtcho",male,43,0,0,349226,7.8958,,S
137 | 1027,3,"Carlsson, Mr. Carl Robert",male,24,0,0,350409,7.8542,,S
138 | 1028,3,"Zakarian, Mr. Mapriededer",male,26.5,0,0,2656,7.225,,C
139 | 1029,2,"Schmidt, Mr. August",male,26,0,0,248659,13,,S
140 | 1030,3,"Drapkin, Miss. Jennie",female,23,0,0,SOTON/OQ 392083,8.05,,S
141 | 1031,3,"Goodwin, Mr. Charles Frederick",male,40,1,6,CA 2144,46.9,,S
142 | 1032,3,"Goodwin, Miss. Jessie Allis",female,10,5,2,CA 2144,46.9,,S
143 | 1033,1,"Daniels, Miss. Sarah",female,33,0,0,113781,151.55,,S
144 | 1034,1,"Ryerson, Mr. Arthur Larned",male,61,1,3,PC 17608,262.375,B57 B59 B63 B66,C
145 | 1035,2,"Beauchamp, Mr. Henry James",male,28,0,0,244358,26,,S
146 | 1036,1,"Lindeberg-Lind, Mr. Erik Gustaf (Mr Edward Lingrey"")""",male,42,0,0,17475,26.55,,S
147 | 1037,3,"Vander Planke, Mr. Julius",male,31,3,0,345763,18,,S
148 | 1038,1,"Hilliard, Mr. Herbert Henry",male,,0,0,17463,51.8625,E46,S
149 | 1039,3,"Davies, Mr. Evan",male,22,0,0,SC/A4 23568,8.05,,S
150 | 1040,1,"Crafton, Mr. John Bertram",male,,0,0,113791,26.55,,S
151 | 1041,2,"Lahtinen, Rev. William",male,30,1,1,250651,26,,S
152 | 1042,1,"Earnshaw, Mrs. Boulton (Olive Potter)",female,23,0,1,11767,83.1583,C54,C
153 | 1043,3,"Matinoff, Mr. Nicola",male,,0,0,349255,7.8958,,C
154 | 1044,3,"Storey, Mr. Thomas",male,60.5,0,0,3701,,,S
155 | 1045,3,"Klasen, Mrs. (Hulda Kristina Eugenia Lofqvist)",female,36,0,2,350405,12.1833,,S
156 | 1046,3,"Asplund, Master. Filip Oscar",male,13,4,2,347077,31.3875,,S
157 | 1047,3,"Duquemin, Mr. Joseph",male,24,0,0,S.O./P.P. 752,7.55,,S
158 | 1048,1,"Bird, Miss. Ellen",female,29,0,0,PC 17483,221.7792,C97,S
159 | 1049,3,"Lundin, Miss. Olga Elida",female,23,0,0,347469,7.8542,,S
160 | 1050,1,"Borebank, Mr. John James",male,42,0,0,110489,26.55,D22,S
161 | 1051,3,"Peacock, Mrs. Benjamin (Edith Nile)",female,26,0,2,SOTON/O.Q. 3101315,13.775,,S
162 | 1052,3,"Smyth, Miss. Julia",female,,0,0,335432,7.7333,,Q
163 | 1053,3,"Touma, Master. Georges Youssef",male,7,1,1,2650,15.2458,,C
164 | 1054,2,"Wright, Miss. Marion",female,26,0,0,220844,13.5,,S
165 | 1055,3,"Pearce, Mr. Ernest",male,,0,0,343271,7,,S
166 | 1056,2,"Peruschitz, Rev. Joseph Maria",male,41,0,0,237393,13,,S
167 | 1057,3,"Kink-Heilmann, Mrs. Anton (Luise Heilmann)",female,26,1,1,315153,22.025,,S
168 | 1058,1,"Brandeis, Mr. Emil",male,48,0,0,PC 17591,50.4958,B10,C
169 | 1059,3,"Ford, Mr. Edward Watson",male,18,2,2,W./C. 6608,34.375,,S
170 | 1060,1,"Cassebeer, Mrs. Henry Arthur Jr (Eleanor Genevieve Fosdick)",female,,0,0,17770,27.7208,,C
171 | 1061,3,"Hellstrom, Miss. Hilda Maria",female,22,0,0,7548,8.9625,,S
172 | 1062,3,"Lithman, Mr. Simon",male,,0,0,S.O./P.P. 251,7.55,,S
173 | 1063,3,"Zakarian, Mr. Ortin",male,27,0,0,2670,7.225,,C
174 | 1064,3,"Dyker, Mr. Adolf Fredrik",male,23,1,0,347072,13.9,,S
175 | 1065,3,"Torfa, Mr. Assad",male,,0,0,2673,7.2292,,C
176 | 1066,3,"Asplund, Mr. Carl Oscar Vilhelm Gustafsson",male,40,1,5,347077,31.3875,,S
177 | 1067,2,"Brown, Miss. Edith Eileen",female,15,0,2,29750,39,,S
178 | 1068,2,"Sincock, Miss. Maude",female,20,0,0,C.A. 33112,36.75,,S
179 | 1069,1,"Stengel, Mr. Charles Emil Henry",male,54,1,0,11778,55.4417,C116,C
180 | 1070,2,"Becker, Mrs. Allen Oliver (Nellie E Baumgardner)",female,36,0,3,230136,39,F4,S
181 | 1071,1,"Compton, Mrs. Alexander Taylor (Mary Eliza Ingersoll)",female,64,0,2,PC 17756,83.1583,E45,C
182 | 1072,2,"McCrie, Mr. James Matthew",male,30,0,0,233478,13,,S
183 | 1073,1,"Compton, Mr. Alexander Taylor Jr",male,37,1,1,PC 17756,83.1583,E52,C
184 | 1074,1,"Marvin, Mrs. Daniel Warner (Mary Graham Carmichael Farquarson)",female,18,1,0,113773,53.1,D30,S
185 | 1075,3,"Lane, Mr. Patrick",male,,0,0,7935,7.75,,Q
186 | 1076,1,"Douglas, Mrs. Frederick Charles (Mary Helene Baxter)",female,27,1,1,PC 17558,247.5208,B58 B60,C
187 | 1077,2,"Maybery, Mr. Frank Hubert",male,40,0,0,239059,16,,S
188 | 1078,2,"Phillips, Miss. Alice Frances Louisa",female,21,0,1,S.O./P.P. 2,21,,S
189 | 1079,3,"Davies, Mr. Joseph",male,17,2,0,A/4 48873,8.05,,S
190 | 1080,3,"Sage, Miss. Ada",female,,8,2,CA. 2343,69.55,,S
191 | 1081,2,"Veal, Mr. James",male,40,0,0,28221,13,,S
192 | 1082,2,"Angle, Mr. William A",male,34,1,0,226875,26,,S
193 | 1083,1,"Salomon, Mr. Abraham L",male,,0,0,111163,26,,S
194 | 1084,3,"van Billiard, Master. Walter John",male,11.5,1,1,A/5. 851,14.5,,S
195 | 1085,2,"Lingane, Mr. John",male,61,0,0,235509,12.35,,Q
196 | 1086,2,"Drew, Master. Marshall Brines",male,8,0,2,28220,32.5,,S
197 | 1087,3,"Karlsson, Mr. Julius Konrad Eugen",male,33,0,0,347465,7.8542,,S
198 | 1088,1,"Spedden, Master. Robert Douglas",male,6,0,2,16966,134.5,E34,C
199 | 1089,3,"Nilsson, Miss. Berta Olivia",female,18,0,0,347066,7.775,,S
200 | 1090,2,"Baimbrigge, Mr. Charles Robert",male,23,0,0,C.A. 31030,10.5,,S
201 | 1091,3,"Rasmussen, Mrs. (Lena Jacobsen Solvang)",female,,0,0,65305,8.1125,,S
202 | 1092,3,"Murphy, Miss. Nora",female,,0,0,36568,15.5,,Q
203 | 1093,3,"Danbom, Master. Gilbert Sigvard Emanuel",male,0.33,0,2,347080,14.4,,S
204 | 1094,1,"Astor, Col. John Jacob",male,47,1,0,PC 17757,227.525,C62 C64,C
205 | 1095,2,"Quick, Miss. Winifred Vera",female,8,1,1,26360,26,,S
206 | 1096,2,"Andrew, Mr. Frank Thomas",male,25,0,0,C.A. 34050,10.5,,S
207 | 1097,1,"Omont, Mr. Alfred Fernand",male,,0,0,F.C. 12998,25.7417,,C
208 | 1098,3,"McGowan, Miss. Katherine",female,35,0,0,9232,7.75,,Q
209 | 1099,2,"Collett, Mr. Sidney C Stuart",male,24,0,0,28034,10.5,,S
210 | 1100,1,"Rosenbaum, Miss. Edith Louise",female,33,0,0,PC 17613,27.7208,A11,C
211 | 1101,3,"Delalic, Mr. Redjo",male,25,0,0,349250,7.8958,,S
212 | 1102,3,"Andersen, Mr. Albert Karvin",male,32,0,0,C 4001,22.525,,S
213 | 1103,3,"Finoli, Mr. Luigi",male,,0,0,SOTON/O.Q. 3101308,7.05,,S
214 | 1104,2,"Deacon, Mr. Percy William",male,17,0,0,S.O.C. 14879,73.5,,S
215 | 1105,2,"Howard, Mrs. Benjamin (Ellen Truelove Arman)",female,60,1,0,24065,26,,S
216 | 1106,3,"Andersson, Miss. Ida Augusta Margareta",female,38,4,2,347091,7.775,,S
217 | 1107,1,"Head, Mr. Christopher",male,42,0,0,113038,42.5,B11,S
218 | 1108,3,"Mahon, Miss. Bridget Delia",female,,0,0,330924,7.8792,,Q
219 | 1109,1,"Wick, Mr. George Dennick",male,57,1,1,36928,164.8667,,S
220 | 1110,1,"Widener, Mrs. George Dunton (Eleanor Elkins)",female,50,1,1,113503,211.5,C80,C
221 | 1111,3,"Thomson, Mr. Alexander Morrison",male,,0,0,32302,8.05,,S
222 | 1112,2,"Duran y More, Miss. Florentina",female,30,1,0,SC/PARIS 2148,13.8583,,C
223 | 1113,3,"Reynolds, Mr. Harold J",male,21,0,0,342684,8.05,,S
224 | 1114,2,"Cook, Mrs. (Selena Rogers)",female,22,0,0,W./C. 14266,10.5,F33,S
225 | 1115,3,"Karlsson, Mr. Einar Gervasius",male,21,0,0,350053,7.7958,,S
226 | 1116,1,"Candee, Mrs. Edward (Helen Churchill Hungerford)",female,53,0,0,PC 17606,27.4458,,C
227 | 1117,3,"Moubarek, Mrs. George (Omine Amenia"" Alexander)""",female,,0,2,2661,15.2458,,C
228 | 1118,3,"Asplund, Mr. Johan Charles",male,23,0,0,350054,7.7958,,S
229 | 1119,3,"McNeill, Miss. Bridget",female,,0,0,370368,7.75,,Q
230 | 1120,3,"Everett, Mr. Thomas James",male,40.5,0,0,C.A. 6212,15.1,,S
231 | 1121,2,"Hocking, Mr. Samuel James Metcalfe",male,36,0,0,242963,13,,S
232 | 1122,2,"Sweet, Mr. George Frederick",male,14,0,0,220845,65,,S
233 | 1123,1,"Willard, Miss. Constance",female,21,0,0,113795,26.55,,S
234 | 1124,3,"Wiklund, Mr. Karl Johan",male,21,1,0,3101266,6.4958,,S
235 | 1125,3,"Linehan, Mr. Michael",male,,0,0,330971,7.8792,,Q
236 | 1126,1,"Cumings, Mr. John Bradley",male,39,1,0,PC 17599,71.2833,C85,C
237 | 1127,3,"Vendel, Mr. Olof Edvin",male,20,0,0,350416,7.8542,,S
238 | 1128,1,"Warren, Mr. Frank Manley",male,64,1,0,110813,75.25,D37,C
239 | 1129,3,"Baccos, Mr. Raffull",male,20,0,0,2679,7.225,,C
240 | 1130,2,"Hiltunen, Miss. Marta",female,18,1,1,250650,13,,S
241 | 1131,1,"Douglas, Mrs. Walter Donald (Mahala Dutton)",female,48,1,0,PC 17761,106.425,C86,C
242 | 1132,1,"Lindstrom, Mrs. Carl Johan (Sigrid Posse)",female,55,0,0,112377,27.7208,,C
243 | 1133,2,"Christy, Mrs. (Alice Frances)",female,45,0,2,237789,30,,S
244 | 1134,1,"Spedden, Mr. Frederic Oakley",male,45,1,1,16966,134.5,E34,C
245 | 1135,3,"Hyman, Mr. Abraham",male,,0,0,3470,7.8875,,S
246 | 1136,3,"Johnston, Master. William Arthur Willie""""",male,,1,2,W./C. 6607,23.45,,S
247 | 1137,1,"Kenyon, Mr. Frederick R",male,41,1,0,17464,51.8625,D21,S
248 | 1138,2,"Karnes, Mrs. J Frank (Claire Bennett)",female,22,0,0,F.C.C. 13534,21,,S
249 | 1139,2,"Drew, Mr. James Vivian",male,42,1,1,28220,32.5,,S
250 | 1140,2,"Hold, Mrs. Stephen (Annie Margaret Hill)",female,29,1,0,26707,26,,S
251 | 1141,3,"Khalil, Mrs. Betros (Zahie Maria"" Elias)""",female,,1,0,2660,14.4542,,C
252 | 1142,2,"West, Miss. Barbara J",female,0.92,1,2,C.A. 34651,27.75,,S
253 | 1143,3,"Abrahamsson, Mr. Abraham August Johannes",male,20,0,0,SOTON/O2 3101284,7.925,,S
254 | 1144,1,"Clark, Mr. Walter Miller",male,27,1,0,13508,136.7792,C89,C
255 | 1145,3,"Salander, Mr. Karl Johan",male,24,0,0,7266,9.325,,S
256 | 1146,3,"Wenzel, Mr. Linhart",male,32.5,0,0,345775,9.5,,S
257 | 1147,3,"MacKay, Mr. George William",male,,0,0,C.A. 42795,7.55,,S
258 | 1148,3,"Mahon, Mr. John",male,,0,0,AQ/4 3130,7.75,,Q
259 | 1149,3,"Niklasson, Mr. Samuel",male,28,0,0,363611,8.05,,S
260 | 1150,2,"Bentham, Miss. Lilian W",female,19,0,0,28404,13,,S
261 | 1151,3,"Midtsjo, Mr. Karl Albert",male,21,0,0,345501,7.775,,S
262 | 1152,3,"de Messemaeker, Mr. Guillaume Joseph",male,36.5,1,0,345572,17.4,,S
263 | 1153,3,"Nilsson, Mr. August Ferdinand",male,21,0,0,350410,7.8542,,S
264 | 1154,2,"Wells, Mrs. Arthur Henry (Addie"" Dart Trevaskis)""",female,29,0,2,29103,23,,S
265 | 1155,3,"Klasen, Miss. Gertrud Emilia",female,1,1,1,350405,12.1833,,S
266 | 1156,2,"Portaluppi, Mr. Emilio Ilario Giuseppe",male,30,0,0,C.A. 34644,12.7375,,C
267 | 1157,3,"Lyntakoff, Mr. Stanko",male,,0,0,349235,7.8958,,S
268 | 1158,1,"Chisholm, Mr. Roderick Robert Crispin",male,,0,0,112051,0,,S
269 | 1159,3,"Warren, Mr. Charles William",male,,0,0,C.A. 49867,7.55,,S
270 | 1160,3,"Howard, Miss. May Elizabeth",female,,0,0,A. 2. 39186,8.05,,S
271 | 1161,3,"Pokrnic, Mr. Mate",male,17,0,0,315095,8.6625,,S
272 | 1162,1,"McCaffry, Mr. Thomas Francis",male,46,0,0,13050,75.2417,C6,C
273 | 1163,3,"Fox, Mr. Patrick",male,,0,0,368573,7.75,,Q
274 | 1164,1,"Clark, Mrs. Walter Miller (Virginia McDowell)",female,26,1,0,13508,136.7792,C89,C
275 | 1165,3,"Lennon, Miss. Mary",female,,1,0,370371,15.5,,Q
276 | 1166,3,"Saade, Mr. Jean Nassr",male,,0,0,2676,7.225,,C
277 | 1167,2,"Bryhl, Miss. Dagmar Jenny Ingeborg ",female,20,1,0,236853,26,,S
278 | 1168,2,"Parker, Mr. Clifford Richard",male,28,0,0,SC 14888,10.5,,S
279 | 1169,2,"Faunthorpe, Mr. Harry",male,40,1,0,2926,26,,S
280 | 1170,2,"Ware, Mr. John James",male,30,1,0,CA 31352,21,,S
281 | 1171,2,"Oxenham, Mr. Percy Thomas",male,22,0,0,W./C. 14260,10.5,,S
282 | 1172,3,"Oreskovic, Miss. Jelka",female,23,0,0,315085,8.6625,,S
283 | 1173,3,"Peacock, Master. Alfred Edward",male,0.75,1,1,SOTON/O.Q. 3101315,13.775,,S
284 | 1174,3,"Fleming, Miss. Honora",female,,0,0,364859,7.75,,Q
285 | 1175,3,"Touma, Miss. Maria Youssef",female,9,1,1,2650,15.2458,,C
286 | 1176,3,"Rosblom, Miss. Salli Helena",female,2,1,1,370129,20.2125,,S
287 | 1177,3,"Dennis, Mr. William",male,36,0,0,A/5 21175,7.25,,S
288 | 1178,3,"Franklin, Mr. Charles (Charles Fardon)",male,,0,0,SOTON/O.Q. 3101314,7.25,,S
289 | 1179,1,"Snyder, Mr. John Pillsbury",male,24,1,0,21228,82.2667,B45,S
290 | 1180,3,"Mardirosian, Mr. Sarkis",male,,0,0,2655,7.2292,F E46,C
291 | 1181,3,"Ford, Mr. Arthur",male,,0,0,A/5 1478,8.05,,S
292 | 1182,1,"Rheims, Mr. George Alexander Lucien",male,,0,0,PC 17607,39.6,,S
293 | 1183,3,"Daly, Miss. Margaret Marcella Maggie""""",female,30,0,0,382650,6.95,,Q
294 | 1184,3,"Nasr, Mr. Mustafa",male,,0,0,2652,7.2292,,C
295 | 1185,1,"Dodge, Dr. Washington",male,53,1,1,33638,81.8583,A34,S
296 | 1186,3,"Wittevrongel, Mr. Camille",male,36,0,0,345771,9.5,,S
297 | 1187,3,"Angheloff, Mr. Minko",male,26,0,0,349202,7.8958,,S
298 | 1188,2,"Laroche, Miss. Louise",female,1,1,2,SC/Paris 2123,41.5792,,C
299 | 1189,3,"Samaan, Mr. Hanna",male,,2,0,2662,21.6792,,C
300 | 1190,1,"Loring, Mr. Joseph Holland",male,30,0,0,113801,45.5,,S
301 | 1191,3,"Johansson, Mr. Nils",male,29,0,0,347467,7.8542,,S
302 | 1192,3,"Olsson, Mr. Oscar Wilhelm",male,32,0,0,347079,7.775,,S
303 | 1193,2,"Malachard, Mr. Noel",male,,0,0,237735,15.0458,D,C
304 | 1194,2,"Phillips, Mr. Escott Robert",male,43,0,1,S.O./P.P. 2,21,,S
305 | 1195,3,"Pokrnic, Mr. Tome",male,24,0,0,315092,8.6625,,S
306 | 1196,3,"McCarthy, Miss. Catherine Katie""""",female,,0,0,383123,7.75,,Q
307 | 1197,1,"Crosby, Mrs. Edward Gifford (Catherine Elizabeth Halstead)",female,64,1,1,112901,26.55,B26,S
308 | 1198,1,"Allison, Mr. Hudson Joshua Creighton",male,30,1,2,113781,151.55,C22 C26,S
309 | 1199,3,"Aks, Master. Philip Frank",male,0.83,0,1,392091,9.35,,S
310 | 1200,1,"Hays, Mr. Charles Melville",male,55,1,1,12749,93.5,B69,S
311 | 1201,3,"Hansen, Mrs. Claus Peter (Jennie L Howard)",female,45,1,0,350026,14.1083,,S
312 | 1202,3,"Cacic, Mr. Jego Grga",male,18,0,0,315091,8.6625,,S
313 | 1203,3,"Vartanian, Mr. David",male,22,0,0,2658,7.225,,C
314 | 1204,3,"Sadowitz, Mr. Harry",male,,0,0,LP 1588,7.575,,S
315 | 1205,3,"Carr, Miss. Jeannie",female,37,0,0,368364,7.75,,Q
316 | 1206,1,"White, Mrs. John Stuart (Ella Holmes)",female,55,0,0,PC 17760,135.6333,C32,C
317 | 1207,3,"Hagardon, Miss. Kate",female,17,0,0,AQ/3. 30631,7.7333,,Q
318 | 1208,1,"Spencer, Mr. William Augustus",male,57,1,0,PC 17569,146.5208,B78,C
319 | 1209,2,"Rogers, Mr. Reginald Harry",male,19,0,0,28004,10.5,,S
320 | 1210,3,"Jonsson, Mr. Nils Hilding",male,27,0,0,350408,7.8542,,S
321 | 1211,2,"Jefferys, Mr. Ernest Wilfred",male,22,2,0,C.A. 31029,31.5,,S
322 | 1212,3,"Andersson, Mr. Johan Samuel",male,26,0,0,347075,7.775,,S
323 | 1213,3,"Krekorian, Mr. Neshan",male,25,0,0,2654,7.2292,F E57,C
324 | 1214,2,"Nesson, Mr. Israel",male,26,0,0,244368,13,F2,S
325 | 1215,1,"Rowe, Mr. Alfred G",male,33,0,0,113790,26.55,,S
326 | 1216,1,"Kreuchen, Miss. Emilie",female,39,0,0,24160,211.3375,,S
327 | 1217,3,"Assam, Mr. Ali",male,23,0,0,SOTON/O.Q. 3101309,7.05,,S
328 | 1218,2,"Becker, Miss. Ruth Elizabeth",female,12,2,1,230136,39,F4,S
329 | 1219,1,"Rosenshine, Mr. George (Mr George Thorne"")""",male,46,0,0,PC 17585,79.2,,C
330 | 1220,2,"Clarke, Mr. Charles Valentine",male,29,1,0,2003,26,,S
331 | 1221,2,"Enander, Mr. Ingvar",male,21,0,0,236854,13,,S
332 | 1222,2,"Davies, Mrs. John Morgan (Elizabeth Agnes Mary White) ",female,48,0,2,C.A. 33112,36.75,,S
333 | 1223,1,"Dulles, Mr. William Crothers",male,39,0,0,PC 17580,29.7,A18,C
334 | 1224,3,"Thomas, Mr. Tannous",male,,0,0,2684,7.225,,C
335 | 1225,3,"Nakid, Mrs. Said (Waika Mary"" Mowad)""",female,19,1,1,2653,15.7417,,C
336 | 1226,3,"Cor, Mr. Ivan",male,27,0,0,349229,7.8958,,S
337 | 1227,1,"Maguire, Mr. John Edward",male,30,0,0,110469,26,C106,S
338 | 1228,2,"de Brito, Mr. Jose Joaquim",male,32,0,0,244360,13,,S
339 | 1229,3,"Elias, Mr. Joseph",male,39,0,2,2675,7.2292,,C
340 | 1230,2,"Denbury, Mr. Herbert",male,25,0,0,C.A. 31029,31.5,,S
341 | 1231,3,"Betros, Master. Seman",male,,0,0,2622,7.2292,,C
342 | 1232,2,"Fillbrook, Mr. Joseph Charles",male,18,0,0,C.A. 15185,10.5,,S
343 | 1233,3,"Lundstrom, Mr. Thure Edvin",male,32,0,0,350403,7.5792,,S
344 | 1234,3,"Sage, Mr. John George",male,,1,9,CA. 2343,69.55,,S
345 | 1235,1,"Cardeza, Mrs. James Warburton Martinez (Charlotte Wardle Drake)",female,58,0,1,PC 17755,512.3292,B51 B53 B55,C
346 | 1236,3,"van Billiard, Master. James William",male,,1,1,A/5. 851,14.5,,S
347 | 1237,3,"Abelseth, Miss. Karen Marie",female,16,0,0,348125,7.65,,S
348 | 1238,2,"Botsford, Mr. William Hull",male,26,0,0,237670,13,,S
349 | 1239,3,"Whabee, Mrs. George Joseph (Shawneene Abi-Saab)",female,38,0,0,2688,7.2292,,C
350 | 1240,2,"Giles, Mr. Ralph",male,24,0,0,248726,13.5,,S
351 | 1241,2,"Walcroft, Miss. Nellie",female,31,0,0,F.C.C. 13528,21,,S
352 | 1242,1,"Greenfield, Mrs. Leo David (Blanche Strouse)",female,45,0,1,PC 17759,63.3583,D10 D12,C
353 | 1243,2,"Stokes, Mr. Philip Joseph",male,25,0,0,F.C.C. 13540,10.5,,S
354 | 1244,2,"Dibden, Mr. William",male,18,0,0,S.O.C. 14879,73.5,,S
355 | 1245,2,"Herman, Mr. Samuel",male,49,1,2,220845,65,,S
356 | 1246,3,"Dean, Miss. Elizabeth Gladys Millvina""""",female,0.17,1,2,C.A. 2315,20.575,,S
357 | 1247,1,"Julian, Mr. Henry Forbes",male,50,0,0,113044,26,E60,S
358 | 1248,1,"Brown, Mrs. John Murray (Caroline Lane Lamson)",female,59,2,0,11769,51.4792,C101,S
359 | 1249,3,"Lockyer, Mr. Edward",male,,0,0,1222,7.8792,,S
360 | 1250,3,"O'Keefe, Mr. Patrick",male,,0,0,368402,7.75,,Q
361 | 1251,3,"Lindell, Mrs. Edvard Bengtsson (Elin Gerda Persson)",female,30,1,0,349910,15.55,,S
362 | 1252,3,"Sage, Master. William Henry",male,14.5,8,2,CA. 2343,69.55,,S
363 | 1253,2,"Mallet, Mrs. Albert (Antoinette Magnin)",female,24,1,1,S.C./PARIS 2079,37.0042,,C
364 | 1254,2,"Ware, Mrs. John James (Florence Louise Long)",female,31,0,0,CA 31352,21,,S
365 | 1255,3,"Strilic, Mr. Ivan",male,27,0,0,315083,8.6625,,S
366 | 1256,1,"Harder, Mrs. George Achilles (Dorothy Annan)",female,25,1,0,11765,55.4417,E50,C
367 | 1257,3,"Sage, Mrs. John (Annie Bullen)",female,,1,9,CA. 2343,69.55,,S
368 | 1258,3,"Caram, Mr. Joseph",male,,1,0,2689,14.4583,,C
369 | 1259,3,"Riihivouri, Miss. Susanna Juhantytar Sanni""""",female,22,0,0,3101295,39.6875,,S
370 | 1260,1,"Gibson, Mrs. Leonard (Pauline C Boeson)",female,45,0,1,112378,59.4,,C
371 | 1261,2,"Pallas y Castello, Mr. Emilio",male,29,0,0,SC/PARIS 2147,13.8583,,C
372 | 1262,2,"Giles, Mr. Edgar",male,21,1,0,28133,11.5,,S
373 | 1263,1,"Wilson, Miss. Helen Alice",female,31,0,0,16966,134.5,E39 E41,C
374 | 1264,1,"Ismay, Mr. Joseph Bruce",male,49,0,0,112058,0,B52 B54 B56,S
375 | 1265,2,"Harbeck, Mr. William H",male,44,0,0,248746,13,,S
376 | 1266,1,"Dodge, Mrs. Washington (Ruth Vidaver)",female,54,1,1,33638,81.8583,A34,S
377 | 1267,1,"Bowen, Miss. Grace Scott",female,45,0,0,PC 17608,262.375,,C
378 | 1268,3,"Kink, Miss. Maria",female,22,2,0,315152,8.6625,,S
379 | 1269,2,"Cotterill, Mr. Henry Harry""""",male,21,0,0,29107,11.5,,S
380 | 1270,1,"Hipkins, Mr. William Edward",male,55,0,0,680,50,C39,S
381 | 1271,3,"Asplund, Master. Carl Edgar",male,5,4,2,347077,31.3875,,S
382 | 1272,3,"O'Connor, Mr. Patrick",male,,0,0,366713,7.75,,Q
383 | 1273,3,"Foley, Mr. Joseph",male,26,0,0,330910,7.8792,,Q
384 | 1274,3,"Risien, Mrs. Samuel (Emma)",female,,0,0,364498,14.5,,S
385 | 1275,3,"McNamee, Mrs. Neal (Eileen O'Leary)",female,19,1,0,376566,16.1,,S
386 | 1276,2,"Wheeler, Mr. Edwin Frederick""""",male,,0,0,SC/PARIS 2159,12.875,,S
387 | 1277,2,"Herman, Miss. Kate",female,24,1,2,220845,65,,S
388 | 1278,3,"Aronsson, Mr. Ernst Axel Algot",male,24,0,0,349911,7.775,,S
389 | 1279,2,"Ashby, Mr. John",male,57,0,0,244346,13,,S
390 | 1280,3,"Canavan, Mr. Patrick",male,21,0,0,364858,7.75,,Q
391 | 1281,3,"Palsson, Master. Paul Folke",male,6,3,1,349909,21.075,,S
392 | 1282,1,"Payne, Mr. Vivian Ponsonby",male,23,0,0,12749,93.5,B24,S
393 | 1283,1,"Lines, Mrs. Ernest H (Elizabeth Lindsey James)",female,51,0,1,PC 17592,39.4,D28,S
394 | 1284,3,"Abbott, Master. Eugene Joseph",male,13,0,2,C.A. 2673,20.25,,S
395 | 1285,2,"Gilbert, Mr. William",male,47,0,0,C.A. 30769,10.5,,S
396 | 1286,3,"Kink-Heilmann, Mr. Anton",male,29,3,1,315153,22.025,,S
397 | 1287,1,"Smith, Mrs. Lucien Philip (Mary Eloise Hughes)",female,18,1,0,13695,60,C31,S
398 | 1288,3,"Colbert, Mr. Patrick",male,24,0,0,371109,7.25,,Q
399 | 1289,1,"Frolicher-Stehli, Mrs. Maxmillian (Margaretha Emerentia Stehli)",female,48,1,1,13567,79.2,B41,C
400 | 1290,3,"Larsson-Rondberg, Mr. Edvard A",male,22,0,0,347065,7.775,,S
401 | 1291,3,"Conlon, Mr. Thomas Henry",male,31,0,0,21332,7.7333,,Q
402 | 1292,1,"Bonnell, Miss. Caroline",female,30,0,0,36928,164.8667,C7,S
403 | 1293,2,"Gale, Mr. Harry",male,38,1,0,28664,21,,S
404 | 1294,1,"Gibson, Miss. Dorothy Winifred",female,22,0,1,112378,59.4,,C
405 | 1295,1,"Carrau, Mr. Jose Pedro",male,17,0,0,113059,47.1,,S
406 | 1296,1,"Frauenthal, Mr. Isaac Gerald",male,43,1,0,17765,27.7208,D40,C
407 | 1297,2,"Nourney, Mr. Alfred (Baron von Drachstedt"")""",male,20,0,0,SC/PARIS 2166,13.8625,D38,C
408 | 1298,2,"Ware, Mr. William Jeffery",male,23,1,0,28666,10.5,,S
409 | 1299,1,"Widener, Mr. George Dunton",male,50,1,1,113503,211.5,C80,C
410 | 1300,3,"Riordan, Miss. Johanna Hannah""""",female,,0,0,334915,7.7208,,Q
411 | 1301,3,"Peacock, Miss. Treasteall",female,3,1,1,SOTON/O.Q. 3101315,13.775,,S
412 | 1302,3,"Naughton, Miss. Hannah",female,,0,0,365237,7.75,,Q
413 | 1303,1,"Minahan, Mrs. William Edward (Lillian E Thorpe)",female,37,1,0,19928,90,C78,Q
414 | 1304,3,"Henriksson, Miss. Jenny Lovisa",female,28,0,0,347086,7.775,,S
415 | 1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.05,,S
416 | 1306,1,"Oliva y Ocana, Dona. Fermina",female,39,0,0,PC 17758,108.9,C105,C
417 | 1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.25,,S
418 | 1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.05,,S
419 | 1309,3,"Peter, Master. Michael J",male,,1,1,2668,22.3583,,C
420 |
--------------------------------------------------------------------------------
/Overfit.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 170,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "from sklearn.preprocessing import StandardScaler\n",
12 | "from sklearn.preprocessing import Imputer\n",
13 | "from sklearn.model_selection import GridSearchCV , train_test_split , cross_val_score\n",
14 | "from sklearn.metrics import classification_report , confusion_matrix\n",
15 | "\n",
16 | "\n",
17 | "from sklearn.linear_model import LogisticRegression\n",
18 | "from sklearn.naive_bayes import GaussianNB\n",
19 | "from sklearn.neighbors import KNeighborsClassifier\n",
20 | "from sklearn.tree import DecisionTreeClassifier\n",
21 | "from sklearn.ensemble import RandomForestClassifier\n",
22 | "from sklearn.ensemble import GradientBoostingClassifier\n",
23 | "from sklearn.svm import SVC\n",
24 | "from sklearn.metrics import roc_curve, auc\n",
25 | "import os\n",
26 | "import warnings"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 264,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "train=pd.read_csv('train.csv')\n",
36 | "test=pd.read_csv('test.csv')"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 60,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "data": {
46 | "text/html": [
47 | "\n",
48 | "\n",
61 | "
\n",
62 | " \n",
63 | " \n",
64 | " | \n",
65 | " id | \n",
66 | " target | \n",
67 | " 0 | \n",
68 | " 1 | \n",
69 | " 2 | \n",
70 | " 3 | \n",
71 | " 4 | \n",
72 | " 5 | \n",
73 | " 6 | \n",
74 | " 7 | \n",
75 | " ... | \n",
76 | " 290 | \n",
77 | " 291 | \n",
78 | " 292 | \n",
79 | " 293 | \n",
80 | " 294 | \n",
81 | " 295 | \n",
82 | " 296 | \n",
83 | " 297 | \n",
84 | " 298 | \n",
85 | " 299 | \n",
86 | "
\n",
87 | " \n",
88 | " \n",
89 | " \n",
90 | " | count | \n",
91 | " 250.000000 | \n",
92 | " 250.000000 | \n",
93 | " 250.000000 | \n",
94 | " 250.000000 | \n",
95 | " 250.000000 | \n",
96 | " 250.000000 | \n",
97 | " 250.000000 | \n",
98 | " 250.000000 | \n",
99 | " 250.000000 | \n",
100 | " 250.000000 | \n",
101 | " ... | \n",
102 | " 250.000000 | \n",
103 | " 250.000000 | \n",
104 | " 250.000000 | \n",
105 | " 250.000000 | \n",
106 | " 250.000000 | \n",
107 | " 250.000000 | \n",
108 | " 250.000000 | \n",
109 | " 250.000000 | \n",
110 | " 250.000000 | \n",
111 | " 250.000000 | \n",
112 | "
\n",
113 | " \n",
114 | " | mean | \n",
115 | " 124.500000 | \n",
116 | " 0.640000 | \n",
117 | " 0.023292 | \n",
118 | " -0.026872 | \n",
119 | " 0.167404 | \n",
120 | " 0.001904 | \n",
121 | " 0.001588 | \n",
122 | " -0.007304 | \n",
123 | " 0.032052 | \n",
124 | " 0.078412 | \n",
125 | " ... | \n",
126 | " 0.044652 | \n",
127 | " 0.126344 | \n",
128 | " 0.018436 | \n",
129 | " -0.012092 | \n",
130 | " -0.065720 | \n",
131 | " -0.106112 | \n",
132 | " 0.046472 | \n",
133 | " 0.006452 | \n",
134 | " 0.009372 | \n",
135 | " -0.128952 | \n",
136 | "
\n",
137 | " \n",
138 | " | std | \n",
139 | " 72.312977 | \n",
140 | " 0.480963 | \n",
141 | " 0.998354 | \n",
142 | " 1.009314 | \n",
143 | " 1.021709 | \n",
144 | " 1.011751 | \n",
145 | " 1.035411 | \n",
146 | " 0.955700 | \n",
147 | " 1.006657 | \n",
148 | " 0.939731 | \n",
149 | " ... | \n",
150 | " 1.011416 | \n",
151 | " 0.972567 | \n",
152 | " 0.954229 | \n",
153 | " 0.960630 | \n",
154 | " 1.057414 | \n",
155 | " 1.038389 | \n",
156 | " 0.967661 | \n",
157 | " 0.998984 | \n",
158 | " 1.008099 | \n",
159 | " 0.971219 | \n",
160 | "
\n",
161 | " \n",
162 | " | min | \n",
163 | " 0.000000 | \n",
164 | " 0.000000 | \n",
165 | " -2.319000 | \n",
166 | " -2.931000 | \n",
167 | " -2.477000 | \n",
168 | " -2.359000 | \n",
169 | " -2.566000 | \n",
170 | " -2.845000 | \n",
171 | " -2.976000 | \n",
172 | " -3.444000 | \n",
173 | " ... | \n",
174 | " -2.804000 | \n",
175 | " -2.443000 | \n",
176 | " -2.757000 | \n",
177 | " -2.466000 | \n",
178 | " -3.287000 | \n",
179 | " -3.072000 | \n",
180 | " -2.634000 | \n",
181 | " -2.776000 | \n",
182 | " -3.211000 | \n",
183 | " -3.500000 | \n",
184 | "
\n",
185 | " \n",
186 | " | 25% | \n",
187 | " 62.250000 | \n",
188 | " 0.000000 | \n",
189 | " -0.644750 | \n",
190 | " -0.739750 | \n",
191 | " -0.425250 | \n",
192 | " -0.686500 | \n",
193 | " -0.659000 | \n",
194 | " -0.643750 | \n",
195 | " -0.675000 | \n",
196 | " -0.550750 | \n",
197 | " ... | \n",
198 | " -0.617000 | \n",
199 | " -0.510500 | \n",
200 | " -0.535750 | \n",
201 | " -0.657000 | \n",
202 | " -0.818500 | \n",
203 | " -0.821000 | \n",
204 | " -0.605500 | \n",
205 | " -0.751250 | \n",
206 | " -0.550000 | \n",
207 | " -0.754250 | \n",
208 | "
\n",
209 | " \n",
210 | " | 50% | \n",
211 | " 124.500000 | \n",
212 | " 1.000000 | \n",
213 | " -0.015500 | \n",
214 | " 0.057000 | \n",
215 | " 0.184000 | \n",
216 | " -0.016500 | \n",
217 | " -0.023000 | \n",
218 | " 0.037500 | \n",
219 | " 0.060500 | \n",
220 | " 0.183500 | \n",
221 | " ... | \n",
222 | " 0.067500 | \n",
223 | " 0.091000 | \n",
224 | " 0.057500 | \n",
225 | " -0.021000 | \n",
226 | " -0.009000 | \n",
227 | " -0.079500 | \n",
228 | " 0.009500 | \n",
229 | " 0.005500 | \n",
230 | " -0.009000 | \n",
231 | " -0.132500 | \n",
232 | "
\n",
233 | " \n",
234 | " | 75% | \n",
235 | " 186.750000 | \n",
236 | " 1.000000 | \n",
237 | " 0.677000 | \n",
238 | " 0.620750 | \n",
239 | " 0.805000 | \n",
240 | " 0.720000 | \n",
241 | " 0.735000 | \n",
242 | " 0.660500 | \n",
243 | " 0.783250 | \n",
244 | " 0.766250 | \n",
245 | " ... | \n",
246 | " 0.797250 | \n",
247 | " 0.804250 | \n",
248 | " 0.631500 | \n",
249 | " 0.650250 | \n",
250 | " 0.739500 | \n",
251 | " 0.493000 | \n",
252 | " 0.683000 | \n",
253 | " 0.794250 | \n",
254 | " 0.654250 | \n",
255 | " 0.503250 | \n",
256 | "
\n",
257 | " \n",
258 | " | max | \n",
259 | " 249.000000 | \n",
260 | " 1.000000 | \n",
261 | " 2.567000 | \n",
262 | " 2.419000 | \n",
263 | " 3.392000 | \n",
264 | " 2.771000 | \n",
265 | " 2.901000 | \n",
266 | " 2.793000 | \n",
267 | " 2.546000 | \n",
268 | " 2.846000 | \n",
269 | " ... | \n",
270 | " 2.865000 | \n",
271 | " 2.801000 | \n",
272 | " 2.736000 | \n",
273 | " 2.596000 | \n",
274 | " 2.226000 | \n",
275 | " 3.131000 | \n",
276 | " 3.236000 | \n",
277 | " 2.626000 | \n",
278 | " 3.530000 | \n",
279 | " 2.771000 | \n",
280 | "
\n",
281 | " \n",
282 | "
\n",
283 | "
8 rows × 302 columns
\n",
284 | "
"
285 | ],
286 | "text/plain": [
287 | " id target 0 1 2 3 \\\n",
288 | "count 250.000000 250.000000 250.000000 250.000000 250.000000 250.000000 \n",
289 | "mean 124.500000 0.640000 0.023292 -0.026872 0.167404 0.001904 \n",
290 | "std 72.312977 0.480963 0.998354 1.009314 1.021709 1.011751 \n",
291 | "min 0.000000 0.000000 -2.319000 -2.931000 -2.477000 -2.359000 \n",
292 | "25% 62.250000 0.000000 -0.644750 -0.739750 -0.425250 -0.686500 \n",
293 | "50% 124.500000 1.000000 -0.015500 0.057000 0.184000 -0.016500 \n",
294 | "75% 186.750000 1.000000 0.677000 0.620750 0.805000 0.720000 \n",
295 | "max 249.000000 1.000000 2.567000 2.419000 3.392000 2.771000 \n",
296 | "\n",
297 | " 4 5 6 7 ... 290 \\\n",
298 | "count 250.000000 250.000000 250.000000 250.000000 ... 250.000000 \n",
299 | "mean 0.001588 -0.007304 0.032052 0.078412 ... 0.044652 \n",
300 | "std 1.035411 0.955700 1.006657 0.939731 ... 1.011416 \n",
301 | "min -2.566000 -2.845000 -2.976000 -3.444000 ... -2.804000 \n",
302 | "25% -0.659000 -0.643750 -0.675000 -0.550750 ... -0.617000 \n",
303 | "50% -0.023000 0.037500 0.060500 0.183500 ... 0.067500 \n",
304 | "75% 0.735000 0.660500 0.783250 0.766250 ... 0.797250 \n",
305 | "max 2.901000 2.793000 2.546000 2.846000 ... 2.865000 \n",
306 | "\n",
307 | " 291 292 293 294 295 296 \\\n",
308 | "count 250.000000 250.000000 250.000000 250.000000 250.000000 250.000000 \n",
309 | "mean 0.126344 0.018436 -0.012092 -0.065720 -0.106112 0.046472 \n",
310 | "std 0.972567 0.954229 0.960630 1.057414 1.038389 0.967661 \n",
311 | "min -2.443000 -2.757000 -2.466000 -3.287000 -3.072000 -2.634000 \n",
312 | "25% -0.510500 -0.535750 -0.657000 -0.818500 -0.821000 -0.605500 \n",
313 | "50% 0.091000 0.057500 -0.021000 -0.009000 -0.079500 0.009500 \n",
314 | "75% 0.804250 0.631500 0.650250 0.739500 0.493000 0.683000 \n",
315 | "max 2.801000 2.736000 2.596000 2.226000 3.131000 3.236000 \n",
316 | "\n",
317 | " 297 298 299 \n",
318 | "count 250.000000 250.000000 250.000000 \n",
319 | "mean 0.006452 0.009372 -0.128952 \n",
320 | "std 0.998984 1.008099 0.971219 \n",
321 | "min -2.776000 -3.211000 -3.500000 \n",
322 | "25% -0.751250 -0.550000 -0.754250 \n",
323 | "50% 0.005500 -0.009000 -0.132500 \n",
324 | "75% 0.794250 0.654250 0.503250 \n",
325 | "max 2.626000 3.530000 2.771000 \n",
326 | "\n",
327 | "[8 rows x 302 columns]"
328 | ]
329 | },
330 | "execution_count": 60,
331 | "metadata": {},
332 | "output_type": "execute_result"
333 | }
334 | ],
335 | "source": []
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 59,
340 | "metadata": {},
341 | "outputs": [
342 | {
343 | "data": {
344 | "text/html": [
345 | "\n",
346 | "\n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " | \n",
363 | " id | \n",
364 | " 0 | \n",
365 | " 1 | \n",
366 | " 2 | \n",
367 | " 3 | \n",
368 | " 4 | \n",
369 | " 5 | \n",
370 | " 6 | \n",
371 | " 7 | \n",
372 | " 8 | \n",
373 | " ... | \n",
374 | " 290 | \n",
375 | " 291 | \n",
376 | " 292 | \n",
377 | " 293 | \n",
378 | " 294 | \n",
379 | " 295 | \n",
380 | " 296 | \n",
381 | " 297 | \n",
382 | " 298 | \n",
383 | " 299 | \n",
384 | "
\n",
385 | " \n",
386 | " \n",
387 | " \n",
388 | " | 0 | \n",
389 | " 250 | \n",
390 | " 0.500 | \n",
391 | " -1.033 | \n",
392 | " -1.595 | \n",
393 | " 0.309 | \n",
394 | " -0.714 | \n",
395 | " 0.502 | \n",
396 | " 0.535 | \n",
397 | " -0.129 | \n",
398 | " -0.687 | \n",
399 | " ... | \n",
400 | " -0.088 | \n",
401 | " -2.628 | \n",
402 | " -0.845 | \n",
403 | " 2.078 | \n",
404 | " -0.277 | \n",
405 | " 2.132 | \n",
406 | " 0.609 | \n",
407 | " -0.104 | \n",
408 | " 0.312 | \n",
409 | " 0.979 | \n",
410 | "
\n",
411 | " \n",
412 | " | 1 | \n",
413 | " 251 | \n",
414 | " 0.776 | \n",
415 | " 0.914 | \n",
416 | " -0.494 | \n",
417 | " 1.347 | \n",
418 | " -0.867 | \n",
419 | " 0.480 | \n",
420 | " 0.578 | \n",
421 | " -0.313 | \n",
422 | " 0.203 | \n",
423 | " ... | \n",
424 | " -0.683 | \n",
425 | " -0.066 | \n",
426 | " 0.025 | \n",
427 | " 0.606 | \n",
428 | " -0.353 | \n",
429 | " -1.133 | \n",
430 | " -3.138 | \n",
431 | " 0.281 | \n",
432 | " -0.625 | \n",
433 | " -0.761 | \n",
434 | "
\n",
435 | " \n",
436 | " | 2 | \n",
437 | " 252 | \n",
438 | " 1.750 | \n",
439 | " 0.509 | \n",
440 | " -0.057 | \n",
441 | " 0.835 | \n",
442 | " -0.476 | \n",
443 | " 1.428 | \n",
444 | " -0.701 | \n",
445 | " -2.009 | \n",
446 | " -1.378 | \n",
447 | " ... | \n",
448 | " -0.094 | \n",
449 | " 0.351 | \n",
450 | " -0.607 | \n",
451 | " -0.737 | \n",
452 | " -0.031 | \n",
453 | " 0.701 | \n",
454 | " 0.976 | \n",
455 | " 0.135 | \n",
456 | " -1.327 | \n",
457 | " 2.463 | \n",
458 | "
\n",
459 | " \n",
460 | " | 3 | \n",
461 | " 253 | \n",
462 | " -0.556 | \n",
463 | " -1.855 | \n",
464 | " -0.682 | \n",
465 | " 0.578 | \n",
466 | " 1.592 | \n",
467 | " 0.512 | \n",
468 | " -1.419 | \n",
469 | " 0.722 | \n",
470 | " 0.511 | \n",
471 | " ... | \n",
472 | " -0.336 | \n",
473 | " -0.787 | \n",
474 | " 0.255 | \n",
475 | " -0.031 | \n",
476 | " -0.836 | \n",
477 | " 0.916 | \n",
478 | " 2.411 | \n",
479 | " 1.053 | \n",
480 | " -1.601 | \n",
481 | " -1.529 | \n",
482 | "
\n",
483 | " \n",
484 | " | 4 | \n",
485 | " 254 | \n",
486 | " 0.754 | \n",
487 | " -0.245 | \n",
488 | " 1.173 | \n",
489 | " -1.623 | \n",
490 | " 0.009 | \n",
491 | " 0.370 | \n",
492 | " 0.781 | \n",
493 | " -1.763 | \n",
494 | " -1.432 | \n",
495 | " ... | \n",
496 | " 2.184 | \n",
497 | " -1.090 | \n",
498 | " 0.216 | \n",
499 | " 1.186 | \n",
500 | " -0.143 | \n",
501 | " 0.322 | \n",
502 | " -0.068 | \n",
503 | " -0.156 | \n",
504 | " -1.153 | \n",
505 | " 0.825 | \n",
506 | "
\n",
507 | " \n",
508 | "
\n",
509 | "
5 rows × 301 columns
\n",
510 | "
"
511 | ],
512 | "text/plain": [
513 | " id 0 1 2 3 4 5 6 7 8 ... \\\n",
514 | "0 250 0.500 -1.033 -1.595 0.309 -0.714 0.502 0.535 -0.129 -0.687 ... \n",
515 | "1 251 0.776 0.914 -0.494 1.347 -0.867 0.480 0.578 -0.313 0.203 ... \n",
516 | "2 252 1.750 0.509 -0.057 0.835 -0.476 1.428 -0.701 -2.009 -1.378 ... \n",
517 | "3 253 -0.556 -1.855 -0.682 0.578 1.592 0.512 -1.419 0.722 0.511 ... \n",
518 | "4 254 0.754 -0.245 1.173 -1.623 0.009 0.370 0.781 -1.763 -1.432 ... \n",
519 | "\n",
520 | " 290 291 292 293 294 295 296 297 298 299 \n",
521 | "0 -0.088 -2.628 -0.845 2.078 -0.277 2.132 0.609 -0.104 0.312 0.979 \n",
522 | "1 -0.683 -0.066 0.025 0.606 -0.353 -1.133 -3.138 0.281 -0.625 -0.761 \n",
523 | "2 -0.094 0.351 -0.607 -0.737 -0.031 0.701 0.976 0.135 -1.327 2.463 \n",
524 | "3 -0.336 -0.787 0.255 -0.031 -0.836 0.916 2.411 1.053 -1.601 -1.529 \n",
525 | "4 2.184 -1.090 0.216 1.186 -0.143 0.322 -0.068 -0.156 -1.153 0.825 \n",
526 | "\n",
527 | "[5 rows x 301 columns]"
528 | ]
529 | },
530 | "execution_count": 59,
531 | "metadata": {},
532 | "output_type": "execute_result"
533 | }
534 | ],
535 | "source": [
536 | "test.head()"
537 | ]
538 | },
539 | {
540 | "cell_type": "code",
541 | "execution_count": 172,
542 | "metadata": {},
543 | "outputs": [
544 | {
545 | "data": {
546 | "text/plain": [
547 | "array([0.7 , 0.72, 0.68, 0.8 , 0.74])"
548 | ]
549 | },
550 | "execution_count": 172,
551 | "metadata": {},
552 | "output_type": "execute_result"
553 | }
554 | ],
555 | "source": [
556 | "from sklearn.linear_model import LogisticRegression\n",
557 | "\n",
558 | "X=train.drop(['id','target'],axis=1)\n",
559 | "y=train.target\n",
560 | "\n",
561 | "clf2 = LogisticRegression(class_weight='balanced', solver='liblinear', penalty ='l1',C=0.8).fit(X, y)\n",
562 | "scores = cross_val_score(clf2, X, y, cv=5)\n",
563 | "scores"
564 | ]
565 | },
566 | {
567 | "cell_type": "code",
568 | "execution_count": 191,
569 | "metadata": {},
570 | "outputs": [
571 | {
572 | "data": {
573 | "text/plain": [
574 | "array([0.72, 0.7 , 0.64, 0.76, 0.64])"
575 | ]
576 | },
577 | "execution_count": 191,
578 | "metadata": {},
579 | "output_type": "execute_result"
580 | }
581 | ],
582 | "source": [
583 | "from sklearn.linear_model import SGDClassifier\n",
584 | "\n",
585 | "X=train.drop(['id','target'],axis=1)\n",
586 | "y=train.target\n",
587 | "\n",
588 | "clf3 = SGDClassifier(alpha=0.008, average=False, class_weight=None,\n",
589 | " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n",
590 | " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=1000,\n",
591 | " n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n",
592 | " power_t=0.5, random_state=None, shuffle=True, tol=0.001,\n",
593 | " validation_fraction=0.1, verbose=0, warm_start=False).fit(X, y)\n",
594 | "scores = cross_val_score(clf3, X, y, cv=5)\n",
595 | "scores"
596 | ]
597 | },
598 | {
599 | "cell_type": "code",
600 | "execution_count": 210,
601 | "metadata": {},
602 | "outputs": [
603 | {
604 | "data": {
605 | "text/plain": [
606 | "array([0.6 , 0.68, 0.7 , 0.7 , 0.7 ])"
607 | ]
608 | },
609 | "execution_count": 210,
610 | "metadata": {},
611 | "output_type": "execute_result"
612 | }
613 | ],
614 | "source": [
615 | "from sklearn.ensemble import GradientBoostingClassifier\n",
616 | "\n",
617 | "X=train.drop(['id','target'],axis=1)\n",
618 | "y=train.target\n",
619 | "\n",
620 | "clf4 = GradientBoostingClassifier(loss='deviance', learning_rate=0.007, n_estimators=200,min_samples_split=2,\n",
621 | " validation_fraction=0.1,tol=0.0001).fit(X, y)\n",
622 | "scores = cross_val_score(clf4, X, y, cv=5)\n",
623 | "scores\n"
624 | ]
625 | },
626 | {
627 | "cell_type": "code",
628 | "execution_count": 174,
629 | "metadata": {},
630 | "outputs": [
631 | {
632 | "name": "stdout",
633 | "output_type": "stream",
634 | "text": [
635 | " precision recall f1-score support\n",
636 | "\n",
637 | " 0.0 1.00 1.00 1.00 90\n",
638 | " 1.0 1.00 1.00 1.00 160\n",
639 | "\n",
640 | " micro avg 1.00 1.00 1.00 250\n",
641 | " macro avg 1.00 1.00 1.00 250\n",
642 | "weighted avg 1.00 1.00 1.00 250\n",
643 | "\n"
644 | ]
645 | }
646 | ],
647 | "source": [
648 | "test_predict= clf3.predict(X)\n",
649 | "\n",
650 | "\n",
651 | "from sklearn.metrics import classification_report\n",
652 | "\n",
653 | "print(classification_report(y,test_predict))"
654 | ]
655 | },
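656 | {
657 | "cell_type": "markdown",
658 | "metadata": {},
659 | "source": [
660 | "The perfect report above is computed on the same 250 rows the model was fitted on, so it says nothing about generalisation: with 300 features and 250 samples, a linear model can separate the training set exactly. A held-out split gives a more honest picture; a minimal sketch, assuming `X` and `y` as defined above (`clf_val` and the split are only for illustration):"
661 | ]
662 | },
663 | {
664 | "cell_type": "code",
665 | "execution_count": null,
666 | "metadata": {},
667 | "outputs": [],
668 | "source": [
669 | "from sklearn.model_selection import train_test_split\n",
670 | "\n",
671 | "# Hold out 20% of the rows and report only on data the model never saw\n",
672 | "X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=0)\n",
673 | "clf_val = SGDClassifier(loss='log', alpha=0.008, max_iter=1000, tol=0.001).fit(X_tr, y_tr)\n",
674 | "print(classification_report(y_val, clf_val.predict(X_val)))"
675 | ]
676 | },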
656 | {
657 | "cell_type": "code",
658 | "execution_count": 211,
659 | "metadata": {},
660 | "outputs": [],
661 | "source": [
662 | "test_predict= 0.5*clf2.predict_proba(test.drop('id',axis=1))[:,1] + 0.2*clf3.predict_proba(test.drop('id',axis=1))[:,1] +0.3*clf4.predict_proba(test.drop('id',axis=1))[:,1]"
663 | ]
664 | },
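665 | {
666 | "cell_type": "markdown",
667 | "metadata": {},
668 | "source": [
669 | "The weights 0.5, 0.2 and 0.3 sum to one, so the blend is itself a valid probability. scikit-learn can express the same weighted soft vote as a single estimator, which also lets the blend be cross-validated like any other model; a sketch using the three classifiers above:"
670 | ]
671 | },
672 | {
673 | "cell_type": "code",
674 | "execution_count": null,
675 | "metadata": {},
676 | "outputs": [],
677 | "source": [
678 | "from sklearn.ensemble import VotingClassifier\n",
679 | "\n",
680 | "# Soft voting with the same fixed weights as the manual blend\n",
681 | "blend = VotingClassifier(estimators=[('lr', clf2), ('sgd', clf3), ('gb', clf4)],\n",
682 | "                         voting='soft', weights=[0.5, 0.2, 0.3]).fit(X, y)\n",
683 | "cross_val_score(blend, X, y, cv=5)"
684 | ]
685 | },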
665 | {
666 | "cell_type": "code",
667 | "execution_count": 216,
668 | "metadata": {},
669 | "outputs": [],
670 | "source": [
671 | "test_predict=clf3.predict_proba(test.drop('id',axis=1))[:,1]"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 217,
677 | "metadata": {},
678 | "outputs": [],
679 | "source": [
680 | "ss=pd.read_csv('test.csv')\n",
681 | "ids = ss['id']\n",
682 | "\n",
683 | "submission_file = open(\"overfit.csv\", \"w\")\n",
684 | "\n",
685 | "import csv as csv\n",
686 | "\n",
687 | "open_file_object = csv.writer(submission_file)\n",
688 | "\n",
689 | "# Write the header of the csv\n",
690 | "open_file_object.writerow([\"id\",\"target\"])\n",
691 | "\n",
692 | "# Write the rows of the csv\n",
693 | "open_file_object.writerows(zip(ids, test_predict))\n",
694 | "\n",
695 | "# Close the file\n",
696 | "submission_file.close()"
697 | ]
698 | },
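699 | {
700 | "cell_type": "markdown",
701 | "metadata": {},
702 | "source": [
703 | "The same submission file can be produced in one step with pandas, which avoids the manual file handling; a sketch, assuming `ids` and `test_predict` as built above:"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": null,
709 | "metadata": {},
710 | "outputs": [],
711 | "source": [
712 | "# Equivalent pandas one-liner: build the two-column frame, write it without the index\n",
713 | "pd.DataFrame({'id': ids, 'target': test_predict}).to_csv('overfit.csv', index=False)"
714 | ]
715 | },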
699 | {
700 | "cell_type": "code",
701 | "execution_count": 237,
702 | "metadata": {},
703 | "outputs": [],
704 | "source": [
705 | "a=[]\n",
706 | "for i in train.drop(['id','target'],axis=1).columns:\n",
707 | " \n",
708 | " a.append(train[i].corr(train['target']))\n",
709 | " \n"
710 | ]
711 | },
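712 | {
713 | "cell_type": "markdown",
714 | "metadata": {},
715 | "source": [
716 | "The loop above can be replaced by a single vectorised call: `DataFrame.corrwith` computes the column-wise correlation with a Series and keeps the column names, which the plain list `a` loses. A sketch on the same data:"
717 | ]
718 | },
719 | {
720 | "cell_type": "code",
721 | "execution_count": null,
722 | "metadata": {},
723 | "outputs": [],
724 | "source": [
725 | "# Same correlations as the loop, indexed by column name instead of position\n",
726 | "corrs = train.drop(['id', 'target'], axis=1).corrwith(train['target'])\n",
727 | "corrs.head()"
728 | ]
729 | },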
712 | {
713 | "cell_type": "code",
714 | "execution_count": 243,
715 | "metadata": {},
716 | "outputs": [],
717 | "source": [
718 | "d={'col1':a}\n",
719 | "d=pd.DataFrame(data=d)\n"
720 | ]
721 | },
722 | {
723 | "cell_type": "code",
724 | "execution_count": 249,
725 | "metadata": {},
726 | "outputs": [
727 | {
728 | "data": {
729 | "text/plain": [
730 | "Int64Index([ 4, 16, 39, 43, 63, 73, 80, 82, 90, 91, 98, 108, 117,\n",
731 | " 127, 129, 133, 134, 150, 165, 189, 194, 209, 217, 220, 230, 237,\n",
732 | " 239, 252, 258, 276, 295, 298],\n",
733 | " dtype='int64')"
734 | ]
735 | },
736 | "execution_count": 249,
737 | "metadata": {},
738 | "output_type": "execute_result"
739 | }
740 | ],
741 | "source": [
742 | "d[d.values<-0.1].index"
743 | ]
744 | },
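745 | {
746 | "cell_type": "markdown",
747 | "metadata": {},
748 | "source": [
749 | "These are positional indices into `d`; they only line up with column names because the feature columns are named '0' through '299' in order. Converting them to strings reproduces the 32 names that the next cell types out by hand; a sketch, assuming `d` as built above:"
750 | ]
751 | },
752 | {
753 | "cell_type": "code",
754 | "execution_count": null,
755 | "metadata": {},
756 | "outputs": [],
757 | "source": [
758 | "# Derive the column names to drop instead of hard-coding them\n",
759 | "low_corr_cols = [str(i) for i in d[d['col1'] < -0.1].index]\n",
760 | "low_corr_cols[:5]"
761 | ]
762 | },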
745 | {
746 | "cell_type": "code",
747 | "execution_count": 265,
748 | "metadata": {},
749 | "outputs": [],
750 | "source": [
751 | "X=train.drop(['4', '16', '39', '43', '63', '73', '80', '82', '90', '91', '98', '108', '117',\n",
752 | " '127', '129', '133', '134', '150', '165', '189', '194', '209', '217', '220', '230', '237',\n",
753 | " '239', '252', '258', '276', '295', '298'],axis=1)\n",
754 | "\n",
755 | "N_test=test.drop(['4', '16', '39', '43', '63', '73', '80', '82', '90', '91', '98', '108', '117',\n",
756 | " '127', '129', '133', '134', '150', '165', '189', '194', '209', '217', '220', '230', '237',\n",
757 | " '239', '252', '258', '276', '295', '298'],axis=1)\n",
758 | "y=X.target"
759 | ]
760 | },
761 | {
762 | "cell_type": "code",
763 | "execution_count": 307,
764 | "metadata": {},
765 | "outputs": [
766 | {
767 | "data": {
768 | "text/plain": [
769 | "array([0.68, 0.66, 0.66, 0.6 , 0.6 ])"
770 | ]
771 | },
772 | "execution_count": 307,
773 | "metadata": {},
774 | "output_type": "execute_result"
775 | }
776 | ],
777 | "source": [
778 | "from sklearn.linear_model import LogisticRegression\n",
779 | "\n",
780 | "\n",
781 | "#X=X.drop(['id','target'],axis=1)\n",
782 | "\n",
783 | "\n",
784 | "clf2 = LogisticRegression(class_weight='balanced', solver='liblinear', penalty ='l1',C=0.122).fit(X, y)\n",
785 | "scores = cross_val_score(clf2, X, y, cv=5)\n",
786 | "scores"
787 | ]
788 | },
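789 | {
790 | "cell_type": "markdown",
791 | "metadata": {},
792 | "source": [
793 | "A value like C=0.122 looks hand-tuned against the CV scores, which risks overfitting the validation folds themselves. A grid search keeps the tuning inside cross-validation; a minimal sketch, where the candidate values are illustrative rather than tuned:"
794 | ]
795 | },
796 | {
797 | "cell_type": "code",
798 | "execution_count": null,
799 | "metadata": {},
800 | "outputs": [],
801 | "source": [
802 | "from sklearn.model_selection import GridSearchCV\n",
803 | "\n",
804 | "# Search a small grid of regularisation strengths with the same 5-fold CV\n",
805 | "grid = GridSearchCV(\n",
806 | "    LogisticRegression(class_weight='balanced', solver='liblinear', penalty='l1'),\n",
807 | "    param_grid={'C': [0.05, 0.1, 0.2, 0.5, 1.0]},\n",
808 | "    cv=5)\n",
809 | "grid.fit(X, y)\n",
810 | "grid.best_params_, grid.best_score_"
811 | ]
812 | },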
789 | {
790 | "cell_type": "code",
791 | "execution_count": 276,
792 | "metadata": {},
793 | "outputs": [
794 | {
795 | "data": {
796 | "text/plain": [
797 | "array([0.64, 0.66, 0.68, 0.72, 0.7 ])"
798 | ]
799 | },
800 | "execution_count": 276,
801 | "metadata": {},
802 | "output_type": "execute_result"
803 | }
804 | ],
805 | "source": [
806 | "from sklearn.ensemble import GradientBoostingClassifier\n",
807 | "\n",
808 | "\n",
809 | "\n",
810 | "clf4 = GradientBoostingClassifier(loss='deviance', learning_rate=0.0042, n_estimators=200,min_samples_split=2,\n",
811 | " validation_fraction=0.1,tol=0.0001).fit(X, y)\n",
812 | "scores = cross_val_score(clf4, X, y, cv=5)\n",
813 | "scores"
814 | ]
815 | },
816 | {
817 | "cell_type": "code",
818 | "execution_count": 295,
819 | "metadata": {},
820 | "outputs": [
821 | {
822 | "data": {
823 | "text/plain": [
824 | "array([0.62, 0.6 , 0.58, 0.72, 0.6 ])"
825 | ]
826 | },
827 | "execution_count": 295,
828 | "metadata": {},
829 | "output_type": "execute_result"
830 | }
831 | ],
832 | "source": [
833 | "from sklearn.linear_model import SGDClassifier\n",
834 | "\n",
835 | "\n",
836 | "\n",
837 | "clf3 = SGDClassifier(alpha=0.0092, average=False, class_weight=None,\n",
838 | " early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,\n",
839 | " l1_ratio=0.15, learning_rate='optimal', loss='log', max_iter=1000,\n",
840 | " n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',\n",
841 | " power_t=0.5, random_state=None, shuffle=True, tol=0.001,\n",
842 | " validation_fraction=0.1, verbose=0, warm_start=False).fit(X, y)\n",
843 | "scores = cross_val_score(clf3, X, y, cv=5)\n",
844 | "scores"
845 | ]
846 | },
847 | {
848 | "cell_type": "code",
849 | "execution_count": 299,
850 | "metadata": {},
851 | "outputs": [],
852 | "source": [
853 | "test_predict= clf2.predict_proba(N_test.drop('id',axis=1))[:,1] "
854 | ]
855 | },
856 | {
857 | "cell_type": "code",
858 | "execution_count": 300,
859 | "metadata": {},
860 | "outputs": [],
861 | "source": [
862 | "ss=pd.read_csv('test.csv')\n",
863 | "ids = ss['id']\n",
864 | "\n",
865 | "submission_file = open(\"overfit.csv\", \"w\")\n",
866 | "\n",
867 | "import csv as csv\n",
868 | "\n",
869 | "open_file_object = csv.writer(submission_file)\n",
870 | "\n",
871 | "# Write the header of the csv\n",
872 | "open_file_object.writerow([\"id\",\"target\"])\n",
873 | "\n",
874 | "# Write the rows of the csv\n",
875 | "open_file_object.writerows(zip(ids, test_predict))\n",
876 | "\n",
877 | "# Close the file\n",
878 | "submission_file.close()"
879 | ]
880 | }
888 | ],
889 | "metadata": {
890 | "kernelspec": {
891 | "display_name": "Python 3",
892 | "language": "python",
893 | "name": "python3"
894 | },
895 | "language_info": {
896 | "codemirror_mode": {
897 | "name": "ipython",
898 | "version": 3
899 | },
900 | "file_extension": ".py",
901 | "mimetype": "text/x-python",
902 | "name": "python",
903 | "nbconvert_exporter": "python",
904 | "pygments_lexer": "ipython3",
905 | "version": "3.7.1"
906 | }
907 | },
908 | "nbformat": 4,
909 | "nbformat_minor": 2
910 | }
911 |
--------------------------------------------------------------------------------