├── images
├── Images.md
├── rf.png
├── BvsV.png
├── MSE.png
├── dt1.jpg
├── knn1.png
├── okay.jpeg
├── reg.jpg
├── svm.jpg
├── svm1.png
├── formula.png
├── kernel2.jpg
├── logreg.jpg
├── thanks.png
├── voting.png
├── hierarch.gif
├── kmeans++.gif
├── model_fit.jpg
├── tradeoff.png
├── cluster_v_class.jpg
├── support_vectors.jpg
├── Kmeans_animation.gif
└── hierarchial_approach.jfif
├── data
├── Position_Salaries.csv
├── Data.csv
├── Mall_Customers.csv
├── wine-clustering.csv
├── Social_Network_Ads.csv
├── x-y.csv
└── Realestate.csv
├── requirements.txt
├── .gitignore
├── README.md
├── week 2.ipynb
└── week 3.ipynb
/images/Images.md:
--------------------------------------------------------------------------------
1 | Contains Images
2 |
--------------------------------------------------------------------------------
/images/rf.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/rf.png
--------------------------------------------------------------------------------
/images/BvsV.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/BvsV.png
--------------------------------------------------------------------------------
/images/MSE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/MSE.png
--------------------------------------------------------------------------------
/images/dt1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/dt1.jpg
--------------------------------------------------------------------------------
/images/knn1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/knn1.png
--------------------------------------------------------------------------------
/images/okay.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/okay.jpeg
--------------------------------------------------------------------------------
/images/reg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/reg.jpg
--------------------------------------------------------------------------------
/images/svm.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/svm.jpg
--------------------------------------------------------------------------------
/images/svm1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/svm1.png
--------------------------------------------------------------------------------
/images/formula.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/formula.png
--------------------------------------------------------------------------------
/images/kernel2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/kernel2.jpg
--------------------------------------------------------------------------------
/images/logreg.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/logreg.jpg
--------------------------------------------------------------------------------
/images/thanks.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/thanks.png
--------------------------------------------------------------------------------
/images/voting.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/voting.png
--------------------------------------------------------------------------------
/images/hierarch.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/hierarch.gif
--------------------------------------------------------------------------------
/images/kmeans++.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/kmeans++.gif
--------------------------------------------------------------------------------
/images/model_fit.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/model_fit.jpg
--------------------------------------------------------------------------------
/images/tradeoff.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/tradeoff.png
--------------------------------------------------------------------------------
/images/cluster_v_class.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/cluster_v_class.jpg
--------------------------------------------------------------------------------
/images/support_vectors.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/support_vectors.jpg
--------------------------------------------------------------------------------
/images/Kmeans_animation.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/Kmeans_animation.gif
--------------------------------------------------------------------------------
/images/hierarchial_approach.jfif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/hierarchial_approach.jfif
--------------------------------------------------------------------------------
/data/Position_Salaries.csv:
--------------------------------------------------------------------------------
1 | Position,Level,Salary
2 | Business Analyst,1,45000
3 | Junior Consultant,2,50000
4 | Senior Consultant,3,60000
5 | Manager,4,80000
6 | Country Manager,5,110000
7 | Region Manager,6,150000
8 | Partner,7,200000
9 | Senior Partner,8,300000
10 | C-level,9,500000
11 | CEO,10,1000000
--------------------------------------------------------------------------------
/data/Data.csv:
--------------------------------------------------------------------------------
1 | City,Experience,Salary,Promotion
2 | Delhi,4,55000,No
3 | Mumbai,2,20000,Yes
4 | Agra,3,30000,No
5 | Mumbai,8,72000,No
6 | Agra,4,,Yes
7 | Delhi,5,60000,Yes
8 | Mumbai,,52000,No
9 | Delhi,4,51000,Yes
10 | Agra,5,59000,No
11 | Delhi,3,31000,Yes
12 | ,5,58000,Yes
13 | Mumbai,,52000,No
14 | Delhi,8,79000,Yes
15 | Agra,5,60000,No
16 | Delhi,7,67000,Yes
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # Pinned package versions (pip requirements format: name==version)
2 | # Install with: pip install -r requirements.txt
3 | cycler==0.10.0
4 | joblib==0.16.0
5 | kiwisolver==1.2.0
6 | matplotlib==3.2.2
7 | numpy==1.19.0
8 | pandas==1.0.5
9 | pip==20.0.2
10 | # pkg-resources==0.0.0  (virtualenv artifact reported by pip; not an installable package)
11 | plotly==4.8.2
12 | pyparsing==2.4.7
13 | python-dateutil==2.8.1
14 | pytz==2020.1
15 | retrying==1.3.3
16 | scikit-learn==0.23.1
17 | scipy==1.5.1
18 | seaborn==0.10.1
19 | setuptools==44.0.0
20 | six==1.15.0
21 | threadpoolctl==2.1.0
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | SoA-ML-14
2 | ==========
3 | ## Week 1: Intro to Numpy and Pandas
4 | ### (Anaconda, spyder, jupyter)
5 | #### Getting Familiar with:
6 | [*Link to Week 1's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%201.ipynb)
7 | * Numpy
8 | * Pandas
9 | * Matplotlib
10 |
11 | ## Week 2: Basic Data Pre-processing:
12 | [*Link to Week 2's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%202.ipynb)
13 | * One Hot encoding
14 | * Label Encoding
15 | * Normalization
16 | * Dealing with Missing values
17 | * Introduction to Machine learning
18 | * Types of Learning (Supervised, Unsupervised and Reinforcement)
19 | * Application of Machine Learning
20 |
21 |
22 | ## Week 3: Regression Algorithms:
23 | [*Link to Week 3's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%203.ipynb)
24 | * Linear Regression
25 | * Multiple Linear Regression
26 | * Polynomial Regression
27 |
28 |
29 | ## Week 4: Classification Algorithms:
30 | [*Link to Week 4's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%204.ipynb)
31 | * Logistic Regression
32 | * K-Nearest Neighbours
33 | * Support Vector Classifier
34 | * Decision Tree
35 | * Random Forest
36 | * Voting Classifier
37 |
38 |
39 | ## Week 5: Bias vs Variance Trade-off
40 | [*Link to Week 5's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%205.ipynb)
41 | * Overfitting
42 | * Underfitting
43 | * Regularization
44 | * Support Vector Machines
45 |
46 |
47 | ## Week 6: Clustering Algorithms:
48 | [*Link to Week 6's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%206.ipynb)
49 | * K-means Clustering
50 | * Hierarchical Clustering
51 |
52 |
53 | ## Week 7: Dimensionality Reduction:
54 | [*Link to Week 7's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%207.ipynb)
55 | * PCA
56 | * LDA
57 | * Kernel PCA
58 |
59 | * Model Selection:
60 | * K-fold Cross Validation
61 | * Parameter Tuning
62 | * Grid Search
63 |
64 | ## Week 8: An introduction to Boosting
65 | [*Link to Week 8's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%208.ipynb)
66 | * Gradient Boosting
67 | * XGBoost
--------------------------------------------------------------------------------
/data/Mall_Customers.csv:
--------------------------------------------------------------------------------
1 | CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
2 | 1,Male,19,15,39
3 | 2,Male,21,15,81
4 | 3,Female,20,16,6
5 | 4,Female,23,16,77
6 | 5,Female,31,17,40
7 | 6,Female,22,17,76
8 | 7,Female,35,18,6
9 | 8,Female,23,18,94
10 | 9,Male,64,19,3
11 | 10,Female,30,19,72
12 | 11,Male,67,19,14
13 | 12,Female,35,19,99
14 | 13,Female,58,20,15
15 | 14,Female,24,20,77
16 | 15,Male,37,20,13
17 | 16,Male,22,20,79
18 | 17,Female,35,21,35
19 | 18,Male,20,21,66
20 | 19,Male,52,23,29
21 | 20,Female,35,23,98
22 | 21,Male,35,24,35
23 | 22,Male,25,24,73
24 | 23,Female,46,25,5
25 | 24,Male,31,25,73
26 | 25,Female,54,28,14
27 | 26,Male,29,28,82
28 | 27,Female,45,28,32
29 | 28,Male,35,28,61
30 | 29,Female,40,29,31
31 | 30,Female,23,29,87
32 | 31,Male,60,30,4
33 | 32,Female,21,30,73
34 | 33,Male,53,33,4
35 | 34,Male,18,33,92
36 | 35,Female,49,33,14
37 | 36,Female,21,33,81
38 | 37,Female,42,34,17
39 | 38,Female,30,34,73
40 | 39,Female,36,37,26
41 | 40,Female,20,37,75
42 | 41,Female,65,38,35
43 | 42,Male,24,38,92
44 | 43,Male,48,39,36
45 | 44,Female,31,39,61
46 | 45,Female,49,39,28
47 | 46,Female,24,39,65
48 | 47,Female,50,40,55
49 | 48,Female,27,40,47
50 | 49,Female,29,40,42
51 | 50,Female,31,40,42
52 | 51,Female,49,42,52
53 | 52,Male,33,42,60
54 | 53,Female,31,43,54
55 | 54,Male,59,43,60
56 | 55,Female,50,43,45
57 | 56,Male,47,43,41
58 | 57,Female,51,44,50
59 | 58,Male,69,44,46
60 | 59,Female,27,46,51
61 | 60,Male,53,46,46
62 | 61,Male,70,46,56
63 | 62,Male,19,46,55
64 | 63,Female,67,47,52
65 | 64,Female,54,47,59
66 | 65,Male,63,48,51
67 | 66,Male,18,48,59
68 | 67,Female,43,48,50
69 | 68,Female,68,48,48
70 | 69,Male,19,48,59
71 | 70,Female,32,48,47
72 | 71,Male,70,49,55
73 | 72,Female,47,49,42
74 | 73,Female,60,50,49
75 | 74,Female,60,50,56
76 | 75,Male,59,54,47
77 | 76,Male,26,54,54
78 | 77,Female,45,54,53
79 | 78,Male,40,54,48
80 | 79,Female,23,54,52
81 | 80,Female,49,54,42
82 | 81,Male,57,54,51
83 | 82,Male,38,54,55
84 | 83,Male,67,54,41
85 | 84,Female,46,54,44
86 | 85,Female,21,54,57
87 | 86,Male,48,54,46
88 | 87,Female,55,57,58
89 | 88,Female,22,57,55
90 | 89,Female,34,58,60
91 | 90,Female,50,58,46
92 | 91,Female,68,59,55
93 | 92,Male,18,59,41
94 | 93,Male,48,60,49
95 | 94,Female,40,60,40
96 | 95,Female,32,60,42
97 | 96,Male,24,60,52
98 | 97,Female,47,60,47
99 | 98,Female,27,60,50
100 | 99,Male,48,61,42
101 | 100,Male,20,61,49
102 | 101,Female,23,62,41
103 | 102,Female,49,62,48
104 | 103,Male,67,62,59
105 | 104,Male,26,62,55
106 | 105,Male,49,62,56
107 | 106,Female,21,62,42
108 | 107,Female,66,63,50
109 | 108,Male,54,63,46
110 | 109,Male,68,63,43
111 | 110,Male,66,63,48
112 | 111,Male,65,63,52
113 | 112,Female,19,63,54
114 | 113,Female,38,64,42
115 | 114,Male,19,64,46
116 | 115,Female,18,65,48
117 | 116,Female,19,65,50
118 | 117,Female,63,65,43
119 | 118,Female,49,65,59
120 | 119,Female,51,67,43
121 | 120,Female,50,67,57
122 | 121,Male,27,67,56
123 | 122,Female,38,67,40
124 | 123,Female,40,69,58
125 | 124,Male,39,69,91
126 | 125,Female,23,70,29
127 | 126,Female,31,70,77
128 | 127,Male,43,71,35
129 | 128,Male,40,71,95
130 | 129,Male,59,71,11
131 | 130,Male,38,71,75
132 | 131,Male,47,71,9
133 | 132,Male,39,71,75
134 | 133,Female,25,72,34
135 | 134,Female,31,72,71
136 | 135,Male,20,73,5
137 | 136,Female,29,73,88
138 | 137,Female,44,73,7
139 | 138,Male,32,73,73
140 | 139,Male,19,74,10
141 | 140,Female,35,74,72
142 | 141,Female,57,75,5
143 | 142,Male,32,75,93
144 | 143,Female,28,76,40
145 | 144,Female,32,76,87
146 | 145,Male,25,77,12
147 | 146,Male,28,77,97
148 | 147,Male,48,77,36
149 | 148,Female,32,77,74
150 | 149,Female,34,78,22
151 | 150,Male,34,78,90
152 | 151,Male,43,78,17
153 | 152,Male,39,78,88
154 | 153,Female,44,78,20
155 | 154,Female,38,78,76
156 | 155,Female,47,78,16
157 | 156,Female,27,78,89
158 | 157,Male,37,78,1
159 | 158,Female,30,78,78
160 | 159,Male,34,78,1
161 | 160,Female,30,78,73
162 | 161,Female,56,79,35
163 | 162,Female,29,79,83
164 | 163,Male,19,81,5
165 | 164,Female,31,81,93
166 | 165,Male,50,85,26
167 | 166,Female,36,85,75
168 | 167,Male,42,86,20
169 | 168,Female,33,86,95
170 | 169,Female,36,87,27
171 | 170,Male,32,87,63
172 | 171,Male,40,87,13
173 | 172,Male,28,87,75
174 | 173,Male,36,87,10
175 | 174,Male,36,87,92
176 | 175,Female,52,88,13
177 | 176,Female,30,88,86
178 | 177,Male,58,88,15
179 | 178,Male,27,88,69
180 | 179,Male,59,93,14
181 | 180,Male,35,93,90
182 | 181,Female,37,97,32
183 | 182,Female,32,97,86
184 | 183,Male,46,98,15
185 | 184,Female,29,98,88
186 | 185,Female,41,99,39
187 | 186,Male,30,99,97
188 | 187,Female,54,101,24
189 | 188,Male,28,101,68
190 | 189,Female,41,103,17
191 | 190,Female,36,103,85
192 | 191,Female,34,103,23
193 | 192,Female,32,103,69
194 | 193,Male,33,113,8
195 | 194,Female,38,113,91
196 | 195,Female,47,120,16
197 | 196,Female,35,120,79
198 | 197,Female,45,126,28
199 | 198,Male,32,126,74
200 | 199,Male,32,137,18
201 | 200,Male,30,137,83
202 |
--------------------------------------------------------------------------------
/data/wine-clustering.csv:
--------------------------------------------------------------------------------
1 | Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline
2 | 14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
3 | 13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
4 | 13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
5 | 14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
6 | 13.24,2.59,2.87,21,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735
7 | 14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450
8 | 14.39,1.87,2.45,14.6,96,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290
9 | 14.06,2.15,2.61,17.6,121,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295
10 | 14.83,1.64,2.17,14,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045
11 | 13.86,1.35,2.27,16,98,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045
12 | 14.1,2.16,2.3,18,105,2.95,3.32,0.22,2.38,5.75,1.25,3.17,1510
13 | 14.12,1.48,2.32,16.8,95,2.2,2.43,0.26,1.57,5,1.17,2.82,1280
14 | 13.75,1.73,2.41,16,89,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320
15 | 14.75,1.73,2.39,11.4,91,3.1,3.69,0.43,2.81,5.4,1.25,2.73,1150
16 | 14.38,1.87,2.38,12,102,3.3,3.64,0.29,2.96,7.5,1.2,3,1547
17 | 13.63,1.81,2.7,17.2,112,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310
18 | 14.3,1.92,2.72,20,120,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280
19 | 13.83,1.57,2.62,20,115,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130
20 | 14.19,1.59,2.48,16.5,108,3.3,3.93,0.32,1.86,8.7,1.23,2.82,1680
21 | 13.64,3.1,2.56,15.2,116,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845
22 | 14.06,1.63,2.28,16,126,3,3.17,0.24,2.1,5.65,1.09,3.71,780
23 | 12.93,3.8,2.65,18.6,102,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770
24 | 13.71,1.86,2.36,16.6,101,2.61,2.88,0.27,1.69,3.8,1.11,4,1035
25 | 12.85,1.6,2.52,17.8,95,2.48,2.37,0.26,1.46,3.93,1.09,3.63,1015
26 | 13.5,1.81,2.61,20,96,2.53,2.61,0.28,1.66,3.52,1.12,3.82,845
27 | 13.05,2.05,3.22,25,124,2.63,2.68,0.47,1.92,3.58,1.13,3.2,830
28 | 13.39,1.77,2.62,16.1,93,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195
29 | 13.3,1.72,2.14,17,94,2.4,2.19,0.27,1.35,3.95,1.02,2.77,1285
30 | 13.87,1.9,2.8,19.4,107,2.95,2.97,0.37,1.76,4.5,1.25,3.4,915
31 | 14.02,1.68,2.21,16,96,2.65,2.33,0.26,1.98,4.7,1.04,3.59,1035
32 | 13.73,1.5,2.7,22.5,101,3,3.25,0.29,2.38,5.7,1.19,2.71,1285
33 | 13.58,1.66,2.36,19.1,106,2.86,3.19,0.22,1.95,6.9,1.09,2.88,1515
34 | 13.68,1.83,2.36,17.2,104,2.42,2.69,0.42,1.97,3.84,1.23,2.87,990
35 | 13.76,1.53,2.7,19.5,132,2.95,2.74,0.5,1.35,5.4,1.25,3,1235
36 | 13.51,1.8,2.65,19,110,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095
37 | 13.48,1.81,2.41,20.5,100,2.7,2.98,0.26,1.86,5.1,1.04,3.47,920
38 | 13.28,1.64,2.84,15.5,110,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880
39 | 13.05,1.65,2.55,18,98,2.45,2.43,0.29,1.44,4.25,1.12,2.51,1105
40 | 13.07,1.5,2.1,15.5,98,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020
41 | 14.22,3.99,2.51,13.2,128,3,3.04,0.2,2.08,5.1,0.89,3.53,760
42 | 13.56,1.71,2.31,16.2,117,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795
43 | 13.41,3.84,2.12,18.8,90,2.45,2.68,0.27,1.48,4.28,0.91,3,1035
44 | 13.88,1.89,2.59,15,101,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095
45 | 13.24,3.98,2.29,17.5,103,2.64,2.63,0.32,1.66,4.36,0.82,3,680
46 | 13.05,1.77,2.1,17,107,3,3,0.28,2.03,5.04,0.88,3.35,885
47 | 14.21,4.04,2.44,18.9,111,2.85,2.65,0.3,1.25,5.24,0.87,3.33,1080
48 | 14.38,3.59,2.28,16,102,3.25,3.17,0.27,2.19,4.9,1.04,3.44,1065
49 | 13.9,1.68,2.12,16,101,3.1,3.39,0.21,2.14,6.1,0.91,3.33,985
50 | 14.1,2.02,2.4,18.8,103,2.75,2.92,0.32,2.38,6.2,1.07,2.75,1060
51 | 13.94,1.73,2.27,17.4,108,2.88,3.54,0.32,2.08,8.9,1.12,3.1,1260
52 | 13.05,1.73,2.04,12.4,92,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150
53 | 13.83,1.65,2.6,17.2,94,2.45,2.99,0.22,2.29,5.6,1.24,3.37,1265
54 | 13.82,1.75,2.42,14,111,3.88,3.74,0.32,1.87,7.05,1.01,3.26,1190
55 | 13.77,1.9,2.68,17.1,115,3,2.79,0.39,1.68,6.3,1.13,2.93,1375
56 | 13.74,1.67,2.25,16.4,118,2.6,2.9,0.21,1.62,5.85,0.92,3.2,1060
57 | 13.56,1.73,2.46,20.5,116,2.96,2.78,0.2,2.45,6.25,0.98,3.03,1120
58 | 14.22,1.7,2.3,16.3,118,3.2,3,0.26,2.03,6.38,0.94,3.31,970
59 | 13.29,1.97,2.68,16.8,102,3,3.23,0.31,1.66,6,1.07,2.84,1270
60 | 13.72,1.43,2.5,16.7,108,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285
61 | 12.37,0.94,1.36,10.6,88,1.98,0.57,0.28,0.42,1.95,1.05,1.82,520
62 | 12.33,1.1,2.28,16,101,2.05,1.09,0.63,0.41,3.27,1.25,1.67,680
63 | 12.64,1.36,2.02,16.8,100,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450
64 | 13.67,1.25,1.92,18,94,2.1,1.79,0.32,0.73,3.8,1.23,2.46,630
65 | 12.37,1.13,2.16,19,87,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420
66 | 12.17,1.45,2.53,19,104,1.89,1.75,0.45,1.03,2.95,1.45,2.23,355
67 | 12.37,1.21,2.56,18.1,98,2.42,2.65,0.37,2.08,4.6,1.19,2.3,678
68 | 13.11,1.01,1.7,15,78,2.98,3.18,0.26,2.28,5.3,1.12,3.18,502
69 | 12.37,1.17,1.92,19.6,78,2.11,2,0.27,1.04,4.68,1.12,3.48,510
70 | 13.34,0.94,2.36,17,110,2.53,1.3,0.55,0.42,3.17,1.02,1.93,750
71 | 12.21,1.19,1.75,16.8,151,1.85,1.28,0.14,2.5,2.85,1.28,3.07,718
72 | 12.29,1.61,2.21,20.4,103,1.1,1.02,0.37,1.46,3.05,0.906,1.82,870
73 | 13.86,1.51,2.67,25,86,2.95,2.86,0.21,1.87,3.38,1.36,3.16,410
74 | 13.49,1.66,2.24,24,87,1.88,1.84,0.27,1.03,3.74,0.98,2.78,472
75 | 12.99,1.67,2.6,30,139,3.3,2.89,0.21,1.96,3.35,1.31,3.5,985
76 | 11.96,1.09,2.3,21,101,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886
77 | 11.66,1.88,1.92,16,97,1.61,1.57,0.34,1.15,3.8,1.23,2.14,428
78 | 13.03,0.9,1.71,16,86,1.95,2.03,0.24,1.46,4.6,1.19,2.48,392
79 | 11.84,2.89,2.23,18,112,1.72,1.32,0.43,0.95,2.65,0.96,2.52,500
80 | 12.33,0.99,1.95,14.8,136,1.9,1.85,0.35,2.76,3.4,1.06,2.31,750
81 | 12.7,3.87,2.4,23,101,2.83,2.55,0.43,1.95,2.57,1.19,3.13,463
82 | 12,0.92,2,19,86,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278
83 | 12.72,1.81,2.2,18.8,86,2.2,2.53,0.26,1.77,3.9,1.16,3.14,714
84 | 12.08,1.13,2.51,24,78,2,1.58,0.4,1.4,2.2,1.31,2.72,630
85 | 13.05,3.86,2.32,22.5,85,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515
86 | 11.84,0.89,2.58,18,94,2.2,2.21,0.22,2.35,3.05,0.79,3.08,520
87 | 12.67,0.98,2.24,18,99,2.2,1.94,0.3,1.46,2.62,1.23,3.16,450
88 | 12.16,1.61,2.31,22.8,90,1.78,1.69,0.43,1.56,2.45,1.33,2.26,495
89 | 11.65,1.67,2.62,26,88,1.92,1.61,0.4,1.34,2.6,1.36,3.21,562
90 | 11.64,2.06,2.46,21.6,84,1.95,1.69,0.48,1.35,2.8,1,2.75,680
91 | 12.08,1.33,2.3,23.6,70,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625
92 | 12.08,1.83,2.32,18.5,81,1.6,1.5,0.52,1.64,2.4,1.08,2.27,480
93 | 12,1.51,2.42,22,86,1.45,1.25,0.5,1.63,3.6,1.05,2.65,450
94 | 12.69,1.53,2.26,20.7,80,1.38,1.46,0.58,1.62,3.05,0.96,2.06,495
95 | 12.29,2.83,2.22,18,88,2.45,2.25,0.25,1.99,2.15,1.15,3.3,290
96 | 11.62,1.99,2.28,18,98,3.02,2.26,0.17,1.35,3.25,1.16,2.96,345
97 | 12.47,1.52,2.2,19,162,2.5,2.27,0.32,3.28,2.6,1.16,2.63,937
98 | 11.81,2.12,2.74,21.5,134,1.6,0.99,0.14,1.56,2.5,0.95,2.26,625
99 | 12.29,1.41,1.98,16,85,2.55,2.5,0.29,1.77,2.9,1.23,2.74,428
100 | 12.37,1.07,2.1,18.5,88,3.52,3.75,0.24,1.95,4.5,1.04,2.77,660
101 | 12.29,3.17,2.21,18,88,2.85,2.99,0.45,2.81,2.3,1.42,2.83,406
102 | 12.08,2.08,1.7,17.5,97,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710
103 | 12.6,1.34,1.9,18.5,88,1.45,1.36,0.29,1.35,2.45,1.04,2.77,562
104 | 12.34,2.45,2.46,21,98,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438
105 | 11.82,1.72,1.88,19.5,86,2.5,1.64,0.37,1.42,2.06,0.94,2.44,415
106 | 12.51,1.73,1.98,20.5,85,2.2,1.92,0.32,1.48,2.94,1.04,3.57,672
107 | 12.42,2.55,2.27,22,90,1.68,1.84,0.66,1.42,2.7,0.86,3.3,315
108 | 12.25,1.73,2.12,19,80,1.65,2.03,0.37,1.63,3.4,1,3.17,510
109 | 12.72,1.75,2.28,22.5,84,1.38,1.76,0.48,1.63,3.3,0.88,2.42,488
110 | 12.22,1.29,1.94,19,92,2.36,2.04,0.39,2.08,2.7,0.86,3.02,312
111 | 11.61,1.35,2.7,20,94,2.74,2.92,0.29,2.49,2.65,0.96,3.26,680
112 | 11.46,3.74,1.82,19.5,107,3.18,2.58,0.24,3.58,2.9,0.75,2.81,562
113 | 12.52,2.43,2.17,21,88,2.55,2.27,0.26,1.22,2,0.9,2.78,325
114 | 11.76,2.68,2.92,20,103,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607
115 | 11.41,0.74,2.5,21,88,2.48,2.01,0.42,1.44,3.08,1.1,2.31,434
116 | 12.08,1.39,2.5,22.5,84,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385
117 | 11.03,1.51,2.2,21.5,85,2.46,2.17,0.52,2.01,1.9,1.71,2.87,407
118 | 11.82,1.47,1.99,20.8,86,1.98,1.6,0.3,1.53,1.95,0.95,3.33,495
119 | 12.42,1.61,2.19,22.5,108,2,2.09,0.34,1.61,2.06,1.06,2.96,345
120 | 12.77,3.43,1.98,16,80,1.63,1.25,0.43,0.83,3.4,0.7,2.12,372
121 | 12,3.43,2,19,87,2,1.64,0.37,1.87,1.28,0.93,3.05,564
122 | 11.45,2.4,2.42,20,96,2.9,2.79,0.32,1.83,3.25,0.8,3.39,625
123 | 11.56,2.05,3.23,28.5,119,3.18,5.08,0.47,1.87,6,0.93,3.69,465
124 | 12.42,4.43,2.73,26.5,102,2.2,2.13,0.43,1.71,2.08,0.92,3.12,365
125 | 13.05,5.8,2.13,21.5,86,2.62,2.65,0.3,2.01,2.6,0.73,3.1,380
126 | 11.87,4.31,2.39,21,82,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380
127 | 12.07,2.16,2.17,21,85,2.6,2.65,0.37,1.35,2.76,0.86,3.28,378
128 | 12.43,1.53,2.29,21.5,86,2.74,3.15,0.39,1.77,3.94,0.69,2.84,352
129 | 11.79,2.13,2.78,28.5,92,2.13,2.24,0.58,1.76,3,0.97,2.44,466
130 | 12.37,1.63,2.3,24.5,88,2.22,2.45,0.4,1.9,2.12,0.89,2.78,342
131 | 12.04,4.3,2.38,22,80,2.1,1.75,0.42,1.35,2.6,0.79,2.57,580
132 | 12.86,1.35,2.32,18,122,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630
133 | 12.88,2.99,2.4,20,104,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530
134 | 12.81,2.31,2.4,24,98,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560
135 | 12.7,3.55,2.36,21.5,106,1.7,1.2,0.17,0.84,5,0.78,1.29,600
136 | 12.51,1.24,2.25,17.5,85,2,0.58,0.6,1.25,5.45,0.75,1.51,650
137 | 12.6,2.46,2.2,18.5,94,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695
138 | 12.25,4.72,2.54,21,89,1.38,0.47,0.53,0.8,3.85,0.75,1.27,720
139 | 12.53,5.51,2.64,25,96,1.79,0.6,0.63,1.1,5,0.82,1.69,515
140 | 13.49,3.59,2.19,19.5,88,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580
141 | 12.84,2.96,2.61,24,101,2.32,0.6,0.53,0.81,4.92,0.89,2.15,590
142 | 12.93,2.81,2.7,21,96,1.54,0.5,0.53,0.75,4.6,0.77,2.31,600
143 | 13.36,2.56,2.35,20,89,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780
144 | 13.52,3.17,2.72,23.5,97,1.55,0.52,0.5,0.55,4.35,0.89,2.06,520
145 | 13.62,4.95,2.35,20,92,2,0.8,0.47,1.02,4.4,0.91,2.05,550
146 | 12.25,3.88,2.2,18.5,112,1.38,0.78,0.29,1.14,8.21,0.65,2,855
147 | 13.16,3.57,2.15,21,102,1.5,0.55,0.43,1.3,4,0.6,1.68,830
148 | 13.88,5.04,2.23,20,80,0.98,0.34,0.4,0.68,4.9,0.58,1.33,415
149 | 12.87,4.61,2.48,21.5,86,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625
150 | 13.32,3.24,2.38,21.5,92,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650
151 | 13.08,3.9,2.36,21.5,113,1.41,1.39,0.34,1.14,9.4,0.57,1.33,550
152 | 13.5,3.12,2.62,24,123,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500
153 | 12.79,2.67,2.48,22,112,1.48,1.36,0.24,1.26,10.8,0.48,1.47,480
154 | 13.11,1.9,2.75,25.5,116,2.2,1.28,0.26,1.56,7.1,0.61,1.33,425
155 | 13.23,3.3,2.28,18.5,98,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675
156 | 12.58,1.29,2.1,20,103,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640
157 | 13.17,5.19,2.32,22,93,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725
158 | 13.84,4.12,2.38,19.5,89,1.8,0.83,0.48,1.56,9.01,0.57,1.64,480
159 | 12.45,3.03,2.64,27,97,1.9,0.58,0.63,1.14,7.5,0.67,1.73,880
160 | 14.34,1.68,2.7,25,98,2.8,1.31,0.53,2.7,13,0.57,1.96,660
161 | 13.48,1.67,2.64,22.5,89,2.6,1.1,0.52,2.29,11.75,0.57,1.78,620
162 | 12.36,3.83,2.38,21,88,2.3,0.92,0.5,1.04,7.65,0.56,1.58,520
163 | 13.69,3.26,2.54,20,107,1.83,0.56,0.5,0.8,5.88,0.96,1.82,680
164 | 12.85,3.27,2.58,22,106,1.65,0.6,0.6,0.96,5.58,0.87,2.11,570
165 | 12.96,3.45,2.35,18.5,106,1.39,0.7,0.4,0.94,5.28,0.68,1.75,675
166 | 13.78,2.76,2.3,22,90,1.35,0.68,0.41,1.03,9.58,0.7,1.68,615
167 | 13.73,4.36,2.26,22.5,88,1.28,0.47,0.52,1.15,6.62,0.78,1.75,520
168 | 13.45,3.7,2.6,23,111,1.7,0.92,0.43,1.46,10.68,0.85,1.56,695
169 | 12.82,3.37,2.3,19.5,88,1.48,0.66,0.4,0.97,10.26,0.72,1.75,685
170 | 13.58,2.58,2.69,24.5,105,1.55,0.84,0.39,1.54,8.66,0.74,1.8,750
171 | 13.4,4.6,2.86,25,112,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630
172 | 12.2,3.03,2.32,19,96,1.25,0.49,0.4,0.73,5.5,0.66,1.83,510
173 | 12.77,2.39,2.28,19.5,86,1.39,0.51,0.48,0.64,9.899999,0.57,1.63,470
174 | 14.16,2.51,2.48,20,91,1.68,0.7,0.44,1.24,9.7,0.62,1.71,660
175 | 13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740
176 | 13.4,3.91,2.48,23,102,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750
177 | 13.27,4.28,2.26,20,120,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835
178 | 13.17,2.59,2.37,20,120,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840
179 | 14.13,4.1,2.74,24.5,96,2.05,0.76,0.56,1.35,9.2,0.61,1.6,560
--------------------------------------------------------------------------------
/data/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
1 | User ID,Gender,Age,EstimatedSalary,Purchased
2 | 15624510,Male,19,19000,0
3 | 15810944,Male,35,20000,0
4 | 15668575,Female,26,43000,0
5 | 15603246,Female,27,57000,0
6 | 15804002,Male,19,76000,0
7 | 15728773,Male,27,58000,0
8 | 15598044,Female,27,84000,0
9 | 15694829,Female,32,150000,1
10 | 15600575,Male,25,33000,0
11 | 15727311,Female,35,65000,0
12 | 15570769,Female,26,80000,0
13 | 15606274,Female,26,52000,0
14 | 15746139,Male,20,86000,0
15 | 15704987,Male,32,18000,0
16 | 15628972,Male,18,82000,0
17 | 15697686,Male,29,80000,0
18 | 15733883,Male,47,25000,1
19 | 15617482,Male,45,26000,1
20 | 15704583,Male,46,28000,1
21 | 15621083,Female,48,29000,1
22 | 15649487,Male,45,22000,1
23 | 15736760,Female,47,49000,1
24 | 15714658,Male,48,41000,1
25 | 15599081,Female,45,22000,1
26 | 15705113,Male,46,23000,1
27 | 15631159,Male,47,20000,1
28 | 15792818,Male,49,28000,1
29 | 15633531,Female,47,30000,1
30 | 15744529,Male,29,43000,0
31 | 15669656,Male,31,18000,0
32 | 15581198,Male,31,74000,0
33 | 15729054,Female,27,137000,1
34 | 15573452,Female,21,16000,0
35 | 15776733,Female,28,44000,0
36 | 15724858,Male,27,90000,0
37 | 15713144,Male,35,27000,0
38 | 15690188,Female,33,28000,0
39 | 15689425,Male,30,49000,0
40 | 15671766,Female,26,72000,0
41 | 15782806,Female,27,31000,0
42 | 15764419,Female,27,17000,0
43 | 15591915,Female,33,51000,0
44 | 15772798,Male,35,108000,0
45 | 15792008,Male,30,15000,0
46 | 15715541,Female,28,84000,0
47 | 15639277,Male,23,20000,0
48 | 15798850,Male,25,79000,0
49 | 15776348,Female,27,54000,0
50 | 15727696,Male,30,135000,1
51 | 15793813,Female,31,89000,0
52 | 15694395,Female,24,32000,0
53 | 15764195,Female,18,44000,0
54 | 15744919,Female,29,83000,0
55 | 15671655,Female,35,23000,0
56 | 15654901,Female,27,58000,0
57 | 15649136,Female,24,55000,0
58 | 15775562,Female,23,48000,0
59 | 15807481,Male,28,79000,0
60 | 15642885,Male,22,18000,0
61 | 15789109,Female,32,117000,0
62 | 15814004,Male,27,20000,0
63 | 15673619,Male,25,87000,0
64 | 15595135,Female,23,66000,0
65 | 15583681,Male,32,120000,1
66 | 15605000,Female,59,83000,0
67 | 15718071,Male,24,58000,0
68 | 15679760,Male,24,19000,0
69 | 15654574,Female,23,82000,0
70 | 15577178,Female,22,63000,0
71 | 15595324,Female,31,68000,0
72 | 15756932,Male,25,80000,0
73 | 15726358,Female,24,27000,0
74 | 15595228,Female,20,23000,0
75 | 15782530,Female,33,113000,0
76 | 15592877,Male,32,18000,0
77 | 15651983,Male,34,112000,1
78 | 15746737,Male,18,52000,0
79 | 15774179,Female,22,27000,0
80 | 15667265,Female,28,87000,0
81 | 15655123,Female,26,17000,0
82 | 15595917,Male,30,80000,0
83 | 15668385,Male,39,42000,0
84 | 15709476,Male,20,49000,0
85 | 15711218,Male,35,88000,0
86 | 15798659,Female,30,62000,0
87 | 15663939,Female,31,118000,1
88 | 15694946,Male,24,55000,0
89 | 15631912,Female,28,85000,0
90 | 15768816,Male,26,81000,0
91 | 15682268,Male,35,50000,0
92 | 15684801,Male,22,81000,0
93 | 15636428,Female,30,116000,0
94 | 15809823,Male,26,15000,0
95 | 15699284,Female,29,28000,0
96 | 15786993,Female,29,83000,0
97 | 15709441,Female,35,44000,0
98 | 15710257,Female,35,25000,0
99 | 15582492,Male,28,123000,1
100 | 15575694,Male,35,73000,0
101 | 15756820,Female,28,37000,0
102 | 15766289,Male,27,88000,0
103 | 15593014,Male,28,59000,0
104 | 15584545,Female,32,86000,0
105 | 15675949,Female,33,149000,1
106 | 15672091,Female,19,21000,0
107 | 15801658,Male,21,72000,0
108 | 15706185,Female,26,35000,0
109 | 15789863,Male,27,89000,0
110 | 15720943,Male,26,86000,0
111 | 15697997,Female,38,80000,0
112 | 15665416,Female,39,71000,0
113 | 15660200,Female,37,71000,0
114 | 15619653,Male,38,61000,0
115 | 15773447,Male,37,55000,0
116 | 15739160,Male,42,80000,0
117 | 15689237,Male,40,57000,0
118 | 15679297,Male,35,75000,0
119 | 15591433,Male,36,52000,0
120 | 15642725,Male,40,59000,0
121 | 15701962,Male,41,59000,0
122 | 15811613,Female,36,75000,0
123 | 15741049,Male,37,72000,0
124 | 15724423,Female,40,75000,0
125 | 15574305,Male,35,53000,0
126 | 15678168,Female,41,51000,0
127 | 15697020,Female,39,61000,0
128 | 15610801,Male,42,65000,0
129 | 15745232,Male,26,32000,0
130 | 15722758,Male,30,17000,0
131 | 15792102,Female,26,84000,0
132 | 15675185,Male,31,58000,0
133 | 15801247,Male,33,31000,0
134 | 15725660,Male,30,87000,0
135 | 15638963,Female,21,68000,0
136 | 15800061,Female,28,55000,0
137 | 15578006,Male,23,63000,0
138 | 15668504,Female,20,82000,0
139 | 15687491,Male,30,107000,1
140 | 15610403,Female,28,59000,0
141 | 15741094,Male,19,25000,0
142 | 15807909,Male,19,85000,0
143 | 15666141,Female,18,68000,0
144 | 15617134,Male,35,59000,0
145 | 15783029,Male,30,89000,0
146 | 15622833,Female,34,25000,0
147 | 15746422,Female,24,89000,0
148 | 15750839,Female,27,96000,1
149 | 15749130,Female,41,30000,0
150 | 15779862,Male,29,61000,0
151 | 15767871,Male,20,74000,0
152 | 15679651,Female,26,15000,0
153 | 15576219,Male,41,45000,0
154 | 15699247,Male,31,76000,0
155 | 15619087,Female,36,50000,0
156 | 15605327,Male,40,47000,0
157 | 15610140,Female,31,15000,0
158 | 15791174,Male,46,59000,0
159 | 15602373,Male,29,75000,0
160 | 15762605,Male,26,30000,0
161 | 15598840,Female,32,135000,1
162 | 15744279,Male,32,100000,1
163 | 15670619,Male,25,90000,0
164 | 15599533,Female,37,33000,0
165 | 15757837,Male,35,38000,0
166 | 15697574,Female,33,69000,0
167 | 15578738,Female,18,86000,0
168 | 15762228,Female,22,55000,0
169 | 15614827,Female,35,71000,0
170 | 15789815,Male,29,148000,1
171 | 15579781,Female,29,47000,0
172 | 15587013,Male,21,88000,0
173 | 15570932,Male,34,115000,0
174 | 15794661,Female,26,118000,0
175 | 15581654,Female,34,43000,0
176 | 15644296,Female,34,72000,0
177 | 15614420,Female,23,28000,0
178 | 15609653,Female,35,47000,0
179 | 15594577,Male,25,22000,0
180 | 15584114,Male,24,23000,0
181 | 15673367,Female,31,34000,0
182 | 15685576,Male,26,16000,0
183 | 15774727,Female,31,71000,0
184 | 15694288,Female,32,117000,1
185 | 15603319,Male,33,43000,0
186 | 15759066,Female,33,60000,0
187 | 15814816,Male,31,66000,0
188 | 15724402,Female,20,82000,0
189 | 15571059,Female,33,41000,0
190 | 15674206,Male,35,72000,0
191 | 15715160,Male,28,32000,0
192 | 15730448,Male,24,84000,0
193 | 15662067,Female,19,26000,0
194 | 15779581,Male,29,43000,0
195 | 15662901,Male,19,70000,0
196 | 15689751,Male,28,89000,0
197 | 15667742,Male,34,43000,0
198 | 15738448,Female,30,79000,0
199 | 15680243,Female,20,36000,0
200 | 15745083,Male,26,80000,0
201 | 15708228,Male,35,22000,0
202 | 15628523,Male,35,39000,0
203 | 15708196,Male,49,74000,0
204 | 15735549,Female,39,134000,1
205 | 15809347,Female,41,71000,0
206 | 15660866,Female,58,101000,1
207 | 15766609,Female,47,47000,0
208 | 15654230,Female,55,130000,1
209 | 15794566,Female,52,114000,0
210 | 15800890,Female,40,142000,1
211 | 15697424,Female,46,22000,0
212 | 15724536,Female,48,96000,1
213 | 15735878,Male,52,150000,1
214 | 15707596,Female,59,42000,0
215 | 15657163,Male,35,58000,0
216 | 15622478,Male,47,43000,0
217 | 15779529,Female,60,108000,1
218 | 15636023,Male,49,65000,0
219 | 15582066,Male,40,78000,0
220 | 15666675,Female,46,96000,0
221 | 15732987,Male,59,143000,1
222 | 15789432,Female,41,80000,0
223 | 15663161,Male,35,91000,1
224 | 15694879,Male,37,144000,1
225 | 15593715,Male,60,102000,1
226 | 15575002,Female,35,60000,0
227 | 15622171,Male,37,53000,0
228 | 15795224,Female,36,126000,1
229 | 15685346,Male,56,133000,1
230 | 15691808,Female,40,72000,0
231 | 15721007,Female,42,80000,1
232 | 15794253,Female,35,147000,1
233 | 15694453,Male,39,42000,0
234 | 15813113,Male,40,107000,1
235 | 15614187,Male,49,86000,1
236 | 15619407,Female,38,112000,0
237 | 15646227,Male,46,79000,1
238 | 15660541,Male,40,57000,0
239 | 15753874,Female,37,80000,0
240 | 15617877,Female,46,82000,0
241 | 15772073,Female,53,143000,1
242 | 15701537,Male,42,149000,1
243 | 15736228,Male,38,59000,0
244 | 15780572,Female,50,88000,1
245 | 15769596,Female,56,104000,1
246 | 15586996,Female,41,72000,0
247 | 15722061,Female,51,146000,1
248 | 15638003,Female,35,50000,0
249 | 15775590,Female,57,122000,1
250 | 15730688,Male,41,52000,0
251 | 15753102,Female,35,97000,1
252 | 15810075,Female,44,39000,0
253 | 15723373,Male,37,52000,0
254 | 15795298,Female,48,134000,1
255 | 15584320,Female,37,146000,1
256 | 15724161,Female,50,44000,0
257 | 15750056,Female,52,90000,1
258 | 15609637,Female,41,72000,0
259 | 15794493,Male,40,57000,0
260 | 15569641,Female,58,95000,1
261 | 15815236,Female,45,131000,1
262 | 15811177,Female,35,77000,0
263 | 15680587,Male,36,144000,1
264 | 15672821,Female,55,125000,1
265 | 15767681,Female,35,72000,0
266 | 15600379,Male,48,90000,1
267 | 15801336,Female,42,108000,1
268 | 15721592,Male,40,75000,0
269 | 15581282,Male,37,74000,0
270 | 15746203,Female,47,144000,1
271 | 15583137,Male,40,61000,0
272 | 15680752,Female,43,133000,0
273 | 15688172,Female,59,76000,1
274 | 15791373,Male,60,42000,1
275 | 15589449,Male,39,106000,1
276 | 15692819,Female,57,26000,1
277 | 15727467,Male,57,74000,1
278 | 15734312,Male,38,71000,0
279 | 15764604,Male,49,88000,1
280 | 15613014,Female,52,38000,1
281 | 15759684,Female,50,36000,1
282 | 15609669,Female,59,88000,1
283 | 15685536,Male,35,61000,0
284 | 15750447,Male,37,70000,1
285 | 15663249,Female,52,21000,1
286 | 15638646,Male,48,141000,0
287 | 15734161,Female,37,93000,1
288 | 15631070,Female,37,62000,0
289 | 15761950,Female,48,138000,1
290 | 15649668,Male,41,79000,0
291 | 15713912,Female,37,78000,1
292 | 15586757,Male,39,134000,1
293 | 15596522,Male,49,89000,1
294 | 15625395,Male,55,39000,1
295 | 15760570,Male,37,77000,0
296 | 15566689,Female,35,57000,0
297 | 15725794,Female,36,63000,0
298 | 15673539,Male,42,73000,1
299 | 15705298,Female,43,112000,1
300 | 15675791,Male,45,79000,0
301 | 15747043,Male,46,117000,1
302 | 15736397,Female,58,38000,1
303 | 15678201,Male,48,74000,1
304 | 15720745,Female,37,137000,1
305 | 15637593,Male,37,79000,1
306 | 15598070,Female,40,60000,0
307 | 15787550,Male,42,54000,0
308 | 15603942,Female,51,134000,0
309 | 15733973,Female,47,113000,1
310 | 15596761,Male,36,125000,1
311 | 15652400,Female,38,50000,0
312 | 15717893,Female,42,70000,0
313 | 15622585,Male,39,96000,1
314 | 15733964,Female,38,50000,0
315 | 15753861,Female,49,141000,1
316 | 15747097,Female,39,79000,0
317 | 15594762,Female,39,75000,1
318 | 15667417,Female,54,104000,1
319 | 15684861,Male,35,55000,0
320 | 15742204,Male,45,32000,1
321 | 15623502,Male,36,60000,0
322 | 15774872,Female,52,138000,1
323 | 15611191,Female,53,82000,1
324 | 15674331,Male,41,52000,0
325 | 15619465,Female,48,30000,1
326 | 15575247,Female,48,131000,1
327 | 15695679,Female,41,60000,0
328 | 15713463,Male,41,72000,0
329 | 15785170,Female,42,75000,0
330 | 15796351,Male,36,118000,1
331 | 15639576,Female,47,107000,1
332 | 15693264,Male,38,51000,0
333 | 15589715,Female,48,119000,1
334 | 15769902,Male,42,65000,0
335 | 15587177,Male,40,65000,0
336 | 15814553,Male,57,60000,1
337 | 15601550,Female,36,54000,0
338 | 15664907,Male,58,144000,1
339 | 15612465,Male,35,79000,0
340 | 15810800,Female,38,55000,0
341 | 15665760,Male,39,122000,1
342 | 15588080,Female,53,104000,1
343 | 15776844,Male,35,75000,0
344 | 15717560,Female,38,65000,0
345 | 15629739,Female,47,51000,1
346 | 15729908,Male,47,105000,1
347 | 15716781,Female,41,63000,0
348 | 15646936,Male,53,72000,1
349 | 15768151,Female,54,108000,1
350 | 15579212,Male,39,77000,0
351 | 15721835,Male,38,61000,0
352 | 15800515,Female,38,113000,1
353 | 15591279,Male,37,75000,0
354 | 15587419,Female,42,90000,1
355 | 15750335,Female,37,57000,0
356 | 15699619,Male,36,99000,1
357 | 15606472,Male,60,34000,1
358 | 15778368,Male,54,70000,1
359 | 15671387,Female,41,72000,0
360 | 15573926,Male,40,71000,1
361 | 15709183,Male,42,54000,0
362 | 15577514,Male,43,129000,1
363 | 15778830,Female,53,34000,1
364 | 15768072,Female,47,50000,1
365 | 15768293,Female,42,79000,0
366 | 15654456,Male,42,104000,1
367 | 15807525,Female,59,29000,1
368 | 15574372,Female,58,47000,1
369 | 15671249,Male,46,88000,1
370 | 15779744,Male,38,71000,0
371 | 15624755,Female,54,26000,1
372 | 15611430,Female,60,46000,1
373 | 15774744,Male,60,83000,1
374 | 15629885,Female,39,73000,0
375 | 15708791,Male,59,130000,1
376 | 15793890,Female,37,80000,0
377 | 15646091,Female,46,32000,1
378 | 15596984,Female,46,74000,0
379 | 15800215,Female,42,53000,0
380 | 15577806,Male,41,87000,1
381 | 15749381,Female,58,23000,1
382 | 15683758,Male,42,64000,0
383 | 15670615,Male,48,33000,1
384 | 15715622,Female,44,139000,1
385 | 15707634,Male,49,28000,1
386 | 15806901,Female,57,33000,1
387 | 15775335,Male,56,60000,1
388 | 15724150,Female,49,39000,1
389 | 15627220,Male,39,71000,0
390 | 15672330,Male,47,34000,1
391 | 15668521,Female,48,35000,1
392 | 15807837,Male,48,33000,1
393 | 15592570,Male,47,23000,1
394 | 15748589,Female,45,45000,1
395 | 15635893,Male,60,42000,1
396 | 15757632,Female,39,59000,0
397 | 15691863,Female,46,41000,1
398 | 15706071,Male,51,23000,1
399 | 15654296,Female,50,20000,1
400 | 15755018,Male,36,33000,0
401 | 15594041,Female,49,36000,1
--------------------------------------------------------------------------------
/data/x-y.csv:
--------------------------------------------------------------------------------
1 | x,y
2 | 24,21.54945196
3 | 50,47.46446305
4 | 15,17.21865634
5 | 38,36.58639803
6 | 87,87.28898389
7 | 36,32.46387493
8 | 12,10.78089683
9 | 81,80.7633986
10 | 25,24.61215147
11 | 5,6.963319071
12 | 16,11.23757338
13 | 16,13.53290206
14 | 24,24.60323899
15 | 39,39.40049976
16 | 54,48.43753838
17 | 60,61.69900319
18 | 26,26.92832418
19 | 73,70.4052055
20 | 29,29.34092408
21 | 31,25.30895192
22 | 68,69.02934339
23 | 87,84.99484703
24 | 58,57.04310305
25 | 54,50.5921991
26 | 84,83.02772202
27 | 58,57.05752706
28 | 49,47.95883341
29 | 20,24.34226432
30 | 90,94.68488281
31 | 48,48.03970696
32 | 4,7.08132338
33 | 25,21.99239907
34 | 42,42.33151664
35 | 0,0.329089443
36 | 60,61.92303698
37 | 93,91.17716423
38 | 39,39.45358014
39 | 7,5.996069607
40 | 21,22.59015942
41 | 68,61.18044414
42 | 84,85.02778957
43 | 0,-1.28631089
44 | 58,61.94273962
45 | 19,21.96033347
46 | 36,33.66194193
47 | 19,17.60946242
48 | 59,58.5630564
49 | 51,52.82390762
50 | 19,22.1363481
51 | 33,35.07467353
52 | 85,86.18822311
53 | 44,42.63227697
54 | 5,4.09817744
55 | 59,61.2229864
56 | 14,17.70677576
57 | 9,11.85312574
58 | 75,80.23051695
59 | 69,62.64931741
60 | 10,9.616859804
61 | 17,20.02797699
62 | 58,61.7510743
63 | 74,71.61010303
64 | 21,23.77154623
65 | 51,51.90142035
66 | 19,22.66073682
67 | 50,50.02897927
68 | 24,26.68794368
69 | 0,0.376911899
70 | 12,6.806419002
71 | 75,77.33986001
72 | 21,28.90260209
73 | 64,66.7346608
74 | 5,0.707510638
75 | 58,57.07748383
76 | 32,28.41453196
77 | 41,44.46272123
78 | 7,7.459605998
79 | 4,2.316708112
80 | 5,4.928546187
81 | 49,52.50336074
82 | 90,91.19109623
83 | 3,8.489164326
84 | 11,6.963371967
85 | 32,31.97989959
86 | 83,81.4281205
87 | 25,22.62365422
88 | 83,78.52505087
89 | 26,25.80714057
90 | 76,73.51081775
91 | 95,91.775467
92 | 53,49.21863516
93 | 77,80.50445387
94 | 42,50.05636123
95 | 25,25.46292549
96 | 54,55.32164264
97 | 55,59.1244888
98 | 0,1.100686692
99 | 73,71.98020786
100 | 35,30.13666408
101 | 86,83.88427405
102 | 90,89.91004752
103 | 13,8.335654576
104 | 46,47.88388961
105 | 46,45.00397413
106 | 32,31.15664574
107 | 8,9.190375682
108 | 71,74.83135003
109 | 28,30.23177607
110 | 24,24.21914027
111 | 56,57.87219151
112 | 49,50.61728392
113 | 79,78.67470043
114 | 90,86.236707
115 | 89,89.10409255
116 | 41,43.26595082
117 | 27,26.68273277
118 | 58,59.46383041
119 | 26,28.90055826
120 | 31,31.300416
121 | 70,71.1433266
122 | 71,68.4739206
123 | 39,39.98238856
124 | 7,4.075776144
125 | 48,47.85817542
126 | 56,51.20390217
127 | 45,43.9367213
128 | 41,38.13626679
129 | 3,3.574661632
130 | 37,36.4139958
131 | 24,22.21908523
132 | 68,63.5312572
133 | 47,49.86702787
134 | 27,21.53140009
135 | 68,64.05710234
136 | 74,70.77549842
137 | 95,92.15749762
138 | 79,81.22259156
139 | 21,25.10114067
140 | 95,94.08853397
141 | 54,53.25166165
142 | 56,59.16236621
143 | 80,75.24148428
144 | 26,28.22325833
145 | 25,25.33323728
146 | 8,6.364615703
147 | 95,95.4609216
148 | 94,88.64183756
149 | 54,58.70318693
150 | 7,6.815491279
151 | 99,99.40394676
152 | 36,32.77049249
153 | 48,47.0586788
154 | 65,60.53321778
155 | 42,40.30929858
156 | 93,89.42222685
157 | 86,86.82132066
158 | 26,26.11697543
159 | 51,53.26657596
160 | 100,96.62327888
161 | 94,95.78441027
162 | 6,6.047286687
163 | 24,24.47387908
164 | 75,75.96844763
165 | 7,3.829381009
166 | 53,52.51703683
167 | 73,72.80457527
168 | 16,14.10999096
169 | 80,80.86087062
170 | 77,77.01988215
171 | 89,86.26972444
172 | 80,77.13735466
173 | 55,51.47649476
174 | 19,17.34557531
175 | 56,57.72853572
176 | 47,44.15029394
177 | 56,59.24362743
178 | 2,-1.053275611
179 | 82,86.79002254
180 | 57,60.14031858
181 | 44,44.04222058
182 | 26,24.5227488
183 | 52,52.95305521
184 | 41,43.16133498
185 | 44,45.67562576
186 | 3,-2.830749501
187 | 31,29.19693178
188 | 97,96.49812401
189 | 21,22.5453232
190 | 17,20.10741433
191 | 7,4.035430253
192 | 61,61.14568518
193 | 10,13.97163653
194 | 52,55.34529893
195 | 10,12.18441166
196 | 65,64.00077658
197 | 71,70.3188322
198 | 4,-0.936895047
199 | 24,18.91422276
200 | 26,23.87590331
201 | 51,47.5775361
202 | 42,43.2736092
203 | 62,66.48278755
204 | 74,75.72605529
205 | 77,80.59643338
206 | 3,-2.235879852
207 | 50,47.04654956
208 | 24,21.59635575
209 | 37,32.87558963
210 | 58,57.95782956
211 | 52,52.24760027
212 | 27,24.58286902
213 | 14,12.12573805
214 | 100,100.0158026
215 | 3530.15736917
216 | 72,74.04682658
217 | 5,1.611947467
218 | 71,70.36836307
219 | 54,52.26831735
220 | 84,83.1286166
221 | 42,43.64765048
222 | 54,49.44785426
223 | 74,72.6356699
224 | 54,52.78130641
225 | 53,57.11195136
226 | 78,79.1050629
227 | 97,101.6228548
228 | 49,53.5825402
229 | 71,68.92139297
230 | 48,46.9666961
231 | 51,51.02642868
232 | 89,85.52073551
233 | 99,99.51685756
234 | 93,94.63911256
235 | 49,46.78357742
236 | 18,21.21321959
237 | 65,58.37266004
238 | 83,87.22059677
239 | 100,102.4967859
240 | 41,43.88314335
241 | 52,53.06655757
242 | 29,26.33464785
243 | 97,98.52008934
244 | 7,9.400497579
245 | 51,52.94026699
246 | 58,53.83020877
247 | 50,45.94511142
248 | 67,65.0132736
249 | 89,86.5069584
250 | 76,75.63280796
251 | 35,36.78035027
252 | 99,100.5328916
253 | 31,29.04466136
254 | 52,51.70352433
255 | 11,9.199954718
256 | 66,71.70015848
257 | 50,49.82634062
258 | 39,37.49971096
259 | 60,53.65084683
260 | 35,33.92561965
261 | 53,49.92639685
262 | 14,8.148154262
263 | 49,49.72359037
264 | 16,16.16712757
265 | 76,75.30033002
266 | 13,9.577368568
267 | 51,48.38088357
268 | 70,72.95331671
269 | 98,92.59573853
270 | 86,88.85523586
271 | 100,99.00361771
272 | 46,45.09439571
273 | 51,46.94362684
274 | 50,48.33449605
275 | 91,94.92329574
276 | 48,47.78165248
277 | 81,81.28960746
278 | 38,37.83155021
279 | 40,39.69185252
280 | 79,76.92664854
281 | 96,88.02990531
282 | 60,56.99178872
283 | 70,72.58929383
284 | 44,44.98103442
285 | 11,11.99017641
286 | 6,1.919513328
287 | 5,1.628826073
288 | 72,66.27746655
289 | 55,57.53887255
290 | 95,94.70291077
291 | 41,41.21469904
292 | 25,25.04169243
293 | 1,3.778209914
294 | 55,50.50711779
295 | 4,9.682408486
296 | 48,48.88147608
297 | 55,54.40348599
298 | 75,71.70233156
299 | 68,69.35848388
300 | 100,99.98491591
301 | 25,26.03323718
302 | 75,75.48910307
303 | 34,36.59623056
304 | 38,40.95102191
305 | 92,86.78316267
306 | 21,15.50701184
307 | 88,85.86077871
308 | 75,79.20610113
309 | 76,80.80643766
310 | 44,48.59717283
311 | 10,13.93415049
312 | 21,27.3051179
313 | 16,14.00226297
314 | 32,33.67416
315 | 13,13.11612884
316 | 26,24.76649193
317 | 70,73.68477876
318 | 77,77.53149541
319 | 77,76.24503196
320 | 88,88.0578931
321 | 35,35.02445799
322 | 24,21.65857739
323 | 17,17.33681562
324 | 91,94.36778957
325 | 32,33.43396307
326 | 36,32.52179399
327 | 89,90.57741298
328 | 69,71.25634126
329 | 30,31.23212856
330 | 6,5.398840061
331 | 22,18.56241391
332 | 67,71.97121038
333 | 9,5.225759566
334 | 74,73.5964342
335 | 50,49.76948983
336 | 85,82.69087513
337 | 3,1.652309089
338 | 0,-3.836652144
339 | 59,62.03811556
340 | 62,61.26514581
341 | 17,13.24991628
342 | 90,88.61672694
343 | 23,21.13655528
344 | 19,23.85017475
345 | 93,92.01203405
346 | 14,10.26712261
347 | 58,54.14681616
348 | 87,87.00645713
349 | 37,37.69447352
350 | 20,19.62278654
351 | 35,34.78561007
352 | 63,62.03190983
353 | 56,52.67003801
354 | 62,58.09031476
355 | 98,97.19448821
356 | 90,90.50155298
357 | 51,50.5123462
358 | 93,94.45211871
359 | 22,21.10794636
360 | 38,37.36298431
361 | 13,10.28574844
362 | 98,96.04932416
363 | 99,100.0953697
364 | 31,30.6063167
365 | 94,96.19000542
366 | 73,71.30828034
367 | 37,34.59311043
368 | 23,19.02332876
369 | 11,10.76669688
370 | 88,90.5799868
371 | 47,48.71787679
372 | 79,78.74139764
373 | 91,85.23492274
374 | 71,71.65789964
375 | 10,8.938990554
376 | 39,39.89606046
377 | 92,91.85091116
378 | 99,99.11200375
379 | 28,26.22196486
380 | 32,33.21584226
381 | 32,35.72392691
382 | 75,76.88604495
383 | 99,99.30874567
384 | 27,25.77161074
385 | 64,67.85169407
386 | 98,98.50371084
387 | 38,31.11331895
388 | 46,45.51171028
389 | 13,12.65537808
390 | 96,95.56065366
391 | 9,9.526431641
392 | 34,36.10893209
393 | 49,46.43628318
394 | 1,-3.83998112
395 | 50,48.97302037
396 | 94,93.25305499
397 | 27,23.47650968
398 | 20,17.13551132
399 | 12,14.55896144
400 | 45,41.53992729
401 | 91,91.64730552
402 | 61,66.16652565
403 | 10,9.230857489
404 | 47,47.41377893
405 | 33,34.76441561
406 | 84,86.10796637
407 | 24,21.81267954
408 | 48,48.89963951
409 | 48,46.78108638
410 | 9,12.91328547
411 | 93,94.55203143
412 | 99,94.97068753
413 | 8,2.379172481
414 | 20,21.47982988
415 | 38,35.79795462
416 | 78,82.0763803
417 | 81,78.87097714
418 | 42,47.2492425
419 | 95,96.18852325
420 | 78,78.38491927
421 | 44,42.94274064
422 | 68,64.43231595
423 | 87,84.21191485
424 | 58,57.3069783
425 | 52,52.52101436
426 | 26,25.7440243
427 | 75,75.42283401
428 | 48,53.62523007
429 | 71,75.14466308
430 | 77,74.12151511
431 | 34,36.24807243
432 | 24,20.21665898
433 | 70,66.94758118
434 | 29,34.07278254
435 | 76,73.13850045
436 | 98,92.85929155
437 | 28,28.36793808
438 | 87,85.59308727
439 | 9,10.68453755
440 | 87,86.10708624
441 | 33,33.22031418
442 | 64,66.09563422
443 | 17,19.30486546
444 | 49,48.84542083
445 | 95,93.73176312
446 | 75,75.45758614
447 | 89,91.24239226
448 | 81,87.15690853
449 | 25,25.53752833
450 | 47,46.06629478
451 | 50,49.65277661
452 | 5,7.382244165
453 | 68,71.11189935
454 | 84,83.50570521
455 | 8,8.791139893
456 | 41,33.30638903
457 | 26,26.40362524
458 | 89,91.72960726
459 | 78,82.53030719
460 | 34,36.67762733
461 | 92,86.98450355
462 | 27,32.34784175
463 | 12,16.78353974
464 | 2,1.576584383
465 | 22,17.4618141
466 | 0,2.116113029
467 | 26,24.34804332
468 | 50,48.29491198
469 | 84,85.52145453
470 | 70,73.71434779
471 | 66,63.15189497
472 | 42,38.46213684
473 | 19,19.47100788
474 | 94,94.07428225
475 | 71,67.92051286
476 | 19,22.58096241
477 | 16,16.01629889
478 | 49,48.43307886
479 | 29,29.6673599
480 | 29,26.65566328
481 | 86,86.28206739
482 | 50,50.82304924
483 | 86,88.57251713
484 | 30,32.59980745
485 | 23,21.02469368
486 | 20,20.72894979
487 | 16,20.38051187
488 | 57,57.25180153
489 | 8,6.967537054
490 | 8,10.240085
491 | 62,64.94841088
492 | 55,55.35893915
493 | 30,31.24365589
494 | 86,90.72048818
495 | 62,58.750127
496 | 51,55.85003198
497 | 61,60.19925869
498 | 86,85.03295412
499 | 61,60.38823085
500 | 21,18.44679787
501 | 81,82.18839247
502 | 97,94.2963344
503 | 5,7.682024586
504 | 61,61.01858089
505 | 47,53.60562216
506 | 98,94.47728801
507 | 30,27.9645947
508 | 63,62.55662585
509 | 0,1.406254414
510 | 100,101.7003412
511 | 18,13.84973988
512 | 30,28.99769315
513 | 98,99.04315693
514 | 16,15.56135514
515 | 22,24.63528393
516 | 55,53.98393374
517 | 43,42.91449728
518 | 75,74.29662112
519 | 91,91.17012883
520 | 46,49.42440876
521 | 85,82.47683519
522 | 55,56.15303953
523 | 36,37.17063131
524 | 49,46.36928662
525 | 94,97.02383456
526 | 43,40.83182104
527 | 22,24.08498313
528 | 37,41.14386358
529 | 24,21.97388066
530 | 95,100.740897
531 | 61,61.19971596
532 | 75,74.39517002
533 | 68,69.04377173
534 | 58,56.68718792
535 | 5,5.860391715
536 | 53,55.72021356
537 | 80,79.22021816
538 | 83,86.30177517
539 | 25,25.26971886
540 | 34,36.33294447
541 | 26,27.65574228
542 | 90,94.79690531
543 | 60,58.67366671
544 | 49,56.15934471
545 | 19,18.40919388
546 | 92,86.26936988
547 | 29,26.59436195
548 | 8,8.452520159
549 | 57,56.18131518
550 | 29,27.65452669
551 | 19,20.87391785
552 | 81,77.83354439
553 | 50,50.01787825
554 | 15,9.290856256
555 | 70,75.0284725
556 | 39,38.3037698
557 | 43,44.70786405
558 | 21,22.51016575
559 | 98,102.4959452
560 | 86,86.76845244
561 | 16,13.89748578
562 | 25,24.81824269
563 | 31,33.94224862
564 | 93,92.26970059
565 | 67,68.73365081
566 | 49,47.38516883
567 | 25,32.37576914
568 | 88,87.67388681
569 | 54,54.57648371
570 | 21,18.06450222
571 | 8,7.896539841
572 | 32,35.00341078
573 | 35,36.72823317
574 | 67,65.84975426
575 | 90,89.59295492
576 | 59,61.69026202
577 | 15,11.60499315
578 | 67,71.0826803
579 | 42,43.71901164
580 | 44,41.57421008
581 | 77,74.25552425
582 | 68,66.28310437
583 | 36,36.62438077
584 | 11,10.32374866
585 | 10,7.156457657
586 | 65,67.88603132
587 | 98,101.1097591
588 | 98,98.6132033
589 | 49,50.19083844
590 | 31,27.83896261
591 | 56,55.9249564
592 | 70,76.47340872
593 | 91,92.05756378
594 | 25,27.35245439
595 | 54,55.32083476
596 | 39,41.39990349
597 | 91,93.59057024
598 | 3,5.297054029
599 | 22,21.01429422
600 | 2,2.267059451
601 | 2,-0.121860502
602 | 65,66.49546208
603 | 71,73.83637687
604 | 42,42.10140878
605 | 76,77.35135732
606 | 43,41.02251779
607 | 8,14.75305272
608 | 86,83.28199022
609 | 87,89.93374342
610 | 3,2.286571686
611 | 58,55.61421297
612 | 62,62.15313408
613 | 89,89.55803528
614 | 95,94.00291863
615 | 28,26.78023848
616 | 0,-0.764537626
617 | 1,0.282866003
618 | 49,44.26800515
619 | 21,19.85174138
620 | 46,47.15960005
621 | 11,8.359366572
622 | 89,92.08157084
623 | 37,41.88734051
624 | 29,30.5413129
625 | 44,46.87654473
626 | 96,96.35659485
627 | 16,17.9170699
628 | 74,71.67949917
629 | 35,32.64997554
630 | 42,39.34482965
631 | 16,17.03401999
632 | 56,52.87524074
633 | 18,15.85414849
634 | 100,108.8716183
635 | 54,49.30477253
636 | 92,89.4749477
637 | 63,63.67348242
638 | 81,83.78410946
639 | 73,73.51136922
640 | 48,46.80297244
641 | 1,5.809946802
642 | 85,85.23027975
643 | 14,10.58213964
644 | 25,21.37698317
645 | 45,46.0537745
646 | 98,95.2389253
647 | 97,94.15149206
648 | 58,54.54868046
649 | 93,87.36260449
650 | 88,88.47741598
651 | 89,84.48045678
652 | 47,48.79647071
653 | 6,10.76675683
654 | 34,30.48882921
655 | 30,29.76846185
656 | 16,13.51574749
657 | 86,86.12955884
658 | 40,43.30022747
659 | 52,51.92110232
660 | 15,16.49185287
661 | 4,7.998073432
662 | 95,97.66689567
663 | 99,89.80545367
664 | 35,38.07166567
665 | 58,60.27852322
666 | 10,6.709195759
667 | 16,18.35488924
668 | 53,56.37058203
669 | 58,62.80064204
670 | 42,41.25155632
671 | 24,19.42637541
672 | 84,82.88935804
673 | 64,63.61364981
674 | 12,11.29627199
675 | 61,60.02274882
676 | 75,72.60339326
677 | 15,11.87964573
678 | 100,100.7012737
679 | 43,45.12420809
680 | 13,14.81106804
681 | 48,48.09368034
682 | 45,42.29145672
683 | 52,52.73389794
684 | 34,36.72396986
685 | 30,28.64535198
686 | 65,62.16675273
687 | 100,95.58459518
688 | 67,66.04325304
689 | 99,99.9566225
690 | 45,46.14941984
691 | 87,89.13754963
692 | 73,69.71787806
693 | 9,12.31736648
694 | 81,78.20296268
695 | 72,71.30995371
696 | 81,81.45544709
697 | 58,58.59500642
698 | 93,94.62509374
699 | 82,88.60376995
700 | 66,63.64868529
701 | 97,94.9752655
--------------------------------------------------------------------------------
/data/Realestate.csv:
--------------------------------------------------------------------------------
1 | No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
2 | 1,2012.917,32,84.87882,10,24.98298,121.54024,37.9
3 | 2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
4 | 3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
5 | 4,2013.500,13.3,561.9845,5,24.98746,121.54391,54.8
6 | 5,2012.833,5,390.5684,5,24.97937,121.54245,43.1
7 | 6,2012.667,7.1,2175.03,3,24.96305,121.51254,32.1
8 | 7,2012.667,34.5,623.4731,7,24.97933,121.53642,40.3
9 | 8,2013.417,20.3,287.6025,6,24.98042,121.54228,46.7
10 | 9,2013.500,31.7,5512.038,1,24.95095,121.48458,18.8
11 | 10,2013.417,17.9,1783.18,3,24.96731,121.51486,22.1
12 | 11,2013.083,34.8,405.2134,1,24.97349,121.53372,41.4
13 | 12,2013.333,6.3,90.45606,9,24.97433,121.5431,58.1
14 | 13,2012.917,13,492.2313,5,24.96515,121.53737,39.3
15 | 14,2012.667,20.4,2469.645,4,24.96108,121.51046,23.8
16 | 15,2013.500,13.2,1164.838,4,24.99156,121.53406,34.3
17 | 16,2013.583,35.7,579.2083,2,24.9824,121.54619,50.5
18 | 17,2013.250,0,292.9978,6,24.97744,121.54458,70.1
19 | 18,2012.750,17.7,350.8515,1,24.97544,121.53119,37.4
20 | 19,2013.417,16.9,368.1363,8,24.9675,121.54451,42.3
21 | 20,2012.667,1.5,23.38284,7,24.96772,121.54102,47.7
22 | 21,2013.417,4.5,2275.877,3,24.96314,121.51151,29.3
23 | 22,2013.417,10.5,279.1726,7,24.97528,121.54541,51.6
24 | 23,2012.917,14.7,1360.139,1,24.95204,121.54842,24.6
25 | 24,2013.083,10.1,279.1726,7,24.97528,121.54541,47.9
26 | 25,2013.000,39.6,480.6977,4,24.97353,121.53885,38.8
27 | 26,2013.083,29.3,1487.868,2,24.97542,121.51726,27
28 | 27,2012.667,3.1,383.8624,5,24.98085,121.54391,56.2
29 | 28,2013.250,10.4,276.449,5,24.95593,121.53913,33.6
30 | 29,2013.500,19.2,557.478,4,24.97419,121.53797,47
31 | 30,2013.083,7.1,451.2438,5,24.97563,121.54694,57.1
32 | 31,2013.500,25.9,4519.69,0,24.94826,121.49587,22.1
33 | 32,2012.750,29.6,769.4034,7,24.98281,121.53408,25
34 | 33,2012.750,37.9,488.5727,1,24.97349,121.53451,34.2
35 | 34,2013.250,16.5,323.655,6,24.97841,121.54281,49.3
36 | 35,2012.750,15.4,205.367,7,24.98419,121.54243,55.1
37 | 36,2013.500,13.9,4079.418,0,25.01459,121.51816,27.3
38 | 37,2012.917,14.7,1935.009,2,24.96386,121.51458,22.9
39 | 38,2013.167,12,1360.139,1,24.95204,121.54842,25.3
40 | 39,2012.667,3.1,577.9615,6,24.97201,121.54722,47.7
41 | 40,2013.167,16.2,289.3248,5,24.98203,121.54348,46.2
42 | 41,2013.000,13.6,4082.015,0,24.94155,121.50381,15.9
43 | 42,2013.500,16.8,4066.587,0,24.94297,121.50342,18.2
44 | 43,2013.417,36.1,519.4617,5,24.96305,121.53758,34.7
45 | 44,2012.750,34.4,512.7871,6,24.98748,121.54301,34.1
46 | 45,2013.583,2.7,533.4762,4,24.97445,121.54765,53.9
47 | 46,2013.083,36.6,488.8193,8,24.97015,121.54494,38.3
48 | 47,2013.417,21.7,463.9623,9,24.9703,121.54458,42
49 | 48,2013.583,35.9,640.7391,3,24.97563,121.53715,61.5
50 | 49,2013.417,24.2,4605.749,0,24.94684,121.49578,13.4
51 | 50,2012.667,29.4,4510.359,1,24.94925,121.49542,13.2
52 | 51,2013.417,21.7,512.5487,4,24.974,121.53842,44.2
53 | 52,2013.083,31.3,1758.406,1,24.95402,121.55282,20.7
54 | 53,2013.583,32.1,1438.579,3,24.97419,121.5175,27
55 | 54,2013.083,13.3,492.2313,5,24.96515,121.53737,38.9
56 | 55,2013.083,16.1,289.3248,5,24.98203,121.54348,51.7
57 | 56,2012.833,31.7,1160.632,0,24.94968,121.53009,13.7
58 | 57,2013.417,33.6,371.2495,8,24.97254,121.54059,41.9
59 | 58,2012.917,3.5,56.47425,7,24.95744,121.53711,53.5
60 | 59,2013.500,30.3,4510.359,1,24.94925,121.49542,22.6
61 | 60,2013.083,13.3,336.0532,5,24.95776,121.53438,42.4
62 | 61,2013.417,11,1931.207,2,24.96365,121.51471,21.3
63 | 62,2013.500,5.3,259.6607,6,24.97585,121.54516,63.2
64 | 63,2012.917,17.2,2175.877,3,24.96303,121.51254,27.7
65 | 64,2013.583,2.6,533.4762,4,24.97445,121.54765,55
66 | 65,2013.333,17.5,995.7554,0,24.96305,121.54915,25.3
67 | 66,2013.417,40.1,123.7429,8,24.97635,121.54329,44.3
68 | 67,2013.000,1,193.5845,6,24.96571,121.54089,50.7
69 | 68,2013.500,8.5,104.8101,5,24.96674,121.54067,56.8
70 | 69,2013.417,30.4,464.223,6,24.97964,121.53805,36.2
71 | 70,2012.833,12.5,561.9845,5,24.98746,121.54391,42
72 | 71,2013.583,6.6,90.45606,9,24.97433,121.5431,59
73 | 72,2013.083,35.5,640.7391,3,24.97563,121.53715,40.8
74 | 73,2013.583,32.5,424.5442,8,24.97587,121.53913,36.3
75 | 74,2013.167,13.8,4082.015,0,24.94155,121.50381,20
76 | 75,2012.917,6.8,379.5575,10,24.98343,121.53762,54.4
77 | 76,2013.500,12.3,1360.139,1,24.95204,121.54842,29.5
78 | 77,2013.583,35.9,616.4004,3,24.97723,121.53767,36.8
79 | 78,2012.833,20.5,2185.128,3,24.96322,121.51237,25.6
80 | 79,2012.917,38.2,552.4371,2,24.97598,121.53381,29.8
81 | 80,2013.000,18,1414.837,1,24.95182,121.54887,26.5
82 | 81,2013.500,11.8,533.4762,4,24.97445,121.54765,40.3
83 | 82,2013.000,30.8,377.7956,6,24.96427,121.53964,36.8
84 | 83,2013.083,13.2,150.9347,7,24.96725,121.54252,48.1
85 | 84,2012.917,25.3,2707.392,3,24.96056,121.50831,17.7
86 | 85,2013.083,15.1,383.2805,7,24.96735,121.54464,43.7
87 | 86,2012.750,0,338.9679,9,24.96853,121.54413,50.8
88 | 87,2012.833,1.8,1455.798,1,24.9512,121.549,27
89 | 88,2013.583,16.9,4066.587,0,24.94297,121.50342,18.3
90 | 89,2012.917,8.9,1406.43,0,24.98573,121.52758,48
91 | 90,2013.500,23,3947.945,0,24.94783,121.50243,25.3
92 | 91,2012.833,0,274.0144,1,24.9748,121.53059,45.4
93 | 92,2013.250,9.1,1402.016,0,24.98569,121.5276,43.2
94 | 93,2012.917,20.6,2469.645,4,24.96108,121.51046,21.8
95 | 94,2012.917,31.9,1146.329,0,24.9492,121.53076,16.1
96 | 95,2012.917,40.9,167.5989,5,24.9663,121.54026,41
97 | 96,2012.917,8,104.8101,5,24.96674,121.54067,51.8
98 | 97,2013.417,6.4,90.45606,9,24.97433,121.5431,59.5
99 | 98,2013.083,28.4,617.4424,3,24.97746,121.53299,34.6
100 | 99,2013.417,16.4,289.3248,5,24.98203,121.54348,51
101 | 100,2013.417,6.4,90.45606,9,24.97433,121.5431,62.2
102 | 101,2013.500,17.5,964.7496,4,24.98872,121.53411,38.2
103 | 102,2012.833,12.7,170.1289,1,24.97371,121.52984,32.9
104 | 103,2013.083,1.1,193.5845,6,24.96571,121.54089,54.4
105 | 104,2012.750,0,208.3905,6,24.95618,121.53844,45.7
106 | 105,2012.667,32.7,392.4459,6,24.96398,121.5425,30.5
107 | 106,2012.833,0,292.9978,6,24.97744,121.54458,71
108 | 107,2013.083,17.2,189.5181,8,24.97707,121.54308,47.1
109 | 108,2013.333,12.2,1360.139,1,24.95204,121.54842,26.6
110 | 109,2013.417,31.4,592.5006,2,24.9726,121.53561,34.1
111 | 110,2013.583,4,2147.376,3,24.96299,121.51284,28.4
112 | 111,2013.083,8.1,104.8101,5,24.96674,121.54067,51.6
113 | 112,2013.583,33.3,196.6172,7,24.97701,121.54224,39.4
114 | 113,2013.417,9.9,2102.427,3,24.96044,121.51462,23.1
115 | 114,2013.333,14.8,393.2606,6,24.96172,121.53812,7.6
116 | 115,2012.667,30.6,143.8383,8,24.98155,121.54142,53.3
117 | 116,2013.083,20.6,737.9161,2,24.98092,121.54739,46.4
118 | 117,2013.000,30.9,6396.283,1,24.94375,121.47883,12.2
119 | 118,2013.000,13.6,4197.349,0,24.93885,121.50383,13
120 | 119,2013.500,25.3,1583.722,3,24.96622,121.51709,30.6
121 | 120,2013.500,16.6,289.3248,5,24.98203,121.54348,59.6
122 | 121,2013.167,13.3,492.2313,5,24.96515,121.53737,31.3
123 | 122,2013.500,13.6,492.2313,5,24.96515,121.53737,48
124 | 123,2013.250,31.5,414.9476,4,24.98199,121.54464,32.5
125 | 124,2013.417,0,185.4296,0,24.9711,121.5317,45.5
126 | 125,2012.917,9.9,279.1726,7,24.97528,121.54541,57.4
127 | 126,2013.167,1.1,193.5845,6,24.96571,121.54089,48.6
128 | 127,2013.083,38.6,804.6897,4,24.97838,121.53477,62.9
129 | 128,2013.250,3.8,383.8624,5,24.98085,121.54391,55
130 | 129,2013.083,41.3,124.9912,6,24.96674,121.54039,60.7
131 | 130,2013.417,38.5,216.8329,7,24.98086,121.54162,41
132 | 131,2013.250,29.6,535.527,8,24.98092,121.53653,37.5
133 | 132,2013.500,4,2147.376,3,24.96299,121.51284,30.7
134 | 133,2013.167,26.6,482.7581,5,24.97433,121.53863,37.5
135 | 134,2012.833,18,373.3937,8,24.9866,121.54082,39.5
136 | 135,2012.667,33.4,186.9686,6,24.96604,121.54211,42.2
137 | 136,2012.917,18.9,1009.235,0,24.96357,121.54951,20.8
138 | 137,2012.750,11.4,390.5684,5,24.97937,121.54245,46.8
139 | 138,2013.500,13.6,319.0708,6,24.96495,121.54277,47.4
140 | 139,2013.167,10,942.4664,0,24.97843,121.52406,43.5
141 | 140,2012.667,12.9,492.2313,5,24.96515,121.53737,42.5
142 | 141,2013.250,16.2,289.3248,5,24.98203,121.54348,51.4
143 | 142,2013.333,5.1,1559.827,3,24.97213,121.51627,28.9
144 | 143,2013.417,19.8,640.6071,5,24.97017,121.54647,37.5
145 | 144,2013.500,13.6,492.2313,5,24.96515,121.53737,40.1
146 | 145,2013.083,11.9,1360.139,1,24.95204,121.54842,28.4
147 | 146,2012.917,2.1,451.2438,5,24.97563,121.54694,45.5
148 | 147,2012.750,0,185.4296,0,24.9711,121.5317,52.2
149 | 148,2012.750,3.2,489.8821,8,24.97017,121.54494,43.2
150 | 149,2013.500,16.4,3780.59,0,24.93293,121.51203,45.1
151 | 150,2012.667,34.9,179.4538,8,24.97349,121.54245,39.7
152 | 151,2013.250,35.8,170.7311,7,24.96719,121.54269,48.5
153 | 152,2013.500,4.9,387.7721,9,24.98118,121.53788,44.7
154 | 153,2013.333,12,1360.139,1,24.95204,121.54842,28.9
155 | 154,2013.250,6.5,376.1709,6,24.95418,121.53713,40.9
156 | 155,2013.500,16.9,4066.587,0,24.94297,121.50342,20.7
157 | 156,2013.167,13.8,4082.015,0,24.94155,121.50381,15.6
158 | 157,2013.583,30.7,1264.73,0,24.94883,121.52954,18.3
159 | 158,2013.250,16.1,815.9314,4,24.97886,121.53464,35.6
160 | 159,2013.000,11.6,390.5684,5,24.97937,121.54245,39.4
161 | 160,2012.667,15.5,815.9314,4,24.97886,121.53464,37.4
162 | 161,2012.917,3.5,49.66105,8,24.95836,121.53756,57.8
163 | 162,2013.417,19.2,616.4004,3,24.97723,121.53767,39.6
164 | 163,2012.750,16,4066.587,0,24.94297,121.50342,11.6
165 | 164,2013.500,8.5,104.8101,5,24.96674,121.54067,55.5
166 | 165,2012.833,0,185.4296,0,24.9711,121.5317,55.2
167 | 166,2012.917,13.7,1236.564,1,24.97694,121.55391,30.6
168 | 167,2013.417,0,292.9978,6,24.97744,121.54458,73.6
169 | 168,2013.417,28.2,330.0854,8,24.97408,121.54011,43.4
170 | 169,2013.083,27.6,515.1122,5,24.96299,121.5432,37.4
171 | 170,2013.417,8.4,1962.628,1,24.95468,121.55481,23.5
172 | 171,2013.333,24,4527.687,0,24.94741,121.49628,14.4
173 | 172,2013.083,3.6,383.8624,5,24.98085,121.54391,58.8
174 | 173,2013.583,6.6,90.45606,9,24.97433,121.5431,58.1
175 | 174,2013.083,41.3,401.8807,4,24.98326,121.5446,35.1
176 | 175,2013.417,4.3,432.0385,7,24.9805,121.53778,45.2
177 | 176,2013.083,30.2,472.1745,3,24.97005,121.53758,36.5
178 | 177,2012.833,13.9,4573.779,0,24.94867,121.49507,19.2
179 | 178,2013.083,33,181.0766,9,24.97697,121.54262,42
180 | 179,2013.500,13.1,1144.436,4,24.99176,121.53456,36.7
181 | 180,2013.083,14,438.8513,1,24.97493,121.5273,42.6
182 | 181,2012.667,26.9,4449.27,0,24.94898,121.49621,15.5
183 | 182,2013.167,11.6,201.8939,8,24.98489,121.54121,55.9
184 | 183,2013.500,13.5,2147.376,3,24.96299,121.51284,23.6
185 | 184,2013.500,17,4082.015,0,24.94155,121.50381,18.8
186 | 185,2012.750,14.1,2615.465,0,24.95495,121.56174,21.8
187 | 186,2012.750,31.4,1447.286,3,24.97285,121.5173,21.5
188 | 187,2013.167,20.9,2185.128,3,24.96322,121.51237,25.7
189 | 188,2013.000,8.9,3078.176,0,24.95464,121.56627,22
190 | 189,2012.917,34.8,190.0392,8,24.97707,121.54312,44.3
191 | 190,2012.917,16.3,4066.587,0,24.94297,121.50342,20.5
192 | 191,2013.500,35.3,616.5735,8,24.97945,121.53642,42.3
193 | 192,2013.167,13.2,750.0704,2,24.97371,121.54951,37.8
194 | 193,2013.167,43.8,57.58945,7,24.9675,121.54069,42.7
195 | 194,2013.417,9.7,421.479,5,24.98246,121.54477,49.3
196 | 195,2013.500,15.2,3771.895,0,24.93363,121.51158,29.3
197 | 196,2013.333,15.2,461.1016,5,24.95425,121.5399,34.6
198 | 197,2013.000,22.8,707.9067,2,24.981,121.54713,36.6
199 | 198,2013.250,34.4,126.7286,8,24.96881,121.54089,48.2
200 | 199,2013.083,34,157.6052,7,24.96628,121.54196,39.1
201 | 200,2013.417,18.2,451.6419,8,24.96945,121.5449,31.6
202 | 201,2013.417,17.4,995.7554,0,24.96305,121.54915,25.5
203 | 202,2013.417,13.1,561.9845,5,24.98746,121.54391,45.9
204 | 203,2012.917,38.3,642.6985,3,24.97559,121.53713,31.5
205 | 204,2012.667,15.6,289.3248,5,24.98203,121.54348,46.1
206 | 205,2013.000,18,1414.837,1,24.95182,121.54887,26.6
207 | 206,2013.083,12.8,1449.722,3,24.97289,121.51728,21.4
208 | 207,2013.250,22.2,379.5575,10,24.98343,121.53762,44
209 | 208,2013.083,38.5,665.0636,3,24.97503,121.53692,34.2
210 | 209,2012.750,11.5,1360.139,1,24.95204,121.54842,26.2
211 | 210,2012.833,34.8,175.6294,8,24.97347,121.54271,40.9
212 | 211,2013.500,5.2,390.5684,5,24.97937,121.54245,52.2
213 | 212,2013.083,0,274.0144,1,24.9748,121.53059,43.5
214 | 213,2013.333,17.6,1805.665,2,24.98672,121.52091,31.1
215 | 214,2013.083,6.2,90.45606,9,24.97433,121.5431,58
216 | 215,2013.583,18.1,1783.18,3,24.96731,121.51486,20.9
217 | 216,2013.333,19.2,383.7129,8,24.972,121.54477,48.1
218 | 217,2013.250,37.8,590.9292,1,24.97153,121.53559,39.7
219 | 218,2012.917,28,372.6242,6,24.97838,121.54119,40.8
220 | 219,2013.417,13.6,492.2313,5,24.96515,121.53737,43.8
221 | 220,2012.750,29.3,529.7771,8,24.98102,121.53655,40.2
222 | 221,2013.333,37.2,186.5101,9,24.97703,121.54265,78.3
223 | 222,2013.333,9,1402.016,0,24.98569,121.5276,38.5
224 | 223,2013.583,30.6,431.1114,10,24.98123,121.53743,48.5
225 | 224,2013.250,9.1,1402.016,0,24.98569,121.5276,42.3
226 | 225,2013.333,34.5,324.9419,6,24.97814,121.5417,46
227 | 226,2013.250,1.1,193.5845,6,24.96571,121.54089,49
228 | 227,2013.000,16.5,4082.015,0,24.94155,121.50381,12.8
229 | 228,2012.917,32.4,265.0609,8,24.98059,121.53986,40.2
230 | 229,2013.417,11.9,3171.329,0,25.00115,121.51776,46.6
231 | 230,2013.583,31,1156.412,0,24.9489,121.53095,19
232 | 231,2013.500,4,2147.376,3,24.96299,121.51284,33.4
233 | 232,2012.833,16.2,4074.736,0,24.94235,121.50357,14.7
234 | 233,2012.917,27.1,4412.765,1,24.95032,121.49587,17.4
235 | 234,2013.333,39.7,333.3679,9,24.98016,121.53932,32.4
236 | 235,2013.250,8,2216.612,4,24.96007,121.51361,23.9
237 | 236,2012.750,12.9,250.631,7,24.96606,121.54297,39.3
238 | 237,2013.167,3.6,373.8389,10,24.98322,121.53765,61.9
239 | 238,2013.167,13,732.8528,0,24.97668,121.52518,39
240 | 239,2013.083,12.8,732.8528,0,24.97668,121.52518,40.6
241 | 240,2013.500,18.1,837.7233,0,24.96334,121.54767,29.7
242 | 241,2013.083,11,1712.632,2,24.96412,121.5167,28.8
243 | 242,2013.500,13.7,250.631,7,24.96606,121.54297,41.4
244 | 243,2012.833,2,2077.39,3,24.96357,121.51329,33.4
245 | 244,2013.417,32.8,204.1705,8,24.98236,121.53923,48.2
246 | 245,2013.083,4.8,1559.827,3,24.97213,121.51627,21.7
247 | 246,2013.417,7.5,639.6198,5,24.97258,121.54814,40.8
248 | 247,2013.417,16.4,389.8219,6,24.96412,121.54273,40.6
249 | 248,2013.333,21.7,1055.067,0,24.96211,121.54928,23.1
250 | 249,2013.000,19,1009.235,0,24.96357,121.54951,22.3
251 | 250,2012.833,18,6306.153,1,24.95743,121.47516,15
252 | 251,2013.167,39.2,424.7132,7,24.97429,121.53917,30
253 | 252,2012.917,31.7,1159.454,0,24.9496,121.53018,13.8
254 | 253,2012.833,5.9,90.45606,9,24.97433,121.5431,52.7
255 | 254,2012.667,30.4,1735.595,2,24.96464,121.51623,25.9
256 | 255,2012.667,1.1,329.9747,5,24.98254,121.54395,51.8
257 | 256,2013.417,31.5,5512.038,1,24.95095,121.48458,17.4
258 | 257,2012.667,14.6,339.2289,1,24.97519,121.53151,26.5
259 | 258,2013.250,17.3,444.1334,1,24.97501,121.5273,43.9
260 | 259,2013.417,0,292.9978,6,24.97744,121.54458,63.3
261 | 260,2013.083,17.7,837.7233,0,24.96334,121.54767,28.8
262 | 261,2013.250,17,1485.097,4,24.97073,121.517,30.7
263 | 262,2013.167,16.2,2288.011,3,24.95885,121.51359,24.4
264 | 263,2012.917,15.9,289.3248,5,24.98203,121.54348,53
265 | 264,2013.417,3.9,2147.376,3,24.96299,121.51284,31.7
266 | 265,2013.167,32.6,493.657,7,24.96968,121.54522,40.6
267 | 266,2012.833,15.7,815.9314,4,24.97886,121.53464,38.1
268 | 267,2013.250,17.8,1783.18,3,24.96731,121.51486,23.7
269 | 268,2012.833,34.7,482.7581,5,24.97433,121.53863,41.1
270 | 269,2013.417,17.2,390.5684,5,24.97937,121.54245,40.1
271 | 270,2013.000,17.6,837.7233,0,24.96334,121.54767,23
272 | 271,2013.333,10.8,252.5822,1,24.9746,121.53046,117.5
273 | 272,2012.917,17.7,451.6419,8,24.96945,121.5449,26.5
274 | 273,2012.750,13,492.2313,5,24.96515,121.53737,40.5
275 | 274,2013.417,13.2,170.1289,1,24.97371,121.52984,29.3
276 | 275,2013.167,27.5,394.0173,7,24.97305,121.53994,41
277 | 276,2012.667,1.5,23.38284,7,24.96772,121.54102,49.7
278 | 277,2013.000,19.1,461.1016,5,24.95425,121.5399,34
279 | 278,2013.417,21.2,2185.128,3,24.96322,121.51237,27.7
280 | 279,2012.750,0,208.3905,6,24.95618,121.53844,44
281 | 280,2013.417,2.6,1554.25,3,24.97026,121.51642,31.1
282 | 281,2013.250,2.3,184.3302,6,24.96581,121.54086,45.4
283 | 282,2013.333,4.7,387.7721,9,24.98118,121.53788,44.8
284 | 283,2012.917,2,1455.798,1,24.9512,121.549,25.6
285 | 284,2013.417,33.5,1978.671,2,24.98674,121.51844,23.5
286 | 285,2012.917,15,383.2805,7,24.96735,121.54464,34.4
287 | 286,2013.167,30.1,718.2937,3,24.97509,121.53644,55.3
288 | 287,2012.917,5.9,90.45606,9,24.97433,121.5431,56.3
289 | 288,2013.000,19.2,461.1016,5,24.95425,121.5399,32.9
290 | 289,2013.583,16.6,323.6912,6,24.97841,121.5428,51
291 | 290,2013.333,13.9,289.3248,5,24.98203,121.54348,44.5
292 | 291,2013.083,37.7,490.3446,0,24.97217,121.53471,37
293 | 292,2012.833,3.4,56.47425,7,24.95744,121.53711,54.4
294 | 293,2013.083,17.5,395.6747,5,24.95674,121.534,24.5
295 | 294,2012.667,12.6,383.2805,7,24.96735,121.54464,42.5
296 | 295,2013.500,26.4,335.5273,6,24.9796,121.5414,38.1
297 | 296,2013.167,18.2,2179.59,3,24.96299,121.51252,21.8
298 | 297,2012.750,12.5,1144.436,4,24.99176,121.53456,34.1
299 | 298,2012.833,34.9,567.0349,4,24.97003,121.5458,28.5
300 | 299,2013.333,16.7,4082.015,0,24.94155,121.50381,16.7
301 | 300,2013.167,33.2,121.7262,10,24.98178,121.54059,46.1
302 | 301,2013.083,2.5,156.2442,4,24.96696,121.53992,36.9
303 | 302,2012.750,38,461.7848,0,24.97229,121.53445,35.7
304 | 303,2013.500,16.5,2288.011,3,24.95885,121.51359,23.2
305 | 304,2013.500,38.3,439.7105,0,24.97161,121.53423,38.4
306 | 305,2013.417,20,1626.083,3,24.96622,121.51668,29.4
307 | 306,2013.083,16.2,289.3248,5,24.98203,121.54348,55
308 | 307,2013.500,14.4,169.9803,1,24.97369,121.52979,50.2
309 | 308,2012.833,10.3,3079.89,0,24.9546,121.56627,24.7
310 | 309,2013.417,16.4,289.3248,5,24.98203,121.54348,53
311 | 310,2013.250,30.3,1264.73,0,24.94883,121.52954,19.1
312 | 311,2013.583,16.4,1643.499,2,24.95394,121.55174,24.7
313 | 312,2013.167,21.3,537.7971,4,24.97425,121.53814,42.2
314 | 313,2013.583,35.4,318.5292,9,24.97071,121.54069,78
315 | 314,2013.333,8.3,104.8101,5,24.96674,121.54067,42.8
316 | 315,2013.250,3.7,577.9615,6,24.97201,121.54722,41.6
317 | 316,2013.083,15.6,1756.411,2,24.9832,121.51812,27.3
318 | 317,2013.250,13.3,250.631,7,24.96606,121.54297,42
319 | 318,2012.750,15.6,752.7669,2,24.97795,121.53451,37.5
320 | 319,2013.333,7.1,379.5575,10,24.98343,121.53762,49.8
321 | 320,2013.250,34.6,272.6783,5,24.95562,121.53872,26.9
322 | 321,2012.750,13.5,4197.349,0,24.93885,121.50383,18.6
323 | 322,2012.917,16.9,964.7496,4,24.98872,121.53411,37.7
324 | 323,2013.000,12.9,187.4823,1,24.97388,121.52981,33.1
325 | 324,2013.417,28.6,197.1338,6,24.97631,121.54436,42.5
326 | 325,2012.667,12.4,1712.632,2,24.96412,121.5167,31.3
327 | 326,2013.083,36.6,488.8193,8,24.97015,121.54494,38.1
328 | 327,2013.500,4.1,56.47425,7,24.95744,121.53711,62.1
329 | 328,2013.417,3.5,757.3377,3,24.97538,121.54971,36.7
330 | 329,2012.833,15.9,1497.713,3,24.97003,121.51696,23.6
331 | 330,2013.000,13.6,4197.349,0,24.93885,121.50383,19.2
332 | 331,2013.083,32,1156.777,0,24.94935,121.53046,12.8
333 | 332,2013.333,25.6,4519.69,0,24.94826,121.49587,15.6
334 | 333,2013.167,39.8,617.7134,2,24.97577,121.53475,39.6
335 | 334,2012.750,7.8,104.8101,5,24.96674,121.54067,38.4
336 | 335,2012.917,30,1013.341,5,24.99006,121.5346,22.8
337 | 336,2013.583,27.3,337.6016,6,24.96431,121.54063,36.5
338 | 337,2012.833,5.1,1867.233,2,24.98407,121.51748,35.6
339 | 338,2012.833,31.3,600.8604,5,24.96871,121.54651,30.9
340 | 339,2012.917,31.5,258.186,9,24.96867,121.54331,36.3
341 | 340,2013.333,1.7,329.9747,5,24.98254,121.54395,50.4
342 | 341,2013.333,33.6,270.8895,0,24.97281,121.53265,42.9
343 | 342,2013.000,13,750.0704,2,24.97371,121.54951,37
344 | 343,2012.667,5.7,90.45606,9,24.97433,121.5431,53.5
345 | 344,2013.000,33.5,563.2854,8,24.98223,121.53597,46.6
346 | 345,2013.500,34.6,3085.17,0,24.998,121.5155,41.2
347 | 346,2012.667,0,185.4296,0,24.9711,121.5317,37.9
348 | 347,2013.417,13.2,1712.632,2,24.96412,121.5167,30.8
349 | 348,2013.583,17.4,6488.021,1,24.95719,121.47353,11.2
350 | 349,2012.833,4.6,259.6607,6,24.97585,121.54516,53.7
351 | 350,2012.750,7.8,104.8101,5,24.96674,121.54067,47
352 | 351,2013.000,13.2,492.2313,5,24.96515,121.53737,42.3
353 | 352,2012.833,4,2180.245,3,24.96324,121.51241,28.6
354 | 353,2012.833,18.4,2674.961,3,24.96143,121.50827,25.7
355 | 354,2013.500,4.1,2147.376,3,24.96299,121.51284,31.3
356 | 355,2013.417,12.2,1360.139,1,24.95204,121.54842,30.1
357 | 356,2013.250,3.8,383.8624,5,24.98085,121.54391,60.7
358 | 357,2012.833,10.3,211.4473,1,24.97417,121.52999,45.3
359 | 358,2013.417,0,338.9679,9,24.96853,121.54413,44.9
360 | 359,2013.167,1.1,193.5845,6,24.96571,121.54089,45.1
361 | 360,2013.500,5.6,2408.993,0,24.95505,121.55964,24.7
362 | 361,2012.667,32.9,87.30222,10,24.983,121.54022,47.1
363 | 362,2013.083,41.4,281.205,8,24.97345,121.54093,63.3
364 | 363,2013.417,17.1,967.4,4,24.98872,121.53408,40
365 | 364,2013.500,32.3,109.9455,10,24.98182,121.54086,48
366 | 365,2013.417,35.3,614.1394,7,24.97913,121.53666,33.1
367 | 366,2012.917,17.3,2261.432,4,24.96182,121.51222,29.5
368 | 367,2012.750,14.2,1801.544,1,24.95153,121.55254,24.8
369 | 368,2012.833,15,1828.319,2,24.96464,121.51531,20.9
370 | 369,2013.417,18.2,350.8515,1,24.97544,121.53119,43.1
371 | 370,2012.667,20.2,2185.128,3,24.96322,121.51237,22.8
372 | 371,2012.750,15.9,289.3248,5,24.98203,121.54348,42.1
373 | 372,2013.500,4.1,312.8963,5,24.95591,121.53956,51.7
374 | 373,2013.000,33.9,157.6052,7,24.96628,121.54196,41.5
375 | 374,2013.083,0,274.0144,1,24.9748,121.53059,52.2
376 | 375,2013.250,5.4,390.5684,5,24.97937,121.54245,49.5
377 | 376,2013.250,21.7,1157.988,0,24.96165,121.55011,23.8
378 | 377,2013.417,14.7,1717.193,2,24.96447,121.51649,30.5
379 | 378,2013.333,3.9,49.66105,8,24.95836,121.53756,56.8
380 | 379,2013.333,37.3,587.8877,8,24.97077,121.54634,37.4
381 | 380,2013.333,0,292.9978,6,24.97744,121.54458,69.7
382 | 381,2013.333,14.1,289.3248,5,24.98203,121.54348,53.3
383 | 382,2013.417,8,132.5469,9,24.98298,121.53981,47.3
384 | 383,2013.000,16.3,3529.564,0,24.93207,121.51597,29.3
385 | 384,2012.667,29.1,506.1144,4,24.97845,121.53889,40.3
386 | 385,2012.750,16.1,4066.587,0,24.94297,121.50342,12.9
387 | 386,2013.000,18.3,82.88643,10,24.983,121.54026,46.6
388 | 387,2012.833,0,185.4296,0,24.9711,121.5317,55.3
389 | 388,2013.250,16.2,2103.555,3,24.96042,121.51462,25.6
390 | 389,2013.500,10.4,2251.938,4,24.95957,121.51353,27.3
391 | 390,2013.250,40.9,122.3619,8,24.96756,121.5423,67.7
392 | 391,2013.500,32.8,377.8302,9,24.97151,121.5435,38.6
393 | 392,2013.583,6.2,1939.749,1,24.95155,121.55387,31.3
394 | 393,2013.083,42.7,443.802,6,24.97927,121.53874,35.3
395 | 394,2013.000,16.9,967.4,4,24.98872,121.53408,40.3
396 | 395,2013.500,32.6,4136.271,1,24.95544,121.4963,24.7
397 | 396,2012.917,21.2,512.5487,4,24.974,121.53842,42.5
398 | 397,2012.667,37.1,918.6357,1,24.97198,121.55063,31.9
399 | 398,2013.417,13.1,1164.838,4,24.99156,121.53406,32.2
400 | 399,2013.417,14.7,1717.193,2,24.96447,121.51649,23
401 | 400,2012.917,12.7,170.1289,1,24.97371,121.52984,37.3
402 | 401,2013.250,26.8,482.7581,5,24.97433,121.53863,35.5
403 | 402,2013.083,7.6,2175.03,3,24.96305,121.51254,27.7
404 | 403,2012.833,12.7,187.4823,1,24.97388,121.52981,28.5
405 | 404,2012.667,30.9,161.942,9,24.98353,121.53966,39.7
406 | 405,2013.333,16.4,289.3248,5,24.98203,121.54348,41.2
407 | 406,2012.667,23,130.9945,6,24.95663,121.53765,37.2
408 | 407,2013.167,1.9,372.1386,7,24.97293,121.54026,40.5
409 | 408,2013.000,5.2,2408.993,0,24.95505,121.55964,22.3
410 | 409,2013.417,18.5,2175.744,3,24.9633,121.51243,28.1
411 | 410,2013.000,13.7,4082.015,0,24.94155,121.50381,15.4
412 | 411,2012.667,5.6,90.45606,9,24.97433,121.5431,50
413 | 412,2013.250,18.8,390.9696,7,24.97923,121.53986,40.6
414 | 413,2013.000,8.1,104.8101,5,24.96674,121.54067,52.5
415 | 414,2013.500,6.5,90.45606,9,24.97433,121.5431,63.9
416 |
--------------------------------------------------------------------------------
/week 2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Welcome to Week 2"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "This week we'll learn about Feature Engineering. This is the art that helps achieve great accuracy in Machine Learning. \n",
15 | "There are a number of things we need to keep in mind before we send in our data to formulas. \n",
16 | "\n",
17 | "Having no null data and no strings in the dataset is a must, but a number of other factors also affect our data. Even most professionals spend more than 40% of their time on this step, so do refer to blogs to learn more. \n",
18 | "Let's get started"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 1,
24 | "metadata": {},
25 | "outputs": [],
26 | "source": [
27 | "import pandas as pd\n",
28 | "import numpy as np"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "data": {
38 | "text/html": [
39 | "
\n",
40 | "\n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " | \n",
57 | " City | \n",
58 | " Experience | \n",
59 | " Salary | \n",
60 | " Promotion | \n",
61 | "
\n",
62 | " \n",
63 | " \n",
64 | " \n",
65 | " | 0 | \n",
66 | " Delhi | \n",
67 | " 4.0 | \n",
68 | " 55000.0 | \n",
69 | " No | \n",
70 | "
\n",
71 | " \n",
72 | " | 1 | \n",
73 | " Mumbai | \n",
74 | " 2.0 | \n",
75 | " 20000.0 | \n",
76 | " Yes | \n",
77 | "
\n",
78 | " \n",
79 | " | 2 | \n",
80 | " Agra | \n",
81 | " 3.0 | \n",
82 | " 30000.0 | \n",
83 | " No | \n",
84 | "
\n",
85 | " \n",
86 | " | 3 | \n",
87 | " Mumbai | \n",
88 | " 8.0 | \n",
89 | " 72000.0 | \n",
90 | " No | \n",
91 | "
\n",
92 | " \n",
93 | " | 4 | \n",
94 | " Agra | \n",
95 | " 4.0 | \n",
96 | " NaN | \n",
97 | " Yes | \n",
98 | "
\n",
99 | " \n",
100 | "
\n",
101 | "
"
102 | ],
103 | "text/plain": [
104 | " City Experience Salary Promotion\n",
105 | "0 Delhi 4.0 55000.0 No\n",
106 | "1 Mumbai 2.0 20000.0 Yes\n",
107 | "2 Agra 3.0 30000.0 No\n",
108 | "3 Mumbai 8.0 72000.0 No\n",
109 | "4 Agra 4.0 NaN Yes"
110 | ]
111 | },
112 | "execution_count": 2,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "data=pd.read_csv(\"data/Data.csv\")\n",
119 | "data.head()"
120 | ]
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {},
125 | "source": [
126 | "If you look at the dataset you'll see we have missing values, and you already know how to deal with them. \n",
127 | "There are also new methods which I'll guide you through this week."
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 3,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/plain": [
138 | "City 1\n",
139 | "Experience 2\n",
140 | "Salary 1\n",
141 | "Promotion 0\n",
142 | "dtype: int64"
143 | ]
144 | },
145 | "execution_count": 3,
146 | "metadata": {},
147 | "output_type": "execute_result"
148 | }
149 | ],
150 | "source": [
151 | "data.isna().sum()"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 4,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "data": {
161 | "text/html": [
162 | "\n",
163 | "\n",
176 | "
\n",
177 | " \n",
178 | " \n",
179 | " | \n",
180 | " City | \n",
181 | " Experience | \n",
182 | " Salary | \n",
183 | " Promotion | \n",
184 | "
\n",
185 | " \n",
186 | " \n",
187 | " \n",
188 | " | 0 | \n",
189 | " Delhi | \n",
190 | " 4.0 | \n",
191 | " 55000.0 | \n",
192 | " No | \n",
193 | "
\n",
194 | " \n",
195 | " | 1 | \n",
196 | " Mumbai | \n",
197 | " 2.0 | \n",
198 | " 20000.0 | \n",
199 | " Yes | \n",
200 | "
\n",
201 | " \n",
202 | " | 2 | \n",
203 | " Agra | \n",
204 | " 3.0 | \n",
205 | " 30000.0 | \n",
206 | " No | \n",
207 | "
\n",
208 | " \n",
209 | " | 3 | \n",
210 | " Mumbai | \n",
211 | " 8.0 | \n",
212 | " 72000.0 | \n",
213 | " No | \n",
214 | "
\n",
215 | " \n",
216 | " | 5 | \n",
217 | " Delhi | \n",
218 | " 5.0 | \n",
219 | " 60000.0 | \n",
220 | " Yes | \n",
221 | "
\n",
222 | " \n",
223 | "
\n",
224 | "
"
225 | ],
226 | "text/plain": [
227 | " City Experience Salary Promotion\n",
228 | "0 Delhi 4.0 55000.0 No\n",
229 | "1 Mumbai 2.0 20000.0 Yes\n",
230 | "2 Agra 3.0 30000.0 No\n",
231 | "3 Mumbai 8.0 72000.0 No\n",
232 | "5 Delhi 5.0 60000.0 Yes"
233 | ]
234 | },
235 | "execution_count": 4,
236 | "metadata": {},
237 | "output_type": "execute_result"
238 | }
239 | ],
240 | "source": [
241 | "# method 1\n",
242 | "data1 = data.dropna(how='any',axis=0) \n",
243 | "data1.head()"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 5,
249 | "metadata": {},
250 | "outputs": [
251 | {
252 | "name": "stdout",
253 | "output_type": "stream",
254 | "text": [
255 | "x_most_frequent = [[55000.]\n",
256 | " [20000.]\n",
257 | " [30000.]\n",
258 | " [72000.]\n",
259 | " [52000.]\n",
260 | " [60000.]\n",
261 | " [52000.]\n",
262 | " [51000.]\n",
263 | " [59000.]\n",
264 | " [31000.]\n",
265 | " [58000.]\n",
266 | " [52000.]\n",
267 | " [79000.]\n",
268 | " [60000.]\n",
269 | " [67000.]]\n",
270 | "x_mean = [[55000. ]\n",
271 | " [20000. ]\n",
272 | " [30000. ]\n",
273 | " [72000. ]\n",
274 | " [53285.71428571]\n",
275 | " [60000. ]\n",
276 | " [52000. ]\n",
277 | " [51000. ]\n",
278 | " [59000. ]\n",
279 | " [31000. ]\n",
280 | " [58000. ]\n",
281 | " [52000. ]\n",
282 | " [79000. ]\n",
283 | " [60000. ]\n",
284 | " [67000. ]]\n",
285 | "x_median = [[55000.]\n",
286 | " [20000.]\n",
287 | " [30000.]\n",
288 | " [72000.]\n",
289 | " [56500.]\n",
290 | " [60000.]\n",
291 | " [52000.]\n",
292 | " [51000.]\n",
293 | " [59000.]\n",
294 | " [31000.]\n",
295 | " [58000.]\n",
296 | " [52000.]\n",
297 | " [79000.]\n",
298 | " [60000.]\n",
299 | " [67000.]]\n"
300 | ]
301 | }
302 | ],
303 | "source": [
304 | "# method 2\n",
305 | "from sklearn.impute import SimpleImputer as Imputer\n",
306 | "x = data['Salary'].values.reshape(-1,1)\n",
307 | "\n",
308 | "x_most_frequent = Imputer(missing_values=np.nan, \n",
309 | " strategy = 'most_frequent').fit_transform(x)\n",
310 | "print(\"x_most_frequent = \",x_most_frequent)\n",
311 | "\n",
312 | "x_mean = Imputer(missing_values=np.nan, \n",
313 | " strategy = 'mean').fit_transform(x)\n",
314 | "print(\"x_mean = \",x_mean)\n",
315 | "\n",
316 | "x_median = Imputer(missing_values=np.nan, \n",
317 | " strategy = 'median').fit_transform(x)\n",
318 | "print(\"x_median = \",x_median)"
319 | ]
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "metadata": {},
324 | "source": [
325 | "Continuing the preprocessing, do keep in mind that ML requires mathematics, so we cannot have words. \n",
326 | "To solve this we need to convert them to numbers. \n",
327 | "We can do it by giving them numbers like\n",
328 | "* Agra 0\n",
329 | "* Delhi 1\n",
330 | "* Mumbai 2"
331 | ]
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 6,
336 | "metadata": {},
337 | "outputs": [],
338 | "source": [
339 | "#converting data frame to values\n",
340 | "X = data1.iloc[:, :-1].values\n",
341 | "y = data1.iloc[:, 3].values"
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 7,
347 | "metadata": {},
348 | "outputs": [
349 | {
350 | "data": {
351 | "text/plain": [
352 | "array([['Delhi', 4.0, 55000.0],\n",
353 | " ['Mumbai', 2.0, 20000.0],\n",
354 | " ['Agra', 3.0, 30000.0],\n",
355 | " ['Mumbai', 8.0, 72000.0],\n",
356 | " ['Delhi', 5.0, 60000.0],\n",
357 | " ['Delhi', 4.0, 51000.0],\n",
358 | " ['Agra', 5.0, 59000.0],\n",
359 | " ['Delhi', 3.0, 31000.0],\n",
360 | " ['Delhi', 8.0, 79000.0],\n",
361 | " ['Agra', 5.0, 60000.0],\n",
362 | " ['Delhi', 7.0, 67000.0]], dtype=object)"
363 | ]
364 | },
365 | "execution_count": 7,
366 | "metadata": {},
367 | "output_type": "execute_result"
368 | }
369 | ],
370 | "source": [
371 | "X"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 8,
377 | "metadata": {},
378 | "outputs": [
379 | {
380 | "data": {
381 | "text/plain": [
382 | "array(['No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No',\n",
383 | " 'Yes'], dtype=object)"
384 | ]
385 | },
386 | "execution_count": 8,
387 | "metadata": {},
388 | "output_type": "execute_result"
389 | }
390 | ],
391 | "source": [
392 | "y"
393 | ]
394 | },
395 | {
396 | "cell_type": "code",
397 | "execution_count": 9,
398 | "metadata": {},
399 | "outputs": [
400 | {
401 | "data": {
402 | "text/plain": [
403 | "array([[1, 4.0, 55000.0],\n",
404 | " [2, 2.0, 20000.0],\n",
405 | " [0, 3.0, 30000.0],\n",
406 | " [2, 8.0, 72000.0],\n",
407 | " [1, 5.0, 60000.0],\n",
408 | " [1, 4.0, 51000.0],\n",
409 | " [0, 5.0, 59000.0],\n",
410 | " [1, 3.0, 31000.0],\n",
411 | " [1, 8.0, 79000.0],\n",
412 | " [0, 5.0, 60000.0],\n",
413 | " [1, 7.0, 67000.0]], dtype=object)"
414 | ]
415 | },
416 | "execution_count": 9,
417 | "metadata": {},
418 | "output_type": "execute_result"
419 | }
420 | ],
421 | "source": [
422 | "from sklearn.preprocessing import LabelEncoder\n",
423 | "labelencoder_X = LabelEncoder()\n",
424 | "\n",
425 | "X[:, 0] = labelencoder_X.fit_transform(X[:, 0])\n",
426 | "X"
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": 10,
432 | "metadata": {},
433 | "outputs": [
434 | {
435 | "data": {
436 | "text/plain": [
437 | "array(['Agra', 'Delhi', 'Mumbai'], dtype=object)"
438 | ]
439 | },
440 | "execution_count": 10,
441 | "metadata": {},
442 | "output_type": "execute_result"
443 | }
444 | ],
445 | "source": [
446 | "labelencoder_X.classes_"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 11,
452 | "metadata": {},
453 | "outputs": [],
454 | "source": [
455 | "labelencoder_y = LabelEncoder()\n",
456 | "y = labelencoder_y.fit_transform(y)"
457 | ]
458 | },
459 | {
460 | "cell_type": "code",
461 | "execution_count": 12,
462 | "metadata": {},
463 | "outputs": [
464 | {
465 | "data": {
466 | "text/plain": [
467 | "array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1])"
468 | ]
469 | },
470 | "execution_count": 12,
471 | "metadata": {},
472 | "output_type": "execute_result"
473 | }
474 | ],
475 | "source": [
476 | "y"
477 | ]
478 | },
479 | {
480 | "cell_type": "markdown",
481 | "metadata": {},
482 | "source": [
483 | "* no 0\n",
484 | "* yes 1"
485 | ]
486 | },
487 | {
488 | "cell_type": "markdown",
489 | "metadata": {},
490 | "source": [
491 | "Over here we'll point out that the numbers we assigned to the cities should not carry any weightage. \n",
492 | "In a sense, as the number of cities increases, cities with larger numbers will be given more priority by the ML formulas. \n",
493 | "\n",
494 | "Due to this Mumbai will get more importance than Agra. \n",
495 | "Think it over, or google what will happen if instead of 3 we have 100 cities!!\n",
496 | "\n",
497 | "To overcome this we'll have to judge a column and apply one-hot encoding."
498 | ]
499 | },
500 | {
501 | "cell_type": "code",
502 | "execution_count": 13,
503 | "metadata": {},
504 | "outputs": [
505 | {
506 | "data": {
507 | "text/plain": [
508 | "array([[0., 1., 0.],\n",
509 | " [0., 0., 1.],\n",
510 | " [1., 0., 0.],\n",
511 | " [0., 0., 1.],\n",
512 | " [0., 1., 0.],\n",
513 | " [0., 1., 0.],\n",
514 | " [1., 0., 0.],\n",
515 | " [0., 1., 0.],\n",
516 | " [0., 1., 0.],\n",
517 | " [1., 0., 0.],\n",
518 | " [0., 1., 0.]])"
519 | ]
520 | },
521 | "execution_count": 13,
522 | "metadata": {},
523 | "output_type": "execute_result"
524 | }
525 | ],
526 | "source": [
527 | "from sklearn.preprocessing import OneHotEncoder\n",
528 | "\n",
529 | "onehotencoder = OneHotEncoder(categories='auto') \n",
530 | "p = onehotencoder.fit_transform(X[:,0:1]).toarray()\n",
531 | "p"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "execution_count": 14,
537 | "metadata": {},
538 | "outputs": [
539 | {
540 | "data": {
541 | "text/html": [
542 | "\n",
543 | "\n",
556 | "
\n",
557 | " \n",
558 | " \n",
559 | " | \n",
560 | " Agra | \n",
561 | " Delhi | \n",
562 | " Mumbai | \n",
563 | "
\n",
564 | " \n",
565 | " \n",
566 | " \n",
567 | " | 0 | \n",
568 | " 0 | \n",
569 | " 1 | \n",
570 | " 0 | \n",
571 | "
\n",
572 | " \n",
573 | " | 1 | \n",
574 | " 0 | \n",
575 | " 0 | \n",
576 | " 1 | \n",
577 | "
\n",
578 | " \n",
579 | " | 2 | \n",
580 | " 1 | \n",
581 | " 0 | \n",
582 | " 0 | \n",
583 | "
\n",
584 | " \n",
585 | " | 3 | \n",
586 | " 0 | \n",
587 | " 0 | \n",
588 | " 1 | \n",
589 | "
\n",
590 | " \n",
591 | " | 5 | \n",
592 | " 0 | \n",
593 | " 1 | \n",
594 | " 0 | \n",
595 | "
\n",
596 | " \n",
597 | "
\n",
598 | "
"
599 | ],
600 | "text/plain": [
601 | " Agra Delhi Mumbai\n",
602 | "0 0 1 0\n",
603 | "1 0 0 1\n",
604 | "2 1 0 0\n",
605 | "3 0 0 1\n",
606 | "5 0 1 0"
607 | ]
608 | },
609 | "execution_count": 14,
610 | "metadata": {},
611 | "output_type": "execute_result"
612 | }
613 | ],
614 | "source": [
615 | "# This will make more sense to you. \n",
616 | "dff = pd.get_dummies(data1['City'])\n",
617 | "dff.head()"
618 | ]
619 | },
620 | {
621 | "cell_type": "markdown",
622 | "metadata": {},
623 | "source": [
624 | "Although we converted our variables to the above format, we'll still face one issue. It's called the **Dummy Variable Trap**. We'll discuss it next week. "
625 | ]
626 | },
627 | {
628 | "cell_type": "code",
629 | "execution_count": 15,
630 | "metadata": {},
631 | "outputs": [
632 | {
633 | "data": {
634 | "text/html": [
635 | "\n",
636 | "\n",
649 | "
\n",
650 | " \n",
651 | " \n",
652 | " | \n",
653 | " Agra | \n",
654 | " Delhi | \n",
655 | " Mumbai | \n",
656 | " Experience | \n",
657 | " Salary | \n",
658 | "
\n",
659 | " \n",
660 | " \n",
661 | " \n",
662 | " | 0 | \n",
663 | " 0 | \n",
664 | " 1 | \n",
665 | " 0 | \n",
666 | " 4.0 | \n",
667 | " 55000.0 | \n",
668 | "
\n",
669 | " \n",
670 | " | 1 | \n",
671 | " 0 | \n",
672 | " 0 | \n",
673 | " 1 | \n",
674 | " 2.0 | \n",
675 | " 20000.0 | \n",
676 | "
\n",
677 | " \n",
678 | " | 2 | \n",
679 | " 1 | \n",
680 | " 0 | \n",
681 | " 0 | \n",
682 | " 3.0 | \n",
683 | " 30000.0 | \n",
684 | "
\n",
685 | " \n",
686 | " | 3 | \n",
687 | " 0 | \n",
688 | " 0 | \n",
689 | " 1 | \n",
690 | " 8.0 | \n",
691 | " 72000.0 | \n",
692 | "
\n",
693 | " \n",
694 | " | 5 | \n",
695 | " 0 | \n",
696 | " 1 | \n",
697 | " 0 | \n",
698 | " 5.0 | \n",
699 | " 60000.0 | \n",
700 | "
\n",
701 | " \n",
702 | " | 7 | \n",
703 | " 0 | \n",
704 | " 1 | \n",
705 | " 0 | \n",
706 | " 4.0 | \n",
707 | " 51000.0 | \n",
708 | "
\n",
709 | " \n",
710 | " | 8 | \n",
711 | " 1 | \n",
712 | " 0 | \n",
713 | " 0 | \n",
714 | " 5.0 | \n",
715 | " 59000.0 | \n",
716 | "
\n",
717 | " \n",
718 | " | 9 | \n",
719 | " 0 | \n",
720 | " 1 | \n",
721 | " 0 | \n",
722 | " 3.0 | \n",
723 | " 31000.0 | \n",
724 | "
\n",
725 | " \n",
726 | " | 12 | \n",
727 | " 0 | \n",
728 | " 1 | \n",
729 | " 0 | \n",
730 | " 8.0 | \n",
731 | " 79000.0 | \n",
732 | "
\n",
733 | " \n",
734 | " | 13 | \n",
735 | " 1 | \n",
736 | " 0 | \n",
737 | " 0 | \n",
738 | " 5.0 | \n",
739 | " 60000.0 | \n",
740 | "
\n",
741 | " \n",
742 | " | 14 | \n",
743 | " 0 | \n",
744 | " 1 | \n",
745 | " 0 | \n",
746 | " 7.0 | \n",
747 | " 67000.0 | \n",
748 | "
\n",
749 | " \n",
750 | "
\n",
751 | "
"
752 | ],
753 | "text/plain": [
754 | " Agra Delhi Mumbai Experience Salary\n",
755 | "0 0 1 0 4.0 55000.0\n",
756 | "1 0 0 1 2.0 20000.0\n",
757 | "2 1 0 0 3.0 30000.0\n",
758 | "3 0 0 1 8.0 72000.0\n",
759 | "5 0 1 0 5.0 60000.0\n",
760 | "7 0 1 0 4.0 51000.0\n",
761 | "8 1 0 0 5.0 59000.0\n",
762 | "9 0 1 0 3.0 31000.0\n",
763 | "12 0 1 0 8.0 79000.0\n",
764 | "13 1 0 0 5.0 60000.0\n",
765 | "14 0 1 0 7.0 67000.0"
766 | ]
767 | },
768 | "execution_count": 15,
769 | "metadata": {},
770 | "output_type": "execute_result"
771 | }
772 | ],
773 | "source": [
774 | "dff=pd.concat([dff, data1[\"Experience\"],data1[\"Salary\"]], axis=1)\n",
775 | "dff"
776 | ]
777 | },
778 | {
779 | "cell_type": "markdown",
780 | "metadata": {},
781 | "source": [
782 | "# Normalisation\n",
783 | "In the data frame above we should scale down Salary and Experience, because their values are far larger than the 0/1 values of the city columns and would otherwise drown out the relevance of City. There are many methods to achieve this."
784 | ]
785 | },
786 | {
787 | "cell_type": "code",
788 | "execution_count": 16,
789 | "metadata": {},
790 | "outputs": [
791 | {
792 | "name": "stdout",
793 | "output_type": "stream",
794 | "text": [
795 | "[[-0.61237244 0.91287093 -0.47140452 -0.47140452 0.10738071]\n",
796 | " [-0.61237244 -1.09544512 2.12132034 -1.50849447 -1.8612657 ]\n",
797 | " [ 1.63299316 -1.09544512 -0.47140452 -0.98994949 -1.29879529]\n",
798 | " [-0.61237244 -1.09544512 2.12132034 1.60277537 1.0635804 ]\n",
799 | " [-0.61237244 0.91287093 -0.47140452 0.04714045 0.38861591]\n",
800 | " [-0.61237244 0.91287093 -0.47140452 -0.47140452 -0.11760745]\n",
801 | " [ 1.63299316 -1.09544512 -0.47140452 0.04714045 0.33236887]\n",
802 | " [-0.61237244 0.91287093 -0.47140452 -0.98994949 -1.24254825]\n",
803 | " [-0.61237244 0.91287093 -0.47140452 1.60277537 1.45730968]\n",
804 | " [ 1.63299316 -1.09544512 -0.47140452 0.04714045 0.38861591]\n",
805 | " [-0.61237244 0.91287093 -0.47140452 1.0842304 0.7823452 ]]\n"
806 | ]
807 | }
808 | ],
809 | "source": [
810 | "X = dff.iloc[:,:].values\n",
811 | "\n",
812 | "from sklearn.preprocessing import StandardScaler\n",
813 | "sc_X = StandardScaler()\n",
814 | "print(sc_X.fit_transform(X))"
815 | ]
816 | },
817 | {
818 | "cell_type": "code",
819 | "execution_count": 17,
820 | "metadata": {},
821 | "outputs": [
822 | {
823 | "name": "stdout",
824 | "output_type": "stream",
825 | "text": [
826 | "[[0. 1. 0. 0.5 0.69620253]\n",
827 | " [0. 0. 1. 0.25 0.25316456]\n",
828 | " [1. 0. 0. 0.375 0.37974684]\n",
829 | " [0. 0. 1. 1. 0.91139241]\n",
830 | " [0. 1. 0. 0.625 0.75949367]\n",
831 | " [0. 1. 0. 0.5 0.64556962]\n",
832 | " [1. 0. 0. 0.625 0.74683544]\n",
833 | " [0. 1. 0. 0.375 0.39240506]\n",
834 | " [0. 1. 0. 1. 1. ]\n",
835 | " [1. 0. 0. 0.625 0.75949367]\n",
836 | " [0. 1. 0. 0.875 0.84810127]]\n"
837 | ]
838 | }
839 | ],
840 | "source": [
841 | "X = dff.iloc[:,:].values\n",
842 | "\n",
843 | "from sklearn.preprocessing import MaxAbsScaler\n",
844 | "m_X = MaxAbsScaler()\n",
845 | "print(m_X.fit_transform(X))"
846 | ]
847 | },
848 | {
849 | "cell_type": "markdown",
850 | "metadata": {},
851 | "source": [
852 | "## Machine Learning\n",
853 | "- [Application](https://www.geeksforgeeks.org/machine-learning-introduction/)\n",
854 | "- [Types of ML models](https://www.geeksforgeeks.org/ml-types-learning-supervised-learning/)\n",
855 | "- [Difference between Supervised and Unsupervised Learning](https://www.geeksforgeeks.org/difference-between-supervised-and-unsupervised-learning/?ref=rp)\n",
856 | "- [Semi-supervised Learning](https://www.geeksforgeeks.org/ml-semi-supervised-learning/?ref=rp)"
857 | ]
858 | },
859 | {
860 | "cell_type": "markdown",
861 | "metadata": {},
862 | "source": [
863 | "## Other Links to refer: \n",
864 | "- [Scikit-Learn](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing)\n",
865 | "- [GeeksforGeeks](https://www.geeksforgeeks.org/data-preprocessing-machine-learning-python/)\n",
866 | "- [Medium](https://medium.com/search?q=preprocessing%20in%20machine%20learning)\n",
867 | "- [YouTube](https://www.youtube.com/results?search_query=preprocessing+in+machine+learning)\n",
868 | "- [Analytics Vidhya](https://www.analyticsvidhya.com/blog/2016/07/practical-guide-data-preprocessing-python-scikit-learn/)"
869 | ]
870 | }
871 | ],
872 | "metadata": {
873 | "kernelspec": {
874 | "display_name": "Python 3",
875 | "language": "python",
876 | "name": "python3"
877 | },
878 | "language_info": {
879 | "codemirror_mode": {
880 | "name": "ipython",
881 | "version": 3
882 | },
883 | "file_extension": ".py",
884 | "mimetype": "text/x-python",
885 | "name": "python",
886 | "nbconvert_exporter": "python",
887 | "pygments_lexer": "ipython3",
888 | "version": "3.8.2"
889 | }
890 | },
891 | "nbformat": 4,
892 | "nbformat_minor": 2
893 | }
894 |
--------------------------------------------------------------------------------
/week 3.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Welcome to Week 3\n",
8 | "## Linear Regression"
9 | ]
10 | },
11 | {
12 | "cell_type": "markdown",
13 | "metadata": {},
14 | "source": [
15 | "Simple linear regression is a basic ML model. \n",
16 | "You must be aware of the equation of a line, **y=mx+c**. \n",
17 | "This model tries to find the best-fit line for our dataset. \n",
18 | "Dataset is available [here](https://www.kaggle.com/andonians/random-linear-regression) \n",
19 | "We recommend that you go through [this](https://www.youtube.com/watch?v=GhrxgbQnEEU) or [this](https://www.youtube.com/watch?v=E5RjzSK0fvY) video to understand the intuition."
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 1,
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "import pandas as pd\n",
29 | "data=pd.read_csv('data/x-y.csv')\n",
30 | "data = data.dropna(how='any',axis=0)[:100]"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 2,
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " x | \n",
60 | " y | \n",
61 | "
\n",
62 | " \n",
63 | " \n",
64 | " \n",
65 | " | 0 | \n",
66 | " 24.0 | \n",
67 | " 21.549452 | \n",
68 | "
\n",
69 | " \n",
70 | " | 1 | \n",
71 | " 50.0 | \n",
72 | " 47.464463 | \n",
73 | "
\n",
74 | " \n",
75 | " | 2 | \n",
76 | " 15.0 | \n",
77 | " 17.218656 | \n",
78 | "
\n",
79 | " \n",
80 | " | 3 | \n",
81 | " 38.0 | \n",
82 | " 36.586398 | \n",
83 | "
\n",
84 | " \n",
85 | " | 4 | \n",
86 | " 87.0 | \n",
87 | " 87.288984 | \n",
88 | "
\n",
89 | " \n",
90 | "
\n",
91 | "
"
92 | ],
93 | "text/plain": [
94 | " x y\n",
95 | "0 24.0 21.549452\n",
96 | "1 50.0 47.464463\n",
97 | "2 15.0 17.218656\n",
98 | "3 38.0 36.586398\n",
99 | "4 87.0 87.288984"
100 | ]
101 | },
102 | "execution_count": 2,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "data.head()"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 3,
114 | "metadata": {},
115 | "outputs": [
116 | {
117 | "data": {
118 | "image/png": "\n",
119 | "text/plain": [
120 | ""
121 | ]
122 | },
123 | "metadata": {
124 | "needs_background": "light"
125 | },
126 | "output_type": "display_data"
127 | }
128 | ],
129 | "source": [
130 | "import matplotlib.pyplot as plt\n",
131 | "\n",
132 | "plt.scatter(data[\"x\"],data[\"y\"])\n",
133 | "plt.xlabel(\"X\")\n",
134 | "plt.ylabel(\"Y\")\n",
135 | "plt.show()"
136 | ]
137 | },
138 | {
139 | "cell_type": "markdown",
140 | "metadata": {},
141 | "source": [
142 | "As observed the dataset is linear. "
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 4,
148 | "metadata": {},
149 | "outputs": [],
150 | "source": [
151 | "x=data['x'].values.reshape(-1,1)\n",
152 | "y=data['y'].values.reshape(-1,1)"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {},
158 | "source": [
159 | "### Splitting the Data\n",
160 | "It is necessary to have one dataset to train on and another dataset to test on. \n",
161 | "But these two must come from the same source (the same distribution), otherwise the test results will be misleading. \n",
162 | "For example, if a model is trained to detect the presence of something in webcam images, you cannot expect to attain good results while testing it on CCTV footage. \n",
163 | "\n",
164 | "So if test data is not given explicitly, make your own by splitting the data. For small datasets we can split in a ratio of about **train**: 0.8, **test**: 0.2.\n",
165 | "\n",
166 | "For very large datasets the test share can be much smaller, e.g. **train**: 0.99, **test**: 0.01, because even 1% of the data is still a large number of test samples. "
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 5,
172 | "metadata": {},
173 | "outputs": [],
174 | "source": [
175 | "from sklearn.model_selection import train_test_split\n",
176 | "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "## [sklearn.linear_model.LinearRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html?highlight=linear%20regression)\n",
184 | "\n",
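185 | "This class finds the best-fit line for **Y = M*X + C**. \n",
186 | "Let's understand how it works.\n",
187 | "\n",
188 | "* Every candidate pair of slope m and intercept c defines a line. \n",
189 | "* For a given line, y is predicted for every x in the training data. \n",
190 | "* The error of that line is the sum of squared differences between the predicted and the actual y.\n",
191 | "* `LinearRegression` does not start from random values and update them iteratively; it solves this ordinary least squares problem directly and returns the m and c with the least squared error."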
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 6,
197 | "metadata": {},
198 | "outputs": [
199 | {
200 | "data": {
201 | "text/plain": [
202 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
203 | ]
204 | },
205 | "execution_count": 6,
206 | "metadata": {},
207 | "output_type": "execute_result"
208 | }
209 | ],
210 | "source": [
211 | "from sklearn.linear_model import LinearRegression\n",
212 | "r1=LinearRegression()\n",
213 | "# This is an object which stores value of slope and intercept\n",
214 | "r1.fit(X_train, y_train)"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": 7,
220 | "metadata": {},
221 | "outputs": [
222 | {
223 | "data": {
224 | "text/plain": [
225 | "array([0.54038651])"
226 | ]
227 | },
228 | "execution_count": 7,
229 | "metadata": {},
230 | "output_type": "execute_result"
231 | }
232 | ],
233 | "source": [
234 | "r1.intercept_"
235 | ]
236 | },
237 | {
238 | "cell_type": "code",
239 | "execution_count": 8,
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/plain": [
245 | "array([[0.98590331]])"
246 | ]
247 | },
248 | "execution_count": 8,
249 | "metadata": {},
250 | "output_type": "execute_result"
251 | }
252 | ],
253 | "source": [
254 | "r1.coef_"
255 | ]
256 | },
257 | {
258 | "cell_type": "code",
259 | "execution_count": 9,
260 | "metadata": {},
261 | "outputs": [],
262 | "source": [
263 | "y_pred = r1.predict(X_test)"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 10,
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "data": {
273 | "image/png": "\n",
274 | "text/plain": [
275 | ""
276 | ]
277 | },
278 | "metadata": {
279 | "needs_background": "light"
280 | },
281 | "output_type": "display_data"
282 | }
283 | ],
284 | "source": [
285 | "#visualising\n",
286 | "plt.scatter(x,y,color='red')\n",
287 | "plt.plot(X_train, r1.predict(X_train),color='blue')\n",
288 | "plt.ylabel('salary')\n",
289 | "plt.xlabel('experience')\n",
290 | "plt.show()"
291 | ]
292 | },
293 | {
294 | "cell_type": "code",
295 | "execution_count": 11,
296 | "metadata": {},
297 | "outputs": [
298 | {
299 | "data": {
300 | "text/plain": [
301 | "array([[99.13071774]])"
302 | ]
303 | },
304 | "execution_count": 11,
305 | "metadata": {},
306 | "output_type": "execute_result"
307 | }
308 | ],
309 | "source": [
310 | "import numpy as np\n",
311 | "p=np.asarray([100]).reshape(-1,1)\n",
312 | "r1.predict(p)"
313 | ]
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "metadata": {},
318 | "source": [
319 | "## Multilinear Regression\n",
320 | "\n",
321 | "The previous model was limited to two axes: one input variable and one target. Let's add some more variables. \n",
322 | "Now our equation will be **y = b0 + b1x1 + b2x2 + b3x3 ...** \n",
323 | "Here Sklearn gives us flexibility, as we can use the same class again. Let's try it out. \n",
324 | "The dataset is available [here](https://www.kaggle.com/quantbruce/real-estate-price-prediction)."
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 12,
330 | "metadata": {},
331 | "outputs": [
332 | {
333 | "data": {
334 | "text/html": [
335 | "\n",
336 | "\n",
349 | "
\n",
350 | " \n",
351 | " \n",
352 | " | \n",
353 | " No | \n",
354 | " X1 transaction date | \n",
355 | " X2 house age | \n",
356 | " X3 distance to the nearest MRT station | \n",
357 | " X4 number of convenience stores | \n",
358 | " X5 latitude | \n",
359 | " X6 longitude | \n",
360 | " Y house price of unit area | \n",
361 | "
\n",
362 | " \n",
363 | " \n",
364 | " \n",
365 | " | 0 | \n",
366 | " 1 | \n",
367 | " 2012.917 | \n",
368 | " 32.0 | \n",
369 | " 84.87882 | \n",
370 | " 10 | \n",
371 | " 24.98298 | \n",
372 | " 121.54024 | \n",
373 | " 37.9 | \n",
374 | "
\n",
375 | " \n",
376 | " | 1 | \n",
377 | " 2 | \n",
378 | " 2012.917 | \n",
379 | " 19.5 | \n",
380 | " 306.59470 | \n",
381 | " 9 | \n",
382 | " 24.98034 | \n",
383 | " 121.53951 | \n",
384 | " 42.2 | \n",
385 | "
\n",
386 | " \n",
387 | " | 2 | \n",
388 | " 3 | \n",
389 | " 2013.583 | \n",
390 | " 13.3 | \n",
391 | " 561.98450 | \n",
392 | " 5 | \n",
393 | " 24.98746 | \n",
394 | " 121.54391 | \n",
395 | " 47.3 | \n",
396 | "
\n",
397 | " \n",
398 | " | 3 | \n",
399 | " 4 | \n",
400 | " 2013.500 | \n",
401 | " 13.3 | \n",
402 | " 561.98450 | \n",
403 | " 5 | \n",
404 | " 24.98746 | \n",
405 | " 121.54391 | \n",
406 | " 54.8 | \n",
407 | "
\n",
408 | " \n",
409 | " | 4 | \n",
410 | " 5 | \n",
411 | " 2012.833 | \n",
412 | " 5.0 | \n",
413 | " 390.56840 | \n",
414 | " 5 | \n",
415 | " 24.97937 | \n",
416 | " 121.54245 | \n",
417 | " 43.1 | \n",
418 | "
\n",
419 | " \n",
420 | "
\n",
421 | "
"
422 | ],
423 | "text/plain": [
424 | " No X1 transaction date X2 house age \\\n",
425 | "0 1 2012.917 32.0 \n",
426 | "1 2 2012.917 19.5 \n",
427 | "2 3 2013.583 13.3 \n",
428 | "3 4 2013.500 13.3 \n",
429 | "4 5 2012.833 5.0 \n",
430 | "\n",
431 | " X3 distance to the nearest MRT station X4 number of convenience stores \\\n",
432 | "0 84.87882 10 \n",
433 | "1 306.59470 9 \n",
434 | "2 561.98450 5 \n",
435 | "3 561.98450 5 \n",
436 | "4 390.56840 5 \n",
437 | "\n",
438 | " X5 latitude X6 longitude Y house price of unit area \n",
439 | "0 24.98298 121.54024 37.9 \n",
440 | "1 24.98034 121.53951 42.2 \n",
441 | "2 24.98746 121.54391 47.3 \n",
442 | "3 24.98746 121.54391 54.8 \n",
443 | "4 24.97937 121.54245 43.1 "
444 | ]
445 | },
446 | "execution_count": 12,
447 | "metadata": {},
448 | "output_type": "execute_result"
449 | }
450 | ],
451 | "source": [
452 | "dataset = pd.read_csv('data/Realestate.csv')\n",
453 | "dataset.head()"
454 | ]
455 | },
456 | {
457 | "cell_type": "code",
458 | "execution_count": 13,
459 | "metadata": {},
460 | "outputs": [],
461 | "source": [
462 | "y = dataset[\"Y house price of unit area\"].values.reshape(-1,1)\n",
463 | "x = dataset.drop([\"No\",\"Y house price of unit area\",\"X1 transaction date\"],axis=1)"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": 14,
469 | "metadata": {},
470 | "outputs": [],
471 | "source": [
472 | "from sklearn.model_selection import train_test_split\n",
473 | "x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2,random_state=0)"
474 | ]
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": 15,
479 | "metadata": {},
480 | "outputs": [],
481 | "source": [
482 | "\"\"\"\n",
483 | "We need to apply scaling because the features have very different value ranges. Here we've applied Standard Scaler, but you should check out the other scalers too. \n",
484 | "Also, the same fitted scaler must be applied to both the training and the testing data.\n",
485 | "\"\"\"\n",
486 | "from sklearn.preprocessing import StandardScaler\n",
487 | "sc = StandardScaler()\n",
488 | "x_train = sc.fit_transform(x_train)\n",
489 | "x_test = sc.transform(x_test)"
490 | ]
491 | },
492 | {
493 | "cell_type": "code",
494 | "execution_count": 16,
495 | "metadata": {},
496 | "outputs": [],
497 | "source": [
498 | "from sklearn.linear_model import LinearRegression\n",
499 | "l1=LinearRegression()\n",
500 | "l1.fit(x_train,y_train)\n",
501 | "y_pred=l1.predict(x_test)"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": 17,
507 | "metadata": {},
508 | "outputs": [
509 | {
510 | "data": {
511 | "text/plain": [
512 | "array([[41.39074736],\n",
513 | " [12.35535341],\n",
514 | " [41.10500538],\n",
515 | " [12.16895621],\n",
516 | " [40.36129482]])"
517 | ]
518 | },
519 | "execution_count": 17,
520 | "metadata": {},
521 | "output_type": "execute_result"
522 | }
523 | ],
524 | "source": [
525 | "y_pred[:5]"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": 18,
531 | "metadata": {},
532 | "outputs": [
533 | {
534 | "data": {
535 | "text/plain": [
536 | "array([[45.3],\n",
537 | " [14.4],\n",
538 | " [46. ],\n",
539 | " [15.6],\n",
540 | " [50.2]])"
541 | ]
542 | },
543 | "execution_count": 18,
544 | "metadata": {},
545 | "output_type": "execute_result"
546 | }
547 | ],
548 | "source": [
549 | "y_test[:5]"
550 | ]
551 | },
552 | {
553 | "cell_type": "markdown",
554 | "metadata": {},
555 | "source": [
556 | "The values are pretty close. Let's find our error. \n",
557 | "We have multiple methods for measuring error, and you should check [this](https://www.dataquest.io/blog/understanding-regression-error-metrics/) to learn about the types of regression errors and when to use them.\n",
558 | "\n",
559 | "We'll be using mean squared error from [sklearn.metrics](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics)"
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": 19,
565 | "metadata": {},
566 | "outputs": [
567 | {
568 | "data": {
569 | "text/plain": [
570 | "62.172235622414036"
571 | ]
572 | },
573 | "execution_count": 19,
574 | "metadata": {},
575 | "output_type": "execute_result"
576 | }
577 | ],
578 | "source": [
579 | "from sklearn.metrics import mean_squared_error \n",
580 | "mean_squared_error(y_test,y_pred)"
581 | ]
582 | },
583 | {
584 | "cell_type": "markdown",
585 | "metadata": {},
586 | "source": [
587 | "## Dummy variable trap \n",
588 | "Last week we learned about One Hot Encoding our data, to give equal weightage to different classes, but we face a problem here!!\n",
589 | "\n",
590 | "Lets consider **y = b0 + b1x1 + b2x2 +b3x3** \n",
591 | "\n",
592 | "Where x2 and x3 are dummy variables for the same two-category column, i.e. if x2 = 1 then x3 = 0, and vice versa. \n",
593 | "So x3 = 1-x2 \n",
594 | "Applying in our equation \n",
595 | "\n",
596 | "y = b0 + b1x1 + b2x2 + b3(1-x2) \n",
597 | "or \n",
598 | "y = (b0 + b3) + b1x1 + (b2-b3)x2\n",
599 | "\n",
600 | "So b3 is absorbed into the constant, and only the difference (b2-b3) multiplies x2. Infinitely many combinations of b0, b2 and b3 give exactly the same predictions, so the individual coefficients cannot be determined uniquely (perfect multicollinearity) and become unstable and hard to interpret. \n",
601 | "\n",
602 | "This is the dummy variable trap, and the way to settle it is to drop one dummy column while creating them, e.g. `pd.get_dummies(..., drop_first=True)`.\n",
603 | "\n",
604 | "Check out more [here](https://www.youtube.com/watch?v=qrWx3OjZL3o)"
605 | ]
606 | },
607 | {
608 | "cell_type": "markdown",
609 | "metadata": {},
610 | "source": [
611 | "## Backward elimination\n",
612 | "This is a method of removing columns with little or no effect on our result. \n",
613 | "This makes our model faster and, in some cases, more robust. \n",
614 | "Refer [this](https://medium.com/@manjabogicevic/multiple-linear-regression-using-python-b99754591ac0) to learn more. "
615 | ]
616 | },
617 | {
618 | "cell_type": "markdown",
619 | "metadata": {},
620 | "source": [
621 | "# Polynomial regression\n",
622 | "\n",
623 | "Not all target variables depend linearly on the input variables.\n",
624 | "Let's consider a dataset of your position in a company and your Salary.\n",
625 | "A sample dataset is present [here](https://www.kaggle.com/testpython/polynomial-position-salary-data)\n",
626 | "\n",
627 | "The equation of a degree 2 polynomial looks like this \n",
628 | "**y = ax^2 + bx + c** \n",
629 | "\n",
630 | "A degree 3 equation looks like this \n",
631 | "**y = ax^3 + bx^2 + cx + d**"
632 | ]
633 | },
634 | {
635 | "cell_type": "code",
636 | "execution_count": 20,
637 | "metadata": {},
638 | "outputs": [
639 | {
640 | "data": {
641 | "text/html": [
642 | "\n",
643 | "\n",
656 | "
\n",
657 | " \n",
658 | " \n",
659 | " | \n",
660 | " Position | \n",
661 | " Level | \n",
662 | " Salary | \n",
663 | "
\n",
664 | " \n",
665 | " \n",
666 | " \n",
667 | " | 0 | \n",
668 | " Business Analyst | \n",
669 | " 1 | \n",
670 | " 45000 | \n",
671 | "
\n",
672 | " \n",
673 | " | 1 | \n",
674 | " Junior Consultant | \n",
675 | " 2 | \n",
676 | " 50000 | \n",
677 | "
\n",
678 | " \n",
679 | " | 2 | \n",
680 | " Senior Consultant | \n",
681 | " 3 | \n",
682 | " 60000 | \n",
683 | "
\n",
684 | " \n",
685 | " | 3 | \n",
686 | " Manager | \n",
687 | " 4 | \n",
688 | " 80000 | \n",
689 | "
\n",
690 | " \n",
691 | " | 4 | \n",
692 | " Country Manager | \n",
693 | " 5 | \n",
694 | " 110000 | \n",
695 | "
\n",
696 | " \n",
697 | "
\n",
698 | "
"
699 | ],
700 | "text/plain": [
701 | " Position Level Salary\n",
702 | "0 Business Analyst 1 45000\n",
703 | "1 Junior Consultant 2 50000\n",
704 | "2 Senior Consultant 3 60000\n",
705 | "3 Manager 4 80000\n",
706 | "4 Country Manager 5 110000"
707 | ]
708 | },
709 | "execution_count": 20,
710 | "metadata": {},
711 | "output_type": "execute_result"
712 | }
713 | ],
714 | "source": [
715 | "data = pd.read_csv('data/Position_Salaries.csv')\n",
716 | "data.head()"
717 | ]
718 | },
719 | {
720 | "cell_type": "code",
721 | "execution_count": 21,
722 | "metadata": {},
723 | "outputs": [],
724 | "source": [
725 | "x=data.iloc[:,1:2].values\n",
726 | "y=data.iloc[:,-1].values"
727 | ]
728 | },
729 | {
730 | "cell_type": "markdown",
731 | "metadata": {},
732 | "source": [
733 | "There's no Polynomial regression class in Scikit Learn, but we can convert our dataset to polynomial features and then apply Linear Regression. \n",
734 | "Check out more at **[Polynomial Features](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html)**"
735 | ]
736 | },
737 | {
738 | "cell_type": "code",
739 | "execution_count": 22,
740 | "metadata": {},
741 | "outputs": [
742 | {
743 | "data": {
744 | "text/plain": [
745 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
746 | ]
747 | },
748 | "execution_count": 22,
749 | "metadata": {},
750 | "output_type": "execute_result"
751 | }
752 | ],
753 | "source": [
754 | "from sklearn.linear_model import LinearRegression\n",
755 | "from sklearn.preprocessing import PolynomialFeatures\n",
756 | "\n",
757 | "p1= PolynomialFeatures(degree=2) # lets start with degree 2\n",
758 | "x_poly=p1.fit_transform(x)\n",
759 | "l2=LinearRegression()\n",
760 | "l2.fit(x_poly,y)"
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "execution_count": 23,
766 | "metadata": {},
767 | "outputs": [
768 | {
769 | "data": {
770 | "image/png": "\n",
771 | "text/plain": [
772 | ""
773 | ]
774 | },
775 | "metadata": {
776 | "needs_background": "light"
777 | },
778 | "output_type": "display_data"
779 | }
780 | ],
781 | "source": [
782 | "plt.scatter(x, y, color = 'red')\n",
783 | "plt.plot(x, l2.predict(p1.fit_transform(x)), color = 'blue')\n",
784 | "plt.title('Truth or Bluff (Regression Model)')\n",
785 | "plt.xlabel('Position level')\n",
786 | "plt.ylabel('Salary')\n",
787 | "plt.show()"
788 | ]
789 | },
790 | {
791 | "cell_type": "code",
792 | "execution_count": 24,
793 | "metadata": {},
794 | "outputs": [
795 | {
796 | "data": {
797 | "image/png": "\n",
798 | "text/plain": [
799 | ""
800 | ]
801 | },
802 | "metadata": {
803 | "needs_background": "light"
804 | },
805 | "output_type": "display_data"
806 | }
807 | ],
808 | "source": [
809 | "p1= PolynomialFeatures(degree=5) # lets start with degree 5\n",
810 | "x_poly=p1.fit_transform(x)\n",
811 | "l2=LinearRegression()\n",
812 | "l2.fit(x_poly,y)\n",
813 | "\n",
814 | "plt.scatter(x, y, color = 'red')\n",
815 | "plt.plot(x, l2.predict(p1.fit_transform(x)), color = 'blue')\n",
816 | "plt.title('Truth or Bluff (Regression Model)')\n",
817 | "plt.xlabel('Position level')\n",
818 | "plt.ylabel('Salary')\n",
819 | "plt.show()"
820 | ]
821 | },
822 | {
823 | "cell_type": "markdown",
824 | "metadata": {},
825 | "source": [
826 | "That's all for this week. \n",
827 | "Do refer to the links and practice on Kaggle. \n",
828 | "You can also refer to these for more regression models. \n",
829 | "- [Super Data Science](https://www.superdatascience.com/pages/machine-learning)\n",
830 | "- [Scikit-Learn](https://scikit-learn.org/stable/index.html)\n",
831 | "- [YouTube](https://www.youtube.com/watch?v=E5RjzSK0fvY)"
832 | ]
833 | }
834 | ],
835 | "metadata": {
836 | "kernelspec": {
837 | "display_name": "Python 3",
838 | "language": "python",
839 | "name": "python3"
840 | },
841 | "language_info": {
842 | "codemirror_mode": {
843 | "name": "ipython",
844 | "version": 3
845 | },
846 | "file_extension": ".py",
847 | "mimetype": "text/x-python",
848 | "name": "python",
849 | "nbconvert_exporter": "python",
850 | "pygments_lexer": "ipython3",
851 | "version": "3.8.2"
852 | }
853 | },
854 | "nbformat": 4,
855 | "nbformat_minor": 2
856 | }
857 |
--------------------------------------------------------------------------------