├── .gitignore
├── LICENSE
├── Part 1 - Data Preprocessing
│   ├── Data.csv
│   └── preprocessing.py
├── Part 10 - Model Selection & Boosting
│   ├── Section 26 - Model Selection
│   │   ├── Social_Network_Ads.csv
│   │   └── gridsearch_kfold.py
│   └── Section 27 - XGBoost
│       ├── Churn_Modelling.csv
│       └── xgb.py
├── Part 2 - Regression
│   ├── Section 2 - Simple Linear Regression
│   │   ├── Salary_Data.csv
│   │   └── slr.py
│   ├── Section 3 - Multiple Linear Regression
│   │   ├── 50_Startups.csv
│   │   └── mlr.py
│   ├── Section 4 - Polynomial Regression
│   │   ├── Position_Salaries.csv
│   │   └── plr.py
│   ├── Section 5 - Support Vector Regression (SVR)
│   │   ├── Position_Salaries.csv
│   │   └── svr.py
│   ├── Section 6 - Decision Tree Regression
│   │   ├── Position_Salaries.csv
│   │   └── dtr.py
│   └── Section 7 - Random Forest Regression
│       ├── Position_Salaries.csv
│       └── rfr.py
├── Part 3 - Classification
│   ├── Section 10 - Support Vector Machine (SVM)
│   │   ├── Social_Network_Ads.csv
│   │   └── svm.py
│   ├── Section 11 - Naive Bayes
│   │   ├── Social_Network_Ads.csv
│   │   └── nb.py
│   ├── Section 12 - Decision Tree Classification
│   │   ├── Social_Network_Ads.csv
│   │   └── dtc.py
│   ├── Section 13 - Random Forest Classification
│   │   ├── Social_Network_Ads.csv
│   │   └── rfc.py
│   ├── Section 8 - Logistic Regression
│   │   ├── Social_Network_Ads.csv
│   │   ├── lr.py
│   │   └── lr_with_gender.py
│   └── Section 9 - K-Nearest Neighbors (K-NN)
│       ├── Social_Network_Ads.csv
│       └── knn.py
├── Part 4 - Clustering
│   ├── Section 14 - K-Means Clustering
│   │   ├── Mall_Customers.csv
│   │   └── kmeans.py
│   └── Section 15 - Hierarchical Clustering
│       ├── Mall_Customers.csv
│       └── hc.py
├── Part 5 - Association Rule Learning
│   ├── Section 16 - Apriori
│   │   ├── Market_Basket_Optimisation.csv
│   │   └── apriori.py
│   └── Section 17 - Eclat
│       ├── Market_Basket_Optimisation.csv
│       └── eclat.py
├── Part 6 - Reinforcement Learning
│   ├── Section 18 - Upper Confidence Bound (UCB)
│   │   ├── Ads_CTR_Optimisation.csv
│   │   └── ucb.py
│   └── Section 19 - Thompson Sampling
│       ├── Ads_CTR_Optimisation.csv
│       ├── Thompson_Sampling_Slide.png
│       └── ts.py
├── Part 7 - Natural Language Processing
│   └── Section 20 - Natural Language Processing
│       ├── Restaurant_Reviews.tsv
│       ├── nlp_naive-bayes.py
│       └── nlp_random-forest.py
├── Part 8 - Deep Learning
│   ├── Section 21 - Artificial Neural Networks (ANN)
│   │   ├── Churn_Modelling.csv
│   │   └── ann.py
│   └── Section 22 - Convolutional Neural Networks (CNN)
│       └── cnn.py
├── Part 9 - Dimensionality Reduction
│   ├── Section 23 - Principal Component Analysis (PCA)
│   │   ├── Wine.csv
│   │   └── pca.py
│   ├── Section 24 - Linear Discriminant Analysis (LDA)
│   │   ├── Wine.csv
│   │   └── lda.py
│   └── Section 25 - Kernel PCA
│       ├── Social_Network_Ads.csv
│       └── kernel_pca.py
├── README.md
└── Tools
    ├── __init__.py
    ├── accuracy.py
    └── apyori.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | base-directory-tree/.babelrc 106 | base-directory-tree/.postcssrc 107 | base-directory-tree/LICENSE 108 | base-directory-tree/package.json 109 | base-directory-tree/public/favicon.ico 110 | base-directory-tree/public/index.html 111 | base-directory-tree/README.md 112 | base-directory-tree/src/App.vue 113 | base-directory-tree/src/assets/logo.png 114 | base-directory-tree/src/components/AppArrow.vue 115 | base-directory-tree/src/components/AppDarkmode.vue 116 | base-directory-tree/src/components/AppItem.vue 117 | base-directory-tree/src/components/AppLightmode.vue 118 | base-directory-tree/src/components/AppMoreinfo.vue 119 | base-directory-tree/src/components/AppNav.vue 120 | base-directory-tree/src/components/AppNote.vue 121 | base-directory-tree/src/components/AppToggle.vue 122 | base-directory-tree/src/main.js 123 | base-directory-tree/src/name.js 124 | base-directory-tree/src/store.js 125 | base-directory-tree/src/tree.js 126 | base-directory-tree/yarn.lock 127 | 128 | *.jpg -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 M. Doosti Lakhani 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/Part 1 - Data Preprocessing/Data.csv:
--------------------------------------------------------------------------------
Country,Age,Salary,Purchased
France,44,72000,No
Spain,27,48000,Yes
Germany,30,54000,No
Spain,38,61000,No
Germany,40,,Yes
France,35,58000,Yes
Spain,,52000,No
France,48,79000,Yes
Germany,50,83000,No
France,37,67000,Yes
--------------------------------------------------------------------------------
/Part 1 - Data Preprocessing/preprocessing.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 16 23:11:13 2018

@author: Mohammad Doosti Lakhani
"""

# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('Data.csv')
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:,3].values

# resolving missing data by imputing the column mean
# (note: Imputer is the pre-0.20 sklearn API; newer versions use sklearn.impute.SimpleImputer)
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis= 0)
imputer = imputer.fit(x[:,1:3])
x[:,1:3] = imputer.transform(x[:,1:3])

# encoding categorical data with LabelEncoder and OneHotEncoder
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_x = LabelEncoder()
labelencoder_x = labelencoder_x.fit(x[:,0])
x[:,0] = labelencoder_x.transform(x[:,0])

labelencoder_y = LabelEncoder()
labelencoder_y = labelencoder_y.fit(y)
y = labelencoder_y.transform(y)

onehotencoder_x = OneHotEncoder(categorical_features=[0])
onehotencoder_x = onehotencoder_x.fit(x)
x = onehotencoder_x.transform(x).toarray()

# splitting dataset into Train set and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.8 , random_state=0)

# feature scaling
from sklearn.preprocessing import StandardScaler
standardscaler_x = StandardScaler()
standardscaler_x = standardscaler_x.fit(x_train)
x_train = standardscaler_x.transform(x_train)
x_test = standardscaler_x.transform(x_test)

--------------------------------------------------------------------------------
/Part 10 - Model Selection & Boosting/Section 26 - Model Selection/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
User ID,Gender,Age,EstimatedSalary,Purchased
15624510,Male,19,19000,0
15810944,Male,35,20000,0
15668575,Female,26,43000,0
15603246,Female,27,57000,0
15804002,Male,19,76000,0
15728773,Male,27,58000,0
15598044,Female,27,84000,0
15694829,Female,32,150000,1
15600575,Male,25,33000,0
15727311,Female,35,65000,0
15570769,Female,26,80000,0
15606274,Female,26,52000,0
15746139,Male,20,86000,0
15704987,Male,32,18000,0
15628972,Male,18,82000,0
15697686,Male,29,80000,0
15733883,Male,47,25000,1
15617482,Male,45,26000,1
15704583,Male,46,28000,1
15621083,Female,48,29000,1
15649487,Male,45,22000,1
15736760,Female,47,49000,1
15714658,Male,48,41000,1
15599081,Female,45,22000,1
15705113,Male,46,23000,1
15631159,Male,47,20000,1
15792818,Male,49,28000,1
15633531,Female,47,30000,1
15744529,Male,29,43000,0
15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 
144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000.0,0 216 | 15622478,Male,47.0,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 
15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 
15778830,Female,53,34000,1
15768072,Female,47,50000,1
15768293,Female,42,79000,0
15654456,Male,42,104000,1
15807525,Female,59,29000,1
15574372,Female,58,47000,1
15671249,Male,46,88000,1
15779744,Male,38,71000,0
15624755,Female,54,26000,1
15611430,Female,60,46000,1
15774744,Male,60,83000,1
15629885,Female,39,73000,0
15708791,Male,59,130000,1
15793890,Female,37,80000,0
15646091,Female,46,32000,1
15596984,Female,46,74000,0
15800215,Female,42,53000,0
15577806,Male,41,87000,1
15749381,Female,58,23000,1
15683758,Male,42,64000,0
15670615,Male,48,33000,1
15715622,Female,44,139000,1
15707634,Male,49,28000,1
15806901,Female,57,33000,1
15775335,Male,56,60000,1
15724150,Female,49,39000,1
15627220,Male,39,71000,0
15672330,Male,47,34000,1
15668521,Female,48,35000,1
15807837,Male,48,33000,1
15592570,Male,47,23000,1
15748589,Female,45,45000,1
15635893,Male,60,42000,1
15757632,Female,39,59000,0
15691863,Female,46,41000,1
15706071,Male,51,23000,1
15654296,Female,50,20000,1
15755018,Male,36,33000,0
15594041,Female,49,36000,1
--------------------------------------------------------------------------------
/Part 10 - Model Selection & Boosting/Section 26 - Model Selection/gridsearch_kfold.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 15:47:19 2018

@author: Mohammad Doosti Lakhani
"""

# Importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing dataset
dataset = pd.read_csv('Social_Network_Ads.csv')
x = dataset.iloc[:,2:-1].values
y = dataset.iloc[:,-1].values

# Splitting dataset into Train set and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0)

# Fitting the Decision Tree classifier to the train set
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(random_state=0, presort=True, criterion='entropy')
classifier = classifier.fit(x_train,y_train)

# Make the prediction on train set
y_train_pred = classifier.predict(x_train)

# Make the prediction on test set
y_test_pred = classifier.predict(x_test)

# Accuracy on train and test sets using K-Fold Cross Validation
from sklearn.model_selection import cross_val_score
accs_train = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv=10)
accs_test = cross_val_score(estimator = classifier, X = x_test, y = y_test, cv=10)

acc_train = accs_train.mean()
acc_test = accs_test.mean()
print('Train status = %{} Accuracy'.format(acc_train*100))
print('Test status = %{} Accuracy'.format(acc_test*100))

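# (added note) The ten fold scores above also expose the variance of the
# estimate, not just its mean; a wide spread across folds suggests the accuracy
# figure is unstable. A minimal sketch using the arrays already computed:
print('Train fold std = {:.4f}, Test fold std = {:.4f}'.format(accs_train.std(), accs_test.std()))
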
# Applying GridSearch to find best parameters
from sklearn.model_selection import GridSearchCV
parameters = [{ 'criterion' : ['gini'], 'splitter':['best','random'], 'min_samples_split':[0.1,0.2,0.3,0.4,0.5],
                'min_samples_leaf': [1,2,3,4,5]},
              {'criterion' : ['entropy'], 'splitter':['best','random'], 'min_samples_split':[0.1,0.2,0.3,0.4,0.5],
                'min_samples_leaf': [1,2,3,4,5]} ]
gridsearch = GridSearchCV(estimator = classifier, param_grid = parameters, refit= False, scoring='accuracy', cv=10)
gridsearch = gridsearch.fit(x,y)
optimal_accuracy = gridsearch.best_score_
optimal_parameters = gridsearch.best_params_

# Train model using optimal parameters obtained by GridSearch
classifier = DecisionTreeClassifier(random_state=0, presort=True, criterion='entropy', splitter='best',
                                    min_samples_leaf=1 , min_samples_split=0.1)
classifier = classifier.fit(x_train,y_train)


# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = x_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 1),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 1))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Decision Tree Classifier - Entropy (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = x_test, y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 1),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 1))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Decision Tree Classifier - Entropy (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
--------------------------------------------------------------------------------
/Part 10 - Model Selection & Boosting/Section 27 - XGBoost/xgb.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 18:30:10 2018

@author: Mohammad Doosti Lakhani
"""
"""
Be careful not to name your file "xgboost.py"; if you do, Python will import
your own file instead of the library and the import will fail!
"""
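# (added note) This happens because Python searches the script's own directory
# before installed packages, so a local xgboost.py shadows the real xgboost
# package. Renaming the script, as done here with xgb.py, is the usual fix.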
# Importing Library
import pandas as pd

# Import Dataset
dataset = pd.read_csv('Churn_Modelling.csv')
x = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values

# Encoding categorical data to one hot encoded form
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_country = LabelEncoder()
x[:, 1] = labelencoder_country.fit_transform(x[:, 1]) # Encoding 'Geography' to numbers

labelencoder_gender = LabelEncoder()
x[:, 2] = labelencoder_gender.fit_transform(x[:, 2]) # Encoding 'Gender' to numbers

onehotencoder = OneHotEncoder(categorical_features = [1])
x = onehotencoder.fit_transform(x).todense() # Encoding 'Geography' to one hot

x = x[:, 1:] # get rid of dummy variable trap

# Feature Scaling
# (note: x is standardized first, then rescaled to [0, 1]; the MinMaxScaler output is what the model sees)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x = sc.fit_transform(x)

from sklearn.preprocessing import MinMaxScaler
scalar = MinMaxScaler()
x= scalar.fit_transform(x)

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

# Fitting XGboost model
from xgboost import XGBClassifier
param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic','eval_metric':'auc'}
classifier = XGBClassifier(**param)
classifier = classifier.fit(x_train,y_train)

# Accuracy on train and test sets using K-Fold Cross Validation
from sklearn.model_selection import cross_val_score
accs_train = cross_val_score(estimator = classifier, X = x_train, y = y_train, cv=10)
accs_test = cross_val_score(estimator = classifier, X = x_test, y = y_test, cv=10)

acc_train = accs_train.mean()
acc_test = accs_test.mean()
print('Train status = %{} Accuracy'.format(acc_train*100))
print('Test status = %{} Accuracy'.format(acc_test*100))

--------------------------------------------------------------------------------
/Part 2 - Regression/Section 2 - Simple Linear Regression/Salary_Data.csv:
--------------------------------------------------------------------------------
YearsExperience,Salary
1.1,39343.00
1.3,46205.00
1.5,37731.00
2.0,43525.00
2.2,39891.00
2.9,56642.00
3.0,60150.00
3.2,54445.00
3.2,64445.00
3.7,57189.00
3.9,63218.00
4.0,55794.00
4.0,56957.00
4.1,57081.00
4.5,61111.00
4.9,67938.00
5.1,66029.00
5.3,83088.00
5.9,81363.00
6.0,93940.00
6.8,91738.00
7.1,98273.00
7.9,101302.00
8.2,113812.00
8.7,109431.00
9.0,105582.00
9.5,116969.00
9.6,112635.00
10.3,122391.00
10.5,121872.00
--------------------------------------------------------------------------------
/Part 2 - Regression/Section 2 - Simple Linear Regression/slr.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 17 22:37:49 2018

@author: Mohammad Doosti Lakhani
"""

# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('Salary_Data.csv')
x = dataset.iloc[:,0].values
y = dataset.iloc[:,1].values

# our dataset has no missing values and does not need feature scaling
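# (added note) With only 30 samples in Salary_Data.csv, train_size = 2/3 below
# leaves 20 rows for training and 10 for testing, so the reported score should
# be read with that small test set in mind.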
# splitting dataset into Train set and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 2/3 , random_state=0)


# reshaping train and test sets because they have a single feature
x_train = x_train.reshape(-1,1)
y_train = y_train.reshape(-1,1)
x_test = x_test.reshape(-1,1)
y_test = y_test.reshape(-1,1)

# fitting Linear Regression to the train set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor = regressor.fit(x_train,y_train)

# predicting test set result
y_predict = regressor.predict(x_test)
score = regressor.score(x_test,y_test) # we got an R^2 score of about 0.97 on the test set!

# visualize the fitted model and our data
plt.scatter(x_train,y_train, color ='red', label='Train Values', alpha=0.6)
plt.scatter(x_test,y_test,color='blue', label ='Test Values', alpha=0.6)
plt.plot(x_train,regressor.predict(x_train),color='green')
plt.title('Salary vs Experience (train data)')
plt.xlabel('Experience')
plt.ylabel('Salary')
plt.legend()
plt.grid()
plt.show()

--------------------------------------------------------------------------------
/Part 2 - Regression/Section 3 - Multiple Linear Regression/50_Startups.csv:
--------------------------------------------------------------------------------
R&D Spend,Administration,Marketing Spend,State,Profit
165349.2,136897.8,471784.1,New York,192261.83
162597.7,151377.59,443898.53,California,191792.06
153441.51,101145.55,407934.54,Florida,191050.39
144372.41,118671.85,383199.62,New York,182901.99
142107.34,91391.77,366168.42,Florida,166187.94
131876.9,99814.71,362861.36,New York,156991.12
134615.46,147198.87,127716.82,California,156122.51
130298.13,145530.06,323876.68,Florida,155752.6
120542.52,148718.95,311613.29,New York,152211.77
123334.88,108679.17,304981.62,California,149759.96
101913.08,110594.11,229160.95,Florida,146121.95
100671.96,91790.61,249744.55,California,144259.4
93863.75,127320.38,249839.44,Florida,141585.52
91992.39,135495.07,252664.93,California,134307.35
119943.24,156547.42,256512.92,Florida,132602.65
114523.61,122616.84,261776.23,New York,129917.04
78013.11,121597.55,264346.06,California,126992.93
94657.16,145077.58,282574.31,New York,125370.37
91749.16,114175.79,294919.57,Florida,124266.9
86419.7,153514.11,0,New York,122776.86
76253.86,113867.3,298664.47,California,118474.03
78389.47,153773.43,299737.29,New York,111313.02
73994.56,122782.75,303319.26,Florida,110352.25
67532.53,105751.03,304768.73,Florida,108733.99
77044.01,99281.34,140574.81,New York,108552.04
64664.71,139553.16,137962.62,California,107404.34
75328.87,144135.98,134050.07,Florida,105733.54
72107.6,127864.55,353183.81,New York,105008.31
66051.52,182645.56,118148.2,Florida,103282.38
65605.48,153032.06,107138.38,New York,101004.64
61994.48,115641.28,91131.24,Florida,99937.59
61136.38,152701.92,88218.23,New York,97483.56
63408.86,129219.61,46085.25,California,97427.84
55493.95,103057.49,214634.81,Florida,96778.92
46426.07,157693.92,210797.67,California,96712.8
46014.02,85047.44,205517.64,New York,96479.51
28663.76,127056.21,201126.82,Florida,90708.19
44069.95,51283.14,197029.42,California,89949.14
20229.59,65947.93,185265.1,New York,81229.06
38558.51,82982.09,174999.3,California,81005.76
28754.33,118546.05,172795.67,California,78239.91
27892.92,84710.77,164470.71,Florida,77798.83
23640.93,96189.63,148001.11,California,71498.49
15505.73,127382.3,35534.17,New York,69758.98
22177.74,154806.14,28334.72,California,65200.33
1000.23,124153.04,1903.93,New York,64926.08
1315.46,115816.21,297114.46,Florida,49490.75
0,135426.92,0,California,42559.73
542.05,51743.15,0,New York,35673.41
0,116983.8,45173.06,California,14681.4
--------------------------------------------------------------------------------
/Part 2 - Regression/Section 3 - Multiple Linear Regression/mlr.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 19 00:10:53 2018

@author: Mohammad Doosti Lakhani
"""

# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('50_Startups.csv')
x = dataset.iloc[:,:-1].values
y = dataset.iloc[:,4].values

# encoding categorical data with LabelEncoder and OneHotEncoder
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_x = LabelEncoder()
labelencoder_x = labelencoder_x.fit(x[:,3])
x[:,3] = labelencoder_x.transform(x[:,3])

onehotencoder_x = OneHotEncoder(categorical_features=[3])
onehotencoder_x = onehotencoder_x.fit(x)
x = onehotencoder_x.transform(x).toarray()

# getting rid of the Dummy Variable Trap
x = x[:,1:]

# splitting dataset into Train set and Test set
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.8 , random_state=0)

# optimal model using the Backward Elimination approach
import statsmodels.formula.api as sm # (in newer statsmodels, OLS lives in statsmodels.api)
x_train = np.append(arr = np.ones(shape= (40,1)) , values = x_train ,axis = 1) # adding the intercept column
x_test = np.append(arr = np.ones(shape= (10,1)) , values = x_test ,axis = 1)
x_optimal = x_train
regressor_OLS = sm.OLS(endog = y_train,exog = x_optimal).fit()
print(regressor_OLS.summary())

# backward main loop: while the largest p-value exceeds 0.05, drop that variable and refit
# (the original condition compared np.argmax -- the *index* of the largest p-value --
# against 0.05, which is not the intended test; np.max gives the p-value itself)
while np.max(regressor_OLS.pvalues) > 0.05:
    indv_index = np.argmax(regressor_OLS.pvalues) # index of the independent variable with max p-value
    x_optimal = np.delete(x_optimal,indv_index,axis =1)
    x_test = np.delete(x_test,indv_index,axis =1)
    regressor_OLS = sm.OLS(endog = y_train,exog = x_optimal).fit()
    print(regressor_OLS.summary())

# predicting using the optimal model
y_predict = regressor_OLS.predict(x_test)

# visualize the fitted model and our data (3D)
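# (added sketch) The comment above promises a 3D view but no plot follows; a
# minimal sketch, assuming we chart the two strongest raw predictors against
# Profit from the original dataframe:
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the '3d' projection
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(dataset['R&D Spend'], dataset['Marketing Spend'], y, color='red', alpha=0.6)
ax.set_xlabel('R&D Spend')
ax.set_ylabel('Marketing Spend')
ax.set_zlabel('Profit')
plt.show()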
--------------------------------------------------------------------------------
/Part 2 - Regression/Section 4 - Polynomial Regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/Part 2 - Regression/Section 4 - Polynomial Regression/plr.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 22 00:31:44 2018

@author: Mohammad Doosti Lakhani
"""
# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
x = dataset.iloc[:,1].values
y = dataset.iloc[:,2].values

# Because the dataset is tiny and deliberately polynomial in shape, we do not split it into train and test sets.

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_lin_regressor = PolynomialFeatures(degree = 4)
poly_lin_regressor = poly_lin_regressor.fit(x.reshape(-1,1))
x = poly_lin_regressor.transform(x.reshape(-1,1)) # prepared x for train

# poly_lin_regressor = poly_lin_regressor.fit(x,y)
lin_regressor = LinearRegression()
lin_regressor = lin_regressor.fit(x,y)

# predicting Y values using LinearRegression with input by PolynomialFeatures
y_prediction = lin_regressor.predict(x)

# visualize the fitted model and our data
plt.scatter(x[:,1],y, color ='red', alpha=0.6)
plt.plot(x[:,1],y_prediction,color='green')
plt.title('Level vs Salary (train data)')
plt.xlabel('Level')
plt.ylabel('Salary')
plt.legend()
plt.grid()
plt.show()

--------------------------------------------------------------------------------
/Part 2 - Regression/Section 5 - Support Vector Regression (SVR)/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/Part 2 - Regression/Section 5 - Support Vector Regression (SVR)/svr.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 13 01:48:25 2018

@author: Mohammad Doosti Lakhani
"""

# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
x = dataset.iloc[:,1].values
y = dataset.iloc[:,-1].values

# reshape x and y because they just have one feature
x = x.reshape(-1,1)
y = y.reshape(-1,1)

# feature scaling
from sklearn.preprocessing import StandardScaler
standardscaler_x = StandardScaler()
x = standardscaler_x.fit_transform(x)
standardscaler_y = StandardScaler()
y = standardscaler_y.fit_transform(y)

# reshape to 1D array, necessary for the model
y = y.reshape(len(y),)

# fitting the SVR model to the data using the poly kernel
from sklearn.svm import SVR
regressor = SVR(kernel='poly')
regressor = regressor.fit(x,y)

# scaling test data for prediction
test = np.zeros(1) # we are testing just one value
test[0]= 6.5
test = test.reshape(1,1) # reshape to 2D array!
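# (added note) New query points must pass through the *same* fitted scaler as
# the training data; calling fit_transform again here would re-estimate the
# mean and variance from the single test value and silently distort the result.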
test = standardscaler_x.transform(test) # rescaling test data like train data

# making prediction
y_pred = regressor.predict(test)

# inverse scaling y back to the real salary value
y_predict = standardscaler_y.inverse_transform(y_pred)

# visualize the fitted model and our data
plt.scatter(x,y, color ='red', alpha=0.6)
plt.scatter(test,y_pred,color = 'blue', marker='D')
plt.plot(x,regressor.predict(x),color='green')
plt.title('Level vs Salary (train data) using poly kernel')
plt.xlabel('Level')
plt.ylabel('Salary')
plt.legend()
plt.grid()
plt.show()


#################################
# fitting the SVR model with the rbf kernel
regressor2 = SVR(kernel='rbf')
regressor2 = regressor2.fit(x,y)

# making prediction
y_pred = regressor2.predict(test)

# inverse scaling y back to the real salary value
y_predict = standardscaler_y.inverse_transform(y_pred)

# visualize the fitted model and our data
plt.scatter(x,y, color ='red', alpha=0.6)
plt.scatter(test,y_pred,color = 'blue', marker='D')
plt.plot(x,regressor2.predict(x),color='green')
plt.title('Level vs Salary (train data) using rbf kernel')
plt.xlabel('Level')
plt.ylabel('Salary')
plt.legend()
plt.grid()
plt.show()


--------------------------------------------------------------------------------
/Part 2 - Regression/Section 6 - Decision Tree Regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/Part 2 - Regression/Section 6 - Decision Tree Regression/dtr.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 15 04:23:21 2018

@author: Mohammad Doosti Lakhani
"""

# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
x = dataset.iloc[:,1].values
y = dataset.iloc[:,-1].values

# reshape x and y because they just have one feature
x = x.reshape(-1,1)
y = y.reshape(-1,1)


# fitting the Decision Tree Regression model
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state = 0)
regressor= regressor.fit(x,y)

# making prediction (predict expects a 2D array, hence [[6.5]])
y_pred = regressor.predict(np.array([[6.5]]))

# visualize the fitted model and our data
# we can see the averaged values predicted by the Decision Tree Regressor:
# each flat step is the prediction for one leaf's region of the input space.
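# (added note) The fine grid (step 0.01) below is what makes the staircase
# visible; plotting only the ten original x values would connect the leaf means
# with straight lines and hide the piecewise-constant shape of the tree.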
x_grid = np.arange(min(x),max(x),0.01)
x_grid = x_grid.reshape(-1,1)
plt.scatter(x,y, color ='red', alpha=0.6)
plt.scatter(6.5,y_pred,color = 'blue', marker='D')
plt.plot(x_grid,regressor.predict(x_grid),color='green')
plt.title('Level vs Salary (train data) using Decision Tree Regression')
plt.xlabel('Level')
plt.ylabel('Salary')
plt.legend()
plt.grid()
plt.show()


##################################### WITH FEATURE SCALING ########################################

from sklearn.preprocessing import StandardScaler
standardscaler_x = StandardScaler()
x = standardscaler_x.fit_transform(x)
standardscaler_y = StandardScaler()
y = standardscaler_y.fit_transform(y)

# fitting the Decision Tree Regression model
regressor2 = DecisionTreeRegressor(random_state = 0)
regressor2 = regressor2.fit(x,y)

# scaling test data for prediction
test = np.zeros(1) # we are testing just one value
test[0]= 6.5
test = test.reshape(1,1) # reshape to 2D array!
test = standardscaler_x.transform(test) # rescaling test data like train data

# making prediction
y_pred2 = regressor2.predict(test)

# inverse scaling y back to the real salary value
y_predict = standardscaler_y.inverse_transform(y_pred2)

# we get the same value: decision tree splits are unaffected by feature scaling.
# Scaling is still necessary for many other models, but on this dataset it made no difference.

--------------------------------------------------------------------------------
/Part 2 - Regression/Section 7 - Random Forest Regression/Position_Salaries.csv:
--------------------------------------------------------------------------------
Position,Level,Salary
Business Analyst,1,45000
Junior Consultant,2,50000
Senior Consultant,3,60000
Manager,4,80000
Country Manager,5,110000
Region Manager,6,150000
Partner,7,200000
Senior Partner,8,300000
C-level,9,500000
CEO,10,1000000
--------------------------------------------------------------------------------
/Part 2 - Regression/Section 7 - Random Forest Regression/rfr.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 16 00:17:26 2018

@author: Mohammad Doosti Lakhani
"""

# A nonlinear and non-continuous regression problem.

# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# importing dataset
dataset = pd.read_csv('Position_Salaries.csv')
x = dataset.iloc[:,1].values
y = dataset.iloc[:,-1].values

# reshape x because it has just one feature
x = x.reshape(-1,1)
# array y should stay 1D, but x should be reshaped to 2D

# fitting the Random Forest Regression model
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators = 500, random_state = 0)
regressor= regressor.fit(x,y)
regressor2 = RandomForestRegressor(n_estimators = 10, random_state = 0) # to show the difference the number of trees makes
regressor2 = regressor2.fit(x,y)


# making prediction (predict expects a 2D array, hence [[6.5]])
y_pred = regressor.predict(np.array([[6.5]]))

# visualize the fitted model and our data
# we can see the averaged values calculated by Random Forest Regression:
# each step is the average of all the trees' predictions over that region.
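# (added note) With 500 trees the averaged prediction curve shows many more,
# finer steps than the 10-tree forest plotted alongside it below; averaging
# more trees smooths the staircase without ever making it truly continuous.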
x_grid = np.arange(min(x),max(x),0.01)
x_grid = x_grid.reshape(-1,1)
plt.scatter(x,y, color ='red', alpha=0.6)
plt.scatter(6.5,y_pred,color = 'blue', marker='D',alpha = 0.5)
plt.plot(x_grid,regressor.predict(x_grid),color='green' , alpha= 0.7)
plt.plot(x_grid,regressor2.predict(x_grid),color='purple', alpha = 0.6)
plt.title('Level vs Salary (train data) using Random Forest Regression')
plt.xlabel('Level')
plt.ylabel('Salary')
plt.legend()
plt.grid()
plt.show()

--------------------------------------------------------------------------------
/Part 3 - Classification/Section 10 - Support Vector Machine (SVM)/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
User ID,Gender,Age,EstimatedSalary,Purchased
15624510,Male,19,19000,0
15810944,Male,35,20000,0
15668575,Female,26,43000,0
15603246,Female,27,57000,0
15804002,Male,19,76000,0
15728773,Male,27,58000,0
15598044,Female,27,84000,0
15694829,Female,32,150000,1
15600575,Male,25,33000,0
15727311,Female,35,65000,0
15570769,Female,26,80000,0
15606274,Female,26,52000,0
15746139,Male,20,86000,0
15704987,Male,32,18000,0
15628972,Male,18,82000,0
15697686,Male,29,80000,0
15733883,Male,47,25000,1
15617482,Male,45,26000,1
15704583,Male,46,28000,1
15621083,Female,48,29000,1
15649487,Male,45,22000,1
15736760,Female,47,49000,1
15714658,Male,48,41000,1
15599081,Female,45,22000,1
15705113,Male,46,23000,1
15631159,Male,47,20000,1
15792818,Male,49,28000,1
15633531,Female,47,30000,1
15744529,Male,29,43000,0
15669656,Male,31,18000,0
15581198,Male,31,74000,0
15729054,Female,27,137000,1
15573452,Female,21,16000,0
15776733,Female,28,44000,0
15724858,Male,27,90000,0
15713144,Male,35,27000,0
15690188,Female,33,28000,0
15689425,Male,30,49000,0
15671766,Female,26,72000,0
15782806,Female,27,31000,0
15764419,Female,27,17000,0
15591915,Female,33,51000,0
15772798,Male,35,108000,0
15792008,Male,30,15000,0
15715541,Female,28,84000,0
15639277,Male,23,20000,0
15798850,Male,25,79000,0
15776348,Female,27,54000,0
15727696,Male,30,135000,1
15793813,Female,31,89000,0
15694395,Female,24,32000,0
15764195,Female,18,44000,0
15744919,Female,29,83000,0
15671655,Female,35,23000,0
15654901,Female,27,58000,0
15649136,Female,24,55000,0
15775562,Female,23,48000,0
15807481,Male,28,79000,0
15642885,Male,22,18000,0
15789109,Female,32,117000,0
15814004,Male,27,20000,0
15673619,Male,25,87000,0
15595135,Female,23,66000,0
15583681,Male,32,120000,1
15605000,Female,59,83000,0
15718071,Male,24,58000,0
15679760,Male,24,19000,0
15654574,Female,23,82000,0
15577178,Female,22,63000,0
15595324,Female,31,68000,0
15756932,Male,25,80000,0
15726358,Female,24,27000,0
15595228,Female,20,23000,0
15782530,Female,33,113000,0
15592877,Male,32,18000,0
15651983,Male,34,112000,1
15746737,Male,18,52000,0
15774179,Female,22,27000,0
15667265,Female,28,87000,0
15655123,Female,26,17000,0
15595917,Male,30,80000,0
15668385,Male,39,42000,0
15709476,Male,20,49000,0
15711218,Male,35,88000,0
15798659,Female,30,62000,0
15663939,Female,31,118000,1
15694946,Male,24,55000,0
15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 
15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 
15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 10 - Support Vector Machine (SVM)/svm.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 17 23:26:31 2018
4 | 
5 | @author: Mohammad Doosti Lakhani
6 | """
7 | 
8 | # Importing libraries
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 | 
13 | # Importing dataset
14 | dataset = pd.read_csv('Social_Network_Ads.csv')
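# (Editor's note, not part of the original script: Social_Network_Ads.csv has
# the columns [User ID, Gender, Age, EstimatedSalary, Purchased], so the
# iloc[:,2:4] below selects Age and EstimatedSalary as the features and
# iloc[:,4] selects the Purchased label. An optional sanity check:
# assert list(dataset.columns) == ['User ID', 'Gender', 'Age', 'EstimatedSalary', 'Purchased'])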
15 | x = dataset.iloc[:,2:4].values
16 | y = dataset.iloc[:,4].values
17 | 
18 | # Feature scaling (note: the scaler is fitted on the full dataset here, before the split below; fitting it on the train set only would avoid test-set leakage)
19 | from sklearn.preprocessing import StandardScaler
20 | standardscaler_x = StandardScaler()
21 | standardscaler_x = standardscaler_x.fit(x)
22 | x = standardscaler_x.transform(x)
23 | 
24 | # Splitting dataset into Train set and Test set
25 | from sklearn.model_selection import train_test_split
26 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0)
27 | 
28 | # Fitting the SVM model (RBF kernel) to the train set
29 | from sklearn.svm import SVC
30 | classifier = SVC(kernel='rbf', random_state=0)
31 | classifier = classifier.fit(x_train,y_train)
32 | 
33 | """ Uncomment the code below and compare the visualization output (try different kernels) """
34 | 
35 | """
36 | 
37 | classifier = SVC(kernel='poly', degree = 3, random_state=0)
38 | classifier = classifier.fit(x_train,y_train)
39 | 
40 | classifier = SVC(kernel='linear', random_state=0) ## (a linear decision boundary, i.e. no kernel trick)
41 | classifier = classifier.fit(x_train,y_train)
42 | 
43 | classifier = SVC(kernel='sigmoid', random_state=0)
44 | classifier = classifier.fit(x_train,y_train)
45 | 
46 | """
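# (Editor's sketch, not in the original: rather than uncommenting kernels one
# at a time, the kernel and C can be chosen by cross-validation. The variable
# names below are hypothetical; uncomment to run.)
# from sklearn.model_selection import GridSearchCV
# param_grid = {'kernel': ['rbf', 'linear', 'poly', 'sigmoid'], 'C': [0.1, 1, 10]}
# grid = GridSearchCV(SVC(random_state=0), param_grid, cv=5)
# grid = grid.fit(x_train, y_train)
# print(grid.best_params_, grid.best_score_)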
47 | 
48 | 
49 | # Make the prediction on train set
50 | y_train_pred = classifier.predict(x_train)
51 | 
52 | # Make the prediction on test set
53 | y_test_pred = classifier.predict(x_test)
54 | 
55 | # Accuracy on train and test set
56 | from sklearn.metrics import confusion_matrix
57 | cm_train = confusion_matrix(y_train,y_train_pred)
58 | cm_test = confusion_matrix(y_test,y_test_pred)
59 | 
60 | 
61 | import os
62 | import sys
63 | 
64 | scriptpath = "../../Tools" # functions of acc and CAP
65 | # Add the directory containing your module to the Python path
66 | sys.path.append(os.path.abspath(scriptpath))
67 | import accuracy as ac
68 | 
69 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train)
70 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100))
71 | 
72 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test)
73 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100))
74 | 
75 | 
76 | 
77 | # Visualising the Training set results
78 | from matplotlib.colors import ListedColormap
79 | X_set, y_set = x_train, y_train
80 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
81 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
82 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
83 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
84 | plt.xlim(X1.min(), X1.max())
85 | plt.ylim(X2.min(), X2.max())
86 | for i, j in enumerate(np.unique(y_set)):
87 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
88 |                 c = ListedColormap(('red', 'green'))(i), label = j)
89 | plt.title('SVM - rbf kernel (Training set)')
90 | plt.xlabel('Age')
91 | plt.ylabel('Estimated Salary')
92 | plt.legend()
93 | plt.show()
94 | 
95 | # Visualising the Test set results
96 | from matplotlib.colors import ListedColormap
97 | X_set, y_set = x_test, y_test
98 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
99 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
100 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
101 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
102 | plt.xlim(X1.min(), X1.max())
103 | plt.ylim(X2.min(), X2.max())
104 | for i, j in enumerate(np.unique(y_set)):
105 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
106 |                 c = ListedColormap(('red', 'green'))(i), label = j)
107 | plt.title('SVM - rbf kernel (Test set)')
108 | plt.xlabel('Age')
109 | plt.ylabel('Estimated Salary')
110 | plt.legend()
111 | plt.show()
112 | 
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 11 - Naive Bayes/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
[Editor's note: contents omitted — this file is an identical duplicate of the Social_Network_Ads.csv listed above under Section 10 (1 header line + 400 data rows).]
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 11 - Naive Bayes/nb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Sep 8 20:04:31 2018
4 | 
5 | @author: Mohammad Doosti Lakhani
6 | """
7 | 
8 | 
9 | # Importing libraries
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | import pandas as pd
13 | 
14 | # Importing dataset
15 | dataset = pd.read_csv('Social_Network_Ads.csv')
16 | x = dataset.iloc[:,2:4].values
17 | y = dataset.iloc[:,4].values
18 | 
19 | # Feature scaling
20 | from sklearn.preprocessing import StandardScaler
21 | standardscaler_x = StandardScaler()
22 | standardscaler_x = standardscaler_x.fit(x)
23 | x = standardscaler_x.transform(x)
24 | 
25 | # Splitting dataset into Train set and Test set
26 | from sklearn.model_selection import train_test_split
27 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0)
28 | 
29 | # Fitting the Naive Bayes (Gaussian form) model to the train set
30 | from sklearn.naive_bayes import GaussianNB
31 | classifier = GaussianNB()
32 | classifier = classifier.fit(x_train,y_train)
33 | 
34 | """ Uncomment the code below to try a different likelihood model for the features
35 | from sklearn.naive_bayes import BernoulliNB
36 | classifier = BernoulliNB()
37 | classifier = classifier.fit(x_train,y_train)
38 | """
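# (Editor's note, not in the original: BernoulliNB models binary features; with
# its default binarize=0.0 it thresholds each input at zero, so on these
# standardized features it effectively learns from above/below-average flags.)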
39 | 
40 | # Make the prediction on train set
41 | y_train_pred = classifier.predict(x_train)
42 | 
43 | # Make the prediction on test set
44 | y_test_pred = classifier.predict(x_test)
45 | 
46 | # Accuracy on train and test set
47 | from sklearn.metrics import confusion_matrix
48 | cm_train = confusion_matrix(y_train,y_train_pred)
49 | cm_test = confusion_matrix(y_test,y_test_pred)
50 | 
51 | 
52 | import os
53 | import sys
54 | 
55 | scriptpath = "../../Tools" # functions of acc and CAP
56 | # Add the directory containing your module to the Python path
57 | sys.path.append(os.path.abspath(scriptpath))
58 | import accuracy as ac
59 | 
60 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train)
61 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100))
62 | 
63 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test)
64 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100))
65 | 
66 | 
67 | # Visualising the Training set results
68 | from matplotlib.colors import ListedColormap
69 | X_set, y_set = x_train, y_train
70 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
71 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
72 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
73 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
74 | plt.xlim(X1.min(), X1.max())
75 | plt.ylim(X2.min(), X2.max())
76 | for i, j in enumerate(np.unique(y_set)):
77 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
78 |                 c = ListedColormap(('red', 'green'))(i), label = j)
79 | plt.title('Naive Bayes - Gaussian (Training set)')
80 | plt.xlabel('Age')
81 | plt.ylabel('Estimated Salary')
82 | plt.legend()
83 | plt.show()
84 | 
85 | # Visualising the Test set results
86 | from matplotlib.colors import ListedColormap
87 | X_set, y_set = x_test, y_test
88 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
89 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
90 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
91 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
92 | plt.xlim(X1.min(), X1.max())
93 | plt.ylim(X2.min(), X2.max())
94 | for i, j in enumerate(np.unique(y_set)):
95 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
96 |                 c = ListedColormap(('red', 'green'))(i), label = j)
97 | plt.title('Naive Bayes - Gaussian (Test set)')
98 | plt.xlabel('Age')
99 | plt.ylabel('Estimated Salary')
100 | plt.legend()
101 | plt.show()
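(Editor's aside, not part of the repository: GaussianNB stores a per-class mean and variance for each feature, and prediction just compares log-posteriors. A minimal sketch of that computation, assuming nb.py's classifier and scaled arrays are in scope; the variable names here are illustrative.)

import numpy as np
point = x_test[0]
# newer scikit-learn exposes per-class variances as var_, older versions as sigma_
variances = classifier.var_ if hasattr(classifier, 'var_') else classifier.sigma_
log_post = []
for k in range(len(classifier.classes_)):
    mu, var = classifier.theta_[k], variances[k]
    # log N(point | mu, var), summed over the (conditionally independent) features
    log_lik = -0.5 * np.sum(np.log(2 * np.pi * var) + (point - mu) ** 2 / var)
    log_post.append(np.log(classifier.class_prior_[k]) + log_lik)
print(classifier.classes_[np.argmax(log_post)] == classifier.predict(point.reshape(1, -1))[0])  # expected: True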
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 12 - Decision Tree Classification/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
[Editor's note: contents omitted — this file is an identical duplicate of the Social_Network_Ads.csv listed above under Section 10 (1 header line + 400 data rows).]
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 12 - Decision Tree Classification/dtc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Sep 8 20:51:26 2018
4 | 
5 | @author: Mohammad Doosti Lakhani
6 | """
7 | 
8 | 
9 | # Importing libraries
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | import pandas as pd
13 | 
14 | # Importing dataset
15 | dataset = pd.read_csv('Social_Network_Ads.csv')
16 | x = dataset.iloc[:,2:4].values
17 | y = dataset.iloc[:,4].values
18 | 
19 | # Splitting dataset into Train set and Test set
20 | from sklearn.model_selection import train_test_split
21 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0)
22 | 
23 | # Fitting the Decision Tree Classification model to the train set
24 | from sklearn.tree import DecisionTreeClassifier
25 | classifier = DecisionTreeClassifier(random_state=0, presort=True, criterion='entropy')
26 | classifier = classifier.fit(x_train,y_train)
27 | 
28 | """ Uncomment the code below to try a different split criterion
29 | from sklearn.tree import DecisionTreeClassifier
30 | classifier = DecisionTreeClassifier(random_state=0, presort=True, criterion='gini') # tends to overfit the training set
31 | classifier = classifier.fit(x_train,y_train)
32 | """
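# (Editor's note, not in the original: criterion='entropy' splits on information
# gain, H = -sum_k p_k*log2(p_k), while criterion='gini' uses Gini impurity,
# G = 1 - sum_k p_k**2; both measure node purity and usually yield similar trees.
# Also note that the presort parameter used above was deprecated in scikit-learn
# 0.22 and removed in 0.24, so it may need to be dropped on newer versions.)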
33 | 
34 | # Make the prediction on train set
35 | y_train_pred = classifier.predict(x_train)
36 | 
37 | # Make the prediction on test set
38 | y_test_pred = classifier.predict(x_test)
39 | 
40 | # Accuracy on train and test set
41 | from sklearn.metrics import confusion_matrix
42 | cm_train = confusion_matrix(y_train,y_train_pred)
43 | cm_test = confusion_matrix(y_test,y_test_pred)
44 | 
45 | import os
46 | import sys
47 | 
48 | scriptpath = "../../Tools" # functions of acc and CAP
49 | # Add the directory containing your module to the Python path
50 | sys.path.append(os.path.abspath(scriptpath))
51 | import accuracy as ac
52 | 
53 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train)
54 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100))
55 | 
56 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test)
57 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100))
58 | 
59 | 
60 | # Visualising the Training set results (note: the features are unscaled in this script, so even with step = 1 the salary axis spans roughly 135,000 grid columns and this plot can be slow)
61 | from matplotlib.colors import ListedColormap
62 | X_set, y_set = x_train, y_train
63 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 1),
64 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 1))
65 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
66 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
67 | plt.xlim(X1.min(), X1.max())
68 | plt.ylim(X2.min(), X2.max())
69 | for i, j in enumerate(np.unique(y_set)):
70 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
71 |                 c = ListedColormap(('red', 'green'))(i), label = j)
72 | plt.title('Decision Tree Classifier - Entropy (Training set)')
73 | plt.xlabel('Age')
74 | plt.ylabel('Estimated Salary')
75 | plt.legend()
76 | plt.show()
77 | 
78 | # Visualising the Test set results
79 | from matplotlib.colors import ListedColormap
80 | X_set, y_set = x_test, y_test
81 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 1),
82 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 1))
83 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
84 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
85 | plt.xlim(X1.min(), X1.max())
86 | plt.ylim(X2.min(), X2.max())
87 | for i, j in enumerate(np.unique(y_set)):
88 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
89 |                 c = ListedColormap(('red', 'green'))(i), label = j)
90 | plt.title('Decision Tree Classifier - Entropy (Test set)')
91 | plt.xlabel('Age')
92 | plt.ylabel('Estimated Salary')
93 | plt.legend()
94 | plt.show()
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 13 - Random Forest Classification/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
[Editor's note: contents omitted — this file is an identical duplicate of the Social_Network_Ads.csv listed above under Section 10 (1 header line + 400 data rows).]
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 13 - Random Forest Classification/rfc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Sep 8 21:20:36 2018
4 | 
5 | @author: Mohammad Doosti Lakhani
6 | """
7 | 
8 | # Importing libraries
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 | import os
13 | import sys
14 | 
15 | scriptpath = "../../Tools" # functions of acc and CAP
16 | # Add the directory containing your module to the Python path
17 | sys.path.append(os.path.abspath(scriptpath))
18 | import accuracy as ac
19 | 
20 | # Importing dataset
21 | dataset = pd.read_csv('Social_Network_Ads.csv')
22 | x = dataset.iloc[:,2:4].values
23 | y = dataset.iloc[:,4].values
24 | 
25 | 
26 | # Splitting dataset into Train set and Test set
27 | from sklearn.model_selection import train_test_split
28 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0)
29 | 
30 | # Fitting the Random Forest Classification model to the train set
31 | from sklearn.ensemble import RandomForestClassifier
32 | classifier = RandomForestClassifier(n_estimators=7,criterion='gini',random_state=0)
33 | classifier = classifier.fit(x_train,y_train)
34 | 
35 | """ Uncomment the code below to try a different split criterion
36 | from sklearn.ensemble import RandomForestClassifier
37 | classifier = RandomForestClassifier(n_estimators=7,criterion='entropy',random_state=0)
38 | classifier = classifier.fit(x_train,y_train)
39 | 
40 | """
41 | 
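# (Editor's sketch, not in the original: because each tree is fitted on a
# bootstrap sample, a random forest can estimate its own generalization error
# from the out-of-bag samples, with no extra hold-out split. Hypothetical
# variant; uncomment to run. With only 7 trees, scikit-learn may warn that a
# few training points were never left out-of-bag.)
# classifier_oob = RandomForestClassifier(n_estimators=7, criterion='gini',
#                                         oob_score=True, random_state=0)
# classifier_oob = classifier_oob.fit(x_train, y_train)
# print('OOB accuracy estimate: {}'.format(classifier_oob.oob_score_))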
42 | # Make the prediction on train set
43 | y_train_pred = classifier.predict(x_train)
44 | 
45 | # Make the prediction on test set
46 | y_test_pred = classifier.predict(x_test)
47 | 
48 | # Accuracy on train and test set
49 | from sklearn.metrics import confusion_matrix
50 | cm_train = confusion_matrix(y_train,y_train_pred)
51 | cm_test = confusion_matrix(y_test,y_test_pred)
52 | 
53 | 
54 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train)
55 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100))
56 | 
57 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test)
58 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100))
59 | 
60 | 
61 | ac.capcurve(y_train,classifier.predict_proba(x_train),"Train")
62 | ac.capcurve(y_test,classifier.predict_proba(x_test),"Test")
63 | 
64 | # Visualising the Training set results
65 | from matplotlib.colors import ListedColormap
66 | X_set, y_set = x_train, y_train
67 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 1),
68 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 1))
69 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
70 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
71 | plt.xlim(X1.min(), X1.max())
72 | plt.ylim(X2.min(), X2.max())
73 | for i, j in enumerate(np.unique(y_set)):
74 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
75 |                 c = ListedColormap(('red', 'green'))(i), label = j)
76 | plt.title('Random Forest Classifier - Gini (Training set)')
77 | plt.xlabel('Age')
78 | plt.ylabel('Estimated Salary')
79 | plt.legend()
80 | plt.show()
81 | 
82 | # Visualising the Test set results
83 | from matplotlib.colors import ListedColormap
84 | X_set, y_set = x_test, y_test
85 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 1),
86 |                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 1))
87 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
88 |              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
89 | plt.xlim(X1.min(), X1.max())
90 | plt.ylim(X2.min(), X2.max())
91 | for i, j in enumerate(np.unique(y_set)):
92 |     plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
93 |                 c = ListedColormap(('red', 'green'))(i), label = j)
94 | plt.title('Random Forest Classifier - Gini (Test set)')
95 | plt.xlabel('Age')
96 | plt.ylabel('Estimated Salary')
97 | plt.legend()
98 | plt.show()
99 | 
100 | 
101 | 
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 8 - Logistic Regression/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 
15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 
15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 
15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 
15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 -------------------------------------------------------------------------------- /Part 3 - Classification/Section 8 - Logistic Regression/lr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Aug 16 23:14:59 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | """############################## based on assignment, we do not include Gender ########################""" 9 | 10 | 11 | # Imporing libraries 12 | import numpy as np 13 | import matplotlib.pyplot as plt 14 | import pandas as pd 15 | 16 | # Importing dataset 17 | dataset = pd.read_csv('Social_Network_Ads.csv') 18 | x = dataset.iloc[:,2:4].values 19 | y = dataset.iloc[:,4].values 20 | 21 | # Feature scaling 22 | from sklearn.preprocessing import StandardScaler 23 | standardscaler_x = StandardScaler() 24 | standardscaler_x = standardscaler_x.fit(x) 25 | x = standardscaler_x.transform(x) 26 | 27 | # Splitting dataset into Train set and Test set 28 | from sklearn.model_selection import train_test_split 29 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0) 30 | 31 | # Fitting Logistic Regression model to train data 32 | from sklearn.linear_model import LogisticRegression 33 | classifier = LogisticRegression(random_state=0, solver='liblinear') 34 | classifier = classifier.fit(x_train,y_train) 35 | 36 | # Make the prediction on train set 37 | y_train_pred = classifier.predict(x_train) 38 | 39 | # Make the prediction on train set 40 | y_test_pred = classifier.predict(x_test) 41 | 42 | # Acurracy on test and train set 43 | from sklearn.metrics import confusion_matrix 44 | cm_train = confusion_matrix(y_train,y_train_pred) 45 | cm_test = confusion_matrix(y_test,y_test_pred) 46 | 47 | import os 48 | import sys 49 | 50 | scriptpath = "../../Tools" # functions of acc and CAP 51 | # Add the directory containing your module to the Python path 52 | sys.path.append(os.path.abspath(scriptpath)) 53 | import accuracy as ac 54 | 55 | t_train,f_train,acc_train = 
ac.accuracy_on_cm(cm_train)
56 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100))
57 |
58 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test)
59 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100))
60 |
61 | # Visualising the Training set results
62 | from matplotlib.colors import ListedColormap
63 | X_set, y_set = x_train, y_train
64 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
65 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
66 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
67 | alpha = 0.75, cmap = ListedColormap(('red', 'green')))
68 | plt.xlim(X1.min(), X1.max())
69 | plt.ylim(X2.min(), X2.max())
70 | for i, j in enumerate(np.unique(y_set)):
71 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
72 | c = ListedColormap(('red', 'green'))(i), label = j)
73 | plt.title('Logistic Regression (Training set)')
74 | plt.xlabel('Age')
75 | plt.ylabel('Estimated Salary')
76 | plt.legend()
77 | plt.show()
78 |
79 | # Visualising the Test set results
80 | from matplotlib.colors import ListedColormap
81 | X_set, y_set = x_test, y_test
82 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
83 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
84 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
85 | alpha = 0.75, cmap = ListedColormap(('red', 'green')))
86 | plt.xlim(X1.min(), X1.max())
87 | plt.ylim(X2.min(), X2.max())
88 | for i, j in enumerate(np.unique(y_set)):
89 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
90 | c = ListedColormap(('red', 'green'))(i), label = j)
91 | plt.title('Logistic Regression (Test set)')
92 | plt.xlabel('Age')
93 | plt.ylabel('Estimated Salary')
94 | plt.legend()
95 | plt.show()
96 |
97 |
98 |
99 |
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 8 - Logistic Regression/lr_with_gender.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Aug 16 23:46:04 2018
4 |
5 | @author: Mohammad Doosti Lakhani
6 | """
7 |
8 | """############################## in this code, we include Gender ########################"""
9 |
10 | # importing libraries
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | import pandas as pd
14 |
15 | # importing dataset
16 | dataset = pd.read_csv('Social_Network_Ads.csv')
17 | x = dataset.iloc[:,1:4].values
18 | y = dataset.iloc[:,-1].values
19 |
20 | # feature scaling
21 | from sklearn.preprocessing import StandardScaler
22 | standardscaler_x = StandardScaler()
23 | standardscaler_x = standardscaler_x.fit(x[:,1:3])
24 | x[:,1:3] = standardscaler_x.transform(x[:,1:3])
25 |
26 | # encoding categorical data (Gender) with LabelEncoder and OneHotEncoder
27 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder
28 | labelencoder_x = LabelEncoder()
29 | labelencoder_x = labelencoder_x.fit(x[:,0])
30 | x[:,0] = labelencoder_x.transform(x[:,0])
31 |
32 | onehotencoder_x = OneHotEncoder(categorical_features=[0])
33 | onehotencoder_x = onehotencoder_x.fit(x)
34 | x = onehotencoder_x.transform(x).toarray()
35 |
36 | # splitting dataset into Train set and Test set
37 | from
sklearn.model_selection import train_test_split 38 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0) 39 | 40 | # Fitting Logistic Regression model to train data 41 | from sklearn.linear_model import LogisticRegression 42 | classifier = LogisticRegression(random_state=0, solver='liblinear') 43 | classifier = classifier.fit(x_train,y_train) 44 | 45 | # Make the prediction on train set 46 | y_train_pred = classifier.predict(x_train) 47 | 48 | # Make the prediction on train set 49 | y_test_pred = classifier.predict(x_test) 50 | 51 | 52 | # Acurracy on test and train set 53 | from sklearn.metrics import confusion_matrix 54 | cm_train = confusion_matrix(y_train,y_train_pred) 55 | cm_test = confusion_matrix(y_test,y_test_pred) 56 | 57 | import os 58 | import sys 59 | 60 | scriptpath = "../../Tools" # functions of acc and CAP 61 | # Add the directory containing your module to the Python path 62 | sys.path.append(os.path.abspath(scriptpath)) 63 | import accuracy as ac 64 | 65 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train) 66 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100)) 67 | 68 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test) 69 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100)) 70 | 71 | # For educational purpose, I didn't added PCA to reduce dimensions to plot. But if you 72 | # like to do this, see the Part 9 and use lr.py visualization code to plot. -------------------------------------------------------------------------------- /Part 3 - Classification/Section 9 - K-Nearest Neighbors (K-NN)/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 
| 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 
15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 
15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 
15627220,Male,39,71000,0
390 | 15672330,Male,47,34000,1
391 | 15668521,Female,48,35000,1
392 | 15807837,Male,48,33000,1
393 | 15592570,Male,47,23000,1
394 | 15748589,Female,45,45000,1
395 | 15635893,Male,60,42000,1
396 | 15757632,Female,39,59000,0
397 | 15691863,Female,46,41000,1
398 | 15706071,Male,51,23000,1
399 | 15654296,Female,50,20000,1
400 | 15755018,Male,36,33000,0
401 | 15594041,Female,49,36000,1
--------------------------------------------------------------------------------
/Part 3 - Classification/Section 9 - K-Nearest Neighbors (K-NN)/knn.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Aug 17 22:50:06 2018
4 |
5 | @author: Mohammad Doosti Lakhani
6 | """
7 |
8 | # Importing libraries
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 |
13 | # Importing dataset
14 | dataset = pd.read_csv('Social_Network_Ads.csv')
15 | x = dataset.iloc[:,2:4].values
16 | y = dataset.iloc[:,4].values
17 |
18 | # Feature scaling
19 | from sklearn.preprocessing import StandardScaler
20 | standardscaler_x = StandardScaler()
21 | standardscaler_x = standardscaler_x.fit(x)
22 | x = standardscaler_x.transform(x)
23 |
24 | # Splitting dataset into Train set and Test set
25 | from sklearn.model_selection import train_test_split
26 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.75 , random_state=0)
27 |
28 | # Fitting the K-Nearest Neighbors model to the train set
29 | from sklearn.neighbors import KNeighborsClassifier
30 | classifier = KNeighborsClassifier(n_neighbors=10, p=2,metric='minkowski') # minkowski with p=2 is the Euclidean distance
31 | classifier = classifier.fit(x_train,y_train)
32 |
33 | # Make the prediction on train set
34 | y_train_pred = classifier.predict(x_train)
35 |
36 | # Make the prediction on test set
37 | y_test_pred = classifier.predict(x_test)
38 |
39 | # Accuracy on test and train set
40 | from sklearn.metrics import confusion_matrix
41 | cm_train = confusion_matrix(y_train,y_train_pred)
42 | cm_test = confusion_matrix(y_test,y_test_pred)
43 |
44 |
45 | import os
46 | import sys
47 |
48 | scriptpath = "../../Tools" # functions of acc and CAP
49 | # Add the directory containing your module to the Python path
50 | sys.path.append(os.path.abspath(scriptpath))
51 | import accuracy as ac
52 |
53 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train)
54 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100))
55 |
56 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test)
57 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100))
58 |
59 |
60 |
61 | # Visualising the Training set results
62 | from matplotlib.colors import ListedColormap
63 | X_set, y_set = x_train, y_train
64 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
65 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
66 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
67 | alpha = 0.75, cmap = ListedColormap(('red', 'green')))
68 | plt.xlim(X1.min(), X1.max())
69 | plt.ylim(X2.min(), X2.max())
70 | for i, j in enumerate(np.unique(y_set)):
71 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
72 | c = ListedColormap(('red', 'green'))(i), label = j)
73 | plt.title('K-Nearest Neighbors (Training set)')
74 | plt.xlabel('Age')
75 | plt.ylabel('Estimated Salary')
76 | plt.legend()
77 | plt.show()
78 | 79 | # Visualising the Test set results 80 | from matplotlib.colors import ListedColormap 81 | X_set, y_set = x_test, y_test 82 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 83 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 84 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 85 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 86 | plt.xlim(X1.min(), X1.max()) 87 | plt.ylim(X2.min(), X2.max()) 88 | for i, j in enumerate(np.unique(y_set)): 89 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 90 | c = ListedColormap(('red', 'green'))(i), label = j) 91 | plt.title('K-Nearest Neighbors (Test set)') 92 | plt.xlabel('Age') 93 | plt.ylabel('Estimated Salary') 94 | plt.legend() 95 | plt.show() 96 | -------------------------------------------------------------------------------- /Part 4 - Clustering/Section 14 - K-Means Clustering/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100) 2 | 0001,Male,19,15,39 3 | 0002,Male,21,15,81 4 | 0003,Female,20,16,6 5 | 0004,Female,23,16,77 6 | 0005,Female,31,17,40 7 | 0006,Female,22,17,76 8 | 0007,Female,35,18,6 9 | 0008,Female,23,18,94 10 | 0009,Male,64,19,3 11 | 0010,Female,30,19,72 12 | 0011,Male,67,19,14 13 | 0012,Female,35,19,99 14 | 0013,Female,58,20,15 15 | 0014,Female,24,20,77 16 | 0015,Male,37,20,13 17 | 0016,Male,22,20,79 18 | 0017,Female,35,21,35 19 | 0018,Male,20,21,66 20 | 0019,Male,52,23,29 21 | 0020,Female,35,23,98 22 | 0021,Male,35,24,35 23 | 0022,Male,25,24,73 24 | 0023,Female,46,25,5 25 | 0024,Male,31,25,73 26 | 0025,Female,54,28,14 27 | 0026,Male,29,28,82 28 | 0027,Female,45,28,32 29 | 0028,Male,35,28,61 30 | 0029,Female,40,29,31 31 | 0030,Female,23,29,87 32 | 0031,Male,60,30,4 33 | 0032,Female,21,30,73 34 | 0033,Male,53,33,4 35 | 0034,Male,18,33,92 36 | 0035,Female,49,33,14 37 | 0036,Female,21,33,81 38 | 0037,Female,42,34,17 39 | 0038,Female,30,34,73 40 | 0039,Female,36,37,26 41 | 0040,Female,20,37,75 42 | 0041,Female,65,38,35 43 | 0042,Male,24,38,92 44 | 0043,Male,48,39,36 45 | 0044,Female,31,39,61 46 | 0045,Female,49,39,28 47 | 0046,Female,24,39,65 48 | 0047,Female,50,40,55 49 | 0048,Female,27,40,47 50 | 0049,Female,29,40,42 51 | 0050,Female,31,40,42 52 | 0051,Female,49,42,52 53 | 0052,Male,33,42,60 54 | 0053,Female,31,43,54 55 | 0054,Male,59,43,60 56 | 0055,Female,50,43,45 57 | 0056,Male,47,43,41 58 | 0057,Female,51,44,50 59 | 0058,Male,69,44,46 60 | 0059,Female,27,46,51 61 | 0060,Male,53,46,46 62 | 0061,Male,70,46,56 63 | 0062,Male,19,46,55 64 | 0063,Female,67,47,52 65 | 0064,Female,54,47,59 66 | 0065,Male,63,48,51 67 | 0066,Male,18,48,59 68 | 0067,Female,43,48,50 69 | 0068,Female,68,48,48 70 | 0069,Male,19,48,59 71 | 0070,Female,32,48,47 72 | 0071,Male,70,49,55 73 | 0072,Female,47,49,42 74 | 0073,Female,60,50,49 75 | 0074,Female,60,50,56 76 | 0075,Male,59,54,47 77 | 0076,Male,26,54,54 78 | 0077,Female,45,54,53 79 | 0078,Male,40,54,48 80 | 0079,Female,23,54,52 81 | 0080,Female,49,54,42 82 | 0081,Male,57,54,51 83 | 0082,Male,38,54,55 84 | 0083,Male,67,54,41 85 | 0084,Female,46,54,44 86 | 0085,Female,21,54,57 87 | 0086,Male,48,54,46 88 | 0087,Female,55,57,58 89 | 0088,Female,22,57,55 90 | 0089,Female,34,58,60 91 | 0090,Female,50,58,46 92 | 0091,Female,68,59,55 93 | 0092,Male,18,59,41 94 | 0093,Male,48,60,49 95 | 0094,Female,40,60,40 96 | 0095,Female,32,60,42 
97 | 0096,Male,24,60,52 98 | 0097,Female,47,60,47 99 | 0098,Female,27,60,50 100 | 0099,Male,48,61,42 101 | 0100,Male,20,61,49 102 | 0101,Female,23,62,41 103 | 0102,Female,49,62,48 104 | 0103,Male,67,62,59 105 | 0104,Male,26,62,55 106 | 0105,Male,49,62,56 107 | 0106,Female,21,62,42 108 | 0107,Female,66,63,50 109 | 0108,Male,54,63,46 110 | 0109,Male,68,63,43 111 | 0110,Male,66,63,48 112 | 0111,Male,65,63,52 113 | 0112,Female,19,63,54 114 | 0113,Female,38,64,42 115 | 0114,Male,19,64,46 116 | 0115,Female,18,65,48 117 | 0116,Female,19,65,50 118 | 0117,Female,63,65,43 119 | 0118,Female,49,65,59 120 | 0119,Female,51,67,43 121 | 0120,Female,50,67,57 122 | 0121,Male,27,67,56 123 | 0122,Female,38,67,40 124 | 0123,Female,40,69,58 125 | 0124,Male,39,69,91 126 | 0125,Female,23,70,29 127 | 0126,Female,31,70,77 128 | 0127,Male,43,71,35 129 | 0128,Male,40,71,95 130 | 0129,Male,59,71,11 131 | 0130,Male,38,71,75 132 | 0131,Male,47,71,9 133 | 0132,Male,39,71,75 134 | 0133,Female,25,72,34 135 | 0134,Female,31,72,71 136 | 0135,Male,20,73,5 137 | 0136,Female,29,73,88 138 | 0137,Female,44,73,7 139 | 0138,Male,32,73,73 140 | 0139,Male,19,74,10 141 | 0140,Female,35,74,72 142 | 0141,Female,57,75,5 143 | 0142,Male,32,75,93 144 | 0143,Female,28,76,40 145 | 0144,Female,32,76,87 146 | 0145,Male,25,77,12 147 | 0146,Male,28,77,97 148 | 0147,Male,48,77,36 149 | 0148,Female,32,77,74 150 | 0149,Female,34,78,22 151 | 0150,Male,34,78,90 152 | 0151,Male,43,78,17 153 | 0152,Male,39,78,88 154 | 0153,Female,44,78,20 155 | 0154,Female,38,78,76 156 | 0155,Female,47,78,16 157 | 0156,Female,27,78,89 158 | 0157,Male,37,78,1 159 | 0158,Female,30,78,78 160 | 0159,Male,34,78,1 161 | 0160,Female,30,78,73 162 | 0161,Female,56,79,35 163 | 0162,Female,29,79,83 164 | 0163,Male,19,81,5 165 | 0164,Female,31,81,93 166 | 0165,Male,50,85,26 167 | 0166,Female,36,85,75 168 | 0167,Male,42,86,20 169 | 0168,Female,33,86,95 170 | 0169,Female,36,87,27 171 | 0170,Male,32,87,63 172 | 0171,Male,40,87,13 173 | 0172,Male,28,87,75 174 | 0173,Male,36,87,10 175 | 0174,Male,36,87,92 176 | 0175,Female,52,88,13 177 | 0176,Female,30,88,86 178 | 0177,Male,58,88,15 179 | 0178,Male,27,88,69 180 | 0179,Male,59,93,14 181 | 0180,Male,35,93,90 182 | 0181,Female,37,97,32 183 | 0182,Female,32,97,86 184 | 0183,Male,46,98,15 185 | 0184,Female,29,98,88 186 | 0185,Female,41,99,39 187 | 0186,Male,30,99,97 188 | 0187,Female,54,101,24 189 | 0188,Male,28,101,68 190 | 0189,Female,41,103,17 191 | 0190,Female,36,103,85 192 | 0191,Female,34,103,23 193 | 0192,Female,32,103,69 194 | 0193,Male,33,113,8 195 | 0194,Female,38,113,91 196 | 0195,Female,47,120,16 197 | 0196,Female,35,120,79 198 | 0197,Female,45,126,28 199 | 0198,Male,32,126,74 200 | 0199,Male,32,137,18 201 | 0200,Male,30,137,83 -------------------------------------------------------------------------------- /Part 4 - Clustering/Section 14 - K-Means Clustering/kmeans.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Sep 9 18:13:51 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | # import libraries 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import pandas as pd 12 | 13 | # Importing dataset 14 | dataset = pd.read_csv('Mall_Customers.csv') 15 | x = dataset.iloc[:,[3,4]].values 16 | 17 | # Finding the best value of CLUSTER_COUNT using Elbow method 18 | from sklearn.cluster import KMeans 19 | cluster_count_test = 10 20 | wcss = [] 21 | 22 | for i in range(1,cluster_count_test+1): 23 | kmeans = 
KMeans(n_clusters=i, init='k-means++',n_init=50,max_iter=300,random_state=0) 24 | kmeans = kmeans.fit(x) 25 | wcss.append(kmeans.inertia_) 26 | 27 | # Plot wccs values wrt number of clusters 28 | plt.plot(np.arange(1,11),wcss) 29 | plt.title('ELbow of KMeans') 30 | plt.xlabel('Number of Clusters') 31 | plt.ylabel('WCCS value') 32 | plt.show() 33 | 34 | # Applying kmeans with optimal number of clusters gained by Elbow method 35 | cluster_count = 5 36 | kmeans = KMeans(n_clusters=cluster_count, init='k-means++',n_init=50,max_iter=300,random_state=0) 37 | kmeans = kmeans.fit(x) 38 | 39 | y_pred = kmeans.predict(x) # predicted labels 40 | centroids = kmeans.cluster_centers_ 41 | 42 | 43 | # Plot scatter of datapoints with their clusters 44 | plt.scatter(x[y_pred==0,0],x[y_pred==0,1],s=100, c='yellow',label = 'Cluster 1') 45 | plt.scatter(x[y_pred==1,0],x[y_pred==1,1],s=100, c='blue',label = 'Cluster 2') 46 | plt.scatter(x[y_pred==2,0],x[y_pred==2,1],s=100, c='purple',label = 'Cluster 3') 47 | plt.scatter(x[y_pred==3,0],x[y_pred==3,1],s=100, c='cyan',label = 'Cluster 4') 48 | plt.scatter(x[y_pred==4,0],x[y_pred==4,1],s=100, c='green',label = 'Cluster 5') 49 | plt.scatter(centroids[:,0],centroids[:,1],s=200,c='red',marker= 'd',label = 'Centroids') 50 | plt.title('Clusters with wccs='+str(wcss[4])) 51 | plt.xlabel('Income (k$)') 52 | plt.ylabel('Spending Score (1-100)') 53 | plt.legend() 54 | plt.show() 55 | 56 | -------------------------------------------------------------------------------- /Part 4 - Clustering/Section 15 - Hierarchical Clustering/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100) 2 | 0001,Male,19,15,39 3 | 0002,Male,21,15,81 4 | 0003,Female,20,16,6 5 | 0004,Female,23,16,77 6 | 0005,Female,31,17,40 7 | 0006,Female,22,17,76 8 | 0007,Female,35,18,6 9 | 0008,Female,23,18,94 10 | 0009,Male,64,19,3 11 | 0010,Female,30,19,72 12 | 0011,Male,67,19,14 13 | 0012,Female,35,19,99 14 | 0013,Female,58,20,15 15 | 0014,Female,24,20,77 16 | 0015,Male,37,20,13 17 | 0016,Male,22,20,79 18 | 0017,Female,35,21,35 19 | 0018,Male,20,21,66 20 | 0019,Male,52,23,29 21 | 0020,Female,35,23,98 22 | 0021,Male,35,24,35 23 | 0022,Male,25,24,73 24 | 0023,Female,46,25,5 25 | 0024,Male,31,25,73 26 | 0025,Female,54,28,14 27 | 0026,Male,29,28,82 28 | 0027,Female,45,28,32 29 | 0028,Male,35,28,61 30 | 0029,Female,40,29,31 31 | 0030,Female,23,29,87 32 | 0031,Male,60,30,4 33 | 0032,Female,21,30,73 34 | 0033,Male,53,33,4 35 | 0034,Male,18,33,92 36 | 0035,Female,49,33,14 37 | 0036,Female,21,33,81 38 | 0037,Female,42,34,17 39 | 0038,Female,30,34,73 40 | 0039,Female,36,37,26 41 | 0040,Female,20,37,75 42 | 0041,Female,65,38,35 43 | 0042,Male,24,38,92 44 | 0043,Male,48,39,36 45 | 0044,Female,31,39,61 46 | 0045,Female,49,39,28 47 | 0046,Female,24,39,65 48 | 0047,Female,50,40,55 49 | 0048,Female,27,40,47 50 | 0049,Female,29,40,42 51 | 0050,Female,31,40,42 52 | 0051,Female,49,42,52 53 | 0052,Male,33,42,60 54 | 0053,Female,31,43,54 55 | 0054,Male,59,43,60 56 | 0055,Female,50,43,45 57 | 0056,Male,47,43,41 58 | 0057,Female,51,44,50 59 | 0058,Male,69,44,46 60 | 0059,Female,27,46,51 61 | 0060,Male,53,46,46 62 | 0061,Male,70,46,56 63 | 0062,Male,19,46,55 64 | 0063,Female,67,47,52 65 | 0064,Female,54,47,59 66 | 0065,Male,63,48,51 67 | 0066,Male,18,48,59 68 | 0067,Female,43,48,50 69 | 0068,Female,68,48,48 70 | 0069,Male,19,48,59 71 | 0070,Female,32,48,47 72 | 0071,Male,70,49,55 73 | 0072,Female,47,49,42 74 | 
0073,Female,60,50,49 75 | 0074,Female,60,50,56 76 | 0075,Male,59,54,47 77 | 0076,Male,26,54,54 78 | 0077,Female,45,54,53 79 | 0078,Male,40,54,48 80 | 0079,Female,23,54,52 81 | 0080,Female,49,54,42 82 | 0081,Male,57,54,51 83 | 0082,Male,38,54,55 84 | 0083,Male,67,54,41 85 | 0084,Female,46,54,44 86 | 0085,Female,21,54,57 87 | 0086,Male,48,54,46 88 | 0087,Female,55,57,58 89 | 0088,Female,22,57,55 90 | 0089,Female,34,58,60 91 | 0090,Female,50,58,46 92 | 0091,Female,68,59,55 93 | 0092,Male,18,59,41 94 | 0093,Male,48,60,49 95 | 0094,Female,40,60,40 96 | 0095,Female,32,60,42 97 | 0096,Male,24,60,52 98 | 0097,Female,47,60,47 99 | 0098,Female,27,60,50 100 | 0099,Male,48,61,42 101 | 0100,Male,20,61,49 102 | 0101,Female,23,62,41 103 | 0102,Female,49,62,48 104 | 0103,Male,67,62,59 105 | 0104,Male,26,62,55 106 | 0105,Male,49,62,56 107 | 0106,Female,21,62,42 108 | 0107,Female,66,63,50 109 | 0108,Male,54,63,46 110 | 0109,Male,68,63,43 111 | 0110,Male,66,63,48 112 | 0111,Male,65,63,52 113 | 0112,Female,19,63,54 114 | 0113,Female,38,64,42 115 | 0114,Male,19,64,46 116 | 0115,Female,18,65,48 117 | 0116,Female,19,65,50 118 | 0117,Female,63,65,43 119 | 0118,Female,49,65,59 120 | 0119,Female,51,67,43 121 | 0120,Female,50,67,57 122 | 0121,Male,27,67,56 123 | 0122,Female,38,67,40 124 | 0123,Female,40,69,58 125 | 0124,Male,39,69,91 126 | 0125,Female,23,70,29 127 | 0126,Female,31,70,77 128 | 0127,Male,43,71,35 129 | 0128,Male,40,71,95 130 | 0129,Male,59,71,11 131 | 0130,Male,38,71,75 132 | 0131,Male,47,71,9 133 | 0132,Male,39,71,75 134 | 0133,Female,25,72,34 135 | 0134,Female,31,72,71 136 | 0135,Male,20,73,5 137 | 0136,Female,29,73,88 138 | 0137,Female,44,73,7 139 | 0138,Male,32,73,73 140 | 0139,Male,19,74,10 141 | 0140,Female,35,74,72 142 | 0141,Female,57,75,5 143 | 0142,Male,32,75,93 144 | 0143,Female,28,76,40 145 | 0144,Female,32,76,87 146 | 0145,Male,25,77,12 147 | 0146,Male,28,77,97 148 | 0147,Male,48,77,36 149 | 0148,Female,32,77,74 150 | 0149,Female,34,78,22 151 | 0150,Male,34,78,90 152 | 0151,Male,43,78,17 153 | 0152,Male,39,78,88 154 | 0153,Female,44,78,20 155 | 0154,Female,38,78,76 156 | 0155,Female,47,78,16 157 | 0156,Female,27,78,89 158 | 0157,Male,37,78,1 159 | 0158,Female,30,78,78 160 | 0159,Male,34,78,1 161 | 0160,Female,30,78,73 162 | 0161,Female,56,79,35 163 | 0162,Female,29,79,83 164 | 0163,Male,19,81,5 165 | 0164,Female,31,81,93 166 | 0165,Male,50,85,26 167 | 0166,Female,36,85,75 168 | 0167,Male,42,86,20 169 | 0168,Female,33,86,95 170 | 0169,Female,36,87,27 171 | 0170,Male,32,87,63 172 | 0171,Male,40,87,13 173 | 0172,Male,28,87,75 174 | 0173,Male,36,87,10 175 | 0174,Male,36,87,92 176 | 0175,Female,52,88,13 177 | 0176,Female,30,88,86 178 | 0177,Male,58,88,15 179 | 0178,Male,27,88,69 180 | 0179,Male,59,93,14 181 | 0180,Male,35,93,90 182 | 0181,Female,37,97,32 183 | 0182,Female,32,97,86 184 | 0183,Male,46,98,15 185 | 0184,Female,29,98,88 186 | 0185,Female,41,99,39 187 | 0186,Male,30,99,97 188 | 0187,Female,54,101,24 189 | 0188,Male,28,101,68 190 | 0189,Female,41,103,17 191 | 0190,Female,36,103,85 192 | 0191,Female,34,103,23 193 | 0192,Female,32,103,69 194 | 0193,Male,33,113,8 195 | 0194,Female,38,113,91 196 | 0195,Female,47,120,16 197 | 0196,Female,35,120,79 198 | 0197,Female,45,126,28 199 | 0198,Male,32,126,74 200 | 0199,Male,32,137,18 201 | 0200,Male,30,137,83 -------------------------------------------------------------------------------- /Part 4 - Clustering/Section 15 - Hierarchical Clustering/hc.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*-
2 | """
3 | Created on Sun Sep 9 19:59:37 2018
4 |
5 | @author: Mohammad Doosti Lakhani
6 | """
7 |
8 | # import libraries
9 | import numpy as np
10 | import matplotlib.pyplot as plt
11 | import pandas as pd
12 |
13 | # Importing dataset
14 | dataset = pd.read_csv('Mall_Customers.csv')
15 | x = dataset.iloc[:,[3,4]].values
16 |
17 |
18 | # Finding the optimal count of clusters using a Dendrogram
19 | import scipy.cluster.hierarchy as h
20 | dendrogram = h.dendrogram(Z=h.linkage(x,method='ward'))
21 | plt.title('Dendrogram')
22 | plt.xlabel('Customers')
23 | plt.ylabel('Distance')
24 | plt.show()
25 |
26 | # Fitting Hierarchical clustering using the optimal number of clusters
27 | from sklearn.cluster import AgglomerativeClustering
28 | hc = AgglomerativeClustering(n_clusters=5,affinity='euclidean',linkage='ward')
29 | y_pred = hc.fit_predict(x)
30 |
31 | # Plot scatter of datapoints with their clusters
32 | plt.scatter(x[y_pred==0,0],x[y_pred==0,1],s=100, c='yellow',label = 'Cluster 1')
33 | plt.scatter(x[y_pred==1,0],x[y_pred==1,1],s=100, c='blue',label = 'Cluster 2')
34 | plt.scatter(x[y_pred==2,0],x[y_pred==2,1],s=100, c='purple',label = 'Cluster 3')
35 | plt.scatter(x[y_pred==3,0],x[y_pred==3,1],s=100, c='cyan',label = 'Cluster 4')
36 | plt.scatter(x[y_pred==4,0],x[y_pred==4,1],s=100, c='green',label = 'Cluster 5')
37 | plt.title('Clusters (Hierarchical)')
38 | plt.xlabel('Income (k$)')
39 | plt.ylabel('Spending Score (1-100)')
40 | plt.legend()
41 | plt.show()
--------------------------------------------------------------------------------
/Part 5 - Association Rule Learning/Section 16 - Apriori/apriori.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Sep 9 20:26:28 2018
4 |
5 | @author: Mohammad Doosti Lakhani
6 | """
7 |
8 |
9 |
10 | # import libraries
11 | import numpy as np
12 | import pandas as pd
13 | import os
14 | import sys
15 |
16 | scriptpath = "../../Tools" # functions of acc and CAP
17 | # Add the directory containing your module to the Python path
18 | sys.path.append(os.path.abspath(scriptpath))
19 |
20 |
21 |
22 | # Importing dataset
23 | dataset = pd.read_csv('Market_Basket_Optimisation.csv',names=np.arange(1,21))
24 |
25 | # Preprocessing data to fit model
26 | transactions = []
27 | for sublist in dataset.values.tolist():
28 | clean_sublist = [item for item in sublist if item is not np.nan]
29 | transactions.append(clean_sublist) # remove 'nan' values # https://github.com/rasbt/mlxtend/issues/433
30 |
31 | from mlxtend.preprocessing import TransactionEncoder
32 | te = TransactionEncoder()
33 | te_ary = te.fit(transactions).transform(transactions)
34 | df_x = pd.DataFrame(te_ary, columns=te.columns_) # encode to onehot
35 |
36 |
37 | # ref = https://github.com/ymoch/apyori/blob/master/apyori.py
38 | from apyori import apriori
39 | rules = apriori(transactions= transactions, min_support= 0.003, min_confidence= 0.2, min_lift = 3, min_length = 2)
40 | result = list(rules)
41 |
42 | # Train model using the Apriori algorithm
43 | # ref = https://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/
44 | from mlxtend.frequent_patterns import apriori # note: this shadows apyori's apriori imported above; its rules are already stored in `result`
45 | from mlxtend.frequent_patterns import association_rules
46 | df_sets = apriori(df_x, min_support=0.005, use_colnames=True)
47 | df_rules = association_rules(df_sets,metric='lift',min_threshold= 3)
48 |
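Because df_rules is a regular pandas DataFrame, the mined rules can be ranked and inspected directly. A short optional follow-up, assuming mlxtend's standard output columns, might be:

# show the ten strongest rules by lift
top_rules = df_rules.sort_values(by='lift', ascending=False)
print(top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(10))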
--------------------------------------------------------------------------------
/Part 5 - Association Rule Learning/Section 17 - Eclat/eclat.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Sep 11 12:55:49 2018
4 |
5 | @author: Mohammad Doosti Lakhani
6 | """
7 |
8 | # import libraries
9 | import numpy as np
10 | import pandas as pd
11 |
12 | # Importing dataset
13 | dataset = pd.read_csv('Market_Basket_Optimisation.csv',names=np.arange(1,21))
14 |
15 | # Preprocessing data to fit model
16 | transactions = []
17 | for sublist in dataset.values.tolist():
18 | clean_sublist = [item for item in sublist if item is not np.nan]
19 | transactions.append(clean_sublist) # remove 'nan' values # https://github.com/rasbt/mlxtend/issues/433
20 |
21 | from mlxtend.preprocessing import TransactionEncoder
22 | te = TransactionEncoder()
23 | te_ary = te.fit(transactions).transform(transactions)
24 | df_x = pd.DataFrame(te_ary, columns=te.columns_) # encode to onehot
25 |
26 | # Mine frequent itemsets with the Apriori algorithm
27 | # ref = https://rasbt.github.io/mlxtend/api_subpackages/mlxtend.frequent_patterns/
28 | from mlxtend.frequent_patterns import apriori
29 | from mlxtend.frequent_patterns import association_rules
30 | df_sets = apriori(df_x, min_support=0.005, use_colnames=True)
31 | df_rules = association_rules(df_sets,metric='support',min_threshold= 0.005,support_only=True)
32 | # ranking rules by "support" only is what the Eclat method amounts to
--------------------------------------------------------------------------------
/Part 6 - Reinforcement Learning/Section 18 - Upper Confidence Bound (UCB)/ucb.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Sep 11 17:00:29 2018
4 |
5 | @author: Mohammad Doosti Lakhani
6 | """
7 |
8 | # import libraries
9 | import numpy as np
10 | import pandas as pd
11 | import matplotlib.pyplot as plt
12 | import math
13 |
14 |
15 | # Importing dataset
16 | dataset = pd.read_csv('Ads_CTR_Optimisation.csv')
17 |
18 | # Implementing Upper Confidence Bound
19 | m,n = dataset.shape
20 |
21 | ad_selected = np.zeros(m) # selected ads by users
22 |
23 | N = np.zeros(n,dtype=np.float16) # number of selections of ad i
24 | R = np.zeros(n,dtype=np.float16) # sum of rewards of ad i
25 |
26 | total_reward = 0
27 |
28 | # implementation in vectorized form
29 | for i in range(0,m):
30 | max_index = 0
31 | r = R / N # average reward of each ad; 0/0 gives NaN (with a NumPy warning) while an ad is still unseen
32 | delta = np.sqrt(3/2 * math.log(i + 1) / N) # confidence width delta_i = sqrt(3/2 * log(n) / N_i)
33 | upper_bound = r + delta
34 | max_index = np.argmax(upper_bound) # argmax returns the first NaN index, so each ad gets tried once before the bounds take over
35 | ad_selected[i] = max_index
36 | N[max_index] += 1
37 | reward = dataset.values[i,max_index]
38 | R[max_index] += reward
39 | total_reward += reward
40 |
41 | # Visualizing selections
42 | plt.hist(ad_selected)
43 | plt.show()
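After the loop, it is worth sanity-checking which ad wins. These optional lines (reusing n, N, R and total_reward from above) print the observed click-through rate per ad:

# empirical click-through rate of each ad after all rounds
for ad in range(n):
    print('Ad {}: selected {:.0f} times, observed CTR {:.3f}'.format(ad, N[ad], R[ad] / N[ad]))
print('Total reward:', total_reward)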
| """ 7 | 8 | # import libraries 9 | import numpy as np 10 | import pandas as pd 11 | import matplotlib.pyplot as plt 12 | 13 | # Importing dataset 14 | dataset = pd.read_csv('Ads_CTR_Optimisation.csv') 15 | 16 | # Implementing Thomson Sampling 17 | m,n = dataset.shape 18 | 19 | ad_selected = np.zeros(m) # selected ads by users 20 | 21 | N1 = np.zeros(n,dtype=np.float16) # ad i got reward count 22 | N0 = np.zeros(n,dtype=np.float16) #ad i did not get reward count 23 | 24 | total_reward = 0 25 | 26 | # Implementation in vectorized form 27 | for i in range(0,m): 28 | max_index = 0 29 | theta = np.random.beta(N1+1,N0+1) 30 | max_index = np.argmax(theta) 31 | ad_selected[i] = max_index 32 | reward = dataset.values[i,max_index] 33 | if reward == 1: 34 | N1[max_index] += 1 35 | else: 36 | N0[max_index] += 1 37 | total_reward+= reward 38 | 39 | # Visualizing selections 40 | plt.hist(ad_selected) 41 | plt.show() -------------------------------------------------------------------------------- /Part 7 - Natural Language Processing/Section 20 - Natural Language Processing/nlp_naive-bayes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Sep 17 21:44:06 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | # Importing the libraries 9 | import pandas as pd 10 | 11 | # Importing the dataset 12 | dataset = pd.read_csv('Restaurant_Reviews.tsv', delimiter = '\t', quoting = 3) # remove quoting marks 13 | x = dataset.iloc[:,0].values 14 | y = dataset.iloc[:,1].values 15 | 16 | # Cleaning the Text 17 | import re 18 | from nltk.corpus import stopwords # removing useless words 19 | from nltk.stem.porter import PorterStemmer # getting root of words 20 | stopwords_list = stopwords.words('english') 21 | ps = PorterStemmer() 22 | 23 | corpus = [] 24 | for i in range(0,len(x)): 25 | corp = re.sub(pattern = '[^a-zA-Z]', repl = ' ', string = x[i]) 26 | corp = corp.split() 27 | corp = [ps.stem(word) for word in corp] 28 | corp = ' '.join(corp) 29 | corpus.append(corp) 30 | 31 | # Creating the Bag of Words model 32 | from sklearn.feature_extraction.text import CountVectorizer # text to column vector os sparse matrix 33 | cv = CountVectorizer(lowercase=True, stop_words = stopwords_list) # ignoring useless words and to lowercase 34 | x_sparse = cv.fit_transform(corpus) # bag of word sparse matrix 35 | x = x_sparse.todense() # Convert sparse matrix to dense matrix 36 | 37 | # Splitting dataset into Train set and Test set 38 | from sklearn.model_selection import train_test_split 39 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.85 , random_state=0) 40 | 41 | # Fitting the Naive Bayes (Gausiian form) model to the train set 42 | from sklearn.naive_bayes import GaussianNB 43 | classifier = GaussianNB() 44 | classifier = classifier.fit(x_train,y_train) 45 | 46 | # Make the prediction on train set 47 | y_train_pred = classifier.predict(x_train) 48 | 49 | # Make the prediction on train set 50 | y_test_pred = classifier.predict(x_test) 51 | 52 | # Acurracy on test and train set 53 | from sklearn.metrics import confusion_matrix 54 | cm_train = confusion_matrix(y_train,y_train_pred) 55 | cm_test = confusion_matrix(y_test,y_test_pred) 56 | 57 | import os 58 | import sys 59 | 60 | scriptpath = "../../Tools" # functions of acc and CAP 61 | # Add the directory containing your module to the Python path 62 | sys.path.append(os.path.abspath(scriptpath)) 63 | import accuracy as ac 64 | 65 | t_train,f_train,acc_train = 
ac.accuracy_on_cm(cm_train) 66 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100)) 67 | 68 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test) 69 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100)) -------------------------------------------------------------------------------- /Part 7 - Natural Language Processing/Section 20 - Natural Language Processing/nlp_random-forest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Sep 18 17:56:22 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | # Importing the libraries 9 | import pandas as pd 10 | 11 | # Importing the dataset 12 | dataset = pd.read_csv('Restaurant_Reviews.tsv', delimiter = '\t', quoting = 3) # quoting=3 ignores double quotes 13 | x = dataset.iloc[:,0].values 14 | y = dataset.iloc[:,1].values 15 | 16 | # Cleaning the Text 17 | import re 18 | from nltk.corpus import stopwords # removing useless words 19 | from nltk.stem.porter import PorterStemmer # reducing words to their root 20 | stopwords_list = stopwords.words('english') 21 | ps = PorterStemmer() 22 | 23 | corpus = [] 24 | for i in range(0,len(x)): 25 | corp = re.sub(pattern = '[^a-zA-Z]', repl = ' ', string = x[i]) 26 | corp = corp.split() 27 | corp = [ps.stem(word) for word in corp] 28 | corp = ' '.join(corp) 29 | corpus.append(corp) 30 | 31 | # Creating the Bag of Words model 32 | from sklearn.feature_extraction.text import CountVectorizer # text to a sparse matrix of token counts 33 | cv = CountVectorizer(lowercase=True, stop_words = stopwords_list) # ignoring useless words and lowercasing 34 | x_sparse = cv.fit_transform(corpus) # bag of words sparse matrix 35 | x = x_sparse.toarray() # Convert sparse matrix to dense array 36 | 37 | # Splitting dataset into Train set and Test set 38 | from sklearn.model_selection import train_test_split 39 | x_train,x_test,y_train,y_test = train_test_split(x,y, train_size = 0.85 , random_state=8) 40 | 41 | # Fitting the Random Forest model to the train set 42 | from sklearn.ensemble import RandomForestClassifier 43 | classifier = RandomForestClassifier(n_estimators=12,criterion='entropy',random_state=0) 44 | classifier = classifier.fit(x_train,y_train) 45 | 46 | # Make the prediction on train set 47 | y_train_pred = classifier.predict(x_train) 48 | 49 | # Make the prediction on test set 50 | y_test_pred = classifier.predict(x_test) 51 | 52 | # Accuracy on train and test sets 53 | from sklearn.metrics import confusion_matrix 54 | cm_train = confusion_matrix(y_train,y_train_pred) 55 | cm_test = confusion_matrix(y_test,y_test_pred) 56 | 57 | import os 58 | import sys 59 | 60 | scriptpath = "../../Tools" # functions of acc 61 | # Add the directory containing your module to the Python path 62 | sys.path.append(os.path.abspath(scriptpath)) 63 | import accuracy as ac 64 | 65 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train) 66 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100)) 67 | 68 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test) 69 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100)) -------------------------------------------------------------------------------- /Part 8 - Deep Learning/Section 21 - Artificial Neural
Networks (ANN)/ann.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Sep 18 20:45:13 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | # Importing Library 9 | import pandas as pd 10 | 11 | # Import Dataset 12 | dataset = pd.read_csv('Churn_Modelling.csv') 13 | x = dataset.iloc[:, 3:-1].values 14 | y = dataset.iloc[:, -1].values 15 | 16 | # Encoding categorical data to one hot encoded form 17 | from sklearn.preprocessing import LabelEncoder, OneHotEncoder 18 | labelencoder_country = LabelEncoder() 19 | x[:, 1] = labelencoder_country.fit_transform(x[:, 1]) # Encoding 'Geography' to numbers 20 | 21 | labelencoder_gender = LabelEncoder() 22 | x[:, 2] = labelencoder_gender.fit_transform(x[:, 2]) # Encoding 'Gender' to numbers 23 | 24 | onehotencoder = OneHotEncoder(categorical_features = [1]) # note: categorical_features is removed in newer scikit-learn; ColumnTransformer replaces it 25 | x = onehotencoder.fit_transform(x).todense() # Encoding 'Geography' to one hot 26 | 27 | x = x[:, 1:] # drop one dummy column to avoid the dummy variable trap 28 | 29 | # Feature Scaling 30 | from sklearn.preprocessing import StandardScaler 31 | sc = StandardScaler() 32 | x = sc.fit_transform(x) 33 | 34 | from sklearn.preprocessing import MinMaxScaler 35 | scaler = MinMaxScaler() # additionally squashes the standardized features into [0,1]; one scaler is usually enough 36 | x = scaler.fit_transform(x) 37 | 38 | # Splitting the dataset into the Training set and Test set 39 | from sklearn.model_selection import train_test_split 40 | x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2) 41 | 42 | # Importing Library 43 | from keras.models import Sequential 44 | from keras.layers import Dense 45 | 46 | # Building Model Structure 47 | model = Sequential() 48 | model.add(Dense(units = 22, activation='relu', input_dim = 11)) # Input layer and first hidden layer 49 | # when we assign `input_dim = 11`, we are actually creating the input layer 50 | 51 | model.add(Dense(units = 22, activation='relu')) # Second hidden layer 52 | model.add(Dense(units = 1, activation='sigmoid')) # Output layer 53 | 54 | model.compile(loss = 'binary_crossentropy', optimizer='adam', metrics = ['accuracy']) 55 | 56 | # Fitting fully connected NN to the Training set 57 | model.fit(x_train, y_train, batch_size = 25, epochs = 200) 58 | 59 | # Predicting on the Test set 60 | y_pred = model.predict(x_test) 61 | y_pred = (y_pred > 0.5) 62 | 63 | # Get accuracy on Test set 64 | from sklearn.metrics import confusion_matrix 65 | cm_test = confusion_matrix(y_test, y_pred) 66 | 67 | import os 68 | import sys 69 | 70 | scriptpath = "../../Tools" # functions of acc and CAP 71 | # Add the directory containing your module to the Python path 72 | sys.path.append(os.path.abspath(scriptpath)) 73 | import accuracy as ac 74 | 75 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test) 76 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100)) 77 | 78 | -------------------------------------------------------------------------------- /Part 8 - Deep Learning/Section 22 - Convolutional Neural Networks (CNN)/cnn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Sep 24 22:48:03 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | 9 | """Important Note: This implementation takes many hours on CPU.
Use a GPU or colab.research.google.com instead.""" 10 | 11 | from keras.models import Sequential 12 | from keras.layers import Conv2D 13 | from keras.layers import MaxPool2D 14 | from keras.layers import Dense 15 | from keras.layers import Flatten 16 | 17 | 18 | # Preprocessing 19 | """In this step, we just need to put the training and test files into folders with the structure below: 20 | -test_set 21 | ->class1 22 | ->class2 23 | ->... 24 | -training_set 25 | ->class1 26 | ->class2 27 | ->... 28 | """ 29 | 30 | 31 | # Defining some control parameters 32 | image_x = 128 33 | image_y = 128 34 | image_channels = 3 35 | 36 | training_set_path = 'dataset/training_set' 37 | test_set_path = 'dataset/test_set' 38 | 39 | model = Sequential() # Building sequential model 40 | 41 | # First layer of convolution 42 | model.add(Conv2D(64, kernel_size=(5,5), strides=(1,1), activation='relu', input_shape=(image_x,image_y,image_channels))) 43 | model.add(MaxPool2D(strides=(2,2), pool_size=(2,2))) 44 | 45 | # Second layer of convolution 46 | model.add(Conv2D(64, kernel_size=(5,5), strides=(1,1), activation='relu')) 47 | model.add(MaxPool2D(strides=(2,2), pool_size=(2,2))) 48 | 49 | # Flatten convolved tensors for feeding as input to Fully Connected layer 50 | model.add(Flatten()) 51 | 52 | # Hidden layer of fully connected 53 | model.add(Dense(units=256, activation='relu')) 54 | 55 | # Output layer (binary classification) 56 | model.add(Dense(units=1, activation='sigmoid')) 57 | 58 | # Compiling the model 59 | model.compile(optimizer='adam', metrics=['accuracy'], loss='binary_crossentropy') 60 | 61 | # Data Augmentation 62 | # Because we have a small dataset, we use augmentation to prevent overfitting and generalize better 63 | # Training images are augmented; test images are only rescaled 64 | 65 | from keras.preprocessing.image import ImageDataGenerator 66 | 67 | train_datagen = ImageDataGenerator(rescale = 1./255, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True) 68 | test_datagen = ImageDataGenerator(rescale = 1./255) 69 | training_set = train_datagen.flow_from_directory(training_set_path, target_size = (image_x, image_y), batch_size = 32, class_mode = 'binary') 70 | test_set = test_datagen.flow_from_directory(test_set_path, target_size = (image_x, image_y), batch_size = 32, class_mode = 'binary') 71 | 72 | # Fitting model with original and augmented data 73 | model.fit_generator(training_set, steps_per_epoch = 8000, epochs = 25, validation_data = test_set, validation_steps = 2000) # note: in Keras 2, steps_per_epoch and validation_steps count batches, not samples 74 | 75 | -------------------------------------------------------------------------------- /Part 9 - Dimensionality Reduction/Section 23 - Principal Component Analysis (PCA)/Wine.csv: -------------------------------------------------------------------------------- 1 | Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline,Customer_Segment 2 | 14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,1 3 | 13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,1 4 | 13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185,1 5 | 14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480,1 6 | 13.24,2.59,2.87,21,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735,1 7 | 14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450,1 8 | 14.39,1.87,2.45,14.6,96,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290,1 9 | 14.06,2.15,2.61,17.6,121,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295,1 10 | 14.83,1.64,2.17,14,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045,1 11 |
13.86,1.35,2.27,16,98,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045,1 12 | 14.1,2.16,2.3,18,105,2.95,3.32,0.22,2.38,5.75,1.25,3.17,1510,1 13 | 14.12,1.48,2.32,16.8,95,2.2,2.43,0.26,1.57,5,1.17,2.82,1280,1 14 | 13.75,1.73,2.41,16,89,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320,1 15 | 14.75,1.73,2.39,11.4,91,3.1,3.69,0.43,2.81,5.4,1.25,2.73,1150,1 16 | 14.38,1.87,2.38,12,102,3.3,3.64,0.29,2.96,7.5,1.2,3,1547,1 17 | 13.63,1.81,2.7,17.2,112,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310,1 18 | 14.3,1.92,2.72,20,120,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280,1 19 | 13.83,1.57,2.62,20,115,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130,1 20 | 14.19,1.59,2.48,16.5,108,3.3,3.93,0.32,1.86,8.7,1.23,2.82,1680,1 21 | 13.64,3.1,2.56,15.2,116,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845,1 22 | 14.06,1.63,2.28,16,126,3,3.17,0.24,2.1,5.65,1.09,3.71,780,1 23 | 12.93,3.8,2.65,18.6,102,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770,1 24 | 13.71,1.86,2.36,16.6,101,2.61,2.88,0.27,1.69,3.8,1.11,4,1035,1 25 | 12.85,1.6,2.52,17.8,95,2.48,2.37,0.26,1.46,3.93,1.09,3.63,1015,1 26 | 13.5,1.81,2.61,20,96,2.53,2.61,0.28,1.66,3.52,1.12,3.82,845,1 27 | 13.05,2.05,3.22,25,124,2.63,2.68,0.47,1.92,3.58,1.13,3.2,830,1 28 | 13.39,1.77,2.62,16.1,93,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195,1 29 | 13.3,1.72,2.14,17,94,2.4,2.19,0.27,1.35,3.95,1.02,2.77,1285,1 30 | 13.87,1.9,2.8,19.4,107,2.95,2.97,0.37,1.76,4.5,1.25,3.4,915,1 31 | 14.02,1.68,2.21,16,96,2.65,2.33,0.26,1.98,4.7,1.04,3.59,1035,1 32 | 13.73,1.5,2.7,22.5,101,3,3.25,0.29,2.38,5.7,1.19,2.71,1285,1 33 | 13.58,1.66,2.36,19.1,106,2.86,3.19,0.22,1.95,6.9,1.09,2.88,1515,1 34 | 13.68,1.83,2.36,17.2,104,2.42,2.69,0.42,1.97,3.84,1.23,2.87,990,1 35 | 13.76,1.53,2.7,19.5,132,2.95,2.74,0.5,1.35,5.4,1.25,3,1235,1 36 | 13.51,1.8,2.65,19,110,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095,1 37 | 13.48,1.81,2.41,20.5,100,2.7,2.98,0.26,1.86,5.1,1.04,3.47,920,1 38 | 13.28,1.64,2.84,15.5,110,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880,1 39 | 13.05,1.65,2.55,18,98,2.45,2.43,0.29,1.44,4.25,1.12,2.51,1105,1 40 | 13.07,1.5,2.1,15.5,98,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020,1 41 | 14.22,3.99,2.51,13.2,128,3,3.04,0.2,2.08,5.1,0.89,3.53,760,1 42 | 13.56,1.71,2.31,16.2,117,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795,1 43 | 13.41,3.84,2.12,18.8,90,2.45,2.68,0.27,1.48,4.28,0.91,3,1035,1 44 | 13.88,1.89,2.59,15,101,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095,1 45 | 13.24,3.98,2.29,17.5,103,2.64,2.63,0.32,1.66,4.36,0.82,3,680,1 46 | 13.05,1.77,2.1,17,107,3,3,0.28,2.03,5.04,0.88,3.35,885,1 47 | 14.21,4.04,2.44,18.9,111,2.85,2.65,0.3,1.25,5.24,0.87,3.33,1080,1 48 | 14.38,3.59,2.28,16,102,3.25,3.17,0.27,2.19,4.9,1.04,3.44,1065,1 49 | 13.9,1.68,2.12,16,101,3.1,3.39,0.21,2.14,6.1,0.91,3.33,985,1 50 | 14.1,2.02,2.4,18.8,103,2.75,2.92,0.32,2.38,6.2,1.07,2.75,1060,1 51 | 13.94,1.73,2.27,17.4,108,2.88,3.54,0.32,2.08,8.9,1.12,3.1,1260,1 52 | 13.05,1.73,2.04,12.4,92,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150,1 53 | 13.83,1.65,2.6,17.2,94,2.45,2.99,0.22,2.29,5.6,1.24,3.37,1265,1 54 | 13.82,1.75,2.42,14,111,3.88,3.74,0.32,1.87,7.05,1.01,3.26,1190,1 55 | 13.77,1.9,2.68,17.1,115,3,2.79,0.39,1.68,6.3,1.13,2.93,1375,1 56 | 13.74,1.67,2.25,16.4,118,2.6,2.9,0.21,1.62,5.85,0.92,3.2,1060,1 57 | 13.56,1.73,2.46,20.5,116,2.96,2.78,0.2,2.45,6.25,0.98,3.03,1120,1 58 | 14.22,1.7,2.3,16.3,118,3.2,3,0.26,2.03,6.38,0.94,3.31,970,1 59 | 13.29,1.97,2.68,16.8,102,3,3.23,0.31,1.66,6,1.07,2.84,1270,1 60 | 13.72,1.43,2.5,16.7,108,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285,1 61 | 12.37,0.94,1.36,10.6,88,1.98,0.57,0.28,0.42,1.95,1.05,1.82,520,2 62 | 12.33,1.1,2.28,16,101,2.05,1.09,0.63,0.41,3.27,1.25,1.67,680,2 63 | 
12.64,1.36,2.02,16.8,100,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450,2 64 | 13.67,1.25,1.92,18,94,2.1,1.79,0.32,0.73,3.8,1.23,2.46,630,2 65 | 12.37,1.13,2.16,19,87,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420,2 66 | 12.17,1.45,2.53,19,104,1.89,1.75,0.45,1.03,2.95,1.45,2.23,355,2 67 | 12.37,1.21,2.56,18.1,98,2.42,2.65,0.37,2.08,4.6,1.19,2.3,678,2 68 | 13.11,1.01,1.7,15,78,2.98,3.18,0.26,2.28,5.3,1.12,3.18,502,2 69 | 12.37,1.17,1.92,19.6,78,2.11,2,0.27,1.04,4.68,1.12,3.48,510,2 70 | 13.34,0.94,2.36,17,110,2.53,1.3,0.55,0.42,3.17,1.02,1.93,750,2 71 | 12.21,1.19,1.75,16.8,151,1.85,1.28,0.14,2.5,2.85,1.28,3.07,718,2 72 | 12.29,1.61,2.21,20.4,103,1.1,1.02,0.37,1.46,3.05,0.906,1.82,870,2 73 | 13.86,1.51,2.67,25,86,2.95,2.86,0.21,1.87,3.38,1.36,3.16,410,2 74 | 13.49,1.66,2.24,24,87,1.88,1.84,0.27,1.03,3.74,0.98,2.78,472,2 75 | 12.99,1.67,2.6,30,139,3.3,2.89,0.21,1.96,3.35,1.31,3.5,985,2 76 | 11.96,1.09,2.3,21,101,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886,2 77 | 11.66,1.88,1.92,16,97,1.61,1.57,0.34,1.15,3.8,1.23,2.14,428,2 78 | 13.03,0.9,1.71,16,86,1.95,2.03,0.24,1.46,4.6,1.19,2.48,392,2 79 | 11.84,2.89,2.23,18,112,1.72,1.32,0.43,0.95,2.65,0.96,2.52,500,2 80 | 12.33,0.99,1.95,14.8,136,1.9,1.85,0.35,2.76,3.4,1.06,2.31,750,2 81 | 12.7,3.87,2.4,23,101,2.83,2.55,0.43,1.95,2.57,1.19,3.13,463,2 82 | 12,0.92,2,19,86,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278,2 83 | 12.72,1.81,2.2,18.8,86,2.2,2.53,0.26,1.77,3.9,1.16,3.14,714,2 84 | 12.08,1.13,2.51,24,78,2,1.58,0.4,1.4,2.2,1.31,2.72,630,2 85 | 13.05,3.86,2.32,22.5,85,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515,2 86 | 11.84,0.89,2.58,18,94,2.2,2.21,0.22,2.35,3.05,0.79,3.08,520,2 87 | 12.67,0.98,2.24,18,99,2.2,1.94,0.3,1.46,2.62,1.23,3.16,450,2 88 | 12.16,1.61,2.31,22.8,90,1.78,1.69,0.43,1.56,2.45,1.33,2.26,495,2 89 | 11.65,1.67,2.62,26,88,1.92,1.61,0.4,1.34,2.6,1.36,3.21,562,2 90 | 11.64,2.06,2.46,21.6,84,1.95,1.69,0.48,1.35,2.8,1,2.75,680,2 91 | 12.08,1.33,2.3,23.6,70,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625,2 92 | 12.08,1.83,2.32,18.5,81,1.6,1.5,0.52,1.64,2.4,1.08,2.27,480,2 93 | 12,1.51,2.42,22,86,1.45,1.25,0.5,1.63,3.6,1.05,2.65,450,2 94 | 12.69,1.53,2.26,20.7,80,1.38,1.46,0.58,1.62,3.05,0.96,2.06,495,2 95 | 12.29,2.83,2.22,18,88,2.45,2.25,0.25,1.99,2.15,1.15,3.3,290,2 96 | 11.62,1.99,2.28,18,98,3.02,2.26,0.17,1.35,3.25,1.16,2.96,345,2 97 | 12.47,1.52,2.2,19,162,2.5,2.27,0.32,3.28,2.6,1.16,2.63,937,2 98 | 11.81,2.12,2.74,21.5,134,1.6,0.99,0.14,1.56,2.5,0.95,2.26,625,2 99 | 12.29,1.41,1.98,16,85,2.55,2.5,0.29,1.77,2.9,1.23,2.74,428,2 100 | 12.37,1.07,2.1,18.5,88,3.52,3.75,0.24,1.95,4.5,1.04,2.77,660,2 101 | 12.29,3.17,2.21,18,88,2.85,2.99,0.45,2.81,2.3,1.42,2.83,406,2 102 | 12.08,2.08,1.7,17.5,97,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710,2 103 | 12.6,1.34,1.9,18.5,88,1.45,1.36,0.29,1.35,2.45,1.04,2.77,562,2 104 | 12.34,2.45,2.46,21,98,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438,2 105 | 11.82,1.72,1.88,19.5,86,2.5,1.64,0.37,1.42,2.06,0.94,2.44,415,2 106 | 12.51,1.73,1.98,20.5,85,2.2,1.92,0.32,1.48,2.94,1.04,3.57,672,2 107 | 12.42,2.55,2.27,22,90,1.68,1.84,0.66,1.42,2.7,0.86,3.3,315,2 108 | 12.25,1.73,2.12,19,80,1.65,2.03,0.37,1.63,3.4,1,3.17,510,2 109 | 12.72,1.75,2.28,22.5,84,1.38,1.76,0.48,1.63,3.3,0.88,2.42,488,2 110 | 12.22,1.29,1.94,19,92,2.36,2.04,0.39,2.08,2.7,0.86,3.02,312,2 111 | 11.61,1.35,2.7,20,94,2.74,2.92,0.29,2.49,2.65,0.96,3.26,680,2 112 | 11.46,3.74,1.82,19.5,107,3.18,2.58,0.24,3.58,2.9,0.75,2.81,562,2 113 | 12.52,2.43,2.17,21,88,2.55,2.27,0.26,1.22,2,0.9,2.78,325,2 114 | 11.76,2.68,2.92,20,103,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607,2 115 | 
11.41,0.74,2.5,21,88,2.48,2.01,0.42,1.44,3.08,1.1,2.31,434,2 116 | 12.08,1.39,2.5,22.5,84,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385,2 117 | 11.03,1.51,2.2,21.5,85,2.46,2.17,0.52,2.01,1.9,1.71,2.87,407,2 118 | 11.82,1.47,1.99,20.8,86,1.98,1.6,0.3,1.53,1.95,0.95,3.33,495,2 119 | 12.42,1.61,2.19,22.5,108,2,2.09,0.34,1.61,2.06,1.06,2.96,345,2 120 | 12.77,3.43,1.98,16,80,1.63,1.25,0.43,0.83,3.4,0.7,2.12,372,2 121 | 12,3.43,2,19,87,2,1.64,0.37,1.87,1.28,0.93,3.05,564,2 122 | 11.45,2.4,2.42,20,96,2.9,2.79,0.32,1.83,3.25,0.8,3.39,625,2 123 | 11.56,2.05,3.23,28.5,119,3.18,5.08,0.47,1.87,6,0.93,3.69,465,2 124 | 12.42,4.43,2.73,26.5,102,2.2,2.13,0.43,1.71,2.08,0.92,3.12,365,2 125 | 13.05,5.8,2.13,21.5,86,2.62,2.65,0.3,2.01,2.6,0.73,3.1,380,2 126 | 11.87,4.31,2.39,21,82,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380,2 127 | 12.07,2.16,2.17,21,85,2.6,2.65,0.37,1.35,2.76,0.86,3.28,378,2 128 | 12.43,1.53,2.29,21.5,86,2.74,3.15,0.39,1.77,3.94,0.69,2.84,352,2 129 | 11.79,2.13,2.78,28.5,92,2.13,2.24,0.58,1.76,3,0.97,2.44,466,2 130 | 12.37,1.63,2.3,24.5,88,2.22,2.45,0.4,1.9,2.12,0.89,2.78,342,2 131 | 12.04,4.3,2.38,22,80,2.1,1.75,0.42,1.35,2.6,0.79,2.57,580,2 132 | 12.86,1.35,2.32,18,122,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630,3 133 | 12.88,2.99,2.4,20,104,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530,3 134 | 12.81,2.31,2.4,24,98,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560,3 135 | 12.7,3.55,2.36,21.5,106,1.7,1.2,0.17,0.84,5,0.78,1.29,600,3 136 | 12.51,1.24,2.25,17.5,85,2,0.58,0.6,1.25,5.45,0.75,1.51,650,3 137 | 12.6,2.46,2.2,18.5,94,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695,3 138 | 12.25,4.72,2.54,21,89,1.38,0.47,0.53,0.8,3.85,0.75,1.27,720,3 139 | 12.53,5.51,2.64,25,96,1.79,0.6,0.63,1.1,5,0.82,1.69,515,3 140 | 13.49,3.59,2.19,19.5,88,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580,3 141 | 12.84,2.96,2.61,24,101,2.32,0.6,0.53,0.81,4.92,0.89,2.15,590,3 142 | 12.93,2.81,2.7,21,96,1.54,0.5,0.53,0.75,4.6,0.77,2.31,600,3 143 | 13.36,2.56,2.35,20,89,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780,3 144 | 13.52,3.17,2.72,23.5,97,1.55,0.52,0.5,0.55,4.35,0.89,2.06,520,3 145 | 13.62,4.95,2.35,20,92,2,0.8,0.47,1.02,4.4,0.91,2.05,550,3 146 | 12.25,3.88,2.2,18.5,112,1.38,0.78,0.29,1.14,8.21,0.65,2,855,3 147 | 13.16,3.57,2.15,21,102,1.5,0.55,0.43,1.3,4,0.6,1.68,830,3 148 | 13.88,5.04,2.23,20,80,0.98,0.34,0.4,0.68,4.9,0.58,1.33,415,3 149 | 12.87,4.61,2.48,21.5,86,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625,3 150 | 13.32,3.24,2.38,21.5,92,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650,3 151 | 13.08,3.9,2.36,21.5,113,1.41,1.39,0.34,1.14,9.4,0.57,1.33,550,3 152 | 13.5,3.12,2.62,24,123,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500,3 153 | 12.79,2.67,2.48,22,112,1.48,1.36,0.24,1.26,10.8,0.48,1.47,480,3 154 | 13.11,1.9,2.75,25.5,116,2.2,1.28,0.26,1.56,7.1,0.61,1.33,425,3 155 | 13.23,3.3,2.28,18.5,98,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675,3 156 | 12.58,1.29,2.1,20,103,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640,3 157 | 13.17,5.19,2.32,22,93,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725,3 158 | 13.84,4.12,2.38,19.5,89,1.8,0.83,0.48,1.56,9.01,0.57,1.64,480,3 159 | 12.45,3.03,2.64,27,97,1.9,0.58,0.63,1.14,7.5,0.67,1.73,880,3 160 | 14.34,1.68,2.7,25,98,2.8,1.31,0.53,2.7,13,0.57,1.96,660,3 161 | 13.48,1.67,2.64,22.5,89,2.6,1.1,0.52,2.29,11.75,0.57,1.78,620,3 162 | 12.36,3.83,2.38,21,88,2.3,0.92,0.5,1.04,7.65,0.56,1.58,520,3 163 | 13.69,3.26,2.54,20,107,1.83,0.56,0.5,0.8,5.88,0.96,1.82,680,3 164 | 12.85,3.27,2.58,22,106,1.65,0.6,0.6,0.96,5.58,0.87,2.11,570,3 165 | 12.96,3.45,2.35,18.5,106,1.39,0.7,0.4,0.94,5.28,0.68,1.75,675,3 166 | 13.78,2.76,2.3,22,90,1.35,0.68,0.41,1.03,9.58,0.7,1.68,615,3 167 | 
13.73,4.36,2.26,22.5,88,1.28,0.47,0.52,1.15,6.62,0.78,1.75,520,3 168 | 13.45,3.7,2.6,23,111,1.7,0.92,0.43,1.46,10.68,0.85,1.56,695,3 169 | 12.82,3.37,2.3,19.5,88,1.48,0.66,0.4,0.97,10.26,0.72,1.75,685,3 170 | 13.58,2.58,2.69,24.5,105,1.55,0.84,0.39,1.54,8.66,0.74,1.8,750,3 171 | 13.4,4.6,2.86,25,112,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630,3 172 | 12.2,3.03,2.32,19,96,1.25,0.49,0.4,0.73,5.5,0.66,1.83,510,3 173 | 12.77,2.39,2.28,19.5,86,1.39,0.51,0.48,0.64,9.899999,0.57,1.63,470,3 174 | 14.16,2.51,2.48,20,91,1.68,0.7,0.44,1.24,9.7,0.62,1.71,660,3 175 | 13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740,3 176 | 13.4,3.91,2.48,23,102,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750,3 177 | 13.27,4.28,2.26,20,120,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835,3 178 | 13.17,2.59,2.37,20,120,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840,3 179 | 14.13,4.1,2.74,24.5,96,2.05,0.76,0.56,1.35,9.2,0.61,1.6,560,3 -------------------------------------------------------------------------------- /Part 9 - Dimensionality Reduction/Section 23 - Principal Component Analysis (PCA)/pca.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Sep 25 21:47:59 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | # Importing libraries 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import pandas as pd 12 | 13 | # Importing dataset 14 | dataset = pd.read_csv('Wine.csv') 15 | x = dataset.iloc[:,:-1].values 16 | y = dataset.iloc[:,-1].values 17 | 18 | # Feature scaling 19 | from sklearn.preprocessing import StandardScaler 20 | standardscaler_x = StandardScaler() 21 | standardscaler_x = standardscaler_x.fit(x) 22 | x = standardscaler_x.transform(x) 23 | 24 | # Applying PCA on x 25 | from sklearn.decomposition import PCA 26 | pca = PCA(n_components=2) 27 | x_pca = pca.fit_transform(x) 28 | enough_components = np.sum(pca.explained_variance_ratio_) > 0.4 # if True, 2 components are enough 29 | # The threshold is problem dependent, but more than 40% is very good.
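# A quick sketch of a more systematic choice (the 0.4 threshold above is an
# assumption carried over from the comment): fit PCA with all components and
# pick the smallest count whose cumulative explained variance passes it.
pca_full = PCA().fit(x)  # n_components defaults to all features
cumulative_variance = np.cumsum(pca_full.explained_variance_ratio_)
n_needed = int(np.argmax(cumulative_variance > 0.4) + 1)  # first index passing the threshold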
30 | 31 | 32 | # Splitting dataset into Train set and Test set 33 | from sklearn.model_selection import train_test_split 34 | x_train,x_test,y_train,y_test = train_test_split(x_pca,y, train_size = 0.8 , random_state=0) 35 | 36 | # Fitting Logistic Regression model to train data 37 | from sklearn.linear_model import LogisticRegression 38 | classifier = LogisticRegression(random_state=0, solver='liblinear') 39 | classifier = classifier.fit(x_train,y_train) 40 | 41 | # Make the prediction on train set 42 | y_train_pred = classifier.predict(x_train) 43 | 44 | # Make the prediction on test set 45 | y_test_pred = classifier.predict(x_test) 46 | 47 | # Accuracy on train and test sets 48 | from sklearn.metrics import confusion_matrix 49 | cm_train = confusion_matrix(y_train,y_train_pred) 50 | cm_test = confusion_matrix(y_test,y_test_pred) 51 | 52 | import os 53 | import sys 54 | 55 | scriptpath = "../../Tools" # functions of acc and CAP 56 | # Add the directory containing your module to the Python path 57 | sys.path.append(os.path.abspath(scriptpath)) 58 | import accuracy as ac 59 | 60 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train) 61 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100)) 62 | 63 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test) 64 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100)) 65 | 66 | # Visualising the Training set results 67 | from matplotlib.colors import ListedColormap 68 | X_set, y_set = x_train, y_train 69 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 70 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 71 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 72 | alpha = 0.75, cmap = ListedColormap(('red', 'green','gray'))) 73 | plt.xlim(X1.min(), X1.max()) 74 | plt.ylim(X2.min(), X2.max()) 75 | for i, j in enumerate(np.unique(y_set)): 76 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 77 | c = ListedColormap(('red', 'green','gray'))(i), label = j) 78 | plt.title('Logistic Regression (Training set)') 79 | plt.xlabel('PCA 1') 80 | plt.ylabel('PCA 2') 81 | plt.legend() 82 | plt.show() 83 | 84 | # Visualising the Test set results 85 | from matplotlib.colors import ListedColormap 86 | X_set, y_set = x_test, y_test 87 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 88 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 89 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 90 | alpha = 0.75, cmap = ListedColormap(('red', 'green','gray'))) 91 | plt.xlim(X1.min(), X1.max()) 92 | plt.ylim(X2.min(), X2.max()) 93 | for i, j in enumerate(np.unique(y_set)): 94 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 95 | c = ListedColormap(('red', 'green','gray'))(i), label = j) 96 | plt.title('Logistic Regression (Test set)') 97 | plt.xlabel('PCA 1') 98 | plt.ylabel('PCA 2') 99 | plt.legend() 100 | plt.show() 101 | -------------------------------------------------------------------------------- /Part 9 - Dimensionality Reduction/Section 24 - Linear Discriminant Analysis (LDA)/Wine.csv: -------------------------------------------------------------------------------- 1 |
Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline,Customer_Segment 2 | 14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,1 3 | 13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,1 4 | 13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185,1 5 | 14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480,1 6 | 13.24,2.59,2.87,21,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735,1 7 | 14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450,1 8 | 14.39,1.87,2.45,14.6,96,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290,1 9 | 14.06,2.15,2.61,17.6,121,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295,1 10 | 14.83,1.64,2.17,14,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045,1 11 | 13.86,1.35,2.27,16,98,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045,1 12 | 14.1,2.16,2.3,18,105,2.95,3.32,0.22,2.38,5.75,1.25,3.17,1510,1 13 | 14.12,1.48,2.32,16.8,95,2.2,2.43,0.26,1.57,5,1.17,2.82,1280,1 14 | 13.75,1.73,2.41,16,89,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320,1 15 | 14.75,1.73,2.39,11.4,91,3.1,3.69,0.43,2.81,5.4,1.25,2.73,1150,1 16 | 14.38,1.87,2.38,12,102,3.3,3.64,0.29,2.96,7.5,1.2,3,1547,1 17 | 13.63,1.81,2.7,17.2,112,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310,1 18 | 14.3,1.92,2.72,20,120,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280,1 19 | 13.83,1.57,2.62,20,115,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130,1 20 | 14.19,1.59,2.48,16.5,108,3.3,3.93,0.32,1.86,8.7,1.23,2.82,1680,1 21 | 13.64,3.1,2.56,15.2,116,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845,1 22 | 14.06,1.63,2.28,16,126,3,3.17,0.24,2.1,5.65,1.09,3.71,780,1 23 | 12.93,3.8,2.65,18.6,102,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770,1 24 | 13.71,1.86,2.36,16.6,101,2.61,2.88,0.27,1.69,3.8,1.11,4,1035,1 25 | 12.85,1.6,2.52,17.8,95,2.48,2.37,0.26,1.46,3.93,1.09,3.63,1015,1 26 | 13.5,1.81,2.61,20,96,2.53,2.61,0.28,1.66,3.52,1.12,3.82,845,1 27 | 13.05,2.05,3.22,25,124,2.63,2.68,0.47,1.92,3.58,1.13,3.2,830,1 28 | 13.39,1.77,2.62,16.1,93,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195,1 29 | 13.3,1.72,2.14,17,94,2.4,2.19,0.27,1.35,3.95,1.02,2.77,1285,1 30 | 13.87,1.9,2.8,19.4,107,2.95,2.97,0.37,1.76,4.5,1.25,3.4,915,1 31 | 14.02,1.68,2.21,16,96,2.65,2.33,0.26,1.98,4.7,1.04,3.59,1035,1 32 | 13.73,1.5,2.7,22.5,101,3,3.25,0.29,2.38,5.7,1.19,2.71,1285,1 33 | 13.58,1.66,2.36,19.1,106,2.86,3.19,0.22,1.95,6.9,1.09,2.88,1515,1 34 | 13.68,1.83,2.36,17.2,104,2.42,2.69,0.42,1.97,3.84,1.23,2.87,990,1 35 | 13.76,1.53,2.7,19.5,132,2.95,2.74,0.5,1.35,5.4,1.25,3,1235,1 36 | 13.51,1.8,2.65,19,110,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095,1 37 | 13.48,1.81,2.41,20.5,100,2.7,2.98,0.26,1.86,5.1,1.04,3.47,920,1 38 | 13.28,1.64,2.84,15.5,110,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880,1 39 | 13.05,1.65,2.55,18,98,2.45,2.43,0.29,1.44,4.25,1.12,2.51,1105,1 40 | 13.07,1.5,2.1,15.5,98,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020,1 41 | 14.22,3.99,2.51,13.2,128,3,3.04,0.2,2.08,5.1,0.89,3.53,760,1 42 | 13.56,1.71,2.31,16.2,117,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795,1 43 | 13.41,3.84,2.12,18.8,90,2.45,2.68,0.27,1.48,4.28,0.91,3,1035,1 44 | 13.88,1.89,2.59,15,101,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095,1 45 | 13.24,3.98,2.29,17.5,103,2.64,2.63,0.32,1.66,4.36,0.82,3,680,1 46 | 13.05,1.77,2.1,17,107,3,3,0.28,2.03,5.04,0.88,3.35,885,1 47 | 14.21,4.04,2.44,18.9,111,2.85,2.65,0.3,1.25,5.24,0.87,3.33,1080,1 48 | 14.38,3.59,2.28,16,102,3.25,3.17,0.27,2.19,4.9,1.04,3.44,1065,1 49 | 13.9,1.68,2.12,16,101,3.1,3.39,0.21,2.14,6.1,0.91,3.33,985,1 50 | 14.1,2.02,2.4,18.8,103,2.75,2.92,0.32,2.38,6.2,1.07,2.75,1060,1 51 | 
13.94,1.73,2.27,17.4,108,2.88,3.54,0.32,2.08,8.9,1.12,3.1,1260,1 52 | 13.05,1.73,2.04,12.4,92,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150,1 53 | 13.83,1.65,2.6,17.2,94,2.45,2.99,0.22,2.29,5.6,1.24,3.37,1265,1 54 | 13.82,1.75,2.42,14,111,3.88,3.74,0.32,1.87,7.05,1.01,3.26,1190,1 55 | 13.77,1.9,2.68,17.1,115,3,2.79,0.39,1.68,6.3,1.13,2.93,1375,1 56 | 13.74,1.67,2.25,16.4,118,2.6,2.9,0.21,1.62,5.85,0.92,3.2,1060,1 57 | 13.56,1.73,2.46,20.5,116,2.96,2.78,0.2,2.45,6.25,0.98,3.03,1120,1 58 | 14.22,1.7,2.3,16.3,118,3.2,3,0.26,2.03,6.38,0.94,3.31,970,1 59 | 13.29,1.97,2.68,16.8,102,3,3.23,0.31,1.66,6,1.07,2.84,1270,1 60 | 13.72,1.43,2.5,16.7,108,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285,1 61 | 12.37,0.94,1.36,10.6,88,1.98,0.57,0.28,0.42,1.95,1.05,1.82,520,2 62 | 12.33,1.1,2.28,16,101,2.05,1.09,0.63,0.41,3.27,1.25,1.67,680,2 63 | 12.64,1.36,2.02,16.8,100,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450,2 64 | 13.67,1.25,1.92,18,94,2.1,1.79,0.32,0.73,3.8,1.23,2.46,630,2 65 | 12.37,1.13,2.16,19,87,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420,2 66 | 12.17,1.45,2.53,19,104,1.89,1.75,0.45,1.03,2.95,1.45,2.23,355,2 67 | 12.37,1.21,2.56,18.1,98,2.42,2.65,0.37,2.08,4.6,1.19,2.3,678,2 68 | 13.11,1.01,1.7,15,78,2.98,3.18,0.26,2.28,5.3,1.12,3.18,502,2 69 | 12.37,1.17,1.92,19.6,78,2.11,2,0.27,1.04,4.68,1.12,3.48,510,2 70 | 13.34,0.94,2.36,17,110,2.53,1.3,0.55,0.42,3.17,1.02,1.93,750,2 71 | 12.21,1.19,1.75,16.8,151,1.85,1.28,0.14,2.5,2.85,1.28,3.07,718,2 72 | 12.29,1.61,2.21,20.4,103,1.1,1.02,0.37,1.46,3.05,0.906,1.82,870,2 73 | 13.86,1.51,2.67,25,86,2.95,2.86,0.21,1.87,3.38,1.36,3.16,410,2 74 | 13.49,1.66,2.24,24,87,1.88,1.84,0.27,1.03,3.74,0.98,2.78,472,2 75 | 12.99,1.67,2.6,30,139,3.3,2.89,0.21,1.96,3.35,1.31,3.5,985,2 76 | 11.96,1.09,2.3,21,101,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886,2 77 | 11.66,1.88,1.92,16,97,1.61,1.57,0.34,1.15,3.8,1.23,2.14,428,2 78 | 13.03,0.9,1.71,16,86,1.95,2.03,0.24,1.46,4.6,1.19,2.48,392,2 79 | 11.84,2.89,2.23,18,112,1.72,1.32,0.43,0.95,2.65,0.96,2.52,500,2 80 | 12.33,0.99,1.95,14.8,136,1.9,1.85,0.35,2.76,3.4,1.06,2.31,750,2 81 | 12.7,3.87,2.4,23,101,2.83,2.55,0.43,1.95,2.57,1.19,3.13,463,2 82 | 12,0.92,2,19,86,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278,2 83 | 12.72,1.81,2.2,18.8,86,2.2,2.53,0.26,1.77,3.9,1.16,3.14,714,2 84 | 12.08,1.13,2.51,24,78,2,1.58,0.4,1.4,2.2,1.31,2.72,630,2 85 | 13.05,3.86,2.32,22.5,85,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515,2 86 | 11.84,0.89,2.58,18,94,2.2,2.21,0.22,2.35,3.05,0.79,3.08,520,2 87 | 12.67,0.98,2.24,18,99,2.2,1.94,0.3,1.46,2.62,1.23,3.16,450,2 88 | 12.16,1.61,2.31,22.8,90,1.78,1.69,0.43,1.56,2.45,1.33,2.26,495,2 89 | 11.65,1.67,2.62,26,88,1.92,1.61,0.4,1.34,2.6,1.36,3.21,562,2 90 | 11.64,2.06,2.46,21.6,84,1.95,1.69,0.48,1.35,2.8,1,2.75,680,2 91 | 12.08,1.33,2.3,23.6,70,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625,2 92 | 12.08,1.83,2.32,18.5,81,1.6,1.5,0.52,1.64,2.4,1.08,2.27,480,2 93 | 12,1.51,2.42,22,86,1.45,1.25,0.5,1.63,3.6,1.05,2.65,450,2 94 | 12.69,1.53,2.26,20.7,80,1.38,1.46,0.58,1.62,3.05,0.96,2.06,495,2 95 | 12.29,2.83,2.22,18,88,2.45,2.25,0.25,1.99,2.15,1.15,3.3,290,2 96 | 11.62,1.99,2.28,18,98,3.02,2.26,0.17,1.35,3.25,1.16,2.96,345,2 97 | 12.47,1.52,2.2,19,162,2.5,2.27,0.32,3.28,2.6,1.16,2.63,937,2 98 | 11.81,2.12,2.74,21.5,134,1.6,0.99,0.14,1.56,2.5,0.95,2.26,625,2 99 | 12.29,1.41,1.98,16,85,2.55,2.5,0.29,1.77,2.9,1.23,2.74,428,2 100 | 12.37,1.07,2.1,18.5,88,3.52,3.75,0.24,1.95,4.5,1.04,2.77,660,2 101 | 12.29,3.17,2.21,18,88,2.85,2.99,0.45,2.81,2.3,1.42,2.83,406,2 102 | 12.08,2.08,1.7,17.5,97,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710,2 103 | 
12.6,1.34,1.9,18.5,88,1.45,1.36,0.29,1.35,2.45,1.04,2.77,562,2 104 | 12.34,2.45,2.46,21,98,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438,2 105 | 11.82,1.72,1.88,19.5,86,2.5,1.64,0.37,1.42,2.06,0.94,2.44,415,2 106 | 12.51,1.73,1.98,20.5,85,2.2,1.92,0.32,1.48,2.94,1.04,3.57,672,2 107 | 12.42,2.55,2.27,22,90,1.68,1.84,0.66,1.42,2.7,0.86,3.3,315,2 108 | 12.25,1.73,2.12,19,80,1.65,2.03,0.37,1.63,3.4,1,3.17,510,2 109 | 12.72,1.75,2.28,22.5,84,1.38,1.76,0.48,1.63,3.3,0.88,2.42,488,2 110 | 12.22,1.29,1.94,19,92,2.36,2.04,0.39,2.08,2.7,0.86,3.02,312,2 111 | 11.61,1.35,2.7,20,94,2.74,2.92,0.29,2.49,2.65,0.96,3.26,680,2 112 | 11.46,3.74,1.82,19.5,107,3.18,2.58,0.24,3.58,2.9,0.75,2.81,562,2 113 | 12.52,2.43,2.17,21,88,2.55,2.27,0.26,1.22,2,0.9,2.78,325,2 114 | 11.76,2.68,2.92,20,103,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607,2 115 | 11.41,0.74,2.5,21,88,2.48,2.01,0.42,1.44,3.08,1.1,2.31,434,2 116 | 12.08,1.39,2.5,22.5,84,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385,2 117 | 11.03,1.51,2.2,21.5,85,2.46,2.17,0.52,2.01,1.9,1.71,2.87,407,2 118 | 11.82,1.47,1.99,20.8,86,1.98,1.6,0.3,1.53,1.95,0.95,3.33,495,2 119 | 12.42,1.61,2.19,22.5,108,2,2.09,0.34,1.61,2.06,1.06,2.96,345,2 120 | 12.77,3.43,1.98,16,80,1.63,1.25,0.43,0.83,3.4,0.7,2.12,372,2 121 | 12,3.43,2,19,87,2,1.64,0.37,1.87,1.28,0.93,3.05,564,2 122 | 11.45,2.4,2.42,20,96,2.9,2.79,0.32,1.83,3.25,0.8,3.39,625,2 123 | 11.56,2.05,3.23,28.5,119,3.18,5.08,0.47,1.87,6,0.93,3.69,465,2 124 | 12.42,4.43,2.73,26.5,102,2.2,2.13,0.43,1.71,2.08,0.92,3.12,365,2 125 | 13.05,5.8,2.13,21.5,86,2.62,2.65,0.3,2.01,2.6,0.73,3.1,380,2 126 | 11.87,4.31,2.39,21,82,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380,2 127 | 12.07,2.16,2.17,21,85,2.6,2.65,0.37,1.35,2.76,0.86,3.28,378,2 128 | 12.43,1.53,2.29,21.5,86,2.74,3.15,0.39,1.77,3.94,0.69,2.84,352,2 129 | 11.79,2.13,2.78,28.5,92,2.13,2.24,0.58,1.76,3,0.97,2.44,466,2 130 | 12.37,1.63,2.3,24.5,88,2.22,2.45,0.4,1.9,2.12,0.89,2.78,342,2 131 | 12.04,4.3,2.38,22,80,2.1,1.75,0.42,1.35,2.6,0.79,2.57,580,2 132 | 12.86,1.35,2.32,18,122,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630,3 133 | 12.88,2.99,2.4,20,104,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530,3 134 | 12.81,2.31,2.4,24,98,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560,3 135 | 12.7,3.55,2.36,21.5,106,1.7,1.2,0.17,0.84,5,0.78,1.29,600,3 136 | 12.51,1.24,2.25,17.5,85,2,0.58,0.6,1.25,5.45,0.75,1.51,650,3 137 | 12.6,2.46,2.2,18.5,94,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695,3 138 | 12.25,4.72,2.54,21,89,1.38,0.47,0.53,0.8,3.85,0.75,1.27,720,3 139 | 12.53,5.51,2.64,25,96,1.79,0.6,0.63,1.1,5,0.82,1.69,515,3 140 | 13.49,3.59,2.19,19.5,88,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580,3 141 | 12.84,2.96,2.61,24,101,2.32,0.6,0.53,0.81,4.92,0.89,2.15,590,3 142 | 12.93,2.81,2.7,21,96,1.54,0.5,0.53,0.75,4.6,0.77,2.31,600,3 143 | 13.36,2.56,2.35,20,89,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780,3 144 | 13.52,3.17,2.72,23.5,97,1.55,0.52,0.5,0.55,4.35,0.89,2.06,520,3 145 | 13.62,4.95,2.35,20,92,2,0.8,0.47,1.02,4.4,0.91,2.05,550,3 146 | 12.25,3.88,2.2,18.5,112,1.38,0.78,0.29,1.14,8.21,0.65,2,855,3 147 | 13.16,3.57,2.15,21,102,1.5,0.55,0.43,1.3,4,0.6,1.68,830,3 148 | 13.88,5.04,2.23,20,80,0.98,0.34,0.4,0.68,4.9,0.58,1.33,415,3 149 | 12.87,4.61,2.48,21.5,86,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625,3 150 | 13.32,3.24,2.38,21.5,92,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650,3 151 | 13.08,3.9,2.36,21.5,113,1.41,1.39,0.34,1.14,9.4,0.57,1.33,550,3 152 | 13.5,3.12,2.62,24,123,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500,3 153 | 12.79,2.67,2.48,22,112,1.48,1.36,0.24,1.26,10.8,0.48,1.47,480,3 154 | 13.11,1.9,2.75,25.5,116,2.2,1.28,0.26,1.56,7.1,0.61,1.33,425,3 155 | 
13.23,3.3,2.28,18.5,98,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675,3 156 | 12.58,1.29,2.1,20,103,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640,3 157 | 13.17,5.19,2.32,22,93,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725,3 158 | 13.84,4.12,2.38,19.5,89,1.8,0.83,0.48,1.56,9.01,0.57,1.64,480,3 159 | 12.45,3.03,2.64,27,97,1.9,0.58,0.63,1.14,7.5,0.67,1.73,880,3 160 | 14.34,1.68,2.7,25,98,2.8,1.31,0.53,2.7,13,0.57,1.96,660,3 161 | 13.48,1.67,2.64,22.5,89,2.6,1.1,0.52,2.29,11.75,0.57,1.78,620,3 162 | 12.36,3.83,2.38,21,88,2.3,0.92,0.5,1.04,7.65,0.56,1.58,520,3 163 | 13.69,3.26,2.54,20,107,1.83,0.56,0.5,0.8,5.88,0.96,1.82,680,3 164 | 12.85,3.27,2.58,22,106,1.65,0.6,0.6,0.96,5.58,0.87,2.11,570,3 165 | 12.96,3.45,2.35,18.5,106,1.39,0.7,0.4,0.94,5.28,0.68,1.75,675,3 166 | 13.78,2.76,2.3,22,90,1.35,0.68,0.41,1.03,9.58,0.7,1.68,615,3 167 | 13.73,4.36,2.26,22.5,88,1.28,0.47,0.52,1.15,6.62,0.78,1.75,520,3 168 | 13.45,3.7,2.6,23,111,1.7,0.92,0.43,1.46,10.68,0.85,1.56,695,3 169 | 12.82,3.37,2.3,19.5,88,1.48,0.66,0.4,0.97,10.26,0.72,1.75,685,3 170 | 13.58,2.58,2.69,24.5,105,1.55,0.84,0.39,1.54,8.66,0.74,1.8,750,3 171 | 13.4,4.6,2.86,25,112,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630,3 172 | 12.2,3.03,2.32,19,96,1.25,0.49,0.4,0.73,5.5,0.66,1.83,510,3 173 | 12.77,2.39,2.28,19.5,86,1.39,0.51,0.48,0.64,9.899999,0.57,1.63,470,3 174 | 14.16,2.51,2.48,20,91,1.68,0.7,0.44,1.24,9.7,0.62,1.71,660,3 175 | 13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740,3 176 | 13.4,3.91,2.48,23,102,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750,3 177 | 13.27,4.28,2.26,20,120,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835,3 178 | 13.17,2.59,2.37,20,120,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840,3 179 | 14.13,4.1,2.74,24.5,96,2.05,0.76,0.56,1.35,9.2,0.61,1.6,560,3 -------------------------------------------------------------------------------- /Part 9 - Dimensionality Reduction/Section 24 - Linear Discriminant Analysis (LDA)/lda.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Sep 25 22:11:46 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | # Importing libraries 9 | import numpy as np 10 | import matplotlib.pyplot as plt 11 | import pandas as pd 12 | 13 | # Importing dataset 14 | dataset = pd.read_csv('Wine.csv') 15 | x = dataset.iloc[:,:-1].values 16 | y = dataset.iloc[:,-1].values 17 | 18 | # Feature scaling 19 | from sklearn.preprocessing import StandardScaler 20 | standardscaler_x = StandardScaler() 21 | standardscaler_x = standardscaler_x.fit(x) 22 | x = standardscaler_x.transform(x) 23 | 24 | # Applying LDA on x (supervised, so it uses the labels y as well) 25 | from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA 26 | lda = LDA(n_components=2) 27 | x_lda = lda.fit_transform(x,y) 28 | enough_components = np.sum(lda.explained_variance_ratio_) > 0.4 # if True, 2 components are enough 29 | # The threshold is problem dependent, but more than 40% is very good.
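# A small sanity check (sketch): unlike PCA, LDA can produce at most
# n_classes - 1 discriminants, so with the 3 customer segments in Wine,
# n_components=2 above is already the maximum possible.
n_max_components = len(np.unique(y)) - 1  # -> 2 for this dataset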
30 | 31 | # Splitting dataset into Train set and Test set 32 | from sklearn.model_selection import train_test_split 33 | x_train,x_test,y_train,y_test = train_test_split(x_lda,y, train_size = 0.8 , random_state=0) 34 | 35 | # Fitting Logistic Regression model to train data 36 | from sklearn.linear_model import LogisticRegression 37 | classifier = LogisticRegression(random_state=0, solver='liblinear') 38 | classifier = classifier.fit(x_train,y_train) 39 | 40 | # Make the prediction on train set 41 | y_train_pred = classifier.predict(x_train) 42 | 43 | # Make the prediction on test set 44 | y_test_pred = classifier.predict(x_test) 45 | 46 | # Accuracy on train and test sets 47 | from sklearn.metrics import confusion_matrix 48 | cm_train = confusion_matrix(y_train,y_train_pred) 49 | cm_test = confusion_matrix(y_test,y_test_pred) 50 | 51 | import os 52 | import sys 53 | 54 | scriptpath = "../../Tools" # functions of acc and CAP 55 | # Add the directory containing your module to the Python path 56 | sys.path.append(os.path.abspath(scriptpath)) 57 | import accuracy as ac 58 | 59 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train) 60 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100)) 61 | 62 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test) 63 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100)) 64 | 65 | # Visualising the Training set results 66 | from matplotlib.colors import ListedColormap 67 | X_set, y_set = x_train, y_train 68 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 69 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 70 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 71 | alpha = 0.75, cmap = ListedColormap(('red', 'green','gray'))) 72 | plt.xlim(X1.min(), X1.max()) 73 | plt.ylim(X2.min(), X2.max()) 74 | for i, j in enumerate(np.unique(y_set)): 75 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 76 | c = ListedColormap(('red', 'green','gray'))(i), label = j) 77 | plt.title('Logistic Regression (Training set)') 78 | plt.xlabel('LD 1') 79 | plt.ylabel('LD 2') 80 | plt.legend() 81 | plt.show() 82 | 83 | # Visualising the Test set results 84 | from matplotlib.colors import ListedColormap 85 | X_set, y_set = x_test, y_test 86 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 87 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 88 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 89 | alpha = 0.75, cmap = ListedColormap(('red', 'green','gray'))) 90 | plt.xlim(X1.min(), X1.max()) 91 | plt.ylim(X2.min(), X2.max()) 92 | for i, j in enumerate(np.unique(y_set)): 93 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 94 | c = ListedColormap(('red', 'green','gray'))(i), label = j) 95 | plt.title('Logistic Regression (Test set)') 96 | plt.xlabel('LD 1') 97 | plt.ylabel('LD 2') 98 | plt.legend() 99 | plt.show() 100 | -------------------------------------------------------------------------------- /Part 9 - Dimensionality Reduction/Section 25 - Kernel PCA/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 |
15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 
119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000.0,0 216 | 15622478,Male,47.0,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 | 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 
15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 
15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 -------------------------------------------------------------------------------- /Part 9 - Dimensionality Reduction/Section 25 - Kernel PCA/kernel_pca.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Sep 25 22:36:20 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | 8 | 9 | # Importing libraries 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | import pandas as pd 13 | 14 | # Importing dataset 15 | dataset = pd.read_csv('Social_Network_Ads.csv') 16 | x = dataset.iloc[:,1:-1].values 17 | y = dataset.iloc[:,-1].values 18 | 19 | # Convert categorical data with LabelEncoder 20 | from sklearn.preprocessing import LabelEncoder 21 | lbl_encoder = LabelEncoder() 22 | x[:,0] = lbl_encoder.fit_transform(x[:,0]) # convert gender to 0 and 1 23 | 24 | # Feature scaling 25 | from sklearn.preprocessing import StandardScaler 26 | standardscaler_x = StandardScaler() 27 | standardscaler_x = standardscaler_x.fit(x) 28 | x = standardscaler_x.transform(x) 29 | 30 | # Applying KernelPCA on x 31 | from sklearn.decomposition import KernelPCA 32 | pca = KernelPCA(n_components=2, kernel='rbf') # We use a kernel to map the data into a higher dimension where the classes are linearly separable, 33 | # then use PCA in that space to reduce the dimension.
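# Sketch of what the RBF kernel computes: k(a, b) = exp(-gamma * ||a - b||^2);
# when gamma is not given, scikit-learn defaults it to 1 / n_features.
# A hypothetical manual check of a single kernel entry (not used by the model):
gamma = 1.0 / x.shape[1]
k_01 = np.exp(-gamma * np.sum((x[0] - x[1]) ** 2))  # kernel similarity of samples 0 and 1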
34 | x_pca = pca.fit_transform(x) 35 | 36 | # Splitting dataset into Train set and Test set 37 | from sklearn.model_selection import train_test_split 38 | x_train, x_test, y_train, y_test = train_test_split(x_pca, y, train_size=0.8, random_state=0) 39 | 40 | # Fitting Logistic Regression model to train data 41 | from sklearn.linear_model import LogisticRegression 42 | classifier = LogisticRegression(random_state=0, solver='liblinear') 43 | classifier = classifier.fit(x_train,y_train) 44 | 45 | # Make the prediction on train set 46 | y_train_pred = classifier.predict(x_train) 47 | 48 | # Make the prediction on test set 49 | y_test_pred = classifier.predict(x_test) 50 | 51 | # Accuracy on train and test sets 52 | from sklearn.metrics import confusion_matrix 53 | cm_train = confusion_matrix(y_train,y_train_pred) 54 | cm_test = confusion_matrix(y_test,y_test_pred) 55 | 56 | import os 57 | import sys 58 | 59 | scriptpath = "../../Tools" # helper functions for accuracy and the CAP curve 60 | # Add the directory containing the module to the Python path 61 | sys.path.append(os.path.abspath(scriptpath)) 62 | import accuracy as ac 63 | 64 | t_train,f_train,acc_train = ac.accuracy_on_cm(cm_train) 65 | print('Train status = #{} True, #{} False, %{} Accuracy'.format(t_train,f_train,acc_train*100)) 66 | 67 | t_test,f_test,acc_test = ac.accuracy_on_cm(cm_test) 68 | print('Test status = #{} True, #{} False, %{} Accuracy'.format(t_test,f_test,acc_test*100)) 69 | 70 | # Visualising the Training set results 71 | from matplotlib.colors import ListedColormap 72 | X_set, y_set = x_train, y_train 73 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 74 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 75 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 76 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 77 | plt.xlim(X1.min(), X1.max()) 78 | plt.ylim(X2.min(), X2.max()) 79 | for i, j in enumerate(np.unique(y_set)): 80 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 81 | c = ListedColormap(('red', 'green'))(i), label = j) 82 | plt.title('Logistic Regression (Training set)') 83 | plt.xlabel('PCA 1') 84 | plt.ylabel('PCA 2') 85 | plt.legend() 86 | plt.show() 87 | 88 | # Visualising the Test set results 89 | from matplotlib.colors import ListedColormap 90 | X_set, y_set = x_test, y_test 91 | X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01), 92 | np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01)) 93 | plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), 94 | alpha = 0.75, cmap = ListedColormap(('red', 'green'))) 95 | plt.xlim(X1.min(), X1.max()) 96 | plt.ylim(X2.min(), X2.max()) 97 | for i, j in enumerate(np.unique(y_set)): 98 | plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], 99 | c = ListedColormap(('red', 'green'))(i), label = j) 100 | plt.title('Logistic Regression (Test set)') 101 | plt.xlabel('PCA 1') 102 | plt.ylabel('PCA 2') 103 | plt.legend() 104 | plt.show() 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine-Learning-Models 2 | In this repository I implement machine learning methods ranging from simple to complex, written as reusable, template-style code.
All of these models will be applied to real datasets in separate, fully documented Jupyter notebooks. 3 | 4 | # Goal 5 | I use a fairly general form, plus some additional methods, when building the templates so they can be imported directly into a Jupyter notebook. 6 | 7 | # Resource 8 | Superdatascience.com 9 | 10 | # Citation 11 | Please cite this project as: 12 | 13 | Nikan Doosti. (2020). Nikronic/Machine-Learning-Models: DOI Release (v0.1-alpha). Zenodo. https://doi.org/10.5281/zenodo.3838653 14 | 15 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3838653.svg)](https://doi.org/10.5281/zenodo.3838653) 16 | -------------------------------------------------------------------------------- /Tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nikronic/Machine-Learning-Models/9fb48463ee2211eec800b2436699508f55d5ee28/Tools/__init__.py -------------------------------------------------------------------------------- /Tools/accuracy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Sep 9 14:51:24 2018 4 | 5 | @author: Mohammad Doosti Lakhani 6 | """ 7 | """ 8 | In this file, I implement functions for measuring accuracy, such as accuracy derived from a confusion matrix, or a CAP curve. 9 | You can use these functions with any of the implemented models. 10 | """ 11 | 12 | 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | import pandas as pd 16 | from scipy import integrate 17 | 18 | 19 | def capcurve(y_values, y_preds_proba, title_of_chart): 20 | num_pos_obs = np.sum(y_values) 21 | num_count = len(y_values) 22 | rate_pos_obs = float(num_pos_obs) / float(num_count) 23 | ideal = pd.DataFrame({'x':[0,rate_pos_obs,1],'y':[0,1,1]}) 24 | xx = np.arange(num_count) / float(num_count - 1) 25 | 26 | y_cap = np.c_[y_values,y_preds_proba[:,1]] 27 | y_cap_df_s = pd.DataFrame(data=y_cap) 28 | y_cap_df_s = y_cap_df_s.sort_values([1], ascending=False).reset_index(level = y_cap_df_s.index.names, drop=True) 29 | 30 | print(y_cap_df_s.head(20)) # inspect the top-ranked observations 31 | 32 | yy = np.cumsum(y_cap_df_s[0]) / float(num_pos_obs) 33 | yy = np.append([0], yy[0:num_count-1]) # add the first curve point (0,0): for xx=0 we have yy=0 34 | 35 | percent = 0.5 36 | row_index = int(np.trunc(num_count * percent)) 37 | 38 | val_y1 = yy[row_index] 39 | val_y2 = yy[row_index+1] 40 | if val_y1 == val_y2: 41 | val = val_y1*1.0 42 | else: 43 | val_x1 = xx[row_index] 44 | val_x2 = xx[row_index+1] 45 | val = val_y1 + ((val_x2 - percent)/(val_x2 - val_x1))*(val_y2 - val_y1) 46 | 47 | sigma_ideal = 1 * xx[num_pos_obs - 1] / 2 + (xx[num_count - 1] - xx[num_pos_obs]) * 1 48 | sigma_model = integrate.simps(yy,xx) 49 | sigma_random = integrate.simps(xx,xx) 50 | 51 | ar_value = (sigma_model - sigma_random) / (sigma_ideal - sigma_random) 52 | 53 | fig, ax = plt.subplots(nrows = 1, ncols = 1) 54 | ax.plot(ideal['x'],ideal['y'], color='grey', label='Perfect Model') 55 | ax.plot(xx,yy, color='red', label='User Model') 56 | ax.plot(xx,xx, color='blue', label='Random Model') 57 | ax.plot([percent, percent], [0.0, val], color='green', linestyle='--', linewidth=1) 58 | ax.plot([0, percent], [val, val], color='green', linestyle='--', linewidth=1, label=str(val*100)+'% of positive obs at '+str(percent*100)+'%') 59 | 60 | plt.xlim(0, 1.02) 61 | plt.ylim(0, 1.25) 62 | plt.title("CAP Curve ("+title_of_chart+") - a_r value ="+str(ar_value)) 63 | plt.xlabel('% of the data') 64 | plt.ylabel('% of positive obs') 65 | plt.legend() 66 | # ref =
https://github.com/APavlides/cap_curve/blob/master/cap_curve.py 67 | 68 | 69 | def accuracy_on_cm(confusion_matrix): 70 | t = np.trace(confusion_matrix) # correctly classified samples (the diagonal) 71 | f = np.sum(confusion_matrix) - t # misclassified samples (everything off the diagonal) 72 | ac = t/(t+f) # overall accuracy 73 | return (t,f,ac) --------------------------------------------------------------------------------
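As a usage reference, here is a minimal, self-contained sketch of how these two helpers can be driven end to end. This is a hypothetical example rather than a file from the repository: the make_classification toy data and the LogisticRegression model are stand-ins for any fitted binary classifier that exposes predict and predict_proba, and the Tools path may need adjusting to wherever the script is run from.

import os
import sys
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

sys.path.append(os.path.abspath('Tools'))  # assumed: running from the repository root
import accuracy as ac

# Toy binary classification problem standing in for the real datasets
x, y = make_classification(n_samples=400, n_features=4, random_state=0)
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=0)
classifier = LogisticRegression(solver='liblinear').fit(x_train, y_train)

# accuracy_on_cm returns (correct count, incorrect count, accuracy) from a confusion matrix
cm = confusion_matrix(y_test, classifier.predict(x_test))
t, f, acc = ac.accuracy_on_cm(cm)
print('Test status = #{} True, #{} False, %{} Accuracy'.format(t, f, acc*100))

# capcurve expects the full (n_samples, 2) probability matrix; it ranks by column 1
ac.capcurve(y_test, classifier.predict_proba(x_test), 'Logistic Regression')
plt.show()  # capcurve builds the figure but leaves rendering to the caller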