├── images ├── Images.md ├── rf.png ├── BvsV.png ├── MSE.png ├── dt1.jpg ├── knn1.png ├── okay.jpeg ├── reg.jpg ├── svm.jpg ├── svm1.png ├── formula.png ├── kernel2.jpg ├── logreg.jpg ├── thanks.png ├── voting.png ├── hierarch.gif ├── kmeans++.gif ├── model_fit.jpg ├── tradeoff.png ├── cluster_v_class.jpg ├── support_vectors.jpg ├── Kmeans_animation.gif └── hierarchial_approach.jfif ├── data ├── Position_Salaries.csv ├── Data.csv ├── Mall_Customers.csv ├── wine-clustering.csv ├── Social_Network_Ads.csv ├── x-y.csv └── Realestate.csv ├── requirements.txt ├── .gitignore ├── README.md ├── week 2.ipynb └── week 3.ipynb /images/Images.md: -------------------------------------------------------------------------------- 1 | Contains Images 2 | -------------------------------------------------------------------------------- /images/rf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/rf.png -------------------------------------------------------------------------------- /images/BvsV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/BvsV.png -------------------------------------------------------------------------------- /images/MSE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/MSE.png -------------------------------------------------------------------------------- /images/dt1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/dt1.jpg -------------------------------------------------------------------------------- /images/knn1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/knn1.png -------------------------------------------------------------------------------- /images/okay.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/okay.jpeg -------------------------------------------------------------------------------- /images/reg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/reg.jpg -------------------------------------------------------------------------------- /images/svm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/svm.jpg -------------------------------------------------------------------------------- /images/svm1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/svm1.png -------------------------------------------------------------------------------- /images/formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/formula.png -------------------------------------------------------------------------------- /images/kernel2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/kernel2.jpg -------------------------------------------------------------------------------- /images/logreg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/logreg.jpg -------------------------------------------------------------------------------- /images/thanks.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/thanks.png -------------------------------------------------------------------------------- /images/voting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/voting.png -------------------------------------------------------------------------------- /images/hierarch.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/hierarch.gif -------------------------------------------------------------------------------- /images/kmeans++.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/kmeans++.gif -------------------------------------------------------------------------------- /images/model_fit.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/model_fit.jpg -------------------------------------------------------------------------------- /images/tradeoff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/tradeoff.png -------------------------------------------------------------------------------- /images/cluster_v_class.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/cluster_v_class.jpg -------------------------------------------------------------------------------- /images/support_vectors.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/support_vectors.jpg -------------------------------------------------------------------------------- /images/Kmeans_animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/Kmeans_animation.gif -------------------------------------------------------------------------------- /images/hierarchial_approach.jfif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kabirnagpal/ML-Track/HEAD/images/hierarchial_approach.jfif -------------------------------------------------------------------------------- /data/Position_Salaries.csv: -------------------------------------------------------------------------------- 1 | Position,Level,Salary 2 | Business Analyst,1,45000 3 | Junior Consultant,2,50000 4 | Senior Consultant,3,60000 5 | Manager,4,80000 6 | Country Manager,5,110000 7 | Region Manager,6,150000 8 | Partner,7,200000 9 | Senior Partner,8,300000 10 | C-level,9,500000 11 | CEO,10,1000000 -------------------------------------------------------------------------------- /data/Data.csv: -------------------------------------------------------------------------------- 1 | City,Experience,Salary,Promotion 2 | Delhi,4,55000,No 3 | Mumbai,2,20000,Yes 4 | Agra,3,30000,No 5 | Mumbai,8,72000,No 6 | Agra,4,,Yes 7 | Delhi,5,60000,Yes 8 | Mumbai,,52000,No 9 | Delhi,4,51000,Yes 10 | Agra,5,59000,No 11 | Delhi,3,31000,Yes 12 | ,5,58000,Yes 13 | Mumbai,,52000,No 14 | Delhi,8,79000,Yes 15 | Agra,5,60000,No 16 | Delhi,7,67000,Yes -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Package Version 2 | --------------- ------- 3 | cycler 0.10.0 4 | joblib 0.16.0 5 | kiwisolver 1.2.0 6 | matplotlib 3.2.2 7 | numpy 1.19.0 8 | pandas 
1.0.5 9 | pip 20.0.2 10 | pkg-resources 0.0.0 11 | plotly 4.8.2 12 | pyparsing 2.4.7 13 | python-dateutil 2.8.1 14 | pytz 2020.1 15 | retrying 1.3.3 16 | scikit-learn 0.23.1 17 | scipy 1.5.1 18 | seaborn 0.10.1 19 | setuptools 44.0.0 20 | six 1.15.0 21 | threadpoolctl 2.1.0 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | SoA-ML-14 2 | ========== 3 | ## Week 1: Intro to NumPy and Pandas 4 | ### (Anaconda, Spyder, Jupyter) 5 | #### Getting Familiar with: 6 | [*Link to Week 1's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%201.ipynb) 7 | * NumPy 8 | * Pandas 9 | * Matplotlib 10 | 11 | ## Week 2: Basic Data Pre-processing: 12 | [*Link to Week 2's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%202.ipynb) 13 | * One-Hot Encoding 14 | * Label Encoding 15 | * Normalization 16 | * Dealing with Missing Values 17 | * Introduction to Machine Learning 18 | * Types of Learning (Supervised, Unsupervised and Reinforcement) 19 | * Application of 
Machine Learning 20 | 21 | 22 | ## Week 3: Regression Algorithms: 23 | [*Link to Week 3's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%203.ipynb) 24 | * Linear Regression 25 | * Multiple Linear Regression 26 | * Polynomial Regression 27 | 28 | 29 | ## Week 4: Classification Algorithms: 30 | [*Link to Week 4's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%204.ipynb) 31 | * Logistic Regression 32 | * K-Nearest Neighbours 33 | * Support Vector Classifier 34 | * Decision Tree 35 | * Random Forest 36 | * Voting Classifier 37 | 38 | 39 | ## Week 5: Bias vs Variance Trade-off 40 | [*Link to Week 5's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%205.ipynb) 41 | * Overfitting 42 | * Underfitting 43 | * Regularization 44 | * Support Vector Machines 45 | 46 | 47 | ## Week 6: Clustering Algorithms: 48 | [*Link to Week 6's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%206.ipynb) 49 | * K-means Clustering 50 | * Hierarchical Clustering 51 | 52 | 53 | ## Week 7: Dimensionality Reduction: 54 | [*Link to Week 7's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%207.ipynb) 55 | * PCA 56 | * LDA 57 | * Kernel PCA 58 | 59 | * Model Selection: 60 | * K-fold Cross Validation 61 | * Parameter Tuning 62 | * Grid Search 63 | 64 | ## Week 8: An introduction to Boosting 65 | [*Link to Week 8's Jupyter Notebook*](https://github.com/kabirnagpal/SoA-ML-14/blob/master/week%208.ipynb) 66 | * Gradient Boosting 67 | * XGBoost -------------------------------------------------------------------------------- /data/Mall_Customers.csv: -------------------------------------------------------------------------------- 1 | CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100) 2 | 1,Male,19,15,39 3 | 2,Male,21,15,81 4 | 3,Female,20,16,6 5 | 4,Female,23,16,77 6 | 5,Female,31,17,40 7 | 6,Female,22,17,76 8 | 7,Female,35,18,6 9 | 8,Female,23,18,94 10 | 
9,Male,64,19,3 11 | 10,Female,30,19,72 12 | 11,Male,67,19,14 13 | 12,Female,35,19,99 14 | 13,Female,58,20,15 15 | 14,Female,24,20,77 16 | 15,Male,37,20,13 17 | 16,Male,22,20,79 18 | 17,Female,35,21,35 19 | 18,Male,20,21,66 20 | 19,Male,52,23,29 21 | 20,Female,35,23,98 22 | 21,Male,35,24,35 23 | 22,Male,25,24,73 24 | 23,Female,46,25,5 25 | 24,Male,31,25,73 26 | 25,Female,54,28,14 27 | 26,Male,29,28,82 28 | 27,Female,45,28,32 29 | 28,Male,35,28,61 30 | 29,Female,40,29,31 31 | 30,Female,23,29,87 32 | 31,Male,60,30,4 33 | 32,Female,21,30,73 34 | 33,Male,53,33,4 35 | 34,Male,18,33,92 36 | 35,Female,49,33,14 37 | 36,Female,21,33,81 38 | 37,Female,42,34,17 39 | 38,Female,30,34,73 40 | 39,Female,36,37,26 41 | 40,Female,20,37,75 42 | 41,Female,65,38,35 43 | 42,Male,24,38,92 44 | 43,Male,48,39,36 45 | 44,Female,31,39,61 46 | 45,Female,49,39,28 47 | 46,Female,24,39,65 48 | 47,Female,50,40,55 49 | 48,Female,27,40,47 50 | 49,Female,29,40,42 51 | 50,Female,31,40,42 52 | 51,Female,49,42,52 53 | 52,Male,33,42,60 54 | 53,Female,31,43,54 55 | 54,Male,59,43,60 56 | 55,Female,50,43,45 57 | 56,Male,47,43,41 58 | 57,Female,51,44,50 59 | 58,Male,69,44,46 60 | 59,Female,27,46,51 61 | 60,Male,53,46,46 62 | 61,Male,70,46,56 63 | 62,Male,19,46,55 64 | 63,Female,67,47,52 65 | 64,Female,54,47,59 66 | 65,Male,63,48,51 67 | 66,Male,18,48,59 68 | 67,Female,43,48,50 69 | 68,Female,68,48,48 70 | 69,Male,19,48,59 71 | 70,Female,32,48,47 72 | 71,Male,70,49,55 73 | 72,Female,47,49,42 74 | 73,Female,60,50,49 75 | 74,Female,60,50,56 76 | 75,Male,59,54,47 77 | 76,Male,26,54,54 78 | 77,Female,45,54,53 79 | 78,Male,40,54,48 80 | 79,Female,23,54,52 81 | 80,Female,49,54,42 82 | 81,Male,57,54,51 83 | 82,Male,38,54,55 84 | 83,Male,67,54,41 85 | 84,Female,46,54,44 86 | 85,Female,21,54,57 87 | 86,Male,48,54,46 88 | 87,Female,55,57,58 89 | 88,Female,22,57,55 90 | 89,Female,34,58,60 91 | 90,Female,50,58,46 92 | 91,Female,68,59,55 93 | 92,Male,18,59,41 94 | 93,Male,48,60,49 95 | 94,Female,40,60,40 96 | 
95,Female,32,60,42 97 | 96,Male,24,60,52 98 | 97,Female,47,60,47 99 | 98,Female,27,60,50 100 | 99,Male,48,61,42 101 | 100,Male,20,61,49 102 | 101,Female,23,62,41 103 | 102,Female,49,62,48 104 | 103,Male,67,62,59 105 | 104,Male,26,62,55 106 | 105,Male,49,62,56 107 | 106,Female,21,62,42 108 | 107,Female,66,63,50 109 | 108,Male,54,63,46 110 | 109,Male,68,63,43 111 | 110,Male,66,63,48 112 | 111,Male,65,63,52 113 | 112,Female,19,63,54 114 | 113,Female,38,64,42 115 | 114,Male,19,64,46 116 | 115,Female,18,65,48 117 | 116,Female,19,65,50 118 | 117,Female,63,65,43 119 | 118,Female,49,65,59 120 | 119,Female,51,67,43 121 | 120,Female,50,67,57 122 | 121,Male,27,67,56 123 | 122,Female,38,67,40 124 | 123,Female,40,69,58 125 | 124,Male,39,69,91 126 | 125,Female,23,70,29 127 | 126,Female,31,70,77 128 | 127,Male,43,71,35 129 | 128,Male,40,71,95 130 | 129,Male,59,71,11 131 | 130,Male,38,71,75 132 | 131,Male,47,71,9 133 | 132,Male,39,71,75 134 | 133,Female,25,72,34 135 | 134,Female,31,72,71 136 | 135,Male,20,73,5 137 | 136,Female,29,73,88 138 | 137,Female,44,73,7 139 | 138,Male,32,73,73 140 | 139,Male,19,74,10 141 | 140,Female,35,74,72 142 | 141,Female,57,75,5 143 | 142,Male,32,75,93 144 | 143,Female,28,76,40 145 | 144,Female,32,76,87 146 | 145,Male,25,77,12 147 | 146,Male,28,77,97 148 | 147,Male,48,77,36 149 | 148,Female,32,77,74 150 | 149,Female,34,78,22 151 | 150,Male,34,78,90 152 | 151,Male,43,78,17 153 | 152,Male,39,78,88 154 | 153,Female,44,78,20 155 | 154,Female,38,78,76 156 | 155,Female,47,78,16 157 | 156,Female,27,78,89 158 | 157,Male,37,78,1 159 | 158,Female,30,78,78 160 | 159,Male,34,78,1 161 | 160,Female,30,78,73 162 | 161,Female,56,79,35 163 | 162,Female,29,79,83 164 | 163,Male,19,81,5 165 | 164,Female,31,81,93 166 | 165,Male,50,85,26 167 | 166,Female,36,85,75 168 | 167,Male,42,86,20 169 | 168,Female,33,86,95 170 | 169,Female,36,87,27 171 | 170,Male,32,87,63 172 | 171,Male,40,87,13 173 | 172,Male,28,87,75 174 | 173,Male,36,87,10 175 | 174,Male,36,87,92 176 | 
175,Female,52,88,13 177 | 176,Female,30,88,86 178 | 177,Male,58,88,15 179 | 178,Male,27,88,69 180 | 179,Male,59,93,14 181 | 180,Male,35,93,90 182 | 181,Female,37,97,32 183 | 182,Female,32,97,86 184 | 183,Male,46,98,15 185 | 184,Female,29,98,88 186 | 185,Female,41,99,39 187 | 186,Male,30,99,97 188 | 187,Female,54,101,24 189 | 188,Male,28,101,68 190 | 189,Female,41,103,17 191 | 190,Female,36,103,85 192 | 191,Female,34,103,23 193 | 192,Female,32,103,69 194 | 193,Male,33,113,8 195 | 194,Female,38,113,91 196 | 195,Female,47,120,16 197 | 196,Female,35,120,79 198 | 197,Female,45,126,28 199 | 198,Male,32,126,74 200 | 199,Male,32,137,18 201 | 200,Male,30,137,83 202 | -------------------------------------------------------------------------------- /data/wine-clustering.csv: -------------------------------------------------------------------------------- 1 | Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline 2 | 14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065 3 | 13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050 4 | 13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185 5 | 14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480 6 | 13.24,2.59,2.87,21,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735 7 | 14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450 8 | 14.39,1.87,2.45,14.6,96,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290 9 | 14.06,2.15,2.61,17.6,121,2.6,2.51,0.31,1.25,5.05,1.06,3.58,1295 10 | 14.83,1.64,2.17,14,97,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045 11 | 13.86,1.35,2.27,16,98,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045 12 | 14.1,2.16,2.3,18,105,2.95,3.32,0.22,2.38,5.75,1.25,3.17,1510 13 | 14.12,1.48,2.32,16.8,95,2.2,2.43,0.26,1.57,5,1.17,2.82,1280 14 | 13.75,1.73,2.41,16,89,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320 15 | 14.75,1.73,2.39,11.4,91,3.1,3.69,0.43,2.81,5.4,1.25,2.73,1150 16 | 14.38,1.87,2.38,12,102,3.3,3.64,0.29,2.96,7.5,1.2,3,1547 17 | 
13.63,1.81,2.7,17.2,112,2.85,2.91,0.3,1.46,7.3,1.28,2.88,1310 18 | 14.3,1.92,2.72,20,120,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280 19 | 13.83,1.57,2.62,20,115,2.95,3.4,0.4,1.72,6.6,1.13,2.57,1130 20 | 14.19,1.59,2.48,16.5,108,3.3,3.93,0.32,1.86,8.7,1.23,2.82,1680 21 | 13.64,3.1,2.56,15.2,116,2.7,3.03,0.17,1.66,5.1,0.96,3.36,845 22 | 14.06,1.63,2.28,16,126,3,3.17,0.24,2.1,5.65,1.09,3.71,780 23 | 12.93,3.8,2.65,18.6,102,2.41,2.41,0.25,1.98,4.5,1.03,3.52,770 24 | 13.71,1.86,2.36,16.6,101,2.61,2.88,0.27,1.69,3.8,1.11,4,1035 25 | 12.85,1.6,2.52,17.8,95,2.48,2.37,0.26,1.46,3.93,1.09,3.63,1015 26 | 13.5,1.81,2.61,20,96,2.53,2.61,0.28,1.66,3.52,1.12,3.82,845 27 | 13.05,2.05,3.22,25,124,2.63,2.68,0.47,1.92,3.58,1.13,3.2,830 28 | 13.39,1.77,2.62,16.1,93,2.85,2.94,0.34,1.45,4.8,0.92,3.22,1195 29 | 13.3,1.72,2.14,17,94,2.4,2.19,0.27,1.35,3.95,1.02,2.77,1285 30 | 13.87,1.9,2.8,19.4,107,2.95,2.97,0.37,1.76,4.5,1.25,3.4,915 31 | 14.02,1.68,2.21,16,96,2.65,2.33,0.26,1.98,4.7,1.04,3.59,1035 32 | 13.73,1.5,2.7,22.5,101,3,3.25,0.29,2.38,5.7,1.19,2.71,1285 33 | 13.58,1.66,2.36,19.1,106,2.86,3.19,0.22,1.95,6.9,1.09,2.88,1515 34 | 13.68,1.83,2.36,17.2,104,2.42,2.69,0.42,1.97,3.84,1.23,2.87,990 35 | 13.76,1.53,2.7,19.5,132,2.95,2.74,0.5,1.35,5.4,1.25,3,1235 36 | 13.51,1.8,2.65,19,110,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095 37 | 13.48,1.81,2.41,20.5,100,2.7,2.98,0.26,1.86,5.1,1.04,3.47,920 38 | 13.28,1.64,2.84,15.5,110,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880 39 | 13.05,1.65,2.55,18,98,2.45,2.43,0.29,1.44,4.25,1.12,2.51,1105 40 | 13.07,1.5,2.1,15.5,98,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020 41 | 14.22,3.99,2.51,13.2,128,3,3.04,0.2,2.08,5.1,0.89,3.53,760 42 | 13.56,1.71,2.31,16.2,117,3.15,3.29,0.34,2.34,6.13,0.95,3.38,795 43 | 13.41,3.84,2.12,18.8,90,2.45,2.68,0.27,1.48,4.28,0.91,3,1035 44 | 13.88,1.89,2.59,15,101,3.25,3.56,0.17,1.7,5.43,0.88,3.56,1095 45 | 13.24,3.98,2.29,17.5,103,2.64,2.63,0.32,1.66,4.36,0.82,3,680 46 | 13.05,1.77,2.1,17,107,3,3,0.28,2.03,5.04,0.88,3.35,885 47 | 
14.21,4.04,2.44,18.9,111,2.85,2.65,0.3,1.25,5.24,0.87,3.33,1080 48 | 14.38,3.59,2.28,16,102,3.25,3.17,0.27,2.19,4.9,1.04,3.44,1065 49 | 13.9,1.68,2.12,16,101,3.1,3.39,0.21,2.14,6.1,0.91,3.33,985 50 | 14.1,2.02,2.4,18.8,103,2.75,2.92,0.32,2.38,6.2,1.07,2.75,1060 51 | 13.94,1.73,2.27,17.4,108,2.88,3.54,0.32,2.08,8.9,1.12,3.1,1260 52 | 13.05,1.73,2.04,12.4,92,2.72,3.27,0.17,2.91,7.2,1.12,2.91,1150 53 | 13.83,1.65,2.6,17.2,94,2.45,2.99,0.22,2.29,5.6,1.24,3.37,1265 54 | 13.82,1.75,2.42,14,111,3.88,3.74,0.32,1.87,7.05,1.01,3.26,1190 55 | 13.77,1.9,2.68,17.1,115,3,2.79,0.39,1.68,6.3,1.13,2.93,1375 56 | 13.74,1.67,2.25,16.4,118,2.6,2.9,0.21,1.62,5.85,0.92,3.2,1060 57 | 13.56,1.73,2.46,20.5,116,2.96,2.78,0.2,2.45,6.25,0.98,3.03,1120 58 | 14.22,1.7,2.3,16.3,118,3.2,3,0.26,2.03,6.38,0.94,3.31,970 59 | 13.29,1.97,2.68,16.8,102,3,3.23,0.31,1.66,6,1.07,2.84,1270 60 | 13.72,1.43,2.5,16.7,108,3.4,3.67,0.19,2.04,6.8,0.89,2.87,1285 61 | 12.37,0.94,1.36,10.6,88,1.98,0.57,0.28,0.42,1.95,1.05,1.82,520 62 | 12.33,1.1,2.28,16,101,2.05,1.09,0.63,0.41,3.27,1.25,1.67,680 63 | 12.64,1.36,2.02,16.8,100,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450 64 | 13.67,1.25,1.92,18,94,2.1,1.79,0.32,0.73,3.8,1.23,2.46,630 65 | 12.37,1.13,2.16,19,87,3.5,3.1,0.19,1.87,4.45,1.22,2.87,420 66 | 12.17,1.45,2.53,19,104,1.89,1.75,0.45,1.03,2.95,1.45,2.23,355 67 | 12.37,1.21,2.56,18.1,98,2.42,2.65,0.37,2.08,4.6,1.19,2.3,678 68 | 13.11,1.01,1.7,15,78,2.98,3.18,0.26,2.28,5.3,1.12,3.18,502 69 | 12.37,1.17,1.92,19.6,78,2.11,2,0.27,1.04,4.68,1.12,3.48,510 70 | 13.34,0.94,2.36,17,110,2.53,1.3,0.55,0.42,3.17,1.02,1.93,750 71 | 12.21,1.19,1.75,16.8,151,1.85,1.28,0.14,2.5,2.85,1.28,3.07,718 72 | 12.29,1.61,2.21,20.4,103,1.1,1.02,0.37,1.46,3.05,0.906,1.82,870 73 | 13.86,1.51,2.67,25,86,2.95,2.86,0.21,1.87,3.38,1.36,3.16,410 74 | 13.49,1.66,2.24,24,87,1.88,1.84,0.27,1.03,3.74,0.98,2.78,472 75 | 12.99,1.67,2.6,30,139,3.3,2.89,0.21,1.96,3.35,1.31,3.5,985 76 | 11.96,1.09,2.3,21,101,3.38,2.14,0.13,1.65,3.21,0.99,3.13,886 77 | 
11.66,1.88,1.92,16,97,1.61,1.57,0.34,1.15,3.8,1.23,2.14,428 78 | 13.03,0.9,1.71,16,86,1.95,2.03,0.24,1.46,4.6,1.19,2.48,392 79 | 11.84,2.89,2.23,18,112,1.72,1.32,0.43,0.95,2.65,0.96,2.52,500 80 | 12.33,0.99,1.95,14.8,136,1.9,1.85,0.35,2.76,3.4,1.06,2.31,750 81 | 12.7,3.87,2.4,23,101,2.83,2.55,0.43,1.95,2.57,1.19,3.13,463 82 | 12,0.92,2,19,86,2.42,2.26,0.3,1.43,2.5,1.38,3.12,278 83 | 12.72,1.81,2.2,18.8,86,2.2,2.53,0.26,1.77,3.9,1.16,3.14,714 84 | 12.08,1.13,2.51,24,78,2,1.58,0.4,1.4,2.2,1.31,2.72,630 85 | 13.05,3.86,2.32,22.5,85,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515 86 | 11.84,0.89,2.58,18,94,2.2,2.21,0.22,2.35,3.05,0.79,3.08,520 87 | 12.67,0.98,2.24,18,99,2.2,1.94,0.3,1.46,2.62,1.23,3.16,450 88 | 12.16,1.61,2.31,22.8,90,1.78,1.69,0.43,1.56,2.45,1.33,2.26,495 89 | 11.65,1.67,2.62,26,88,1.92,1.61,0.4,1.34,2.6,1.36,3.21,562 90 | 11.64,2.06,2.46,21.6,84,1.95,1.69,0.48,1.35,2.8,1,2.75,680 91 | 12.08,1.33,2.3,23.6,70,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625 92 | 12.08,1.83,2.32,18.5,81,1.6,1.5,0.52,1.64,2.4,1.08,2.27,480 93 | 12,1.51,2.42,22,86,1.45,1.25,0.5,1.63,3.6,1.05,2.65,450 94 | 12.69,1.53,2.26,20.7,80,1.38,1.46,0.58,1.62,3.05,0.96,2.06,495 95 | 12.29,2.83,2.22,18,88,2.45,2.25,0.25,1.99,2.15,1.15,3.3,290 96 | 11.62,1.99,2.28,18,98,3.02,2.26,0.17,1.35,3.25,1.16,2.96,345 97 | 12.47,1.52,2.2,19,162,2.5,2.27,0.32,3.28,2.6,1.16,2.63,937 98 | 11.81,2.12,2.74,21.5,134,1.6,0.99,0.14,1.56,2.5,0.95,2.26,625 99 | 12.29,1.41,1.98,16,85,2.55,2.5,0.29,1.77,2.9,1.23,2.74,428 100 | 12.37,1.07,2.1,18.5,88,3.52,3.75,0.24,1.95,4.5,1.04,2.77,660 101 | 12.29,3.17,2.21,18,88,2.85,2.99,0.45,2.81,2.3,1.42,2.83,406 102 | 12.08,2.08,1.7,17.5,97,2.23,2.17,0.26,1.4,3.3,1.27,2.96,710 103 | 12.6,1.34,1.9,18.5,88,1.45,1.36,0.29,1.35,2.45,1.04,2.77,562 104 | 12.34,2.45,2.46,21,98,2.56,2.11,0.34,1.31,2.8,0.8,3.38,438 105 | 11.82,1.72,1.88,19.5,86,2.5,1.64,0.37,1.42,2.06,0.94,2.44,415 106 | 12.51,1.73,1.98,20.5,85,2.2,1.92,0.32,1.48,2.94,1.04,3.57,672 107 | 
12.42,2.55,2.27,22,90,1.68,1.84,0.66,1.42,2.7,0.86,3.3,315 108 | 12.25,1.73,2.12,19,80,1.65,2.03,0.37,1.63,3.4,1,3.17,510 109 | 12.72,1.75,2.28,22.5,84,1.38,1.76,0.48,1.63,3.3,0.88,2.42,488 110 | 12.22,1.29,1.94,19,92,2.36,2.04,0.39,2.08,2.7,0.86,3.02,312 111 | 11.61,1.35,2.7,20,94,2.74,2.92,0.29,2.49,2.65,0.96,3.26,680 112 | 11.46,3.74,1.82,19.5,107,3.18,2.58,0.24,3.58,2.9,0.75,2.81,562 113 | 12.52,2.43,2.17,21,88,2.55,2.27,0.26,1.22,2,0.9,2.78,325 114 | 11.76,2.68,2.92,20,103,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607 115 | 11.41,0.74,2.5,21,88,2.48,2.01,0.42,1.44,3.08,1.1,2.31,434 116 | 12.08,1.39,2.5,22.5,84,2.56,2.29,0.43,1.04,2.9,0.93,3.19,385 117 | 11.03,1.51,2.2,21.5,85,2.46,2.17,0.52,2.01,1.9,1.71,2.87,407 118 | 11.82,1.47,1.99,20.8,86,1.98,1.6,0.3,1.53,1.95,0.95,3.33,495 119 | 12.42,1.61,2.19,22.5,108,2,2.09,0.34,1.61,2.06,1.06,2.96,345 120 | 12.77,3.43,1.98,16,80,1.63,1.25,0.43,0.83,3.4,0.7,2.12,372 121 | 12,3.43,2,19,87,2,1.64,0.37,1.87,1.28,0.93,3.05,564 122 | 11.45,2.4,2.42,20,96,2.9,2.79,0.32,1.83,3.25,0.8,3.39,625 123 | 11.56,2.05,3.23,28.5,119,3.18,5.08,0.47,1.87,6,0.93,3.69,465 124 | 12.42,4.43,2.73,26.5,102,2.2,2.13,0.43,1.71,2.08,0.92,3.12,365 125 | 13.05,5.8,2.13,21.5,86,2.62,2.65,0.3,2.01,2.6,0.73,3.1,380 126 | 11.87,4.31,2.39,21,82,2.86,3.03,0.21,2.91,2.8,0.75,3.64,380 127 | 12.07,2.16,2.17,21,85,2.6,2.65,0.37,1.35,2.76,0.86,3.28,378 128 | 12.43,1.53,2.29,21.5,86,2.74,3.15,0.39,1.77,3.94,0.69,2.84,352 129 | 11.79,2.13,2.78,28.5,92,2.13,2.24,0.58,1.76,3,0.97,2.44,466 130 | 12.37,1.63,2.3,24.5,88,2.22,2.45,0.4,1.9,2.12,0.89,2.78,342 131 | 12.04,4.3,2.38,22,80,2.1,1.75,0.42,1.35,2.6,0.79,2.57,580 132 | 12.86,1.35,2.32,18,122,1.51,1.25,0.21,0.94,4.1,0.76,1.29,630 133 | 12.88,2.99,2.4,20,104,1.3,1.22,0.24,0.83,5.4,0.74,1.42,530 134 | 12.81,2.31,2.4,24,98,1.15,1.09,0.27,0.83,5.7,0.66,1.36,560 135 | 12.7,3.55,2.36,21.5,106,1.7,1.2,0.17,0.84,5,0.78,1.29,600 136 | 12.51,1.24,2.25,17.5,85,2,0.58,0.6,1.25,5.45,0.75,1.51,650 137 | 
12.6,2.46,2.2,18.5,94,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695 138 | 12.25,4.72,2.54,21,89,1.38,0.47,0.53,0.8,3.85,0.75,1.27,720 139 | 12.53,5.51,2.64,25,96,1.79,0.6,0.63,1.1,5,0.82,1.69,515 140 | 13.49,3.59,2.19,19.5,88,1.62,0.48,0.58,0.88,5.7,0.81,1.82,580 141 | 12.84,2.96,2.61,24,101,2.32,0.6,0.53,0.81,4.92,0.89,2.15,590 142 | 12.93,2.81,2.7,21,96,1.54,0.5,0.53,0.75,4.6,0.77,2.31,600 143 | 13.36,2.56,2.35,20,89,1.4,0.5,0.37,0.64,5.6,0.7,2.47,780 144 | 13.52,3.17,2.72,23.5,97,1.55,0.52,0.5,0.55,4.35,0.89,2.06,520 145 | 13.62,4.95,2.35,20,92,2,0.8,0.47,1.02,4.4,0.91,2.05,550 146 | 12.25,3.88,2.2,18.5,112,1.38,0.78,0.29,1.14,8.21,0.65,2,855 147 | 13.16,3.57,2.15,21,102,1.5,0.55,0.43,1.3,4,0.6,1.68,830 148 | 13.88,5.04,2.23,20,80,0.98,0.34,0.4,0.68,4.9,0.58,1.33,415 149 | 12.87,4.61,2.48,21.5,86,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625 150 | 13.32,3.24,2.38,21.5,92,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650 151 | 13.08,3.9,2.36,21.5,113,1.41,1.39,0.34,1.14,9.4,0.57,1.33,550 152 | 13.5,3.12,2.62,24,123,1.4,1.57,0.22,1.25,8.6,0.59,1.3,500 153 | 12.79,2.67,2.48,22,112,1.48,1.36,0.24,1.26,10.8,0.48,1.47,480 154 | 13.11,1.9,2.75,25.5,116,2.2,1.28,0.26,1.56,7.1,0.61,1.33,425 155 | 13.23,3.3,2.28,18.5,98,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675 156 | 12.58,1.29,2.1,20,103,1.48,0.58,0.53,1.4,7.6,0.58,1.55,640 157 | 13.17,5.19,2.32,22,93,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725 158 | 13.84,4.12,2.38,19.5,89,1.8,0.83,0.48,1.56,9.01,0.57,1.64,480 159 | 12.45,3.03,2.64,27,97,1.9,0.58,0.63,1.14,7.5,0.67,1.73,880 160 | 14.34,1.68,2.7,25,98,2.8,1.31,0.53,2.7,13,0.57,1.96,660 161 | 13.48,1.67,2.64,22.5,89,2.6,1.1,0.52,2.29,11.75,0.57,1.78,620 162 | 12.36,3.83,2.38,21,88,2.3,0.92,0.5,1.04,7.65,0.56,1.58,520 163 | 13.69,3.26,2.54,20,107,1.83,0.56,0.5,0.8,5.88,0.96,1.82,680 164 | 12.85,3.27,2.58,22,106,1.65,0.6,0.6,0.96,5.58,0.87,2.11,570 165 | 12.96,3.45,2.35,18.5,106,1.39,0.7,0.4,0.94,5.28,0.68,1.75,675 166 | 13.78,2.76,2.3,22,90,1.35,0.68,0.41,1.03,9.58,0.7,1.68,615 167 | 
13.73,4.36,2.26,22.5,88,1.28,0.47,0.52,1.15,6.62,0.78,1.75,520 168 | 13.45,3.7,2.6,23,111,1.7,0.92,0.43,1.46,10.68,0.85,1.56,695 169 | 12.82,3.37,2.3,19.5,88,1.48,0.66,0.4,0.97,10.26,0.72,1.75,685 170 | 13.58,2.58,2.69,24.5,105,1.55,0.84,0.39,1.54,8.66,0.74,1.8,750 171 | 13.4,4.6,2.86,25,112,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630 172 | 12.2,3.03,2.32,19,96,1.25,0.49,0.4,0.73,5.5,0.66,1.83,510 173 | 12.77,2.39,2.28,19.5,86,1.39,0.51,0.48,0.64,9.899999,0.57,1.63,470 174 | 14.16,2.51,2.48,20,91,1.68,0.7,0.44,1.24,9.7,0.62,1.71,660 175 | 13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.7,0.64,1.74,740 176 | 13.4,3.91,2.48,23,102,1.8,0.75,0.43,1.41,7.3,0.7,1.56,750 177 | 13.27,4.28,2.26,20,120,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835 178 | 13.17,2.59,2.37,20,120,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840 179 | 14.13,4.1,2.74,24.5,96,2.05,0.76,0.56,1.35,9.2,0.61,1.6,560 -------------------------------------------------------------------------------- /data/Social_Network_Ads.csv: -------------------------------------------------------------------------------- 1 | User ID,Gender,Age,EstimatedSalary,Purchased 2 | 15624510,Male,19,19000,0 3 | 15810944,Male,35,20000,0 4 | 15668575,Female,26,43000,0 5 | 15603246,Female,27,57000,0 6 | 15804002,Male,19,76000,0 7 | 15728773,Male,27,58000,0 8 | 15598044,Female,27,84000,0 9 | 15694829,Female,32,150000,1 10 | 15600575,Male,25,33000,0 11 | 15727311,Female,35,65000,0 12 | 15570769,Female,26,80000,0 13 | 15606274,Female,26,52000,0 14 | 15746139,Male,20,86000,0 15 | 15704987,Male,32,18000,0 16 | 15628972,Male,18,82000,0 17 | 15697686,Male,29,80000,0 18 | 15733883,Male,47,25000,1 19 | 15617482,Male,45,26000,1 20 | 15704583,Male,46,28000,1 21 | 15621083,Female,48,29000,1 22 | 15649487,Male,45,22000,1 23 | 15736760,Female,47,49000,1 24 | 15714658,Male,48,41000,1 25 | 15599081,Female,45,22000,1 26 | 15705113,Male,46,23000,1 27 | 15631159,Male,47,20000,1 28 | 15792818,Male,49,28000,1 29 | 15633531,Female,47,30000,1 30 | 15744529,Male,29,43000,0 31 | 
15669656,Male,31,18000,0 32 | 15581198,Male,31,74000,0 33 | 15729054,Female,27,137000,1 34 | 15573452,Female,21,16000,0 35 | 15776733,Female,28,44000,0 36 | 15724858,Male,27,90000,0 37 | 15713144,Male,35,27000,0 38 | 15690188,Female,33,28000,0 39 | 15689425,Male,30,49000,0 40 | 15671766,Female,26,72000,0 41 | 15782806,Female,27,31000,0 42 | 15764419,Female,27,17000,0 43 | 15591915,Female,33,51000,0 44 | 15772798,Male,35,108000,0 45 | 15792008,Male,30,15000,0 46 | 15715541,Female,28,84000,0 47 | 15639277,Male,23,20000,0 48 | 15798850,Male,25,79000,0 49 | 15776348,Female,27,54000,0 50 | 15727696,Male,30,135000,1 51 | 15793813,Female,31,89000,0 52 | 15694395,Female,24,32000,0 53 | 15764195,Female,18,44000,0 54 | 15744919,Female,29,83000,0 55 | 15671655,Female,35,23000,0 56 | 15654901,Female,27,58000,0 57 | 15649136,Female,24,55000,0 58 | 15775562,Female,23,48000,0 59 | 15807481,Male,28,79000,0 60 | 15642885,Male,22,18000,0 61 | 15789109,Female,32,117000,0 62 | 15814004,Male,27,20000,0 63 | 15673619,Male,25,87000,0 64 | 15595135,Female,23,66000,0 65 | 15583681,Male,32,120000,1 66 | 15605000,Female,59,83000,0 67 | 15718071,Male,24,58000,0 68 | 15679760,Male,24,19000,0 69 | 15654574,Female,23,82000,0 70 | 15577178,Female,22,63000,0 71 | 15595324,Female,31,68000,0 72 | 15756932,Male,25,80000,0 73 | 15726358,Female,24,27000,0 74 | 15595228,Female,20,23000,0 75 | 15782530,Female,33,113000,0 76 | 15592877,Male,32,18000,0 77 | 15651983,Male,34,112000,1 78 | 15746737,Male,18,52000,0 79 | 15774179,Female,22,27000,0 80 | 15667265,Female,28,87000,0 81 | 15655123,Female,26,17000,0 82 | 15595917,Male,30,80000,0 83 | 15668385,Male,39,42000,0 84 | 15709476,Male,20,49000,0 85 | 15711218,Male,35,88000,0 86 | 15798659,Female,30,62000,0 87 | 15663939,Female,31,118000,1 88 | 15694946,Male,24,55000,0 89 | 15631912,Female,28,85000,0 90 | 15768816,Male,26,81000,0 91 | 15682268,Male,35,50000,0 92 | 15684801,Male,22,81000,0 93 | 15636428,Female,30,116000,0 94 | 15809823,Male,26,15000,0 95 | 
15699284,Female,29,28000,0 96 | 15786993,Female,29,83000,0 97 | 15709441,Female,35,44000,0 98 | 15710257,Female,35,25000,0 99 | 15582492,Male,28,123000,1 100 | 15575694,Male,35,73000,0 101 | 15756820,Female,28,37000,0 102 | 15766289,Male,27,88000,0 103 | 15593014,Male,28,59000,0 104 | 15584545,Female,32,86000,0 105 | 15675949,Female,33,149000,1 106 | 15672091,Female,19,21000,0 107 | 15801658,Male,21,72000,0 108 | 15706185,Female,26,35000,0 109 | 15789863,Male,27,89000,0 110 | 15720943,Male,26,86000,0 111 | 15697997,Female,38,80000,0 112 | 15665416,Female,39,71000,0 113 | 15660200,Female,37,71000,0 114 | 15619653,Male,38,61000,0 115 | 15773447,Male,37,55000,0 116 | 15739160,Male,42,80000,0 117 | 15689237,Male,40,57000,0 118 | 15679297,Male,35,75000,0 119 | 15591433,Male,36,52000,0 120 | 15642725,Male,40,59000,0 121 | 15701962,Male,41,59000,0 122 | 15811613,Female,36,75000,0 123 | 15741049,Male,37,72000,0 124 | 15724423,Female,40,75000,0 125 | 15574305,Male,35,53000,0 126 | 15678168,Female,41,51000,0 127 | 15697020,Female,39,61000,0 128 | 15610801,Male,42,65000,0 129 | 15745232,Male,26,32000,0 130 | 15722758,Male,30,17000,0 131 | 15792102,Female,26,84000,0 132 | 15675185,Male,31,58000,0 133 | 15801247,Male,33,31000,0 134 | 15725660,Male,30,87000,0 135 | 15638963,Female,21,68000,0 136 | 15800061,Female,28,55000,0 137 | 15578006,Male,23,63000,0 138 | 15668504,Female,20,82000,0 139 | 15687491,Male,30,107000,1 140 | 15610403,Female,28,59000,0 141 | 15741094,Male,19,25000,0 142 | 15807909,Male,19,85000,0 143 | 15666141,Female,18,68000,0 144 | 15617134,Male,35,59000,0 145 | 15783029,Male,30,89000,0 146 | 15622833,Female,34,25000,0 147 | 15746422,Female,24,89000,0 148 | 15750839,Female,27,96000,1 149 | 15749130,Female,41,30000,0 150 | 15779862,Male,29,61000,0 151 | 15767871,Male,20,74000,0 152 | 15679651,Female,26,15000,0 153 | 15576219,Male,41,45000,0 154 | 15699247,Male,31,76000,0 155 | 15619087,Female,36,50000,0 156 | 15605327,Male,40,47000,0 157 | 
15610140,Female,31,15000,0 158 | 15791174,Male,46,59000,0 159 | 15602373,Male,29,75000,0 160 | 15762605,Male,26,30000,0 161 | 15598840,Female,32,135000,1 162 | 15744279,Male,32,100000,1 163 | 15670619,Male,25,90000,0 164 | 15599533,Female,37,33000,0 165 | 15757837,Male,35,38000,0 166 | 15697574,Female,33,69000,0 167 | 15578738,Female,18,86000,0 168 | 15762228,Female,22,55000,0 169 | 15614827,Female,35,71000,0 170 | 15789815,Male,29,148000,1 171 | 15579781,Female,29,47000,0 172 | 15587013,Male,21,88000,0 173 | 15570932,Male,34,115000,0 174 | 15794661,Female,26,118000,0 175 | 15581654,Female,34,43000,0 176 | 15644296,Female,34,72000,0 177 | 15614420,Female,23,28000,0 178 | 15609653,Female,35,47000,0 179 | 15594577,Male,25,22000,0 180 | 15584114,Male,24,23000,0 181 | 15673367,Female,31,34000,0 182 | 15685576,Male,26,16000,0 183 | 15774727,Female,31,71000,0 184 | 15694288,Female,32,117000,1 185 | 15603319,Male,33,43000,0 186 | 15759066,Female,33,60000,0 187 | 15814816,Male,31,66000,0 188 | 15724402,Female,20,82000,0 189 | 15571059,Female,33,41000,0 190 | 15674206,Male,35,72000,0 191 | 15715160,Male,28,32000,0 192 | 15730448,Male,24,84000,0 193 | 15662067,Female,19,26000,0 194 | 15779581,Male,29,43000,0 195 | 15662901,Male,19,70000,0 196 | 15689751,Male,28,89000,0 197 | 15667742,Male,34,43000,0 198 | 15738448,Female,30,79000,0 199 | 15680243,Female,20,36000,0 200 | 15745083,Male,26,80000,0 201 | 15708228,Male,35,22000,0 202 | 15628523,Male,35,39000,0 203 | 15708196,Male,49,74000,0 204 | 15735549,Female,39,134000,1 205 | 15809347,Female,41,71000,0 206 | 15660866,Female,58,101000,1 207 | 15766609,Female,47,47000,0 208 | 15654230,Female,55,130000,1 209 | 15794566,Female,52,114000,0 210 | 15800890,Female,40,142000,1 211 | 15697424,Female,46,22000,0 212 | 15724536,Female,48,96000,1 213 | 15735878,Male,52,150000,1 214 | 15707596,Female,59,42000,0 215 | 15657163,Male,35,58000,0 216 | 15622478,Male,47,43000,0 217 | 15779529,Female,60,108000,1 218 | 15636023,Male,49,65000,0 219 
| 15582066,Male,40,78000,0 220 | 15666675,Female,46,96000,0 221 | 15732987,Male,59,143000,1 222 | 15789432,Female,41,80000,0 223 | 15663161,Male,35,91000,1 224 | 15694879,Male,37,144000,1 225 | 15593715,Male,60,102000,1 226 | 15575002,Female,35,60000,0 227 | 15622171,Male,37,53000,0 228 | 15795224,Female,36,126000,1 229 | 15685346,Male,56,133000,1 230 | 15691808,Female,40,72000,0 231 | 15721007,Female,42,80000,1 232 | 15794253,Female,35,147000,1 233 | 15694453,Male,39,42000,0 234 | 15813113,Male,40,107000,1 235 | 15614187,Male,49,86000,1 236 | 15619407,Female,38,112000,0 237 | 15646227,Male,46,79000,1 238 | 15660541,Male,40,57000,0 239 | 15753874,Female,37,80000,0 240 | 15617877,Female,46,82000,0 241 | 15772073,Female,53,143000,1 242 | 15701537,Male,42,149000,1 243 | 15736228,Male,38,59000,0 244 | 15780572,Female,50,88000,1 245 | 15769596,Female,56,104000,1 246 | 15586996,Female,41,72000,0 247 | 15722061,Female,51,146000,1 248 | 15638003,Female,35,50000,0 249 | 15775590,Female,57,122000,1 250 | 15730688,Male,41,52000,0 251 | 15753102,Female,35,97000,1 252 | 15810075,Female,44,39000,0 253 | 15723373,Male,37,52000,0 254 | 15795298,Female,48,134000,1 255 | 15584320,Female,37,146000,1 256 | 15724161,Female,50,44000,0 257 | 15750056,Female,52,90000,1 258 | 15609637,Female,41,72000,0 259 | 15794493,Male,40,57000,0 260 | 15569641,Female,58,95000,1 261 | 15815236,Female,45,131000,1 262 | 15811177,Female,35,77000,0 263 | 15680587,Male,36,144000,1 264 | 15672821,Female,55,125000,1 265 | 15767681,Female,35,72000,0 266 | 15600379,Male,48,90000,1 267 | 15801336,Female,42,108000,1 268 | 15721592,Male,40,75000,0 269 | 15581282,Male,37,74000,0 270 | 15746203,Female,47,144000,1 271 | 15583137,Male,40,61000,0 272 | 15680752,Female,43,133000,0 273 | 15688172,Female,59,76000,1 274 | 15791373,Male,60,42000,1 275 | 15589449,Male,39,106000,1 276 | 15692819,Female,57,26000,1 277 | 15727467,Male,57,74000,1 278 | 15734312,Male,38,71000,0 279 | 15764604,Male,49,88000,1 280 | 
15613014,Female,52,38000,1 281 | 15759684,Female,50,36000,1 282 | 15609669,Female,59,88000,1 283 | 15685536,Male,35,61000,0 284 | 15750447,Male,37,70000,1 285 | 15663249,Female,52,21000,1 286 | 15638646,Male,48,141000,0 287 | 15734161,Female,37,93000,1 288 | 15631070,Female,37,62000,0 289 | 15761950,Female,48,138000,1 290 | 15649668,Male,41,79000,0 291 | 15713912,Female,37,78000,1 292 | 15586757,Male,39,134000,1 293 | 15596522,Male,49,89000,1 294 | 15625395,Male,55,39000,1 295 | 15760570,Male,37,77000,0 296 | 15566689,Female,35,57000,0 297 | 15725794,Female,36,63000,0 298 | 15673539,Male,42,73000,1 299 | 15705298,Female,43,112000,1 300 | 15675791,Male,45,79000,0 301 | 15747043,Male,46,117000,1 302 | 15736397,Female,58,38000,1 303 | 15678201,Male,48,74000,1 304 | 15720745,Female,37,137000,1 305 | 15637593,Male,37,79000,1 306 | 15598070,Female,40,60000,0 307 | 15787550,Male,42,54000,0 308 | 15603942,Female,51,134000,0 309 | 15733973,Female,47,113000,1 310 | 15596761,Male,36,125000,1 311 | 15652400,Female,38,50000,0 312 | 15717893,Female,42,70000,0 313 | 15622585,Male,39,96000,1 314 | 15733964,Female,38,50000,0 315 | 15753861,Female,49,141000,1 316 | 15747097,Female,39,79000,0 317 | 15594762,Female,39,75000,1 318 | 15667417,Female,54,104000,1 319 | 15684861,Male,35,55000,0 320 | 15742204,Male,45,32000,1 321 | 15623502,Male,36,60000,0 322 | 15774872,Female,52,138000,1 323 | 15611191,Female,53,82000,1 324 | 15674331,Male,41,52000,0 325 | 15619465,Female,48,30000,1 326 | 15575247,Female,48,131000,1 327 | 15695679,Female,41,60000,0 328 | 15713463,Male,41,72000,0 329 | 15785170,Female,42,75000,0 330 | 15796351,Male,36,118000,1 331 | 15639576,Female,47,107000,1 332 | 15693264,Male,38,51000,0 333 | 15589715,Female,48,119000,1 334 | 15769902,Male,42,65000,0 335 | 15587177,Male,40,65000,0 336 | 15814553,Male,57,60000,1 337 | 15601550,Female,36,54000,0 338 | 15664907,Male,58,144000,1 339 | 15612465,Male,35,79000,0 340 | 15810800,Female,38,55000,0 341 | 15665760,Male,39,122000,1 
342 | 15588080,Female,53,104000,1 343 | 15776844,Male,35,75000,0 344 | 15717560,Female,38,65000,0 345 | 15629739,Female,47,51000,1 346 | 15729908,Male,47,105000,1 347 | 15716781,Female,41,63000,0 348 | 15646936,Male,53,72000,1 349 | 15768151,Female,54,108000,1 350 | 15579212,Male,39,77000,0 351 | 15721835,Male,38,61000,0 352 | 15800515,Female,38,113000,1 353 | 15591279,Male,37,75000,0 354 | 15587419,Female,42,90000,1 355 | 15750335,Female,37,57000,0 356 | 15699619,Male,36,99000,1 357 | 15606472,Male,60,34000,1 358 | 15778368,Male,54,70000,1 359 | 15671387,Female,41,72000,0 360 | 15573926,Male,40,71000,1 361 | 15709183,Male,42,54000,0 362 | 15577514,Male,43,129000,1 363 | 15778830,Female,53,34000,1 364 | 15768072,Female,47,50000,1 365 | 15768293,Female,42,79000,0 366 | 15654456,Male,42,104000,1 367 | 15807525,Female,59,29000,1 368 | 15574372,Female,58,47000,1 369 | 15671249,Male,46,88000,1 370 | 15779744,Male,38,71000,0 371 | 15624755,Female,54,26000,1 372 | 15611430,Female,60,46000,1 373 | 15774744,Male,60,83000,1 374 | 15629885,Female,39,73000,0 375 | 15708791,Male,59,130000,1 376 | 15793890,Female,37,80000,0 377 | 15646091,Female,46,32000,1 378 | 15596984,Female,46,74000,0 379 | 15800215,Female,42,53000,0 380 | 15577806,Male,41,87000,1 381 | 15749381,Female,58,23000,1 382 | 15683758,Male,42,64000,0 383 | 15670615,Male,48,33000,1 384 | 15715622,Female,44,139000,1 385 | 15707634,Male,49,28000,1 386 | 15806901,Female,57,33000,1 387 | 15775335,Male,56,60000,1 388 | 15724150,Female,49,39000,1 389 | 15627220,Male,39,71000,0 390 | 15672330,Male,47,34000,1 391 | 15668521,Female,48,35000,1 392 | 15807837,Male,48,33000,1 393 | 15592570,Male,47,23000,1 394 | 15748589,Female,45,45000,1 395 | 15635893,Male,60,42000,1 396 | 15757632,Female,39,59000,0 397 | 15691863,Female,46,41000,1 398 | 15706071,Male,51,23000,1 399 | 15654296,Female,50,20000,1 400 | 15755018,Male,36,33000,0 401 | 15594041,Female,49,36000,1 
-------------------------------------------------------------------------------- /data/x-y.csv: -------------------------------------------------------------------------------- 1 | x,y 2 | 24,21.54945196 3 | 50,47.46446305 4 | 15,17.21865634 5 | 38,36.58639803 6 | 87,87.28898389 7 | 36,32.46387493 8 | 12,10.78089683 9 | 81,80.7633986 10 | 25,24.61215147 11 | 5,6.963319071 12 | 16,11.23757338 13 | 16,13.53290206 14 | 24,24.60323899 15 | 39,39.40049976 16 | 54,48.43753838 17 | 60,61.69900319 18 | 26,26.92832418 19 | 73,70.4052055 20 | 29,29.34092408 21 | 31,25.30895192 22 | 68,69.02934339 23 | 87,84.99484703 24 | 58,57.04310305 25 | 54,50.5921991 26 | 84,83.02772202 27 | 58,57.05752706 28 | 49,47.95883341 29 | 20,24.34226432 30 | 90,94.68488281 31 | 48,48.03970696 32 | 4,7.08132338 33 | 25,21.99239907 34 | 42,42.33151664 35 | 0,0.329089443 36 | 60,61.92303698 37 | 93,91.17716423 38 | 39,39.45358014 39 | 7,5.996069607 40 | 21,22.59015942 41 | 68,61.18044414 42 | 84,85.02778957 43 | 0,-1.28631089 44 | 58,61.94273962 45 | 19,21.96033347 46 | 36,33.66194193 47 | 19,17.60946242 48 | 59,58.5630564 49 | 51,52.82390762 50 | 19,22.1363481 51 | 33,35.07467353 52 | 85,86.18822311 53 | 44,42.63227697 54 | 5,4.09817744 55 | 59,61.2229864 56 | 14,17.70677576 57 | 9,11.85312574 58 | 75,80.23051695 59 | 69,62.64931741 60 | 10,9.616859804 61 | 17,20.02797699 62 | 58,61.7510743 63 | 74,71.61010303 64 | 21,23.77154623 65 | 51,51.90142035 66 | 19,22.66073682 67 | 50,50.02897927 68 | 24,26.68794368 69 | 0,0.376911899 70 | 12,6.806419002 71 | 75,77.33986001 72 | 21,28.90260209 73 | 64,66.7346608 74 | 5,0.707510638 75 | 58,57.07748383 76 | 32,28.41453196 77 | 41,44.46272123 78 | 7,7.459605998 79 | 4,2.316708112 80 | 5,4.928546187 81 | 49,52.50336074 82 | 90,91.19109623 83 | 3,8.489164326 84 | 11,6.963371967 85 | 32,31.97989959 86 | 83,81.4281205 87 | 25,22.62365422 88 | 83,78.52505087 89 | 26,25.80714057 90 | 76,73.51081775 91 | 95,91.775467 92 | 53,49.21863516 93 | 77,80.50445387 94 | 
42,50.05636123 95 | 25,25.46292549 96 | 54,55.32164264 97 | 55,59.1244888 98 | 0,1.100686692 99 | 73,71.98020786 100 | 35,30.13666408 101 | 86,83.88427405 102 | 90,89.91004752 103 | 13,8.335654576 104 | 46,47.88388961 105 | 46,45.00397413 106 | 32,31.15664574 107 | 8,9.190375682 108 | 71,74.83135003 109 | 28,30.23177607 110 | 24,24.21914027 111 | 56,57.87219151 112 | 49,50.61728392 113 | 79,78.67470043 114 | 90,86.236707 115 | 89,89.10409255 116 | 41,43.26595082 117 | 27,26.68273277 118 | 58,59.46383041 119 | 26,28.90055826 120 | 31,31.300416 121 | 70,71.1433266 122 | 71,68.4739206 123 | 39,39.98238856 124 | 7,4.075776144 125 | 48,47.85817542 126 | 56,51.20390217 127 | 45,43.9367213 128 | 41,38.13626679 129 | 3,3.574661632 130 | 37,36.4139958 131 | 24,22.21908523 132 | 68,63.5312572 133 | 47,49.86702787 134 | 27,21.53140009 135 | 68,64.05710234 136 | 74,70.77549842 137 | 95,92.15749762 138 | 79,81.22259156 139 | 21,25.10114067 140 | 95,94.08853397 141 | 54,53.25166165 142 | 56,59.16236621 143 | 80,75.24148428 144 | 26,28.22325833 145 | 25,25.33323728 146 | 8,6.364615703 147 | 95,95.4609216 148 | 94,88.64183756 149 | 54,58.70318693 150 | 7,6.815491279 151 | 99,99.40394676 152 | 36,32.77049249 153 | 48,47.0586788 154 | 65,60.53321778 155 | 42,40.30929858 156 | 93,89.42222685 157 | 86,86.82132066 158 | 26,26.11697543 159 | 51,53.26657596 160 | 100,96.62327888 161 | 94,95.78441027 162 | 6,6.047286687 163 | 24,24.47387908 164 | 75,75.96844763 165 | 7,3.829381009 166 | 53,52.51703683 167 | 73,72.80457527 168 | 16,14.10999096 169 | 80,80.86087062 170 | 77,77.01988215 171 | 89,86.26972444 172 | 80,77.13735466 173 | 55,51.47649476 174 | 19,17.34557531 175 | 56,57.72853572 176 | 47,44.15029394 177 | 56,59.24362743 178 | 2,-1.053275611 179 | 82,86.79002254 180 | 57,60.14031858 181 | 44,44.04222058 182 | 26,24.5227488 183 | 52,52.95305521 184 | 41,43.16133498 185 | 44,45.67562576 186 | 3,-2.830749501 187 | 31,29.19693178 188 | 97,96.49812401 189 | 21,22.5453232 190 | 
17,20.10741433 191 | 7,4.035430253 192 | 61,61.14568518 193 | 10,13.97163653 194 | 52,55.34529893 195 | 10,12.18441166 196 | 65,64.00077658 197 | 71,70.3188322 198 | 4,-0.936895047 199 | 24,18.91422276 200 | 26,23.87590331 201 | 51,47.5775361 202 | 42,43.2736092 203 | 62,66.48278755 204 | 74,75.72605529 205 | 77,80.59643338 206 | 3,-2.235879852 207 | 50,47.04654956 208 | 24,21.59635575 209 | 37,32.87558963 210 | 58,57.95782956 211 | 52,52.24760027 212 | 27,24.58286902 213 | 14,12.12573805 214 | 100,100.0158026 215 | 3530.15736917 216 | 72,74.04682658 217 | 5,1.611947467 218 | 71,70.36836307 219 | 54,52.26831735 220 | 84,83.1286166 221 | 42,43.64765048 222 | 54,49.44785426 223 | 74,72.6356699 224 | 54,52.78130641 225 | 53,57.11195136 226 | 78,79.1050629 227 | 97,101.6228548 228 | 49,53.5825402 229 | 71,68.92139297 230 | 48,46.9666961 231 | 51,51.02642868 232 | 89,85.52073551 233 | 99,99.51685756 234 | 93,94.63911256 235 | 49,46.78357742 236 | 18,21.21321959 237 | 65,58.37266004 238 | 83,87.22059677 239 | 100,102.4967859 240 | 41,43.88314335 241 | 52,53.06655757 242 | 29,26.33464785 243 | 97,98.52008934 244 | 7,9.400497579 245 | 51,52.94026699 246 | 58,53.83020877 247 | 50,45.94511142 248 | 67,65.0132736 249 | 89,86.5069584 250 | 76,75.63280796 251 | 35,36.78035027 252 | 99,100.5328916 253 | 31,29.04466136 254 | 52,51.70352433 255 | 11,9.199954718 256 | 66,71.70015848 257 | 50,49.82634062 258 | 39,37.49971096 259 | 60,53.65084683 260 | 35,33.92561965 261 | 53,49.92639685 262 | 14,8.148154262 263 | 49,49.72359037 264 | 16,16.16712757 265 | 76,75.30033002 266 | 13,9.577368568 267 | 51,48.38088357 268 | 70,72.95331671 269 | 98,92.59573853 270 | 86,88.85523586 271 | 100,99.00361771 272 | 46,45.09439571 273 | 51,46.94362684 274 | 50,48.33449605 275 | 91,94.92329574 276 | 48,47.78165248 277 | 81,81.28960746 278 | 38,37.83155021 279 | 40,39.69185252 280 | 79,76.92664854 281 | 96,88.02990531 282 | 60,56.99178872 283 | 70,72.58929383 284 | 44,44.98103442 285 | 11,11.99017641 
286 | 6,1.919513328 287 | 5,1.628826073 288 | 72,66.27746655 289 | 55,57.53887255 290 | 95,94.70291077 291 | 41,41.21469904 292 | 25,25.04169243 293 | 1,3.778209914 294 | 55,50.50711779 295 | 4,9.682408486 296 | 48,48.88147608 297 | 55,54.40348599 298 | 75,71.70233156 299 | 68,69.35848388 300 | 100,99.98491591 301 | 25,26.03323718 302 | 75,75.48910307 303 | 34,36.59623056 304 | 38,40.95102191 305 | 92,86.78316267 306 | 21,15.50701184 307 | 88,85.86077871 308 | 75,79.20610113 309 | 76,80.80643766 310 | 44,48.59717283 311 | 10,13.93415049 312 | 21,27.3051179 313 | 16,14.00226297 314 | 32,33.67416 315 | 13,13.11612884 316 | 26,24.76649193 317 | 70,73.68477876 318 | 77,77.53149541 319 | 77,76.24503196 320 | 88,88.0578931 321 | 35,35.02445799 322 | 24,21.65857739 323 | 17,17.33681562 324 | 91,94.36778957 325 | 32,33.43396307 326 | 36,32.52179399 327 | 89,90.57741298 328 | 69,71.25634126 329 | 30,31.23212856 330 | 6,5.398840061 331 | 22,18.56241391 332 | 67,71.97121038 333 | 9,5.225759566 334 | 74,73.5964342 335 | 50,49.76948983 336 | 85,82.69087513 337 | 3,1.652309089 338 | 0,-3.836652144 339 | 59,62.03811556 340 | 62,61.26514581 341 | 17,13.24991628 342 | 90,88.61672694 343 | 23,21.13655528 344 | 19,23.85017475 345 | 93,92.01203405 346 | 14,10.26712261 347 | 58,54.14681616 348 | 87,87.00645713 349 | 37,37.69447352 350 | 20,19.62278654 351 | 35,34.78561007 352 | 63,62.03190983 353 | 56,52.67003801 354 | 62,58.09031476 355 | 98,97.19448821 356 | 90,90.50155298 357 | 51,50.5123462 358 | 93,94.45211871 359 | 22,21.10794636 360 | 38,37.36298431 361 | 13,10.28574844 362 | 98,96.04932416 363 | 99,100.0953697 364 | 31,30.6063167 365 | 94,96.19000542 366 | 73,71.30828034 367 | 37,34.59311043 368 | 23,19.02332876 369 | 11,10.76669688 370 | 88,90.5799868 371 | 47,48.71787679 372 | 79,78.74139764 373 | 91,85.23492274 374 | 71,71.65789964 375 | 10,8.938990554 376 | 39,39.89606046 377 | 92,91.85091116 378 | 99,99.11200375 379 | 28,26.22196486 380 | 32,33.21584226 381 | 
32,35.72392691 382 | 75,76.88604495 383 | 99,99.30874567 384 | 27,25.77161074 385 | 64,67.85169407 386 | 98,98.50371084 387 | 38,31.11331895 388 | 46,45.51171028 389 | 13,12.65537808 390 | 96,95.56065366 391 | 9,9.526431641 392 | 34,36.10893209 393 | 49,46.43628318 394 | 1,-3.83998112 395 | 50,48.97302037 396 | 94,93.25305499 397 | 27,23.47650968 398 | 20,17.13551132 399 | 12,14.55896144 400 | 45,41.53992729 401 | 91,91.64730552 402 | 61,66.16652565 403 | 10,9.230857489 404 | 47,47.41377893 405 | 33,34.76441561 406 | 84,86.10796637 407 | 24,21.81267954 408 | 48,48.89963951 409 | 48,46.78108638 410 | 9,12.91328547 411 | 93,94.55203143 412 | 99,94.97068753 413 | 8,2.379172481 414 | 20,21.47982988 415 | 38,35.79795462 416 | 78,82.0763803 417 | 81,78.87097714 418 | 42,47.2492425 419 | 95,96.18852325 420 | 78,78.38491927 421 | 44,42.94274064 422 | 68,64.43231595 423 | 87,84.21191485 424 | 58,57.3069783 425 | 52,52.52101436 426 | 26,25.7440243 427 | 75,75.42283401 428 | 48,53.62523007 429 | 71,75.14466308 430 | 77,74.12151511 431 | 34,36.24807243 432 | 24,20.21665898 433 | 70,66.94758118 434 | 29,34.07278254 435 | 76,73.13850045 436 | 98,92.85929155 437 | 28,28.36793808 438 | 87,85.59308727 439 | 9,10.68453755 440 | 87,86.10708624 441 | 33,33.22031418 442 | 64,66.09563422 443 | 17,19.30486546 444 | 49,48.84542083 445 | 95,93.73176312 446 | 75,75.45758614 447 | 89,91.24239226 448 | 81,87.15690853 449 | 25,25.53752833 450 | 47,46.06629478 451 | 50,49.65277661 452 | 5,7.382244165 453 | 68,71.11189935 454 | 84,83.50570521 455 | 8,8.791139893 456 | 41,33.30638903 457 | 26,26.40362524 458 | 89,91.72960726 459 | 78,82.53030719 460 | 34,36.67762733 461 | 92,86.98450355 462 | 27,32.34784175 463 | 12,16.78353974 464 | 2,1.576584383 465 | 22,17.4618141 466 | 0,2.116113029 467 | 26,24.34804332 468 | 50,48.29491198 469 | 84,85.52145453 470 | 70,73.71434779 471 | 66,63.15189497 472 | 42,38.46213684 473 | 19,19.47100788 474 | 94,94.07428225 475 | 71,67.92051286 476 | 19,22.58096241 477 
| 16,16.01629889 478 | 49,48.43307886 479 | 29,29.6673599 480 | 29,26.65566328 481 | 86,86.28206739 482 | 50,50.82304924 483 | 86,88.57251713 484 | 30,32.59980745 485 | 23,21.02469368 486 | 20,20.72894979 487 | 16,20.38051187 488 | 57,57.25180153 489 | 8,6.967537054 490 | 8,10.240085 491 | 62,64.94841088 492 | 55,55.35893915 493 | 30,31.24365589 494 | 86,90.72048818 495 | 62,58.750127 496 | 51,55.85003198 497 | 61,60.19925869 498 | 86,85.03295412 499 | 61,60.38823085 500 | 21,18.44679787 501 | 81,82.18839247 502 | 97,94.2963344 503 | 5,7.682024586 504 | 61,61.01858089 505 | 47,53.60562216 506 | 98,94.47728801 507 | 30,27.9645947 508 | 63,62.55662585 509 | 0,1.406254414 510 | 100,101.7003412 511 | 18,13.84973988 512 | 30,28.99769315 513 | 98,99.04315693 514 | 16,15.56135514 515 | 22,24.63528393 516 | 55,53.98393374 517 | 43,42.91449728 518 | 75,74.29662112 519 | 91,91.17012883 520 | 46,49.42440876 521 | 85,82.47683519 522 | 55,56.15303953 523 | 36,37.17063131 524 | 49,46.36928662 525 | 94,97.02383456 526 | 43,40.83182104 527 | 22,24.08498313 528 | 37,41.14386358 529 | 24,21.97388066 530 | 95,100.740897 531 | 61,61.19971596 532 | 75,74.39517002 533 | 68,69.04377173 534 | 58,56.68718792 535 | 5,5.860391715 536 | 53,55.72021356 537 | 80,79.22021816 538 | 83,86.30177517 539 | 25,25.26971886 540 | 34,36.33294447 541 | 26,27.65574228 542 | 90,94.79690531 543 | 60,58.67366671 544 | 49,56.15934471 545 | 19,18.40919388 546 | 92,86.26936988 547 | 29,26.59436195 548 | 8,8.452520159 549 | 57,56.18131518 550 | 29,27.65452669 551 | 19,20.87391785 552 | 81,77.83354439 553 | 50,50.01787825 554 | 15,9.290856256 555 | 70,75.0284725 556 | 39,38.3037698 557 | 43,44.70786405 558 | 21,22.51016575 559 | 98,102.4959452 560 | 86,86.76845244 561 | 16,13.89748578 562 | 25,24.81824269 563 | 31,33.94224862 564 | 93,92.26970059 565 | 67,68.73365081 566 | 49,47.38516883 567 | 25,32.37576914 568 | 88,87.67388681 569 | 54,54.57648371 570 | 21,18.06450222 571 | 8,7.896539841 572 | 32,35.00341078 573 
| 35,36.72823317 574 | 67,65.84975426 575 | 90,89.59295492 576 | 59,61.69026202 577 | 15,11.60499315 578 | 67,71.0826803 579 | 42,43.71901164 580 | 44,41.57421008 581 | 77,74.25552425 582 | 68,66.28310437 583 | 36,36.62438077 584 | 11,10.32374866 585 | 10,7.156457657 586 | 65,67.88603132 587 | 98,101.1097591 588 | 98,98.6132033 589 | 49,50.19083844 590 | 31,27.83896261 591 | 56,55.9249564 592 | 70,76.47340872 593 | 91,92.05756378 594 | 25,27.35245439 595 | 54,55.32083476 596 | 39,41.39990349 597 | 91,93.59057024 598 | 3,5.297054029 599 | 22,21.01429422 600 | 2,2.267059451 601 | 2,-0.121860502 602 | 65,66.49546208 603 | 71,73.83637687 604 | 42,42.10140878 605 | 76,77.35135732 606 | 43,41.02251779 607 | 8,14.75305272 608 | 86,83.28199022 609 | 87,89.93374342 610 | 3,2.286571686 611 | 58,55.61421297 612 | 62,62.15313408 613 | 89,89.55803528 614 | 95,94.00291863 615 | 28,26.78023848 616 | 0,-0.764537626 617 | 1,0.282866003 618 | 49,44.26800515 619 | 21,19.85174138 620 | 46,47.15960005 621 | 11,8.359366572 622 | 89,92.08157084 623 | 37,41.88734051 624 | 29,30.5413129 625 | 44,46.87654473 626 | 96,96.35659485 627 | 16,17.9170699 628 | 74,71.67949917 629 | 35,32.64997554 630 | 42,39.34482965 631 | 16,17.03401999 632 | 56,52.87524074 633 | 18,15.85414849 634 | 100,108.8716183 635 | 54,49.30477253 636 | 92,89.4749477 637 | 63,63.67348242 638 | 81,83.78410946 639 | 73,73.51136922 640 | 48,46.80297244 641 | 1,5.809946802 642 | 85,85.23027975 643 | 14,10.58213964 644 | 25,21.37698317 645 | 45,46.0537745 646 | 98,95.2389253 647 | 97,94.15149206 648 | 58,54.54868046 649 | 93,87.36260449 650 | 88,88.47741598 651 | 89,84.48045678 652 | 47,48.79647071 653 | 6,10.76675683 654 | 34,30.48882921 655 | 30,29.76846185 656 | 16,13.51574749 657 | 86,86.12955884 658 | 40,43.30022747 659 | 52,51.92110232 660 | 15,16.49185287 661 | 4,7.998073432 662 | 95,97.66689567 663 | 99,89.80545367 664 | 35,38.07166567 665 | 58,60.27852322 666 | 10,6.709195759 667 | 16,18.35488924 668 | 53,56.37058203 
669 | 58,62.80064204 670 | 42,41.25155632 671 | 24,19.42637541 672 | 84,82.88935804 673 | 64,63.61364981 674 | 12,11.29627199 675 | 61,60.02274882 676 | 75,72.60339326 677 | 15,11.87964573 678 | 100,100.7012737 679 | 43,45.12420809 680 | 13,14.81106804 681 | 48,48.09368034 682 | 45,42.29145672 683 | 52,52.73389794 684 | 34,36.72396986 685 | 30,28.64535198 686 | 65,62.16675273 687 | 100,95.58459518 688 | 67,66.04325304 689 | 99,99.9566225 690 | 45,46.14941984 691 | 87,89.13754963 692 | 73,69.71787806 693 | 9,12.31736648 694 | 81,78.20296268 695 | 72,71.30995371 696 | 81,81.45544709 697 | 58,58.59500642 698 | 93,94.62509374 699 | 82,88.60376995 700 | 66,63.64868529 701 | 97,94.9752655 -------------------------------------------------------------------------------- /data/Realestate.csv: -------------------------------------------------------------------------------- 1 | No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area 2 | 1,2012.917,32,84.87882,10,24.98298,121.54024,37.9 3 | 2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2 4 | 3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3 5 | 4,2013.500,13.3,561.9845,5,24.98746,121.54391,54.8 6 | 5,2012.833,5,390.5684,5,24.97937,121.54245,43.1 7 | 6,2012.667,7.1,2175.03,3,24.96305,121.51254,32.1 8 | 7,2012.667,34.5,623.4731,7,24.97933,121.53642,40.3 9 | 8,2013.417,20.3,287.6025,6,24.98042,121.54228,46.7 10 | 9,2013.500,31.7,5512.038,1,24.95095,121.48458,18.8 11 | 10,2013.417,17.9,1783.18,3,24.96731,121.51486,22.1 12 | 11,2013.083,34.8,405.2134,1,24.97349,121.53372,41.4 13 | 12,2013.333,6.3,90.45606,9,24.97433,121.5431,58.1 14 | 13,2012.917,13,492.2313,5,24.96515,121.53737,39.3 15 | 14,2012.667,20.4,2469.645,4,24.96108,121.51046,23.8 16 | 15,2013.500,13.2,1164.838,4,24.99156,121.53406,34.3 17 | 16,2013.583,35.7,579.2083,2,24.9824,121.54619,50.5 18 | 17,2013.250,0,292.9978,6,24.97744,121.54458,70.1 19 | 
18,2012.750,17.7,350.8515,1,24.97544,121.53119,37.4 20 | 19,2013.417,16.9,368.1363,8,24.9675,121.54451,42.3 21 | 20,2012.667,1.5,23.38284,7,24.96772,121.54102,47.7 22 | 21,2013.417,4.5,2275.877,3,24.96314,121.51151,29.3 23 | 22,2013.417,10.5,279.1726,7,24.97528,121.54541,51.6 24 | 23,2012.917,14.7,1360.139,1,24.95204,121.54842,24.6 25 | 24,2013.083,10.1,279.1726,7,24.97528,121.54541,47.9 26 | 25,2013.000,39.6,480.6977,4,24.97353,121.53885,38.8 27 | 26,2013.083,29.3,1487.868,2,24.97542,121.51726,27 28 | 27,2012.667,3.1,383.8624,5,24.98085,121.54391,56.2 29 | 28,2013.250,10.4,276.449,5,24.95593,121.53913,33.6 30 | 29,2013.500,19.2,557.478,4,24.97419,121.53797,47 31 | 30,2013.083,7.1,451.2438,5,24.97563,121.54694,57.1 32 | 31,2013.500,25.9,4519.69,0,24.94826,121.49587,22.1 33 | 32,2012.750,29.6,769.4034,7,24.98281,121.53408,25 34 | 33,2012.750,37.9,488.5727,1,24.97349,121.53451,34.2 35 | 34,2013.250,16.5,323.655,6,24.97841,121.54281,49.3 36 | 35,2012.750,15.4,205.367,7,24.98419,121.54243,55.1 37 | 36,2013.500,13.9,4079.418,0,25.01459,121.51816,27.3 38 | 37,2012.917,14.7,1935.009,2,24.96386,121.51458,22.9 39 | 38,2013.167,12,1360.139,1,24.95204,121.54842,25.3 40 | 39,2012.667,3.1,577.9615,6,24.97201,121.54722,47.7 41 | 40,2013.167,16.2,289.3248,5,24.98203,121.54348,46.2 42 | 41,2013.000,13.6,4082.015,0,24.94155,121.50381,15.9 43 | 42,2013.500,16.8,4066.587,0,24.94297,121.50342,18.2 44 | 43,2013.417,36.1,519.4617,5,24.96305,121.53758,34.7 45 | 44,2012.750,34.4,512.7871,6,24.98748,121.54301,34.1 46 | 45,2013.583,2.7,533.4762,4,24.97445,121.54765,53.9 47 | 46,2013.083,36.6,488.8193,8,24.97015,121.54494,38.3 48 | 47,2013.417,21.7,463.9623,9,24.9703,121.54458,42 49 | 48,2013.583,35.9,640.7391,3,24.97563,121.53715,61.5 50 | 49,2013.417,24.2,4605.749,0,24.94684,121.49578,13.4 51 | 50,2012.667,29.4,4510.359,1,24.94925,121.49542,13.2 52 | 51,2013.417,21.7,512.5487,4,24.974,121.53842,44.2 53 | 52,2013.083,31.3,1758.406,1,24.95402,121.55282,20.7 54 | 
53,2013.583,32.1,1438.579,3,24.97419,121.5175,27 55 | 54,2013.083,13.3,492.2313,5,24.96515,121.53737,38.9 56 | 55,2013.083,16.1,289.3248,5,24.98203,121.54348,51.7 57 | 56,2012.833,31.7,1160.632,0,24.94968,121.53009,13.7 58 | 57,2013.417,33.6,371.2495,8,24.97254,121.54059,41.9 59 | 58,2012.917,3.5,56.47425,7,24.95744,121.53711,53.5 60 | 59,2013.500,30.3,4510.359,1,24.94925,121.49542,22.6 61 | 60,2013.083,13.3,336.0532,5,24.95776,121.53438,42.4 62 | 61,2013.417,11,1931.207,2,24.96365,121.51471,21.3 63 | 62,2013.500,5.3,259.6607,6,24.97585,121.54516,63.2 64 | 63,2012.917,17.2,2175.877,3,24.96303,121.51254,27.7 65 | 64,2013.583,2.6,533.4762,4,24.97445,121.54765,55 66 | 65,2013.333,17.5,995.7554,0,24.96305,121.54915,25.3 67 | 66,2013.417,40.1,123.7429,8,24.97635,121.54329,44.3 68 | 67,2013.000,1,193.5845,6,24.96571,121.54089,50.7 69 | 68,2013.500,8.5,104.8101,5,24.96674,121.54067,56.8 70 | 69,2013.417,30.4,464.223,6,24.97964,121.53805,36.2 71 | 70,2012.833,12.5,561.9845,5,24.98746,121.54391,42 72 | 71,2013.583,6.6,90.45606,9,24.97433,121.5431,59 73 | 72,2013.083,35.5,640.7391,3,24.97563,121.53715,40.8 74 | 73,2013.583,32.5,424.5442,8,24.97587,121.53913,36.3 75 | 74,2013.167,13.8,4082.015,0,24.94155,121.50381,20 76 | 75,2012.917,6.8,379.5575,10,24.98343,121.53762,54.4 77 | 76,2013.500,12.3,1360.139,1,24.95204,121.54842,29.5 78 | 77,2013.583,35.9,616.4004,3,24.97723,121.53767,36.8 79 | 78,2012.833,20.5,2185.128,3,24.96322,121.51237,25.6 80 | 79,2012.917,38.2,552.4371,2,24.97598,121.53381,29.8 81 | 80,2013.000,18,1414.837,1,24.95182,121.54887,26.5 82 | 81,2013.500,11.8,533.4762,4,24.97445,121.54765,40.3 83 | 82,2013.000,30.8,377.7956,6,24.96427,121.53964,36.8 84 | 83,2013.083,13.2,150.9347,7,24.96725,121.54252,48.1 85 | 84,2012.917,25.3,2707.392,3,24.96056,121.50831,17.7 86 | 85,2013.083,15.1,383.2805,7,24.96735,121.54464,43.7 87 | 86,2012.750,0,338.9679,9,24.96853,121.54413,50.8 88 | 87,2012.833,1.8,1455.798,1,24.9512,121.549,27 89 | 
88,2013.583,16.9,4066.587,0,24.94297,121.50342,18.3 90 | 89,2012.917,8.9,1406.43,0,24.98573,121.52758,48 91 | 90,2013.500,23,3947.945,0,24.94783,121.50243,25.3 92 | 91,2012.833,0,274.0144,1,24.9748,121.53059,45.4 93 | 92,2013.250,9.1,1402.016,0,24.98569,121.5276,43.2 94 | 93,2012.917,20.6,2469.645,4,24.96108,121.51046,21.8 95 | 94,2012.917,31.9,1146.329,0,24.9492,121.53076,16.1 96 | 95,2012.917,40.9,167.5989,5,24.9663,121.54026,41 97 | 96,2012.917,8,104.8101,5,24.96674,121.54067,51.8 98 | 97,2013.417,6.4,90.45606,9,24.97433,121.5431,59.5 99 | 98,2013.083,28.4,617.4424,3,24.97746,121.53299,34.6 100 | 99,2013.417,16.4,289.3248,5,24.98203,121.54348,51 101 | 100,2013.417,6.4,90.45606,9,24.97433,121.5431,62.2 102 | 101,2013.500,17.5,964.7496,4,24.98872,121.53411,38.2 103 | 102,2012.833,12.7,170.1289,1,24.97371,121.52984,32.9 104 | 103,2013.083,1.1,193.5845,6,24.96571,121.54089,54.4 105 | 104,2012.750,0,208.3905,6,24.95618,121.53844,45.7 106 | 105,2012.667,32.7,392.4459,6,24.96398,121.5425,30.5 107 | 106,2012.833,0,292.9978,6,24.97744,121.54458,71 108 | 107,2013.083,17.2,189.5181,8,24.97707,121.54308,47.1 109 | 108,2013.333,12.2,1360.139,1,24.95204,121.54842,26.6 110 | 109,2013.417,31.4,592.5006,2,24.9726,121.53561,34.1 111 | 110,2013.583,4,2147.376,3,24.96299,121.51284,28.4 112 | 111,2013.083,8.1,104.8101,5,24.96674,121.54067,51.6 113 | 112,2013.583,33.3,196.6172,7,24.97701,121.54224,39.4 114 | 113,2013.417,9.9,2102.427,3,24.96044,121.51462,23.1 115 | 114,2013.333,14.8,393.2606,6,24.96172,121.53812,7.6 116 | 115,2012.667,30.6,143.8383,8,24.98155,121.54142,53.3 117 | 116,2013.083,20.6,737.9161,2,24.98092,121.54739,46.4 118 | 117,2013.000,30.9,6396.283,1,24.94375,121.47883,12.2 119 | 118,2013.000,13.6,4197.349,0,24.93885,121.50383,13 120 | 119,2013.500,25.3,1583.722,3,24.96622,121.51709,30.6 121 | 120,2013.500,16.6,289.3248,5,24.98203,121.54348,59.6 122 | 121,2013.167,13.3,492.2313,5,24.96515,121.53737,31.3 123 | 122,2013.500,13.6,492.2313,5,24.96515,121.53737,48 124 | 
123,2013.250,31.5,414.9476,4,24.98199,121.54464,32.5 125 | 124,2013.417,0,185.4296,0,24.9711,121.5317,45.5 126 | 125,2012.917,9.9,279.1726,7,24.97528,121.54541,57.4 127 | 126,2013.167,1.1,193.5845,6,24.96571,121.54089,48.6 128 | 127,2013.083,38.6,804.6897,4,24.97838,121.53477,62.9 129 | 128,2013.250,3.8,383.8624,5,24.98085,121.54391,55 130 | 129,2013.083,41.3,124.9912,6,24.96674,121.54039,60.7 131 | 130,2013.417,38.5,216.8329,7,24.98086,121.54162,41 132 | 131,2013.250,29.6,535.527,8,24.98092,121.53653,37.5 133 | 132,2013.500,4,2147.376,3,24.96299,121.51284,30.7 134 | 133,2013.167,26.6,482.7581,5,24.97433,121.53863,37.5 135 | 134,2012.833,18,373.3937,8,24.9866,121.54082,39.5 136 | 135,2012.667,33.4,186.9686,6,24.96604,121.54211,42.2 137 | 136,2012.917,18.9,1009.235,0,24.96357,121.54951,20.8 138 | 137,2012.750,11.4,390.5684,5,24.97937,121.54245,46.8 139 | 138,2013.500,13.6,319.0708,6,24.96495,121.54277,47.4 140 | 139,2013.167,10,942.4664,0,24.97843,121.52406,43.5 141 | 140,2012.667,12.9,492.2313,5,24.96515,121.53737,42.5 142 | 141,2013.250,16.2,289.3248,5,24.98203,121.54348,51.4 143 | 142,2013.333,5.1,1559.827,3,24.97213,121.51627,28.9 144 | 143,2013.417,19.8,640.6071,5,24.97017,121.54647,37.5 145 | 144,2013.500,13.6,492.2313,5,24.96515,121.53737,40.1 146 | 145,2013.083,11.9,1360.139,1,24.95204,121.54842,28.4 147 | 146,2012.917,2.1,451.2438,5,24.97563,121.54694,45.5 148 | 147,2012.750,0,185.4296,0,24.9711,121.5317,52.2 149 | 148,2012.750,3.2,489.8821,8,24.97017,121.54494,43.2 150 | 149,2013.500,16.4,3780.59,0,24.93293,121.51203,45.1 151 | 150,2012.667,34.9,179.4538,8,24.97349,121.54245,39.7 152 | 151,2013.250,35.8,170.7311,7,24.96719,121.54269,48.5 153 | 152,2013.500,4.9,387.7721,9,24.98118,121.53788,44.7 154 | 153,2013.333,12,1360.139,1,24.95204,121.54842,28.9 155 | 154,2013.250,6.5,376.1709,6,24.95418,121.53713,40.9 156 | 155,2013.500,16.9,4066.587,0,24.94297,121.50342,20.7 157 | 156,2013.167,13.8,4082.015,0,24.94155,121.50381,15.6 158 | 
157,2013.583,30.7,1264.73,0,24.94883,121.52954,18.3 159 | 158,2013.250,16.1,815.9314,4,24.97886,121.53464,35.6 160 | 159,2013.000,11.6,390.5684,5,24.97937,121.54245,39.4 161 | 160,2012.667,15.5,815.9314,4,24.97886,121.53464,37.4 162 | 161,2012.917,3.5,49.66105,8,24.95836,121.53756,57.8 163 | 162,2013.417,19.2,616.4004,3,24.97723,121.53767,39.6 164 | 163,2012.750,16,4066.587,0,24.94297,121.50342,11.6 165 | 164,2013.500,8.5,104.8101,5,24.96674,121.54067,55.5 166 | 165,2012.833,0,185.4296,0,24.9711,121.5317,55.2 167 | 166,2012.917,13.7,1236.564,1,24.97694,121.55391,30.6 168 | 167,2013.417,0,292.9978,6,24.97744,121.54458,73.6 169 | 168,2013.417,28.2,330.0854,8,24.97408,121.54011,43.4 170 | 169,2013.083,27.6,515.1122,5,24.96299,121.5432,37.4 171 | 170,2013.417,8.4,1962.628,1,24.95468,121.55481,23.5 172 | 171,2013.333,24,4527.687,0,24.94741,121.49628,14.4 173 | 172,2013.083,3.6,383.8624,5,24.98085,121.54391,58.8 174 | 173,2013.583,6.6,90.45606,9,24.97433,121.5431,58.1 175 | 174,2013.083,41.3,401.8807,4,24.98326,121.5446,35.1 176 | 175,2013.417,4.3,432.0385,7,24.9805,121.53778,45.2 177 | 176,2013.083,30.2,472.1745,3,24.97005,121.53758,36.5 178 | 177,2012.833,13.9,4573.779,0,24.94867,121.49507,19.2 179 | 178,2013.083,33,181.0766,9,24.97697,121.54262,42 180 | 179,2013.500,13.1,1144.436,4,24.99176,121.53456,36.7 181 | 180,2013.083,14,438.8513,1,24.97493,121.5273,42.6 182 | 181,2012.667,26.9,4449.27,0,24.94898,121.49621,15.5 183 | 182,2013.167,11.6,201.8939,8,24.98489,121.54121,55.9 184 | 183,2013.500,13.5,2147.376,3,24.96299,121.51284,23.6 185 | 184,2013.500,17,4082.015,0,24.94155,121.50381,18.8 186 | 185,2012.750,14.1,2615.465,0,24.95495,121.56174,21.8 187 | 186,2012.750,31.4,1447.286,3,24.97285,121.5173,21.5 188 | 187,2013.167,20.9,2185.128,3,24.96322,121.51237,25.7 189 | 188,2013.000,8.9,3078.176,0,24.95464,121.56627,22 190 | 189,2012.917,34.8,190.0392,8,24.97707,121.54312,44.3 191 | 190,2012.917,16.3,4066.587,0,24.94297,121.50342,20.5 192 | 
191,2013.500,35.3,616.5735,8,24.97945,121.53642,42.3 193 | 192,2013.167,13.2,750.0704,2,24.97371,121.54951,37.8 194 | 193,2013.167,43.8,57.58945,7,24.9675,121.54069,42.7 195 | 194,2013.417,9.7,421.479,5,24.98246,121.54477,49.3 196 | 195,2013.500,15.2,3771.895,0,24.93363,121.51158,29.3 197 | 196,2013.333,15.2,461.1016,5,24.95425,121.5399,34.6 198 | 197,2013.000,22.8,707.9067,2,24.981,121.54713,36.6 199 | 198,2013.250,34.4,126.7286,8,24.96881,121.54089,48.2 200 | 199,2013.083,34,157.6052,7,24.96628,121.54196,39.1 201 | 200,2013.417,18.2,451.6419,8,24.96945,121.5449,31.6 202 | 201,2013.417,17.4,995.7554,0,24.96305,121.54915,25.5 203 | 202,2013.417,13.1,561.9845,5,24.98746,121.54391,45.9 204 | 203,2012.917,38.3,642.6985,3,24.97559,121.53713,31.5 205 | 204,2012.667,15.6,289.3248,5,24.98203,121.54348,46.1 206 | 205,2013.000,18,1414.837,1,24.95182,121.54887,26.6 207 | 206,2013.083,12.8,1449.722,3,24.97289,121.51728,21.4 208 | 207,2013.250,22.2,379.5575,10,24.98343,121.53762,44 209 | 208,2013.083,38.5,665.0636,3,24.97503,121.53692,34.2 210 | 209,2012.750,11.5,1360.139,1,24.95204,121.54842,26.2 211 | 210,2012.833,34.8,175.6294,8,24.97347,121.54271,40.9 212 | 211,2013.500,5.2,390.5684,5,24.97937,121.54245,52.2 213 | 212,2013.083,0,274.0144,1,24.9748,121.53059,43.5 214 | 213,2013.333,17.6,1805.665,2,24.98672,121.52091,31.1 215 | 214,2013.083,6.2,90.45606,9,24.97433,121.5431,58 216 | 215,2013.583,18.1,1783.18,3,24.96731,121.51486,20.9 217 | 216,2013.333,19.2,383.7129,8,24.972,121.54477,48.1 218 | 217,2013.250,37.8,590.9292,1,24.97153,121.53559,39.7 219 | 218,2012.917,28,372.6242,6,24.97838,121.54119,40.8 220 | 219,2013.417,13.6,492.2313,5,24.96515,121.53737,43.8 221 | 220,2012.750,29.3,529.7771,8,24.98102,121.53655,40.2 222 | 221,2013.333,37.2,186.5101,9,24.97703,121.54265,78.3 223 | 222,2013.333,9,1402.016,0,24.98569,121.5276,38.5 224 | 223,2013.583,30.6,431.1114,10,24.98123,121.53743,48.5 225 | 224,2013.250,9.1,1402.016,0,24.98569,121.5276,42.3 226 | 
225,2013.333,34.5,324.9419,6,24.97814,121.5417,46 227 | 226,2013.250,1.1,193.5845,6,24.96571,121.54089,49 228 | 227,2013.000,16.5,4082.015,0,24.94155,121.50381,12.8 229 | 228,2012.917,32.4,265.0609,8,24.98059,121.53986,40.2 230 | 229,2013.417,11.9,3171.329,0,25.00115,121.51776,46.6 231 | 230,2013.583,31,1156.412,0,24.9489,121.53095,19 232 | 231,2013.500,4,2147.376,3,24.96299,121.51284,33.4 233 | 232,2012.833,16.2,4074.736,0,24.94235,121.50357,14.7 234 | 233,2012.917,27.1,4412.765,1,24.95032,121.49587,17.4 235 | 234,2013.333,39.7,333.3679,9,24.98016,121.53932,32.4 236 | 235,2013.250,8,2216.612,4,24.96007,121.51361,23.9 237 | 236,2012.750,12.9,250.631,7,24.96606,121.54297,39.3 238 | 237,2013.167,3.6,373.8389,10,24.98322,121.53765,61.9 239 | 238,2013.167,13,732.8528,0,24.97668,121.52518,39 240 | 239,2013.083,12.8,732.8528,0,24.97668,121.52518,40.6 241 | 240,2013.500,18.1,837.7233,0,24.96334,121.54767,29.7 242 | 241,2013.083,11,1712.632,2,24.96412,121.5167,28.8 243 | 242,2013.500,13.7,250.631,7,24.96606,121.54297,41.4 244 | 243,2012.833,2,2077.39,3,24.96357,121.51329,33.4 245 | 244,2013.417,32.8,204.1705,8,24.98236,121.53923,48.2 246 | 245,2013.083,4.8,1559.827,3,24.97213,121.51627,21.7 247 | 246,2013.417,7.5,639.6198,5,24.97258,121.54814,40.8 248 | 247,2013.417,16.4,389.8219,6,24.96412,121.54273,40.6 249 | 248,2013.333,21.7,1055.067,0,24.96211,121.54928,23.1 250 | 249,2013.000,19,1009.235,0,24.96357,121.54951,22.3 251 | 250,2012.833,18,6306.153,1,24.95743,121.47516,15 252 | 251,2013.167,39.2,424.7132,7,24.97429,121.53917,30 253 | 252,2012.917,31.7,1159.454,0,24.9496,121.53018,13.8 254 | 253,2012.833,5.9,90.45606,9,24.97433,121.5431,52.7 255 | 254,2012.667,30.4,1735.595,2,24.96464,121.51623,25.9 256 | 255,2012.667,1.1,329.9747,5,24.98254,121.54395,51.8 257 | 256,2013.417,31.5,5512.038,1,24.95095,121.48458,17.4 258 | 257,2012.667,14.6,339.2289,1,24.97519,121.53151,26.5 259 | 258,2013.250,17.3,444.1334,1,24.97501,121.5273,43.9 260 | 
259,2013.417,0,292.9978,6,24.97744,121.54458,63.3 261 | 260,2013.083,17.7,837.7233,0,24.96334,121.54767,28.8 262 | 261,2013.250,17,1485.097,4,24.97073,121.517,30.7 263 | 262,2013.167,16.2,2288.011,3,24.95885,121.51359,24.4 264 | 263,2012.917,15.9,289.3248,5,24.98203,121.54348,53 265 | 264,2013.417,3.9,2147.376,3,24.96299,121.51284,31.7 266 | 265,2013.167,32.6,493.657,7,24.96968,121.54522,40.6 267 | 266,2012.833,15.7,815.9314,4,24.97886,121.53464,38.1 268 | 267,2013.250,17.8,1783.18,3,24.96731,121.51486,23.7 269 | 268,2012.833,34.7,482.7581,5,24.97433,121.53863,41.1 270 | 269,2013.417,17.2,390.5684,5,24.97937,121.54245,40.1 271 | 270,2013.000,17.6,837.7233,0,24.96334,121.54767,23 272 | 271,2013.333,10.8,252.5822,1,24.9746,121.53046,117.5 273 | 272,2012.917,17.7,451.6419,8,24.96945,121.5449,26.5 274 | 273,2012.750,13,492.2313,5,24.96515,121.53737,40.5 275 | 274,2013.417,13.2,170.1289,1,24.97371,121.52984,29.3 276 | 275,2013.167,27.5,394.0173,7,24.97305,121.53994,41 277 | 276,2012.667,1.5,23.38284,7,24.96772,121.54102,49.7 278 | 277,2013.000,19.1,461.1016,5,24.95425,121.5399,34 279 | 278,2013.417,21.2,2185.128,3,24.96322,121.51237,27.7 280 | 279,2012.750,0,208.3905,6,24.95618,121.53844,44 281 | 280,2013.417,2.6,1554.25,3,24.97026,121.51642,31.1 282 | 281,2013.250,2.3,184.3302,6,24.96581,121.54086,45.4 283 | 282,2013.333,4.7,387.7721,9,24.98118,121.53788,44.8 284 | 283,2012.917,2,1455.798,1,24.9512,121.549,25.6 285 | 284,2013.417,33.5,1978.671,2,24.98674,121.51844,23.5 286 | 285,2012.917,15,383.2805,7,24.96735,121.54464,34.4 287 | 286,2013.167,30.1,718.2937,3,24.97509,121.53644,55.3 288 | 287,2012.917,5.9,90.45606,9,24.97433,121.5431,56.3 289 | 288,2013.000,19.2,461.1016,5,24.95425,121.5399,32.9 290 | 289,2013.583,16.6,323.6912,6,24.97841,121.5428,51 291 | 290,2013.333,13.9,289.3248,5,24.98203,121.54348,44.5 292 | 291,2013.083,37.7,490.3446,0,24.97217,121.53471,37 293 | 292,2012.833,3.4,56.47425,7,24.95744,121.53711,54.4 294 | 
293,2013.083,17.5,395.6747,5,24.95674,121.534,24.5 295 | 294,2012.667,12.6,383.2805,7,24.96735,121.54464,42.5 296 | 295,2013.500,26.4,335.5273,6,24.9796,121.5414,38.1 297 | 296,2013.167,18.2,2179.59,3,24.96299,121.51252,21.8 298 | 297,2012.750,12.5,1144.436,4,24.99176,121.53456,34.1 299 | 298,2012.833,34.9,567.0349,4,24.97003,121.5458,28.5 300 | 299,2013.333,16.7,4082.015,0,24.94155,121.50381,16.7 301 | 300,2013.167,33.2,121.7262,10,24.98178,121.54059,46.1 302 | 301,2013.083,2.5,156.2442,4,24.96696,121.53992,36.9 303 | 302,2012.750,38,461.7848,0,24.97229,121.53445,35.7 304 | 303,2013.500,16.5,2288.011,3,24.95885,121.51359,23.2 305 | 304,2013.500,38.3,439.7105,0,24.97161,121.53423,38.4 306 | 305,2013.417,20,1626.083,3,24.96622,121.51668,29.4 307 | 306,2013.083,16.2,289.3248,5,24.98203,121.54348,55 308 | 307,2013.500,14.4,169.9803,1,24.97369,121.52979,50.2 309 | 308,2012.833,10.3,3079.89,0,24.9546,121.56627,24.7 310 | 309,2013.417,16.4,289.3248,5,24.98203,121.54348,53 311 | 310,2013.250,30.3,1264.73,0,24.94883,121.52954,19.1 312 | 311,2013.583,16.4,1643.499,2,24.95394,121.55174,24.7 313 | 312,2013.167,21.3,537.7971,4,24.97425,121.53814,42.2 314 | 313,2013.583,35.4,318.5292,9,24.97071,121.54069,78 315 | 314,2013.333,8.3,104.8101,5,24.96674,121.54067,42.8 316 | 315,2013.250,3.7,577.9615,6,24.97201,121.54722,41.6 317 | 316,2013.083,15.6,1756.411,2,24.9832,121.51812,27.3 318 | 317,2013.250,13.3,250.631,7,24.96606,121.54297,42 319 | 318,2012.750,15.6,752.7669,2,24.97795,121.53451,37.5 320 | 319,2013.333,7.1,379.5575,10,24.98343,121.53762,49.8 321 | 320,2013.250,34.6,272.6783,5,24.95562,121.53872,26.9 322 | 321,2012.750,13.5,4197.349,0,24.93885,121.50383,18.6 323 | 322,2012.917,16.9,964.7496,4,24.98872,121.53411,37.7 324 | 323,2013.000,12.9,187.4823,1,24.97388,121.52981,33.1 325 | 324,2013.417,28.6,197.1338,6,24.97631,121.54436,42.5 326 | 325,2012.667,12.4,1712.632,2,24.96412,121.5167,31.3 327 | 326,2013.083,36.6,488.8193,8,24.97015,121.54494,38.1 328 | 
327,2013.500,4.1,56.47425,7,24.95744,121.53711,62.1 329 | 328,2013.417,3.5,757.3377,3,24.97538,121.54971,36.7 330 | 329,2012.833,15.9,1497.713,3,24.97003,121.51696,23.6 331 | 330,2013.000,13.6,4197.349,0,24.93885,121.50383,19.2 332 | 331,2013.083,32,1156.777,0,24.94935,121.53046,12.8 333 | 332,2013.333,25.6,4519.69,0,24.94826,121.49587,15.6 334 | 333,2013.167,39.8,617.7134,2,24.97577,121.53475,39.6 335 | 334,2012.750,7.8,104.8101,5,24.96674,121.54067,38.4 336 | 335,2012.917,30,1013.341,5,24.99006,121.5346,22.8 337 | 336,2013.583,27.3,337.6016,6,24.96431,121.54063,36.5 338 | 337,2012.833,5.1,1867.233,2,24.98407,121.51748,35.6 339 | 338,2012.833,31.3,600.8604,5,24.96871,121.54651,30.9 340 | 339,2012.917,31.5,258.186,9,24.96867,121.54331,36.3 341 | 340,2013.333,1.7,329.9747,5,24.98254,121.54395,50.4 342 | 341,2013.333,33.6,270.8895,0,24.97281,121.53265,42.9 343 | 342,2013.000,13,750.0704,2,24.97371,121.54951,37 344 | 343,2012.667,5.7,90.45606,9,24.97433,121.5431,53.5 345 | 344,2013.000,33.5,563.2854,8,24.98223,121.53597,46.6 346 | 345,2013.500,34.6,3085.17,0,24.998,121.5155,41.2 347 | 346,2012.667,0,185.4296,0,24.9711,121.5317,37.9 348 | 347,2013.417,13.2,1712.632,2,24.96412,121.5167,30.8 349 | 348,2013.583,17.4,6488.021,1,24.95719,121.47353,11.2 350 | 349,2012.833,4.6,259.6607,6,24.97585,121.54516,53.7 351 | 350,2012.750,7.8,104.8101,5,24.96674,121.54067,47 352 | 351,2013.000,13.2,492.2313,5,24.96515,121.53737,42.3 353 | 352,2012.833,4,2180.245,3,24.96324,121.51241,28.6 354 | 353,2012.833,18.4,2674.961,3,24.96143,121.50827,25.7 355 | 354,2013.500,4.1,2147.376,3,24.96299,121.51284,31.3 356 | 355,2013.417,12.2,1360.139,1,24.95204,121.54842,30.1 357 | 356,2013.250,3.8,383.8624,5,24.98085,121.54391,60.7 358 | 357,2012.833,10.3,211.4473,1,24.97417,121.52999,45.3 359 | 358,2013.417,0,338.9679,9,24.96853,121.54413,44.9 360 | 359,2013.167,1.1,193.5845,6,24.96571,121.54089,45.1 361 | 360,2013.500,5.6,2408.993,0,24.95505,121.55964,24.7 362 | 
361,2012.667,32.9,87.30222,10,24.983,121.54022,47.1 363 | 362,2013.083,41.4,281.205,8,24.97345,121.54093,63.3 364 | 363,2013.417,17.1,967.4,4,24.98872,121.53408,40 365 | 364,2013.500,32.3,109.9455,10,24.98182,121.54086,48 366 | 365,2013.417,35.3,614.1394,7,24.97913,121.53666,33.1 367 | 366,2012.917,17.3,2261.432,4,24.96182,121.51222,29.5 368 | 367,2012.750,14.2,1801.544,1,24.95153,121.55254,24.8 369 | 368,2012.833,15,1828.319,2,24.96464,121.51531,20.9 370 | 369,2013.417,18.2,350.8515,1,24.97544,121.53119,43.1 371 | 370,2012.667,20.2,2185.128,3,24.96322,121.51237,22.8 372 | 371,2012.750,15.9,289.3248,5,24.98203,121.54348,42.1 373 | 372,2013.500,4.1,312.8963,5,24.95591,121.53956,51.7 374 | 373,2013.000,33.9,157.6052,7,24.96628,121.54196,41.5 375 | 374,2013.083,0,274.0144,1,24.9748,121.53059,52.2 376 | 375,2013.250,5.4,390.5684,5,24.97937,121.54245,49.5 377 | 376,2013.250,21.7,1157.988,0,24.96165,121.55011,23.8 378 | 377,2013.417,14.7,1717.193,2,24.96447,121.51649,30.5 379 | 378,2013.333,3.9,49.66105,8,24.95836,121.53756,56.8 380 | 379,2013.333,37.3,587.8877,8,24.97077,121.54634,37.4 381 | 380,2013.333,0,292.9978,6,24.97744,121.54458,69.7 382 | 381,2013.333,14.1,289.3248,5,24.98203,121.54348,53.3 383 | 382,2013.417,8,132.5469,9,24.98298,121.53981,47.3 384 | 383,2013.000,16.3,3529.564,0,24.93207,121.51597,29.3 385 | 384,2012.667,29.1,506.1144,4,24.97845,121.53889,40.3 386 | 385,2012.750,16.1,4066.587,0,24.94297,121.50342,12.9 387 | 386,2013.000,18.3,82.88643,10,24.983,121.54026,46.6 388 | 387,2012.833,0,185.4296,0,24.9711,121.5317,55.3 389 | 388,2013.250,16.2,2103.555,3,24.96042,121.51462,25.6 390 | 389,2013.500,10.4,2251.938,4,24.95957,121.51353,27.3 391 | 390,2013.250,40.9,122.3619,8,24.96756,121.5423,67.7 392 | 391,2013.500,32.8,377.8302,9,24.97151,121.5435,38.6 393 | 392,2013.583,6.2,1939.749,1,24.95155,121.55387,31.3 394 | 393,2013.083,42.7,443.802,6,24.97927,121.53874,35.3 395 | 394,2013.000,16.9,967.4,4,24.98872,121.53408,40.3 396 | 
395,2013.500,32.6,4136.271,1,24.95544,121.4963,24.7 397 | 396,2012.917,21.2,512.5487,4,24.974,121.53842,42.5 398 | 397,2012.667,37.1,918.6357,1,24.97198,121.55063,31.9 399 | 398,2013.417,13.1,1164.838,4,24.99156,121.53406,32.2 400 | 399,2013.417,14.7,1717.193,2,24.96447,121.51649,23 401 | 400,2012.917,12.7,170.1289,1,24.97371,121.52984,37.3 402 | 401,2013.250,26.8,482.7581,5,24.97433,121.53863,35.5 403 | 402,2013.083,7.6,2175.03,3,24.96305,121.51254,27.7 404 | 403,2012.833,12.7,187.4823,1,24.97388,121.52981,28.5 405 | 404,2012.667,30.9,161.942,9,24.98353,121.53966,39.7 406 | 405,2013.333,16.4,289.3248,5,24.98203,121.54348,41.2 407 | 406,2012.667,23,130.9945,6,24.95663,121.53765,37.2 408 | 407,2013.167,1.9,372.1386,7,24.97293,121.54026,40.5 409 | 408,2013.000,5.2,2408.993,0,24.95505,121.55964,22.3 410 | 409,2013.417,18.5,2175.744,3,24.9633,121.51243,28.1 411 | 410,2013.000,13.7,4082.015,0,24.94155,121.50381,15.4 412 | 411,2012.667,5.6,90.45606,9,24.97433,121.5431,50 413 | 412,2013.250,18.8,390.9696,7,24.97923,121.53986,40.6 414 | 413,2013.000,8.1,104.8101,5,24.96674,121.54067,52.5 415 | 414,2013.500,6.5,90.45606,9,24.97433,121.5431,63.9 416 | -------------------------------------------------------------------------------- /week 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Welcome to Week 2" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This week we'll learn about Feature Engineering. This is the art that helps achieve great accuracy in Machine Learning. \n", 15 | "There are a number of things we need to keep in mind before we send in our data to formulas. \n", 16 | "\n", 17 | "No null data, and no strings presence in the dataset is must, but a number of other factors also affect our data. 
More than 40% of the time is consumed in this step by most professionals as well, so do refer to blogs to learn more. \n", 18 | "Let's get started" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import pandas as pd\n", 28 | "import numpy as np" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "outputs": [ 36 | { 37 | "data": { 38 | "text/html": [ 39 | "
\n", 40 | "\n", 53 | "\n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | "
CityExperienceSalaryPromotion
0Delhi4.055000.0No
1Mumbai2.020000.0Yes
2Agra3.030000.0No
3Mumbai8.072000.0No
4Agra4.0NaNYes
\n", 101 | "
" 102 | ], 103 | "text/plain": [ 104 | " City Experience Salary Promotion\n", 105 | "0 Delhi 4.0 55000.0 No\n", 106 | "1 Mumbai 2.0 20000.0 Yes\n", 107 | "2 Agra 3.0 30000.0 No\n", 108 | "3 Mumbai 8.0 72000.0 No\n", 109 | "4 Agra 4.0 NaN Yes" 110 | ] 111 | }, 112 | "execution_count": 2, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "data=pd.read_csv(\"data/Data.csv\")\n", 119 | "data.head()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "If you look at the dataset you'll see we have missing values and you know how to deal with it. \n", 127 | "There are also new methods which I'll guide you through this week." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 3, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "City 1\n", 139 | "Experience 2\n", 140 | "Salary 1\n", 141 | "Promotion 0\n", 142 | "dtype: int64" 143 | ] 144 | }, 145 | "execution_count": 3, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | } 149 | ], 150 | "source": [ 151 | "data.isna().sum()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 4, 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "data": { 161 | "text/html": [ 162 | "
\n", 163 | "\n", 176 | "\n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | "
CityExperienceSalaryPromotion
0Delhi4.055000.0No
1Mumbai2.020000.0Yes
2Agra3.030000.0No
3Mumbai8.072000.0No
5Delhi5.060000.0Yes
\n", 224 | "
" 225 | ], 226 | "text/plain": [ 227 | " City Experience Salary Promotion\n", 228 | "0 Delhi 4.0 55000.0 No\n", 229 | "1 Mumbai 2.0 20000.0 Yes\n", 230 | "2 Agra 3.0 30000.0 No\n", 231 | "3 Mumbai 8.0 72000.0 No\n", 232 | "5 Delhi 5.0 60000.0 Yes" 233 | ] 234 | }, 235 | "execution_count": 4, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "# method 1\n", 242 | "data1 = data.dropna(how='any',axis=0) \n", 243 | "data1.head()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 5, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "x_most_frequent = [[55000.]\n", 256 | " [20000.]\n", 257 | " [30000.]\n", 258 | " [72000.]\n", 259 | " [52000.]\n", 260 | " [60000.]\n", 261 | " [52000.]\n", 262 | " [51000.]\n", 263 | " [59000.]\n", 264 | " [31000.]\n", 265 | " [58000.]\n", 266 | " [52000.]\n", 267 | " [79000.]\n", 268 | " [60000.]\n", 269 | " [67000.]]\n", 270 | "x_mean = [[55000. ]\n", 271 | " [20000. ]\n", 272 | " [30000. ]\n", 273 | " [72000. ]\n", 274 | " [53285.71428571]\n", 275 | " [60000. ]\n", 276 | " [52000. ]\n", 277 | " [51000. ]\n", 278 | " [59000. ]\n", 279 | " [31000. ]\n", 280 | " [58000. ]\n", 281 | " [52000. ]\n", 282 | " [79000. ]\n", 283 | " [60000. ]\n", 284 | " [67000. 
]]\n", 285 | "x_median = [[55000.]\n", 286 | " [20000.]\n", 287 | " [30000.]\n", 288 | " [72000.]\n", 289 | " [56500.]\n", 290 | " [60000.]\n", 291 | " [52000.]\n", 292 | " [51000.]\n", 293 | " [59000.]\n", 294 | " [31000.]\n", 295 | " [58000.]\n", 296 | " [52000.]\n", 297 | " [79000.]\n", 298 | " [60000.]\n", 299 | " [67000.]]\n" 300 | ] 301 | } 302 | ], 303 | "source": [ 304 | "# method 2\n", 305 | "from sklearn.impute import SimpleImputer as Imputer\n", 306 | "x = data['Salary'].values.reshape(-1,1)\n", 307 | "\n", 308 | "x_most_frequent = Imputer(missing_values=np.nan, \n", 309 | " strategy = 'most_frequent').fit_transform(x)\n", 310 | "print(\"x_most_frequent = \",x_most_frequent)\n", 311 | "\n", 312 | "x_mean = Imputer(missing_values=np.nan, \n", 313 | " strategy = 'mean').fit_transform(x)\n", 314 | "print(\"x_mean = \",x_mean)\n", 315 | "\n", 316 | "x_median = Imputer(missing_values=np.nan, \n", 317 | " strategy = 'median').fit_transform(x)\n", 318 | "print(\"x_median = \",x_median)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "Continuing the preprocessing, do keep in mind ML require mathematics, so we cannot have words \n", 326 | "To solve this we need to convert them to numbers. 
\n", 327 | "We can do it by giving them numbers like\n", 328 | "* Agra 0\n", 329 | "* Delhi 1\n", 330 | "* Mumbai 2" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 6, 336 | "metadata": {}, 337 | "outputs": [], 338 | "source": [ 339 | "#converting data frame to values\n", 340 | "X = data1.iloc[:, :-1].values\n", 341 | "y = data1.iloc[:, 3].values" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 7, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "array([['Delhi', 4.0, 55000.0],\n", 353 | " ['Mumbai', 2.0, 20000.0],\n", 354 | " ['Agra', 3.0, 30000.0],\n", 355 | " ['Mumbai', 8.0, 72000.0],\n", 356 | " ['Delhi', 5.0, 60000.0],\n", 357 | " ['Delhi', 4.0, 51000.0],\n", 358 | " ['Agra', 5.0, 59000.0],\n", 359 | " ['Delhi', 3.0, 31000.0],\n", 360 | " ['Delhi', 8.0, 79000.0],\n", 361 | " ['Agra', 5.0, 60000.0],\n", 362 | " ['Delhi', 7.0, 67000.0]], dtype=object)" 363 | ] 364 | }, 365 | "execution_count": 7, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "X" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 8, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "data": { 381 | "text/plain": [ 382 | "array(['No', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No',\n", 383 | " 'Yes'], dtype=object)" 384 | ] 385 | }, 386 | "execution_count": 8, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "y" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 9, 398 | "metadata": {}, 399 | "outputs": [ 400 | { 401 | "data": { 402 | "text/plain": [ 403 | "array([[1, 4.0, 55000.0],\n", 404 | " [2, 2.0, 20000.0],\n", 405 | " [0, 3.0, 30000.0],\n", 406 | " [2, 8.0, 72000.0],\n", 407 | " [1, 5.0, 60000.0],\n", 408 | " [1, 4.0, 51000.0],\n", 409 | " [0, 5.0, 59000.0],\n", 410 | " [1, 3.0, 31000.0],\n", 411 | " [1, 8.0, 
79000.0],\n", 412 | " [0, 5.0, 60000.0],\n", 413 | " [1, 7.0, 67000.0]], dtype=object)" 414 | ] 415 | }, 416 | "execution_count": 9, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "from sklearn.preprocessing import LabelEncoder\n", 423 | "labelencoder_X = LabelEncoder()\n", 424 | "\n", 425 | "X[:, 0] = labelencoder_X.fit_transform(X[:, 0])\n", 426 | "X" 427 | ] 428 | }, 429 | { 430 | "cell_type": "code", 431 | "execution_count": 10, 432 | "metadata": {}, 433 | "outputs": [ 434 | { 435 | "data": { 436 | "text/plain": [ 437 | "array(['Agra', 'Delhi', 'Mumbai'], dtype=object)" 438 | ] 439 | }, 440 | "execution_count": 10, 441 | "metadata": {}, 442 | "output_type": "execute_result" 443 | } 444 | ], 445 | "source": [ 446 | "labelencoder_X.classes_" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 11, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "labelencoder_y = LabelEncoder()\n", 456 | "y = labelencoder_y.fit_transform(y)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 12, 462 | "metadata": {}, 463 | "outputs": [ 464 | { 465 | "data": { 466 | "text/plain": [ 467 | "array([0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1])" 468 | ] 469 | }, 470 | "execution_count": 12, 471 | "metadata": {}, 472 | "output_type": "execute_result" 473 | } 474 | ], 475 | "source": [ 476 | "y" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "* no 0\n", 484 | "* yes 1" 485 | ] 486 | }, 487 | { 488 | "cell_type": "markdown", 489 | "metadata": {}, 490 | "source": [ 491 | "Over here we'll point out that in the case of cities we should not give one city more weightage than another. \n", 492 | "In a sense, as the number of cities increases, cities with a larger number will be given more priority by the ML formulas. \n", 493 | "\n", 494 | "Due to this Mumbai will get more importance than Agra. 
\n", 495 | "Think it over or google what will happen if instead of 3 we have 100 cities!!\n", 496 | "\n", 497 | "To overcome this we'll have to judge a column and apply one-hot encoding." 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 13, 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/plain": [ 508 | "array([[0., 1., 0.],\n", 509 | " [0., 0., 1.],\n", 510 | " [1., 0., 0.],\n", 511 | " [0., 0., 1.],\n", 512 | " [0., 1., 0.],\n", 513 | " [0., 1., 0.],\n", 514 | " [1., 0., 0.],\n", 515 | " [0., 1., 0.],\n", 516 | " [0., 1., 0.],\n", 517 | " [1., 0., 0.],\n", 518 | " [0., 1., 0.]])" 519 | ] 520 | }, 521 | "execution_count": 13, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [ 527 | "from sklearn.preprocessing import OneHotEncoder\n", 528 | "\n", 529 | "onehotencoder = OneHotEncoder(categories='auto') \n", 530 | "p = onehotencoder.fit_transform(X[:,0:1]).toarray()\n", 531 | "p" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 14, 537 | "metadata": {}, 538 | "outputs": [ 539 | { 540 | "data": { 541 | "text/html": [ 542 | "
\n", 543 | "\n", 556 | "\n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | "
AgraDelhiMumbai
0010
1001
2100
3001
5010
\n", 598 | "
" 599 | ], 600 | "text/plain": [ 601 | " Agra Delhi Mumbai\n", 602 | "0 0 1 0\n", 603 | "1 0 0 1\n", 604 | "2 1 0 0\n", 605 | "3 0 0 1\n", 606 | "5 0 1 0" 607 | ] 608 | }, 609 | "execution_count": 14, 610 | "metadata": {}, 611 | "output_type": "execute_result" 612 | } 613 | ], 614 | "source": [ 615 | "# This will seem more senseful to you. \n", 616 | "dff = pd.get_dummies(data1['City'])\n", 617 | "dff.head()" 618 | ] 619 | }, 620 | { 621 | "cell_type": "markdown", 622 | "metadata": {}, 623 | "source": [ 624 | "Although we converted our variables to the above format, still we'll face one issue. It's called the **Dummy Variable Trap**. We'll discuss it next week. " 625 | ] 626 | }, 627 | { 628 | "cell_type": "code", 629 | "execution_count": 15, 630 | "metadata": {}, 631 | "outputs": [ 632 | { 633 | "data": { 634 | "text/html": [ 635 | "
\n", 636 | "\n", 649 | "\n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | "
AgraDelhiMumbaiExperienceSalary
00104.055000.0
10012.020000.0
21003.030000.0
30018.072000.0
50105.060000.0
70104.051000.0
81005.059000.0
90103.031000.0
120108.079000.0
131005.060000.0
140107.067000.0
\n", 751 | "
" 752 | ], 753 | "text/plain": [ 754 | " Agra Delhi Mumbai Experience Salary\n", 755 | "0 0 1 0 4.0 55000.0\n", 756 | "1 0 0 1 2.0 20000.0\n", 757 | "2 1 0 0 3.0 30000.0\n", 758 | "3 0 0 1 8.0 72000.0\n", 759 | "5 0 1 0 5.0 60000.0\n", 760 | "7 0 1 0 4.0 51000.0\n", 761 | "8 1 0 0 5.0 59000.0\n", 762 | "9 0 1 0 3.0 31000.0\n", 763 | "12 0 1 0 8.0 79000.0\n", 764 | "13 1 0 0 5.0 60000.0\n", 765 | "14 0 1 0 7.0 67000.0" 766 | ] 767 | }, 768 | "execution_count": 15, 769 | "metadata": {}, 770 | "output_type": "execute_result" 771 | } 772 | ], 773 | "source": [ 774 | "dff=pd.concat([dff, data1[\"Experience\"],data1[\"Salary\"]], axis=1)\n", 775 | "dff" 776 | ] 777 | }, 778 | { 779 | "cell_type": "markdown", 780 | "metadata": {}, 781 | "source": [ 782 | "# Normalisation\n", 783 | "In the data frame above we should scale down the salary and Experience because with respect to 1 and 0 it is too large and will neglect the relevance of City. There are many methods to achieve this." 784 | ] 785 | }, 786 | { 787 | "cell_type": "code", 788 | "execution_count": 16, 789 | "metadata": {}, 790 | "outputs": [ 791 | { 792 | "name": "stdout", 793 | "output_type": "stream", 794 | "text": [ 795 | "[[-0.61237244 0.91287093 -0.47140452 -0.47140452 0.10738071]\n", 796 | " [-0.61237244 -1.09544512 2.12132034 -1.50849447 -1.8612657 ]\n", 797 | " [ 1.63299316 -1.09544512 -0.47140452 -0.98994949 -1.29879529]\n", 798 | " [-0.61237244 -1.09544512 2.12132034 1.60277537 1.0635804 ]\n", 799 | " [-0.61237244 0.91287093 -0.47140452 0.04714045 0.38861591]\n", 800 | " [-0.61237244 0.91287093 -0.47140452 -0.47140452 -0.11760745]\n", 801 | " [ 1.63299316 -1.09544512 -0.47140452 0.04714045 0.33236887]\n", 802 | " [-0.61237244 0.91287093 -0.47140452 -0.98994949 -1.24254825]\n", 803 | " [-0.61237244 0.91287093 -0.47140452 1.60277537 1.45730968]\n", 804 | " [ 1.63299316 -1.09544512 -0.47140452 0.04714045 0.38861591]\n", 805 | " [-0.61237244 0.91287093 -0.47140452 1.0842304 0.7823452 ]]\n" 806 | ] 807 | } 808 
| ], 809 | "source": [ 810 | "X = dff.iloc[:,:].values\n", 811 | "\n", 812 | "from sklearn.preprocessing import StandardScaler\n", 813 | "sc_X = StandardScaler()\n", 814 | "print(sc_X.fit_transform(X))" 815 | ] 816 | }, 817 | { 818 | "cell_type": "code", 819 | "execution_count": 17, 820 | "metadata": {}, 821 | "outputs": [ 822 | { 823 | "name": "stdout", 824 | "output_type": "stream", 825 | "text": [ 826 | "[[0. 1. 0. 0.5 0.69620253]\n", 827 | " [0. 0. 1. 0.25 0.25316456]\n", 828 | " [1. 0. 0. 0.375 0.37974684]\n", 829 | " [0. 0. 1. 1. 0.91139241]\n", 830 | " [0. 1. 0. 0.625 0.75949367]\n", 831 | " [0. 1. 0. 0.5 0.64556962]\n", 832 | " [1. 0. 0. 0.625 0.74683544]\n", 833 | " [0. 1. 0. 0.375 0.39240506]\n", 834 | " [0. 1. 0. 1. 1. ]\n", 835 | " [1. 0. 0. 0.625 0.75949367]\n", 836 | " [0. 1. 0. 0.875 0.84810127]]\n" 837 | ] 838 | } 839 | ], 840 | "source": [ 841 | "X = dff.iloc[:,:].values\n", 842 | "\n", 843 | "from sklearn.preprocessing import MaxAbsScaler\n", 844 | "m_X = MaxAbsScaler()\n", 845 | "print(m_X.fit_transform(X))" 846 | ] 847 | }, 848 | { 849 | "cell_type": "markdown", 850 | "metadata": {}, 851 | "source": [ 852 | "## Machine Learning\n", 853 | "- [Application](https://www.geeksforgeeks.org/machine-learning-introduction/)\n", 854 | "- [Types of ML models](https://www.geeksforgeeks.org/ml-types-learning-supervised-learning/)\n", 855 | "- [Difference between Supervised and Unsupervised Learning](https://www.geeksforgeeks.org/difference-between-supervised-and-unsupervised-learning/?ref=rp)\n", 856 | "- [Semi-supervised Learning](https://www.geeksforgeeks.org/ml-semi-supervised-learning/?ref=rp)" 857 | ] 858 | }, 859 | { 860 | "cell_type": "markdown", 861 | "metadata": {}, 862 | "source": [ 863 | "## Other Links to refer: \n", 864 | "- [Scikit-Learn](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.preprocessing)\n", 865 | "- [Geek for Geeks](https://www.geeksforgeeks.org/data-preprocessing-machine-learning-python/)\n", 866 | "- 
[Medium](https://medium.com/search?q=preprocessing%20in%20machine%20learning)\n", 867 | "- [YouTube](https://www.youtube.com/results?search_query=preprocessing+in+machine+learning)\n", 868 | "- [Analytics Vidhya](https://www.analyticsvidhya.com/blog/2016/07/practical-guide-data-preprocessing-python-scikit-learn/)" 869 | ] 870 | } 871 | ], 872 | "metadata": { 873 | "kernelspec": { 874 | "display_name": "Python 3", 875 | "language": "python", 876 | "name": "python3" 877 | }, 878 | "language_info": { 879 | "codemirror_mode": { 880 | "name": "ipython", 881 | "version": 3 882 | }, 883 | "file_extension": ".py", 884 | "mimetype": "text/x-python", 885 | "name": "python", 886 | "nbconvert_exporter": "python", 887 | "pygments_lexer": "ipython3", 888 | "version": "3.8.2" 889 | } 890 | }, 891 | "nbformat": 4, 892 | "nbformat_minor": 2 893 | } 894 | -------------------------------------------------------------------------------- /week 3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Welcome to Week 3\n", 8 | "## Linear Regression" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "Simple linear regression is a basic ML model. \n", 16 | "You must be aware about the equation of line **y=mx+c**. \n", 17 | "This function tries to make best fit line for our dataset. \n", 18 | "Dataset is available [here](https://www.kaggle.com/andonians/random-linear-regression) \n", 19 | "We recommend you to go through [this](https://www.youtube.com/watch?v=GhrxgbQnEEU) or [this](https://www.youtube.com/watch?v=E5RjzSK0fvY) video to understand the intuition." 
20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 1, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import pandas as pd\n", 29 | "data=pd.read_csv('data/x-y.csv')\n", 30 | "data = data.dropna(how='any',axis=0)[:100]" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 2, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | "
xy
024.021.549452
150.047.464463
215.017.218656
338.036.586398
487.087.288984
\n", 91 | "
" 92 | ], 93 | "text/plain": [ 94 | " x y\n", 95 | "0 24.0 21.549452\n", 96 | "1 50.0 47.464463\n", 97 | "2 15.0 17.218656\n", 98 | "3 38.0 36.586398\n", 99 | "4 87.0 87.288984" 100 | ] 101 | }, 102 | "execution_count": 2, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "data.head()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 3, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "image/png": "\n", 119 | "text/plain": [ 120 | "
" 121 | ] 122 | }, 123 | "metadata": { 124 | "needs_background": "light" 125 | }, 126 | "output_type": "display_data" 127 | } 128 | ], 129 | "source": [ 130 | "import matplotlib.pyplot as plt\n", 131 | "\n", 132 | "plt.scatter(data[\"x\"],data[\"y\"])\n", 133 | "plt.xlabel(\"X\")\n", 134 | "plt.ylabel(\"Y\")\n", 135 | "plt.show()" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "As observed the dataset is linear. " 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 4, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "x=data['x'].values.reshape(-1,1)\n", 152 | "y=data['y'].values.reshape(-1,1)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "### Spliting the Data\n", 160 | "It is necessary to have a dataset to train and one dataset to test. \n", 161 | "But these two must be of same origin to prevent error. \n", 162 | "For example if predicting presence of function from a webcam, you cannot expect to attain good results while testing for CCTV footage. 
\n", 163 | "\n", 164 | "So if test data is not given explicitly, make your own by splitting the data. For small data we can split in the ratio of about **train**: 0.8 **test**: 0.2\n", 165 | "\n", 166 | "For larger datasets we can split in the ratio of **train**: 0.99 **test**: 0.01 " 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 5, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "from sklearn.model_selection import train_test_split\n", 176 | "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "## [sklearn.linear_model.LinearRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html?highlight=linear%20regression)\n", 184 | "\n", 185 | "This class makes a best fit line for **Y = M*X + C** . \n", 186 | "Let's understand its working.\n", 187 | "\n", 188 | "* It chooses random variables m and c. \n", 189 | "* Calculate y prediction using these random variables. \n", 190 | "* Calculate error (root mean square).\n", 191 | "* Update variables to get least error using stats formula."
192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 6, 197 | "metadata": {}, 198 | "outputs": [ 199 | { 200 | "data": { 201 | "text/plain": [ 202 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 203 | ] 204 | }, 205 | "execution_count": 6, 206 | "metadata": {}, 207 | "output_type": "execute_result" 208 | } 209 | ], 210 | "source": [ 211 | "from sklearn.linear_model import LinearRegression\n", 212 | "r1=LinearRegression()\n", 213 | "# This is an object which stores value of slope and intercept\n", 214 | "r1.fit(X_train, y_train)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 7, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "array([0.54038651])" 226 | ] 227 | }, 228 | "execution_count": 7, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "r1.intercept_" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 8, 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "array([[0.98590331]])" 246 | ] 247 | }, 248 | "execution_count": 8, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "r1.coef_" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 9, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "y_pred = r1.predict(X_test)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 10, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "image/png": "\n", 274 | "text/plain": [ 275 | "
" 276 | ] 277 | }, 278 | "metadata": { 279 | "needs_background": "light" 280 | }, 281 | "output_type": "display_data" 282 | } 283 | ], 284 | "source": [ 285 | "#visualising\n", 286 | "plt.scatter(x,y,color='red')\n", 287 | "plt.plot(X_train, r1.predict(X_train),color='blue')\n", 288 | "plt.ylabel('salary')\n", 289 | "plt.xlabel('experience')\n", 290 | "plt.show()" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": 11, 296 | "metadata": {}, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/plain": [ 301 | "array([[99.13071774]])" 302 | ] 303 | }, 304 | "execution_count": 11, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "import numpy as np\n", 311 | "p=np.asarray([100]).reshape(-1,1)\n", 312 | "r1.predict(p)" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "## Multilinear Regression\n", 320 | "\n", 321 | "This model was very peculiar to two axis only. Let's add some more variables. \n", 322 | "Now our equation will be **y = b0 + b1x1 + b2x2 +b3x3 ...** \n", 323 | "Here Sklearn provides us flexibility, as we can again use the same class. Let's try it out. \n", 324 | "The dataset is available [here](https://www.kaggle.com/quantbruce/real-estate-price-prediction)." 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 12, 330 | "metadata": {}, 331 | "outputs": [ 332 | { 333 | "data": { 334 | "text/html": [ 335 | "
\n", 336 | "\n", 349 | "\n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | "
NoX1 transaction dateX2 house ageX3 distance to the nearest MRT stationX4 number of convenience storesX5 latitudeX6 longitudeY house price of unit area
012012.91732.084.878821024.98298121.5402437.9
122012.91719.5306.59470924.98034121.5395142.2
232013.58313.3561.98450524.98746121.5439147.3
342013.50013.3561.98450524.98746121.5439154.8
452012.8335.0390.56840524.97937121.5424543.1
\n", 421 | "
" 422 | ], 423 | "text/plain": [ 424 | " No X1 transaction date X2 house age \\\n", 425 | "0 1 2012.917 32.0 \n", 426 | "1 2 2012.917 19.5 \n", 427 | "2 3 2013.583 13.3 \n", 428 | "3 4 2013.500 13.3 \n", 429 | "4 5 2012.833 5.0 \n", 430 | "\n", 431 | " X3 distance to the nearest MRT station X4 number of convenience stores \\\n", 432 | "0 84.87882 10 \n", 433 | "1 306.59470 9 \n", 434 | "2 561.98450 5 \n", 435 | "3 561.98450 5 \n", 436 | "4 390.56840 5 \n", 437 | "\n", 438 | " X5 latitude X6 longitude Y house price of unit area \n", 439 | "0 24.98298 121.54024 37.9 \n", 440 | "1 24.98034 121.53951 42.2 \n", 441 | "2 24.98746 121.54391 47.3 \n", 442 | "3 24.98746 121.54391 54.8 \n", 443 | "4 24.97937 121.54245 43.1 " 444 | ] 445 | }, 446 | "execution_count": 12, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "dataset = pd.read_csv('data/Realestate.csv')\n", 453 | "dataset.head()" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 13, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [ 462 | "y = dataset[\"Y house price of unit area\"].values.reshape(-1,1)\n", 463 | "x = dataset.drop([\"No\",\"Y house price of unit area\",\"X1 transaction date\"],axis=1)" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 14, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "from sklearn.model_selection import train_test_split\n", 473 | "x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2,random_state=0)" 474 | ] 475 | }, 476 | { 477 | "cell_type": "code", 478 | "execution_count": 15, 479 | "metadata": {}, 480 | "outputs": [], 481 | "source": [ 482 | "\"\"\"\n", 483 | "We need to apply scaling due to the values. Here we've apllied Standard Scaler but you shoul check out more. 
\n", 484 | "Also the same scalar function must be applied to both training and testing data.\n", 485 | "\"\"\"\n", 486 | "from sklearn.preprocessing import StandardScaler\n", 487 | "sc = StandardScaler()\n", 488 | "x_train = sc.fit_transform(x_train)\n", 489 | "x_test = sc.transform(x_test)" 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": 16, 495 | "metadata": {}, 496 | "outputs": [], 497 | "source": [ 498 | "from sklearn.linear_model import LinearRegression\n", 499 | "l1=LinearRegression()\n", 500 | "l1.fit(x_train,y_train)\n", 501 | "y_pred=l1.predict(x_test)" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 17, 507 | "metadata": {}, 508 | "outputs": [ 509 | { 510 | "data": { 511 | "text/plain": [ 512 | "array([[41.39074736],\n", 513 | " [12.35535341],\n", 514 | " [41.10500538],\n", 515 | " [12.16895621],\n", 516 | " [40.36129482]])" 517 | ] 518 | }, 519 | "execution_count": 17, 520 | "metadata": {}, 521 | "output_type": "execute_result" 522 | } 523 | ], 524 | "source": [ 525 | "y_pred[:5]" 526 | ] 527 | }, 528 | { 529 | "cell_type": "code", 530 | "execution_count": 18, 531 | "metadata": {}, 532 | "outputs": [ 533 | { 534 | "data": { 535 | "text/plain": [ 536 | "array([[45.3],\n", 537 | " [14.4],\n", 538 | " [46. ],\n", 539 | " [15.6],\n", 540 | " [50.2]])" 541 | ] 542 | }, 543 | "execution_count": 18, 544 | "metadata": {}, 545 | "output_type": "execute_result" 546 | } 547 | ], 548 | "source": [ 549 | "y_test[:5]" 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": {}, 555 | "source": [ 556 | "the values are pretty close. Let's find our error. 
\n", 557 | "We have multiple methods for determining error, and you should check [this](https://www.dataquest.io/blog/understanding-regression-error-metrics/) to learn about types of regression errors and when to use them.\n", 558 | "\n", 559 | "We'll be using mean squared error from [sklearn.metrics](https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics)" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 19, 565 | "metadata": {}, 566 | "outputs": [ 567 | { 568 | "data": { 569 | "text/plain": [ 570 | "62.172235622414036" 571 | ] 572 | }, 573 | "execution_count": 19, 574 | "metadata": {}, 575 | "output_type": "execute_result" 576 | } 577 | ], 578 | "source": [ 579 | "from sklearn.metrics import mean_squared_error \n", 580 | "mean_squared_error(y_test,y_pred)" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "## Dummy variable trap \n", 588 | "Last week we learned about One Hot Encoding our data, to give equal weightage to different classes, but we face a problem here!!\n", 589 | "\n", 590 | "Let's consider **y = b0 + b1x1 + b2x2 +b3x3** \n", 591 | "\n", 592 | "Where x2 and x3 are dummy variables, i.e. if x2 = 1 ,x3 = 0 \n", 593 | "So x3 = 1-x2 \n", 594 | "Applying in our equation \n", 595 | "\n", 596 | "y = b0 + b1x1 + b2x2 +b3(1-x2) \n", 597 | "or\n", 598 | "y = b0 + b3 + b1x1 + x2 (b2-b3)\n", 599 | "\n", 600 | "So variable b3 is being added to the constant to create a new constant, and subtracting it from b2 affects our coefficient. Hence, the equation of the line changes, contributing to an increase in loss. 
\n", 601 | "\n", 602 | "This is the dummy variable trap, and the only way to settle it is to remove one dummy variable while creating them\n", 603 | "\n", 604 | "Check out more [here](https://www.youtube.com/watch?v=qrWx3OjZL3o)" 605 | ] 606 | }, 607 | { 608 | "cell_type": "markdown", 609 | "metadata": {}, 610 | "source": [ 611 | "## Backward elimination\n", 612 | "This is a method of removing columns with small or no effect on our result. \n", 613 | "This makes our model faster and also in some cases more robust. \n", 614 | "Refer to [this](https://medium.com/@manjabogicevic/multiple-linear-regression-using-python-b99754591ac0) to learn more. " 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 | "# Polynomial regression\n", 622 | "\n", 623 | "Not all target variables depend linearly on the input variables.\n", 624 | "Let's consider a dataset of your position in a company and your Salary.\n", 625 | "A sample dataset is present [here](https://www.kaggle.com/testpython/polynomial-position-salary-data)\n", 626 | "\n", 627 | "A degree 2 polynomial equation looks like this \n", 628 | "**y = ax^2 + bx + c** \n", 629 | "\n", 630 | "A degree 3 equation is like this \n", 631 | "**y = ax^3 + bx^2 + cx + d**" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": 20, 637 | "metadata": {}, 638 | "outputs": [ 639 | { 640 | "data": { 641 | "text/html": [ 642 | "
\n", 643 | "\n", 656 | "\n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | "
PositionLevelSalary
0Business Analyst145000
1Junior Consultant250000
2Senior Consultant360000
3Manager480000
4Country Manager5110000
\n", 698 | "
" 699 | ], 700 | "text/plain": [ 701 | " Position Level Salary\n", 702 | "0 Business Analyst 1 45000\n", 703 | "1 Junior Consultant 2 50000\n", 704 | "2 Senior Consultant 3 60000\n", 705 | "3 Manager 4 80000\n", 706 | "4 Country Manager 5 110000" 707 | ] 708 | }, 709 | "execution_count": 20, 710 | "metadata": {}, 711 | "output_type": "execute_result" 712 | } 713 | ], 714 | "source": [ 715 | "data = pd.read_csv('data/Position_Salaries.csv')\n", 716 | "data.head()" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 21, 722 | "metadata": {}, 723 | "outputs": [], 724 | "source": [ 725 | "x=data.iloc[:,1:2].values\n", 726 | "y=data.iloc[:,-1].values" 727 | ] 728 | }, 729 | { 730 | "cell_type": "markdown", 731 | "metadata": {}, 732 | "source": [ 733 | "There's no Polynomail regression method in Scikit Learn, but we can convert our dataset to polynomial features and them implement Linear Regression. \n", 734 | "Check out more at **[Polynomial Features](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html)**" 735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": 22, 740 | "metadata": {}, 741 | "outputs": [ 742 | { 743 | "data": { 744 | "text/plain": [ 745 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 746 | ] 747 | }, 748 | "execution_count": 22, 749 | "metadata": {}, 750 | "output_type": "execute_result" 751 | } 752 | ], 753 | "source": [ 754 | "from sklearn.linear_model import LinearRegression\n", 755 | "from sklearn.preprocessing import PolynomialFeatures\n", 756 | "\n", 757 | "p1= PolynomialFeatures(degree=2) # lets start with degree 2\n", 758 | "x_poly=p1.fit_transform(x)\n", 759 | "l2=LinearRegression()\n", 760 | "l2.fit(x_poly,y)" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 23, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "image/png": "\n", 771 | "text/plain": [ 772 | "
" 773 | ] 774 | }, 775 | "metadata": { 776 | "needs_background": "light" 777 | }, 778 | "output_type": "display_data" 779 | } 780 | ], 781 | "source": [ 782 | "plt.scatter(x, y, color = 'red')\n", 783 | "plt.plot(x, l2.predict(p1.fit_transform(x)), color = 'blue')\n", 784 | "plt.title('Truth or Bluff (Regression Model)')\n", 785 | "plt.xlabel('Position level')\n", 786 | "plt.ylabel('Salary')\n", 787 | "plt.show()" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 24, 793 | "metadata": {}, 794 | "outputs": [ 795 | { 796 | "data": { 797 | "image/png": "\n", 798 | "text/plain": [ 799 | "
" 800 | ] 801 | }, 802 | "metadata": { 803 | "needs_background": "light" 804 | }, 805 | "output_type": "display_data" 806 | } 807 | ], 808 | "source": [ 809 | "p1= PolynomialFeatures(degree=5) # lets start with degree 5\n", 810 | "x_poly=p1.fit_transform(x)\n", 811 | "l2=LinearRegression()\n", 812 | "l2.fit(x_poly,y)\n", 813 | "\n", 814 | "plt.scatter(x, y, color = 'red')\n", 815 | "plt.plot(x, l2.predict(p1.fit_transform(x)), color = 'blue')\n", 816 | "plt.title('Truth or Bluff (Regression Model)')\n", 817 | "plt.xlabel('Position level')\n", 818 | "plt.ylabel('Salary')\n", 819 | "plt.show()" 820 | ] 821 | }, 822 | { 823 | "cell_type": "markdown", 824 | "metadata": {}, 825 | "source": [ 826 | "That's all for this week. \n", 827 | "Do refer the link and practice out on Kaggle. \n", 828 | "You can also refer here for more regression models. \n", 829 | "- [Super Data Science](https://www.superdatascience.com/pages/machine-learning)\n", 830 | "- [Scikit-Learn](https://scikit-learn.org/stable/index.html)\n", 831 | "- [YouTube](https://www.youtube.com/watch?v=E5RjzSK0fvY)" 832 | ] 833 | } 834 | ], 835 | "metadata": { 836 | "kernelspec": { 837 | "display_name": "Python 3", 838 | "language": "python", 839 | "name": "python3" 840 | }, 841 | "language_info": { 842 | "codemirror_mode": { 843 | "name": "ipython", 844 | "version": 3 845 | }, 846 | "file_extension": ".py", 847 | "mimetype": "text/x-python", 848 | "name": "python", 849 | "nbconvert_exporter": "python", 850 | "pygments_lexer": "ipython3", 851 | "version": "3.8.2" 852 | } 853 | }, 854 | "nbformat": 4, 855 | "nbformat_minor": 2 856 | } 857 | --------------------------------------------------------------------------------