├── Fundamentals of PCA_I ├── Change+of+Basis.zip └── init ├── PCA in Python ├── Graded_Questions_PCA.ipynb ├── Logistic+Regression+and+PCA.zip ├── PCA+Demo+I.zip └── init ├── PCA-Additional Resource ├── HousingCaseStudy_Using_PCA.ipynb └── init └── README.md /Fundamentals of PCA_I/Change+of+Basis.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Unsupervised-Learning-Principal-Component-Analysis/cae9f26b2a2d792c441bcd980a968c0c77dd479c/Fundamentals of PCA_I/Change+of+Basis.zip -------------------------------------------------------------------------------- /Fundamentals of PCA_I/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PCA in Python/Graded_Questions_PCA.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Importing and Understanding Data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd\n", 17 | "import numpy as np" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# Importing newhousing.csv\n", 27 | "housing = pd.read_csv('newhousing.csv')" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# Looking at the first five rows\n", 37 | "housing.head()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "housing.shape" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | 
"source": [ 55 | "# What type of values are stored in the columns?\n", 56 | "housing.info()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "## Splitting Data into Training and Testing Sets" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "housing.columns" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "# Putting feature variable to X\n", 82 | "X = housing[['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',\n", 83 | " 'guestroom', 'basement', 'hotwaterheating', 'airconditioning',\n", 84 | " 'parking', 'prefarea', 'semi-furnished', 'unfurnished',\n", 85 | " 'areaperbedroom', 'bbratio']]\n", 86 | "\n", 87 | "# Putting response variable to y\n", 88 | "y = housing['price']" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# Importing matplotlib and seaborn\n", 98 | "import matplotlib.pyplot as plt\n", 99 | "import seaborn as sns\n", 100 | "%matplotlib inline\n", 101 | "# Let's see the correlation matrix \n", 102 | "plt.figure(figsize = (16,10)) # Size of the figure\n", 103 | "sns.heatmap(X.corr(),annot = True)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "#creating correlation matrix for the given data\n", 113 | "corrmat = np.corrcoef(X.transpose())" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "#Make a diagonal matrix with diagonal entry of Matrix corrmat\n", 123 | "p=np.diagflat(corrmat.diagonal())" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": {}, 130 | "outputs": [], 131 | 
"source": [ 132 | "# subtract diagonal entries making all diagonals 0\n", 133 | "corrmat_diag_zero = corrmat - p\n", 134 | "print(\"max corr:\",corrmat_diag_zero.max(), \", min corr: \", corrmat_diag_zero.min(),)\n" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "### Question No. 1 and 2" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# Retrieve the (i,j) index for which matrix has maximum value\n", 151 | "\n", 152 | "ij_max = np.unravel_index(\n", 153 | "# ## add code to solve for Question 1 and Question 2\n", 154 | ")\n", 155 | "print(\"ij_max is\",ij_max)\n", 156 | "print(\"Maximum correlation :\",corrmat_diag_zero[ij_max])" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "import matplotlib.pyplot as plt\n", 173 | "from sklearn import linear_model\n", 174 | "from sklearn.metrics import mean_squared_error, r2_score\n", 175 | "from sklearn.metrics import explained_variance_score\n", 176 | "#Importing the PCA module\n", 177 | "from sklearn.decomposition import PCA" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "#random_state is the seed used by the random number 
generator, it can be any integer.\n", 208 | "from sklearn.model_selection import train_test_split\n", 209 | "X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7 ,test_size = 0.3, random_state=100)\n", 210 | "\n", 211 | "from sklearn import preprocessing\n", 212 | "X_scaler = preprocessing.StandardScaler().fit(X_train)\n", 213 | "y_scaler = preprocessing.StandardScaler().fit(y_train.values.reshape(-1,1))\n", 214 | "Xtrain=X_scaler.transform(X_train) \n", 215 | "ytrain=y_scaler.transform(y_train.values.reshape(-1,1)) \n", 216 | "\n", 217 | "Xtest=X_scaler.transform(X_test) \n", 218 | "ytest=y_scaler.transform(y_test.values.reshape(-1,1))" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "metadata": {}, 224 | "source": [ 225 | "### Question No. 4" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "pca = PCA(n_components=6,random_state=100)\n", 235 | "\n", 236 | "#Scale and transform data to get Principal Components\n", 237 | "\n", 238 | "## add code for question no. 
4\n", 239 | "# fit_transform and transform to get the reduced data\n", 240 | "# Xtrain_reduced = \n", 241 | "# Xtest_reduced = \n", 242 | "\n", 243 | "\n", 244 | "regrpca = linear_model.LinearRegression()\n", 245 | "# Train the model using the principal components of the transformed training sets\n", 246 | "regrpca.fit(Xtrain_reduced, ytrain)\n", 247 | "# Make predictions using the principal components of the transformed testing set\n", 248 | "y_pred = regrpca.predict(Xtest_reduced)\n", 249 | "print(\"Mean squared error: %.2f\" % mean_squared_error(ytest, y_pred))\n", 250 | "# R2 score (coefficient of determination): 1 is perfect prediction\n", 251 | "print('R2 score: %.2f' % r2_score(ytest, y_pred))\n" 252 | ] 253 | } 254 | ], 255 | "metadata": { 256 | "kernelspec": { 257 | "display_name": "Python 3", 258 | "language": "python", 259 | "name": "python3" 260 | }, 261 | "language_info": { 262 | "codemirror_mode": { 263 | "name": "ipython", 264 | "version": 3 265 | }, 266 | "file_extension": ".py", 267 | "mimetype": "text/x-python", 268 | "name": "python", 269 | "nbconvert_exporter": "python", 270 | "pygments_lexer": "ipython3", 271 | "version": "3.6.3" 272 | } 273 | }, 274 | "nbformat": 4, 275 | "nbformat_minor": 2 276 | } 277 | -------------------------------------------------------------------------------- /PCA in Python/Logistic+Regression+and+PCA.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Unsupervised-Learning-Principal-Component-Analysis/cae9f26b2a2d792c441bcd980a968c0c77dd479c/PCA in Python/Logistic+Regression+and+PCA.zip -------------------------------------------------------------------------------- /PCA in Python/PCA+Demo+I.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ContentUpgrad/Unsupervised-Learning-Principal-Component-Analysis/cae9f26b2a2d792c441bcd980a968c0c77dd479c/PCA in Python/PCA+Demo+I.zip 
-------------------------------------------------------------------------------- /PCA in Python/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /PCA-Additional Resource/init: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unsupervised-Learning-Principal-Component-Analysis --------------------------------------------------------------------------------