├── CODE_OF_CONDUCT.md ├── Complete_Data_.csv ├── Indian_pines_knnc.py ├── Indian_pines_knnc_after_pca.py ├── LICENSE.md ├── PaviaUniversity_HSI ├── Pavia_University.ipynb └── README.md ├── README.md ├── assets ├── Indian_pines_accuracy_after_pca.JPG ├── Indian_pines_accuracy_before_pca.JPG ├── Indian_pines_classification_before_pca.JPG ├── Indian_pines_varianve_ratio.JPG └── indian_pines_after_pca_with_2PC.JPG ├── indian_pines_after_pca.csv ├── indian_pines_after_pca.dat └── indian_pines_pca.py /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at syamkakarla1126@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /Indian_pines_knnc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | 5 | 6 | from sklearn import decomposition 7 | from sklearn import datasets 8 | 9 | # load dataset into Pandas DataFrame 10 | df = pd.read_csv("D:\Python_programs\ML\Complete_Data_.csv") 11 | 12 | from sklearn.preprocessing import StandardScaler 13 | n=[] 14 | ind=[] 15 | for i in range(200): 16 | n.append(i+1) 17 | for i in range(200): 18 | ind.append('px'+str(n[i])) 19 | 20 | features = ind 21 | X = df.loc[:, features].values 22 | # Separating out the target 23 | Y = df.loc[:,['target']].values 24 | from sklearn.model_selection import train_test_split 25 | 26 | X_train, X_test, y_train, y_test = train_test_split( 27 | X, Y, test_size = 0.3, random_state = 100) 28 | y_train=y_train.ravel() 29 | y_test=y_test.ravel() 30 | #classifier.fit(X_train, y_train.squeeze()) 31 | 32 | from sklearn.neighbors import KNeighborsClassifier # FOR K=13 ,IT HAS ACCURACY AROUND 72.7488902980 33 | from sklearn import metrics 34 | import time 35 | #model = KNeighborsClassifier() 36 | model=KNeighborsClassifier(n_neighbors =13, weights='uniform', algorithm='auto') 37 | model.fit(X_train, y_train) 38 | start = time.time() 39 | Yhat = model.predict(X_test) 40 | end = time.time() 41 | print('Time Taken For Classification is :',(end - start)) 42 | print("Accuracy :",metrics.accuracy_score(Yhat, y_test)*100) 43 | print('\n','*'*11,'Accuracy of INDIAN-PINES Dataset Before PCA','*'*11) 44 | print('*'*11,' Classifier : K-NEAREST NEIGHBOUR ','*'*11) 45 | for K in range(25): 46 | K_value = K+1 47 | neigh = KNeighborsClassifier(n_neighbors = K_value, weights='uniform', algorithm='auto') 48 | neigh.fit(X_train, y_train) 49 | y_pred = neigh.predict(X_test) 50 | print ("Accuracy is :%1.10f"%(metrics.accuracy_score(y_test,y_pred)*100),"% ","for K-Value: %4d"%(K_value)) 51 | 52 | -------------------------------------------------------------------------------- /Indian_pines_knnc_after_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | 5 | 6 | from sklearn import decomposition 7 | from sklearn import datasets 8 | 9 | # load dataset into Pandas DataFrame 10 | df = pd.read_csv("D:\Python_programs\ML\indian_pines_after_pca.csv") 11 | 12 | from sklearn.preprocessing import StandardScaler 13 | n=[] 14 | ind=[] 15 | for i in range(2): 16 | ind.append('PC-'+str(i+1)) 17 | 18 | features = ind 19 | X = df.loc[:, features].values 20 | # Separating out the target 21 | Y = df.loc[:,['target']].values 22 | from sklearn.model_selection import train_test_split 23 | 24 | X_train, X_test, y_train, y_test = train_test_split( 25 | X, Y, test_size = 0.3, random_state = 100) 26 | y_train=y_train.ravel() 27 | y_test=y_test.ravel() 28 | #classifier.fit(X_train, y_train.squeeze()) 29 | 30 | from sklearn.neighbors import KNeighborsClassifier # FOR K=13 ,IT HAS ACCURACY AROUND 72.7488902980 31 | from sklearn import metrics 32 | import time 33 | #model = KNeighborsClassifier() 34 | model=KNeighborsClassifier(n_neighbors =13, weights='uniform', algorithm='auto') 35 | model.fit(X_train, y_train) 36 | start = time.time() 37 | Yhat = model.predict(X_test) 38 | end = time.time() 39 | print('Time Taken For Classification is :',(end - start)) 40 | print("Accuracy :",metrics.accuracy_score(Yhat, y_test)*100) 41 | print('\n','*'*11,'Accuracy of INDIAN-PINES Dataset After PCA','*'*11) 42 | print('*'*11,' Classifier : K-NEAREST NEIGHBOUR ','*'*11) 43 | for K in range(25): 44 | K_value = K+1 45 | neigh = KNeighborsClassifier(n_neighbors = K_value, weights='uniform', algorithm='auto') 46 | neigh.fit(X_train, y_train) 47 | #start = time.time() 48 | y_pred = neigh.predict(X_test) 49 | #end = time.time() 50 | #print('Time Taken For Classification is :',(end - start)) 51 | print ("Accuracy is :%1.10f"%(metrics.accuracy_score(y_test,y_pred)*100),"% ","for K-Value: %4d"%(K_value)) 52 | 53 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Syam kakarla 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PaviaUniversity_HSI/Pavia_University.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import pandas as pd\n", 12 | "from sklearn import decomposition\n", 13 | "from sklearn.preprocessing import MinMaxScaler\n", 14 | "from sklearn.decomposition import PCA\n", 15 | "import seaborn as sns" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 4, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/html": [ 26 | "
\n", 27 | "\n", 40 | "\n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | "
pix1pix2pix3pix4pix5pix6pix7pix8pix9pix10...pix95pix96pix97pix98pix99pix100pix101pix102pix103class
0255255255255255255255255255255...2552552552552552552552552550
1255255255255255255255255255255...2552552552552552552552552550
2255255255255255255255255255255...2552552552552552552552552550
3255255255255255255178198193224...2552552552552552552552552550
4255255255255255255255208231255...2552552552552552552552552550
\n", 190 | "

5 rows × 104 columns

\n", 191 | "
" 192 | ], 193 | "text/plain": [ 194 | " pix1 pix2 pix3 pix4 pix5 pix6 pix7 pix8 pix9 pix10 ... pix95 \\\n", 195 | "0 255 255 255 255 255 255 255 255 255 255 ... 255 \n", 196 | "1 255 255 255 255 255 255 255 255 255 255 ... 255 \n", 197 | "2 255 255 255 255 255 255 255 255 255 255 ... 255 \n", 198 | "3 255 255 255 255 255 255 178 198 193 224 ... 255 \n", 199 | "4 255 255 255 255 255 255 255 208 231 255 ... 255 \n", 200 | "\n", 201 | " pix96 pix97 pix98 pix99 pix100 pix101 pix102 pix103 class \n", 202 | "0 255 255 255 255 255 255 255 255 0 \n", 203 | "1 255 255 255 255 255 255 255 255 0 \n", 204 | "2 255 255 255 255 255 255 255 255 0 \n", 205 | "3 255 255 255 255 255 255 255 255 0 \n", 206 | "4 255 255 255 255 255 255 255 255 0 \n", 207 | "\n", 208 | "[5 rows x 104 columns]" 209 | ] 210 | }, 211 | "execution_count": 4, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "source": [ 217 | "df = pd.read_csv(\"paviaU_CompleteData.csv\")\n", 218 | "df.head()" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 5, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "X = df.loc[:, ['pix'+str(i) for i in range(1,104)]].values\n", 228 | "# Separating out the target\n", 229 | "y = df.loc[:,['class']].values" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 6, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "scaler_model = MinMaxScaler()\n", 239 | "scaler_model.fit(X.astype(float))\n", 240 | "x=scaler_model.transform(X)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 7, 246 | "metadata": {}, 247 | "outputs": [], 248 | "source": [ 249 | "from sklearn.neighbors import KNeighborsClassifier \n", 250 | "from sklearn import metrics\n", 251 | "def perform_knnc(X_train, X_test, y_train, y_test, w = 'uniform'):\n", 252 | " # try K=1 through K=40 and record testing accuracy\n", 253 | " k_range = range(1, 41)\n", 254 | "\n", 255 | " # We can create Python list using [] or llist()\n", 256 | " scores = []\n", 257 | "\n", 258 | " # We use a loop through the range 1 to 40\n", 259 | " # We append the scores in a list\n", 260 | " for k in k_range:\n", 261 | " knn = KNeighborsClassifier(n_neighbors=k, weights=w, algorithm='auto')\n", 262 | " knn.fit(X_train, y_train)\n", 263 | " y_pred = knn.predict(X_test)\n", 264 | " scores.append(metrics.accuracy_score(y_test, y_pred))\n", 265 | " scores=[i*100 for i in scores]\n", 266 | " return scores\n", 267 | "\n", 268 | "def plot_acc(scores):\n", 269 | " plt.figure(figsize=(12, 6))\n", 270 | " plt.plot(scores, color='blue', linestyle='dashed', marker='o', \n", 271 | " markerfacecolor='#ff6347', markersize=10)\n", 272 | " plt.title('Accuracy scores for K-values(1-40)') \n", 273 | " plt.xlabel('K Value')\n", 274 | " plt.xticks=[i for i in range(1, 41)]\n", 275 | " plt.ylabel('Accuracy')\n", 276 | " plt.show()" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "metadata": {}, 282 | "source": [ 283 | "## PCA" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 8, 289 | "metadata": {}, 290 | "outputs": [ 291 | { 292 | "data": { 293 | "text/plain": [ 294 | "0.9996941215372042" 295 | ] 296 | }, 297 | "execution_count": 8, 298 | "metadata": {}, 299 | "output_type": "execute_result" 300 | } 301 | ], 302 | "source": [ 303 | "from sklearn.model_selection import train_test_split\n", 304 | "\n", 305 | "pca = PCA(n_components=70)\n", 306 | "principalComponents = pca.fit_transform(X)\n", 307 | "sum(pca.explained_variance_ratio_)" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 10, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "principalDf = pd.DataFrame(data = principalComponents, columns = ['PC-'+str(i) for i in range(1, 71)])\n", 317 | "# Adding lables\n", 318 | "finalDf = pd.concat([principalDf, df[['class']]], axis = 1)\n", 319 | "finalDf.to_csv('PaviaU_pca_70.csv')" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [ 328 | "X_train, X_test, y_train, y_test = train_test_split(principalComponents, y.ravel(), test_size = 0.3, random_state = 100)\n", 329 | "acc_scores=[]\n", 330 | "acc_scores = perform_knnc(X_train, X_test, y_train, y_test, w = 'uniform')\n", 331 | "print('Maximum Accuracy:',max(acc_scores),' at k-value',acc_scores.index(max(acc_scores))+1)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "acc_scores = perform_knnc(X_train, X_test, y_train, y_test, w = 'distance')\n", 341 | "print('Maximum Accuracy:',max(acc_scores),' at k-value',acc_scores.index(max(acc_scores))+1)\n", 342 | "for i in range(len(acc_scores)):\n", 343 | " print(\"Accuracy : %2.10f at k-value %2d\"%(acc_scores[i], i+1))" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "print(' Accuracy:',acc_scores[6],' at k-value',7)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "## Accuracies:\n", 360 | "## PCA + KNNC = 86.78804855275443\n", 361 | "## PCA + WKNNC = 87.58169934640523" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "### -------------------------------------------------------------------------------------------------------------------------------------------------------" 369 | ] 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "metadata": {}, 374 | "source": [ 375 | "# KPCA" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 43, 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "from sklearn.model_selection import train_test_split\n", 385 | "from sklearn.decomposition import KernelPCA\n", 386 | "\n", 387 | "kpca = kpca = KernelPCA(n_components=70, kernel='poly')\n", 388 | "principalComponents = kpca.fit_transform(X)\n", 389 | "# Adding lables\n", 390 | "principalDf = pd.DataFrame(data = principalComponents, columns = ['PC-'+str(i) for i in range(1, 71)])\n", 391 | "# Adding lables\n", 392 | "finalDf = pd.concat([principalDf, df[['class']]], axis = 1)\n", 393 | "finalDf.to_csv('salinas_kpca_70.csv')\n" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": 44, 399 | "metadata": {}, 400 | "outputs": [ 401 | { 402 | "name": "stdout", 403 | "output_type": "stream", 404 | "text": [ 405 | "Maximum Accuracy: 85.80765639589168 at k-value 9\n", 406 | "Maximum Accuracy: 85.52754435107376 at k-value 7\n" 407 | ] 408 | } 409 | ], 410 | "source": [ 411 | "X_train, X_test, y_train, y_test = train_test_split(principalComponents, y.ravel(), test_size = 0.3, random_state = 100)\n", 412 | "acc_scores=[]\n", 413 | "acc_scores = perform_knnc(X_train, X_test, y_train, y_test, w = 'uniform')\n", 414 | "print('Maximum Accuracy:',max(acc_scores),' at k-value',acc_scores.index(max(acc_scores))+1)\n", 415 | "print('Maximum Accuracy:',acc_scores[6],' at k-value',7)" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 45, 421 | "metadata": {}, 422 | "outputs": [ 423 | { 424 | "name": "stdout", 425 | "output_type": "stream", 426 | "text": [ 427 | "Maximum Accuracy: 87.06816059757236 at k-value 6\n", 428 | "Maximum Accuracy: 86.60130718954248 at k-value 7\n" 429 | ] 430 | } 431 | ], 432 | "source": [ 433 | "acc_scores = perform_knnc(X_train, X_test, y_train, y_test, w = 'distance')\n", 434 | "print('Maximum Accuracy:',max(acc_scores),' at k-value',acc_scores.index(max(acc_scores))+1)\n", 435 | "print('Maximum Accuracy:',acc_scores[6],' at k-value',7)" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "## Accuracies:\n", 443 | "## PCA + KNNC = 86.78804855275443\n", 444 | "## PCA + WKNNC = 87.58169934640523\n", 445 | "## KPCA + KNNC = 85.52754435107376\n", 446 | "## KPCA + WKNNC = 86.60130718954248" 447 | ] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "execution_count": 49, 452 | "metadata": {}, 453 | "outputs": [], 454 | "source": [ 455 | "def plot_gt(X, Y, k_value, w='uniform', q='plot'):\n", 456 | " neigh = KNeighborsClassifier(n_neighbors = k_value, weights= w, algorithm='auto')\n", 457 | " neigh.fit(X, Y.ravel())\n", 458 | " pred = neigh.predict(X)\n", 459 | " gt = pred\n", 460 | " gt.resize((86, 83))\n", 461 | " fig = plt.figure(figsize=(6, 6))\n", 462 | " plt.title(\"Clasification Map(salinas): \"+q)\n", 463 | " plt.imshow(gt, cmap='jet')\n", 464 | " q = q+'.png'\n", 465 | " fig.savefig(q, dpi=fig.dpi, bbox_inches=\"tight\")\n", 466 | " plt.colorbar()\n", 467 | " plt.show()" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": 51, 473 | "metadata": {}, 474 | "outputs": [ 475 | { 476 | "data": { 477 | "image/png": "\n", 478 | "text/plain": [ 479 | "
" 480 | ] 481 | }, 482 | "metadata": { 483 | "needs_background": "light" 484 | }, 485 | "output_type": "display_data" 486 | } 487 | ], 488 | "source": [ 489 | "df = pd.read_csv(\"salinas_pca_70.csv\")\n", 490 | "X = df.loc[:, ['PC-'+str(i) for i in range(1,71)]].values\n", 491 | "# Separating out the target\n", 492 | "Y = df.loc[:,['class']].values\n", 493 | "plot_gt(X, Y, k_value=7, w='uniform', q='PCA + KNNC')" 494 | ] 495 | }, 496 | { 497 | "cell_type": "code", 498 | "execution_count": 52, 499 | "metadata": {}, 500 | "outputs": [ 501 | { 502 | "data": { 503 | "image/png": "\n", 504 | "text/plain": [ 505 | "
" 506 | ] 507 | }, 508 | "metadata": { 509 | "needs_background": "light" 510 | }, 511 | "output_type": "display_data" 512 | } 513 | ], 514 | "source": [ 515 | "df = pd.read_csv(\"salinas_pca_70.csv\")\n", 516 | "X = df.loc[:, ['PC-'+str(i) for i in range(1,71)]].values\n", 517 | "# Separating out the target\n", 518 | "Y = df.loc[:,['class']].values\n", 519 | "plot_gt(X, Y, k_value=7, w='distance', q='PCA + WKNNC')" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 54, 525 | "metadata": {}, 526 | "outputs": [ 527 | { 528 | "data": { 529 | "image/png": "\n", 530 | "text/plain": [ 531 | "
" 532 | ] 533 | }, 534 | "metadata": { 535 | "needs_background": "light" 536 | }, 537 | "output_type": "display_data" 538 | } 539 | ], 540 | "source": [ 541 | "df = pd.read_csv(\"salinas_kpca_70.csv\")\n", 542 | "X = df.loc[:, ['PC-'+str(i) for i in range(1,71)]].values\n", 543 | "# Separating out the target\n", 544 | "Y = df.loc[:,['class']].values\n", 545 | "plot_gt(X, Y, k_value=7, q='kPCA + KNNC')" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": 55, 551 | "metadata": {}, 552 | "outputs": [ 553 | { 554 | "data": { 555 | "image/png": "\n", 556 | "text/plain": [ 557 | "
" 558 | ] 559 | }, 560 | "metadata": { 561 | "needs_background": "light" 562 | }, 563 | "output_type": "display_data" 564 | } 565 | ], 566 | "source": [ 567 | "df = pd.read_csv(\"salinas_kpca_70.csv\")\n", 568 | "X = df.loc[:, ['PC-'+str(i) for i in range(1,71)]].values\n", 569 | "# Separating out the target\n", 570 | "Y = df.loc[:,['class']].values\n", 571 | "plot_gt(X, Y, k_value=7, w='distance', q='KPCA + WKNNC')" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [] 580 | } 581 | ], 582 | "metadata": { 583 | "kernelspec": { 584 | "display_name": "Python 3", 585 | "language": "python", 586 | "name": "python3" 587 | }, 588 | "language_info": { 589 | "codemirror_mode": { 590 | "name": "ipython", 591 | "version": 3 592 | }, 593 | "file_extension": ".py", 594 | "mimetype": "text/x-python", 595 | "name": "python", 596 | "nbconvert_exporter": "python", 597 | "pygments_lexer": "ipython3", 598 | "version": "3.6.7" 599 | } 600 | }, 601 | "nbformat": 4, 602 | "nbformat_minor": 2 603 | } 604 | -------------------------------------------------------------------------------- /PaviaUniversity_HSI/README.md: -------------------------------------------------------------------------------- 1 | #### This notebook describes dimensionality reduction techiques PCA and KPCA on Pavia University dataset usign python. 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dimensionality reduction and classification on [Hyperspectral Image](http://www.ehu.eus/ccwintco/index.php/Hyperspectral_Remote_Sensing_Scenes) Using Python 2 | 3 | ## Authors 4 | 5 | * [**DR.T.Hitendra Sarma**](https://scholar.google.co.in/citations?user=8Frh6IQAAAAJ&hl=en) 6 | * [**Syam Kakarla**](https://www.linkedin.com/in/syam-kakarla/) 7 | 8 | ### Prerequisites 9 | 10 | The prerequisites to better understand the code and concept are: 11 | ``` 12 | * Python 13 | * MatLab 14 | * Linear Algebra 15 | ``` 16 | 17 | ### Installation 18 | 19 | * This project is fully based on python. So, the necessary modules needed for computaion are: 20 | ``` 21 | * Numpy 22 | * Sklearn 23 | * Matplotlib 24 | * Pandas 25 | ``` 26 | * The commands needed for installing the above modules on windows platfom are: 27 | ```python 28 | 29 | pip install numpy 30 | pip install sklearn 31 | pip install matplotlib 32 | pip install pandas 33 | ``` 34 | * we can verify the installation of modules by importing the modules. For example: 35 | ```python 36 | 37 | import numpy 38 | from sklearn.decomposition import PCA 39 | import matplotlib.pyplot as plt 40 | import pandas as pd 41 | ``` 42 | ### Results 43 | 44 | * Here we are performing the the **dimensionality reduction** on one of the widely used **hyperspectral image** [Indian Pines](http://www.ehu.eus/ccwintco/index.php/Hyperspectral_Remote_Sensing_Scenes) 45 | 46 | 1. The result of the [indian_pines_pca.py]( 47 | https://github.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/blob/master/indian_pines_after_pca.csv) is shown below: 48 | 49 | * It initial result is a bargraph for the first **10 Pricipal Components according** to their _variance ratio's_ : 50 | 51 | ![indian_pines_varianve_ratio](https://user-images.githubusercontent.com/36328597/41495831-56fff622-714e-11e8-87ab-731c11d14bab.JPG) 52 | 53 | Since, the initial two principal COmponents have high variance. so, we will select the initial two PC'S. 54 | 55 | * It second result is a scatter plot for the first **10 Pricipal Components** is : 56 | 57 | ![indian_pines_after_pca_with_2pc](https://user-images.githubusercontent.com/36328597/41495958-603d0baa-7151-11e8-9c7c-c7452b2fb6a8.JPG) 58 | 59 | 60 | * The above program resullts a dimensionally reduced [csvfile]( 61 | https://github.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/blob/master/indian_pines_after_pca.csv) . 62 | 63 | 2. The result of the [indian_pines_knnc.py](https://github.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/blob/master/Indian_pines_knnc.py) is given below: 64 | 65 | * The above program will classify the Indian Pines dataset before **Principal Component Analysis(PCA)**. The classifier here used for classification is [K-Nearest Neighbour Classifier (KNNC)](http://scikitlearn.org/stable/auto_examples/neighbors/plot_classification.html) 66 | * The time taken for classification is: 67 | 68 | ![indian_pines_classification_before_pca](https://user-images.githubusercontent.com/36328597/41496231-d2ddac0e-7157-11e8-9c14-29e89685569c.JPG) 69 | 70 | * Then the classification accuracy of indian pines dataset before **PCA** is: 71 | 72 | ![indian_pines_accuracy_before_pca](https://user-images.githubusercontent.com/36328597/41495844-97a3e31e-714e-11e8-8d63-4d786317b239.JPG) 73 | 74 | 3. The result of the [indian_pines_knnc_after_pca.py]( 75 | https://github.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/blob/master/Indian_pines_knnc_after_pca.py) 76 | 77 | * Then the resultant classification accuracy of indian pines dataset after **PCA** is: 78 | 79 | ![indian_pines_accuracy_after_pca](https://user-images.githubusercontent.com/36328597/41495843-9753df04-714e-11e8-9540-0968bdb27a7f.JPG) 80 | 81 | ### Conclusion : 82 | 83 | * By performing **PCA** on the corrected indian pines dataset results **100 Principal Components(PC'S)**. 84 | * since, the initial two Principal Components(PC'S) has **92.01839071674918** variance ratio. we selected two only. 85 | * Initially the dataset contains the dimensions **21025 X 200** is drastically reduced to **21025 X 2** dimensions. 86 | * The time taken for classification before and after Principal Component Analysis(PCA) is: 87 | 88 | | Dataset | Accuracy | Time Taken | 89 | | ------------- |:-----------: | ----------:| 90 | | Before PCA | 72.748890 | 17.6010 | 91 | | After PCA | 60.098187 | 0.17700982 | 92 | 93 | * Hence, the **time** has been reduced with a lot of difference and the **classification accuracy(C.A)** also reduced but the C.A can increased little bit by varying the 'k' value. 94 | 95 | ## License 96 | 97 | This project is licensed under the MIT License - see the [LICENSE.md](https://github.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/blob/master/LICENSE.md) file for details 98 | -------------------------------------------------------------------------------- /assets/Indian_pines_accuracy_after_pca.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/ff81981af4a401d5b55d9946dec20dc73585fe7f/assets/Indian_pines_accuracy_after_pca.JPG -------------------------------------------------------------------------------- /assets/Indian_pines_accuracy_before_pca.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/ff81981af4a401d5b55d9946dec20dc73585fe7f/assets/Indian_pines_accuracy_before_pca.JPG -------------------------------------------------------------------------------- /assets/Indian_pines_classification_before_pca.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/ff81981af4a401d5b55d9946dec20dc73585fe7f/assets/Indian_pines_classification_before_pca.JPG -------------------------------------------------------------------------------- /assets/Indian_pines_varianve_ratio.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/ff81981af4a401d5b55d9946dec20dc73585fe7f/assets/Indian_pines_varianve_ratio.JPG -------------------------------------------------------------------------------- /assets/indian_pines_after_pca_with_2PC.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syamkakarla98/Dimensionality-reduction-and-classification-on-Hyperspectral-Images-Using-Python/ff81981af4a401d5b55d9946dec20dc73585fe7f/assets/indian_pines_after_pca_with_2PC.JPG -------------------------------------------------------------------------------- /indian_pines_pca.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import pandas as pd 4 | 5 | 6 | from sklearn import decomposition 7 | from sklearn import datasets 8 | 9 | # load dataset into Pandas DataFrame 10 | df = pd.read_csv("D:\Python_programs\ML\Complete_Data_.csv") 11 | 12 | from sklearn.preprocessing import StandardScaler 13 | n=[] 14 | ind=[] 15 | for i in range(200): 16 | n.append(i+1) 17 | for i in range(200): 18 | ind.append('px'+str(n[i])) 19 | 20 | features = ind 21 | x = df.loc[:, features].values 22 | # Separating out the target 23 | y = df.loc[:,['target']].values 24 | # Standardizing the features 25 | from sklearn.preprocessing import MinMaxScaler 26 | scaler_model = MinMaxScaler() 27 | scaler_model.fit(x.astype(float)) 28 | x=scaler_model.transform(x) 29 | 30 | 31 | from sklearn.decomposition import PCA 32 | 33 | 34 | ## Finding the principle components 35 | pca = PCA(n_components=10) 36 | principalComponents = pca.fit_transform(x) 37 | ev=pca.explained_variance_ratio_ 38 | 39 | # *Since the initial 2 principal components have high variance. 40 | # so, we select pc-1 and pc-2. 41 | #--------------------------------------------------- 42 | pca = PCA(n_components=2) 43 | principalComponents = pca.fit_transform(x) 44 | principalDf = pd.DataFrame(data = principalComponents 45 | , columns = ['PC-1','PC-2']) 46 | # Adding lables 47 | finalDf = pd.concat([principalDf, df[['target']]], axis = 1) 48 | 49 | #--------- Bar Graph for Explained Variance Ratio ------------ 50 | plt.bar([1,2,3,4,5,6,7,8,9,10],list(ev*100),label='Principal Components',color='b') 51 | plt.legend() 52 | plt.xlabel('Principal Components') 53 | pc=[] 54 | for i in range(10): 55 | pc.append('PC'+str(i+1)) 56 | #plt.xticks([1,2,3,4,5,6,7,8,9,10],pc, fontsize=8, rotation=30) 57 | plt.xticks([1,2,3,4,5,6,7,8,9,10],pc, fontsize=8, rotation=30) 58 | plt.ylabel('Variance Ratio') 59 | plt.title('Variance Ratio of INDIAN PINES Dataset') 60 | plt.show() 61 | 62 | 63 | #--------------------------------------------------- 64 | # Plotting pc1 & pc2 65 | fig = plt.figure(figsize = (8,8)) 66 | ax = fig.add_subplot(1,1,1) 67 | ax.set_xlabel('PC-1', fontsize = 15) 68 | ax.set_ylabel('PC-2', fontsize = 15) 69 | ax.set_title('PCA on INDIAN PINES Dataset', fontsize = 20) 70 | targets = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16] 71 | colors = ['r','g','b','y','m','c','k','r','g','b','y','m','c','k','b','r'] 72 | for target, color in zip(targets,colors): 73 | indicesToKeep = finalDf['target'] == target 74 | ax.scatter(finalDf.loc[indicesToKeep, 'PC-1'] 75 | , finalDf.loc[indicesToKeep, 'PC-2'] 76 | , c = color 77 | , s = 9) 78 | ax.legend(targets) 79 | ax.grid() 80 | plt.show() # FOR SHOWING THE PLOT 81 | 82 | #-------------------SENDING REDUCED DATA INTO CSV FILE------------ 83 | 84 | finalDf.to_csv('indian_pines_after_pca.dat') 85 | 86 | --------------------------------------------------------------------------------