├── PySIC
│   ├── __init__.py
│   ├── Data_Preparation.py
│   ├── Data.py
│   └── Training.py
├── MANIFEST.in
├── CHANGELOG.txt
├── LICENCE.txt
├── setup.py
└── README.md
--------------------------------------------------------------------------------
/PySIC/__init__.py:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
global-include *.txt *.py *.md
--------------------------------------------------------------------------------
/CHANGELOG.txt:
--------------------------------------------------------------------------------
Change Log
==========

1.0.0 (31/12/2020)
------------------
- First Release
--------------------------------------------------------------------------------
/LICENCE.txt:
--------------------------------------------------------------------------------
Copyright 2020 Hejar Shahabi

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

with open("README.md", "r", encoding="utf-8") as readme:
    long_description = readme.read()

classifiers = [
    'Development Status :: 5 - Production/Stable',
    'Intended Audience :: Science/Research',
    'Operating System :: Microsoft :: Windows :: Windows 10',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3'
]

setup(
    name='PySIC',
    version='1.1',
    description='A package for satellite image classification using machine learning models',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/Hejarshahabi',
    author='Hejar Shahabi',
    author_email='hejarshahabi@gmail.com',
    license='MIT',
    classifiers=classifiers,
    keywords='Machine Learning, Remote Sensing',
    packages=find_packages(),
    # 'scikit-learn' is the correct PyPI name; the bare 'sklearn' package is a deprecated stub
    install_requires=['numpy', 'pandas', 'matplotlib', 'scikit-learn', 'gdal', 'imbalanced-learn']
)
--------------------------------------------------------------------------------
/PySIC/Data_Preparation.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import NearMiss
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from PySIC.Data import Plotting


class Preprocessing:
    def __init__(self, X_Train, Y_Train):
        self.X_train = X_Train
        self.Y_train = Y_Train

    def Scaling(self, scaling=None):
        # scale all features to a common range; defaults to min-max scaling
        self.scaling = scaling
        if self.scaling is None or self.scaling == "minmax":
            scale = MinMaxScaler()
        elif self.scaling == "Standard":
            scale = StandardScaler()
        self.Xtrain = scale.fit_transform(self.X_train)

    def Balancing(self, method=None):
        # balance the per-class sample counts; defaults to NearMiss under-sampling
        self.method = method
        if self.method is None or self.method == "nearmiss":
            balance = NearMiss()
        elif self.method == "smote":
            balance = SMOTE()
        self.Xtrainb, self.Ytrainb = balance.fit_resample(self.Xtrain, self.Y_train)

    def get_balanced_data(self):
        return self.Xtrainb, self.Ytrainb

    def plot(self, figsize=None, hist_bins=None, MarkerSize=None, Marker=None):
        self.figsize = figsize
        self.MarkerSize = MarkerSize
        self.Marker = Marker
        self.hist_bins = hist_bins
        Plotting(self.Xtrainb, self.Ytrainb, self.figsize, self.hist_bins, self.MarkerSize, self.Marker).plot()

    def get_balanced_samples(self):
        # report and plot the per-class sample counts after balancing
        self.classes, self.samples = np.unique(self.Ytrainb, return_counts=True)
        cat = pd.DataFrame({"Class ID": self.classes, 'Samples': self.samples})
        print("ID of each class, and their samples (pixels)")
        print(cat.to_string(index=False))
        plt.bar(self.classes, self.samples, align="center", alpha=.7)
        plt.xticks(self.classes)
        plt.xlabel("Class ID")
        plt.ylabel("Number of Samples")
        plt.show()
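
# Usage sketch (illustrative only; X_train / Y_train are assumed to come from
# Data.InputData.get_train_data(), as shown in the README):
#
#   prep = Preprocessing(X_train, Y_train)
#   prep.Scaling("minmax")            # or "Standard"
#   prep.Balancing("smote")           # or "nearmiss" (the default)
#   X, Y = prep.get_balanced_data()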
--------------------------------------------------------------------------------
/PySIC/Data.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
from osgeo import gdal  # modern import path; older GDAL installs expose plain "import gdal"
import matplotlib.pyplot as plt


class InputData:
    def __init__(self, features, label):
        # read the feature stack as an array of shape (bands, rows, cols)
        self.data = gdal.Open(features).ReadAsArray()
        print("The training dataset has:")
        print(f'{self.data.shape[0]} Bands, {self.data.shape[1]} Rows, {self.data.shape[2]} Columns')
        print("----------------------------------------------")
        # read the label raster as an array of shape (rows, cols)
        self.inv = gdal.Open(label).ReadAsArray()
        print("The label dataset has:")
        print(f'{self.inv.shape[0]} Rows, {self.inv.shape[1]} Columns')
        print("----------------------------------------------")
        if self.data.shape[1] != self.inv.shape[0] or self.data.shape[2] != self.inv.shape[1]:
            raise ValueError("The training dataset and label dataset do not have the same shape; "
                             "please reshape the datasets to matching dimensions.")
        # flatten each band into a column: one row per pixel, one column per band
        tempimage = np.zeros((self.data.shape[1] * self.data.shape[2], self.data.shape[0]), dtype="int")
        for i in range(self.data.shape[0]):
            tempimage[:, i] = (self.data[i, :, :]).ravel()
        self.features = tempimage
        print("The shape of the new training dataset is:")
        print(f'{self.features.shape[0]} Rows, {self.features.shape[1]} Columns')
        print("----------------------------------------------")
        self.label = self.inv.reshape(self.inv.shape[0] * self.inv.shape[1], 1)
        print("The shape of the new label dataset is:")
        print(f'{self.label.shape[0]} Rows, 1 Column')
        print("----------------------------------------------")
        # keep only the pixels with a positive class label
        self.stack = np.hstack((self.features, self.label))
        self.stack = self.stack[self.stack[:, -1] > 0]

    def get_train_data(self):
        return (self.stack[:, :-1], self.stack[:, -1])

    def get_Samples(self):
        # report and plot the per-class sample counts
        self.classes, self.samples = np.unique(self.stack[:, -1], return_counts=True)
        cat = pd.DataFrame({"Class ID": self.classes, 'Samples': self.samples})
        print("ID of each class, and their samples (pixels)")
        print(cat.to_string(index=False))
        plt.bar(self.classes, self.samples, align="center", alpha=.7)
        plt.xticks(self.classes)
        plt.xlabel("Class ID")
        plt.ylabel("Number of Samples")
        plt.show()


class Plotting:
    def __init__(self, features, labels, figsize=None, hist_bins=None, MarkerSize=None, Marker=None):
        self.feature = features
        self.labels = labels
        self.figsize = figsize
        self.MarkerSize = MarkerSize
        self.Marker = Marker
        self.hist_bins = hist_bins
        self.title = "Histogram of features and the correlation between classes in each band"

    def plot(self):
        clm = ["Band " + str(i + 1) for i in range(self.feature.shape[1])]
        self.clm = clm
        self.df = pd.DataFrame(self.feature, columns=clm)
        # scatter matrix: pairwise band scatter plots, per-band histograms on the diagonal
        self.axes = pd.plotting.scatter_matrix(self.df, c=self.labels, figsize=self.figsize,
                                               s=self.MarkerSize, marker=self.Marker,
                                               hist_kwds={"bins": self.hist_bins})
        plt.suptitle(self.title)
        plt.show()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# PySIC
**PySIC** (Python Satellite Imagery Classifier) is a powerful and
easy-to-use package for satellite image classification using machine
learning (ML) models. Any ML classifier available in the Scikit-Learn
library can be used in PySIC as the classification model; in addition to
a single classifier model, an ensemble ML method called Stacking can
combine multiple ML models into one stronger classifier. More
information on the Stacking model can be found on the [Scikit-Learn website](https://scikit-learn.org/stable) and in this [paper](https://www.mdpi.com/1424-8220/19/22/4893) by Shahabi et al., 2019.

![enter image description here](https://www.mdpi.com/sensors/sensors-19-04893/article_deploy/html/images/sensors-19-04893-g005-550.jpg)

**Shahabi, Hejar, et al**. "A semi-automated object-based gully networks detection using different machine learning models: A case study of Bowen Catchment, Queensland, Australia." _Sensors_ 19.22 (2019): 4893.

For model assessment and validation, Stratified K-Fold cross-validation
is used to evaluate the performance of the selected model; in addition,
the training dataset is divided into two sets, one for model assessment
and one for model validation. The ratio of this split is set by the
user. **PySIC** takes features (which can be a stack of satellite
images, an NDVI index, a slope layer, and so on) and labels as **.tif**
files as inputs, and its output is a classification map in .tif format.
Before classification, pre-processing steps including data scaling,
standardizing, and balancing are applied to reduce the inconsistency
among features and the imbalance among classes. The module is built on
**Numpy**, **Pandas**, **Matplotlib**, **Scikit-Learn**, **imbalanced-learn**, and **GDAL**,
so installing these libraries is required.
Please do not use this package for commercial purposes without my explicit permission.
Researchers and academics are welcome to give feedback and receive
technical support.
# PySIC Requirements:

1. **Numpy**
2. **Pandas**
3. **Matplotlib**
4. **Scikit-Learn**
5. **Imbalanced-learn**
6. **GDAL**

The version of **GDAL** should match your Python version; using the following [link](https://www.lfd.uci.edu/~gohlke/pythonlibs/) you can download the **GDAL** wheel that matches your Python version.
To install **GDAL** manually, first download the wheel to your local drive, then type the following in your terminal:

    pip install C:\......\GDAL-3.x.x-cp3x-cp3x-win_amd64.whl

# A Guide on how to use this package

    import os
    os.chdir('your data directory')

*To load data, the Data module should be imported:*

    from PySIC import Data

*The first and second arguments are the feature raster and the label raster, respectively. It prints information on the data's bands, rows, and columns, and then converts both rasters to 2D matrices: each column of the new training dataset represents an image or feature band, and each row represents a pixel.*

    instance = Data.InputData(data, inventory)

*To access your input features and labels in the form of arrays:*

    InputFeatures = instance.data
    InputLabels = instance.inv

*With this code you can get the reshaped training features and labels:*

    features, labels = instance.get_train_data()
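
*For intuition, the band-to-column reshaping that* `InputData` *performs internally can be sketched in plain NumPy (a minimal illustration; the array* `img` *and its shape are invented for the example):*

    import numpy as np

    # hypothetical raster stack: 4 bands, 100 rows, 120 columns
    img = np.random.rand(4, 100, 120)

    # flatten each band to a column: one row per pixel, one column per band
    flat = img.reshape(img.shape[0], -1).T
    print(flat.shape)  # (12000, 4)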
*With this code you can get the labels and the number of samples (pixels) associated with each label or class, as well as a bar chart. Your samples might be imbalanced, but below you will learn how to balance them using different methods.*

    instance.get_Samples()

*This code takes features and labels as inputs and visualizes them. The visualization can help you evaluate the distribution of your classes across the features; the diagonal histograms are the features or image bands.*

    plot = Data.Plotting(features, labels, figsize=[10,10], hist_bins=50, MarkerSize=10, Marker=".")
    plot.plot()

## Data Preparation for classification

    from PySIC import Data_Preparation

*To preprocess features and labels for classification:*

    data = Data_Preparation.Preprocessing(features, labels)

## Scaling
*Since most machine learning models (random forest and decision tree being exceptions) rely on distance calculations, all features should be scaled to the same range. Two scaling methods, MinMax and StandardScaler, both available in the Scikit-Learn package, are used here. The following code performs the scaling and accepts a string as input: pass "minmax" to use the MinMax method, or "Standard" to use StandardScaler; if you pass nothing, the default MinMax method is used.*

    data.Scaling("minmax")

## Data Balancing
*To balance the classes and their samples (by under-sampling or over-sampling), the following code is used. It accepts a string input, either "nearmiss" or "smote". The default method is NearMiss. For more information on these methods, please visit the [nearmiss](https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.under_sampling.NearMiss.html) and [smote](https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.over_sampling.SMOTE.html) documentation pages.*

    data.Balancing()
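
*To see what the balancing step delegates to, here is a minimal standalone sketch using imbalanced-learn directly (the toy arrays* `X` *and* `y` *are invented for illustration):*

    from collections import Counter
    import numpy as np
    from imblearn.over_sampling import SMOTE

    # toy imbalanced data: 900 pixels of class 1, 100 pixels of class 2
    X = np.random.rand(1000, 4)
    y = np.array([1] * 900 + [2] * 100)

    Xb, yb = SMOTE().fit_resample(X, y)
    print(Counter(yb))  # both classes now have 900 samples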
*The following code shows the balanced samples based on the method you applied previously:*

    data.get_balanced_samples()

*Using the following code, the scaled and balanced training data can be returned:*

    X, Y = data.get_balanced_data()

*This code plots the balanced features and labels:*

    data.plot(figsize=[10,10], hist_bins=50, MarkerSize=10, Marker=".")

## Model Training
*In this section, two approaches to image classification are used.
The first method uses a single classifier; the second method stacks multiple classifiers together as one powerful model.*

## Using a Single Classifier
*Inputs are the **X** and **Y** that we got from the last code.
The third argument (**0.3**) is the ratio between training data and validation data.*

    from PySIC import Training
    single = Training.Single_Classifier(X, Y, 0.3)

*Here you have to introduce your own model with its parameters, for example:*

    from sklearn.neural_network import MLPClassifier
    mymodel = MLPClassifier(hidden_layer_sizes=200, activation='relu', max_iter=500)

*Using this code you add your defined model:*

    single.Add_Model(mymodel)

*To evaluate model performance, StratifiedKFold cross-validation is used. The following code takes an integer as the number of splits or folds used to split the data for model assessment:*

    single.Model_assessment(Folds=5)

*Using the following code, the model accuracy in each fold as well as the mean accuracy are shown; the accuracy report is also stored in the current directory:*

    single.get_model_score()

**Fitting the model**

    single.Model_fitting()

*This code validates the fitted model on unseen data (the validation data):*

    single.Model_validation()

*In order to apply the trained model to a new dataset, use the following code. It takes two inputs: the first is your image or feature stack, and the second is the scaling method that was used to scale the training data.*

    single.Prediction("test2.tif", "minmax")

*This code plots the map:*

    single.PlotMap()

*To save the classification map in tif format, the following code can be used.
It takes a name for the map and stores it in the current directory.*

    single.ExportMap("prediction.tif")
## Stacking Classifier

    from PySIC import Training
    stacking = Training.Multi_Classifier(X, Y, 0.3)

*Using this code, base models can be introduced. In our example we use four base models: **RF**, **SVM**, **Dtree** and **MLP**.*
*The default* `stacking.Base_models()` *base models are **RF**, **MLP** and **SVM**.*

*The following format should be used to introduce your own base models:*

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neural_network import MLPClassifier
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier

    baseModel = (("RF", RandomForestClassifier(n_estimators=100)),
                 ("SVM", SVC()),
                 ("Dtree", DecisionTreeClassifier()),
                 ("MLP", MLPClassifier(hidden_layer_sizes=200)))

*Adding base models:*

    stacking.Base_models(baseModel)

*As with the single classifier, the number of folds must be given for the base models' assessment:*

    stacking.Base_model_assessment(Folds=4)

*This code returns each base model's training accuracy in each fold:*

    stacking.get_accuracy()

*With this code you introduce your meta classifier and the number of cross-validation folds. The default meta model is* `LogisticRegression()` *(using the default model is recommended) and the default **CV** is 5, so this code can be called with no inputs; here, however, we use **MLP** as our meta model:*

    stacking.Meta_model(MLPClassifier(), CV=4)
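
*For reference,* `Multi_Classifier` *is built on Scikit-Learn's* `StackingClassifier`*. A minimal standalone sketch of the same idea, assuming **X** and **Y** are the prepared arrays from the steps above:*

    from sklearn.ensemble import RandomForestClassifier, StackingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC

    base = [("RF", RandomForestClassifier(n_estimators=100)), ("SVM", SVC())]
    stack = StackingClassifier(estimators=base, final_estimator=LogisticRegression(), cv=5)
    stack.fit(X, Y)  # the base models are cross-validated; the meta model is
                     # trained on their out-of-fold predictions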
*To evaluate the meta model's performance:*

    stacking.Meta_model_assessment()

**Fitting the model**

    stacking.Model_fitting()

*Model validation:*

    stacking.Model_validation()

*Save the validation data as a **.csv** file in the current directory:*

    stacking.save_validation()

**Making predictions**

    stacking.Prediction("test2.tif", scaling="minmax")

*This code plots the map:*

    stacking.PlotMap()

*To save the classification map in tif format, the following code can be used.
It takes a name for the map and stores it in the current directory.*

    stacking.ExportMap("prediction.tif")
--------------------------------------------------------------------------------
/PySIC/Training.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from osgeo import gdal  # modern import path; older GDAL installs expose plain "import gdal"
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import MinMaxScaler, StandardScaler


class Single_Classifier:
    def __init__(self, features, labels, TVsize):
        self.features = features
        self.labels = labels
        self.TVsize = TVsize
        # stratified train/validation split; TVsize is the validation fraction
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.features, self.labels, test_size=self.TVsize, random_state=None, stratify=self.labels)

    def Add_Model(self, model=None):
        # defaults to a random forest when no model is supplied
        if model is None:
            self.model = RandomForestClassifier()
        else:
            self.model = model

    def Model_assessment(self, Folds=3):
        self.Folds = Folds
        self.CV = StratifiedKFold(n_splits=self.Folds)
        self.scores = cross_val_score(self.model, self.X_train, self.y_train, cv=self.CV, scoring='accuracy')

    def get_model_score(self):
        print(f'Accuracy of The Model Based on {self.Folds} Fold Cross Validation:')
        frame = np.ones((1, self.Folds + 1), dtype=object)
        frame[0, 0] = str(self.model)
        for i in range(self.Folds):
            frame[0, i + 1] = np.round(self.scores[i], 3)
        clm = ["Model"]
        for i in range(self.Folds):
            clm.append("Fold " + str(i + 1))
        self.df = pd.DataFrame(frame, columns=clm)
        self.df["Mean Accuracy"] = np.round(self.scores.mean(), 3)
        print(self.df)
        print("----------------------------------------------------------------------------------------------------------------")
        self.df.to_csv(str(self.model) + "-Training Accuracy.csv")
        print(f"*** Model Accuracy Scores Are Saved as ({str(self.model)}-Training Accuracy.csv) File in Current Directory ***")
        print("----------------------------------------------------------------------------------------------------------------")
        plt.boxplot(np.round(self.scores, 3), showmeans=True)
        plt.xticks([1], [str(self.model)])
        plt.title(f"The Trained Model Accuracy Based on {self.Folds} Folds Cross Validation")
        plt.show()

    def Model_fitting(self):
        self.model.fit(self.X_train, self.y_train)

    def Model_validation(self):
        self.Y_predicted = self.model.predict(self.X_test)
        cm = confusion_matrix(self.y_test, self.Y_predicted)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=self.model.classes_)
        disp.plot()
        plt.title("Confusion Matrix")
        plt.show()
        # row-normalized confusion matrix: the diagonal gives per-class accuracy
        self.cm = cm.astype(float) / cm.sum(axis=1)[:, np.newaxis]
        print("Classification Accuracy for Each Class:")
        classlist = ["Class " + str(i + 1) for i in range(len(self.cm.diagonal()))]
        self.accrep = pd.DataFrame(cm, columns=classlist, index=classlist)
        self.accrep["Accuracy"] = np.round(self.cm.diagonal(), 3)
        print(self.accrep)
        print('Overall Classification Accuracy: %0.3f' % (self.cm.diagonal().sum() / len(self.cm.diagonal())))

    def save_validation(self):
        if len(self.df.index) == 1:
            self.accrep.to_csv(str(self.model) + "-Confusion Matrix.csv")
            print("----------------------------------------------------------------------------------------------------------------")
            print(f"*** Confusion Matrix Scores Are Saved as ({str(self.model)}-Confusion Matrix.csv) File in Current Directory ***")
        else:
            self.accrep.to_csv("Meta-model Confusion Matrix.csv")
            print("*** Confusion Matrix Scores Are Saved as (Meta-Model Confusion Matrix.csv) File in Current Directory ***")

    def Prediction(self, features, scaling=None):
        # note: the scaler is re-fitted on the new image rather than reusing the
        # training-data scaler, so pass the same scaling method used in training
        self.scaling = scaling
        self.new_img = gdal.Open(features)
        self.new_image = self.new_img.ReadAsArray()
        print("The Dataset to Be Classified Has:")
        print(f'{self.new_image.shape[0]} Bands, {self.new_image.shape[1]} Rows, {self.new_image.shape[2]} Columns')
        print("----------------------------------------------")
        # flatten each band into a column, as was done for the training data
        tempimage = np.zeros((self.new_image.shape[1] * self.new_image.shape[2], self.new_image.shape[0]), dtype="int")
        for i in range(self.new_image.shape[0]):
            tempimage[:, i] = (self.new_image[i, :, :]).ravel()
        self.new_features = tempimage
        if self.scaling is None or self.scaling == "minmax":
            scale = MinMaxScaler()
        elif self.scaling == "Standard":
            scale = StandardScaler()
        self.feature = scale.fit_transform(self.new_features)

        self.prediction = self.model.predict(self.feature)
        print("Classification Is Done")
        self.map = self.prediction.reshape(self.new_image.shape[1], self.new_image.shape[2])

    def get_ClassificationMap(self):
        return self.map

    def PlotMap(self):
        self.cls, self.nump = np.unique(self.map, return_counts=True)
        self.clslist = []
        for i in range(len(self.cls)):
            self.clslist.append("Class: " + str(i + 1))
        plt.figure(figsize=(10, 10))
        img = plt.imshow(self.map, interpolation=None)
        # build one legend patch per class, colored to match the displayed map
        colors = [img.cmap(img.norm(cls)) for cls in self.cls]
        patches = [mpatches.Patch(color=colors[i], label=self.clslist[i]) for i in range(len(self.cls))]
        plt.legend(handles=patches, bbox_to_anchor=(1.07, 1), loc=2, borderaxespad=0., fontsize=12, title="Class ID")
        plt.title("Classification Map", fontsize=16)
        plt.show()

    def ExportMap(self, MapName):
        # write the predicted map as a single-band GeoTIFF, copying the
        # georeferencing from the input image
        self.mapname = MapName
        driver = gdal.GetDriverByName("GTiff")
        driver.Register()
        output = driver.Create(self.mapname, xsize=self.map.shape[1], ysize=self.map.shape[0], bands=1, eType=gdal.GDT_Int16)
        output.SetGeoTransform(self.new_img.GetGeoTransform())
        output.SetProjection(self.new_img.GetProjection())
        outputband = output.GetRasterBand(1)
        outputband.WriteArray(self.map)
        outputband.SetNoDataValue(0)
        outputband.FlushCache()
        outputband = None
        output = None
        print(f"Classification Map is Stored as '{str(self.mapname)}' in Current Directory")

    # backward-compatible alias for the old misspelled method name
    ExprotMap = ExportMap


class Multi_Classifier(Single_Classifier):
    def __init__(self, features, labels, TVsize):
        super().__init__(features, labels, TVsize)

    def Base_models(self, base_models=None):
        # defaults to RF, SVM and MLP base models when none are supplied
        if base_models is None:
            self.base = list()
            self.base.append(("RF", RandomForestClassifier(n_estimators=100)))
            self.base.append(("SVM", SVC()))
            self.base.append(("MLP", MLPClassifier(max_iter=500)))
        else:
            self.base = base_models
        print("----------------------------------------------------------------------------------------------------------------")
        print("Base Models Are as Below:")
        print(pd.DataFrame(self.base, columns=["Model Name", "Model Parameters"]))
        print('----------------------------------------------------------------------------------------------------------------')

    def Base_model_assessment(self, Folds=3):
        # cross-validate each base model separately
        self.Folds = Folds
        self.modell, self.accuracy = list(), list()
        for self.name, self.model in (dict(self.base)).items():
            Single_Classifier.Model_assessment(self, self.Folds)
            self.accuracy.append(self.scores)
            self.modell.append(self.name)

    def get_accuracy(self):
        print(f'Accuracy of Each Model (base model) based on {self.Folds} Fold Cross Validation:')
        accuracy = np.round(np.array(self.accuracy), 3)
        r = np.ones((len(self.base), self.Folds + 1), dtype=object)
        self.mscores = np.ones((len(self.base), 1))
        for i in range(len(self.base)):
            r[i, 0] = self.base[i][0]  # the model name only, not the (name, model) tuple
            for j in range(self.Folds):
                r[i, j + 1] = accuracy[i, j]
            self.mscores[i, 0] = accuracy[i, :].mean()
        column = ["Base Model"]
        for i in range(self.Folds):
            column.append("Fold " + str(i + 1))
        df = pd.DataFrame(r, columns=column)
        df["Mean"] = np.round(self.mscores, 3)
        df.to_csv("Base-Models Training Accuracy.csv")
        print(df)
        print("----------------------------------------------------------------------------------------------------------------")
        print("*** Model Accuracy Scores Are Saved as Base-Models Training Accuracy.csv File in Current Directory ***")
        print("----------------------------------------------------------------------------------------------------------------")
        plt.boxplot(self.accuracy, labels=self.modell, showmeans=True)
        plt.title(f"Base Models Accuracy Based on {self.Folds} Fold Cross Validation")
        plt.show()

    def Meta_model(self, meta_model=None, CV=None):
        # defaults to logistic regression as the meta model and 5-fold CV
        if meta_model is None:
            self.model = LogisticRegression()
        else:
            self.model = meta_model
        if CV is None:
            self.cv = 5
        else:
            self.cv = CV

    def Meta_model_assessment(self):
        # build the stacking ensemble; Model_assessment below cross-validates
        # the meta model on its own
        self.stack = StackingClassifier(self.base, self.model, cv=self.cv)
        Single_Classifier.Model_assessment(self, Folds=self.cv)
        columns = ["Meta Model"] + ["Fold " + str(i + 1) for i in range(self.cv)]
        frame = np.ones((1, self.cv + 1), dtype=object)
        frame[0, 0] = str(self.model)
        for i in range(self.cv):
            frame[0, i + 1] = np.round(self.scores[i], 3)
        df = pd.DataFrame(frame, columns=columns)
        df["Mean"] = np.round(self.scores.mean(), 3)
        print(f"Accuracy of The Meta-Model Based on {self.cv} Fold Cross Validation:")
        print(df)
        print("----------------------------------------------------------------------------------------------------------------")
        plt.boxplot(np.round(self.scores, 3), showmeans=True)
        plt.xticks([1], ["Meta Model"])
        plt.title(f"Meta Model Accuracy Based on {self.cv} Fold Cross Validation")
        plt.show()

    def Model_fitting(self):
        # fit the full stacking ensemble (base models + meta model)
        self.model = self.stack
        self.model.fit(self.X_train, self.y_train)

    def save_validation(self):
        print("----------------------------------------------------------------------------------------------------------------")
        self.accrep.to_csv("Meta-model Confusion Matrix.csv")
        print("*** Confusion Matrix Scores Are Saved as (Meta-Model Confusion Matrix.csv) File in Current Directory ***")
--------------------------------------------------------------------------------