├── requirements.txt
├── tests.py
├── LICENSE
├── visualisations.py
├── main.py
├── README.md
├── .gitignore
├── my_callbacks.py
├── classification_models.py
├── classification_preprocessing.py
├── image_classification.py
├── cnn_model.py
└── geospatial_preprocessing.py

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
numpy
pandas
scikit-learn
keras
matplotlib

--------------------------------------------------------------------------------
/tests.py:
--------------------------------------------------------------------------------
from classification_preprocessing import spatial_stratified_sample, label_encod, read_data_CNN, read_data
import pandas as pd
from cnn_model import CNN_model

# smoke test on a locally stored pickle with extracted sample data
# (this file uses the Polish column names 'ekstrakcja'/'klasa'/'indeks')
data = r'C:\cysia\cnn_mgr\validation.pickle'


X_trening, X_test, y_trening, y_test = read_data(data, 'ekstrakcja', 'klasa', 'indeks')
print(X_trening.shape)
# print(X_trening.shape, X_test.shape, y_trening.shape, y_test.shape)

# CNN_model(X_trening, X_test, y_trening, y_test, 1, accuracy_report=True)

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 MJedr

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/visualisations.py:
--------------------------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt

def plot_history(history):
    """Plots training/validation loss and overall accuracy from a Keras CSVLogger file."""
    hist = pd.read_csv(history, sep=",")
    loss_list = hist.loss
    val_loss_list = hist.val_loss
    acc_list = hist.acc
    val_acc_list = hist.val_acc

    if len(loss_list) == 0:
        print('Loss is missing in history')
        return

    epochs = range(0, (hist.epoch.tail(1).values[0].astype(int) + 1))

    fig, ax = plt.subplots(1, 2, figsize=(16, 8))
    ax[0].plot(epochs, loss_list, 'b',
               label='training')
    ax[0].plot(epochs, val_loss_list, 'g',
               label='validation')
    ax[0].set_title('Loss function')
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('Loss')
    ax[0].legend()
    ax[1].plot(epochs, acc_list, 'b',
               label='training')
    ax[1].plot(epochs, val_acc_list, 'g',
               label='validation')
    ax[1].set_title('Overall accuracy')
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('OA')
    ax[1].legend()

    fig.savefig('model_history.png')
    plt.show()

--------------------------------------------------------------------------------
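A minimal usage sketch for `plot_history`: `CNN_model` (see cnn_model.py below) logs every epoch to `model_training.log` through Keras's `CSVLogger`, and that file can be passed in directly:

```
from visualisations import plot_history

# 'model_training.log' is the per-epoch CSV written by CNN_model during fit()
plot_history('model_training.log')
```
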
/main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import geospatial_preprocessing
from classification_preprocessing import read_data_CNN
from cnn_model import CNN_model

# reference data with training polygons
data_shp = r'esri_shapefile.shp'
# reference image to extract samples and classify
data_raster = r'raster_type_envi.dat'
# output mask file name
mask_raster = 'mask.tiff'
# filenames for extracted values
export_csv = 'extracted.csv'
export_pickle = 'extracted.pickle'


class_names = geospatial_preprocessing.get_class_names(data_shp)
print('classes to be classified - ', class_names)
class_nb = len(class_names)
# copy of input shp file (to add unique indices for each polygon)
shp_copy = geospatial_preprocessing.create_index_fld(data_shp, class_names)
# rasterization
geospatial_preprocessing.rasterize(data_raster, mask_raster, shp_copy)
# training data extraction
geospatial_preprocessing.extract(data_raster, mask_raster, class_nb, class_names, export_pickle, export_csv)
# splits the data spatially and reshapes each sample to (n_bands, 1) as Conv1D expects;
# the column names are the ones written by geospatial_preprocessing.extract
X_trening, X_test, y_trening, y_test = read_data_CNN(export_pickle, 'values', 'class', 'index')
# CNN classification
CNN_model(X_trening, X_test, y_trening, y_test, 1, accuracy_report=True)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CNN-remote-sensing

A tool for full per-pixel remote sensing image classification,
which includes:
- reading sample data from a *.shp* file
- rasterizing the *.shp* file and extracting signal vectors for the samples from a remote sensing image
- splitting data into training, test and validation sets in a given proportion, with respect to spatial correlation
- training CNN, RF and SVM models
- evaluating the trained models
- classifying a full remote sensing image.

## Requirements
To run the program it is necessary to have Python 3.6 and the GDAL library with its **installed Python bindings**
(more on the GDAL page on PyPI).
Then just use pip to install the other requirements:

```
pip install -r requirements.txt
```

## Data
To run the classification, the following files are necessary:
- a file in *.shp* format with training samples, which must include a field with class names
- a raster file for classification in the ENVI .hdr labelled raster format.

## Getting started
To start a classification, enter the paths to your files in main.py and then run the program. The trained model will be saved automatically.
To perform image classification, set the paths to the model and the raster image in image_classification.py (see the sketch below).
**Please note that the image classification can take a really long time.**

## Author
Marcjanna Jędrych

--------------------------------------------------------------------------------
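A minimal sketch of the image-classification step mentioned in the README, assuming a scikit-learn model saved earlier by `classification_models.classify` (the `.sav` filename below is hypothetical, and the paths are placeholders):

```
from image_classification import classify_raster

classified = classify_raster(r'raster_type_envi.dat',
                             'model_RandomForestClassifier.sav',
                             'classified.tiff')
```
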
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# pycharm
.idea/*

--------------------------------------------------------------------------------
/my_callbacks.py:
--------------------------------------------------------------------------------
import time
import numpy as np
from keras.callbacks import Callback, ReduceLROnPlateau
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

class Metrics(Callback):
    """Container for per-epoch validation metrics."""
    def on_train_begin(self, logs={}):
        self.val_f1s = []
        self.val_recalls = []
        self.val_precisions = []

class PlotLosses(Callback):
    """Plots the training and validation loss after every epoch."""
    def on_train_begin(self, logs={}):
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.fig = plt.figure()
        self.logs = []

    def on_epoch_end(self, epoch, logs={}):
        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.i += 1

        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.legend()
        plt.show()

class F1Metric(Callback):
    """Collects a weighted F1 score on the validation data after each epoch."""
    def on_train_begin(self, logs={}):
        self.f1 = []

    def on_epoch_end(self, epoch, logs={}):
        # self.validation_data is set by Keras when validation_data is passed
        # to fit(); predict() returns class probabilities, so take the argmax
        y_pred = np.argmax(self.model.predict(self.validation_data[0]), axis=-1)
        self.f1.append(f1_score(self.validation_data[1], y_pred, average='weighted'))

def learning_rate_reduction(patience):
    # halves the learning rate when validation accuracy stops improving
    return ReduceLROnPlateau(monitor='val_acc',
                             patience=patience,
                             verbose=1,
                             factor=0.5,
                             min_lr=0.00001)

class TimeOnBatch(Callback):
    """Measures the wall-clock time of each epoch."""
    def on_train_begin(self, logs={}):
        self.logs = []

    def on_epoch_begin(self, epoch, logs={}):
        self.start = time.time()

    def on_epoch_end(self, epoch, logs={}):
        self.logs.append(time.time() - self.start)

--------------------------------------------------------------------------------
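A minimal sketch of wiring these callbacks into a Keras `fit()` call, assuming a compiled `model` and the arrays returned by `read_data_CNN` (`CNN_model` itself passes only `Metrics` and a `CSVLogger`):

```
import my_callbacks

f1 = my_callbacks.F1Metric()
timer = my_callbacks.TimeOnBatch()
reduce_lr = my_callbacks.learning_rate_reduction(patience=5)

model.fit(X_trening, y_trening,
          validation_data=(X_test, y_test),
          epochs=10,
          callbacks=[f1, timer, reduce_lr])
print('epoch times:', timer.logs)
print('validation F1 per epoch:', f1.f1)
```
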
/classification_models.py:
--------------------------------------------------------------------------------
import csv
import pandas as pd
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, cohen_kappa_score, f1_score
import pickle
from sklearn import svm
import time

def SVM_classifier(X_training, y_training, kernel='rbf',
                   gamma=0.00000001, C=150, degree=1):

    print('Starts fitting model ...')
    if kernel == 'rbf':
        clf = svm.SVC(random_state=1, kernel=kernel, gamma=gamma, C=C)
    elif kernel == 'poly':
        clf = svm.SVC(random_state=1, kernel=kernel, gamma=gamma, C=C, degree=degree)
    else:
        clf = svm.LinearSVC(C=C)
    start = time.time()
    clf_svm = clf.fit(X_training, y_training)
    end = time.time()
    delta = end - start
    print('Model fitted. Fitting time:', delta)
    return clf_svm

def RF_classifier(X_training, y_training,
                  n_estimators=150, oob_score=True, bootstrap=True):

    print('Starts fitting model ...')
    clf = RandomForestClassifier(n_estimators=n_estimators, oob_score=oob_score,
                                 bootstrap=bootstrap)
    start = time.time()
    clf_rf = clf.fit(X_training, y_training)
    end = time.time()
    delta = end - start
    print('Model fitted. Fitting time:', delta)
    return clf_rf

def classify(model, X_test, y_test,
             model_name='model',
             conf_matrix=False, accuracy_report=False):

    model_name = model_name + '_' + type(model).__name__
    # avoid overwriting a previously saved model with the same name
    while os.path.isfile(model_name + ".sav"):
        model_name = model_name + str(1)
    start_prediction = time.time()
    y_pred = model.predict(X_test)
    end_prediction = time.time()
    delta_prediction = end_prediction - start_prediction
    print('Test set predicted ...')

    if conf_matrix:
        cm_filename = model_name + '_cm.csv'
        cm = pd.DataFrame(confusion_matrix(y_test, y_pred))
        cm.to_csv(cm_filename)

    if accuracy_report:
        raport_filename = model_name + '_report.csv'
        report = classification_report(y_test, y_pred)
        with open(raport_filename, 'w') as acc_report:
            acc_report.write(report)

    filename_svm = model_name + '.sav'
    pickle.dump(model, open(filename_svm, 'wb'))

    with open(model_name + '.csv', 'a', newline='') as history:
        writer = csv.writer(history, delimiter=';')
        writer.writerow([model_name, accuracy_score(y_test, y_pred),
                         cohen_kappa_score(y_test, y_pred), f1_score(y_test, y_pred, average='weighted'),
                         delta_prediction])

    return y_pred

--------------------------------------------------------------------------------
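A minimal end-to-end sketch for the scikit-learn models, assuming the `extracted.pickle` produced by main.py:

```
from classification_preprocessing import read_data
from classification_models import RF_classifier, classify

X_trening, X_test, y_trening, y_test = read_data('extracted.pickle', 'values', 'class', 'index')
rf = RF_classifier(X_trening, y_trening, n_estimators=150)
y_pred = classify(rf, X_test, y_test, conf_matrix=True, accuracy_report=True)
```
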
/classification_preprocessing.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder


def spatial_sample(data_to_split, field, training_data_size):
    # samples whole polygons (unique spatial indices), not individual pixels,
    # so that training and test pixels never come from the same polygon
    unique_index = pd.unique(data_to_split[field])
    training = np.random.choice(unique_index, np.ceil(unique_index.shape[0] * training_data_size).astype('int_'),
                                replace=False)
    test = unique_index[np.isin(unique_index, training) == False]
    return training, test


def spatial_stratified_sample(data, field, class_field_name='klasa', training_data_size=0.7, save=False):
    print('dividing data ...')
    # -1 is a sentinel value that never matches a real polygon index
    training_index = np.array([-1])
    test_index = np.array([-1])
    unique_class = pd.unique(data[class_field_name])
    for class_name in unique_class:
        subset = data[data[class_field_name] == class_name]
        sampling = spatial_sample(subset, field, training_data_size)
        training_index = np.append(training_index, sampling[0])
        test_index = np.append(test_index, sampling[1])
    training_df = data[np.isin(data[field], training_index)]
    test_df = data[np.isin(data[field], test_index)]

    if save:
        pd.to_pickle(training_df, 'training.pickle')
        pd.to_pickle(test_df, 'test.pickle')

    return training_df, test_df


def label_encod(data_to_encode):
    print('encoding data ...')
    le = LabelEncoder()
    data_array = np.array(data_to_encode)
    # fit on the training labels (first element); the stratified split
    # guarantees that every class is present in both sets
    le_fit = le.fit(np.unique(data_array[0]))
    encoded = []

    for nb, i in enumerate(data_to_encode):
        y_encoded = le.transform(data_array[nb])
        encoded.append(y_encoded)

    le_name_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
    print(le_name_mapping)

    return np.array(encoded)


def read_data_CNN(data, value_field, class_field, spatial_index_field):
    df = pd.read_pickle(data)
    dane_test = spatial_stratified_sample(df, spatial_index_field, class_field)

    x_trening = dane_test[0][value_field]
    x_test = dane_test[1][value_field]
    y_trening = dane_test[0][class_field]
    y_test = dane_test[1][class_field]

    print('reshaping data ...')
    # Conv1D expects each sample to have shape (n_features, 1)
    feature_dim = len(x_trening.values[0])
    X_trening = np.array([np.array(x, dtype='int_').reshape(feature_dim, 1) for x in x_trening.values])
    X_test = np.array([np.array(x, dtype='int_').reshape(feature_dim, 1) for x in x_test.values])

    labels_en = label_encod([y_trening, y_test])
    y_tr = labels_en[0]
    y_te = labels_en[1]

    return X_trening, X_test, y_tr, y_te


def read_data(data, value_field, class_field, spatial_index_field):
    df = pd.read_pickle(data)
    dane_test = spatial_stratified_sample(df, spatial_index_field, class_field)

    x_trening = dane_test[0][value_field]
    x_test = dane_test[1][value_field]
    y_trening = dane_test[0][class_field]
    y_test = dane_test[1][class_field]

    X_trening = np.array([np.array(x, dtype='int_') for x in x_trening.values])
    X_test = np.array([np.array(x, dtype='int_') for x in x_test.values])

    labels_en = label_encod([y_trening, y_test])
    y_tr = labels_en[0]
    y_te = labels_en[1]

    return X_trening, X_test, y_tr, y_te

--------------------------------------------------------------------------------
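A small self-contained sketch of what the spatial split guarantees: whole polygons, not individual pixels, are assigned to one set, so pixels sharing a polygon index never end up on both sides. The toy DataFrame is illustrative only:

```
import pandas as pd
from classification_preprocessing import spatial_stratified_sample

# 6 pixels from 4 polygons ('indeks'), 2 classes ('klasa')
df = pd.DataFrame({'indeks': [1, 1, 2, 3, 3, 4],
                   'klasa': ['forest', 'forest', 'forest', 'water', 'water', 'water'],
                   'values': [[10, 20], [11, 21], [9, 19], [3, 5], [4, 6], [2, 4]]})
training_df, test_df = spatial_stratified_sample(df, 'indeks', training_data_size=0.5)
# no polygon index appears in both sets
assert set(training_df['indeks']).isdisjoint(test_df['indeks'])
```
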
/image_classification.py:
--------------------------------------------------------------------------------
from osgeo import gdal, gdalnumeric, gdalconst
import numpy as np
import pickle

def classify_tile(array, model):
    # array holds one pixel per row, shape (n_pixels, n_bands);
    # returns one predicted label per pixel
    temp = np.zeros(array.shape[0])
    for nb, i in enumerate(array):
        temp[nb] = model.predict(i.reshape(1, -1))
    return temp

def classify_raster(raster_path, model_path, out_raster_name, x_block_size=256, y_block_size=160):
    ds = gdal.Open(raster_path, gdalconst.GA_ReadOnly)
    model = pickle.load(open(model_path, 'rb'))
    n_bands = ds.RasterCount
    band = ds.GetRasterBand(1)
    b_array = band.ReadAsArray()
    x_im_size = band.XSize
    y_im_size = band.YSize
    x_block_size = int(x_block_size)
    y_block_size = int(y_block_size)
    # note: in this function 'xsize' counts rows and 'ysize' counts columns
    xsize = b_array.shape[0]
    ysize = b_array.shape[1]
    xstride = np.floor(xsize / x_block_size).astype('int64')
    ystride = np.floor(ysize / y_block_size).astype('int64')
    max_xstride_range = int(x_block_size * xstride)
    max_ystride_range = int(y_block_size * ystride)
    nb_px_not_ystride = int(ysize % y_block_size)
    nb_px_not_xstride = int(xsize % x_block_size)
    out_raster = np.zeros([xsize, ysize])

    memory_driver = gdal.GetDriverByName('GTiff')
    proj = ds.GetProjectionRef()
    ext = ds.GetGeoTransform()
    out_raster_ds = memory_driver.Create(out_raster_name, x_im_size, y_im_size, 1, gdal.GDT_UInt16)
    out_raster_ds.SetProjection(proj)
    out_raster_ds.SetGeoTransform(ext)

    pixels = 0
    for yy in range(0, ystride):
        for xx in range(0, xstride):
            y = xx * x_block_size
            x = yy * y_block_size
            # ReadAsArray returns (bands, rows, cols); move the band axis last
            # so that each row of 'array' is one pixel's spectral vector
            array = ds.ReadAsArray(y, x, y_block_size, x_block_size) \
                .transpose(1, 2, 0).reshape(x_block_size * y_block_size, n_bands)
            out_raster[x:x + x_block_size, y:y + y_block_size] = \
                classify_tile(array, model).reshape(x_block_size, y_block_size)
            pixels += x_block_size * y_block_size
            print('classified {0} pixels out of {1}'.format(pixels, xsize * ysize))
            print('-------------------------------------')
        if yy == (ystride - 1):
            # classify the leftover stripes that do not fill a whole block
            if xsize % x_block_size != 0 and ysize % y_block_size != 0:
                array = ds.ReadAsArray(y, 0, int(ysize % y_block_size), int(xsize)) \
                    .transpose(1, 2, 0).reshape((ysize % y_block_size) * xsize, n_bands)
                out_raster[y_block_size * ystride: y_block_size * ystride + ysize % y_block_size, 0:xsize] = \
                    classify_tile(array, model).reshape(ysize % y_block_size, xsize)
                new_x_stride = int(xsize - (xstride * x_block_size))
                new_x = int(xstride * x_block_size)
                new_y = 0
                for yyy in range(0, ystride):
                    array = ds.ReadAsArray(new_x, new_y, new_x_stride, y_block_size) \
                        .transpose(1, 2, 0).reshape(new_x_stride * y_block_size, n_bands)
                    out_raster[new_y:new_y + y_block_size, new_x:xsize] = \
                        classify_tile(array, model).reshape(y_block_size, new_x_stride)
                    new_y += y_block_size
            elif xsize % x_block_size != 0 and ysize % y_block_size == 0:
                array = ds.ReadAsArray(int(xstride * x_block_size), 0, int(xsize % x_block_size), int(ysize)) \
                    .transpose(1, 2, 0).reshape((xsize % x_block_size) * ysize, n_bands)
                out_raster[0:ysize, x_block_size * xstride: x_block_size * xstride + xsize % x_block_size] = \
                    classify_tile(array, model).reshape(ysize, xsize % x_block_size)
            elif xsize % x_block_size == 0 and ysize % y_block_size != 0:
                array = ds.ReadAsArray(y, 0, int(ysize % y_block_size), int(xsize)) \
                    .transpose(1, 2, 0).reshape(nb_px_not_ystride * xsize, n_bands)
                out_raster[y_block_size * ystride: y_block_size * ystride + ysize % y_block_size, 0:xsize] = \
                    classify_tile(array, model).reshape(nb_px_not_ystride, xsize)
    print('classified {0} pixels out of {1}'.format(pixels, xsize * ysize))

    outband = out_raster_ds.GetRasterBand(1)
    outband.WriteArray(out_raster)
    outband.FlushCache()

    ds = None

    return out_raster

--------------------------------------------------------------------------------
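A tiny self-contained sketch of `classify_tile`, using a throwaway scikit-learn classifier on random data (purely illustrative — real tiles come from `ds.ReadAsArray` as above):

```
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from image_classification import classify_tile

rng = np.random.RandomState(0)
X, y = rng.rand(100, 30), rng.randint(0, 3, 100)  # 100 pixels, 30 bands, 3 classes
model = RandomForestClassifier(n_estimators=10).fit(X, y)

tile = rng.rand(4 * 5, 30)            # one flattened 4x5 pixel block
labels = classify_tile(tile, model)   # one label per pixel
print(labels.reshape(4, 5))
```
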
/cnn_model.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import csv
import os
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense, Dropout, \
    MaxPooling1D, BatchNormalization
from keras.optimizers import Adam
from keras.regularizers import l1_l2
from keras.initializers import he_normal
from keras.callbacks import EarlyStopping, CSVLogger
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score, \
    confusion_matrix, classification_report
import pandas as pd
import time
import my_callbacks

def CNN_model(X_training, X_test, y_training, y_test, n_epochs=100, batch_size=256,
              model_name='model', history_file='model_accuracies.csv',
              conf_matrix=False, accuracy_report=False):

    # avoid overwriting previously saved weights with the same name
    while os.path.isfile(model_name + ".h5"):
        model_name = model_name + str(1)

    csv_logger = CSVLogger('model_training.log')
    plot_losses = my_callbacks.PlotLosses()
    metrics = my_callbacks.Metrics()
    f1_accuracy = my_callbacks.F1Metric()
    # defined but not passed to fit() below; add it to the callbacks list if needed
    earlystop = EarlyStopping(monitor='val_acc', patience=10,
                              mode='auto')
    adam = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

    model = Sequential()
    model.add(Conv1D(32, 9,
                     input_shape=(X_training.shape[1], 1),
                     kernel_initializer=he_normal(seed=12),
                     activation='relu',
                     kernel_regularizer=l1_l2(0.01)))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(1))
    model.add(Conv1D(32, 3,
                     activation='relu',
                     kernel_regularizer=l1_l2(0.01),
                     padding='same'))
    model.add(MaxPooling1D(3, padding='same'))
    model.add(BatchNormalization())
    model.add(Conv1D(9, 3,
                     activation='relu',
                     kernel_regularizer=l1_l2(0.01),
                     padding='same'))
    model.add(MaxPooling1D(3, padding='same'))
    model.add(BatchNormalization())
    model.add(Conv1D(9, 3,
                     activation='relu',
                     kernel_regularizer=l1_l2(0.01),
                     padding='same'))
    model.add(MaxPooling1D(3, padding='same'))
    model.add(BatchNormalization())
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    # output layer: the number of units is hardcoded to 17 classes
    model.add(Dense(17, activation='softmax'))
    model.compile(optimizer=adam, loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    print('starts fitting model ...')
    start = time.time()
    model.fit(X_training, y_training, batch_size=batch_size, epochs=n_epochs,
              validation_data=(X_test, y_test),
              callbacks=[metrics, csv_logger])
    end = time.time()
    delta = end - start
    print('fitting time: ', delta)

    print('starts predicting model ...')
    start_prediction = time.time()
    model.predict(X_test)
    end_prediction = time.time()
    delta_prediction = end_prediction - start_prediction
    print('prediction time: ', delta_prediction)

    y_pred = model.predict_classes(X_test)

    model.save_weights(model_name + ".h5")
    print('weights saved to disk')

    model_json = model.to_json()
    with open(model_name + '.json', 'w') as json_file:
        json_file.write(model_json)
    print('model saved to disk')

    with open(history_file, 'a', newline='') as history:
        writer = csv.writer(history, delimiter=';')
        writer.writerow([model_name, accuracy_score(y_test, y_pred),
                         cohen_kappa_score(y_test, y_pred), f1_score(y_test, y_pred, average='weighted'),
                         delta, delta_prediction])

    if conf_matrix:
        cm_filename = model_name + '_cm.csv'
        cm = pd.DataFrame(confusion_matrix(y_test, y_pred))
        cm.to_csv(cm_filename)

    if accuracy_report:
        raport_filename = model_name + '_report.csv'
        report = classification_report(y_test, y_pred)
        with open(raport_filename, 'w') as acc_report:
            acc_report.write(report)

    return y_pred

--------------------------------------------------------------------------------
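`CNN_model` saves the architecture as JSON and the weights as HDF5; a minimal sketch for loading such a model back, assuming the default name 'model':

```
from keras.models import model_from_json

with open('model.json') as json_file:
    model = model_from_json(json_file.read())
model.load_weights('model.h5')
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
```
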
/geospatial_preprocessing.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import os
from osgeo import gdal, gdalnumeric, gdalconst, ogr
import numpy as np
import pandas as pd


def open_envi_array(img):
    driver = gdal.GetDriverByName('ENVI')
    driver.Register()
    img_open = gdal.Open(img, gdalconst.GA_ReadOnly)
    img_arr = img_open.ReadAsArray()
    return img_arr


def get_class_names(shp):
    """
    Gets unique class names from the 'klasa' field of a shapefile
    shp - input shapefile
    """
    driver_shp = ogr.GetDriverByName('ESRI Shapefile')
    data = driver_shp.Open(shp, 1)
    layer = data.GetLayer()
    feature = layer.GetNextFeature()
    field_vals = []
    while feature:
        field_vals.append(feature.GetFieldAsString('klasa'))
        feature = layer.GetNextFeature()
    vals = np.unique(field_vals)
    return vals


def create_index_fld(input_shp, class_names, output_name='training_indexed'):
    """
    Creates an extra field in the shp with a unique index value for each polygon
    input_shp - input shapefile
    class_names - list of unique class names to classify
    output_name - output shapefile name (without extension)
    """
    data_shp = input_shp
    driver_shp = ogr.GetDriverByName('ESRI Shapefile')
    vector = driver_shp.Open(data_shp, 1)
    lyr = vector.GetLayer()
    directory_out = os.getcwd()
    # if a file with the given name exists, delete it first
    if output_name + '.shp' in os.listdir(directory_out):
        driver_shp.DeleteDataSource(output_name + '.shp')
    print('created file', output_name)
    out_ds = driver_shp.CreateDataSource(directory_out)

    lyr_copy = out_ds.CopyLayer(lyr, output_name)
    # unique integer index for every polygon ('indeks')
    fieldDefn = ogr.FieldDefn('indeks', ogr.OFTInteger)
    fieldDefn.SetWidth(10)
    lyr_copy.CreateField(fieldDefn)

    for nb, f in enumerate(lyr_copy):
        f.SetField('indeks', nb)
        lyr_copy.SetFeature(f)

    # integer class code ('kod'), 1..n, in the order of class_names
    fieldDefn = ogr.FieldDefn('kod', ogr.OFTInteger)
    fieldDefn.SetWidth(10)
    lyr_copy.CreateField(fieldDefn)

    for code, class_name in enumerate(class_names, start=1):
        print(class_name)
        lyr_copy.SetAttributeFilter("klasa = '{0}'".format(class_name))
        for f in lyr_copy:
            f.SetField('kod', code)
            lyr_copy.SetFeature(f)
    print('created')
    return output_name + '.shp'


def rasterize(in_raster, out_raster_name, shp_in):
    """
    Rasterizes a shapefile.
    in_raster - raster on which the output raster is based (same CRS, dimensions and resolution)
    out_raster_name - path to the output tiff file
    shp_in - path to the shp file from which the mask is created;
             band 1 of the output gets the 'indeks' attribute, band 2 the 'kod' attribute
    """

    driver_raster = gdal.GetDriverByName('ENVI')
    driver_raster.Register()
    raster_in = gdal.Open(in_raster, gdalconst.GA_ReadOnly)

    driver_shp = ogr.GetDriverByName('ESRI Shapefile')
    shp_in = driver_shp.Open(shp_in, 1)
    shp_lyr = shp_in.GetLayer()

    ncol = raster_in.RasterXSize
    nrow = raster_in.RasterYSize

    proj = raster_in.GetProjectionRef()
    ext = raster_in.GetGeoTransform()

    # Create the two-band output raster dataset
    memory_driver = gdal.GetDriverByName('GTiff')
    out_raster_ds = memory_driver.Create(out_raster_name, ncol, nrow, 2, gdal.GDT_UInt16)

    out_raster_ds.SetProjection(proj)
    out_raster_ds.SetGeoTransform(ext)

    status0 = gdal.RasterizeLayer(out_raster_ds,
                                  [2],
                                  shp_lyr,
                                  None, None,
                                  options=['ALL_TOUCHED=TRUE',
                                           "ATTRIBUTE={0}".format('kod')]
                                  )
    status = gdal.RasterizeLayer(out_raster_ds,
                                 [1],
                                 shp_lyr,
                                 None, None,
                                 options=['ALL_TOUCHED=TRUE',
                                          "ATTRIBUTE={0}".format('indeks')]
                                 )

    out_raster_ds = None

    if status != 0:
        print("I don't think it worked...")
    else:
        print("Success")

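# A quick sanity check for the mask written by rasterize(): band 1 holds the
# polygon index ('indeks'), band 2 the class code ('kod'). A minimal sketch,
# assuming the 'mask.tiff' produced by main.py:
#
#   mask_ds = gdal.Open('mask.tiff', gdalconst.GA_ReadOnly)
#   codes = mask_ds.GetRasterBand(2).ReadAsArray()
#   print(np.unique(codes))  # 0 = background, 1..n = class codes
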

def extract(raster_in, mask, class_nb, class_names, out_file_pickle, out_file_csv):
    """
    Extracts values from a raster file and saves them into csv and pickle files.

    raster_in: input raster to extract values from
    mask: mask with rasterised training data
    class_nb: number of classes to extract values for
    class_names: class names to extract values for
    out_file_csv: output csv
    out_file_pickle: output pickle
    """
    drivers_raster = gdal.GetDriverByName('ENVI')
    drivers_raster.Register()

    raster = gdal.Open(raster_in, gdalconst.GA_ReadOnly)

    inmask = gdal.Open(mask, gdalconst.GA_ReadOnly)
    band_mask = inmask.GetRasterBand(2)
    data_mask = band_mask.ReadAsArray(0, 0)

    coords = np.nonzero(data_mask)
    new_coords = np.array([0, 0])
    for i in range(len(coords[0])):  # collects the coordinates of all masked pixels
        m = np.array([coords[0][i], coords[1][i]])
        new_coords = np.vstack((new_coords, m))

    new_coords = np.delete(new_coords, 0, 0)  # removes the first dummy row

    pixel_class = ([data_mask[x, y] for x, y in new_coords])
    # group pixel coordinates by class code (codes are 1-based)
    px_vals = [[] for x in range(class_nb)]
    for nb, x in enumerate(pixel_class):
        px_vals[x - 1].append(new_coords[nb])

    data = []
    band_mask_index = inmask.GetRasterBand(1)
    # open all spectral bands once instead of once per pixel
    bands = [raster.GetRasterBand(i) for i in range(1, raster.RasterCount + 1)]

    for nb, coord_list_class in enumerate(px_vals):
        class_id = nb + 1
        for counter, i in enumerate(coord_list_class):
            x, y = int(i[0]), int(i[1])
            pix_val = np.squeeze(
                np.array([gdalnumeric.BandReadAsArray(band, y, x, 1, 1) for band in bands]).astype('int64'))
            pixel_extract = [x] + [y] + [pix_val] + ['{0}'.format(class_names[class_id - 1])] + \
                            [int(band_mask_index.ReadAsArray(y, x, 1, 1))]
            data.append(pixel_extract)
            print('extracted', round(100 * (counter + 1) / len(coord_list_class), 2),
                  '% from class {0}'.format(class_names[class_id - 1]))

    # cleaning data
    x = [x[0] for x in data]
    y = [x[1] for x in data]
    values = [x[2] for x in data]
    class_name = [x[3] for x in data]
    index = [x[4] for x in data]

    df = pd.DataFrame(list(zip(x, y, values, class_name, index)),
                      columns=['x', 'y', 'values', 'class', 'index'])

    print(df.loc[:, 'class'].value_counts())
    df.to_csv(out_file_csv)
    df.to_pickle(out_file_pickle)
    print('done!')

--------------------------------------------------------------------------------
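A quick sketch for inspecting what `extract()` writes, assuming the `extracted.pickle` produced by main.py:

```
import pandas as pd

df = pd.read_pickle('extracted.pickle')
print(df.columns.tolist())         # ['x', 'y', 'values', 'class', 'index']
print(df['class'].value_counts())  # pixel count per class
```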