├── Code
    ├── Model implementation.py
    ├── Pre-training.py
    ├── Transfer-learning.py
    └── load data.py
├── Dataset
    ├── Experimental-data
    │   └── Upload later.txt
    └── Simulated-DAS-data
    │   └── Simulated dataset.txt
├── README.MD
└── Supplementary_material.pdf


/Code/Model implementation.py:
--------------------------------------------------------------------------------
  1 | # author: SaKuRa Pop
  2 | # data: 2021/3/13 10:40
  3 | import pickle
  4 | import numpy as np
  5 | import keras
  6 | from keras.layers import Input, Dense, Activation, BatchNormalization, Conv1D, Dropout
  7 | from keras.layers import GlobalAveragePooling1D, MaxPooling1D
  8 | from keras.models import Model
  9 | from keras.initializers import glorot_uniform
 10 | from sklearn import preprocessing
 11 | from sklearn.model_selection import KFold
 12 | import matplotlib.pyplot as plt
 13 | import keras.backend as K
 14 | from sklearn.model_selection import train_test_split
 15 | from tensorflow.compat.v1 import ConfigProto
 16 | from tensorflow.compat.v1 import InteractiveSession
 17 | 
# Create a TensorFlow (v1-compat) session whose GPU options have allow_growth
# enabled, so GPU memory is requested as needed instead of being reserved up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
# The session is created at import time and kept in a module-level name.
session = InteractiveSession(config=config)
 21 | 
 22 | 
 23 | def compute_coeff_determination(actual, predict):
 24 |     ss_res = np.sum(np.square(actual-predict))
 25 |     ss_tot = np.sum(np.square(actual - np.mean(actual)))
 26 |     return 1 - ss_res/(ss_tot + 1e-08)
 27 | 
 28 | 
 29 | def relative_error(actual, predict):
 30 |     error = np.abs(actual - predict) / (actual+1e-08)
 31 |     return error
 32 | 
 33 | 
 34 | def absolute_error(actual, predict):
 35 |     error = np.abs(actual - predict)
 36 |     return error
 37 | 
 38 | 
 39 | class Loss_history(keras.callbacks.Callback):
 40 | 
 41 |     def on_train_begin(self, logs={}):
 42 |         self.losses = []
 43 |         self.val_loss = []
 44 |         self.coeff_determination = []
 45 | 
 46 |     def on_batch_end(self, batch, logs={}):
 47 |         self.losses.append(logs.get("loss"))
 48 |         self.val_loss.append(logs.get("val_loss"))
 49 |         self.coeff_determination.append(logs.get("coeff_determination"))
 50 | 
 51 | 
 52 | def coeff_determination(y_true, y_pred):
 53 |     SS_res = K.sum(K.square(y_true-y_pred))
 54 |     SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
 55 |     return 1 - SS_res/(SS_tot + K.epsilon())
 56 | 
 57 | 
 58 | def conv1d(input_shape=(4097, 1)):
 59 |     x_input = Input(input_shape)
 60 | 
 61 |     x = Conv1D(filters=16, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x_input)
 62 |     x = BatchNormalization(axis=2)(x)
 63 |     x = Activation("relu")(x)
 64 |     x = Conv1D(filters=16, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x)
 65 |     x = BatchNormalization(axis=2)(x)
 66 |     x = Activation("relu")(x)
 67 |     x = MaxPooling1D(pool_size=3, strides=2)(x)
 68 | 
 69 |     x = Conv1D(filters=64, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x)
 70 |     x = BatchNormalization(axis=2)(x)
 71 |     x = Activation("relu")(x)
 72 |     x = Conv1D(filters=64, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x)
 73 |     x = BatchNormalization(axis=2)(x)
 74 |     x = Activation("relu")(x)
 75 |     x = MaxPooling1D(pool_size=3, strides=2)(x)
 76 | 
 77 |     x = Conv1D(filters=128, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x)
 78 |     x = BatchNormalization(axis=2)(x)
 79 |     x = Activation("relu")(x)
 80 |     x = Conv1D(filters=128, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x)
 81 |     x = BatchNormalization(axis=2)(x)
 82 |     x = Activation("relu")(x)
 83 |     x = MaxPooling1D(pool_size=3, strides=2)(x)
 84 | 
 85 |     x = Conv1D(filters=256, kernel_size=2, strides=1, kernel_initializer=glorot_uniform(seed=0))(x)
 86 |     x = BatchNormalization(axis=2)(x)
 87 |     x = Activation("relu")(x)
 88 |     x = GlobalAveragePooling1D()(x)
 89 | 
 90 |     x = Dense(units=128, activation='relu')(x)
 91 |     x = Dense(units=1, activation='sigmoid')(x)
 92 |     model = Model(inputs=x_input, outputs=x)
 93 |     return model
 94 | 
 95 | 
 96 | def fully_connected(input_shape=(4097, )):
 97 |     x_input = Input(input_shape)
 98 |     x = Dense(units=8194, activation="relu")(x_input)
 99 |     x = Dense(units=4097, activation="relu")(x)
100 |     x = Dense(units=2084, activation="relu")(x)
101 |     x = Dense(units=1024, activation="relu")(x)
102 |     x = Dense(units=512, activation="relu")(x)
103 |     x = Dense(units=256, activation="relu")(x)
104 |     x = Dropout(rate=0.2)(x)
105 |     x = Dense(units=256, activation="relu")(x)
106 |     x = Dropout(rate=0.2)(x)
107 |     x = Dense(units=128, activation="relu")(x)
108 |     x = Dense(units=1, activation="linear")(x)
109 |     model = Model(inputs=x_input, outputs=x)
110 |     return model


--------------------------------------------------------------------------------
/Code/Pre-training.py:
--------------------------------------------------------------------------------
  1 | # author: SaKuRa Pop
  2 | # data: 2021/3/17 15:38
  3 | import pickle
  4 | import numpy as np
  5 | import keras
  6 | from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv1D
  7 | from keras.layers import GlobalAveragePooling1D, MaxPooling1D, Dropout
  8 | from keras.models import Model
  9 | from keras.initializers import glorot_uniform
 10 | from sklearn import preprocessing
 11 | from sklearn.model_selection import KFold
 12 | import matplotlib.pyplot as plt
 13 | import keras.backend as K
 14 | from time import *
 15 | import matplotlib
 16 | from matplotlib.pyplot import MultipleLocator
 17 | from sklearn.model_selection import train_test_split
 18 | from tensorflow.compat.v1 import ConfigProto
 19 | from tensorflow.compat.v1 import InteractiveSession
 20 | import torch
 21 | import torch.utils.data as Data
 22 | import random
 23 | from sklearn.model_selection import KFold
 24 | 
# Create a TensorFlow (v1-compat) session whose GPU options have allow_growth
# enabled, so GPU memory is requested as needed instead of being reserved up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
# The session is created at import time and kept in a module-level name.
session = InteractiveSession(config=config)
 28 | 
 29 | 
 30 | def compute_coeff_determination(actual, predict):
 31 |     ss_res = np.sum(np.square(actual-predict))
 32 |     ss_tot = np.sum(np.square(actual - np.mean(actual)))
 33 |     return 1 - ss_res/(ss_tot + 1e-08)
 34 | 
 35 | 
 36 | def relative_error(actual, predict):
 37 |     error = np.abs(actual - predict) / (actual+ 1e-08)
 38 |     return error
 39 | 
 40 | 
 41 | def absolute_error(actual, predict):
 42 |     error = np.abs(actual - predict)
 43 |     return error
 44 | 
 45 | 
 46 | class Loss_history(keras.callbacks.Callback):
 47 | 
 48 |     def on_train_begin(self, logs={}):
 49 |         self.losses = []
 50 |         self.val_loss = []
 51 |         self.coeff_determination = []
 52 | 
 53 |     def on_batch_end(self, batch, logs={}):
 54 |         self.losses.append(logs.get("loss"))
 55 |         self.val_loss.append(logs.get("val_loss"))
 56 |         self.coeff_determination.append(logs.get("coeff_determination"))
 57 | 
 58 | 
 59 | def coeff_determination(y_true, y_pred):
 60 |     SS_res = K.sum(K.square(y_true-y_pred))
 61 |     SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
 62 |     return 1 - SS_res/(SS_tot + K.epsilon())
 63 | 
 64 | 
 65 | gas_absorption_spectra = np.load(data_path)
 66 | ground_truth_concentration = np.load(label_path)
 67 | 
 68 | """数据预处理"""
 69 | gas_absorption_spectra = preprocessing.scale(gas_absorption_spectra)
 70 | gas_absorption_spectra = gas_absorption_spectra[:, :, np.newaxis]  # one more dimention for 1D-CNN;
 71 | ground_truth_concentration = ground_truth_concentration / 10000  # scale to (0, 1) scope
 72 | 
 73 | # you can set a random seed here
 74 | train, test, train_label, test_label = train_test_split(gas_absorption_spectra, ground_truth_concentration,
 75 |                                                         test_size=0.2,
 76 |                                                         random_state=seed)
 77 | 
 78 | kf = KFold(n_splits=10, shuffle=False, random_state=None)
 79 | training_input_index = np.ones_like(train_label).astype(np.uint8)
 80 | validation_input_index = np.ones(test_label).astype(np.uint8)
 81 | 
 82 | for train_index, test_index in kf.split(train):
 83 |     training_input_index = np.vstack((training_input_index, train_index))
 84 |     testing_input_index = np.vstack((testing_input_index, test_index))
 85 | training_index = training_input_index[1:, :]
 86 | validation_index = testing_input_index[1:, :]
 87 | 
 88 | """run each fold"""
 89 | # k_fold_index from 1 to ten
 90 | train_input = train[training_index[k_fold_index]]
 91 | train_input = train_input[:, :, np.newaxis]  # add one more dimension for 1D-CNN
 92 | train_label = train_label[training_index[k_fold_index]]
 93 | 
 94 | validation_input = train[validation_index[k_fold_index]]   # 3000, 4097
 95 | validation_input = validation_input[:, :, np.newaxis]  # add one more dimension for 1D-CNN
 96 | validaiton_label = train_label[validation_index[k_fold_index]]   # 3000, 1
 97 | 
 98 | 
 99 | # seed you can set a random seed here
def training(train, train_label, validation, validation_label, model, epochs, learning_rate, metric,
             batch_size=None, save_path=r"..."):
    """Compile ``model`` with Adam / MSE, fit it, save it and return the fit history.

    :param train: training inputs passed to ``model.fit`` as ``x``.
    :param train_label: training targets passed to ``model.fit`` as ``y``.
    :param validation: validation inputs.
    :param validation_label: validation targets. This parameter is now actually used;
        previously a misspelled module-level name was read instead.
    :param model: keras model to train.
    :param epochs: number of epochs.
    :param learning_rate: Adam learning rate.
    :param metric: value forwarded to ``model.compile(metrics=...)``.
    :param batch_size: mini-batch size. When ``None``, a module-level ``batch_size``
        is used if one exists (legacy behaviour); otherwise ``None`` is forwarded to
        ``model.fit``.
    :param save_path: where the trained model is saved (placeholder default).
    :return: the object returned by ``model.fit``.
    """
    if batch_size is None:
        # Legacy fallback: the original code read a module-level ``batch_size``.
        batch_size = globals().get("batch_size")

    # NOTE(review): newer Keras releases name the first argument ``learning_rate``
    # instead of ``lr`` - confirm against the installed version.
    adam = keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(optimizer=adam, loss='mean_squared_error', metrics=metric)
    history = model.fit(x=train, y=train_label, validation_data=(validation, validation_label), epochs=epochs,
                        batch_size=batch_size, verbose=1)
    model.save(save_path)
    return history
107 | 
108 | 
def evaluation(model, input, ground_truth):
    """Predict with ``model`` and score the predictions against ``ground_truth``.

    The ground truth is passed as the ``actual`` argument of the metric helpers
    (previously the prediction was, which made R^2 and the relative error be
    measured against the prediction instead of the target).

    :param model: object exposing ``predict(input)``.
    :param input: inputs forwarded to ``model.predict``.
    :param ground_truth: target values, one per prediction.
    :return: tuple ``(R_squre, RE, AE)`` - R^2, mean relative error and mean
        absolute error.
    """
    ground_truth = np.asarray(ground_truth)
    # Give predictions the label shape so (N, 1) vs (N,) cannot broadcast to (N, N).
    predict_result = np.asarray(model.predict(input)).reshape(ground_truth.shape)
    R_squre = compute_coeff_determination(ground_truth, predict_result)
    RE = relative_error(ground_truth, predict_result).mean()
    AE = absolute_error(ground_truth, predict_result).mean()
    return R_squre, RE, AE
114 | 
115 | 
116 | 
117 | 


--------------------------------------------------------------------------------
/Code/Transfer-learning.py:
--------------------------------------------------------------------------------
 1 | # author: SaKuRa Pop
 2 | # data: 2021/3/17 16:02
 3 | import numpy as np
 4 | import keras
 5 | from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv1D
 6 | from keras.layers import GlobalAveragePooling1D, MaxPooling1D, Dropout
 7 | from keras.models import Model
 8 | from keras.initializers import glorot_uniform
 9 | from sklearn import preprocessing
10 | import matplotlib.pyplot as plt
11 | import keras.backend as K
12 | from time import *
13 | import matplotlib
14 | from matplotlib.pyplot import MultipleLocator
15 | from sklearn.model_selection import train_test_split
16 | from tensorflow.compat.v1 import ConfigProto
17 | from tensorflow.compat.v1 import InteractiveSession
18 | from keras.models import load_model
19 | from keras.models import model_from_json
20 | from sklearn.model_selection import train_test_split
21 | from sklearn.metrics import median_absolute_error
22 | from sklearn.metrics import mean_absolute_error
23 | from sklearn.metrics import mean_squared_error
24 | from sklearn.metrics import r2_score
25 | from sklearn.metrics import explained_variance_score
26 | 
27 | 
def coeff_determination(y_true, y_pred):
    """Keras-backend R^2 metric: ``1 - SS_res / (SS_tot + K.epsilon())``.

    ``SS_res`` is the sum of squared residuals and ``SS_tot`` the sum of squared
    deviations of ``y_true`` from its mean, both reduced over all elements of the
    tensors passed in.
    """
    residual = y_true - y_pred
    ss_residual = K.sum(K.square(residual))
    deviation = y_true - K.mean(y_true)
    ss_total = K.sum(K.square(deviation))
    return 1 - ss_residual / (ss_total + K.epsilon())
32 | 
33 | 
# Path of the saved pre-trained model ("..." is a placeholder to be replaced).
model_path = r"..."
# Load the model at import time; the custom metric is supplied through
# custom_objects under the name "coeff_determination".
transfer_model = load_model(model_path, custom_objects={"coeff_determination": coeff_determination})
36 | 
37 | 
def transfer_learning():
    """Fine-tune only the last layer of the module-level ``transfer_model``.

    All layers are frozen except the final one, the model is recompiled with
    Adam / MSE and the ``coeff_determination`` metric, fitted, and saved.

    The function reads these module-level names, which are not defined in this
    script and must be assigned before calling it: ``learning_rate``, ``train``,
    ``train_label``, ``test``, ``test_label``, ``epochs`` and ``batch_size``.

    :return: the object returned by ``transfer_model.fit`` (previously discarded).
    """
    # Freeze everything, then re-enable training for the output layer only.
    for layer in transfer_model.layers:
        layer.trainable = False
    transfer_model.layers[-1].trainable = True

    # Recompile after the trainable flags were changed.
    # NOTE(review): newer Keras releases name the first argument ``learning_rate``
    # instead of ``lr`` - confirm against the installed version.
    adam = keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    transfer_model.compile(optimizer=adam, loss='mean_squared_error', metrics=[coeff_determination])
    transfer_model.summary()

    for layer in transfer_model.layers:
        print(layer.name, "is trainable? ", layer.trainable)

    history = transfer_model.fit(x=train, y=train_label,
                                 validation_data=(test, test_label), epochs=epochs,
                                 batch_size=batch_size, verbose=1)

    save_path = r"..."
    transfer_model.save(save_path)
    return history
56 | 
57 | 
def evaluation(transfer_model, test, test_label):
    """Predict with ``transfer_model`` and score the predictions against ``test_label``.

    The metrics are computed inline with NumPy because this script neither defines
    nor imports ``compute_coeff_determination``, ``relative_error`` or
    ``absolute_error``. The labels are treated as the reference ("actual") values.

    :param transfer_model: object exposing ``predict(test)``.
    :param test: inputs forwarded to ``transfer_model.predict``.
    :param test_label: target values, one per prediction.
    :return: tuple ``(R_squre, RE, AE)``:
        ``R_squre = 1 - SS_res / (SS_tot + 1e-08)``,
        ``RE = mean(|actual - predicted| / (|actual| + 1e-08))``,
        ``AE = mean(|actual - predicted|)``.
    """
    actual = np.asarray(test_label, dtype=float)
    # Give predictions the label shape so (N, 1) vs (N,) cannot broadcast to (N, N).
    predict_result = np.asarray(transfer_model.predict(test), dtype=float).reshape(actual.shape)

    abs_diff = np.abs(actual - predict_result)
    ss_res = np.sum(np.square(actual - predict_result))
    ss_tot = np.sum(np.square(actual - np.mean(actual)))

    R_squre = 1 - ss_res / (ss_tot + 1e-08)
    RE = (abs_diff / (np.abs(actual) + 1e-08)).mean()
    AE = abs_diff.mean()
    return R_squre, RE, AE
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/Code/load data.py:
--------------------------------------------------------------------------------
 1 | # author: SaKuRa Pop
 2 | # data: 2021/3/13 10:48
 3 | import xlrd
 4 | import numpy as np
 5 | 
 6 | 
 7 | def xlsx_to_pkl(open_file_path, save_file_path_1, save_file_path_2):
 8 |     data = xlrd.open_workbook(open_file_path)
 9 |     table = data.sheet_by_name('Sheet1')
10 |     table_array = np.zeros((10000, 4097))
11 |     for i in range(2000):
12 |         for j in range(4097):
13 |             table_array[i, j] = table.cell_value(i, j)
14 | 
15 |     data_x = table_array
16 |     data_y = np.array([x for x in range(10000)])
17 |     data_y = data_y[:, np.newaxis]
18 |     np.save(save_file_path_1, data_x)
19 |     np.save(save_file_path_2, data_y)
20 |     return data_x, data_y
21 | 


--------------------------------------------------------------------------------
/Dataset/Experimental-data/Upload later.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Popsama/gas_retrieval_with_deep_learning/e4b49553a2383141eae784ff3e99b90f7111e906/Dataset/Experimental-data/Upload later.txt


--------------------------------------------------------------------------------
/Dataset/Simulated-DAS-data/Simulated dataset.txt:
--------------------------------------------------------------------------------
1 | https://pan.baidu.com/s/1a6Senhy1dyFPVN2Kg16wvw
2 | key: z77z


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
 1 | # Retrieval of gas concentrations in optical spectroscopy with deep learning
 2 | ---
 3 | ## Linbo Tian, Jiachen Sun, Jun Chang, Jinbao Xia, Zhifeng Zhang, Alexandre A. Kolomenskii, Hans A. Schuessler, Sasa Zhang
 4 | ---
 5 | ### This repository provides supplementary material, including:
 6 | 
 7 | ### Code
 8 | >>#### load data.py - Loads data from an xlsx file and saves it as NumPy .npy arrays (I/O routines).
 9 | >>#### Model implementation.py - The deep neural networks (1D-CNN & DMLP) implemented in Keras.
10 | >>#### Pre-training.py - Instructions for pre-training the models.
11 | >>#### Transfer-learning.py - Instructions for applying transfer learning to the pre-trained models.
12 | ### Dataset
13 | >>#### We have already uploaded Simulated Dataset.


--------------------------------------------------------------------------------
/Supplementary_material.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Popsama/gas_retrieval_with_deep_learning/e4b49553a2383141eae784ff3e99b90f7111e906/Supplementary_material.pdf


--------------------------------------------------------------------------------