├── Code ├── Model implementation.py ├── Pre-training.py ├── Transfer-learning.py └── load data.py ├── Dataset ├── Experimental-data │ └── Upload later.txt └── Simulated-DAS-data │ └── Simulated dataset.txt ├── README.MD └── Supplementary_material.pdf /Code/Model implementation.py: -------------------------------------------------------------------------------- 1 | # author: SaKuRa Pop 2 | # data: 2021/3/13 10:40 3 | import pickle 4 | import numpy as np 5 | import keras 6 | from keras.layers import Input, Dense, Activation, BatchNormalization, Conv1D, Dropout 7 | from keras.layers import GlobalAveragePooling1D, MaxPooling1D 8 | from keras.models import Model 9 | from keras.initializers import glorot_uniform 10 | from sklearn import preprocessing 11 | from sklearn.model_selection import KFold 12 | import matplotlib.pyplot as plt 13 | import keras.backend as K 14 | from sklearn.model_selection import train_test_split 15 | from tensorflow.compat.v1 import ConfigProto 16 | from tensorflow.compat.v1 import InteractiveSession 17 | 18 | config = ConfigProto() 19 | config.gpu_options.allow_growth = True 20 | session = InteractiveSession(config=config) 21 | 22 | 23 | def compute_coeff_determination(actual, predict): 24 | ss_res = np.sum(np.square(actual-predict)) 25 | ss_tot = np.sum(np.square(actual - np.mean(actual))) 26 | return 1 - ss_res/(ss_tot + 1e-08) 27 | 28 | 29 | def relative_error(actual, predict): 30 | error = np.abs(actual - predict) / (actual+1e-08) 31 | return error 32 | 33 | 34 | def absolute_error(actual, predict): 35 | error = np.abs(actual - predict) 36 | return error 37 | 38 | 39 | class Loss_history(keras.callbacks.Callback): 40 | 41 | def on_train_begin(self, logs={}): 42 | self.losses = [] 43 | self.val_loss = [] 44 | self.coeff_determination = [] 45 | 46 | def on_batch_end(self, batch, logs={}): 47 | self.losses.append(logs.get("loss")) 48 | self.val_loss.append(logs.get("val_loss")) 49 | self.coeff_determination.append(logs.get("coeff_determination")) 50 | 51 | 52 | def coeff_determination(y_true, y_pred): 53 | SS_res = K.sum(K.square(y_true-y_pred)) 54 | SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 55 | return 1 - SS_res/(SS_tot + K.epsilon()) 56 | 57 | 58 | def conv1d(input_shape=(4097, 1)): 59 | x_input = Input(input_shape) 60 | 61 | x = Conv1D(filters=16, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x_input) 62 | x = BatchNormalization(axis=2)(x) 63 | x = Activation("relu")(x) 64 | x = Conv1D(filters=16, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x) 65 | x = BatchNormalization(axis=2)(x) 66 | x = Activation("relu")(x) 67 | x = MaxPooling1D(pool_size=3, strides=2)(x) 68 | 69 | x = Conv1D(filters=64, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x) 70 | x = BatchNormalization(axis=2)(x) 71 | x = Activation("relu")(x) 72 | x = Conv1D(filters=64, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x) 73 | x = BatchNormalization(axis=2)(x) 74 | x = Activation("relu")(x) 75 | x = MaxPooling1D(pool_size=3, strides=2)(x) 76 | 77 | x = Conv1D(filters=128, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x) 78 | x = BatchNormalization(axis=2)(x) 79 | x = Activation("relu")(x) 80 | x = Conv1D(filters=128, kernel_size=9, strides=2, kernel_initializer=glorot_uniform(seed=0))(x) 81 | x = BatchNormalization(axis=2)(x) 82 | x = Activation("relu")(x) 83 | x = MaxPooling1D(pool_size=3, strides=2)(x) 84 | 85 | x = Conv1D(filters=256, kernel_size=2, strides=1, kernel_initializer=glorot_uniform(seed=0))(x) 86 | x = BatchNormalization(axis=2)(x) 87 | x = Activation("relu")(x) 88 | x = GlobalAveragePooling1D()(x) 89 | 90 | x = Dense(units=128, activation='relu')(x) 91 | x = Dense(units=1, activation='sigmoid')(x) 92 | model = Model(inputs=x_input, outputs=x) 93 | return model 94 | 95 | 96 | def fully_connected(input_shape=(4097, )): 97 | x_input = Input(input_shape) 98 | x = Dense(units=8194, activation="relu")(x_input) 99 | x = Dense(units=4097, activation="relu")(x) 100 | x = Dense(units=2084, activation="relu")(x) 101 | x = Dense(units=1024, activation="relu")(x) 102 | x = Dense(units=512, activation="relu")(x) 103 | x = Dense(units=256, activation="relu")(x) 104 | x = Dropout(rate=0.2)(x) 105 | x = Dense(units=256, activation="relu")(x) 106 | x = Dropout(rate=0.2)(x) 107 | x = Dense(units=128, activation="relu")(x) 108 | x = Dense(units=1, activation="linear")(x) 109 | model = Model(inputs=x_input, outputs=x) 110 | return model -------------------------------------------------------------------------------- /Code/Pre-training.py: -------------------------------------------------------------------------------- 1 | # author: SaKuRa Pop 2 | # data: 2021/3/17 15:38 3 | import pickle 4 | import numpy as np 5 | import keras 6 | from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv1D 7 | from keras.layers import GlobalAveragePooling1D, MaxPooling1D, Dropout 8 | from keras.models import Model 9 | from keras.initializers import glorot_uniform 10 | from sklearn import preprocessing 11 | from sklearn.model_selection import KFold 12 | import matplotlib.pyplot as plt 13 | import keras.backend as K 14 | from time import * 15 | import matplotlib 16 | from matplotlib.pyplot import MultipleLocator 17 | from sklearn.model_selection import train_test_split 18 | from tensorflow.compat.v1 import ConfigProto 19 | from tensorflow.compat.v1 import InteractiveSession 20 | import torch 21 | import torch.utils.data as Data 22 | import random 23 | from sklearn.model_selection import KFold 24 | 25 | config = ConfigProto() 26 | config.gpu_options.allow_growth = True 27 | session = InteractiveSession(config=config) 28 | 29 | 30 | def compute_coeff_determination(actual, predict): 31 | ss_res = np.sum(np.square(actual-predict)) 32 | ss_tot = np.sum(np.square(actual - np.mean(actual))) 33 | return 1 - ss_res/(ss_tot + 1e-08) 34 | 35 | 36 | def relative_error(actual, predict): 37 | error = np.abs(actual - predict) / (actual+ 1e-08) 38 | return error 39 | 40 | 41 | def absolute_error(actual, predict): 42 | error = np.abs(actual - predict) 43 | return error 44 | 45 | 46 | class Loss_history(keras.callbacks.Callback): 47 | 48 | def on_train_begin(self, logs={}): 49 | self.losses = [] 50 | self.val_loss = [] 51 | self.coeff_determination = [] 52 | 53 | def on_batch_end(self, batch, logs={}): 54 | self.losses.append(logs.get("loss")) 55 | self.val_loss.append(logs.get("val_loss")) 56 | self.coeff_determination.append(logs.get("coeff_determination")) 57 | 58 | 59 | def coeff_determination(y_true, y_pred): 60 | SS_res = K.sum(K.square(y_true-y_pred)) 61 | SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 62 | return 1 - SS_res/(SS_tot + K.epsilon()) 63 | 64 | 65 | gas_absorption_spectra = np.load(data_path) 66 | ground_truth_concentration = np.load(label_path) 67 | 68 | """数据预处理""" 69 | gas_absorption_spectra = preprocessing.scale(gas_absorption_spectra) 70 | gas_absorption_spectra = gas_absorption_spectra[:, :, np.newaxis] # one more dimention for 1D-CNN; 71 | ground_truth_concentration = ground_truth_concentration / 10000 # scale to (0, 1) scope 72 | 73 | # you can set a random seed here 74 | train, test, train_label, test_label = train_test_split(gas_absorption_spectra, ground_truth_concentration, 75 | test_size=0.2, 76 | random_state=seed) 77 | 78 | kf = KFold(n_splits=10, shuffle=False, random_state=None) 79 | training_input_index = np.ones_like(train_label).astype(np.uint8) 80 | validation_input_index = np.ones(test_label).astype(np.uint8) 81 | 82 | for train_index, test_index in kf.split(train): 83 | training_input_index = np.vstack((training_input_index, train_index)) 84 | testing_input_index = np.vstack((testing_input_index, test_index)) 85 | training_index = training_input_index[1:, :] 86 | validation_index = testing_input_index[1:, :] 87 | 88 | """run each fold""" 89 | # k_fold_index from 1 to ten 90 | train_input = train[training_index[k_fold_index]] 91 | train_input = train_input[:, :, np.newaxis] # add one more dimension for 1D-CNN 92 | train_label = train_label[training_index[k_fold_index]] 93 | 94 | validation_input = train[validation_index[k_fold_index]] # 3000, 4097 95 | validation_input = validation_input[:, :, np.newaxis] # add one more dimension for 1D-CNN 96 | validaiton_label = train_label[validation_index[k_fold_index]] # 3000, 1 97 | 98 | 99 | # seed you can set a random seed here 100 | def training(train, train_label, validation, validation_label, model, epochs, learning_rate, metric): 101 | adam = keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08) 102 | model.compile(optimizer=adam, loss='mean_squared_error', metrics=metric) 103 | history = model.fit(x=train, y=train_label, validation_data=(validation, validaiton_label), epochs=epochs, 104 | batch_size=batch_size, verbose=1) 105 | save_path = r"..." 106 | model.save(save_path) 107 | 108 | 109 | def evaluation(model, input, ground_truth): 110 | predict_result = model.predict(input) 111 | R_squre = compute_coeff_determination(predict_result, ground_truth) 112 | RE = relative_error(predict_result, ground_truth).mean() 113 | AE = absolute_error(ground_truth, predict_result).mean() 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /Code/Transfer-learning.py: -------------------------------------------------------------------------------- 1 | # author: SaKuRa Pop 2 | # data: 2021/3/17 16:02 3 | import numpy as np 4 | import keras 5 | from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv1D 6 | from keras.layers import GlobalAveragePooling1D, MaxPooling1D, Dropout 7 | from keras.models import Model 8 | from keras.initializers import glorot_uniform 9 | from sklearn import preprocessing 10 | import matplotlib.pyplot as plt 11 | import keras.backend as K 12 | from time import * 13 | import matplotlib 14 | from matplotlib.pyplot import MultipleLocator 15 | from sklearn.model_selection import train_test_split 16 | from tensorflow.compat.v1 import ConfigProto 17 | from tensorflow.compat.v1 import InteractiveSession 18 | from keras.models import load_model 19 | from keras.models import model_from_json 20 | from sklearn.model_selection import train_test_split 21 | from sklearn.metrics import median_absolute_error 22 | from sklearn.metrics import mean_absolute_error 23 | from sklearn.metrics import mean_squared_error 24 | from sklearn.metrics import r2_score 25 | from sklearn.metrics import explained_variance_score 26 | 27 | 28 | def coeff_determination(y_true, y_pred): 29 | SS_res = K.sum(K.square(y_true-y_pred)) 30 | SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 31 | return 1 - SS_res/(SS_tot + K.epsilon()) 32 | 33 | 34 | model_path = r"..." 35 | transfer_model = load_model(model_path, custom_objects={"coeff_determination": coeff_determination}) 36 | 37 | 38 | def transfer_learning(): 39 | for layer in transfer_model.layers: 40 | layer.trainable = False 41 | transfer_model.layers[-1].trainable = True 42 | 43 | adam = keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08) 44 | transfer_model.compile(optimizer=adam, loss='mean_squared_error', metrics=[coeff_determination]) 45 | transfer_model.summary() 46 | 47 | for layer in transfer_model.layers: 48 | print(layer.name, "is trainable? ", layer.trainable) 49 | 50 | history = transfer_model.fit(x=train, y=train_label, 51 | validation_data=(test, test_label), epochs=epochs, 52 | batch_size=batch_size, verbose=1) 53 | 54 | save_path = r"..." 55 | transfer_model.save(save_path) 56 | 57 | 58 | def evaluation(transfer_model, test, test_label): 59 | predict_result = transfer_model.predict(test) 60 | R_squre = compute_coeff_determination(predict_result, test_label) 61 | RE = relative_error(predict_result, test_label).mean() 62 | AE = absolute_error(test_label, predict_result).mean() 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /Code/load data.py: -------------------------------------------------------------------------------- 1 | # author: SaKuRa Pop 2 | # data: 2021/3/13 10:48 3 | import xlrd 4 | import numpy as np 5 | 6 | 7 | def xlsx_to_pkl(open_file_path, save_file_path_1, save_file_path_2): 8 | data = xlrd.open_workbook(open_file_path) 9 | table = data.sheet_by_name('Sheet1') 10 | table_array = np.zeros((10000, 4097)) 11 | for i in range(2000): 12 | for j in range(4097): 13 | table_array[i, j] = table.cell_value(i, j) 14 | 15 | data_x = table_array 16 | data_y = np.array([x for x in range(10000)]) 17 | data_y = data_y[:, np.newaxis] 18 | np.save(save_file_path_1, data_x) 19 | np.save(save_file_path_2, data_y) 20 | return data_x, data_y 21 | -------------------------------------------------------------------------------- /Dataset/Experimental-data/Upload later.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Popsama/gas_retrieval_with_deep_learning/e4b49553a2383141eae784ff3e99b90f7111e906/Dataset/Experimental-data/Upload later.txt -------------------------------------------------------------------------------- /Dataset/Simulated-DAS-data/Simulated dataset.txt: -------------------------------------------------------------------------------- 1 | https://pan.baidu.com/s/1a6Senhy1dyFPVN2Kg16wvw 2 | key: z77z -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | # Retrieval of gas concentrations in optical spectroscopy with deep learning 2 | --- 3 | ## Linbo Tian, Jiachen Sun, Jun Chang, Jinbao Xia, Zhifeng Zhang, Alexandre A. Kolomenskii, Hans A. Schuessler, Sasa Zhang 4 | --- 5 | ### This repository provide supplementary material, including: 6 | 7 | ### Code 8 | >>#### load data.py - Loading data from xlxs file to pkl. I/O routines 9 | >>#### Model implementation.py - The deep neural networks (1D-CNN&DMLP) implementated in Keras. 10 | >>#### Pre-training.py - Instructions for pretrain the models 11 | >>#### transfer-learning.py - Instructions to implement the transfer learning for the pre-trained models. 12 | ### Dataset 13 | >>#### We have already uploaded Simulated Dataset. -------------------------------------------------------------------------------- /Supplementary_material.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Popsama/gas_retrieval_with_deep_learning/e4b49553a2383141eae784ff3e99b90f7111e906/Supplementary_material.pdf --------------------------------------------------------------------------------