├── utils ├── __init__.pyc ├── settings.py ├── data_loader.py └── history.py ├── deepNetworks ├── __init__.pyc ├── netArch.py └── model.py ├── matlab postprocessing ├── parsecsv.m ├── findStatResult.m ├── gradeCombination.m ├── dataSelection.m ├── makeFeatureFiles_wholeimage.m ├── setting.m ├── importfilecsv.m ├── makeFeatureFiles.m ├── makeFeatureFiles_Sliding.m ├── plotMRvsAccuracy.m ├── mapMaker.m ├── makeData.m ├── validation.m ├── learningCurves.m ├── learningAnalysis.m └── makeFilter.m ├── experiments ├── plothistory.py ├── mainTestOpt.py ├── mainTrainOpt.py └── trainmodel.py ├── README.md └── LICENSE /utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AziziShekoofeh/Time-series-Classification/HEAD/utils/__init__.pyc -------------------------------------------------------------------------------- /deepNetworks/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AziziShekoofeh/Time-series-Classification/HEAD/deepNetworks/__init__.pyc -------------------------------------------------------------------------------- /matlab postprocessing/parsecsv.m: -------------------------------------------------------------------------------- 1 | %% Define an internal function to parse the values and params 2 | 3 | function [param_value,param_loc] = parsecsv(paramname, param_log_name, param_log_value) 4 | 5 | [~, param_loc] = intersect(param_log_name,cellstr(paramname)); 6 | param_value = param_log_value(param_loc, :); 7 | 8 | end -------------------------------------------------------------------------------- /experiments/plothistory.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | module_root = '..' 4 | sys.path.append(module_root) 5 | 6 | from utils.history import History 7 | 8 | if __name__ == '__main__': 9 | logs_dir = 'DeepNetArch1-Div' 10 | model_history = History(logs_dir) 11 | 12 | # model_history.plot_results(train=True, validation=False, params=False, model_visualization=False, 13 | # loss=True, acc=False, auc=False, min_acc=0.4) 14 | model_history.filtered_learning_curve(train=True, validation=False, params=False, 15 | loss=True, acc=False, auc=False) 16 | -------------------------------------------------------------------------------- /utils/settings.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | if socket.gethostname() == 'purang23': 4 | project_folder = "e://tscRF_LSTM//Python//TeUS_RNN//TeUS_RNN//" 5 | intermediate_folder = project_folder + "Datasets" 6 | 7 | if socket.gethostname() == 'minerva-VirtualBox': 8 | project_folder = "/media/sf_Host_Share/tscRF_LSTM/Python/TeUS_RNN/TeUS_RNN/" 9 | intermediate_folder = project_folder + "Datasets" 10 | 11 | if socket.gethostname() == 'purang26': 12 | project_folder = "/home/shekoofeh/Project/TeUS_RNN/TeUS_RNN/" 13 | intermediate_folder = project_folder + "Datasets" 14 | 15 | if socket.gethostname() == 'purang29': 16 | project_folder = "/data/home/shekoofeh/TeUS_RNN/TeUS_RNN/" 17 | intermediate_folder = project_folder + "Datasets" 18 | -------------------------------------------------------------------------------- /matlab postprocessing/findStatResult.m: -------------------------------------------------------------------------------- 1 | function [TP, FP, TN, FN, sensitivity, specificity, misClassified] = findStatResult(Predicted_Label,Label) 2 | 3 | 4 | % Predicted_Label = (Predicted_Label == 2 | Predicted_Label == 3); 5 | % Label = (Label == 2 | Label == 3); 6 | 7 | TP = sum(((Predicted_Label == Label) & (Predicted_Label == 1))); 8 | FP = sum((Predicted_Label ~= Label) & (Predicted_Label == 1)); 9 | TN = sum((Predicted_Label == Label) & (Predicted_Label == 0)); 10 | FN = sum((Predicted_Label ~= Label) & (Predicted_Label == 0)); 11 | 12 | find((Predicted_Label ~= Label) & (Predicted_Label == 1)); 13 | find((Predicted_Label ~= Label) & (Predicted_Label == 0)); 14 | 15 | sensitivity = TP/(TP+FN); 16 | specificity = TN/(TN+FP); 17 | 18 | misClassified = find(Predicted_Label ~= Label); 19 | 20 | end -------------------------------------------------------------------------------- /matlab postprocessing/gradeCombination.m: -------------------------------------------------------------------------------- 1 | function [combinedGrade,CancerPercentageCombined] = gradeCombination(Predicted_Label,MRgrade,CancerPercentage) 2 | 3 | % Grade combination for benign/cancerous classifier 4 | % Predicted_Label: from cancer detection approach, 1 means cancer 0 means non-cancerous 5 | % MRgrade: from MRI, 1 = low, 2 = moderate, 3 = high 6 | 7 | 8 | % @ Code composed by Shekoofeh Azizi on 01/02/2016 (UBC-RCL) 9 | % @ Code modified by Shekoofeh Azizi on 29/05/2017 (UBC-RCL) 10 | 11 | noCores = size(MRgrade,1); 12 | combinedGrade = Predicted_Label; 13 | CancerPercentageCombined = CancerPercentage; 14 | 15 | for i =1 : noCores 16 | % if(MRgrade(i,1) == 3 && Predicted_Label(i,1)==0 && CancerPercentage(i,1)~=0 ) 17 | if(MRgrade(i,1) == 3 && Predicted_Label(i,1)==0 ) 18 | combinedGrade(i,1)= 1; 19 | CancerPercentageCombined(i,1) = 100; 20 | end 21 | 22 | if(MRgrade(i,1) == 1 && Predicted_Label(i,1)==1) 23 | combinedGrade(i,1)= 0; 24 | CancerPercentageCombined(i,1) = 0; 25 | end 26 | end 27 | 28 | end -------------------------------------------------------------------------------- /matlab postprocessing/dataSelection.m: -------------------------------------------------------------------------------- 1 | function [selected_idx_train, selected_idx_test] = dataSelection(D,L,TCL_limit,MTL_limit) 2 | 3 | %% Select data for Validation Test and Train based on th etumor size 4 | % @ Code composed by Shekoofeh Azizi on 23/08/2016 (UBC-RCL) 5 | % @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL) 6 | 7 | %% 1- Train + Validation Data 8 | 9 | noROI = 80; 10 | trainPer = 1.0; 11 | 12 | % Select cancerous large cores 13 | L_ca = L(L(:,1) == 1,:); 14 | s = RandStream('mt19937ar','Seed',0); 15 | selected_ca_train = find( L_ca(:,2) >= TCL_limit & L_ca(:,7) >= MTL_limit & L_ca(:,4) == 1); 16 | selected_ca_train = randperm(s,length(selected_ca_train),length(selected_ca_train)); 17 | selected_ca_train = L_ca(selected_ca_train,9); 18 | %D_ca_train = D_ca(ExpandPSamp(selected_ca_train,noROI),:); 19 | 20 | % Select benign cores 21 | L_be = L(L(:,1) == 0 & L(:,4) == 1,:); 22 | 23 | % Fix a seed to generate a reproducible results 24 | s = RandStream('mt19937ar','Seed',10); 25 | selected_be_train = randperm(s,length(L_be),length(selected_ca_train)); % Equal number of cancerous and benign 26 | selected_be_train = L_be(selected_be_train,9); 27 | % selected_be_train = ExpandPSamp(selected_be_train,noROI); 28 | 29 | %% Selected index 30 | 31 | selected_idx_train = [selected_be_train; selected_ca_train]; 32 | selected_idx_test = find(~ismember(L(:,9),selected_idx_train)); 33 | 34 | if(intersect(selected_idx_train,selected_idx_test)) 35 | warning('Error in dataselection'); 36 | display(intersect(selected_idx_train,selected_idx_test)) 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TimeSeries_Classification 2 | ##### Time-Series binary classification using RNNs 3 | ##### Shekoofeh Azizi 4 | 5 | 6 | ### Aim 7 | In this project we aim to implement and compare different RNN implementaion including LSTM, GRU and vanilla RNN for the task of time series binary classification. We also further visualize gate activities in different implementation to have a better understanding of the underlying signals. 8 | 9 | ### Data and results 10 | Data could be any time-series data with binary label 11 | 12 | Reults and methods are presented in detailed at [1]: 13 | (https://ieeexplore.ieee.org/abstract/document/8395313/) 14 | 15 | 16 | ### Credits 17 | Using Python Keras library (Keras 2.x) with [Tensorflow] backend: (https://www.tensorflow.org/versions/r0.7/tutorials/recurrent/index.html#recurrent-neural-networks) 18 | 19 | 20 | [1] Azizi, Shekoofeh, et al. "Deep Recurrent Neural Networks for Prostate Cancer Detection: Analysis of Temporal Enhanced Ultrasound." IEEE transactions on medical imaging (2018). 21 | 22 | If you are using these codes in any capicity please cite the above paper or: 23 | 24 | @article{azizi2018deep, 25 | title={Deep Recurrent Neural Networks for Prostate Cancer Detection: Analysis of Temporal Enhanced Ultrasound}, 26 | author={Azizi, Shekoofeh and Bayat, Sharareh and Yan, Pingkun and Tahmasebi, Amir and Kwak, Jin Tae and Xu, Sheng and Turkbey, Baris and Choyke, Peter and Pinto, Peter and Wood, Bradford and others}, 27 | journal={IEEE transactions on medical imaging}, 28 | year={2018}, 29 | publisher={IEEE} 30 | } 31 | 32 | 33 | ###### Tips for Running on GPU 34 | ###### - export CUDA_VISIBLE_DEVICES="1" 35 | ###### - THEANO_FLAGS=device=gpu1,floatX=float64 python trainmodel.py 36 | -------------------------------------------------------------------------------- /matlab postprocessing/makeFeatureFiles_wholeimage.m: -------------------------------------------------------------------------------- 1 | function [X_bmode, X_rf, infoCore, infoROI] = makeFeatureFiles_wholeimage(path_1, path_2) 2 | 3 | %% Generate Feature Design Matrix for the B-mode and Rf data in Whole Image 4 | % They are unlabeled data 5 | % (Philips Dataset Including 255 Test 80 ROIs) 6 | 7 | %INPUT 8 | % Saving Path: path_1 9 | % Feature Path: Path_2 10 | 11 | % OUTPUT 12 | % X_bmode : Bmode Features 13 | % X_rf : RF Features 14 | 15 | % @ Code composed by Shekoofeh Azizi on 29/11/2015 (UBC-RCL) 16 | % @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL) 17 | % @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL) 18 | 19 | %% Read our Excel in format of table, contain the info of patients 20 | ExcelFileName = [path_1,'PatientsInfo_All.xlsx']; 21 | [num,txt,~] = xlsread(ExcelFileName); 22 | PatientsInfo_FileName = txt(2:end,3); 23 | PatientsInfo_FileName = cell2mat(PatientsInfo_FileName); 24 | PatientsInfo = num(1:end,[3, 4, 5, 6, 9, 19, 20, 15, 22]); 25 | 26 | %% Ceating Matrix of Features (X) for the Bmode 27 | X = []; 28 | S = []; 29 | feature = []; 30 | infoROI = []; 31 | size_samples = []; 32 | 33 | for i = 1 : size(PatientsInfo_FileName,1) 34 | filename = [path_2,'./features_bmode_tsc_wholeimage_80/feature_',PatientsInfo_FileName(i,:),'.mat']; 35 | load(filename); 36 | X = [X; feature]; %#ok 37 | infoROI = [infoROI; repmat(PatientsInfo(i,:),[size(feature,1),1])]; %#ok 38 | size_samples = [size_samples; size(feature,1)]; %#ok 39 | end 40 | X_bmode = X; 41 | infoCore = [PatientsInfo,size_samples]; 42 | 43 | %% Ceating Matrix of Features (X) for the RF 44 | X = []; 45 | feature = []; 46 | for i = 1 : size(PatientsInfo_FileName,1) 47 | filename = [path_2,'./features_rf_tsc_wholeimage_80/feature_',PatientsInfo_FileName(i,:),'.mat']; 48 | load(filename); 49 | X = [X; feature]; %#ok 50 | end 51 | X_rf = X; 52 | 53 | end -------------------------------------------------------------------------------- /experiments/mainTestOpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | module_root = '..' 5 | sys.path.append(module_root) 6 | 7 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 9 | 10 | from utils.data_loader import DataLoader 11 | from keras import backend as k 12 | from keras.models import load_model 13 | from utils import settings as s 14 | from sklearn.metrics import roc_auc_score 15 | import scipy.io as spio 16 | 17 | 18 | if __name__ == '__main__': 19 | logs_dir = 'DeepNetArch1-Div' 20 | sl = 100 21 | ds_rate = 2 22 | early_stopping = True 23 | downsample = False 24 | bmode = True 25 | whole_map = False 26 | 27 | uid = '2017_10_20_09_48_15_arch3' 28 | uid = '2017_10_19_09_55_17_arch2' 29 | uid = '2017_10_19_20_54_29_arch1' 30 | 31 | if downsample: 32 | ds = DataLoader(sl=sl, downsample=True, downsample_rate=ds_rate) 33 | sl = int(sl/ds_rate) 34 | else: 35 | ds = DataLoader(sl=sl) 36 | 37 | if whole_map: 38 | test_seq = ds.load_whole_test(bmode) 39 | else: 40 | test_seq, test_label = ds.load_test(bmode) 41 | 42 | model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints/opt') 43 | model_checkpoint_file = os.path.join(model_checkpoint_dir, uid + '.hdf5') 44 | model = load_model(model_checkpoint_file) 45 | 46 | test_predictions = model.predict(test_seq, verbose=1) 47 | results = {'test_predictions': test_predictions} 48 | 49 | logs_dir = os.path.join(s.intermediate_folder, 'logs', logs_dir) 50 | test_log_dir = os.path.join(logs_dir, 'test_logs/') 51 | 52 | if not whole_map: 53 | test_auc = roc_auc_score(test_label, test_predictions) 54 | spio.savemat(test_log_dir + uid + '.mat', results) 55 | print(["Test AUC: ", test_auc]) 56 | else: 57 | spio.savemat(test_log_dir + uid + '_whole.mat', results) 58 | 59 | # print('-' * 50) 60 | # print('UID: {}'.format(uid)) 61 | # print('-' * 50) 62 | 63 | k.clear_session() 64 | -------------------------------------------------------------------------------- /matlab postprocessing/setting.m: -------------------------------------------------------------------------------- 1 | % Script to set path for the training logs 2 | % @ Code composed by Shekoofeh Azizi on 18/10/2017 (UBC-RCL) 3 | 4 | function [subFiles, opt_param_log_name, opt_param_log_value, learn_log] = setting(model_type) 5 | % get root of current file 6 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\'; 7 | log_dir_sub = '/'; % or /opt/ or /bmode/ or / 8 | 9 | % RF model id: Optimum Params 10 | lstm_opt_model_id = '2017_09_10_21_41_54_arch1.csv'; 11 | gru_opt_model_id = '2017_09_11_18_58_33_arch2.csv'; 12 | rnn_opt_model_id = '2017_09_07_23_03_26_arch3.csv'; 13 | 14 | switch model_type 15 | case 'lstm' 16 | sub_dir = 'DeepNetArch1-Div'; 17 | % Add Path 18 | root = [log_dir, sub_dir, log_dir_sub]; 19 | addpath([root '/train_logs']); 20 | addpath([root '/csv_logs']); 21 | [opt_param_log_name, opt_param_log_value] = importfilecsv(lstm_opt_model_id); 22 | learn_log = importdata([lstm_opt_model_id(1:25),'.log']); 23 | 24 | case 'gru' 25 | sub_dir = 'DeepNetArch2-Div'; 26 | % Add Path 27 | root = [log_dir, sub_dir, log_dir_sub]; 28 | addpath([root '/train_logs']); 29 | addpath([root '/csv_logs']); 30 | [opt_param_log_name, opt_param_log_value] = importfilecsv(lstm_opt_model_id); 31 | learn_log = importdata([gru_opt_model_id(1:25),'.log']); 32 | case 'rnn' 33 | sub_dir = 'DeepNetArch3-Div'; 34 | % Add Path 35 | root = [log_dir, sub_dir, log_dir_sub]; 36 | addpath([root '/train_logs']); 37 | addpath([root '/csv_logs']); 38 | [opt_param_log_name, opt_param_log_value] = importfilecsv(lstm_opt_model_id); 39 | learn_log = importdata([rnn_opt_model_id(1:25),'.log']); 40 | end 41 | 42 | 43 | 44 | 45 | % Get a list of all files and folders in this folder. 46 | files = dir([log_dir, sub_dir, '\', '\csv_logs\']); 47 | % Get a logical vector that tells which is a directory. 48 | dirFlags = [files(:).isdir]; 49 | % Extract only those that are directories. 50 | subFiles = {files(~dirFlags).name}'; 51 | % Removing current and previous directory 52 | % subFolders(ismember(subFolders,{'.','..'})) = []; 53 | 54 | end -------------------------------------------------------------------------------- /matlab postprocessing/importfilecsv.m: -------------------------------------------------------------------------------- 1 | function [optimizer,sgd] = importfilecsv(filename, startRow, endRow) 2 | %IMPORTFILE Import numeric data from a text file as column vectors. 3 | % [OPTIMIZER,SGD] = IMPORTFILE(FILENAME) Reads data from text file 4 | % FILENAME for the default selection. 5 | % 6 | % [OPTIMIZER,SGD] = IMPORTFILE(FILENAME, STARTROW, ENDROW) Reads data 7 | % from rows STARTROW through ENDROW of text file FILENAME. 8 | % 9 | % Example: 10 | % [optimizer,sgd] = importfile('2017_09_15_02_13_19_arch1.csv',1, 8); 11 | % 12 | % See also TEXTSCAN. 13 | 14 | % Auto-generated by MATLAB on 2017/10/19 10:41:26 15 | 16 | %% Initialize variables. 17 | delimiter = ','; 18 | if nargin<=2 19 | startRow = 1; 20 | endRow = inf; 21 | end 22 | 23 | %% Format string for each line of text: 24 | % column1: text (%s) 25 | % column2: text (%s) 26 | % For more information, see the TEXTSCAN documentation. 27 | formatSpec = '%s%s%[^\n\r]'; 28 | 29 | %% Open the text file. 30 | fileID = fopen(filename,'r'); 31 | 32 | %% Read columns of data according to format string. 33 | % This call is based on the structure of the file used to generate this 34 | % code. If an error occurs for a different file, try regenerating the code 35 | % from the Import Tool. 36 | dataArray = textscan(fileID, formatSpec, endRow(1)-startRow(1)+1, 'Delimiter', delimiter, 'HeaderLines', startRow(1)-1, 'ReturnOnError', false); 37 | for block=2:length(startRow) 38 | frewind(fileID); 39 | dataArrayBlock = textscan(fileID, formatSpec, endRow(block)-startRow(block)+1, 'Delimiter', delimiter, 'HeaderLines', startRow(block)-1, 'ReturnOnError', false); 40 | for col=1:length(dataArray) 41 | dataArray{col} = [dataArray{col};dataArrayBlock{col}]; 42 | end 43 | end 44 | 45 | %% Close the text file. 46 | fclose(fileID); 47 | 48 | %% Post processing for unimportable data. 49 | % No unimportable data rules were applied during the import, so no post 50 | % processing code is included. To generate code which works for 51 | % unimportable data, select unimportable cells in a file and regenerate the 52 | % script. 53 | 54 | %% Allocate imported array to column variable names 55 | optimizer = dataArray{:, 1}; 56 | sgd = dataArray{:, 2}; 57 | 58 | 59 | -------------------------------------------------------------------------------- /matlab postprocessing/makeFeatureFiles.m: -------------------------------------------------------------------------------- 1 | function [X_bmode, X_rf, infoROI, infoCore, PatientsInfo_FileName] = makeFeatureFiles(path_1, path_2) 2 | 3 | %% Generate Feature Design Matrix for all of the B-mode and RF data 4 | % (Philips Dataset Including 255 Test 80 ROIs) 5 | 6 | % INPUT 7 | % Saving Path: path_1 8 | % Feature Path: Path_2 9 | 10 | % OUTPUT 11 | % X_bmode : Bmode Features 12 | % X_rf : RF Features 13 | % S_info : Patient information and labels 14 | 15 | % Patient Info: 16 | % Column 1: Label 17 | % Column 2: Tumor in Core length 18 | % Column 3: MR Label 19 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch) 20 | % Column 5: Gleason Score 21 | % Column 6: Distance to Boundary (mm) 22 | % Column 7: MR greatest size 23 | % Column 8: Sagittal GS 24 | % Column 9: Sample number 25 | % Column 10: ROI number/ Column 10: Sample Size 26 | 27 | % @ Code composed by Shekoofeh Azizi on 22/08/2016 (UBC-RCL) 28 | % @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL) 29 | 30 | %% Read our Excel in format of table, contain the info of patients 31 | ExcelFileName = [path_1,'PatientsInfo_All.xlsx']; 32 | [num,txt,~] = xlsread(ExcelFileName); 33 | PatientsInfo_FileName = txt(2:end,3); 34 | PatientsInfo_FileName = cell2mat(PatientsInfo_FileName); 35 | PatientsInfo = num(1:end,[3, 4, 5, 6, 9, 19, 20, 15, 22]); 36 | 37 | %% Ceating Matrix of Features (X) for the RF 38 | X = []; 39 | infoROI = []; 40 | feature = []; 41 | size_samples = []; 42 | 43 | for i = 1 : size(PatientsInfo_FileName,1) 44 | filename = [path_2,'./features_rf_tsc_ROI_80/feature_',PatientsInfo_FileName(i,:),'.mat']; 45 | load(filename); 46 | X = [X; feature]; 47 | size_samples = [size_samples; size(feature,1)]; 48 | ROI_num = 1:size(feature,1); 49 | infoROI = [infoROI; [repmat([PatientsInfo(i,:)],[size(feature,1),1]),ROI_num']]; 50 | end 51 | X_rf = X; 52 | infoCore = [PatientsInfo,size_samples]; 53 | 54 | %% Ceating Matrix of Features (X) for the Bmode 55 | X = []; 56 | feature = []; 57 | for i = 1 : size(PatientsInfo_FileName,1) 58 | filename = [path_2,'./features_bmode_tsc_ROI_80/feature_',PatientsInfo_FileName(i,:),'.mat']; 59 | load(filename); 60 | X = [X;feature]; 61 | end 62 | X_bmode = X; 63 | 64 | removeIDX = (any(isnan(X_rf),2)); 65 | X_rf(removeIDX,:)= 0 ; 66 | 67 | end -------------------------------------------------------------------------------- /matlab postprocessing/makeFeatureFiles_Sliding.m: -------------------------------------------------------------------------------- 1 | function [X_bmode, X_rf, infoROI, infoCore, PatientsInfo_FileName] = makeFeatureFiles_Sliding(path_1, path_2) 2 | 3 | %% Generate Feature Design Matrix for all of the B-mode and RF data 4 | % (Philips Dataset Including 255 Test 80 ROIs) 5 | 6 | % INPUT 7 | % Saving Path: path_1 8 | % Feature Path: Path_2 9 | 10 | % OUTPUT 11 | % X_bmode : Bmode Features 12 | % X_rf : RF Features 13 | % S_info : Patient information and labels 14 | 15 | % Patient Info: 16 | % Column 1: Label 17 | % Column 2: Tumor in Core length 18 | % Column 3: MR Label 19 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch) 20 | % Column 5: Gleason Score 21 | % Column 6: Distance to Boundary (mm) 22 | % Column 7: MR greatest size 23 | % Column 8: Sagittal GS 24 | % Column 9: Sample number 25 | % Column 10: ROI number/ Column 10: Sample Size 26 | 27 | % @ Code composed by Shekoofeh Azizi on 22/08/2016 (UBC-RCL) 28 | % @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL) 29 | % @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL) 30 | 31 | 32 | %% Read our Excel in format of table, contain the info of patients 33 | ExcelFileName = [path_1,'PatientsInfo_All.xlsx']; 34 | [num,txt,~] = xlsread(ExcelFileName); 35 | PatientsInfo_FileName = txt(2:end,3); 36 | PatientsInfo_FileName = cell2mat(PatientsInfo_FileName); 37 | PatientsInfo = num(1:end,[3, 4, 5, 6, 9, 19, 20, 15, 22]); 38 | 39 | %% Ceating Matrix of Features (X) for the RF 40 | X = []; 41 | infoROI = []; 42 | feature = []; 43 | size_samples = []; 44 | 45 | for i = 1 : size(PatientsInfo_FileName,1) 46 | filename = [path_2,'./features_rf_tsc_ROI_Sliding/feature_',PatientsInfo_FileName(i,:),'.mat']; 47 | load(filename); 48 | X = [X; feature]; %#ok 49 | size_samples = [size_samples; size(feature,1)]; %#ok 50 | ROI_num = 1:size(feature,1); 51 | infoROI = [infoROI; [repmat(PatientsInfo(i,:),[size(feature,1),1]),ROI_num']]; %#ok 52 | end 53 | X_rf = X; 54 | infoCore = [PatientsInfo,size_samples]; 55 | 56 | %% Ceating Matrix of Features (X) for the Bmode 57 | X = []; 58 | feature = []; 59 | for i = 1 : size(PatientsInfo_FileName,1) 60 | filename = [path_2,'./features_bmode_tsc_ROI_Sliding/feature_',PatientsInfo_FileName(i,:),'.mat']; 61 | load(filename); 62 | X = [X;feature]; %#ok 63 | end 64 | X_bmode = X; 65 | 66 | end -------------------------------------------------------------------------------- /matlab postprocessing/plotMRvsAccuracy.m: -------------------------------------------------------------------------------- 1 | function [t,AUC] = plotMRvsAccuracy(MRsize,predictedGrade,realGrade,CancerPercentage) 2 | 3 | cnt = 0; 4 | t = [0 : 0.3 : 1,1.6,1.8, 2:0.3:2.7]; 5 | 6 | ACC = zeros(1,size(t,2)); 7 | AUC = zeros(1,size(t,2)); 8 | SEN = zeros(1,size(t,2)); 9 | SPEC = zeros(1,size(t,2)); 10 | S = zeros(1,size(t,2)); 11 | Cmat = zeros(size(t,2),3); 12 | Cmat(1,:) = [1.0 0.5 0.5]; 13 | Cmat(2,:) = [0.5 0.5 1.0]; 14 | Cmat(3,:) = [0.4 1.0 0.4]; 15 | Cmat(4,:) = [0.5 0.4 0.6]; 16 | 17 | 18 | figure1 = figure('Color','None'); 19 | axes1 = axes('Parent',figure1,'FontSize',13,'FontName','Times'); 20 | box(axes1,'on'); 21 | hold on 22 | for j = t 23 | 24 | filter_Core = find(MRsize >= j & MRsize ~=100); 25 | cnt = cnt + 1; 26 | S(cnt) = size(filter_Core,1); 27 | L1 = predictedGrade(filter_Core,1); 28 | L2 = realGrade(filter_Core,1); 29 | L3 = CancerPercentage(filter_Core,1); 30 | [~, ~, ~, ~,SEN(cnt),SPEC(cnt)] = findStatResult(L1,L2); 31 | ACC(cnt) = (1 - sum(L1~=L2) / size(filter_Core,1)); 32 | [X_ROC,Y_ROC,~,AUC(cnt)] = perfcurve(L2,L3,1); 33 | if( mod(cnt,2) == 0 ) 34 | plot(X_ROC,Y_ROC,'Color',Cmat(cnt/2,:),'LineWidth',2.0,'LineStyle','--',... 35 | 'DisplayName',sprintf('Larger than %2.2g cm',j)) 36 | xlabel('False positive rate (1-Specificity)','Interpreter','latex','FontSize',13); 37 | ylabel('True positive rate (Specificity)','Interpreter','latex','FontSize',13); 38 | end 39 | 40 | end 41 | legend(axes1,'show'); 42 | hold off 43 | 44 | % Create figure 45 | figure2 = figure('Color',[1 1 1]); 46 | axes2 = axes('Parent',figure2,'FontSize',13,'FontName','Times'); 47 | box(axes2,'on'); 48 | hold(axes2,'all'); 49 | 50 | % Create scatter 51 | % h1=scatter(t,ACC,'MarkerEdgeColor',[1 0.5 0.5],'DisplayName','Accuracy'); plot(t,ACC,'LineStyle',':','Color',[1 0 0]); 52 | % h2=scatter(t,SPEC,'MarkerEdgeColor',[0.5 0.5 1],'DisplayName','Specificty'); plot(t,SPEC,'LineStyle',':','Color',[0 0 1]); 53 | % h3=scatter(t,SEN,'MarkerEdgeColor',[0.5 1 0.5],'DisplayName','Sensitivity'); plot(t,SEN,'LineStyle',':','Color',[0 1 0]); 54 | h4=scatter(t,AUC,'MarkerEdgeColor',[0.5 0.5 1],'DisplayName','AUC','LineWidth',1.5); plot(t,AUC,'LineStyle','--','Color',[0.5 0.5 1],'LineWidth',1.5); 55 | xlabel('Greatest Tumor Length in MRI','Interpreter','latex','FontSize',13); 56 | ylabel('Area Under the Curve (AUC)','Interpreter','latex','FontSize',13); 57 | % legend([h1,h4]); 58 | -------------------------------------------------------------------------------- /matlab postprocessing/mapMaker.m: -------------------------------------------------------------------------------- 1 | %% Make Map for the whole image and colromap generation based on the needed structure! 2 | % @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL) 3 | 4 | %% 5 | clear all %#ok 6 | close all 7 | clc 8 | 9 | %% Initializing: Define Parameters and Reading Data 10 | path = 'E:\Feature Extraction\Philips Dataset\Extracted Features\'; 11 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\DeepNetArch3-Div\test_logs\'; 12 | filename = '2017_10_20_09_48_15_arch3_whole.mat'; 13 | % filename = '2017_10_19_09_55_17_arch2_whole.mat'; 14 | % filename = '2017_10_19_20_54_29_arch1_whole.mat'; 15 | 16 | % ExcelFileName : Name of Excel which contain our patients info 17 | ExcelFileName = 'PatientsInfo_All.xlsx'; 18 | 19 | % Read filenames and Patient Info 20 | [num,txt,raw] = xlsread(ExcelFileName); 21 | 22 | PatientsInfo_FileName = txt(2:end,3); 23 | PatientsInfo_FileName = cell2mat(PatientsInfo_FileName); 24 | 25 | % Load probability maps 26 | load([log_dir, filename]); 27 | load('./Datasets/D_Whole_Labels.mat'); 28 | 29 | prob_estimates_test = test_predictions; 30 | SampSize_test = infoCorew(:,10); 31 | % Create structure containing filenames and corresponding probability maps 32 | field1 = 'filename'; value1 = 'rf00000000000000'; 33 | field2 = 'probmap'; value2 = zeros(1,1); 34 | s_temp = struct(field1,value1,field2,value2); 35 | 36 | s = []; 37 | noFiles = size(PatientsInfo_FileName,1); 38 | 39 | for i=1:noFiles 40 | 41 | 42 | p_temp = prob_estimates_test(sum(SampSize_test(1:i-1))+ 1 : sum(SampSize_test(1:i)),1); 43 | 44 | filename = [path,'/features_wholeimage_limits_80_new/feature_limit_',PatientsInfo_FileName(i,:),'.mat']; 45 | load(filename); 46 | 47 | x_range = (x_lim_right - x_lim_left +0.5)*2; 48 | y_range = (y_lim_right - y_lim_left +0.5)*2; 49 | 50 | % For dataset 1-6th we don't have 51 | if(x_range < 1 || y_range < 1) 52 | s_temp.filename = PatientsInfo_FileName(i,:); 53 | s = [s;s_temp]; 54 | continue; 55 | end 56 | % Create structure containing filenames and corresponding probability maps 57 | field1 = 'filename'; value1 = 'rf00000000000000'; 58 | field2 = 'probmap'; value2 = zeros(x_range,y_range); 59 | s_temp = struct(field1,value1,field2,value2); 60 | 61 | p_temp = reshape(p_temp,[y_range x_range]); 62 | probabilitymap = flip(p_temp); 63 | 64 | probabilitymap = imresize( probabilitymap, 'Scale', 0.5 ); %% Scale for 0.5 mm ROI s 65 | 66 | filename = PatientsInfo_FileName(i,:); 67 | s_temp.filename = filename; 68 | s_temp.probmap = probabilitymap; 69 | s = [s;s_temp]; 70 | i 71 | end 72 | 73 | 74 | save RNN_Bmode_Wholemap.mat s 75 | -------------------------------------------------------------------------------- /matlab postprocessing/makeData.m: -------------------------------------------------------------------------------- 1 | %% makeData: Data division for large cores in LSTM impelemtaion 2 | %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 3 | 4 | % Patient Info: 5 | % Column 1: Label 6 | % Column 2: Tumor in Core length 7 | % Column 3: MR Label 8 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch) 9 | % Column 5: Gleason Score 10 | % Column 6: Distance to Boundary (mm) 11 | % Column 7: MR greatest size 12 | % Column 8: Sagittal GS 13 | % Column 9: Sample number 14 | % Column 10: ROI number/ Column 10: Sample Size 15 | 16 | % @ Code composed by Shekoofeh Azizi on 24/10/2016 (UBC-RCL) 17 | % @ Code modified by Shekoofeh Azizi on 23/12/2016 (UBC-RCL) 18 | % @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL) 19 | % @ Code modified by Shekoofeh Azizi on 15/08/2017 (UBC-RCL) 20 | 21 | %% 22 | clc 23 | close all 24 | clear all %#ok 25 | 26 | %% Initialization 27 | TCL_limit = 4.00; % more than 25% be cancerous 28 | MTL_limit = 0.0; % Large cores 29 | 30 | path_1 = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\matlab postprocessing\Data preparation for RNN based methods\'; % Saving Path: path_1 31 | path_2 = 'E:\Feature Extraction\Philips Dataset\Extracted Features\'; % Feature Path: Path_2 32 | 33 | %% Reading Data 34 | % 1- Reading data from the sliding ROI 35 | [Xs_bmode, Xs_rf, infoROIs, infoCores, ~] = makeFeatureFiles_Sliding(path_1, path_2); 36 | 37 | % 2- Reading data from the fixed ROI 38 | [Xf_bmode, Xf_rf, infoROIf, infoCoref, ~] = makeFeatureFiles(path_1, path_2); 39 | 40 | % 3- Make the whole image feature files 41 | [Xw_bmode, Xw_rf, infoCorew, infoROIw] = makeFeatureFiles_wholeimage(path_1, path_2); 42 | 43 | %% Partitioning based on the tumore size 44 | % Select large tumor size for train+validation 45 | % Then select randomly between them for train(0.8) and validation(0.2) 46 | % Equal number of benign cores selected randomly for each set 47 | [selected_idx_train, selected_idx_test] = dataSelection(Xf_bmode,infoCoref,TCL_limit,MTL_limit); 48 | 49 | 50 | %% Save D_Fixed 51 | idx_train = find(ismember(infoROIf(:,9),selected_idx_train)); 52 | idx_test = find(ismember(infoROIf(:,9),selected_idx_test)); 53 | Df_train = [Xf_bmode(idx_train,:);Xf_rf(idx_train,:)]; 54 | Lf_train = [infoROIf(idx_train,:);infoROIf(idx_train,:)]; 55 | Df_test = [Xf_bmode(idx_test,:);Xf_rf(idx_test,:)]; 56 | Lf_test = [infoROIf(idx_test,:);infoROIf(idx_test,:)]; 57 | save([path_1,'Datasets\D_Fixed.mat'],'Df_train','Lf_train','Df_test','Lf_test') 58 | 59 | %% Save D_Sliding 60 | idx_train = find(ismember(infoROIs(:,9),selected_idx_train)); 61 | idx_test = find(ismember(infoROIs(:,9),selected_idx_test)); 62 | Ds_train = [Xs_bmode(idx_train,:);Xs_rf(idx_train,:)]; 63 | Ls_train = [infoROIs(idx_train,:);infoROIs(idx_train,:)]; 64 | Ds_test = [Xs_bmode(idx_test,:);Xs_rf(idx_test,:)]; 65 | Ls_test = [infoROIs(idx_test,:);infoROIs(idx_test,:)]; 66 | save([path_1,'Datasets\D_Sliding.mat'],'Ds_train','Ls_train','Ds_test','Ls_test') 67 | 68 | 69 | %% Save D_Whole 70 | Dw_bmode = Xw_bmode'; 71 | Dw_rf =Xw_rf'; 72 | save([path_1,'Datasets\D_Whole_Bmode.mat'],'Dw_bmode','-v7.3') 73 | save([path_1,'Datasets\D_Whole_RF.mat'],'Dw_rf','-v7.3') 74 | save([path_1,'Datasets\D_Whole_Labels.mat'],'infoCorew') -------------------------------------------------------------------------------- /experiments/mainTrainOpt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import os 4 | 5 | module_root = '..' 6 | sys.path.append(module_root) 7 | 8 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 10 | 11 | from deepNetworks.model import RNNModel 12 | from utils.data_loader import DataLoader 13 | from deepNetworks.netArch import DeepNetArch1, DeepNetArch2, DeepNetArch3, DeepNetArch1L1, DeepNetArch2L1, \ 14 | DeepNetArch3L1 15 | from keras import backend as k 16 | from utils.history import History 17 | 18 | if __name__ == '__main__': 19 | logs_dir = 'DeepNetArch1-Div' 20 | model_type = 'DeepNetArch1' 21 | sl = 100 22 | ds_rate = 2 23 | early_stopping = True 24 | downsample = False 25 | bmode = True 26 | subdir = '/bmode/' 27 | model_history = History(logs_dir) 28 | opt_params, opt_model_uid = model_history.find_opt_model(auc=False, loss=False, acc=True) 29 | 30 | if downsample: 31 | ds = DataLoader(sl=sl, downsample=True, downsample_rate=ds_rate) 32 | sl = int(sl/ds_rate) 33 | else: 34 | ds = DataLoader(sl=sl) 35 | 36 | if model_type == 'DeepNetArch1': 37 | arch = DeepNetArch1(sl=sl, initial_lr=float(opt_params['initial_lr']), l2_reg=float(opt_params['l2_regulizer']), 38 | dropout=float(opt_params['dropout']), rec_dropout=float(opt_params['rec_dropout']), 39 | optimizer=opt_params['optimizer'], summary=1) 40 | if model_type == 'DeepNetArch2': 41 | arch = DeepNetArch2(sl=sl, initial_lr=float(opt_params['initial_lr']), l2_reg=float(opt_params['l2_regulizer']), 42 | dropout=float(opt_params['dropout']), rec_dropout=float(opt_params['rec_dropout']), 43 | optimizer=opt_params['optimizer'], summary=1) 44 | if model_type == 'DeepNetArch3': 45 | arch = DeepNetArch3(sl=sl, initial_lr=float(opt_params['initial_lr']), l2_reg=float(opt_params['l2_regulizer']), 46 | dropout=float(opt_params['dropout']), rec_dropout=float(opt_params['rec_dropout']), 47 | optimizer=opt_params['optimizer'], summary=1) 48 | if model_type == 'DeepNetArch1L1': 49 | arch = DeepNetArch1L1(sl=sl, initial_lr=float(opt_params['initial_lr']), 50 | l2_reg=float(opt_params['l2_regulizer']), 51 | dropout=float(opt_params['dropout']), rec_dropout=float(opt_params['rec_dropout']), 52 | optimizer=opt_params['optimizer'], summary=1) 53 | if model_type == 'DeepNetArch2L1': 54 | arch = DeepNetArch2L1(sl=sl, initial_lr=float(opt_params['initial_lr']), 55 | l2_reg=float(opt_params['l2_regulizer']), 56 | dropout=float(opt_params['dropout']), rec_dropout=float(opt_params['rec_dropout']), 57 | optimizer=opt_params['optimizer'], summary=1) 58 | if model_type == 'DeepNetArch3L1': 59 | arch = DeepNetArch3L1(sl=sl, initial_lr=float(opt_params['initial_lr']), 60 | l2_reg=float(opt_params['l2_regulizer']), 61 | dropout=float(opt_params['dropout']), rec_dropout=float(opt_params['rec_dropout']), 62 | optimizer=opt_params['optimizer'], summary=1) 63 | 64 | model, model_id = arch.arch_generator() 65 | 66 | rnn_model = RNNModel(ds, model, opt_params, log_dir=logs_dir + subdir + str(sl), division=True, bmode=bmode) 67 | uid = time.strftime("%Y_%m_%d_%H_%M_%S_") + model_id 68 | print('-' * 50) 69 | print('UID: {}'.format(uid)) 70 | print('-' * 50) 71 | 72 | rnn_model.opt_model_train(uid=uid, batch_size=int(opt_params['batch_size']), es=early_stopping, 73 | nb_epoch=int(opt_params['n_epoch']), verbose=2) 74 | 75 | test_predictions = rnn_model.predict_test(uid=uid) 76 | k.clear_session() 77 | -------------------------------------------------------------------------------- /matlab postprocessing/validation.m: -------------------------------------------------------------------------------- 1 | %% Comparison of Bmode and RF data 2 | % @ Code composed by Shekoofeh Azizi on 22/05/2017 (UBC-RCL) 3 | % @ Code modified by Shekoofeh Azizi on 01/06/2017 (UBC-RCL) 4 | % @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL) 5 | 6 | %% 7 | % Patient Info: 8 | % Column 1: Label 9 | % Column 2: Tumor in Core length 10 | % Column 3: MR Label 11 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch) 12 | % Column 5: Gleason Score 13 | % Column 6: Distance to Boundary (mm) 14 | % Column 7: MR greatest size 15 | % Column 8: Sagittal GS 16 | % Column 9: Sample number 17 | % Column 10: ROI number/ Column 10: Sample Size 18 | 19 | %% 20 | 21 | clear all %#ok 22 | close all 23 | clc 24 | 25 | noROI = 80; 26 | filtering = 'nofilter'; 27 | value = 2; 28 | 29 | %% Loading RF model and results 30 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\DeepNetArch1-Div\test_logs\'; 31 | 32 | % Bmode model id 33 | % filename = '2017_10_20_09_48_15_arch3.mat'; % Threshold .5 Wholemap .8 34 | % filename = '2017_10_19_09_55_17_arch2.mat'; % Threshold .4 35 | % filename = '2017_10_19_20_54_29_arch1.mat'; % Threshold .5 Wholemap .8 36 | 37 | % RF model id 38 | % filename = '2017_10_05_14_48_49_arch3.mat'; % Threshold .5 Wholemap .8 39 | % filename = '2017_10_05_11_28_48_arch2.mat'; % Threshold .5 40 | % filename = '2017_10_05_11_29_04_arch1.mat'; % Threshold .4 Wholemap .8 41 | filename = '2017_10_08_19_38_47_arch1.mat'; 42 | 43 | load([log_dir, filename]); 44 | load('.\Datasets\D_Fixed') 45 | 46 | L_TEST = Lf_test(1:size(Lf_test)/2,:); 47 | noFiles_test = size(L_TEST,1)/noROI; 48 | estimatedProb = test_predictions; 49 | 50 | % Find the optimm threshold using ROC curve 51 | [X_ROC,Y_ROC,T,~,OPTROCPT] = perfcurve(L_TEST(:,1),estimatedProb,1); 52 | Threshold = T((X_ROC==OPTROCPT(1))&(Y_ROC==OPTROCPT(2))); 53 | % Threshold = 0.5; 54 | predictedL = (estimatedProb>=Threshold); 55 | 56 | CancerPercentage = []; 57 | CancerEstimate = []; 58 | L_Core = []; 59 | for i = 1 : noFiles_test 60 | predict_label = predictedL((i-1)*noROI+1:i*noROI,1); 61 | cancer_estimate = estimatedProb((i-1)*noROI+1:i*noROI,1); 62 | CancerPercentage(i)=100*length(find(predict_label==1))/noROI; %#ok 63 | CancerEstimate(i)=100*sum(cancer_estimate(:,1))/noROI; %#ok 64 | L_Core(i,:) = L_TEST((i-1)*noROI+1,:); %#ok 65 | end 66 | CancerPercentage = CancerPercentage'; 67 | CancerEstimate = CancerEstimate'; 68 | results = CancerPercentage; 69 | 70 | %% Results Evalution 71 | hold on 72 | filter = makeFilter(L_Core,filtering,value); 73 | [X_ROC,Y_ROC,T,AUC_Core,OPTROCPT] = perfcurve(L_Core(filter,1),double(results(filter,:)),1); 74 | plot(X_ROC,Y_ROC,'Color','b','LineWidth',1.5,'DisplayName','RF data') 75 | xlabel('False positive rate (1-Specificity)'); 76 | ylabel('True positive rate (Specificity)') 77 | 78 | noFiles_filter = size(L_Core(filter,1),1); 79 | cp = CancerPercentage(filter,1); 80 | sen = OPTROCPT(2); 81 | spe = 1-OPTROCPT(1); 82 | Threshold_filter = T((X_ROC==OPTROCPT(1))&(Y_ROC==OPTROCPT(2))); 83 | acc = sum(L_Core(filter,1) == (cp >= Threshold_filter))/size(cp,1); 84 | display('RF Results') 85 | fprintf('Accuracy: %d AUC: %d\n', acc, AUC_Core); 86 | fprintf('Sensitivity: %d\n', sen); 87 | fprintf('Specificity: %d\n', spe); 88 | predictedL= (cp >= Threshold_filter); 89 | CancerP = results(filter,1); 90 | 91 | %% 92 | % Plot AUC vs. MR length for binary classification 93 | MRsize = L_Core(filter,7); 94 | realGrade = L_Core(filter,1); 95 | [~,AUC_Predicted] = plotMRvsAccuracy(MRsize,predictedL,realGrade,CancerP); 96 | 97 | 98 | %% Binary classification + MR grading 99 | MRgrade = L_Core(filter,3); 100 | [~,CancerPercentageCombined] = gradeCombination(predictedL,MRgrade,CancerP); 101 | [~,~,~,AUC_Core,~] = perfcurve(L_Core(filter,1),CancerPercentageCombined,1); 102 | fprintf('AUC Combined: %d\n', AUC_Core); 103 | 104 | -------------------------------------------------------------------------------- /matlab postprocessing/learningCurves.m: -------------------------------------------------------------------------------- 1 | % Plot Learning curves for the selected models 2 | % @ Code composed by Shekoofeh Azizi on 18/10/2017 (UBC-RCL) 3 | %% 4 | clc 5 | clear all %#ok 6 | close all 7 | 8 | %% Initialization 9 | % Se the model ids of the selected optimum models 10 | 11 | % Bmode model id: Opt 12 | % lstm_opt_model_id = '2017_10_19_20_54_29_arch1.csv'; 13 | % gru_opt_model_id = '2017_10_19_09_55_17_arch2.csv'; 14 | % rnn_opt_model_id = '2017_10_20_09_48_15_arch3.csv'; 15 | 16 | % RF model id: Opt 17 | lstm_opt_model_id = '2017_10_05_11_29_04_arch1.csv'; 18 | gru_opt_model_id = '2017_10_05_11_28_48_arch2.csv'; 19 | rnn_opt_model_id = '2017_10_05_14_48_49_arch3.csv'; 20 | 21 | % Path setting 22 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\'; 23 | log_dir_sub = '\'; 24 | sub_dir = 'DeepNetArch2-Div'; 25 | root = [log_dir, sub_dir, log_dir_sub]; 26 | addpath([root '/train_logs']); 27 | addpath([root '/csv_logs']); 28 | 29 | %% 30 | curve_type = {'loss';'acc';'val_loss';'val_acc'}; 31 | curve_names = {'Train Loss';'Train Accuracy';'Validation Loss';'Validation Accuracy'}; 32 | linestyle = {'-'; '-.'; '-'; '-.' }; 33 | color = {[1 0.27 0.27] ; [1 0.27 0.27]; [0 0.8 0.4]; [0 0.8 0.4]}; 34 | noCurves = size(curve_type,1); 35 | 36 | fig = figure; 37 | left_color = [0 0 0]; 38 | right_color = [0 0 0]; 39 | set(fig,'defaultAxesColorOrder',[left_color; right_color]); 40 | % Create axes 41 | ax = axes('Parent',fig); 42 | set(ax,'FontName','Times','FontSize',14,'GridColor',... 43 | [0.247058823529412 0.247058823529412 0.247058823529412],'GridLineStyle',':',... 44 | 'LineStyleOrderIndex',3,'XGrid','on','YGrid','on'); 45 | for i = 1 : noCurves 46 | if(i<3) 47 | yyaxis left 48 | ylabel('Loss','FontName','Times','Interpreter','latex'); 49 | else 50 | yyaxis right 51 | ylabel('Accuracy/AUC','FontName','Times','Interpreter','latex'); 52 | end 53 | net_name = gru_opt_model_id(1:25); 54 | [param_log_name, param_log_value] = importfilecsv([net_name, '.csv']); 55 | learn_log = importdata([net_name, '.log']); 56 | diagram_type_train = curve_type{i,1}; 57 | [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train)); 58 | value = learn_log.data(:,param_loc); 59 | 60 | plot(value,'DisplayName',curve_names{i,1},... 61 | 'LineWidth',1.2,'LineStyle',linestyle{i,1}, 'Color',color{i,1}) 62 | hold on 63 | end 64 | 65 | [~, param_loc] = intersect(learn_log.textdata,cellstr('val_auc')); 66 | value = learn_log.data(:,param_loc); 67 | plot(value,'DisplayName','Validation AUC','LineWidth',1.2,'LineStyle','-', 'Color',[0.2 0.4 1]); 68 | 69 | legend('show') 70 | xlabel('Iteration (Epochs)','FontName','Times','Interpreter','latex') 71 | hold off 72 | 73 | 74 | 75 | %% 76 | % curve_type = {'loss';'acc';'val_loss';'val_acc'}; 77 | % curve_names = {'Train Loss';'Train Accuracy';'Validation Loss';'Validation Accuracy'}; 78 | % linestyle = {'-'; '-.'; '-'; '-.' }; 79 | % color = {[1 0.27 0.27] ; [1 0.27 0.27]; [0 0.8 0.4]; [0 0.8 0.4]}; 80 | % noCurves = size(curve_type,1); 81 | % 82 | % fig = figure; 83 | % left_color = [0 0 0]; 84 | % right_color = [0 0 0]; 85 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]); 86 | % for i = 1 : noCurves 87 | % yyaxis left 88 | % net_name = lstm_opt_model_id(1:25); 89 | % [param_log_name, param_log_value] = importfilecsv([net_name, '.csv']); 90 | % learn_log = importdata([net_name, '.log']); 91 | % diagram_type_train = curve_type{i,1}; 92 | % [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train)); 93 | % value = learn_log.data(:,param_loc); 94 | % 95 | % plot(value,'DisplayName',curve_names{i,1},... 96 | % 'LineWidth',1.2,'LineStyle',linestyle{i,1}, 'Color',color{i,1}) 97 | % hold on 98 | % end 99 | % ylabel('Loss/Accuracy'); 100 | % 101 | % yyaxis right 102 | % [~, param_loc] = intersect(learn_log.textdata,cellstr('lr')); 103 | % value = learn_log.data(:,param_loc); 104 | % plot(value,'DisplayName','Learning Rate','LineWidth',1.2,'LineStyle','-', 'Color',[0 0 0]); 105 | % ylabel('Learning Rate'); 106 | % ylim([10e-4 10e-3]) 107 | % legend('show') 108 | % hold off 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /matlab postprocessing/learningAnalysis.m: -------------------------------------------------------------------------------- 1 | % plot the learning curves for 2 | % @ Code composed by Shekoofeh Azizi on 18/10/2017 (UBC-RCL) 3 | %% 4 | clc 5 | clear all %#ok 6 | close all 7 | 8 | %% 9 | 10 | model_type = 'gru'; 11 | diagram_type_train = 'loss'; 12 | diagram_type_val = 'val_loss'; 13 | compare_type = 'initial_lr'; 14 | 15 | [subFiles, opt_param_log_name, opt_param_log_value] = setting(model_type); 16 | noFiles = size(subFiles,1); 17 | 18 | [opt_lr, ~ ] = parsecsv('initial_lr', opt_param_log_name, opt_param_log_value); 19 | [opt_bs, ~ ] = parsecsv('batch_size', opt_param_log_name, opt_param_log_value); 20 | [opt_optimizer, ~ ] = parsecsv('optimizer', opt_param_log_name, opt_param_log_value); 21 | [opt_reg, ~ ] = parsecsv('l2_regulizer', opt_param_log_name, opt_param_log_value); 22 | [opt_do, ~ ] = parsecsv('dropout', opt_param_log_name, opt_param_log_value); 23 | 24 | bs = '128'; 25 | do = '0'; 26 | reg = '0.0001'; 27 | 28 | fig = figure; 29 | % Create axes 30 | ax = axes('Parent',fig); 31 | set(ax,'FontName','Times','FontSize',14,'GridLineStyle',':',... 32 | 'LineStyleOrderIndex',3,'XGrid','on','YGrid','on'); 33 | hold on 34 | for i = 1 : noFiles 35 | filename = subFiles{i,1}; 36 | net_name = filename(1:25); 37 | [param_log_name, param_log_value] = importfilecsv([net_name, '.csv']); 38 | learn_log = importdata(filename); 39 | 40 | [file_lr, ~ ] = parsecsv('initial_lr', param_log_name, param_log_value); 41 | [file_bs, ~ ] = parsecsv('batch_size', param_log_name, param_log_value); 42 | [file_optimizer, ~ ] = parsecsv('optimizer', param_log_name, param_log_value); 43 | [file_reg, ~ ] = parsecsv('l2_regulizer', param_log_name, param_log_value); 44 | [file_do, ~ ] = parsecsv('dropout', param_log_name, param_log_value); 45 | 46 | if(strcmp(bs, file_bs) && strcmp(reg, file_reg) && strcmp(do, file_do)) 47 | 48 | switch(file_lr{1,1}) 49 | case '0.01' 50 | linestyle = ':'; 51 | case '0.0001' 52 | linestyle = '-'; 53 | otherwise 54 | fprintf('Invalid!\n' ); 55 | end 56 | 57 | switch(file_optimizer{1,1}) 58 | case 'sgd' 59 | [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train)); 60 | value = learn_log.data(:,param_loc); 61 | plot(value,'DisplayName',strcat('sgd, lr = ', file_lr{1,1}),... 62 | 'LineWidth',1.2,'LineStyle',linestyle, 'Color',[0.2 0.4 1]) 63 | hold on 64 | % [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_val)); 65 | % value = learn_log.data(:,param_loc); 66 | % plot(value,'DisplayName',strcat('validation loss: Lr = ', file_lr{1,1})) 67 | % hold on 68 | case 'rmsprop' 69 | [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train)); 70 | value = learn_log.data(:,param_loc); 71 | plot(value,'DisplayName',strcat('rmsprop, lr = ', file_lr{1,1}), ... 72 | 'LineWidth',1.2,'LineStyle',linestyle, 'Color',[1 0.27 0.27]) 73 | hold on 74 | % [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_val)); 75 | % value = learn_log.data(:,param_loc); 76 | % plot(value,'DisplayName',strcat('validation loss: Lr = ', file_lr{1,1})) 77 | % hold on 78 | case 'adam' 79 | [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train)); 80 | value = learn_log.data(:,param_loc); 81 | plot(value,'DisplayName',strcat('adam, lr = ', file_lr{1,1}),... 82 | 'LineWidth',1.2,'LineStyle',linestyle,'Color', [0 0.8 0.4]) 83 | hold on 84 | % [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_val)); 85 | % value = learn_log.data(:,param_loc); 86 | % plot(value,'DisplayName',strcat('validation loss: Lr = ', file_lr{1,1})) 87 | % hold on 88 | otherwise 89 | fprintf('Invalid!\n' ); 90 | end 91 | end 92 | end 93 | ylabel('Loss','FontName','Times','Interpreter','latex','FontSize',14); 94 | xlabel('Iteration (Epochs)','FontName','Times','Interpreter','latex','FontSize',14) 95 | ylim([0 0.7]) 96 | legend('show') 97 | box('on') 98 | hold off 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /matlab postprocessing/makeFilter.m: -------------------------------------------------------------------------------- 1 | function [filter_Core] = makeFilter(Y_Core,filtering,value) 2 | %% Exclude data based o different criteria 3 | % nofilter : include all of the data 4 | % basic: exclude cores with more than 3mm distance to boundary and cores 5 | % have mismatche between axial and sagittal pathology 6 | % mrbased: basic filter + MR grade based exclusion 7 | % gsbased: basic filter + GS based exclusion 8 | % binary: sepration of cancer and benign. 9 | 10 | % Inputs: Y_ROI ,Y_Core : Labels and charectristics of each ROI/Core 11 | % filtering: string, based on following filters, i.e. 'basic' 12 | % value: default-0 for basic, nofilter, 13 | % mrbased-1(low),2(medium),3(high) 14 | % gsbased-0,6,71(GS 3+4),72(GS 4+3),8,9 15 | % binary: 0 Benign, 1 Cancerous 16 | % 17 | % Patient Info: 18 | % Column 1: Label 19 | % Column 2: Tumor in Core length 20 | % Column 3: MR Label 21 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch) 22 | % Column 5: Gleason Score 23 | % Column 6: Distance to Boundary (mm) 24 | % Column 7: MR greatest size 25 | % Column 8: Sagittal GS 26 | % Column 9: Sample number 27 | % Column 10: ROI number/ Column 10: Sample Size 28 | 29 | % @ Code composed by Shekoofeh Azizi on 24/11/2015 (UBC-RCL) 30 | % @ Code modified by Shekoofeh Azizi on 01/06/2017 (UBC-RCL) 31 | 32 | %% 33 | if nargin < 3 34 | assert(~strcmp(filtering,'D2Bmrbased'),'Invalid value for MR grade!'); 35 | value = 0; 36 | end 37 | 38 | % set filtering conditions 39 | switch filtering 40 | case 'nofilter' 41 | % filter_ROI = 1:length(Y_ROI) ; 42 | filter_Core = 1:length(Y_Core) ; 43 | case 'D2' 44 | % filter_ROI = 1:length(Y_ROI) ; 45 | filter_Core = 1:length(Y_Core) ; 46 | case 'binary' 47 | % filter_ROI = find(Y_ROI(:,1)== value); 48 | filter_Core = find(Y_Core(:,1)== value); 49 | case 'D2B' 50 | % less than 3mm and mismatch 51 | % filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 ) ; 52 | filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 1 ); 53 | case 'D3B' 54 | % less than 3mm and mismatch 55 | % filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 0 ) ; 56 | filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 0 ); 57 | case 'D2A' 58 | % less than 3mm and mismatch 59 | % filter_ROI = find(Y_ROI(:,8) >= 2.99 ) ; 60 | filter_Core = find(Y_Core(:,6) >= 2.99 ); 61 | case 'D3' 62 | % Just less than 3mm 63 | % filter_ROI = find(Y_ROI(:,8) < 3.00 ) ; 64 | filter_Core = find(Y_Core(:,6) < 3.00 ); 65 | case 'D2C' 66 | % Include match cores 67 | % filter_ROI = find(Y_ROI(:,6)== 1) ; 68 | filter_Core = find(Y_Core(:,4)== 1); 69 | case 'D2Cmrlen' 70 | % Include match cores 71 | % filter_ROI = find(Y_ROI(:,6)== 1 & Y_ROI(:,13) >= 2) ; 72 | filter_Core = find(Y_Core(:,4)== 1 & Y_Core(:,13) >= 2); 73 | case 'D2M' 74 | % Include just mis-match cores 75 | % filter_ROI = find(Y_ROI(:,6)== 0) ; 76 | filter_Core = find(Y_Core(:,4)== 0); 77 | case 'D2mrbased' 78 | % less than 3mm and mismatch + MR level filtering 79 | % filter_ROI = find(Y_ROI(:,5)== value) ; 80 | filter_Core = find(Y_Core(:,3)==value); 81 | case 'D2Bmrbased' 82 | % less than 3mm and mismatch + MR level filtering 83 | % filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 & Y_ROI(:,5)== value) ; 84 | filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 1 & Y_Core(:,3)==value); 85 | case 'D2Amrbased' 86 | % less than 3mm and mismatch + MR level filtering 87 | % filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,5)== value) ; 88 | filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,3)==value); 89 | case 'D2Cmrbased' 90 | % less than 3mm and mismatch + MR level filtering 91 | % filter_ROI = find(Y_ROI(:,6)== 1 & Y_ROI(:,5)== value) ; 92 | filter_Core = find(Y_Core(:,4)== 1 & Y_Core(:,3)==value); 93 | case 'gsbased' 94 | % less than 3mm and mismatch + Gleason filtering 95 | % filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 & Y_ROI(:,7)== value) ; 96 | filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 1 & Y_Core(:,5)== value); 97 | case 'D2Cgsbased' 98 | % less than 3mm and mismatch + Gleason filtering 99 | % filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 & Y_ROI(:,7)== value) ; 100 | filter_Core = find(Y_Core(:,4)== 1 & Y_Core(:,5)== value); 101 | case 'Lenbased' 102 | filter_Core = find(Y_Core(:,4) >= value & Y_Core(:,4)== 1); 103 | end 104 | 105 | end -------------------------------------------------------------------------------- /utils/data_loader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils import settings as S 3 | import scipy.io as spio 4 | from numpy.random import permutation 5 | from sklearn.model_selection import train_test_split 6 | import h5py 7 | 8 | 9 | class DataLoader: 10 | def __init__(self, sl, validation_split=0.2, downsample=False, downsample_rate=2): 11 | self.sl = sl 12 | self.data_dir = S.intermediate_folder 13 | self.validation_split = validation_split 14 | self.downsample = downsample 15 | self.downsample_rate = downsample_rate 16 | 17 | @staticmethod 18 | def reshaper(data): 19 | # reshape input to be [samples, time steps, features] 20 | reshaped_data = np.reshape(data, (data.shape[0], data.shape[1], 1)) 21 | return reshaped_data 22 | 23 | def load_train_test(self, bmode=False): 24 | d_sliding = spio.loadmat(self.data_dir + '/' + 'D_Sliding.mat') 25 | d_fixed = spio.loadmat(self.data_dir + '/' + 'D_Fixed.mat') 26 | 27 | # Matrix reformatting to access the cells 28 | d_train = d_sliding['Ds_train'][0:, 0:self.sl] # B-mode+RF Train 29 | l_train = d_sliding['Ls_train'][0:, 0] # B-mode+RF Train Labels 30 | d_test = d_fixed['Df_test'][0:, 0:self.sl] # B-mode+RF Test 31 | l_test = d_fixed['Lf_test'][0:, 0] # b-mode+RF Test Labels 32 | 33 | # Select the first half including only Bmode data [Bmode Data, RF Data] 34 | d_train_r = d_train[d_train.shape[0] / 2:, 0:self.sl] # RF Train 35 | l_train_r = l_train[l_train.shape[0] / 2:, ] # RF Train Labels 36 | d_test_r = d_test[d_test.shape[0] / 2:, 0:self.sl] # RF Test 37 | l_test_r = l_test[l_test.shape[0] / 2:, ] # RF Test Labels 38 | train_seq = d_train_r 39 | test_seq = d_test_r 40 | train_label = l_train_r 41 | test_label = l_test_r 42 | 43 | if bmode: 44 | d_train_b = d_train[0:d_train.shape[0] / 2, 0:self.sl] # B-mode Train 45 | l_train_b = l_train[0:l_train.shape[0] / 2, ] # B-mode Train Labels 46 | d_test_b = d_test[0:d_test.shape[0] / 2, 0:self.sl] # B-mode Test 47 | l_test_b = l_test[0:l_test.shape[0] / 2, ] # B-mode Test Labels 48 | train_seq = d_train_b 49 | test_seq = d_test_b 50 | train_label = l_train_b 51 | test_label = l_test_b 52 | 53 | if self.downsample: 54 | idx = np.floor(np.linspace(start=0, stop=self.sl-1, num=(self.sl/self.downsample_rate))) 55 | train_seq = train_seq[:, idx.astype(int)] 56 | test_seq = test_seq[:, idx.astype(int)] 57 | 58 | train_seq = self.reshaper(train_seq) 59 | test_seq = self.reshaper(test_seq) 60 | 61 | return train_seq, train_label, test_seq, test_label 62 | 63 | def load_data(self, bmode=False): 64 | train_data, train_label, test_seq, test_label = self.load_train_test(bmode) 65 | train_seq, train_label, validation_seq, validation_label = self.split_data(train_data, train_label) 66 | return train_seq, train_label, validation_seq, validation_label, test_seq, test_label 67 | 68 | def split_data(self, train_data, train_label): 69 | 70 | perm_idx = permutation(train_data.shape[0]) 71 | train_data_perm = train_data[perm_idx, :] 72 | train_label_perm = train_label[perm_idx, ] 73 | 74 | validation_idx = int(round(self.validation_split * train_data.shape[0],0)) 75 | 76 | train_seq = train_data_perm[validation_idx:, :] 77 | train_label = train_label_perm[validation_idx:, ] 78 | validation_seq = train_data_perm[0:validation_idx, :] 79 | validation_label = train_label_perm[0:validation_idx, ] 80 | 81 | return train_seq, train_label, validation_seq, validation_label 82 | 83 | def load_test(self, bmode=False): 84 | _, _, test_seq, test_label = self.load_train_test(bmode) 85 | return test_seq, test_label 86 | 87 | def load_data_split(self, bmode=False): # shuffle and do the division based on the split size 88 | train_data, train_label, test_seq, test_label = self.load_train_test(bmode) 89 | data_seq = np.concatenate([train_data, test_seq]) 90 | data_label = np.concatenate([train_label, test_label]) 91 | train_data, test_seq, train_label, test_label = train_test_split(data_seq, data_label, test_size=0.2, 92 | random_state=40) 93 | if bmode: 94 | train_data = np.concatenate([train_data, test_seq]) 95 | train_label = np.concatenate([train_label, test_label]) 96 | 97 | train_seq, train_label, validation_seq, validation_label = self.split_data(train_data, train_label) 98 | return train_seq, train_label, validation_seq, validation_label, test_seq, test_label 99 | 100 | def load_whole_test(self, bmode=False): 101 | if bmode: 102 | d_test = h5py.File(self.data_dir + '/' + 'D_Whole_Bmode.mat') 103 | d_test = d_test['Dw_bmode'].value 104 | else: 105 | d_test = h5py.File(self.data_dir + '/' + 'D_Whole_RF.mat') 106 | d_test = d_test['Dw_rf'].value 107 | 108 | test_seq = d_test[0:, 0:self.sl] 109 | test_seq = self.reshaper(test_seq) 110 | 111 | return test_seq 112 | 113 | -------------------------------------------------------------------------------- /experiments/trainmodel.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | # import os 4 | from pprint import pprint as p 5 | module_root = '..' 6 | sys.path.append(module_root) 7 | p(sys.path) 8 | # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 9 | # os.environ["CUDA_VISIBLE_DEVICES"] = "1" 10 | 11 | from utils.data_loader import DataLoader 12 | from deepNetworks.model import RNNModel 13 | from deepNetworks.netArch import DeepNetArch1, DeepNetArch2, DeepNetArch3, DeepNetArch1L1, DeepNetArch2L1, DeepNetArch3L1 14 | from keras import backend as k 15 | # import tensorflow as tf 16 | 17 | # config = tf.ConfigProto(allow_soft_placement=True) 18 | # config.gpu_options.allow_growth = True 19 | # session = tf.Session(config=config) 20 | # k.set_session(session) 21 | 22 | if __name__ == '__main__': 23 | log_dir = 'DeepNetArch3' 24 | early_stopping = False 25 | sl = 100 26 | validation_split = 0.2 27 | n_epoch = 100 28 | batch_sizes = [64, 128] 29 | initial_lrs = [1e-2, 1e-4] 30 | l2_regulizers = [0.0001, 0.0002] 31 | dropouts = [0, 0.4] 32 | rec_dropouts = [0] 33 | optimizers = ['sgd', 'rmsprop', 'adam'] 34 | 35 | grid_size = len(batch_sizes) * len(initial_lrs) * len(l2_regulizers) * len(dropouts) * len(rec_dropouts) * len( 36 | optimizers) 37 | i = 1 38 | model_number = 40 39 | for batch_size in batch_sizes: 40 | for initial_lr in initial_lrs: 41 | for l2_regulizer in l2_regulizers: 42 | for dropout in dropouts: 43 | for rec_dropout in rec_dropouts: 44 | for optimizer in optimizers: 45 | 46 | if i < model_number: 47 | i += 1 48 | continue 49 | 50 | print('-' * 50) 51 | print('-' * 50) 52 | print('-' * 50) 53 | print('batchsize:{}, initial_lr:{}, l2_regulizer:{}, dropout:{}, rec_dropout:{},' 54 | ' optimizer:{} '.format( 55 | batch_size, initial_lr, l2_regulizer, dropout, rec_dropout, optimizer)) 56 | print("experiment {} of total {}".format(i, grid_size)) 57 | ds = DataLoader(sl=sl, validation_split=validation_split) 58 | 59 | if log_dir == 'DeepNetArch1': 60 | arch = DeepNetArch1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, dropout=dropout, 61 | rec_dropout=rec_dropout, optimizer=optimizer, summary=1) 62 | if log_dir == 'DeepNetArch2': 63 | arch = DeepNetArch2(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, dropout=dropout, 64 | rec_dropout=rec_dropout, optimizer=optimizer, summary=1) 65 | if log_dir == 'DeepNetArch3': 66 | arch = DeepNetArch3(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, dropout=dropout, 67 | rec_dropout=rec_dropout, optimizer=optimizer, summary=1) 68 | if log_dir == 'DeepNetArch1L1': 69 | arch = DeepNetArch1L1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, 70 | dropout=dropout, rec_dropout=rec_dropout, optimizer=optimizer, 71 | summary=1) 72 | if log_dir == 'DeepNetArch2L1': 73 | arch = DeepNetArch2L1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, 74 | dropout=dropout, rec_dropout=rec_dropout, optimizer=optimizer, 75 | summary=1) 76 | if log_dir == 'DeepNetArch3L1': 77 | arch = DeepNetArch3L1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, 78 | dropout=dropout, rec_dropout=rec_dropout, optimizer=optimizer, 79 | summary=1) 80 | 81 | model, model_id = arch.arch_generator() 82 | 83 | params = dict() 84 | params['batch_size'] = batch_size 85 | params['initial_lr'] = initial_lr 86 | params['l2_regulizer'] = l2_regulizer 87 | params['dropout'] = dropout 88 | params['rec_dropout'] = rec_dropout 89 | params['n_epoch'] = n_epoch 90 | params['sl'] = sl 91 | params['optimizer'] = optimizer 92 | 93 | rnn_model = RNNModel(ds, model, params, log_dir=log_dir, division=True) 94 | uid = time.strftime("%Y_%m_%d_%H_%M_%S_") + model_id 95 | print('-' * 50) 96 | print('UID: {}'.format(uid)) 97 | print('-' * 50) 98 | 99 | rnn_model.train(uid=uid, batch_size=batch_size, es=early_stopping, nb_epoch=n_epoch, 100 | verbose=2) 101 | k.clear_session() 102 | i += 1 103 | -------------------------------------------------------------------------------- /deepNetworks/netArch.py: -------------------------------------------------------------------------------- 1 | from keras.layers import LSTM, GRU, SimpleRNN, Dense 2 | from keras.models import Sequential 3 | from keras.regularizers import l2 4 | 5 | 6 | class DeepNetArch1: # 2 Layers LSTM + Dense 7 | def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary): 8 | self.sl = sl 9 | self.summary = summary 10 | self.l2_reg = l2(l2_reg) 11 | self.dropout = dropout 12 | self.rec_dropout = rec_dropout 13 | self.initial_lr = initial_lr 14 | self.optimizer = optimizer 15 | 16 | def arch_generator(self): 17 | model_name = "arch1" 18 | model = Sequential() 19 | model.add(LSTM(units=self.sl, return_sequences=True, dropout=self.dropout, recurrent_dropout=self.rec_dropout, 20 | input_shape=(self.sl, 1), stateful=False)) 21 | model.add(LSTM(units=self.sl, dropout=self.dropout, recurrent_dropout=self.rec_dropout, return_sequences=False)) 22 | model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg)) 23 | model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy']) 24 | if self.summary: 25 | print(model.summary()) 26 | return model, model_name 27 | 28 | 29 | class DeepNetArch2: # 2 Layers GRU + Dense 30 | def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary): 31 | self.sl = sl 32 | self.summary = summary 33 | self.l2_reg = l2(l2_reg) 34 | self.dropout = dropout 35 | self.rec_dropout = rec_dropout 36 | self.initial_lr = initial_lr 37 | self.optimizer = optimizer 38 | 39 | def arch_generator(self): 40 | model_name = "arch2" 41 | model = Sequential() 42 | model.add(GRU(units=self.sl, return_sequences=True, dropout=self.dropout, recurrent_dropout=self.rec_dropout, 43 | input_shape=(self.sl, 1), stateful=False)) 44 | model.add(GRU(units=self.sl, dropout=self.dropout, recurrent_dropout=self.rec_dropout, return_sequences=False)) 45 | model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg)) 46 | model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy']) 47 | if self.summary: 48 | print(model.summary()) 49 | return model, model_name 50 | 51 | 52 | class DeepNetArch3: # 2 Layers RNN + Dense 53 | def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary): 54 | self.sl = sl 55 | self.summary = summary 56 | self.l2_reg = l2(l2_reg) 57 | self.dropout = dropout 58 | self.rec_dropout = rec_dropout 59 | self.initial_lr = initial_lr 60 | self.optimizer = optimizer 61 | 62 | def arch_generator(self): 63 | model_name = "arch3" 64 | model = Sequential() 65 | model.add(SimpleRNN(units=self.sl, return_sequences=True, dropout=self.dropout, 66 | recurrent_dropout=self.rec_dropout, 67 | input_shape=(self.sl, 1), stateful=False)) 68 | model.add(SimpleRNN(units=self.sl, dropout=self.dropout, recurrent_dropout=self.rec_dropout, return_sequences=False)) 69 | model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg)) 70 | model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy']) 71 | if self.summary: 72 | print(model.summary()) 73 | return model, model_name 74 | 75 | 76 | class DeepNetArch1L1: # 1 Layers LSTM + Dense 77 | def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary): 78 | self.sl = sl 79 | self.summary = summary 80 | self.l2_reg = l2(l2_reg) 81 | self.dropout = dropout 82 | self.rec_dropout = rec_dropout 83 | self.initial_lr = initial_lr 84 | self.optimizer = optimizer 85 | 86 | def arch_generator(self): 87 | model_name = "arch1l1" 88 | model = Sequential() 89 | model.add(LSTM(units=self.sl, return_sequences=False, dropout=self.dropout, recurrent_dropout=self.rec_dropout, 90 | input_shape=(self.sl, 1), stateful=False)) 91 | model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg)) 92 | model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy']) 93 | if self.summary: 94 | print(model.summary()) 95 | return model, model_name 96 | 97 | 98 | class DeepNetArch2L1: # 1 Layers GRU + Dense 99 | def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary): 100 | self.sl = sl 101 | self.summary = summary 102 | self.l2_reg = l2(l2_reg) 103 | self.dropout = dropout 104 | self.rec_dropout = rec_dropout 105 | self.initial_lr = initial_lr 106 | self.optimizer = optimizer 107 | 108 | def arch_generator(self): 109 | model_name = "arch2l1" 110 | model = Sequential() 111 | model.add(GRU(units=self.sl, return_sequences=False, dropout=self.dropout, recurrent_dropout=self.rec_dropout, 112 | input_shape=(self.sl, 1), stateful=False)) 113 | model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg)) 114 | model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy']) 115 | if self.summary: 116 | print(model.summary()) 117 | return model, model_name 118 | 119 | 120 | class DeepNetArch3L1: # 1 Layers RNN + Dense 121 | def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary): 122 | self.sl = sl 123 | self.summary = summary 124 | self.l2_reg = l2(l2_reg) 125 | self.dropout = dropout 126 | self.rec_dropout = rec_dropout 127 | self.initial_lr = initial_lr 128 | self.optimizer = optimizer 129 | 130 | def arch_generator(self): 131 | model_name = "arch3l1" 132 | model = Sequential() 133 | model.add(SimpleRNN(units=self.sl, return_sequences=False, dropout=self.dropout, 134 | recurrent_dropout=self.rec_dropout, 135 | input_shape=(self.sl, 1), stateful=False)) 136 | model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg)) 137 | model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy']) 138 | if self.summary: 139 | print(model.summary()) 140 | return model, model_name 141 | 142 | -------------------------------------------------------------------------------- /deepNetworks/model.py: -------------------------------------------------------------------------------- 1 | from keras.callbacks import CSVLogger, Callback, ReduceLROnPlateau, EarlyStopping, ModelCheckpoint 2 | 3 | module_root = '..' 4 | import sys 5 | import os 6 | from utils import settings as s 7 | import json 8 | import csv 9 | import numpy as np 10 | # import h5py 11 | 12 | sys.path.append(module_root) 13 | from sklearn.metrics import roc_auc_score 14 | 15 | 16 | class AUCHistory(Callback): 17 | def __init__(self, validation_data): 18 | self.validation_d = validation_data[0] 19 | self.validation_l = validation_data[1] 20 | 21 | def on_epoch_end(self, epoch, logs={}): 22 | y_pred = self.model.predict(self.validation_d) 23 | # print(np.concatenate((y_pred, self.validation_l), axis=1)) 24 | logs['val_auc'] = roc_auc_score(self.validation_l, y_pred) 25 | print("- AUC: {0:0.2f}".format(logs['val_auc'])) 26 | 27 | 28 | class RNNModel: 29 | def __init__(self, data, model, params, log_dir, division=False, bmode=False): 30 | self.ds = data 31 | self.model = model 32 | self.log_dir = log_dir 33 | self.training_params_dict = params 34 | self.type = type 35 | self.bmode = bmode 36 | if not division: 37 | self.train_seq, self.train_label, self.validation_seq, self.validation_label, self.test_data,\ 38 | self.test_label = self.ds.load_data(bmode=self.bmode) 39 | if division: 40 | self.train_seq, self.train_label, self.validation_seq, self.validation_label, self.test_data,\ 41 | self.test_label = self.ds.load_data_split(bmode=self.bmode) 42 | 43 | def train(self, uid, batch_size, es, nb_epoch, verbose): 44 | print('-' * 30) 45 | print('Fitting model...') 46 | print('-' * 30) 47 | callbacks_list = [] 48 | logs_dir = os.path.join(s.intermediate_folder, 'logs', self.log_dir) 49 | if not os.path.isdir(logs_dir): 50 | os.mkdir(logs_dir) 51 | 52 | model_json = self.model.to_json() 53 | model_log_dir = os.path.join(logs_dir, 'model_logs') 54 | if not os.path.isdir(model_log_dir): 55 | os.mkdir(model_log_dir) 56 | 57 | with open(os.path.join(model_log_dir, uid + '.json'), 'w') as outfile: 58 | json.dump(model_json, outfile) 59 | 60 | train_log_dir = os.path.join(logs_dir, 'train_logs') 61 | if not os.path.isdir(train_log_dir): 62 | os.mkdir(train_log_dir) 63 | 64 | with open(os.path.join(train_log_dir, uid + '.csv'), 'w') as csv_file: 65 | writer = csv.writer(csv_file) 66 | for key, value in self.training_params_dict.items(): 67 | writer.writerow([key, value]) 68 | 69 | validation_data = (self.validation_seq, self.validation_label[:]) 70 | 71 | reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.9, 72 | patience=15, min_lr=1e-9, 73 | epsilon=0.001, verbose=1) 74 | 75 | model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints') 76 | if not os.path.exists(model_checkpoint_dir): 77 | os.mkdir(model_checkpoint_dir) 78 | 79 | model_checkpoint = ModelCheckpoint(os.path.join(model_checkpoint_dir, uid + '.hdf5'), 80 | monitor='val_acc', save_best_only=True) 81 | callbacks_list.append(model_checkpoint) 82 | history = AUCHistory(validation_data) 83 | callbacks_list.append(history) 84 | callbacks_list.append(reduce_lr) 85 | if es: 86 | es = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=2, verbose=1) 87 | callbacks_list.append(es) 88 | 89 | csv_log_dir = os.path.join(logs_dir, 'csv_logs') 90 | if not os.path.isdir(csv_log_dir): 91 | os.mkdir(csv_log_dir) 92 | csv_logger = CSVLogger(os.path.join(csv_log_dir, uid + '.log')) 93 | callbacks_list.append(csv_logger) 94 | 95 | callbacks_list.append(reduce_lr) 96 | 97 | self.model.fit(self.train_seq, self.train_label, batch_size=batch_size, epochs=nb_epoch, verbose=verbose, 98 | shuffle=True, callbacks=callbacks_list, validation_data=validation_data) 99 | 100 | def predict_test(self, uid): 101 | test_seq, test_labels = self.ds.load_test(bmode=self.bmode) 102 | model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints/opt') 103 | model_checkpoint_file = os.path.join(model_checkpoint_dir, uid + '.hdf5') 104 | self.model.load_weights(model_checkpoint_file) 105 | test_predictions = self.model.predict(test_seq, verbose=1) 106 | test_auc = roc_auc_score(test_labels, test_predictions) 107 | print(["Test AUC: ", test_auc]) 108 | return test_predictions 109 | 110 | def opt_model_train(self, uid, batch_size, es, nb_epoch, verbose): 111 | 112 | # Re-define local train (train + validation) and test sequence 113 | # train_seq, train_label, test_seq, test_label = self.ds.load_train_test(bmode=self.bmode) 114 | train_seq = np.concatenate((self.train_seq, self.validation_seq)) 115 | train_label = np.concatenate((self.train_label, self.validation_label)) 116 | test_seq = self.test_data 117 | test_label = self.test_label 118 | print('-' * 30) 119 | print('Fitting optimum model ...') 120 | print('-' * 30) 121 | callbacks_list = [] 122 | logs_dir = os.path.join(s.intermediate_folder, 'logs', self.log_dir) 123 | if not os.path.isdir(logs_dir): 124 | os.mkdir(logs_dir) 125 | 126 | model_json = self.model.to_json() 127 | model_log_dir = os.path.join(logs_dir, 'model_logs') 128 | if not os.path.isdir(model_log_dir): 129 | os.mkdir(model_log_dir) 130 | 131 | with open(os.path.join(model_log_dir, uid + '.json'), 'w') as outfile: 132 | json.dump(model_json, outfile) 133 | 134 | train_log_dir = os.path.join(logs_dir, 'train_logs') 135 | if not os.path.isdir(train_log_dir): 136 | os.mkdir(train_log_dir) 137 | 138 | with open(os.path.join(train_log_dir, uid + '.csv'), 'w') as csv_file: 139 | writer = csv.writer(csv_file) 140 | for key, value in self.training_params_dict.items(): 141 | writer.writerow([key, value]) 142 | 143 | test_data = (test_seq, test_label[:]) 144 | 145 | reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.9, 146 | patience=15, min_lr=1e-9, 147 | epsilon=0.001, verbose=1) 148 | 149 | model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints/opt') 150 | if not os.path.exists(model_checkpoint_dir): 151 | os.mkdir(model_checkpoint_dir) 152 | 153 | model_checkpoint = ModelCheckpoint(os.path.join(model_checkpoint_dir, uid + '.hdf5'), 154 | monitor='val_acc', save_best_only=True) 155 | callbacks_list.append(model_checkpoint) 156 | history = AUCHistory(test_data) 157 | callbacks_list.append(history) 158 | callbacks_list.append(reduce_lr) 159 | 160 | if es: 161 | es = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=2, verbose=1) 162 | callbacks_list.append(es) 163 | 164 | csv_log_dir = os.path.join(logs_dir, 'csv_logs') 165 | if not os.path.isdir(csv_log_dir): 166 | os.mkdir(csv_log_dir) 167 | csv_logger = CSVLogger(os.path.join(csv_log_dir, uid + '.log')) 168 | callbacks_list.append(csv_logger) 169 | 170 | callbacks_list.append(reduce_lr) 171 | 172 | self.model.fit(train_seq, train_label, batch_size=batch_size, epochs=nb_epoch, verbose=verbose, 173 | shuffle=True, callbacks=callbacks_list, validation_data=test_data) 174 | -------------------------------------------------------------------------------- /utils/history.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import sys 4 | import os 5 | import numpy as np 6 | import pandas as pd 7 | import matplotlib.pyplot as plt 8 | from IPython.display import display, SVG 9 | from keras.models import model_from_json 10 | from keras.utils.vis_utils import model_to_dot 11 | 12 | module_root = '../..' 13 | sys.path.append(module_root) 14 | from utils import settings as s 15 | 16 | 17 | class History: 18 | def __init__(self, logs_dir): 19 | self.logs_folder = os.path.join(s.intermediate_folder, 'logs', logs_dir) 20 | self.csv_logs_folder = os.path.join(self.logs_folder, 'csv_logs') 21 | self.model_logs_folder = os.path.join(self.logs_folder, 'model_logs') 22 | self.train_logs_folder = os.path.join(self.logs_folder, 'train_logs') 23 | 24 | def plot_results(self, train, validation, params, model_visualization, loss, acc, auc, min_acc): 25 | csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log')) 26 | csv_train_sorted = sorted(glob.glob(self.train_logs_folder + '/*.csv')) 27 | uid_sample = os.path.split(csv_name_sorted[1])[1].split('.')[0] 28 | final = pd.DataFrame( 29 | index=pd.read_csv(list(filter(lambda x: uid_sample in x, csv_train_sorted))[0], header=None)[0].tolist()) 30 | final.index.name = None 31 | plt.figure(figsize=(8, 6)) 32 | for csv_file in csv_name_sorted: 33 | if os.path.getsize(csv_file) > 0: 34 | data = pd.read_csv(csv_file) 35 | uid = os.path.split(csv_file)[1].split('.')[0] 36 | 37 | if np.amax(data['val_acc']) > min_acc: 38 | if validation: 39 | if loss: 40 | plt.plot(data['epoch'], data['val_loss'], label=uid + ' Val Loss') 41 | if acc: 42 | plt.plot(data['epoch'], data['val_acc'], label=uid + ' Val Acc') 43 | if auc: 44 | plt.plot(data['epoch'], data['val_auc'], label=uid + ' Val AUC') 45 | if train: 46 | if loss: 47 | plt.plot(data['epoch'], data['loss'], label=uid + ' Train Loss') 48 | if acc: 49 | plt.plot(data['epoch'], data['acc'], label=uid + ' Train Acc') 50 | 51 | if params: 52 | print(uid) 53 | train_csv = pd.read_csv(list(filter(lambda x: uid in x, csv_train_sorted))[0], header=None) 54 | train_csv.columns = ['parameters', uid[8:25]] 55 | train_csv.set_index('parameters', inplace=True) 56 | final = final.join(train_csv) 57 | print("*" * 100) 58 | print("*" * 100) 59 | if model_visualization: 60 | model_log = glob.glob(self.model_logs_folder + '/' + uid + '*.json')[0] 61 | with open(model_log) as model_file: 62 | json_string = json.load(model_file) 63 | model = model_from_json(json_string) 64 | print(uid) 65 | # print(model.summary()) 66 | print("*" * 100) 67 | dot = model_to_dot(model).create(prog='dot', format='svg') 68 | return SVG(dot) 69 | plt.ylabel('Accuracy') 70 | plt.ylim([0, 1]) 71 | plt.xlabel('Epoch') 72 | plt.legend() 73 | # plt.close() 74 | plt.savefig(self.logs_folder + '/out.pdf', transparent=True) 75 | if params: 76 | display(final.drop(['data_ID', 'data_id'], axis=0)) 77 | return plt 78 | 79 | def find_opt_model(self, loss, acc, auc): 80 | csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log')) 81 | csv_train_sorted = sorted(glob.glob(self.train_logs_folder + '/*.csv')) 82 | uid_sample = os.path.split(csv_name_sorted[1])[1].split('.')[0] 83 | final = pd.DataFrame( 84 | index=pd.read_csv(list(filter(lambda x: uid_sample in x, csv_train_sorted))[0], header=None)[0].tolist()) 85 | final.index.name = None 86 | validation_results = [] 87 | train_uids = [] 88 | for csv_file in csv_name_sorted: 89 | if os.path.getsize(csv_file) > 0: 90 | data = pd.read_csv(csv_file) 91 | uid = os.path.split(csv_file)[1].split('.')[0] 92 | 93 | train_uids.append(uid) 94 | if auc: 95 | validation_results.append(np.amax(data['val_auc'])) 96 | if loss: 97 | validation_results.append(np.amin(data['val_loss'])) 98 | if acc: 99 | validation_results.append(np.amax(data['val_acc'])) 100 | if loss: 101 | opt_model_uid = train_uids[validation_results.index(min(validation_results))] 102 | else: 103 | opt_model_uid = train_uids[validation_results.index(max(validation_results))] 104 | print("Optimum Model ID: ", opt_model_uid) 105 | print("Optimum Training Value: ", max(validation_results)) 106 | train_csv = pd.read_csv(list(filter(lambda x: opt_model_uid in x, csv_train_sorted))[0], header=None) 107 | train_csv.columns = ['parameters', opt_model_uid[8:25]] 108 | train_csv.set_index('parameters', inplace=True) 109 | final = final.join(train_csv) 110 | opt_model = final 111 | print("Optimum Params:") 112 | print(final) 113 | print("*" * 100) 114 | print("*" * 100) 115 | 116 | opt_model_axes = opt_model.axes 117 | opt_model_rows = opt_model_axes[0] 118 | opt_model_cols = opt_model_axes[1].values 119 | 120 | opt_params = dict() 121 | 122 | for rows in opt_model_rows: 123 | for cols in opt_model_cols: 124 | opt_params[rows] = opt_model.get_value(rows, cols) 125 | 126 | return opt_params, opt_model_uid 127 | 128 | def plot_learning_curve(self, model_id, acc, auc, loss): 129 | csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log')) 130 | for csv_file in csv_name_sorted: 131 | uid = os.path.split(csv_file)[1].split('.')[0] 132 | if uid == model_id: 133 | data = pd.read_csv(csv_file) 134 | if auc: 135 | train_results = data['auc'] 136 | validation_results = data['val_auc'] 137 | if acc: 138 | train_results = data['acc'] 139 | validation_results = data['val_acc'] 140 | if loss: 141 | train_results = data['loss'] 142 | validation_results = data['val_loss'] 143 | break 144 | 145 | plt.figure() 146 | plt.title("Learning curve of " + model_id) 147 | 148 | plt.xlabel("Epoch Number") 149 | plt.ylabel("Accuracy") 150 | 151 | train_sizes = range(1, train_results.shape[0] + 1, 1) 152 | train_scores_mean = np.mean(train_results) 153 | train_scores_std = np.std(train_results) 154 | validation_scores_mean = np.mean(validation_results) 155 | validation_scores_std = np.std(validation_results) 156 | plt.grid() 157 | 158 | plt.fill_between(train_sizes, train_results - train_scores_std, 159 | train_scores_mean + train_scores_std, alpha=0.1, 160 | color="r") 161 | plt.fill_between(train_sizes, validation_results - validation_scores_std, 162 | validation_scores_mean + validation_scores_std, alpha=0.1, color="g") 163 | 164 | plt.plot(train_sizes, train_results, 'o-', color="r", 165 | label="Training score") 166 | plt.plot(train_sizes, validation_results, 'o-', color="g", 167 | label="Cross-validation score") 168 | 169 | plt.legend(loc="best") 170 | return plt 171 | 172 | def filtered_learning_curve(self, train, validation, params, loss, acc, auc): 173 | csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log')) 174 | csv_train_sorted = sorted(glob.glob(self.train_logs_folder + '/*.csv')) 175 | uid_sample = os.path.split(csv_name_sorted[1])[1].split('.')[0] 176 | final = pd.DataFrame( 177 | index=pd.read_csv(list(filter(lambda x: uid_sample in x, csv_train_sorted))[0], header=None)[0].tolist()) 178 | final.index.name = None 179 | plt.figure(figsize=(8, 6)) 180 | for csv_file in csv_name_sorted: 181 | if os.path.getsize(csv_file) > 0: 182 | data = pd.read_csv(csv_file) 183 | uid = os.path.split(csv_file)[1].split('.')[0] 184 | 185 | train_csv = pd.read_csv(list(filter(lambda x: uid in x, csv_train_sorted))[0], header=None) 186 | train_csv.columns = ['parameters', uid[8:25]] 187 | train_csv.set_index('parameters', inplace=True) 188 | final = final.join(train_csv) 189 | current_model = final 190 | current_model_axes = current_model.axes 191 | current_model_rows = current_model_axes[0] 192 | current_model_cols = current_model_axes[1].values 193 | current_model_params = dict() 194 | for rows in current_model_rows: 195 | for cols in current_model_cols: 196 | current_model_params[rows] = current_model.get_value(rows, cols) 197 | 198 | if validation: 199 | if loss: 200 | plt.plot(data['epoch'], data['val_loss'], label=uid + ' Val Loss') 201 | if acc: 202 | plt.plot(data['epoch'], data['val_acc'], label=uid + ' Val Acc') 203 | if auc: 204 | plt.plot(data['epoch'], data['val_auc'], label=uid + ' Val AUC') 205 | if train: 206 | if loss: 207 | plt.plot(data['epoch'], data['loss'], label=uid + ' Train Loss') 208 | if acc: 209 | plt.plot(data['epoch'], data['acc'], label=uid + ' Train Acc') 210 | 211 | plt.ylabel('Accuracy') 212 | plt.ylim([0, 1]) 213 | plt.xlabel('Epoch') 214 | plt.legend() 215 | # plt.close() 216 | plt.savefig(self.logs_folder + '/out.pdf', transparent=True) 217 | if params: 218 | display(final.drop(['data_ID', 'data_id'], axis=0)) 219 | return plt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 0. NO COMMERCIAL USE 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "{}" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright {yyyy} {name of copyright owner} 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | --------------------------------------------------------------------------------