├── utils
    ├── __init__.pyc
    ├── settings.py
    ├── data_loader.py
    └── history.py
├── deepNetworks
    ├── __init__.pyc
    ├── netArch.py
    └── model.py
├── matlab postprocessing
    ├── parsecsv.m
    ├── findStatResult.m
    ├── gradeCombination.m
    ├── dataSelection.m
    ├── makeFeatureFiles_wholeimage.m
    ├── setting.m
    ├── importfilecsv.m
    ├── makeFeatureFiles.m
    ├── makeFeatureFiles_Sliding.m
    ├── plotMRvsAccuracy.m
    ├── mapMaker.m
    ├── makeData.m
    ├── validation.m
    ├── learningCurves.m
    ├── learningAnalysis.m
    └── makeFilter.m
├── experiments
    ├── plothistory.py
    ├── mainTestOpt.py
    ├── mainTrainOpt.py
    └── trainmodel.py
├── README.md
└── LICENSE


/utils/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AziziShekoofeh/Time-series-Classification/HEAD/utils/__init__.pyc


--------------------------------------------------------------------------------
/deepNetworks/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AziziShekoofeh/Time-series-Classification/HEAD/deepNetworks/__init__.pyc


--------------------------------------------------------------------------------
/matlab postprocessing/parsecsv.m:
--------------------------------------------------------------------------------
1 | function [param_value,param_loc] = parsecsv(paramname, param_log_name, param_log_value)
2 | %PARSECSV Look up named parameters in a parsed parameter log.
3 | %   paramname is converted with cellstr; param_loc holds the positions in
4 | %   param_log_name of the names found, param_value the matching rows of
5 | %   param_log_value.
6 |     requested_names = cellstr(paramname);
7 |     [~, param_loc] = intersect(param_log_name, requested_names);
8 |     param_value = param_log_value(param_loc, :);
9 | end


--------------------------------------------------------------------------------
/experiments/plothistory.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | module_root = '..'
 4 | sys.path.append(module_root)
 5 | 
 6 | from utils.history import History
 7 | 
 8 | if __name__ == '__main__':
 9 |     logs_dir = 'DeepNetArch1-Div'
10 |     model_history = History(logs_dir)
11 | 
12 |     # model_history.plot_results(train=True, validation=False, params=False, model_visualization=False,
13 |     #                            loss=True, acc=False, auc=False, min_acc=0.4)
14 |     model_history.filtered_learning_curve(train=True, validation=False, params=False,
15 |                                           loss=True, acc=False, auc=False)
16 | 


--------------------------------------------------------------------------------
/utils/settings.py:
--------------------------------------------------------------------------------
 1 | import socket
 2 | 
 3 | if socket.gethostname() == 'purang23':
 4 |     project_folder = "e://tscRF_LSTM//Python//TeUS_RNN//TeUS_RNN//"
 5 |     intermediate_folder = project_folder + "Datasets"
 6 | 
 7 | if socket.gethostname() == 'minerva-VirtualBox':
 8 |     project_folder = "/media/sf_Host_Share/tscRF_LSTM/Python/TeUS_RNN/TeUS_RNN/"
 9 |     intermediate_folder = project_folder + "Datasets"
10 | 
11 | if socket.gethostname() == 'purang26':
12 |     project_folder = "/home/shekoofeh/Project/TeUS_RNN/TeUS_RNN/"
13 |     intermediate_folder = project_folder + "Datasets"
14 | 
15 | if socket.gethostname() == 'purang29':
16 |     project_folder = "/data/home/shekoofeh/TeUS_RNN/TeUS_RNN/"
17 |     intermediate_folder = project_folder + "Datasets"
18 | 


--------------------------------------------------------------------------------
/matlab postprocessing/findStatResult.m:
--------------------------------------------------------------------------------
 1 | function [TP, FP, TN, FN, sensitivity, specificity, misClassified] = findStatResult(Predicted_Label,Label)
 2 | %FINDSTATRESULT Confusion-matrix counts and rates for 0/1 labels.
 3 | %   Predicted_Label, Label : arrays of equal size; 1 is the positive class
 4 | %                            and 0 the negative class.
 5 | %   TP, FP, TN, FN         : counts of true/false positives/negatives.
 6 | %   sensitivity            : TP/(TP+FN); NaN when TP+FN is zero.
 7 | %   specificity            : TN/(TN+FP); NaN when TN+FP is zero.
 8 | %   misClassified          : indices where the prediction differs from Label.
 9 | 
10 | % To evaluate graded labels, binarize them first, e.g.:
11 | % Predicted_Label = (Predicted_Label == 2 | Predicted_Label == 3);
12 | % Label = (Label == 2 | Label == 3);
13 | 
14 | isWrong = (Predicted_Label ~= Label);
15 | predictedPos = (Predicted_Label == 1);
16 | predictedNeg = (Predicted_Label == 0);
17 | 
18 | TP = sum(~isWrong & predictedPos);
19 | FP = sum(isWrong & predictedPos);
20 | TN = sum(~isWrong & predictedNeg);
21 | FN = sum(isWrong & predictedNeg);
22 | 
23 | sensitivity = TP/(TP+FN);
24 | specificity = TN/(TN+FP);
25 | 
26 | misClassified = find(isWrong);
27 | 
28 | end


--------------------------------------------------------------------------------
/matlab postprocessing/gradeCombination.m:
--------------------------------------------------------------------------------
 1 | function [combinedGrade,CancerPercentageCombined] = gradeCombination(Predicted_Label,MRgrade,CancerPercentage)
 2 | %GRADECOMBINATION Override benign/cancerous predictions with the MR grade.
 3 | %   Predicted_Label  : from the cancer detection approach,
 4 | %                      1 means cancer, 0 means non-cancerous
 5 | %   MRgrade          : from MRI, 1 = low, 2 = moderate, 3 = high
 6 | %   CancerPercentage : per-core value that is overridden together with the label
 7 | %
 8 | %   A core predicted 0 with MRgrade 3 becomes label 1 with percentage 100;
 9 | %   a core predicted 1 with MRgrade 1 becomes label 0 with percentage 0.
10 | %   All other cores keep their predicted label and percentage.
11 | 
12 | %   @ Code composed by Shekoofeh Azizi on 01/02/2016 (UBC-RCL)
13 | %   @ Code modified by Shekoofeh Azizi on 29/05/2017 (UBC-RCL)
14 | 
15 | combinedGrade = Predicted_Label;
16 | CancerPercentageCombined = CancerPercentage;
17 | 
18 | for core = 1 : size(MRgrade,1)
19 |     switch MRgrade(core,1)
20 |         case 3
21 |             % High MR grade overrides a benign prediction.
22 |             % (A variant also required CancerPercentage(core,1) ~= 0.)
23 |             if Predicted_Label(core,1) == 0
24 |                 combinedGrade(core,1) = 1;
25 |                 CancerPercentageCombined(core,1) = 100;
26 |             end
27 |         case 1
28 |             % Low MR grade overrides a cancerous prediction.
29 |             if Predicted_Label(core,1) == 1
30 |                 combinedGrade(core,1) = 0;
31 |                 CancerPercentageCombined(core,1) = 0;
32 |             end
33 |     end
34 | end
35 | 
36 | end


--------------------------------------------------------------------------------
/matlab postprocessing/dataSelection.m:
--------------------------------------------------------------------------------
 1 | function [selected_idx_train, selected_idx_test] = dataSelection(D,L,TCL_limit,MTL_limit)
 2 | %DATASELECTION Split cores into a balanced training set and a test set.
 3 | %   Select data for Validation Test and Train based on the tumor size.
 4 | %
 5 | %   D         : not used by this function (kept for the existing callers)
 6 | %   L         : per-core info matrix; the columns read here are
 7 | %               1 (label), 2 (compared with TCL_limit), 4 (must equal 1),
 8 | %               7 (compared with MTL_limit) and 9 (sample number)
 9 | %   TCL_limit : lower bound on column 2 for cancerous training cores
10 | %   MTL_limit : lower bound on column 7 for cancerous training cores
11 | %
12 | %   selected_idx_train : sample numbers (column 9) of the training cores,
13 | %                        benign cores first, then cancerous cores
14 | %   selected_idx_test  : sample numbers (column 9) of all remaining cores
15 | %
16 | %   @ Code composed by Shekoofeh Azizi on 23/08/2016 (UBC-RCL)
17 | %   @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL)
18 | 
19 | %% 1- Train + Validation Data
20 | 
21 | % Select cancerous large cores
22 | L_ca = L(L(:,1) == 1,:);
23 | eligible_ca = find( L_ca(:,2) >= TCL_limit & L_ca(:,7) >= MTL_limit & L_ca(:,4) == 1);
24 | % Shuffle the eligible rows themselves. Previously the find result was
25 | % overwritten by a permutation of 1..n, which picked the first n cancerous
26 | % rows regardless of the limits.
27 | s = RandStream('mt19937ar','Seed',0);
28 | eligible_ca = eligible_ca(randperm(s,numel(eligible_ca)));
29 | selected_ca_train = L_ca(eligible_ca,9);
30 | 
31 | % Select benign cores
32 | L_be = L(L(:,1) == 0 & L(:,4) == 1,:);
33 | 
34 | % Fix a seed to generate a reproducible results
35 | s = RandStream('mt19937ar','Seed',10);
36 | % Equal number of cancerous and benign. size(...,1) counts rows; length()
37 | % would return the column count when there are fewer rows than columns.
38 | selected_be_train = randperm(s,size(L_be,1),numel(selected_ca_train));
39 | selected_be_train = L_be(selected_be_train,9);
40 | 
41 | %% Selected index
42 | 
43 | selected_idx_train = [selected_be_train; selected_ca_train];
44 | % Return sample numbers, like selected_idx_train. This equals the former
45 | % row indices whenever column 9 simply numbers the rows 1..N.
46 | selected_idx_test = L(~ismember(L(:,9),selected_idx_train),9);
47 | 
48 | overlap = intersect(selected_idx_train,selected_idx_test);
49 | if ~isempty(overlap)
50 |     warning('Error in dataselection');
51 |     display(overlap)
52 | end
53 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TimeSeries_Classification
 2 | ##### Time-Series binary classification using RNNs 
 3 | ##### Shekoofeh Azizi
 4 | 
 5 | 
 6 | ### Aim
 7 | In this project we aim to implement and compare different RNN implementations, including LSTM, GRU and vanilla RNN, for the task of time-series binary classification. We also visualize the gate activity of the different implementations to gain a better understanding of the underlying signals.
 8 | 
 9 | ### Data and results
10 | The data can be any time-series data with binary labels.
11 | 
12 | Results and methods are presented in detail in [1]: 
13 | (https://ieeexplore.ieee.org/abstract/document/8395313/)
14 | 
15 | 
16 | ### Credits
17 | Uses the Python Keras library (Keras 2.x) with the [TensorFlow](https://www.tensorflow.org/versions/r0.7/tutorials/recurrent/index.html#recurrent-neural-networks) backend.
18 | 
19 | 
20 | [1] Azizi, Shekoofeh, et al. "Deep Recurrent Neural Networks for Prostate Cancer Detection: Analysis of Temporal Enhanced Ultrasound." IEEE transactions on medical imaging (2018).
21 | 
22 | If you use this code in any capacity, please cite the above paper or:
23 | 
24 | @article{azizi2018deep,
25 |   title={Deep Recurrent Neural Networks for Prostate Cancer Detection: Analysis of Temporal Enhanced Ultrasound},
26 |   author={Azizi, Shekoofeh and Bayat, Sharareh and Yan, Pingkun and Tahmasebi, Amir and Kwak, Jin Tae and Xu, Sheng and Turkbey, Baris and Choyke, Peter and Pinto, Peter and Wood, Bradford and others},
27 |   journal={IEEE transactions on medical imaging},
28 |   year={2018},
29 |   publisher={IEEE}
30 | }
31 | 
32 | 
33 | ###### Tips for Running on GPU
34 | ######    - export CUDA_VISIBLE_DEVICES="1"
35 | ######    - THEANO_FLAGS=device=gpu1,floatX=float64 python  trainmodel.py
36 | 


--------------------------------------------------------------------------------
/matlab postprocessing/makeFeatureFiles_wholeimage.m:
--------------------------------------------------------------------------------
 1 | function [X_bmode, X_rf, infoCore, infoROI] = makeFeatureFiles_wholeimage(path_1, path_2)
 2 | %MAKEFEATUREFILES_WHOLEIMAGE Stack whole-image B-mode and RF feature matrices.
 3 | %  The whole-image data are unlabeled.
 4 | %  (Philips Dataset Including 255 Test 80 ROIs)
 5 | %
 6 | % INPUT
 7 | %   path_1 : folder containing PatientsInfo_All.xlsx
 8 | %   path_2 : folder containing the features_*_tsc_wholeimage_80 folders
 9 | %
10 | % OUTPUT
11 | %   X_bmode  : B-mode features of all files, stacked row-wise
12 | %   X_rf     : RF features of all files, stacked row-wise
13 | %   infoCore : one row per file: the selected patient-info columns followed
14 | %              by the number of B-mode feature rows of that file
15 | %   infoROI  : one row per B-mode feature row: the patient info of its file
16 | %
17 | %   @ Code composed by Shekoofeh Azizi on 29/11/2015 (UBC-RCL)
18 | %   @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL)
19 | %   @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL)
20 | 
21 | %% Read our Excel in format of table, contain the info of patients
22 | ExcelFileName = [path_1,'PatientsInfo_All.xlsx'];
23 | [num,txt,~] = xlsread(ExcelFileName);
24 | % cell2mat yields a char matrix, so all file names must have equal length.
25 | PatientsInfo_FileName = cell2mat(txt(2:end,3));
26 | PatientsInfo = num(:,[3, 4, 5, 6, 9, 19, 20, 15, 22]);
27 | 
28 | %% Creating Matrix of Features (X) for the Bmode
29 | [X_bmode, size_samples] = stackFeatures([path_2,'./features_bmode_tsc_wholeimage_80/'], PatientsInfo_FileName);
30 | infoCore = [PatientsInfo,size_samples];
31 | 
32 | % Repeat each file's patient info once per feature row of that file.
33 | noFiles = numel(size_samples);
34 | infoBlocks = cell(noFiles,1);
35 | for i = 1 : noFiles
36 |     infoBlocks{i} = repmat(PatientsInfo(i,:),[size_samples(i),1]);
37 | end
38 | infoROI = vertcat(infoBlocks{:});
39 | 
40 | %% Creating Matrix of Features (X) for the RF
41 | X_rf = stackFeatures([path_2,'./features_rf_tsc_wholeimage_80/'], PatientsInfo_FileName);
42 | 
43 | end
44 | 
45 | function [X, rowCounts] = stackFeatures(featureDir, fileNames)
46 | % Load the variable 'feature' from every file and stack them row-wise.
47 | noFiles = size(fileNames,1);
48 | blocks = cell(noFiles,1);
49 | rowCounts = zeros(noFiles,1);
50 | for i = 1 : noFiles
51 |     filename = [featureDir,'feature_',fileNames(i,:),'.mat'];
52 |     % Load into a struct: a file without 'feature' must raise an error
53 |     % instead of silently reusing the previous file's matrix.
54 |     contents = load(filename,'feature');
55 |     if ~isfield(contents,'feature')
56 |         error('makeFeatureFiles_wholeimage:missingFeature', ...
57 |             'File %s does not contain a variable named ''feature''.', filename);
58 |     end
59 |     blocks{i} = contents.feature;
60 |     rowCounts(i) = size(contents.feature,1);
61 | end
62 | X = vertcat(blocks{:});
63 | end


--------------------------------------------------------------------------------
/experiments/mainTestOpt.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | module_root = '..'
 5 | sys.path.append(module_root)
 6 | 
 7 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 9 | 
10 | from utils.data_loader import DataLoader
11 | from keras import backend as k
12 | from keras.models import load_model
13 | from utils import settings as s
14 | from sklearn.metrics import roc_auc_score
15 | import scipy.io as spio
16 | 
17 | 
18 | if __name__ == '__main__':
19 |     logs_dir = 'DeepNetArch1-Div'
20 |     sl = 100
21 |     ds_rate = 2
22 |     early_stopping = True
23 |     downsample = False
24 |     bmode = True
25 |     whole_map = False
26 | 
27 |     uid = '2017_10_20_09_48_15_arch3'
28 |     uid = '2017_10_19_09_55_17_arch2'
29 |     uid = '2017_10_19_20_54_29_arch1'
30 | 
31 |     if downsample:
32 |         ds = DataLoader(sl=sl, downsample=True, downsample_rate=ds_rate)
33 |         sl = int(sl/ds_rate)
34 |     else:
35 |         ds = DataLoader(sl=sl)
36 | 
37 |     if whole_map:
38 |         test_seq = ds.load_whole_test(bmode)
39 |     else:
40 |         test_seq, test_label = ds.load_test(bmode)
41 | 
42 |     model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints/opt')
43 |     model_checkpoint_file = os.path.join(model_checkpoint_dir, uid + '.hdf5')
44 |     model = load_model(model_checkpoint_file)
45 | 
46 |     test_predictions = model.predict(test_seq, verbose=1)
47 |     results = {'test_predictions': test_predictions}
48 | 
49 |     logs_dir = os.path.join(s.intermediate_folder, 'logs', logs_dir)
50 |     test_log_dir = os.path.join(logs_dir, 'test_logs/')
51 | 
52 |     if not whole_map:
53 |         test_auc = roc_auc_score(test_label, test_predictions)
54 |         spio.savemat(test_log_dir + uid + '.mat', results)
55 |         print(["Test AUC: ", test_auc])
56 |     else:
57 |         spio.savemat(test_log_dir + uid + '_whole.mat', results)
58 | 
59 |     # print('-' * 50)
60 |     # print('UID: {}'.format(uid))
61 |     # print('-' * 50)
62 | 
63 |     k.clear_session()
64 | 


--------------------------------------------------------------------------------
/matlab postprocessing/setting.m:
--------------------------------------------------------------------------------
 1 | %   Script to set path for the training logs
 2 | %   @ Code composed by Shekoofeh Azizi on 18/10/2017 (UBC-RCL)
 3 | 
 4 | function [subFiles, opt_param_log_name, opt_param_log_value, learn_log] = setting(model_type)
 5 | %SETTING Add the log folders of one model type to the path and load its logs.
 6 | %   model_type          : 'lstm', 'gru' or 'rnn'
 7 | %   subFiles            : names of the files in the model's csv_logs folder
 8 | %   opt_param_log_name  : first column of the optimum-parameter csv file
 9 | %   opt_param_log_value : second column of the optimum-parameter csv file
10 | %   learn_log           : result of importdata on the matching .log file
11 | 
12 | % get root of current file
13 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\';
14 | log_dir_sub = '/'; % or /opt/ or /bmode/ or /
15 | 
16 | % RF model id: Optimum Params
17 | switch model_type
18 |     case 'lstm'
19 |         sub_dir = 'DeepNetArch1-Div';
20 |         opt_model_id = '2017_09_10_21_41_54_arch1.csv';
21 |     case 'gru'
22 |         sub_dir = 'DeepNetArch2-Div';
23 |         opt_model_id = '2017_09_11_18_58_33_arch2.csv';
24 |     case 'rnn'
25 |         sub_dir = 'DeepNetArch3-Div';
26 |         opt_model_id = '2017_09_07_23_03_26_arch3.csv';
27 |     otherwise
28 |         error('setting:unknownModelType', ...
29 |             'Unknown model_type; expected ''lstm'', ''gru'' or ''rnn''.');
30 | end
31 | 
32 | % Add Path
33 | root = [log_dir, sub_dir, log_dir_sub];
34 | addpath([root '/train_logs']);
35 | addpath([root '/csv_logs']);
36 | 
37 | % Each model type reads its own csv file; the gru and rnn branches used to
38 | % read the lstm file.
39 | [opt_param_log_name, opt_param_log_value]  = importfilecsv(opt_model_id);
40 | % The .log file has the csv file's name without its extension.
41 | [~, opt_model_uid] = fileparts(opt_model_id);
42 | learn_log = importdata([opt_model_uid,'.log']);
43 | 
44 | % Get a list of all files and folders in this folder.
45 | files = dir([log_dir, sub_dir, '\', '\csv_logs\']);
46 | % Get a logical vector that tells which is a directory.
47 | dirFlags = [files(:).isdir];
48 | % Keep only the entries that are not directories.
49 | subFiles = {files(~dirFlags).name}';
50 | 
51 | end


--------------------------------------------------------------------------------
/matlab postprocessing/importfilecsv.m:
--------------------------------------------------------------------------------
 1 | function [optimizer,sgd] = importfilecsv(filename, startRow, endRow)
 2 | %IMPORTFILECSV Import two comma-separated text columns from a file.
 3 | %   [OPTIMIZER,SGD] = IMPORTFILECSV(FILENAME) Reads all rows of text file
 4 | %   FILENAME. OPTIMIZER is the first column and SGD the second, each
 5 | %   returned as a cell array of strings.
 6 | %
 7 | %   [OPTIMIZER,SGD] = IMPORTFILECSV(FILENAME, STARTROW, ENDROW) Reads data
 8 | %   from rows STARTROW through ENDROW of text file FILENAME. STARTROW and
 9 | %   ENDROW may be vectors describing several row blocks. When fewer than
10 | %   three arguments are given, both default to the whole file.
11 | %
12 | % Example:
13 | %   [optimizer,sgd] = importfilecsv('2017_09_15_02_13_19_arch1.csv',1, 8);
14 | %
15 | %    See also TEXTSCAN.
16 | 
17 | % Originally auto-generated by MATLAB on 2017/10/19 10:41:26
18 | 
19 | %% Initialize variables.
20 | delimiter = ',';
21 | if nargin<=2
22 |     startRow = 1;
23 |     endRow = inf;
24 | end
25 | 
26 | %% Format string for each line of text:
27 | %   column1: text (%s)
28 | %	column2: text (%s)
29 | %   the rest of the line is read into a third field that is not returned
30 | % For more information, see the TEXTSCAN documentation.
31 | formatSpec = '%s%s%[^\n\r]';
32 | 
33 | %% Open the text file.
34 | fileID = fopen(filename,'r');
35 | if fileID == -1
36 |     error('importfilecsv:cannotOpen', 'Could not open file "%s" for reading.', filename);
37 | end
38 | % Close the file when the function exits, also when textscan raises an error.
39 | closeFile = onCleanup(@() fclose(fileID)); %#ok<NASGU>
40 | 
41 | %% Read columns of data according to format string.
42 | % This call is based on the structure of the file used to generate this
43 | % code. If an error occurs for a different file, try regenerating the code
44 | % from the Import Tool.
45 | dataArray = textscan(fileID, formatSpec, endRow(1)-startRow(1)+1, 'Delimiter', delimiter, 'HeaderLines', startRow(1)-1, 'ReturnOnError', false);
46 | for block=2:length(startRow)
47 |     frewind(fileID);
48 |     dataArrayBlock = textscan(fileID, formatSpec, endRow(block)-startRow(block)+1, 'Delimiter', delimiter, 'HeaderLines', startRow(block)-1, 'ReturnOnError', false);
49 |     for col=1:length(dataArray)
50 |         dataArray{col} = [dataArray{col};dataArrayBlock{col}];
51 |     end
52 | end
53 | 
54 | %% Allocate imported array to column variable names
55 | optimizer = dataArray{:, 1};
56 | sgd = dataArray{:, 2};


--------------------------------------------------------------------------------
/matlab postprocessing/makeFeatureFiles.m:
--------------------------------------------------------------------------------
 1 | function [X_bmode, X_rf, infoROI, infoCore, PatientsInfo_FileName] = makeFeatureFiles(path_1, path_2)
 2 | %MAKEFEATUREFILES Stack the fixed-ROI B-mode and RF feature matrices.
 3 | %  (Philips Dataset Including 255 Test 80 ROIs)
 4 | %
 5 | % INPUT
 6 | %   path_1 : folder containing PatientsInfo_All.xlsx
 7 | %   path_2 : folder containing the features_*_tsc_ROI_80 folders
 8 | %
 9 | % OUTPUT
10 | %   X_bmode  : B-mode features of all files, stacked row-wise
11 | %   X_rf     : RF features of all files, stacked row-wise; rows that
12 | %              contain a NaN are set to zero
13 | %   infoROI  : one row per RF feature row: patient info (columns 1-9)
14 | %              followed by the ROI number within its file (column 10)
15 | %   infoCore : one row per file: patient info (columns 1-9) followed by
16 | %              the number of RF feature rows of that file (column 10)
17 | %   PatientsInfo_FileName : char matrix with one file name per row
18 | %
19 | % Patient Info:
20 | % Column 1: Label
21 | % Column 2: Tumor in Core length
22 | % Column 3: MR Label
23 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch)
24 | % Column 5: Gleason Score
25 | % Column 6: Distance to Boundary (mm)
26 | % Column 7: MR greatest size
27 | % Column 8: Sagittal GS
28 | % Column 9: Sample number
29 | % Column 10: ROI number (infoROI) / Sample Size (infoCore)
30 | %
31 | %   @ Code composed by Shekoofeh Azizi on 22/08/2016 (UBC-RCL)
32 | %   @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL)
33 | 
34 | %% Read our Excel in format of table, contain the info of patients
35 | ExcelFileName = [path_1,'PatientsInfo_All.xlsx'];
36 | [num,txt,~] = xlsread(ExcelFileName);
37 | % cell2mat yields a char matrix, so all file names must have equal length.
38 | PatientsInfo_FileName = cell2mat(txt(2:end,3));
39 | PatientsInfo = num(:,[3, 4, 5, 6, 9, 19, 20, 15, 22]);
40 | 
41 | %% Creating Matrix of Features (X) for the RF
42 | [X_rf, size_samples] = stackFeatures([path_2,'./features_rf_tsc_ROI_80/'], PatientsInfo_FileName);
43 | infoCore = [PatientsInfo,size_samples];
44 | 
45 | % Repeat each file's patient info per feature row and number the ROIs.
46 | noFiles = numel(size_samples);
47 | infoBlocks = cell(noFiles,1);
48 | for i = 1 : noFiles
49 |     ROI_num = 1:size_samples(i);
50 |     infoBlocks{i} = [repmat(PatientsInfo(i,:),[size_samples(i),1]),ROI_num'];
51 | end
52 | infoROI = vertcat(infoBlocks{:});
53 | 
54 | %% Creating Matrix of Features (X) for the Bmode
55 | X_bmode = stackFeatures([path_2,'./features_bmode_tsc_ROI_80/'], PatientsInfo_FileName);
56 | 
57 | % Rows with NaN values are kept in place but zeroed, so that X_rf stays
58 | % aligned with infoROI.
59 | nanRows = any(isnan(X_rf),2);
60 | X_rf(nanRows,:) = 0;
61 | 
62 | end
63 | 
64 | function [X, rowCounts] = stackFeatures(featureDir, fileNames)
65 | % Load the variable 'feature' from every file and stack them row-wise.
66 | noFiles = size(fileNames,1);
67 | blocks = cell(noFiles,1);
68 | rowCounts = zeros(noFiles,1);
69 | for i = 1 : noFiles
70 |     filename = [featureDir,'feature_',fileNames(i,:),'.mat'];
71 |     % Load into a struct: a file without 'feature' must raise an error
72 |     % instead of silently reusing the previous file's matrix.
73 |     contents = load(filename,'feature');
74 |     if ~isfield(contents,'feature')
75 |         error('makeFeatureFiles:missingFeature', ...
76 |             'File %s does not contain a variable named ''feature''.', filename);
77 |     end
78 |     blocks{i} = contents.feature;
79 |     rowCounts(i) = size(contents.feature,1);
80 | end
81 | X = vertcat(blocks{:});
82 | end


--------------------------------------------------------------------------------
/matlab postprocessing/makeFeatureFiles_Sliding.m:
--------------------------------------------------------------------------------
 1 | function [X_bmode, X_rf, infoROI, infoCore, PatientsInfo_FileName] = makeFeatureFiles_Sliding(path_1, path_2)
 2 | %MAKEFEATUREFILES_SLIDING Stack the sliding-ROI B-mode and RF feature matrices.
 3 | %  (Philips Dataset Including 255 Test 80 ROIs)
 4 | %
 5 | % INPUT
 6 | %   path_1 : folder containing PatientsInfo_All.xlsx
 7 | %   path_2 : folder containing the features_*_tsc_ROI_Sliding folders
 8 | %
 9 | % OUTPUT
10 | %   X_bmode  : B-mode features of all files, stacked row-wise
11 | %   X_rf     : RF features of all files, stacked row-wise
12 | %   infoROI  : one row per RF feature row: patient info (columns 1-9)
13 | %              followed by the ROI number within its file (column 10)
14 | %   infoCore : one row per file: patient info (columns 1-9) followed by
15 | %              the number of RF feature rows of that file (column 10)
16 | %   PatientsInfo_FileName : char matrix with one file name per row
17 | %
18 | % Patient Info:
19 | % Column 1: Label
20 | % Column 2: Tumor in Core length
21 | % Column 3: MR Label
22 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch)
23 | % Column 5: Gleason Score
24 | % Column 6: Distance to Boundary (mm)
25 | % Column 7: MR greatest size
26 | % Column 8: Sagittal GS
27 | % Column 9: Sample number
28 | % Column 10: ROI number (infoROI) / Sample Size (infoCore)
29 | %
30 | %   @ Code composed by Shekoofeh Azizi on 22/08/2016 (UBC-RCL)
31 | %   @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL)
32 | %   @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL)
33 | 
34 | %% Read our Excel in format of table, contain the info of patients
35 | ExcelFileName = [path_1,'PatientsInfo_All.xlsx'];
36 | [num,txt,~] = xlsread(ExcelFileName);
37 | % cell2mat yields a char matrix, so all file names must have equal length.
38 | PatientsInfo_FileName = cell2mat(txt(2:end,3));
39 | PatientsInfo = num(:,[3, 4, 5, 6, 9, 19, 20, 15, 22]);
40 | 
41 | %% Creating Matrix of Features (X) for the RF
42 | [X_rf, size_samples] = stackFeatures([path_2,'./features_rf_tsc_ROI_Sliding/'], PatientsInfo_FileName);
43 | infoCore = [PatientsInfo,size_samples];
44 | 
45 | % Repeat each file's patient info per feature row and number the ROIs.
46 | noFiles = numel(size_samples);
47 | infoBlocks = cell(noFiles,1);
48 | for i = 1 : noFiles
49 |     ROI_num = 1:size_samples(i);
50 |     infoBlocks{i} = [repmat(PatientsInfo(i,:),[size_samples(i),1]),ROI_num'];
51 | end
52 | infoROI = vertcat(infoBlocks{:});
53 | 
54 | %% Creating Matrix of Features (X) for the Bmode
55 | X_bmode = stackFeatures([path_2,'./features_bmode_tsc_ROI_Sliding/'], PatientsInfo_FileName);
56 | 
57 | end
58 | 
59 | function [X, rowCounts] = stackFeatures(featureDir, fileNames)
60 | % Load the variable 'feature' from every file and stack them row-wise.
61 | noFiles = size(fileNames,1);
62 | blocks = cell(noFiles,1);
63 | rowCounts = zeros(noFiles,1);
64 | for i = 1 : noFiles
65 |     filename = [featureDir,'feature_',fileNames(i,:),'.mat'];
66 |     % Load into a struct: a file without 'feature' must raise an error
67 |     % instead of silently reusing the previous file's matrix.
68 |     contents = load(filename,'feature');
69 |     if ~isfield(contents,'feature')
70 |         error('makeFeatureFiles_Sliding:missingFeature', ...
71 |             'File %s does not contain a variable named ''feature''.', filename);
72 |     end
73 |     blocks{i} = contents.feature;
74 |     rowCounts(i) = size(contents.feature,1);
75 | end
76 | X = vertcat(blocks{:});
77 | end


--------------------------------------------------------------------------------
/matlab postprocessing/plotMRvsAccuracy.m:
--------------------------------------------------------------------------------
 1 | function [t,AUC] = plotMRvsAccuracy(MRsize,predictedGrade,realGrade,CancerPercentage)
 2 | %PLOTMRVSACCURACY Plot ROC curves and AUC against an MR tumor-size threshold.
 3 | %   For every threshold in t, the cores with MRsize >= threshold (excluding
 4 | %   MRsize == 100) are kept and the AUC of CancerPercentage against
 5 | %   realGrade is computed with perfcurve (positive class 1). The ROC curve
 6 | %   of every second threshold is drawn in a first figure; AUC versus
 7 | %   threshold is drawn in a second figure.
 8 | %
 9 | %   t   : the thresholds that were evaluated
10 | %   AUC : area under the ROC curve for each threshold
11 | 
12 | t = [0 : 0.3 : 1,1.6,1.8, 2:0.3:2.7];
13 | noThresholds = size(t,2);
14 | 
15 | % ACC, SEN and SPEC are only needed by the optional plots commented out below.
16 | ACC = zeros(1,noThresholds);
17 | AUC = zeros(1,noThresholds);
18 | SEN = zeros(1,noThresholds);
19 | SPEC = zeros(1,noThresholds);
20 | 
21 | % One color per drawn ROC curve (only every second threshold is drawn).
22 | Cmat = [1.0 0.5 0.5
23 |         0.5 0.5 1.0
24 |         0.4 1.0 0.4
25 |         0.5 0.4 0.6];
26 | 
27 | figure1 = figure('Color','None');
28 | axes1 = axes('Parent',figure1,'FontSize',13,'FontName','Times');
29 | box(axes1,'on');
30 | hold on
31 | for cnt = 1 : noThresholds
32 | 
33 |     filter_Core = find(MRsize >= t(cnt) & MRsize ~=100);
34 |     L1 = predictedGrade(filter_Core,1);
35 |     L2 = realGrade(filter_Core,1);
36 |     L3 = CancerPercentage(filter_Core,1);
37 |     [~, ~, ~, ~,SEN(cnt),SPEC(cnt)] = findStatResult(L1,L2);
38 |     ACC(cnt) = (1 - sum(L1~=L2) / numel(filter_Core));
39 |     [X_ROC,Y_ROC,~,AUC(cnt)] = perfcurve(L2,L3,1);
40 |     if( mod(cnt,2) == 0 )
41 |         plot(X_ROC,Y_ROC,'Color',Cmat(cnt/2,:),'LineWidth',2.0,'LineStyle','--',...
42 |             'DisplayName',sprintf('Larger than %2.2g cm',t(cnt)))
43 |     end
44 | 
45 | end
46 | % The true positive rate is the sensitivity (the label used to say Specificity).
47 | xlabel('False positive rate (1-Specificity)','Interpreter','latex','FontSize',13);
48 | ylabel('True positive rate (Sensitivity)','Interpreter','latex','FontSize',13);
49 | legend(axes1,'show');
50 | hold off
51 | 
52 | % Create figure
53 | figure2 = figure('Color',[1 1 1]);
54 | axes2 = axes('Parent',figure2,'FontSize',13,'FontName','Times');
55 | box(axes2,'on');
56 | hold(axes2,'all');
57 | 
58 | % Create scatter
59 | % h1=scatter(t,ACC,'MarkerEdgeColor',[1 0.5 0.5],'DisplayName','Accuracy'); plot(t,ACC,'LineStyle',':','Color',[1 0 0]);
60 | % h2=scatter(t,SPEC,'MarkerEdgeColor',[0.5 0.5 1],'DisplayName','Specificty'); plot(t,SPEC,'LineStyle',':','Color',[0 0 1]);
61 | % h3=scatter(t,SEN,'MarkerEdgeColor',[0.5 1 0.5],'DisplayName','Sensitivity'); plot(t,SEN,'LineStyle',':','Color',[0 1 0]);
62 | h4=scatter(t,AUC,'MarkerEdgeColor',[0.5 0.5 1],'DisplayName','AUC','LineWidth',1.5); plot(t,AUC,'LineStyle','--','Color',[0.5 0.5 1],'LineWidth',1.5);
63 | xlabel('Greatest Tumor Length in MRI','Interpreter','latex','FontSize',13);
64 | ylabel('Area Under the Curve (AUC)','Interpreter','latex','FontSize',13);
65 | % legend([h1,h4]);


--------------------------------------------------------------------------------
/matlab postprocessing/mapMaker.m:
--------------------------------------------------------------------------------
 1 | %% Make Map for the whole image and colormap generation based on the needed structure!
 2 | %  @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL)
 3 | %
 4 | %  Splits the whole-image test predictions into one probability map per
 5 | %  file and saves them as the struct array s in RNN_Bmode_Wholemap.mat.
 6 | 
 7 | %%
 8 | clear all %#ok<CLALL>
 9 | close all
10 | clc
11 | 
12 | %% Initializing: Define Parameters and Reading Data
13 | % Not named "path": that would shadow MATLAB's path function in the workspace.
14 | feature_dir = 'E:\Feature Extraction\Philips Dataset\Extracted Features\';
15 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\DeepNetArch3-Div\test_logs\';
16 | prediction_file = '2017_10_20_09_48_15_arch3_whole.mat';
17 | % prediction_file = '2017_10_19_09_55_17_arch2_whole.mat';
18 | % prediction_file = '2017_10_19_20_54_29_arch1_whole.mat';
19 | 
20 | % ExcelFileName : Name of Excel which contain our patients info
21 | ExcelFileName = 'PatientsInfo_All.xlsx';
22 | 
23 | % Read filenames and Patient Info
24 | [num,txt,raw] = xlsread(ExcelFileName);
25 | 
26 | PatientsInfo_FileName = txt(2:end,3);
27 | PatientsInfo_FileName = cell2mat(PatientsInfo_FileName);
28 | 
29 | % Load probability maps
30 | load([log_dir, prediction_file]);        % provides test_predictions
31 | load('./Datasets/D_Whole_Labels.mat');   % provides infoCorew
32 | 
33 | prob_estimates_test = test_predictions;
34 | SampSize_test = infoCorew(:,10);
35 | 
36 | % First and last prediction row of every file.
37 | rowEnd = cumsum(SampSize_test);
38 | rowStart = rowEnd - SampSize_test + 1;
39 | 
40 | % Create structure containing filenames and corresponding probability maps.
41 | % Every entry starts with a 1x1 zero map as placeholder.
42 | noFiles = size(PatientsInfo_FileName,1);
43 | s = repmat(struct('filename','rf00000000000000','probmap',zeros(1,1)), noFiles, 1);
44 | 
45 | for i=1:noFiles
46 | 
47 |     s(i).filename = PatientsInfo_FileName(i,:);
48 | 
49 |     % Load into a struct so that the file cannot overwrite loop variables.
50 |     limit_file = [feature_dir,'/features_wholeimage_limits_80_new/feature_limit_',PatientsInfo_FileName(i,:),'.mat'];
51 |     limits = load(limit_file);
52 | 
53 |     x_range = (limits.x_lim_right - limits.x_lim_left +0.5)*2;
54 |     y_range = (limits.y_lim_right - limits.y_lim_left +0.5)*2;
55 | 
56 |     % Files without usable limits (the original note mentions datasets 1-6)
57 |     % keep the placeholder map. Previously such an entry inherited the map
58 |     % of the file processed before it.
59 |     if(x_range < 1 || y_range < 1)
60 |         continue;
61 |     end
62 | 
63 |     p_temp = prob_estimates_test(rowStart(i) : rowEnd(i),1);
64 |     p_temp = reshape(p_temp,[y_range x_range]);
65 |     probabilitymap = flip(p_temp);
66 | 
67 |     probabilitymap = imresize( probabilitymap, 'Scale', 0.5 );  %% Scale for 0.5 mm ROI s
68 | 
69 |     s(i).probmap = probabilitymap;
70 |     fprintf('Processed file %d of %d\n', i, noFiles);
71 | end
72 | 
73 | 
74 | save RNN_Bmode_Wholemap.mat s


--------------------------------------------------------------------------------
/matlab postprocessing/makeData.m:
--------------------------------------------------------------------------------
 1 | %% makeData: Data division for large cores in LSTM implementation
 2 | %% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 3 | %  Builds the fixed-ROI, sliding-ROI and whole-image feature matrices,
 4 | %  splits the fixed and sliding data into train and test parts and saves
 5 | %  everything below [path_1,'Datasets\'].
 6 | 
 7 | % Patient Info:
 8 | % Column 1: Label
 9 | % Column 2: Tumor in Core length
10 | % Column 3: MR Label
11 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch)
12 | % Column 5: Gleason Score
13 | % Column 6: Distance to Boundary (mm)
14 | % Column 7: MR greatest size
15 | % Column 8: Sagittal GS
16 | % Column 9: Sample number
17 | % Column 10: ROI number/ Column 10: Sample Size
18 | 
19 | % @ Code composed by Shekoofeh Azizi on 24/10/2016 (UBC-RCL)
20 | % @ Code modified by Shekoofeh Azizi on 23/12/2016 (UBC-RCL)
21 | % @ Code modified by Shekoofeh Azizi on 19/05/2017 (UBC-RCL)
22 | % @ Code modified by Shekoofeh Azizi on 15/08/2017 (UBC-RCL)
23 | 
24 | %%
25 | clc
26 | close all
27 | clear all %#ok<CLALL>
28 | 
29 | %% Initialization
30 | path_1 = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\matlab postprocessing\Data preparation for RNN based methods\';  % Saving Path: path_1
31 | path_2 = 'E:\Feature Extraction\Philips Dataset\Extracted Features\';    % Feature Path: Path_2
32 | 
33 | MTL_limit = 0.0;  % Large cores
34 | TCL_limit = 4.00; % more than 25% be cancerous
35 | 
36 | %% Reading Data
37 | % 1- Features of the sliding ROIs
38 | [Xs_bmode, Xs_rf, infoROIs, infoCores, ~] = makeFeatureFiles_Sliding(path_1, path_2);
39 | 
40 | % 2- Features of the fixed ROIs
41 | [Xf_bmode, Xf_rf, infoROIf, infoCoref, ~] = makeFeatureFiles(path_1, path_2);
42 | 
43 | % 3- Features of the whole image
44 | [Xw_bmode, Xw_rf, infoCorew, infoROIw] = makeFeatureFiles_wholeimage(path_1, path_2);
45 | 
46 | %% Partitioning based on the tumor size
47 | % dataSelection returns the training and test selections that are matched
48 | % against column 9 (sample number) of the ROI info below.
49 | [selected_idx_train, selected_idx_test] = dataSelection(Xf_bmode,infoCoref,TCL_limit,MTL_limit);
50 | 
51 | %% Save D_Fixed
52 | % B-mode rows come first, RF rows second; the labels are repeated to match.
53 | idx_train = find(ismember(infoROIf(:,9),selected_idx_train));
54 | idx_test  = find(ismember(infoROIf(:,9),selected_idx_test));
55 | Df_train  = vertcat(Xf_bmode(idx_train,:), Xf_rf(idx_train,:));
56 | Df_test   = vertcat(Xf_bmode(idx_test,:), Xf_rf(idx_test,:));
57 | Lf_train  = repmat(infoROIf(idx_train,:), [2,1]);
58 | Lf_test   = repmat(infoROIf(idx_test,:), [2,1]);
59 | save([path_1,'Datasets\D_Fixed.mat'],'Df_train','Lf_train','Df_test','Lf_test')
60 | 
61 | %% Save D_Sliding
62 | idx_train = find(ismember(infoROIs(:,9),selected_idx_train));
63 | idx_test  = find(ismember(infoROIs(:,9),selected_idx_test));
64 | Ds_train = vertcat(Xs_bmode(idx_train,:), Xs_rf(idx_train,:));
65 | Ds_test  = vertcat(Xs_bmode(idx_test,:), Xs_rf(idx_test,:));
66 | Ls_train = repmat(infoROIs(idx_train,:), [2,1]);
67 | Ls_test  = repmat(infoROIs(idx_test,:), [2,1]);
68 | save([path_1,'Datasets\D_Sliding.mat'],'Ds_train','Ls_train','Ds_test','Ls_test')
69 | 
70 | %% Save D_Whole
71 | % The whole-image matrices are stored transposed.
72 | Dw_bmode = Xw_bmode';
73 | save([path_1,'Datasets\D_Whole_Bmode.mat'],'Dw_bmode','-v7.3')
74 | Dw_rf = Xw_rf';
75 | save([path_1,'Datasets\D_Whole_RF.mat'],'Dw_rf','-v7.3')
76 | save([path_1,'Datasets\D_Whole_Labels.mat'],'infoCorew')


--------------------------------------------------------------------------------
/experiments/mainTrainOpt.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import time
 3 | import os
 4 | 
 5 | module_root = '..'
 6 | sys.path.append(module_root)
 7 | 
 8 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
 9 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
10 | 
11 | from deepNetworks.model import RNNModel
12 | from utils.data_loader import DataLoader
13 | from deepNetworks.netArch import DeepNetArch1, DeepNetArch2, DeepNetArch3, DeepNetArch1L1, DeepNetArch2L1, \
14 |     DeepNetArch3L1
15 | from keras import backend as k
16 | from utils.history import History
17 | 
if __name__ == '__main__':
    # Re-train the optimum model found in a previous grid search and evaluate
    # it on the test set.

    # ---- Experiment configuration ----
    logs_dir = 'DeepNetArch1-Div'
    model_type = 'DeepNetArch1'
    sl = 100
    ds_rate = 2
    early_stopping = True
    downsample = False
    bmode = True
    subdir = '/bmode/'

    # Parameters of the optimum model recorded under logs_dir
    # (acc=True: presumably ranks the logged runs by accuracy -- confirm in
    # utils.history).
    model_history = History(logs_dir)
    opt_params, opt_model_uid = model_history.find_opt_model(auc=False, loss=False, acc=True)

    if downsample:
        ds = DataLoader(sl=sl, downsample=True, downsample_rate=ds_rate)
        # The network sees the shortened sequence.
        sl = int(sl/ds_rate)
    else:
        ds = DataLoader(sl=sl)

    # Every architecture takes the same constructor arguments, so pick the
    # class from a table instead of repeating the call once per architecture.
    architectures = {
        'DeepNetArch1': DeepNetArch1,
        'DeepNetArch2': DeepNetArch2,
        'DeepNetArch3': DeepNetArch3,
        'DeepNetArch1L1': DeepNetArch1L1,
        'DeepNetArch2L1': DeepNetArch2L1,
        'DeepNetArch3L1': DeepNetArch3L1,
    }
    if model_type not in architectures:
        # Fail here with a clear message rather than with a NameError on
        # `arch` further down.
        raise ValueError('Unknown model_type {!r}; expected one of {}'.format(
            model_type, sorted(architectures)))

    arch = architectures[model_type](sl=sl,
                                     initial_lr=float(opt_params['initial_lr']),
                                     l2_reg=float(opt_params['l2_regulizer']),
                                     dropout=float(opt_params['dropout']),
                                     rec_dropout=float(opt_params['rec_dropout']),
                                     optimizer=opt_params['optimizer'],
                                     summary=1)

    model, model_id = arch.arch_generator()

    rnn_model = RNNModel(ds, model, opt_params, log_dir=logs_dir + subdir + str(sl), division=True, bmode=bmode)
    # Unique run id: timestamp followed by the architecture id.
    uid = time.strftime("%Y_%m_%d_%H_%M_%S_") + model_id
    print('-' * 50)
    print('UID: {}'.format(uid))
    print('-' * 50)

    rnn_model.opt_model_train(uid=uid, batch_size=int(opt_params['batch_size']), es=early_stopping,
                              nb_epoch=int(opt_params['n_epoch']), verbose=2)

    test_predictions = rnn_model.predict_test(uid=uid)
    k.clear_session()
77 | 


--------------------------------------------------------------------------------
/matlab postprocessing/validation.m:
--------------------------------------------------------------------------------
  1 | %% Comparison of Bmode and RF data
  2 | % @ Code composed by Shekoofeh Azizi on 22/05/2017 (UBC-RCL)
  3 | % @ Code modified by Shekoofeh Azizi on 01/06/2017 (UBC-RCL)
  4 | % @ Code modified by Shekoofeh Azizi on 20/10/2017 (UBC-RCL)
  5 | 
  6 | %%
  7 | % Patient Info:
  8 | % Column 1: Label
  9 | % Column 2: Tumor in Core length
 10 | % Column 3: MR Label
 11 | % Column 4: Axial and Sagittal Match (1: Match 0: Mismatch)
 12 | % Column 5: Gleason Score
 13 | % Column 6: Distance to Boundary (mm)
 14 | % Column 7: MR greatest size
 15 | % Column 8: Sagittal GS
 16 | % Column 9: Sample number
 17 | % Column 10: ROI number/ Column 10: Sample Size
 18 | 
 19 | %%
 20 | 
 21 | clear all %#ok<CLALL>
 22 | close all
 23 | clc
 24 | 
 25 | noROI = 80;
 26 | filtering = 'nofilter';
 27 | value = 2;
 28 | 
 29 | %% Loading RF model and results
 30 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\DeepNetArch1-Div\test_logs\';
 31 | 
 32 | % Bmode model id
 33 | % filename = '2017_10_20_09_48_15_arch3.mat'; % Threshold .5  Wholemap .8
 34 | % filename = '2017_10_19_09_55_17_arch2.mat'; % Threshold .4
 35 | % filename = '2017_10_19_20_54_29_arch1.mat'; % Threshold .5  Wholemap .8
 36 | 
 37 | % RF model id
 38 | % filename = '2017_10_05_14_48_49_arch3.mat'; % Threshold .5  Wholemap .8
 39 | % filename = '2017_10_05_11_28_48_arch2.mat'; % Threshold .5
 40 | % filename = '2017_10_05_11_29_04_arch1.mat'; % Threshold .4  Wholemap .8
 41 | filename = '2017_10_08_19_38_47_arch1.mat';
 42 | 
 43 | load([log_dir, filename]);
 44 | load('.\Datasets\D_Fixed')
 45 | 
 46 | L_TEST = Lf_test(1:size(Lf_test)/2,:);
 47 | noFiles_test = size(L_TEST,1)/noROI;
 48 | estimatedProb = test_predictions;
 49 | 
 50 | % Find the optimm threshold using ROC curve
 51 | [X_ROC,Y_ROC,T,~,OPTROCPT] = perfcurve(L_TEST(:,1),estimatedProb,1);
 52 | Threshold = T((X_ROC==OPTROCPT(1))&(Y_ROC==OPTROCPT(2))); 
 53 | % Threshold = 0.5;
 54 | predictedL = (estimatedProb>=Threshold);
 55 | 
 56 | CancerPercentage = [];
 57 | CancerEstimate = [];
 58 | L_Core = [];
 59 | for i = 1 : noFiles_test 
 60 |     predict_label = predictedL((i-1)*noROI+1:i*noROI,1);
 61 |     cancer_estimate = estimatedProb((i-1)*noROI+1:i*noROI,1);
 62 |     CancerPercentage(i)=100*length(find(predict_label==1))/noROI; %#ok<SAGROW>
 63 |     CancerEstimate(i)=100*sum(cancer_estimate(:,1))/noROI; %#ok<SAGROW>
 64 |     L_Core(i,:) = L_TEST((i-1)*noROI+1,:); %#ok<SAGROW>
 65 | end
 66 | CancerPercentage = CancerPercentage';
 67 | CancerEstimate = CancerEstimate';
 68 | results =  CancerPercentage;
 69 | 
 70 | %% Results Evalution
 71 | hold on
 72 | filter = makeFilter(L_Core,filtering,value);
 73 | [X_ROC,Y_ROC,T,AUC_Core,OPTROCPT] = perfcurve(L_Core(filter,1),double(results(filter,:)),1);
 74 | plot(X_ROC,Y_ROC,'Color','b','LineWidth',1.5,'DisplayName','RF data')
 75 | xlabel('False positive rate (1-Specificity)'); 
 76 | ylabel('True positive rate (Specificity)')
 77 | 
 78 | noFiles_filter = size(L_Core(filter,1),1);
 79 | cp = CancerPercentage(filter,1);
 80 | sen = OPTROCPT(2);
 81 | spe = 1-OPTROCPT(1);
 82 | Threshold_filter = T((X_ROC==OPTROCPT(1))&(Y_ROC==OPTROCPT(2)));
 83 | acc = sum(L_Core(filter,1) == (cp >= Threshold_filter))/size(cp,1);
 84 | display('RF Results')
 85 | fprintf('Accuracy: %d   AUC: %d\n', acc, AUC_Core);
 86 | fprintf('Sensitivity: %d\n', sen);
 87 | fprintf('Specificity: %d\n', spe);
 88 | predictedL= (cp >= Threshold_filter); 
 89 | CancerP = results(filter,1);
 90 | 
 91 | %%
 92 | % Plot AUC vs. MR length for binary classification
 93 | MRsize = L_Core(filter,7);
 94 | realGrade = L_Core(filter,1);
 95 | [~,AUC_Predicted] = plotMRvsAccuracy(MRsize,predictedL,realGrade,CancerP);
 96 | 
 97 | 
 98 | %% Binary classification + MR grading
 99 | MRgrade = L_Core(filter,3);
100 | [~,CancerPercentageCombined] = gradeCombination(predictedL,MRgrade,CancerP);
101 | [~,~,~,AUC_Core,~] = perfcurve(L_Core(filter,1),CancerPercentageCombined,1);
102 | fprintf('AUC Combined: %d\n', AUC_Core);
103 | 
104 | 


--------------------------------------------------------------------------------
/matlab postprocessing/learningCurves.m:
--------------------------------------------------------------------------------
  1 | %   Plot Learning curves for the selected models
  2 | %   @ Code composed by Shekoofeh Azizi on 18/10/2017 (UBC-RCL)
  3 | %%
  4 | clc
  5 | clear all %#ok<CLALL>
  6 | close all
  7 | 
  8 | %% Initialization
  9 | % Se the model ids of the selected optimum models
 10 | 
 11 | % Bmode model id: Opt
 12 | % lstm_opt_model_id = '2017_10_19_20_54_29_arch1.csv';
 13 | % gru_opt_model_id = '2017_10_19_09_55_17_arch2.csv';
 14 | % rnn_opt_model_id = '2017_10_20_09_48_15_arch3.csv';
 15 | 
 16 | % RF model id: Opt
 17 | lstm_opt_model_id = '2017_10_05_11_29_04_arch1.csv';
 18 | gru_opt_model_id = '2017_10_05_11_28_48_arch2.csv';
 19 | rnn_opt_model_id = '2017_10_05_14_48_49_arch3.csv';
 20 | 
 21 | % Path setting
 22 | log_dir = 'E:\tscRF_LSTM\Python\TeUS_RNN\TeUS_RNN\Datasets\logs\';
 23 | log_dir_sub = '\'; 
 24 | sub_dir = 'DeepNetArch2-Div';
 25 | root = [log_dir, sub_dir, log_dir_sub];
 26 | addpath([root '/train_logs']);
 27 | addpath([root '/csv_logs']);
 28 | 
 29 | %%
 30 | curve_type = {'loss';'acc';'val_loss';'val_acc'};
 31 | curve_names = {'Train Loss';'Train Accuracy';'Validation Loss';'Validation Accuracy'};
 32 | linestyle = {'-'; '-.'; '-'; '-.' };
 33 | color = {[1 0.27 0.27] ; [1 0.27 0.27]; [0 0.8 0.4]; [0 0.8 0.4]};
 34 | noCurves = size(curve_type,1);
 35 | 
 36 | fig = figure;
 37 | left_color = [0 0 0];
 38 | right_color = [0 0 0];
 39 | set(fig,'defaultAxesColorOrder',[left_color; right_color]);
 40 | % Create axes
 41 | ax = axes('Parent',fig);
 42 | set(ax,'FontName','Times','FontSize',14,'GridColor',...
 43 |     [0.247058823529412 0.247058823529412 0.247058823529412],'GridLineStyle',':',...
 44 |     'LineStyleOrderIndex',3,'XGrid','on','YGrid','on');
 45 | for i = 1 : noCurves
 46 |     if(i<3)
 47 |         yyaxis left
 48 |         ylabel('Loss','FontName','Times','Interpreter','latex');
 49 |     else
 50 |         yyaxis right
 51 |         ylabel('Accuracy/AUC','FontName','Times','Interpreter','latex');
 52 |     end
 53 |     net_name = gru_opt_model_id(1:25);
 54 |     [param_log_name, param_log_value]  = importfilecsv([net_name, '.csv']);
 55 |     learn_log = importdata([net_name, '.log']);
 56 |     diagram_type_train = curve_type{i,1};
 57 |     [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train));
 58 |     value = learn_log.data(:,param_loc);
 59 |     
 60 |     plot(value,'DisplayName',curve_names{i,1},...
 61 |         'LineWidth',1.2,'LineStyle',linestyle{i,1}, 'Color',color{i,1})
 62 |     hold on    
 63 | end
 64 | 
 65 | [~, param_loc] = intersect(learn_log.textdata,cellstr('val_auc'));
 66 | value = learn_log.data(:,param_loc);
 67 | plot(value,'DisplayName','Validation AUC','LineWidth',1.2,'LineStyle','-', 'Color',[0.2 0.4 1]);
 68 | 
 69 | legend('show')
 70 | xlabel('Iteration (Epochs)','FontName','Times','Interpreter','latex')
 71 | hold off
 72 | 
 73 | 
 74 | 
 75 | %%
 76 | % curve_type = {'loss';'acc';'val_loss';'val_acc'};
 77 | % curve_names = {'Train Loss';'Train Accuracy';'Validation Loss';'Validation Accuracy'};
 78 | % linestyle = {'-'; '-.'; '-'; '-.' };
 79 | % color = {[1 0.27 0.27] ; [1 0.27 0.27]; [0 0.8 0.4]; [0 0.8 0.4]};
 80 | % noCurves = size(curve_type,1);
 81 | % 
 82 | % fig = figure;
 83 | % left_color = [0 0 0];
 84 | % right_color = [0 0 0];
 85 | % set(fig,'defaultAxesColorOrder',[left_color; right_color]);
 86 | % for i = 1 : noCurves
 87 | %     yyaxis left
 88 | %     net_name = lstm_opt_model_id(1:25);
 89 | %     [param_log_name, param_log_value]  = importfilecsv([net_name, '.csv']);
 90 | %     learn_log = importdata([net_name, '.log']);
 91 | %     diagram_type_train = curve_type{i,1};
 92 | %     [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train));
 93 | %     value = learn_log.data(:,param_loc);
 94 | %     
 95 | %     plot(value,'DisplayName',curve_names{i,1},...
 96 | %         'LineWidth',1.2,'LineStyle',linestyle{i,1}, 'Color',color{i,1})
 97 | %     hold on
 98 | % end
 99 | % ylabel('Loss/Accuracy');
100 | % 
101 | % yyaxis right
102 | % [~, param_loc] = intersect(learn_log.textdata,cellstr('lr'));
103 | % value = learn_log.data(:,param_loc);
104 | % plot(value,'DisplayName','Learning Rate','LineWidth',1.2,'LineStyle','-', 'Color',[0 0 0]);
105 | % ylabel('Learning Rate');
106 | % ylim([10e-4 10e-3])
107 | % legend('show')
108 | % hold off
109 | 
110 | 
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------
/matlab postprocessing/learningAnalysis.m:
--------------------------------------------------------------------------------
  1 | %   plot the learning curves for 
  2 | %   @ Code composed by Shekoofeh Azizi on 18/10/2017 (UBC-RCL)
  3 | %%
  4 | clc
  5 | clear all %#ok<CLALL>
  6 | close all
  7 | 
  8 | %%
  9 | 
 10 | model_type = 'gru';
 11 | diagram_type_train = 'loss';
 12 | diagram_type_val = 'val_loss';
 13 | compare_type = 'initial_lr';
 14 | 
 15 | [subFiles, opt_param_log_name, opt_param_log_value] = setting(model_type);
 16 | noFiles = size(subFiles,1);
 17 | 
 18 | [opt_lr, ~ ] = parsecsv('initial_lr', opt_param_log_name, opt_param_log_value);
 19 | [opt_bs, ~ ] = parsecsv('batch_size', opt_param_log_name, opt_param_log_value);
 20 | [opt_optimizer, ~ ] = parsecsv('optimizer', opt_param_log_name, opt_param_log_value);
 21 | [opt_reg, ~ ] = parsecsv('l2_regulizer', opt_param_log_name, opt_param_log_value);
 22 | [opt_do, ~ ] = parsecsv('dropout', opt_param_log_name, opt_param_log_value);
 23 | 
 24 | bs = '128';
 25 | do = '0';
 26 | reg = '0.0001';
 27 | 
 28 | fig = figure;
 29 | % Create axes
 30 | ax = axes('Parent',fig);
 31 | set(ax,'FontName','Times','FontSize',14,'GridLineStyle',':',...
 32 |     'LineStyleOrderIndex',3,'XGrid','on','YGrid','on');
 33 | hold on
 34 | for i = 1 : noFiles
 35 |     filename = subFiles{i,1};
 36 |     net_name = filename(1:25);
 37 |     [param_log_name, param_log_value]  = importfilecsv([net_name, '.csv']);
 38 |     learn_log = importdata(filename);
 39 |     
 40 |     [file_lr, ~ ] = parsecsv('initial_lr', param_log_name, param_log_value);
 41 |     [file_bs, ~ ] = parsecsv('batch_size', param_log_name, param_log_value);
 42 |     [file_optimizer, ~ ] = parsecsv('optimizer', param_log_name, param_log_value);
 43 |     [file_reg, ~ ] = parsecsv('l2_regulizer', param_log_name, param_log_value);
 44 |     [file_do, ~ ] = parsecsv('dropout', param_log_name, param_log_value);
 45 |     
 46 |     if(strcmp(bs, file_bs) && strcmp(reg, file_reg) && strcmp(do, file_do))
 47 |         
 48 |         switch(file_lr{1,1})
 49 |             case '0.01'
 50 |                 linestyle = ':';
 51 |             case '0.0001'
 52 |                 linestyle = '-';
 53 |             otherwise
 54 |                 fprintf('Invalid!\n' );
 55 |         end
 56 |         
 57 |         switch(file_optimizer{1,1})
 58 |             case 'sgd'
 59 |                 [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train));
 60 |                 value = learn_log.data(:,param_loc);
 61 |                 plot(value,'DisplayName',strcat('sgd, lr = ', file_lr{1,1}),...
 62 |                     'LineWidth',1.2,'LineStyle',linestyle, 'Color',[0.2 0.4 1])
 63 |                 hold on
 64 |                 %         [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_val));
 65 |                 %         value = learn_log.data(:,param_loc);
 66 |                 %         plot(value,'DisplayName',strcat('validation loss: Lr = ', file_lr{1,1}))
 67 |                 %         hold on
 68 |             case 'rmsprop'
 69 |                 [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train));
 70 |                 value = learn_log.data(:,param_loc);
 71 |                 plot(value,'DisplayName',strcat('rmsprop, lr = ', file_lr{1,1}), ...
 72 |                     'LineWidth',1.2,'LineStyle',linestyle, 'Color',[1 0.27 0.27])
 73 |                 hold on
 74 |                 %         [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_val));
 75 |                 %         value = learn_log.data(:,param_loc);
 76 |                 %         plot(value,'DisplayName',strcat('validation loss: Lr = ', file_lr{1,1}))
 77 |                 %         hold on
 78 |             case 'adam'
 79 |                 [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_train));
 80 |                 value = learn_log.data(:,param_loc);
 81 |                 plot(value,'DisplayName',strcat('adam, lr = ', file_lr{1,1}),...
 82 |                     'LineWidth',1.2,'LineStyle',linestyle,'Color', [0 0.8 0.4])
 83 |                 hold on
 84 |                 %         [~, param_loc] = intersect(learn_log.textdata,cellstr(diagram_type_val));
 85 |                 %         value = learn_log.data(:,param_loc);
 86 |                 %         plot(value,'DisplayName',strcat('validation loss: Lr = ', file_lr{1,1}))
 87 |                 %         hold on
 88 |             otherwise
 89 |                 fprintf('Invalid!\n' );
 90 |         end
 91 |     end
 92 | end
 93 | ylabel('Loss','FontName','Times','Interpreter','latex','FontSize',14);
 94 | xlabel('Iteration (Epochs)','FontName','Times','Interpreter','latex','FontSize',14)
 95 | ylim([0 0.7])
 96 | legend('show')
 97 | box('on')
 98 | hold off
 99 | 
100 | 
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/matlab postprocessing/makeFilter.m:
--------------------------------------------------------------------------------
  1 | function [filter_Core] = makeFilter(Y_Core,filtering,value)
  2 | %% Exclude data based o different criteria
  3 | %   nofilter : include all of the data
  4 | %   basic: exclude cores with more than 3mm distance to boundary and cores
  5 | %   have mismatche between axial and sagittal pathology
  6 | %   mrbased: basic filter + MR grade based exclusion
  7 | %   gsbased: basic filter + GS based exclusion
  8 | %   binary: sepration of cancer and benign.
  9 | 
 10 | %   Inputs: Y_ROI ,Y_Core : Labels and charectristics of each ROI/Core
 11 | %           filtering: string, based on following filters, i.e. 'basic'
 12 | %           value: default-0 for basic, nofilter,
 13 | %                  mrbased-1(low),2(medium),3(high)
 14 | %                  gsbased-0,6,71(GS 3+4),72(GS 4+3),8,9
 15 | %                  binary: 0 Benign, 1 Cancerous
 16 | %
 17 | %  Patient Info:
 18 | %  Column 1: Label
 19 | %  Column 2: Tumor in Core length
 20 | %  Column 3: MR Label
 21 | %  Column 4: Axial and Sagittal Match (1: Match 0: Mismatch)
 22 | %  Column 5: Gleason Score
 23 | %  Column 6: Distance to Boundary (mm)
 24 | %  Column 7: MR greatest size
 25 | %  Column 8: Sagittal GS
 26 | %  Column 9: Sample number
 27 | %  Column 10: ROI number/ Column 10: Sample Size
 28 | 
 29 | %   @ Code composed by Shekoofeh Azizi on 24/11/2015 (UBC-RCL)
 30 | %   @ Code modified by Shekoofeh Azizi on 01/06/2017 (UBC-RCL)
 31 | 
 32 | %%
 33 | if nargin < 3
 34 |     assert(~strcmp(filtering,'D2Bmrbased'),'Invalid value for MR grade!');
 35 |     value = 0;
 36 | end
 37 | 
 38 | % set filtering conditions
 39 | switch filtering
 40 |     case 'nofilter'
 41 |         %         filter_ROI = 1:length(Y_ROI) ;
 42 |         filter_Core = 1:length(Y_Core) ;
 43 |     case 'D2'
 44 |         %         filter_ROI = 1:length(Y_ROI) ;
 45 |         filter_Core = 1:length(Y_Core) ;
 46 |     case 'binary'
 47 |         %         filter_ROI = find(Y_ROI(:,1)== value);
 48 |         filter_Core = find(Y_Core(:,1)== value);
 49 |     case 'D2B'
 50 |         % less than 3mm and mismatch
 51 |         %         filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 ) ;
 52 |         filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 1 );
 53 |     case 'D3B'
 54 |         % less than 3mm and mismatch
 55 |         %         filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 0 ) ;
 56 |         filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 0 );
 57 |     case 'D2A'
 58 |         % less than 3mm and mismatch
 59 |         %         filter_ROI = find(Y_ROI(:,8) >= 2.99 ) ;
 60 |         filter_Core = find(Y_Core(:,6) >= 2.99 );
 61 |     case 'D3'
 62 |         % Just less than 3mm
 63 |         %         filter_ROI = find(Y_ROI(:,8) < 3.00 ) ;
 64 |         filter_Core = find(Y_Core(:,6) < 3.00 );
 65 |     case 'D2C'
 66 |         % Include match cores
 67 |         %         filter_ROI = find(Y_ROI(:,6)== 1) ;
 68 |         filter_Core = find(Y_Core(:,4)== 1);
 69 |     case 'D2Cmrlen'
 70 |         % Include match cores
 71 |         %         filter_ROI = find(Y_ROI(:,6)== 1 & Y_ROI(:,13) >= 2) ;
 72 |         filter_Core = find(Y_Core(:,4)== 1 & Y_Core(:,13) >= 2);
 73 |     case 'D2M'
 74 |         % Include just mis-match cores
 75 |         %         filter_ROI = find(Y_ROI(:,6)== 0) ;
 76 |         filter_Core = find(Y_Core(:,4)== 0);
 77 |     case 'D2mrbased'
 78 |         % less than 3mm and mismatch + MR level filtering
 79 |         %         filter_ROI = find(Y_ROI(:,5)== value) ;
 80 |         filter_Core = find(Y_Core(:,3)==value);
 81 |     case 'D2Bmrbased'
 82 |         % less than 3mm and mismatch + MR level filtering
 83 |         %         filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 & Y_ROI(:,5)== value) ;
 84 |         filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 1 & Y_Core(:,3)==value);
 85 |     case 'D2Amrbased'
 86 |         % less than 3mm and mismatch + MR level filtering
 87 |         %         filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,5)== value) ;
 88 |         filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,3)==value);
 89 |     case 'D2Cmrbased'
 90 |         % less than 3mm and mismatch + MR level filtering
 91 |         %         filter_ROI = find(Y_ROI(:,6)== 1 & Y_ROI(:,5)== value) ;
 92 |         filter_Core = find(Y_Core(:,4)== 1 & Y_Core(:,3)==value);
 93 |     case 'gsbased'
 94 |         % less than 3mm and mismatch + Gleason filtering
 95 |         %         filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 & Y_ROI(:,7)== value) ;
 96 |         filter_Core = find(Y_Core(:,6) >= 2.99 & Y_Core(:,4)== 1 & Y_Core(:,5)== value);
 97 |     case 'D2Cgsbased'
 98 |         % less than 3mm and mismatch + Gleason filtering
 99 |         %         filter_ROI = find(Y_ROI(:,8) >= 2.99 & Y_ROI(:,6)== 1 & Y_ROI(:,7)== value) ;
100 |         filter_Core = find(Y_Core(:,4)== 1 & Y_Core(:,5)== value);
101 |     case 'Lenbased'
102 |         filter_Core = find(Y_Core(:,4) >= value & Y_Core(:,4)== 1);
103 | end
104 | 
105 | end


--------------------------------------------------------------------------------
/utils/data_loader.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from utils import settings as S
  3 | import scipy.io as spio
  4 | from numpy.random import permutation
  5 | from sklearn.model_selection import train_test_split
  6 | import h5py
  7 | 
  8 | 
  9 | class DataLoader:
 10 |     def __init__(self, sl, validation_split=0.2, downsample=False, downsample_rate=2):
 11 |         self.sl = sl
 12 |         self.data_dir = S.intermediate_folder
 13 |         self.validation_split = validation_split
 14 |         self.downsample = downsample
 15 |         self.downsample_rate = downsample_rate
 16 | 
 17 |     @staticmethod
 18 |     def reshaper(data):
 19 |         # reshape input to be [samples, time steps, features]
 20 |         reshaped_data = np.reshape(data, (data.shape[0], data.shape[1], 1))
 21 |         return reshaped_data
 22 | 
 23 |     def load_train_test(self, bmode=False):
 24 |         d_sliding = spio.loadmat(self.data_dir + '/' + 'D_Sliding.mat')
 25 |         d_fixed = spio.loadmat(self.data_dir + '/' + 'D_Fixed.mat')
 26 | 
 27 |         # Matrix reformatting to access the cells
 28 |         d_train = d_sliding['Ds_train'][0:, 0:self.sl]  # B-mode+RF Train
 29 |         l_train = d_sliding['Ls_train'][0:, 0]  # B-mode+RF Train Labels
 30 |         d_test = d_fixed['Df_test'][0:, 0:self.sl]  # B-mode+RF Test
 31 |         l_test = d_fixed['Lf_test'][0:, 0]  # b-mode+RF Test Labels
 32 | 
 33 |         # Select the first half including only Bmode data [Bmode Data, RF Data]
 34 |         d_train_r = d_train[d_train.shape[0] / 2:, 0:self.sl]  # RF Train
 35 |         l_train_r = l_train[l_train.shape[0] / 2:, ]  # RF Train Labels
 36 |         d_test_r = d_test[d_test.shape[0] / 2:, 0:self.sl]  # RF Test
 37 |         l_test_r = l_test[l_test.shape[0] / 2:, ]  # RF Test Labels
 38 |         train_seq = d_train_r
 39 |         test_seq = d_test_r
 40 |         train_label = l_train_r
 41 |         test_label = l_test_r
 42 | 
 43 |         if bmode:
 44 |             d_train_b = d_train[0:d_train.shape[0] / 2, 0:self.sl]  # B-mode Train
 45 |             l_train_b = l_train[0:l_train.shape[0] / 2, ]  # B-mode Train Labels
 46 |             d_test_b = d_test[0:d_test.shape[0] / 2, 0:self.sl]  # B-mode Test
 47 |             l_test_b = l_test[0:l_test.shape[0] / 2, ]  # B-mode Test Labels
 48 |             train_seq = d_train_b
 49 |             test_seq = d_test_b
 50 |             train_label = l_train_b
 51 |             test_label = l_test_b
 52 | 
 53 |         if self.downsample:
 54 |             idx = np.floor(np.linspace(start=0, stop=self.sl-1, num=(self.sl/self.downsample_rate)))
 55 |             train_seq = train_seq[:, idx.astype(int)]
 56 |             test_seq = test_seq[:, idx.astype(int)]
 57 | 
 58 |         train_seq = self.reshaper(train_seq)
 59 |         test_seq = self.reshaper(test_seq)
 60 | 
 61 |         return train_seq, train_label, test_seq, test_label
 62 | 
 63 |     def load_data(self, bmode=False):
 64 |         train_data, train_label, test_seq, test_label = self.load_train_test(bmode)
 65 |         train_seq, train_label, validation_seq, validation_label = self.split_data(train_data, train_label)
 66 |         return train_seq, train_label, validation_seq, validation_label, test_seq, test_label
 67 | 
 68 |     def split_data(self, train_data, train_label):
 69 | 
 70 |         perm_idx = permutation(train_data.shape[0])
 71 |         train_data_perm = train_data[perm_idx, :]
 72 |         train_label_perm = train_label[perm_idx, ]
 73 | 
 74 |         validation_idx = int(round(self.validation_split * train_data.shape[0],0))
 75 | 
 76 |         train_seq = train_data_perm[validation_idx:, :]
 77 |         train_label = train_label_perm[validation_idx:, ]
 78 |         validation_seq = train_data_perm[0:validation_idx, :]
 79 |         validation_label = train_label_perm[0:validation_idx, ]
 80 | 
 81 |         return train_seq, train_label, validation_seq, validation_label
 82 | 
 83 |     def load_test(self, bmode=False):
 84 |         _, _, test_seq, test_label = self.load_train_test(bmode)
 85 |         return test_seq, test_label
 86 | 
 87 |     def load_data_split(self, bmode=False):   # shuffle and do the division based on the split size
 88 |         train_data, train_label, test_seq, test_label = self.load_train_test(bmode)
 89 |         data_seq = np.concatenate([train_data, test_seq])
 90 |         data_label = np.concatenate([train_label, test_label])
 91 |         train_data, test_seq, train_label, test_label = train_test_split(data_seq, data_label, test_size=0.2,
 92 |                                                                          random_state=40)
 93 |         if bmode:
 94 |             train_data = np.concatenate([train_data, test_seq])
 95 |             train_label = np.concatenate([train_label, test_label])
 96 | 
 97 |         train_seq, train_label, validation_seq, validation_label = self.split_data(train_data, train_label)
 98 |         return train_seq, train_label, validation_seq, validation_label, test_seq, test_label
 99 | 
100 |     def load_whole_test(self, bmode=False):
101 |         if bmode:
102 |             d_test = h5py.File(self.data_dir + '/' + 'D_Whole_Bmode.mat')
103 |             d_test = d_test['Dw_bmode'].value
104 |         else:
105 |             d_test = h5py.File(self.data_dir + '/' + 'D_Whole_RF.mat')
106 |             d_test = d_test['Dw_rf'].value
107 | 
108 |         test_seq = d_test[0:, 0:self.sl]
109 |         test_seq = self.reshaper(test_seq)
110 | 
111 |         return test_seq
112 | 
113 | 


--------------------------------------------------------------------------------
/experiments/trainmodel.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import time
  3 | # import os
  4 | from pprint import pprint as p
  5 | module_root = '..'
  6 | sys.path.append(module_root)
  7 | p(sys.path)
  8 | # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
  9 | # os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 10 | 
 11 | from utils.data_loader import DataLoader
 12 | from deepNetworks.model import RNNModel
 13 | from deepNetworks.netArch import DeepNetArch1, DeepNetArch2, DeepNetArch3, DeepNetArch1L1, DeepNetArch2L1, DeepNetArch3L1
 14 | from keras import backend as k
 15 | # import tensorflow as tf
 16 | 
 17 | # config = tf.ConfigProto(allow_soft_placement=True)
 18 | # config.gpu_options.allow_growth = True
 19 | # session = tf.Session(config=config)
 20 | # k.set_session(session)
 21 | 
 22 | if __name__ == '__main__':
 23 |     log_dir = 'DeepNetArch3'
 24 |     early_stopping = False
 25 |     sl = 100
 26 |     validation_split = 0.2
 27 |     n_epoch = 100
 28 |     batch_sizes = [64, 128]
 29 |     initial_lrs = [1e-2, 1e-4]
 30 |     l2_regulizers = [0.0001, 0.0002]
 31 |     dropouts = [0, 0.4]
 32 |     rec_dropouts = [0]
 33 |     optimizers = ['sgd', 'rmsprop', 'adam']
 34 | 
 35 |     grid_size = len(batch_sizes) * len(initial_lrs) * len(l2_regulizers) * len(dropouts) * len(rec_dropouts) * len(
 36 |         optimizers)
 37 |     i = 1
 38 |     model_number = 40
 39 |     for batch_size in batch_sizes:
 40 |         for initial_lr in initial_lrs:
 41 |             for l2_regulizer in l2_regulizers:
 42 |                 for dropout in dropouts:
 43 |                     for rec_dropout in rec_dropouts:
 44 |                         for optimizer in optimizers:
 45 | 
 46 |                             if i < model_number:
 47 |                                 i += 1
 48 |                                 continue
 49 | 
 50 |                             print('-' * 50)
 51 |                             print('-' * 50)
 52 |                             print('-' * 50)
 53 |                             print('batchsize:{}, initial_lr:{}, l2_regulizer:{}, dropout:{}, rec_dropout:{},'
 54 |                                   ' optimizer:{} '.format(
 55 |                                     batch_size, initial_lr, l2_regulizer, dropout, rec_dropout, optimizer))
 56 |                             print("experiment {} of total {}".format(i, grid_size))
 57 |                             ds = DataLoader(sl=sl, validation_split=validation_split)
 58 | 
 59 |                             if log_dir == 'DeepNetArch1':
 60 |                                 arch = DeepNetArch1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, dropout=dropout,
 61 |                                                     rec_dropout=rec_dropout, optimizer=optimizer, summary=1)
 62 |                             if log_dir == 'DeepNetArch2':
 63 |                                 arch = DeepNetArch2(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, dropout=dropout,
 64 |                                                     rec_dropout=rec_dropout, optimizer=optimizer, summary=1)
 65 |                             if log_dir == 'DeepNetArch3':
 66 |                                 arch = DeepNetArch3(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer, dropout=dropout,
 67 |                                                     rec_dropout=rec_dropout, optimizer=optimizer, summary=1)
 68 |                             if log_dir == 'DeepNetArch1L1':
 69 |                                 arch = DeepNetArch1L1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer,
 70 |                                                       dropout=dropout, rec_dropout=rec_dropout, optimizer=optimizer,
 71 |                                                       summary=1)
 72 |                             if log_dir == 'DeepNetArch2L1':
 73 |                                 arch = DeepNetArch2L1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer,
 74 |                                                       dropout=dropout, rec_dropout=rec_dropout, optimizer=optimizer,
 75 |                                                       summary=1)
 76 |                             if log_dir == 'DeepNetArch3L1':
 77 |                                 arch = DeepNetArch3L1(sl=sl, initial_lr=initial_lr, l2_reg=l2_regulizer,
 78 |                                                       dropout=dropout, rec_dropout=rec_dropout, optimizer=optimizer,
 79 |                                                       summary=1)
 80 | 
 81 |                             model, model_id = arch.arch_generator()
 82 | 
 83 |                             params = dict()
 84 |                             params['batch_size'] = batch_size
 85 |                             params['initial_lr'] = initial_lr
 86 |                             params['l2_regulizer'] = l2_regulizer
 87 |                             params['dropout'] = dropout
 88 |                             params['rec_dropout'] = rec_dropout
 89 |                             params['n_epoch'] = n_epoch
 90 |                             params['sl'] = sl
 91 |                             params['optimizer'] = optimizer
 92 | 
 93 |                             rnn_model = RNNModel(ds, model, params, log_dir=log_dir, division=True)
 94 |                             uid = time.strftime("%Y_%m_%d_%H_%M_%S_") + model_id
 95 |                             print('-' * 50)
 96 |                             print('UID: {}'.format(uid))
 97 |                             print('-' * 50)
 98 | 
 99 |                             rnn_model.train(uid=uid, batch_size=batch_size, es=early_stopping, nb_epoch=n_epoch,
100 |                                             verbose=2)
101 |                             k.clear_session()
102 |                             i += 1
103 | 


--------------------------------------------------------------------------------
/deepNetworks/netArch.py:
--------------------------------------------------------------------------------
  1 | from keras.layers import LSTM, GRU, SimpleRNN, Dense
  2 | from keras.models import Sequential
  3 | from keras.regularizers import l2
  4 | 
  5 | 
  6 | class DeepNetArch1:  # 2 Layers LSTM + Dense
  7 |     def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary):
  8 |         self.sl = sl
  9 |         self.summary = summary
 10 |         self.l2_reg = l2(l2_reg)
 11 |         self.dropout = dropout
 12 |         self.rec_dropout = rec_dropout
 13 |         self.initial_lr = initial_lr
 14 |         self.optimizer = optimizer
 15 | 
 16 |     def arch_generator(self):
 17 |         model_name = "arch1"
 18 |         model = Sequential()
 19 |         model.add(LSTM(units=self.sl, return_sequences=True, dropout=self.dropout, recurrent_dropout=self.rec_dropout,
 20 |                        input_shape=(self.sl, 1), stateful=False))
 21 |         model.add(LSTM(units=self.sl, dropout=self.dropout, recurrent_dropout=self.rec_dropout, return_sequences=False))
 22 |         model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg))
 23 |         model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])
 24 |         if self.summary:
 25 |             print(model.summary())
 26 |         return model, model_name
 27 | 
 28 | 
 29 | class DeepNetArch2:  # 2 Layers GRU + Dense
 30 |     def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary):
 31 |         self.sl = sl
 32 |         self.summary = summary
 33 |         self.l2_reg = l2(l2_reg)
 34 |         self.dropout = dropout
 35 |         self.rec_dropout = rec_dropout
 36 |         self.initial_lr = initial_lr
 37 |         self.optimizer = optimizer
 38 | 
 39 |     def arch_generator(self):
 40 |         model_name = "arch2"
 41 |         model = Sequential()
 42 |         model.add(GRU(units=self.sl, return_sequences=True, dropout=self.dropout, recurrent_dropout=self.rec_dropout,
 43 |                       input_shape=(self.sl, 1), stateful=False))
 44 |         model.add(GRU(units=self.sl, dropout=self.dropout, recurrent_dropout=self.rec_dropout, return_sequences=False))
 45 |         model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg))
 46 |         model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])
 47 |         if self.summary:
 48 |             print(model.summary())
 49 |         return model, model_name
 50 | 
 51 | 
 52 | class DeepNetArch3:  # 2 Layers RNN + Dense
 53 |     def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary):
 54 |         self.sl = sl
 55 |         self.summary = summary
 56 |         self.l2_reg = l2(l2_reg)
 57 |         self.dropout = dropout
 58 |         self.rec_dropout = rec_dropout
 59 |         self.initial_lr = initial_lr
 60 |         self.optimizer = optimizer
 61 | 
 62 |     def arch_generator(self):
 63 |         model_name = "arch3"
 64 |         model = Sequential()
 65 |         model.add(SimpleRNN(units=self.sl, return_sequences=True, dropout=self.dropout,
 66 |                             recurrent_dropout=self.rec_dropout,
 67 |                             input_shape=(self.sl, 1), stateful=False))
 68 |         model.add(SimpleRNN(units=self.sl, dropout=self.dropout, recurrent_dropout=self.rec_dropout, return_sequences=False))
 69 |         model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg))
 70 |         model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])
 71 |         if self.summary:
 72 |             print(model.summary())
 73 |         return model, model_name
 74 | 
 75 | 
 76 | class DeepNetArch1L1:  # 1 Layers LSTM + Dense
 77 |     def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary):
 78 |         self.sl = sl
 79 |         self.summary = summary
 80 |         self.l2_reg = l2(l2_reg)
 81 |         self.dropout = dropout
 82 |         self.rec_dropout = rec_dropout
 83 |         self.initial_lr = initial_lr
 84 |         self.optimizer = optimizer
 85 | 
 86 |     def arch_generator(self):
 87 |         model_name = "arch1l1"
 88 |         model = Sequential()
 89 |         model.add(LSTM(units=self.sl, return_sequences=False, dropout=self.dropout, recurrent_dropout=self.rec_dropout,
 90 |                        input_shape=(self.sl, 1), stateful=False))
 91 |         model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg))
 92 |         model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])
 93 |         if self.summary:
 94 |             print(model.summary())
 95 |         return model, model_name
 96 | 
 97 | 
 98 | class DeepNetArch2L1:  # 1 Layers GRU + Dense
 99 |     def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary):
100 |         self.sl = sl
101 |         self.summary = summary
102 |         self.l2_reg = l2(l2_reg)
103 |         self.dropout = dropout
104 |         self.rec_dropout = rec_dropout
105 |         self.initial_lr = initial_lr
106 |         self.optimizer = optimizer
107 | 
108 |     def arch_generator(self):
109 |         model_name = "arch2l1"
110 |         model = Sequential()
111 |         model.add(GRU(units=self.sl, return_sequences=False, dropout=self.dropout, recurrent_dropout=self.rec_dropout,
112 |                       input_shape=(self.sl, 1), stateful=False))
113 |         model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg))
114 |         model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])
115 |         if self.summary:
116 |             print(model.summary())
117 |         return model, model_name
118 | 
119 | 
class DeepNetArch3L1:  # 1 Layers RNN + Dense
    """A single SimpleRNN layer followed by a single sigmoid Dense unit."""

    def __init__(self, sl, initial_lr, l2_reg, dropout, rec_dropout, optimizer, summary):
        """Store the hyper-parameters consumed by ``arch_generator``.

        :param sl: sequence length; also the number of units in the recurrent layer.
        :param initial_lr: learning rate applied to the optimizer after compilation.
        :param l2_reg: L2 factor for the Dense layer's kernel regularizer.
        :param dropout: input dropout of the recurrent layer.
        :param rec_dropout: recurrent dropout of the recurrent layer.
        :param optimizer: optimizer passed to ``model.compile``.
        :param summary: when truthy, the model summary is printed.
        """
        self.sl = sl
        self.summary = summary
        self.l2_reg = l2(l2_reg)
        self.dropout = dropout
        self.rec_dropout = rec_dropout
        self.initial_lr = initial_lr
        self.optimizer = optimizer

    def arch_generator(self):
        """Build and compile the network.

        :return: ``(model, model_name)`` with ``model_name == "arch3l1"``.
        """
        # Local import: the backend is only needed here to set the learning rate.
        from keras import backend as K

        model_name = "arch3l1"
        model = Sequential()
        model.add(SimpleRNN(units=self.sl, return_sequences=False, dropout=self.dropout,
                            recurrent_dropout=self.rec_dropout,
                            input_shape=(self.sl, 1), stateful=False))
        model.add(Dense(1, activation="sigmoid", kernel_initializer="he_normal", kernel_regularizer=self.l2_reg))
        model.compile(loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'])

        # ``compile`` builds the optimizer with its own default learning rate,
        # so ``initial_lr`` has to be applied explicitly or it is ignored.
        if self.initial_lr is not None:
            K.set_value(model.optimizer.lr, float(self.initial_lr))

        if self.summary:
            # summary() prints by itself and returns None; do not print() it.
            model.summary()
        return model, model_name
141 | 
142 | 


--------------------------------------------------------------------------------
/deepNetworks/model.py:
--------------------------------------------------------------------------------
  1 | from keras.callbacks import CSVLogger, Callback, ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
  2 | 
  3 | module_root = '..'
  4 | import sys
  5 | import os
  6 | from utils import settings as s
  7 | import json
  8 | import csv
  9 | import numpy as np
 10 | # import h5py
 11 | 
 12 | sys.path.append(module_root)
 13 | from sklearn.metrics import roc_auc_score
 14 | 
 15 | 
 16 | class AUCHistory(Callback):
 17 |     def __init__(self, validation_data):
 18 |         self.validation_d = validation_data[0]
 19 |         self.validation_l = validation_data[1]
 20 | 
 21 |     def on_epoch_end(self, epoch, logs={}):
 22 |         y_pred = self.model.predict(self.validation_d)
 23 |         # print(np.concatenate((y_pred, self.validation_l), axis=1))
 24 |         logs['val_auc'] = roc_auc_score(self.validation_l, y_pred)
 25 |         print("- AUC: {0:0.2f}".format(logs['val_auc']))
 26 | 
 27 | 
 28 | class RNNModel:
 29 |     def __init__(self, data, model, params, log_dir, division=False, bmode=False):
 30 |         self.ds = data
 31 |         self.model = model
 32 |         self.log_dir = log_dir
 33 |         self.training_params_dict = params
 34 |         self.type = type
 35 |         self.bmode = bmode
 36 |         if not division:
 37 |             self.train_seq, self.train_label, self.validation_seq, self.validation_label, self.test_data,\
 38 |                 self.test_label = self.ds.load_data(bmode=self.bmode)
 39 |         if division:
 40 |             self.train_seq, self.train_label, self.validation_seq, self.validation_label, self.test_data,\
 41 |                 self.test_label = self.ds.load_data_split(bmode=self.bmode)
 42 | 
 43 |     def train(self, uid, batch_size, es, nb_epoch, verbose):
 44 |         print('-' * 30)
 45 |         print('Fitting model...')
 46 |         print('-' * 30)
 47 |         callbacks_list = []
 48 |         logs_dir = os.path.join(s.intermediate_folder, 'logs', self.log_dir)
 49 |         if not os.path.isdir(logs_dir):
 50 |             os.mkdir(logs_dir)
 51 | 
 52 |         model_json = self.model.to_json()
 53 |         model_log_dir = os.path.join(logs_dir, 'model_logs')
 54 |         if not os.path.isdir(model_log_dir):
 55 |             os.mkdir(model_log_dir)
 56 | 
 57 |         with open(os.path.join(model_log_dir, uid + '.json'), 'w') as outfile:
 58 |             json.dump(model_json, outfile)
 59 | 
 60 |         train_log_dir = os.path.join(logs_dir, 'train_logs')
 61 |         if not os.path.isdir(train_log_dir):
 62 |             os.mkdir(train_log_dir)
 63 | 
 64 |         with open(os.path.join(train_log_dir, uid + '.csv'), 'w') as csv_file:
 65 |             writer = csv.writer(csv_file)
 66 |             for key, value in self.training_params_dict.items():
 67 |                 writer.writerow([key, value])
 68 | 
 69 |         validation_data = (self.validation_seq, self.validation_label[:])
 70 | 
 71 |         reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.9,
 72 |                                       patience=15, min_lr=1e-9,
 73 |                                       epsilon=0.001, verbose=1)
 74 | 
 75 |         model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints')
 76 |         if not os.path.exists(model_checkpoint_dir):
 77 |             os.mkdir(model_checkpoint_dir)
 78 | 
 79 |         model_checkpoint = ModelCheckpoint(os.path.join(model_checkpoint_dir, uid + '.hdf5'),
 80 |                                            monitor='val_acc', save_best_only=True)
 81 |         callbacks_list.append(model_checkpoint)
 82 |         history = AUCHistory(validation_data)
 83 |         callbacks_list.append(history)
 84 |         callbacks_list.append(reduce_lr)
 85 |         if es:
 86 |             es = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=2, verbose=1)
 87 |             callbacks_list.append(es)
 88 | 
 89 |         csv_log_dir = os.path.join(logs_dir, 'csv_logs')
 90 |         if not os.path.isdir(csv_log_dir):
 91 |             os.mkdir(csv_log_dir)
 92 |         csv_logger = CSVLogger(os.path.join(csv_log_dir, uid + '.log'))
 93 |         callbacks_list.append(csv_logger)
 94 | 
 95 |         callbacks_list.append(reduce_lr)
 96 | 
 97 |         self.model.fit(self.train_seq, self.train_label, batch_size=batch_size, epochs=nb_epoch, verbose=verbose,
 98 |                        shuffle=True, callbacks=callbacks_list, validation_data=validation_data)
 99 | 
100 |     def predict_test(self, uid):
101 |         test_seq, test_labels = self.ds.load_test(bmode=self.bmode)
102 |         model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints/opt')
103 |         model_checkpoint_file = os.path.join(model_checkpoint_dir, uid + '.hdf5')
104 |         self.model.load_weights(model_checkpoint_file)
105 |         test_predictions = self.model.predict(test_seq, verbose=1)
106 |         test_auc = roc_auc_score(test_labels, test_predictions)
107 |         print(["Test AUC: ", test_auc])
108 |         return test_predictions
109 | 
110 |     def opt_model_train(self, uid, batch_size, es, nb_epoch, verbose):
111 | 
112 |         # Re-define local train (train + validation) and test sequence
113 |         # train_seq, train_label, test_seq, test_label = self.ds.load_train_test(bmode=self.bmode)
114 |         train_seq = np.concatenate((self.train_seq, self.validation_seq))
115 |         train_label = np.concatenate((self.train_label, self.validation_label))
116 |         test_seq = self.test_data
117 |         test_label = self.test_label
118 |         print('-' * 30)
119 |         print('Fitting optimum model ...')
120 |         print('-' * 30)
121 |         callbacks_list = []
122 |         logs_dir = os.path.join(s.intermediate_folder, 'logs', self.log_dir)
123 |         if not os.path.isdir(logs_dir):
124 |             os.mkdir(logs_dir)
125 | 
126 |         model_json = self.model.to_json()
127 |         model_log_dir = os.path.join(logs_dir, 'model_logs')
128 |         if not os.path.isdir(model_log_dir):
129 |             os.mkdir(model_log_dir)
130 | 
131 |         with open(os.path.join(model_log_dir, uid + '.json'), 'w') as outfile:
132 |             json.dump(model_json, outfile)
133 | 
134 |         train_log_dir = os.path.join(logs_dir, 'train_logs')
135 |         if not os.path.isdir(train_log_dir):
136 |             os.mkdir(train_log_dir)
137 | 
138 |         with open(os.path.join(train_log_dir, uid + '.csv'), 'w') as csv_file:
139 |             writer = csv.writer(csv_file)
140 |             for key, value in self.training_params_dict.items():
141 |                 writer.writerow([key, value])
142 | 
143 |         test_data = (test_seq, test_label[:])
144 | 
145 |         reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.9,
146 |                                       patience=15, min_lr=1e-9,
147 |                                       epsilon=0.001, verbose=1)
148 | 
149 |         model_checkpoint_dir = os.path.join(s.intermediate_folder, 'model_checkpoints/opt')
150 |         if not os.path.exists(model_checkpoint_dir):
151 |             os.mkdir(model_checkpoint_dir)
152 | 
153 |         model_checkpoint = ModelCheckpoint(os.path.join(model_checkpoint_dir, uid + '.hdf5'),
154 |                                            monitor='val_acc', save_best_only=True)
155 |         callbacks_list.append(model_checkpoint)
156 |         history = AUCHistory(test_data)
157 |         callbacks_list.append(history)
158 |         callbacks_list.append(reduce_lr)
159 | 
160 |         if es:
161 |             es = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=2, verbose=1)
162 |             callbacks_list.append(es)
163 | 
164 |         csv_log_dir = os.path.join(logs_dir, 'csv_logs')
165 |         if not os.path.isdir(csv_log_dir):
166 |             os.mkdir(csv_log_dir)
167 |         csv_logger = CSVLogger(os.path.join(csv_log_dir, uid + '.log'))
168 |         callbacks_list.append(csv_logger)
169 | 
170 |         callbacks_list.append(reduce_lr)
171 | 
172 |         self.model.fit(train_seq, train_label, batch_size=batch_size, epochs=nb_epoch, verbose=verbose,
173 |                        shuffle=True, callbacks=callbacks_list, validation_data=test_data)
174 | 


--------------------------------------------------------------------------------
/utils/history.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import json
  3 | import sys
  4 | import os
  5 | import numpy as np
  6 | import pandas as pd
  7 | import matplotlib.pyplot as plt
  8 | from IPython.display import display, SVG
  9 | from keras.models import model_from_json
 10 | from keras.utils.vis_utils import model_to_dot
 11 | 
 12 | module_root = '../..'
 13 | sys.path.append(module_root)
 14 | from utils import settings as s
 15 | 
 16 | 
 17 | class History:
 18 |     def __init__(self, logs_dir):
 19 |         self.logs_folder = os.path.join(s.intermediate_folder, 'logs', logs_dir)
 20 |         self.csv_logs_folder = os.path.join(self.logs_folder, 'csv_logs')
 21 |         self.model_logs_folder = os.path.join(self.logs_folder, 'model_logs')
 22 |         self.train_logs_folder = os.path.join(self.logs_folder, 'train_logs')
 23 | 
 24 |     def plot_results(self, train, validation, params, model_visualization, loss, acc, auc, min_acc):
 25 |         csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log'))
 26 |         csv_train_sorted = sorted(glob.glob(self.train_logs_folder + '/*.csv'))
 27 |         uid_sample = os.path.split(csv_name_sorted[1])[1].split('.')[0]
 28 |         final = pd.DataFrame(
 29 |             index=pd.read_csv(list(filter(lambda x: uid_sample in x, csv_train_sorted))[0], header=None)[0].tolist())
 30 |         final.index.name = None
 31 |         plt.figure(figsize=(8, 6))
 32 |         for csv_file in csv_name_sorted:
 33 |             if os.path.getsize(csv_file) > 0:
 34 |                 data = pd.read_csv(csv_file)
 35 |                 uid = os.path.split(csv_file)[1].split('.')[0]
 36 | 
 37 |                 if np.amax(data['val_acc']) > min_acc:
 38 |                     if validation:
 39 |                         if loss:
 40 |                             plt.plot(data['epoch'], data['val_loss'], label=uid + ' Val Loss')
 41 |                         if acc:
 42 |                             plt.plot(data['epoch'], data['val_acc'], label=uid + ' Val Acc')
 43 |                         if auc:
 44 |                             plt.plot(data['epoch'], data['val_auc'], label=uid + ' Val AUC')
 45 |                     if train:
 46 |                         if loss:
 47 |                             plt.plot(data['epoch'], data['loss'], label=uid + ' Train Loss')
 48 |                         if acc:
 49 |                             plt.plot(data['epoch'], data['acc'], label=uid + ' Train Acc')
 50 | 
 51 |                     if params:
 52 |                         print(uid)
 53 |                         train_csv = pd.read_csv(list(filter(lambda x: uid in x, csv_train_sorted))[0], header=None)
 54 |                         train_csv.columns = ['parameters', uid[8:25]]
 55 |                         train_csv.set_index('parameters', inplace=True)
 56 |                         final = final.join(train_csv)
 57 |                         print("*" * 100)
 58 |                         print("*" * 100)
 59 |                     if model_visualization:
 60 |                         model_log = glob.glob(self.model_logs_folder + '/' + uid + '*.json')[0]
 61 |                         with open(model_log) as model_file:
 62 |                             json_string = json.load(model_file)
 63 |                         model = model_from_json(json_string)
 64 |                         print(uid)
 65 |                         #                     print(model.summary())
 66 |                         print("*" * 100)
 67 |                         dot = model_to_dot(model).create(prog='dot', format='svg')
 68 |                         return SVG(dot)
 69 |         plt.ylabel('Accuracy')
 70 |         plt.ylim([0, 1])
 71 |         plt.xlabel('Epoch')
 72 |         plt.legend()
 73 |         # plt.close()
 74 |         plt.savefig(self.logs_folder + '/out.pdf', transparent=True)
 75 |         if params:
 76 |             display(final.drop(['data_ID', 'data_id'], axis=0))
 77 |         return plt
 78 | 
 79 |     def find_opt_model(self, loss, acc, auc):
 80 |         csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log'))
 81 |         csv_train_sorted = sorted(glob.glob(self.train_logs_folder + '/*.csv'))
 82 |         uid_sample = os.path.split(csv_name_sorted[1])[1].split('.')[0]
 83 |         final = pd.DataFrame(
 84 |             index=pd.read_csv(list(filter(lambda x: uid_sample in x, csv_train_sorted))[0], header=None)[0].tolist())
 85 |         final.index.name = None
 86 |         validation_results = []
 87 |         train_uids = []
 88 |         for csv_file in csv_name_sorted:
 89 |             if os.path.getsize(csv_file) > 0:
 90 |                 data = pd.read_csv(csv_file)
 91 |                 uid = os.path.split(csv_file)[1].split('.')[0]
 92 | 
 93 |                 train_uids.append(uid)
 94 |                 if auc:
 95 |                     validation_results.append(np.amax(data['val_auc']))
 96 |                 if loss:
 97 |                     validation_results.append(np.amin(data['val_loss']))
 98 |                 if acc:
 99 |                     validation_results.append(np.amax(data['val_acc']))
100 |         if loss:
101 |             opt_model_uid = train_uids[validation_results.index(min(validation_results))]
102 |         else:
103 |             opt_model_uid = train_uids[validation_results.index(max(validation_results))]
104 |         print("Optimum Model ID:  ", opt_model_uid)
105 |         print("Optimum Training Value: ", max(validation_results))
106 |         train_csv = pd.read_csv(list(filter(lambda x: opt_model_uid in x, csv_train_sorted))[0], header=None)
107 |         train_csv.columns = ['parameters', opt_model_uid[8:25]]
108 |         train_csv.set_index('parameters', inplace=True)
109 |         final = final.join(train_csv)
110 |         opt_model = final
111 |         print("Optimum Params:")
112 |         print(final)
113 |         print("*" * 100)
114 |         print("*" * 100)
115 | 
116 |         opt_model_axes = opt_model.axes
117 |         opt_model_rows = opt_model_axes[0]
118 |         opt_model_cols = opt_model_axes[1].values
119 | 
120 |         opt_params = dict()
121 | 
122 |         for rows in opt_model_rows:
123 |             for cols in opt_model_cols:
124 |                 opt_params[rows] = opt_model.get_value(rows, cols)
125 | 
126 |         return opt_params, opt_model_uid
127 | 
128 |     def plot_learning_curve(self, model_id, acc, auc, loss):
129 |         csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log'))
130 |         for csv_file in csv_name_sorted:
131 |             uid = os.path.split(csv_file)[1].split('.')[0]
132 |             if uid == model_id:
133 |                 data = pd.read_csv(csv_file)
134 |                 if auc:
135 |                     train_results = data['auc']
136 |                     validation_results = data['val_auc']
137 |                 if acc:
138 |                     train_results = data['acc']
139 |                     validation_results = data['val_acc']
140 |                 if loss:
141 |                     train_results = data['loss']
142 |                     validation_results = data['val_loss']
143 |                 break
144 | 
145 |         plt.figure()
146 |         plt.title("Learning curve of " + model_id)
147 | 
148 |         plt.xlabel("Epoch Number")
149 |         plt.ylabel("Accuracy")
150 | 
151 |         train_sizes = range(1, train_results.shape[0] + 1, 1)
152 |         train_scores_mean = np.mean(train_results)
153 |         train_scores_std = np.std(train_results)
154 |         validation_scores_mean = np.mean(validation_results)
155 |         validation_scores_std = np.std(validation_results)
156 |         plt.grid()
157 | 
158 |         plt.fill_between(train_sizes, train_results - train_scores_std,
159 |                          train_scores_mean + train_scores_std, alpha=0.1,
160 |                          color="r")
161 |         plt.fill_between(train_sizes, validation_results - validation_scores_std,
162 |                          validation_scores_mean + validation_scores_std, alpha=0.1, color="g")
163 | 
164 |         plt.plot(train_sizes, train_results, 'o-', color="r",
165 |                  label="Training score")
166 |         plt.plot(train_sizes, validation_results, 'o-', color="g",
167 |                  label="Cross-validation score")
168 | 
169 |         plt.legend(loc="best")
170 |         return plt
171 | 
172 |     def filtered_learning_curve(self, train, validation, params, loss, acc, auc):
173 |         csv_name_sorted = sorted(glob.glob(self.csv_logs_folder + '/*.log'))
174 |         csv_train_sorted = sorted(glob.glob(self.train_logs_folder + '/*.csv'))
175 |         uid_sample = os.path.split(csv_name_sorted[1])[1].split('.')[0]
176 |         final = pd.DataFrame(
177 |             index=pd.read_csv(list(filter(lambda x: uid_sample in x, csv_train_sorted))[0], header=None)[0].tolist())
178 |         final.index.name = None
179 |         plt.figure(figsize=(8, 6))
180 |         for csv_file in csv_name_sorted:
181 |             if os.path.getsize(csv_file) > 0:
182 |                 data = pd.read_csv(csv_file)
183 |                 uid = os.path.split(csv_file)[1].split('.')[0]
184 | 
185 |                 train_csv = pd.read_csv(list(filter(lambda x: uid in x, csv_train_sorted))[0], header=None)
186 |                 train_csv.columns = ['parameters', uid[8:25]]
187 |                 train_csv.set_index('parameters', inplace=True)
188 |                 final = final.join(train_csv)
189 |                 current_model = final
190 |                 current_model_axes = current_model.axes
191 |                 current_model_rows = current_model_axes[0]
192 |                 current_model_cols = current_model_axes[1].values
193 |                 current_model_params = dict()
194 |                 for rows in current_model_rows:
195 |                     for cols in current_model_cols:
196 |                         current_model_params[rows] = current_model.get_value(rows, cols)
197 | 
198 |                 if validation:
199 |                     if loss:
200 |                         plt.plot(data['epoch'], data['val_loss'], label=uid + ' Val Loss')
201 |                     if acc:
202 |                         plt.plot(data['epoch'], data['val_acc'], label=uid + ' Val Acc')
203 |                     if auc:
204 |                         plt.plot(data['epoch'], data['val_auc'], label=uid + ' Val AUC')
205 |                 if train:
206 |                     if loss:
207 |                         plt.plot(data['epoch'], data['loss'], label=uid + ' Train Loss')
208 |                     if acc:
209 |                         plt.plot(data['epoch'], data['acc'], label=uid + ' Train Acc')
210 | 
211 |         plt.ylabel('Accuracy')
212 |         plt.ylim([0, 1])
213 |         plt.xlabel('Epoch')
214 |         plt.legend()
215 |         # plt.close()
216 |         plt.savefig(self.logs_folder + '/out.pdf', transparent=True)
217 |         if params:
218 |             display(final.drop(['data_ID', 'data_id'], axis=0))
219 |         return plt


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    0. NO COMMERCIAL USE 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "{}"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright {yyyy} {name of copyright owner}
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------