├── pc_cfgs
    ├── __init__.py
    └── example.py
├── indices_isic2019.pkl
├── Matlab
    ├── ColorConstancy
    │   ├── cow2.jpg
    │   ├── dog3.jpg
    │   ├── building1.jpg
    │   ├── building1_cc.jpg
    │   ├── dilation33.m
    │   ├── set_border.m
    │   ├── norm_derivative.m
    │   ├── ColorConstancyDemo.m
    │   ├── gDer.m
    │   ├── fill_border.m
    │   └── general_cc.m
    └── adjust_2019.m
├── cfgs
    ├── 2019
    │   ├── test_effb0_ss.py
    │   ├── test_effb0_rr.py
    │   ├── test_effb0_ss_meta.py
    │   └── test_effb0_rr_meta.py
    └── __pycache__
    │   └── __init__.cpython-35.pyc
├── meta_data
    └── official
    │   └── meta_data_official.pkl
├── LICENSE
├── README.md
├── auto_augment.py
├── models.py
├── ensemble.py
├── train.py
└── eval.py


/pc_cfgs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/indices_isic2019.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngessert/isic2019/HEAD/indices_isic2019.pkl


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/cow2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/cow2.jpg


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/dog3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/dog3.jpg


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/building1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/building1.jpg


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/building1_cc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/building1_cc.jpg


--------------------------------------------------------------------------------
/cfgs/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngessert/isic2019/HEAD/cfgs/__pycache__/__init__.cpython-35.pyc


--------------------------------------------------------------------------------
/meta_data/official/meta_data_official.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ngessert/isic2019/HEAD/meta_data/official/meta_data_official.pkl


--------------------------------------------------------------------------------
/pc_cfgs/example.py:
--------------------------------------------------------------------------------
# Machine-specific settings, kept in a single dict.
# 'pathBase' is the base folder on this machine; adjust it per machine.
mdlParams = {'pathBase': '/home/Gessert'}
5 | 


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/dilation33.m:
--------------------------------------------------------------------------------
 1 | function out = dilation33(in)
 2 | 
 3 | hh=size(in,1);
 4 | ll=size(in,2);
 5 | out = zeros(hh,ll,3);
 6 | out(:,:,1)=[in(2:hh,:); in(hh,:)];
 7 | out(:,:,2)=in;
 8 | out(:,:,3)=[in(1,:); in(1:hh-1,:)];
 9 | out2=max(out,[],3);
10 | out(:,:,1)=[out2(:,2:ll), out2(:,ll)];
11 | out(:,:,2)=out2;
12 | out(:,:,3)=[out2(:,1), out2(:,1:ll-1)];
13 | out=max(out,[],3);


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/set_border.m:
--------------------------------------------------------------------------------
 1 | function out=set_border(in,width,method)
 2 | %sets border to either zero method=0,or method=1 to average
 3 | if nargin<3
 4 |     method=1;
 5 | end
 6 | 
 7 | temp=ones(size(in));
 8 | [y x] = ndgrid(1:size(in,1),1:size(in,2));
 9 | temp=temp.*( (x<size(temp,2)-width+1 ) & (x>width) );
10 | temp=temp.*( (y<size(temp,1)-width+1 ) & (y>width) );
11 | out=temp.*in;
12 | if method==1
13 |     out=out+(sum(out(:))./sum(temp(:))) *(ones(size(in))-temp);
14 | end


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/norm_derivative.m:
--------------------------------------------------------------------------------
 1 | function [Rw,Gw,Bw]=NormDerivative(in, sigma, order)
 2 | 
 3 | if(nargin<3) order=1; end
 4 | 
 5 | R=in(:,:,1);
 6 | G=in(:,:,2);
 7 | B=in(:,:,3);
 8 | 
 9 | if(order==1)
10 |     Rx=gDer(R,sigma,1,0);
11 |     Ry=gDer(R,sigma,0,1);
12 |     Rw=sqrt(Rx.^2+Ry.^2);
13 |     
14 |     Gx=gDer(G,sigma,1,0);
15 |     Gy=gDer(G,sigma,0,1);
16 |     Gw=sqrt(Gx.^2+Gy.^2);
17 |     
18 |     Bx=gDer(B,sigma,1,0);
19 |     By=gDer(B,sigma,0,1);
20 |     Bw=sqrt(Bx.^2+By.^2);
21 | end
22 | 
23 | if(order==2)        %computes frobius norm
24 |     Rxx=gDer(R,sigma,2,0);
25 |     Ryy=gDer(R,sigma,0,2);
26 |     Rxy=gDer(R,sigma,1,1);
27 |     Rw=sqrt(Rxx.^2+4*Rxy.^2+Ryy.^2);
28 |     
29 |     Gxx=gDer(G,sigma,2,0);
30 |     Gyy=gDer(G,sigma,0,2);
31 |     Gxy=gDer(G,sigma,1,1);
32 |     Gw=sqrt(Gxx.^2+4*Gxy.^2+Gyy.^2);
33 |     
34 |     Bxx=gDer(B,sigma,2,0);
35 |     Byy=gDer(B,sigma,0,2);
36 |     Bxy=gDer(B,sigma,1,1);
37 |     Bw=sqrt(Bxx.^2+4*Bxy.^2+Byy.^2);
38 | end


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Nils Gessert
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/ColorConstancyDemo.m:
--------------------------------------------------------------------------------
% shows example of illuminant estimation based on Grey-World, Shades of
% Gray, max-RGB, and Grey-Edge algorithm
%
% Every variant below is one call to
%   general_cc(input_data, njet, mink_norm, sigma)
% and only differs in the three parameters. The corrected image is the
% fourth output; the estimated illuminant (wR,wG,wB) is not used here.


%some example images
input_im=double(imread('building1.jpg'));
%input_im=double(imread('cow2.jpg'));
%input_im=double(imread('dog3.jpg'));

figure(1);imshow(uint8(input_im));
title('input image');

% Grey-World: no differentiation (njet=0), Minkowski norm 1, no smoothing (sigma=0)
[wR,wG,wB,out1]=general_cc(input_im,0,1,0);
figure(2);imshow(uint8(out1));
title('Grey-World');

% max-RGB: mink_norm=-1 selects the max operation in general_cc
[wR,wG,wB,out2]=general_cc(input_im,0,-1,0);
figure(3);imshow(uint8(out2));
title('max-RGB');

% Shades of Grey: as Grey-World but with a higher Minkowski norm
mink_norm=5;    % any number between 1 and infinity
[wR,wG,wB,out3]=general_cc(input_im,0,mink_norm,0);
figure(4);imshow(uint8(out3));
title('Shades of Grey');

% Grey-Edge: the norm is taken over image derivatives (njet>0)
mink_norm=5;    % any number between 1 and infinity
sigma=2;        % sigma 
diff_order=1;   % differentiation order (1 or 2)

[wR,wG,wB,out4]=general_cc(input_im,diff_order,mink_norm,sigma);
figure(5);imshow(uint8(out4));
title('Grey-Edge');
37 | 


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/gDer.m:
--------------------------------------------------------------------------------
function [H]= gDer(f,sigma, iorder,jorder)
% GDER  Separable Gaussian (derivative) filtering of an image.
%
%   H = gDer(f, sigma, iorder, jorder)
%
%   f      : input image; it is padded with fill_border before filtering
%   sigma  : standard deviation of the Gaussian
%   iorder : 0, 1 or 2 - order of the kernel applied as a row vector
%            (i.e. along the second dimension of f)
%   jorder : 0, 1 or 2 - order of the kernel applied as a column vector
%            (i.e. along the first dimension of f)
%
%   H has the same size as f. Orders other than 0, 1 or 2 match no case
%   below, which leaves the kernel undefined and makes filter2 fail.

%H = HxRecGauss(f, sigma, sigma, iorder,jorder,3);
%H = HxGaussDerivative2d(f, sigma, iorder,jorder,3);

%original program
%Initialize the filter

% the kernel is truncated at 3*sigma (rounded) on each side
break_off_sigma = 3.;
filtersize = floor(break_off_sigma*sigma+0.5);

% pad by the kernel half-width so the filter sees replicated edge values
f=fill_border(f,filtersize);

x=-filtersize:1:filtersize;

Gauss=1/(sqrt(2 * pi) * sigma)* exp((x.^2)/(-2 * sigma * sigma) );

switch(iorder)
case 0
    % smoothing kernel, normalised to sum to one
    Gx= Gauss/sum(Gauss);
case 1
    % first derivative, scaled so that sum(x.*Gx) equals one
    Gx  =  -(x/sigma^2).*Gauss;
    Gx  =  Gx./(sum(sum(x.*Gx)));
case 2
    % second derivative, made zero-sum and scaled so that
    % sum(0.5*x.^2.*Gx) equals one
    Gx = (x.^2/sigma^4-1/sigma^2).*Gauss;
    Gx = Gx-sum(Gx)/size(x,2);
    Gx = Gx/sum(0.5*x.*x.*Gx);
end
H = filter2(Gx,f);

switch(jorder)
case 0
    Gy= Gauss/sum(Gauss);
case 1
    Gy  =  -(x/sigma^2).*Gauss;
    Gy  =  Gy./(sum(sum(x.*Gy)));
case 2
    Gy = (x.^2/sigma^4-1/sigma^2).*Gauss;
    Gy = Gy-sum(Gy)/size(x,2);
    Gy = Gy/sum(0.5*x.*x.*Gy);
end
% transposed kernel: filter along the other dimension
H = filter2(Gy',H);

% remove the padding added by fill_border
H=H(filtersize+1:size(H,1)-filtersize,filtersize+1:size(H,2)-filtersize);


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/fill_border.m:
--------------------------------------------------------------------------------
function out=fill_border(in,bw)
% FILL_BORDER  Pad an image by bw pixels on every side with its edge values.
%
%   out = fill_border(in, bw)
%
%   in  : 2-D image or multi-plane image (planes along the third dimension)
%   bw  : border width in pixels
%   out : array of size (rows+2*bw) x (cols+2*bw) [x planes]; the centre
%         holds in, the four corner blocks hold the corresponding corner
%         pixel, and the four edge strips repeat the first/last row or
%         column of in.

hh=size(in,1);
ww=size(in,2);
dd=size(in,3);

if(dd==1)
	out=zeros(hh+bw*2,ww+bw*2);
	
	% four corner blocks
	out(1:bw,1:bw)=ones(bw,bw).*in(1,1);
	out(bw+hh+1:2*bw+hh,1:bw)=ones(bw,bw).*in(hh,1);
	out(1:bw,bw+1+ww:2*bw+ww)=ones(bw,bw).*in(1,ww);
	out(bw+hh+1:2*bw+hh,bw+1+ww:2*bw+ww)=ones(bw,bw).*in(hh,ww);
	% centre
	out( bw+1:bw+hh,bw+1:bw+ww )= in;
	% top, bottom, left and right strips
	out(1:bw,bw+1:bw+ww)=ones(bw,1)*in(1,:);
	out(bw+hh+1:2*bw+hh,bw+1:bw+ww)=ones(bw,1)*in(hh,:);
	out(bw+1:bw+hh,1:bw)=in(:,1)*ones(1,bw);
	out(bw+1:bw+hh,bw+ww+1:2*bw+ww)=in(:,ww)*ones(1,bw);
else
  	out=zeros(hh+bw*2,ww+bw*2,dd);
    % same padding as above, applied to each plane separately
    for(ii=1:dd)
    	out(1:bw,1:bw,ii)=ones(bw,bw).*in(1,1,ii);
		out(bw+hh+1:2*bw+hh,1:bw,ii)=ones(bw,bw).*in(hh,1,ii);
		out(1:bw,bw+1+ww:2*bw+ww,ii)=ones(bw,bw).*in(1,ww,ii);
		out(bw+hh+1:2*bw+hh,bw+1+ww:2*bw+ww,ii)=ones(bw,bw).*in(hh,ww,ii);
		out( bw+1:bw+hh,bw+1:bw+ww,ii )= in(:,:,ii);
		out(1:bw,bw+1:bw+ww,ii)=ones(bw,1)*in(1,:,ii);
		out(bw+hh+1:2*bw+hh,bw+1:bw+ww,ii)=ones(bw,1)*in(hh,:,ii);
		out(bw+1:bw+hh,1:bw,ii)=in(:,1,ii)*ones(1,bw);
		out(bw+1:bw+hh,bw+ww+1:2*bw+ww,ii)=in(:,ww,ii)*ones(1,bw);
    end
end


--------------------------------------------------------------------------------
/Matlab/ColorConstancy/general_cc.m:
--------------------------------------------------------------------------------
% general_cc: estimates the light source of an input_image. 
%
% Depending on the parameters the estimation is equal to Grey-World, Max-RGB, general Grey-World,
% Shades-of-Gray or Grey-Edge algorithm.
%
% SYNOPSIS:
%    [white_R ,white_G ,white_B,output_data] = general_cc(input_data,njet,mink_norm,sigma,mask_im)
%    
% INPUT :
%   input_data    : color input image (NxMx3)
%   njet          : the order of differentiation (range from 0-2), default 0.
%   mink_norm     : minkowski norm used, default 1 (if mink_norm==-1 then the max
%                   operation is applied which is equal to minkowski_norm=infinity).
%   sigma         : scale of the Gaussian filter, default 1. For njet==0 the
%                   image is smoothed with it (skipped when sigma==0); for
%                   njet>0 it is the scale of the derivative filters. A
%                   border of width sigma+1 is always excluded from the
%                   estimation.
%   mask_im       : binary images with zeros on image positions which
%                   should be considered for illuminant estimation
%                   (default: all zeros, i.e. every position is considered).
% OUTPUT: 
%   [white_R,white_G,white_B]           : illuminant color estimation,
%                                         normalised to unit length
%   output_data                         : color corrected image

% LITERATURE :
%
% J. van de Weijer, Th. Gevers, A. Gijsenij
% "Edge-Based Color Constancy"
% IEEE Trans. Image Processing, accepted 2007.
%
% The paper includes references to other Color Constancy algorithms
% included in general_cc.m such as Grey-World, and max-RGB, and
% Shades-of-Gray.

function [white_R ,white_G ,white_B,output_data] = general_cc(input_data,njet,mink_norm,sigma,mask_im)

if(nargin<2), njet=0; end
if(nargin<3), mink_norm=1; end
if(nargin<4), sigma=1; end
if(nargin<5), mask_im=zeros(size(input_data,1),size(input_data,2)); end

% remove all saturated points
saturation_threshold = 255;
mask_im2 = mask_im + (dilation33(double(max(input_data,[],3)>=saturation_threshold)));   
mask_im2=double(mask_im2==0);
mask_im2=set_border(mask_im2,sigma+1,0);
% mask_im2 is now 1 exactly at the positions used for the estimation:
% positions that are zero in mask_im, are not in the 3x3 neighbourhood of a
% saturated pixel (any channel >= saturation_threshold) and do not lie in
% the border of width sigma+1. All other positions are 0.

% keep the unfiltered image: the correction is applied to it at the end
output_data=input_data;

if(njet==0)
   if(sigma~=0)
     % zero-order case: only smooth each channel
     for ii=1:3
        input_data(:,:,ii)=gDer(input_data(:,:,ii),sigma,0,0);
     end
   end
end

if(njet>0)
    % derivative-based case: replace each channel by its derivative magnitude
    [Rx,Gx,Bx]=norm_derivative(input_data, sigma, njet);
    
    input_data(:,:,1)=Rx;
    input_data(:,:,2)=Gx;
    input_data(:,:,3)=Bx;    
end

input_data=abs(input_data);

if(mink_norm~=-1)          % minkowski norm = (1,infinity >
    kleur=power(input_data,mink_norm);
    white_R = power(sum(sum(kleur(:,:,1).*mask_im2)),1/mink_norm);
    white_G = power(sum(sum(kleur(:,:,2).*mask_im2)),1/mink_norm);
    white_B = power(sum(sum(kleur(:,:,3).*mask_im2)),1/mink_norm);

    % normalise the illuminant estimate to unit length
    som=sqrt(white_R^2+white_G^2+white_B^2);

    white_R=white_R/som;
    white_G=white_G/som;
    white_B=white_B/som;
else                    % minkowski norm is infinite: Max-algorithm
    R=input_data(:,:,1);
    G=input_data(:,:,2);
    B=input_data(:,:,3);
    
    white_R=max(R(:).*mask_im2(:));
    white_G=max(G(:).*mask_im2(:));
    white_B=max(B(:).*mask_im2(:));
    
    som=sqrt(white_R^2+white_G^2+white_B^2);

    white_R=white_R/som;
    white_G=white_G/som;
    white_B=white_B/som;
end
% divide each channel by its illuminant component; the factor sqrt(3)
% leaves the image unchanged when the estimate is (1,1,1)/sqrt(3)
output_data(:,:,1)=output_data(:,:,1)/(white_R*sqrt(3));
output_data(:,:,2)=output_data(:,:,2)/(white_G*sqrt(3));
output_data(:,:,3)=output_data(:,:,3)/(white_B*sqrt(3));


--------------------------------------------------------------------------------
/Matlab/adjust_2019.m:
--------------------------------------------------------------------------------
  1 | pathImSrc = '\isic\2019\ISIC_2019_Training_Input';
  2 | pathImTar = '\isic\2019\official';
  3 | %pathImCheck = '\isic\2019\official_check';
  4 | fold = dir(pathImSrc);
  5 | std_size = [450,600];
  6 | preserve_ratio = true;
  7 | preserve_size = 600;
  8 | crop_black = true;
  9 | margin = 0.1;
 10 | thresh = 0.3;
 11 | resize = true;
 12 | use_cc = true;
 13 | write_png = false;
 14 | write = true;
 15 | ind = 1;
 16 | all_heights = 0;
 17 | all_width = 0;
 18 | %initialize
 19 | use_cropping = false;
 20 | for i=3:length(fold)
 21 |     try
 22 |        im = imread([pathImSrc '\' fold(i).name]);
 23 |     catch
 24 |        disp(['Image ' fold(i).name ' failed.'])
 25 |        continue
 26 |     end
 27 |     if crop_black
 28 |         lvl = graythresh(rgb2gray(im));
 29 |         BW = imbinarize(imgaussfilt(rgb2gray(im),2),lvl*0.2);        
 30 |         stats = regionprops('table',BW,'Centroid',...
 31 |             'MajorAxisLength','MinorAxisLength');
 32 |         if size(stats,1) > 0
 33 |             diameters = mean([stats.MajorAxisLength stats.MinorAxisLength],2);
 34 |             [diameter_srt,srt_ind] = sort(diameters,'descend');
 35 |             %[diameter,ind] = max(diameters);
 36 |             radius = diameter_srt(1)/2;
 37 |             center = stats.Centroid(srt_ind(1),:);
 38 |             % define box
 39 |             x_min = int32(center(2)-radius+margin*radius);
 40 |             x_max = int32(center(2)+radius-margin*radius);
 41 |             y_min = int32(center(1)-radius+margin*radius);
 42 |             y_max = int32(center(1)+radius-margin*radius);
 43 |             use_cropping = true;
 44 |         else
 45 |             use_cropping = false;
 46 |         end
 47 |         if x_min < 1 || x_max > size(im,1) || y_min < 1 || y_max > size(im,2)
 48 |             if length(diameter_srt) > 1
 49 |                 % try second largest
 50 |                 radius = diameter_srt(2)/2;
 51 |                 center = stats.Centroid(srt_ind(2),:);
 52 |                 % define box
 53 |                 x_min = int32(center(2)-radius+margin*radius);
 54 |                 x_max = int32(center(2)+radius-margin*radius);
 55 |                 y_min = int32(center(1)-radius+margin*radius);
 56 |                 y_max = int32(center(1)+radius-margin*radius);
 57 |                 if x_min < 1 || x_max > size(im,1) || y_min < 1 || y_max > size(im,2)
 58 |                     use_cropping = false;
 59 |                 end
 60 |             else
 61 |                 use_cropping = false;
 62 |             end
 63 |         end
 64 |         if use_cropping
 65 |             mean_inside = mean(im(x_min:x_max,y_min:y_max,:),'all');
 66 |             mean_outside = (mean(im(1:x_min,:,:),'all')+mean(im(x_min:x_max,1:y_min,:),'all')+mean(im(x_max:end,:,:),'all')+mean(im(x_min:x_max,y_max:end,:),'all'))/4;
 67 |             if mean_outside/mean_inside > thresh
 68 |                 use_cropping = false;
 69 |             end
 70 |         end        
 71 |         if use_cropping
 72 |             %imwrite(im,[pathImCheck '\' fold(i).name]);            
 73 |             im = im(x_min:x_max,y_min:y_max,:);
 74 |             %imwrite(im,[pathImCheck '\' replace(fold(i).name,'.jpg','_c.jpg')]);
 75 |             %disp([fold(i).name ' cropped.'])
 76 |         end
 77 |     end
 78 |     %all_heights(ind) = size(im,1);
 79 |     %all_width(ind) = size(im,2);
 80 |     %ind = ind+1;
 81 |     % resize?
 82 |     if resize
 83 |         if preserve_ratio
 84 |             % long side is resized to target size
 85 |             if size(im,1) > size(im,2)
 86 |                im = permute(im,[2,1,3]); 
 87 |             end
 88 |             if size(im,2) ~= preserve_size
 89 |                 ratio = preserve_size/size(im,2);
 90 |                 %disp(['Before ' mat2str(size(im))])
 91 |                 im = imresize(im,[int32(round(size(im,1)*ratio)),preserve_size]);
 92 |                 %disp(['After ' mat2str(size(im))])
 93 |             end
 94 |         else
 95 |             if size(im,1) > size(im,2)
 96 |                im = permute(im,[2,1,3]); 
 97 |             end
 98 |             if size(im,1) ~= std_size(1) || size(im,2) ~= std_size(2)
 99 |                im = imresize(im,std_size); 
100 |             end
101 |         end
102 |     end
103 |     % cc
104 |     if use_cc
105 |         [~,~,~,im_new]=general_cc(double(im),0,6,0);
106 |         im_new = uint8(im_new);
107 |     else
108 |         im_new = im;
109 |     end
110 |     if write
111 |         if write_png
112 |             imwrite(im_new,[pathImTar '\' replace(fold(i).name,'.jpg','.png')]);  
113 |         else
114 |             imwrite(im_new,[pathImTar '\' fold(i).name],'Quality',100);
115 |         end
116 |     end
117 |     if mod(i,1000) == 0
118 |         disp(i)
119 |     end
120 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Skin Lesion Classification Using Ensembles of Multi-Resolution EfficientNets with Meta Data
 2 | 
 3 | Code for team DAISYLab's participation in the ([ISIC 2019 challenge](https://challenge2019.isic-archive.com/)).
 4 | 
 5 | We achieved first place in both tasks: ([Leaderboards](https://challenge2019.isic-archive.com/leaderboard.html)).
 6 | 
 7 | Arxiv paper: https://arxiv.org/abs/1910.03910
 8 | 
 9 | Please cite our MethodsX article if you make use of our work: https://doi.org/10.1016/j.mex.2020.100864
10 | 
11 | ### Usage
12 | 
13 | Here, we explain the basic usage of our code. Note that we used additional datasets that need to be prepared in a similar way. Most of it is based on our [last year's approach](https://github.com/ngessert/isic2018).
14 | 
15 | ### Data and Path Preparation
16 | 
17 | The images' and labels' directory structure should look like this: /isic2019/images/official/ISIC_0024306.jpg and /isic2019/labels/official/labels.csv. The labels in the CSV file should be structured as follows: first column contains the image ID ("ISIC_0024306"), then the one-hot encoded labels follow.
18 | 
19 | Other datasets such as the 7-point dataset need to be formatted in a similar way. I.e. there needs to be a "sevenpoint" folder (instead of "official") for the images and a "sevenpoint" folder for the labels with the properly formatted label files.
20 | 
21 | Our split for training/validation with 5-Fold CV is included in the "indices_isic2019.pkl" file. This should be placed in the same directory as /isic2019. Note that we do not use a test set.
22 | 
23 | In pc_cfgs we include an example for a machine specific cfg. Here, the base folder can be adjusted for different machines.
24 | 
25 | In the cfgs folder, there are example configs. You can swap out models by using the names given in models.py. For the EfficientNets we used the recommended resolution from the paper --> https://github.com/lukemelas/EfficientNet-PyTorch.
26 | 
27 | When training a model with additional meta data, you need the prepared meta data file in the meta_data folder. The meta_data folder is structured similar to the images or labels folder (one subfolder for each dataset).
28 | 
29 | ### Training a model
30 | 
31 | We included two example config files for full training and 5-Fold CV. More details on the different options, e.g. for balancing and cropping, are given in the paper. To start training, run: `python train.py example 2019.test_effb0_ss gpu0` 
32 | 
33 | gpu0 indicates the number of the GPU that should be used for training. This is helpful for machines with multiple GPUs.
34 | 
35 | ### Evaluate a model 
36 | 
37 | For model evaluation, there are multiple options. First, a 5-Fold CV model can be evaluated on each held out split. For evaluation of same-sized cropping model (see paper for explanation), run: `python eval.py example 2019.test_effb0_ss multiorder36 average NONE bestgpu0` 
38 | 
39 | `multiorder36` indicates that ordered, multi-crop evaluation with 36 crops should be performed. Always use 9, 16, 25, 36, 49... etc. number of crops. `average` indicates the predictions should be averaged over the crops (can also be `vote`). `best` indicates that the best model obtained during training should be used. Can be `last` to use the last model saved. 
40 | 
41 | When evaluating a model with random-resize option (see paper for explanation), run this instead: `python eval.py example 2019.test_effb0_rr multideterm1sc4f4 average NONE bestgpu0`
42 | 
43 | If final predictions on new, unlabeled images should be performed, add the path to said images at the end of the evaluation call: `python eval.py example 2019.test_effb0_ss multiorder36 average NONE bestgpu0 NONE /home/Gessert/data/isic/isic2019/images/Test` 
44 | 
45 | Each evaluation run generates a pkl file that can be used for further ensemble aggregation.
46 | 
47 | ### Construct an Ensemble
48 | 
49 | Testing ensembles is also split into two parts. First, an ensemble can be constructed based on 5-Fold CV error and the corresponding best models are saved. Then, the final predictions on a new dataset can be made using the generated files from the evaluation section.
50 | 
51 | For 5-Fold CV performance assessment, run: `python ensemble.py /path/to/evaluation/files evalexhaust15 /path/to/file/best_models.pkl`
52 | The first path indicates the location where all evaluation pkl files are located. `evalexhaust15`: `eval` indicates that 5-Fold CV evaluation is desired. `exhaust15` indicates that the top 15 performing models should be tested for their optimal combination. I.e., every possible combination (average predictions) of those models is tested for the best performance. Without the exhaust option, only the top N combinations are considered, i.e., the tested combinations are: top1 model, top1+top2 model, top1+top2+top3 model, etc. The last argument indicates the path where the best performing combination is saved.
53 | 
54 | For generation of new predictions for unlabeled data, run: `python ensemble.py /path/to/evaluation/files best /path/to/file/best_models.pkl /path/to/predictions.csv /path/to/image/files`
55 | `best` indicates that only the models with best in the name should be considered. This relates to the evaluation where either the best performing model or the last checkpoint can be used for generation. This can be `last` or `bestlast` for both. The next argument is the path to the file that was generated in the first ensemble run. This can just be `NONE` if all models should be included. The next argument is the path to the CSV file that should contain the predictions. The last argument is the path to the image files which is used to match the predictions to image file names.
56 | 


--------------------------------------------------------------------------------
/auto_augment.py:
--------------------------------------------------------------------------------
  1 | import random
  2 | import numpy as np
  3 | import scipy
  4 | from scipy import ndimage
  5 | from PIL import Image, ImageEnhance, ImageOps
  6 | 
  7 | #See: https://github.com/4uiiurz1/pytorch-auto-augment
  8 | class AutoAugment(object):
  9 |     def __init__(self):
 10 |         self.policies = [
 11 |             ['Invert', 0.1, 7, 'Contrast', 0.2, 6],
 12 |             ['Rotate', 0.7, 2, 'TranslateX', 0.3, 9],
 13 |             ['Sharpness', 0.8, 1, 'Sharpness', 0.9, 3],
 14 |             ['ShearY', 0.5, 8, 'TranslateY', 0.7, 9],
 15 |             ['AutoContrast', 0.5, 8, 'Equalize', 0.9, 2],
 16 |             ['ShearY', 0.2, 7, 'Posterize', 0.3, 7],
 17 |             ['Color', 0.4, 3, 'Brightness', 0.6, 7],
 18 |             ['Sharpness', 0.3, 9, 'Brightness', 0.7, 9],
 19 |             ['Equalize', 0.6, 5, 'Equalize', 0.5, 1],
 20 |             ['Contrast', 0.6, 7, 'Sharpness', 0.6, 5],
 21 |             ['Color', 0.7, 7, 'TranslateX', 0.5, 8],
 22 |             ['Equalize', 0.3, 7, 'AutoContrast', 0.4, 8],
 23 |             ['TranslateY', 0.4, 3, 'Sharpness', 0.2, 6],
 24 |             ['Brightness', 0.9, 6, 'Color', 0.2, 8],
 25 |             ['Solarize', 0.5, 2, 'Invert', 0, 0.3],
 26 |             ['Equalize', 0.2, 0, 'AutoContrast', 0.6, 0],
 27 |             ['Equalize', 0.2, 8, 'Equalize', 0.6, 4],
 28 |             ['Color', 0.9, 9, 'Equalize', 0.6, 6],
 29 |             ['AutoContrast', 0.8, 4, 'Solarize', 0.2, 8],
 30 |             ['Brightness', 0.1, 3, 'Color', 0.7, 0],
 31 |             ['Solarize', 0.4, 5, 'AutoContrast', 0.9, 3],
 32 |             ['TranslateY', 0.9, 9, 'TranslateY', 0.7, 9],
 33 |             ['AutoContrast', 0.9, 2, 'Solarize', 0.8, 3],
 34 |             ['Equalize', 0.8, 8, 'Invert', 0.1, 3],
 35 |             ['TranslateY', 0.7, 9, 'AutoContrast', 0.9, 1],
 36 |         ]
 37 | 
 38 |     def __call__(self, img):
 39 |         img = apply_policy(img, self.policies[random.randrange(len(self.policies))])
 40 |         return img
 41 | 
 42 | 
# Dispatch table used by apply_policy: operation name -> callable taking
# (img, magnitude) and returning the transformed image.
# The entries are lambdas rather than direct function references so that the
# target name is only looked up when the operation is actually called; the
# functions themselves are defined further down in this module.
operations = {
    'ShearX': lambda img, magnitude: shear_x(img, magnitude),
    'ShearY': lambda img, magnitude: shear_y(img, magnitude),
    'TranslateX': lambda img, magnitude: translate_x(img, magnitude),
    'TranslateY': lambda img, magnitude: translate_y(img, magnitude),
    'Rotate': lambda img, magnitude: rotate(img, magnitude),
    'AutoContrast': lambda img, magnitude: auto_contrast(img, magnitude),
    'Invert': lambda img, magnitude: invert(img, magnitude),
    'Equalize': lambda img, magnitude: equalize(img, magnitude),
    'Solarize': lambda img, magnitude: solarize(img, magnitude),
    'Posterize': lambda img, magnitude: posterize(img, magnitude),
    'Contrast': lambda img, magnitude: contrast(img, magnitude),
    'Color': lambda img, magnitude: color(img, magnitude),
    'Brightness': lambda img, magnitude: brightness(img, magnitude),
    'Sharpness': lambda img, magnitude: sharpness(img, magnitude),
    'Cutout': lambda img, magnitude: cutout(img, magnitude),
}
 60 | 
 61 | 
 62 | def apply_policy(img, policy):
 63 |     if random.random() < policy[1]:
 64 |         img = operations[policy[0]](img, policy[2])
 65 |     if random.random() < policy[4]:
 66 |         img = operations[policy[3]](img, policy[5])
 67 | 
 68 |     return img
 69 | 
 70 | 
 71 | def transform_matrix_offset_center(matrix, x, y):
 72 |     o_x = float(x) / 2 + 0.5
 73 |     o_y = float(y) / 2 + 0.5
 74 |     offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
 75 |     reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
 76 |     transform_matrix = offset_matrix @ matrix @ reset_matrix
 77 |     return transform_matrix
 78 | 
 79 | 
 80 | def shear_x(img, magnitude):
 81 |     img = np.array(img)
 82 |     magnitudes = np.linspace(-0.3, 0.3, 11)
 83 | 
 84 |     transform_matrix = np.array([[1, random.uniform(magnitudes[magnitude], magnitudes[magnitude+1]), 0],
 85 |                                  [0, 1, 0],
 86 |                                  [0, 0, 1]])
 87 |     transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1])
 88 |     affine_matrix = transform_matrix[:2, :2]
 89 |     offset = transform_matrix[:2, 2]
 90 |     img = np.stack([ndimage.interpolation.affine_transform(
 91 |                     img[:, :, c],
 92 |                     affine_matrix,
 93 |                     offset) for c in range(img.shape[2])], axis=2)
 94 |     img = Image.fromarray(img)
 95 |     return img
 96 | 
 97 | 
 98 | def shear_y(img, magnitude):
 99 |     img = np.array(img)
100 |     magnitudes = np.linspace(-0.3, 0.3, 11)
101 | 
102 |     transform_matrix = np.array([[1, 0, 0],
103 |                                  [random.uniform(magnitudes[magnitude], magnitudes[magnitude+1]), 1, 0],
104 |                                  [0, 0, 1]])
105 |     transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1])
106 |     affine_matrix = transform_matrix[:2, :2]
107 |     offset = transform_matrix[:2, 2]
108 |     img = np.stack([ndimage.interpolation.affine_transform(
109 |                     img[:, :, c],
110 |                     affine_matrix,
111 |                     offset) for c in range(img.shape[2])], axis=2)
112 |     img = Image.fromarray(img)
113 |     return img
114 | 
115 | 
def translate_x(img, magnitude):
    """Shift ``img`` along its second array axis by a random amount.

    The range [-150/331, 150/331] is split into 10 equal bins; ``magnitude``
    (0..9) selects one bin and a fraction is drawn uniformly from it. The
    shift is that fraction times ``img.shape[1]`` and is applied to every
    channel separately.

    ``img`` must convert to an array with a channel axis (``H x W x C``).
    Returns a PIL image.
    """
    img = np.array(img)
    magnitudes = np.linspace(-150/331, 150/331, 11)

    shift = img.shape[1] * random.uniform(magnitudes[magnitude], magnitudes[magnitude + 1])
    transform_matrix = np.array([[1, 0, 0],
                                 [0, 1, shift],
                                 [0, 0, 1]])
    transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1])
    affine_matrix = transform_matrix[:2, :2]
    offset = transform_matrix[:2, 2]
    # Use the public scipy.ndimage function; the scipy.ndimage.interpolation
    # namespace is deprecated.
    img = np.stack([ndimage.affine_transform(img[:, :, c], affine_matrix, offset)
                    for c in range(img.shape[2])], axis=2)
    return Image.fromarray(img)
132 | 
133 | 
def translate_y(img, magnitude):
    """Translate an image by a random offset drawn from the selected magnitude bin.

    The fraction range [-150/331, 150/331] is cut into 10 equal bins;
    `magnitude` picks the bin (valid indices 0-9). The sampled fraction is
    multiplied by `img.shape[0]` and placed at position [0, 2] of the 3x3
    transform matrix.

    Args:
        img: Image convertible by `np.array`; it is indexed below as a 3-D
            array whose last axis is iterated channel by channel.
        magnitude: Integer bin index.

    Returns:
        The transformed image, produced by `Image.fromarray`.
    """
    pixels = np.array(img)
    bin_edges = np.linspace(-150/331, 150/331, 11)
    displacement = pixels.shape[0] * random.uniform(bin_edges[magnitude], bin_edges[magnitude + 1])

    matrix = np.array([[1, 0, displacement],
                       [0, 1, 0],
                       [0, 0, 1]])
    # Helper defined elsewhere in this file; presumably re-centres the
    # transform on the image -- TODO confirm against its definition.
    matrix = transform_matrix_offset_center(matrix, pixels.shape[0], pixels.shape[1])
    linear_part = matrix[:2, :2]
    shift = matrix[:2, 2]

    # The same 2-D affine map is applied to every channel independently.
    warped = []
    for channel in range(pixels.shape[2]):
        warped.append(ndimage.interpolation.affine_transform(
            pixels[:, :, channel], linear_part, shift))
    return Image.fromarray(np.stack(warped, axis=2))
150 | 
151 | 
def rotate(img, magnitude):
    """Rotate an image by a random angle drawn from the selected magnitude bin.

    The range [-30, 30] degrees is cut into 10 equal bins; `magnitude` picks
    the bin (valid indices 0-9) from which the angle is sampled uniformly.

    Args:
        img: Image convertible by `np.array`; it is indexed below as a 3-D
            array whose last axis is iterated channel by channel.
        magnitude: Integer bin index.

    Returns:
        The transformed image, produced by `Image.fromarray`.
    """
    pixels = np.array(img)
    bin_edges = np.linspace(-30, 30, 11)
    angle = np.deg2rad(random.uniform(bin_edges[magnitude], bin_edges[magnitude + 1]))

    matrix = np.array([[np.cos(angle), -np.sin(angle), 0],
                       [np.sin(angle), np.cos(angle), 0],
                       [0, 0, 1]])
    # Helper defined elsewhere in this file; presumably re-centres the
    # transform on the image -- TODO confirm against its definition.
    matrix = transform_matrix_offset_center(matrix, pixels.shape[0], pixels.shape[1])
    linear_part = matrix[:2, :2]
    shift = matrix[:2, 2]

    # The same 2-D affine map is applied to every channel independently.
    warped = []
    for channel in range(pixels.shape[2]):
        warped.append(ndimage.interpolation.affine_transform(
            pixels[:, :, channel], linear_part, shift))
    return Image.fromarray(np.stack(warped, axis=2))
168 | 
169 | 
def auto_contrast(img, magnitude):
    """Return `img` processed by `ImageOps.autocontrast`.

    `magnitude` is accepted but not used by this operation.
    """
    return ImageOps.autocontrast(img)
173 | 
174 | 
def invert(img, magnitude):
    """Return `img` processed by `ImageOps.invert`.

    `magnitude` is accepted but not used by this operation.
    """
    return ImageOps.invert(img)
178 | 
179 | 
def equalize(img, magnitude):
    """Return `img` processed by `ImageOps.equalize`.

    `magnitude` is accepted but not used by this operation.
    """
    return ImageOps.equalize(img)
183 | 
184 | 
def solarize(img, magnitude):
    """Solarize `img` with a random threshold from the selected magnitude bin.

    The threshold range [0, 256] is cut into 10 equal bins; `magnitude`
    (valid indices 0-9) picks the bin that is sampled uniformly. The sampled
    value is passed to `ImageOps.solarize` as its second argument.
    """
    bin_edges = np.linspace(0, 256, 11)
    lower, upper = bin_edges[magnitude], bin_edges[magnitude + 1]
    threshold = random.uniform(lower, upper)
    return ImageOps.solarize(img, threshold)
189 | 
190 | 
def posterize(img, magnitude):
    """Posterize `img` with a random bit count from the selected magnitude bin.

    The range [4, 8] is cut into 10 equal bins; `magnitude` (valid indices
    0-9) picks the bin that is sampled uniformly. The sample is rounded to the
    nearest integer before being passed to `ImageOps.posterize`.
    """
    bin_edges = np.linspace(4, 8, 11)
    lower, upper = bin_edges[magnitude], bin_edges[magnitude + 1]
    bits = int(round(random.uniform(lower, upper)))
    return ImageOps.posterize(img, bits)
195 | 
196 | 
def contrast(img, magnitude):
    """Enhance contrast by a random factor from the selected magnitude bin.

    The factor range [0.1, 1.9] is cut into 10 equal bins; `magnitude`
    (valid indices 0-9) picks the bin that is sampled uniformly. The factor is
    passed to `ImageEnhance.Contrast(img).enhance`.
    """
    bin_edges = np.linspace(0.1, 1.9, 11)
    lower, upper = bin_edges[magnitude], bin_edges[magnitude + 1]
    factor = random.uniform(lower, upper)
    return ImageEnhance.Contrast(img).enhance(factor)
201 | 
202 | 
def color(img, magnitude):
    """Enhance colour by a random factor from the selected magnitude bin.

    The factor range [0.1, 1.9] is cut into 10 equal bins; `magnitude`
    (valid indices 0-9) picks the bin that is sampled uniformly. The factor is
    passed to `ImageEnhance.Color(img).enhance`.
    """
    bin_edges = np.linspace(0.1, 1.9, 11)
    lower, upper = bin_edges[magnitude], bin_edges[magnitude + 1]
    factor = random.uniform(lower, upper)
    return ImageEnhance.Color(img).enhance(factor)
207 | 
208 | 
def brightness(img, magnitude):
    """Enhance brightness by a random factor from the selected magnitude bin.

    The factor range [0.1, 1.9] is cut into 10 equal bins; `magnitude`
    (valid indices 0-9) picks the bin that is sampled uniformly. The factor is
    passed to `ImageEnhance.Brightness(img).enhance`.
    """
    bin_edges = np.linspace(0.1, 1.9, 11)
    lower, upper = bin_edges[magnitude], bin_edges[magnitude + 1]
    factor = random.uniform(lower, upper)
    return ImageEnhance.Brightness(img).enhance(factor)
213 | 
214 | 
def sharpness(img, magnitude):
    """Enhance sharpness by a random factor from the selected magnitude bin.

    The factor range [0.1, 1.9] is cut into 10 equal bins; `magnitude`
    (valid indices 0-9) picks the bin that is sampled uniformly. The factor is
    passed to `ImageEnhance.Sharpness(img).enhance`.
    """
    bin_edges = np.linspace(0.1, 1.9, 11)
    lower, upper = bin_edges[magnitude], bin_edges[magnitude + 1]
    factor = random.uniform(lower, upper)
    return ImageEnhance.Sharpness(img).enhance(factor)
219 | 
220 | 
def cutout(org_img, magnitude=None):
    """Fill one random square patch of an image with the image's mean value.

    Args:
        org_img: Input image (anything `np.array` can convert). A copy is
            edited; the input itself is left untouched.
        magnitude: Optional bin index (valid indices 0-9). When given, the
            range [0, 60/331] is cut into 10 equal bins, a fraction is sampled
            uniformly from the chosen bin and multiplied by `img.shape[0]` to
            get the patch side. When None, the side is fixed at 16 pixels.

    Returns:
        The masked image, produced by `Image.fromarray`.
    """
    # Bug fix: the original started with `np.array(img)`, reading the local
    # `img` before it was ever assigned (UnboundLocalError on every call).
    # `np.array` already returns a fresh copy, so no extra `np.copy` is needed.
    img = np.array(org_img)

    magnitudes = np.linspace(0, 60/331, 11)
    mask_val = img.mean()

    if magnitude is None:
        mask_size = 16
    else:
        mask_size = int(round(img.shape[0]*random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])))

    # The top-left corner may start up to half a patch outside the image, so
    # the visible part of the patch can be smaller than mask_size.
    top = np.random.randint(0 - mask_size//2, img.shape[0] - mask_size)
    left = np.random.randint(0 - mask_size//2, img.shape[1] - mask_size)
    # bottom/right are taken from the unclamped corner on purpose: clamping
    # afterwards crops the patch instead of shifting it into the image.
    bottom = top + mask_size
    right = left + mask_size

    top = max(top, 0)
    left = max(left, 0)

    # Indexing only the two spatial axes covers both (H, W, C) and (H, W) arrays.
    img[top:bottom, left:right] = mask_val

    return Image.fromarray(img)
248 | 
249 | 
250 | 
class Cutout(object):
    """Callable transform that fills one random square patch with the image mean.

    Args:
        length: Side length of the square patch in pixels (default 16).
    """

    def __init__(self, length=16):
        self.length = length

    def __call__(self, img):
        """Return a copy of `img` with one random patch set to the mean value.

        Args:
            img: Image convertible by `np.array`.

        Returns:
            The masked image, produced by `Image.fromarray`.
        """
        img = np.array(img)

        mask_val = img.mean()

        # The top-left corner may start up to half a patch outside the image,
        # so the visible part of the patch can be smaller than `length`.
        top = np.random.randint(0 - self.length//2, img.shape[0] - self.length)
        left = np.random.randint(0 - self.length//2, img.shape[1] - self.length)
        # bottom/right use the unclamped corner so clamping crops the patch.
        bottom = top + self.length
        right = left + self.length

        top = max(top, 0)
        # Bug fix: the original wrote `left = 0 if left < 0 else top`, which
        # replaced every non-negative column offset with the row offset.
        left = max(left, 0)

        # Indexing only the two spatial axes covers both (H, W, C) and (H, W) arrays.
        img[top:bottom, left:right] = mask_val

        return Image.fromarray(img)


--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numbers
  3 | import numpy as np
  4 | import functools
  5 | import h5py
  6 | import math
  7 | from torchvision import models
  8 | import pretrainedmodels
  9 | import torch.nn.functional as F
 10 | import types
 11 | import torch
 12 | from efficientnet_pytorch import EfficientNet
 13 | from collections import OrderedDict
 14 | import torch.nn as nn
 15 | 
 16 | def Dense121(config):
 17 |     return models.densenet121(pretrained=True)
 18 | 
 19 | def Dense161(config):
 20 |     return models.densenet169(pretrained=True)
 21 | 
 22 | def Dense169(config):
 23 |     return models.densenet161(pretrained=True)
 24 | 
 25 | def Dense201(config):
 26 |     return models.densenet201(pretrained=True)
 27 | 
 28 | def Resnet50(config):
 29 |     return pretrainedmodels.__dict__['resnet50'](num_classes=1000, pretrained='imagenet')
 30 | 
 31 | def Resnet101(config):
 32 |     return models.resnet101(pretrained=True)
 33 | 
 34 | def InceptionV3(config):
 35 |     return models.inception_v3(pretrained=True)
 36 | 
 37 | def se_resnext50(config):
 38 |     return pretrainedmodels.__dict__['se_resnext50_32x4d'](num_classes=1000, pretrained='imagenet')
 39 | 
 40 | def se_resnext101(config):
 41 |     return pretrainedmodels.__dict__['se_resnext101_32x4d'](num_classes=1000, pretrained='imagenet')
 42 | 
 43 | def se_resnet50(config):
 44 |     return pretrainedmodels.__dict__['se_resnet50'](num_classes=1000, pretrained='imagenet')
 45 | 
 46 | def se_resnet101(config):
 47 |     return pretrainedmodels.__dict__['se_resnet101'](num_classes=1000, pretrained='imagenet')
 48 | 
 49 | def se_resnet152(config):
 50 |     return pretrainedmodels.__dict__['se_resnet152'](num_classes=1000, pretrained='imagenet')
 51 | 
 52 | def resnext101(config):
 53 |     return pretrainedmodels.__dict__['resnext101_32x4d'](num_classes=1000, pretrained='imagenet')
 54 | 
 55 | def resnext101_64(config):
 56 |     return pretrainedmodels.__dict__['resnext101_64x4d'](num_classes=1000, pretrained='imagenet')
 57 | 
 58 | def senet154(config):
 59 |     return pretrainedmodels.__dict__['senet154'](num_classes=1000, pretrained='imagenet')
 60 | 
 61 | def polynet(config):
 62 |     return pretrainedmodels.__dict__['polynet'](num_classes=1000, pretrained='imagenet')
 63 | 
 64 | def dpn92(config):
 65 |     return pretrainedmodels.__dict__['dpn92'](num_classes=1000, pretrained='imagenet+5k')
 66 | 
 67 | def dpn68b(config):
 68 |     return pretrainedmodels.__dict__['dpn68b'](num_classes=1000, pretrained='imagenet+5k')
 69 | 
 70 | def nasnetamobile(config):
 71 |     return pretrainedmodels.__dict__['nasnetamobile'](num_classes=1000, pretrained='imagenet')
 72 | 
 73 | def resnext101_32_8_wsl(config):
 74 |     return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x8d_wsl')
 75 | 
 76 | def resnext101_32_16_wsl(config):
 77 |     return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x16d_wsl')
 78 | 
 79 | def resnext101_32_32_wsl(config):
 80 |     return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x32d_wsl')
 81 | 
 82 | def resnext101_32_48_wsl(config):
 83 |     return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x48d_wsl')
 84 | 
 85 | def efficientnet_b0(config):
 86 |     return EfficientNet.from_pretrained('efficientnet-b0',num_classes=config['numClasses'])
 87 | 
 88 | def efficientnet_b1(config):
 89 |     return EfficientNet.from_pretrained('efficientnet-b1',num_classes=config['numClasses'])
 90 | 
 91 | def efficientnet_b2(config):
 92 |     return EfficientNet.from_pretrained('efficientnet-b2',num_classes=config['numClasses'])
 93 | 
 94 | def efficientnet_b3(config):
 95 |     return EfficientNet.from_pretrained('efficientnet-b3',num_classes=config['numClasses'])
 96 | 
 97 | def efficientnet_b4(config):
 98 |     return EfficientNet.from_pretrained('efficientnet-b4',num_classes=config['numClasses'])
 99 | 
def efficientnet_b5(config):
    """Build 'efficientnet-b5' via `EfficientNet.from_pretrained`.

    Args:
        config: Mapping; `config['numClasses']` is passed as `num_classes`.
    """
    n_classes = config['numClasses']
    return EfficientNet.from_pretrained('efficientnet-b5', num_classes=n_classes)
102 | 
def efficientnet_b6(config):
    """Build 'efficientnet-b6' via `EfficientNet.from_pretrained`.

    Args:
        config: Mapping; `config['numClasses']` is passed as `num_classes`.
    """
    n_classes = config['numClasses']
    return EfficientNet.from_pretrained('efficientnet-b6', num_classes=n_classes)
105 | 
def efficientnet_b7(config):
    """Build 'efficientnet-b7' via `EfficientNet.from_pretrained`.

    Args:
        config: Mapping; `config['numClasses']` is passed as `num_classes`.
    """
    n_classes = config['numClasses']
    return EfficientNet.from_pretrained('efficientnet-b7', num_classes=n_classes)
108 | 
def modify_meta(mdlParams, model):
    """Add a metadata branch to a CNN and replace its forward pass.

    The model gains a `meta_before` MLP applied to the metadata vector and an
    optional `meta_after` layer applied to the concatenation of CNN features
    and metadata features. Its final classifier is replaced so it consumes
    the combined features. The new forward pass expects one argument, a pair
    `(image_batch, meta_batch)`.

    The model family is chosen by substring match on `mdlParams['model_type']`:
    'efficient' uses `model._fc`, 'wsl' uses `model.fc`, anything else uses
    `model.last_linear`.

    Args:
        mdlParams: Configuration mapping. Keys read: 'model_type',
            'fc_layers_before' (non-empty list of layer widths), 'meta_array'
            (its `.shape[1]` is the metadata input width), 'dropout_meta',
            'fc_layers_after' (only the first entry is used, if any) and
            'numClasses'.
        model: The network to modify in place.

    Returns:
        The same `model` object, modified.
    """
    model_type = mdlParams['model_type']
    is_efficient = 'efficient' in model_type
    is_wsl = (not is_efficient) and 'wsl' in model_type

    # Name of the attribute that holds the final classifier for this family.
    if is_efficient:
        classifier_attr = '_fc'
    elif is_wsl:
        classifier_attr = 'fc'
    else:
        classifier_attr = 'last_linear'
    # Bug fix: the original read `model._fc.in_features` unconditionally when
    # 'fc_layers_after' was empty, which raises AttributeError for the 'wsl'
    # and `last_linear` families. The width is now always taken from the
    # classifier that matches the model family.
    num_cnn_features = getattr(model, classifier_attr).in_features

    # Metadata MLP: Linear -> BatchNorm -> ReLU -> Dropout per configured width.
    # The original built at most two stages while sizing the concatenation
    # with fc_layers_before[-1]; building every stage keeps the one- and
    # two-entry layouts identical and makes longer lists consistent.
    fc_before = mdlParams['fc_layers_before']
    before_layers = []
    in_width = mdlParams['meta_array'].shape[1]
    for out_width in fc_before:
        before_layers.extend([nn.Linear(in_width, out_width),
                              nn.BatchNorm1d(out_width),
                              nn.ReLU(),
                              nn.Dropout(p=mdlParams['dropout_meta'])])
        in_width = out_width
    model.meta_before = nn.Sequential(*before_layers)

    # Optional layer applied after concatenating CNN and metadata features.
    fc_after = mdlParams['fc_layers_after']
    combined_width = fc_before[-1] + num_cnn_features
    if len(fc_after) > 0:
        model.meta_after = nn.Sequential(nn.Linear(combined_width, fc_after[0]),
                                         nn.BatchNorm1d(fc_after[0]),
                                         nn.ReLU())
        classifier_in_features = fc_after[0]
    else:
        model.meta_after = None
        classifier_in_features = combined_width

    # Replace the classifier so it accepts the combined feature vector.
    setattr(model, classifier_attr, nn.Linear(classifier_in_features, mdlParams['numClasses']))

    def new_forward(self, inputs):
        """Forward pass over an `(images, metadata)` pair."""
        x, meta_data = inputs
        # CNN features, computed the way each model family exposes them.
        if is_efficient:
            cnn_features = self.extract_features(x)
            cnn_features = F.adaptive_avg_pool2d(cnn_features, 1).squeeze(-1).squeeze(-1)
            # `_dropout` is a plain rate in the release this code was written
            # for; a module is also accepted so that releases which store an
            # nn.Dropout there keep working.
            if isinstance(self._dropout, nn.Module):
                cnn_features = self._dropout(cnn_features)
            elif self._dropout:
                cnn_features = F.dropout(cnn_features, p=self._dropout, training=self.training)
        elif is_wsl:
            cnn_features = self.conv1(x)
            cnn_features = self.bn1(cnn_features)
            cnn_features = self.relu(cnn_features)
            cnn_features = self.maxpool(cnn_features)

            cnn_features = self.layer1(cnn_features)
            cnn_features = self.layer2(cnn_features)
            cnn_features = self.layer3(cnn_features)
            cnn_features = self.layer4(cnn_features)

            cnn_features = self.avgpool(cnn_features)
            cnn_features = torch.flatten(cnn_features, 1)
        else:
            cnn_features = self.layer0(x)
            cnn_features = self.layer1(cnn_features)
            cnn_features = self.layer2(cnn_features)
            cnn_features = self.layer3(cnn_features)
            cnn_features = self.layer4(cnn_features)
            cnn_features = self.avg_pool(cnn_features)
            if self.dropout is not None:
                cnn_features = self.dropout(cnn_features)
            cnn_features = cnn_features.view(cnn_features.size(0), -1)

        # Metadata branch, then concatenation along the feature axis.
        meta_features = self.meta_before(meta_data)
        features = torch.cat((cnn_features, meta_features), dim=1)
        if self.meta_after is not None:
            features = self.meta_after(features)

        # Classifier
        return getattr(self, classifier_attr)(features)

    model.forward = types.MethodType(new_forward, model)
    return model
200 | 
# Ordered registry: value of config['model_type'] -> factory function.
# NOTE: 'Dense169' maps to Dense161 and 'Dense161' maps to Dense169. Those two
# factory functions each build the other's architecture, so this
# cross-registration makes every key resolve to the network it names. Change
# both places together or not at all.
model_map = OrderedDict((
    ('Dense121', Dense121),
    ('Dense169', Dense161),
    ('Dense161', Dense169),
    ('Dense201', Dense201),
    ('Resnet50', Resnet50),
    ('Resnet101', Resnet101),
    ('InceptionV3', InceptionV3),
    ('se_resnext50', se_resnext50),
    ('se_resnext101', se_resnext101),
    ('se_resnet50', se_resnet50),
    ('se_resnet101', se_resnet101),
    ('se_resnet152', se_resnet152),
    ('resnext101', resnext101),
    ('resnext101_64', resnext101_64),
    ('senet154', senet154),
    ('polynet', polynet),
    ('dpn92', dpn92),
    ('dpn68b', dpn68b),
    ('nasnetamobile', nasnetamobile),
    ('resnext101_32_8_wsl', resnext101_32_8_wsl),
    ('resnext101_32_16_wsl', resnext101_32_16_wsl),
    ('resnext101_32_32_wsl', resnext101_32_32_wsl),
    ('resnext101_32_48_wsl', resnext101_32_48_wsl),
    ('efficientnet-b0', efficientnet_b0),
    ('efficientnet-b1', efficientnet_b1),
    ('efficientnet-b2', efficientnet_b2),
    ('efficientnet-b3', efficientnet_b3),
    ('efficientnet-b4', efficientnet_b4),
    ('efficientnet-b5', efficientnet_b5),
    ('efficientnet-b6', efficientnet_b6),
    ('efficientnet-b7', efficientnet_b7),
))
233 | 
def getModel(config):
    """Return a zero-argument builder for the configured model.

    Args:
        config: Dictionary with the configuration. `config['model_type']`
            selects the entry in `model_map`; the whole dictionary is passed
            on to the selected factory when the builder is called.

    Returns:
        A callable taking no arguments that builds and returns the model.

    Raises:
        ValueError: If `config['model_type']` is not a key of `model_map`.
    """
    model_type = config['model_type']
    if model_type not in model_map:
        # Bug fix: the original formatted config['model_name'], a key this
        # function never otherwise reads; when it is absent the lookup raised
        # KeyError instead of the documented ValueError.
        raise ValueError('Name of model unknown %s' % model_type)
    func = model_map[model_type]

    @functools.wraps(func)
    def model():
        return func(config)

    return model


--------------------------------------------------------------------------------
/cfgs/2019/test_effb0_ss.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import h5py
  4 | import re
  5 | import csv
  6 | import numpy as np
  7 | from glob import glob
  8 | import scipy
  9 | import pickle
 10 | import imagesize
 11 | 
def init(mdlParams_):
    """Build the configuration dictionary for this EfficientNet-B0 setup.

    Besides setting constants, this function touches the filesystem: it reads
    label CSV files under `<dataDir>/labels/`, lists image files under
    `<dataDir>/images/`, loads the cross-validation index pickle from
    `saveDir`, and reads every image's size to precompute ordered crop
    positions.

    Args:
        mdlParams_: Mapping from which only 'pathBase' is read here.

    Returns:
        dict: The populated `mdlParams` configuration.
    """
    mdlParams = {}
    # Save summaries and model here
    mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/'
    # Data is loaded from here
    mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019'

    ### Model Selection ###
    mdlParams['model_type'] = 'efficientnet-b0'
    mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll']
    mdlParams['file_ending'] = '.png'
    mdlParams['exclude_inds'] = False
    mdlParams['same_sized_crops'] = True
    mdlParams['multiCropEval'] = 9
    mdlParams['var_im_size'] = True
    mdlParams['orderedCrop'] = True
    mdlParams['voting_scheme'] = 'average'    
    mdlParams['classification'] = True
    mdlParams['balance_classes'] = 9
    mdlParams['extra_fac'] = 1.0
    mdlParams['numClasses'] = 9
    mdlParams['no_c9_eval'] = True
    mdlParams['numOut'] = mdlParams['numClasses']
    mdlParams['numCV'] = 5
    mdlParams['trans_norm_first'] = True
    # Scale up for b1-b7
    mdlParams['input_size'] = [224,224,3]     

    ### Training Parameters ###
    # Batch size
    mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs'])
    # Initial learning rate
    mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs'])
    # Lower learning rate after no improvement over this many epochs
    # (the consumer of this value is outside this file)
    mdlParams['lowerLRAfter'] = 25
    # If there is no validation set, start lowering the LR after X steps
    mdlParams['lowerLRat'] = 50
    # Divide learning rate by this value
    mdlParams['LRstep'] = 5
    # Maximum number of training iterations
    mdlParams['training_steps'] = 60 #250
    # Display error every X steps
    mdlParams['display_step'] = 10
    # Scale?
    mdlParams['scale_targets'] = False
    # Peek at test error during training? (generally, don't do this!)
    mdlParams['peak_at_testerr'] = False
    # Print trainerr
    mdlParams['print_trainerr'] = False
    # Subtract trainset mean?
    mdlParams['subtract_set_mean'] = False
    mdlParams['setMean'] = np.array([0.0, 0.0, 0.0])   
    mdlParams['setStd'] = np.array([1.0, 1.0, 1.0])   

    # Data AUG
    #mdlParams['full_color_distort'] = True
    mdlParams['autoaugment'] = False     
    mdlParams['flip_lr_ud'] = True
    mdlParams['full_rot'] = 180
    mdlParams['scale'] = (0.8,1.2)
    mdlParams['shear'] = 10
    mdlParams['cutout'] = 16

    ### Data ###
    mdlParams['preload'] = False
    # Labels first
    # Targets, as dictionary, indexed by im file name
    mdlParams['labels_dict'] = {}
    path1 = mdlParams['dataDir'] + '/labels/'
    # All sets
    allSets = glob(path1 + '*/')   
    # Go through all sets
    for i in range(len(allSets)):
        # Check if want to include this dataset (substring match on the folder path)
        foundSet = False
        for j in range(len(mdlParams['dataset_names'])):
            if mdlParams['dataset_names'][j] in allSets[i]:
                foundSet = True
        if not foundSet:
            continue                
        # Find csv file: j stops at the first path containing 'csv'
        # NOTE(review): if no such path exists, j is left at the last file (or
        # unchanged from the loop above when the folder is empty), so a
        # non-CSV file may be opened -- confirm every label folder has a CSV.
        files = sorted(glob(allSets[i]+'*'))
        for j in range(len(files)):
            if 'csv' in files[j]:
                break
        # Load csv file
        with open(files[j], newline='') as csvfile:
            labels_str = csv.reader(csvfile, delimiter=',', quotechar='|')
            for row in labels_str:
                # Skip the header row
                if 'image' == row[0]:
                    continue
                #if 'ISIC' in row[0] and '_downsampled' in row[0]:
                #    print(row[0])
                # Skip an image when its '_downsampled' variant was already stored
                if row[0] + '_downsampled' in mdlParams['labels_dict']:
                    print("removed",row[0] + '_downsampled')
                    continue
                # Columns 1..7 are always read; columns 8 and 9 default to 0
                # when missing or empty.
                if mdlParams['numClasses'] == 7:
                    mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))])
                elif mdlParams['numClasses'] == 8:
                    if len(row) < 9 or row[8] == '':
                        class_8 = 0
                    else:
                        class_8 = int(float(row[8]))
                    mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8])
                elif mdlParams['numClasses'] == 9:
                    if len(row) < 9 or row[8] == '':
                        class_8 = 0
                    else:
                        class_8 = int(float(row[8]))  
                    if len(row) < 10 or row[9] == '':
                        class_9 = 0
                    else:
                        class_9 = int(float(row[9]))                                           
                    mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9])
    # Save all im paths here
    mdlParams['im_paths'] = []
    mdlParams['labels_list'] = []
    # Define the sets
    path1 = mdlParams['dataDir'] + '/images/'
    # All sets
    allSets = sorted(glob(path1 + '*/'))
    # Ids which name the folders
    # Make official first dataset: move the folder matching dataset_names[0] to the front
    for i in range(len(allSets)):
        if mdlParams['dataset_names'][0] in allSets[i]:
            temp = allSets[i]
            allSets.remove(allSets[i])
            allSets.insert(0, temp)
    print(allSets)        
    # Set of keys, for marking old HAM10000
    mdlParams['key_list'] = []
    if mdlParams['exclude_inds']:
        # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
        with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f:
            indices_exclude = pickle.load(f)          
        exclude_list = []    
    for i in range(len(allSets)):
        # All files in that set
        files = sorted(glob(allSets[i]+'*'))
        # Check if there is something in there, if not, discard
        if len(files) == 0:
            continue
        # Check if want to include this dataset
        foundSet = False
        for j in range(len(mdlParams['dataset_names'])):
            if mdlParams['dataset_names'][j] in allSets[i]:
                foundSet = True
        if not foundSet:
            continue                    
        for j in range(len(files)):
            if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]:                
                # Add according label, find it first
                # Every label key is tested as a substring (key + file ending)
                # of the path; each match appends a label, but the path itself
                # is appended only once below.
                found_already = False
                for key in mdlParams['labels_dict']:
                    if key + mdlParams['file_ending'] in files[j]:
                        if found_already:
                            print("Found already:",key,files[j])                     
                        mdlParams['key_list'].append(key)
                        mdlParams['labels_list'].append(mdlParams['labels_dict'][key])
                        found_already = True
                if found_already:
                    mdlParams['im_paths'].append(files[j])     
                    if mdlParams['exclude_inds']:
                        for key in indices_exclude:
                            if key in files[j]:
                                exclude_list.append(indices_exclude[key])                                       
    # Convert label list to array
    mdlParams['labels_array'] = np.array(mdlParams['labels_list'])
    # Print the per-column mean of the label array
    print(np.mean(mdlParams['labels_array'],axis=0))        
    # Create indices list with HAM10000 only
    mdlParams['HAM10000_inds'] = []
    HAM_START = 24306
    HAM_END = 34320
    for j in range(len(mdlParams['key_list'])):
        # The last run of digits in the key is taken as its numeric id;
        # keys without digits are skipped.
        # NOTE(review): the bare except also hides unrelated errors.
        try:
            curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1]
        except:
            continue
        if curr_id >= HAM_START and curr_id <= HAM_END:
            mdlParams['HAM10000_inds'].append(j)
    mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds'])    
    print("Len ham",len(mdlParams['HAM10000_inds']))   
    # Perhaps preload images
    # NOTE(review): this branch reads mdlParams['input_size_load'], which is
    # not set anywhere in this function, and relies on scipy.ndimage.imread,
    # which is not available in recent SciPy releases -- verify before
    # enabling 'preload' or 'subtract_set_mean'.
    if mdlParams['preload']:
        mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8)
        for i in range(len(mdlParams['im_paths'])):
            x = scipy.ndimage.imread(mdlParams['im_paths'][i])
            #x = x.astype(np.float32)   
            # Scale to 0-1 
            #min_x = np.min(x)
            #max_x = np.max(x)
            #x = (x-min_x)/(max_x-min_x)
            mdlParams['images_array'][i,:,:,:] = x
            if i%1000 == 0:
                print(i+1,"images loaded...")     
    if mdlParams['subtract_set_mean']:
        mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3])
        for i in range(len(mdlParams['im_paths'])):
            x = scipy.ndimage.imread(mdlParams['im_paths'][i])
            x = x.astype(np.float32)   
            # Scale to 0-1 
            min_x = np.min(x)
            max_x = np.max(x)
            x = (x-min_x)/(max_x-min_x)
            # Mean over the first two axes, one value per channel
            mdlParams['images_means'][i,:] = np.mean(x,(0,1))
            if i%1000 == 0:
                print(i+1,"images processed for mean...")         

    ### Define Indices ###
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f:
        indices = pickle.load(f)            
    mdlParams['trainIndCV'] = indices['trainIndCV']
    mdlParams['valIndCV'] = indices['valIndCV']
    if mdlParams['exclude_inds']:
        # Remove the excluded image indices from every train/val fold
        exclude_list = np.array(exclude_list)
        all_inds = np.arange(len(mdlParams['im_paths']))
        exclude_inds = all_inds[exclude_list.astype(bool)]
        for i in range(len(mdlParams['trainIndCV'])):
            mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds)
        for i in range(len(mdlParams['valIndCV'])):
            mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds)     
    # Consider case with more than one set: every index from 25331 onward is
    # appended to the training indices of each fold
    if len(mdlParams['dataset_names']) > 1:
        restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0]))
        for i in range(mdlParams['numCV']):
            mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds))        
    print("Train")
    for i in range(len(mdlParams['trainIndCV'])):
        print(mdlParams['trainIndCV'][i].shape)
    print("Val")
    for i in range(len(mdlParams['valIndCV'])):
        print(mdlParams['valIndCV'][i].shape)    

    # Use this for ordered multi crops
    if mdlParams['orderedCrop']:
        # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc.
        mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
        #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
        for u in range(len(mdlParams['im_paths'])):
            # NOTE(review): the imagesize package documents get() as returning
            # (width, height), so these two names look swapped. The sanity
            # check below uses the same order and allocates its test array as
            # [width_test, height_test], so the code is self-consistent --
            # confirm before renaming anything.
            height, width = imagesize.get(mdlParams['im_paths'][u])
            # If a side is smaller than the input size, set it to the input
            # size and rescale the other side.
            # NOTE(review): int() is applied to the ratio before the
            # multiplication, so the other side is only scaled by a whole
            # number -- confirm this matches the resizing done at load time.
            if width < mdlParams['input_size'][0]:
                height = int(mdlParams['input_size'][0]/float(width))*height
                width = mdlParams['input_size'][0]
            if height < mdlParams['input_size'][0]:
                width = int(mdlParams['input_size'][0]/float(height))*width
                height = mdlParams['input_size'][0]            
            ind = 0
            # sqrt(multiCropEval) x sqrt(multiCropEval) grid of crop centres,
            # evenly spaced from half an input size to the far edge minus half
            # an input size
            for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
                for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
                    mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1))
                    mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1))
                    #mdlParams['imSizes'][u,ind,0] = curr_im_size[0]

                    ind += 1
        # Sanity checks
        #print("Positions",mdlParams['cropPositions'])
        # Test image sizes: cut every crop from a zero array of the (adjusted)
        # image size and report crops whose shape differs from the input size
        height = mdlParams['input_size'][0]
        width = mdlParams['input_size'][1]
        for u in range(len(mdlParams['im_paths'])):
            height_test, width_test = imagesize.get(mdlParams['im_paths'][u])
            if width_test < mdlParams['input_size'][0]:
                height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test
                width_test = mdlParams['input_size'][0]
            if height_test < mdlParams['input_size'][0]:
                width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test
                height_test = mdlParams['input_size'][0]                
            test_im = np.zeros([width_test,height_test]) 
            for i in range(mdlParams['multiCropEval']):
                im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width]
                if im_crop.shape[0] != mdlParams['input_size'][0]:
                    print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u])    
                if im_crop.shape[1] != mdlParams['input_size'][1]:
                    print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u])        
    return mdlParams


--------------------------------------------------------------------------------
/cfgs/2019/test_effb0_rr.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import h5py
  4 | import re
  5 | import csv
  6 | import numpy as np
  7 | from glob import glob
  8 | import scipy
  9 | import pickle
 10 | import imagesize
 11 | 
 12 | def init(mdlParams_):
 13 |     mdlParams = {}
 14 |     # Save summaries and model here
 15 |     mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/'
 16 |     # Data is loaded from here
 17 |     mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019'
 18 | 
 19 |     ### Model Selection ###
 20 |     mdlParams['model_type'] = 'efficientnet-b0'
 21 |     mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll']
 22 |     mdlParams['file_ending'] = '.png'
 23 |     mdlParams['exclude_inds'] = False
 24 |     mdlParams['same_sized_crops'] = False
 25 |     mdlParams['multiCropEval'] = 9
 26 |     mdlParams['var_im_size'] = False
 27 |     mdlParams['orderedCrop'] = False
 28 |     mdlParams['voting_scheme'] = 'average'    
 29 |     mdlParams['classification'] = True
 30 |     mdlParams['balance_classes'] = 9
 31 |     mdlParams['extra_fac'] = 1.0
 32 |     mdlParams['numClasses'] = 9
 33 |     mdlParams['no_c9_eval'] = True
 34 |     mdlParams['numOut'] = mdlParams['numClasses']
 35 |     mdlParams['numCV'] = 5
 36 |     mdlParams['trans_norm_first'] = True
 37 |     # Deterministic cropping
 38 |     mdlParams['deterministic_eval'] = True
 39 |     mdlParams['numCropPositions'] = 1
 40 |     num_scales = 4
 41 |     all_scales = [1.0,0.5,0.75,0.25,0.9,0.6,0.4]
 42 |     mdlParams['cropScales'] = all_scales[:num_scales]
 43 |     mdlParams['cropFlipping'] = 4
 44 |     mdlParams['multiCropEval'] = mdlParams['numCropPositions']*len(mdlParams['cropScales'])*mdlParams['cropFlipping']
 45 |     mdlParams['offset_crop'] = 0.2    
 46 |     # Scale up for b1-b7
 47 |     mdlParams['input_size'] = [224,224,3]     
 48 | 
 49 |     ### Training Parameters ###
 50 |     # Batch size
 51 |     mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs'])
 52 |     # Initial learning rate
 53 |     mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs'])
 54 |     # Lower learning rate after no improvement over 100 epochs
 55 |     mdlParams['lowerLRAfter'] = 25
 56 |     # If there is no validation set, start lowering the LR after X steps
 57 |     mdlParams['lowerLRat'] = 50
 58 |     # Divide learning rate by this value
 59 |     mdlParams['LRstep'] = 5
 60 |     # Maximum number of training iterations
 61 |     mdlParams['training_steps'] = 60 #250
 62 |     # Display error every X steps
 63 |     mdlParams['display_step'] = 10
 64 |     # Scale?
 65 |     mdlParams['scale_targets'] = False
 66 |     # Peak at test error during training? (generally, dont do this!)
 67 |     mdlParams['peak_at_testerr'] = False
 68 |     # Print trainerr
 69 |     mdlParams['print_trainerr'] = False
 70 |     # Subtract trainset mean?
 71 |     mdlParams['subtract_set_mean'] = False
 72 |     mdlParams['setMean'] = np.array([0.0, 0.0, 0.0])   
 73 |     mdlParams['setStd'] = np.array([1.0, 1.0, 1.0])   
 74 | 
 75 |     # Data AUG
 76 |     #mdlParams['full_color_distort'] = True
 77 |     mdlParams['autoaugment'] = False     
 78 |     mdlParams['flip_lr_ud'] = True
 79 |     mdlParams['full_rot'] = 180
 80 |     mdlParams['scale'] = (0.8,1.2)
 81 |     mdlParams['shear'] = 10
 82 |     mdlParams['cutout'] = 16
 83 | 
 84 |     ### Data ###
 85 |     mdlParams['preload'] = False
 86 |     # Labels first
 87 |     # Targets, as dictionary, indexed by im file name
 88 |     mdlParams['labels_dict'] = {}
 89 |     path1 = mdlParams['dataDir'] + '/labels/'
 90 |      # All sets
 91 |     allSets = glob(path1 + '*/')   
 92 |     # Go through all sets
 93 |     for i in range(len(allSets)):
 94 |         # Check if want to include this dataset
 95 |         foundSet = False
 96 |         for j in range(len(mdlParams['dataset_names'])):
 97 |             if mdlParams['dataset_names'][j] in allSets[i]:
 98 |                 foundSet = True
 99 |         if not foundSet:
100 |             continue                
101 |         # Find csv file
102 |         files = sorted(glob(allSets[i]+'*'))
103 |         for j in range(len(files)):
104 |             if 'csv' in files[j]:
105 |                 break
106 |         # Load csv file
107 |         with open(files[j], newline='') as csvfile:
108 |             labels_str = csv.reader(csvfile, delimiter=',', quotechar='|')
109 |             for row in labels_str:
110 |                 if 'image' == row[0]:
111 |                     continue
112 |                 #if 'ISIC' in row[0] and '_downsampled' in row[0]:
113 |                 #    print(row[0])
114 |                 if row[0] + '_downsampled' in mdlParams['labels_dict']:
115 |                     print("removed",row[0] + '_downsampled')
116 |                     continue
117 |                 if mdlParams['numClasses'] == 7:
118 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))])
119 |                 elif mdlParams['numClasses'] == 8:
120 |                     if len(row) < 9 or row[8] == '':
121 |                         class_8 = 0
122 |                     else:
123 |                         class_8 = int(float(row[8]))
124 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8])
125 |                 elif mdlParams['numClasses'] == 9:
126 |                     if len(row) < 9 or row[8] == '':
127 |                         class_8 = 0
128 |                     else:
129 |                         class_8 = int(float(row[8]))  
130 |                     if len(row) < 10 or row[9] == '':
131 |                         class_9 = 0
132 |                     else:
133 |                         class_9 = int(float(row[9]))                                           
134 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9])
135 |     # Save all im paths here
136 |     mdlParams['im_paths'] = []
137 |     mdlParams['labels_list'] = []
138 |     # Define the sets
139 |     path1 = mdlParams['dataDir'] + '/images/'
140 |     # All sets
141 |     allSets = sorted(glob(path1 + '*/'))
142 |     # Ids which name the folders
143 |     # Make official first dataset
144 |     for i in range(len(allSets)):
145 |         if mdlParams['dataset_names'][0] in allSets[i]:
146 |             temp = allSets[i]
147 |             allSets.remove(allSets[i])
148 |             allSets.insert(0, temp)
149 |     print(allSets)        
150 |     # Set of keys, for marking old HAM10000
151 |     mdlParams['key_list'] = []
152 |     if mdlParams['exclude_inds']:
153 |         with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f:
154 |             indices_exclude = pickle.load(f)          
155 |         exclude_list = []    
156 |     for i in range(len(allSets)):
157 |         # All files in that set
158 |         files = sorted(glob(allSets[i]+'*'))
159 |         # Check if there is something in there, if not, discard
160 |         if len(files) == 0:
161 |             continue
162 |         # Check if want to include this dataset
163 |         foundSet = False
164 |         for j in range(len(mdlParams['dataset_names'])):
165 |             if mdlParams['dataset_names'][j] in allSets[i]:
166 |                 foundSet = True
167 |         if not foundSet:
168 |             continue                    
169 |         for j in range(len(files)):
170 |             if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]:                
171 |                 # Add according label, find it first
172 |                 found_already = False
173 |                 for key in mdlParams['labels_dict']:
174 |                     if key + mdlParams['file_ending'] in files[j]:
175 |                         if found_already:
176 |                             print("Found already:",key,files[j])                     
177 |                         mdlParams['key_list'].append(key)
178 |                         mdlParams['labels_list'].append(mdlParams['labels_dict'][key])
179 |                         found_already = True
180 |                 if found_already:
181 |                     mdlParams['im_paths'].append(files[j])     
182 |                     if mdlParams['exclude_inds']:
183 |                         for key in indices_exclude:
184 |                             if key in files[j]:
185 |                                 exclude_list.append(indices_exclude[key])                                       
186 |     # Convert label list to array
187 |     mdlParams['labels_array'] = np.array(mdlParams['labels_list'])
188 |     print(np.mean(mdlParams['labels_array'],axis=0))        
189 |     # Create indices list with HAM10000 only
190 |     mdlParams['HAM10000_inds'] = []
191 |     HAM_START = 24306
192 |     HAM_END = 34320
193 |     for j in range(len(mdlParams['key_list'])):
194 |         try:
195 |             curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1]
196 |         except:
197 |             continue
198 |         if curr_id >= HAM_START and curr_id <= HAM_END:
199 |             mdlParams['HAM10000_inds'].append(j)
200 |     mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds'])    
201 |     print("Len ham",len(mdlParams['HAM10000_inds']))   
202 |     # Perhaps preload images
203 |     if mdlParams['preload']:
204 |         mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8)
205 |         for i in range(len(mdlParams['im_paths'])):
206 |             x = scipy.ndimage.imread(mdlParams['im_paths'][i])
207 |             #x = x.astype(np.float32)   
208 |             # Scale to 0-1 
209 |             #min_x = np.min(x)
210 |             #max_x = np.max(x)
211 |             #x = (x-min_x)/(max_x-min_x)
212 |             mdlParams['images_array'][i,:,:,:] = x
213 |             if i%1000 == 0:
214 |                 print(i+1,"images loaded...")     
215 |     if mdlParams['subtract_set_mean']:
216 |         mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3])
217 |         for i in range(len(mdlParams['im_paths'])):
218 |             x = scipy.ndimage.imread(mdlParams['im_paths'][i])
219 |             x = x.astype(np.float32)   
220 |             # Scale to 0-1 
221 |             min_x = np.min(x)
222 |             max_x = np.max(x)
223 |             x = (x-min_x)/(max_x-min_x)
224 |             mdlParams['images_means'][i,:] = np.mean(x,(0,1))
225 |             if i%1000 == 0:
226 |                 print(i+1,"images processed for mean...")         
227 | 
228 |     ### Define Indices ###
229 |     # Just divide into 5 equally large sets
230 |     with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f:
231 |         indices = pickle.load(f)           
232 |     mdlParams['trainIndCV'] = indices['trainIndCV']
233 |     mdlParams['valIndCV'] = indices['valIndCV']
234 |     if mdlParams['exclude_inds']:
235 |         exclude_list = np.array(exclude_list)
236 |         all_inds = np.arange(len(mdlParams['im_paths']))
237 |         exclude_inds = all_inds[exclude_list.astype(bool)]
238 |         for i in range(len(mdlParams['trainIndCV'])):
239 |             mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds)
240 |         for i in range(len(mdlParams['valIndCV'])):
241 |             mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds)     
242 |     # Consider case with more than one set
243 |     if len(mdlParams['dataset_names']) > 1:
244 |         restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0]))
245 |         for i in range(mdlParams['numCV']):
246 |             mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds))        
247 |     print("Train")
248 |     for i in range(len(mdlParams['trainIndCV'])):
249 |         print(mdlParams['trainIndCV'][i].shape)
250 |     print("Val")
251 |     for i in range(len(mdlParams['valIndCV'])):
252 |         print(mdlParams['valIndCV'][i].shape)    
253 | 
254 |     # Use this for ordered multi crops
255 |     if mdlParams['orderedCrop']:
256 |         # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc.
257 |         mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
258 |         #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
259 |         for u in range(len(mdlParams['im_paths'])):
260 |             height, width = imagesize.get(mdlParams['im_paths'][u])
261 |             if width < mdlParams['input_size'][0]:
262 |                 height = int(mdlParams['input_size'][0]/float(width))*height
263 |                 width = mdlParams['input_size'][0]
264 |             if height < mdlParams['input_size'][0]:
265 |                 width = int(mdlParams['input_size'][0]/float(height))*width
266 |                 height = mdlParams['input_size'][0]            
267 |             ind = 0
268 |             for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
269 |                 for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
270 |                     mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1))
271 |                     mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1))
272 |                     #mdlParams['imSizes'][u,ind,0] = curr_im_size[0]
273 | 
274 |                     ind += 1
275 |         # Sanity checks
276 |         #print("Positions",mdlParams['cropPositions'])
277 |         # Test image sizes
278 |         height = mdlParams['input_size'][0]
279 |         width = mdlParams['input_size'][1]
280 |         for u in range(len(mdlParams['im_paths'])):
281 |             height_test, width_test = imagesize.get(mdlParams['im_paths'][u])
282 |             if width_test < mdlParams['input_size'][0]:
283 |                 height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test
284 |                 width_test = mdlParams['input_size'][0]
285 |             if height_test < mdlParams['input_size'][0]:
286 |                 width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test
287 |                 height_test = mdlParams['input_size'][0]                
288 |             test_im = np.zeros([width_test,height_test]) 
289 |             for i in range(mdlParams['multiCropEval']):
290 |                 im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width]
291 |                 if im_crop.shape[0] != mdlParams['input_size'][0]:
292 |                     print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u])    
293 |                 if im_crop.shape[1] != mdlParams['input_size'][1]:
294 |                     print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u])       
295 |     return mdlParams


--------------------------------------------------------------------------------
/ensemble.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pickle
  3 | import re
  4 | import os
  5 | import sys
  6 | import itertools
  7 | from glob import glob
  8 | from sklearn.metrics import confusion_matrix, f1_score, auc, roc_curve
  9 | from sklearn.ensemble import RandomForestClassifier
 10 | from sklearn.svm import SVC
 11 | from joblib import Parallel, delayed
 12 | import multiprocessing
 13 | import copy
 14 | 
 15 | 
 16 | # Just assume fixed CV size for ensemble with evaluation
 17 | cvSize = 5
 18 | numClasses = 8
 19 | 
 20 | # First argument is folder, filled with CV results files
 21 | all_preds_path = sys.argv[1]
 22 | 
 23 | # Second argument indicates, whether we are only generating predictions or actually evaluating performance on something
 24 | if 'eval' in sys.argv[2]:
 25 |     evaluate = True
 26 |     # Determin if vote or average is used
 27 |     if 'vote' in sys.argv[2]:
 28 |         evaluate_method = 'vote'
 29 |     else:
 30 |         evaluate_method = 'average'
 31 |     # Determine if exhaustive combination search or ordered search is used
 32 |     if 'exhaust' in sys.argv[2]:
 33 |         exhaustive_search = True
 34 |         num_top_models = [int(s) for s in re.findall(r'\d+',sys.argv[2])][-1]
 35 |     else:
 36 |         exhaustive_search = False
 37 |     # Third argument indicates where subset should be saved
 38 |     if 'subSet' in sys.argv[3]:
 39 |         subSetPath = sys.argv[3]
 40 |     else:
 41 |         subSetPath = None
 42 | else:
 43 |     evaluate = False
 44 |     acceptedList = []
 45 |     if 'last' in sys.argv[2]:
 46 |         acceptedList.append('last')
 47 |     if 'best' in sys.argv[2]:
 48 |         acceptedList.append('best')
 49 |     if 'meta' in sys.argv[2]:
 50 |         acceptedList.append('meta')                
 51 |     # Third argument indicates whether some subset should be used
 52 |     if 'subSet' in sys.argv[3]:
 53 |         # Load subset file
 54 |         with open(sys.argv[3],'rb') as f:
 55 |             subSetDict = pickle.load(f)       
 56 |         subSet = subSetDict['subSet']
 57 |     else:
 58 |         subSet = None    
 59 | 
 60 | # Fourth argument indicates csv path to save final results into
 61 | if len(sys.argv) > 4 and 'csvFile' in sys.argv[4]:
 62 |     csvPath = sys.argv[4]
 63 |     origFilePath = sys.argv[5]
 64 | else:
 65 |     csvPath = None
 66 | 
 67 | # Function to get some metrics back
 68 | def get_metrics(predictions,targets):
 69 |     # Calculate metrics
 70 |     # Accuarcy
 71 |     acc = np.mean(np.equal(np.argmax(predictions,1),np.argmax(targets,1)))
 72 |     # Confusion matrix
 73 |     conf = confusion_matrix(np.argmax(targets,1),np.argmax(predictions,1))     
 74 |     # Class weighted accuracy
 75 |     wacc = conf.diagonal()/conf.sum(axis=1)  
 76 |     # Auc
 77 |     fpr = {}
 78 |     tpr = {}
 79 |     roc_auc = np.zeros([numClasses])
 80 |     for i in range(numClasses):
 81 |         fpr[i], tpr[i], _ = roc_curve(targets[:, i], predictions[:, i])
 82 |         roc_auc[i] = auc(fpr[i], tpr[i])       
 83 |     # F1 Score
 84 |     f1 = f1_score(np.argmax(predictions,1),np.argmax(targets,1),average='weighted')        
 85 |     # Print
 86 |     print("Accuracy:",acc)
 87 |     print("F1-Score:",f1)
 88 |     print("WACC:",wacc)
 89 |     print("Mean WACC:",np.mean(wacc))
 90 |     print("AUC:",roc_auc)
 91 |     print("Mean Auc:",np.mean(roc_auc))        
 92 |     return acc, f1, wacc, roc_auc
 93 | 
 94 | # If its actual evaluation, evaluate each CV indipendently, show results both for each CV set and all of them together
 95 | if evaluate:
 96 |     # Go through all files
 97 |     files = sorted(glob(all_preds_path+'/*'))
 98 |     # Because of unkown prediction size, dont use matrix
 99 |     final_preds = {}
100 |     final_targets = {}
101 |     all_waccs = []
102 |     accum_preds = {}
103 |     # Define each pred size in loop
104 |     firstLoaded = False
105 |     for j in range(len(files)):
106 |         # Skip if it is a directory
107 |         if os.path.isdir(files[j]):
108 |             continue
109 |         # Skip if not a pkl file
110 |         if '.pkl' not in files[j]:
111 |             print("Remove non-pkl files")
112 |             break
113 |         # Load file
114 |         with open(files[j],'rb') as f:
115 |             allDataCurr = pickle.load(f)    
116 |         # Get predictions
117 |         if not firstLoaded:
118 |             # Define accumulated prediction size
119 |             for i in range(cvSize):
120 |                 accum_preds[i] = np.zeros([len(files),len(allDataCurr['bestPred'][i]),numClasses])
121 |             firstLoaded = True
122 |         # Write preds into array
123 |         #print(files[j],allDataCurr['bestPred'][0].shape)
124 |         wacc_avg = 0
125 |         for i in range(cvSize):
126 |             accum_preds[i][j,:,:] = allDataCurr['bestPred'][i]
127 |             final_targets[i] = allDataCurr['targets'][i]
128 |             # Confusion matrix
129 |             conf = confusion_matrix(np.argmax(allDataCurr['targets'][i],1),np.argmax(allDataCurr['bestPred'][i],1))     
130 |             # Class weighted accuracy
131 |             wacc_avg += np.mean(conf.diagonal()/conf.sum(axis=1))  
132 |         wacc_avg = wacc_avg/cvSize    
133 |         all_waccs.append(wacc_avg)         
134 |         # Print performance of model + name
135 |         print("Model:",files[j],"WACC:",wacc_avg)
136 |     # Print results per cv
137 |     # Averaging predictions
138 |     f1_avg = 0
139 |     acc_avg = 0
140 |     auc_avg = np.zeros([numClasses])
141 |     wacc_avg = np.zeros([numClasses])
142 |     # Voting with predictions
143 |     f1_vote = 0
144 |     acc_vote = 0
145 |     auc_vote = np.zeros([numClasses])
146 |     wacc_vote = np.zeros([numClasses])
147 |     # Linear SVM on predictions
148 |     f1_linsvm = 0
149 |     acc_linsvm = 0
150 |     auc_linsvm = np.zeros([numClasses])
151 |     wacc_linsvm = np.zeros([numClasses])
152 |     # RF on predictions
153 |     f1_rf = 0
154 |     acc_rf = 0
155 |     auf_rf = np.zeros([numClasses])
156 |     wacc_rf = np.zeros([numClasses])
157 |     # Helper function to determine top combination
158 |     def evalEnsemble(currComb,eval_auc=False):
159 |         currWacc = np.zeros([cvSize])
160 |         currAUC = np.zeros([cvSize])
161 |         for i in range(cvSize):
162 |             if evaluate_method == 'vote':
163 |                 pred_argmax = np.argmax(accum_preds[i][currComb,:,:],2)   
164 |                 pred_eval = np.zeros([pred_argmax.shape[1],numClasses]) 
165 |                 for j in range(pred_eval.shape[0]):
166 |                     pred_eval[j,:] = np.bincount(pred_argmax[:,j],minlength=numClasses)  
167 |             else:
168 |                 pred_eval = np.mean(accum_preds[i][currComb,:,:],0)
169 |             # Confusion matrix
170 |             conf = confusion_matrix(np.argmax(final_targets[i],1),np.argmax(pred_eval,1))     
171 |             # Class weighted accuracy
172 |             currWacc[i] = np.mean(conf.diagonal()/conf.sum(axis=1))   
173 |             if eval_auc:
174 |                 currAUC_ = np.zeros([numClasses])
175 |                 for j in range(numClasses):
176 |                     fpr, tpr, _ = roc_curve(final_targets[i][:,j], pred_eval[:, j])
177 |                     currAUC_[j] = auc(fpr, tpr)
178 |                 currAUC[i] = np.mean(currAUC_)                
179 |         if eval_auc:
180 |             currAUCstd = np.std(currAUC)
181 |             currAUC = np.mean(currAUC)
182 |         else:
183 |             currAUCstd = currAUC
184 |         currWaccStd = np.std(currWacc)
185 |         currWacc = np.mean(currWacc)
186 |         if eval_auc:
187 |             return currWacc, currWaccStd, currAUC, currAUCstd       
188 |         else:
189 |             return currWacc
190 |     if exhaustive_search:
191 |         # First: determine best subset based on average CV wacc
192 |         # Select best subset based on wacc metric
193 |         # Only take top N models
194 |         top_inds = np.argsort(-np.array(all_waccs))
195 |         elements = top_inds[:num_top_models]
196 |         allCombs = []
197 |         for L in range(0, len(elements)+1):
198 |             for subset in itertools.combinations(elements, L):
199 |                 allCombs.append(subset)
200 |                 #print(subset)
201 |         print("Number of combinations",len(allCombs))
202 |         print("Models considered")
203 |         for i in range(len(elements)):
204 |             print("ID",elements[i],files[elements[i]]) 
205 |         #allWaccs = np.zeros([len(allCombs)])
206 |         num_cores = multiprocessing.cpu_count()
207 |         print("Cores available",num_cores)
208 |         allWaccs = Parallel(n_jobs=num_cores)(delayed(evalEnsemble)(comb) for comb in allCombs)
209 |         # Sort by highest value
210 |         allWaccsSrt = -np.sort(-np.array(allWaccs))
211 |         srtInds = np.argsort(-np.array(allWaccs))
212 |         allCombsSrt = np.array(allCombs)[srtInds]
213 |         for i in range(5):
214 |             print("Top",i+1)
215 |             print("Best WACC",allWaccsSrt[i])       
216 |             wacc, wacc_std, auc_val, auc_val_std = evalEnsemble(allCombsSrt[i],eval_auc=True)
217 |             print("Metrics WACC %.4f +- %.4f AUC %.4f +- %.4f"%(wacc,wacc_std,auc_val,auc_val_std))     
218 |             print("Best Combination:",allCombsSrt[i])
219 |             print("Corresponding File Names")
220 |             subSetDict = {}
221 |             subSetDict['subSet'] = []
222 |             for j in allCombsSrt[i]:
223 |                 print("ID",j,files[j])  
224 |                 # Add filename without last part, indicating the type "best/last/meta/full"
225 |                 if i == 0:                
226 |                     subSetDict['subSet'].append(files[j])    
227 |             print("---------------------------------------------")                 
228 |         bestComb = allCombsSrt[0]     
229 |     else:
230 |         # Only take top N models
231 |         top_inds = np.argsort(-np.array(all_waccs))
232 |         # Go through all top N combs
233 |         allWaccs = np.zeros([len(top_inds)])
234 |         allCombs = []
235 |         for i in range(len(top_inds)):
236 |             allCombs.append([])
237 |             if i==0:
238 |                 allCombs[i].append(top_inds[0])
239 |             else:
240 |                 allCombs[i] = copy.deepcopy(allCombs[i-1])
241 |                 allCombs[i].append(top_inds[i])
242 |             # Test comb
243 |             allWaccs[i] = evalEnsemble(allCombs[i])
244 |         # Sort by highest value
245 |         allWaccsSrt = -np.sort(-np.array(allWaccs))
246 |         srtInds = np.argsort(-np.array(allWaccs))
247 |         allCombsSrt = np.array(allCombs)[srtInds]
248 |         for i in range(len(top_inds)):
249 |             print("Top",i+1)
250 |             print("WACC",allWaccsSrt[i])  
251 |             wacc, wacc_std, auc_val, auc_val_std = evalEnsemble(allCombsSrt[i],eval_auc=True)
252 |             print("Metrics WACC %.4f +- %.4f AUC %.4f +- %.4f"%(wacc,wacc_std,auc_val,auc_val_std))           
253 |             print("Combination:",allCombsSrt[i])
254 |             if i == 0:
255 |                 subSetDict = {}
256 |                 subSetDict['subSet'] = []
257 |                 for j in allCombsSrt[i]:
258 |                     print("ID",j,files[j])  
259 |                     # Add filename without last part, indicating the type "best/last/meta/full"
260 |                     subSetDict['subSet'].append(files[j])
261 |             print("---------------------------------------------") 
262 |         p#rint("Corresponding File Names")  
263 |         #for j in allCombs[-1]:
264 |         #    print("ID",j,files[j])                          
265 |         bestComb = allCombsSrt[0]    
266 |     # Save subset for later
267 |     if subSetPath is not None:
268 |         with open(subSetPath, 'wb') as f:
269 |             pickle.dump(subSetDict, f, pickle.HIGHEST_PROTOCOL)                    
270 | 
271 | else:
272 |     # Only generate predictions. All models predict on the same set -> cv models are equal to full models here    
273 |     # Go through all files
274 |     files = sorted(glob(all_preds_path+'/*'))
275 |     # Because of unkown prediction size, only determin it in the loop
276 |     firstLoaded = False
277 |     ind = 0
278 |     for j in range(len(files)):
279 |         # Skip if not a pkl file
280 |         if '.pkl' not in files[j]:
281 |             continue
282 |         # Potentially check, if this file is among the selected subset
283 |         if subSet is not None:
284 |             # Search
285 |             found = False
286 |             for name in subSet:
287 |                 _, name_only = name.split('ISIC')
288 |                 if name_only in files[j]:
289 |                     found = True
290 |                     break
291 |             if not found:
292 |                 # Check extra for acceptedList inclusion
293 |                 for name in subSet:
294 |                     _, name_only = name.split('ISIC')
295 |                     if name_only[:-13] in files[j]:
296 |                         found = True
297 |                         break
298 |                 if not found:
299 |                     continue
300 |                 # Then check, whether this type of "best,last,meta,full" is desired
301 |                 found = False
302 |                 for name in acceptedList:
303 |                     if name in files[j]:
304 |                         found = True
305 |                         break
306 |                 if not found:
307 |                     continue            
308 |         # Load file
309 |         with open(files[j],'rb') as f:
310 |             allDataCurr = pickle.load(f)    
311 |         # Get predictions
312 |         if not firstLoaded:
313 |             # Define final prediction/targets size, assume fixed CV size
314 |             final_preds = np.zeros([len(allDataCurr['extPred'][0]),numClasses])
315 |             # Define accumulated prediction size
316 |             accum_preds = np.expand_dims(allDataCurr['extPred'][0],0)
317 |             ind += 1
318 |             if len(allDataCurr['extPred']) > 1:
319 |                 for i in range(1,len(allDataCurr['extPred'])):
320 |                     accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][i],0)),0)
321 |                     ind += 1
322 |             else:
323 |                 # Just repeat the first model X times
324 |                 for i in range(1,5):
325 |                     accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][0],0)),0)
326 |                     ind += 1                
327 |             firstLoaded = True
328 |         else:
329 |             # Write preds into array
330 |             if len(allDataCurr['extPred']) > 1:
331 |                 for i in range(len(allDataCurr['extPred'])):
332 |                     accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][i],0)),0)
333 |                     ind += 1
334 |             else:
335 |                 # Just repeat the first model X times
336 |                 for i in range(0,5):
337 |                     accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][0],0)),0)
338 |                     ind += 1                       
339 |         print(files[j])
340 |     # Resize array to actually used size
341 |     print(accum_preds.shape)
342 |     final_preds = accum_preds[:ind,:,:]
343 |     print(final_preds.shape)
344 |     # Average for final predictions
345 |     final_preds = np.mean(final_preds,0)
346 |     class_pred = np.argmax(final_preds,1)
347 |     print(np.mean(final_preds,0))
348 |     # Write into csv file, according to ordered list
349 |     if csvPath is not None:
350 |         # Get order file names from original folder
351 |         files = sorted(glob(origFilePath+'/*'))
352 |         # save into formatted csv file
353 |         with open(csvPath, 'w') as csv_file:
354 |             # First line
355 |             csv_file.write("image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK\n")
356 |             ind = 0
357 |             for file_name in files:
358 |                 if 'ISIC_' not in file_name:
359 |                     continue
360 |                 splits = file_name.split('\\')
361 |                 name = splits[-1]
362 |                 name, _ = name.split('.')
363 |                 csv_file.write(name + "," + str(final_preds[ind,0]) + "," +  str(final_preds[ind,1]) + "," + str(final_preds[ind,2]) + "," + str(final_preds[ind,3]) + "," + str(final_preds[ind,4]) + "," + str(final_preds[ind,5]) + "," + str(final_preds[ind,6]) + "," + str(final_preds[ind,7]) + "," + str(final_preds[ind,8]) + "\n")
364 |                 ind += 1
365 | 
366 | 
367 | 


--------------------------------------------------------------------------------
/cfgs/2019/test_effb0_ss_meta.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import h5py
  4 | import re
  5 | import csv
  6 | import numpy as np
  7 | from glob import glob
  8 | import scipy
  9 | import pickle
 10 | import imagesize
 11 | 
 12 | def init(mdlParams_):
 13 |     mdlParams = {}
 14 |     # Save summaries and model here
 15 |     mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/'
 16 |     # Data is loaded from here
 17 |     mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019'
 18 | 
 19 |     ### Model Selection ###
 20 |     mdlParams['model_type'] = 'efficientnet-b0'
 21 |     mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll']
 22 |     mdlParams['file_ending'] = '.png'
 23 |     mdlParams['exclude_inds'] = False
 24 |     mdlParams['same_sized_crops'] = True
 25 |     mdlParams['multiCropEval'] = 9
 26 |     mdlParams['var_im_size'] = True
 27 |     mdlParams['orderedCrop'] = True
 28 |     mdlParams['voting_scheme'] = 'average'    
 29 |     mdlParams['classification'] = True
 30 |     mdlParams['balance_classes'] = 9
 31 |     mdlParams['extra_fac'] = 1.0
 32 |     mdlParams['numClasses'] = 9
 33 |     mdlParams['no_c9_eval'] = True
 34 |     mdlParams['numOut'] = mdlParams['numClasses']
 35 |     mdlParams['numCV'] = 5
 36 |     mdlParams['trans_norm_first'] = True
 37 |     # Scale up for b1-b7
 38 |     mdlParams['input_size'] = [224,224,3]    
 39 | 
 40 |     ### Training Parameters ###
 41 |     # Batch size
 42 |     mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs'])
 43 |     # Initial learning rate
 44 |     mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs'])
 45 |     # Lower learning rate after no improvement over 100 epochs
 46 |     mdlParams['lowerLRAfter'] = 25
 47 |     # If there is no validation set, start lowering the LR after X steps
 48 |     mdlParams['lowerLRat'] = 50
 49 |     # Divide learning rate by this value
 50 |     mdlParams['LRstep'] = 5
 51 |     # Maximum number of training iterations
 52 |     mdlParams['training_steps'] = 60 #250
 53 |     # Display error every X steps
 54 |     mdlParams['display_step'] = 10
 55 |     # Scale?
 56 |     mdlParams['scale_targets'] = False
 57 |     # Peak at test error during training? (generally, dont do this!)
 58 |     mdlParams['peak_at_testerr'] = False
 59 |     # Print trainerr
 60 |     mdlParams['print_trainerr'] = False
 61 |     # Subtract trainset mean?
 62 |     mdlParams['subtract_set_mean'] = False
 63 |     mdlParams['setMean'] = np.array([0.0, 0.0, 0.0])   
 64 |     mdlParams['setStd'] = np.array([1.0, 1.0, 1.0])   
 65 | 
 66 |     # Data AUG
 67 |     #mdlParams['full_color_distort'] = True
 68 |     mdlParams['autoaugment'] = False
 69 |     mdlParams['flip_lr_ud'] = True
 70 |     mdlParams['full_rot'] = 180
 71 |     mdlParams['scale'] = (0.8,1.2)
 72 |     mdlParams['shear'] = 10
 73 |     mdlParams['cutout'] = 16
 74 | 
 75 |     # Meta settings
 76 |     mdlParams['meta_features'] = ['age_num','sex_oh','loc_oh']
 77 |     mdlParams['meta_feature_sizes'] = [1,8,2]
 78 |     mdlParams['encode_nan'] = False
 79 |     # Pretrained model from task 1
 80 |     mdlParams['model_load_path'] = mdlParams_['pathBase']+'/data/isic/2019.test_effb0_ss'
 81 |     mdlParams['fc_layers_before'] = [256,256]
 82 |     # Factor for scaling up the FC layer
 83 |     scale_up_with_larger_b = 1.0
 84 |     mdlParams['fc_layers_after'] = [int(1024*scale_up_with_larger_b)]
 85 |     mdlParams['freeze_cnn'] = True
 86 |     mdlParams['learning_rate_meta'] = 0.00001
 87 |     # each feature is set to missing with this prob
 88 |     mdlParams['drop_augment'] = 0.1
 89 |     # Normal dropout in fc layers
 90 |     mdlParams['dropout_meta'] = 0.4
 91 |     mdlParams['scale_features'] = True      
 92 | 
 93 |     ### Data ###
 94 |     mdlParams['preload'] = False
 95 |     # Labels first
 96 |     # Targets, as dictionary, indexed by im file name
 97 |     mdlParams['labels_dict'] = {}
 98 |     path1 = mdlParams['dataDir'] + '/labels/'
 99 |      # All sets
100 |     allSets = glob(path1 + '*/')   
101 |     # Go through all sets
102 |     for i in range(len(allSets)):
103 |         # Check if want to include this dataset
104 |         foundSet = False
105 |         for j in range(len(mdlParams['dataset_names'])):
106 |             if mdlParams['dataset_names'][j] in allSets[i]:
107 |                 foundSet = True
108 |         if not foundSet:
109 |             continue                
110 |         # Find csv file
111 |         files = sorted(glob(allSets[i]+'*'))
112 |         for j in range(len(files)):
113 |             if 'csv' in files[j]:
114 |                 break
115 |         # Load csv file
116 |         with open(files[j], newline='') as csvfile:
117 |             labels_str = csv.reader(csvfile, delimiter=',', quotechar='|')
118 |             for row in labels_str:
119 |                 if 'image' == row[0]:
120 |                     continue
121 |                 #if 'ISIC' in row[0] and '_downsampled' in row[0]:
122 |                 #    print(row[0])
123 |                 if row[0] + '_downsampled' in mdlParams['labels_dict']:
124 |                     print("removed",row[0] + '_downsampled')
125 |                     continue
126 |                 if mdlParams['numClasses'] == 7:
127 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))])
128 |                 elif mdlParams['numClasses'] == 8:
129 |                     if len(row) < 9 or row[8] == '':
130 |                         class_8 = 0
131 |                     else:
132 |                         class_8 = int(float(row[8]))
133 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8])
134 |                 elif mdlParams['numClasses'] == 9:
135 |                     if len(row) < 9 or row[8] == '':
136 |                         class_8 = 0
137 |                     else:
138 |                         class_8 = int(float(row[8]))  
139 |                     if len(row) < 10 or row[9] == '':
140 |                         class_9 = 0
141 |                     else:
142 |                         class_9 = int(float(row[9]))                                           
143 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9])
144 |     # Load meta data
145 |     mdlParams['meta_dict'] = {}
146 |     path1 = mdlParams['dataDir'] + '/meta_data/'
147 |      # All sets
148 |     allSets = glob(path1 + '*/')   
149 |     # Go through all sets
150 |     for i in range(len(allSets)):
151 |         # Check if want to include this dataset
152 |         foundSet = False
153 |         for j in range(len(mdlParams['dataset_names'])):
154 |             if mdlParams['dataset_names'][j] in allSets[i]:
155 |                 foundSet = True
156 |         if not foundSet:
157 |             continue                
158 |         # Find csv file
159 |         files = sorted(glob(allSets[i]+'*'))
160 |         for j in range(len(files)):
161 |             if '.pkl' in files[j]:
162 |                 break    
163 |         # Open and load
164 |         with open(files[j],'rb') as f:
165 |             meta_data = pickle.load(f)
166 |         # Write into dict
167 |         for k in range(len(meta_data['im_name'])):
168 |             feature_vector = []
169 |             if 'age_oh' in mdlParams['meta_features']:
170 |                 if mdlParams['encode_nan']:
171 |                     feature_vector.append(meta_data['age_oh'][k,:])
172 |                 else:
173 |                     feature_vector.append(meta_data['age_oh'][k,1:])
174 |             if 'age_num' in mdlParams['meta_features']:
175 |                 feature_vector.append(np.array([meta_data['age_num'][k]]))                      
176 |             if 'loc_oh' in mdlParams['meta_features']:
177 |                 if mdlParams['encode_nan']:
178 |                     feature_vector.append(meta_data['loc_oh'][k,:])
179 |                 else:
180 |                     feature_vector.append(meta_data['loc_oh'][k,1:])
181 |             if 'sex_oh' in mdlParams['meta_features']:
182 |                 if mdlParams['encode_nan']:
183 |                     feature_vector.append(meta_data['sex_oh'][k,:])
184 |                 else:
185 |                     feature_vector.append(meta_data['sex_oh'][k,1:]) 
186 | 
187 |             #print(feature_vector) 
188 |             feature_vector = np.concatenate(feature_vector,axis=0)
189 |             #print("feature vector shape",feature_vector.shape)                                                
190 |             mdlParams['meta_dict'][meta_data['im_name'][k]] = feature_vector    
191 | 
192 | 
193 |     # Save all im paths here
194 |     mdlParams['im_paths'] = []
195 |     mdlParams['labels_list'] = []
196 |     mdlParams['meta_list'] = [] 
197 |     # Define the sets
198 |     path1 = mdlParams['dataDir'] + '/images/'
199 |     # All sets
200 |     allSets = sorted(glob(path1 + '*/'))
201 |     # Ids which name the folders
202 |     # Make official first dataset
203 |     for i in range(len(allSets)):
204 |         if mdlParams['dataset_names'][0] in allSets[i]:
205 |             temp = allSets[i]
206 |             allSets.remove(allSets[i])
207 |             allSets.insert(0, temp)
208 |     print(allSets)        
209 |     # Set of keys, for marking old HAM10000
210 |     mdlParams['key_list'] = []
211 |     if mdlParams['exclude_inds']:
212 |         with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f:
213 |             indices_exclude = pickle.load(f)          
214 |         exclude_list = []    
215 |     for i in range(len(allSets)):
216 |         # All files in that set
217 |         files = sorted(glob(allSets[i]+'*'))
218 |         # Check if there is something in there, if not, discard
219 |         if len(files) == 0:
220 |             continue
221 |         # Check if want to include this dataset
222 |         foundSet = False
223 |         for j in range(len(mdlParams['dataset_names'])):
224 |             if mdlParams['dataset_names'][j] in allSets[i]:
225 |                 foundSet = True
226 |         if not foundSet:
227 |             continue                    
228 |         for j in range(len(files)):
229 |             if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]:                
230 |                 # Add according label, find it first
231 |                 found_already = False
232 |                 for key in mdlParams['labels_dict']:
233 |                     if key + mdlParams['file_ending'] in files[j]:
234 |                         if found_already:
235 |                             print("Found already:",key,files[j])                     
236 |                         mdlParams['key_list'].append(key)
237 |                         mdlParams['labels_list'].append(mdlParams['labels_dict'][key])
238 |                         mdlParams['meta_list'].append(mdlParams['meta_dict'][key])
239 |                         found_already = True
240 |                 if found_already:
241 |                     mdlParams['im_paths'].append(files[j])     
242 |                     if mdlParams['exclude_inds']:
243 |                         for key in indices_exclude:
244 |                             if key in files[j]:
245 |                                 exclude_list.append(indices_exclude[key])                                       
246 |     # Convert label list to array
247 |     mdlParams['labels_array'] = np.array(mdlParams['labels_list'])
248 |     print(np.mean(mdlParams['labels_array'],axis=0))      
249 |     # Meta data
250 |     mdlParams['meta_array'] = np.array(mdlParams['meta_list'])
251 |     print("final meta shape",mdlParams['meta_array'].shape)        
252 |     # Create indices list with HAM10000 only
253 |     mdlParams['HAM10000_inds'] = []
254 |     HAM_START = 24306
255 |     HAM_END = 34320
256 |     for j in range(len(mdlParams['key_list'])):
257 |         try:
258 |             curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1]
259 |         except:
260 |             continue
261 |         if curr_id >= HAM_START and curr_id <= HAM_END:
262 |             mdlParams['HAM10000_inds'].append(j)
263 |     mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds'])    
264 |     print("Len ham",len(mdlParams['HAM10000_inds']))   
265 |     # Perhaps preload images
266 |     if mdlParams['preload']:
267 |         mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8)
268 |         for i in range(len(mdlParams['im_paths'])):
269 |             x = scipy.ndimage.imread(mdlParams['im_paths'][i])
270 |             #x = x.astype(np.float32)   
271 |             # Scale to 0-1 
272 |             #min_x = np.min(x)
273 |             #max_x = np.max(x)
274 |             #x = (x-min_x)/(max_x-min_x)
275 |             mdlParams['images_array'][i,:,:,:] = x
276 |             if i%1000 == 0:
277 |                 print(i+1,"images loaded...")     
278 |     if mdlParams['subtract_set_mean']:
279 |         mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3])
280 |         for i in range(len(mdlParams['im_paths'])):
281 |             x = scipy.ndimage.imread(mdlParams['im_paths'][i])
282 |             x = x.astype(np.float32)   
283 |             # Scale to 0-1 
284 |             min_x = np.min(x)
285 |             max_x = np.max(x)
286 |             x = (x-min_x)/(max_x-min_x)
287 |             mdlParams['images_means'][i,:] = np.mean(x,(0,1))
288 |             if i%1000 == 0:
289 |                 print(i+1,"images processed for mean...")         
290 | 
291 |     ### Define Indices ###
292 |     with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f:
293 |         indices = pickle.load(f)            
294 |     mdlParams['trainIndCV'] = indices['trainIndCV']
295 |     mdlParams['valIndCV'] = indices['valIndCV']
296 |     if mdlParams['exclude_inds']:
297 |         exclude_list = np.array(exclude_list)
298 |         all_inds = np.arange(len(mdlParams['im_paths']))
299 |         exclude_inds = all_inds[exclude_list.astype(bool)]
300 |         for i in range(len(mdlParams['trainIndCV'])):
301 |             mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds)
302 |         for i in range(len(mdlParams['valIndCV'])):
303 |             mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds)     
304 |     # Consider case with more than one set
305 |     if len(mdlParams['dataset_names']) > 1:
306 |         restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0]))
307 |         for i in range(mdlParams['numCV']):
308 |             mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds))        
309 |     print("Train")
310 |     for i in range(len(mdlParams['trainIndCV'])):
311 |         print(mdlParams['trainIndCV'][i].shape)
312 |     print("Val")
313 |     for i in range(len(mdlParams['valIndCV'])):
314 |         print(mdlParams['valIndCV'][i].shape)    
315 | 
316 |     # Use this for ordered multi crops
317 |     if mdlParams['orderedCrop']:
318 |         # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc.
319 |         mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
320 |         #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
321 |         for u in range(len(mdlParams['im_paths'])):
322 |             height, width = imagesize.get(mdlParams['im_paths'][u])
323 |             if width < mdlParams['input_size'][0]:
324 |                 height = int(mdlParams['input_size'][0]/float(width))*height
325 |                 width = mdlParams['input_size'][0]
326 |             if height < mdlParams['input_size'][0]:
327 |                 width = int(mdlParams['input_size'][0]/float(height))*width
328 |                 height = mdlParams['input_size'][0]            
329 |             ind = 0
330 |             for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
331 |                 for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
332 |                     mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1))
333 |                     mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1))
334 |                     #mdlParams['imSizes'][u,ind,0] = curr_im_size[0]
335 | 
336 |                     ind += 1
337 |         # Sanity checks
338 |         #print("Positions",mdlParams['cropPositions'])
339 |         # Test image sizes
340 |         height = mdlParams['input_size'][0]
341 |         width = mdlParams['input_size'][1]
342 |         for u in range(len(mdlParams['im_paths'])):
343 |             height_test, width_test = imagesize.get(mdlParams['im_paths'][u])
344 |             if width_test < mdlParams['input_size'][0]:
345 |                 height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test
346 |                 width_test = mdlParams['input_size'][0]
347 |             if height_test < mdlParams['input_size'][0]:
348 |                 width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test
349 |                 height_test = mdlParams['input_size'][0]                
350 |             test_im = np.zeros([width_test,height_test]) 
351 |             for i in range(mdlParams['multiCropEval']):
352 |                 im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width]
353 |                 if im_crop.shape[0] != mdlParams['input_size'][0]:
354 |                     print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u])    
355 |                 if im_crop.shape[1] != mdlParams['input_size'][1]:
356 |                     print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u])        
357 |     return mdlParams


--------------------------------------------------------------------------------
/cfgs/2019/test_effb0_rr_meta.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import h5py
  4 | import re
  5 | import csv
  6 | import numpy as np
  7 | from glob import glob
  8 | import scipy
  9 | import pickle
 10 | import imagesize
 11 | 
 12 | def init(mdlParams_):
 13 |     mdlParams = {}
 14 |     # Save summaries and model here
 15 |     mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/'
 16 |     # Data is loaded from here
 17 |     mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019'
 18 | 
 19 |     ### Model Selection ###
 20 |     mdlParams['model_type'] = 'efficientnet-b0'
 21 |     mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll']
 22 |     mdlParams['file_ending'] = '.png'
 23 |     mdlParams['exclude_inds'] = False
 24 |     mdlParams['same_sized_crops'] = False
 25 |     mdlParams['multiCropEval'] = 9
 26 |     mdlParams['var_im_size'] = False
 27 |     mdlParams['orderedCrop'] = False
 28 |     mdlParams['voting_scheme'] = 'average'    
 29 |     mdlParams['classification'] = True
 30 |     mdlParams['balance_classes'] = 9
 31 |     mdlParams['extra_fac'] = 1.0
 32 |     mdlParams['numClasses'] = 9
 33 |     mdlParams['no_c9_eval'] = True
 34 |     mdlParams['numOut'] = mdlParams['numClasses']
 35 |     mdlParams['numCV'] = 5
 36 |     mdlParams['trans_norm_first'] = True
 37 |     # Deterministic cropping
 38 |     mdlParams['deterministic_eval'] = True
 39 |     mdlParams['numCropPositions'] = 1
 40 |     num_scales = 4
 41 |     all_scales = [1.0,0.5,0.75,0.25,0.9,0.6,0.4]
 42 |     mdlParams['cropScales'] = all_scales[:num_scales]
 43 |     mdlParams['cropFlipping'] = 4
 44 |     mdlParams['multiCropEval'] = mdlParams['numCropPositions']*len(mdlParams['cropScales'])*mdlParams['cropFlipping']
 45 |     mdlParams['offset_crop'] = 0.2    
 46 |     # Scale up for b1-b7
 47 |     mdlParams['input_size'] = [224,224,3]     
 48 | 
 49 |     ### Training Parameters ###
 50 |     # Batch size
 51 |     mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs'])
 52 |     # Initial learning rate
 53 |     mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs'])
 54 |     # Lower learning rate after no improvement over 100 epochs
 55 |     mdlParams['lowerLRAfter'] = 25
 56 |     # If there is no validation set, start lowering the LR after X steps
 57 |     mdlParams['lowerLRat'] = 50
 58 |     # Divide learning rate by this value
 59 |     mdlParams['LRstep'] = 5
 60 |     # Maximum number of training iterations
 61 |     mdlParams['training_steps'] = 60 #250
 62 |     # Display error every X steps
 63 |     mdlParams['display_step'] = 10
 64 |     # Scale?
 65 |     mdlParams['scale_targets'] = False
 66 |     # Peak at test error during training? (generally, dont do this!)
 67 |     mdlParams['peak_at_testerr'] = False
 68 |     # Print trainerr
 69 |     mdlParams['print_trainerr'] = False
 70 |     # Subtract trainset mean?
 71 |     mdlParams['subtract_set_mean'] = False
 72 |     mdlParams['setMean'] = np.array([0.0, 0.0, 0.0])   
 73 |     mdlParams['setStd'] = np.array([1.0, 1.0, 1.0])   
 74 | 
 75 |     # Data AUG
 76 |     #mdlParams['full_color_distort'] = True
 77 |     mdlParams['autoaugment'] = False    
 78 |     mdlParams['flip_lr_ud'] = True
 79 |     mdlParams['full_rot'] = 180
 80 |     mdlParams['scale'] = (0.8,1.2)
 81 |     mdlParams['shear'] = 10
 82 |     mdlParams['cutout'] = 16
 83 | 
 84 |     # Meta settings
 85 |     mdlParams['meta_features'] = ['age_num','sex_oh','loc_oh']
 86 |     mdlParams['meta_feature_sizes'] = [1,8,2]
 87 |     mdlParams['encode_nan'] = False
 88 |     mdlParams['model_load_path'] = '/home/Gessert/data/isic/2019.test_effb0_rr'
 89 |     mdlParams['fc_layers_before'] = [256,256]
 90 |     mdlParams['fc_layers_after'] = [1024]
 91 |     mdlParams['freeze_cnn'] = True
 92 |     mdlParams['learning_rate_meta'] = 0.00001
 93 |     # each feature is set to missing with this prob
 94 |     mdlParams['drop_augment'] = 0.1
 95 |     mdlParams['dropout_meta'] = 0.4
 96 |     mdlParams['scale_features'] = True
 97 | 
 98 |     ### Data ###
 99 |     mdlParams['preload'] = False
100 |     # Labels first
101 |     # Targets, as dictionary, indexed by im file name
102 |     mdlParams['labels_dict'] = {}
103 |     path1 = mdlParams['dataDir'] + '/labels/'
104 |      # All sets
105 |     allSets = glob(path1 + '*/')   
106 |     # Go through all sets
107 |     for i in range(len(allSets)):
108 |         # Check if want to include this dataset
109 |         foundSet = False
110 |         for j in range(len(mdlParams['dataset_names'])):
111 |             if mdlParams['dataset_names'][j] in allSets[i]:
112 |                 foundSet = True
113 |         if not foundSet:
114 |             continue                
115 |         # Find csv file
116 |         files = sorted(glob(allSets[i]+'*'))
117 |         for j in range(len(files)):
118 |             if 'csv' in files[j]:
119 |                 break
120 |         # Load csv file
121 |         with open(files[j], newline='') as csvfile:
122 |             labels_str = csv.reader(csvfile, delimiter=',', quotechar='|')
123 |             for row in labels_str:
124 |                 if 'image' == row[0]:
125 |                     continue
126 |                 #if 'ISIC' in row[0] and '_downsampled' in row[0]:
127 |                 #    print(row[0])
128 |                 if row[0] + '_downsampled' in mdlParams['labels_dict']:
129 |                     print("removed",row[0] + '_downsampled')
130 |                     continue
131 |                 if mdlParams['numClasses'] == 7:
132 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))])
133 |                 elif mdlParams['numClasses'] == 8:
134 |                     if len(row) < 9 or row[8] == '':
135 |                         class_8 = 0
136 |                     else:
137 |                         class_8 = int(float(row[8]))
138 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8])
139 |                 elif mdlParams['numClasses'] == 9:
140 |                     if len(row) < 9 or row[8] == '':
141 |                         class_8 = 0
142 |                     else:
143 |                         class_8 = int(float(row[8]))  
144 |                     if len(row) < 10 or row[9] == '':
145 |                         class_9 = 0
146 |                     else:
147 |                         class_9 = int(float(row[9]))                                           
148 |                     mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9])
149 | 
150 |     # Load meta data
151 |     mdlParams['meta_dict'] = {}
152 |     path1 = mdlParams['dataDir'] + '/meta_data/'
153 |      # All sets
154 |     allSets = glob(path1 + '*/')   
155 |     # Go through all sets
156 |     for i in range(len(allSets)):
157 |         # Check if want to include this dataset
158 |         foundSet = False
159 |         for j in range(len(mdlParams['dataset_names'])):
160 |             if mdlParams['dataset_names'][j] in allSets[i]:
161 |                 foundSet = True
162 |         if not foundSet:
163 |             continue                
164 |         # Find csv file
165 |         files = sorted(glob(allSets[i]+'*'))
166 |         for j in range(len(files)):
167 |             if '.pkl' in files[j]:
168 |                 break    
169 |         # Open and load
170 |         with open(files[j],'rb') as f:
171 |             meta_data = pickle.load(f)
172 |         # Write into dict
173 |         for k in range(len(meta_data['im_name'])):
174 |             feature_vector = []
175 |             if 'age_oh' in mdlParams['meta_features']:
176 |                 if mdlParams['encode_nan']:
177 |                     feature_vector.append(meta_data['age_oh'][k,:])
178 |                 else:
179 |                     feature_vector.append(meta_data['age_oh'][k,1:])
180 |             if 'age_num' in mdlParams['meta_features']:
181 |                 feature_vector.append(np.array([meta_data['age_num'][k]]))                      
182 |             if 'loc_oh' in mdlParams['meta_features']:
183 |                 if mdlParams['encode_nan']:
184 |                     feature_vector.append(meta_data['loc_oh'][k,:])
185 |                 else:
186 |                     feature_vector.append(meta_data['loc_oh'][k,1:])
187 |             if 'sex_oh' in mdlParams['meta_features']:
188 |                 if mdlParams['encode_nan']:
189 |                     feature_vector.append(meta_data['sex_oh'][k,:])
190 |                 else:
191 |                     feature_vector.append(meta_data['sex_oh'][k,1:]) 
192 | 
193 |             #print(feature_vector) 
194 |             feature_vector = np.concatenate(feature_vector,axis=0)
195 |             #print("feature vector shape",feature_vector.shape)                                                
196 |             mdlParams['meta_dict'][meta_data['im_name'][k]] = feature_vector   
197 | 
198 |     # Save all im paths here
199 |     mdlParams['im_paths'] = []
200 |     mdlParams['labels_list'] = []
201 |     mdlParams['meta_list'] = []    
202 |     # Define the sets
203 |     path1 = mdlParams['dataDir'] + '/images/'
204 |     # All sets
205 |     allSets = sorted(glob(path1 + '*/'))
206 |     # Ids which name the folders
207 |     # Make official first dataset
208 |     for i in range(len(allSets)):
209 |         if mdlParams['dataset_names'][0] in allSets[i]:
210 |             temp = allSets[i]
211 |             allSets.remove(allSets[i])
212 |             allSets.insert(0, temp)
213 |     print(allSets)        
214 |     # Set of keys, for marking old HAM10000
215 |     mdlParams['key_list'] = []
216 |     if mdlParams['exclude_inds']:
217 |         with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f:
218 |             indices_exclude = pickle.load(f)          
219 |         exclude_list = []    
220 |     for i in range(len(allSets)):
221 |         # All files in that set
222 |         files = sorted(glob(allSets[i]+'*'))
223 |         # Check if there is something in there, if not, discard
224 |         if len(files) == 0:
225 |             continue
226 |         # Check if want to include this dataset
227 |         foundSet = False
228 |         for j in range(len(mdlParams['dataset_names'])):
229 |             if mdlParams['dataset_names'][j] in allSets[i]:
230 |                 foundSet = True
231 |         if not foundSet:
232 |             continue                    
233 |         for j in range(len(files)):
234 |             if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]:                
235 |                 # Add according label, find it first
236 |                 found_already = False
237 |                 for key in mdlParams['labels_dict']:
238 |                     if key + mdlParams['file_ending'] in files[j]:
239 |                         if found_already:
240 |                             print("Found already:",key,files[j])                     
241 |                         mdlParams['key_list'].append(key)
242 |                         mdlParams['labels_list'].append(mdlParams['labels_dict'][key])
243 |                         mdlParams['meta_list'].append(mdlParams['meta_dict'][key])
244 |                         found_already = True
245 |                 if found_already:
246 |                     mdlParams['im_paths'].append(files[j])     
247 |                     if mdlParams['exclude_inds']:
248 |                         for key in indices_exclude:
249 |                             if key in files[j]:
250 |                                 exclude_list.append(indices_exclude[key])                                       
251 |     # Convert label list to array
252 |     mdlParams['labels_array'] = np.array(mdlParams['labels_list'])
253 |     print(np.mean(mdlParams['labels_array'],axis=0))   
254 |     # Meta data
255 |     mdlParams['meta_array'] = np.array(mdlParams['meta_list'])
256 |     print("final meta shape",mdlParams['meta_array'].shape)               
257 |     # Create indices list with HAM10000 only
258 |     mdlParams['HAM10000_inds'] = []
259 |     HAM_START = 24306
260 |     HAM_END = 34320
261 |     for j in range(len(mdlParams['key_list'])):
262 |         try:
263 |             curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1]
264 |         except:
265 |             continue
266 |         if curr_id >= HAM_START and curr_id <= HAM_END:
267 |             mdlParams['HAM10000_inds'].append(j)
268 |     mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds'])    
269 |     print("Len ham",len(mdlParams['HAM10000_inds']))   
270 |     # Perhaps preload images
271 |     if mdlParams['preload']:
272 |         mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8)
273 |         for i in range(len(mdlParams['im_paths'])):
274 |             x = scipy.ndimage.imread(mdlParams['im_paths'][i])
275 |             #x = x.astype(np.float32)   
276 |             # Scale to 0-1 
277 |             #min_x = np.min(x)
278 |             #max_x = np.max(x)
279 |             #x = (x-min_x)/(max_x-min_x)
280 |             mdlParams['images_array'][i,:,:,:] = x
281 |             if i%1000 == 0:
282 |                 print(i+1,"images loaded...")     
283 |     if mdlParams['subtract_set_mean']:
284 |         mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3])
285 |         for i in range(len(mdlParams['im_paths'])):
286 |             x = scipy.ndimage.imread(mdlParams['im_paths'][i])
287 |             x = x.astype(np.float32)   
288 |             # Scale to 0-1 
289 |             min_x = np.min(x)
290 |             max_x = np.max(x)
291 |             x = (x-min_x)/(max_x-min_x)
292 |             mdlParams['images_means'][i,:] = np.mean(x,(0,1))
293 |             if i%1000 == 0:
294 |                 print(i+1,"images processed for mean...")         
295 | 
296 |     ### Define Indices ###
297 |     with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f:
298 |         indices = pickle.load(f)             
299 |     mdlParams['trainIndCV'] = indices['trainIndCV']
300 |     mdlParams['valIndCV'] = indices['valIndCV']
301 |     if mdlParams['exclude_inds']:
302 |         exclude_list = np.array(exclude_list)
303 |         all_inds = np.arange(len(mdlParams['im_paths']))
304 |         exclude_inds = all_inds[exclude_list.astype(bool)]
305 |         for i in range(len(mdlParams['trainIndCV'])):
306 |             mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds)
307 |         for i in range(len(mdlParams['valIndCV'])):
308 |             mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds)     
309 |     # Consider case with more than one set
310 |     if len(mdlParams['dataset_names']) > 1:
311 |         restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0]))
312 |         for i in range(mdlParams['numCV']):
313 |             mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds))        
314 |     print("Train")
315 |     for i in range(len(mdlParams['trainIndCV'])):
316 |         print(mdlParams['trainIndCV'][i].shape)
317 |     print("Val")
318 |     for i in range(len(mdlParams['valIndCV'])):
319 |         print(mdlParams['valIndCV'][i].shape)    
320 | 
321 |     # Use this for ordered multi crops
322 |     if mdlParams['orderedCrop']:
323 |         # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc.
324 |         mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
325 |         #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
326 |         for u in range(len(mdlParams['im_paths'])):
327 |             height, width = imagesize.get(mdlParams['im_paths'][u])
328 |             if width < mdlParams['input_size'][0]:
329 |                 height = int(mdlParams['input_size'][0]/float(width))*height
330 |                 width = mdlParams['input_size'][0]
331 |             if height < mdlParams['input_size'][0]:
332 |                 width = int(mdlParams['input_size'][0]/float(height))*width
333 |                 height = mdlParams['input_size'][0]            
334 |             ind = 0
335 |             for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
336 |                 for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
337 |                     mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1))
338 |                     mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1))
339 |                     #mdlParams['imSizes'][u,ind,0] = curr_im_size[0]
340 | 
341 |                     ind += 1
342 |         # Sanity checks
343 |         #print("Positions",mdlParams['cropPositions'])
344 |         # Test image sizes
345 |         height = mdlParams['input_size'][0]
346 |         width = mdlParams['input_size'][1]
347 |         for u in range(len(mdlParams['im_paths'])):
348 |             height_test, width_test = imagesize.get(mdlParams['im_paths'][u])
349 |             if width_test < mdlParams['input_size'][0]:
350 |                 height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test
351 |                 width_test = mdlParams['input_size'][0]
352 |             if height_test < mdlParams['input_size'][0]:
353 |                 width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test
354 |                 height_test = mdlParams['input_size'][0]                
355 |             test_im = np.zeros([width_test,height_test]) 
356 |             for i in range(mdlParams['multiCropEval']):
357 |                 im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width]
358 |                 if im_crop.shape[0] != mdlParams['input_size'][0]:
359 |                     print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u])    
360 |                 if im_crop.shape[1] != mdlParams['input_size'][1]:
361 |                     print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u])       
362 |     return mdlParams


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.optim as optim
  4 | from torch.optim import lr_scheduler
  5 | import torchvision
  6 | from torchvision import datasets, models as tv_models
  7 | from torch.utils.data import DataLoader
  8 | from torchsummary import summary
  9 | import numpy as np
 10 | from scipy import io
 11 | import threading
 12 | import pickle
 13 | from pathlib import Path
 14 | import math
 15 | import os
 16 | import sys
 17 | from glob import glob
 18 | import re
 19 | import gc
 20 | import importlib
 21 | import time
 22 | import sklearn.preprocessing
 23 | import utils
 24 | from sklearn.utils import class_weight
 25 | import psutil
 26 | import models
 27 | 
 28 | # Build the run configuration from two modules named on the command line
 29 | # Dictionary that collects all configuration values for this script
 30 | mdlParams = {}
 31 | 
 32 | # Machine config: sys.argv[1] names a module inside the pc_cfgs package
 33 | pc_cfg = importlib.import_module('pc_cfgs.'+sys.argv[1])
 34 | mdlParams.update(pc_cfg.mdlParams)
 35 | 
 36 | # Model config: sys.argv[2] names a module inside the cfgs package; its init()
 37 | # is handed the machine settings loaded so far and its result is merged in
 38 | model_cfg = importlib.import_module('cfgs.'+sys.argv[2])
 39 | mdlParams_model = model_cfg.init(mdlParams)
 40 | mdlParams.update(mdlParams_model)
 41 | 
 42 | # Flag this run as training (the consumer of this key is not visible here)
 43 | mdlParams['trainSetState'] = 'train'
 44 | 
 45 | # Base output path = saveDir + model-config name; no separator is inserted, so saveDir presumably ends with one
 46 | mdlParams['saveDirBase'] = mdlParams['saveDir'] + sys.argv[2]
 47 | 
 48 | # Set visible devices
 49 | if 'gpu' in sys.argv[3]:
 50 |     mdlParams['numGPUs']= [[int(s) for s in re.findall(r'\d+',sys.argv[3])][-1]]
 51 |     cuda_str = ""
 52 |     for i in range(len(mdlParams['numGPUs'])):
 53 |         cuda_str = cuda_str + str(mdlParams['numGPUs'][i])
 54 |         if i is not len(mdlParams['numGPUs'])-1:
 55 |             cuda_str = cuda_str + ","
 56 |     print("Devices to use:",cuda_str)
 57 |     os.environ["CUDA_VISIBLE_DEVICES"] = cuda_str      
 58 | 
 59 | # Specify val set to train for
 60 | if len(sys.argv) > 4:
 61 |     mdlParams['cv_subset'] = [int(s) for s in re.findall(r'\d+',sys.argv[4])]
 62 |     print("Training validation sets",mdlParams['cv_subset'])
 63 | 
 64 | # Check if there is a validation set, if not, evaluate train error instead
 65 | if 'valIndCV' in mdlParams or 'valInd' in mdlParams:
 66 |     eval_set = 'valInd'
 67 |     print("Evaluating on validation set during training.")
 68 | else:
 69 |     eval_set = 'trainInd'
 70 |     print("No validation set, evaluating on training set during training.")
 71 | 
 72 | # Check if there were previous ones that have alreary bin learned
 73 | prevFile = Path(mdlParams['saveDirBase'] + '/CV.pkl')
 74 | #print(prevFile)
 75 | if prevFile.exists():
 76 |     print("Part of CV already done")
 77 |     with open(mdlParams['saveDirBase'] + '/CV.pkl', 'rb') as f:
 78 |         allData = pickle.load(f)
 79 | else:
 80 |     allData = {}
 81 |     allData['f1Best'] = {}
 82 |     allData['sensBest'] = {}
 83 |     allData['specBest'] = {}
 84 |     allData['accBest'] = {}
 85 |     allData['waccBest'] = {}
 86 |     allData['aucBest'] = {}
 87 |     allData['convergeTime'] = {}
 88 |     allData['bestPred'] = {}
 89 |     allData['targets'] = {}
 90 |  
 91 | # Take care of CV
 92 | if mdlParams.get('cv_subset',None) is not None:
 93 |     cv_set = mdlParams['cv_subset']
 94 | else:
 95 |     cv_set = range(mdlParams['numCV'])
 96 | for cv in cv_set:  
 97 |     # Check if this fold was already trained
 98 |     already_trained = False
 99 |     if 'valIndCV' in mdlParams:
100 |         mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv)
101 |         if os.path.isdir(mdlParams['saveDirBase']):
102 |             if os.path.isdir(mdlParams['saveDir']):
103 |                 all_max_iter = []
104 |                 for name in os.listdir(mdlParams['saveDir']):
105 |                     int_list = [int(s) for s in re.findall(r'\d+',name)]
106 |                     if len(int_list) > 0:
107 |                         all_max_iter.append(int_list[-1])
108 |                     #if '-' + str(mdlParams['training_steps'])+ '.pt' in name:
109 |                     #    print("Fold %d already fully trained"%(cv))
110 |                     #    already_trained = True
111 |                 all_max_iter = np.array(all_max_iter)
112 |                 if len(all_max_iter) > 0 and np.max(all_max_iter) >= mdlParams['training_steps']:
113 |                     print("Fold %d already fully trained with %d iterations"%(cv,np.max(all_max_iter)))
114 |                     already_trained = True
115 |     if already_trained:
116 |         continue        
117 |     print("CV set",cv)
118 |     # Reset model graph 
119 |     importlib.reload(models)
120 |     #importlib.reload(torchvision)
121 |     # Collect model variables
122 |     modelVars = {}
123 |     #print("here")
124 |     modelVars['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
125 |     print(modelVars['device'])
126 |     # Def current CV set
127 |     mdlParams['trainInd'] = mdlParams['trainIndCV'][cv]
128 |     if 'valIndCV' in mdlParams:
129 |         mdlParams['valInd'] = mdlParams['valIndCV'][cv]
130 |     # Def current path for saving stuff
131 |     if 'valIndCV' in mdlParams:
132 |         mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv)
133 |     else:
134 |         mdlParams['saveDir'] = mdlParams['saveDirBase']
135 |     # Create basepath if it doesnt exist yet
136 |     if not os.path.isdir(mdlParams['saveDirBase']):
137 |         os.mkdir(mdlParams['saveDirBase'])
138 |     # Check if there is something to load
139 |     load_old = 0
140 |     if os.path.isdir(mdlParams['saveDir']):
141 |         # Check if a checkpoint is in there
142 |         if len([name for name in os.listdir(mdlParams['saveDir'])]) > 0:
143 |             load_old = 1
144 |             print("Loading old model")
145 |         else:
146 |             # Delete whatever is in there (nothing happens)
147 |             filelist = [os.remove(mdlParams['saveDir'] +'/'+f) for f in os.listdir(mdlParams['saveDir'])]
148 |     else:
149 |         os.mkdir(mdlParams['saveDir'])
150 |     # Save training progress in here
151 |     save_dict = {}
152 |     save_dict['acc'] = []
153 |     save_dict['loss'] = []
154 |     save_dict['wacc'] = []
155 |     save_dict['auc'] = []
156 |     save_dict['sens'] = []
157 |     save_dict['spec'] = []
158 |     save_dict['f1'] = []
159 |     save_dict['step_num'] = []
160 |     if mdlParams['print_trainerr']:
161 |         save_dict_train = {}
162 |         save_dict_train['acc'] = []
163 |         save_dict_train['loss'] = []
164 |         save_dict_train['wacc'] = []
165 |         save_dict_train['auc'] = []
166 |         save_dict_train['sens'] = []
167 |         save_dict_train['spec'] = []
168 |         save_dict_train['f1'] = []
169 |         save_dict_train['step_num'] = []        
170 |     # Potentially calculate setMean to subtract
171 |     if mdlParams['subtract_set_mean'] == 1:
172 |         mdlParams['setMean'] = np.mean(mdlParams['images_means'][mdlParams['trainInd'],:],(0))
173 |         print("Set Mean",mdlParams['setMean']) 
174 | 
175 |     # balance classes
176 |     if mdlParams['balance_classes'] < 3 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 11:
177 |         class_weights = class_weight.compute_class_weight('balanced',np.unique(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)),np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)) 
178 |         print("Current class weights",class_weights)
179 |         class_weights = class_weights*mdlParams['extra_fac']
180 |         print("Current class weights with extra",class_weights)             
181 |     elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 4:
182 |         # Split training set by classes
183 |         not_one_hot = np.argmax(mdlParams['labels_array'],1)
184 |         mdlParams['class_indices'] = []
185 |         for i in range(mdlParams['numClasses']):
186 |             mdlParams['class_indices'].append(np.where(not_one_hot==i)[0])
187 |             # Kick out non-trainind indices
188 |             mdlParams['class_indices'][i] = np.setdiff1d(mdlParams['class_indices'][i],mdlParams['valInd'])
189 |             #print("Class",i,mdlParams['class_indices'][i].shape,np.min(mdlParams['class_indices'][i]),np.max(mdlParams['class_indices'][i]),np.sum(mdlParams['labels_array'][np.int64(mdlParams['class_indices'][i]),:],0))        
190 |     elif mdlParams['balance_classes'] == 5 or mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 13:
191 |         # Other class balancing loss
192 |         class_weights = 1.0/np.mean(mdlParams['labels_array'][mdlParams['trainInd'],:],axis=0)
193 |         print("Current class weights",class_weights)
194 |         if isinstance(mdlParams['extra_fac'], float):
195 |             class_weights = np.power(class_weights,mdlParams['extra_fac'])
196 |         else:
197 |             class_weights = class_weights*mdlParams['extra_fac']
198 |         print("Current class weights with extra",class_weights) 
199 |     elif mdlParams['balance_classes'] == 9:
200 |         # Only use official indicies for calculation
201 |         print("Balance 9")
202 |         indices_ham = mdlParams['trainInd'][mdlParams['trainInd'] < 25331]
203 |         if mdlParams['numClasses'] == 9:
204 |             class_weights_ = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:8],axis=0)
205 |             #print("class before",class_weights_)
206 |             class_weights = np.zeros([mdlParams['numClasses']])
207 |             class_weights[:8] = class_weights_
208 |             class_weights[-1] = np.max(class_weights_)
209 |         else:
210 |             class_weights = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:],axis=0)
211 |         print("Current class weights",class_weights)             
212 |         if isinstance(mdlParams['extra_fac'], float):
213 |             class_weights = np.power(class_weights,mdlParams['extra_fac'])
214 |         else:
215 |             class_weights = class_weights*mdlParams['extra_fac']
216 |         print("Current class weights with extra",class_weights)             
217 | 
218 |     # Meta scaler
219 |     if mdlParams.get('meta_features',None) is not None and mdlParams['scale_features']:
220 |         mdlParams['feature_scaler_meta'] = sklearn.preprocessing.StandardScaler().fit(mdlParams['meta_array'][mdlParams['trainInd'],:])  
221 |         print("scaler mean",mdlParams['feature_scaler_meta'].mean_,"var",mdlParams['feature_scaler_meta'].var_)  
222 | 
223 |     # Set up dataloaders
224 |     num_workers = psutil.cpu_count(logical=False)
225 |     # For train
226 |     dataset_train = utils.ISICDataset(mdlParams, 'trainInd')
227 |     # For val
228 |     dataset_val = utils.ISICDataset(mdlParams, 'valInd')
229 |     if mdlParams['multiCropEval'] > 0:
230 |         modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=num_workers, pin_memory=True)  
231 |     else:
232 |         modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=num_workers, pin_memory=True)               
233 | 
234 |     if mdlParams['balance_classes'] == 12 or mdlParams['balance_classes'] == 13:
235 |         #print(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1).size(0))
236 |         strat_sampler = utils.StratifiedSampler(mdlParams)
237 |         modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], sampler=strat_sampler, num_workers=num_workers, pin_memory=True) 
238 |     else:
239 |         modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True) 
240 |     #print("Setdiff",np.setdiff1d(mdlParams['trainInd'],mdlParams['trainInd']))
241 |     # Define model 
242 |     modelVars['model'] = models.getModel(mdlParams)()  
243 |     # Load trained model
244 |     if mdlParams.get('meta_features',None) is not None:
245 |         # Find best checkpoint
246 |         files = glob(mdlParams['model_load_path'] + '/CVSet' + str(cv) + '/*')
247 |         global_steps = np.zeros([len(files)])
248 |         #print("files",files)
249 |         for i in range(len(files)):
250 |             # Use meta files to find the highest index
251 |             if 'best' not in files[i]:
252 |                 continue
253 |             if 'checkpoint' not in files[i]:
254 |                 continue                
255 |             # Extract global step
256 |             nums = [int(s) for s in re.findall(r'\d+',files[i])]
257 |             global_steps[i] = nums[-1]
258 |         # Create path with maximum global step found
259 |         chkPath = mdlParams['model_load_path'] + '/CVSet' + str(cv) + '/checkpoint_best-' + str(int(np.max(global_steps))) + '.pt'
260 |         print("Restoring lesion-trained CNN for meta data training: ",chkPath)
261 |         # Load
262 |         state = torch.load(chkPath)
263 |         # Initialize model
264 |         curr_model_dict = modelVars['model'].state_dict()
265 |         for name, param in state['state_dict'].items():
266 |             #print(name,param.shape)
267 |             if isinstance(param, nn.Parameter):
268 |                 # backwards compatibility for serialized parameters
269 |                 param = param.data
270 |             if curr_model_dict[name].shape == param.shape:
271 |                 curr_model_dict[name].copy_(param)
272 |             else:
273 |                 print("not restored",name,param.shape)
274 |         #modelVars['model'].load_state_dict(state['state_dict'])        
275 |     # Original input size
276 |     #if 'Dense' not in mdlParams['model_type']:
277 |     #    print("Original input size",modelVars['model'].input_size)
278 |     #print(modelVars['model'])
279 |     if 'Dense' in mdlParams['model_type']:
280 |         if mdlParams['input_size'][0] != 224:
281 |             modelVars['model'] = utils.modify_densenet_avg_pool(modelVars['model'])
282 |             #print(modelVars['model'])
283 |         num_ftrs = modelVars['model'].classifier.in_features
284 |         modelVars['model'].classifier = nn.Linear(num_ftrs, mdlParams['numClasses'])
285 |         #print(modelVars['model'])
286 |     elif 'dpn' in mdlParams['model_type']:
287 |         num_ftrs = modelVars['model'].classifier.in_channels
288 |         modelVars['model'].classifier = nn.Conv2d(num_ftrs,mdlParams['numClasses'],[1,1])
289 |         #modelVars['model'].add_module('real_classifier',nn.Linear(num_ftrs, mdlParams['numClasses']))
290 |         #print(modelVars['model'])
291 |     elif 'efficient' in mdlParams['model_type']:
292 |         # Do nothing, output is prepared
293 |         num_ftrs = modelVars['model']._fc.in_features
294 |         modelVars['model']._fc = nn.Linear(num_ftrs, mdlParams['numClasses'])    
295 |     elif 'wsl' in mdlParams['model_type']:
296 |         num_ftrs = modelVars['model'].fc.in_features
297 |         modelVars['model'].fc = nn.Linear(num_ftrs, mdlParams['numClasses'])          
298 |     else:
299 |         num_ftrs = modelVars['model'].last_linear.in_features
300 |         modelVars['model'].last_linear = nn.Linear(num_ftrs, mdlParams['numClasses'])    
301 |     # Take care of meta case
302 |     if mdlParams.get('meta_features',None) is not None:
303 |         # freeze cnn first
304 |         if mdlParams['freeze_cnn']:
305 |             # deactivate all
306 |             for param in modelVars['model'].parameters():
307 |                 param.requires_grad = False            
308 |             if 'efficient' in mdlParams['model_type']:
309 |                 # Activate fc
310 |                 for param in modelVars['model']._fc.parameters():
311 |                     param.requires_grad = True
312 |             elif 'wsl' in mdlParams['model_type']:
313 |                 # Activate fc
314 |                 for param in modelVars['model'].fc.parameters():
315 |                     param.requires_grad = True
316 |             else:
317 |                 # Activate fc
318 |                 for param in modelVars['model'].last_linear.parameters():
319 |                     param.requires_grad = True                                
320 |         else:
321 |             # mark cnn parameters
322 |             for param in modelVars['model'].parameters():
323 |                 param.is_cnn_param = True
324 |             # unmark fc
325 |             for param in modelVars['model']._fc.parameters():
326 |                 param.is_cnn_param = False                              
327 |         # modify model
328 |         modelVars['model'] = models.modify_meta(mdlParams,modelVars['model'])  
329 |         # Mark new parameters
330 |         for param in modelVars['model'].parameters():
331 |             if not hasattr(param, 'is_cnn_param'):
332 |                 param.is_cnn_param = False                 
333 |     # multi gpu support
334 |     if len(mdlParams['numGPUs']) > 1:
335 |         modelVars['model'] = nn.DataParallel(modelVars['model']) 
336 |     modelVars['model'] = modelVars['model'].cuda()
337 |     #summary(modelVars['model'], modelVars['model'].input_size)# (mdlParams['input_size'][2], mdlParams['input_size'][0], mdlParams['input_size'][1]))
338 |     # Loss, with class weighting
339 |     if mdlParams.get('focal_loss',False):
340 |         modelVars['criterion'] = utils.FocalLoss(alpha=class_weights.tolist())
341 |     elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 0 or mdlParams['balance_classes'] == 12:
342 |         modelVars['criterion'] = nn.CrossEntropyLoss()
343 |     elif mdlParams['balance_classes'] == 8:
344 |         modelVars['criterion'] = nn.CrossEntropyLoss(reduce=False)
345 |     elif mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7:
346 |         modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)),reduce=False)
347 |     elif mdlParams['balance_classes'] == 10:
348 |         modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'])
349 |     elif mdlParams['balance_classes'] == 11:
350 |         modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'],alpha=torch.cuda.FloatTensor(class_weights.astype(np.float32)))
351 |     else:
352 |         modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)))
353 | 
354 |     if mdlParams.get('meta_features',None) is not None:
355 |         if mdlParams['freeze_cnn']:
356 |             modelVars['optimizer'] = optim.Adam(filter(lambda p: p.requires_grad, modelVars['model'].parameters()), lr=mdlParams['learning_rate_meta'])
357 |             # sanity check
358 |             for param in filter(lambda p: p.requires_grad, modelVars['model'].parameters()):
359 |                 print(param.name,param.shape)
360 |         else:
361 |             modelVars['optimizer'] = optim.Adam([
362 |                                                 {'params': filter(lambda p: not p.is_cnn_param, modelVars['model'].parameters()), 'lr': mdlParams['learning_rate_meta']},
363 |                                                 {'params': filter(lambda p: p.is_cnn_param, modelVars['model'].parameters()), 'lr': mdlParams['learning_rate']}
364 |                                                 ], lr=mdlParams['learning_rate'])
365 |     else:
366 |         modelVars['optimizer'] = optim.Adam(modelVars['model'].parameters(), lr=mdlParams['learning_rate'])
367 | 
368 |     # Decay LR by a factor of 0.1 every 7 epochs
369 |     modelVars['scheduler'] = lr_scheduler.StepLR(modelVars['optimizer'], step_size=mdlParams['lowerLRAfter'], gamma=1/np.float32(mdlParams['LRstep']))
370 | 
371 |     # Define softmax
372 |     modelVars['softmax'] = nn.Softmax(dim=1)
373 | 
374 |     # Set up training
375 |     # loading from checkpoint
376 |     if load_old:
377 |         # Find last, not last best checkpoint
378 |         files = glob(mdlParams['saveDir']+'/*')
379 |         global_steps = np.zeros([len(files)])
380 |         for i in range(len(files)):
381 |             # Use meta files to find the highest index
382 |             if 'best' in files[i]:
383 |                 continue
384 |             if 'checkpoint-' not in files[i]:
385 |                 continue                
386 |             # Extract global step
387 |             nums = [int(s) for s in re.findall(r'\d+',files[i])]
388 |             global_steps[i] = nums[-1]
389 |         # Create path with maximum global step found
390 |         chkPath = mdlParams['saveDir'] + '/checkpoint-' + str(int(np.max(global_steps))) + '.pt'
391 |         print("Restoring: ",chkPath)
392 |         # Load
393 |         state = torch.load(chkPath)
394 |         # Initialize model and optimizer
395 |         modelVars['model'].load_state_dict(state['state_dict'])
396 |         modelVars['optimizer'].load_state_dict(state['optimizer'])     
397 |         start_epoch = state['epoch']+1
398 |         mdlParams['valBest'] = state.get('valBest',1000)
399 |         mdlParams['lastBestInd'] = state.get('lastBestInd',int(np.max(global_steps)))
400 |     else:
401 |         start_epoch = 1
402 |         mdlParams['lastBestInd'] = -1
403 |         # Track metrics for saving best model
404 |         mdlParams['valBest'] = 1000
405 | 
406 |     # Num batches
407 |     numBatchesTrain = int(math.floor(len(mdlParams['trainInd'])/mdlParams['batchSize']))
408 |     print("Train batches",numBatchesTrain)
409 | 
410 |     # Run training
411 |     start_time = time.time()
412 |     print("Start training...")
413 |     for step in range(start_epoch, mdlParams['training_steps']+1):
414 |         # One Epoch of training
415 |         if step >= mdlParams['lowerLRat']-mdlParams['lowerLRAfter']:
416 |             modelVars['scheduler'].step()
417 |         modelVars['model'].train()      
418 |         for j, (inputs, labels, indices) in enumerate(modelVars['dataloader_trainInd']):    
419 |             #print(indices)                  
420 |             #t_load = time.time() 
421 |             # Run optimization        
422 |             if mdlParams.get('meta_features',None) is not None: 
423 |                 inputs[0] = inputs[0].cuda()
424 |                 inputs[1] = inputs[1].cuda()
425 |             else:
426 |                 inputs = inputs.cuda()
427 |             #print(inputs.shape)
428 |             labels = labels.cuda()        
429 |             # zero the parameter gradients
430 |             modelVars['optimizer'].zero_grad()             
431 |             # forward
432 |             # track history if only in train
433 |             with torch.set_grad_enabled(True):             
434 |                 if mdlParams.get('aux_classifier',False):
435 |                     outputs, outputs_aux = modelVars['model'](inputs) 
436 |                     loss1 = modelVars['criterion'](outputs, labels)
437 |                     labels_aux = labels.repeat(mdlParams['multiCropTrain'])
438 |                     loss2 = modelVars['criterion'](outputs_aux, labels_aux) 
439 |                     loss = loss1 + mdlParams['aux_classifier_loss_fac']*loss2     
440 |                 else:               
441 |                     #print("load",time.time()-t_load)    
442 |                     #t_fwd = time.time()   
443 |                     outputs = modelVars['model'](inputs)     
444 |                     #print("forward",time.time()-t_fwd)     
445 |                     #t_bwd = time.time()   
446 |                     loss = modelVars['criterion'](outputs, labels)         
447 |                 # Perhaps adjust weighting of the loss by the specific index
448 |                 if mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 8:
449 |                     #loss = loss.cpu()
450 |                     indices = indices.numpy()
451 |                     loss = loss*torch.cuda.FloatTensor(mdlParams['loss_fac_per_example'][indices].astype(np.float32))
452 |                     loss = torch.mean(loss)
453 |                     #loss = loss.cuda()
454 |                 # backward + optimize only if in training phase
455 |                 loss.backward()                 
456 |                 modelVars['optimizer'].step()     
457 |                 #print("backward",time.time()-t_bwd)                             
458 |         if step % mdlParams['display_step'] == 0 or step == 1:
459 |             # Calculate evaluation metrics
460 |             if mdlParams['classification']:
461 |                 # Adjust model state
462 |                 modelVars['model'].eval()
463 |                 # Get metrics
464 |                 loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, _ = utils.getErrClassification_mgpu(mdlParams, eval_set, modelVars)
465 |                 # Save in mat
466 |                 save_dict['loss'].append(loss)
467 |                 save_dict['acc'].append(accuracy)
468 |                 save_dict['wacc'].append(waccuracy)
469 |                 save_dict['auc'].append(auc)
470 |                 save_dict['sens'].append(sensitivity)
471 |                 save_dict['spec'].append(specificity)
472 |                 save_dict['f1'].append(f1)
473 |                 save_dict['step_num'].append(step)
474 |                 if os.path.isfile(mdlParams['saveDir'] + '/progression_'+eval_set+'.mat'):
475 |                     os.remove(mdlParams['saveDir'] + '/progression_'+eval_set+'.mat')                
476 |                 io.savemat(mdlParams['saveDir'] + '/progression_'+eval_set+'.mat',save_dict)                
477 |             eval_metric = -np.mean(waccuracy)
478 |             # Check if we have a new best value
479 |             if eval_metric < mdlParams['valBest']:
480 |                 mdlParams['valBest'] = eval_metric
481 |                 if mdlParams['classification']:
482 |                     allData['f1Best'][cv] = f1
483 |                     allData['sensBest'][cv] = sensitivity
484 |                     allData['specBest'][cv] = specificity
485 |                     allData['accBest'][cv] = accuracy
486 |                     allData['waccBest'][cv] = waccuracy
487 |                     allData['aucBest'][cv] = auc
488 |                 oldBestInd = mdlParams['lastBestInd']
489 |                 mdlParams['lastBestInd'] = step
490 |                 allData['convergeTime'][cv] = step
491 |                 # Save best predictions
492 |                 allData['bestPred'][cv] = predictions
493 |                 allData['targets'][cv] = targets
494 |                 # Write to File
495 |                 with open(mdlParams['saveDirBase'] + '/CV.pkl', 'wb') as f:
496 |                     pickle.dump(allData, f, pickle.HIGHEST_PROTOCOL)                 
497 |                 # Delete previously best model
498 |                 if os.path.isfile(mdlParams['saveDir'] + '/checkpoint_best-' + str(oldBestInd) + '.pt'):
499 |                     os.remove(mdlParams['saveDir'] + '/checkpoint_best-' + str(oldBestInd) + '.pt')
500 |                 # Save currently best model
501 |                 state = {'epoch': step, 'valBest': mdlParams['valBest'], 'lastBestInd': mdlParams['lastBestInd'], 'state_dict': modelVars['model'].state_dict(),'optimizer': modelVars['optimizer'].state_dict()}
502 |                 torch.save(state, mdlParams['saveDir'] + '/checkpoint_best-' + str(step) + '.pt')               
503 |                             
504 |             # Whether or not it is a new best: save the current checkpoint, then delete the previous regular one
505 |             # Save current model
506 |             state = {'epoch': step, 'valBest': mdlParams['valBest'], 'lastBestInd': mdlParams['lastBestInd'], 'state_dict': modelVars['model'].state_dict(),'optimizer': modelVars['optimizer'].state_dict()}
507 |             torch.save(state, mdlParams['saveDir'] + '/checkpoint-' + str(step) + '.pt')                           
508 |             # Delete last one
509 |             if step == mdlParams['display_step']:
510 |                 lastInd = 1
511 |             else:
512 |                 lastInd = step-mdlParams['display_step']
513 |             if os.path.isfile(mdlParams['saveDir'] + '/checkpoint-' + str(lastInd) + '.pt'):
514 |                 os.remove(mdlParams['saveDir'] + '/checkpoint-' + str(lastInd) + '.pt')       
515 |             # Duration so far
516 |             duration = time.time() - start_time                          
517 |             # Print
518 |             if mdlParams['classification']:
519 |                 print("\n")
520 |                 print("Config:",sys.argv[2])
521 |                 print('Fold: %d Epoch: %d/%d (%d h %d m %d s)' % (cv,step,mdlParams['training_steps'], int(duration/3600), int(np.mod(duration,3600)/60), int(np.mod(np.mod(duration,3600),60))) + time.strftime("%d.%m.-%H:%M:%S", time.localtime()))
522 |                 print("Loss on ",eval_set,"set: ",loss," Accuracy: ",accuracy," F1: ",f1," (best WACC: ",-mdlParams['valBest']," at Epoch ",mdlParams['lastBestInd'],")")
523 |                 print("Auc",auc,"Mean AUC",np.mean(auc))
524 |                 print("Per Class Acc",waccuracy,"Weighted Accuracy",np.mean(waccuracy))
525 |                 print("Sensitivity: ",sensitivity,"Specificity",specificity)
526 |                 print("Confusion Matrix")
527 |                 print(conf_matrix)
528 |                 # Potentially peek at test error
529 |                 if mdlParams['peak_at_testerr']:              
530 |                     loss, accuracy, sensitivity, specificity, _, f1, _, _, _, _, _ = utils.getErrClassification_mgpu(mdlParams, 'testInd', modelVars)
531 |                     print("Test loss: ",loss," Accuracy: ",accuracy," F1: ",f1)
532 |                     print("Sensitivity: ",sensitivity,"Specificity",specificity)
533 |                 # Potentially print train err
534 |                 if mdlParams['print_trainerr'] and 'train' not in eval_set:                
535 |                     loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, _ = utils.getErrClassification_mgpu(mdlParams, 'trainInd', modelVars)
536 |                     # Save in mat
537 |                     save_dict_train['loss'].append(loss)
538 |                     save_dict_train['acc'].append(accuracy)
539 |                     save_dict_train['wacc'].append(waccuracy)
540 |                     save_dict_train['auc'].append(auc)
541 |                     save_dict_train['sens'].append(sensitivity)
542 |                     save_dict_train['spec'].append(specificity)
543 |                     save_dict_train['f1'].append(f1)
544 |                     save_dict_train['step_num'].append(step)
545 |                     if os.path.isfile(mdlParams['saveDir'] + '/progression_trainInd.mat'):
546 |                         os.remove(mdlParams['saveDir'] + '/progression_trainInd.mat')                
547 |                     scipy.io.savemat(mdlParams['saveDir'] + '/progression_trainInd.mat',save_dict_train)                     
548 |                     print("Train loss: ",loss," Accuracy: ",accuracy," F1: ",f1)
549 |                     print("Sensitivity: ",sensitivity,"Specificity",specificity)
550 |     # Free everything in modelvars
551 |     modelVars.clear()
552 |     # After CV Training: print CV results and save them
553 |     print("Best F1:",allData['f1Best'][cv])
554 |     print("Best Sens:",allData['sensBest'][cv])
555 |     print("Best Spec:",allData['specBest'][cv])
556 |     print("Best Acc:",allData['accBest'][cv])
557 |     print("Best Per Class Accuracy:",allData['waccBest'][cv])
558 |     print("Best Weighted Acc:",np.mean(allData['waccBest'][cv]))
559 |     print("Best AUC:",allData['aucBest'][cv])
560 |     print("Best Mean AUC:",np.mean(allData['aucBest'][cv]))    
561 |     print("Convergence Steps:",allData['convergeTime'][cv])
562 | 
563 |           
564 |             


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.optim as optim
  4 | from torch.optim import lr_scheduler
  5 | import torchvision
  6 | from torchvision import datasets, models as tv_models
  7 | from torch.utils.data import DataLoader
  8 | from torchsummary import summary
  9 | import numpy as np
 10 | import models
 11 | import threading
 12 | import pickle
 13 | from pathlib import Path
 14 | import math
 15 | import os
 16 | import sys
 17 | from glob import glob
 18 | import re
 19 | import gc
 20 | import importlib
 21 | import time
 22 | import csv
 23 | import sklearn.preprocessing
 24 | import utils
 25 | from sklearn.utils import class_weight
 26 | import imagesize
 27 | 
 28 | # add configuration file
 29 | # Dictionary for model configuration
 30 | mdlParams = {}
 31 | 
 32 | # Import machine config
 33 | pc_cfg = importlib.import_module('pc_cfgs.'+sys.argv[1])
 34 | mdlParams.update(pc_cfg.mdlParams)
 35 | 
 36 | 
 37 | # If there is another argument, its which checkpoint should be used
 38 | if len(sys.argv) > 6:
 39 |     if 'last' in sys.argv[6]:
 40 |         mdlParams['ckpt_name'] = 'checkpoint-'
 41 |     else:
 42 |         mdlParams['ckpt_name'] = 'checkpoint_best-'
 43 |     if 'first' in sys.argv[6]:
 44 |         mdlParams['use_first'] = True
 45 | else:
 46 |     mdlParams['ckpt_name'] = 'checkpoint-'
 47 | 
 48 | # Set visible devices
 49 | mdlParams['numGPUs']= [[int(s) for s in re.findall(r'\d+',sys.argv[6])][-1]]
 50 | cuda_str = ""
 51 | for i in range(len(mdlParams['numGPUs'])):
 52 |     cuda_str = cuda_str + str(mdlParams['numGPUs'][i])
 53 |     if i is not len(mdlParams['numGPUs'])-1:
 54 |         cuda_str = cuda_str + ","
 55 | print("Devices to use:",cuda_str)
 56 | os.environ["CUDA_VISIBLE_DEVICES"] = cuda_str      
 57 | 
 58 | # If there is another argument, also use a meta learner
 59 | if len(sys.argv) > 7:
 60 |     if 'HAMONLY' in sys.argv[7]:
 61 |         mdlParams['eval_on_ham_only'] = True        
 62 | 
 63 | # Import model config
 64 | model_cfg = importlib.import_module('cfgs.'+sys.argv[2])
 65 | mdlParams_model = model_cfg.init(mdlParams)
 66 | mdlParams.update(mdlParams_model)
 67 | 
 68 | 
 69 | # Path name where model is saved is the fourth argument
 70 | if 'NONE' in sys.argv[5]:
 71 |     mdlParams['saveDirBase'] = mdlParams['saveDir'] + sys.argv[2]
 72 | else:
 73 |     mdlParams['saveDirBase'] = sys.argv[5]
 74 | 
 75 | # Evaluation cropping mode, parsed from sys.argv[3] (integers embedded in the argument carry the settings); sys.argv[4] is the voting scheme
 76 | if 'multi' in sys.argv[3]:
 77 |     if 'rand' in sys.argv[3]:
 78 |         mdlParams['numRandValSeq'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][0]  # first integer in the argument
 79 |         print("Random sequence number",mdlParams['numRandValSeq'])
 80 |     else:
 81 |         mdlParams['numRandValSeq'] = 0
 82 |     mdlParams['multiCropEval'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-1]  # last integer in the argument
 83 |     mdlParams['voting_scheme'] = sys.argv[4]
 84 |     if 'scale' in sys.argv[3]:
 85 |         print("Multi Crop and Scale Eval with crop number:",mdlParams['multiCropEval']," Voting scheme: ",mdlParams['voting_scheme'])
 86 |         mdlParams['orderedCrop'] = False
 87 |         mdlParams['scale_min'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-2]/100.0  # second-to-last integer, divided by 100
 88 |     elif 'determ' in sys.argv[3]:
 89 |         # Example application: multideterm5sc3f2 -> 5 crop positions, the first 3 entries of all_scales, flipping 2
 90 |         mdlParams['deterministic_eval'] = True
 91 |         mdlParams['numCropPositions'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-3]
 92 |         num_scales = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-2]
 93 |         all_scales = [1.0,0.5,0.75,0.25,0.9,0.6,0.4]
 94 |         mdlParams['cropScales'] = all_scales[:num_scales]
 95 |         mdlParams['cropFlipping'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-1]
 96 |         print("deterministic eval with crops number",mdlParams['numCropPositions'],"scales",mdlParams['cropScales'],"flipping",mdlParams['cropFlipping'])
 97 |         mdlParams['multiCropEval'] = mdlParams['numCropPositions']*len(mdlParams['cropScales'])*mdlParams['cropFlipping']  # overrides the value parsed above
 98 |         mdlParams['offset_crop'] = 0.2
 99 |     elif 'order' in sys.argv[3]:
100 |         mdlParams['orderedCrop'] = True
101 |         if mdlParams.get('var_im_size',False):
102 |             # Per-image crop positions on a sqrt(n) x sqrt(n) grid; always choose multiCropEval to be 4, 9, 16, 25, etc. (the grid spacing divides by sqrt(n)-1)
103 |             mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
104 |             # NOTE(review): imagesize.get() is documented as returning (width, height); here the first value is bound to `height` - confirm the swap is intentional
105 |             for u in range(len(mdlParams['im_paths'])):
106 |                 height, width = imagesize.get(mdlParams['im_paths'][u])
107 |                 if width < mdlParams['input_size'][0]:
108 |                     height = int(mdlParams['input_size'][0]/float(width))*height  # NOTE(review): int() truncates the ratio before multiplying - confirm intended
109 |                     width = mdlParams['input_size'][0]
110 |                 if height < mdlParams['input_size'][0]:
111 |                     width = int(mdlParams['input_size'][0]/float(height))*width
112 |                     height = mdlParams['input_size'][0]
113 |                 if mdlParams.get('resize_large_ones') is not None:
114 |                     if width == mdlParams['large_size'] and height == mdlParams['large_size']:
115 |                         width, height = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones'])
116 |                 ind = 0
117 |                 for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
118 |                     for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
119 |                         mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1))
120 |                         mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1))
121 |                         # cropPositions[u,ind] is a crop centre: the check below slices axis 0 around entry 0 and axis 1 around entry 1
122 | 
123 |                         ind += 1
124 |             # Sanity checks: re-derive each image's size and report every crop whose shape differs from input_size
125 |             #print("Positions",mdlParams['cropPositions'])
126 |             # Test image sizes
127 |             height = mdlParams['input_size'][0]
128 |             width = mdlParams['input_size'][1]
129 |             for u in range(len(mdlParams['im_paths'])):
130 |                 height_test, width_test = imagesize.get(mdlParams['im_paths'][u])
131 |                 if width_test < mdlParams['input_size'][0]:
132 |                     height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test
133 |                     width_test = mdlParams['input_size'][0]
134 |                 if height_test < mdlParams['input_size'][0]:
135 |                     width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test
136 |                     height_test = mdlParams['input_size'][0]
137 |                 if mdlParams.get('resize_large_ones') is not None:
138 |                     if width_test == mdlParams['large_size'] and height_test == mdlParams['large_size']:
139 |                         width_test, height_test = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones'])
140 |                 test_im = np.zeros([width_test,height_test])  # dummy array used only to test the slicing
141 |                 for i in range(mdlParams['multiCropEval']):
142 |                     im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width]
143 |                     if im_crop.shape[0] != mdlParams['input_size'][0]:
144 |                         print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u])
145 |                     if im_crop.shape[1] != mdlParams['input_size'][1]:
146 |                         print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u])
147 |         else:
148 |             # Fixed image size: one shared grid of crop centres. multiCropEval 5 = 2x2 grid + centre crop, 7 = 3x3 grid with rows 3 and 7 removed; otherwise choose 4, 9, 16, 25, etc.
149 |             mdlParams['cropPositions'] = np.zeros([mdlParams['multiCropEval'],2],dtype=np.int64)
150 |             if mdlParams['multiCropEval'] == 5:
151 |                 numCrops = 4
152 |             elif mdlParams['multiCropEval'] == 7:
153 |                 numCrops = 9
154 |                 mdlParams['cropPositions'] = np.zeros([9,2],dtype=np.int64)  # full 3x3 grid first; two rows are deleted below
155 |             else:
156 |                 numCrops = mdlParams['multiCropEval']
157 |             ind = 0
158 |             for i in range(np.int32(np.sqrt(numCrops))):
159 |                 for j in range(np.int32(np.sqrt(numCrops))):
160 |                     mdlParams['cropPositions'][ind,0] = mdlParams['input_size'][0]/2+i*((mdlParams['input_size_load'][0]-mdlParams['input_size'][0])/(np.sqrt(numCrops)-1))
161 |                     mdlParams['cropPositions'][ind,1] = mdlParams['input_size'][1]/2+j*((mdlParams['input_size_load'][1]-mdlParams['input_size'][1])/(np.sqrt(numCrops)-1))
162 |                     ind += 1
163 |             # Add center crop (fifth position, at the middle of input_size_load)
164 |             if mdlParams['multiCropEval'] == 5:
165 |                 mdlParams['cropPositions'][4,0] = mdlParams['input_size_load'][0]/2
166 |                 mdlParams['cropPositions'][4,1] = mdlParams['input_size_load'][1]/2
167 |             if mdlParams['multiCropEval'] == 7:
168 |                 mdlParams['cropPositions'] = np.delete(mdlParams['cropPositions'],[3,7],0)  # 9 grid rows -> 7
169 |             # Sanity checks: print the positions and the shape of each crop taken from a dummy image
170 |             print("Positions val",mdlParams['cropPositions'])
171 |             # Test image sizes
172 |             test_im = np.zeros(mdlParams['input_size_load'])  # the 3-index slice below assumes input_size_load has three entries - TODO confirm
173 |             height = mdlParams['input_size'][0]
174 |             width = mdlParams['input_size'][1]
175 |             for i in range(mdlParams['multiCropEval']):
176 |                 im_crop = test_im[np.int32(mdlParams['cropPositions'][i,0]-height/2):np.int32(mdlParams['cropPositions'][i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][i,1]-width/2):np.int32(mdlParams['cropPositions'][i,1]-width/2)+width,:]
177 |                 print("Shape",i+1,im_crop.shape)
178 |         print("Multi Crop with order with crop number:",mdlParams['multiCropEval']," Voting scheme: ",mdlParams['voting_scheme'])
179 |         if 'flip' in sys.argv[3]:
180 |             # additional flipping, example: flip2multiorder16 -> eval_flipping = 2 (second-to-last integer in the argument)
181 |             mdlParams['eval_flipping'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-2]
182 |             print("Additional flipping",mdlParams['eval_flipping'])
183 |     else:
184 |         print("Multi Crop Eval with crop number:",mdlParams['multiCropEval']," Voting scheme: ",mdlParams['voting_scheme'])
185 |         mdlParams['orderedCrop'] = False
186 | else:
187 |     mdlParams['multiCropEval'] = 0
188 |     mdlParams['orderedCrop'] = False
189 | 
190 | # Set training set to eval mode
191 | mdlParams['trainSetState'] = 'eval'
192 | 
193 | if mdlParams['numClasses'] == 9 and mdlParams.get('no_c9_eval',False):
194 |     num_classes = mdlParams['numClasses']-1    
195 | else:
196 |     num_classes = mdlParams['numClasses']
197 | # Save results in here
198 | allData = {}
199 | allData['f1Best'] = np.zeros([mdlParams['numCV']])
200 | allData['sensBest'] = np.zeros([mdlParams['numCV'],num_classes])
201 | allData['specBest'] = np.zeros([mdlParams['numCV'],num_classes])
202 | allData['accBest'] = np.zeros([mdlParams['numCV']])
203 | allData['waccBest'] = np.zeros([mdlParams['numCV'],num_classes])
204 | allData['aucBest'] = np.zeros([mdlParams['numCV'],num_classes])
205 | allData['convergeTime'] = {}
206 | allData['bestPred'] = {}
207 | allData['bestPredMC'] = {}
208 | allData['targets'] = {}
209 | allData['extPred'] = {}
210 | allData['f1Best_meta'] = np.zeros([mdlParams['numCV']])
211 | allData['sensBest_meta'] = np.zeros([mdlParams['numCV'],num_classes])
212 | allData['specBest_meta'] = np.zeros([mdlParams['numCV'],num_classes])
213 | allData['accBest_meta'] = np.zeros([mdlParams['numCV']])
214 | allData['waccBest_meta'] = np.zeros([mdlParams['numCV'],num_classes])
215 | allData['aucBest_meta'] = np.zeros([mdlParams['numCV'],num_classes])
216 | #allData['convergeTime'] = {}
217 | allData['bestPred_meta'] = {}
218 | allData['targets_meta'] = {}
219 | 
220 | if not (len(sys.argv) > 8):
221 |     for cv in range(mdlParams['numCV']):
222 |         # Reset model graph 
223 |         importlib.reload(models)
224 |         #importlib.reload(torchvision)
225 |         # Collect model variables
226 |         modelVars = {}
227 |         modelVars['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
228 |         print(modelVars['device'])
229 |         # Def current CV set
230 |         mdlParams['trainInd'] = mdlParams['trainIndCV'][cv]
231 |         if 'valIndCV' in mdlParams:
232 |             mdlParams['valInd'] = mdlParams['valIndCV'][cv]
233 |         # Def current path for saving stuff
234 |         if 'valIndCV' in mdlParams:
235 |             mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv)
236 |         else:
237 |             mdlParams['saveDir'] = mdlParams['saveDirBase']
238 | 
239 |         # Potentially calculate setMean to subtract
240 |         if mdlParams['subtract_set_mean'] == 1:
241 |             mdlParams['setMean'] = np.mean(mdlParams['images_means'][mdlParams['trainInd'],:],(0))
242 |             print("Set Mean",mdlParams['setMean']) 
243 | 
244 |         # Potentially only HAM eval
245 |         if mdlParams.get('eval_on_ham_only',False):
246 |             print("Old val inds",len(mdlParams['valInd']))
247 |             mdlParams['valInd'] = np.intersect1d(mdlParams['valInd'],mdlParams['HAM10000_inds'])
248 |             print("New val inds, HAM only",len(mdlParams['valInd']))
249 | 
250 |         # balance classes
251 |         if mdlParams['balance_classes'] < 3 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 11:
252 |             class_weights = class_weight.compute_class_weight('balanced',np.unique(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)),np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)) 
253 |             print("Current class weights",class_weights)
254 |             class_weights = class_weights*mdlParams['extra_fac']
255 |             print("Current class weights with extra",class_weights)             
256 |         elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 4:
257 |             # Split training set by classes
258 |             not_one_hot = np.argmax(mdlParams['labels_array'],1)
259 |             mdlParams['class_indices'] = []
260 |             for i in range(mdlParams['numClasses']):
261 |                 mdlParams['class_indices'].append(np.where(not_one_hot==i)[0])
262 |                 # Kick out non-trainind indices
263 |                 mdlParams['class_indices'][i] = np.setdiff1d(mdlParams['class_indices'][i],mdlParams['valInd'])
264 |                 #print("Class",i,mdlParams['class_indices'][i].shape,np.min(mdlParams['class_indices'][i]),np.max(mdlParams['class_indices'][i]),np.sum(mdlParams['labels_array'][np.int64(mdlParams['class_indices'][i]),:],0))        
265 |         elif mdlParams['balance_classes'] == 5 or mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 13:
266 |             # Other class balancing loss
267 |             class_weights = 1.0/np.mean(mdlParams['labels_array'][mdlParams['trainInd'],:],axis=0)
268 |             print("Current class weights",class_weights) 
269 |             class_weights = class_weights*mdlParams['extra_fac']
270 |             print("Current class weights with extra",class_weights) 
271 |         elif mdlParams['balance_classes'] == 9:
272 |             # Only use HAM indicies for calculation
273 |             print("Balance 9")
274 |             indices_ham = mdlParams['trainInd'][mdlParams['trainInd'] < 25331]
275 |             if mdlParams['numClasses'] == 9:
276 |                 class_weights_ = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:8],axis=0)
277 |                 #print("class before",class_weights_)
278 |                 class_weights = np.zeros([mdlParams['numClasses']])
279 |                 class_weights[:8] = class_weights_
280 |                 class_weights[-1] = np.max(class_weights_)
281 |             else:
282 |                 class_weights = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:],axis=0)
283 |             print("Current class weights",class_weights)             
284 |             if isinstance(mdlParams['extra_fac'], float):
285 |                 class_weights = np.power(class_weights,mdlParams['extra_fac'])
286 |             else:
287 |                 class_weights = class_weights*mdlParams['extra_fac']
288 |             print("Current class weights with extra",class_weights) 
289 | 
290 | 
291 |         # Set up dataloaders
292 |         # Meta scaler
293 |         if mdlParams.get('meta_features',None) is not None and mdlParams['scale_features']:
294 |             mdlParams['feature_scaler_meta'] = sklearn.preprocessing.StandardScaler().fit(mdlParams['meta_array'][mdlParams['trainInd'],:])  
295 |             #print("scaler mean",mdlParams['feature_scaler_meta'].mean_,"var",mdlParams['feature_scaler_meta'].var_)  
296 |         # For train
297 |         dataset_train = utils.ISICDataset(mdlParams, 'trainInd')
298 |         # For val
299 |         dataset_val = utils.ISICDataset(mdlParams, 'valInd')
300 |         if mdlParams['multiCropEval'] > 0:
301 |             modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=8, pin_memory=True)  
302 |         else:
303 |             modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=8, pin_memory=True)         
304 |          
305 |         modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], shuffle=True, num_workers=8, pin_memory=True)
306 |     
307 |         # For test
308 |         if 'testInd' in mdlParams:
309 |             dataset_test = utils.ISICDataset(mdlParams, 'testInd')
310 |             if mdlParams['multiCropEval'] > 0:
311 |                 modelVars['dataloader_testInd'] = DataLoader(dataset_test, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=8, pin_memory=True)  
312 |             else:
313 |                 modelVars['dataloader_testInd'] = DataLoader(dataset_test, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=8, pin_memory=True)            
314 |            
315 |             
316 |         modelVars['model'] = models.getModel(mdlParams)()
317 |         # Original input size
318 |         #if 'Dense' not in mdlParams['model_type']:
319 |         #    print("Original input size",modelVars['model'].input_size)
320 |         #print(modelVars['model'])
321 |         if 'Dense' in mdlParams['model_type']:
322 |             if mdlParams['input_size'][0] != 224:
323 |                 modelVars['model'] = utils.modify_densenet_avg_pool(modelVars['model'])
324 |                 #print(modelVars['model'])
325 |             num_ftrs = modelVars['model'].classifier.in_features
326 |             modelVars['model'].classifier = nn.Linear(num_ftrs, mdlParams['numClasses'])
327 |             #print(modelVars['model'])
328 |         elif 'dpn' in mdlParams['model_type']:
329 |             num_ftrs = modelVars['model'].classifier.in_channels
330 |             modelVars['model'].classifier = nn.Conv2d(num_ftrs,mdlParams['numClasses'],[1,1])
331 |             #modelVars['model'].add_module('real_classifier',nn.Linear(num_ftrs, mdlParams['numClasses']))
332 |             #print(modelVars['model'])
333 |         elif 'efficient' in mdlParams['model_type']:
334 |             # Replace the final fully connected layer (_fc) with one sized for numClasses
335 |             num_ftrs = modelVars['model']._fc.in_features
336 |             modelVars['model']._fc = nn.Linear(num_ftrs, mdlParams['numClasses'])    
337 |         elif 'wsl' in mdlParams['model_type']:
338 |             num_ftrs = modelVars['model'].fc.in_features
339 |             modelVars['model'].fc = nn.Linear(num_ftrs, mdlParams['numClasses'])          
340 |         else:
341 |             num_ftrs = modelVars['model'].last_linear.in_features
342 |             modelVars['model'].last_linear = nn.Linear(num_ftrs, mdlParams['numClasses'])   
343 |         # modify model
344 |         if mdlParams.get('meta_features',None) is not None:
345 |             modelVars['model'] = models.modify_meta(mdlParams,modelVars['model'])               
346 |         modelVars['model']  = modelVars['model'].to(modelVars['device'])
347 |         #summary(modelVars['model'], (mdlParams['input_size'][2], mdlParams['input_size'][0], mdlParams['input_size'][1]))
348 |         # Loss, with class weighting
349 |         # Loss, with class weighting
350 |         if mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 0 or mdlParams['balance_classes'] == 12:
351 |             modelVars['criterion'] = nn.CrossEntropyLoss()
352 |         elif mdlParams['balance_classes'] == 8:
353 |             modelVars['criterion'] = nn.CrossEntropyLoss(reduce=False)
354 |         elif mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7:
355 |             modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)),reduce=False)
356 |         elif mdlParams['balance_classes'] == 10:
357 |             modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'])
358 |         elif mdlParams['balance_classes'] == 11:
359 |             modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'],alpha=torch.cuda.FloatTensor(class_weights.astype(np.float32)))
360 |         else:
361 |             modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)))
362 | 
363 |         # Observe that all parameters are being optimized
364 |         modelVars['optimizer'] = optim.Adam(modelVars['model'].parameters(), lr=mdlParams['learning_rate'])
365 | 
366 |         # Step decay: the LR is multiplied by 1/mdlParams['LRstep'] every mdlParams['lowerLRAfter'] scheduler steps
367 |         modelVars['scheduler'] = lr_scheduler.StepLR(modelVars['optimizer'], step_size=mdlParams['lowerLRAfter'], gamma=1/np.float32(mdlParams['LRstep']))
368 | 
369 |         # Define softmax
370 |         modelVars['softmax'] = nn.Softmax(dim=1)
371 | 
372 |         # Manually find the latest checkpoint (done by hand because tf.train.latest_checkpoint behaved unreliably)
373 |         files = glob(mdlParams['saveDir']+'/*')
374 |         #print(mdlParams['saveDir'])
375 |         #print("Files",files)
376 |         global_steps = np.zeros([len(files)])
377 |         for i in range(len(files)):
378 |             # Only files whose path contains 'checkpoint' and the configured ckpt_name are considered
379 |             if 'checkpoint' not in files[i]:
380 |                 continue
381 |             if mdlParams['ckpt_name'] not in files[i]:
382 |                 continue
383 |             # Extract global step
384 |             nums = [int(s) for s in re.findall(r'\d+',files[i])]
385 |             global_steps[i] = nums[-1]
386 |         # Create path with maximum global step found, if first is not wanted
387 |         global_steps = np.sort(global_steps)
388 |         if mdlParams.get('use_first') is not None:
389 |             chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(global_steps[-2])) + '.pt'
390 |         else:
391 |             chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(np.max(global_steps))) + '.pt'
392 |         print("Restoring: ",chkPath)
393 |         # Load
394 |         state = torch.load(chkPath)
395 |         # Initialize model and optimizer
396 |         modelVars['model'].load_state_dict(state['state_dict'])
397 |         #modelVars['optimizer'].load_state_dict(state['optimizer'])   
398 |         # Construct pkl filename: config name, last/best, saved epoch number
399 |         pklFileName = sys.argv[2] + "_" + sys.argv[6] + "_" + str(int(np.max(global_steps))) + ".pkl"
400 |         modelVars['model'].eval()
401 |         if mdlParams['classification']:
402 |             print("CV Set ",cv+1)
403 |             print("------------------------------------")
404 |             # Training err first, deactivated
405 |             if 'trainInd' in mdlParams and False:
406 |                 loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, _ = utils.getErrClassification_mgpu(mdlParams, 'trainInd', modelVars)
407 |                 print("Training Results:")
408 |                 print("----------------------------------")
409 |                 print("Loss",np.mean(loss))
410 |                 print("F1 Score",f1)            
411 |                 print("Sensitivity",sensitivity)
412 |                 print("Specificity",specificity)
413 |                 print("Accuracy",accuracy)
414 |                 print("Per Class Accuracy",waccuracy)
415 |                 print("Weighted Accuracy",waccuracy)
416 |                 print("AUC",auc)
417 |                 print("Mean AUC", np.mean(auc))            
418 |             if 'valInd' in mdlParams and (len(sys.argv) <= 8):
419 |                 loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, predictions_mc = utils.getErrClassification_mgpu(mdlParams, 'valInd', modelVars)
420 |                 print("Validation Results:")
421 |                 print("----------------------------------")
422 |                 print("Loss",np.mean(loss))
423 |                 print("F1 Score",f1)            
424 |                 print("Sensitivity",sensitivity)
425 |                 print("Specificity",specificity)
426 |                 print("Accuracy",accuracy)
427 |                 print("Per Class Accuracy",waccuracy)
428 |                 print("Weighted Accuracy",np.mean(waccuracy))
429 |                 print("AUC",auc)
430 |                 print("Mean AUC", np.mean(auc))  
431 |                 # Save results in dict
432 |                 if 'testInd' not in mdlParams:
433 |                     allData['f1Best'][cv] = f1
434 |                     allData['sensBest'][cv,:] = sensitivity
435 |                     allData['specBest'][cv,:] = specificity
436 |                     allData['accBest'][cv] = accuracy
437 |                     allData['waccBest'][cv,:] = waccuracy
438 |                     allData['aucBest'][cv,:] = auc  
439 |                 allData['bestPred'][cv] = predictions
440 |                 allData['bestPredMC'][cv] = predictions_mc
441 |                 allData['targets'][cv] = targets 
442 |                 print("Pred shape",predictions.shape,"Tar shape",targets.shape)
443 |             if 'testInd' in mdlParams:        
444 |                 loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, predictions_mc = utils.getErrClassification_mgpu(mdlParams, 'testInd', modelVars)
445 |                 print("Test Results Normal:")
446 |                 print("----------------------------------")
447 |                 print("Loss",np.mean(loss))
448 |                 print("F1 Score",f1)            
449 |                 print("Sensitivity",sensitivity)
450 |                 print("Specificity",specificity)
451 |                 print("Accuracy",accuracy)
452 |                 print("Per Class Accuracy",waccuracy)
453 |                 print("Weighted Accuracy",np.mean(waccuracy))
454 |                 print("AUC",auc)
455 |                 print("Mean AUC", np.mean(auc))  
456 |                 # Save results in dict
457 |                 allData['f1Best'][cv] = f1
458 |                 allData['sensBest'][cv,:] = sensitivity
459 |                 allData['specBest'][cv,:] = specificity
460 |                 allData['accBest'][cv] = accuracy
461 |                 allData['waccBest'][cv,:] = waccuracy
462 |                 allData['aucBest'][cv,:] = auc    
463 |         else:
464 |             # TODO: Regression
465 |             print("Not Implemented")            
466 | # If there is an 8th argument, make extra evaluation for external set
467 | if len(sys.argv) > 8:
468 |     for cv in range(mdlParams['numCV']):
469 |             # Reset model graph 
470 |             importlib.reload(models)
471 |             #importlib.reload(torchvision)
472 |             # Collect model variables
473 |             modelVars = {}
474 |             modelVars['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")    
475 |             # define new folder, take care that there might be no labels
476 |             print("Creating predictions for path ",sys.argv[8])
477 |             # Add meta data
478 |             if mdlParams.get('meta_features',None) is not None:
479 |                 mdlParams['meta_dict'] = {}
480 |                 path1 = mdlParams['dataDir'] + '/meta_data/test_rez3_ll/meta_data_test.pkl'
481 |                 # Open and load
482 |                 with open(path1,'rb') as f:
483 |                     meta_data = pickle.load(f)
484 |                 # Write into dict
485 |                 for k in range(len(meta_data['im_name'])):
486 |                     feature_vector = []
487 |                     if 'age_oh' in mdlParams['meta_features']:
488 |                         if mdlParams['encode_nan']:
489 |                             feature_vector.append(meta_data['age_oh'][k,:])
490 |                         else:
491 |                             feature_vector.append(meta_data['age_oh'][k,1:])
492 |                     if 'age_num' in mdlParams['meta_features']:
493 |                         feature_vector.append(np.array([meta_data['age_num'][k]]))                      
494 |                     if 'loc_oh' in mdlParams['meta_features']:
495 |                         if mdlParams['encode_nan']:
496 |                             feature_vector.append(meta_data['loc_oh'][k,:])
497 |                         else:
498 |                             feature_vector.append(meta_data['loc_oh'][k,1:])
499 |                     if 'sex_oh' in mdlParams['meta_features']:
500 |                         if mdlParams['encode_nan']:
501 |                             feature_vector.append(meta_data['sex_oh'][k,:])
502 |                         else:
503 |                             feature_vector.append(meta_data['sex_oh'][k,1:]) 
504 | 
505 |                     #print(feature_vector) 
506 |                     feature_vector = np.concatenate(feature_vector,axis=0)
507 |                     #print("feature vector shape",feature_vector.shape)                                                
508 |                     mdlParams['meta_dict'][meta_data['im_name'][k]] = feature_vector                  
509 |             # Define the path
510 |             path1 = sys.argv[8]
511 |             # All files in that set
512 |             files = sorted(glob(path1+'/*'))
513 |             # Define new paths
514 |             mdlParams['im_paths'] = []
515 |             mdlParams['meta_list'] = []
516 |             for j in range(len(files)):
517 |                 inds = [int(s) for s in re.findall(r'\d+',files[j])]
518 |                 if 'ISIC_' in files[j]:
519 |                     mdlParams['im_paths'].append(files[j])
520 |                     if mdlParams.get('meta_features',None) is not None:
521 |                         for key in mdlParams['meta_dict']:
522 |                             if key in files[j]:
523 |                                 mdlParams['meta_list'].append(mdlParams['meta_dict'][key])       
524 |             if mdlParams.get('meta_features',None) is not None:
525 |                 # Meta data
526 |                 mdlParams['meta_array'] = np.array(mdlParams['meta_list'])                
527 |             # Add empty labels
528 |             mdlParams['labels_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['numClasses']],dtype=np.float32)
529 |             # Define everything as a valind set
530 |             mdlParams['valInd'] = np.array(np.arange(len(mdlParams['im_paths'])))
531 |             mdlParams['trainInd'] = mdlParams['valInd']
532 |             if mdlParams.get('var_im_size',False):
533 |                 # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc.
534 |                 mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
535 |                 #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64)
536 |                 for u in range(len(mdlParams['im_paths'])):
537 |                     height, width = imagesize.get(mdlParams['im_paths'][u])
538 |                     if width < mdlParams['input_size'][0]:
539 |                         height = int(mdlParams['input_size'][0]/float(width))*height
540 |                         width = mdlParams['input_size'][0]
541 |                     if height < mdlParams['input_size'][0]:
542 |                         width = int(mdlParams['input_size'][0]/float(height))*width
543 |                         height = mdlParams['input_size'][0]     
544 |                     if mdlParams.get('resize_large_ones') is not None:
545 |                         if width == mdlParams['large_size'] and height == mdlParams['large_size']:
546 |                             width, height = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones'])                
547 |                     ind = 0
548 |                     for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
549 |                         for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))):
550 |                             mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1))
551 |                             mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1))
552 |                             #mdlParams['imSizes'][u,ind,0] = curr_im_size[0]
553 | 
554 |                             ind += 1
555 |                 # Sanity checks
556 |                 #print("Positions",mdlParams['cropPositions'])
557 |                 # Test image sizes
558 |                 test_im = np.zeros(mdlParams['input_size_load'])
559 |                 height = mdlParams['input_size'][0]
560 |                 width = mdlParams['input_size'][1]
561 |                 for u in range(len(mdlParams['im_paths'])):                     
562 |                     height_test, width_test = imagesize.get(mdlParams['im_paths'][u])
563 |                     if width_test < mdlParams['input_size'][0]:
564 |                         height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test
565 |                         width_test = mdlParams['input_size'][0]
566 |                     if height_test < mdlParams['input_size'][0]:
567 |                         width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test
568 |                         height_test = mdlParams['input_size'][0]     
569 |                     if mdlParams.get('resize_large_ones') is not None:
570 |                         if width_test == mdlParams['large_size'] and height_test == mdlParams['large_size']:
571 |                             width_test, height_test = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones'])                                   
572 |                     test_im = np.zeros([width_test,height_test]) 
573 |                     for i in range(mdlParams['multiCropEval']):
574 |                         im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width]
575 |                         if im_crop.shape[0] != mdlParams['input_size'][0]:
576 |                             print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u])    
577 |                         if im_crop.shape[1] != mdlParams['input_size'][1]:
578 |                             print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u])                 
579 |             mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv)
580 |             # balance classes
581 |             if mdlParams['balance_classes'] < 3 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 11:
582 |                 class_weights = class_weight.compute_class_weight('balanced',np.unique(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)),np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)) 
583 |                 print("Current class weights",class_weights)
584 |                 class_weights = class_weights*mdlParams['extra_fac']
585 |                 print("Current class weights with extra",class_weights)             
586 |             elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 4:
587 |                 # Split training set by classes
588 |                 not_one_hot = np.argmax(mdlParams['labels_array'],1)
589 |                 mdlParams['class_indices'] = []
590 |                 for i in range(mdlParams['numClasses']):
591 |                     mdlParams['class_indices'].append(np.where(not_one_hot==i)[0])
592 |                     # Kick out non-trainind indices
593 |                     mdlParams['class_indices'][i] = np.setdiff1d(mdlParams['class_indices'][i],mdlParams['valInd'])
594 |                     #print("Class",i,mdlParams['class_indices'][i].shape,np.min(mdlParams['class_indices'][i]),np.max(mdlParams['class_indices'][i]),np.sum(mdlParams['labels_array'][np.int64(mdlParams['class_indices'][i]),:],0))        
595 |             elif mdlParams['balance_classes'] == 5 or mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 13:
596 |                 # Other class balancing loss
597 |                 class_weights = 1.0/np.mean(mdlParams['labels_array'][mdlParams['trainInd'],:],axis=0)
598 |                 print("Current class weights",class_weights) 
599 |                 class_weights = class_weights*mdlParams['extra_fac']
600 |                 print("Current class weights with extra",class_weights) 
601 |             elif mdlParams['balance_classes'] == 9:
602 |                 # Only use official indicies for calculation
603 |                 print("Balance 9")
604 |                 indices_ham = mdlParams['trainInd'][mdlParams['trainInd'] < 25331]
605 |                 if mdlParams['numClasses'] == 9:
606 |                     class_weights_ = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:8],axis=0)
607 |                     #print("class before",class_weights_)
608 |                     class_weights = np.zeros([mdlParams['numClasses']])
609 |                     class_weights[:8] = class_weights_
610 |                     class_weights[-1] = np.max(class_weights_)
611 |                 else:
612 |                     class_weights = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:],axis=0)
613 |                 print("Current class weights",class_weights)             
614 |                 if isinstance(mdlParams['extra_fac'], float):
615 |                     class_weights = np.power(class_weights,mdlParams['extra_fac'])
616 |                 else:
617 |                     class_weights = class_weights*mdlParams['extra_fac']
618 |                 print("Current class weights with extra",class_weights) 
619 | 
620 | 
621 |             # Set up dataloaders
622 |             # Meta scaler
623 |             if mdlParams.get('meta_features',None) is not None and mdlParams['scale_features']:
624 |                 mdlParams['feature_scaler_meta'] = sklearn.preprocessing.StandardScaler().fit(mdlParams['meta_array'][mdlParams['trainInd'],:])  
625 |                 #print("scaler mean",mdlParams['feature_scaler_meta'].mean_,"var",mdlParams['feature_scaler_meta'].var_)              
626 |             # For train
627 |             dataset_train = utils.ISICDataset(mdlParams, 'trainInd')
628 |             # For val
629 |             dataset_val = utils.ISICDataset(mdlParams, 'valInd')
630 |             if mdlParams['multiCropEval'] > 0:
631 |                 modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=8, pin_memory=True)  
632 |             else:
633 |                 modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=8, pin_memory=True)               
634 |             modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], shuffle=True, num_workers=8, pin_memory=True)
635 |                     
636 | 
637 |             # Define model 
638 |             modelVars['model'] = models.getModel(mdlParams)()             
639 |             if 'Dense' in mdlParams['model_type']:
640 |                 if mdlParams['input_size'][0] != 224:
641 |                     modelVars['model'] = utils.modify_densenet_avg_pool(modelVars['model'])
642 |                     #print(modelVars['model'])
643 |                 num_ftrs = modelVars['model'].classifier.in_features
644 |                 modelVars['model'].classifier = nn.Linear(num_ftrs, mdlParams['numClasses'])
645 |                 #print(modelVars['model'])
646 |             elif 'dpn' in mdlParams['model_type']:
647 |                 num_ftrs = modelVars['model'].classifier.in_channels
648 |                 modelVars['model'].classifier = nn.Conv2d(num_ftrs,mdlParams['numClasses'],[1,1])
649 |                 #modelVars['model'].add_module('real_classifier',nn.Linear(num_ftrs, mdlParams['numClasses']))
650 |                 #print(modelVars['model'])
651 |             elif 'efficient' in mdlParams['model_type']:
652 |                 # Do nothing, output is prepared
653 |                 num_ftrs = modelVars['model']._fc.in_features
654 |                 modelVars['model']._fc = nn.Linear(num_ftrs, mdlParams['numClasses'])    
655 |             elif 'wsl' in mdlParams['model_type']:
656 |                 num_ftrs = modelVars['model'].fc.in_features
657 |                 modelVars['model'].fc = nn.Linear(num_ftrs, mdlParams['numClasses'])          
658 |             else:
659 |                 num_ftrs = modelVars['model'].last_linear.in_features
660 |                 modelVars['model'].last_linear = nn.Linear(num_ftrs, mdlParams['numClasses'])   
661 |             # modify model
662 |             if mdlParams.get('meta_features',None) is not None:
663 |                 modelVars['model'] = models.modify_meta(mdlParams,modelVars['model'])  
664 |             modelVars['model']  = modelVars['model'].to(modelVars['device'])
665 |             #summary(modelVars['model'], (mdlParams['input_size'][2], mdlParams['input_size'][0], mdlParams['input_size'][1]))
666 |             # Loss, with class weighting
667 |             # Loss, with class weighting
668 |             if mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 0 or mdlParams['balance_classes'] == 12:
669 |                 modelVars['criterion'] = nn.CrossEntropyLoss()
670 |             elif mdlParams['balance_classes'] == 8:
671 |                 modelVars['criterion'] = nn.CrossEntropyLoss(reduce=False)
672 |             elif mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7:
673 |                 modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)),reduce=False)
674 |             elif mdlParams['balance_classes'] == 10:
675 |                 modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'])
676 |             elif mdlParams['balance_classes'] == 11:
677 |                 modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'],alpha=torch.cuda.FloatTensor(class_weights.astype(np.float32)))
678 |             else:
679 |                 modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)))
680 |             # Observe that all parameters are being optimized
681 |             modelVars['optimizer'] = optim.Adam(modelVars['model'].parameters(), lr=mdlParams['learning_rate'])
682 | 
683 |             # Decay LR by a factor of 0.1 every 7 epochs
684 |             modelVars['scheduler'] = lr_scheduler.StepLR(modelVars['optimizer'], step_size=mdlParams['lowerLRAfter'], gamma=1/np.float32(mdlParams['LRstep']))
685 | 
686 |             # Define softmax
687 |             modelVars['softmax'] = nn.Softmax(dim=1)
688 | 
689 |             # Manually find latest chekcpoint, tf.train.latest_checkpoint is doing weird shit
690 |             files = glob(mdlParams['saveDir']+'/*')
691 |             global_steps = np.zeros([len(files)])
692 |             for i in range(len(files)):
693 |                 # Use meta files to find the highest index
694 |                 if 'checkpoint' not in files[i]:
695 |                     continue
696 |                 if mdlParams['ckpt_name'] not in files[i]:
697 |                     continue
698 |                 # Extract global step
699 |                 nums = [int(s) for s in re.findall(r'\d+',files[i])]
700 |                 global_steps[i] = nums[-1]
701 |             # Create path with maximum global step found, if first is not wanted
702 |             global_steps = np.sort(global_steps)
703 |             if mdlParams.get('use_first') is not None:
704 |                 chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(global_steps[-2])) + '.pt'
705 |             else:
706 |                 chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(np.max(global_steps))) + '.pt'
707 |             print("Restoring: ",chkPath)
708 |             
709 |             # Load
710 |             state = torch.load(chkPath)
711 |             # Initialize model and optimizer
712 |             modelVars['model'].load_state_dict(state['state_dict'])
713 |             #modelVars['optimizer'].load_state_dict(state['optimizer'])  
714 |             # Get predictions or learn on pred
715 |             modelVars['model'].eval()    
716 |             # Get predictions
717 |             # Turn off the skipping of the last class
718 |             mdlParams['no_c9_eval'] = False
719 |             loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, predictions_mc = utils.getErrClassification_mgpu(mdlParams, 'valInd', modelVars)
720 |             # Save predictions            
721 |             allData['extPred'][cv] = predictions
722 |             print("extPred shape",allData['extPred'][cv].shape)
723 |             pklFileName = sys.argv[2] + "_" + sys.argv[6] + "_" + str(int(np.max(global_steps))) + "_predn.pkl"
724 | 
725 | # Mean results over all folds
726 | np.set_printoptions(precision=4)
727 | print("-------------------------------------------------")
728 | print("Mean over all Folds")
729 | print("-------------------------------------------------")
730 | print("F1 Score",np.array([np.mean(allData['f1Best'])]),"+-",np.array([np.std(allData['f1Best'])]))       
731 | print("Sensitivtiy",np.mean(allData['sensBest'],0),"+-",np.std(allData['sensBest'],0))  
732 | print("Specificity",np.mean(allData['specBest'],0),"+-",np.std(allData['specBest'],0))  
733 | print("Mean Specificity",np.array([np.mean(allData['specBest'])]),"+-",np.array([np.std(np.mean(allData['specBest'],1))]))  
734 | print("Accuracy",np.array([np.mean(allData['accBest'])]),"+-",np.array([np.std(allData['accBest'])]))  
735 | print("Per Class Accuracy",np.mean(allData['waccBest'],0),"+-",np.std(allData['waccBest'],0))
736 | print("Weighted Accuracy",np.array([np.mean(allData['waccBest'])]),"+-",np.array([np.std(np.mean(allData['waccBest'],1))])) 
737 | print("AUC",np.mean(allData['aucBest'],0),"+-",np.std(allData['aucBest'],0))    
738 | print("Mean AUC",np.array([np.mean(allData['aucBest'])]),"+-",np.array([np.std(np.mean(allData['aucBest'],1))]))      
739 | # Save dict with results
740 | with open(mdlParams['saveDirBase'] + "/" + pklFileName, 'wb') as f:
741 |     pickle.dump(allData, f, pickle.HIGHEST_PROTOCOL)              
742 | 


--------------------------------------------------------------------------------