├── pc_cfgs ├── __init__.py └── example.py ├── indices_isic2019.pkl ├── Matlab ├── ColorConstancy │ ├── cow2.jpg │ ├── dog3.jpg │ ├── building1.jpg │ ├── building1_cc.jpg │ ├── dilation33.m │ ├── set_border.m │ ├── norm_derivative.m │ ├── ColorConstancyDemo.m │ ├── gDer.m │ ├── fill_border.m │ └── general_cc.m └── adjust_2019.m ├── cfgs ├── 2019 │ ├── test_effb0_ss.py │ ├── test_effb0_rr.py │ ├── test_effb0_ss_meta.py │ └── test_effb0_rr_meta.py └── __pycache__ │ └── __init__.cpython-35.pyc ├── meta_data └── official │ └── meta_data_official.pkl ├── LICENSE ├── README.md ├── auto_augment.py ├── models.py ├── ensemble.py ├── train.py └── eval.py /pc_cfgs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /indices_isic2019.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngessert/isic2019/HEAD/indices_isic2019.pkl -------------------------------------------------------------------------------- /Matlab/ColorConstancy/cow2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/cow2.jpg -------------------------------------------------------------------------------- /Matlab/ColorConstancy/dog3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/dog3.jpg -------------------------------------------------------------------------------- /Matlab/ColorConstancy/building1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/building1.jpg -------------------------------------------------------------------------------- /Matlab/ColorConstancy/building1_cc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngessert/isic2019/HEAD/Matlab/ColorConstancy/building1_cc.jpg -------------------------------------------------------------------------------- /cfgs/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngessert/isic2019/HEAD/cfgs/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /meta_data/official/meta_data_official.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ngessert/isic2019/HEAD/meta_data/official/meta_data_official.pkl -------------------------------------------------------------------------------- /pc_cfgs/example.py: -------------------------------------------------------------------------------- 1 | # Empty dict to store machine specific info 2 | mdlParams = {} 3 | # Define machine specific paths 4 | mdlParams['pathBase'] = '/home/Gessert' 5 | -------------------------------------------------------------------------------- /Matlab/ColorConstancy/dilation33.m: -------------------------------------------------------------------------------- 1 | function out = dilation33(in) 2 | 3 | hh=size(in,1); 4 | ll=size(in,2); 5 | out = zeros(hh,ll,3); 6 | out(:,:,1)=[in(2:hh,:); in(hh,:)]; 7 | out(:,:,2)=in; 8 | out(:,:,3)=[in(1,:); in(1:hh-1,:)]; 9 | out2=max(out,[],3); 10 | out(:,:,1)=[out2(:,2:ll), out2(:,ll)]; 11 | out(:,:,2)=out2; 12 | out(:,:,3)=[out2(:,1), out2(:,1:ll-1)]; 13 | out=max(out,[],3); -------------------------------------------------------------------------------- /Matlab/ColorConstancy/set_border.m: -------------------------------------------------------------------------------- 1 | function out=set_border(in,width,method) 2 | %sets border to either zero method=0,or method=1 to average 3 | if nargin<3 4 | method=1; 5 | end 6 | 7 | temp=ones(size(in)); 8 | [y x] = ndgrid(1:size(in,1),1:size(in,2)); 9 | temp=temp.*( (xwidth) ); 10 | temp=temp.*( (ywidth) ); 11 | out=temp.*in; 12 | if method==1 13 | out=out+(sum(out(:))./sum(temp(:))) *(ones(size(in))-temp); 14 | end -------------------------------------------------------------------------------- /Matlab/ColorConstancy/norm_derivative.m: -------------------------------------------------------------------------------- 1 | function [Rw,Gw,Bw]=NormDerivative(in, sigma, order) 2 | 3 | if(nargin<3) order=1; end 4 | 5 | R=in(:,:,1); 6 | G=in(:,:,2); 7 | B=in(:,:,3); 8 | 9 | if(order==1) 10 | Rx=gDer(R,sigma,1,0); 11 | Ry=gDer(R,sigma,0,1); 12 | Rw=sqrt(Rx.^2+Ry.^2); 13 | 14 | Gx=gDer(G,sigma,1,0); 15 | Gy=gDer(G,sigma,0,1); 16 | Gw=sqrt(Gx.^2+Gy.^2); 17 | 18 | Bx=gDer(B,sigma,1,0); 19 | By=gDer(B,sigma,0,1); 20 | Bw=sqrt(Bx.^2+By.^2); 21 | end 22 | 23 | if(order==2) %computes frobius norm 24 | Rxx=gDer(R,sigma,2,0); 25 | Ryy=gDer(R,sigma,0,2); 26 | Rxy=gDer(R,sigma,1,1); 27 | Rw=sqrt(Rxx.^2+4*Rxy.^2+Ryy.^2); 28 | 29 | Gxx=gDer(G,sigma,2,0); 30 | Gyy=gDer(G,sigma,0,2); 31 | Gxy=gDer(G,sigma,1,1); 32 | Gw=sqrt(Gxx.^2+4*Gxy.^2+Gyy.^2); 33 | 34 | Bxx=gDer(B,sigma,2,0); 35 | Byy=gDer(B,sigma,0,2); 36 | Bxy=gDer(B,sigma,1,1); 37 | Bw=sqrt(Bxx.^2+4*Bxy.^2+Byy.^2); 38 | end -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Nils Gessert 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Matlab/ColorConstancy/ColorConstancyDemo.m: -------------------------------------------------------------------------------- 1 | % shows example of illuminant estimation based on Grey-World, Shades of 2 | % Gray, max-RGB, and Grey-Edge algorithm 3 | 4 | 5 | %some example images 6 | input_im=double(imread('building1.jpg')); 7 | %input_im=double(imread('cow2.jpg')); 8 | %input_im=double(imread('dog3.jpg')); 9 | 10 | figure(1);imshow(uint8(input_im)); 11 | title('input image'); 12 | 13 | % Grey-World 14 | [wR,wG,wB,out1]=general_cc(input_im,0,1,0); 15 | figure(2);imshow(uint8(out1)); 16 | title('Grey-World'); 17 | 18 | % max-RGB 19 | [wR,wG,wB,out2]=general_cc(input_im,0,-1,0); 20 | figure(3);imshow(uint8(out2)); 21 | title('max-RGB'); 22 | 23 | % Shades of Grey 24 | mink_norm=5; % any number between 1 and infinity 25 | [wR,wG,wB,out3]=general_cc(input_im,0,mink_norm,0); 26 | figure(4);imshow(uint8(out3)); 27 | title('Shades of Grey'); 28 | 29 | % Grey-Edge 30 | mink_norm=5; % any number between 1 and infinity 31 | sigma=2; % sigma 32 | diff_order=1; % differentiation order (1 or 2) 33 | 34 | [wR,wG,wB,out4]=general_cc(input_im,diff_order,mink_norm,sigma); 35 | figure(5);imshow(uint8(out4)); 36 | title('Grey-Edge'); 37 | -------------------------------------------------------------------------------- /Matlab/ColorConstancy/gDer.m: -------------------------------------------------------------------------------- 1 | function [H]= gDer(f,sigma, iorder,jorder) 2 | 3 | %H = HxRecGauss(f, sigma, sigma, iorder,jorder,3); 4 | %H = HxGaussDerivative2d(f, sigma, iorder,jorder,3); 5 | 6 | %original program 7 | %Initialize the filter 8 | 9 | break_off_sigma = 3.; 10 | filtersize = floor(break_off_sigma*sigma+0.5); 11 | 12 | f=fill_border(f,filtersize); 13 | 14 | x=-filtersize:1:filtersize; 15 | 16 | Gauss=1/(sqrt(2 * pi) * sigma)* exp((x.^2)/(-2 * sigma * sigma) ); 17 | 18 | switch(iorder) 19 | case 0 20 | Gx= Gauss/sum(Gauss); 21 | case 1 22 | Gx = -(x/sigma^2).*Gauss; 23 | Gx = Gx./(sum(sum(x.*Gx))); 24 | case 2 25 | Gx = (x.^2/sigma^4-1/sigma^2).*Gauss; 26 | Gx = Gx-sum(Gx)/size(x,2); 27 | Gx = Gx/sum(0.5*x.*x.*Gx); 28 | end 29 | H = filter2(Gx,f); 30 | 31 | switch(jorder) 32 | case 0 33 | Gy= Gauss/sum(Gauss); 34 | case 1 35 | Gy = -(x/sigma^2).*Gauss; 36 | Gy = Gy./(sum(sum(x.*Gy))); 37 | case 2 38 | Gy = (x.^2/sigma^4-1/sigma^2).*Gauss; 39 | Gy = Gy-sum(Gy)/size(x,2); 40 | Gy = Gy/sum(0.5*x.*x.*Gy); 41 | end 42 | H = filter2(Gy',H); 43 | 44 | H=H(filtersize+1:size(H,1)-filtersize,filtersize+1:size(H,2)-filtersize); -------------------------------------------------------------------------------- /Matlab/ColorConstancy/fill_border.m: -------------------------------------------------------------------------------- 1 | function out=fill_border(in,bw) 2 | 3 | hh=size(in,1); 4 | ww=size(in,2); 5 | dd=size(in,3); 6 | 7 | if(dd==1) 8 | out=zeros(hh+bw*2,ww+bw*2); 9 | 10 | out(1:bw,1:bw)=ones(bw,bw).*in(1,1); 11 | out(bw+hh+1:2*bw+hh,1:bw)=ones(bw,bw).*in(hh,1); 12 | out(1:bw,bw+1+ww:2*bw+ww)=ones(bw,bw).*in(1,ww); 13 | out(bw+hh+1:2*bw+hh,bw+1+ww:2*bw+ww)=ones(bw,bw).*in(hh,ww); 14 | out( bw+1:bw+hh,bw+1:bw+ww )= in; 15 | out(1:bw,bw+1:bw+ww)=ones(bw,1)*in(1,:); 16 | out(bw+hh+1:2*bw+hh,bw+1:bw+ww)=ones(bw,1)*in(hh,:); 17 | out(bw+1:bw+hh,1:bw)=in(:,1)*ones(1,bw); 18 | out(bw+1:bw+hh,bw+ww+1:2*bw+ww)=in(:,ww)*ones(1,bw); 19 | else 20 | out=zeros(hh+bw*2,ww+bw*2,dd); 21 | for(ii=1:dd) 22 | out(1:bw,1:bw,ii)=ones(bw,bw).*in(1,1,ii); 23 | out(bw+hh+1:2*bw+hh,1:bw,ii)=ones(bw,bw).*in(hh,1,ii); 24 | out(1:bw,bw+1+ww:2*bw+ww,ii)=ones(bw,bw).*in(1,ww,ii); 25 | out(bw+hh+1:2*bw+hh,bw+1+ww:2*bw+ww,ii)=ones(bw,bw).*in(hh,ww,ii); 26 | out( bw+1:bw+hh,bw+1:bw+ww,ii )= in(:,:,ii); 27 | out(1:bw,bw+1:bw+ww,ii)=ones(bw,1)*in(1,:,ii); 28 | out(bw+hh+1:2*bw+hh,bw+1:bw+ww,ii)=ones(bw,1)*in(hh,:,ii); 29 | out(bw+1:bw+hh,1:bw,ii)=in(:,1,ii)*ones(1,bw); 30 | out(bw+1:bw+hh,bw+ww+1:2*bw+ww,ii)=in(:,ww,ii)*ones(1,bw); 31 | end 32 | end -------------------------------------------------------------------------------- /Matlab/ColorConstancy/general_cc.m: -------------------------------------------------------------------------------- 1 | % general_cc: estimates the light source of an input_image. 2 | % 3 | % Depending on the parameters the estimation is equal to Grey-Wolrd, Max-RGB, general Grey-World, 4 | % Shades-of-Gray or Grey-Edge algorithm. 5 | % 6 | % SYNOPSIS: 7 | % [white_R ,white_G ,white_B,output_data] = general_cc(input_data,njet,mink_norm,sigma,mask_im) 8 | % 9 | % INPUT : 10 | % input_data : color input image (NxMx3) 11 | % njet : the order of differentiation (range from 0-2). 12 | % mink_norm : minkowski norm used (if mink_norm==-1 then the max 13 | % operation is applied which is equal to minkowski_norm=infinity). 14 | % mask_im : binary images with zeros on image positions which 15 | % should be considered for illuminant estimation. 16 | % OUTPUT: 17 | % [white_R,white_G,white_B] : illuminant color estimation 18 | % output_data : color corrected image 19 | 20 | % LITERATURE : 21 | % 22 | % J. van de Weijer, Th. Gevers, A. Gijsenij 23 | % "Edge-Based Color Constancy" 24 | % IEEE Trans. Image Processing, accepted 2007. 25 | % 26 | % The paper includes references to other Color Constancy algorithms 27 | % included in general_cc.m such as Grey-World, and max-RGB, and 28 | % Shades-of-Gray. 29 | 30 | function [white_R ,white_G ,white_B,output_data] = general_cc(input_data,njet,mink_norm,sigma,mask_im) 31 | 32 | if(nargin<2), njet=0; end 33 | if(nargin<3), mink_norm=1; end 34 | if(nargin<4), sigma=1; end 35 | if(nargin<5), mask_im=zeros(size(input_data,1),size(input_data,2)); end 36 | 37 | % remove all saturated points 38 | saturation_threshold = 255; 39 | mask_im2 = mask_im + (dilation33(double(max(input_data,[],3)>=saturation_threshold))); 40 | mask_im2=double(mask_im2==0); 41 | mask_im2=set_border(mask_im2,sigma+1,0); 42 | % the mask_im2 contains pixels higher saturation_threshold and which are 43 | % not included in mask_im. 44 | 45 | output_data=input_data; 46 | 47 | if(njet==0) 48 | if(sigma~=0) 49 | for ii=1:3 50 | input_data(:,:,ii)=gDer(input_data(:,:,ii),sigma,0,0); 51 | end 52 | end 53 | end 54 | 55 | if(njet>0) 56 | [Rx,Gx,Bx]=norm_derivative(input_data, sigma, njet); 57 | 58 | input_data(:,:,1)=Rx; 59 | input_data(:,:,2)=Gx; 60 | input_data(:,:,3)=Bx; 61 | end 62 | 63 | input_data=abs(input_data); 64 | 65 | if(mink_norm~=-1) % minkowski norm = (1,infinity > 66 | kleur=power(input_data,mink_norm); 67 | white_R = power(sum(sum(kleur(:,:,1).*mask_im2)),1/mink_norm); 68 | white_G = power(sum(sum(kleur(:,:,2).*mask_im2)),1/mink_norm); 69 | white_B = power(sum(sum(kleur(:,:,3).*mask_im2)),1/mink_norm); 70 | 71 | som=sqrt(white_R^2+white_G^2+white_B^2); 72 | 73 | white_R=white_R/som; 74 | white_G=white_G/som; 75 | white_B=white_B/som; 76 | else %minkowski-norm is infinit: Max-algorithm 77 | R=input_data(:,:,1); 78 | G=input_data(:,:,2); 79 | B=input_data(:,:,3); 80 | 81 | white_R=max(R(:).*mask_im2(:)); 82 | white_G=max(G(:).*mask_im2(:)); 83 | white_B=max(B(:).*mask_im2(:)); 84 | 85 | som=sqrt(white_R^2+white_G^2+white_B^2); 86 | 87 | white_R=white_R/som; 88 | white_G=white_G/som; 89 | white_B=white_B/som; 90 | end 91 | output_data(:,:,1)=output_data(:,:,1)/(white_R*sqrt(3)); 92 | output_data(:,:,2)=output_data(:,:,2)/(white_G*sqrt(3)); 93 | output_data(:,:,3)=output_data(:,:,3)/(white_B*sqrt(3)); -------------------------------------------------------------------------------- /Matlab/adjust_2019.m: -------------------------------------------------------------------------------- 1 | pathImSrc = '\isic\2019\ISIC_2019_Training_Input'; 2 | pathImTar = '\isic\2019\official'; 3 | %pathImCheck = '\isic\2019\official_check'; 4 | fold = dir(pathImSrc); 5 | std_size = [450,600]; 6 | preserve_ratio = true; 7 | preserve_size = 600; 8 | crop_black = true; 9 | margin = 0.1; 10 | thresh = 0.3; 11 | resize = true; 12 | use_cc = true; 13 | write_png = false; 14 | write = true; 15 | ind = 1; 16 | all_heights = 0; 17 | all_width = 0; 18 | %initialize 19 | use_cropping = false; 20 | for i=3:length(fold) 21 | try 22 | im = imread([pathImSrc '\' fold(i).name]); 23 | catch 24 | disp(['Image ' fold(i).name ' failed.']) 25 | continue 26 | end 27 | if crop_black 28 | lvl = graythresh(rgb2gray(im)); 29 | BW = imbinarize(imgaussfilt(rgb2gray(im),2),lvl*0.2); 30 | stats = regionprops('table',BW,'Centroid',... 31 | 'MajorAxisLength','MinorAxisLength'); 32 | if size(stats,1) > 0 33 | diameters = mean([stats.MajorAxisLength stats.MinorAxisLength],2); 34 | [diameter_srt,srt_ind] = sort(diameters,'descend'); 35 | %[diameter,ind] = max(diameters); 36 | radius = diameter_srt(1)/2; 37 | center = stats.Centroid(srt_ind(1),:); 38 | % define box 39 | x_min = int32(center(2)-radius+margin*radius); 40 | x_max = int32(center(2)+radius-margin*radius); 41 | y_min = int32(center(1)-radius+margin*radius); 42 | y_max = int32(center(1)+radius-margin*radius); 43 | use_cropping = true; 44 | else 45 | use_cropping = false; 46 | end 47 | if x_min < 1 || x_max > size(im,1) || y_min < 1 || y_max > size(im,2) 48 | if length(diameter_srt) > 1 49 | % try second largest 50 | radius = diameter_srt(2)/2; 51 | center = stats.Centroid(srt_ind(2),:); 52 | % define box 53 | x_min = int32(center(2)-radius+margin*radius); 54 | x_max = int32(center(2)+radius-margin*radius); 55 | y_min = int32(center(1)-radius+margin*radius); 56 | y_max = int32(center(1)+radius-margin*radius); 57 | if x_min < 1 || x_max > size(im,1) || y_min < 1 || y_max > size(im,2) 58 | use_cropping = false; 59 | end 60 | else 61 | use_cropping = false; 62 | end 63 | end 64 | if use_cropping 65 | mean_inside = mean(im(x_min:x_max,y_min:y_max,:),'all'); 66 | mean_outside = (mean(im(1:x_min,:,:),'all')+mean(im(x_min:x_max,1:y_min,:),'all')+mean(im(x_max:end,:,:),'all')+mean(im(x_min:x_max,y_max:end,:),'all'))/4; 67 | if mean_outside/mean_inside > thresh 68 | use_cropping = false; 69 | end 70 | end 71 | if use_cropping 72 | %imwrite(im,[pathImCheck '\' fold(i).name]); 73 | im = im(x_min:x_max,y_min:y_max,:); 74 | %imwrite(im,[pathImCheck '\' replace(fold(i).name,'.jpg','_c.jpg')]); 75 | %disp([fold(i).name ' cropped.']) 76 | end 77 | end 78 | %all_heights(ind) = size(im,1); 79 | %all_width(ind) = size(im,2); 80 | %ind = ind+1; 81 | % resize? 82 | if resize 83 | if preserve_ratio 84 | % long side is resized to target size 85 | if size(im,1) > size(im,2) 86 | im = permute(im,[2,1,3]); 87 | end 88 | if size(im,2) ~= preserve_size 89 | ratio = preserve_size/size(im,2); 90 | %disp(['Before ' mat2str(size(im))]) 91 | im = imresize(im,[int32(round(size(im,1)*ratio)),preserve_size]); 92 | %disp(['After ' mat2str(size(im))]) 93 | end 94 | else 95 | if size(im,1) > size(im,2) 96 | im = permute(im,[2,1,3]); 97 | end 98 | if size(im,1) ~= std_size(1) || size(im,2) ~= std_size(2) 99 | im = imresize(im,std_size); 100 | end 101 | end 102 | end 103 | % cc 104 | if use_cc 105 | [~,~,~,im_new]=general_cc(double(im),0,6,0); 106 | im_new = uint8(im_new); 107 | else 108 | im_new = im; 109 | end 110 | if write 111 | if write_png 112 | imwrite(im_new,[pathImTar '\' replace(fold(i).name,'.jpg','.png')]); 113 | else 114 | imwrite(im_new,[pathImTar '\' fold(i).name],'Quality',100); 115 | end 116 | end 117 | if mod(i,1000) == 0 118 | disp(i) 119 | end 120 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Skin Lesion Classification Using Ensembles of Multi-Resolution EfficientNets with Meta Data 2 | 3 | Code for team DAISYLab's participation in the ([ISIC 2019 challenge](https://challenge2019.isic-archive.com/)). 4 | 5 | We achieved first place in both tasks: ([Leaderboards](https://challenge2019.isic-archive.com/leaderboard.html)). 6 | 7 | Arxiv paper: https://arxiv.org/abs/1910.03910 8 | 9 | Please cite our MethodsX article if you make use of our work: https://doi.org/10.1016/j.mex.2020.100864 10 | 11 | ### Usage 12 | 13 | Here, we explain the basic usage of our code. Note that we used additional datasets that need to be prepared in a similar way. Most of it is based on our [last year's approach](https://github.com/ngessert/isic2018). 14 | 15 | ### Data and Path Preparation 16 | 17 | The images' and labels' directory strucutre should look like this: /isic2019/images/official/ISIC_0024306.jpg and /isic2019/labels/official/labels.csv. The labels in the CSV file should be structured as follows: first column contains the image ID ("ISIC_0024306"), then the one-hot encoded labels follow. 18 | 19 | Other datasets such as the 7-point dataset need to be formatted in a similar way. I.e. there needs to be a "sevenpoint" folder (instead of "official") for the images and a "sevepoint" folder for the labels with the properly fromatted label files. 20 | 21 | Our split for training/validation with 5-Fold CV is included in the "indices_isic2019.pkl" file. This should be placed in the same directory as /isic2019. Note that we do not use a test set. 22 | 23 | In pc_cfgs we include an example for a machine specific cfg. Here, the base folder can be adjusted for different machines. 24 | 25 | In the cfgs folder, there example configs. You can swap out models by using the names given in models.py. For the EfficientNets we used the recommended resolution from the paper --> https://github.com/lukemelas/EfficientNet-PyTorch. 26 | 27 | When training a model with additional meta data, you need the prepared meta data file in the meta_data folder. The meta_data folder is structured similar to the images or labels folder (one subfolder for each dataset). 28 | 29 | ### Training a model 30 | 31 | We included two example config files for full training and 5-Fold CV. More details on the different options, e.g. for balancing and cropping, are given in the paper. To start training, run: `python train.py example 2019.test_effb0_ss gpu0` 32 | 33 | gpu0 indicates the number of the GPU that should be used for training. This is helpful for machines with multiple GPUs. 34 | 35 | ### Evaluate a model 36 | 37 | For model evaluation, there are multiple options. First, a 5-Fold CV model can be evaluated on each held out split. For evaluation of same-sized cropping model (see paper for explanation), run: `python eval.py example 2019.test_effb0_ss multiorder36 average NONE bestgpu0` 38 | 39 | `multiorder36` indicates that ordered, multi-crop evaluation with 36 crops should be performed. Always use 9, 16, 25, 36, 49... etc. number of crops. `average` indicates the predictions should be averaged over the crops (can also be `vote`). `best` indicates that the best model obtained during training should be used. Can be `last` to use the last model saved. 40 | 41 | When evaluating a model with random-resize option (see paper for explanation), run this instead: `python eval.py example 2019.test_effb0_rr multideterm1sc4f4 average NONE bestgpu0` 42 | 43 | If final predictions on new, unlabeled images should be performed, add the path to said images at the end of the evaluation call: `python eval.py example 2019.test_effb0_ss multiorder36 average NONE bestgpu0 NONE /home/Gessert/data/isic/isic2019/images/Test` 44 | 45 | Each evaluation run generates a pkl file that can be used for further ensemble aggregation. 46 | 47 | ### Construct an Ensemble 48 | 49 | Testing ensembles is also split into two parts. First, an ensemble can be constructed based on 5-Fold CV error and the corresponding best models are saved. Then, the final predictions on a new dataset can be made using the generated files from the evaluation section. 50 | 51 | For 5-Fold CV performance assessment, run: `python ensemble.py /path/to/evaluation/files evalexhaust15 /path/to/file/best_models.pkl` 52 | The first path indicates the location where all evaluation pkl files are located. `evalexhaust15`: `eval` indicates that 5-Fold CV evaluation is desired. `exhaust15` indicates that the top 15 performing models should be tested for their optimal combination. I.e., every possible combination (average predictions) of those models is tested for the best performance. Without the exhaust option, only the top N combinations are considered, i.e., the tested combinations are: top1 model, top1+top2 model, top1+top2+top3 model, etc. The last argument indicates the path where the best performing combination is saved. 53 | 54 | For generation of new predictions for unlabeled data, run: `python ensemble.py /path/to/evaluation/files best /path/to/file/best_models.pkl /path/to/predictions.csv /path/to/image/files` 55 | `best` indicates that only the models with best in the name should be considered. This relates to the evaluation where either the best performing model or the last checkpoint can be used for generation. This can be `last` or `bestlast` for both. The next argument is the path to the file that was generated in the first ensemble run. This can just be `NONE` if all models should be included. The next argument is the path to the CSV file that should contain the predictions. The last argument is the path to the image files which is used to match the predictions to image file names. 56 | -------------------------------------------------------------------------------- /auto_augment.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import scipy 4 | from scipy import ndimage 5 | from PIL import Image, ImageEnhance, ImageOps 6 | 7 | #See: https://github.com/4uiiurz1/pytorch-auto-augment 8 | class AutoAugment(object): 9 | def __init__(self): 10 | self.policies = [ 11 | ['Invert', 0.1, 7, 'Contrast', 0.2, 6], 12 | ['Rotate', 0.7, 2, 'TranslateX', 0.3, 9], 13 | ['Sharpness', 0.8, 1, 'Sharpness', 0.9, 3], 14 | ['ShearY', 0.5, 8, 'TranslateY', 0.7, 9], 15 | ['AutoContrast', 0.5, 8, 'Equalize', 0.9, 2], 16 | ['ShearY', 0.2, 7, 'Posterize', 0.3, 7], 17 | ['Color', 0.4, 3, 'Brightness', 0.6, 7], 18 | ['Sharpness', 0.3, 9, 'Brightness', 0.7, 9], 19 | ['Equalize', 0.6, 5, 'Equalize', 0.5, 1], 20 | ['Contrast', 0.6, 7, 'Sharpness', 0.6, 5], 21 | ['Color', 0.7, 7, 'TranslateX', 0.5, 8], 22 | ['Equalize', 0.3, 7, 'AutoContrast', 0.4, 8], 23 | ['TranslateY', 0.4, 3, 'Sharpness', 0.2, 6], 24 | ['Brightness', 0.9, 6, 'Color', 0.2, 8], 25 | ['Solarize', 0.5, 2, 'Invert', 0, 0.3], 26 | ['Equalize', 0.2, 0, 'AutoContrast', 0.6, 0], 27 | ['Equalize', 0.2, 8, 'Equalize', 0.6, 4], 28 | ['Color', 0.9, 9, 'Equalize', 0.6, 6], 29 | ['AutoContrast', 0.8, 4, 'Solarize', 0.2, 8], 30 | ['Brightness', 0.1, 3, 'Color', 0.7, 0], 31 | ['Solarize', 0.4, 5, 'AutoContrast', 0.9, 3], 32 | ['TranslateY', 0.9, 9, 'TranslateY', 0.7, 9], 33 | ['AutoContrast', 0.9, 2, 'Solarize', 0.8, 3], 34 | ['Equalize', 0.8, 8, 'Invert', 0.1, 3], 35 | ['TranslateY', 0.7, 9, 'AutoContrast', 0.9, 1], 36 | ] 37 | 38 | def __call__(self, img): 39 | img = apply_policy(img, self.policies[random.randrange(len(self.policies))]) 40 | return img 41 | 42 | 43 | operations = { 44 | 'ShearX': lambda img, magnitude: shear_x(img, magnitude), 45 | 'ShearY': lambda img, magnitude: shear_y(img, magnitude), 46 | 'TranslateX': lambda img, magnitude: translate_x(img, magnitude), 47 | 'TranslateY': lambda img, magnitude: translate_y(img, magnitude), 48 | 'Rotate': lambda img, magnitude: rotate(img, magnitude), 49 | 'AutoContrast': lambda img, magnitude: auto_contrast(img, magnitude), 50 | 'Invert': lambda img, magnitude: invert(img, magnitude), 51 | 'Equalize': lambda img, magnitude: equalize(img, magnitude), 52 | 'Solarize': lambda img, magnitude: solarize(img, magnitude), 53 | 'Posterize': lambda img, magnitude: posterize(img, magnitude), 54 | 'Contrast': lambda img, magnitude: contrast(img, magnitude), 55 | 'Color': lambda img, magnitude: color(img, magnitude), 56 | 'Brightness': lambda img, magnitude: brightness(img, magnitude), 57 | 'Sharpness': lambda img, magnitude: sharpness(img, magnitude), 58 | 'Cutout': lambda img, magnitude: cutout(img, magnitude), 59 | } 60 | 61 | 62 | def apply_policy(img, policy): 63 | if random.random() < policy[1]: 64 | img = operations[policy[0]](img, policy[2]) 65 | if random.random() < policy[4]: 66 | img = operations[policy[3]](img, policy[5]) 67 | 68 | return img 69 | 70 | 71 | def transform_matrix_offset_center(matrix, x, y): 72 | o_x = float(x) / 2 + 0.5 73 | o_y = float(y) / 2 + 0.5 74 | offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]]) 75 | reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]]) 76 | transform_matrix = offset_matrix @ matrix @ reset_matrix 77 | return transform_matrix 78 | 79 | 80 | def shear_x(img, magnitude): 81 | img = np.array(img) 82 | magnitudes = np.linspace(-0.3, 0.3, 11) 83 | 84 | transform_matrix = np.array([[1, random.uniform(magnitudes[magnitude], magnitudes[magnitude+1]), 0], 85 | [0, 1, 0], 86 | [0, 0, 1]]) 87 | transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1]) 88 | affine_matrix = transform_matrix[:2, :2] 89 | offset = transform_matrix[:2, 2] 90 | img = np.stack([ndimage.interpolation.affine_transform( 91 | img[:, :, c], 92 | affine_matrix, 93 | offset) for c in range(img.shape[2])], axis=2) 94 | img = Image.fromarray(img) 95 | return img 96 | 97 | 98 | def shear_y(img, magnitude): 99 | img = np.array(img) 100 | magnitudes = np.linspace(-0.3, 0.3, 11) 101 | 102 | transform_matrix = np.array([[1, 0, 0], 103 | [random.uniform(magnitudes[magnitude], magnitudes[magnitude+1]), 1, 0], 104 | [0, 0, 1]]) 105 | transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1]) 106 | affine_matrix = transform_matrix[:2, :2] 107 | offset = transform_matrix[:2, 2] 108 | img = np.stack([ndimage.interpolation.affine_transform( 109 | img[:, :, c], 110 | affine_matrix, 111 | offset) for c in range(img.shape[2])], axis=2) 112 | img = Image.fromarray(img) 113 | return img 114 | 115 | 116 | def translate_x(img, magnitude): 117 | img = np.array(img) 118 | magnitudes = np.linspace(-150/331, 150/331, 11) 119 | 120 | transform_matrix = np.array([[1, 0, 0], 121 | [0, 1, img.shape[1]*random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])], 122 | [0, 0, 1]]) 123 | transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1]) 124 | affine_matrix = transform_matrix[:2, :2] 125 | offset = transform_matrix[:2, 2] 126 | img = np.stack([ndimage.interpolation.affine_transform( 127 | img[:, :, c], 128 | affine_matrix, 129 | offset) for c in range(img.shape[2])], axis=2) 130 | img = Image.fromarray(img) 131 | return img 132 | 133 | 134 | def translate_y(img, magnitude): 135 | img = np.array(img) 136 | magnitudes = np.linspace(-150/331, 150/331, 11) 137 | 138 | transform_matrix = np.array([[1, 0, img.shape[0]*random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])], 139 | [0, 1, 0], 140 | [0, 0, 1]]) 141 | transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1]) 142 | affine_matrix = transform_matrix[:2, :2] 143 | offset = transform_matrix[:2, 2] 144 | img = np.stack([ndimage.interpolation.affine_transform( 145 | img[:, :, c], 146 | affine_matrix, 147 | offset) for c in range(img.shape[2])], axis=2) 148 | img = Image.fromarray(img) 149 | return img 150 | 151 | 152 | def rotate(img, magnitude): 153 | img = np.array(img) 154 | magnitudes = np.linspace(-30, 30, 11) 155 | theta = np.deg2rad(random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])) 156 | transform_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], 157 | [np.sin(theta), np.cos(theta), 0], 158 | [0, 0, 1]]) 159 | transform_matrix = transform_matrix_offset_center(transform_matrix, img.shape[0], img.shape[1]) 160 | affine_matrix = transform_matrix[:2, :2] 161 | offset = transform_matrix[:2, 2] 162 | img = np.stack([ndimage.interpolation.affine_transform( 163 | img[:, :, c], 164 | affine_matrix, 165 | offset) for c in range(img.shape[2])], axis=2) 166 | img = Image.fromarray(img) 167 | return img 168 | 169 | 170 | def auto_contrast(img, magnitude): 171 | img = ImageOps.autocontrast(img) 172 | return img 173 | 174 | 175 | def invert(img, magnitude): 176 | img = ImageOps.invert(img) 177 | return img 178 | 179 | 180 | def equalize(img, magnitude): 181 | img = ImageOps.equalize(img) 182 | return img 183 | 184 | 185 | def solarize(img, magnitude): 186 | magnitudes = np.linspace(0, 256, 11) 187 | img = ImageOps.solarize(img, random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])) 188 | return img 189 | 190 | 191 | def posterize(img, magnitude): 192 | magnitudes = np.linspace(4, 8, 11) 193 | img = ImageOps.posterize(img, int(round(random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])))) 194 | return img 195 | 196 | 197 | def contrast(img, magnitude): 198 | magnitudes = np.linspace(0.1, 1.9, 11) 199 | img = ImageEnhance.Contrast(img).enhance(random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])) 200 | return img 201 | 202 | 203 | def color(img, magnitude): 204 | magnitudes = np.linspace(0.1, 1.9, 11) 205 | img = ImageEnhance.Color(img).enhance(random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])) 206 | return img 207 | 208 | 209 | def brightness(img, magnitude): 210 | magnitudes = np.linspace(0.1, 1.9, 11) 211 | img = ImageEnhance.Brightness(img).enhance(random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])) 212 | return img 213 | 214 | 215 | def sharpness(img, magnitude): 216 | magnitudes = np.linspace(0.1, 1.9, 11) 217 | img = ImageEnhance.Sharpness(img).enhance(random.uniform(magnitudes[magnitude], magnitudes[magnitude+1])) 218 | return img 219 | 220 | 221 | def cutout(org_img, magnitude=None): 222 | img = np.array(img) 223 | 224 | magnitudes = np.linspace(0, 60/331, 11) 225 | 226 | img = np.copy(org_img) 227 | mask_val = img.mean() 228 | 229 | if magnitude is None: 230 | mask_size = 16 231 | else: 232 | mask_size = int(round(img.shape[0]*random.uniform(magnitudes[magnitude], magnitudes[magnitude+1]))) 233 | top = np.random.randint(0 - mask_size//2, img.shape[0] - mask_size) 234 | left = np.random.randint(0 - mask_size//2, img.shape[1] - mask_size) 235 | bottom = top + mask_size 236 | right = left + mask_size 237 | 238 | if top < 0: 239 | top = 0 240 | if left < 0: 241 | left = 0 242 | 243 | img[top:bottom, left:right, :].fill(mask_val) 244 | 245 | img = Image.fromarray(img) 246 | 247 | return img 248 | 249 | 250 | 251 | class Cutout(object): 252 | def __init__(self, length=16): 253 | self.length = length 254 | 255 | def __call__(self, img): 256 | img = np.array(img) 257 | 258 | mask_val = img.mean() 259 | 260 | top = np.random.randint(0 - self.length//2, img.shape[0] - self.length) 261 | left = np.random.randint(0 - self.length//2, img.shape[1] - self.length) 262 | bottom = top + self.length 263 | right = left + self.length 264 | 265 | top = 0 if top < 0 else top 266 | left = 0 if left < 0 else top 267 | 268 | img[top:bottom, left:right, :] = mask_val 269 | 270 | img = Image.fromarray(img) 271 | 272 | return img -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numbers 3 | import numpy as np 4 | import functools 5 | import h5py 6 | import math 7 | from torchvision import models 8 | import pretrainedmodels 9 | import torch.nn.functional as F 10 | import types 11 | import torch 12 | from efficientnet_pytorch import EfficientNet 13 | from collections import OrderedDict 14 | import torch.nn as nn 15 | 16 | def Dense121(config): 17 | return models.densenet121(pretrained=True) 18 | 19 | def Dense161(config): 20 | return models.densenet169(pretrained=True) 21 | 22 | def Dense169(config): 23 | return models.densenet161(pretrained=True) 24 | 25 | def Dense201(config): 26 | return models.densenet201(pretrained=True) 27 | 28 | def Resnet50(config): 29 | return pretrainedmodels.__dict__['resnet50'](num_classes=1000, pretrained='imagenet') 30 | 31 | def Resnet101(config): 32 | return models.resnet101(pretrained=True) 33 | 34 | def InceptionV3(config): 35 | return models.inception_v3(pretrained=True) 36 | 37 | def se_resnext50(config): 38 | return pretrainedmodels.__dict__['se_resnext50_32x4d'](num_classes=1000, pretrained='imagenet') 39 | 40 | def se_resnext101(config): 41 | return pretrainedmodels.__dict__['se_resnext101_32x4d'](num_classes=1000, pretrained='imagenet') 42 | 43 | def se_resnet50(config): 44 | return pretrainedmodels.__dict__['se_resnet50'](num_classes=1000, pretrained='imagenet') 45 | 46 | def se_resnet101(config): 47 | return pretrainedmodels.__dict__['se_resnet101'](num_classes=1000, pretrained='imagenet') 48 | 49 | def se_resnet152(config): 50 | return pretrainedmodels.__dict__['se_resnet152'](num_classes=1000, pretrained='imagenet') 51 | 52 | def resnext101(config): 53 | return pretrainedmodels.__dict__['resnext101_32x4d'](num_classes=1000, pretrained='imagenet') 54 | 55 | def resnext101_64(config): 56 | return pretrainedmodels.__dict__['resnext101_64x4d'](num_classes=1000, pretrained='imagenet') 57 | 58 | def senet154(config): 59 | return pretrainedmodels.__dict__['senet154'](num_classes=1000, pretrained='imagenet') 60 | 61 | def polynet(config): 62 | return pretrainedmodels.__dict__['polynet'](num_classes=1000, pretrained='imagenet') 63 | 64 | def dpn92(config): 65 | return pretrainedmodels.__dict__['dpn92'](num_classes=1000, pretrained='imagenet+5k') 66 | 67 | def dpn68b(config): 68 | return pretrainedmodels.__dict__['dpn68b'](num_classes=1000, pretrained='imagenet+5k') 69 | 70 | def nasnetamobile(config): 71 | return pretrainedmodels.__dict__['nasnetamobile'](num_classes=1000, pretrained='imagenet') 72 | 73 | def resnext101_32_8_wsl(config): 74 | return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x8d_wsl') 75 | 76 | def resnext101_32_16_wsl(config): 77 | return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x16d_wsl') 78 | 79 | def resnext101_32_32_wsl(config): 80 | return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x32d_wsl') 81 | 82 | def resnext101_32_48_wsl(config): 83 | return torch.hub.load('facebookresearch/WSL-Images', 'resnext101_32x48d_wsl') 84 | 85 | def efficientnet_b0(config): 86 | return EfficientNet.from_pretrained('efficientnet-b0',num_classes=config['numClasses']) 87 | 88 | def efficientnet_b1(config): 89 | return EfficientNet.from_pretrained('efficientnet-b1',num_classes=config['numClasses']) 90 | 91 | def efficientnet_b2(config): 92 | return EfficientNet.from_pretrained('efficientnet-b2',num_classes=config['numClasses']) 93 | 94 | def efficientnet_b3(config): 95 | return EfficientNet.from_pretrained('efficientnet-b3',num_classes=config['numClasses']) 96 | 97 | def efficientnet_b4(config): 98 | return EfficientNet.from_pretrained('efficientnet-b4',num_classes=config['numClasses']) 99 | 100 | def efficientnet_b5(config): 101 | return EfficientNet.from_pretrained('efficientnet-b5',num_classes=config['numClasses']) 102 | 103 | def efficientnet_b6(config): 104 | return EfficientNet.from_pretrained('efficientnet-b6',num_classes=config['numClasses']) 105 | 106 | def efficientnet_b7(config): 107 | return EfficientNet.from_pretrained('efficientnet-b7',num_classes=config['numClasses']) 108 | 109 | def modify_meta(mdlParams,model): 110 | # Define FC layers 111 | if len(mdlParams['fc_layers_before']) > 1: 112 | model.meta_before = nn.Sequential(nn.Linear(mdlParams['meta_array'].shape[1],mdlParams['fc_layers_before'][0]), 113 | nn.BatchNorm1d(mdlParams['fc_layers_before'][0]), 114 | nn.ReLU(), 115 | nn.Dropout(p=mdlParams['dropout_meta']), 116 | nn.Linear(mdlParams['fc_layers_before'][0],mdlParams['fc_layers_before'][1]), 117 | nn.BatchNorm1d(mdlParams['fc_layers_before'][1]), 118 | nn.ReLU(), 119 | nn.Dropout(p=mdlParams['dropout_meta'])) 120 | else: 121 | model.meta_before = nn.Sequential(nn.Linear(mdlParams['meta_array'].shape[1],mdlParams['fc_layers_before'][0]), 122 | nn.BatchNorm1d(mdlParams['fc_layers_before'][0]), 123 | nn.ReLU(), 124 | nn.Dropout(p=mdlParams['dropout_meta'])) 125 | # Define fc layers after 126 | if len(mdlParams['fc_layers_after']) > 0: 127 | if 'efficient' in mdlParams['model_type']: 128 | num_cnn_features = model._fc.in_features 129 | elif 'wsl' in mdlParams['model_type']: 130 | num_cnn_features = model.fc.in_features 131 | else: 132 | num_cnn_features = model.last_linear.in_features 133 | model.meta_after = nn.Sequential(nn.Linear(mdlParams['fc_layers_before'][-1]+num_cnn_features,mdlParams['fc_layers_after'][0]), 134 | nn.BatchNorm1d(mdlParams['fc_layers_after'][0]), 135 | nn.ReLU()) 136 | classifier_in_features = mdlParams['fc_layers_after'][0] 137 | else: 138 | model.meta_after = None 139 | classifier_in_features = mdlParams['fc_layers_before'][-1]+model._fc.in_features 140 | # Modify classifier 141 | if 'efficient' in mdlParams['model_type']: 142 | model._fc = nn.Linear(classifier_in_features, mdlParams['numClasses']) 143 | elif 'wsl' in mdlParams['model_type']: 144 | model.fc = nn.Linear(classifier_in_features, mdlParams['numClasses']) 145 | else: 146 | model.last_linear = nn.Linear(classifier_in_features, mdlParams['numClasses']) 147 | # Modify forward pass 148 | def new_forward(self, inputs): 149 | x, meta_data = inputs 150 | # Normal CNN features 151 | if 'efficient' in mdlParams['model_type']: 152 | # Convolution layers 153 | cnn_features = self.extract_features(x) 154 | # Pooling and final linear layer 155 | cnn_features = F.adaptive_avg_pool2d(cnn_features, 1).squeeze(-1).squeeze(-1) 156 | if self._dropout: 157 | cnn_features = F.dropout(cnn_features, p=self._dropout, training=self.training) 158 | elif 'wsl' in mdlParams['model_type']: 159 | cnn_features = self.conv1(x) 160 | cnn_features = self.bn1(cnn_features) 161 | cnn_features = self.relu(cnn_features) 162 | cnn_features = self.maxpool(cnn_features) 163 | 164 | cnn_features = self.layer1(cnn_features) 165 | cnn_features = self.layer2(cnn_features) 166 | cnn_features = self.layer3(cnn_features) 167 | cnn_features = self.layer4(cnn_features) 168 | 169 | cnn_features = self.avgpool(cnn_features) 170 | cnn_features = torch.flatten(cnn_features, 1) 171 | else: 172 | cnn_features = self.layer0(x) 173 | cnn_features = self.layer1(cnn_features) 174 | cnn_features = self.layer2(cnn_features) 175 | cnn_features = self.layer3(cnn_features) 176 | cnn_features = self.layer4(cnn_features) 177 | cnn_features = self.avg_pool(cnn_features) 178 | if self.dropout is not None: 179 | cnn_features = self.dropout(cnn_features) 180 | cnn_features = cnn_features.view(cnn_features.size(0), -1) 181 | # Meta part 182 | #print(meta_data.shape,meta_data) 183 | meta_features = self.meta_before(meta_data) 184 | 185 | # Cat 186 | features = torch.cat((cnn_features,meta_features),dim=1) 187 | #print("features cat",features.shape) 188 | if self.meta_after is not None: 189 | features = self.meta_after(features) 190 | # Classifier 191 | if 'efficient' in mdlParams['model_type']: 192 | output = self._fc(features) 193 | elif 'wsl' in mdlParams['model_type']: 194 | output = self.fc(features) 195 | else: 196 | output = self.last_linear(features) 197 | return output 198 | model.forward = types.MethodType(new_forward, model) 199 | return model 200 | 201 | model_map = OrderedDict([('Dense121', Dense121), 202 | ('Dense169' , Dense161), 203 | ('Dense161' , Dense169), 204 | ('Dense201' , Dense201), 205 | ('Resnet50' , Resnet50), 206 | ('Resnet101' , Resnet101), 207 | ('InceptionV3', InceptionV3),# models.inception_v3(pretrained=True), 208 | ('se_resnext50', se_resnext50), 209 | ('se_resnext101', se_resnext101), 210 | ('se_resnet50', se_resnet50), 211 | ('se_resnet101', se_resnet101), 212 | ('se_resnet152', se_resnet152), 213 | ('resnext101', resnext101), 214 | ('resnext101_64', resnext101_64), 215 | ('senet154', senet154), 216 | ('polynet', polynet), 217 | ('dpn92', dpn92), 218 | ('dpn68b', dpn68b), 219 | ('nasnetamobile', nasnetamobile), 220 | ('resnext101_32_8_wsl', resnext101_32_8_wsl), 221 | ('resnext101_32_16_wsl', resnext101_32_16_wsl), 222 | ('resnext101_32_32_wsl', resnext101_32_32_wsl), 223 | ('resnext101_32_48_wsl', resnext101_32_48_wsl), 224 | ('efficientnet-b0', efficientnet_b0), 225 | ('efficientnet-b1', efficientnet_b1), 226 | ('efficientnet-b2', efficientnet_b2), 227 | ('efficientnet-b3', efficientnet_b3), 228 | ('efficientnet-b4', efficientnet_b4), 229 | ('efficientnet-b5', efficientnet_b5), 230 | ('efficientnet-b6', efficientnet_b6), 231 | ('efficientnet-b7', efficientnet_b7), 232 | ]) 233 | 234 | def getModel(config): 235 | """Returns a function for a model 236 | Args: 237 | config: dictionary, contains configuration 238 | Returns: 239 | model: A class that builds the desired model 240 | Raises: 241 | ValueError: If model name is not recognized. 242 | """ 243 | if config['model_type'] in model_map: 244 | func = model_map[config['model_type'] ] 245 | @functools.wraps(func) 246 | def model(): 247 | return func(config) 248 | else: 249 | raise ValueError('Name of model unknown %s' % config['model_name'] ) 250 | return model -------------------------------------------------------------------------------- /cfgs/2019/test_effb0_ss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import re 5 | import csv 6 | import numpy as np 7 | from glob import glob 8 | import scipy 9 | import pickle 10 | import imagesize 11 | 12 | def init(mdlParams_): 13 | mdlParams = {} 14 | # Save summaries and model here 15 | mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/' 16 | # Data is loaded from here 17 | mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019' 18 | 19 | ### Model Selection ### 20 | mdlParams['model_type'] = 'efficientnet-b0' 21 | mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll'] 22 | mdlParams['file_ending'] = '.png' 23 | mdlParams['exclude_inds'] = False 24 | mdlParams['same_sized_crops'] = True 25 | mdlParams['multiCropEval'] = 9 26 | mdlParams['var_im_size'] = True 27 | mdlParams['orderedCrop'] = True 28 | mdlParams['voting_scheme'] = 'average' 29 | mdlParams['classification'] = True 30 | mdlParams['balance_classes'] = 9 31 | mdlParams['extra_fac'] = 1.0 32 | mdlParams['numClasses'] = 9 33 | mdlParams['no_c9_eval'] = True 34 | mdlParams['numOut'] = mdlParams['numClasses'] 35 | mdlParams['numCV'] = 5 36 | mdlParams['trans_norm_first'] = True 37 | # Scale up for b1-b7 38 | mdlParams['input_size'] = [224,224,3] 39 | 40 | ### Training Parameters ### 41 | # Batch size 42 | mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs']) 43 | # Initial learning rate 44 | mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs']) 45 | # Lower learning rate after no improvement over 100 epochs 46 | mdlParams['lowerLRAfter'] = 25 47 | # If there is no validation set, start lowering the LR after X steps 48 | mdlParams['lowerLRat'] = 50 49 | # Divide learning rate by this value 50 | mdlParams['LRstep'] = 5 51 | # Maximum number of training iterations 52 | mdlParams['training_steps'] = 60 #250 53 | # Display error every X steps 54 | mdlParams['display_step'] = 10 55 | # Scale? 56 | mdlParams['scale_targets'] = False 57 | # Peak at test error during training? (generally, dont do this!) 58 | mdlParams['peak_at_testerr'] = False 59 | # Print trainerr 60 | mdlParams['print_trainerr'] = False 61 | # Subtract trainset mean? 62 | mdlParams['subtract_set_mean'] = False 63 | mdlParams['setMean'] = np.array([0.0, 0.0, 0.0]) 64 | mdlParams['setStd'] = np.array([1.0, 1.0, 1.0]) 65 | 66 | # Data AUG 67 | #mdlParams['full_color_distort'] = True 68 | mdlParams['autoaugment'] = False 69 | mdlParams['flip_lr_ud'] = True 70 | mdlParams['full_rot'] = 180 71 | mdlParams['scale'] = (0.8,1.2) 72 | mdlParams['shear'] = 10 73 | mdlParams['cutout'] = 16 74 | 75 | ### Data ### 76 | mdlParams['preload'] = False 77 | # Labels first 78 | # Targets, as dictionary, indexed by im file name 79 | mdlParams['labels_dict'] = {} 80 | path1 = mdlParams['dataDir'] + '/labels/' 81 | # All sets 82 | allSets = glob(path1 + '*/') 83 | # Go through all sets 84 | for i in range(len(allSets)): 85 | # Check if want to include this dataset 86 | foundSet = False 87 | for j in range(len(mdlParams['dataset_names'])): 88 | if mdlParams['dataset_names'][j] in allSets[i]: 89 | foundSet = True 90 | if not foundSet: 91 | continue 92 | # Find csv file 93 | files = sorted(glob(allSets[i]+'*')) 94 | for j in range(len(files)): 95 | if 'csv' in files[j]: 96 | break 97 | # Load csv file 98 | with open(files[j], newline='') as csvfile: 99 | labels_str = csv.reader(csvfile, delimiter=',', quotechar='|') 100 | for row in labels_str: 101 | if 'image' == row[0]: 102 | continue 103 | #if 'ISIC' in row[0] and '_downsampled' in row[0]: 104 | # print(row[0]) 105 | if row[0] + '_downsampled' in mdlParams['labels_dict']: 106 | print("removed",row[0] + '_downsampled') 107 | continue 108 | if mdlParams['numClasses'] == 7: 109 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))]) 110 | elif mdlParams['numClasses'] == 8: 111 | if len(row) < 9 or row[8] == '': 112 | class_8 = 0 113 | else: 114 | class_8 = int(float(row[8])) 115 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8]) 116 | elif mdlParams['numClasses'] == 9: 117 | if len(row) < 9 or row[8] == '': 118 | class_8 = 0 119 | else: 120 | class_8 = int(float(row[8])) 121 | if len(row) < 10 or row[9] == '': 122 | class_9 = 0 123 | else: 124 | class_9 = int(float(row[9])) 125 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9]) 126 | # Save all im paths here 127 | mdlParams['im_paths'] = [] 128 | mdlParams['labels_list'] = [] 129 | # Define the sets 130 | path1 = mdlParams['dataDir'] + '/images/' 131 | # All sets 132 | allSets = sorted(glob(path1 + '*/')) 133 | # Ids which name the folders 134 | # Make official first dataset 135 | for i in range(len(allSets)): 136 | if mdlParams['dataset_names'][0] in allSets[i]: 137 | temp = allSets[i] 138 | allSets.remove(allSets[i]) 139 | allSets.insert(0, temp) 140 | print(allSets) 141 | # Set of keys, for marking old HAM10000 142 | mdlParams['key_list'] = [] 143 | if mdlParams['exclude_inds']: 144 | with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f: 145 | indices_exclude = pickle.load(f) 146 | exclude_list = [] 147 | for i in range(len(allSets)): 148 | # All files in that set 149 | files = sorted(glob(allSets[i]+'*')) 150 | # Check if there is something in there, if not, discard 151 | if len(files) == 0: 152 | continue 153 | # Check if want to include this dataset 154 | foundSet = False 155 | for j in range(len(mdlParams['dataset_names'])): 156 | if mdlParams['dataset_names'][j] in allSets[i]: 157 | foundSet = True 158 | if not foundSet: 159 | continue 160 | for j in range(len(files)): 161 | if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]: 162 | # Add according label, find it first 163 | found_already = False 164 | for key in mdlParams['labels_dict']: 165 | if key + mdlParams['file_ending'] in files[j]: 166 | if found_already: 167 | print("Found already:",key,files[j]) 168 | mdlParams['key_list'].append(key) 169 | mdlParams['labels_list'].append(mdlParams['labels_dict'][key]) 170 | found_already = True 171 | if found_already: 172 | mdlParams['im_paths'].append(files[j]) 173 | if mdlParams['exclude_inds']: 174 | for key in indices_exclude: 175 | if key in files[j]: 176 | exclude_list.append(indices_exclude[key]) 177 | # Convert label list to array 178 | mdlParams['labels_array'] = np.array(mdlParams['labels_list']) 179 | print(np.mean(mdlParams['labels_array'],axis=0)) 180 | # Create indices list with HAM10000 only 181 | mdlParams['HAM10000_inds'] = [] 182 | HAM_START = 24306 183 | HAM_END = 34320 184 | for j in range(len(mdlParams['key_list'])): 185 | try: 186 | curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1] 187 | except: 188 | continue 189 | if curr_id >= HAM_START and curr_id <= HAM_END: 190 | mdlParams['HAM10000_inds'].append(j) 191 | mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds']) 192 | print("Len ham",len(mdlParams['HAM10000_inds'])) 193 | # Perhaps preload images 194 | if mdlParams['preload']: 195 | mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8) 196 | for i in range(len(mdlParams['im_paths'])): 197 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 198 | #x = x.astype(np.float32) 199 | # Scale to 0-1 200 | #min_x = np.min(x) 201 | #max_x = np.max(x) 202 | #x = (x-min_x)/(max_x-min_x) 203 | mdlParams['images_array'][i,:,:,:] = x 204 | if i%1000 == 0: 205 | print(i+1,"images loaded...") 206 | if mdlParams['subtract_set_mean']: 207 | mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3]) 208 | for i in range(len(mdlParams['im_paths'])): 209 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 210 | x = x.astype(np.float32) 211 | # Scale to 0-1 212 | min_x = np.min(x) 213 | max_x = np.max(x) 214 | x = (x-min_x)/(max_x-min_x) 215 | mdlParams['images_means'][i,:] = np.mean(x,(0,1)) 216 | if i%1000 == 0: 217 | print(i+1,"images processed for mean...") 218 | 219 | ### Define Indices ### 220 | with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f: 221 | indices = pickle.load(f) 222 | mdlParams['trainIndCV'] = indices['trainIndCV'] 223 | mdlParams['valIndCV'] = indices['valIndCV'] 224 | if mdlParams['exclude_inds']: 225 | exclude_list = np.array(exclude_list) 226 | all_inds = np.arange(len(mdlParams['im_paths'])) 227 | exclude_inds = all_inds[exclude_list.astype(bool)] 228 | for i in range(len(mdlParams['trainIndCV'])): 229 | mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds) 230 | for i in range(len(mdlParams['valIndCV'])): 231 | mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds) 232 | # Consider case with more than one set 233 | if len(mdlParams['dataset_names']) > 1: 234 | restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0])) 235 | for i in range(mdlParams['numCV']): 236 | mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds)) 237 | print("Train") 238 | for i in range(len(mdlParams['trainIndCV'])): 239 | print(mdlParams['trainIndCV'][i].shape) 240 | print("Val") 241 | for i in range(len(mdlParams['valIndCV'])): 242 | print(mdlParams['valIndCV'][i].shape) 243 | 244 | # Use this for ordered multi crops 245 | if mdlParams['orderedCrop']: 246 | # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. 247 | mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 248 | #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 249 | for u in range(len(mdlParams['im_paths'])): 250 | height, width = imagesize.get(mdlParams['im_paths'][u]) 251 | if width < mdlParams['input_size'][0]: 252 | height = int(mdlParams['input_size'][0]/float(width))*height 253 | width = mdlParams['input_size'][0] 254 | if height < mdlParams['input_size'][0]: 255 | width = int(mdlParams['input_size'][0]/float(height))*width 256 | height = mdlParams['input_size'][0] 257 | ind = 0 258 | for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 259 | for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 260 | mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1)) 261 | mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1)) 262 | #mdlParams['imSizes'][u,ind,0] = curr_im_size[0] 263 | 264 | ind += 1 265 | # Sanity checks 266 | #print("Positions",mdlParams['cropPositions']) 267 | # Test image sizes 268 | height = mdlParams['input_size'][0] 269 | width = mdlParams['input_size'][1] 270 | for u in range(len(mdlParams['im_paths'])): 271 | height_test, width_test = imagesize.get(mdlParams['im_paths'][u]) 272 | if width_test < mdlParams['input_size'][0]: 273 | height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test 274 | width_test = mdlParams['input_size'][0] 275 | if height_test < mdlParams['input_size'][0]: 276 | width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test 277 | height_test = mdlParams['input_size'][0] 278 | test_im = np.zeros([width_test,height_test]) 279 | for i in range(mdlParams['multiCropEval']): 280 | im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width] 281 | if im_crop.shape[0] != mdlParams['input_size'][0]: 282 | print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u]) 283 | if im_crop.shape[1] != mdlParams['input_size'][1]: 284 | print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u]) 285 | return mdlParams -------------------------------------------------------------------------------- /cfgs/2019/test_effb0_rr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import re 5 | import csv 6 | import numpy as np 7 | from glob import glob 8 | import scipy 9 | import pickle 10 | import imagesize 11 | 12 | def init(mdlParams_): 13 | mdlParams = {} 14 | # Save summaries and model here 15 | mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/' 16 | # Data is loaded from here 17 | mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019' 18 | 19 | ### Model Selection ### 20 | mdlParams['model_type'] = 'efficientnet-b0' 21 | mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll'] 22 | mdlParams['file_ending'] = '.png' 23 | mdlParams['exclude_inds'] = False 24 | mdlParams['same_sized_crops'] = False 25 | mdlParams['multiCropEval'] = 9 26 | mdlParams['var_im_size'] = False 27 | mdlParams['orderedCrop'] = False 28 | mdlParams['voting_scheme'] = 'average' 29 | mdlParams['classification'] = True 30 | mdlParams['balance_classes'] = 9 31 | mdlParams['extra_fac'] = 1.0 32 | mdlParams['numClasses'] = 9 33 | mdlParams['no_c9_eval'] = True 34 | mdlParams['numOut'] = mdlParams['numClasses'] 35 | mdlParams['numCV'] = 5 36 | mdlParams['trans_norm_first'] = True 37 | # Deterministic cropping 38 | mdlParams['deterministic_eval'] = True 39 | mdlParams['numCropPositions'] = 1 40 | num_scales = 4 41 | all_scales = [1.0,0.5,0.75,0.25,0.9,0.6,0.4] 42 | mdlParams['cropScales'] = all_scales[:num_scales] 43 | mdlParams['cropFlipping'] = 4 44 | mdlParams['multiCropEval'] = mdlParams['numCropPositions']*len(mdlParams['cropScales'])*mdlParams['cropFlipping'] 45 | mdlParams['offset_crop'] = 0.2 46 | # Scale up for b1-b7 47 | mdlParams['input_size'] = [224,224,3] 48 | 49 | ### Training Parameters ### 50 | # Batch size 51 | mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs']) 52 | # Initial learning rate 53 | mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs']) 54 | # Lower learning rate after no improvement over 100 epochs 55 | mdlParams['lowerLRAfter'] = 25 56 | # If there is no validation set, start lowering the LR after X steps 57 | mdlParams['lowerLRat'] = 50 58 | # Divide learning rate by this value 59 | mdlParams['LRstep'] = 5 60 | # Maximum number of training iterations 61 | mdlParams['training_steps'] = 60 #250 62 | # Display error every X steps 63 | mdlParams['display_step'] = 10 64 | # Scale? 65 | mdlParams['scale_targets'] = False 66 | # Peak at test error during training? (generally, dont do this!) 67 | mdlParams['peak_at_testerr'] = False 68 | # Print trainerr 69 | mdlParams['print_trainerr'] = False 70 | # Subtract trainset mean? 71 | mdlParams['subtract_set_mean'] = False 72 | mdlParams['setMean'] = np.array([0.0, 0.0, 0.0]) 73 | mdlParams['setStd'] = np.array([1.0, 1.0, 1.0]) 74 | 75 | # Data AUG 76 | #mdlParams['full_color_distort'] = True 77 | mdlParams['autoaugment'] = False 78 | mdlParams['flip_lr_ud'] = True 79 | mdlParams['full_rot'] = 180 80 | mdlParams['scale'] = (0.8,1.2) 81 | mdlParams['shear'] = 10 82 | mdlParams['cutout'] = 16 83 | 84 | ### Data ### 85 | mdlParams['preload'] = False 86 | # Labels first 87 | # Targets, as dictionary, indexed by im file name 88 | mdlParams['labels_dict'] = {} 89 | path1 = mdlParams['dataDir'] + '/labels/' 90 | # All sets 91 | allSets = glob(path1 + '*/') 92 | # Go through all sets 93 | for i in range(len(allSets)): 94 | # Check if want to include this dataset 95 | foundSet = False 96 | for j in range(len(mdlParams['dataset_names'])): 97 | if mdlParams['dataset_names'][j] in allSets[i]: 98 | foundSet = True 99 | if not foundSet: 100 | continue 101 | # Find csv file 102 | files = sorted(glob(allSets[i]+'*')) 103 | for j in range(len(files)): 104 | if 'csv' in files[j]: 105 | break 106 | # Load csv file 107 | with open(files[j], newline='') as csvfile: 108 | labels_str = csv.reader(csvfile, delimiter=',', quotechar='|') 109 | for row in labels_str: 110 | if 'image' == row[0]: 111 | continue 112 | #if 'ISIC' in row[0] and '_downsampled' in row[0]: 113 | # print(row[0]) 114 | if row[0] + '_downsampled' in mdlParams['labels_dict']: 115 | print("removed",row[0] + '_downsampled') 116 | continue 117 | if mdlParams['numClasses'] == 7: 118 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))]) 119 | elif mdlParams['numClasses'] == 8: 120 | if len(row) < 9 or row[8] == '': 121 | class_8 = 0 122 | else: 123 | class_8 = int(float(row[8])) 124 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8]) 125 | elif mdlParams['numClasses'] == 9: 126 | if len(row) < 9 or row[8] == '': 127 | class_8 = 0 128 | else: 129 | class_8 = int(float(row[8])) 130 | if len(row) < 10 or row[9] == '': 131 | class_9 = 0 132 | else: 133 | class_9 = int(float(row[9])) 134 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9]) 135 | # Save all im paths here 136 | mdlParams['im_paths'] = [] 137 | mdlParams['labels_list'] = [] 138 | # Define the sets 139 | path1 = mdlParams['dataDir'] + '/images/' 140 | # All sets 141 | allSets = sorted(glob(path1 + '*/')) 142 | # Ids which name the folders 143 | # Make official first dataset 144 | for i in range(len(allSets)): 145 | if mdlParams['dataset_names'][0] in allSets[i]: 146 | temp = allSets[i] 147 | allSets.remove(allSets[i]) 148 | allSets.insert(0, temp) 149 | print(allSets) 150 | # Set of keys, for marking old HAM10000 151 | mdlParams['key_list'] = [] 152 | if mdlParams['exclude_inds']: 153 | with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f: 154 | indices_exclude = pickle.load(f) 155 | exclude_list = [] 156 | for i in range(len(allSets)): 157 | # All files in that set 158 | files = sorted(glob(allSets[i]+'*')) 159 | # Check if there is something in there, if not, discard 160 | if len(files) == 0: 161 | continue 162 | # Check if want to include this dataset 163 | foundSet = False 164 | for j in range(len(mdlParams['dataset_names'])): 165 | if mdlParams['dataset_names'][j] in allSets[i]: 166 | foundSet = True 167 | if not foundSet: 168 | continue 169 | for j in range(len(files)): 170 | if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]: 171 | # Add according label, find it first 172 | found_already = False 173 | for key in mdlParams['labels_dict']: 174 | if key + mdlParams['file_ending'] in files[j]: 175 | if found_already: 176 | print("Found already:",key,files[j]) 177 | mdlParams['key_list'].append(key) 178 | mdlParams['labels_list'].append(mdlParams['labels_dict'][key]) 179 | found_already = True 180 | if found_already: 181 | mdlParams['im_paths'].append(files[j]) 182 | if mdlParams['exclude_inds']: 183 | for key in indices_exclude: 184 | if key in files[j]: 185 | exclude_list.append(indices_exclude[key]) 186 | # Convert label list to array 187 | mdlParams['labels_array'] = np.array(mdlParams['labels_list']) 188 | print(np.mean(mdlParams['labels_array'],axis=0)) 189 | # Create indices list with HAM10000 only 190 | mdlParams['HAM10000_inds'] = [] 191 | HAM_START = 24306 192 | HAM_END = 34320 193 | for j in range(len(mdlParams['key_list'])): 194 | try: 195 | curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1] 196 | except: 197 | continue 198 | if curr_id >= HAM_START and curr_id <= HAM_END: 199 | mdlParams['HAM10000_inds'].append(j) 200 | mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds']) 201 | print("Len ham",len(mdlParams['HAM10000_inds'])) 202 | # Perhaps preload images 203 | if mdlParams['preload']: 204 | mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8) 205 | for i in range(len(mdlParams['im_paths'])): 206 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 207 | #x = x.astype(np.float32) 208 | # Scale to 0-1 209 | #min_x = np.min(x) 210 | #max_x = np.max(x) 211 | #x = (x-min_x)/(max_x-min_x) 212 | mdlParams['images_array'][i,:,:,:] = x 213 | if i%1000 == 0: 214 | print(i+1,"images loaded...") 215 | if mdlParams['subtract_set_mean']: 216 | mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3]) 217 | for i in range(len(mdlParams['im_paths'])): 218 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 219 | x = x.astype(np.float32) 220 | # Scale to 0-1 221 | min_x = np.min(x) 222 | max_x = np.max(x) 223 | x = (x-min_x)/(max_x-min_x) 224 | mdlParams['images_means'][i,:] = np.mean(x,(0,1)) 225 | if i%1000 == 0: 226 | print(i+1,"images processed for mean...") 227 | 228 | ### Define Indices ### 229 | # Just divide into 5 equally large sets 230 | with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f: 231 | indices = pickle.load(f) 232 | mdlParams['trainIndCV'] = indices['trainIndCV'] 233 | mdlParams['valIndCV'] = indices['valIndCV'] 234 | if mdlParams['exclude_inds']: 235 | exclude_list = np.array(exclude_list) 236 | all_inds = np.arange(len(mdlParams['im_paths'])) 237 | exclude_inds = all_inds[exclude_list.astype(bool)] 238 | for i in range(len(mdlParams['trainIndCV'])): 239 | mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds) 240 | for i in range(len(mdlParams['valIndCV'])): 241 | mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds) 242 | # Consider case with more than one set 243 | if len(mdlParams['dataset_names']) > 1: 244 | restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0])) 245 | for i in range(mdlParams['numCV']): 246 | mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds)) 247 | print("Train") 248 | for i in range(len(mdlParams['trainIndCV'])): 249 | print(mdlParams['trainIndCV'][i].shape) 250 | print("Val") 251 | for i in range(len(mdlParams['valIndCV'])): 252 | print(mdlParams['valIndCV'][i].shape) 253 | 254 | # Use this for ordered multi crops 255 | if mdlParams['orderedCrop']: 256 | # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. 257 | mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 258 | #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 259 | for u in range(len(mdlParams['im_paths'])): 260 | height, width = imagesize.get(mdlParams['im_paths'][u]) 261 | if width < mdlParams['input_size'][0]: 262 | height = int(mdlParams['input_size'][0]/float(width))*height 263 | width = mdlParams['input_size'][0] 264 | if height < mdlParams['input_size'][0]: 265 | width = int(mdlParams['input_size'][0]/float(height))*width 266 | height = mdlParams['input_size'][0] 267 | ind = 0 268 | for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 269 | for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 270 | mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1)) 271 | mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1)) 272 | #mdlParams['imSizes'][u,ind,0] = curr_im_size[0] 273 | 274 | ind += 1 275 | # Sanity checks 276 | #print("Positions",mdlParams['cropPositions']) 277 | # Test image sizes 278 | height = mdlParams['input_size'][0] 279 | width = mdlParams['input_size'][1] 280 | for u in range(len(mdlParams['im_paths'])): 281 | height_test, width_test = imagesize.get(mdlParams['im_paths'][u]) 282 | if width_test < mdlParams['input_size'][0]: 283 | height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test 284 | width_test = mdlParams['input_size'][0] 285 | if height_test < mdlParams['input_size'][0]: 286 | width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test 287 | height_test = mdlParams['input_size'][0] 288 | test_im = np.zeros([width_test,height_test]) 289 | for i in range(mdlParams['multiCropEval']): 290 | im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width] 291 | if im_crop.shape[0] != mdlParams['input_size'][0]: 292 | print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u]) 293 | if im_crop.shape[1] != mdlParams['input_size'][1]: 294 | print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u]) 295 | return mdlParams -------------------------------------------------------------------------------- /ensemble.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import re 4 | import os 5 | import sys 6 | import itertools 7 | from glob import glob 8 | from sklearn.metrics import confusion_matrix, f1_score, auc, roc_curve 9 | from sklearn.ensemble import RandomForestClassifier 10 | from sklearn.svm import SVC 11 | from joblib import Parallel, delayed 12 | import multiprocessing 13 | import copy 14 | 15 | 16 | # Just assume fixed CV size for ensemble with evaluation 17 | cvSize = 5 18 | numClasses = 8 19 | 20 | # First argument is folder, filled with CV results files 21 | all_preds_path = sys.argv[1] 22 | 23 | # Second argument indicates, whether we are only generating predictions or actually evaluating performance on something 24 | if 'eval' in sys.argv[2]: 25 | evaluate = True 26 | # Determin if vote or average is used 27 | if 'vote' in sys.argv[2]: 28 | evaluate_method = 'vote' 29 | else: 30 | evaluate_method = 'average' 31 | # Determine if exhaustive combination search or ordered search is used 32 | if 'exhaust' in sys.argv[2]: 33 | exhaustive_search = True 34 | num_top_models = [int(s) for s in re.findall(r'\d+',sys.argv[2])][-1] 35 | else: 36 | exhaustive_search = False 37 | # Third argument indicates where subset should be saved 38 | if 'subSet' in sys.argv[3]: 39 | subSetPath = sys.argv[3] 40 | else: 41 | subSetPath = None 42 | else: 43 | evaluate = False 44 | acceptedList = [] 45 | if 'last' in sys.argv[2]: 46 | acceptedList.append('last') 47 | if 'best' in sys.argv[2]: 48 | acceptedList.append('best') 49 | if 'meta' in sys.argv[2]: 50 | acceptedList.append('meta') 51 | # Third argument indicates whether some subset should be used 52 | if 'subSet' in sys.argv[3]: 53 | # Load subset file 54 | with open(sys.argv[3],'rb') as f: 55 | subSetDict = pickle.load(f) 56 | subSet = subSetDict['subSet'] 57 | else: 58 | subSet = None 59 | 60 | # Fourth argument indicates csv path to save final results into 61 | if len(sys.argv) > 4 and 'csvFile' in sys.argv[4]: 62 | csvPath = sys.argv[4] 63 | origFilePath = sys.argv[5] 64 | else: 65 | csvPath = None 66 | 67 | # Function to get some metrics back 68 | def get_metrics(predictions,targets): 69 | # Calculate metrics 70 | # Accuarcy 71 | acc = np.mean(np.equal(np.argmax(predictions,1),np.argmax(targets,1))) 72 | # Confusion matrix 73 | conf = confusion_matrix(np.argmax(targets,1),np.argmax(predictions,1)) 74 | # Class weighted accuracy 75 | wacc = conf.diagonal()/conf.sum(axis=1) 76 | # Auc 77 | fpr = {} 78 | tpr = {} 79 | roc_auc = np.zeros([numClasses]) 80 | for i in range(numClasses): 81 | fpr[i], tpr[i], _ = roc_curve(targets[:, i], predictions[:, i]) 82 | roc_auc[i] = auc(fpr[i], tpr[i]) 83 | # F1 Score 84 | f1 = f1_score(np.argmax(predictions,1),np.argmax(targets,1),average='weighted') 85 | # Print 86 | print("Accuracy:",acc) 87 | print("F1-Score:",f1) 88 | print("WACC:",wacc) 89 | print("Mean WACC:",np.mean(wacc)) 90 | print("AUC:",roc_auc) 91 | print("Mean Auc:",np.mean(roc_auc)) 92 | return acc, f1, wacc, roc_auc 93 | 94 | # If its actual evaluation, evaluate each CV indipendently, show results both for each CV set and all of them together 95 | if evaluate: 96 | # Go through all files 97 | files = sorted(glob(all_preds_path+'/*')) 98 | # Because of unkown prediction size, dont use matrix 99 | final_preds = {} 100 | final_targets = {} 101 | all_waccs = [] 102 | accum_preds = {} 103 | # Define each pred size in loop 104 | firstLoaded = False 105 | for j in range(len(files)): 106 | # Skip if it is a directory 107 | if os.path.isdir(files[j]): 108 | continue 109 | # Skip if not a pkl file 110 | if '.pkl' not in files[j]: 111 | print("Remove non-pkl files") 112 | break 113 | # Load file 114 | with open(files[j],'rb') as f: 115 | allDataCurr = pickle.load(f) 116 | # Get predictions 117 | if not firstLoaded: 118 | # Define accumulated prediction size 119 | for i in range(cvSize): 120 | accum_preds[i] = np.zeros([len(files),len(allDataCurr['bestPred'][i]),numClasses]) 121 | firstLoaded = True 122 | # Write preds into array 123 | #print(files[j],allDataCurr['bestPred'][0].shape) 124 | wacc_avg = 0 125 | for i in range(cvSize): 126 | accum_preds[i][j,:,:] = allDataCurr['bestPred'][i] 127 | final_targets[i] = allDataCurr['targets'][i] 128 | # Confusion matrix 129 | conf = confusion_matrix(np.argmax(allDataCurr['targets'][i],1),np.argmax(allDataCurr['bestPred'][i],1)) 130 | # Class weighted accuracy 131 | wacc_avg += np.mean(conf.diagonal()/conf.sum(axis=1)) 132 | wacc_avg = wacc_avg/cvSize 133 | all_waccs.append(wacc_avg) 134 | # Print performance of model + name 135 | print("Model:",files[j],"WACC:",wacc_avg) 136 | # Print results per cv 137 | # Averaging predictions 138 | f1_avg = 0 139 | acc_avg = 0 140 | auc_avg = np.zeros([numClasses]) 141 | wacc_avg = np.zeros([numClasses]) 142 | # Voting with predictions 143 | f1_vote = 0 144 | acc_vote = 0 145 | auc_vote = np.zeros([numClasses]) 146 | wacc_vote = np.zeros([numClasses]) 147 | # Linear SVM on predictions 148 | f1_linsvm = 0 149 | acc_linsvm = 0 150 | auc_linsvm = np.zeros([numClasses]) 151 | wacc_linsvm = np.zeros([numClasses]) 152 | # RF on predictions 153 | f1_rf = 0 154 | acc_rf = 0 155 | auf_rf = np.zeros([numClasses]) 156 | wacc_rf = np.zeros([numClasses]) 157 | # Helper function to determine top combination 158 | def evalEnsemble(currComb,eval_auc=False): 159 | currWacc = np.zeros([cvSize]) 160 | currAUC = np.zeros([cvSize]) 161 | for i in range(cvSize): 162 | if evaluate_method == 'vote': 163 | pred_argmax = np.argmax(accum_preds[i][currComb,:,:],2) 164 | pred_eval = np.zeros([pred_argmax.shape[1],numClasses]) 165 | for j in range(pred_eval.shape[0]): 166 | pred_eval[j,:] = np.bincount(pred_argmax[:,j],minlength=numClasses) 167 | else: 168 | pred_eval = np.mean(accum_preds[i][currComb,:,:],0) 169 | # Confusion matrix 170 | conf = confusion_matrix(np.argmax(final_targets[i],1),np.argmax(pred_eval,1)) 171 | # Class weighted accuracy 172 | currWacc[i] = np.mean(conf.diagonal()/conf.sum(axis=1)) 173 | if eval_auc: 174 | currAUC_ = np.zeros([numClasses]) 175 | for j in range(numClasses): 176 | fpr, tpr, _ = roc_curve(final_targets[i][:,j], pred_eval[:, j]) 177 | currAUC_[j] = auc(fpr, tpr) 178 | currAUC[i] = np.mean(currAUC_) 179 | if eval_auc: 180 | currAUCstd = np.std(currAUC) 181 | currAUC = np.mean(currAUC) 182 | else: 183 | currAUCstd = currAUC 184 | currWaccStd = np.std(currWacc) 185 | currWacc = np.mean(currWacc) 186 | if eval_auc: 187 | return currWacc, currWaccStd, currAUC, currAUCstd 188 | else: 189 | return currWacc 190 | if exhaustive_search: 191 | # First: determine best subset based on average CV wacc 192 | # Select best subset based on wacc metric 193 | # Only take top N models 194 | top_inds = np.argsort(-np.array(all_waccs)) 195 | elements = top_inds[:num_top_models] 196 | allCombs = [] 197 | for L in range(0, len(elements)+1): 198 | for subset in itertools.combinations(elements, L): 199 | allCombs.append(subset) 200 | #print(subset) 201 | print("Number of combinations",len(allCombs)) 202 | print("Models considered") 203 | for i in range(len(elements)): 204 | print("ID",elements[i],files[elements[i]]) 205 | #allWaccs = np.zeros([len(allCombs)]) 206 | num_cores = multiprocessing.cpu_count() 207 | print("Cores available",num_cores) 208 | allWaccs = Parallel(n_jobs=num_cores)(delayed(evalEnsemble)(comb) for comb in allCombs) 209 | # Sort by highest value 210 | allWaccsSrt = -np.sort(-np.array(allWaccs)) 211 | srtInds = np.argsort(-np.array(allWaccs)) 212 | allCombsSrt = np.array(allCombs)[srtInds] 213 | for i in range(5): 214 | print("Top",i+1) 215 | print("Best WACC",allWaccsSrt[i]) 216 | wacc, wacc_std, auc_val, auc_val_std = evalEnsemble(allCombsSrt[i],eval_auc=True) 217 | print("Metrics WACC %.4f +- %.4f AUC %.4f +- %.4f"%(wacc,wacc_std,auc_val,auc_val_std)) 218 | print("Best Combination:",allCombsSrt[i]) 219 | print("Corresponding File Names") 220 | subSetDict = {} 221 | subSetDict['subSet'] = [] 222 | for j in allCombsSrt[i]: 223 | print("ID",j,files[j]) 224 | # Add filename without last part, indicating the type "best/last/meta/full" 225 | if i == 0: 226 | subSetDict['subSet'].append(files[j]) 227 | print("---------------------------------------------") 228 | bestComb = allCombsSrt[0] 229 | else: 230 | # Only take top N models 231 | top_inds = np.argsort(-np.array(all_waccs)) 232 | # Go through all top N combs 233 | allWaccs = np.zeros([len(top_inds)]) 234 | allCombs = [] 235 | for i in range(len(top_inds)): 236 | allCombs.append([]) 237 | if i==0: 238 | allCombs[i].append(top_inds[0]) 239 | else: 240 | allCombs[i] = copy.deepcopy(allCombs[i-1]) 241 | allCombs[i].append(top_inds[i]) 242 | # Test comb 243 | allWaccs[i] = evalEnsemble(allCombs[i]) 244 | # Sort by highest value 245 | allWaccsSrt = -np.sort(-np.array(allWaccs)) 246 | srtInds = np.argsort(-np.array(allWaccs)) 247 | allCombsSrt = np.array(allCombs)[srtInds] 248 | for i in range(len(top_inds)): 249 | print("Top",i+1) 250 | print("WACC",allWaccsSrt[i]) 251 | wacc, wacc_std, auc_val, auc_val_std = evalEnsemble(allCombsSrt[i],eval_auc=True) 252 | print("Metrics WACC %.4f +- %.4f AUC %.4f +- %.4f"%(wacc,wacc_std,auc_val,auc_val_std)) 253 | print("Combination:",allCombsSrt[i]) 254 | if i == 0: 255 | subSetDict = {} 256 | subSetDict['subSet'] = [] 257 | for j in allCombsSrt[i]: 258 | print("ID",j,files[j]) 259 | # Add filename without last part, indicating the type "best/last/meta/full" 260 | subSetDict['subSet'].append(files[j]) 261 | print("---------------------------------------------") 262 | p#rint("Corresponding File Names") 263 | #for j in allCombs[-1]: 264 | # print("ID",j,files[j]) 265 | bestComb = allCombsSrt[0] 266 | # Save subset for later 267 | if subSetPath is not None: 268 | with open(subSetPath, 'wb') as f: 269 | pickle.dump(subSetDict, f, pickle.HIGHEST_PROTOCOL) 270 | 271 | else: 272 | # Only generate predictions. All models predict on the same set -> cv models are equal to full models here 273 | # Go through all files 274 | files = sorted(glob(all_preds_path+'/*')) 275 | # Because of unkown prediction size, only determin it in the loop 276 | firstLoaded = False 277 | ind = 0 278 | for j in range(len(files)): 279 | # Skip if not a pkl file 280 | if '.pkl' not in files[j]: 281 | continue 282 | # Potentially check, if this file is among the selected subset 283 | if subSet is not None: 284 | # Search 285 | found = False 286 | for name in subSet: 287 | _, name_only = name.split('ISIC') 288 | if name_only in files[j]: 289 | found = True 290 | break 291 | if not found: 292 | # Check extra for acceptedList inclusion 293 | for name in subSet: 294 | _, name_only = name.split('ISIC') 295 | if name_only[:-13] in files[j]: 296 | found = True 297 | break 298 | if not found: 299 | continue 300 | # Then check, whether this type of "best,last,meta,full" is desired 301 | found = False 302 | for name in acceptedList: 303 | if name in files[j]: 304 | found = True 305 | break 306 | if not found: 307 | continue 308 | # Load file 309 | with open(files[j],'rb') as f: 310 | allDataCurr = pickle.load(f) 311 | # Get predictions 312 | if not firstLoaded: 313 | # Define final prediction/targets size, assume fixed CV size 314 | final_preds = np.zeros([len(allDataCurr['extPred'][0]),numClasses]) 315 | # Define accumulated prediction size 316 | accum_preds = np.expand_dims(allDataCurr['extPred'][0],0) 317 | ind += 1 318 | if len(allDataCurr['extPred']) > 1: 319 | for i in range(1,len(allDataCurr['extPred'])): 320 | accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][i],0)),0) 321 | ind += 1 322 | else: 323 | # Just repeat the first model X times 324 | for i in range(1,5): 325 | accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][0],0)),0) 326 | ind += 1 327 | firstLoaded = True 328 | else: 329 | # Write preds into array 330 | if len(allDataCurr['extPred']) > 1: 331 | for i in range(len(allDataCurr['extPred'])): 332 | accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][i],0)),0) 333 | ind += 1 334 | else: 335 | # Just repeat the first model X times 336 | for i in range(0,5): 337 | accum_preds = np.concatenate((accum_preds,np.expand_dims(allDataCurr['extPred'][0],0)),0) 338 | ind += 1 339 | print(files[j]) 340 | # Resize array to actually used size 341 | print(accum_preds.shape) 342 | final_preds = accum_preds[:ind,:,:] 343 | print(final_preds.shape) 344 | # Average for final predictions 345 | final_preds = np.mean(final_preds,0) 346 | class_pred = np.argmax(final_preds,1) 347 | print(np.mean(final_preds,0)) 348 | # Write into csv file, according to ordered list 349 | if csvPath is not None: 350 | # Get order file names from original folder 351 | files = sorted(glob(origFilePath+'/*')) 352 | # save into formatted csv file 353 | with open(csvPath, 'w') as csv_file: 354 | # First line 355 | csv_file.write("image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK\n") 356 | ind = 0 357 | for file_name in files: 358 | if 'ISIC_' not in file_name: 359 | continue 360 | splits = file_name.split('\\') 361 | name = splits[-1] 362 | name, _ = name.split('.') 363 | csv_file.write(name + "," + str(final_preds[ind,0]) + "," + str(final_preds[ind,1]) + "," + str(final_preds[ind,2]) + "," + str(final_preds[ind,3]) + "," + str(final_preds[ind,4]) + "," + str(final_preds[ind,5]) + "," + str(final_preds[ind,6]) + "," + str(final_preds[ind,7]) + "," + str(final_preds[ind,8]) + "\n") 364 | ind += 1 365 | 366 | 367 | -------------------------------------------------------------------------------- /cfgs/2019/test_effb0_ss_meta.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import re 5 | import csv 6 | import numpy as np 7 | from glob import glob 8 | import scipy 9 | import pickle 10 | import imagesize 11 | 12 | def init(mdlParams_): 13 | mdlParams = {} 14 | # Save summaries and model here 15 | mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/' 16 | # Data is loaded from here 17 | mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019' 18 | 19 | ### Model Selection ### 20 | mdlParams['model_type'] = 'efficientnet-b0' 21 | mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll'] 22 | mdlParams['file_ending'] = '.png' 23 | mdlParams['exclude_inds'] = False 24 | mdlParams['same_sized_crops'] = True 25 | mdlParams['multiCropEval'] = 9 26 | mdlParams['var_im_size'] = True 27 | mdlParams['orderedCrop'] = True 28 | mdlParams['voting_scheme'] = 'average' 29 | mdlParams['classification'] = True 30 | mdlParams['balance_classes'] = 9 31 | mdlParams['extra_fac'] = 1.0 32 | mdlParams['numClasses'] = 9 33 | mdlParams['no_c9_eval'] = True 34 | mdlParams['numOut'] = mdlParams['numClasses'] 35 | mdlParams['numCV'] = 5 36 | mdlParams['trans_norm_first'] = True 37 | # Scale up for b1-b7 38 | mdlParams['input_size'] = [224,224,3] 39 | 40 | ### Training Parameters ### 41 | # Batch size 42 | mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs']) 43 | # Initial learning rate 44 | mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs']) 45 | # Lower learning rate after no improvement over 100 epochs 46 | mdlParams['lowerLRAfter'] = 25 47 | # If there is no validation set, start lowering the LR after X steps 48 | mdlParams['lowerLRat'] = 50 49 | # Divide learning rate by this value 50 | mdlParams['LRstep'] = 5 51 | # Maximum number of training iterations 52 | mdlParams['training_steps'] = 60 #250 53 | # Display error every X steps 54 | mdlParams['display_step'] = 10 55 | # Scale? 56 | mdlParams['scale_targets'] = False 57 | # Peak at test error during training? (generally, dont do this!) 58 | mdlParams['peak_at_testerr'] = False 59 | # Print trainerr 60 | mdlParams['print_trainerr'] = False 61 | # Subtract trainset mean? 62 | mdlParams['subtract_set_mean'] = False 63 | mdlParams['setMean'] = np.array([0.0, 0.0, 0.0]) 64 | mdlParams['setStd'] = np.array([1.0, 1.0, 1.0]) 65 | 66 | # Data AUG 67 | #mdlParams['full_color_distort'] = True 68 | mdlParams['autoaugment'] = False 69 | mdlParams['flip_lr_ud'] = True 70 | mdlParams['full_rot'] = 180 71 | mdlParams['scale'] = (0.8,1.2) 72 | mdlParams['shear'] = 10 73 | mdlParams['cutout'] = 16 74 | 75 | # Meta settings 76 | mdlParams['meta_features'] = ['age_num','sex_oh','loc_oh'] 77 | mdlParams['meta_feature_sizes'] = [1,8,2] 78 | mdlParams['encode_nan'] = False 79 | # Pretrained model from task 1 80 | mdlParams['model_load_path'] = mdlParams_['pathBase']+'/data/isic/2019.test_effb0_ss' 81 | mdlParams['fc_layers_before'] = [256,256] 82 | # Factor for scaling up the FC layer 83 | scale_up_with_larger_b = 1.0 84 | mdlParams['fc_layers_after'] = [int(1024*scale_up_with_larger_b)] 85 | mdlParams['freeze_cnn'] = True 86 | mdlParams['learning_rate_meta'] = 0.00001 87 | # each feature is set to missing with this prob 88 | mdlParams['drop_augment'] = 0.1 89 | # Normal dropout in fc layers 90 | mdlParams['dropout_meta'] = 0.4 91 | mdlParams['scale_features'] = True 92 | 93 | ### Data ### 94 | mdlParams['preload'] = False 95 | # Labels first 96 | # Targets, as dictionary, indexed by im file name 97 | mdlParams['labels_dict'] = {} 98 | path1 = mdlParams['dataDir'] + '/labels/' 99 | # All sets 100 | allSets = glob(path1 + '*/') 101 | # Go through all sets 102 | for i in range(len(allSets)): 103 | # Check if want to include this dataset 104 | foundSet = False 105 | for j in range(len(mdlParams['dataset_names'])): 106 | if mdlParams['dataset_names'][j] in allSets[i]: 107 | foundSet = True 108 | if not foundSet: 109 | continue 110 | # Find csv file 111 | files = sorted(glob(allSets[i]+'*')) 112 | for j in range(len(files)): 113 | if 'csv' in files[j]: 114 | break 115 | # Load csv file 116 | with open(files[j], newline='') as csvfile: 117 | labels_str = csv.reader(csvfile, delimiter=',', quotechar='|') 118 | for row in labels_str: 119 | if 'image' == row[0]: 120 | continue 121 | #if 'ISIC' in row[0] and '_downsampled' in row[0]: 122 | # print(row[0]) 123 | if row[0] + '_downsampled' in mdlParams['labels_dict']: 124 | print("removed",row[0] + '_downsampled') 125 | continue 126 | if mdlParams['numClasses'] == 7: 127 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))]) 128 | elif mdlParams['numClasses'] == 8: 129 | if len(row) < 9 or row[8] == '': 130 | class_8 = 0 131 | else: 132 | class_8 = int(float(row[8])) 133 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8]) 134 | elif mdlParams['numClasses'] == 9: 135 | if len(row) < 9 or row[8] == '': 136 | class_8 = 0 137 | else: 138 | class_8 = int(float(row[8])) 139 | if len(row) < 10 or row[9] == '': 140 | class_9 = 0 141 | else: 142 | class_9 = int(float(row[9])) 143 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9]) 144 | # Load meta data 145 | mdlParams['meta_dict'] = {} 146 | path1 = mdlParams['dataDir'] + '/meta_data/' 147 | # All sets 148 | allSets = glob(path1 + '*/') 149 | # Go through all sets 150 | for i in range(len(allSets)): 151 | # Check if want to include this dataset 152 | foundSet = False 153 | for j in range(len(mdlParams['dataset_names'])): 154 | if mdlParams['dataset_names'][j] in allSets[i]: 155 | foundSet = True 156 | if not foundSet: 157 | continue 158 | # Find csv file 159 | files = sorted(glob(allSets[i]+'*')) 160 | for j in range(len(files)): 161 | if '.pkl' in files[j]: 162 | break 163 | # Open and load 164 | with open(files[j],'rb') as f: 165 | meta_data = pickle.load(f) 166 | # Write into dict 167 | for k in range(len(meta_data['im_name'])): 168 | feature_vector = [] 169 | if 'age_oh' in mdlParams['meta_features']: 170 | if mdlParams['encode_nan']: 171 | feature_vector.append(meta_data['age_oh'][k,:]) 172 | else: 173 | feature_vector.append(meta_data['age_oh'][k,1:]) 174 | if 'age_num' in mdlParams['meta_features']: 175 | feature_vector.append(np.array([meta_data['age_num'][k]])) 176 | if 'loc_oh' in mdlParams['meta_features']: 177 | if mdlParams['encode_nan']: 178 | feature_vector.append(meta_data['loc_oh'][k,:]) 179 | else: 180 | feature_vector.append(meta_data['loc_oh'][k,1:]) 181 | if 'sex_oh' in mdlParams['meta_features']: 182 | if mdlParams['encode_nan']: 183 | feature_vector.append(meta_data['sex_oh'][k,:]) 184 | else: 185 | feature_vector.append(meta_data['sex_oh'][k,1:]) 186 | 187 | #print(feature_vector) 188 | feature_vector = np.concatenate(feature_vector,axis=0) 189 | #print("feature vector shape",feature_vector.shape) 190 | mdlParams['meta_dict'][meta_data['im_name'][k]] = feature_vector 191 | 192 | 193 | # Save all im paths here 194 | mdlParams['im_paths'] = [] 195 | mdlParams['labels_list'] = [] 196 | mdlParams['meta_list'] = [] 197 | # Define the sets 198 | path1 = mdlParams['dataDir'] + '/images/' 199 | # All sets 200 | allSets = sorted(glob(path1 + '*/')) 201 | # Ids which name the folders 202 | # Make official first dataset 203 | for i in range(len(allSets)): 204 | if mdlParams['dataset_names'][0] in allSets[i]: 205 | temp = allSets[i] 206 | allSets.remove(allSets[i]) 207 | allSets.insert(0, temp) 208 | print(allSets) 209 | # Set of keys, for marking old HAM10000 210 | mdlParams['key_list'] = [] 211 | if mdlParams['exclude_inds']: 212 | with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f: 213 | indices_exclude = pickle.load(f) 214 | exclude_list = [] 215 | for i in range(len(allSets)): 216 | # All files in that set 217 | files = sorted(glob(allSets[i]+'*')) 218 | # Check if there is something in there, if not, discard 219 | if len(files) == 0: 220 | continue 221 | # Check if want to include this dataset 222 | foundSet = False 223 | for j in range(len(mdlParams['dataset_names'])): 224 | if mdlParams['dataset_names'][j] in allSets[i]: 225 | foundSet = True 226 | if not foundSet: 227 | continue 228 | for j in range(len(files)): 229 | if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]: 230 | # Add according label, find it first 231 | found_already = False 232 | for key in mdlParams['labels_dict']: 233 | if key + mdlParams['file_ending'] in files[j]: 234 | if found_already: 235 | print("Found already:",key,files[j]) 236 | mdlParams['key_list'].append(key) 237 | mdlParams['labels_list'].append(mdlParams['labels_dict'][key]) 238 | mdlParams['meta_list'].append(mdlParams['meta_dict'][key]) 239 | found_already = True 240 | if found_already: 241 | mdlParams['im_paths'].append(files[j]) 242 | if mdlParams['exclude_inds']: 243 | for key in indices_exclude: 244 | if key in files[j]: 245 | exclude_list.append(indices_exclude[key]) 246 | # Convert label list to array 247 | mdlParams['labels_array'] = np.array(mdlParams['labels_list']) 248 | print(np.mean(mdlParams['labels_array'],axis=0)) 249 | # Meta data 250 | mdlParams['meta_array'] = np.array(mdlParams['meta_list']) 251 | print("final meta shape",mdlParams['meta_array'].shape) 252 | # Create indices list with HAM10000 only 253 | mdlParams['HAM10000_inds'] = [] 254 | HAM_START = 24306 255 | HAM_END = 34320 256 | for j in range(len(mdlParams['key_list'])): 257 | try: 258 | curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1] 259 | except: 260 | continue 261 | if curr_id >= HAM_START and curr_id <= HAM_END: 262 | mdlParams['HAM10000_inds'].append(j) 263 | mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds']) 264 | print("Len ham",len(mdlParams['HAM10000_inds'])) 265 | # Perhaps preload images 266 | if mdlParams['preload']: 267 | mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8) 268 | for i in range(len(mdlParams['im_paths'])): 269 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 270 | #x = x.astype(np.float32) 271 | # Scale to 0-1 272 | #min_x = np.min(x) 273 | #max_x = np.max(x) 274 | #x = (x-min_x)/(max_x-min_x) 275 | mdlParams['images_array'][i,:,:,:] = x 276 | if i%1000 == 0: 277 | print(i+1,"images loaded...") 278 | if mdlParams['subtract_set_mean']: 279 | mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3]) 280 | for i in range(len(mdlParams['im_paths'])): 281 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 282 | x = x.astype(np.float32) 283 | # Scale to 0-1 284 | min_x = np.min(x) 285 | max_x = np.max(x) 286 | x = (x-min_x)/(max_x-min_x) 287 | mdlParams['images_means'][i,:] = np.mean(x,(0,1)) 288 | if i%1000 == 0: 289 | print(i+1,"images processed for mean...") 290 | 291 | ### Define Indices ### 292 | with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f: 293 | indices = pickle.load(f) 294 | mdlParams['trainIndCV'] = indices['trainIndCV'] 295 | mdlParams['valIndCV'] = indices['valIndCV'] 296 | if mdlParams['exclude_inds']: 297 | exclude_list = np.array(exclude_list) 298 | all_inds = np.arange(len(mdlParams['im_paths'])) 299 | exclude_inds = all_inds[exclude_list.astype(bool)] 300 | for i in range(len(mdlParams['trainIndCV'])): 301 | mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds) 302 | for i in range(len(mdlParams['valIndCV'])): 303 | mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds) 304 | # Consider case with more than one set 305 | if len(mdlParams['dataset_names']) > 1: 306 | restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0])) 307 | for i in range(mdlParams['numCV']): 308 | mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds)) 309 | print("Train") 310 | for i in range(len(mdlParams['trainIndCV'])): 311 | print(mdlParams['trainIndCV'][i].shape) 312 | print("Val") 313 | for i in range(len(mdlParams['valIndCV'])): 314 | print(mdlParams['valIndCV'][i].shape) 315 | 316 | # Use this for ordered multi crops 317 | if mdlParams['orderedCrop']: 318 | # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. 319 | mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 320 | #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 321 | for u in range(len(mdlParams['im_paths'])): 322 | height, width = imagesize.get(mdlParams['im_paths'][u]) 323 | if width < mdlParams['input_size'][0]: 324 | height = int(mdlParams['input_size'][0]/float(width))*height 325 | width = mdlParams['input_size'][0] 326 | if height < mdlParams['input_size'][0]: 327 | width = int(mdlParams['input_size'][0]/float(height))*width 328 | height = mdlParams['input_size'][0] 329 | ind = 0 330 | for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 331 | for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 332 | mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1)) 333 | mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1)) 334 | #mdlParams['imSizes'][u,ind,0] = curr_im_size[0] 335 | 336 | ind += 1 337 | # Sanity checks 338 | #print("Positions",mdlParams['cropPositions']) 339 | # Test image sizes 340 | height = mdlParams['input_size'][0] 341 | width = mdlParams['input_size'][1] 342 | for u in range(len(mdlParams['im_paths'])): 343 | height_test, width_test = imagesize.get(mdlParams['im_paths'][u]) 344 | if width_test < mdlParams['input_size'][0]: 345 | height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test 346 | width_test = mdlParams['input_size'][0] 347 | if height_test < mdlParams['input_size'][0]: 348 | width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test 349 | height_test = mdlParams['input_size'][0] 350 | test_im = np.zeros([width_test,height_test]) 351 | for i in range(mdlParams['multiCropEval']): 352 | im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width] 353 | if im_crop.shape[0] != mdlParams['input_size'][0]: 354 | print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u]) 355 | if im_crop.shape[1] != mdlParams['input_size'][1]: 356 | print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u]) 357 | return mdlParams -------------------------------------------------------------------------------- /cfgs/2019/test_effb0_rr_meta.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import re 5 | import csv 6 | import numpy as np 7 | from glob import glob 8 | import scipy 9 | import pickle 10 | import imagesize 11 | 12 | def init(mdlParams_): 13 | mdlParams = {} 14 | # Save summaries and model here 15 | mdlParams['saveDir'] = mdlParams_['pathBase']+'/data/isic/' 16 | # Data is loaded from here 17 | mdlParams['dataDir'] = mdlParams_['pathBase']+'/data/isic/2019' 18 | 19 | ### Model Selection ### 20 | mdlParams['model_type'] = 'efficientnet-b0' 21 | mdlParams['dataset_names'] = ['official']#,'sevenpoint_rez3_ll'] 22 | mdlParams['file_ending'] = '.png' 23 | mdlParams['exclude_inds'] = False 24 | mdlParams['same_sized_crops'] = False 25 | mdlParams['multiCropEval'] = 9 26 | mdlParams['var_im_size'] = False 27 | mdlParams['orderedCrop'] = False 28 | mdlParams['voting_scheme'] = 'average' 29 | mdlParams['classification'] = True 30 | mdlParams['balance_classes'] = 9 31 | mdlParams['extra_fac'] = 1.0 32 | mdlParams['numClasses'] = 9 33 | mdlParams['no_c9_eval'] = True 34 | mdlParams['numOut'] = mdlParams['numClasses'] 35 | mdlParams['numCV'] = 5 36 | mdlParams['trans_norm_first'] = True 37 | # Deterministic cropping 38 | mdlParams['deterministic_eval'] = True 39 | mdlParams['numCropPositions'] = 1 40 | num_scales = 4 41 | all_scales = [1.0,0.5,0.75,0.25,0.9,0.6,0.4] 42 | mdlParams['cropScales'] = all_scales[:num_scales] 43 | mdlParams['cropFlipping'] = 4 44 | mdlParams['multiCropEval'] = mdlParams['numCropPositions']*len(mdlParams['cropScales'])*mdlParams['cropFlipping'] 45 | mdlParams['offset_crop'] = 0.2 46 | # Scale up for b1-b7 47 | mdlParams['input_size'] = [224,224,3] 48 | 49 | ### Training Parameters ### 50 | # Batch size 51 | mdlParams['batchSize'] = 20#*len(mdlParams['numGPUs']) 52 | # Initial learning rate 53 | mdlParams['learning_rate'] = 0.000015#*len(mdlParams['numGPUs']) 54 | # Lower learning rate after no improvement over 100 epochs 55 | mdlParams['lowerLRAfter'] = 25 56 | # If there is no validation set, start lowering the LR after X steps 57 | mdlParams['lowerLRat'] = 50 58 | # Divide learning rate by this value 59 | mdlParams['LRstep'] = 5 60 | # Maximum number of training iterations 61 | mdlParams['training_steps'] = 60 #250 62 | # Display error every X steps 63 | mdlParams['display_step'] = 10 64 | # Scale? 65 | mdlParams['scale_targets'] = False 66 | # Peak at test error during training? (generally, dont do this!) 67 | mdlParams['peak_at_testerr'] = False 68 | # Print trainerr 69 | mdlParams['print_trainerr'] = False 70 | # Subtract trainset mean? 71 | mdlParams['subtract_set_mean'] = False 72 | mdlParams['setMean'] = np.array([0.0, 0.0, 0.0]) 73 | mdlParams['setStd'] = np.array([1.0, 1.0, 1.0]) 74 | 75 | # Data AUG 76 | #mdlParams['full_color_distort'] = True 77 | mdlParams['autoaugment'] = False 78 | mdlParams['flip_lr_ud'] = True 79 | mdlParams['full_rot'] = 180 80 | mdlParams['scale'] = (0.8,1.2) 81 | mdlParams['shear'] = 10 82 | mdlParams['cutout'] = 16 83 | 84 | # Meta settings 85 | mdlParams['meta_features'] = ['age_num','sex_oh','loc_oh'] 86 | mdlParams['meta_feature_sizes'] = [1,8,2] 87 | mdlParams['encode_nan'] = False 88 | mdlParams['model_load_path'] = '/home/Gessert/data/isic/2019.test_effb0_rr' 89 | mdlParams['fc_layers_before'] = [256,256] 90 | mdlParams['fc_layers_after'] = [1024] 91 | mdlParams['freeze_cnn'] = True 92 | mdlParams['learning_rate_meta'] = 0.00001 93 | # each feature is set to missing with this prob 94 | mdlParams['drop_augment'] = 0.1 95 | mdlParams['dropout_meta'] = 0.4 96 | mdlParams['scale_features'] = True 97 | 98 | ### Data ### 99 | mdlParams['preload'] = False 100 | # Labels first 101 | # Targets, as dictionary, indexed by im file name 102 | mdlParams['labels_dict'] = {} 103 | path1 = mdlParams['dataDir'] + '/labels/' 104 | # All sets 105 | allSets = glob(path1 + '*/') 106 | # Go through all sets 107 | for i in range(len(allSets)): 108 | # Check if want to include this dataset 109 | foundSet = False 110 | for j in range(len(mdlParams['dataset_names'])): 111 | if mdlParams['dataset_names'][j] in allSets[i]: 112 | foundSet = True 113 | if not foundSet: 114 | continue 115 | # Find csv file 116 | files = sorted(glob(allSets[i]+'*')) 117 | for j in range(len(files)): 118 | if 'csv' in files[j]: 119 | break 120 | # Load csv file 121 | with open(files[j], newline='') as csvfile: 122 | labels_str = csv.reader(csvfile, delimiter=',', quotechar='|') 123 | for row in labels_str: 124 | if 'image' == row[0]: 125 | continue 126 | #if 'ISIC' in row[0] and '_downsampled' in row[0]: 127 | # print(row[0]) 128 | if row[0] + '_downsampled' in mdlParams['labels_dict']: 129 | print("removed",row[0] + '_downsampled') 130 | continue 131 | if mdlParams['numClasses'] == 7: 132 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7]))]) 133 | elif mdlParams['numClasses'] == 8: 134 | if len(row) < 9 or row[8] == '': 135 | class_8 = 0 136 | else: 137 | class_8 = int(float(row[8])) 138 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8]) 139 | elif mdlParams['numClasses'] == 9: 140 | if len(row) < 9 or row[8] == '': 141 | class_8 = 0 142 | else: 143 | class_8 = int(float(row[8])) 144 | if len(row) < 10 or row[9] == '': 145 | class_9 = 0 146 | else: 147 | class_9 = int(float(row[9])) 148 | mdlParams['labels_dict'][row[0]] = np.array([int(float(row[1])),int(float(row[2])),int(float(row[3])),int(float(row[4])),int(float(row[5])),int(float(row[6])),int(float(row[7])),class_8,class_9]) 149 | 150 | # Load meta data 151 | mdlParams['meta_dict'] = {} 152 | path1 = mdlParams['dataDir'] + '/meta_data/' 153 | # All sets 154 | allSets = glob(path1 + '*/') 155 | # Go through all sets 156 | for i in range(len(allSets)): 157 | # Check if want to include this dataset 158 | foundSet = False 159 | for j in range(len(mdlParams['dataset_names'])): 160 | if mdlParams['dataset_names'][j] in allSets[i]: 161 | foundSet = True 162 | if not foundSet: 163 | continue 164 | # Find csv file 165 | files = sorted(glob(allSets[i]+'*')) 166 | for j in range(len(files)): 167 | if '.pkl' in files[j]: 168 | break 169 | # Open and load 170 | with open(files[j],'rb') as f: 171 | meta_data = pickle.load(f) 172 | # Write into dict 173 | for k in range(len(meta_data['im_name'])): 174 | feature_vector = [] 175 | if 'age_oh' in mdlParams['meta_features']: 176 | if mdlParams['encode_nan']: 177 | feature_vector.append(meta_data['age_oh'][k,:]) 178 | else: 179 | feature_vector.append(meta_data['age_oh'][k,1:]) 180 | if 'age_num' in mdlParams['meta_features']: 181 | feature_vector.append(np.array([meta_data['age_num'][k]])) 182 | if 'loc_oh' in mdlParams['meta_features']: 183 | if mdlParams['encode_nan']: 184 | feature_vector.append(meta_data['loc_oh'][k,:]) 185 | else: 186 | feature_vector.append(meta_data['loc_oh'][k,1:]) 187 | if 'sex_oh' in mdlParams['meta_features']: 188 | if mdlParams['encode_nan']: 189 | feature_vector.append(meta_data['sex_oh'][k,:]) 190 | else: 191 | feature_vector.append(meta_data['sex_oh'][k,1:]) 192 | 193 | #print(feature_vector) 194 | feature_vector = np.concatenate(feature_vector,axis=0) 195 | #print("feature vector shape",feature_vector.shape) 196 | mdlParams['meta_dict'][meta_data['im_name'][k]] = feature_vector 197 | 198 | # Save all im paths here 199 | mdlParams['im_paths'] = [] 200 | mdlParams['labels_list'] = [] 201 | mdlParams['meta_list'] = [] 202 | # Define the sets 203 | path1 = mdlParams['dataDir'] + '/images/' 204 | # All sets 205 | allSets = sorted(glob(path1 + '*/')) 206 | # Ids which name the folders 207 | # Make official first dataset 208 | for i in range(len(allSets)): 209 | if mdlParams['dataset_names'][0] in allSets[i]: 210 | temp = allSets[i] 211 | allSets.remove(allSets[i]) 212 | allSets.insert(0, temp) 213 | print(allSets) 214 | # Set of keys, for marking old HAM10000 215 | mdlParams['key_list'] = [] 216 | if mdlParams['exclude_inds']: 217 | with open(mdlParams['saveDir'] + 'indices_exclude.pkl','rb') as f: 218 | indices_exclude = pickle.load(f) 219 | exclude_list = [] 220 | for i in range(len(allSets)): 221 | # All files in that set 222 | files = sorted(glob(allSets[i]+'*')) 223 | # Check if there is something in there, if not, discard 224 | if len(files) == 0: 225 | continue 226 | # Check if want to include this dataset 227 | foundSet = False 228 | for j in range(len(mdlParams['dataset_names'])): 229 | if mdlParams['dataset_names'][j] in allSets[i]: 230 | foundSet = True 231 | if not foundSet: 232 | continue 233 | for j in range(len(files)): 234 | if '.jpg' in files[j] or '.jpeg' in files[j] or '.JPG' in files[j] or '.JPEG' in files[j] or '.png' in files[j] or '.PNG' in files[j]: 235 | # Add according label, find it first 236 | found_already = False 237 | for key in mdlParams['labels_dict']: 238 | if key + mdlParams['file_ending'] in files[j]: 239 | if found_already: 240 | print("Found already:",key,files[j]) 241 | mdlParams['key_list'].append(key) 242 | mdlParams['labels_list'].append(mdlParams['labels_dict'][key]) 243 | mdlParams['meta_list'].append(mdlParams['meta_dict'][key]) 244 | found_already = True 245 | if found_already: 246 | mdlParams['im_paths'].append(files[j]) 247 | if mdlParams['exclude_inds']: 248 | for key in indices_exclude: 249 | if key in files[j]: 250 | exclude_list.append(indices_exclude[key]) 251 | # Convert label list to array 252 | mdlParams['labels_array'] = np.array(mdlParams['labels_list']) 253 | print(np.mean(mdlParams['labels_array'],axis=0)) 254 | # Meta data 255 | mdlParams['meta_array'] = np.array(mdlParams['meta_list']) 256 | print("final meta shape",mdlParams['meta_array'].shape) 257 | # Create indices list with HAM10000 only 258 | mdlParams['HAM10000_inds'] = [] 259 | HAM_START = 24306 260 | HAM_END = 34320 261 | for j in range(len(mdlParams['key_list'])): 262 | try: 263 | curr_id = [int(s) for s in re.findall(r'\d+',mdlParams['key_list'][j])][-1] 264 | except: 265 | continue 266 | if curr_id >= HAM_START and curr_id <= HAM_END: 267 | mdlParams['HAM10000_inds'].append(j) 268 | mdlParams['HAM10000_inds'] = np.array(mdlParams['HAM10000_inds']) 269 | print("Len ham",len(mdlParams['HAM10000_inds'])) 270 | # Perhaps preload images 271 | if mdlParams['preload']: 272 | mdlParams['images_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['input_size_load'][0],mdlParams['input_size_load'][1],mdlParams['input_size_load'][2]],dtype=np.uint8) 273 | for i in range(len(mdlParams['im_paths'])): 274 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 275 | #x = x.astype(np.float32) 276 | # Scale to 0-1 277 | #min_x = np.min(x) 278 | #max_x = np.max(x) 279 | #x = (x-min_x)/(max_x-min_x) 280 | mdlParams['images_array'][i,:,:,:] = x 281 | if i%1000 == 0: 282 | print(i+1,"images loaded...") 283 | if mdlParams['subtract_set_mean']: 284 | mdlParams['images_means'] = np.zeros([len(mdlParams['im_paths']),3]) 285 | for i in range(len(mdlParams['im_paths'])): 286 | x = scipy.ndimage.imread(mdlParams['im_paths'][i]) 287 | x = x.astype(np.float32) 288 | # Scale to 0-1 289 | min_x = np.min(x) 290 | max_x = np.max(x) 291 | x = (x-min_x)/(max_x-min_x) 292 | mdlParams['images_means'][i,:] = np.mean(x,(0,1)) 293 | if i%1000 == 0: 294 | print(i+1,"images processed for mean...") 295 | 296 | ### Define Indices ### 297 | with open(mdlParams['saveDir'] + 'indices_isic2019.pkl','rb') as f: 298 | indices = pickle.load(f) 299 | mdlParams['trainIndCV'] = indices['trainIndCV'] 300 | mdlParams['valIndCV'] = indices['valIndCV'] 301 | if mdlParams['exclude_inds']: 302 | exclude_list = np.array(exclude_list) 303 | all_inds = np.arange(len(mdlParams['im_paths'])) 304 | exclude_inds = all_inds[exclude_list.astype(bool)] 305 | for i in range(len(mdlParams['trainIndCV'])): 306 | mdlParams['trainIndCV'][i] = np.setdiff1d(mdlParams['trainIndCV'][i],exclude_inds) 307 | for i in range(len(mdlParams['valIndCV'])): 308 | mdlParams['valIndCV'][i] = np.setdiff1d(mdlParams['valIndCV'][i],exclude_inds) 309 | # Consider case with more than one set 310 | if len(mdlParams['dataset_names']) > 1: 311 | restInds = np.array(np.arange(25331,mdlParams['labels_array'].shape[0])) 312 | for i in range(mdlParams['numCV']): 313 | mdlParams['trainIndCV'][i] = np.concatenate((mdlParams['trainIndCV'][i],restInds)) 314 | print("Train") 315 | for i in range(len(mdlParams['trainIndCV'])): 316 | print(mdlParams['trainIndCV'][i].shape) 317 | print("Val") 318 | for i in range(len(mdlParams['valIndCV'])): 319 | print(mdlParams['valIndCV'][i].shape) 320 | 321 | # Use this for ordered multi crops 322 | if mdlParams['orderedCrop']: 323 | # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. 324 | mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 325 | #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 326 | for u in range(len(mdlParams['im_paths'])): 327 | height, width = imagesize.get(mdlParams['im_paths'][u]) 328 | if width < mdlParams['input_size'][0]: 329 | height = int(mdlParams['input_size'][0]/float(width))*height 330 | width = mdlParams['input_size'][0] 331 | if height < mdlParams['input_size'][0]: 332 | width = int(mdlParams['input_size'][0]/float(height))*width 333 | height = mdlParams['input_size'][0] 334 | ind = 0 335 | for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 336 | for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 337 | mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1)) 338 | mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1)) 339 | #mdlParams['imSizes'][u,ind,0] = curr_im_size[0] 340 | 341 | ind += 1 342 | # Sanity checks 343 | #print("Positions",mdlParams['cropPositions']) 344 | # Test image sizes 345 | height = mdlParams['input_size'][0] 346 | width = mdlParams['input_size'][1] 347 | for u in range(len(mdlParams['im_paths'])): 348 | height_test, width_test = imagesize.get(mdlParams['im_paths'][u]) 349 | if width_test < mdlParams['input_size'][0]: 350 | height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test 351 | width_test = mdlParams['input_size'][0] 352 | if height_test < mdlParams['input_size'][0]: 353 | width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test 354 | height_test = mdlParams['input_size'][0] 355 | test_im = np.zeros([width_test,height_test]) 356 | for i in range(mdlParams['multiCropEval']): 357 | im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width] 358 | if im_crop.shape[0] != mdlParams['input_size'][0]: 359 | print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u]) 360 | if im_crop.shape[1] != mdlParams['input_size'][1]: 361 | print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u]) 362 | return mdlParams -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torch.optim import lr_scheduler 5 | import torchvision 6 | from torchvision import datasets, models as tv_models 7 | from torch.utils.data import DataLoader 8 | from torchsummary import summary 9 | import numpy as np 10 | from scipy import io 11 | import threading 12 | import pickle 13 | from pathlib import Path 14 | import math 15 | import os 16 | import sys 17 | from glob import glob 18 | import re 19 | import gc 20 | import importlib 21 | import time 22 | import sklearn.preprocessing 23 | import utils 24 | from sklearn.utils import class_weight 25 | import psutil 26 | import models 27 | 28 | # add configuration file 29 | # Dictionary for model configuration 30 | mdlParams = {} 31 | 32 | # Import machine config 33 | pc_cfg = importlib.import_module('pc_cfgs.'+sys.argv[1]) 34 | mdlParams.update(pc_cfg.mdlParams) 35 | 36 | 37 | # Import model config 38 | model_cfg = importlib.import_module('cfgs.'+sys.argv[2]) 39 | mdlParams_model = model_cfg.init(mdlParams) 40 | mdlParams.update(mdlParams_model) 41 | 42 | # Indicate training 43 | mdlParams['trainSetState'] = 'train' 44 | 45 | # Path name from filename 46 | mdlParams['saveDirBase'] = mdlParams['saveDir'] + sys.argv[2] 47 | 48 | # Set visible devices 49 | if 'gpu' in sys.argv[3]: 50 | mdlParams['numGPUs']= [[int(s) for s in re.findall(r'\d+',sys.argv[3])][-1]] 51 | cuda_str = "" 52 | for i in range(len(mdlParams['numGPUs'])): 53 | cuda_str = cuda_str + str(mdlParams['numGPUs'][i]) 54 | if i is not len(mdlParams['numGPUs'])-1: 55 | cuda_str = cuda_str + "," 56 | print("Devices to use:",cuda_str) 57 | os.environ["CUDA_VISIBLE_DEVICES"] = cuda_str 58 | 59 | # Specify val set to train for 60 | if len(sys.argv) > 4: 61 | mdlParams['cv_subset'] = [int(s) for s in re.findall(r'\d+',sys.argv[4])] 62 | print("Training validation sets",mdlParams['cv_subset']) 63 | 64 | # Check if there is a validation set, if not, evaluate train error instead 65 | if 'valIndCV' in mdlParams or 'valInd' in mdlParams: 66 | eval_set = 'valInd' 67 | print("Evaluating on validation set during training.") 68 | else: 69 | eval_set = 'trainInd' 70 | print("No validation set, evaluating on training set during training.") 71 | 72 | # Check if there were previous ones that have alreary bin learned 73 | prevFile = Path(mdlParams['saveDirBase'] + '/CV.pkl') 74 | #print(prevFile) 75 | if prevFile.exists(): 76 | print("Part of CV already done") 77 | with open(mdlParams['saveDirBase'] + '/CV.pkl', 'rb') as f: 78 | allData = pickle.load(f) 79 | else: 80 | allData = {} 81 | allData['f1Best'] = {} 82 | allData['sensBest'] = {} 83 | allData['specBest'] = {} 84 | allData['accBest'] = {} 85 | allData['waccBest'] = {} 86 | allData['aucBest'] = {} 87 | allData['convergeTime'] = {} 88 | allData['bestPred'] = {} 89 | allData['targets'] = {} 90 | 91 | # Take care of CV 92 | if mdlParams.get('cv_subset',None) is not None: 93 | cv_set = mdlParams['cv_subset'] 94 | else: 95 | cv_set = range(mdlParams['numCV']) 96 | for cv in cv_set: 97 | # Check if this fold was already trained 98 | already_trained = False 99 | if 'valIndCV' in mdlParams: 100 | mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv) 101 | if os.path.isdir(mdlParams['saveDirBase']): 102 | if os.path.isdir(mdlParams['saveDir']): 103 | all_max_iter = [] 104 | for name in os.listdir(mdlParams['saveDir']): 105 | int_list = [int(s) for s in re.findall(r'\d+',name)] 106 | if len(int_list) > 0: 107 | all_max_iter.append(int_list[-1]) 108 | #if '-' + str(mdlParams['training_steps'])+ '.pt' in name: 109 | # print("Fold %d already fully trained"%(cv)) 110 | # already_trained = True 111 | all_max_iter = np.array(all_max_iter) 112 | if len(all_max_iter) > 0 and np.max(all_max_iter) >= mdlParams['training_steps']: 113 | print("Fold %d already fully trained with %d iterations"%(cv,np.max(all_max_iter))) 114 | already_trained = True 115 | if already_trained: 116 | continue 117 | print("CV set",cv) 118 | # Reset model graph 119 | importlib.reload(models) 120 | #importlib.reload(torchvision) 121 | # Collect model variables 122 | modelVars = {} 123 | #print("here") 124 | modelVars['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 125 | print(modelVars['device']) 126 | # Def current CV set 127 | mdlParams['trainInd'] = mdlParams['trainIndCV'][cv] 128 | if 'valIndCV' in mdlParams: 129 | mdlParams['valInd'] = mdlParams['valIndCV'][cv] 130 | # Def current path for saving stuff 131 | if 'valIndCV' in mdlParams: 132 | mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv) 133 | else: 134 | mdlParams['saveDir'] = mdlParams['saveDirBase'] 135 | # Create basepath if it doesnt exist yet 136 | if not os.path.isdir(mdlParams['saveDirBase']): 137 | os.mkdir(mdlParams['saveDirBase']) 138 | # Check if there is something to load 139 | load_old = 0 140 | if os.path.isdir(mdlParams['saveDir']): 141 | # Check if a checkpoint is in there 142 | if len([name for name in os.listdir(mdlParams['saveDir'])]) > 0: 143 | load_old = 1 144 | print("Loading old model") 145 | else: 146 | # Delete whatever is in there (nothing happens) 147 | filelist = [os.remove(mdlParams['saveDir'] +'/'+f) for f in os.listdir(mdlParams['saveDir'])] 148 | else: 149 | os.mkdir(mdlParams['saveDir']) 150 | # Save training progress in here 151 | save_dict = {} 152 | save_dict['acc'] = [] 153 | save_dict['loss'] = [] 154 | save_dict['wacc'] = [] 155 | save_dict['auc'] = [] 156 | save_dict['sens'] = [] 157 | save_dict['spec'] = [] 158 | save_dict['f1'] = [] 159 | save_dict['step_num'] = [] 160 | if mdlParams['print_trainerr']: 161 | save_dict_train = {} 162 | save_dict_train['acc'] = [] 163 | save_dict_train['loss'] = [] 164 | save_dict_train['wacc'] = [] 165 | save_dict_train['auc'] = [] 166 | save_dict_train['sens'] = [] 167 | save_dict_train['spec'] = [] 168 | save_dict_train['f1'] = [] 169 | save_dict_train['step_num'] = [] 170 | # Potentially calculate setMean to subtract 171 | if mdlParams['subtract_set_mean'] == 1: 172 | mdlParams['setMean'] = np.mean(mdlParams['images_means'][mdlParams['trainInd'],:],(0)) 173 | print("Set Mean",mdlParams['setMean']) 174 | 175 | # balance classes 176 | if mdlParams['balance_classes'] < 3 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 11: 177 | class_weights = class_weight.compute_class_weight('balanced',np.unique(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)),np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)) 178 | print("Current class weights",class_weights) 179 | class_weights = class_weights*mdlParams['extra_fac'] 180 | print("Current class weights with extra",class_weights) 181 | elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 4: 182 | # Split training set by classes 183 | not_one_hot = np.argmax(mdlParams['labels_array'],1) 184 | mdlParams['class_indices'] = [] 185 | for i in range(mdlParams['numClasses']): 186 | mdlParams['class_indices'].append(np.where(not_one_hot==i)[0]) 187 | # Kick out non-trainind indices 188 | mdlParams['class_indices'][i] = np.setdiff1d(mdlParams['class_indices'][i],mdlParams['valInd']) 189 | #print("Class",i,mdlParams['class_indices'][i].shape,np.min(mdlParams['class_indices'][i]),np.max(mdlParams['class_indices'][i]),np.sum(mdlParams['labels_array'][np.int64(mdlParams['class_indices'][i]),:],0)) 190 | elif mdlParams['balance_classes'] == 5 or mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 13: 191 | # Other class balancing loss 192 | class_weights = 1.0/np.mean(mdlParams['labels_array'][mdlParams['trainInd'],:],axis=0) 193 | print("Current class weights",class_weights) 194 | if isinstance(mdlParams['extra_fac'], float): 195 | class_weights = np.power(class_weights,mdlParams['extra_fac']) 196 | else: 197 | class_weights = class_weights*mdlParams['extra_fac'] 198 | print("Current class weights with extra",class_weights) 199 | elif mdlParams['balance_classes'] == 9: 200 | # Only use official indicies for calculation 201 | print("Balance 9") 202 | indices_ham = mdlParams['trainInd'][mdlParams['trainInd'] < 25331] 203 | if mdlParams['numClasses'] == 9: 204 | class_weights_ = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:8],axis=0) 205 | #print("class before",class_weights_) 206 | class_weights = np.zeros([mdlParams['numClasses']]) 207 | class_weights[:8] = class_weights_ 208 | class_weights[-1] = np.max(class_weights_) 209 | else: 210 | class_weights = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:],axis=0) 211 | print("Current class weights",class_weights) 212 | if isinstance(mdlParams['extra_fac'], float): 213 | class_weights = np.power(class_weights,mdlParams['extra_fac']) 214 | else: 215 | class_weights = class_weights*mdlParams['extra_fac'] 216 | print("Current class weights with extra",class_weights) 217 | 218 | # Meta scaler 219 | if mdlParams.get('meta_features',None) is not None and mdlParams['scale_features']: 220 | mdlParams['feature_scaler_meta'] = sklearn.preprocessing.StandardScaler().fit(mdlParams['meta_array'][mdlParams['trainInd'],:]) 221 | print("scaler mean",mdlParams['feature_scaler_meta'].mean_,"var",mdlParams['feature_scaler_meta'].var_) 222 | 223 | # Set up dataloaders 224 | num_workers = psutil.cpu_count(logical=False) 225 | # For train 226 | dataset_train = utils.ISICDataset(mdlParams, 'trainInd') 227 | # For val 228 | dataset_val = utils.ISICDataset(mdlParams, 'valInd') 229 | if mdlParams['multiCropEval'] > 0: 230 | modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=num_workers, pin_memory=True) 231 | else: 232 | modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=num_workers, pin_memory=True) 233 | 234 | if mdlParams['balance_classes'] == 12 or mdlParams['balance_classes'] == 13: 235 | #print(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1).size(0)) 236 | strat_sampler = utils.StratifiedSampler(mdlParams) 237 | modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], sampler=strat_sampler, num_workers=num_workers, pin_memory=True) 238 | else: 239 | modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], shuffle=True, num_workers=num_workers, pin_memory=True, drop_last=True) 240 | #print("Setdiff",np.setdiff1d(mdlParams['trainInd'],mdlParams['trainInd'])) 241 | # Define model 242 | modelVars['model'] = models.getModel(mdlParams)() 243 | # Load trained model 244 | if mdlParams.get('meta_features',None) is not None: 245 | # Find best checkpoint 246 | files = glob(mdlParams['model_load_path'] + '/CVSet' + str(cv) + '/*') 247 | global_steps = np.zeros([len(files)]) 248 | #print("files",files) 249 | for i in range(len(files)): 250 | # Use meta files to find the highest index 251 | if 'best' not in files[i]: 252 | continue 253 | if 'checkpoint' not in files[i]: 254 | continue 255 | # Extract global step 256 | nums = [int(s) for s in re.findall(r'\d+',files[i])] 257 | global_steps[i] = nums[-1] 258 | # Create path with maximum global step found 259 | chkPath = mdlParams['model_load_path'] + '/CVSet' + str(cv) + '/checkpoint_best-' + str(int(np.max(global_steps))) + '.pt' 260 | print("Restoring lesion-trained CNN for meta data training: ",chkPath) 261 | # Load 262 | state = torch.load(chkPath) 263 | # Initialize model 264 | curr_model_dict = modelVars['model'].state_dict() 265 | for name, param in state['state_dict'].items(): 266 | #print(name,param.shape) 267 | if isinstance(param, nn.Parameter): 268 | # backwards compatibility for serialized parameters 269 | param = param.data 270 | if curr_model_dict[name].shape == param.shape: 271 | curr_model_dict[name].copy_(param) 272 | else: 273 | print("not restored",name,param.shape) 274 | #modelVars['model'].load_state_dict(state['state_dict']) 275 | # Original input size 276 | #if 'Dense' not in mdlParams['model_type']: 277 | # print("Original input size",modelVars['model'].input_size) 278 | #print(modelVars['model']) 279 | if 'Dense' in mdlParams['model_type']: 280 | if mdlParams['input_size'][0] != 224: 281 | modelVars['model'] = utils.modify_densenet_avg_pool(modelVars['model']) 282 | #print(modelVars['model']) 283 | num_ftrs = modelVars['model'].classifier.in_features 284 | modelVars['model'].classifier = nn.Linear(num_ftrs, mdlParams['numClasses']) 285 | #print(modelVars['model']) 286 | elif 'dpn' in mdlParams['model_type']: 287 | num_ftrs = modelVars['model'].classifier.in_channels 288 | modelVars['model'].classifier = nn.Conv2d(num_ftrs,mdlParams['numClasses'],[1,1]) 289 | #modelVars['model'].add_module('real_classifier',nn.Linear(num_ftrs, mdlParams['numClasses'])) 290 | #print(modelVars['model']) 291 | elif 'efficient' in mdlParams['model_type']: 292 | # Do nothing, output is prepared 293 | num_ftrs = modelVars['model']._fc.in_features 294 | modelVars['model']._fc = nn.Linear(num_ftrs, mdlParams['numClasses']) 295 | elif 'wsl' in mdlParams['model_type']: 296 | num_ftrs = modelVars['model'].fc.in_features 297 | modelVars['model'].fc = nn.Linear(num_ftrs, mdlParams['numClasses']) 298 | else: 299 | num_ftrs = modelVars['model'].last_linear.in_features 300 | modelVars['model'].last_linear = nn.Linear(num_ftrs, mdlParams['numClasses']) 301 | # Take care of meta case 302 | if mdlParams.get('meta_features',None) is not None: 303 | # freeze cnn first 304 | if mdlParams['freeze_cnn']: 305 | # deactivate all 306 | for param in modelVars['model'].parameters(): 307 | param.requires_grad = False 308 | if 'efficient' in mdlParams['model_type']: 309 | # Activate fc 310 | for param in modelVars['model']._fc.parameters(): 311 | param.requires_grad = True 312 | elif 'wsl' in mdlParams['model_type']: 313 | # Activate fc 314 | for param in modelVars['model'].fc.parameters(): 315 | param.requires_grad = True 316 | else: 317 | # Activate fc 318 | for param in modelVars['model'].last_linear.parameters(): 319 | param.requires_grad = True 320 | else: 321 | # mark cnn parameters 322 | for param in modelVars['model'].parameters(): 323 | param.is_cnn_param = True 324 | # unmark fc 325 | for param in modelVars['model']._fc.parameters(): 326 | param.is_cnn_param = False 327 | # modify model 328 | modelVars['model'] = models.modify_meta(mdlParams,modelVars['model']) 329 | # Mark new parameters 330 | for param in modelVars['model'].parameters(): 331 | if not hasattr(param, 'is_cnn_param'): 332 | param.is_cnn_param = False 333 | # multi gpu support 334 | if len(mdlParams['numGPUs']) > 1: 335 | modelVars['model'] = nn.DataParallel(modelVars['model']) 336 | modelVars['model'] = modelVars['model'].cuda() 337 | #summary(modelVars['model'], modelVars['model'].input_size)# (mdlParams['input_size'][2], mdlParams['input_size'][0], mdlParams['input_size'][1])) 338 | # Loss, with class weighting 339 | if mdlParams.get('focal_loss',False): 340 | modelVars['criterion'] = utils.FocalLoss(alpha=class_weights.tolist()) 341 | elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 0 or mdlParams['balance_classes'] == 12: 342 | modelVars['criterion'] = nn.CrossEntropyLoss() 343 | elif mdlParams['balance_classes'] == 8: 344 | modelVars['criterion'] = nn.CrossEntropyLoss(reduce=False) 345 | elif mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7: 346 | modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)),reduce=False) 347 | elif mdlParams['balance_classes'] == 10: 348 | modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses']) 349 | elif mdlParams['balance_classes'] == 11: 350 | modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'],alpha=torch.cuda.FloatTensor(class_weights.astype(np.float32))) 351 | else: 352 | modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32))) 353 | 354 | if mdlParams.get('meta_features',None) is not None: 355 | if mdlParams['freeze_cnn']: 356 | modelVars['optimizer'] = optim.Adam(filter(lambda p: p.requires_grad, modelVars['model'].parameters()), lr=mdlParams['learning_rate_meta']) 357 | # sanity check 358 | for param in filter(lambda p: p.requires_grad, modelVars['model'].parameters()): 359 | print(param.name,param.shape) 360 | else: 361 | modelVars['optimizer'] = optim.Adam([ 362 | {'params': filter(lambda p: not p.is_cnn_param, modelVars['model'].parameters()), 'lr': mdlParams['learning_rate_meta']}, 363 | {'params': filter(lambda p: p.is_cnn_param, modelVars['model'].parameters()), 'lr': mdlParams['learning_rate']} 364 | ], lr=mdlParams['learning_rate']) 365 | else: 366 | modelVars['optimizer'] = optim.Adam(modelVars['model'].parameters(), lr=mdlParams['learning_rate']) 367 | 368 | # Decay LR by a factor of 0.1 every 7 epochs 369 | modelVars['scheduler'] = lr_scheduler.StepLR(modelVars['optimizer'], step_size=mdlParams['lowerLRAfter'], gamma=1/np.float32(mdlParams['LRstep'])) 370 | 371 | # Define softmax 372 | modelVars['softmax'] = nn.Softmax(dim=1) 373 | 374 | # Set up training 375 | # loading from checkpoint 376 | if load_old: 377 | # Find last, not last best checkpoint 378 | files = glob(mdlParams['saveDir']+'/*') 379 | global_steps = np.zeros([len(files)]) 380 | for i in range(len(files)): 381 | # Use meta files to find the highest index 382 | if 'best' in files[i]: 383 | continue 384 | if 'checkpoint-' not in files[i]: 385 | continue 386 | # Extract global step 387 | nums = [int(s) for s in re.findall(r'\d+',files[i])] 388 | global_steps[i] = nums[-1] 389 | # Create path with maximum global step found 390 | chkPath = mdlParams['saveDir'] + '/checkpoint-' + str(int(np.max(global_steps))) + '.pt' 391 | print("Restoring: ",chkPath) 392 | # Load 393 | state = torch.load(chkPath) 394 | # Initialize model and optimizer 395 | modelVars['model'].load_state_dict(state['state_dict']) 396 | modelVars['optimizer'].load_state_dict(state['optimizer']) 397 | start_epoch = state['epoch']+1 398 | mdlParams['valBest'] = state.get('valBest',1000) 399 | mdlParams['lastBestInd'] = state.get('lastBestInd',int(np.max(global_steps))) 400 | else: 401 | start_epoch = 1 402 | mdlParams['lastBestInd'] = -1 403 | # Track metrics for saving best model 404 | mdlParams['valBest'] = 1000 405 | 406 | # Num batches 407 | numBatchesTrain = int(math.floor(len(mdlParams['trainInd'])/mdlParams['batchSize'])) 408 | print("Train batches",numBatchesTrain) 409 | 410 | # Run training 411 | start_time = time.time() 412 | print("Start training...") 413 | for step in range(start_epoch, mdlParams['training_steps']+1): 414 | # One Epoch of training 415 | if step >= mdlParams['lowerLRat']-mdlParams['lowerLRAfter']: 416 | modelVars['scheduler'].step() 417 | modelVars['model'].train() 418 | for j, (inputs, labels, indices) in enumerate(modelVars['dataloader_trainInd']): 419 | #print(indices) 420 | #t_load = time.time() 421 | # Run optimization 422 | if mdlParams.get('meta_features',None) is not None: 423 | inputs[0] = inputs[0].cuda() 424 | inputs[1] = inputs[1].cuda() 425 | else: 426 | inputs = inputs.cuda() 427 | #print(inputs.shape) 428 | labels = labels.cuda() 429 | # zero the parameter gradients 430 | modelVars['optimizer'].zero_grad() 431 | # forward 432 | # track history if only in train 433 | with torch.set_grad_enabled(True): 434 | if mdlParams.get('aux_classifier',False): 435 | outputs, outputs_aux = modelVars['model'](inputs) 436 | loss1 = modelVars['criterion'](outputs, labels) 437 | labels_aux = labels.repeat(mdlParams['multiCropTrain']) 438 | loss2 = modelVars['criterion'](outputs_aux, labels_aux) 439 | loss = loss1 + mdlParams['aux_classifier_loss_fac']*loss2 440 | else: 441 | #print("load",time.time()-t_load) 442 | #t_fwd = time.time() 443 | outputs = modelVars['model'](inputs) 444 | #print("forward",time.time()-t_fwd) 445 | #t_bwd = time.time() 446 | loss = modelVars['criterion'](outputs, labels) 447 | # Perhaps adjust weighting of the loss by the specific index 448 | if mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 8: 449 | #loss = loss.cpu() 450 | indices = indices.numpy() 451 | loss = loss*torch.cuda.FloatTensor(mdlParams['loss_fac_per_example'][indices].astype(np.float32)) 452 | loss = torch.mean(loss) 453 | #loss = loss.cuda() 454 | # backward + optimize only if in training phase 455 | loss.backward() 456 | modelVars['optimizer'].step() 457 | #print("backward",time.time()-t_bwd) 458 | if step % mdlParams['display_step'] == 0 or step == 1: 459 | # Calculate evaluation metrics 460 | if mdlParams['classification']: 461 | # Adjust model state 462 | modelVars['model'].eval() 463 | # Get metrics 464 | loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, _ = utils.getErrClassification_mgpu(mdlParams, eval_set, modelVars) 465 | # Save in mat 466 | save_dict['loss'].append(loss) 467 | save_dict['acc'].append(accuracy) 468 | save_dict['wacc'].append(waccuracy) 469 | save_dict['auc'].append(auc) 470 | save_dict['sens'].append(sensitivity) 471 | save_dict['spec'].append(specificity) 472 | save_dict['f1'].append(f1) 473 | save_dict['step_num'].append(step) 474 | if os.path.isfile(mdlParams['saveDir'] + '/progression_'+eval_set+'.mat'): 475 | os.remove(mdlParams['saveDir'] + '/progression_'+eval_set+'.mat') 476 | io.savemat(mdlParams['saveDir'] + '/progression_'+eval_set+'.mat',save_dict) 477 | eval_metric = -np.mean(waccuracy) 478 | # Check if we have a new best value 479 | if eval_metric < mdlParams['valBest']: 480 | mdlParams['valBest'] = eval_metric 481 | if mdlParams['classification']: 482 | allData['f1Best'][cv] = f1 483 | allData['sensBest'][cv] = sensitivity 484 | allData['specBest'][cv] = specificity 485 | allData['accBest'][cv] = accuracy 486 | allData['waccBest'][cv] = waccuracy 487 | allData['aucBest'][cv] = auc 488 | oldBestInd = mdlParams['lastBestInd'] 489 | mdlParams['lastBestInd'] = step 490 | allData['convergeTime'][cv] = step 491 | # Save best predictions 492 | allData['bestPred'][cv] = predictions 493 | allData['targets'][cv] = targets 494 | # Write to File 495 | with open(mdlParams['saveDirBase'] + '/CV.pkl', 'wb') as f: 496 | pickle.dump(allData, f, pickle.HIGHEST_PROTOCOL) 497 | # Delte previously best model 498 | if os.path.isfile(mdlParams['saveDir'] + '/checkpoint_best-' + str(oldBestInd) + '.pt'): 499 | os.remove(mdlParams['saveDir'] + '/checkpoint_best-' + str(oldBestInd) + '.pt') 500 | # Save currently best model 501 | state = {'epoch': step, 'valBest': mdlParams['valBest'], 'lastBestInd': mdlParams['lastBestInd'], 'state_dict': modelVars['model'].state_dict(),'optimizer': modelVars['optimizer'].state_dict()} 502 | torch.save(state, mdlParams['saveDir'] + '/checkpoint_best-' + str(step) + '.pt') 503 | 504 | # If its not better, just save it delete the last checkpoint if it is not current best one 505 | # Save current model 506 | state = {'epoch': step, 'valBest': mdlParams['valBest'], 'lastBestInd': mdlParams['lastBestInd'], 'state_dict': modelVars['model'].state_dict(),'optimizer': modelVars['optimizer'].state_dict()} 507 | torch.save(state, mdlParams['saveDir'] + '/checkpoint-' + str(step) + '.pt') 508 | # Delete last one 509 | if step == mdlParams['display_step']: 510 | lastInd = 1 511 | else: 512 | lastInd = step-mdlParams['display_step'] 513 | if os.path.isfile(mdlParams['saveDir'] + '/checkpoint-' + str(lastInd) + '.pt'): 514 | os.remove(mdlParams['saveDir'] + '/checkpoint-' + str(lastInd) + '.pt') 515 | # Duration so far 516 | duration = time.time() - start_time 517 | # Print 518 | if mdlParams['classification']: 519 | print("\n") 520 | print("Config:",sys.argv[2]) 521 | print('Fold: %d Epoch: %d/%d (%d h %d m %d s)' % (cv,step,mdlParams['training_steps'], int(duration/3600), int(np.mod(duration,3600)/60), int(np.mod(np.mod(duration,3600),60))) + time.strftime("%d.%m.-%H:%M:%S", time.localtime())) 522 | print("Loss on ",eval_set,"set: ",loss," Accuracy: ",accuracy," F1: ",f1," (best WACC: ",-mdlParams['valBest']," at Epoch ",mdlParams['lastBestInd'],")") 523 | print("Auc",auc,"Mean AUC",np.mean(auc)) 524 | print("Per Class Acc",waccuracy,"Weighted Accuracy",np.mean(waccuracy)) 525 | print("Sensitivity: ",sensitivity,"Specificity",specificity) 526 | print("Confusion Matrix") 527 | print(conf_matrix) 528 | # Potentially peek at test error 529 | if mdlParams['peak_at_testerr']: 530 | loss, accuracy, sensitivity, specificity, _, f1, _, _, _, _, _ = utils.getErrClassification_mgpu(mdlParams, 'testInd', modelVars) 531 | print("Test loss: ",loss," Accuracy: ",accuracy," F1: ",f1) 532 | print("Sensitivity: ",sensitivity,"Specificity",specificity) 533 | # Potentially print train err 534 | if mdlParams['print_trainerr'] and 'train' not in eval_set: 535 | loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, _ = utils.getErrClassification_mgpu(mdlParams, 'trainInd', modelVars) 536 | # Save in mat 537 | save_dict_train['loss'].append(loss) 538 | save_dict_train['acc'].append(accuracy) 539 | save_dict_train['wacc'].append(waccuracy) 540 | save_dict_train['auc'].append(auc) 541 | save_dict_train['sens'].append(sensitivity) 542 | save_dict_train['spec'].append(specificity) 543 | save_dict_train['f1'].append(f1) 544 | save_dict_train['step_num'].append(step) 545 | if os.path.isfile(mdlParams['saveDir'] + '/progression_trainInd.mat'): 546 | os.remove(mdlParams['saveDir'] + '/progression_trainInd.mat') 547 | scipy.io.savemat(mdlParams['saveDir'] + '/progression_trainInd.mat',save_dict_train) 548 | print("Train loss: ",loss," Accuracy: ",accuracy," F1: ",f1) 549 | print("Sensitivity: ",sensitivity,"Specificity",specificity) 550 | # Free everything in modelvars 551 | modelVars.clear() 552 | # After CV Training: print CV results and save them 553 | print("Best F1:",allData['f1Best'][cv]) 554 | print("Best Sens:",allData['sensBest'][cv]) 555 | print("Best Spec:",allData['specBest'][cv]) 556 | print("Best Acc:",allData['accBest'][cv]) 557 | print("Best Per Class Accuracy:",allData['waccBest'][cv]) 558 | print("Best Weighted Acc:",np.mean(allData['waccBest'][cv])) 559 | print("Best AUC:",allData['aucBest'][cv]) 560 | print("Best Mean AUC:",np.mean(allData['aucBest'][cv])) 561 | print("Convergence Steps:",allData['convergeTime'][cv]) 562 | 563 | 564 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torch.optim import lr_scheduler 5 | import torchvision 6 | from torchvision import datasets, models as tv_models 7 | from torch.utils.data import DataLoader 8 | from torchsummary import summary 9 | import numpy as np 10 | import models 11 | import threading 12 | import pickle 13 | from pathlib import Path 14 | import math 15 | import os 16 | import sys 17 | from glob import glob 18 | import re 19 | import gc 20 | import importlib 21 | import time 22 | import csv 23 | import sklearn.preprocessing 24 | import utils 25 | from sklearn.utils import class_weight 26 | import imagesize 27 | 28 | # add configuration file 29 | # Dictionary for model configuration 30 | mdlParams = {} 31 | 32 | # Import machine config 33 | pc_cfg = importlib.import_module('pc_cfgs.'+sys.argv[1]) 34 | mdlParams.update(pc_cfg.mdlParams) 35 | 36 | 37 | # If there is another argument, its which checkpoint should be used 38 | if len(sys.argv) > 6: 39 | if 'last' in sys.argv[6]: 40 | mdlParams['ckpt_name'] = 'checkpoint-' 41 | else: 42 | mdlParams['ckpt_name'] = 'checkpoint_best-' 43 | if 'first' in sys.argv[6]: 44 | mdlParams['use_first'] = True 45 | else: 46 | mdlParams['ckpt_name'] = 'checkpoint-' 47 | 48 | # Set visible devices 49 | mdlParams['numGPUs']= [[int(s) for s in re.findall(r'\d+',sys.argv[6])][-1]] 50 | cuda_str = "" 51 | for i in range(len(mdlParams['numGPUs'])): 52 | cuda_str = cuda_str + str(mdlParams['numGPUs'][i]) 53 | if i is not len(mdlParams['numGPUs'])-1: 54 | cuda_str = cuda_str + "," 55 | print("Devices to use:",cuda_str) 56 | os.environ["CUDA_VISIBLE_DEVICES"] = cuda_str 57 | 58 | # If there is another argument, also use a meta learner 59 | if len(sys.argv) > 7: 60 | if 'HAMONLY' in sys.argv[7]: 61 | mdlParams['eval_on_ham_only'] = True 62 | 63 | # Import model config 64 | model_cfg = importlib.import_module('cfgs.'+sys.argv[2]) 65 | mdlParams_model = model_cfg.init(mdlParams) 66 | mdlParams.update(mdlParams_model) 67 | 68 | 69 | # Path name where model is saved is the fourth argument 70 | if 'NONE' in sys.argv[5]: 71 | mdlParams['saveDirBase'] = mdlParams['saveDir'] + sys.argv[2] 72 | else: 73 | mdlParams['saveDirBase'] = sys.argv[5] 74 | 75 | # Third is multi crop yes no 76 | if 'multi' in sys.argv[3]: 77 | if 'rand' in sys.argv[3]: 78 | mdlParams['numRandValSeq'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][0] 79 | print("Random sequence number",mdlParams['numRandValSeq']) 80 | else: 81 | mdlParams['numRandValSeq'] = 0 82 | mdlParams['multiCropEval'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-1] 83 | mdlParams['voting_scheme'] = sys.argv[4] 84 | if 'scale' in sys.argv[3]: 85 | print("Multi Crop and Scale Eval with crop number:",mdlParams['multiCropEval']," Voting scheme: ",mdlParams['voting_scheme']) 86 | mdlParams['orderedCrop'] = False 87 | mdlParams['scale_min'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-2]/100.0 88 | elif 'determ' in sys.argv[3]: 89 | # Example application: multideterm5sc3f2 90 | mdlParams['deterministic_eval'] = True 91 | mdlParams['numCropPositions'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-3] 92 | num_scales = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-2] 93 | all_scales = [1.0,0.5,0.75,0.25,0.9,0.6,0.4] 94 | mdlParams['cropScales'] = all_scales[:num_scales] 95 | mdlParams['cropFlipping'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-1] 96 | print("deterministic eval with crops number",mdlParams['numCropPositions'],"scales",mdlParams['cropScales'],"flipping",mdlParams['cropFlipping']) 97 | mdlParams['multiCropEval'] = mdlParams['numCropPositions']*len(mdlParams['cropScales'])*mdlParams['cropFlipping'] 98 | mdlParams['offset_crop'] = 0.2 99 | elif 'order' in sys.argv[3]: 100 | mdlParams['orderedCrop'] = True 101 | if mdlParams.get('var_im_size',False): 102 | # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. 103 | mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 104 | #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 105 | for u in range(len(mdlParams['im_paths'])): 106 | height, width = imagesize.get(mdlParams['im_paths'][u]) 107 | if width < mdlParams['input_size'][0]: 108 | height = int(mdlParams['input_size'][0]/float(width))*height 109 | width = mdlParams['input_size'][0] 110 | if height < mdlParams['input_size'][0]: 111 | width = int(mdlParams['input_size'][0]/float(height))*width 112 | height = mdlParams['input_size'][0] 113 | if mdlParams.get('resize_large_ones') is not None: 114 | if width == mdlParams['large_size'] and height == mdlParams['large_size']: 115 | width, height = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones']) 116 | ind = 0 117 | for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 118 | for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 119 | mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1)) 120 | mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1)) 121 | #mdlParams['imSizes'][u,ind,0] = curr_im_size[0] 122 | 123 | ind += 1 124 | # Sanity checks 125 | #print("Positions",mdlParams['cropPositions']) 126 | # Test image sizes 127 | height = mdlParams['input_size'][0] 128 | width = mdlParams['input_size'][1] 129 | for u in range(len(mdlParams['im_paths'])): 130 | height_test, width_test = imagesize.get(mdlParams['im_paths'][u]) 131 | if width_test < mdlParams['input_size'][0]: 132 | height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test 133 | width_test = mdlParams['input_size'][0] 134 | if height_test < mdlParams['input_size'][0]: 135 | width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test 136 | height_test = mdlParams['input_size'][0] 137 | if mdlParams.get('resize_large_ones') is not None: 138 | if width_test == mdlParams['large_size'] and height_test == mdlParams['large_size']: 139 | width_test, height_test = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones']) 140 | test_im = np.zeros([width_test,height_test]) 141 | for i in range(mdlParams['multiCropEval']): 142 | im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width] 143 | if im_crop.shape[0] != mdlParams['input_size'][0]: 144 | print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u]) 145 | if im_crop.shape[1] != mdlParams['input_size'][1]: 146 | print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u]) 147 | else: 148 | # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. 149 | mdlParams['cropPositions'] = np.zeros([mdlParams['multiCropEval'],2],dtype=np.int64) 150 | if mdlParams['multiCropEval'] == 5: 151 | numCrops = 4 152 | elif mdlParams['multiCropEval'] == 7: 153 | numCrops = 9 154 | mdlParams['cropPositions'] = np.zeros([9,2],dtype=np.int64) 155 | else: 156 | numCrops = mdlParams['multiCropEval'] 157 | ind = 0 158 | for i in range(np.int32(np.sqrt(numCrops))): 159 | for j in range(np.int32(np.sqrt(numCrops))): 160 | mdlParams['cropPositions'][ind,0] = mdlParams['input_size'][0]/2+i*((mdlParams['input_size_load'][0]-mdlParams['input_size'][0])/(np.sqrt(numCrops)-1)) 161 | mdlParams['cropPositions'][ind,1] = mdlParams['input_size'][1]/2+j*((mdlParams['input_size_load'][1]-mdlParams['input_size'][1])/(np.sqrt(numCrops)-1)) 162 | ind += 1 163 | # Add center crop 164 | if mdlParams['multiCropEval'] == 5: 165 | mdlParams['cropPositions'][4,0] = mdlParams['input_size_load'][0]/2 166 | mdlParams['cropPositions'][4,1] = mdlParams['input_size_load'][1]/2 167 | if mdlParams['multiCropEval'] == 7: 168 | mdlParams['cropPositions'] = np.delete(mdlParams['cropPositions'],[3,7],0) 169 | # Sanity checks 170 | print("Positions val",mdlParams['cropPositions']) 171 | # Test image sizes 172 | test_im = np.zeros(mdlParams['input_size_load']) 173 | height = mdlParams['input_size'][0] 174 | width = mdlParams['input_size'][1] 175 | for i in range(mdlParams['multiCropEval']): 176 | im_crop = test_im[np.int32(mdlParams['cropPositions'][i,0]-height/2):np.int32(mdlParams['cropPositions'][i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][i,1]-width/2):np.int32(mdlParams['cropPositions'][i,1]-width/2)+width,:] 177 | print("Shape",i+1,im_crop.shape) 178 | print("Multi Crop with order with crop number:",mdlParams['multiCropEval']," Voting scheme: ",mdlParams['voting_scheme']) 179 | if 'flip' in sys.argv[3]: 180 | # additional flipping, example: flip2multiorder16 181 | mdlParams['eval_flipping'] = [int(s) for s in re.findall(r'\d+',sys.argv[3])][-2] 182 | print("Additional flipping",mdlParams['eval_flipping']) 183 | else: 184 | print("Multi Crop Eval with crop number:",mdlParams['multiCropEval']," Voting scheme: ",mdlParams['voting_scheme']) 185 | mdlParams['orderedCrop'] = False 186 | else: 187 | mdlParams['multiCropEval'] = 0 188 | mdlParams['orderedCrop'] = False 189 | 190 | # Set training set to eval mode 191 | mdlParams['trainSetState'] = 'eval' 192 | 193 | if mdlParams['numClasses'] == 9 and mdlParams.get('no_c9_eval',False): 194 | num_classes = mdlParams['numClasses']-1 195 | else: 196 | num_classes = mdlParams['numClasses'] 197 | # Save results in here 198 | allData = {} 199 | allData['f1Best'] = np.zeros([mdlParams['numCV']]) 200 | allData['sensBest'] = np.zeros([mdlParams['numCV'],num_classes]) 201 | allData['specBest'] = np.zeros([mdlParams['numCV'],num_classes]) 202 | allData['accBest'] = np.zeros([mdlParams['numCV']]) 203 | allData['waccBest'] = np.zeros([mdlParams['numCV'],num_classes]) 204 | allData['aucBest'] = np.zeros([mdlParams['numCV'],num_classes]) 205 | allData['convergeTime'] = {} 206 | allData['bestPred'] = {} 207 | allData['bestPredMC'] = {} 208 | allData['targets'] = {} 209 | allData['extPred'] = {} 210 | allData['f1Best_meta'] = np.zeros([mdlParams['numCV']]) 211 | allData['sensBest_meta'] = np.zeros([mdlParams['numCV'],num_classes]) 212 | allData['specBest_meta'] = np.zeros([mdlParams['numCV'],num_classes]) 213 | allData['accBest_meta'] = np.zeros([mdlParams['numCV']]) 214 | allData['waccBest_meta'] = np.zeros([mdlParams['numCV'],num_classes]) 215 | allData['aucBest_meta'] = np.zeros([mdlParams['numCV'],num_classes]) 216 | #allData['convergeTime'] = {} 217 | allData['bestPred_meta'] = {} 218 | allData['targets_meta'] = {} 219 | 220 | if not (len(sys.argv) > 8): 221 | for cv in range(mdlParams['numCV']): 222 | # Reset model graph 223 | importlib.reload(models) 224 | #importlib.reload(torchvision) 225 | # Collect model variables 226 | modelVars = {} 227 | modelVars['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 228 | print(modelVars['device']) 229 | # Def current CV set 230 | mdlParams['trainInd'] = mdlParams['trainIndCV'][cv] 231 | if 'valIndCV' in mdlParams: 232 | mdlParams['valInd'] = mdlParams['valIndCV'][cv] 233 | # Def current path for saving stuff 234 | if 'valIndCV' in mdlParams: 235 | mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv) 236 | else: 237 | mdlParams['saveDir'] = mdlParams['saveDirBase'] 238 | 239 | # Potentially calculate setMean to subtract 240 | if mdlParams['subtract_set_mean'] == 1: 241 | mdlParams['setMean'] = np.mean(mdlParams['images_means'][mdlParams['trainInd'],:],(0)) 242 | print("Set Mean",mdlParams['setMean']) 243 | 244 | # Potentially only HAM eval 245 | if mdlParams.get('eval_on_ham_only',False): 246 | print("Old val inds",len(mdlParams['valInd'])) 247 | mdlParams['valInd'] = np.intersect1d(mdlParams['valInd'],mdlParams['HAM10000_inds']) 248 | print("New val inds, HAM only",len(mdlParams['valInd'])) 249 | 250 | # balance classes 251 | if mdlParams['balance_classes'] < 3 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 11: 252 | class_weights = class_weight.compute_class_weight('balanced',np.unique(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)),np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)) 253 | print("Current class weights",class_weights) 254 | class_weights = class_weights*mdlParams['extra_fac'] 255 | print("Current class weights with extra",class_weights) 256 | elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 4: 257 | # Split training set by classes 258 | not_one_hot = np.argmax(mdlParams['labels_array'],1) 259 | mdlParams['class_indices'] = [] 260 | for i in range(mdlParams['numClasses']): 261 | mdlParams['class_indices'].append(np.where(not_one_hot==i)[0]) 262 | # Kick out non-trainind indices 263 | mdlParams['class_indices'][i] = np.setdiff1d(mdlParams['class_indices'][i],mdlParams['valInd']) 264 | #print("Class",i,mdlParams['class_indices'][i].shape,np.min(mdlParams['class_indices'][i]),np.max(mdlParams['class_indices'][i]),np.sum(mdlParams['labels_array'][np.int64(mdlParams['class_indices'][i]),:],0)) 265 | elif mdlParams['balance_classes'] == 5 or mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 13: 266 | # Other class balancing loss 267 | class_weights = 1.0/np.mean(mdlParams['labels_array'][mdlParams['trainInd'],:],axis=0) 268 | print("Current class weights",class_weights) 269 | class_weights = class_weights*mdlParams['extra_fac'] 270 | print("Current class weights with extra",class_weights) 271 | elif mdlParams['balance_classes'] == 9: 272 | # Only use HAM indicies for calculation 273 | print("Balance 9") 274 | indices_ham = mdlParams['trainInd'][mdlParams['trainInd'] < 25331] 275 | if mdlParams['numClasses'] == 9: 276 | class_weights_ = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:8],axis=0) 277 | #print("class before",class_weights_) 278 | class_weights = np.zeros([mdlParams['numClasses']]) 279 | class_weights[:8] = class_weights_ 280 | class_weights[-1] = np.max(class_weights_) 281 | else: 282 | class_weights = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:],axis=0) 283 | print("Current class weights",class_weights) 284 | if isinstance(mdlParams['extra_fac'], float): 285 | class_weights = np.power(class_weights,mdlParams['extra_fac']) 286 | else: 287 | class_weights = class_weights*mdlParams['extra_fac'] 288 | print("Current class weights with extra",class_weights) 289 | 290 | 291 | # Set up dataloaders 292 | # Meta scaler 293 | if mdlParams.get('meta_features',None) is not None and mdlParams['scale_features']: 294 | mdlParams['feature_scaler_meta'] = sklearn.preprocessing.StandardScaler().fit(mdlParams['meta_array'][mdlParams['trainInd'],:]) 295 | #print("scaler mean",mdlParams['feature_scaler_meta'].mean_,"var",mdlParams['feature_scaler_meta'].var_) 296 | # For train 297 | dataset_train = utils.ISICDataset(mdlParams, 'trainInd') 298 | # For val 299 | dataset_val = utils.ISICDataset(mdlParams, 'valInd') 300 | if mdlParams['multiCropEval'] > 0: 301 | modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=8, pin_memory=True) 302 | else: 303 | modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=8, pin_memory=True) 304 | 305 | modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], shuffle=True, num_workers=8, pin_memory=True) 306 | 307 | # For test 308 | if 'testInd' in mdlParams: 309 | dataset_test = utils.ISICDataset(mdlParams, 'testInd') 310 | if mdlParams['multiCropEval'] > 0: 311 | modelVars['dataloader_testInd'] = DataLoader(dataset_test, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=8, pin_memory=True) 312 | else: 313 | modelVars['dataloader_testInd'] = DataLoader(dataset_test, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=8, pin_memory=True) 314 | 315 | 316 | modelVars['model'] = models.getModel(mdlParams)() 317 | # Original input size 318 | #if 'Dense' not in mdlParams['model_type']: 319 | # print("Original input size",modelVars['model'].input_size) 320 | #print(modelVars['model']) 321 | if 'Dense' in mdlParams['model_type']: 322 | if mdlParams['input_size'][0] != 224: 323 | modelVars['model'] = utils.modify_densenet_avg_pool(modelVars['model']) 324 | #print(modelVars['model']) 325 | num_ftrs = modelVars['model'].classifier.in_features 326 | modelVars['model'].classifier = nn.Linear(num_ftrs, mdlParams['numClasses']) 327 | #print(modelVars['model']) 328 | elif 'dpn' in mdlParams['model_type']: 329 | num_ftrs = modelVars['model'].classifier.in_channels 330 | modelVars['model'].classifier = nn.Conv2d(num_ftrs,mdlParams['numClasses'],[1,1]) 331 | #modelVars['model'].add_module('real_classifier',nn.Linear(num_ftrs, mdlParams['numClasses'])) 332 | #print(modelVars['model']) 333 | elif 'efficient' in mdlParams['model_type']: 334 | # Do nothing, output is prepared 335 | num_ftrs = modelVars['model']._fc.in_features 336 | modelVars['model']._fc = nn.Linear(num_ftrs, mdlParams['numClasses']) 337 | elif 'wsl' in mdlParams['model_type']: 338 | num_ftrs = modelVars['model'].fc.in_features 339 | modelVars['model'].fc = nn.Linear(num_ftrs, mdlParams['numClasses']) 340 | else: 341 | num_ftrs = modelVars['model'].last_linear.in_features 342 | modelVars['model'].last_linear = nn.Linear(num_ftrs, mdlParams['numClasses']) 343 | # modify model 344 | if mdlParams.get('meta_features',None) is not None: 345 | modelVars['model'] = models.modify_meta(mdlParams,modelVars['model']) 346 | modelVars['model'] = modelVars['model'].to(modelVars['device']) 347 | #summary(modelVars['model'], (mdlParams['input_size'][2], mdlParams['input_size'][0], mdlParams['input_size'][1])) 348 | # Loss, with class weighting 349 | # Loss, with class weighting 350 | if mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 0 or mdlParams['balance_classes'] == 12: 351 | modelVars['criterion'] = nn.CrossEntropyLoss() 352 | elif mdlParams['balance_classes'] == 8: 353 | modelVars['criterion'] = nn.CrossEntropyLoss(reduce=False) 354 | elif mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7: 355 | modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)),reduce=False) 356 | elif mdlParams['balance_classes'] == 10: 357 | modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses']) 358 | elif mdlParams['balance_classes'] == 11: 359 | modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'],alpha=torch.cuda.FloatTensor(class_weights.astype(np.float32))) 360 | else: 361 | modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32))) 362 | 363 | # Observe that all parameters are being optimized 364 | modelVars['optimizer'] = optim.Adam(modelVars['model'].parameters(), lr=mdlParams['learning_rate']) 365 | 366 | # Decay LR by a factor of 0.1 every 7 epochs 367 | modelVars['scheduler'] = lr_scheduler.StepLR(modelVars['optimizer'], step_size=mdlParams['lowerLRAfter'], gamma=1/np.float32(mdlParams['LRstep'])) 368 | 369 | # Define softmax 370 | modelVars['softmax'] = nn.Softmax(dim=1) 371 | 372 | # Manually find latest chekcpoint, tf.train.latest_checkpoint is doing weird shit 373 | files = glob(mdlParams['saveDir']+'/*') 374 | #print(mdlParams['saveDir']) 375 | #print("Files",files) 376 | global_steps = np.zeros([len(files)]) 377 | for i in range(len(files)): 378 | # Use meta files to find the highest index 379 | if 'checkpoint' not in files[i]: 380 | continue 381 | if mdlParams['ckpt_name'] not in files[i]: 382 | continue 383 | # Extract global step 384 | nums = [int(s) for s in re.findall(r'\d+',files[i])] 385 | global_steps[i] = nums[-1] 386 | # Create path with maximum global step found, if first is not wanted 387 | global_steps = np.sort(global_steps) 388 | if mdlParams.get('use_first') is not None: 389 | chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(global_steps[-2])) + '.pt' 390 | else: 391 | chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(np.max(global_steps))) + '.pt' 392 | print("Restoring: ",chkPath) 393 | # Load 394 | state = torch.load(chkPath) 395 | # Initialize model and optimizer 396 | modelVars['model'].load_state_dict(state['state_dict']) 397 | #modelVars['optimizer'].load_state_dict(state['optimizer']) 398 | # Construct pkl filename: config name, last/best, saved epoch number 399 | pklFileName = sys.argv[2] + "_" + sys.argv[6] + "_" + str(int(np.max(global_steps))) + ".pkl" 400 | modelVars['model'].eval() 401 | if mdlParams['classification']: 402 | print("CV Set ",cv+1) 403 | print("------------------------------------") 404 | # Training err first, deactivated 405 | if 'trainInd' in mdlParams and False: 406 | loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, _ = utils.getErrClassification_mgpu(mdlParams, 'trainInd', modelVars) 407 | print("Training Results:") 408 | print("----------------------------------") 409 | print("Loss",np.mean(loss)) 410 | print("F1 Score",f1) 411 | print("Sensitivity",sensitivity) 412 | print("Specificity",specificity) 413 | print("Accuracy",accuracy) 414 | print("Per Class Accuracy",waccuracy) 415 | print("Weighted Accuracy",waccuracy) 416 | print("AUC",auc) 417 | print("Mean AUC", np.mean(auc)) 418 | if 'valInd' in mdlParams and (len(sys.argv) <= 8): 419 | loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, predictions_mc = utils.getErrClassification_mgpu(mdlParams, 'valInd', modelVars) 420 | print("Validation Results:") 421 | print("----------------------------------") 422 | print("Loss",np.mean(loss)) 423 | print("F1 Score",f1) 424 | print("Sensitivity",sensitivity) 425 | print("Specificity",specificity) 426 | print("Accuracy",accuracy) 427 | print("Per Class Accuracy",waccuracy) 428 | print("Weighted Accuracy",np.mean(waccuracy)) 429 | print("AUC",auc) 430 | print("Mean AUC", np.mean(auc)) 431 | # Save results in dict 432 | if 'testInd' not in mdlParams: 433 | allData['f1Best'][cv] = f1 434 | allData['sensBest'][cv,:] = sensitivity 435 | allData['specBest'][cv,:] = specificity 436 | allData['accBest'][cv] = accuracy 437 | allData['waccBest'][cv,:] = waccuracy 438 | allData['aucBest'][cv,:] = auc 439 | allData['bestPred'][cv] = predictions 440 | allData['bestPredMC'][cv] = predictions_mc 441 | allData['targets'][cv] = targets 442 | print("Pred shape",predictions.shape,"Tar shape",targets.shape) 443 | if 'testInd' in mdlParams: 444 | loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, predictions_mc = utils.getErrClassification_mgpu(mdlParams, 'testInd', modelVars) 445 | print("Test Results Normal:") 446 | print("----------------------------------") 447 | print("Loss",np.mean(loss)) 448 | print("F1 Score",f1) 449 | print("Sensitivity",sensitivity) 450 | print("Specificity",specificity) 451 | print("Accuracy",accuracy) 452 | print("Per Class Accuracy",waccuracy) 453 | print("Weighted Accuracy",np.mean(waccuracy)) 454 | print("AUC",auc) 455 | print("Mean AUC", np.mean(auc)) 456 | # Save results in dict 457 | allData['f1Best'][cv] = f1 458 | allData['sensBest'][cv,:] = sensitivity 459 | allData['specBest'][cv,:] = specificity 460 | allData['accBest'][cv] = accuracy 461 | allData['waccBest'][cv,:] = waccuracy 462 | allData['aucBest'][cv,:] = auc 463 | else: 464 | # TODO: Regression 465 | print("Not Implemented") 466 | # If there is an 8th argument, make extra evaluation for external set 467 | if len(sys.argv) > 8: 468 | for cv in range(mdlParams['numCV']): 469 | # Reset model graph 470 | importlib.reload(models) 471 | #importlib.reload(torchvision) 472 | # Collect model variables 473 | modelVars = {} 474 | modelVars['device'] = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 475 | # define new folder, take care that there might be no labels 476 | print("Creating predictions for path ",sys.argv[8]) 477 | # Add meta data 478 | if mdlParams.get('meta_features',None) is not None: 479 | mdlParams['meta_dict'] = {} 480 | path1 = mdlParams['dataDir'] + '/meta_data/test_rez3_ll/meta_data_test.pkl' 481 | # Open and load 482 | with open(path1,'rb') as f: 483 | meta_data = pickle.load(f) 484 | # Write into dict 485 | for k in range(len(meta_data['im_name'])): 486 | feature_vector = [] 487 | if 'age_oh' in mdlParams['meta_features']: 488 | if mdlParams['encode_nan']: 489 | feature_vector.append(meta_data['age_oh'][k,:]) 490 | else: 491 | feature_vector.append(meta_data['age_oh'][k,1:]) 492 | if 'age_num' in mdlParams['meta_features']: 493 | feature_vector.append(np.array([meta_data['age_num'][k]])) 494 | if 'loc_oh' in mdlParams['meta_features']: 495 | if mdlParams['encode_nan']: 496 | feature_vector.append(meta_data['loc_oh'][k,:]) 497 | else: 498 | feature_vector.append(meta_data['loc_oh'][k,1:]) 499 | if 'sex_oh' in mdlParams['meta_features']: 500 | if mdlParams['encode_nan']: 501 | feature_vector.append(meta_data['sex_oh'][k,:]) 502 | else: 503 | feature_vector.append(meta_data['sex_oh'][k,1:]) 504 | 505 | #print(feature_vector) 506 | feature_vector = np.concatenate(feature_vector,axis=0) 507 | #print("feature vector shape",feature_vector.shape) 508 | mdlParams['meta_dict'][meta_data['im_name'][k]] = feature_vector 509 | # Define the path 510 | path1 = sys.argv[8] 511 | # All files in that set 512 | files = sorted(glob(path1+'/*')) 513 | # Define new paths 514 | mdlParams['im_paths'] = [] 515 | mdlParams['meta_list'] = [] 516 | for j in range(len(files)): 517 | inds = [int(s) for s in re.findall(r'\d+',files[j])] 518 | if 'ISIC_' in files[j]: 519 | mdlParams['im_paths'].append(files[j]) 520 | if mdlParams.get('meta_features',None) is not None: 521 | for key in mdlParams['meta_dict']: 522 | if key in files[j]: 523 | mdlParams['meta_list'].append(mdlParams['meta_dict'][key]) 524 | if mdlParams.get('meta_features',None) is not None: 525 | # Meta data 526 | mdlParams['meta_array'] = np.array(mdlParams['meta_list']) 527 | # Add empty labels 528 | mdlParams['labels_array'] = np.zeros([len(mdlParams['im_paths']),mdlParams['numClasses']],dtype=np.float32) 529 | # Define everything as a valind set 530 | mdlParams['valInd'] = np.array(np.arange(len(mdlParams['im_paths']))) 531 | mdlParams['trainInd'] = mdlParams['valInd'] 532 | if mdlParams.get('var_im_size',False): 533 | # Crop positions, always choose multiCropEval to be 4, 9, 16, 25, etc. 534 | mdlParams['cropPositions'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 535 | #mdlParams['imSizes'] = np.zeros([len(mdlParams['im_paths']),mdlParams['multiCropEval'],2],dtype=np.int64) 536 | for u in range(len(mdlParams['im_paths'])): 537 | height, width = imagesize.get(mdlParams['im_paths'][u]) 538 | if width < mdlParams['input_size'][0]: 539 | height = int(mdlParams['input_size'][0]/float(width))*height 540 | width = mdlParams['input_size'][0] 541 | if height < mdlParams['input_size'][0]: 542 | width = int(mdlParams['input_size'][0]/float(height))*width 543 | height = mdlParams['input_size'][0] 544 | if mdlParams.get('resize_large_ones') is not None: 545 | if width == mdlParams['large_size'] and height == mdlParams['large_size']: 546 | width, height = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones']) 547 | ind = 0 548 | for i in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 549 | for j in range(np.int32(np.sqrt(mdlParams['multiCropEval']))): 550 | mdlParams['cropPositions'][u,ind,0] = mdlParams['input_size'][0]/2+i*((width-mdlParams['input_size'][1])/(np.sqrt(mdlParams['multiCropEval'])-1)) 551 | mdlParams['cropPositions'][u,ind,1] = mdlParams['input_size'][1]/2+j*((height-mdlParams['input_size'][0])/(np.sqrt(mdlParams['multiCropEval'])-1)) 552 | #mdlParams['imSizes'][u,ind,0] = curr_im_size[0] 553 | 554 | ind += 1 555 | # Sanity checks 556 | #print("Positions",mdlParams['cropPositions']) 557 | # Test image sizes 558 | test_im = np.zeros(mdlParams['input_size_load']) 559 | height = mdlParams['input_size'][0] 560 | width = mdlParams['input_size'][1] 561 | for u in range(len(mdlParams['im_paths'])): 562 | height_test, width_test = imagesize.get(mdlParams['im_paths'][u]) 563 | if width_test < mdlParams['input_size'][0]: 564 | height_test = int(mdlParams['input_size'][0]/float(width_test))*height_test 565 | width_test = mdlParams['input_size'][0] 566 | if height_test < mdlParams['input_size'][0]: 567 | width_test = int(mdlParams['input_size'][0]/float(height_test))*width_test 568 | height_test = mdlParams['input_size'][0] 569 | if mdlParams.get('resize_large_ones') is not None: 570 | if width_test == mdlParams['large_size'] and height_test == mdlParams['large_size']: 571 | width_test, height_test = (mdlParams['resize_large_ones'],mdlParams['resize_large_ones']) 572 | test_im = np.zeros([width_test,height_test]) 573 | for i in range(mdlParams['multiCropEval']): 574 | im_crop = test_im[np.int32(mdlParams['cropPositions'][u,i,0]-height/2):np.int32(mdlParams['cropPositions'][u,i,0]-height/2)+height,np.int32(mdlParams['cropPositions'][u,i,1]-width/2):np.int32(mdlParams['cropPositions'][u,i,1]-width/2)+width] 575 | if im_crop.shape[0] != mdlParams['input_size'][0]: 576 | print("Wrong shape",im_crop.shape[0],mdlParams['im_paths'][u]) 577 | if im_crop.shape[1] != mdlParams['input_size'][1]: 578 | print("Wrong shape",im_crop.shape[1],mdlParams['im_paths'][u]) 579 | mdlParams['saveDir'] = mdlParams['saveDirBase'] + '/CVSet' + str(cv) 580 | # balance classes 581 | if mdlParams['balance_classes'] < 3 or mdlParams['balance_classes'] == 7 or mdlParams['balance_classes'] == 11: 582 | class_weights = class_weight.compute_class_weight('balanced',np.unique(np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)),np.argmax(mdlParams['labels_array'][mdlParams['trainInd'],:],1)) 583 | print("Current class weights",class_weights) 584 | class_weights = class_weights*mdlParams['extra_fac'] 585 | print("Current class weights with extra",class_weights) 586 | elif mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 4: 587 | # Split training set by classes 588 | not_one_hot = np.argmax(mdlParams['labels_array'],1) 589 | mdlParams['class_indices'] = [] 590 | for i in range(mdlParams['numClasses']): 591 | mdlParams['class_indices'].append(np.where(not_one_hot==i)[0]) 592 | # Kick out non-trainind indices 593 | mdlParams['class_indices'][i] = np.setdiff1d(mdlParams['class_indices'][i],mdlParams['valInd']) 594 | #print("Class",i,mdlParams['class_indices'][i].shape,np.min(mdlParams['class_indices'][i]),np.max(mdlParams['class_indices'][i]),np.sum(mdlParams['labels_array'][np.int64(mdlParams['class_indices'][i]),:],0)) 595 | elif mdlParams['balance_classes'] == 5 or mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 13: 596 | # Other class balancing loss 597 | class_weights = 1.0/np.mean(mdlParams['labels_array'][mdlParams['trainInd'],:],axis=0) 598 | print("Current class weights",class_weights) 599 | class_weights = class_weights*mdlParams['extra_fac'] 600 | print("Current class weights with extra",class_weights) 601 | elif mdlParams['balance_classes'] == 9: 602 | # Only use official indicies for calculation 603 | print("Balance 9") 604 | indices_ham = mdlParams['trainInd'][mdlParams['trainInd'] < 25331] 605 | if mdlParams['numClasses'] == 9: 606 | class_weights_ = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:8],axis=0) 607 | #print("class before",class_weights_) 608 | class_weights = np.zeros([mdlParams['numClasses']]) 609 | class_weights[:8] = class_weights_ 610 | class_weights[-1] = np.max(class_weights_) 611 | else: 612 | class_weights = 1.0/np.mean(mdlParams['labels_array'][indices_ham,:],axis=0) 613 | print("Current class weights",class_weights) 614 | if isinstance(mdlParams['extra_fac'], float): 615 | class_weights = np.power(class_weights,mdlParams['extra_fac']) 616 | else: 617 | class_weights = class_weights*mdlParams['extra_fac'] 618 | print("Current class weights with extra",class_weights) 619 | 620 | 621 | # Set up dataloaders 622 | # Meta scaler 623 | if mdlParams.get('meta_features',None) is not None and mdlParams['scale_features']: 624 | mdlParams['feature_scaler_meta'] = sklearn.preprocessing.StandardScaler().fit(mdlParams['meta_array'][mdlParams['trainInd'],:]) 625 | #print("scaler mean",mdlParams['feature_scaler_meta'].mean_,"var",mdlParams['feature_scaler_meta'].var_) 626 | # For train 627 | dataset_train = utils.ISICDataset(mdlParams, 'trainInd') 628 | # For val 629 | dataset_val = utils.ISICDataset(mdlParams, 'valInd') 630 | if mdlParams['multiCropEval'] > 0: 631 | modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['multiCropEval'], shuffle=False, num_workers=8, pin_memory=True) 632 | else: 633 | modelVars['dataloader_valInd'] = DataLoader(dataset_val, batch_size=mdlParams['batchSize'], shuffle=False, num_workers=8, pin_memory=True) 634 | modelVars['dataloader_trainInd'] = DataLoader(dataset_train, batch_size=mdlParams['batchSize'], shuffle=True, num_workers=8, pin_memory=True) 635 | 636 | 637 | # Define model 638 | modelVars['model'] = models.getModel(mdlParams)() 639 | if 'Dense' in mdlParams['model_type']: 640 | if mdlParams['input_size'][0] != 224: 641 | modelVars['model'] = utils.modify_densenet_avg_pool(modelVars['model']) 642 | #print(modelVars['model']) 643 | num_ftrs = modelVars['model'].classifier.in_features 644 | modelVars['model'].classifier = nn.Linear(num_ftrs, mdlParams['numClasses']) 645 | #print(modelVars['model']) 646 | elif 'dpn' in mdlParams['model_type']: 647 | num_ftrs = modelVars['model'].classifier.in_channels 648 | modelVars['model'].classifier = nn.Conv2d(num_ftrs,mdlParams['numClasses'],[1,1]) 649 | #modelVars['model'].add_module('real_classifier',nn.Linear(num_ftrs, mdlParams['numClasses'])) 650 | #print(modelVars['model']) 651 | elif 'efficient' in mdlParams['model_type']: 652 | # Do nothing, output is prepared 653 | num_ftrs = modelVars['model']._fc.in_features 654 | modelVars['model']._fc = nn.Linear(num_ftrs, mdlParams['numClasses']) 655 | elif 'wsl' in mdlParams['model_type']: 656 | num_ftrs = modelVars['model'].fc.in_features 657 | modelVars['model'].fc = nn.Linear(num_ftrs, mdlParams['numClasses']) 658 | else: 659 | num_ftrs = modelVars['model'].last_linear.in_features 660 | modelVars['model'].last_linear = nn.Linear(num_ftrs, mdlParams['numClasses']) 661 | # modify model 662 | if mdlParams.get('meta_features',None) is not None: 663 | modelVars['model'] = models.modify_meta(mdlParams,modelVars['model']) 664 | modelVars['model'] = modelVars['model'].to(modelVars['device']) 665 | #summary(modelVars['model'], (mdlParams['input_size'][2], mdlParams['input_size'][0], mdlParams['input_size'][1])) 666 | # Loss, with class weighting 667 | # Loss, with class weighting 668 | if mdlParams['balance_classes'] == 3 or mdlParams['balance_classes'] == 0 or mdlParams['balance_classes'] == 12: 669 | modelVars['criterion'] = nn.CrossEntropyLoss() 670 | elif mdlParams['balance_classes'] == 8: 671 | modelVars['criterion'] = nn.CrossEntropyLoss(reduce=False) 672 | elif mdlParams['balance_classes'] == 6 or mdlParams['balance_classes'] == 7: 673 | modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32)),reduce=False) 674 | elif mdlParams['balance_classes'] == 10: 675 | modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses']) 676 | elif mdlParams['balance_classes'] == 11: 677 | modelVars['criterion'] = utils.FocalLoss(mdlParams['numClasses'],alpha=torch.cuda.FloatTensor(class_weights.astype(np.float32))) 678 | else: 679 | modelVars['criterion'] = nn.CrossEntropyLoss(weight=torch.cuda.FloatTensor(class_weights.astype(np.float32))) 680 | # Observe that all parameters are being optimized 681 | modelVars['optimizer'] = optim.Adam(modelVars['model'].parameters(), lr=mdlParams['learning_rate']) 682 | 683 | # Decay LR by a factor of 0.1 every 7 epochs 684 | modelVars['scheduler'] = lr_scheduler.StepLR(modelVars['optimizer'], step_size=mdlParams['lowerLRAfter'], gamma=1/np.float32(mdlParams['LRstep'])) 685 | 686 | # Define softmax 687 | modelVars['softmax'] = nn.Softmax(dim=1) 688 | 689 | # Manually find latest chekcpoint, tf.train.latest_checkpoint is doing weird shit 690 | files = glob(mdlParams['saveDir']+'/*') 691 | global_steps = np.zeros([len(files)]) 692 | for i in range(len(files)): 693 | # Use meta files to find the highest index 694 | if 'checkpoint' not in files[i]: 695 | continue 696 | if mdlParams['ckpt_name'] not in files[i]: 697 | continue 698 | # Extract global step 699 | nums = [int(s) for s in re.findall(r'\d+',files[i])] 700 | global_steps[i] = nums[-1] 701 | # Create path with maximum global step found, if first is not wanted 702 | global_steps = np.sort(global_steps) 703 | if mdlParams.get('use_first') is not None: 704 | chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(global_steps[-2])) + '.pt' 705 | else: 706 | chkPath = mdlParams['saveDir'] + '/' + mdlParams['ckpt_name'] + str(int(np.max(global_steps))) + '.pt' 707 | print("Restoring: ",chkPath) 708 | 709 | # Load 710 | state = torch.load(chkPath) 711 | # Initialize model and optimizer 712 | modelVars['model'].load_state_dict(state['state_dict']) 713 | #modelVars['optimizer'].load_state_dict(state['optimizer']) 714 | # Get predictions or learn on pred 715 | modelVars['model'].eval() 716 | # Get predictions 717 | # Turn off the skipping of the last class 718 | mdlParams['no_c9_eval'] = False 719 | loss, accuracy, sensitivity, specificity, conf_matrix, f1, auc, waccuracy, predictions, targets, predictions_mc = utils.getErrClassification_mgpu(mdlParams, 'valInd', modelVars) 720 | # Save predictions 721 | allData['extPred'][cv] = predictions 722 | print("extPred shape",allData['extPred'][cv].shape) 723 | pklFileName = sys.argv[2] + "_" + sys.argv[6] + "_" + str(int(np.max(global_steps))) + "_predn.pkl" 724 | 725 | # Mean results over all folds 726 | np.set_printoptions(precision=4) 727 | print("-------------------------------------------------") 728 | print("Mean over all Folds") 729 | print("-------------------------------------------------") 730 | print("F1 Score",np.array([np.mean(allData['f1Best'])]),"+-",np.array([np.std(allData['f1Best'])])) 731 | print("Sensitivtiy",np.mean(allData['sensBest'],0),"+-",np.std(allData['sensBest'],0)) 732 | print("Specificity",np.mean(allData['specBest'],0),"+-",np.std(allData['specBest'],0)) 733 | print("Mean Specificity",np.array([np.mean(allData['specBest'])]),"+-",np.array([np.std(np.mean(allData['specBest'],1))])) 734 | print("Accuracy",np.array([np.mean(allData['accBest'])]),"+-",np.array([np.std(allData['accBest'])])) 735 | print("Per Class Accuracy",np.mean(allData['waccBest'],0),"+-",np.std(allData['waccBest'],0)) 736 | print("Weighted Accuracy",np.array([np.mean(allData['waccBest'])]),"+-",np.array([np.std(np.mean(allData['waccBest'],1))])) 737 | print("AUC",np.mean(allData['aucBest'],0),"+-",np.std(allData['aucBest'],0)) 738 | print("Mean AUC",np.array([np.mean(allData['aucBest'])]),"+-",np.array([np.std(np.mean(allData['aucBest'],1))])) 739 | # Save dict with results 740 | with open(mdlParams['saveDirBase'] + "/" + pklFileName, 'wb') as f: 741 | pickle.dump(allData, f, pickle.HIGHEST_PROTOCOL) 742 | --------------------------------------------------------------------------------