├── C_optimization.m
├── CancerVisualization.m
├── ExtractProstateRegion.m
├── Outlier_Remove.m
├── ProstateRegionfunc.m
├── ProstateRegionfunc_over11.m
├── README.md
├── ROCofSVM.m
├── classifiers_error_rate.m
├── decisionboundary_SVM.m
├── decisionboundary_knn.m
├── decisionboundary_nm.m
├── k_optimization.m
├── knnfeatureselection5to2.m
├── main_ProstateCancerSegmentation.m
├── nearest_mean_classifier.m
├── nmfeatureselection5to2.m
├── normalization.m
├── partioning.m
├── post_processing.m
├── prostate_knn_classifier.m
├── svm_classf.m
├── svmfeatureselection5to2.m
└── testfileSelection.m

/C_optimization.m:
--------------------------------------------------------------------------------
function [max_accuracy_svm,C_opt]=C_optimization(featuredata_new,trainingdata_selected)
% Function to optimize the parameter C of the SVM using a simple grid search.
% Returns a plot, the optimal C and the best accuracy.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
a = 0.40:0.01:0.52; % the search range should be adapted to each data set
% 5-feature training and test matrices (constructed as in svmfeatureselection5to2)
feature_training = trainingdata_selected(:,1:5);
feature_test = featuredata_new.test(:,1:5);
accuracy_rate = arrayfun(@(c) svm_classf2(featuredata_new,trainingdata_selected,feature_training,feature_test,c),a);
[max_accuracy_svm,c_ind] = max(accuracy_rate);
C_opt = a(c_ind);
% plot
plot(a,accuracy_rate,'r:+');
xlabel('parameter c');
ylabel('accuracy'); title('C Optimization');
grid on;
%% SVM function, almost the same as svm_classf, but with only one output
function accuracy=svm_classf2(feature_data,trainingdata_selected,feature_training,feature_test,c)
y_train = trainingdata_selected(:,9);
libsvm_options = ['-c ' num2str(c) ' -g 0.07 -b 1'];
model = svmtrain(y_train,feature_training,libsvm_options);
testing_label_vector = feature_data.test(:,9);
[predicted_label,accuracy,prob_estimates] = svmpredict(testing_label_vector,feature_test,model,'-b 1');
accuracy = accuracy(1)/100; % libsvm returns a 3-element accuracy vector; keep a scalar (as in svm_classf5to2)
--------------------------------------------------------------------------------
/CancerVisualization.m:
--------------------------------------------------------------------------------
function [datasetlabelsNMC,datasetlabelsknn,datasetlabelssvm]=CancerVisualization(dataset,feature_data,NMC_opMATRIX,kNN_opMATRIX,predicted_label,PatientID)
% Function to visualize the results of the three classifiers, based on the
% Imagine app.
% Inputs:  dataset - medical data set
%          feature_data - our feature data set structure (prostate-region
%          information only)
%          NMC_opMATRIX - nearest mean output matrix with the estimated
%          labels in the first column and the true labels in the second
%          kNN_opMATRIX - kNN output matrix with the estimated labels in
%          the first column and the true labels in the second
%          predicted_label - output of the SVM classifier: estimated labels
%          PatientID - patient ID number; here 12, 13 or 14 (the test patients)
% Outputs: datasetlabelsNMC, datasetlabelsknn, datasetlabelssvm are the
%          estimated labels with position information.
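% Example (a usage sketch; assumes the variables produced by
% main_ProstateCancerSegmentation.m are in the workspace):
%   [lNMC,lKNN,lSVM] = CancerVisualization(dataset,featuredata_new, ...
%       NMC_opMATRIX,kNN_opMATRIX,predicted_label,12);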
% Returns an interface with 5 images: the first is the T2-weighted image,
% the second the histological labels, followed by the results of the three
% classifiers (nearest mean, kNN and SVM).
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
% for the nearest mean classifier
datasetlabelsNMC=dataset{PatientID,1}.labelsHisto;% histological labels
datasetlabelsNMC(datasetlabelsNMC~=0)=0;
NMClabelsandposition=[feature_data.test(:,6:8) NMC_opMATRIX(:,1) feature_data.test(:,10)];% position information, label and patient ID
row_index1=NMClabelsandposition(:,5)==PatientID;% select one patient
NMClabelsandpositionP=NMClabelsandposition(row_index1,:);
linearInd = sub2ind(size(datasetlabelsNMC), NMClabelsandpositionP(:,1), NMClabelsandpositionP(:,2), NMClabelsandpositionP(:,3));
datasetlabelsNMC(linearInd)=NMClabelsandpositionP(:,4);% estimated result with position information
% for kNN
datasetlabelsknn=dataset{PatientID,1}.labelsHisto;% histological labels
datasetlabelsknn(datasetlabelsknn~=0)=0;
knnlabelsandposition=[feature_data.test(:,6:8) kNN_opMATRIX(:,1) feature_data.test(:,10)];% position information, label and patient ID
row_index1=knnlabelsandposition(:,5)==PatientID;% select one patient
knnlabelsandpositionP=knnlabelsandposition(row_index1,:);
linearInd = sub2ind(size(datasetlabelsknn), knnlabelsandpositionP(:,1), knnlabelsandpositionP(:,2), knnlabelsandpositionP(:,3));
datasetlabelsknn(linearInd)=knnlabelsandpositionP(:,4);% estimated result with position information
% for the SVM
datasetlabelssvm=dataset{PatientID,1}.labelsHisto;
datasetlabelssvm(datasetlabelssvm~=0)=0;
svmlabelsandposition=[feature_data.test(:,6:8) predicted_label feature_data.test(:,10)];% position information, label and patient ID
row_index1=svmlabelsandposition(:,5)==PatientID;% select one patient
svmlabelsandpositionP=svmlabelsandposition(row_index1,:);
linearInd = sub2ind(size(datasetlabelssvm), svmlabelsandpositionP(:,1), svmlabelsandpositionP(:,2), svmlabelsandpositionP(:,3));
datasetlabelssvm(linearInd)=svmlabelsandpositionP(:,4);% estimated result with position information
%% pictures
hImgAxes = imagine(dataset{PatientID,1}.Image(:,:,:,5),'T2',dataset{PatientID,1}.labelsHisto,'Histo',datasetlabelsNMC,'nearest mean',...
    datasetlabelsknn,'kNN',datasetlabelssvm,'SVM');
--------------------------------------------------------------------------------
/ExtractProstateRegion.m:
--------------------------------------------------------------------------------
function Extract_features=ExtractProstateRegion
% Function to isolate the feature vectors belonging to prostate tissue
% from the whole data set. There are two subfunctions: the first handles
% patients no. 1-11 of dataset.mat, the second patients no. 12-14.
% The output is a 10-column matrix: 5 feature columns, 3 coordinate
% columns, the label and the patient ID.
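% Example (usage sketch; requires dataset.mat on the MATLAB path, since
% the subfunctions load it themselves):
%   Extract_features = ExtractProstateRegion;  % one row per prostate voxel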
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
Extract_features=[];
for i=1:11
    Extract_features=[Extract_features;ProstateRegionfunc(i)];
end
for i=12:14
    Extract_features=[Extract_features;ProstateRegionfunc_over11(i)];
end
--------------------------------------------------------------------------------
/Outlier_Remove.m:
--------------------------------------------------------------------------------
function featuredata_new=Outlier_Remove(feature_data)
% Function to detect and remove outliers (10% of the data set) using the
% Mahalanobis distance.
% The input is the feature data set structure.
% The output is a feature data set structure without outliers.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% the 5 features and the labels of the data set
fivefeatures_training=feature_data.training(:,1:5);
featurelabels_training=feature_data.training(:,end-1);
fivefeatures_test=feature_data.test(:,1:5);
featurelabels_test=feature_data.test(:,end-1);
labels = unique(featurelabels_training);
%% detect and remove the outliers
mu = zeros(2,5);% initial value of the means of the two classes
C = cell(2,1);% initial value of the covariances of the two classes
NrOfData = size(fivefeatures_training,1);
dist = zeros(NrOfData,1);% initial value of the Mahalanobis distances
fivefeaturesPLUSdist = [feature_data.training dist];
Dataout=[];
for c=1:2
    mu(c,:) = mean(fivefeatures_test(featurelabels_test==labels(c),:));% determine the means using the test data set
    C{c} = cov(fivefeatures_test(featurelabels_test==labels(c),:));% determine the covariances using the test data set
    tmp = bsxfun(@minus,fivefeatures_training(featurelabels_training==labels(c),:),mu(c,:))/chol(C{c});
    intermc1=fivefeaturesPLUSdist(featurelabels_training==labels(c),:);
    intermc1(:,11) = sum(tmp.*tmp,2);% squared Mahalanobis distance
    NrSumData = size(intermc1,1);
    Nr_outlier=round(NrSumData/10);% definition of an outlier: the 10% of
    % the training samples with the largest distance
    [~,I]=sort(intermc1(:,11));
    index_new=I(1:NrSumData-Nr_outlier);% remove the outliers
    Dataout=[Dataout;intermc1(index_new,:)];
end
featuredata_new.training=Dataout(:,1:10);
featuredata_new.test=feature_data.test;
--------------------------------------------------------------------------------
/ProstateRegionfunc.m:
--------------------------------------------------------------------------------
function index4dim=ProstateRegionfunc(i)
% Function to isolate the feature vectors belonging to prostate tissue of
% patients no. 1-11 from the whole data set.
% The input i is the patient ID number.
% The output is a 10-column matrix: 5 feature columns, 3 coordinate
% columns, the label and the patient ID.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% load the data set
load dataset.mat
%% ground truth
% The ground truth: label=0 if labelA does not equal labelB; label=labelA
% if labelA = labelB. label=0 marks non-prostate tissue, label=1 healthy
% prostate tissue and label=2 cancerous prostate tissue.
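% As implemented below, the rule is a voxel-wise product labelA.*labelB:
%   A=1, B=1 -> 1                  (both experts say healthy)
%   A=2, B=2 -> 4, relabelled 2    (both experts say cancer)
%   A=1, B=2 (or 2,1) -> 2, relabelled 0 (experts disagree, discarded)
%   A=0 or B=0 -> 0                (non-prostate tissue)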
data1=dataset{i,1}.LabelsA;
data2=dataset{i,1}.LabelsB;% data1 & data2 are the manual labels of two experts
dataprostate=data1.*data2;% combination of the two expert labels
dataprostate(dataprostate==2)=0;% label=0 if labelA does not equal labelB
dataprostate(dataprostate==4)=2;% label=2 if labelA = labelB = 2
labelcancer=dataprostate;
nonzerolabel=labelcancer(labelcancer~=0);% the prostate region labels
%% extract the patient ID and the position information
patientInd=nonzerolabel;
patientInd(:)=i; % patient ID
dataprostate(dataprostate~=0)=1;% a nonzero element marks prostate tissue
% linear indices of the nonzero elements of the matrix (prostate region)
Ind=find(dataprostate);
[I,J,K]=ind2sub(size(dataprostate),Ind);% row, column and slice of each voxel
%% extract the 5 features
image=dataset{i,1}.Image;
feature = zeros(numel(Ind),5);
for k=1:5
    % prostate region of the 3D image; the 4th dimension holds the 5 features.
    % Indexing with Ind keeps the rows aligned with the coordinates I,J,K.
    region=image(:,:,:,k);
    feature(:,k) = region(Ind);
end
% transform the T2-weighted MR intensities to the range [0,1]
T2=feature(:,5);
T201=(T2-min(T2))/(max(T2)-min(T2));
feature(:,5)=T201;
%% output
index4dim=[feature,I,J,K,nonzerolabel,patientInd];
--------------------------------------------------------------------------------
/ProstateRegionfunc_over11.m:
--------------------------------------------------------------------------------
function index4dim=ProstateRegionfunc_over11(i)
% Function to isolate the feature vectors belonging to prostate tissue of
% patients no. 12-14 from the whole data set.
% The input i is the patient ID number.
% The output is a 10-column matrix: 5 feature columns, 3 coordinate
% columns, the label and the patient ID.
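% Example (usage sketch): index4dim = ProstateRegionfunc_over11(12);
% Column layout: [feat1..feat5, row, col, slice, histo label, patient ID].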
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% load the data set
load dataset.mat
%% extract the labels, the patient ID and the position information
dataprostate=dataset{i,1}.labelsHisto; % the labels from a histological analysis
labelcancer=dataprostate;
nonzerolabel=labelcancer(labelcancer~=0);% the prostate region labels
patientInd=nonzerolabel;
patientInd(:)=i;% patient ID
dataprostate(dataprostate~=0)=1; % a nonzero element marks prostate tissue
% linear indices of the nonzero elements of the matrix (prostate region)
Ind=find(dataprostate);
[I,J,K]=ind2sub(size(dataprostate),Ind); % row, column and slice of each voxel
%% extract the 5 features
image=dataset{i,1}.Image;
image(isnan(image))=0; % remove NaNs from the data set
feature = zeros(numel(Ind),5);
for k=1:5
    % prostate region of the 3D image; the 4th dimension holds the 5 features.
    % Indexing with Ind keeps the rows aligned with the coordinates I,J,K.
    region=image(:,:,:,k);
    feature(:,k) = region(Ind);
end
% transform the T2-weighted MR intensities to the range [0,1]
T2=feature(:,5);
T201=(T2-min(T2))/(max(T2)-min(T2));
feature(:,5)=T201;
%% output
index4dim=[feature,I,J,K,nonzerolabel,patientInd];
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Medical-image-segmentation-using-machine-learning
#### Prostate cancer segmentation based on MRI and PET images
Implemented three classifiers (Nearest Mean, kNN and SVM) for automatic prostate cancer segmentation based on MRI and PET images.
--------------------------------------------------------------------------------
/ROCofSVM.m:
--------------------------------------------------------------------------------
function ROCofSVM(predicted_label,prob_estimates)
% ROC plot for the SVM.
% The inputs are the outputs of the SVM classifier: the predicted labels
% and the probability estimates.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
predicted_label(predicted_label==2)=0;% remap label 2 (cancer) to 0 for plotroc
plotroc(predicted_label,prob_estimates);
--------------------------------------------------------------------------------
/classifiers_error_rate.m:
--------------------------------------------------------------------------------
function er = classifiers_error_rate(NMC_opMATRIX,kNN_opMATRIX,accuracy)
% Function to compute the error rates of the three classifiers.
% Inputs:  NMC_opMATRIX - nearest mean output matrix with the estimated
%          labels in the first column and the true labels in the second
%          kNN_opMATRIX - kNN output matrix with the estimated labels in
%          the first column and the true labels in the second
%          accuracy - the accuracy vector returned by the SVM (first
%          entry in percent)
% Output:  er - the error rates of the three classifiers
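% Example (usage sketch, with the variables produced in
% main_ProstateCancerSegmentation.m):
%   er = classifiers_error_rate(NMC_opMATRIX,kNN_opMATRIX,accuracy);
%   bar(er)   % er = [nearest mean, kNN, SVM] error rates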
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
er=zeros(1,3);
er(1) = sum(NMC_opMATRIX(:,1)~=NMC_opMATRIX(:,2))/size(NMC_opMATRIX,1);
er(2) = sum(kNN_opMATRIX(:,1)~=kNN_opMATRIX(:,2))/size(kNN_opMATRIX,1);
er(3) = 1-accuracy(1)/100;
--------------------------------------------------------------------------------
/decisionboundary_SVM.m:
--------------------------------------------------------------------------------
function decisionboundary_SVM(trainingdata_selected)
% Function to plot the 2-dimensional decision boundary of the SVM classifier.
% The input is the selected training set; the SVM cost parameter C is
% fixed inside this function.
% Returns a decision boundary plot: red x = cancer, blue + = healthy cells.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% set up the data set for the decision boundary
t = trainingdata_selected(:,9);% labels
t(t==2)=0;% remap cancer (label 2) to 0
DBfeature_data.training = [trainingdata_selected(:,[4 5]) t];% select 2 features
%% Generate the decision boundary
[X1,X2] = meshgrid(min(DBfeature_data.training(:,1)):0.01:max(DBfeature_data.training(:,1)),min(DBfeature_data.training(:,2)):0.01:max(DBfeature_data.training(:,2)));
% use the grid points as the test set
DBfeature_data.test = [X1(:) X2(:)];
predicted = svm_classfTWO(DBfeature_data,0.51);% C fixed to 0.51 here
classes = reshape(predicted,size(X1));% estimated labels on the grid
ma = {'x','+'};% marker x for cancer, + for healthy
fc = {[1 0 0]; [0 0 1]};% red, blue
tv = unique(t);
% plot the training points
figure(2); hold off
for i = 1:length(tv)
    pos = find(t==tv(i));
    plot(DBfeature_data.training(pos,1),DBfeature_data.training(pos,2),ma{i},'markerfacecolor',fc{i});
    hold on
end
% plot the decision boundary as a contour line
contour(X1,X2,classes,[0.5 0.5],'k');
legend('cancer','healthy');
title('SVM');
%% SVM classifier for the 2-dimensional feature space
function predicted_label=svm_classfTWO(feature_data,c)
% almost the same as svm_classf, but with only one output: the estimated
% labels predicted_label
X_train=feature_data.training(:,1:2);
y_train=feature_data.training(:,3);
libsvm_options=['-c ' num2str(c) ' -g 0.07 -b 1'];
model=svmtrain(y_train,X_train,libsvm_options);
testing_label_vector=feature_data.test(:,end);% dummy labels, only needed for svmpredict's accuracy printout
[predicted_label,accuracy,prob_estimates]=svmpredict(testing_label_vector,feature_data.test,model,'-b 1');
--------------------------------------------------------------------------------
/decisionboundary_knn.m:
--------------------------------------------------------------------------------
function decisionboundary_knn(trainingdata_selected,K)
% Function to plot the 2-dimensional decision boundary of the kNN classifier.
% The input is the selected training set; K is the kNN parameter.
% Returns a decision boundary plot: red x = cancer, blue + = healthy cells.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% set up the data set for the decision boundary
t = trainingdata_selected(:,9);% labels
t(t==2)=0;% remap cancer (label 2) to 0
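% Features 4 and 5 are used for the 2-D plot below -- presumably the pair
% found by the *featureselection5to2 functions (an assumption; change the
% column indices if feature selection picks a different pair).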
DBfeature_data.training = [trainingdata_selected(:,[4 5]) t];% select 2 features
%% Generate the decision boundary
[X1,X2] = meshgrid(min(DBfeature_data.training(:,1)):0.1:max(DBfeature_data.training(:,1)),min(DBfeature_data.training(:,2)):0.1:max(DBfeature_data.training(:,2)));
% use the grid points as the test set
DBfeature_data.test = [X1(:) X2(:)];
predicted = prostate_knn_classifierTWO(DBfeature_data,K);
classes = reshape(predicted,size(X1));% estimated labels on the grid
ma = {'x','+'};% marker x for cancer, + for healthy
fc = {[1 0 0]; [0 0 1]};% red, blue
tv = unique(t);
figure(1); hold off
% plot the training points
for i = 1:length(tv)
    pos = find(t==tv(i));
    plot(DBfeature_data.training(pos,1),DBfeature_data.training(pos,2),ma{i},'markerfacecolor',fc{i});
    hold on
end
% plot the decision boundary as a contour line
contour(X1,X2,classes,[0.5 0.5],'k');
legend('cancer','healthy');
ti = sprintf('kNN classifier K = %g',K);
title(ti);
function EVAL_MATRIX=prostate_knn_classifierTWO(feature_data,k)
% kNN classifier for the 2-dimensional feature space
R=feature_data.test(:,1:2);
Q=feature_data.training(:,1:2);
group=feature_data.training(:,end);
EVAL_MATRIX=knnclassify(R,Q,group,k);% knnclassify is from the Bioinformatics Toolbox
--------------------------------------------------------------------------------
/decisionboundary_nm.m:
--------------------------------------------------------------------------------
function decisionboundary_nm(trainingdata_selected)
% Function to plot the 2-dimensional decision boundary of the nearest mean
% classifier. The input is the selected training set.
% Returns a decision boundary plot: red x = cancer, blue + = healthy cells.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% set up the data set for the decision boundary
t = trainingdata_selected(:,9);% labels
t(t==2)=0;% remap cancer (label 2) to 0
DBfeature_data.training = [trainingdata_selected(:,[4 5]) t];% select 2 features
%% Generate the decision boundary
[X1,X2] = meshgrid(min(DBfeature_data.training(:,1)):0.01:max(DBfeature_data.training(:,1)),min(DBfeature_data.training(:,2)):0.01:max(DBfeature_data.training(:,2)));
% use the grid points as the test set
DBfeature_data.test = [X1(:) X2(:)];
predicted = nearest_mean_classifierTWO(DBfeature_data);
classes = reshape(predicted,size(X1));% estimated labels on the grid
ma = {'x','+'};% marker x for cancer, + for healthy
fc = {[1 0 0]; [0 0 1]};% red, blue
tv = unique(t);
% plot the training points
figure(1); hold off
for i = 1:length(tv)
    pos = find(t==tv(i));
    plot(DBfeature_data.training(pos,1),DBfeature_data.training(pos,2),ma{i},'markerfacecolor',fc{i});
    hold on
end
% plot the decision boundary as a contour line
contour(X1,X2,classes,[0.5 0.5],'k');
legend('cancer','healthy');
title('Nearest mean classifier');
%% nearest mean classifier for the 2-dimensional feature space
function NMC_opMATRIX = nearest_mean_classifierTWO(feature_data)
% same as the 5-feature nearest mean classifier, restricted to 2 features
% training
feature_training = feature_data.training(:,1:2);
y_train = feature_data.training(:,3);

labels = unique(y_train);
d = size(feature_training,2);
mu_nmc = zeros(2,d);
C_nmc = cell(2,1);
for c=1:2
    mu_nmc(c,:) = mean(feature_training(y_train==labels(c),:));
    C_nmc{c} = cov(feature_training(y_train==labels(c),:));
end
% test
feature_test = feature_data.test(:,1:2);
N_TEST = size(feature_test,1);
NMC_opMATRIX = zeros(N_TEST,1);
% estimate the labels for the samples of the test set
dist = zeros(N_TEST,2);
for c=1:2
    tmp = bsxfun(@minus,feature_test,mu_nmc(c,:)) / chol(C_nmc{c});
    dist(:,c) = sum(tmp.*tmp,2);
end
[~,ind] = min(dist,[],2);
NMC_opMATRIX(:,1) = labels(ind);
--------------------------------------------------------------------------------
/k_optimization.m:
--------------------------------------------------------------------------------
function [max_accuracy,max_k_ind]=k_optimization(featuredata_new,trainingdata_selected)
% Function to optimize the parameter K of the kNN classifier.
% Returns a plot, the optimal K and the best accuracy.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
RUNS = 200;
a = 1:RUNS;% K from 1 to 200
accuracy_rate = zeros(1,RUNS);
for i = 1:RUNS
    M = prostate_knn_classifier(featuredata_new.test,trainingdata_selected(:,1:5),trainingdata_selected(:,9),a(i));
    accuracy_rate(i) = sum(M(:,1)==M(:,2))/size(M,1);% accuracy for K = a(i)
end
% find the optimal K
[max_accuracy,max_k_ind] = max(accuracy_rate);
% plot
plot(a,accuracy_rate,'r:+');
xlabel('parameter k');
ylabel('accuracy'); title('K Optimization');
grid on;
--------------------------------------------------------------------------------
/knnfeatureselection5to2.m:
--------------------------------------------------------------------------------
function ac_max_nm_knn=knnfeatureselection5to2(feature_data)
% Function to select the best 2 of the 5 features for the kNN classifier.
% The input is the feature data set.
% The output is a vector with the indices of the best feature pair and
% its accuracy.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
nmac_result=[];
for n=1:4
    for m=n+1:5
        feature_training=feature_data.training(:,[n m]);
        feature_test=feature_data.test(:,[n m]);
        ac = prostate_knn_classifier5to2(feature_data,feature_test,feature_training,50);
        nmac_result=cat(2,nmac_result,[n;m;ac]);
    end
end
[~,index_max]=max(nmac_result(3,:));
ac_max_nm_knn=nmac_result(:,index_max);
%% kNN for 2 features
function Ac_knn =prostate_knn_classifier5to2(feature_data,feature_test,feature_training,k)
group=feature_data.training(:,9);
classification=knnclassify(feature_test,feature_training,group,k);
Ac_knn = sum(classification==feature_data.test(:,9))/size(classification,1);
--------------------------------------------------------------------------------
/main_ProstateCancerSegmentation.m:
--------------------------------------------------------------------------------
% Main script of part 1: prostate cancer segmentation based on MRI and PET
% images.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
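%% setup (an assumed parameter block -- the original script leaves
% dataset, K, c and PatientID undefined; the values below are plausible
% choices taken from the other files, not fixed by the authors)
load dataset.mat    % the medical data set used throughout
K = 50;             % kNN neighbourhood size (cf. k_optimization)
c = 0.45;           % SVM cost parameter (cf. C_optimization)
PatientID = 12;     % test patient to visualize (12, 13 or 14)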
%% preprocessing
% extract the prostate region
Extract_features=ExtractProstateRegion;
% feature normalization
norm_extract=normalization(Extract_features);
% data set partitioning
feature_data=partioning(norm_extract);
% outlier detection and removal
featuredata_new=Outlier_Remove(feature_data);
% training set selection (note: set_selection.m is not part of this file listing)
trainingdata_selected = set_selection(featuredata_new,5000);
%% Implementation of the classifiers
NMC_opMATRIX = nearest_mean_classifier(featuredata_new);% nearest mean
kNN_opMATRIX = prostate_knn_classifier(featuredata_new.test,trainingdata_selected(:,1:5),trainingdata_selected(:,9),K);% kNN
[predicted_label,accuracy,prob_estimates]=svm_classf(featuredata_new,trainingdata_selected,c);% SVM
%% Feature selection
ac_max_nm=nmfeatureselection5to2(featuredata_new);% feature selection for the nearest mean classifier
ac_max_nm_knn=knnfeatureselection5to2(featuredata_new);% feature selection for the kNN classifier
ac_max_nm_svm=svmfeatureselection5to2(featuredata_new,trainingdata_selected);% feature selection for the SVM classifier
% parameter optimization
[max_accuracy,max_k_ind]=k_optimization(featuredata_new,trainingdata_selected);% K optimization
[max_accuracy_svm,C_opt]=C_optimization(featuredata_new,trainingdata_selected);% C optimization
%% Validation
% error rate comparison
er = classifiers_error_rate(NMC_opMATRIX,kNN_opMATRIX,accuracy);
% ROC of the SVM
ROCofSVM(predicted_label,prob_estimates);
%% Visualization
% decision boundaries
decisionboundary_nm(trainingdata_selected);
decisionboundary_knn(trainingdata_selected,K);% vary K to plot different boundaries
decisionboundary_SVM(trainingdata_selected);
% visualization using imagine.m
[datasetlabelsNMC,datasetlabelsknn,datasetlabelssvm]=CancerVisualization(dataset,featuredata_new,NMC_opMATRIX,kNN_opMATRIX,predicted_label,PatientID);
%% post processing
post_processing(dataset,datasetlabelsNMC,datasetlabelsknn,datasetlabelssvm);
--------------------------------------------------------------------------------
/nearest_mean_classifier.m:
--------------------------------------------------------------------------------
function NMC_opMATRIX = nearest_mean_classifier(feature_data)
% Nearest mean classifier for 5 features, using the Mahalanobis distance.
% The input is the feature data set.
% The output NMC_opMATRIX is a matrix containing the estimated and true
% labels for every sample in the test field of the feature data structure.
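% For each class c with mean mu_c and covariance C_c, the squared
% Mahalanobis distance of a sample x is
%   d_c(x) = (x - mu_c) * inv(C_c) * (x - mu_c)'
% and a sample is assigned to the class with the smaller distance. Below
% it is computed via the Cholesky factor of C_c, which avoids forming
% inv(C_c) explicitly.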
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% training
feature_training = feature_data.training(:,1:5);
y_train = feature_data.training(:,9);% labels

labels = unique(y_train);
d = size(feature_training,2);
mu_nmc = zeros(2,d);
C_nmc = cell(2,1);
for c=1:2
    mu_nmc(c,:) = mean(feature_training(y_train==labels(c),:));
    C_nmc{c} = cov(feature_training(y_train==labels(c),:));
end
%% test
feature_test = feature_data.test(:,1:5);
N_TEST = size(feature_test,1);
% will contain the estimated labels in the first column and the true
% labels in the second column
NMC_opMATRIX = zeros(N_TEST,2);
NMC_opMATRIX(:,2) = feature_data.test(:,9);
% estimate the labels for the samples of the test set
dist = zeros(N_TEST,2);
for c=1:2
    tmp = bsxfun(@minus,feature_test,mu_nmc(c,:)) / chol(C_nmc{c});
    dist(:,c) = sum(tmp.*tmp,2);
end
[~,ind] = min(dist,[],2);
NMC_opMATRIX(:,1) = labels(ind);
%ac = sum(NMC_opMATRIX(:,1)==NMC_opMATRIX(:,2))/size(NMC_opMATRIX,1);
--------------------------------------------------------------------------------
/nmfeatureselection5to2.m:
--------------------------------------------------------------------------------
function ac_max_nm=nmfeatureselection5to2(feature_data)
% Function to select the best 2 of the 5 features for the nearest mean
% classifier. The input is the feature data set.
% The output is a vector with the indices of the best feature pair and
% its accuracy.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
nmac_result=[];
for n=1:4
    for m=n+1:5
        feature_training=feature_data.training(:,[n m]);
        feature_test=feature_data.test(:,[n m]);
        ac = nearest_mean_classifier2(feature_data,feature_training,feature_test);
        nmac_result=cat(2,nmac_result,[n;m;ac]);
    end
end
[~,index_max]=max(nmac_result(3,:));
ac_max_nm=nmac_result(:,index_max);
%% nearest mean classifier for 2 features
function ac = nearest_mean_classifier2(feature_data,feature_training,feature_test)
% nearest mean classifier using the Mahalanobis distance for 2 features
y_train = feature_data.training(:,end-1);
labels = unique(y_train);
d = size(feature_training,2);
mu_nmc = zeros(2,d);
C_nmc = cell(2,1);
for c=1:2
    mu_nmc(c,:) = mean(feature_training(y_train==labels(c),:));
    C_nmc{c} = cov(feature_training(y_train==labels(c),:));
end

N_TEST = size(feature_test,1);
% will contain the estimated labels in the first column and the true
% labels in the second column
nmc_output = zeros(N_TEST,2);
nmc_output(:,2) = feature_data.test(:,9);

dist = zeros(N_TEST,2);
for c=1:2
    tmp = bsxfun(@minus,feature_test,mu_nmc(c,:)) / chol(C_nmc{c});
    dist(:,c) = sum(tmp.*tmp,2);
end

[~,ind] = min(dist,[],2);
nmc_output(:,1) = labels(ind);
ac = sum(nmc_output(:,1)==nmc_output(:,2))/size(nmc_output,1);
--------------------------------------------------------------------------------
/normalization.m:
--------------------------------------------------------------------------------
function norm_extract=normalization(Extract_features)
% Function to normalize the 5 features: scale each dimension of the
% feature vector to zero mean and unit variance.
% The input is the matrix of extracted features.
% The output is the matrix with normalized features.
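% i.e. each feature column x is mapped to  x_norm = (x - mean(x)) / std(x)
% (the commented-out alternative below instead rescales to the range [0,1])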
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%% References
% [1] Andrew R. Webb, Keith D. C.: Statistical Pattern Recognition. 3rd
% edition. John Wiley & Sons, 2011.
% [2] Aksoy, Selim; Haralick, Robert M.: Feature normalization
% and likelihood-based similarity measures for image retrieval.
% In: Pattern Recognition Letters 22 (2001), Nr. 5, 563-582.
%%
sigfeature = Extract_features(:,1:5);
normalfeature = zeros(size(sigfeature));
for j=1:5
    normalfeature(:,j)=(sigfeature(:,j)-mean(sigfeature(:,j),1))/std(sigfeature(:,j),0,1);
    % an alternative normalization technique transforms the features to the range [0,1]:
    % normalfeature01(:,j)=(normalfeature(:,j)-min(normalfeature(:,j)))/(max(normalfeature(:,j))-min(normalfeature(:,j)));
end
norm_extract=[normalfeature,Extract_features(:,6:10)];
--------------------------------------------------------------------------------
/partioning.m:
--------------------------------------------------------------------------------
function feature_data=partioning(norm_extract)
% Function to partition the 14 patient data sets into a training set and a
% validation set.
% The input is the matrix of normalized features.
% The output is a data structure storing the training and validation sets.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
row_index1=norm_extract(:,10)<12;
feature_data.training=norm_extract(row_index1,:);% training set: patients 1-11
row_index2=norm_extract(:,10)>11;
feature_data.test=norm_extract(row_index2,:);% test set: patients 12-14
--------------------------------------------------------------------------------
/post_processing.m:
--------------------------------------------------------------------------------
function post_processing(dataset,datasetlabelsNMC,datasetlabelsknn,datasetlabelssvm)
% Post-processing based on the morphological operations erosion and dilation.
% Returns three figures, one per classifier.
% Inputs:  dataset - medical data set
%          datasetlabelsNMC, datasetlabelsknn, datasetlabelssvm - the
%          estimated labels with position information
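% Erosion followed by dilation is a morphological opening: it removes
% small isolated (false-positive) label regions while roughly preserving
% the larger segmented areas. Sketch of the operation used below:
%   opened = imdilate(imerode(L,strel('disk',4)),strel('disk',5));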
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
se = strel('disk',4);% disk-shaped structuring element for the erosion
se2 = strel('disk',5);% disk-shaped structuring element for the dilation
NMCerode = imerode(datasetlabelsNMC,se);% erosion
NMCdilate = imdilate(NMCerode,se2);% dilation
knn12erode = imerode(datasetlabelsknn,se);
knn12dilate = imdilate(knn12erode,se2);
svm12erode = imerode(datasetlabelssvm,se);
svmdilate = imdilate(svm12erode,se2);
% visualization for patient 12
hImgAxes = imagine(dataset{12,1}.Image(:,:,:,5),'T2',dataset{12,1}.labelsHisto,'Histo',datasetlabelsNMC,'nearest mean',NMCdilate,'NM+PP');
hImgAxes1 = imagine(dataset{12,1}.Image(:,:,:,5),'T2',dataset{12,1}.labelsHisto,'Histo',datasetlabelsknn,'kNN',knn12dilate,'kNN+PP');
hImgAxes2 = imagine(dataset{12,1}.Image(:,:,:,5),'T2',dataset{12,1}.labelsHisto,'Histo',datasetlabelssvm,'SVM',svmdilate,'SVM+PP');
--------------------------------------------------------------------------------
/prostate_knn_classifier.m:
--------------------------------------------------------------------------------
function kNN_opMATRIX = prostate_knn_classifier(feature_test, X_train, y_train, K)
% k-nearest neighbours (kNN) classifier.
% Inputs:  feature_test: the full test matrix (features in columns 1-5,
%          true labels in column 9), as stored in feature_data.test
%          X_train: training sample features, N-by-5 matrix
%          y_train: training sample labels, N-by-1 column vector
%          K: the k in k-nearest neighbours
% Output:  kNN_opMATRIX: a matrix containing the estimated labels in the
%          first column and the true labels in the second column, for
%          every sample of the test set.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
% called from the main script as:
% prostate_knn_classifier(featuredata_new.test, trainingdata_selected(:,1:5), trainingdata_selected(:,9), K)

X_test = feature_test(:,1:5);% use only the 5 feature columns for the distances
[N_test,~] = size(X_test);

predicted_label = zeros(N_test,1);
for i=1:N_test
    % determine the K nearest neighbours and their distances
    [dists, neighbors] = top_K_neighbors(X_train,y_train,X_test(i,:),K);
    % recognize the label of the test vector by majority vote
    predicted_label(i) = recog(y_train(neighbors),max(y_train));
end

kNN_opMATRIX = zeros(N_test,2);
kNN_opMATRIX(:,1) = predicted_label;
kNN_opMATRIX(:,2) = feature_test(:,9);% true labels of the test set
end
%% calculate the distances
function [dists,neighbors] = top_K_neighbors( X_train,y_train,X_test,K )
% Input:
% X_test: the test vector, 1-by-P
% X_train and y_train: the training data set
% K: the number of neighbours
[N_train,~] = size(X_train);
test_mat = repmat(X_test,N_train,1);
dist_mat = (X_train-double(test_mat)) .^2;
% the distance is the (squared) Euclidean distance
dist_array = sum(dist_mat,2);
[dists, neighbors] = sort(dist_array);
% the neighbours are the indices of the K nearest points
dists = dists(1:K);
neighbors = neighbors(1:K);
end

function label_index = recog( K_labels,class_num )
%RECOG majority vote among the K nearest labels
[K,~] = size(K_labels);
class_count = zeros(class_num,1);
for i=1:K
    class_index = K_labels(i);% the labels (1 or 2) directly index class_count
    class_count(class_index) = class_count(class_index) + 1;
end
[~,label_index] = max(class_count);
end
--------------------------------------------------------------------------------
/svm_classf.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kladde99/Medical-image-segmentation-using-machine-learning/ffe47401887fcd42afa46f4bbee33fb9dd51e71a/svm_classf.m
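The file body is not inlined in this dump, only the raw URL above. The
following is a minimal sketch of what svm_classf presumably contains,
inferred from its call in main_ProstateCancerSegmentation.m and from the
sibling helpers svm_classf2/svm_classf5to2 (an assumption -- the actual
file may differ):

function [predicted_label,accuracy,prob_estimates]=svm_classf(feature_data,trainingdata_selected,c)
% libsvm-based SVM: train on the selected training samples and predict
% the labels of the test set (sketch, not the authors' original file)
X_train = trainingdata_selected(:,1:5);% 5 features
y_train = trainingdata_selected(:,9);% labels
libsvm_options = ['-c ' num2str(c) ' -g 0.07 -b 1'];
model = svmtrain(y_train,X_train,libsvm_options);
testing_label_vector = feature_data.test(:,9);
[predicted_label,accuracy,prob_estimates] = svmpredict(testing_label_vector,feature_data.test(:,1:5),model,'-b 1');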
--------------------------------------------------------------------------------
/svmfeatureselection5to2.m:
--------------------------------------------------------------------------------
function ac_max_nm_svm=svmfeatureselection5to2(feature_data,trainingdata_selected)
% Function to select the best 2 of the 5 features for the SVM classifier.
% The input is the feature data set.
% The output is a vector with the indices of the best feature pair and
% its accuracy.
%% Author Information
% Hao Wang, Wangbo Zheng
% patrecgroup08
% University of Stuttgart
%%
nmac_result=[];
for n=1:4
    for m=n+1:5
        feature_training=trainingdata_selected(:,[n m]);
        feature_test=feature_data.test(:,[n m]);
        ac=svm_classf5to2(feature_data,trainingdata_selected,feature_training,feature_test,0.45);
        nmac_result=cat(2,nmac_result,[n;m;ac]);
    end
end
[~,index_max]=max(nmac_result(3,:));
ac_max_nm_svm=nmac_result(:,index_max);
%% SVM for 2 features
function SVM_accuracy=svm_classf5to2(feature_data,trainingdata_selected,feature_training,feature_test,c)
y_train=trainingdata_selected(:,9);
libsvm_options=['-c ' num2str(c) ' -g 0.07 -b 1'];
model=svmtrain(y_train,feature_training,libsvm_options);
testing_label_vector=feature_data.test(:,9);
[predicted_label,accuracy,prob_estimates]=svmpredict(testing_label_vector,feature_test,model,'-b 1');
SVM_accuracy=accuracy(1)/100;
--------------------------------------------------------------------------------
/testfileSelection.m:
--------------------------------------------------------------------------------
function [b_test,b_training]=testfileSelection(lengthOfFrame,b,NrOfFile)
% Splits the frame matrix b of the audio files into test-file frames and
% training-file frames, in preparation for cross-validation.
%% Author info
% Wangbo Zheng and Hao Wang
% University of Stuttgart

%%
% determine where the frames of audio file NrOfFile start
if NrOfFile==1
    positionofstart=1;
else
    positionofstart=sum(lengthOfFrame(1:NrOfFile-1))+1;
end
b_test=b(positionofstart:positionofstart+lengthOfFrame(NrOfFile)-1,:);
if NrOfFile==1
    b_training=b(positionofstart+lengthOfFrame(NrOfFile):end,:);
elseif NrOfFile==10
    b_training=b(1:positionofstart-1,:);
else
    b_training=[b(1:positionofstart-1,:); b(positionofstart+lengthOfFrame(NrOfFile):end,:)];
end
--------------------------------------------------------------------------------
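Usage sketch for testfileSelection (hedged; lengthOfFrame and b are
assumed to come from the audio part of the project, with 10 files, as the
NrOfFile==10 special case suggests):

for NrOfFile = 1:10
    [b_test,b_training] = testfileSelection(lengthOfFrame,b,NrOfFile);
    % ... train on b_training, evaluate on b_test ...
end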