├── .gitattributes ├── 20.png ├── BreastCancerDetection.py ├── CSIS_Poster.pdf ├── CSISproject.pdf ├── README.md ├── Thumbs.db ├── outnorm.png ├── stainNorm_Macenko.py └── stain_utils.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GiteshJ/Breast-Cancer-Phase-Detection/ad64a49ea181501b161818dce902100fe8324f3c/20.png -------------------------------------------------------------------------------- /BreastCancerDetection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Jan 15 16:25:12 2018 4 | 5 | @author: GITESH 6 | """ 7 | 8 | from skimage import io,color 9 | from skimage.transform import resize 10 | import numpy as np 11 | from keras import layers 12 | from keras.layers import Input, Add, Dense,Dropout,Concatenate, Activation,merge , ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D, GlobalMaxPooling2D 13 | from keras.models import Model, load_model 14 | from keras.preprocessing import image 15 | from keras.utils import layer_utils 16 | from keras.utils.data_utils import get_file 17 | from keras.applications.imagenet_utils import preprocess_input 18 | from IPython.display import SVG 19 | from keras.utils.vis_utils import model_to_dot 20 | from keras.utils import plot_model 21 | from keras.initializers import glorot_uniform 22 | import scipy.misc 23 | from keras.optimizers import * 24 | from matplotlib import pyplot as plt 25 | from numpy import array 26 | from matplotlib.pyplot import imshow 27 | import keras.backend as K 28 | K.set_image_data_format('channels_last') 29 | 
K.set_learning_phase(1)
# NOTE(review): the call above pins the Keras learning phase to 1 (training)
# before any layer is built. Presumably Dropout then also stays active during
# the validation and evaluate_generator passes further down -- confirm against
# the Keras backend documentation before trusting the reported metrics.
from keras.preprocessing.image import ImageDataGenerator
import json

# Prefixes of the explicit layer names; the running layer index is appended.
bn = 'bn_layer_'
conv = 'conv_layer_'
fc = 'fc_layer_'
# Number of filters produced by every composite (3x3) convolution.
k = 32
# Rate handed to every Dropout layer in the network.
# NOTE(review): Keras treats this value as the fraction of units to DROP, so
# 0.8 discards 80% of the activations after each convolution. It looks like
# it may have been intended as a keep-probability -- confirm before changing,
# because previously trained models were produced with this value.
DROPOUT_RATE = 0.8


def save_history(history, file):
    """Write a training-history mapping to ``file`` as JSON.

    :param history: JSON-compatible mapping (the script passes
        ``History.history``).
    :param file: path of the file to create or overwrite.
    :raises TypeError: if a value is neither JSON-serialisable nor
        convertible with ``float()``.
    """
    with open(file, 'w') as f:
        # default=float is only consulted for objects json cannot encode
        # itself, e.g. NumPy scalar metric values; plain floats are untouched.
        json.dump(history, f, default=float)


# To load a saved history back:
#     with open('mydatafile') as f:
#         data = json.load(f)


def _conv_stage(X, filters, kernel_size, layer):
    """Apply BatchNorm -> ReLU -> stride-1 'same' Conv2D -> Dropout.

    The BatchNorm and Conv2D layers are named ``bn + str(layer)`` and
    ``conv + str(layer)``.
    """
    X = BatchNormalization(axis=3, name=bn + str(layer))(X)
    X = Activation('relu')(X)
    X = Conv2D(filters, kernel_size, strides=(1, 1), padding='same',
               name=conv + str(layer),
               kernel_initializer=glorot_uniform(seed=0))(X)
    return Dropout(DROPOUT_RATE)(X)


def bottleneck_composite(l, layer):
    """Build one bottleneck (1x1) + composite (3x3) unit.

    :param l: a single tensor, or a list of tensors. A one-element list is
        unwrapped; a longer list is concatenated along the last axis.
    :param layer: index used to name the bottleneck layers; the composite
        layers use ``layer + 1``.
    :return: output tensor of the composite stage (``k`` filters).
    """
    if isinstance(l, list):
        X = l[0] if len(l) == 1 else Concatenate(axis=-1)(l)
    else:
        X = l
    # Bottleneck stage: 1x1 convolution with 4*k filters.
    X = _conv_stage(X, 4 * k, (1, 1), layer)
    # Composite stage: 3x3 convolution with k filters.
    return _conv_stage(X, k, (3, 3), layer + 1)


def _dense_block(X, layer, extra_units):
    """Stack ``1 + extra_units`` bottleneck/composite units.

    The first unit consumes ``X``; every later unit consumes the list of all
    previous unit outputs of this block. Each unit takes two layer indices.

    :return: ``(output of the last unit, first layer index of that unit)``.
    """
    X = bottleneck_composite(X, layer)
    outputs = [X]
    for _ in range(extra_units):
        layer += 2
        X = bottleneck_composite(outputs, layer)
        outputs.append(X)
    print(X.shape)
    return X, layer


def _transition(X, layer):
    """Apply a 1x1 conv stage with ``k`` filters, then 2x2 average pooling."""
    X = _conv_stage(X, k, (1, 1), layer)
    X = AveragePooling2D((2, 2), strides=(2, 2))(X)
    print(X.shape)
    return X


# Module-level counter kept for backward compatibility; chexnet() works on
# its own local counter.
layer = 0


def chexnet(classes=14, input_shape=(224, 224, 3)):
    """Build the DenseNet-style classifier used by this script.

    Structure: a 7x7 stride-2 stem with max pooling, four dense blocks of
    6, 12, 24 and 16 bottleneck/composite units separated by three
    transitions, global average pooling and a softmax ``Dense`` layer.
    Layer names and the order in which layers are created are the same as in
    the previous hand-unrolled version.

    NOTE(review): each transition (and the final pooling) receives only the
    output of the last unit of its block, not the concatenation of all unit
    outputs as in the DenseNet paper. Left unchanged because previously
    saved models depend on this topology -- confirm whether it is intended.

    :param classes: number of output classes of the softmax layer.
    :param input_shape: shape of one input image, channels last.
    :return: an uncompiled ``keras.models.Model`` named ``"DenseNet121"``.
    """
    X_input = Input(input_shape)
    layer = 1

    # Stem.
    X = ZeroPadding2D((3, 3))(X_input)
    X = BatchNormalization(axis=3, name=bn + str(layer))(X)
    X = Activation('relu')(X)
    X = Conv2D(2 * k, (7, 7), strides=(2, 2), name=conv + str(layer),
               kernel_initializer=glorot_uniform(seed=0))(X)
    X = Dropout(DROPOUT_RATE)(X)
    print(X.shape)
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D((3, 3), strides=(2, 2))(X)
    print(X.shape)

    # Dense blocks 1-3, each followed by a transition.
    layer += 1
    for extra_units in (5, 11, 23):
        X, layer = _dense_block(X, layer, extra_units)
        layer += 2  # skip the composite index of the last unit
        X = _transition(X, layer)
        layer += 1

    # Dense block 4 has no transition after it.
    X, layer = _dense_block(X, layer, 15)
    layer += 2
    print(X.shape)
    X = GlobalAveragePooling2D()(X)
    print(X.shape)

    # Fully connected classification layer.
    X = Dense(classes, activation='softmax', name=fc + str(layer),
              kernel_initializer=glorot_uniform(seed=0))(X)
    print(X.shape)
    model = Model(inputs=X_input, outputs=X, name="DenseNet121")

    return model


adam = Adam(lr=0.001)
model = chexnet(classes=4, input_shape=(224, 224, 3))
# Pass the configured optimizer object; the string 'adam' would silently
# ignore the learning rate chosen above.
model.compile(optimizer=adam, loss='categorical_crossentropy',
              metrics=['accuracy'])
# model.summary()

# All three splits only rescale pixel values to [0, 1].
train_datagen = ImageDataGenerator(rescale=1. / 255)
val_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    'Train',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')
validation_generator = val_datagen.flow_from_directory(
    'Validation',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')
test_generator = test_datagen.flow_from_directory(
    'Test',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')
print(train_generator.class_indices)
print(test_generator.class_indices)
print(validation_generator.class_indices)

# To resume from a saved model instead of training from scratch:
#     model = load_model('my_densenet')
#     adam = Adam(lr=0.001)
#     model.compile(optimizer=adam, loss='categorical_crossentropy',
#                   metrics=['accuracy'])

history = model.fit_generator(train_generator, epochs=25,
                              steps_per_epoch=2153,
                              validation_data=validation_generator,
                              validation_steps=175)
model.save('my_densenet25_with_dropout_new_data')
save_history(history.history, 'history_densenet25_with_dropout_new_data')

preds = model.evaluate_generator(train_generator, steps=2153)
print("train Loss = " + str(preds[0]))
print("train Accuracy = " + str(preds[1]))
preds = model.evaluate_generator(validation_generator, steps=175)
print ("validation Loss = " + str(preds[0])) 202 | print ("validation Accuracy = " + str(preds[1])) 203 | preds = model.evaluate_generator(test_generator, steps=175) 204 | print ("test Loss = " + str(preds[0])) 205 | print ("Test Accuracy = " + str(preds[1])) 206 | #model=load_model(model_files[i]) 207 | #print(model_files[i]) 208 | 209 | plt.plot(history.history['acc']) 210 | plt.plot(history.history['val_acc']) 211 | plt.title('model accuracy') 212 | plt.ylabel('accuracy') 213 | plt.xlabel('epoch') 214 | plt.legend(['train', 'test'], loc='upper left') 215 | plt.show() 216 | 217 | # summarize history for loss 218 | plt.plot(history.history['loss']) 219 | plt.plot(history.history['val_loss']) 220 | plt.title('model loss') 221 | plt.ylabel('loss') 222 | plt.xlabel('epoch') 223 | plt.legend(['train', 'test'], loc='upper left') 224 | plt.show() 225 | ### 226 | 227 | print("DONE") -------------------------------------------------------------------------------- /CSIS_Poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GiteshJ/Breast-Cancer-Phase-Detection/ad64a49ea181501b161818dce902100fe8324f3c/CSIS_Poster.pdf -------------------------------------------------------------------------------- /CSISproject.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GiteshJ/Breast-Cancer-Phase-Detection/ad64a49ea181501b161818dce902100fe8324f3c/CSISproject.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Breast Cancer Phase Detection 2 | 3 | ### Description 4 | Currently a product prototype for breast cancer lesion detection which predicts and assesses the presence and current stage of cancer lesion based on histopathological reports supplied as inputs to the machine. 
The machine has the ability to improve its prediction accuracy based on its past records. 5 | 6 | It involved the application of various specialized Machine Learning and Deep Learning algorithms, implemented entirely in Python. The model was trained on a large authentic data-set to achieve more than 89% accuracy. 7 | 8 | It won the **'Best Project Award'** under **IEEE CS India Council SAC** at **IEEE Computer Society India Symposium 2018**, competing against some of the best teams from other top-notch national engineering colleges in the country. 9 | - Supported by IEEE Computer Society 10 | - Winners at Technergize 2018 at IEEE CSIS 2018 11 | - Web-Based Application 12 | 13 | ### Summary 14 | 15 | - The model accepts the histopathological reports of the user as input. 16 | - Run the trained model on the data provided 17 | - Computation time may vary from seconds to a few minutes 18 | - Output is the status and phase of cancer present in the tissue of the image provided 19 | 20 | ### Poster 21 | > According to Apollo Hospitals there are more than 1 million cases of breast cancer in women per year in India. Breast cancer is also possible in men, although the chances are low. 22 | ![ProjectPoster](https://user-images.githubusercontent.com/25562710/43120230-199a14e6-8f37-11e8-8b93-5d50303b146b.png) 23 | 24 | ### Aim 25 | >The project aims to revolutionize cancer treatment throughout the globe and to remove the ambiguity in decision making. This product can act as a confirmatory step which can be used by patients, doctors as well as medical institutions. 26 | This can drastically decrease the rate of deaths happening across the globe due to the deadly disease by making the system more efficient and transparent. 27 | 28 | ### Tech 29 | 30 | Machine learning and Deep neural network algorithms such as ConvNets, DenseNets, etc. 31 | The entire development has been done in Python. 32 | 33 | **Dataset** is available [here](https://rdm.inesctec.pt/dataset/nis-2017-003). 
34 | 35 | ### Installation 36 | 37 | Not Available Now 38 | 39 | ### Team 40 | 41 | | Name | Connect With Us | Or Drop A Mail | 42 | | ------ | ------ |------ | 43 | | Shrutina Agarwal | [LinkedIn](https://www.linkedin.com/in/shrutina-agarwal-664657150/) | *shrutina.agarwal10@gmail.com* | 44 | | Gitesh Jain | [LinkedIn](https://www.linkedin.com/in/gitesh1996/) | *giteshjain844@gmail.com* | 45 | | Tuhin Das | [LinkedIn](https://www.linkedin.com/in/tuhind/) | tuhin.loves.federer@gmail.com | 46 | | Amitrajit Bose | [LinkedIn](https://www.linkedin.com/in/amitrajitbose/) | *amitrajitbose@gmail.com* | 47 | | Sivangi Tandon | [LinkedIn](https://in.linkedin.com/in/sivangi-tandon-40a05016a) | *sivangitandon@gmail.com* | 48 | 49 | 50 | ### Contribute 51 | 52 | Want to contribute? Great! 😍 53 | Drop us a message at the contact details provided above. 54 | 55 | 56 | -------------------------------------------------------------------------------- /Thumbs.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GiteshJ/Breast-Cancer-Phase-Detection/ad64a49ea181501b161818dce902100fe8324f3c/Thumbs.db -------------------------------------------------------------------------------- /outnorm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GiteshJ/Breast-Cancer-Phase-Detection/ad64a49ea181501b161818dce902100fe8324f3c/outnorm.png -------------------------------------------------------------------------------- /stainNorm_Macenko.py: -------------------------------------------------------------------------------- 1 | """ 2 | Stain normalization based on the method of: 3 | 4 | M. Macenko et al., ‘A method for normalizing histology slides for quantitative analysis’, in 2009 IEEE International Symposium on Biomedical Imaging: From Nano to Macro, 2009, pp. 1107–1110. 
5 | 6 | Uses the spams package: 7 | 8 | http://spams-devel.gforge.inria.fr/index.html 9 | 10 | Use with python via e.g https://anaconda.org/conda-forge/python-spams 11 | """ 12 | 13 | import numpy as np 14 | import stain_utils as ut 15 | 16 | 17 | def get_stain_matrix(I, beta=0.15, alpha=1): 18 | """ 19 | Get stain matrix (2x3) 20 | :param I: 21 | :param beta: 22 | :param alpha: 23 | :return: 24 | """ 25 | OD = ut.RGB_to_OD(I).reshape((-1, 3)) 26 | OD = (OD[(OD > beta).any(axis=1), :]) 27 | _, V = np.linalg.eigh(np.cov(OD, rowvar=False)) 28 | V = V[:, [2, 1]] 29 | if V[0, 0] < 0: V[:, 0] *= -1 30 | if V[0, 1] < 0: V[:, 1] *= -1 31 | That = np.dot(OD, V) 32 | phi = np.arctan2(That[:, 1], That[:, 0]) 33 | minPhi = np.percentile(phi, alpha) 34 | maxPhi = np.percentile(phi, 100 - alpha) 35 | v1 = np.dot(V, np.array([np.cos(minPhi), np.sin(minPhi)])) 36 | v2 = np.dot(V, np.array([np.cos(maxPhi), np.sin(maxPhi)])) 37 | if v1[0] > v2[0]: 38 | HE = np.array([v1, v2]) 39 | else: 40 | HE = np.array([v2, v1]) 41 | return ut.normalize_rows(HE) 42 | 43 | 44 | ### 45 | 46 | class normalizer(object): 47 | """ 48 | A stain normalization object 49 | """ 50 | 51 | def __init__(self): 52 | self.stain_matrix_target = None 53 | self.target_concentrations = None 54 | 55 | def fit(self, target): 56 | target = ut.standardize_brightness(target) 57 | self.stain_matrix_target = get_stain_matrix(target) 58 | self.target_concentrations = ut.get_concentrations(target, self.stain_matrix_target) 59 | 60 | def target_stains(self): 61 | return ut.OD_to_RGB(self.stain_matrix_target) 62 | 63 | def transform(self, I): 64 | I = ut.standardize_brightness(I) 65 | stain_matrix_source = get_stain_matrix(I) 66 | source_concentrations = ut.get_concentrations(I, stain_matrix_source) 67 | maxC_source = np.percentile(source_concentrations, 99, axis=0).reshape((1, 2)) 68 | maxC_target = np.percentile(self.target_concentrations, 99, axis=0).reshape((1, 2)) 69 | source_concentrations *= (maxC_target / maxC_source) 
70 | return (255 * np.exp(-1 * np.dot(source_concentrations, self.stain_matrix_target).reshape(I.shape))).astype( 71 | np.uint8) 72 | 73 | def hematoxylin(self, I): 74 | I = ut.standardize_brightness(I) 75 | h, w, c = I.shape 76 | stain_matrix_source = get_stain_matrix(I) 77 | source_concentrations = ut.get_concentrations(I, stain_matrix_source) 78 | H = source_concentrations[:, 0].reshape(h, w) 79 | H = np.exp(-1 * H) 80 | return H 81 | -------------------------------------------------------------------------------- /stain_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Uses the spams package: 3 | 4 | http://spams-devel.gforge.inria.fr/index.html 5 | 6 | Use with python via e.g https://anaconda.org/conda-forge/python-spams 7 | """ 8 | 9 | import numpy as np 10 | import cv2 as cv 11 | import spams 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | ########################################## 16 | 17 | def read_image(path): 18 | """ 19 | Read an image to RGB uint8 20 | :param path: 21 | :return: 22 | """ 23 | im = cv.imread(path) 24 | im = cv.cvtColor(im, cv.COLOR_BGR2RGB) 25 | return im 26 | 27 | 28 | def show_colors(C): 29 | """ 30 | Shows rows of C as colors (RGB) 31 | :param C: 32 | :return: 33 | """ 34 | n = C.shape[0] 35 | for i in range(n): 36 | if C[i].max() > 1.0: 37 | plt.plot([0, 1], [n - 1 - i, n - 1 - i], c=C[i] / 255, linewidth=20) 38 | else: 39 | plt.plot([0, 1], [n - 1 - i, n - 1 - i], c=C[i], linewidth=20) 40 | plt.axis('off') 41 | plt.axis([0, 1, -1, n]) 42 | 43 | 44 | def show(image, now=True, fig_size=(10, 10)): 45 | """ 46 | Show an image (np.array). 47 | Caution! Rescales image to be in range [0,1]. 
48 | :param image: 49 | :param now: 50 | :param fig_size: 51 | :return: 52 | """ 53 | image = image.astype(np.float32) 54 | m, M = image.min(), image.max() 55 | if fig_size != None: 56 | plt.rcParams['figure.figsize'] = (fig_size[0], fig_size[1]) 57 | plt.imshow((image - m) / (M - m), cmap='gray') 58 | plt.axis('off') 59 | if now == True: 60 | plt.show() 61 | 62 | 63 | def build_stack(tup): 64 | """ 65 | Build a stack of images from a tuple of images 66 | :param tup: 67 | :return: 68 | """ 69 | N = len(tup) 70 | if len(tup[0].shape) == 3: 71 | h, w, c = tup[0].shape 72 | stack = np.zeros((N, h, w, c)) 73 | if len(tup[0].shape) == 2: 74 | h, w = tup[0].shape 75 | stack = np.zeros((N, h, w)) 76 | for i in range(N): 77 | stack[i] = tup[i] 78 | return stack 79 | 80 | 81 | def patch_grid(ims, width=5, sub_sample=None, rand=False, save_name=None): 82 | """ 83 | Display a grid of patches 84 | :param ims: 85 | :param width: 86 | :param sub_sample: 87 | :param rand: 88 | :return: 89 | """ 90 | N0 = np.shape(ims)[0] 91 | if sub_sample == None: 92 | N = N0 93 | stack = ims 94 | elif sub_sample != None and rand == False: 95 | N = sub_sample 96 | stack = ims[:N] 97 | elif sub_sample != None and rand == True: 98 | N = sub_sample 99 | idx = np.random.choice(range(N), sub_sample, replace=False) 100 | stack = ims[idx] 101 | height = np.ceil(float(N) / width).astype(np.uint16) 102 | plt.rcParams['figure.figsize'] = (18, (18 / width) * height) 103 | plt.figure() 104 | for i in range(N): 105 | plt.subplot(height, width, i + 1) 106 | im = stack[i] 107 | show(im, now=False, fig_size=None) 108 | if save_name != None: 109 | plt.savefig(save_name) 110 | plt.show() 111 | 112 | 113 | ###################################### 114 | 115 | def standardize_brightness(I): 116 | """ 117 | 118 | :param I: 119 | :return: 120 | """ 121 | p = np.percentile(I, 90) 122 | return np.clip(I * 255.0 / p, 0, 255).astype(np.uint8) 123 | 124 | 125 | def remove_zeros(I): 126 | """ 127 | Remove zeros, replace with 
1's. 128 | :param I: uint8 array 129 | :return: 130 | """ 131 | mask = (I == 0) 132 | I[mask] = 1 133 | return I 134 | 135 | 136 | def RGB_to_OD(I): 137 | """ 138 | Convert from RGB to optical density 139 | :param I: 140 | :return: 141 | """ 142 | I = remove_zeros(I) 143 | return -1 * np.log(I / 255) 144 | 145 | 146 | def OD_to_RGB(OD): 147 | """ 148 | Convert from optical density to RGB 149 | :param OD: 150 | :return: 151 | """ 152 | return (255 * np.exp(-1 * OD)).astype(np.uint8) 153 | 154 | 155 | def normalize_rows(A): 156 | """ 157 | Normalize rows of an array 158 | :param A: 159 | :return: 160 | """ 161 | return A / np.linalg.norm(A, axis=1)[:, None] 162 | 163 | 164 | def notwhite_mask(I, thresh=0.8): 165 | """ 166 | Get a binary mask where true denotes 'not white' 167 | :param I: 168 | :param thresh: 169 | :return: 170 | """ 171 | I_LAB = cv.cvtColor(I, cv.COLOR_RGB2LAB) 172 | L = I_LAB[:, :, 0] / 255.0 173 | return (L < thresh) 174 | 175 | 176 | def sign(x): 177 | """ 178 | Returns the sign of x 179 | :param x: 180 | :return: 181 | """ 182 | if x > 0: 183 | return +1 184 | elif x < 0: 185 | return -1 186 | elif x == 0: 187 | return 0 188 | 189 | 190 | def get_concentrations(I, stain_matrix, lamda=0.01): 191 | """ 192 | Get concentrations, a npix x 2 matrix 193 | :param I: 194 | :param stain_matrix: a 2x3 stain matrix 195 | :return: 196 | """ 197 | OD = RGB_to_OD(I).reshape((-1, 3)) 198 | return spams.lasso(OD.T, D=stain_matrix.T, mode=2, lambda1=lamda, pos=True).toarray().T 199 | --------------------------------------------------------------------------------