├── BreastCancer.py
├── Model.png
├── README.md
├── Samples
│   ├── .gitignore
│   ├── InSitu1.jpg
│   ├── InSitu2.jpg
│   ├── Invasive1.jpg
│   ├── Invasive2.jpg
│   ├── Normal1.jpg
│   ├── Normal2.jpg
│   ├── benign1.jpg
│   └── benign2.jpg
└── my_model3.h5

/BreastCancer.py:
--------------------------------------------------------------------------------
import numpy as np
from keras.layers import Input, Dense, Activation, ZeroPadding2D, Flatten, Conv2D
from keras.layers import MaxPooling2D
from keras.models import Model
from keras.preprocessing import image
from keras.models import load_model
from keras import metrics

from keras.applications.imagenet_utils import preprocess_input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from PIL import Image
import keras.backend as K
import tensorflow as tf
import keras
from keras.wrappers.scikit_learn import KerasClassifier

K.set_image_data_format('channels_last')
from matplotlib.pyplot import imshow
import os

#######################################################################################################################
modelSavePath = 'my_model3.h5'
# Assumed default; point this at your training image folder, with one subfolder per class
# as expected by getAsSoftmax ('b', 'is', 'iv', and a folder for normal tissue).
dataTrainPath = 'dataset'
numOfTestPoints = 2
batchSize = 16
numOfEpoches = 10
#######################################################################################################################

classes = []


def mean_pred(y_true, y_pred):
    return K.mean(y_pred)


# Crop the image into a 3x4 grid of 512x512 tiles (12 crops); optionally add 180-degree rotations
def getCropImgs(img, needRotations=False):
    # img = img.convert('L')
    z = np.asarray(img, dtype=np.int8)  # note: int8 wraps pixel values above 127; kept as-is for consistency with the pretrained model
    c = []
    for i in range(3):
        for j in range(4):
            crop = z[512 * i:512 * (i + 1), 512 * j:512 * (j + 1), :]

            c.append(crop)
            if needRotations:
                c.append(np.rot90(np.rot90(crop)))

    # os.system('cls')
    # print("Crop imgs", c[2].shape)

    return c


# Map a folder name to a one-hot label
def getAsSoftmax(fname):
    if (fname == 'b'):
        return [1, 0, 0, 0]
    elif (fname == 'is'):
        return [0, 1, 0, 0]
    elif (fname == 'iv'):
        return [0, 0, 1, 0]
    else:
        return [0, 0, 0, 1]


# Return all images as a numpy array, along with their one-hot labels
def get_imgs_frm_folder(path):
    # x = np.empty(shape=[19200,512,512,3],dtype=np.int8)
    # y = np.empty(shape=[400],dtype=np.int8)

    x = []
    y = []

    cnt = 0
    for foldname in os.listdir(path):
        for filename in os.listdir(os.path.join(path, foldname)):
            img = Image.open(os.path.join(os.path.join(path, foldname), filename))
            # img.show()
            crpImgs = getCropImgs(img)
            cnt += 1
            if cnt % 10 == 0:
                print(str(cnt) + " Images loaded")
            for im in crpImgs:
                x.append(np.divide(np.asarray(im, np.float16), 255.))
                # Image.fromarray(np.divide(np.asarray(im, np.float16), 255.), 'RGB').show()
                y.append(getAsSoftmax(foldname))
                # print(getAsSoftmax(foldname))

    print("Images cropped")
    print("Loading as array")

    return x, y, cnt


# Load the dataset
def load_dataset(testNum=numOfTestPoints):
    print("Loading images..")

    train_set_x_orig, train_set_y_orig, cnt = get_imgs_frm_folder(dataTrainPath)

    testNum = numOfTestPoints * 12
    trainNum = (cnt * 12) - testNum

    print(testNum, trainNum)

    train_set_x_orig = np.array(train_set_x_orig, np.float16)
    train_set_y_orig = np.array(train_set_y_orig, np.int8)

    nshapeX = train_set_x_orig.shape
    nshapeY = train_set_y_orig.shape

    # train_set_y_orig = oh

    print("folder trainX " + str(nshapeX))
    print("folder trainY " + str(nshapeY))

    print("Images loaded")

    print("Loading all data")

    test_set_x_orig = train_set_x_orig[trainNum:, :, :, :]
    train_set_x_orig = train_set_x_orig[0:trainNum, :, :, :]

    test_set_y_orig = train_set_y_orig[trainNum:]
    train_set_y_orig = train_set_y_orig[0:trainNum]

    classes = np.array(os.listdir(dataTrainPath))  # the list of classes

    # train_set_y_orig = np.array(train_set_y_orig).reshape((np.array(train_set_y_orig, np.float16).shape[1],
    #                                                        np.array(train_set_y_orig, np.float16).shape[0]))
    # test_set_y_orig = np.array(test_set_y_orig).reshape((np.array(test_set_y_orig, np.float16).shape[1],
    #                                                      np.array(test_set_y_orig, np.float16).shape[0]))
    print(train_set_y_orig[0:50, :])
    print(train_set_x_orig[1])
    print("Data load complete")

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def defModel(input_shape):
    X_input = Input(input_shape)

    # The max pooling layers use a stride equal to the pooling size

    X = Conv2D(16, (3, 3), strides=(1, 1))(X_input)  # Conv. layer (1)

    X = Activation('relu')(X)

    X = MaxPooling2D((3, 3), strides=3)(X)  # MP layer (2)

    X = Conv2D(32, (3, 3), strides=(1, 1))(X)  # Conv. layer (3)

    X = Activation('relu')(X)

    X = MaxPooling2D((2, 2), strides=2)(X)  # MP layer (4)

    X = Conv2D(64, (2, 2), strides=(1, 1))(X)  # Conv. layer (5)

    X = Activation('relu')(X)

    X = ZeroPadding2D(padding=(2, 2))(X)  # Conv. layer (5) outputs 83x83; pad to 87x87 before pooling

    X = MaxPooling2D((2, 2), strides=2)(X)  # MP layer (6)

    X = Conv2D(64, (2, 2), strides=(1, 1))(X)  # Conv. layer (7)

    X = Activation('relu')(X)

    X = ZeroPadding2D(padding=(2, 2))(X)  # Conv. layer (7) outputs 42x42; pad to 46x46 before pooling

    X = MaxPooling2D((3, 3), strides=3)(X)  # MP layer (8)

    X = Conv2D(32, (3, 3), strides=(1, 1))(X)  # Conv. layer (9)

    X = Activation('relu')(X)

    X = Flatten()(X)  # Flatten for the fully connected layers

    X = Dense(256, activation='relu')(X)  # F.C. layer (10)

    X = Dense(128, activation='relu')(X)  # F.C. layer (11)

    X = Dense(4, activation='softmax')(X)

    # ------------------------------------------------------------------------------

    model = Model(inputs=X_input, outputs=X, name='Model')

    return model


def train(batch_size, epochs):
    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    model = defModel(X_train.shape[1:])

    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
    # To enable image augmentation, uncomment the datagen block and the fit_generator call below,
    # and comment out the line marked with <>.

    # datagen = keras.preprocessing.image.ImageDataGenerator(
    #     zoom_range=0.2,  # randomly zoom into images
    #     rotation_range=180,  # randomly rotate images in the range (degrees, 0 to 180)
    #     width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
    #     height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
    #     horizontal_flip=False,  # randomly flip images horizontally
    #     vertical_flip=False  # randomly flip images vertically
    # )
    while True:
        try:
            model = load_model(modelSavePath)
        except:
            print("Training a new model")

        model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size)  # <>

        # history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
        #                               epochs=epochs
        #                               # validation_data=(X_test, Y_test))
        #                               )
        # history.model.save('my_model3.h5')

        model.save(modelSavePath)

        preds = model.evaluate(X_test, Y_test_orig, batch_size=1, verbose=1, sample_weight=None)
        print(preds)

        print()
        print("Loss = " + str(preds[0]))
        print("Test Accuracy = " + str(preds[1]) + "\n\n\n\n\n")
        ch = input("Do you wish to continue training? (y/n) ")
        if ch == 'y':
            epochs = int(input("How many epochs this time? : "))
            continue
        else:
            break

    return model


def predict(img, savedModelPath, showImg=True):
    model = load_model(savedModelPath)
    # if showImg:
    #     Image.fromarray(np.array(img, np.float16), 'RGB').show()

    x = img
    if showImg:
        Image.fromarray(np.array(img, np.float16), 'RGB').show()
    x = np.expand_dims(x, axis=0)

    softMaxPred = model.predict(x)
    print("prediction from CNN: " + str(softMaxPred) + "\n")
    probs = softmaxToProbs(softMaxPred)

    # plot_model(model, to_file='Model.png')
    # SVG(model_to_dot(model).create(prog='dot', format='svg'))
    maxprob = 0
    maxI = 0
    for j in range(len(probs)):
        # print(str(j) + " : " + str(round(probs[j], 4)))
        if probs[j] > maxprob:
            maxprob = probs[j]
            maxI = j
    # print(softMaxPred)
    print("prediction index: " + str(maxI))
    return maxI, probs


# Re-normalize the network output with another softmax and scale to percentages
def softmaxToProbs(soft):
    z_exp = [np.math.exp(i) for i in soft[0]]
    sum_z_exp = sum(z_exp)
    return [(i / sum_z_exp) * 100 for i in z_exp]


def predictImage(img_path='my_image.jpg', arrayImg=None, printData=True):
    crops = []
    if arrayImg is None:
        img = image.load_img(img_path)
        crops = np.array(getCropImgs(img, needRotations=False), np.float16)
        crops = np.divide(crops, 255.)
        Image.fromarray(np.array(crops[0]), "RGB").show()

    classes = []
    classes.append("Benign")
    classes.append("InSitu")
    classes.append("Invasive")
    classes.append("Normal")

    compProbs = []
    compProbs.append(0)
    compProbs.append(0)
    compProbs.append(0)
    compProbs.append(0)

    for i in range(len(crops)):
        if printData:
            print("\n\nCrop " + str(i + 1) + " prediction:\n")

        ___, probs = predict(crops[i], modelSavePath, showImg=False)

        for j in range(len(classes)):
            if printData:
                print(str(classes[j]) + " : " + str(round(probs[j], 4)) + "%")
            compProbs[j] += probs[j]

    if printData:
        print("\n\nAverage from all crops\n")

    for j in range(len(classes)):
        if printData:
            print(str(classes[j]) + " : " + str(round(compProbs[j] / 12, 4)) + "%")


#######################################################################

print("1. Do you want to train the network\n"
      "2. Test the model\n(Enter 1 or 2)?\n")
ch = int(input())
if ch == 1:

    try:
        classes = np.load('classes.npy')
        print("Loading")
        X_train = np.load('X_train.npy')
        Y_train = np.load('Y_train.npy')
        X_test = np.load('X_test.npy')
        Y_test_orig = np.load('Y_test_orig.npy')
    except:
        X_train, Y_train, X_test, Y_test_orig, classes = load_dataset()
        print("Saving...")
        np.save('X_train', X_train)
        np.save('Y_train', Y_train)
        np.save('X_test', X_test)
        np.save('Y_test_orig', Y_test_orig)
        np.save('classes', classes)

    # for y in Y_train:
    #     print(y)

    print("number of training examples = " + str(X_train.shape[0]))
    print("number of test examples = " + str(X_test.shape[0]))
    print("X_train shape: " + str(X_train.shape))
    print("Y_train shape: " + str(Y_train.shape))
    print("X_test shape: " + str(X_test.shape))
    print("Y_test shape: " + str(Y_test_orig.shape))
    model = train(batch_size=batchSize, epochs=numOfEpoches)

elif ch == 2:

    c = int(input("1. Test from random images\n2. Test your own custom image\n(Enter 1 or 2)\n"))
    if c == 1:

        try:
            classes = np.load('classes.npy')
            print("Loading")
            X_train = np.load('X_train.npy')
            Y_train = np.load('Y_train.npy')
            X_test = np.load('X_test.npy')
            Y_test_orig = np.load('Y_test_orig.npy')
        except:
            X_train, Y_train, _, __, classes = load_dataset()
            print("Saving...")
            np.save('X_train', X_train)
            np.save('Y_train', Y_train)
            np.save('X_test', _)
            np.save('Y_test_orig', __)
            np.save('classes', classes)

        _ = None
        __ = None
        testImgsX = []
        testImgsY = []
        ran = []
        print("X_train shape: " + str(X_train.shape))
        print("Y_train shape: " + str(Y_train.shape))
        # print(X_train[1])
        for i in range(10):
            ran.append(np.random.randint(0, X_train.shape[0] - 1))
        for ranNum in ran:
            testImgsX.append(X_train[ranNum])
            testImgsY.append(Y_train[ranNum])
            # predict(Image.fromarray(X_train[ran],'RGB'))

        X_train = None
        Y_train = None

        print("testImgsX shape: " + str(len(testImgsX)))
        print("testImgsY shape: " + str(len(testImgsY)))
        # print(testImgsY[1])
        # print(testImgsX[1])

        cnt = 0.0

        classes = []
        classes.append("Benign")
        classes.append("InSitu")
        classes.append("Invasive")
        classes.append("Normal")

        compProbs = []
        compProbs.append(0)
        compProbs.append(0)
        compProbs.append(0)
        compProbs.append(0)

        for i in range(len(testImgsX)):
            print("\n\nTest image " + str(i + 1) + " prediction:\n")

            predi, probs = predict(testImgsX[i], modelSavePath, showImg=False)

            for j in range(len(classes)):
                print(str(classes[j]) + " : " + str(round(probs[j], 4)) + "%")
                compProbs[j] += probs[j]

            maxi = 0
            for j in range(len(testImgsY[0])):
                if testImgsY[i][j] == 1:  # The right class
                    maxi = j
                    break
            if predi == maxi:
                cnt += 1

        print("% of images that are correct: " + str((cnt / len(testImgsX)) * 100))

    elif c == 2:
        predictImage()

else:
    print("Please enter only 1 or 2")

--------------------------------------------------------------------------------
/Model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Model.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Breast-cancer-detection-using-CNN

Breast cancer constitutes a leading cause of cancer-related deaths worldwide. Accurate diagnosis of cancer from eosin-stained images remains a complex task, as medical professionals often encounter discrepancies in reaching a final verdict. Computer-Aided Diagnosis (CAD) systems offer a means to reduce cost and enhance the efficiency of this intricate process. Traditional classification approaches rely on problem-specific feature extraction methods based on domain knowledge. To address the numerous challenges posed by feature-based techniques, deep learning methods have emerged as significant alternatives.

We propose a method for the classification of hematoxylin and eosin-stained breast biopsy images using Convolutional Neural Networks (CNNs).
Our method classifies images into four categories: normal tissue, benign lesion, in situ carcinoma, and invasive carcinoma, as well as performing a binary classification of carcinoma versus non-carcinoma. The network architecture is meticulously designed to extract information at various scales, encompassing both individual nuclei and the overall tissue organization. This design enables the seamless integration of our proposed system with whole-slide histology images. Our method achieves an accuracy of 77.8% for the four-class classification and demonstrates a sensitivity of 95.6% for cancer cases.

To use this project:

1. You'll need Python 3 to run the program

2. I've included the preprocessed image data. You can download it from [here](https://drive.google.com/open?id=17LR9ssbENit-3vsEAM63FptNasB5AHrr). Place the 5 files you just downloaded in the folder containing the `.py` file

3. Use `pip install package-name` to install the packages below

4. You need to have the following Python packages installed
   * keras
   * tensorflow (either the CPU or GPU version will do)
   * PIL
   * numpy

5. You can modify the default hyperparameters by editing the variables between the `#` markers in the first few lines of `BreastCancer.py`

To run the program, navigate to the folder on the command line and use the following command:
```
python BreastCancer.py
```
I've also included a pretrained model. To test your own image or one of the samples with it, copy the image into the folder with the `.py` file, rename it to `my_image.jpg`, and then choose the option to test your own image by following the on-screen prompts
--------------------------------------------------------------------------------
/Samples/.gitignore:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/Samples/InSitu1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/InSitu1.jpg
--------------------------------------------------------------------------------
/Samples/InSitu2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/InSitu2.jpg
--------------------------------------------------------------------------------
/Samples/Invasive1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Invasive1.jpg
--------------------------------------------------------------------------------
/Samples/Invasive2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Invasive2.jpg
--------------------------------------------------------------------------------
/Samples/Normal1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Normal1.jpg
--------------------------------------------------------------------------------
/Samples/Normal2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Normal2.jpg
--------------------------------------------------------------------------------
/Samples/benign1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/benign1.jpg
--------------------------------------------------------------------------------
/Samples/benign2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/benign2.jpg
--------------------------------------------------------------------------------
/my_model3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/my_model3.h5
--------------------------------------------------------------------------------
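
The README describes testing a custom image through the interactive menu in `BreastCancer.py`. For readers who prefer to call the pretrained model from their own code (importing `BreastCancer` directly would trigger the menu), here is a minimal, self-contained sketch. It mirrors the crop-and-average idea of `predictImage`, but it is a sketch, not the project's code: it averages the raw softmax outputs instead of re-normalizing them as `softmaxToProbs` does, and it assumes `my_model3.h5` is in the working directory, a compatible old Keras/TensorFlow version, and an input image of at least 2048x1536 pixels so that a 3x4 grid of 512x512 crops fits. The sample path `Samples/InSitu1.jpg` is only illustrative.

```python
import numpy as np
from PIL import Image
from keras.models import load_model

CLASSES = ["Benign", "InSitu", "Invasive", "Normal"]


def crop_grid(img, tile=512, rows=3, cols=4):
    """Split the image into a rows x cols grid of tile x tile crops."""
    # int8 cast mirrors getCropImgs in BreastCancer.py (kept for consistency with the pretrained model)
    z = np.asarray(img, dtype=np.int8)
    return [z[tile * i:tile * (i + 1), tile * j:tile * (j + 1), :]
            for i in range(rows) for j in range(cols)]


def classify(img_path, model_path='my_model3.h5'):
    model = load_model(model_path)
    # same preprocessing as predictImage: crop, cast to float, scale by 255
    crops = np.divide(np.array(crop_grid(Image.open(img_path)), np.float32), 255.)
    preds = model.predict(crops)   # one softmax vector per crop, shape (12, 4)
    avg = preds.mean(axis=0)       # average the 12 crop predictions
    for name, p in zip(CLASSES, avg):
        print("%s : %.2f%%" % (name, 100 * p))
    return CLASSES[int(np.argmax(avg))]


if __name__ == '__main__':
    print("Predicted class:", classify('Samples/InSitu1.jpg'))
```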