├── BreastCancer.py
├── Model.png
├── README.md
├── Samples
│   ├── .gitignore
│   ├── InSitu1.jpg
│   ├── InSitu2.jpg
│   ├── Invasive1.jpg
│   ├── Invasive2.jpg
│   ├── Normal1.jpg
│   ├── Normal2.jpg
│   ├── benign1.jpg
│   └── benign2.jpg
└── my_model3.h5

/BreastCancer.py:
--------------------------------------------------------------------------------
import numpy as np
from keras.layers import Input, Dense, Activation, ZeroPadding2D, Flatten, Conv2D
from keras.layers import MaxPooling2D
from keras.models import Model
from keras.preprocessing import image
from keras.models import load_model
from keras import metrics

from keras.applications.imagenet_utils import preprocess_input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from PIL import Image
import keras.backend as K
import tensorflow as tf
import keras
from keras.wrappers.scikit_learn import KerasClassifier

K.set_image_data_format('channels_last')
from matplotlib.pyplot import imshow
import os

#######################################################################################################################
modelSavePath = 'my_model3.h5'
# Assumed default; point this at your training image folder, with one subfolder per class
# as expected by getAsSoftmax ('b', 'is', 'iv', and a folder for normal tissue).
dataTrainPath = 'dataset'
numOfTestPoints = 2
batchSize = 16
numOfEpoches = 10
#######################################################################################################################

classes = []


def mean_pred(y_true, y_pred):
    return K.mean(y_pred)


# Crop the image into a 3x4 grid of 512x512 tiles (12 crops); optionally add 180-degree rotations
def getCropImgs(img, needRotations=False):
    # img = img.convert('L')
    z = np.asarray(img, dtype=np.int8)  # note: int8 wraps pixel values above 127; kept as-is for consistency with the pretrained model
    c = []
    for i in range(3):
        for j in range(4):
            crop = z[512 * i:512 * (i + 1), 512 * j:512 * (j + 1), :]

            c.append(crop)
            if needRotations:
                c.append(np.rot90(np.rot90(crop)))

    # os.system('cls')
    # print("Crop imgs", c[2].shape)

    return c


# Map a folder name to a one-hot label
def getAsSoftmax(fname):
    if (fname == 'b'):
        return [1, 0, 0, 0]
    elif (fname == 'is'):
        return [0, 1, 0, 0]
    elif (fname == 'iv'):
        return [0, 0, 1, 0]
    else:
        return [0, 0, 0, 1]


# Return all images as a numpy array, along with their one-hot labels
def get_imgs_frm_folder(path):
    # x = np.empty(shape=[19200,512,512,3],dtype=np.int8)
    # y = np.empty(shape=[400],dtype=np.int8)

    x = []
    y = []

    cnt = 0
    for foldname in os.listdir(path):
        for filename in os.listdir(os.path.join(path, foldname)):
            img = Image.open(os.path.join(os.path.join(path, foldname), filename))
            # img.show()
            crpImgs = getCropImgs(img)
            cnt += 1
            if cnt % 10 == 0:
                print(str(cnt) + " Images loaded")
            for im in crpImgs:
                x.append(np.divide(np.asarray(im, np.float16), 255.))
                # Image.fromarray(np.divide(np.asarray(im, np.float16), 255.), 'RGB').show()
                y.append(getAsSoftmax(foldname))
                # print(getAsSoftmax(foldname))

    print("Images cropped")
    print("Loading as array")

    return x, y, cnt


# Load the dataset
def load_dataset(testNum=numOfTestPoints):
    print("Loading images..")

    train_set_x_orig, train_set_y_orig, cnt = get_imgs_frm_folder(dataTrainPath)

    testNum = numOfTestPoints * 12
    trainNum = (cnt * 12) - testNum

    print(testNum, trainNum)

    train_set_x_orig = np.array(train_set_x_orig, np.float16)
    train_set_y_orig = np.array(train_set_y_orig, np.int8)

    nshapeX = train_set_x_orig.shape
    nshapeY = train_set_y_orig.shape

    # train_set_y_orig = oh

    print("folder trainX " + str(nshapeX))
    print("folder trainY " + str(nshapeY))

    print("Images loaded")

    print("Loading all data")

    test_set_x_orig = train_set_x_orig[trainNum:, :, :, :]
    train_set_x_orig = train_set_x_orig[0:trainNum, :, :, :]

    test_set_y_orig = train_set_y_orig[trainNum:]
    train_set_y_orig = train_set_y_orig[0:trainNum]

    classes = np.array(os.listdir(dataTrainPath))  # the list of classes

    # train_set_y_orig = np.array(train_set_y_orig).reshape((np.array(train_set_y_orig, np.float16).shape[1],
    #                                                        np.array(train_set_y_orig, np.float16).shape[0]))
    # test_set_y_orig = np.array(test_set_y_orig).reshape((np.array(test_set_y_orig, np.float16).shape[1],
    #                                                      np.array(test_set_y_orig, np.float16).shape[0]))
    print(train_set_y_orig[0:50, :])
    print(train_set_x_orig[1])
    print("Data load complete")

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def defModel(input_shape):
    X_input = Input(input_shape)

    # The max pooling layers use a stride equal to the pooling size

    X = Conv2D(16, (3, 3), strides=(1, 1))(X_input)  # Conv. layer (1)

    X = Activation('relu')(X)

    X = MaxPooling2D((3, 3), strides=3)(X)  # MP layer (2)

    X = Conv2D(32, (3, 3), strides=(1, 1))(X)  # Conv. layer (3)

    X = Activation('relu')(X)

    X = MaxPooling2D((2, 2), strides=2)(X)  # MP layer (4)

    X = Conv2D(64, (2, 2), strides=(1, 1))(X)  # Conv. layer (5)

    X = Activation('relu')(X)

    X = ZeroPadding2D(padding=(2, 2))(X)  # Conv. layer (5) outputs 83x83; pad to 87x87 before pooling

    X = MaxPooling2D((2, 2), strides=2)(X)  # MP layer (6)

    X = Conv2D(64, (2, 2), strides=(1, 1))(X)  # Conv. layer (7)

    X = Activation('relu')(X)

    X = ZeroPadding2D(padding=(2, 2))(X)  # Conv. layer (7) outputs 42x42; pad to 46x46 before pooling

    X = MaxPooling2D((3, 3), strides=3)(X)  # MP layer (8)

    X = Conv2D(32, (3, 3), strides=(1, 1))(X)  # Conv. layer (9)

    X = Activation('relu')(X)

    X = Flatten()(X)  # Flatten for the fully connected layers

    X = Dense(256, activation='relu')(X)  # F.C. layer (10)

    X = Dense(128, activation='relu')(X)  # F.C. layer (11)

    X = Dense(4, activation='softmax')(X)

    # ------------------------------------------------------------------------------

    model = Model(inputs=X_input, outputs=X, name='Model')

    return model


def train(batch_size, epochs):
    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    model = defModel(X_train.shape[1:])

    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
    # To enable image augmentation, uncomment the datagen block and the fit_generator call below,
    # and comment out the line marked with <>.

    # datagen = keras.preprocessing.image.ImageDataGenerator(
    #     zoom_range=0.2,  # randomly zoom into images
    #     rotation_range=180,  # randomly rotate images in the range (degrees, 0 to 180)
    #     width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
    #     height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
    #     horizontal_flip=False,  # randomly flip images horizontally
    #     vertical_flip=False  # randomly flip images vertically
    # )
    while True:
        try:
            model = load_model(modelSavePath)
        except:
            print("Training a new model")

        model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size)  # <>

        # history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
        #                               epochs=epochs
        #                               # validation_data=(X_test, Y_test))
        #                               )
        # history.model.save('my_model3.h5')

        model.save(modelSavePath)

        preds = model.evaluate(X_test, Y_test_orig, batch_size=1, verbose=1, sample_weight=None)
        print(preds)

        print()
        print("Loss = " + str(preds[0]))
        print("Test Accuracy = " + str(preds[1]) + "\n\n\n\n\n")
        ch = input("Do you wish to continue training? (y/n) ")
        if ch == 'y':
            epochs = int(input("How many epochs this time? : "))
            continue
        else:
            break

    return model


def predict(img, savedModelPath, showImg=True):
    model = load_model(savedModelPath)
    # if showImg:
    #     Image.fromarray(np.array(img, np.float16), 'RGB').show()

    x = img
    if showImg:
        Image.fromarray(np.array(img, np.float16), 'RGB').show()
    x = np.expand_dims(x, axis=0)

    softMaxPred = model.predict(x)
    print("prediction from CNN: " + str(softMaxPred) + "\n")
    probs = softmaxToProbs(softMaxPred)

    # plot_model(model, to_file='Model.png')
    # SVG(model_to_dot(model).create(prog='dot', format='svg'))
    maxprob = 0
    maxI = 0
    for j in range(len(probs)):
        # print(str(j) + " : " + str(round(probs[j], 4)))
        if probs[j] > maxprob:
            maxprob = probs[j]
            maxI = j
    # print(softMaxPred)
    print("prediction index: " + str(maxI))
    return maxI, probs


# Re-normalize the network output with another softmax and scale to percentages
def softmaxToProbs(soft):
    z_exp = [np.math.exp(i) for i in soft[0]]
    sum_z_exp = sum(z_exp)
    return [(i / sum_z_exp) * 100 for i in z_exp]


def predictImage(img_path='my_image.jpg', arrayImg=None, printData=True):
    crops = []
    if arrayImg is None:
        img = image.load_img(img_path)
        crops = np.array(getCropImgs(img, needRotations=False), np.float16)
        crops = np.divide(crops, 255.)
        Image.fromarray(np.array(crops[0]), "RGB").show()

    classes = []
    classes.append("Benign")
    classes.append("InSitu")
    classes.append("Invasive")
    classes.append("Normal")

    compProbs = []
    compProbs.append(0)
    compProbs.append(0)
    compProbs.append(0)
    compProbs.append(0)

    for i in range(len(crops)):
        if printData:
            print("\n\nCrop " + str(i + 1) + " prediction:\n")

        ___, probs = predict(crops[i], modelSavePath, showImg=False)

        for j in range(len(classes)):
            if printData:
                print(str(classes[j]) + " : " + str(round(probs[j], 4)) + "%")
            compProbs[j] += probs[j]

    if printData:
        print("\n\nAverage from all crops\n")

    for j in range(len(classes)):
        if printData:
            print(str(classes[j]) + " : " + str(round(compProbs[j] / 12, 4)) + "%")


#######################################################################

print("1. Do you want to train the network\n"
      "2. Test the model\n(Enter 1 or 2)?\n")
ch = int(input())
if ch == 1:

    try:
        classes = np.load('classes.npy')
        print("Loading")
        X_train = np.load('X_train.npy')
        Y_train = np.load('Y_train.npy')
        X_test = np.load('X_test.npy')
        Y_test_orig = np.load('Y_test_orig.npy')
    except:
        X_train, Y_train, X_test, Y_test_orig, classes = load_dataset()
        print("Saving...")
        np.save('X_train', X_train)
        np.save('Y_train', Y_train)
        np.save('X_test', X_test)
        np.save('Y_test_orig', Y_test_orig)
        np.save('classes', classes)

    # for y in Y_train:
    #     print(y)

    print("number of training examples = " + str(X_train.shape[0]))
    print("number of test examples = " + str(X_test.shape[0]))
    print("X_train shape: " + str(X_train.shape))
    print("Y_train shape: " + str(Y_train.shape))
    print("X_test shape: " + str(X_test.shape))
    print("Y_test shape: " + str(Y_test_orig.shape))
    model = train(batch_size=batchSize, epochs=numOfEpoches)

elif ch == 2:

    c = int(input("1. Test from random images\n2. Test your own custom image\n(Enter 1 or 2)\n"))
    if c == 1:

        try:
            classes = np.load('classes.npy')
            print("Loading")
            X_train = np.load('X_train.npy')
            Y_train = np.load('Y_train.npy')
            X_test = np.load('X_test.npy')
            Y_test_orig = np.load('Y_test_orig.npy')
        except:
            X_train, Y_train, _, __, classes = load_dataset()
            print("Saving...")
            np.save('X_train', X_train)
            np.save('Y_train', Y_train)
            np.save('X_test', _)
            np.save('Y_test_orig', __)
            np.save('classes', classes)

        _ = None
        __ = None
        testImgsX = []
        testImgsY = []
        ran = []
        print("X_train shape: " + str(X_train.shape))
        print("Y_train shape: " + str(Y_train.shape))
        # print(X_train[1])
        for i in range(10):
            ran.append(np.random.randint(0, X_train.shape[0] - 1))
        for ranNum in ran:
            testImgsX.append(X_train[ranNum])
            testImgsY.append(Y_train[ranNum])
            # predict(Image.fromarray(X_train[ran],'RGB'))

        X_train = None
        Y_train = None

        print("testImgsX shape: " + str(len(testImgsX)))
        print("testImgsY shape: " + str(len(testImgsY)))
        # print(testImgsY[1])
        # print(testImgsX[1])

        cnt = 0.0

        classes = []
        classes.append("Benign")
        classes.append("InSitu")
        classes.append("Invasive")
        classes.append("Normal")

        compProbs = []
        compProbs.append(0)
        compProbs.append(0)
        compProbs.append(0)
        compProbs.append(0)

        for i in range(len(testImgsX)):
            print("\n\nTest image " + str(i + 1) + " prediction:\n")

            predi, probs = predict(testImgsX[i], modelSavePath, showImg=False)

            for j in range(len(classes)):
                print(str(classes[j]) + " : " + str(round(probs[j], 4)) + "%")
                compProbs[j] += probs[j]

            maxi = 0
            for j in range(len(testImgsY[0])):
                if testImgsY[i][j] == 1:  # The right class
                    maxi = j
                    break
            if predi == maxi:
                cnt += 1

        print("% of images that are correct: " + str((cnt / len(testImgsX)) * 100))

    elif c == 2:
        predictImage()

else:
    print("Please enter only 1 or 2")

--------------------------------------------------------------------------------
/Model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Model.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Breast-cancer-detection-using-CNN

Breast cancer constitutes a leading cause of cancer-related deaths worldwide. Accurate diagnosis of cancer from eosin-stained images remains a complex task, as medical professionals often encounter discrepancies in reaching a final verdict. Computer-Aided Diagnosis (CAD) systems offer a means to reduce cost and enhance the efficiency of this intricate process. Traditional classification approaches rely on problem-specific feature extraction methods based on domain knowledge. To address the numerous challenges posed by feature-based techniques, deep learning methods have emerged as significant alternatives.

We propose a method for the classification of hematoxylin and eosin-stained breast biopsy images using Convolutional Neural Networks (CNNs).
Our method classifies images into four categories: normal tissue, benign lesion, in situ carcinoma, and invasive carcinoma, as well as performing a binary classification of carcinoma versus non-carcinoma. The network architecture is meticulously designed to extract information at various scales, encompassing both individual nuclei and the overall tissue organization. This design enables the seamless integration of our proposed system with whole-slide histology images. Our method achieves an accuracy of 77.8% for the four-class classification and demonstrates a sensitivity of 95.6% for cancer cases.

To use this project:

1. You'll need Python 3 to run the program

2. I've included the preprocessed image data. You can download it from [here](https://drive.google.com/open?id=17LR9ssbENit-3vsEAM63FptNasB5AHrr). Place the 5 files you just downloaded in the folder containing the `.py` file

3. Use `pip install package-name` to install the packages below

4. You need to have the following Python packages installed
   * keras
   * tensorflow (either the CPU or GPU version will do)
   * PIL
   * numpy

5. You can modify the default hyperparameters by editing the variables between the `#` markers in the first few lines of `BreastCancer.py`

To run the program, navigate to the folder on the command line and use the following command:
```
python BreastCancer.py
```
I've also included a pretrained model. To test your own image or one of the samples with it, copy the image into the folder with the `.py` file, rename it to `my_image.jpg`, and then choose the option to test your own image by following the on-screen prompts
--------------------------------------------------------------------------------
/Samples/.gitignore:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/Samples/InSitu1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/InSitu1.jpg
--------------------------------------------------------------------------------
/Samples/InSitu2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/InSitu2.jpg
--------------------------------------------------------------------------------
/Samples/Invasive1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Invasive1.jpg
--------------------------------------------------------------------------------
/Samples/Invasive2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Invasive2.jpg
--------------------------------------------------------------------------------
/Samples/Normal1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Normal1.jpg
--------------------------------------------------------------------------------
/Samples/Normal2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/Normal2.jpg
--------------------------------------------------------------------------------
/Samples/benign1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/benign1.jpg
--------------------------------------------------------------------------------
/Samples/benign2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/Samples/benign2.jpg
--------------------------------------------------------------------------------
/my_model3.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rishiswethan/Cancer-detection-using-CNN/cf22a8aa42cf536422e5314ad3b0bd4205c166b0/my_model3.h5
--------------------------------------------------------------------------------
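
The README describes testing a custom image through the interactive menu in `BreastCancer.py`. For readers who prefer to call the pretrained model from their own code (importing `BreastCancer` directly would trigger the menu), here is a minimal, self-contained sketch. It mirrors the crop-and-average idea of `predictImage`, but it is a sketch, not the project's code: it averages the raw softmax outputs instead of re-normalizing them as `softmaxToProbs` does, and it assumes `my_model3.h5` is in the working directory, a compatible old Keras/TensorFlow version, and an input image of at least 2048x1536 pixels so that a 3x4 grid of 512x512 crops fits. The sample path `Samples/InSitu1.jpg` is only illustrative.

```python
import numpy as np
from PIL import Image
from keras.models import load_model

CLASSES = ["Benign", "InSitu", "Invasive", "Normal"]


def crop_grid(img, tile=512, rows=3, cols=4):
    """Split the image into a rows x cols grid of tile x tile crops."""
    # int8 cast mirrors getCropImgs in BreastCancer.py (kept for consistency with the pretrained model)
    z = np.asarray(img, dtype=np.int8)
    return [z[tile * i:tile * (i + 1), tile * j:tile * (j + 1), :]
            for i in range(rows) for j in range(cols)]


def classify(img_path, model_path='my_model3.h5'):
    model = load_model(model_path)
    # same preprocessing as predictImage: crop, cast to float, scale by 255
    crops = np.divide(np.array(crop_grid(Image.open(img_path)), np.float32), 255.)
    preds = model.predict(crops)   # one softmax vector per crop, shape (12, 4)
    avg = preds.mean(axis=0)       # average the 12 crop predictions
    for name, p in zip(CLASSES, avg):
        print("%s : %.2f%%" % (name, 100 * p))
    return CLASSES[int(np.argmax(avg))]


if __name__ == '__main__':
    print("Predicted class:", classify('Samples/InSitu1.jpg'))
```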