├── .gitignore
├── README.md
├── aug
    ├── __init__.py
    ├── augment.py
    ├── elastic_tran.py
    ├── elastic_tran_each_mask.py
    └── morphological_postprocessing.py
├── eval.py
├── eval_stage2.py
├── input_data.py
├── input_pred_data.py
├── make_ground_truth.py
├── metrics.py
├── nets
    ├── __init__.py
    └── unet.py
├── train.py
├── utils
    ├── checkmate.py
    ├── image_utils.py
    ├── inspect_checkpoint.py
    ├── inspect_tfrecord.py
    ├── morphological_util.py
    ├── oper_utils.py
    ├── oper_utils2.py
    └── unet_ensemble.py
└── validation_tool
    ├── diff_gt_mask.py
    └── make_mask_from_csv.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | *.pyc
107 | models/
108 | result/
109 | _dataset/
110 | temp/
111 | 
112 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Kaggle Competitions
 2 | 
 3 | ## 2018-Data-Science-Bowl
 4 | - [Find the nuclei in divergent images to advance medical discovery](https://www.kaggle.com/c/data-science-bowl-2018)
 5 | 
 6 | ## Description
 7 | - The 2018 Data Science Bowl offers our most ambitious mission yet: create an algorithm to automate nucleus detection.
 8 | - By automating nucleus detection, you could help unlock cures faster—from rare disorders to the common cold.
 9 | - Identifying the cells’ nuclei is the starting point for most analyses because most of the human body’s 30 trillion cells contain a nucleus full of DNA, 
10 | the genetic code that programs each cell.
 11 | - Teams will work to automate the process of identifying nuclei, which will allow for more efficient drug testing, 
12 | shortening the 10 years it takes for each new drug to come to market. 
13 | - Teams will create a computer model that can identify a range of nuclei across varied conditions. 
14 | 
15 | ## Evaluation
16 | [https://www.kaggle.com/c/data-science-bowl-2018#evaluation](https://www.kaggle.com/c/data-science-bowl-2018#evaluation)
17 | 
18 | ## Submission File
19 | - In order to reduce the submission file size, our metric uses run-length encoding on the pixel values. 
 20 | - You will submit pairs of values that contain a start position and a run length. 
21 | E.g. '1 3' implies starting at pixel 1 and running a total of 3 pixels (1,2,3).
22 | - The competition format requires a space delimited list of pairs. 
23 | For example, '1 3 10 5' implies pixels 1,2,3,10,11,12,13,14 are to be included in the mask. 
24 | The pixels are one-indexed and numbered from top to bottom, then left to right: 1 is pixel (1,1), 2 is pixel (2,1), etc.
25 | - The metric checks that the pairs are sorted, positive, and the decoded pixel values are not duplicated. It also checks that no two predicted masks for the same image are overlapping.
26 | - The file should contain a header and have the following format. 
27 | Each row in your submission represents a single predicted nucleus segmentation for the given ImageId.
28 | ```
29 | ImageId,EncodedPixels  
30 | 0114f484a16c152baa2d82fdd43740880a762c93f436c8988ac461c5c9dbe7d5,1 1  
31 | 0999dab07b11bc85fb8464fc36c947fbd8b5d6ec49817361cb780659ca805eac,1 1  
32 | 0999dab07b11bc85fb8464fc36c947fbd8b5d6ec49817361cb780659ca805eac,2 3 8 9  
33 | etc...
34 | ```
35 | 
36 | ## About
37 | - The Data Science Bowl, presented by Booz Allen and Kaggle, is the world’s premier data science for social good competition.
38 | - The Data Science Bowl brings together data scientists, technologists, domain experts, 
39 | and organizations to take on the world’s challenges with data and technology. 
40 | - During a 90-day period, participants, either alone or working in teams, 
41 | gain access to unique data sets to develop algorithms that address a specific challenge. 
42 | 
43 | ## Timeline
44 | - April 9th, 2018 - Entry deadline and Team merger deadline.
45 | - April 11, 2018 - Stage one deadline and stage two data release. Your model must be finalized and uploaded to Kaggle by this deadline.
46 | - April 16, 2018 - Final submission deadline.
47 | 
48 | ## Reference
49 | - https://www.kaggle.com/c/data-science-bowl-2018/discussion/54741 - 1st place solution description on Kaggle
50 | - https://github.com/selimsef/dsb2018_topcoders/ - DSB2018 [ods.ai] topcoders 1st place solution repo.
51 | - [Generic U-Net Tensorflow implementation for image segmentation](https://github.com/jakeret/tf_unet)
52 | - [A concise code for training and evaluating Unet using tensorflow+keras](https://github.com/zizhaozhang/unet-tensorflow-keras)
53 | - [Implementation of Segnet, FCN, UNet and other models in Keras](https://github.com/divamgupta/image-segmentation-keras)
54 | - [Dilated U-net](https://chuckyee.github.io/cardiac-segmentation/)
55 | - https://github.com/preritj/segmentation
56 | 
57 | 
58 | ## Quick Start
59 | - Download dataset from [Here](https://www.kaggle.com/c/data-science-bowl-2018/data)
 60 | - Merge the individual mask files into one file. -> python make_ground_truth.py
61 | - Augment1 -> python aug/elastic_tran.py for Elastic Transform
62 | - Augment2 -> python aug/augment.py
63 | - python train.py
64 | - python eval.py for evaluation
65 |   
66 | 
67 | ## Notice
 68 | - This repository does not produce good results; nevertheless, it may serve as a basic starting point.
69 | - You should look closely at the data for better results.
70 | - I used the materials from several references.
71 | 


--------------------------------------------------------------------------------
/aug/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ace19-dev/2018-Data-Science-Bowl/104552aa06bcf6faeead1a443e7d5f3b3231bcb1/aug/__init__.py


--------------------------------------------------------------------------------
/aug/augment.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import argparse
  4 | import tqdm
  5 | import uuid
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | import tensorflow as tf
 10 | 
 11 | # For using image generation
 12 | from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
 13 | 
 14 | from utils.image_utils import read_image
 15 | 
 16 | FLAGS = None
 17 | 
 18 | RANDOM_SEED = 54989
 19 | 
 20 | 
 21 | def read_train_data_properties(source_dir):
 22 |     """Read basic properties of training images and masks"""
 23 |     tmp = []
 24 |     for i, dir_name in enumerate(next(os.walk(source_dir))[1]):
 25 |         img_dir = os.path.join(source_dir, dir_name, 'images')
 26 |         mask_dir = os.path.join(source_dir, dir_name, 'gt_mask')
 27 |         img_name = next(os.walk(img_dir))[2][0]
 28 |         mask_name = next(os.walk(mask_dir))[2][0]
 29 |         img_name_id = os.path.splitext(img_name)[0]
 30 |         img_path = os.path.join(img_dir, img_name)
 31 |         mask_path = os.path.join(mask_dir, mask_name)
 32 |         img_shape = read_image(img_path).shape
 33 |         tmp.append(['{}'.format(img_name_id), img_shape[0], img_shape[1], img_shape[2],
 34 |                     img_path, img_dir, mask_path, mask_dir])
 35 | 
 36 |     train_df = pd.DataFrame(tmp, columns=['img_id', 'img_height', 'img_width', 'num_channels',
 37 |                                           'image_path', 'image_dir', 'mask_path', 'mask_dir'])
 38 |     return train_df
 39 | 
 40 | 
 41 | def make_aug_dir(prefix_name):
 42 |     randomString = str(uuid.uuid4()).replace("-", "")
 43 |     _new = FLAGS.aug_prefix + prefix_name + randomString
 44 | 
 45 |     return _new
 46 | 
 47 | 
 48 | def generate_images(image_generator, src_path, target_dir, seed=None):
 49 |     """Generate new images."""
 50 |     img = load_img(src_path, interpolation='nearest')
 51 |     x = img_to_array(img)
 52 |     x = x.reshape((1,) + x.shape)  # this is a Numpy array
 53 | 
 54 |     if not os.path.exists(target_dir):
 55 |         os.makedirs(target_dir)
 56 | 
 57 |     # Generate new set of images
 58 |     batches = 1
 59 |     for batch in image_generator.flow(x,
 60 |                                       batch_size=1,
 61 |                                       shuffle=False,
 62 |                                       seed=seed,
 63 |                                       save_to_dir=target_dir):
 64 | 
 65 |         batches += 1
 66 |         if batches > 1:
 67 |             break  # otherwise the generator would loop indefinitely
 68 | 
 69 | 
 70 | def main(_):
 71 |     img_gen = ImageDataGenerator(# rotation_range=90.,
 72 |                                  width_shift_range=0.05,
 73 |                                  height_shift_range=0.05,
 74 |                                  brightness_range=[1.0, 1.2],
 75 |                                  fill_mode='reflect',
 76 |                                  horizontal_flip=True,
 77 |                                  vertical_flip=True)
 78 | 
 79 |     train_info = read_train_data_properties(FLAGS.source_dir)
 80 | 
 81 |     # image_augmentation
 82 |     for i, filename in tqdm.tqdm(enumerate(train_info['image_path']), total=len(train_info)):
 83 |         _name = os.path.basename(filename)
 84 |         for n in range(FLAGS.aug_count):
 85 |             seed = np.random.randint(RANDOM_SEED)
 86 |             data_path = os.path.join(FLAGS.target_dir, make_aug_dir(_name[:10]))
 87 | 
 88 |             target_img_dir = os.path.join(data_path, 'images')
 89 |             target_mask_dir = os.path.join(data_path, 'gt_mask')
 90 | 
 91 |             generate_images(img_gen, train_info['image_path'].loc[i], target_img_dir, seed=seed)
 92 |             generate_images(img_gen, train_info['mask_path'].loc[i], target_mask_dir, seed=seed)
 93 | 
 94 | 
 95 | if __name__ == '__main__':
 96 |     parser = argparse.ArgumentParser()
 97 |     parser.add_argument(
 98 |         '--source_dir',
 99 |         default='../../../dl_data/nucleus/stage1_train_elastic',
100 |         type=str,
101 |         help="Train Data directory")
102 | 
103 |     parser.add_argument(
104 |         '--target_dir',
105 |         default='../../../dl_data/nucleus/stage1_train_aug',
106 |         type=str,
107 |         help="Train Data directory")
108 | 
109 |     parser.add_argument(
110 |         '--aug_prefix',
111 |         default='aug_',
112 |         type=str,
113 |         help="prefix name of augmentation")
114 | 
115 |     parser.add_argument(
116 |         '--aug_count',
117 |         type=int,
118 |         default=2,
119 |         help="Count of augmentation")
120 | 
121 |     FLAGS, unparsed = parser.parse_known_args()
122 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


--------------------------------------------------------------------------------
/aug/elastic_tran.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import argparse
  4 | import tqdm
  5 | import cv2
  6 | import uuid
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | 
 11 | from scipy.ndimage.interpolation import map_coordinates
 12 | from scipy.ndimage.filters import gaussian_filter
 13 | from tqdm import tqdm
 14 | 
 15 | FLAGS = None
 16 | 
 17 | def main(_):
 18 |     train_ids = next(os.walk(FLAGS.source_dir))[1]
 19 | 
 20 |     print('Getting and resizing train images and masks ... ')
 21 |     sys.stdout.flush()
 22 | 
 23 |     os.mkdir(FLAGS.target_dir)
 24 | 
 25 |     for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
 26 |         for aug_count in range (0, FLAGS.aug_count):
 27 |             path = FLAGS.source_dir + id_
 28 |             image_ = cv2.imread(path + '/images/' + id_ + '.png')
 29 |             # image_ = cv2.cvtColor(image_, cv2.COLOR_RGBA2GRAY)
 30 |             # imshow(image_)
 31 |             # plt.show()
 32 |             mask_ = cv2.imread(path + '/gt_mask/' + id_ + '.png')
 33 |             # imshow(mask_)
 34 |             # plt.show()
 35 | 
 36 |             image = np.concatenate((image_, mask_), axis=2)
 37 | 
 38 |             alpha = image.shape[1] * 2
 39 |             sigma = image.shape[1] * 0.08
 40 |             alpha_affine = image.shape[1] * 0.05
 41 |             random_state = np.random.RandomState(None)
 42 | 
 43 |             shape = image.shape
 44 |             shape_size = shape[:2]
 45 | 
 46 |             # Random affine
 47 |             center_square = np.float32(shape_size) // 2
 48 |             square_size = min(shape_size) // 3
 49 |             pts1 = np.float32(
 50 |                 [center_square + square_size, [center_square[0] + square_size, center_square[1] - square_size],
 51 |                  center_square - square_size])
 52 |             pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape).astype(np.float32)
 53 |             M = cv2.getAffineTransform(pts1, pts2)
 54 |             image = cv2.warpAffine(image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101)
 55 | 
 56 |             dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha
 57 |             dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha
 58 |             dz = np.zeros_like(dx)
 59 | 
 60 |             x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
 61 |             indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1))
 62 | 
 63 |             im_merge_t = map_coordinates(image, indices, order=1, mode='reflect').reshape(shape)
 64 | 
 65 |             im_t = im_merge_t[..., 0:3]
 66 |             # imshow(im_t)
 67 |             # plt.show()
 68 |             mask_t = im_merge_t[..., 3:6]
 69 |             mask_t = cv2.cvtColor(mask_t, cv2.COLOR_RGB2GRAY)
 70 |             # imshow(mask_t)
 71 |             # plt.show()
 72 | 
 73 |             randomString = str(uuid.uuid4()).replace("-", "")
 74 | 
 75 |             new_id = FLAGS.aug_prefix + randomString + id_[39:]
 76 |             os.mkdir(FLAGS.target_dir + new_id)
 77 |             os.mkdir(FLAGS.target_dir + new_id + '/images/')
 78 |             os.mkdir(FLAGS.target_dir + new_id + '/gt_mask/')
 79 |             cv2.imwrite(FLAGS.target_dir + new_id + '/images/' + new_id + '.png', im_t)
 80 |             cv2.imwrite(FLAGS.target_dir + new_id + '/gt_mask/' + new_id + '.png', mask_t)
 81 | 
 82 | 
 83 | if __name__ == '__main__':
 84 |     parser = argparse.ArgumentParser()
 85 |     parser.add_argument(
 86 |         '--source_dir',
 87 |         default='../../../dl_data/nucleus/stage1_train/',
 88 |         type=str,
 89 |         help="Train Data directory")
 90 | 
 91 |     parser.add_argument(
 92 |         '--target_dir',
 93 |         default='../../../dl_data/nucleus/stage1_train_elastic/',
 94 |         type=str,
 95 |         help="Train Data directory")
 96 | 
 97 |     parser.add_argument(
 98 |         '--aug_prefix',
 99 |         default='elastic_',
100 |         type=str,
101 |         help="prefix name of augmentation")
102 | 
103 |     parser.add_argument(
104 |         '--aug_count',
105 |         type=int,
106 |         default=2,
107 |         help="Count of augmentation")
108 | 
109 |     FLAGS, unparsed = parser.parse_known_args()
110 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


--------------------------------------------------------------------------------
/aug/elastic_tran_each_mask.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import argparse
  4 | import tqdm
  5 | import cv2
  6 | import uuid
  7 | 
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | 
 11 | from scipy.ndimage.interpolation import map_coordinates
 12 | from scipy.ndimage.filters import gaussian_filter
 13 | from tqdm import tqdm
 14 | 
 15 | FLAGS = None
 16 | 
 17 | def main(_):
 18 |     train_ids = next(os.walk(FLAGS.train_dir))[1]
 19 |     train_ids.sort()
 20 | 
 21 |     print('Getting and resizing train images and masks ... ')
 22 |     sys.stdout.flush()
 23 | 
 24 |     for aug_count in range(0, FLAGS.aug_count):
 25 |         print('elastic transformation step {}'.format(aug_count + 1))
 26 |         for n, id_ in tqdm(enumerate(train_ids), total=len(train_ids)):
 27 |             path = FLAGS.train_dir + id_
 28 |             image_ = cv2.imread(path + '/images/' + id_ + '.png')
 29 |             print('image name : ', path + '/images/' + id_ + '.png')
 30 | 
 31 |             # Read Masks
 32 |             flag = False
 33 |             maks_list = next(os.walk(path + '/masks/'))[2]
 34 |             print('maks size : ', len(maks_list))
 35 | 
 36 |             for mask_file in maks_list:
 37 | 
 38 |                 mask_ = cv2.imread(path + '/masks/' + mask_file)
 39 |                 if flag:
 40 |                     image = np.concatenate((image, mask_), axis=2)
 41 |                 else:
 42 |                     image = np.concatenate((image_, mask_), axis=2)
 43 |                     flag = True
 44 | 
 45 |             # image = np.concatenate((image_, mask_), axis=2)
 46 | 
 47 |             alpha = image.shape[1] * 2
 48 |             sigma = image.shape[1] * 0.08
 49 |             alpha_affine = image.shape[1] * 0.05
 50 |             random_state = np.random.RandomState(None)
 51 | 
 52 |             shape = image.shape
 53 |             shape_size = shape[:2]
 54 | 
 55 |             # Random affine
 56 |             center_square = np.float32(shape_size) // 2
 57 |             square_size = min(shape_size) // 3
 58 |             pts1 = np.float32(
 59 |                 [center_square + square_size, [center_square[0] + square_size, center_square[1] - square_size],
 60 |                  center_square - square_size])
 61 |             pts2 = pts1 + random_state.uniform(-alpha_affine, alpha_affine, size=pts1.shape).astype(np.float32)
 62 |             M = cv2.getAffineTransform(pts1, pts2)
 63 |             try:
 64 |                 image = cv2.warpAffine(image, M, shape_size[::-1], borderMode=cv2.BORDER_REFLECT_101)
 65 |             except:
 66 |                 print('exception')
 67 |                 continue
 68 | 
 69 |             dx = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha
 70 |             dy = gaussian_filter((random_state.rand(*shape) * 2 - 1), sigma) * alpha
 71 |             dz = np.zeros_like(dx)
 72 | 
 73 |             x, y, z = np.meshgrid(np.arange(shape[1]), np.arange(shape[0]), np.arange(shape[2]))
 74 |             indices = np.reshape(y + dy, (-1, 1)), np.reshape(x + dx, (-1, 1)), np.reshape(z, (-1, 1))
 75 | 
 76 |             im_merge_t = map_coordinates(image, indices, order=1, mode='reflect').reshape(shape)
 77 | 
 78 |             randomString = str(uuid.uuid4()).replace("-", "")
 79 |             new_id = id_[:10] + FLAGS.aug_prefix + randomString
 80 |             os.mkdir(FLAGS.train_dir + new_id)
 81 |             os.mkdir(FLAGS.train_dir + new_id + '/images/')
 82 |             im_t = im_merge_t[..., 0:3]
 83 |             cv2.imwrite(FLAGS.train_dir + new_id + '/images/' + new_id + '.png', im_t)
 84 | 
 85 |             index = 3
 86 |             os.mkdir(FLAGS.train_dir + new_id + '/masks/')
 87 |             for mask_file in next(os.walk(path + '/masks/'))[2]:
 88 |                 mask_t = im_merge_t[..., index:index+3]
 89 |                 mask_t = cv2.cvtColor(mask_t, cv2.COLOR_RGB2GRAY)
 90 |                 index = index + 3
 91 |                 cv2.imwrite(FLAGS.train_dir + new_id + '/masks/' + mask_file, mask_t)
 92 | 
 93 | 
 94 | if __name__ == '__main__':
 95 |     parser = argparse.ArgumentParser()
 96 |     parser.add_argument(
 97 |         '--train_dir',
 98 |         default='../../../dl_data/nucleus/stage1_train_valid/',
 99 |         type=str,
100 |         help="Train Data directory")
101 | 
102 |     parser.add_argument(
103 |         '--aug_prefix',
104 |         default='_elastic_',
105 |         type=str,
106 |         help="prefix name of augmentation")
107 | 
108 |     parser.add_argument(
109 |         '--aug_count',
110 |         type=int,
111 |         default=2,
112 |         help="Count of augmentation")
113 | 
114 |     FLAGS, unparsed = parser.parse_known_args()
115 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


--------------------------------------------------------------------------------
/aug/morphological_postprocessing.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import os
  3 | import sys
  4 | import argparse
  5 | from tqdm import tqdm
  6 | 
  7 | from itertools import product
  8 | import numpy as np
  9 | import skimage.morphology as morph
 10 | from skimage.filters import threshold_otsu
 11 | import scipy.ndimage as ndi
 12 | from scipy.stats import itemfreq
 13 | from PIL import Image
 14 | 
 15 | from utils.morphological_util import get_ground_truth, overlay_contours, overlay_masks
 16 | 
 17 | import tensorflow as tf
 18 | 
 19 | 
 20 | FLAGS = None
 21 | 
 22 | 
 23 | 
 24 | def plot_list(images, labels):
 25 | 
 26 |     n_img = len(images)
 27 |     '''
 28 |     n_lab = len(labels)
 29 |     n = n_lab+n_img
 30 |     plt.figure(figsize=(12,8))
 31 |     for i, image in enumerate(images):
 32 |         plt.subplot(1,n,i+1)
 33 |         plt.imshow(image)
 34 |     for j, label in enumerate(labels):
 35 |         plt.subplot(1,n,n_img+j+1)
 36 |         plt.imshow(label, cmap='nipy_spectral')
 37 |     plt.show()
 38 |     '''
 39 | 
 40 | # calculates the average size of the nuclei.
 41 | # We will need it to choose the structuring element for our postprocessing
 42 | def mean_blob_size(mask):
 43 |     labels, labels_nr = ndi.label(mask)
 44 |     if labels_nr < 2:
 45 |         mean_area = 1
 46 |         mean_radius = 1
 47 |     else:
 48 |         mean_area = int(itemfreq(labels)[1:, 1].mean())
 49 |         mean_radius = int(np.round(np.sqrt(mean_area) / np.pi))
 50 |     return mean_area, mean_radius
 51 | 
 52 | 
 53 | def clean_mask_v1(m, c):
 54 |     m_b = m > threshold_otsu(m)
 55 |     c_b = c > threshold_otsu(c)
 56 | 
 57 |     m_ = np.where(m_b | c_b, 1, 0)
 58 |     m_ = ndi.binary_fill_holes(m_)
 59 |     m_ = np.where(c_b & (~m_b), 0, m_)
 60 | 
 61 |     area, radius = mean_blob_size(m_b)
 62 |     m_ = morph.binary_opening(m_, selem=morph.disk(0.25 * radius))
 63 |     return m_
 64 | 
 65 | 
 66 | def pad_mask(mask, pad):
 67 |     if pad <= 1:
 68 |         pad = 2
 69 |     h, w = mask.shape
 70 |     h_pad = h + 2 * pad
 71 |     w_pad = w + 2 * pad
 72 |     mask_padded = np.zeros((h_pad, w_pad))
 73 |     mask_padded[pad:pad + h, pad:pad + w] = mask
 74 |     mask_padded[pad - 1, :] = 1
 75 |     mask_padded[pad + h + 1, :] = 1
 76 |     mask_padded[:, pad - 1] = 1
 77 |     mask_padded[:, pad + w + 1] = 1
 78 | 
 79 |     return mask_padded
 80 | 
 81 | 
 82 | def crop_mask(mask, crop):
 83 |     if crop <= 1:
 84 |         crop = 2
 85 |     h, w = mask.shape
 86 |     mask_cropped = mask[crop:h - crop, crop:w - crop]
 87 |     return mask_cropped
 88 | 
 89 | 
 90 | def drop_artifacts(mask_after, mask_pre, min_coverage=0.5):
 91 |     connected, nr_connected = ndi.label(mask_after)
 92 |     mask = np.zeros_like(mask_after)
 93 |     for i in range(1, nr_connected + 1):
 94 |         conn_blob = np.where(connected == i, 1, 0)
 95 |         initial_space = np.where(connected == i, mask_pre, 0)
 96 |         blob_size = np.sum(conn_blob)
 97 |         initial_blob_size = np.sum(initial_space)
 98 |         coverage = float(initial_blob_size) / float(blob_size)
 99 |         if coverage > min_coverage:
100 |             mask = mask + conn_blob
101 |         else:
102 |             mask = mask + initial_space
103 |     return mask
104 | 
105 | 
def clean_mask_v2(m, c):
    """Clean a mask map using a contour map; returns a binary mask.

    Steps: Otsu-binarise both inputs, fill their union, close it, clear
    contour-only pixels and open it, merge the result with the binarised
    mask, and finally drop blobs that are poorly covered by that mask.
    Presumably ``m`` and ``c`` are the predicted mask and contour images of
    one sample - confirm against the caller.
    """
    # threshold
    m_b = m > threshold_otsu(m)
    c_b = c > threshold_otsu(c)

    # combine contours and masks and fill the cells
    m_ = np.where(m_b | c_b, 1, 0)
    m_ = ndi.binary_fill_holes(m_)

    # Both structuring elements are scaled by the same radius; it is computed
    # once (it was previously recomputed from the identical m_b).
    _, radius = mean_blob_size(m_b)

    # close what wasn't closed before
    # Structuring elements are passed positionally: the keyword was renamed
    # from `selem` to `footprint` in scikit-image, the position is stable.
    struct_size = int(1.25 * radius)
    m_padded = pad_mask(m_, pad=struct_size)
    m_padded = morph.binary_closing(m_padded, morph.disk(struct_size))
    m_ = crop_mask(m_padded, crop=struct_size)

    # open to cut the real cells from the artifacts
    struct_size = int(0.75 * radius)
    m_ = np.where(c_b & (~m_b), 0, m_)
    m_padded = pad_mask(m_, pad=struct_size)
    m_padded = morph.binary_opening(m_padded, morph.disk(struct_size))
    m_ = crop_mask(m_padded, crop=struct_size)

    # join the connected cells with what we had at the beginning
    m_ = np.where(m_b | m_, 1, 0)
    m_ = ndi.binary_fill_holes(m_)

    # drop all the cells that weren't present at least in 25% of area in the initial mask
    m_ = drop_artifacts(m_, m_b, min_coverage=0.25)

    return m_
140 | 
141 | 
def good_markers_v1(m, c):
    """Return ``m`` with every pixel above the Otsu threshold of ``c`` set to 0.

    The Otsu threshold of ``m`` that used to be computed here was never
    used and has been removed.
    """
    # threshold
    c_b = c > threshold_otsu(c)

    return np.where(c_b, 0, m)
149 | 
150 | 
def good_markers_v2(m_b, c):
    """Return ``m_b`` with every pixel above the Otsu threshold of ``c`` set to 0."""
    on_contour = c > threshold_otsu(c)
    return np.where(on_contour, 0, m_b)
158 | 
159 | 
def good_markers_v3(m_b, c):
    """Build labelled markers from ``m_b`` and the contour map ``c``.

    Pixels above the Otsu threshold of ``c`` are cleared from ``m_b``, the
    remainder is eroded with a disk scaled by the mean blob radius, and the
    surviving regions are numbered with ``ndi.label``.
    """
    # threshold
    c_b = c > threshold_otsu(c)

    mk_ = np.where(c_b, 0, m_b)

    _, radius = mean_blob_size(m_b)
    struct_size = int(0.25 * radius)
    struct_el = morph.disk(struct_size)
    m_padded = pad_mask(mk_, pad=struct_size)
    # Structuring element passed positionally: the keyword was renamed from
    # `selem` to `footprint` in scikit-image, the position is stable.
    m_padded = morph.erosion(m_padded, struct_el)
    mk_ = crop_mask(m_padded, crop=struct_size)
    mk_, _ = ndi.label(mk_)
    return mk_
174 | 
175 | 
176 | ## Problem 4 we are dropping markers on many images with small cells
177 | ## Good distance
def good_distance_v1(m_b):
    """Return the Euclidean distance transform of ``m_b``."""
    return ndi.distance_transform_edt(m_b)
181 | 
182 | 
def watershed_v1(mask, contour):
    """Split the cleaned mask into labelled regions with a marker-based watershed.

    Markers come from ``good_markers_v3``; the flooded surface is the
    negated distance transform of the cleaned mask.
    """
    try:
        # Newer scikit-image releases expose watershed only here.
        from skimage.segmentation import watershed
    except ImportError:
        # Older releases only have it in skimage.morphology.
        watershed = morph.watershed

    cleaned_mask = clean_mask_v2(mask, contour)
    good_markers = good_markers_v3(cleaned_mask, contour)
    good_distance = good_distance_v1(cleaned_mask)

    water = watershed(-good_distance, good_markers, mask=cleaned_mask)

    return water
191 | 
192 | 
def add_dropped_water_blobs(water, mask_cleaned):
    """Add regions of ``mask_cleaned`` that ``water`` does not cover as new labels.

    The uncovered area is split into connected components, which are
    numbered after the current maximum label of ``water``.
    """
    covered = (water > 0).astype(np.uint8)
    missing, _ = ndi.label(mask_cleaned - covered)
    # Shift the new component ids past the labels already in use.
    missing = np.where(missing, missing + water.max(), 0)
    return water + missing
200 | 
201 | 
def drop_artifacts_per_label(labels, initial_mask):
    """Apply ``drop_artifacts`` to every label of ``labels`` separately.

    Each label ``1 .. labels.max()`` is filtered against ``initial_mask`` on
    its own and written back under its original label value.
    """
    cleaned = np.zeros_like(labels)
    for label_id in range(1, labels.max() + 1):
        selected = labels == label_id
        blob = np.where(selected, 1, 0)
        blob_initial = np.where(selected, initial_mask, 0)
        cleaned = cleaned + drop_artifacts(blob, blob_initial) * label_id
    return cleaned
210 | 
211 | 
def watershed_v2(mask, contour):
    """Marker-based watershed plus recovery of dropped blobs and artifact removal.

    After the watershed, parts of the cleaned mask that received no label
    are added as new labels, and every label is then filtered against the
    Otsu-binarised input ``mask``.
    """
    try:
        # Newer scikit-image releases expose watershed only here.
        from skimage.segmentation import watershed
    except ImportError:
        # Older releases only have it in skimage.morphology.
        watershed = morph.watershed

    cleaned_mask = clean_mask_v2(mask, contour)
    good_markers = good_markers_v3(cleaned_mask, contour)
    good_distance = good_distance_v1(cleaned_mask)

    water = watershed(-good_distance, good_markers, mask=cleaned_mask)

    water = add_dropped_water_blobs(water, cleaned_mask)

    m_thresh = threshold_otsu(mask)
    initial_mask_binary = (mask > m_thresh).astype(np.uint8)
    water = drop_artifacts_per_label(water, initial_mask_binary)
    return water
225 | 
226 | 
def relabel(img):
    """Renumber the non-zero labels of a 2-D label image to ``1 .. k``, in place.

    Labels keep their relative order and 0 stays 0. ``img`` is modified in
    place and also returned.

    Previously the new number was the label's position among *all* unique
    values, so an image without any 0 pixel had its smallest label turned
    into 0. The per-pixel Python loop is replaced by a vectorised lookup.
    """
    img.shape[1]  # same requirement as before: at least a 2-D array

    present = np.unique(img)
    nonzero_labels = present[present != 0]

    # Rank of every pixel value among the sorted non-zero labels, 1-based.
    renumbered = np.searchsorted(nonzero_labels, img) + 1
    renumbered[img == 0] = 0

    img[...] = renumbered
    return img
240 | 
241 | 
def drop_small(img, min_size):
    """Remove objects smaller than ``min_size`` from ``img`` and renumber the rest."""
    kept = morph.remove_small_objects(img, min_size=min_size)
    return relabel(kept)
245 | 
246 | 
def fill_holes_per_blob(image):
    """Fill the holes of every label ``1 .. image.max()`` separately.

    Each label is hole-filled on its own binary mask and added back under
    its label value. Returns a new array; ``image`` is not modified.
    """
    image_cleaned = np.zeros_like(image)
    for i in range(1, image.max() + 1):
        mask = np.where(image == i, 1, 0)
        # Call through the public scipy.ndimage namespace, as the rest of this
        # file does; the `ndi.morphology` sub-namespace is deprecated.
        mask = ndi.binary_fill_holes(mask)
        image_cleaned = image_cleaned + mask * i
    return image_cleaned
254 | 
255 | 
def watershed_v3(mask, contour):
    """Full post-processing: watershed, blob recovery, artifact and size filtering.

    Extends the marker-based watershed with recovery of unlabelled parts of
    the cleaned mask, per-label filtering against the Otsu-binarised input
    ``mask``, removal of objects smaller than 20 pixels, and per-label hole
    filling.
    """
    try:
        # Newer scikit-image releases expose watershed only here.
        from skimage.segmentation import watershed
    except ImportError:
        # Older releases only have it in skimage.morphology.
        watershed = morph.watershed

    cleaned_mask = clean_mask_v2(mask, contour)
    good_markers = good_markers_v3(cleaned_mask, contour)
    good_distance = good_distance_v1(cleaned_mask)

    labels = watershed(-good_distance, good_markers, mask=cleaned_mask)

    labels = add_dropped_water_blobs(labels, cleaned_mask)

    m_thresh = threshold_otsu(mask)
    initial_mask_binary = (mask > m_thresh).astype(np.uint8)
    labels = drop_artifacts_per_label(labels, initial_mask_binary)

    labels = drop_small(labels, min_size=20)
    labels = fill_holes_per_blob(labels)

    return labels
273 | 
274 | 
275 | 
def main(_):
    """Walk through the mask post-processing experiments, then export watershed labels.

    The function loads ground truth, contours and masks for
    ``FLAGS.images_dir`` / ``FLAGS.subdir_name``, plots every intermediate
    step of the cleaning, marker, distance and watershed experiments with
    ``plot_list`` and finally writes one ``<entry>/water/<name>.png`` per
    entry found under the training directory.

    Args:
        _: unused positional argument (the function is started via ``tf.app.run``).
    """
    ground_truth = get_ground_truth(images_dir=FLAGS.images_dir,
                                    subdir_name=FLAGS.subdir_name,
                                    target_dir=None)

    contours = overlay_contours(images_dir=FLAGS.images_dir,
                                subdir_name=FLAGS.subdir_name,
                                target_dir=None)

    masks = overlay_masks(images_dir=FLAGS.images_dir,
                          subdir_name=FLAGS.subdir_name,
                          target_dir=None)


    ############################
    # Problem 1 -> dirty masks
    ############################
    idx = 5
    dirty = masks[idx], contours[idx], ground_truth[idx]
    plot_list(images=[dirty[0], dirty[1]],
              labels=[dirty[2]])

    #################################
    # Problem 2 -> dirty at border
    #################################
    idx = 44
    dirty_at_border = masks[idx], contours[idx], ground_truth[idx]
    plot_list(images=[dirty_at_border[0], dirty_at_border[1]],
              labels=[dirty_at_border[2]])

    ################
    # Approach V1
    ################
    m, c, t = dirty

    ########################################################################
    #  The steps below are what clean_mask_v1(m, c) bundles into one function:
    #
    #  m, c, t = dirty
    #  m_ = clean_mask_v1(m, c)
    #  plot_list(images=[m, c, m_], labels=[t])
    #
    ########################################################################
    # Let's proceed to cleaning.
    # First we binarize both the mask and the contours using the global Otsu thresholding method:
    m_b = m > threshold_otsu(m)
    c_b = c > threshold_otsu(c)
    plot_list(images=[m_b, c_b], labels=[])

    # combine binarized masks and contours
    m_ = np.where(m_b | c_b, 1, 0)
    plot_list(images=[m_], labels=[])

    # fill the holes that remained
    m_ = ndi.binary_fill_holes(m_)
    plot_list(images=[m_], labels=[])

    # Now that the holes in the cells are filled we can detach the cells again,
    # because we have the contour information
    m_ = np.where(c_b & (~m_b), 0, m_)
    plot_list(images=[m_], labels=[])

    # We are left with artifacts. Let's use binary_opening to drop them.
    area, radius = mean_blob_size(m_b)
    m_ = morph.binary_opening(m_, selem=morph.disk(0.25 * radius))
    plot_list(images=[m_], labels=[])



    # It works to a certain extent, but it removes things that
    # were not connected and/or things around the borders.

    ################
    # Approach V2
    ################
    # Let's start by binarizing and filling the holes again
    m, c, t = dirty_at_border

    ########################################################################
    #  The steps below are what clean_mask_v2(m, c) bundles into one function:
    #
    #  m, c, t = dirty_at_border
    #  m_ = clean_mask_v2(m, c)
    #
    #  plot_list(images=[m, c, m_], labels=[t])
    #
    ########################################################################

    # threshold
    m_b = m > threshold_otsu(m)
    c_b = c > threshold_otsu(c)

    # combine contours and masks and fill the cells
    m_ = np.where(m_b | c_b, 1, 0)
    m_ = ndi.binary_fill_holes(m_)
    plot_list(images=[m_], labels=[])


    # Now we use binary_closing to fill what was not closed by fill_holes.
    # The helpers pad_mask and crop_mask are used to deal with problems around the edges.

    # close what wasn't closed before
    area, radius = mean_blob_size(m_b)
    struct_size = int(1.25 * radius)
    struct_el = morph.disk(struct_size)
    m_padded = pad_mask(m_, pad=struct_size)
    m_padded = morph.binary_closing(m_padded, selem=struct_el)
    m_ = crop_mask(m_padded, crop=struct_size)
    plot_list(images=[m_], labels=[])

    # We closed everything, but that is way more than we wanted.
    # Let's now cut it with our contours and see what we get.
    m_ = np.where(c_b & (~m_b), 0, m_)
    plot_list(images=[m_], labels=[])

    # We can use binary_opening with a larger structuring element. Let's try that.
    area, radius = mean_blob_size(m_b)
    struct_size = int(0.75 * radius)
    struct_el = morph.disk(struct_size)
    m_padded = pad_mask(m_, pad=struct_size)
    m_padded = morph.binary_opening(m_padded, selem=struct_el)
    m_ = crop_mask(m_padded, crop=struct_size)
    plot_list(images=[m_, m], labels=[t])

    # join the connected cells with what we had at the beginning
    m_ = np.where(m_b | m_, 1, 0)
    m_ = ndi.binary_fill_holes(m_)
    plot_list(images=[m_, m], labels=[t])

    m_ = drop_artifacts(m_, m_b, min_coverage=0.25)
    plot_list(images=[m_, m, c], labels=[t])


    ############################################
    # Problem 3 -> not everything gets filled
    ############################################

    ############################################
    # Problem 4 -> some cells get connected
    #
    # Ideas:
    # - work more with dilation
    # - do better around borders
    # - drop some cells after watershed with the drop_artifacts function
    #
    # TODO: clean_mask_v3 still has to be developed.
    # Go ahead and try to improve it. The floor is yours.
    #
    # def clean_mask_v3(m, c):
    #     return
    #
    ############################################


    ###################
    # Good Markers
    ###################
    # In this approach we simply cut the masks with the contours and use that as markers.
    # Simple but really effective.
    for idx in range(5):
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v1(mask, contour)
        gt = ground_truth[idx]

        plot_list(images=[mask, contour, cleaned_mask, good_markers], labels=[gt])

    # Problem 1 -> building markers on the initial mask when we have a better mask
    for idx in range(5):
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v2(cleaned_mask, contour)
        gt = ground_truth[idx]

        plot_list(images=[mask, contour, cleaned_mask, good_markers], labels=[gt])

    # Problem 2 -> some markers are too large and connected
    m, c, t = dirty
    cleaned_mask = clean_mask_v2(m, c)
    c_b = c > threshold_otsu(c)
    mk_ = np.where(c_b, 0, cleaned_mask)
    plot_list(images=[m, c, mk_], labels=[])

    # For instance the two markers at the top left are still connected and would be treated
    # as a single marker by the watershed. Now let's erode the markers.
    # NOTE(review): m_b was last assigned from the `dirty_at_border` sample above, while
    # m/c/cleaned_mask here come from `dirty`; presumably cleaned_mask was intended - confirm.
    area, radius = mean_blob_size(m_b)
    struct_size = int(0.25 * radius)
    struct_el = morph.disk(struct_size)
    m_padded = pad_mask(mk_, pad=struct_size)
    m_padded = morph.erosion(m_padded, selem=struct_el)
    mk_ = crop_mask(m_padded, crop=struct_size)
    plot_list(images=[m, c, mk_], labels=[])

    # Labelling those markers and comparing them with the ground truth gives the following
    # (note: this rebinds the unused parameter `_` to the number of labels found)
    mk_, _ = ndi.label(mk_)
    plot_list(images=[cleaned_mask], labels=[mk_, t])


    #########################################################
    # So the markers and cleaned mask look really good!
    #########################################################
    for idx in range(5):
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v3(cleaned_mask, contour)
        gt = ground_truth[idx]

        plot_list(images=[mask, contour, cleaned_mask], labels=[good_markers, gt])



    # Problem 3 -> still some barely connected markers are left
    for idx in [25, 27]:
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v3(cleaned_mask, contour)
        gt = ground_truth[idx]

        plot_list(images=[mask, contour, cleaned_mask], labels=[good_markers, gt])

    #########################################################################
    # Problem 4 -> we are dropping markers on many images with small cells
    #
    # Ideas
    # - play with binary closing/opening
    # - involve contours and/or centers in this
    # - we will assume that lost markers are in fact small cells that don't need to be divided,
    #   and we will get back all the cells that were dropped in the watershed
    # - use local maxima on the distance transform
    #
    # TODO: good_markers_v4 still has to be developed.
    # def good_markers_v4(m_b, c):
    #     return
    #
    #########################################################################

    #####################
    # Good distance
    #####################
    # Here I have no better idea than to use the binary distance from the background.
    # Feel free to improve on that!
    #
    # Idea
    # - investigate imposing some gradients on the original image or a good clean mask
    #
    for idx in range(5):
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v3(cleaned_mask, contour)
        good_distance = good_distance_v1(cleaned_mask)
        gt = ground_truth[idx]

        plot_list(images=[cleaned_mask, good_distance], labels=[good_markers, gt])


    ########################
    # Watershed
    ########################
    # cleaned_mask / good_markers / good_distance are computed here only for plotting;
    # the watershed_v* call receives the raw mask and contour.
    for idx in range(5):
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v3(cleaned_mask, contour)
        good_distance = good_distance_v1(cleaned_mask)

        water = watershed_v1(mask, contour)

        gt = ground_truth[idx]

        plot_list(images=[cleaned_mask, good_distance], labels=[good_markers, water, gt])


    # Problem 1 -> some cells are dumped

    # Problem 2 -> some artifacts from mask cleaning remain
    # Unfortunately some cells are dropped, some cells are oversegmented and
    # some artifacts from the mask cleaning still remain.
    # The good thing is we can deal with some of those problems by using ideas we have already tried.
    for idx in range(5):
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v3(cleaned_mask, contour)
        good_distance = good_distance_v1(cleaned_mask)

        water = watershed_v2(mask, contour)

        gt = ground_truth[idx]

        plot_list(images=[cleaned_mask, good_distance], labels=[good_markers, water, gt])


    # Problem 3 -> some cells are oversegmented and small cell chunks remain
    # Some small pieces of cells may remain at this point, or the cells could get oversegmented.
    # We deal with that by dropping blobs that are too small to be a cell.
    for idx in range(5):
        print(idx)
        mask = masks[idx]
        contour = contours[idx]
        cleaned_mask = clean_mask_v2(mask, contour)
        good_markers = good_markers_v3(cleaned_mask, contour)
        good_distance = good_distance_v1(cleaned_mask)

        water = watershed_v3(mask, contour)

        gt = ground_truth[idx]

        plot_list(images=[cleaned_mask, good_distance], labels=[good_markers, water, gt])

    # Export: run watershed_v3 for every entry of the training directory and save the labels.
    # NOTE(review): masks/contours are indexed by a running counter, which assumes they are
    # ordered exactly like the glob result - confirm against overlay_masks/overlay_contours.
    idx = 0
    train_dir = os.path.join(FLAGS.images_dir, FLAGS.subdir_name)
    for filename in tqdm(glob.glob('{}/*'.format(train_dir))):

        # last "/"-separated path component, used as the output file stem
        imagename = filename.split("/")[-1]

        mask = masks[idx]
        contour = contours[idx]

        water = watershed_v3(mask, contour)
        # NOTE(review): the uint8 cast wraps label ids above 255
        img = Image.fromarray(water.astype('uint8'))
        water_path = (filename + '/water/')
        if not os.path.exists(water_path):
            os.makedirs(water_path)
        img.save(os.path.join(water_path, imagename + '.png'))

        # The string literal below is disabled code (export of the intermediate images).
        '''
        cleaned_mask = clean_mask_v2(mask, contour)
        img = Image.fromarray(cleaned_mask.astype('uint8'))
        cleaned_mask_path = (filename + '/cleaned_mask/')
        if not os.path.exists(cleaned_mask_path):
            os.makedirs(cleaned_mask_path)
        img.save(os.path.join(cleaned_mask_path, imagename + '.png'))

        good_markers = good_markers_v3(cleaned_mask, contour)
        img = Image.fromarray(good_markers)
        good_markers_path = (filename + '/good_markers/')
        if not os.path.exists(good_markers_path):
            os.makedirs(good_markers_path)
        img.save(os.path.join(good_markers_path, imagename + '.png'))

        good_distance = good_distance_v1(cleaned_mask)
        img = Image.fromarray(good_distance.astype('uint8'))
        good_distance_path = (filename + '/good_distance/')
        if not os.path.exists(good_distance_path):
            os.makedirs(good_distance_path)
        img.save(os.path.join(good_distance_path, imagename + '.png'))
        '''
        idx = idx + 1
635 | 
636 | 
637 | 
638 | 
639 | 
640 | 
if __name__ == '__main__':
    # Command-line flags; the parsed namespace is published as the module-level FLAGS
    # and unrecognised arguments are forwarded to tf.app.run.
    parser = argparse.ArgumentParser()

    parser.add_argument('--images_dir', type=str,
                        default='../../../dl_data/nucleus',
                        help="Image directory")
    parser.add_argument('--subdir_name', type=str,
                        default='stage1_train',
                        help="Sub directory name")
    # A '--target_dir' flag (default 'stage1_train') is intentionally not registered.
    parser.add_argument('--img_size', type=int, default=256,
                        help="Image height and width")
    parser.add_argument('--aug_prefix', type=str, default='_aug_',
                        help="prefix name of augmentation")
    parser.add_argument('--aug_count', type=int, default=2,
                        help="Count of augmentation")

    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
681 | 


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
  1 | """ Collection of methods to compute the score.
  2 | 
  3 | 1. We start with a true and predicted mask, corresponding to one train image.
  4 | 
  5 | 2. The true mask is segmented into different objects. Here lies a main source
  6 | of error. Overlapping or touching nuclei are not separated but are labeled as
  7 | one object. This means that the target mask can contain less objects than
  8 | those that have been originally identified by humans.
  9 | 
 10 | 3. In the same manner the predicted mask is segmented into different objects.
 11 | 
 12 | 4. We compute all intersections between the objects of the true and predicted
 13 | masks. Starting with the largest intersection area we assign true objects to
 14 | predicted ones, until there are no true/pred objects left that overlap.
 15 | We then compute for each true/pred object pair their corresponding intersection
 16 | over union (iou) ratio.
 17 | 
5. Given some threshold t we count the object pairs that have an iou > t, which
yields the number of true positives: tp(t). True objects that have no partner are
counted as false negatives: fn(t). Likewise, predicted objects without a counterpart
are counted as false positives: fp(t).
 22 | 
 23 | 6. Now, we compute the precision tp(t)/(tp(t)+fp(t)+fn(t)) for t=0.5,0.55,0.60,...,0.95
 24 | and take the mean value as the final precision (score).
 25 | """
 26 | 
 27 | 
 28 | import os
 29 | import argparse
 30 | import sys
 31 | import datetime
 32 | import csv
 33 | 
 34 | from six.moves import xrange
 35 | from skimage.transform import resize
 36 | from skimage.morphology import label
 37 | # from scipy.ndimage.measurements import label
 38 | import pandas as pd
 39 | 
 40 | import numpy as np
 41 | import tensorflow as tf
 42 | 
 43 | from nets.unet import Unet_32_512, Unet_64_1024
 44 | from utils.oper_utils2 import read_test_data_properties, mask_to_rle, \
 45 |                                 trsf_proba_to_binary, rle_to_mask
 46 | from input_pred_data import Data
 47 | from input_pred_data import DataLoader
 48 | 
 49 | import scipy.ndimage as ndi
 50 | 
 51 | FLAGS = None
 52 | 
 53 | def morpho_op(BW):
 54 |     s = [[0,1,0],[1,1,1],[0,1,0]]#structuring element (diamond shaped)
 55 |     m_morfo = ndi.binary_opening(BW,structure=s,iterations=1)
 56 |     m_morfo = ndi.binary_closing(m_morfo,structure=s,iterations=1)
 57 |     M_filled = ndi.binary_fill_holes(m_morfo,structure=s)
 58 |     return M_filled
 59 | 
 60 | def main(_):
 61 |     # specify GPU
 62 |     if FLAGS.gpu_index:
 63 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
 64 |         os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index
 65 | 
 66 |     tf.logging.set_verbosity(tf.logging.INFO)
 67 | 
 68 |     # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
 69 |     gpu_options = tf.GPUOptions(allow_growth=True)
 70 |     config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
 71 |     sess = tf.InteractiveSession(config=config)
 72 | 
 73 |     X = tf.placeholder(tf.float32, shape=[None, FLAGS.img_size, FLAGS.img_size, 3], name="X")
 74 |     mode = tf.placeholder(tf.bool, name="mode")  # training or not
 75 | 
 76 |     if FLAGS.use_64_channel:
 77 |         pred = Unet_64_1024(X, mode, FLAGS)
 78 |     else:
 79 |         pred = Unet_32_512(X, mode, FLAGS)
 80 |     # evaluation = tf.argmax(logits, 1)
 81 | 
 82 |     sess.run(tf.global_variables_initializer())
 83 | 
 84 |     # Restore variables from training checkpoints.
 85 |     saver = tf.train.Saver()
 86 |     checkpoint_path = None
 87 |     if FLAGS.checkpoint_dir and FLAGS.checkpoint_file:
 88 |         checkpoint_path = FLAGS.checkpoint_dir+'/'+FLAGS.checkpoint_file
 89 |     else:
 90 |         ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
 91 |         if ckpt and ckpt.model_checkpoint_path:
 92 |             checkpoint_path = ckpt.model_checkpoint_path
 93 | 
 94 |     if checkpoint_path:
 95 |         saver.restore(sess, checkpoint_path)
 96 |         global_step = checkpoint_path.split('/')[-1].split('-')[-1]
 97 |         print('Successfully loaded model from %s at step=%s.' % (
 98 |             checkpoint_path, global_step))
 99 |     else:
100 |         print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
101 |         return
102 | 
103 | 
104 |     ############################
105 |     # Get data
106 |     ############################
107 |     raw = Data(FLAGS.data_dir)
108 |     test_data = DataLoader(raw.get_data(), FLAGS.img_size, FLAGS.batch_size)
109 | 
110 |     iterator = tf.data.Iterator.from_structure(test_data.dataset.output_types,
111 |                                                test_data.dataset.output_shapes)
112 |     next_batch = iterator.get_next()
113 | 
114 |     # Ops for initializing the two different iterators
115 |     test_init_op = iterator.make_initializer(test_data.dataset)
116 | 
117 |     test_batches_per_epoch = int(test_data.data_size / FLAGS.batch_size)
118 |     if test_data.data_size % FLAGS.batch_size > 0:
119 |         test_batches_per_epoch += 1
120 | 
121 | 
122 |     ##################################################
123 |     # start test & make csv file.
124 |     ##################################################
125 | 
126 |     # Read basic properties of test images.
127 |     test_df = read_test_data_properties(FLAGS.data_dir, 'images')
128 | 
129 |     test_pred_proba = []
130 |     test_pred_fnames = []
131 | 
132 |     start_time = datetime.datetime.now()
133 |     print("start test: {}".format(start_time))
134 | 
135 |     # Initialize iterator with the test dataset
136 |     sess.run(test_init_op)
137 |     for i in range(test_batches_per_epoch):
138 |         batch_xs, fnames = sess.run(next_batch)
139 |         prediction = sess.run(pred,
140 |                               feed_dict={
141 |                                   X: batch_xs,
142 |                                   mode: False}
143 |                               )
144 |         test_pred_proba.extend(prediction)
145 |         test_pred_fnames.extend(fnames)
146 | 
147 |     end_time = datetime.datetime.now()
148 |     print('end test: {}'.format(test_data.data_size, end_time))
149 |     print('test waste time: {}'.format(end_time - start_time))
150 | 
151 |     # Transform propabilities into binary values 0 or 1.
152 |     test_pred = trsf_proba_to_binary(test_pred_proba)
153 | 
154 | 
155 |     # Resize predicted masks to original image size.
156 |     test_pred_to_original_size = []
157 |     for i in range(len(test_pred)):
158 |         res_mask = trsf_proba_to_binary(
159 |             resize(np.squeeze(test_pred[i]),
160 |                    (test_df.loc[i, 'img_height'], test_df.loc[i, 'img_width']),
161 |                    mode='constant',preserve_range=True)
162 |         )
163 |         #
164 |         # fill the holes that remained
165 |         res_mask_f = morpho_op(res_mask)
166 |         # Rescale to 0-255 and convert to uint8
167 |         res_mask_f = (255.0 * res_mask_f).astype(np.uint8)
168 |         res_mask |= res_mask_f
169 |         #
170 |         test_pred_to_original_size.append(res_mask)
171 | 
172 |     test_pred_to_original_size = np.array(test_pred_to_original_size)
173 | 
174 | 
175 |     # # Inspect a test prediction and check run length encoding.
176 |     # for n, id_ in enumerate(test_df['img_id']):
177 |     #     fname = test_pred_fnames[n]
178 |     #     mask = test_pred_to_original_size[n]
179 |     #     rle = list(mask_to_rle(mask))
180 |     #     mask_rec = rle_to_mask(rle, mask.shape)
181 |     #     print('no:{}, {} -> Run length encoding: {} matches, {} misses'.format(
182 |     #         n, fname, (mask_rec == mask).sum(), (mask_rec != mask).sum()))
183 | 
184 | 
185 |     # Run length encoding of predicted test masks.
186 |     test_pred_rle = []
187 |     test_pred_ids = []
188 |     for n, _id in enumerate(test_df['img_id']):
189 |         min_object_size = 20 * test_df.loc[n, 'img_height'] * test_df.loc[n, 'img_width'] / (256 * 256)
190 |         rle = list(mask_to_rle(test_pred_to_original_size[n], min_object_size=min_object_size))
191 |         test_pred_rle.extend(rle)
192 |         test_pred_ids.extend([_id] * len(rle))
193 | 
194 |     # Create submission DataFrame
195 |     if not os.path.exists(FLAGS.result_dir):
196 |         os.makedirs(FLAGS.result_dir)
197 | 
198 |     sub = pd.DataFrame()
199 |     sub['ImageId'] = test_pred_ids
200 |     sub['EncodedPixels'] = pd.Series(test_pred_rle).apply(lambda x: ' '.join(str(y) for y in x))
201 |     sub.to_csv(os.path.join(FLAGS.result_dir, 'submission-nucleus_det-' + global_step + '.csv'),
202 |                index=False)
203 |     sub.head()
204 | 
205 | 
if __name__ == '__main__':
    def _str2bool(value):
        """Parse a boolean command-line value.

        ``type=bool`` is not usable with argparse because ``bool('False')``
        is True; this converter maps the usual spellings explicitly and
        rejects anything else.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', 'on', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)

    # Command-line flags; the parsed namespace is published as the module-level FLAGS
    # and unrecognised arguments are forwarded to tf.app.run.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir',
        # default='/home/ace19/dl-data/nucleus_detection/stage1_train',
        default='../../dl_data/nucleus/stage1_test',
        type=str,
        help="Data directory")

    parser.add_argument(
        '--batch_size',
        default=24,
        type=int,
        help="Batch size")

    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        default=os.getcwd() + '/models',
        help='Directory to read checkpoint.')

    parser.add_argument(
        '--checkpoint_file',
        type=str,
        # default='unet.ckpt-20',
        default=None,
        help='checkpoint file name.')

    parser.add_argument(
        '--result_dir',
        type=str,
        default=os.getcwd() + '/result',
        help='Directory to write submission.csv file.')

    parser.add_argument(
        '--img_size',
        type=int,
        default=256,
        help="Image height and width")

    parser.add_argument(
        '--gpu_index',
        type=str,
        # default='0',
        default=None,
        help="Set the gpu index. If you not sepcify then auto")

    parser.add_argument(
        '--use_64_channel',
        # _str2bool instead of bool: with type=bool, "--use_64_channel False"
        # would still evaluate to True.
        type=_str2bool,
        # default=True,
        default=False,
        help="If you set True then use the Unet_64_1024. otherwise use the Unet_32_512")

    parser.add_argument(
        '--conv_padding',
        type=str,
        default='same',
        # default='valid',
        help="conv padding. if your img_size is 572 and, conv_padding is valid then the label_size is 388")

    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


--------------------------------------------------------------------------------
/eval_stage2.py:
--------------------------------------------------------------------------------
  1 | """ Collection of methods to compute the score.
  2 | 
  3 | 1. We start with a true and predicted mask, corresponding to one train image.
  4 | 
  5 | 2. The true mask is segmented into different objects. Here lies a main source
  6 | of error. Overlapping or touching nuclei are not separated but are labeled as
  7 | one object. This means that the target mask can contain less objects than
  8 | those that have been originally identified by humans.
  9 | 
 10 | 3. In the same manner the predicted mask is segmented into different objects.
 11 | 
 12 | 4. We compute all intersections between the objects of the true and predicted
 13 | masks. Starting with the largest intersection area we assign true objects to
 14 | predicted ones, until there are no true/pred objects left that overlap.
 15 | We then compute for each true/pred object pair their corresponding intersection
 16 | over union (iou) ratio.
 17 | 
5. Given some threshold t we count the object pairs that have an iou > t, which
yields the number of true positives: tp(t). True objects that have no partner are
counted as false negatives: fn(t). Likewise, predicted objects without a counterpart
are counted as false positives: fp(t).
 22 | 
 23 | 6. Now, we compute the precision tp(t)/(tp(t)+fp(t)+fn(t)) for t=0.5,0.55,0.60,...,0.95
 24 | and take the mean value as the final precision (score).
 25 | """
 26 | 
 27 | 
 28 | import os
 29 | import argparse
 30 | import sys
 31 | import datetime
 32 | import csv
 33 | 
 34 | from six.moves import xrange
 35 | from skimage.transform import resize
 36 | from skimage.morphology import label
 37 | # from scipy.ndimage.measurements import label
 38 | import pandas as pd
 39 | 
 40 | import numpy as np
 41 | import tensorflow as tf
 42 | 
 43 | from nets.unet import Unet_32_512, Unet_64_1024
 44 | from utils.oper_utils2 import read_test_data_properties, mask_to_rle, \
 45 |                                 trsf_proba_to_binary, rle_to_mask
 46 | from input_pred_data import Data
 47 | from input_pred_data import DataLoader
 48 | 
 49 | from PIL import Image
 50 | import scipy.ndimage as ndi
 51 | 
 52 | FLAGS = None
 53 | 
 54 | def get_image_size(imageId):
 55 |     image_path = os.path.join(FLAGS.data_dir, imageId, 'images')
 56 |     image = os.listdir(image_path)
 57 |     img = Image.open(os.path.join(image_path, image[0]))
 58 | 
 59 |     return img.height, img.width
 60 | 
 61 | def morpho_op(BW):
 62 |     s = [[0,1,0],[1,1,1],[0,1,0]]#structuring element (diamond shaped)
 63 |     m_morfo = ndi.binary_opening(BW,structure=s,iterations=1)
 64 |     m_morfo = ndi.binary_closing(m_morfo,structure=s,iterations=1)
 65 |     M_filled = ndi.binary_fill_holes(m_morfo,structure=s)
 66 |     return M_filled
 67 | 
 68 | def main(_):
 69 |     # specify GPU
 70 |     if FLAGS.gpu_index:
 71 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
 72 |         os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index
 73 | 
 74 |     tf.logging.set_verbosity(tf.logging.INFO)
 75 | 
 76 |     # TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
 77 |     gpu_options = tf.GPUOptions(allow_growth=True)
 78 |     config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
 79 |     sess = tf.InteractiveSession(config=config)
 80 | 
 81 |     X = tf.placeholder(tf.float32, shape=[None, FLAGS.img_size, FLAGS.img_size, 3], name="X")
 82 |     mode = tf.placeholder(tf.bool, name="mode")  # training or not
 83 | 
 84 |     if FLAGS.use_64_channel:
 85 |         pred = Unet_64_1024(X, mode, FLAGS)
 86 |     else:
 87 |         pred = Unet_32_512(X, mode, FLAGS)
 88 |     # evaluation = tf.argmax(logits, 1)
 89 | 
 90 |     sess.run(tf.global_variables_initializer())
 91 | 
 92 | 
 93 |     ##################################################
 94 |     # Restore variables from training checkpoints.
 95 |     ##################################################
 96 |     saver = tf.train.Saver()
 97 | 
 98 |     checkpoint_path = None
 99 |     if FLAGS.checkpoint_dir and FLAGS.checkpoint_file:
100 |         checkpoint_path = FLAGS.checkpoint_dir+'/'+FLAGS.checkpoint_file
101 |     else:
102 |         ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
103 |         if ckpt and ckpt.model_checkpoint_path:
104 |             checkpoint_path = ckpt.model_checkpoint_path
105 | 
106 |     if checkpoint_path:
107 |         saver.restore(sess, checkpoint_path)
108 |         global_step = checkpoint_path.split('/')[-1].split('-')[-1]
109 |         print('Successfully loaded model from %s at step=%s.' % (
110 |             checkpoint_path, global_step))
111 |     else:
112 |         print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
113 |         return
114 | 
115 | 
116 |     ############################
117 |     # Get data
118 |     ############################
119 |     raw = Data(FLAGS.data_dir)
120 |     test_data = DataLoader(raw.get_data(), FLAGS.img_size, FLAGS.batch_size)
121 | 
122 |     iterator = tf.data.Iterator.from_structure(test_data.dataset.output_types,
123 |                                                test_data.dataset.output_shapes)
124 |     next_batch = iterator.get_next()
125 | 
126 |     # Ops for initializing the two different iterators
127 |     test_init_op = iterator.make_initializer(test_data.dataset)
128 | 
129 |     test_batches_per_epoch = int(test_data.data_size / FLAGS.batch_size)
130 |     if test_data.data_size % FLAGS.batch_size > 0:
131 |         test_batches_per_epoch += 1
132 | 
133 | 
134 |     ##################################################
135 |     # prepare
136 |     ##################################################
137 | 
138 |     # Create result_dir
139 |     if not os.path.exists(FLAGS.result_dir):
140 |         os.makedirs(FLAGS.result_dir)
141 | 
142 |     # Delete existed submission file
143 |     filename = os.path.join(FLAGS.result_dir, 'submission-nucleus_stage2-' + global_step + '.csv')
144 |     if os.path.exists(filename):
145 |         os.remove(filename)
146 | 
147 | 
148 |     ##################################################
149 |     # start test & make csv file.
150 |     ##################################################
151 | 
152 |     start_time = datetime.datetime.now()
153 |     print("start test: {}".format(start_time))
154 | 
155 |     total_process_count = test_data.data_size
156 |     process_count = 0
157 | 
158 |     # Initialize iterator with the test dataset
159 |     sess.run(test_init_op)
160 |     for i in range(test_batches_per_epoch):
161 | 
162 |         batch_xs, fnames = sess.run(next_batch)
163 |         prediction = sess.run(pred,
164 |                               feed_dict={
165 |                                   X: batch_xs,
166 |                                   mode: False}
167 |                               )
168 | 
169 |         # Transform propabilities into binary values 0 or 1.
170 |         test_pred = trsf_proba_to_binary(prediction)
171 | 
172 |         for i in range(len(test_pred)):
173 |             imageId = fnames[i].decode()
174 |             height, width = get_image_size(imageId)
175 | 
176 |             # Resize predicted masks to original image size.
177 |             res_mask = trsf_proba_to_binary(
178 |                 resize(np.squeeze(test_pred[i]), (height, width), mode='constant', preserve_range=True)
179 |             )
180 |             #
181 |             # fill the holes that remained
182 |             res_mask_f = morpho_op(res_mask)
183 |             # Rescale to 0-255 and convert to uint8
184 |             res_mask_f = (255.0 * res_mask_f).astype(np.uint8)
185 |             res_mask |= res_mask_f
186 |             #
187 |             test_pred_to_original_size = np.array(res_mask)
188 | 
189 |             # Run length encoding of predicted test masks.
190 |             test_pred_rle = []
191 |             test_pred_ids = []
192 | 
193 |             # calculate the minimum object size
194 |             min_object_size = 20 * height * width / (FLAGS.img_size * FLAGS.img_size)
195 | 
196 |             # rle
197 |             rle = list(mask_to_rle(test_pred_to_original_size))
198 | 
199 |             if(len(rle) == 0):
200 |                 print('Number of predicted masks is ', len(rle))
201 |                 test_pred_rle.extend([[1, 1]])
202 |                 test_pred_ids.extend([imageId] * 1)
203 | 
204 |             else:
205 |                 test_pred_rle.extend(rle)
206 |                 test_pred_ids.extend([imageId] * len(rle))
207 | 
208 |             sub = pd.DataFrame()
209 |             sub['ImageId'] = test_pred_ids
210 |             sub['EncodedPixels'] = pd.Series(test_pred_rle).apply(lambda x: ' '.join(str(y) for y in x))
211 | 
212 |             if not os.path.isfile(filename):
213 |                 sub.to_csv(filename, index=False)
214 |             else:
215 |                 sub.to_csv(filename, index=False, header=False, mode='a')
216 | 
217 |             process_count += 1
218 |             print('evaluation... %d / %d' % (process_count, total_process_count))
219 | 
220 |     # add bulk data for invalid image
221 |     sub = pd.DataFrame()
222 |     sub['ImageId'] = ['5390acefd575cf9b33413ddf6cbb9ce137ae07dc04616ba24c7b5fe476c827d2']
223 |     sub['EncodedPixels'] = pd.Series([[1, 1]]).apply(lambda x: ' '.join(str(y) for y in x))
224 |     sub.to_csv(filename, index=False, header=False, mode='a')
225 | 
226 |     end_time = datetime.datetime.now()
227 |     print('end test: {}'.format(test_data.data_size, end_time))
228 |     print('test waste time: {}'.format(end_time - start_time))
229 | 
230 | 
if __name__ == '__main__':
    def _str2bool(value):
        """Parse a command-line boolean.

        argparse's ``type=bool`` calls ``bool()`` on the raw string, so any
        non-empty value (including 'False') would be parsed as True.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_dir',
        default='../../dl_data/nucleus/stage2_test_final',
        type=str,
        help="Data directory")

    parser.add_argument(
        '--batch_size',
        default=4,
        type=int,
        help="Batch size")

    parser.add_argument(
        '--checkpoint_dir',
        type=str,
        default=os.getcwd() + '/models',
        help='Directory to read checkpoint.')

    parser.add_argument(
        '--checkpoint_file',
        type=str,
        default=None,
        help='checkpoint file name.')

    parser.add_argument(
        '--result_dir',
        type=str,
        default=os.getcwd() + '/result',
        help='Directory to write submission.csv file.')

    parser.add_argument(
        '--img_size',
        type=int,
        default=512,
        help="Image height and width")

    parser.add_argument(
        '--gpu_index',
        type=str,
        default=None,
        help="Set the gpu index. If you not sepcify then auto")

    parser.add_argument(
        '--use_64_channel',
        type=_str2bool,
        default=True,
        help="If you set True then use the Unet_64_1024. otherwise use the Unet_32_512")

    parser.add_argument(
        '--conv_padding',
        type=str,
        default='same',
        help="conv padding. if your img_size is 572 and, conv_padding is valid then the label_size is 388")

    # Flags this script does not know are forwarded to tf.app.run untouched.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
296 | 


--------------------------------------------------------------------------------
/input_data.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | 
  5 | import hashlib
  6 | import os.path
  7 | import random
  8 | 
  9 | import numpy as np
 10 | from skimage import color, io
 11 | from six.moves import xrange  # pylint: disable=redefined-builtin
 12 | import tensorflow as tf
 13 | 
 14 | from tensorflow.python.platform import gfile
 15 | from tensorflow.python.util import compat
 16 | 
 17 | from tensorflow.python.framework import dtypes
 18 | from tensorflow.python.framework.ops import convert_to_tensor
 19 | 
 20 | 
 21 | from utils.oper_utils2 import trsf_proba_to_binary, \
 22 |     imgs_to_grayscale, invert_imgs
 23 | 
 24 | 
 25 | MAX_NUM_WAVS_PER_CLASS = 2**27 - 1  # ~134M
 26 | RANDOM_SEED = 888
 27 | 
 28 | 
 29 | def which_set(filename, validation_percentage):
 30 |     """Determines which data partition the file should belong to.
 31 | 
 32 |     Args:
 33 |       filename: File path of the data sample.
 34 |       validation_percentage: How much of the data set to use for validation.
 35 |     Returns:
 36 |       String, one of 'training', 'validation'.
 37 |     """
 38 |     base_name = os.path.basename(filename)
 39 |     hash_name_hashed = hashlib.sha1(compat.as_bytes(base_name)).hexdigest()
 40 |     percentage_hash = ((int(hash_name_hashed, 16) % (MAX_NUM_WAVS_PER_CLASS + 1)) *
 41 |                        (100.0 / MAX_NUM_WAVS_PER_CLASS))
 42 |     if percentage_hash < validation_percentage:
 43 |         result = 'validation'
 44 |     else:
 45 |         result = 'training'
 46 |     return result
 47 | 
 48 | 
 49 | class Data(object):
 50 |     def __init__(self, data_dir, validation_percentage):
 51 |         self.data_dir = data_dir
 52 |         self._prepare_data_index(validation_percentage)
 53 | 
 54 | 
 55 |     def get_data(self, mode):
 56 |         return self.data_index[mode]
 57 | 
 58 | 
 59 |     def get_size(self, mode):
 60 |         """Calculates the number of samples in the _dataset partition.
 61 |         Args:
 62 |           mode: Which partition, must be 'training', 'validation', or 'testing'.
 63 |         Returns:
 64 |           Number of samples in the partition.
 65 |         """
 66 |         return len(self.data_index[mode])
 67 | 
 68 | 
 69 |     def _prepare_data_index(self, validation_percentage):
 70 |         # Make sure the shuffling and picking of unknowns is deterministic.
 71 |         random.seed(RANDOM_SEED)
 72 |         self.data_index = {'validation': [], 'training': []}
 73 |         data_paths = os.listdir(self.data_dir)
 74 |         for img_path in data_paths:
 75 |             set_index = which_set(img_path, validation_percentage)
 76 |             self.data_index[set_index].append({'image': img_path})
 77 | 
 78 |         # Make sure the ordering is random.
 79 |         for set_index in ['validation', 'training']:
 80 |             random.shuffle(self.data_index[set_index])
 81 | 
 82 | 
 83 | class DataLoader(object):
 84 | 
 85 |     def __init__(self, data_dir, data, img_size, label_size, batch_size, shuffle=True):
 86 | 
 87 |         self.data_size = len(data)
 88 |         images, labels = self._get_data(data_dir, data)
 89 |         self.img_size = img_size
 90 |         self.label_size = label_size
 91 | 
 92 |         # create _dataset, Creating a source
 93 |         dataset = tf.data.Dataset.from_tensor_slices((images, labels))
 94 | 
 95 |         # shuffle the first `buffer_size` elements of the _dataset
 96 |         #  Make sure to call tf.data.Dataset.shuffle() before applying the heavy transformations
 97 |         # (like reading the images, processing them, batching...).
 98 |         if shuffle:
 99 |             dataset = dataset.shuffle(buffer_size= 100 * batch_size)
100 | 
101 |         # distinguish between train/infer. when calling the parsing functions
102 |         # transform to images, preprocess, repeat, batch...
103 |         dataset = dataset.map(self._parse_function, num_parallel_calls=8)
104 | 
105 |         dataset = dataset.prefetch(buffer_size = 10 * batch_size)
106 | 
107 |         # create a new _dataset with batches of images
108 |         dataset = dataset.batch(batch_size)
109 | 
110 |         self.dataset = dataset
111 | 
112 | 
113 |     def _get_data(self, data_dir, data):
114 |         image_paths = np.array(data)
115 |         mask_paths = np.array(data)
116 | 
117 |         for idx, image_path in enumerate(image_paths):
118 |             img_dir = os.path.join(data_dir, image_path['image'], 'images')
119 |             mask_dir = os.path.join(data_dir, image_path['image'], 'gt_mask')
120 | 
121 |             img = os.listdir(img_dir)
122 |             mask = os.listdir(mask_dir)
123 | 
124 |             image_paths[idx] = os.path.join(img_dir, img[0])
125 |             mask_paths[idx] = os.path.join(mask_dir, mask[0])
126 | 
127 |         # convert lists to TF tensor
128 |         image_paths = convert_to_tensor(image_paths, dtype=dtypes.string)
129 |         mask_paths = convert_to_tensor(mask_paths, dtype=dtypes.string)
130 | 
131 |         return image_paths, mask_paths
132 | 
133 | 
134 |     def _parse_function(self, image_file, label_file):
135 |         image_string = tf.read_file(image_file)
136 |         image_decoded = tf.image.decode_png(image_string, channels=3)
137 |         image_resized = tf.image.resize_images(image_decoded,
138 |                                                [self.img_size, self.img_size],
139 |                                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
140 |         image = tf.image.convert_image_dtype(image_resized, dtype=tf.float32)
141 |         # Finally, rescale to [-1,1] instead of [0, 1)
142 |         # image = tf.subtract(image, 0.5)
143 |         # image = tf.multiply(image, 2.0)
144 |         # image = tf.image.rgb_to_grayscale(image)
145 | 
146 | 
147 |         label_string = tf.read_file(label_file)
148 |         label_decoded = tf.image.decode_png(label_string, channels=1)
149 |         label_resized = tf.image.resize_images(label_decoded,
150 |                                                [self.label_size, self.label_size],
151 |                                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
152 |         label = tf.image.convert_image_dtype(label_resized, dtype=tf.float32)
153 |         # Finally, rescale to [-1,1] instead of [0, 1)
154 |         # label = tf.subtract(label, 0.5)
155 |         # label = tf.multiply(label, 2.0)
156 | 
157 |         return image, label


--------------------------------------------------------------------------------
/input_pred_data.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | from __future__ import absolute_import
 16 | from __future__ import division
 17 | from __future__ import print_function
 18 | 
 19 | import os.path
 20 | import random
 21 | 
 22 | import numpy as np
 23 | import tensorflow as tf
 24 | 
 25 | from tensorflow.python.platform import gfile
 26 | from tensorflow.python.framework import dtypes
 27 | from tensorflow.python.framework.ops import convert_to_tensor
 28 | 
 29 | 
 30 | RANDOM_SEED = 888
 31 | 
 32 | HEIGHT = 256
 33 | WIDTH = 256
 34 | 
 35 | 
 36 | class Data(object):
 37 |     def __init__(self, data_dir):
 38 |         self.data_dir = data_dir
 39 |         self._prepare_data_index()
 40 | 
 41 | 
 42 |     def get_data(self):
 43 |         return self.data_index['prediction']
 44 | 
 45 | 
 46 |     def get_size(self):
 47 |         return len(self.data_index['prediction'])
 48 | 
 49 | 
 50 |     def _prepare_data_index(self):
 51 |         random.seed(RANDOM_SEED)
 52 | 
 53 |         self.data_index = {'prediction': []}
 54 |         # Look through all the subfolders to find audio samples
 55 |         search_path = os.path.join(self.data_dir, '*')
 56 |         for image_path in gfile.Glob(search_path):
 57 |             img = os.path.join(image_path, 'images', os.path.basename(image_path)) + '.png'
 58 |             # img = os.path.join(image_path, 'water', os.path.basename(image_path)) + '.png'
 59 |             self.data_index['prediction'].append({'image': img})
 60 | 
 61 | 
 62 | class DataLoader(object):
 63 |     """
 64 |     Wrapper class around the new Tensorflows _dataset pipeline.
 65 | 
 66 |     Handles loading, partitioning, and preparing training data.
 67 |     """
 68 | 
 69 |     def __init__(self, data, img_size, batch_size):
 70 |         self.data_info = {}
 71 | 
 72 |         self.data_size = len(data)
 73 |         images_path, images_name = self._get_data(data)
 74 |         self.img_size = img_size
 75 | 
 76 |         # create _dataset, Creating a source
 77 |         dataset = tf.data.Dataset.from_tensor_slices((images_path, images_name))
 78 | 
 79 |         # distinguish between train/infer. when calling the parsing functions
 80 |         # transform to images, preprocess, repeat, batch...
 81 |         dataset = dataset.map(self._parse_function, num_parallel_calls=8)
 82 | 
 83 |         dataset = dataset.prefetch(buffer_size = 10 * batch_size)
 84 | 
 85 |         # create a new _dataset with batches of images
 86 |         dataset = dataset.batch(batch_size)
 87 | 
 88 |         self.dataset = dataset
 89 | 
 90 | 
 91 |     def _get_data(self, data):
 92 |         # Data will be populated and returned.
 93 |         image_paths = np.array(data)
 94 |         image_names = np.array(data)
 95 | 
 96 |         for idx, image_path in enumerate(image_paths):
 97 |             image_paths[idx] = image_path['image']
 98 |             image_names[idx] = os.path.basename(image_path['image'])[:-4]
 99 | 
100 |         image_paths.sort()
101 |         image_names.sort()
102 | 
103 |         # convert lists to TF tensor
104 |         image_paths = convert_to_tensor(image_paths, dtype=dtypes.string)
105 |         image_names = convert_to_tensor(image_names, dtype=dtypes.string)
106 | 
107 |         return image_paths, image_names
108 | 
109 | 
110 |     def _parse_function(self, image_path, image_name):
111 |         image_string = tf.read_file(image_path)
112 |         image_decoded = tf.image.decode_png(image_string, channels=3)
113 |         image_resized = tf.image.resize_images(image_decoded,
114 |                                                [self.img_size, self.img_size],
115 |                                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
116 |         # image = tf.cast(image_decoded, tf.float32)
117 |         image = tf.image.convert_image_dtype(image_resized, dtype=tf.float32)
118 |         # Finally, rescale to [-1,1] instead of [0, 1)
119 |         # image = tf.subtract(image, 0.5)
120 |         # image = tf.multiply(image, 2.0)
121 |         # image = tf.image.rgb_to_grayscale(image)
122 | 
123 |         return image, image_name
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/make_ground_truth.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | import argparse
  7 | import os
  8 | import sys
  9 | 
 10 | from PIL import Image
 11 | from skimage.io import imread, imshow
 12 | 
 13 | from utils.image_utils import read_image
 14 | 
 15 | import cv2
 16 | import matplotlib.pyplot as plt
 17 | 
 18 | import scipy.ndimage as ndi
 19 | 
 20 | # def get_image_size(data):
 21 | #     image_path = os.path.join(FLAGS.dataset_dir, data, 'images')
 22 | #     image = os.listdir(image_path)
 23 | #     img = Image.open(os.path.join(image_path, image[0]))
 24 | #
 25 | #     return img.height, img.width
 26 | 
 27 | def get_contour(img):
 28 |     # '''
 29 |     img_contour = np.zeros_like(img).astype(np.uint8)
 30 |     # http://opencv-python.readthedocs.io/en/latest/doc/15.imageContours/imageContours.html
 31 |     contours, hierarchy = cv2.findContours(img.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
 32 |     cv2.drawContours(img_contour, contours, -1, (255, 255, 255), 1)
 33 |     return img_contour
 34 |     # '''
 35 |     '''
 36 |     # https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.ndimage.morphology.binary_erosion.html
 37 |     _img_contour = (img / 255.0).astype(np.uint8)
 38 |     erosion = ndi.morphology.binary_erosion(_img_contour, border_value=1).astype(_img_contour.dtype)
 39 |     img_contour = _img_contour - erosion
 40 |     img_contour = (255.0 * img_contour).astype(np.uint8)
 41 |     return img_contour
 42 |     '''
 43 | 
 44 | 
 45 | def morpho_op(BW):
 46 |     s = [[0,1,0],[1,1,1],[0,1,0]]#structuring element (diamond shaped)
 47 |     m_morfo = ndi.binary_opening(BW,structure=s,iterations=1)
 48 |     m_morfo = ndi.binary_closing(m_morfo,structure=s,iterations=1)
 49 |     M_filled = ndi.binary_fill_holes(m_morfo,structure=s)
 50 |     return M_filled
 51 | 
 52 | def main(_):
 53 | 
 54 |     filelist = sorted(os.listdir(FLAGS.dataset_dir))
 55 |     filecount = 1
 56 |     for data in filelist:
 57 |         image_path = os.path.join(FLAGS.dataset_dir, data, 'images', data + '.png')
 58 |         img_shape = read_image(image_path).shape
 59 | 
 60 |         mask_path = os.path.join(FLAGS.dataset_dir, data, 'masks')
 61 |         mask_images = sorted(os.listdir(mask_path))
 62 |         mask = np.zeros((img_shape[0], img_shape[1], 1), dtype=np.bool)
 63 |         if FLAGS.use_contour:
 64 |             contour = np.zeros((img_shape[0], img_shape[1], 1), dtype=np.bool)
 65 |         for mask_file in mask_images:
 66 |             _mask = imread(os.path.join(mask_path, mask_file))
 67 |             #
 68 |             # fill the holes that remained
 69 |             _mask_f = morpho_op(_mask)
 70 |             # Rescale to 0-255 and convert to uint8
 71 |             _mask_f = (255.0 * _mask_f).astype(np.uint8)
 72 |             _mask |= _mask_f
 73 |             # imshow(np.squeeze(_mask2))
 74 |             # plt.show()
 75 |             #
 76 |             _mask = np.expand_dims(_mask, axis=-1)
 77 |             mask = np.maximum(mask, _mask)
 78 |             #
 79 |             if FLAGS.use_contour:
 80 |                 _contour = get_contour(_mask)
 81 |                 contour = np.maximum(contour, _contour)
 82 |                 # imshow(np.squeeze(_contour))
 83 |                 # plt.show()
 84 | 
 85 |         gt_path = os.path.join(FLAGS.ground_truth_dir, data, FLAGS.ground_truth_folder)
 86 |         if not os.path.exists(gt_path):
 87 |             os.makedirs(gt_path)
 88 | 
 89 |         print(">> (%d / %d) %s" % (filecount, filelist.__len__(), data))
 90 |         filecount += 1
 91 | 
 92 |         # imshow(np.squeeze(contour))
 93 |         # plt.show()
 94 | 
 95 |         contour_of_mask = get_contour(mask)
 96 |         # imshow(np.squeeze(contour_of_mask))
 97 |         # plt.show()
 98 | 
 99 |         contour_final = contour - contour_of_mask
100 |         # imshow(np.squeeze(contour_final))
101 |         # plt.show()
102 | 
103 |         #imshow(np.squeeze(mask))
104 |         #plt.show()
105 | 
106 |         #mask2 = mask - contour
107 |         #imshow(np.squeeze(mask2))
108 |         #plt.show()
109 | 
110 |         if FLAGS.use_contour:
111 |             mask = mask - contour_final
112 | 
113 |         mask = np.squeeze(mask)
114 |         # imshow(mask)
115 |         # plt.show()
116 |         img = Image.fromarray(mask)
117 |         img.save(os.path.join(gt_path, data + '.png'))
118 |         # img.show(title=X)
119 | 
120 | 
121 | 
if __name__ == '__main__':
    def _str2bool(value):
        """Parse a command-line boolean.

        argparse's ``type=bool`` calls ``bool()`` on the raw string, so any
        non-empty value (including 'False') would be parsed as True.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected, got %r' % value)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset_dir',
        default='../../dl_data/nucleus/stage1_train',
        type=str,
        help="Data directory")

    parser.add_argument(
        '--ground_truth_dir',
        default='../../dl_data/nucleus/stage1_train',
        type=str,
        help="ground_truth data directory")

    parser.add_argument(
        '--ground_truth_folder',
        default='gt_mask',
        type=str,
        help="ground_truth folder")

    parser.add_argument(
        '--use_contour',
        default=True,
        type=_str2bool,
        help="use contour")

    # Flags this script does not know are forwarded to tf.app.run untouched.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
150 | 
151 | 
152 | 


--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.metrics.pairwise import pairwise_distances
 3 | from tqdm import tqdm
 4 | 
 5 | from utils import decompose
 6 | 
 7 | 
 8 | def iou(gt, pred):
 9 |     gt[gt > 0] = 1.
10 |     pred[pred > 0] = 1.
11 |     intersection = gt * pred
12 |     union = gt + pred
13 |     union[union > 0] = 1.
14 |     intersection = np.sum(intersection)
15 |     union = np.sum(union)
16 |     if union == 0:
17 |         union = 1e-09
18 |     return intersection / union
19 | 
20 | 
def compute_ious(gt, predictions):
    """Return the matrix of pairwise IoUs between decomposed masks.

    Both inputs are split with ``decompose``, every resulting element is
    flattened, and ``iou`` is evaluated for each (gt, prediction) pair; rows
    follow the ground-truth elements and columns the prediction elements.
    """
    gt_parts = decompose(gt)
    pred_parts = decompose(predictions)
    gt_rows = np.asarray([part.flatten() for part in gt_parts])
    pred_rows = np.asarray([part.flatten() for part in pred_parts])
    return pairwise_distances(X=gt_rows, Y=pred_rows, metric=iou)
28 | 
29 | 
def compute_precision_at(ious, threshold):
    """Return hits / (hits + misses) for an IoU matrix at one threshold.

    A row counts as a hit when its best IoU reaches `threshold`. Rows and
    columns whose best IoU stays below it are both counted as misses.
    """
    best_per_column = np.max(ious, axis=0)
    best_per_row = np.max(ious, axis=1)
    hits = np.sum(best_per_row >= threshold)
    row_misses = np.sum(best_per_row < threshold)
    column_misses = np.sum(best_per_column < threshold)
    return float(hits) / (hits + row_misses + column_misses)
37 | 
38 | 
def compute_eval_metric(gt, predictions):
    """Average ``compute_precision_at`` over the IoU thresholds 0.5 to 0.95.

    The pairwise IoU matrix is computed once and evaluated at ten
    thresholds spaced 0.05 apart.
    """
    iou_matrix = compute_ious(gt, predictions)
    thresholds = (0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95)
    total = sum(compute_precision_at(iou_matrix, th) for th in thresholds)
    return total / len(thresholds)
44 | 
45 | 
def intersection_over_union(y_true, y_pred):
    """Mean over all (truth, prediction) pairs of a per-pair IoU score.

    The per-pair score is the sum of the pairwise IoU matrix divided by its
    number of rows. Progress is shown with tqdm.
    """
    per_pair_scores = []
    for truth, predicted in tqdm(list(zip(y_true, y_pred))):
        iou_matrix = compute_ious(truth, predicted)
        per_pair_scores.append(1.0 * np.sum(iou_matrix) / iou_matrix.shape[0])

    return np.mean(per_pair_scores)
54 | 
55 | 
def intersection_over_union_thresholds(y_true, y_pred):
    """Mean of ``compute_eval_metric`` over all (truth, prediction) pairs.

    Progress is shown with tqdm.
    """
    pairs = tqdm(list(zip(y_true, y_pred)))
    return np.mean([compute_eval_metric(truth, predicted) for truth, predicted in pairs])


--------------------------------------------------------------------------------
/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ace19-dev/2018-Data-Science-Bowl/104552aa06bcf6faeead1a443e7d5f3b3231bcb1/nets/__init__.py


--------------------------------------------------------------------------------
/nets/unet.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | 
  5 | from keras.layers import Input, merge, Conv2D, MaxPooling2D, UpSampling2D, Dropout, Cropping2D
  6 | 
  7 | 
  8 | L2_REG = 0.1
  9 | 
 10 | def _conv_conv_pool(input_,
 11 |                    n_filters,
 12 |                    training,
 13 |                    flags,
 14 |                    name,
 15 |                    pool=True,
 16 |                    activation=tf.nn.elu):
 17 |     """{Conv -> BN -> RELU}x2 -> {Pool, optional}
 18 | 
 19 |     Args:
 20 |         input_ (4-D Tensor): (batch_size, H, W, C)
 21 |         n_filters (list): number of filters [int, int]
 22 |         training (1-D Tensor): Boolean Tensor
 23 |         name (str): name postfix
 24 |         pool (bool): If True, MaxPool2D
 25 |         activation: Activaion functions
 26 | 
 27 |     Returns:
 28 |         net: output of the Convolution operations
 29 |         pool (optional): output of the max pooling operations
 30 |     """
 31 |     net = input_
 32 | 
 33 |     with tf.variable_scope("layer{}".format(name)):
 34 |         for i, filters in enumerate(n_filters):
 35 |             # calculate the weight value for kernel_initializer
 36 |             N = net.shape[1] * net.shape[2] * net.shape[3]
 37 |             stddev = np.sqrt(2 / N.value)
 38 | 
 39 |             net = tf.layers.conv2d(
 40 |                 net,
 41 |                 filters,
 42 |                 (3, 3),
 43 |                 activation=None,
 44 |                 padding=flags.conv_padding,     # 'valid' or 'same'
 45 |                 kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG),
 46 |                 kernel_initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32),
 47 |                 name="conv_{}".format(i + 1))
 48 |             net = tf.layers.batch_normalization(
 49 |                 net, training=training, name="bn_{}".format(i + 1))
 50 |             net = activation(net, name="elu{}_{}".format(name, i + 1))
 51 | 
 52 |         if pool is False:
 53 |             return net
 54 | 
 55 |         pool = tf.layers.max_pooling2d(
 56 |             net, (2, 2), strides=(2, 2), name="pool_{}".format(name))
 57 | 
 58 |         return net, pool
 59 | 
 60 | 
 61 | def _get_crop_shape(target, refer):
 62 |     # width, the 3rd dimension
 63 |     cw = (target.get_shape()[2] - refer.get_shape()[2]).value
 64 |     assert (cw >= 0)
 65 |     if cw % 2 != 0:
 66 |         cw1, cw2 = int(cw/2), int(cw/2) + 1
 67 |     else:
 68 |         cw1, cw2 = int(cw/2), int(cw/2)
 69 |     # height, the 2nd dimension
 70 |     ch = (target.get_shape()[1] - refer.get_shape()[1]).value
 71 |     assert (ch >= 0)
 72 |     if ch % 2 != 0:
 73 |         ch1, ch2 = int(ch/2), int(ch/2) + 1
 74 |     else:
 75 |         ch1, ch2 = int(ch/2), int(ch/2)
 76 |     #
 77 |     return (ch1, ch2), (cw1, cw2)
 78 | 
 79 | def _upconv_concat(inputA, input_B, n_filter, flags, name):
 80 |     """Upsample `inputA` and concat with `input_B`
 81 | 
 82 |     Args:
 83 |         input_A (4-D Tensor): (N, H, W, C)
 84 |         input_B (4-D Tensor): (N, 2*H, 2*H, C2)
 85 |         name (str): name of the concat operation
 86 | 
 87 |     Returns:
 88 |         output (4-D Tensor): (N, 2*H, 2*W, C + C2)
 89 |     """
 90 |     up_conv = _upconv_2D(inputA, n_filter, flags, name)
 91 | 
 92 |     # net = tf.concat(
 93 |     #     [up_conv, input_B], axis=-1, name="concat_{}".format(name))
 94 | 
 95 |     ch, cw = _get_crop_shape(input_B, up_conv)
 96 |     crop_conv = Cropping2D(cropping=(ch, cw))(input_B)
 97 |     net = tf.concat([up_conv, crop_conv], axis=-1, name="concat_{}".format(name))
 98 | 
 99 |     return net
100 | 
101 | 
def _upconv_2D(tensor, n_filter, flags, name):
    """Upsample `tensor` with a transposed convolution (kernel 2, stride 2).

    Args:
        tensor (4-D Tensor): (N, H, W, C)
        n_filter (int): number of output filters
        flags: unused, kept for interface compatibility
        name (str): name postfix of the upsampling operation

    Returns:
        output (4-D Tensor): result of the transposed convolution with
            `n_filter` channels
    """
    # The former stddev computation was only used by a disabled initializer
    # and raised when a spatial dimension was unknown, so it was removed.
    return tf.layers.conv2d_transpose(
        tensor,
        filters=n_filter,
        kernel_size=2,
        strides=2,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(L2_REG),
        name="upsample_{}".format(name))
124 | 
125 | 
126 | def Unet_64_1024(X, training, flags=None):  # 512 x 512
127 |     """Build a U-Net architecture
128 | 
129 |     Args:
130 |         X (4-D Tensor): (N, H, W, C)
131 |         training (1-D Tensor): Boolean Tensor is required for batchnormalization layers
132 | 
133 |     Returns:
134 |         output (4-D Tensor): (N, H, W, C) - 512 x 512
135 |             Same shape as the `input` tensor
136 | 
137 |     Notes:
138 |         U-Net: Convolutional Networks for Biomedical Image Segmentation
139 |         https://arxiv.org/abs/1505.04597
140 |     """
141 |     # net = X / 127.5 - 1
142 |     conv1, pool1 = _conv_conv_pool(X, [64, 64], training, flags, name=1)
143 |     conv2, pool2 = _conv_conv_pool(pool1, [128, 128], training, flags, name=2)
144 |     conv3, pool3 = _conv_conv_pool(pool2, [256, 256], training, flags, name=3)
145 |     conv4, pool4 = _conv_conv_pool(pool3, [512, 512], training, flags, name=4)
146 |     conv5 = _conv_conv_pool(pool4, [1024, 1024], training, flags, name=5, pool=False)
147 | 
148 |     up6 = _upconv_concat(conv5, conv4, 512, flags, name=6)
149 |     conv6 = _conv_conv_pool(up6, [512, 512], training, flags, name=6, pool=False)
150 | 
151 |     up7 = _upconv_concat(conv6, conv3, 256, flags, name=7)
152 |     conv7 = _conv_conv_pool(up7, [256, 256], training, flags, name=7, pool=False)
153 | 
154 |     up8 = _upconv_concat(conv7, conv2, 128, flags, name=8)
155 |     conv8 = _conv_conv_pool(up8, [128, 128], training, flags, name=8, pool=False)
156 | 
157 |     up9 = _upconv_concat(conv8, conv1, 64, flags, name=9)
158 |     conv9 = _conv_conv_pool(up9, [64, 64], training, flags, name=9, pool=False)
159 | 
160 |     return tf.layers.conv2d(
161 |         conv9,
162 |         1, (1, 1),
163 |         name='final',
164 |         activation=tf.nn.sigmoid,
165 |         padding='same')
166 | 
167 | 
168 | def Unet_32_512(X, training, flags=None): # 256 x 256
169 |     """Build a U-Net architecture
170 | 
171 |     Args:
172 |         X (4-D Tensor): (N, H, W, C) - 256 x 256
173 |         training (1-D Tensor): Boolean Tensor is required for batchnormalization layers
174 | 
175 |     Returns:
176 |         output (4-D Tensor): (N, H, W, C)
177 |             Same shape as the `input` tensor
178 | 
179 |     Notes:
180 |         U-Net: Convolutional Networks for Biomedical Image Segmentation
181 |         https://arxiv.org/abs/1505.04597
182 |     """
183 |     # net = X / 127.5 - 1
184 |     conv1, pool1 = _conv_conv_pool(X, [32, 32], training, flags, name=1)
185 |     conv2, pool2 = _conv_conv_pool(pool1, [64, 64], training, flags, name=2)
186 |     conv3, pool3 = _conv_conv_pool(pool2, [128, 128], training, flags, name=3)
187 |     conv4, pool4 = _conv_conv_pool(pool3, [256, 256], training, flags, name=4)
188 |     conv5 = _conv_conv_pool(pool4, [512, 512], training, flags, name=5, pool=False)
189 | 
190 |     up6 = _upconv_concat(conv5, conv4, 256, flags, name=6)
191 |     conv6 = _conv_conv_pool(up6, [256, 256], training, flags, name=6, pool=False)
192 | 
193 |     up7 = _upconv_concat(conv6, conv3, 128, flags, name=7)
194 |     conv7 = _conv_conv_pool(up7, [128, 128], training, flags, name=7, pool=False)
195 | 
196 |     up8 = _upconv_concat(conv7, conv2, 64, flags, name=8)
197 |     conv8 = _conv_conv_pool(up8, [64, 64], training, flags, name=8, pool=False)
198 | 
199 |     up9 = _upconv_concat(conv8, conv1, 32, flags, name=9)
200 |     conv9 = _conv_conv_pool(up9, [32, 32], training, flags, name=9, pool=False)
201 | 
202 |     return tf.layers.conv2d(
203 |         conv9,
204 |         1, (1, 1),
205 |         name='final',
206 |         activation=tf.nn.sigmoid,
207 |         padding='same')
208 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Simple U-Net implementation in TensorFlow
  3 | 
  4 | Objective: detect vehicles
  5 | 
  6 | y = f(X)
  7 | 
  8 | X: image (640, 960, 3)
  9 | y: mask (640, 960, 1)
 10 |    - binary image
 11 |    - background is masked 0
 12 |    - vehicle is masked 255
 13 | 
 14 | Loss function: maximize IOU
 15 | 
 16 |     (intersection of prediction & ground truth)
 17 |     -------------------------------
 18 |     (union of prediction & ground truth)
 19 | 
 20 | Notes:
 21 |     In the paper, the pixel-wise softmax was used.
 22 |     But, I used the IOU because the datasets I used are
 23 |     not labeled for segmentations
 24 | 
 25 | Original Paper:
 26 |     https://arxiv.org/abs/1505.04597
 27 | """
 28 | import argparse
 29 | import sys
 30 | import os
 31 | import datetime
 32 | 
 33 | import tensorflow as tf
 34 | import numpy as np
 35 | 
 36 | from six.moves import xrange
 37 | 
 38 | import matplotlib.pyplot as plt
 39 | 
 40 | from nets.unet import Unet_32_512, Unet_64_1024
 41 | # from _dataset.dataset_loader import DataLoader
 42 | 
 43 | from input_data import Data
 44 | from input_data import DataLoader
 45 | 
 46 | FLAGS = None
 47 | 
 48 | from utils.checkmate import BestCheckpointSaver
 49 | 
 50 | 
 51 | def IOU(y_pred, y_true):
 52 |     """Returns a (approx) batch_norm_wrapper score
 53 | 
 54 |     intesection = y_pred.flatten() * y_true.flatten()
 55 |     Then, IOU = 2 * intersection / (y_pred.sum() + y_true.sum() + 1e-7) + 1e-7
 56 | 
 57 |     Args:
 58 |         y_pred (4-D array): (N, H, W, 1)
 59 |         y_true (4-D array): (N, H, W, 1)
 60 | 
 61 |     Returns:
 62 |         float: IOU score
 63 |     """
 64 |     H, W, _ = y_pred.get_shape().as_list()[1:]
 65 | 
 66 |     pred_flat = tf.reshape(y_pred, [-1, H * W])
 67 |     true_flat = tf.reshape(y_true, [-1, H * W])
 68 | 
 69 |     intersection = 2 * tf.reduce_sum(pred_flat * true_flat, axis=1) + 1e-7
 70 |     denominator = tf.reduce_sum(pred_flat, axis=1) + \
 71 |                   tf.reduce_sum(true_flat, axis=1) + 1e-7
 72 | 
 73 |     return tf.reduce_mean(intersection / denominator)
 74 | 
 75 |     # smooth = 1.
 76 |     # intersection = 2. * tf.reduce_sum(pred_flat * true_flat, axis=1) + smooth
 77 |     # denominator = tf.reduce_sum(pred_flat, axis=1) + tf.reduce_sum(true_flat, axis=1) + smooth
 78 |     # return tf.reduce_mean(intersection / denominator)
 79 | 
 80 | 
 81 | def get_start_epoch_number(latest_check_point):
 82 |     chck = latest_check_point.split('-')
 83 |     chck.reverse()
 84 |     return int(chck[0]) + 1
 85 | 
 86 | 
 87 | def main(_):
 88 |     # specify GPU
 89 |     if FLAGS.gpu_index:
 90 |         os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
 91 |         os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_index
 92 | 
 93 |     # We want to see all the logging messages for this tutorial.
 94 |     tf.logging.set_verbosity(tf.logging.INFO)
 95 | 
 96 |     tf.reset_default_graph()
 97 |     X = tf.placeholder(tf.float32, shape=[None, FLAGS.img_size, FLAGS.img_size, 3], name="X")
 98 |     GT = tf.placeholder(tf.float32, shape=[None, FLAGS.label_size, FLAGS.label_size, 1], name="GT")
 99 |     mode = tf.placeholder(tf.bool, name="mode") # training or not
100 | 
101 |     if FLAGS.use_64_channel:
102 |         pred = Unet_64_1024(X, mode, FLAGS)
103 |     else:
104 |         pred = Unet_32_512(X, mode, FLAGS)
105 | 
106 |     tf.add_to_collection("inputs", X)
107 |     tf.add_to_collection("inputs", mode)
108 |     tf.add_to_collection("outputs", pred)
109 | 
110 |     tf.summary.histogram("Predicted Mask", pred)
111 |     tf.summary.image("Predicted Mask", pred)
112 | 
113 |     # IOU is
114 |     #
115 |     # (the area of intersection)
116 |     # --------------------------
117 |     # (the area of two boxes)
118 |     iou_op = IOU(pred, GT)
119 | 
120 |     loss = -iou_op
121 |     tf.summary.scalar("loss", loss)
122 | 
123 |     # Updates moving mean and moving variance for BatchNorm (train/inference)
124 |     update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
125 |     with tf.control_dependencies(update_ops):
126 |         # other optimizer will be used
127 |         train_op = tf.train.MomentumOptimizer(0.001, 0.99).minimize(loss)
128 | 
129 |     global_step = tf.train.get_or_create_global_step()
130 |     increment_global_step = tf.assign(global_step, global_step + 1)
131 | 
132 |     sess = tf.Session()
133 |     sess.run(tf.global_variables_initializer())
134 | 
135 |     summary_op = tf.summary.merge_all()
136 |     train_summary_writer = tf.summary.FileWriter(FLAGS.logdir + '/train', sess.graph)
137 |     val_summary_writer = tf.summary.FileWriter(FLAGS.logdir + '/validation')
138 | 
139 |     saver = tf.train.Saver()
140 | 
141 |     # For, checkpoint saver
142 |     if FLAGS.best_train_dir:
143 |         best_ckpt_saver = BestCheckpointSaver(title='unet.ckpt', save_dir=FLAGS.best_train_dir, num_to_keep=3, maximize=True)
144 | 
145 |     start_epoch = 1
146 |     epoch_from_ckpt = 0
147 |     if FLAGS.ckpt_path:
148 |         saver.restore(sess, FLAGS.ckpt_path)
149 |         tmp = FLAGS.ckpt_path
150 |         tmp = tmp.split('-')
151 |         tmp.reverse()
152 |         epoch_from_ckpt = int(tmp[0])
153 |         start_epoch = epoch_from_ckpt + 1
154 | 
155 |     if epoch_from_ckpt != FLAGS.epochs + 1:
156 |         tf.logging.info('Training from epoch: %d ', start_epoch)
157 | 
158 |     # Saving as Protocol Buffer (pb)
159 |     tf.train.write_graph(sess.graph_def,
160 |                          FLAGS.train_dir,
161 |                          'unet.pbtxt',
162 |                          as_text=True)
163 | 
164 | 
165 |     ############################
166 |     # Get data
167 |     ############################
168 |     raw = Data(FLAGS.data_dir, FLAGS.validation_percentage)
169 |     tr_data = DataLoader(raw.data_dir,
170 |                          raw.get_data('training'),
171 |                          FLAGS.img_size,
172 |                          FLAGS.label_size,
173 |                          FLAGS.batch_size)
174 |     val_data = DataLoader(raw.data_dir,
175 |                           raw.get_data('validation'),
176 |                           FLAGS.img_size,
177 |                           FLAGS.label_size,
178 |                           FLAGS.batch_size)
179 | 
180 |     iterator = tf.data.Iterator.from_structure(tr_data.dataset.output_types,
181 |                                                tr_data.dataset.output_shapes)
182 |     next_batch = iterator.get_next()
183 | 
184 |     # Ops for initializing the two different iterators
185 |     tr_init_op = iterator.make_initializer(tr_data.dataset)
186 |     val_init_op = iterator.make_initializer(val_data.dataset)
187 | 
188 |     tr_batches_per_epoch = int(tr_data.data_size / FLAGS.batch_size)
189 |     if tr_data.data_size % FLAGS.batch_size > 0:
190 |         tr_batches_per_epoch += 1
191 |     val_batches_per_epoch = int(val_data.data_size / FLAGS.batch_size)
192 |     if val_data.data_size % FLAGS.batch_size > 0:
193 |         val_batches_per_epoch += 1
194 | 
195 | 
196 |     ############################
197 |     # Training
198 |     ############################
199 |     print("{} Training start ... ".format(datetime.datetime.now()))
200 |     for epoch in xrange(start_epoch, FLAGS.epochs + 1):
201 |         print('{} Training epoch-{} start >> '.format(datetime.datetime.now(), epoch))
202 | 
203 |         sess.run(tr_init_op)
204 |         for step in range(tr_batches_per_epoch):
205 |             X_train, y_train = sess.run(next_batch)
206 |             train_summary, accuracy, _, _ = \
207 |                 sess.run([summary_op, iou_op, train_op, increment_global_step],
208 |                          feed_dict={X: X_train,
209 |                                     GT: y_train,
210 |                                     mode: True}
211 |                          )
212 | 
213 |             train_summary_writer.add_summary(train_summary, (epoch-start_epoch)*tr_batches_per_epoch+step)
214 |             tf.logging.info('epoch #%d, step #%d/%d, accuracy(iou) %.5f%%' %
215 |                             (epoch, step, tr_batches_per_epoch, accuracy))
216 | 
217 |         print("{} Validation start ... ".format(datetime.datetime.now()))
218 |         total_val_accuracy = 0
219 |         val_count = 0
220 |         sess.run(val_init_op)
221 |         for n in range(val_batches_per_epoch):
222 |             X_val, y_val = sess.run(next_batch)
223 |             val_summary, val_accuracy = \
224 |                 sess.run([summary_op, iou_op],
225 |                          feed_dict={X: X_val,
226 |                                     GT: y_val,
227 |                                     mode: False}
228 |                          )
229 | 
230 |             # total_val_accuracy += val_step_iou * X_val.shape[0]
231 |             total_val_accuracy += val_accuracy
232 |             val_count += 1
233 | 
234 |             val_summary_writer.add_summary(val_summary, (epoch-start_epoch)*val_batches_per_epoch+n)
235 |             tf.logging.info('step #%d/%d, accuracy(iou) %.5f%%' %
236 |                             (n, val_batches_per_epoch, val_accuracy * 100))
237 | 
238 |         total_val_accuracy /= val_count
239 |         tf.logging.info('step %d: Validation accuracy = %.2f%% (N=%d)' %
240 |                         (epoch, total_val_accuracy * 100, raw.get_size('validation')))
241 | 
242 |         # save checkpoint
243 |         checkpoint_path = os.path.join(FLAGS.train_dir, 'unet.ckpt')
244 |         tf.logging.info('Saving to "%s-%d"', checkpoint_path, epoch)
245 |         saver.save(sess, checkpoint_path, global_step=epoch)
246 | 
247 |         # save best checkpoint
248 |         if FLAGS.best_train_dir:
249 |             best_ckpt_saver.handle(total_val_accuracy, sess, global_step, epoch)
250 | 
251 | 
252 | 
253 | if __name__ == '__main__':
254 |     parser = argparse.ArgumentParser()
255 |     parser.add_argument(
256 |         '--data_dir',
257 |         # default='../../dl_data/nucleus/stage1_train_valid_elas',
258 |         default='../../dl_data/nucleus/stage1_train_aug',
259 |         type=str,
260 |         help="Data directory")
261 | 
262 |     parser.add_argument(
263 |         '--validation_percentage',
264 |         type=int,
265 |         default=10,
266 |         help='What percentage of wavs to use as a validation set.')
267 | 
268 |     parser.add_argument(
269 |         '--logdir',
270 |         type=str,
271 |         default=os.getcwd() + '/models/retrain_logs',
272 |         help="Tensorboard log directory")
273 | 
274 |     parser.add_argument(
275 |         '--train_dir',
276 |         type=str,
277 |         default=os.getcwd() + '/models',
278 |         help='Directory to write event logs and checkpoint.')
279 | 
280 |     parser.add_argument(
281 |         '--best_train_dir',
282 |         type=str,
283 |         # default=os.getcwd() + '/models_best',
284 |         default=None,
285 |         help="Directory to write best checkpoint.")
286 | 
287 |     # parser.add_argument(
288 |     #     '--reg',
289 |     #     type=float,
290 |     #     default=0.1,
291 |     #     help="L2 Regularizer Term")
292 | 
293 |     parser.add_argument(
294 |         '--ckpt_path',
295 |         type=str,
296 |         # default=os.getcwd() + '/models/unet.ckpt-20',
297 |         default='',
298 |         help="Checkpoint directory")
299 | 
300 |     parser.add_argument(
301 |         '--epochs',
302 |         type=int,
303 |         default=50,
304 |         help='Number of epochs')
305 | 
306 |     parser.add_argument(
307 |         '--batch_size',
308 |         default=2,
309 |         type=int,
310 |         help="Batch size")
311 | 
312 |     parser.add_argument(
313 |         '--img_size',
314 |         type=int,
315 |         # default=256,
316 |         default=512,
317 |         # default=572,
318 |         help="Image height and width")
319 | 
320 |     parser.add_argument(
321 |         '--label_size',
322 |         type=int,
323 |         # default=256,
324 |         default=512,
325 |         # default=388,
326 |         help="Label height and width")
327 | 
328 |     parser.add_argument(
329 |         '--conv_padding',
330 |         type=str,
331 |         default='same',
332 |         # default='valid',
333 |         help="conv padding. if your img_size is 572 and, conv_padding is valid then the label_size is 388")
334 | 
335 |     parser.add_argument(
336 |         '--use_64_channel',
337 |         type=bool,
338 |         default=True,
339 |         # default=False,
340 |         help="If you set True then use the Unet_64_1024. otherwise use the Unet_32_512")
341 | 
342 |     parser.add_argument(
343 |         '--gpu_index',
344 |         type=str,
345 |         # default=None,
346 |         default='0',
347 |         # default='1',
348 |         help="Set the gpu index. If you not sepcify then auto")
349 | 
350 |     FLAGS, unparsed = parser.parse_known_args()
351 |     tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
352 | 


--------------------------------------------------------------------------------
/utils/checkmate.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import glob
  3 | import json
  4 | import numpy as np
  5 | import tensorflow as tf
  6 | 
  7 | 
  8 | class BestCheckpointSaver(object):
  9 |     """Maintains a directory containing only the best n checkpoints
 10 | 
 11 |     Inside the directory is a best_checkpoints JSON file containing a dictionary
 12 |     mapping of the best checkpoint filepaths to the values by which the checkpoints
 13 |     are compared.  Only the best n checkpoints are contained in the directory and JSON file.
 14 | 
 15 |     This is a light-weight wrapper class only intended to work in simple,
 16 |     non-distributed settings.  It is not intended to work with the tf.Estimator
 17 |     framework.
 18 |     """
 19 |     def __init__(self, title, save_dir, num_to_keep=1, maximize=True, saver=None):
 20 |         """Creates a `BestCheckpointSaver`
 21 | 
 22 |         `BestCheckpointSaver` acts as a wrapper class around a `tf.train.Saver`
 23 | 
 24 |         Args:
 25 |             save_dir: The directory in which the checkpoint files will be saved
 26 |             num_to_keep: The number of best checkpoint files to retain
 27 |             maximize: Define 'best' values to be the highest values.  For example,
 28 |               set this to True if selecting for the checkpoints with the highest
 29 |               given accuracy.  Or set to False to select for checkpoints with the
 30 |               lowest given error rate.
 31 |             saver: A `tf.train.Saver` to use for saving checkpoints.  A default
 32 |               `tf.train.Saver` will be created if none is provided.
 33 |         """
 34 |         self._num_to_keep = num_to_keep
 35 |         self._title = title
 36 |         self._save_dir = save_dir
 37 |         self._save_path = os.path.join(save_dir, title)
 38 |         self._maximize = maximize
 39 |         self._saver = saver if saver else tf.train.Saver(
 40 |             max_to_keep=None,
 41 |             save_relative_paths=True
 42 |         )
 43 | 
 44 |         if not os.path.exists(save_dir):
 45 |             os.makedirs(save_dir)
 46 |         self.best_checkpoints_file = os.path.join(save_dir, 'best_checkpoints')
 47 | 
 48 |     def handle(self, value, sess, global_step_tensor, global_step_value):
 49 |         """Updates the set of best checkpoints based on the given result.
 50 | 
 51 |         Args:
 52 |             value: The value by which to rank the checkpoint.
 53 |             sess: A tf.Session to use to save the checkpoint
 54 |             global_step_tensor: A `tf.Tensor` represent the global step
 55 |         """
 56 |         #global_step = sess.run(global_step_tensor)
 57 |         current_ckpt = '{}-{}'.format(self._title, global_step_value)
 58 |         value = float(value)
 59 |         if not os.path.exists(self.best_checkpoints_file):
 60 |             self._save_best_checkpoints_file({current_ckpt: value})
 61 |             self._saver.save(sess, self._save_path, global_step=global_step_value)
 62 |             return
 63 | 
 64 |         best_checkpoints = self._load_best_checkpoints_file()
 65 | 
 66 |         if len(best_checkpoints) < self._num_to_keep:
 67 |             best_checkpoints[current_ckpt] = value
 68 |             self._save_best_checkpoints_file(best_checkpoints)
 69 |             self._saver.save(sess, self._save_path, global_step=global_step_value)
 70 |             return
 71 | 
 72 |         if self._maximize:
 73 |             should_save = not all(current_best >= value
 74 |                                   for current_best in best_checkpoints.values())
 75 |         else:
 76 |             should_save = not all(current_best <= value
 77 |                                   for current_best in best_checkpoints.values())
 78 |         if should_save:
 79 |             best_checkpoint_list = self._sort(best_checkpoints)
 80 | 
 81 |             worst_checkpoint = os.path.join(self._save_dir,
 82 |                                             best_checkpoint_list.pop(-1)[0])
 83 |             self._remove_outdated_checkpoint_files(worst_checkpoint)
 84 |             self._update_internal_saver_state(best_checkpoint_list)
 85 | 
 86 |             best_checkpoints = dict(best_checkpoint_list)
 87 |             best_checkpoints[current_ckpt] = value
 88 |             self._save_best_checkpoints_file(best_checkpoints)
 89 | 
 90 |             self._saver.save(sess, self._save_path, global_step=global_step_value)
 91 | 
 92 |     def _save_best_checkpoints_file(self, updated_best_checkpoints):
 93 |         with open(self.best_checkpoints_file, 'w') as f:
 94 |             json.dump(updated_best_checkpoints, f, indent=3)
 95 | 
 96 |     def _remove_outdated_checkpoint_files(self, worst_checkpoint):
 97 |         os.remove(os.path.join(self._save_dir, 'checkpoint'))
 98 |         for ckpt_file in glob.glob(worst_checkpoint + '.*'):
 99 |             os.remove(ckpt_file)
100 | 
101 |     def _update_internal_saver_state(self, best_checkpoint_list):
102 |         best_checkpoint_files = [
103 |             (ckpt[0], np.inf)  # TODO: Try to use actual file timestamp
104 |             for ckpt in best_checkpoint_list
105 |         ]
106 |         self._saver.set_last_checkpoints_with_time(best_checkpoint_files)
107 | 
108 |     def _load_best_checkpoints_file(self):
109 |         with open(self.best_checkpoints_file, 'r') as f:
110 |             best_checkpoints = json.load(f)
111 |         return best_checkpoints
112 | 
113 |     def _sort(self, best_checkpoints):
114 |         best_checkpoints = [
115 |             (ckpt, best_checkpoints[ckpt])
116 |             for ckpt in sorted(best_checkpoints,
117 |                                key=best_checkpoints.get,
118 |                                reverse=self._maximize)
119 |         ]
120 |         return best_checkpoints
121 | 
122 | 
123 | def get_best_checkpoint(best_checkpoint_dir, select_maximum_value=True):
124 |     """ Returns filepath to the best checkpoint
125 | 
126 |     Reads the best_checkpoints file in the best_checkpoint_dir directory.
127 |     Returns the filepath in the best_checkpoints file associated with
128 |     the highest value if select_maximum_value is True, or the filepath
129 |     associated with the lowest value if select_maximum_value is False.
130 | 
131 |     Args:
132 |         best_checkpoint_dir: Directory containing best_checkpoints JSON file
133 |         select_maximum_value: If True, select the filepath associated
134 |           with the highest value.  Otherwise, select the filepath associated
135 |           with the lowest value.
136 | 
137 |     Returns:
138 |         The full path to the best checkpoint file
139 | 
140 |     """
141 |     best_checkpoints_file = os.path.join(best_checkpoint_dir, 'best_checkpoints')
142 |     assert os.path.exists(best_checkpoints_file)
143 |     with open(best_checkpoints_file, 'r') as f:
144 |         best_checkpoints = json.load(f)
145 |     best_checkpoints = [
146 |         ckpt for ckpt in sorted(best_checkpoints,
147 |                                 key=best_checkpoints.get,
148 |                                 reverse=select_maximum_value)
149 |     ]
150 |     return os.path.join(best_checkpoint_dir,  best_checkpoints[0])


--------------------------------------------------------------------------------
/utils/image_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Optional, Tuple
 3 | 
 4 | import cv2
 5 | import matplotlib.pyplot as plt
 6 | import numpy as np
 7 | 
 8 | import tensorflow as tf
 9 | 
10 | 
11 | 
12 | def read_image(filepath, color_mode=cv2.IMREAD_COLOR, target_size=None):
13 |     """Read an image from a file and resize it."""
14 |     img = cv2.imread(filepath, color_mode)
15 |     if target_size:
16 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
17 |     return img
18 | 
19 | 
20 | def read_mask(directory, target_size=None):
21 |     """Read and resize masks contained in a given directory."""
22 |     for i, filename in enumerate(next(os.walk(directory))[2]):
23 |         mask_path = os.path.join(directory, filename)
24 |         mask_tmp = read_image(mask_path, cv2.IMREAD_GRAYSCALE, target_size)
25 |         if not i:
26 |             mask = mask_tmp
27 |         else:
28 |             mask = np.maximum(mask, mask_tmp)
29 |     return tf.expand_dims(mask, -1)
30 | 
31 | 
32 | def plot_image(image: np.ndarray, title: Optional[str]=None, **kwargs) -> None:
33 |     """Plot a single image
34 | 
35 |     Args:
36 |         image (2-D or 3-D array): image as a numpy array (H, W) or (H, W, C)
37 |         title (str, optional): title for a plot
38 |         **kwargs: keyword arguemtns for `plt.imshow`
39 |     """
40 |     shape = image.shape
41 | 
42 |     if len(shape) == 3:
43 |         plt.imshow(image, **kwargs)
44 |     elif len(shape) == 2:
45 |         plt.imshow(image, **kwargs)
46 |     else:
47 |         raise TypeError(
48 |             "2-D array or 3-D array should be given but {} was given".format(
49 |                 shape))
50 | 
51 |     if title:
52 |         plt.title(title)
53 | 
54 |     plt.show()


--------------------------------------------------------------------------------
/utils/inspect_checkpoint.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | """A simple script for inspect checkpoint files."""
 16 | from __future__ import absolute_import
 17 | from __future__ import division
 18 | from __future__ import print_function
 19 | 
 20 | import argparse
 21 | import sys
 22 | 
 23 | import numpy as np
 24 | 
 25 | from tensorflow.python import pywrap_tensorflow
 26 | from tensorflow.python.platform import app
 27 | from tensorflow.python.platform import flags
 28 | 
 29 | FLAGS = None
 30 | 
 31 | 
 32 | def print_tensors_in_checkpoint_file(file_name, tensor_name, all_tensors):
 33 |     """Prints tensors in a checkpoint file.
 34 | 
 35 |     If no `tensor_name` is provided, prints the tensor names and shapes
 36 |     in the checkpoint file.
 37 | 
 38 |     If `tensor_name` is provided, prints the content of the tensor.
 39 | 
 40 |     Args:
 41 |         file_name: Name of the checkpoint file.
 42 |         tensor_name: Name of the tensor in the checkpoint file to print.
 43 |         all_tensors: Boolean indicating whether to print all tensors.
 44 |     """
 45 |     try:
 46 |         reader = pywrap_tensorflow.NewCheckpointReader(file_name)
 47 |         if all_tensors:
 48 |             var_to_shape_map = reader.get_variable_to_shape_map()
 49 |             for key in var_to_shape_map:
 50 |                 print("tensor_name: ", key)
 51 |                 print(reader.get_tensor(key))
 52 |         elif not tensor_name:
 53 |             print(reader.debug_string().decode("utf-8"))
 54 |         else:
 55 |             print("tensor_name: ", tensor_name)
 56 |             print(reader.get_tensor(tensor_name))
 57 |     except Exception as e:  # pylint: disable=broad-except
 58 |         print(str(e))
 59 |         if "corrupted compressed block contents" in str(e):
 60 |             print("It's likely that your checkpoint file has been compressed "
 61 |                   "with SNAPPY.")
 62 | 
 63 | 
 64 | def parse_numpy_printoption(kv_str):
 65 |     """Sets a single numpy printoption from a string of the form 'x=y'.
 66 | 
 67 |     See documentation on numpy.set_printoptions() for details about what values
 68 |     x and y can take. x can be any option listed there other than 'formatter'.
 69 | 
 70 |     Args:
 71 |         kv_str: A string of the form 'x=y', such as 'threshold=100000'
 72 | 
 73 |     Raises:
 74 |         argparse.ArgumentTypeError: If the string couldn't be used to set any
 75 |                 nump printoption.
 76 |     """
 77 |     k_v_str = kv_str.split("=", 1)
 78 |     if len(k_v_str) != 2 or not k_v_str[0]:
 79 |         raise argparse.ArgumentTypeError("'%s' is not in the form k=v." % kv_str)
 80 |     k, v_str = k_v_str
 81 |     printoptions = np.get_printoptions()
 82 |     if k not in printoptions:
 83 |         raise argparse.ArgumentTypeError("'%s' is not a valid printoption." % k)
 84 |     v_type = type(printoptions[k])
 85 |     if v_type is type(None):
 86 |         raise argparse.ArgumentTypeError(
 87 |                 "Setting '%s' from the command line is not supported." % k)
 88 |     try:
 89 |         v = (v_type(v_str) if v_type is not bool
 90 |              else flags.BooleanParser().Parse(v_str))
 91 |     except ValueError as e:
 92 |         raise argparse.ArgumentTypeError(e.message)
 93 |     np.set_printoptions(**{k: v})
 94 | 
 95 | 
 96 | def main(unused_argv):
 97 |     if not FLAGS.file_name:
 98 |         print("Usage: inspect_checkpoint --file_name=checkpoint_file_name "
 99 |               "[--tensor_name=tensor_to_print]")
100 |         sys.exit(1)
101 |     else:
102 |         print_tensors_in_checkpoint_file(FLAGS.file_name, FLAGS.tensor_name,
103 |                                          FLAGS.all_tensors)
104 | 
105 | 
# Script entry point: build the command-line parser, parse the known flags
# into the module-level FLAGS, and hand the remaining arguments to app.run.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Custom "bool" type: only the (case-insensitive) string "true" is True.
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
            "--file_name", type=str, default="", help="Checkpoint filename. "
                                        "Note, if using Checkpoint V2 format, file_name is the "
                                        "shared prefix between all files in the checkpoint.")
    parser.add_argument(
            "--tensor_name",
            type=str,
            default="",
            help="Name of the tensor to inspect")
    # nargs="?" with const=True: a bare --all_tensors (no value) means True.
    parser.add_argument(
            "--all_tensors",
            nargs="?",
            const=True,
            type="bool",
            default=False,
            help="If True, print the values of all the tensors.")
    # Each k=v item is passed through parse_numpy_printoption, which applies
    # it via np.set_printoptions as a side effect while parsing.
    parser.add_argument(
            "--printoptions",
            nargs="*",
            type=parse_numpy_printoption,
            help="Argument for numpy.set_printoptions(), in the form 'k=v'.")
    FLAGS, unparsed = parser.parse_known_args()
    # Arguments argparse did not recognise are forwarded to app.run.
    app.run(main=main, argv=[sys.argv[0]] + unparsed)
132 | 


--------------------------------------------------------------------------------
/utils/inspect_tfrecord.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import sys
 3 | 
 4 | import tensorflow as tf
 5 | 
 6 | 
 7 | FLAGS = None
 8 | 
 9 | def print_tfrecords_count(tfrecord_filename):
10 |     print('sum : ', sum(1 for _ in tf.python_io.tf_record_iterator(tfrecord_filename)))
11 | 
12 | 
13 | # get a list of feature
def get_tfrecords_feature_list(tfrecords_filename):
    """Return the feature keys of the first record in a TFRecord file.

    Args:
        tfrecords_filename: Path to the TFRecord file.

    Returns:
        The keys of the first example's feature map, or an empty list when
        the file yields no records.
    """
    records = tf.python_io.tf_record_iterator(path=tfrecords_filename)

    # Only the first record is inspected; the rest are never read.
    for raw in records:
        parsed = tf.train.Example()
        parsed.ParseFromString(raw)
        feature_map = parsed.features.feature
        return feature_map.keys()

    return []
23 | 
24 | 
25 | # display record
def read_tfrecords(tfrecords_filename, is_train_val=False):
    """Yield the contents of each record in a TFRecord file.

    Args:
        tfrecords_filename: Path to the TFRecord file.
        is_train_val: When true, the ``'label'`` feature is read as well.

    Yields:
        ``(index, image_bytes, label)`` when ``is_train_val`` is true,
        otherwise ``(index, image_bytes)``. ``index`` counts from 0.
    """
    records = tf.python_io.tf_record_iterator(path=tfrecords_filename)

    for position, raw in enumerate(records):
        parsed = tf.train.Example()
        parsed.ParseFromString(raw)
        features = parsed.features.feature

        image_bytes = features['image'].bytes_list.value[0]

        if not is_train_val:
            yield position, image_bytes
            continue

        yield position, image_bytes, features['label'].int64_list.value[0]
43 | 
44 | # save to file
def save_to_file():
    """Dump every record of ``./train.tfrecords`` to image files plus a label list.

    Each image is written to ``train/<index>.png`` and a line
    ``"<image path> <label>"`` is appended to ``train/labels_coco.txt``.

    The previous version used the Python 2 ``print >>file`` form, which raises
    ``TypeError`` on Python 3, and emitted the label line outside the loop, so
    at most one label was ever written.
    """
    with open('train/labels_coco.txt', 'w') as f_labels:
        for idx, img, label in read_tfrecords('./train.tfrecords', is_train_val=True):
            fn = 'train/{}.png'.format(idx)
            with open(fn, 'wb') as f_img:
                f_img.write(img)
            # One label line per image, written inside the loop.
            f_labels.write("{} {}\n".format(fn, label))
52 | 
53 | 
54 | 
55 | 
def main(_):
    """Print the record count of ``train_003.tfrecord`` under ``FLAGS.data_dir``."""
    # get_tfrecords_feature_list(...) can be called on the same path to list features.
    print_tfrecords_count(FLAGS.data_dir + '/train_003.tfrecord')
60 | 
# Script entry point: parse --data_dir into the module-level FLAGS and run
# main through tf.app.run, forwarding any arguments argparse did not consume.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Directory that main() expects to contain train_003.tfrecord.
    parser.add_argument(
        '--data_dir',
        type=str,
        default='/home/ace19/dl-data/nucleus_detection/stage1_train_tfrecord',
        help='')

    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


--------------------------------------------------------------------------------
/utils/morphological_util.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import os
  3 | 
  4 | import cv2
  5 | import numpy as np
  6 | import scipy.ndimage as ndi
  7 | from PIL import Image
  8 | from imageio import imwrite
  9 | from skimage.transform import resize
 10 | from sklearn.cluster import KMeans
 11 | import skimage.morphology  # For using image labeling
 12 | 
 13 | from tqdm import tqdm
 14 | 
 15 | 
 16 | def overlay_masks(images_dir, subdir_name, target_dir):
 17 |     train_dir = os.path.join(images_dir, subdir_name)
 18 |     all_mask = []
 19 |     for mask_dirname in tqdm(glob.glob('{}/*/masks'.format(train_dir))):
 20 |         masks = []
 21 |         for image_filepath in glob.glob('{}/*'.format(mask_dirname)):
 22 |             image = np.asarray(Image.open(image_filepath))
 23 |             image = image / 255.0
 24 |             masks.append(image)
 25 |         overlayed_masks = np.sum(masks, axis=0)
 26 |         #target_filepath = '/'.join(mask_dirname.replace(images_dir, target_dir).split('/')[:-1]) + '.png'
 27 |         #os.makedirs(os.path.dirname(target_filepath), exist_ok=True)
 28 |         #imwrite(target_filepath, overlayed_masks)
 29 |         all_mask.append(overlayed_masks)
 30 | 
 31 |     return all_mask
 32 | 
 33 | 
 34 | def overlay_contours(images_dir, subdir_name, target_dir, touching_only=False):
 35 |     train_dir = os.path.join(images_dir, subdir_name)
 36 |     contours = []
 37 |     for mask_dirname in tqdm(glob.glob('{}/*/masks'.format(train_dir))):
 38 |         masks = []
 39 |         for image_filepath in glob.glob('{}/*'.format(mask_dirname)):
 40 |             image = np.asarray(Image.open(image_filepath))
 41 |             image = image / 255.0
 42 |             masks.append(get_contour(image))
 43 |         if touching_only:
 44 |             overlayed_masks = np.where(np.sum(masks, axis=0) > 128. + 255., 255., 0.).astype(np.uint8)
 45 |         else:
 46 |             overlayed_masks = np.where(np.sum(masks, axis=0) > 128., 255., 0.).astype(np.uint8)
 47 |         #target_filepath = '/'.join(mask_dirname.replace(images_dir, target_dir).split('/')[:-1]) + '.png'
 48 |         #os.makedirs(os.path.dirname(target_filepath), exist_ok=True)
 49 |         #imwrite(target_filepath, overlayed_masks)
 50 |         contours.append(overlayed_masks)
 51 | 
 52 |     return contours
 53 | 
 54 | def overlay_centers(images_dir, subdir_name, target_dir):
 55 |     train_dir = os.path.join(images_dir, subdir_name)
 56 |     for mask_dirname in tqdm(glob.glob('{}/*/masks'.format(train_dir))):
 57 |         masks = []
 58 |         for image_filepath in glob.glob('{}/*'.format(mask_dirname)):
 59 |             image = np.asarray(Image.open(image_filepath))
 60 |             image = image / 255.0
 61 |             masks.append(get_center(image))
 62 |         overlayed_masks = np.where(np.sum(masks, axis=0) > 128., 255., 0.).astype(np.uint8)
 63 |         target_filepath = '/'.join(mask_dirname.replace(images_dir, target_dir).split('/')[:-1]) + '.png'
 64 |         os.makedirs(os.path.dirname(target_filepath), exist_ok=True)
 65 |         imwrite(target_filepath, overlayed_masks)
 66 | 
 67 | 
 68 | def get_contour(img):
 69 |     img_contour = np.zeros_like(img).astype(np.uint8)
 70 |     _, contours, hierarchy = cv2.findContours(img.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
 71 |     cv2.drawContours(img_contour, contours, -1, (255, 255, 255), 4)
 72 |     return img_contour
 73 | 
 74 | 
 75 | def get_center(img):
 76 |     img_center = np.zeros_like(img).astype(np.uint8)
 77 |     y, x = ndi.measurements.center_of_mass(img)
 78 |     cv2.circle(img_center, (int(x), int(y)), 4, (255, 255, 255), -1)
 79 |     return img_center
 80 | 
 81 | def get_ground_truth(images_dir, subdir_name, target_dir):
 82 |     train_dir = os.path.join(images_dir, subdir_name)
 83 |     groud_truth = []
 84 |     for mask_dirname in tqdm(glob.glob('{}/*/masks'.format(train_dir))):
 85 |         masks = []
 86 |         for image_filepath in glob.glob('{}/*'.format(mask_dirname)):
 87 |             image = np.asarray(Image.open(image_filepath))
 88 |             image = image / 255.0
 89 |             masks.append(image)
 90 |         overlayed_masks = np.sum(masks, axis=0)
 91 |         #target_filepath = '/'.join(mask_dirname.replace(images_dir, target_dir).split('/')[:-1]) + '.png'
 92 |         #os.makedirs(os.path.dirname(target_filepath), exist_ok=True)
 93 |         #imwrite(target_filepath, overlayed_masks)
 94 | 
 95 |         lab_mask = skimage.morphology.label(overlayed_masks > 0.5)
 96 | 
 97 |         groud_truth.append(lab_mask)
 98 | 
 99 |     return groud_truth
100 | 


--------------------------------------------------------------------------------
/utils/oper_utils.py:
--------------------------------------------------------------------------------
  1 | import glob
  2 | import logging
  3 | import os
  4 | import sys
  5 | from itertools import product
  6 | 
  7 | import numpy as np
  8 | import pandas as pd
  9 | import yaml
 10 | from PIL import Image
 11 | from attrdict import AttrDict
 12 | from tqdm import tqdm
 13 | 
 14 | 
 15 | def read_yaml(filepath):
 16 |     with open(filepath) as f:
 17 |         config = yaml.load(f)
 18 |     return AttrDict(config)
 19 | 
 20 | 
 21 | def get_logger():
 22 |     logger = logging.getLogger('dsb-2018')
 23 |     logger.setLevel(logging.INFO)
 24 |     message_format = logging.Formatter(fmt='%(asctime)s %(name)s >>> %(message)s',
 25 |                                        datefmt='%Y-%m-%d %H-%M-%S')
 26 | 
 27 |     # console handler for validation info
 28 |     ch_va = logging.StreamHandler(sys.stdout)
 29 |     ch_va.setLevel(logging.INFO)
 30 | 
 31 |     ch_va.setFormatter(fmt=message_format)
 32 | 
 33 |     # add the handlers to the logger
 34 |     logger.addHandler(ch_va)
 35 | 
 36 |     return logger
 37 | 
 38 | 
 39 | def decompose(labeled):
 40 |     nr_true = labeled.max()
 41 |     masks = []
 42 |     for i in range(1, nr_true + 1):
 43 |         msk = labeled.copy()
 44 |         msk[msk != i] = 0.
 45 |         msk[msk == i] = 255.
 46 |         masks.append(msk)
 47 | 
 48 |     if not masks:
 49 |         return [labeled]
 50 |     else:
 51 |         return masks
 52 | 
 53 | 
 54 | def create_submission(experiments_dir, meta, predictions, logger):
 55 |     image_ids, encodings = [], []
 56 |     output = []
 57 |     for image_id, prediction in zip(meta['ImageId'].values, predictions):
 58 |         for mask in decompose(prediction):
 59 |             rle_encoded = ' '.join(str(rle) for rle in run_length_encoding(mask > 128.))
 60 |             if len(rle_encoded) != 0:
 61 |                 image_ids.append(image_id)
 62 |                 encodings.append(rle_encoded)
 63 |                 output.append([image_id, rle_encoded])
 64 |             else:
 65 |                 logger.info('*** image_id {}'.format(image_id))
 66 |                 logger.info('*** rle_encoded {} is empty'.format(rle_encoded))
 67 | 
 68 |     submission = pd.DataFrame(output, columns=['ImageId', 'EncodedPixels']).astype(str)
 69 |     submission = submission[submission['EncodedPixels'] != 'nan']
 70 |     submission_filepath = os.path.join(experiments_dir, 'submission.csv')
 71 |     submission.to_csv(submission_filepath, index=None, encoding='utf-8')
 72 |     logger.info('submission saved to {}'.format(submission_filepath))
 73 |     logger.info('submission head \n\n{}'.format(submission.head()))
 74 | 
 75 | 
 76 | def read_masks(masks_filepaths):
 77 |     masks = []
 78 |     for mask_dir in tqdm(masks_filepaths):
 79 |         mask = []
 80 |         if len(mask_dir) == 1:
 81 |             mask_dir = mask_dir[0]
 82 |         for i, mask_filepath in enumerate(glob.glob('{}/*'.format(mask_dir))):
 83 |             blob = np.asarray(Image.open(mask_filepath))
 84 |             blob_binarized = (blob > 128.).astype(np.uint8) * i
 85 |             mask.append(blob_binarized)
 86 |         mask = np.sum(np.stack(mask, axis=0), axis=0).astype(np.uint8)
 87 |         masks.append(mask)
 88 |     return masks
 89 | 
 90 | 
 91 | def run_length_encoding(x):
 92 |     # https://www.kaggle.com/c/data-science-bowl-2018/discussion/48561#
 93 |     bs = np.where(x.T.flatten())[0]
 94 | 
 95 |     rle = []
 96 |     prev = -2
 97 |     for b in bs:
 98 |         if (b > prev + 1): rle.extend((b + 1, 0))
 99 |         rle[-1] += 1
100 |         prev = b
101 | 
102 |     if len(rle) != 0 and rle[-1] + rle[-2] == x.size:
103 |         rle[-2] = rle[-2] - 1
104 | 
105 |     return rle
106 | 
107 | 
def read_params(ctx):
    """Return the run parameters for ``ctx``.

    Args:
        ctx: Object exposing a ``params`` attribute.

    Returns:
        ``ctx.params``, unless its class is named ``'OfflineContextParams'``,
        in which case the ``parameters`` entry of ``neptune.yaml`` is
        returned instead.
    """
    if ctx.params.__class__.__name__ != 'OfflineContextParams':
        return ctx.params
    return read_yaml('neptune.yaml').parameters
115 | 
116 | 
def generate_metadata(data_dir,
                      masks_overlayed_dir,
                      contours_overlayed_dir,
                      contours_touching_overlayed_dir,
                      centers_overlayed_dir):
    """Collect per-image metadata for ``stage1_train`` and ``stage1_test``.

    Args:
        data_dir: Root directory containing ``stage1_train`` and ``stage1_test``.
        masks_overlayed_dir: Root of the overlaid mask images.
        contours_overlayed_dir: Root of the overlaid contour images.
        contours_touching_overlayed_dir: Root of the touching-contour images.
        centers_overlayed_dir: Root of the overlaid centre images.

    Returns:
        A ``pandas.DataFrame`` with one row per image: train rows first, then
        test rows. Test rows carry ``None`` for the mask-related columns.

    Raises:
        ValueError: If an ``images`` directory does not hold exactly one
            file, the file name does not match the image id, or an image id
            repeats. (These checks used to build the exception without
            raising it.)
    """
    columns = ['ImageId', 'file_path_image', 'file_path_masks', 'file_path_mask',
               'is_train', 'width', 'height', 'n_nuclei',
               'file_path_contours', 'file_path_contours_touching', 'file_path_centers']

    def stage1_generate_metadata(train):
        """Build the metadata frame for the train or the test split."""
        tr_te = 'stage1_train' if train else 'stage1_test'

        rows = []
        seen_ids = set()
        for image_id in sorted(os.listdir(os.path.join(data_dir, tr_te))):
            p = os.path.join(data_dir, tr_te, image_id, 'images')
            image_files = os.listdir(p)
            if len(image_files) != 1:
                raise ValueError('expected exactly one image in dir ' + p)
            if image_id != os.path.splitext(image_files[0])[0]:
                raise ValueError('ImageId mismatch ' + str(image_id))
            if image_id in seen_ids:
                raise ValueError('ImageId conflict ' + str(image_id))
            seen_ids.add(image_id)

            file_path_image = os.path.join(p, image_files[0])
            if train:
                is_train = 1
                file_path_masks = os.path.join(data_dir, tr_te, image_id, 'masks')
                file_path_mask = os.path.join(masks_overlayed_dir, tr_te, image_id + '.png')
                file_path_contours = os.path.join(contours_overlayed_dir, tr_te, image_id + '.png')
                file_path_contours_touching = os.path.join(contours_touching_overlayed_dir, tr_te,
                                                           image_id + '.png')
                file_path_centers = os.path.join(centers_overlayed_dir, tr_te, image_id + '.png')
                n_nuclei = len(os.listdir(file_path_masks))
            else:
                is_train = 0
                file_path_masks = None
                file_path_mask = None
                file_path_contours = None
                file_path_contours_touching = None
                file_path_centers = None
                n_nuclei = None

            # Context manager closes the file handle once the size is read.
            with Image.open(file_path_image) as img:
                width, height = img.size[0], img.size[1]

            rows.append({'ImageId': image_id,
                         'file_path_image': file_path_image,
                         'file_path_masks': file_path_masks,
                         'file_path_mask': file_path_mask,
                         'file_path_contours': file_path_contours,
                         'file_path_contours_touching': file_path_contours_touching,
                         'file_path_centers': file_path_centers,
                         'is_train': is_train,
                         'width': width,
                         'height': height,
                         'n_nuclei': n_nuclei})

        # Build the frame once instead of appending row by row;
        # DataFrame.append no longer exists in pandas 2.
        return pd.DataFrame(rows, columns=columns)

    train_metadata = stage1_generate_metadata(train=True)
    test_metadata = stage1_generate_metadata(train=False)
    metadata = pd.concat([train_metadata, test_metadata], ignore_index=True)
    return metadata
178 | 
179 | 
def squeeze_inputs(inputs):
    """Return the first element of ``inputs`` with its length-1 axis 1 removed."""
    first = inputs[0]
    return np.squeeze(first, axis=1)
182 | 
183 | 
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
    denominator = 1 + np.exp(-x)
    return 1. / denominator
186 | 
187 | 
def relabel(img):
    """Renumber the labels of a 2-D array in place to ``0, 1, 2, ...``.

    The value 0 stays 0; every other distinct value is replaced by its
    1-based rank among the non-zero values in ascending order. Previously an
    image without any 0 had its smallest label mapped to 0; that no longer
    happens. The per-pixel Python loop is replaced by one vectorized lookup.

    Args:
        img: 2-D label array; modified in place.

    Returns:
        The same ``img`` object with renumbered labels.
    """
    h, w = img.shape  # also enforces that img is 2-D, as before

    values, inverse = np.unique(img, return_inverse=True)
    is_object = values != 0
    new_ids = np.zeros(len(values), dtype=np.int64)
    new_ids[is_object] = np.arange(1, np.count_nonzero(is_object) + 1)

    # inverse maps every pixel to its position in `values`.
    img[...] = new_ids[inverse].reshape(h, w)
    return img
201 | 
202 | 
def relabel_random_colors(img, max_colours=1000):
    """Replace every non-zero label of a 2-D array by a random one, in place.

    Two independently shuffled copies of ``1 .. max_colours - 1`` are paired
    up to form the mapping; 0 always maps to 0.

    Args:
        img: 2-D label array; modified in place. Every value must be 0 or lie
            in ``1 .. max_colours - 1``, otherwise the lookup raises ``KeyError``.
        max_colours: Exclusive upper bound of the label values handled.

    Returns:
        The same ``img`` object with remapped labels.
    """
    sources = list(range(1, max_colours, 1))
    np.random.shuffle(sources)
    targets = list(range(1, max_colours, 1))
    np.random.shuffle(targets)
    colour_of = dict(zip(sources, targets))
    colour_of[0] = 0

    rows, cols = img.shape

    for r in range(rows):
        for c in range(cols):
            img[r, c] = colour_of[img[r, c]]
    return img
216 | 
217 | 
def from_pil(*images):
    """Convert each positional argument to a NumPy array and return them as a list."""
    return list(map(np.array, images))
220 | 
221 | 
def to_pil(*images):
    """Convert each array argument to a PIL image after casting it to ``uint8``."""
    converted = []
    for array in images:
        as_bytes = array.astype(np.uint8)
        converted.append(Image.fromarray(as_bytes))
    return converted


--------------------------------------------------------------------------------
/utils/oper_utils2.py:
--------------------------------------------------------------------------------
  1 | import os  # For filepath, directory handling
  2 | import pandas as pd
  3 | 
  4 | import cv2  # To read and manipulate images
  5 | import numpy as np
  6 | 
  7 | from skimage.morphology import label  # For using image labeling
  8 | import matplotlib.pyplot as plt  # Python 2D plotting library
  9 | import matplotlib.cm as cm  # Color map
 10 | 
 11 | 
# Labelled objects covering fewer pixels than this are removed by
# get_labeled_mask before the mask is labelled again.
min_object_size = 1
 13 | 
 14 | 
 15 | def read_image(filepath, color_mode=cv2.IMREAD_COLOR, target_size=None):
 16 |     """Read an image from a file and resize it."""
 17 |     img = cv2.imread(filepath, color_mode)
 18 |     if target_size:
 19 |         img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
 20 |     return img
 21 | 
 22 | 
 23 | def read_test_data_properties(test_dir, img_dir_name):
 24 |     """Read basic properties of test images."""
 25 |     tmp = []
 26 |     for i, dir_name in enumerate(next(os.walk(test_dir))[1]):
 27 |         img_dir = os.path.join(test_dir, dir_name, img_dir_name)
 28 |         img_name = next(os.walk(img_dir))[2][0]
 29 |         img_name_id = os.path.splitext(img_name)[0]
 30 |         img_path = os.path.join(img_dir, img_name)
 31 |         img_shape = read_image(img_path).shape
 32 |         tmp.append(['{}'.format(img_name_id), img_shape[0], img_shape[1],
 33 |                     img_shape[0] / img_shape[1], img_shape[2], img_path])
 34 | 
 35 |     test_df = pd.DataFrame(tmp, columns=['img_id', 'img_height', 'img_width',
 36 |                                          'img_ratio', 'num_channels', 'image_path'])
 37 |     return test_df
 38 | 
 39 | 
def normalize_imgs(data):
    """Normalize images by delegating to ``normalize`` with ``type_=1``.

    ``type_=1`` divides each entry along the first axis by its own maximum.
    """
    return normalize(data, type_=1)
 43 | 
 44 | 
def normalize_masks(data):
    """Normalize masks by delegating to ``normalize`` with ``type_=1``.

    ``type_=1`` divides each entry along the first axis by its own maximum.
    """
    return normalize(data, type_=1)
 48 | 
 49 | 
 50 | def normalize(data, type_=1):
 51 |     """Normalize data."""
 52 |     if type_ == 0:
 53 |         # Convert pixel values from [0:255] to [0:1] by global factor
 54 |         data = data.astype(np.float32) / data.max()
 55 |     if type_ == 1:
 56 |         # Convert pixel values from [0:255] to [0:1] by local factor
 57 |         div = data.max(axis=tuple(np.arange(1, len(data.shape))), keepdims=True)
 58 |         div[div < 0.01 * data.mean()] = 1.  # protect against too small pixel intensities
 59 |         data = data.astype(np.float32) / div
 60 |     if type_ == 2:
 61 |         # Standardisation of each image
 62 |         data = data.astype(np.float32) / data.max()
 63 |         mean = data.mean(axis=tuple(np.arange(1, len(data.shape))), keepdims=True)
 64 |         std = data.std(axis=tuple(np.arange(1, len(data.shape))), keepdims=True)
 65 |         data = (data - mean) / std
 66 | 
 67 |     return data
 68 | 
 69 | 
 70 | def trsf_proba_to_binary(y_data):
 71 |     """Transform propabilities into binary values 0 or 1."""
 72 |     return np.greater(y_data, .5).astype(np.uint8)
 73 | 
 74 | 
 75 | def invert_imgs(imgs, cutoff=.5):
 76 |     '''Invert image if mean value is greater than cutoff.'''
 77 |     imgs = np.array(list(map(lambda x: 1. - x if np.mean(x) > cutoff else x, imgs)))
 78 |     return normalize_imgs(imgs)
 79 | 
 80 | def invert_img(img, cutoff=.5):
 81 |     '''Invert image if mean value is greater than cutoff.'''
 82 |     imgs = np.array(list(map(lambda x: 1. - x if np.mean(x) > cutoff else x, imgs)))
 83 |     return normalize_imgs(imgs)
 84 | 
 85 | 
 86 | def imgs_to_grayscale(imgs):
 87 |     '''Transform RGB images into grayscale spectrum.'''
 88 |     if imgs.shape[3] == 3:
 89 |         imgs = normalize_imgs(np.expand_dims(np.mean(imgs, axis=3), axis=3))
 90 |     return imgs
 91 | 
 92 | 
 93 | # Analyze nuclei sizes.
 94 | def get_nuclei_sizes(y_train):
 95 |     nuclei_sizes = []
 96 |     mask_idx = []
 97 |     for i in range(len(y_train)):
 98 |         mask = y_train[i].reshape(y_train.shape[1], y_train.shape[2])
 99 |         lab_mask = label(mask > .5)
100 |         (mask_labels, mask_sizes) = np.unique(lab_mask, return_counts=True)
101 |         nuclei_sizes.extend(mask_sizes[1:])
102 |         mask_idx.extend([i]*len(mask_sizes[1:]))
103 |     return mask_idx, nuclei_sizes
104 | 
105 | # mask_idx, nuclei_sizes = get_nuclei_sizes()
106 | # nuclei_sizes_df = pd.DataFrame()
107 | # nuclei_sizes_df['mask_index'] = mask_idx
108 | # nuclei_sizes_df['nucleous_size'] = nuclei_sizes
109 | #
110 | # print(nuclei_sizes_df.describe())
111 | # nuclei_sizes_df.sort_values(by='nucleous_size', ascending=True).head(10)
112 | 
113 | 
114 | 
def get_labeled_mask(mask, cutoff=.5):
    """Segment a mask into labelled objects.

    Args:
        mask: Array reshaped to ``(mask.shape[0], mask.shape[1])`` before use.
        cutoff: Values above this threshold count as foreground.

    Returns:
        The labelled array. Objects with fewer than ``min_object_size``
        pixels are cleared and the remainder is labelled again.
    """
    plane = mask.reshape(mask.shape[0], mask.shape[1])
    labelled = label(plane > cutoff)

    # Keep only objects that are large enough.
    ids, areas = np.unique(labelled, return_counts=True)
    undersized = areas < min_object_size
    if undersized.any():
        for object_id in ids[undersized]:
            labelled[labelled == object_id] = 0
        labelled = label(labelled > cutoff)

    return labelled
129 | 
130 | 
def get_iou(y_true_labeled, y_pred_labeled):
    """Compute intersections over unions between true and predicted objects.

    Label values are used directly as row/column indices, so both inputs
    must be integer arrays whose labels are consecutive and start at 0
    (background).

    The intersection table is accumulated with ``np.add.at`` instead of a
    per-pixel Python double loop; the results are unchanged.

    Args:
        y_true_labeled: Labelled ground-truth array.
        y_pred_labeled: Labelled prediction array of the same shape.

    Returns:
        A dict with the keys ``'iou'``, ``'assigned'`` (pairs of true/pred
        labels matched greedily by largest overlap), ``'true_not_assigned'``,
        ``'pred_not_assigned'``, ``'true_labels'`` and ``'pred_labels'``.
        Background is excluded from all of them.
    """
    # Array of different objects and occupied area.
    true_labels, true_areas = np.unique(y_true_labeled, return_counts=True)
    pred_labels, pred_areas = np.unique(y_pred_labeled, return_counts=True)

    # Number of different labels.
    n_true_labels = len(true_labels)
    n_pred_labels = len(pred_labels)

    # Each mask has at least one identified object.
    if (n_true_labels > 1) and (n_pred_labels > 1):

        # Count, for every (true label, pred label) pair, the shared pixels.
        # add.at accumulates repeated index pairs, unlike plain fancy "+=".
        all_intersections = np.zeros((n_true_labels, n_pred_labels))
        np.add.at(all_intersections, (y_true_labeled, y_pred_labeled), 1)

        # Assign predicted to true background.
        assigned = [[0, 0]]
        tmp = all_intersections.copy()
        tmp[0, :] = -1
        tmp[:, 0] = -1

        # Assign predicted to true objects if they have any overlap. Each
        # round takes the largest remaining overlap and blocks its row and
        # column so no label is matched twice.
        for _ in range(1, min(n_true_labels, n_pred_labels)):
            m, n = np.unravel_index(np.argmax(tmp), (n_true_labels, n_pred_labels))
            if all_intersections[m, n] > 0:
                assigned.append([m, n])
            tmp[m, :] = -1
            tmp[:, n] = -1
        assigned = np.array(assigned)

        # Intersections over unions.
        intersection = np.array([all_intersections[m, n] for m, n in assigned])
        union = np.array([(true_areas[m] + pred_areas[n] - all_intersections[m, n])
                          for m, n in assigned])
        iou = intersection / union

        # Remove background.
        iou = iou[1:]
        assigned = assigned[1:]
        true_labels = true_labels[1:]
        pred_labels = pred_labels[1:]

        # Labels that are not assigned.
        true_not_assigned = np.setdiff1d(true_labels, assigned[:, 0])
        pred_not_assigned = np.setdiff1d(pred_labels, assigned[:, 1])

    else:
        # in case that no object is identified in one of the masks
        iou = np.array([])
        assigned = np.array([])
        true_labels = true_labels[1:]
        pred_labels = pred_labels[1:]
        true_not_assigned = true_labels
        pred_not_assigned = pred_labels

    # Returning parameters.
    params = {'iou': iou, 'assigned': assigned, 'true_not_assigned': true_not_assigned,
              'pred_not_assigned': pred_not_assigned, 'true_labels': true_labels,
              'pred_labels': pred_labels}
    return params
197 | 
198 | 
def get_score_summary(y_true, y_pred):
    """Compute the score for a single sample including a detailed summary.

    Args:
        y_true: Ground-truth mask.
        y_pred: Predicted mask.

    Returns:
        ``(score, params_dict)``. ``score`` is the mean over the IoU
        thresholds 0.5, 0.55, ..., 0.95 of ``tp / (tp + fp + fn)``.
        ``params_dict`` holds the per-threshold ``'summary'`` table
        (threshold, precision, tp, fp, fn), the output of ``get_iou`` and
        both labelled masks.
    """
    y_true_labeled = get_labeled_mask(y_true)
    y_pred_labeled = get_labeled_mask(y_pred)

    matching = get_iou(y_true_labeled, y_pred_labeled)
    iou = matching['iou']
    true_labels = matching['true_labels']
    pred_labels = matching['pred_labels']
    n_true = len(true_labels)
    n_pred = len(pred_labels)

    rows = []
    for threshold in np.arange(0.5, 1.0, 0.05):
        tp = np.sum(iou > threshold)
        fn = n_true - tp
        fp = n_pred - tp
        total = tp + fp + fn
        # No objects at all on either side scores 0 for this threshold.
        prec = tp / total if total > 0 else 0
        rows.append([threshold, prec, tp, fp, fn])

    summary = np.array(rows)
    score = np.mean(summary[:, 1])  # Final score.
    params_dict = {'summary': summary, 'iou': iou, 'assigned': matching['assigned'],
                   'true_not_assigned': matching['true_not_assigned'],
                   'pred_not_assigned': matching['pred_not_assigned'],
                   'true_labels': true_labels,
                   'pred_labels': pred_labels, 'y_true_labeled': y_true_labeled,
                   'y_pred_labeled': y_pred_labeled}

    return score, params_dict
235 | 
236 | 
def get_score(y_true, y_pred):
    """Compute the score for a batch of samples.

    Args:
        y_true: Sequence of ground-truth masks.
        y_pred: Sequence of predicted masks, indexed in parallel with ``y_true``.

    Returns:
        A NumPy array with one ``get_score_summary`` score per sample.
    """
    per_sample = [get_score_summary(y_true[k], y_pred[k])[0]
                  for k in range(len(y_true))]
    return np.array(per_sample)
244 | 
245 | 
def plot_score_summary(y_true, y_pred, n=0, num_true_objects=None):
    """Plot score summary for a single sample.

    The previous body referenced the undefined names ``n``, ``train_df`` and
    ``imshow_args`` and therefore always raised ``NameError``. ``n`` is now an
    optional parameter, the object count shown in the first title can be
    supplied explicitly, and the labelled masks are drawn directly.

    Args:
        y_true: Ground-truth mask.
        y_pred: Predicted mask.
        n: Sample index shown in the plot titles.
        num_true_objects: Object count shown in the first title; defaults to
            the number of objects identified in ``y_true``.
    """
    # Compute score and assign parameters.
    score, params_dict = get_score_summary(y_true, y_pred)

    assigned = params_dict['assigned']
    true_not_assigned = params_dict['true_not_assigned']
    pred_not_assigned = params_dict['pred_not_assigned']
    true_labels = params_dict['true_labels']
    pred_labels = params_dict['pred_labels']
    y_true_labeled = params_dict['y_true_labeled']
    y_pred_labeled = params_dict['y_pred_labeled']
    summary = params_dict['summary']

    n_assigned = len(assigned)
    n_true_not_assigned = len(true_not_assigned)
    n_pred_not_assigned = len(pred_not_assigned)
    n_true_labels = len(true_labels)
    n_pred_labels = len(pred_labels)
    if num_true_objects is None:
        num_true_objects = n_true_labels

    # Summary dataframe.
    summary_df = pd.DataFrame(summary, columns=['threshold', 'precision', 'tp', 'fp', 'fn'])
    print('Final score:', score)
    print(summary_df)

    # Plots.
    fig, axs = plt.subplots(2, 3, figsize=(20, 13))

    # True mask with true objects.
    img = y_true
    axs[0, 0].imshow(img, cmap=cm.gray)
    axs[0, 0].set_title('{}.) true mask: {} true objects'.format(n, num_true_objects))

    # True mask with identified objects.
    axs[0, 1].imshow(y_true_labeled, cmap=cm.gray)
    axs[0, 1].set_title('{}.) true mask: {} objects identified'.format(n, n_true_labels))

    # Predicted mask with identified objects.
    axs[0, 2].imshow(y_pred_labeled, cmap=cm.gray)
    axs[0, 2].set_title('{}.) predicted mask: {} objects identified'.format(
        n, n_pred_labels))

    # Prediction overlap with true mask.
    img = np.zeros(y_true.shape)
    img[y_true > 0.5] = 100
    for i, j in assigned:
        img[(y_true_labeled == i) & (y_pred_labeled == j)] = 255
    axs[1, 0].set_title('{}.) {} pred. overlaps (white) with true objects (gray)'.format(
        n, len(assigned)))
    axs[1, 0].imshow(img, cmap='gray', norm=None)

    # Intersection over union.
    img = np.zeros(y_true.shape)
    img[(y_pred_labeled > 0) & (y_pred_labeled < 100)] = 100
    img[(y_true_labeled > 0) & (y_true_labeled < 100)] = 100
    for i, j in assigned:
        img[(y_true_labeled == i) & (y_pred_labeled == j)] = 255
    axs[1, 1].set_title('{}.) {} intersections (white) over unions (gray)'.format(
        n, n_assigned))
    axs[1, 1].imshow(img, cmap='gray')

    # False positives and false negatives.
    img = np.zeros(y_true.shape)
    for i in pred_not_assigned:
        img[(y_pred_labeled == i)] = 255
    for i in true_not_assigned:
        img[(y_true_labeled == i)] = 100
    axs[1, 2].set_title('{}.) no threshold: {} fp (white), {} fn (gray)'.format(
        n, n_pred_not_assigned, n_true_not_assigned))
    axs[1, 2].imshow(img, cmap='gray')
318 | 
319 |     # Check the score metric for one sample. The predicted mask is simulated
320 |     # and can be modified in order to check the correct implementation of
321 |     # the score metric.
322 | 
323 | 
324 | # Collection of methods for run length encoding.
325 | # For example, '1 3 10 5' implies pixels 1,2,3,10,11,12,13,14 are to be included
326 | # in the mask. The pixels are one-indexed and numbered from top to bottom,
327 | # then left to right: 1 is pixel (1,1), 2 is pixel (2,1), etc.
328 | 
def rle_of_binary(x):
    """Run-length encode the entries equal to 1 of a 2-D array.

    Pixels are numbered from 1, column by column (top to bottom, then left
    to right).

    Args:
        x: 2-D array; entries equal to 1 are encoded.

    Returns:
        A flat list ``[start, length, start, length, ...]``.
    """
    ones_at = np.flatnonzero(x.T.flatten() == 1)  # indices from top to down
    encoded = []
    previous = -2
    for position in ones_at:
        if position - previous > 1:
            # Not adjacent to the previous pixel: start a new run.
            encoded += [position + 1, 0]
        encoded[-1] += 1
        previous = position
    return encoded
339 | 
340 | 
def mask_to_rle(mask, cutoff=.5, min_object_size=1.):
    """Yield the run-length encoding of every object found in ``mask``.

    Args:
        mask: Array thresholded at ``cutoff`` and then labelled.
        cutoff: Values above this threshold count as foreground.
        min_object_size: Objects with fewer pixels are removed before encoding.

    Yields:
        One ``rle_of_binary`` result per remaining object, in label order.
    """
    # segment image and label different objects
    labelled = label(mask > cutoff)

    # Keep only objects that are large enough.
    ids, areas = np.unique(labelled, return_counts=True)
    undersized = areas < min_object_size
    if undersized.any():
        for object_id in ids[undersized]:
            labelled[labelled == object_id] = 0
        labelled = label(labelled > cutoff)

    # Loop over each object excluding the background labeled by 0.
    highest = labelled.max()
    for object_id in range(1, highest + 1):
        yield rle_of_binary(labelled == object_id)
357 | 
358 | 
def rle_to_mask(rle, img_shape):
    """Rebuild a mask from a collection of run-length encodings.

    ``rle`` is a sequence of flat ``[start, length, ...]`` lists with
    one-indexed, column-major starts. All encodings are merged into one
    mask with the layout of ``img_shape``; covered pixels are set to 1.
    """
    flat = np.zeros(img_shape).flatten()
    for runs in rle:
        for k in range(0, len(runs), 2):
            run_length = runs[k + 1]
            for offset in range(run_length):
                flat[runs[k] - 1 + offset] = 1
    rows, cols = img_shape[0], img_shape[1]
    # The encoding is column-major, so reshape transposed and flip back.
    return flat.reshape(cols, rows).T
367 | 


--------------------------------------------------------------------------------
/utils/unet_ensemble.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | import skimage.io
  5 | import cv2
  6 | import matplotlib.pyplot as plt
  7 | import matplotlib.patches as patches
  8 | 
  9 | # from subprocess import check_output
 10 | # print(check_output(["ls", "../input"]).decode("utf8"))
 11 | 
# Root directory that is listed to discover the test image ids.
STAGE1_TEST = "../../dl_data/nucleus/stage1_test"
# Path template of one test image; read_image() fills both "{}" placeholders
# with the image id.
STAGE1_TEST_IMAGE_PATTERN = "%s/{}/images/{}.png" % STAGE1_TEST
# Column names used in the submission CSV files.
SUBMISSION_IMAGEID = "ImageId"
SUBMISSION_ENCODED = "EncodedPixels"
# Submission CSV files, one per model, that are combined by this script.
models_path = [
    "../../dl_data/nucleus/sub-dsbowl2018-1111.csv",
    "../../dl_data/nucleus/sub-dsbowl2018-2222.csv"
]
 20 | 
 21 | # Image loading
 22 | def image_ids_in(root_dir):
 23 |     ids = []
 24 |     for id in os.listdir(root_dir):
 25 |         ids.append(id)
 26 |     return ids
 27 | 
 28 | def read_image(image_id, pattern=STAGE1_TEST_IMAGE_PATTERN):
 29 |     image_file = pattern.format(image_id, image_id)
 30 |     image = skimage.io.imread(image_file)
 31 |     # Drop alpha which is not used
 32 |     image = image[:, :, :3]
 33 |     return image
 34 | 
 35 | def image_id_to_index(image_id, images_ids):
 36 |     i = np.argwhere(np.array(images_ids) == image_id)[0][0]
 37 |     return i
 38 | 
 39 | # RLE decoding functions
 40 | def rle_decode_one_mask(rle_str, mask_shape, mask_dtype):
 41 |     s = rle_str.split()
 42 |     starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
 43 |     starts -= 1
 44 |     ends = starts + lengths
 45 |     mask = np.zeros(np.prod(mask_shape), dtype=mask_dtype)
 46 |     for lo, hi in zip(starts, ends):
 47 |         mask[lo:hi] = 1
 48 |     return mask.reshape(mask_shape[::-1]).T
 49 | 
 50 | def rle_decode_all_masks(masks_str, mask_shape, mask_dtype):
 51 |     image = None
 52 |     i = 0
 53 |     for mask_str in masks_str:
 54 |         i = i + 1
 55 |         mask = rle_decode_one_mask(mask_str, mask_shape, mask_dtype)
 56 |         mask[mask == 1] = i
 57 |         if image is None:
 58 |             image = mask
 59 |         else:
 60 |             image = image + mask
 61 |     return image
 62 | 
# Test images
# Evaluated when the module is loaded: lists STAGE1_TEST to collect the ids.
test_image_ids = image_ids_in(STAGE1_TEST)
 65 | 
 66 | # Convert index image (unique value per mask) to array.
 67 | def img_masks_as_masks_array(train_label):
 68 |     # As (masks, height, width)
 69 |     y_true = []
 70 |     uniques = np.unique(train_label)
 71 |     # Remove zero from index
 72 |     indexes = np.delete(uniques, np.where(uniques == 0))
 73 |     for index in indexes:
 74 |         y_true.append(np.where(train_label == index, 1, 0))
 75 |     y_true = np.array(y_true)
 76 |     return y_true
 77 | 
 78 | # Convert back all mask to index image
 79 | def masks_array_to_index_image(masks):
 80 |     mask = np.zeros((masks.shape[1], masks.shape[2]), dtype=np.uint16)
 81 |     for index in range(0, masks.shape[0]):
 82 |         mask[masks[index,:,:] > 0] = index + 1
 83 |     return mask
 84 | 
 85 | # Read image and predicted masks
 86 | def read_test_image_mask(submissionPD, test_id):
 87 |     test_image = read_image(test_id)
 88 |     rle_encoded_masks = submissionPD[submissionPD[SUBMISSION_IMAGEID] == test_id][SUBMISSION_ENCODED].values
 89 |     test_masks = rle_decode_all_masks(rle_encoded_masks, test_image.shape[:-1], np.int32)
 90 |     test_masks_array = img_masks_as_masks_array(test_masks)
 91 |     return test_image, test_masks_array
 92 | 
 93 | # Extract bounding box of mask
 94 | def find_bounding_boxes_on_mask(bin_img, test_id, mask_id, with_ref=None):
 95 |     boxes = []
 96 |     img_bin = np.where(bin_img > 0, 1, 0)
 97 |     img_rgb = (img_bin)*255
 98 |     img_rgb = np.concatenate([img_rgb[:, :, np.newaxis], img_rgb[:, :, np.newaxis], img_rgb[:, :, np.newaxis]], axis=-1)
 99 |     img_rgb = img_rgb.astype(np.uint8)
100 |     im_bw = cv2.cvtColor(img_rgb,cv2.COLOR_RGB2GRAY)
101 |     ret, im_bw = cv2.threshold(im_bw, 127, 255, cv2.THRESH_BINARY)
102 |     pixelpoints = cv2.findNonZero(im_bw)
103 |     x, y, w, h = cv2.boundingRect(pixelpoints)
104 |     if with_ref is not None:
105 |         boxes.append((x, y, w, h, with_ref, test_id, mask_id))
106 |     else:
107 |         boxes.append((x,y,w,h))
108 |     return np.array(boxes)
109 | 
110 | # Extract all bounding boxes
def find_bounding_boxes_on_masks(test_masks_array, test_id, with_ref=None):
    """Compute one bounding box per mask.

    Returns ``(masks, boxes)`` as arrays; ``boxes[k]`` is the result of
    ``find_bounding_boxes_on_mask`` for ``masks[k]``, using k as mask id.
    """
    masks = []
    boxes_per_mask = []
    for mask_id, mask in enumerate(test_masks_array):
        boxes_per_mask.append(
            find_bounding_boxes_on_mask(mask, test_id, mask_id, with_ref=with_ref))
        masks.append(mask)
    return np.array(masks), np.array(boxes_per_mask)
122 | # Image and array of masks + bounding boxes for each model (for a given image).
def models_cv_masks_for_image(models_path, test_id, test_image_ids):
    """Collect the masks and bounding boxes of every model for one image.

    Each entry of ``models_path`` is a submission CSV. Rows without an
    encoding are dropped before the masks of ``test_id`` are decoded.

    Returns ``(image, masks_per_model, boxes_per_model)``; both lists are
    ordered like ``models_path``, and each box carries the model index as
    its reference.
    """
    test_id_ref = image_id_to_index(test_id, test_image_ids)
    masks_per_model = []
    boxes_per_model = []
    for model_index, csv_path in enumerate(models_path):
        submission = pd.read_csv(csv_path)
        submission.dropna(subset=[SUBMISSION_ENCODED], inplace=True)
        test_image, masks = read_test_image_mask(submission, test_id)
        kept_masks, boxes = find_bounding_boxes_on_masks(
            masks, test_id_ref, with_ref=model_index)
        masks_per_model.append(kept_masks)
        boxes_per_model.append(boxes)
    return test_image, masks_per_model, boxes_per_model
136 | 
137 | # Basic NMS on boxes, https://www.pyimagesearch.com/2014/11/17/non-maximum-suppression-object-detection-python
138 | # Malisiewicz et al.
def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression over ``(x, y, w, h, ...)`` boxes.

    Boxes are visited from the largest bottom edge downwards. Each visited
    box is kept, and every remaining box whose intersection with it covers
    more than ``overlapThresh`` of that remaining box's own area is dropped.

    Returns the kept rows cast to int, or an empty list when ``boxes`` is
    empty.
    """
    if len(boxes) == 0:
        return []

    # Integer boxes are converted so the overlap ratio is a true division.
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    left = boxes[:, 0]
    top = boxes[:, 1]
    right = left + boxes[:, 2]
    bottom = top + boxes[:, 3]
    areas = (right - left + 1) * (bottom - top + 1)

    # Candidates sorted by bottom edge; the last entry is processed first.
    order = np.argsort(bottom)
    kept = []
    while len(order) > 0:
        current = order[-1]
        rest = order[:-1]
        kept.append(current)

        # Intersection rectangle between the current box and each other one.
        inter_left = np.maximum(left[current], left[rest])
        inter_top = np.maximum(top[current], top[rest])
        inter_right = np.minimum(right[current], right[rest])
        inter_bottom = np.minimum(bottom[current], bottom[rest])
        inter_w = np.maximum(0, inter_right - inter_left + 1)
        inter_h = np.maximum(0, inter_bottom - inter_top + 1)

        ratio = (inter_w * inter_h) / areas[rest]
        # Drop the current box and everything that overlaps it too much.
        order = rest[~(ratio > overlapThresh)]

    return boxes[kept].astype("int")
185 | 
186 | # Compute NMS (i.e. select only one box when multiple boxes overlap) for across models.
def models_cv_masks_boxes_nms(models_cv_masks_boxes, threshold=0.3):
    """Run non-maximum suppression over the boxes of all models.

    Args:
        models_cv_masks_boxes: one array of boxes per model; an entry may
            be empty when that model produced no mask.
        threshold: overlap ratio above which a box is suppressed.

    Returns:
        The boxes kept by ``non_max_suppression_fast``, or an empty list
        when no model contributed a box.
    """
    per_model = [np.asarray(model_boxes) for model_boxes in models_cv_masks_boxes]
    # Skip models without boxes (their empty array cannot be concatenated
    # with the others) and flatten each remaining entry to one row per box.
    # An explicit reshape keeps the result 2-D even for a single box, which
    # squeeze() would collapse into a 1-D vector.
    per_model = [b.reshape(-1, b.shape[-1]) for b in per_model if b.size > 0]
    if not per_model:
        return []
    boxes = np.concatenate(per_model)
    return non_max_suppression_fast(boxes, threshold)
191 | 
# Display some result (on the nightmare images)
# test_id = "0f1f896d9ae5a04752d3239c690402c022db4d72c0d2c087d73380896f72c466"
test_id = "472b1c5ff988dadc209faea92499bc07f305208dbda29d16262b3d543ac91c71"

# Get masks and boxes (one per mask) for each model
test_image, test_masks_cv_array, test_masks_boxes_cv_array = models_cv_masks_for_image(models_path, test_id, test_image_ids)

# Run NMS ensembling
masks_boxes_nms = models_cv_masks_boxes_nms(test_masks_boxes_cv_array, threshold=0.3)

# Plot predictions of each model: index image of the instances with the
# test image drawn semi-transparently on top.
fig, ax = plt.subplots(1, 2, figsize=(18, 8))
ax[0].axis('off')
ax[0].imshow(masks_array_to_index_image(test_masks_cv_array[0]), cmap='nipy_spectral')
ax[0].imshow(test_image, alpha=0.45)
ax[0].set_title("Model#0: %d predicted instances for %s"%(len(test_masks_cv_array[0]), models_path[0]))

ax[1].axis('off')
ax[1].imshow(masks_array_to_index_image(test_masks_cv_array[1]), cmap='nipy_spectral')
ax[1].imshow(test_image, alpha=0.45)
ax[1].set_title("Model#1: %d predicted instances for %s"%(len(test_masks_cv_array[1]), models_path[1]))

plt.tight_layout()

# Plot boxes for each model (left) and resulting NMS (right)
fig, ax = plt.subplots(1, 2, figsize=(18, 8))
ax[0].axis('off')
ax[0].set_ylim(test_image.shape[0] + 10, -10)
ax[0].set_xlim(-10, test_image.shape[1] + 10)
# pyplot.get_cmap is used because matplotlib.cm.get_cmap (reached through
# plt.cm.get_cmap) was removed in Matplotlib 3.9.
cmap = plt.get_cmap('nipy_spectral')

# Plot boxes per model; box[4] is the index of the model the box came from
# and selects the color.
for box in np.concatenate(test_masks_boxes_cv_array).squeeze():
    p = patches.Rectangle((box[0]-1, box[1]-1), box[2], box[3], linewidth=1, facecolor='none', edgecolor=cmap(box[4]*60), alpha=0.75, linestyle="dashed")
    ax[0].add_patch(p)
    ax[0].text(box[0], box[1] + 8, "%d"%box[4], color=cmap(box[4]*60), size=10, backgroundcolor="none")
ax[0].imshow(test_image, alpha=0.6)
ax[0].set_title("Bounding boxes of predicted instances for model #0 and #1")

# Plot NMS results
ax[1].set_ylim(test_image.shape[0] + 10, -10)
ax[1].set_xlim(-10, test_image.shape[1] + 10)
ax[1].axis('off')
for box_nms in masks_boxes_nms:
    p = patches.Rectangle((box_nms[0]-1, box_nms[1]-1), box_nms[2], box_nms[3], linewidth=1, facecolor='yellow', alpha=0.25, linestyle="dashed")
    ax[1].add_patch(p)
    ax[1].text(box_nms[0], box_nms[1] + 8, "%d"%box_nms[4], color=cmap(box_nms[4]*60), size=11, backgroundcolor="none")
ax[1].imshow(test_image, alpha=0.6)
ax[1].set_title("Ensemble NMS bounding boxes (%d) of predicted instances with its reference model #0 or #1"%len(masks_boxes_nms))
plt.tight_layout()
242 | 
243 | # Back to masks from NMS boxes
def get_masks_from_boxes_nms(masks_boxes_nms, test_masks_cv_array):
    """Look up the mask that belongs to each box kept by NMS.

    Column 4 of a box is the model index and column 6 the mask index
    within that model; ``test_masks_cv_array`` is indexed in that order.
    """
    selected = [test_masks_cv_array[box[4]][box[6]] for box in masks_boxes_nms]
    return np.array(selected)
253 | 
# NMS instances
# Recover the full masks that correspond to the boxes kept by NMS.
masks_nms = get_masks_from_boxes_nms(masks_boxes_nms, test_masks_cv_array)

# Plot masks from NMS boxes
# Left panel: the test image alone. Right panel: the ensemble index image
# with the test image drawn semi-transparently on top.
fig, ax = plt.subplots(1, 2, figsize=(18, 8))
ax[0].axis('off')
masks_nms_image = masks_array_to_index_image(masks_nms)
ax[0].imshow(test_image)
ax[0].set_title("%s"%test_id)
ax[1].axis('off')
ax[1].imshow(masks_nms_image, cmap='nipy_spectral')
ax[1].imshow(test_image, alpha=0.45)
ax[1].set_title("Ensemble predicted instances (%d)"%len(masks_nms))
plt.tight_layout()
268 | 
269 | # RLE encoder
def rle_to_string(runs):
    """Join the run values into one space-separated string."""
    return " ".join(map(str, runs))
272 | 
def rle_encode_one_mask(mask):
    """Run-length encode one binary mask.

    The mask is read column by column. The result is an array of
    alternating ``start, length`` values with one-indexed starts.
    """
    flat = mask.T.flatten()
    # A run that touches the first or last pixel has no value change at its
    # outer end, so a zero is added on both sides to create one.
    touches_border = bool(flat[0] or flat[-1])
    if touches_border:
        padded = np.zeros([len(flat) + 2], dtype=flat.dtype)
        padded[1:-1] = flat
        flat = padded
    # Padding shifts every position by one, hence the smaller offset.
    shift = 1 if touches_border else 2
    runs = np.where(flat[1:] != flat[:-1])[0] + shift
    # Odd entries hold run ends at this point; turn them into lengths.
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return runs
286 | 
def rle_encode_all_masks(masks):
    """Run-length encode every instance of an index image.

    Args:
        masks: index image in which 0 is background and every other value
            identifies one instance.

    Returns:
        A list with one run-length string per non-zero value, ordered by
        value.
    """
    # Filter instead of list.remove(0): remove() raises ValueError when the
    # image contains no background pixel at all.
    values = [v for v in np.unique(masks) if v != 0]
    RLEs = []
    for v in values:
        mask = np.where(masks == v, 1, 0)
        RLEs.append(rle_to_string(rle_encode_one_mask(mask)))
    return RLEs
297 | 
298 | # Generate submission from NMS
def generate_test_submission(image_ids, models_path):
    """Build the ensembled submission for a list of images.

    For every image the boxes of all models are merged with NMS, the
    surviving masks are combined into an index image, and each instance is
    run-length encoded.

    Args:
        image_ids: ids of the images to process.
        models_path: submission CSV files, one per model.

    Returns:
        A DataFrame with one row per encoded instance and the columns
        ``SUBMISSION_IMAGEID`` and ``SUBMISSION_ENCODED``.
    """
    results = []
    for image_id in image_ids:
        test_image, test_masks_cv_array, test_masks_boxes_cv_array = models_cv_masks_for_image(models_path, image_id, image_ids)
        masks_boxes_nms = models_cv_masks_boxes_nms(test_masks_boxes_cv_array, threshold=0.3)
        if len(masks_boxes_nms) == 0:
            # No instance survived for this image. Building an index image
            # from an empty mask stack would fail, so emit one row with an
            # empty encoding instead.
            # NOTE(review): this presumably mirrors the rows without an
            # encoding that are dropped when the model CSVs are read;
            # confirm the expected submission format.
            results.append((image_id, ""))
            continue
        masks_nms = masks_array_to_index_image(get_masks_from_boxes_nms(masks_boxes_nms, test_masks_cv_array))
        for rle_encoded_mask in rle_encode_all_masks(masks_nms):
            results.append((image_id, rle_encoded_mask))
    df = pd.DataFrame(results, columns=[SUBMISSION_IMAGEID, SUBMISSION_ENCODED])
    return df
311 | 
# Build the ensembled submission for every test image; this runs when the
# module is loaded.
submissionPD = generate_test_submission(test_image_ids, models_path)
# NOTE(review): the result of head() is discarded here; it presumably only
# has a visible effect in an interactive/notebook session.
submissionPD.head()

# Write the submission as a comma-separated file without the index column.
submissionPD.to_csv("submission.csv", index=False, sep=",")
316 | 


--------------------------------------------------------------------------------
/validation_tool/diff_gt_mask.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | 
 3 | import numpy as np
 4 | import tensorflow as tf
 5 | 
 6 | import argparse
 7 | import os
 8 | import sys
 9 | 
10 | from skimage.io import imread
11 | 
def main(_):
    """Compare two mask images for every sample of the dataset.

    ``FLAGS.mask_dirs`` names two sub-directories (comma separated). For
    each entry of ``FLAGS.dataset_dir`` the first file of both
    sub-directories is loaded, the share of identical pixels is printed,
    and the number of samples with fully identical masks is reported at
    the end.

    Raises:
        Exception: if ``FLAGS.mask_dirs`` does not contain exactly two names.
    """
    # The parser only defines --mask_dirs; reading FLAGS.ground_truth_prefix
    # raised AttributeError before any work was done.
    gt_mask_list = FLAGS.mask_dirs.split(',')
    if len(gt_mask_list) != 2:
        raise Exception(
            '--mask dirs must has 2 items (with split ,)')

    equal_count = 0
    filelist = sorted(os.listdir(FLAGS.dataset_dir))
    for file in filelist:

        # Directories that hold the two masks to compare.
        a_mask_path = os.path.join(FLAGS.dataset_dir, file, gt_mask_list[0])
        b_mask_path = os.path.join(FLAGS.dataset_dir, file, gt_mask_list[1])

        # Only the first file found in each directory is compared.
        a_mask_image = os.listdir(a_mask_path)[0]
        b_mask_image = os.listdir(b_mask_path)[0]

        a_mask = imread(os.path.join(a_mask_path, a_mask_image))
        b_mask = imread(os.path.join(b_mask_path, b_mask_image))

        if a_mask.shape == b_mask.shape:
            equal_sum = np.sum(a_mask == b_mask)
        else:
            # Masks of different shape cannot be compared pixel by pixel.
            equal_sum = 0
        # float() keeps this a true division on Python 2 as well.
        equal_percentage = float(equal_sum * 2) / (a_mask.size + b_mask.size)
        print(file, " >>>> equal percentage : ", equal_percentage)

        if equal_percentage == 1.0:
            equal_count += 1

    # summary
    print(FLAGS.mask_dirs, ">> equal count >>", equal_count, "/", len(filelist))
41 | 
42 | 
if __name__ == '__main__':
    # Command-line flags of this script. Arguments the parser does not know
    # are collected in `unparsed` and forwarded to tf.app.run.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset_dir',
        default='../../tmp/nucleus_detection/stage1_train',
        type=str,
        help="Data directory")

    # Two directory names separated by a comma.
    parser.add_argument(
        '--mask_dirs',
        default='eval_mask,gt_mask',
        type=str,
        help="which mask dirs is used")

    # FLAGS is a module-level global that main() reads.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
59 | 
60 | 
61 | 


--------------------------------------------------------------------------------
/validation_tool/make_mask_from_csv.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import numpy as np
  4 | import tensorflow as tf
  5 | 
  6 | import argparse
  7 | import os
  8 | import sys
  9 | import shutil
 10 | 
 11 | import pandas as pd
 12 | 
 13 | from PIL import Image
 14 | 
 15 | import matplotlib.pyplot as plt
 16 | import matplotlib.cm as cm  # Color map
 17 | 
 18 | 
 19 | def get_image_size(imageId):
 20 |     height = 0
 21 |     width = 0
 22 | 
 23 |     try:
 24 |         image_path = os.path.join(FLAGS.dataset_dir, imageId, 'images')
 25 |         image = os.listdir(image_path)
 26 |         img = Image.open(os.path.join(image_path, image[0]))
 27 |         height = img.height
 28 |         width = img.width
 29 |     except:
 30 |         print("get_image_size exception")
 31 | 
 32 |     return height, width
 33 | 
 34 | def remove_eval_dir(imageId, target_path):
 35 |     dir_path = os.path.join(FLAGS.dataset_dir, imageId, target_path)
 36 |     if os.path.exists(dir_path):
 37 |         shutil.rmtree(dir_path)
 38 | 
 39 | def save_to_image(imageId, data, target_path, prefix):
 40 |     mask_path = os.path.join(FLAGS.dataset_dir, imageId, target_path)
 41 |     if not os.path.exists(mask_path):
 42 |         os.makedirs(mask_path)
 43 | 
 44 |     target = os.path.join(mask_path, prefix + imageId + '.png')
 45 | 
 46 |     # Rescale to 0-255 and convert to uint8
 47 |     rescaled = (255.0 / data.max() * (data - data.min())).astype(np.uint8)
 48 | 
 49 |     # save
 50 |     img = Image.fromarray(rescaled)
 51 |     img.save(target)
 52 |     print("save_to_image >>>", mask_path)
 53 | 
 54 | 
 55 | def main(_):
 56 |     # open csv
 57 |     csv_filename = os.path.join(FLAGS.labels_path)
 58 |     data_frame = pd.read_csv(csv_filename)
 59 |     # print(data_frame['ImageId'])
 60 |     # print(data_frame['EncodedPixels'])
 61 | 
 62 |     # making gt_mask for ImageId
 63 |     index = 0
 64 |     preImageId = ''
 65 |     imageIdChanged = False
 66 |     image = np.zeros(0)
 67 |     height = 0
 68 |     width = 0
 69 |     per_image_index = 0
 70 |     for imageId in data_frame['ImageId']:
 71 |         #print(imageId)
 72 | 
 73 |         if preImageId != imageId:
 74 |             # make gt_mask for preImageId
 75 |             if image.size > 0:
 76 |                 image_2 = image.reshape(width, height).T
 77 |                 #plt.imshow(image_2, cm.gray)
 78 |                 #plt.show()
 79 |                 # save
 80 |                 save_to_image(preImageId, image_2, FLAGS.gt_mask_dir, '')
 81 | 
 82 |             preImageId = imageId
 83 |             imageIdChanged = True
 84 |             per_image_index = 0
 85 |             remove_eval_dir(imageId, FLAGS.mask_dir)
 86 |             remove_eval_dir(imageId, FLAGS.gt_mask_dir)
 87 |             image = np.zeros(0)
 88 |         else:
 89 |             imageIdChanged = False
 90 | 
 91 |         height, width = get_image_size(imageId)
 92 |         #print(imageId, " >>>>> ", width, height)
 93 |         if height == 0 or width == 0:
 94 |             continue
 95 | 
 96 |         mask_info = data_frame['EncodedPixels'][index]
 97 |         mask_array = np.fromstring(mask_info, sep=" ", dtype=np.uint32)
 98 |         #print(mask_info)
 99 |         #print(mask_array)
100 | 
101 |         per_image = np.zeros((height, width), dtype=np.uint8).flatten()
102 |         if imageIdChanged == True:
103 |             image = np.zeros((height, width), dtype=np.uint8).flatten()
104 |         for i in range(0, len(mask_array), 2):
105 |             start_pos = mask_array[i]
106 |             mask_length = mask_array[i + 1]
107 |             #print("start >> ", start_pos)
108 |             #print("mask_length >> ", mask_length)
109 |             for j in range(mask_length):
110 |                 per_image[start_pos - 1 + j] = 1
111 |                 image[start_pos - 1 + j] = 1
112 | 
113 |         # show per each
114 |         per_image_2 = per_image.reshape(width, height).T
115 |         #plt.imshow(per_image, cm.gray)
116 |         #plt.show()
117 | 
118 |         # save
119 |         save_to_image(preImageId, per_image_2, FLAGS.mask_dir, str(per_image_index))
120 |         per_image_index += 1
121 | 
122 |         index += 1
123 |         #break
124 | 
125 |     if image.size > 0:
126 |         image_2 = image.reshape(width, height).T
127 |         #plt.imshow(image_2, cm.gray)
128 |         #plt.show()
129 |         # save
130 |         save_to_image(preImageId, image_2, FLAGS.gt_mask_dir, '')
131 | 
132 | 
if __name__ == '__main__':
    # Command-line flags of this script. Arguments the parser does not know
    # are collected in `unparsed` and forwarded to tf.app.run.
    parser = argparse.ArgumentParser()
    # The help texts of --labels_path and --dataset_dir were swapped.
    parser.add_argument(
        '--labels_path',
        default='../result_2/submission-nucleus_det_stage2-100.csv',
        # default='../../../dl_data/nucleus/stage1_train_labels/stage1_train_labels.csv',
        # default='../../../dl_data/nucleus/stage1_solution/stage1_solution.csv',
        type=str,
        help="Labels CSV file")

    parser.add_argument(
        '--dataset_dir',
        # default='../../../dl_data/nucleus/stage1_test',
        default='../../../dl_data/nucleus/stage2_test_final',
        # default='../../../dl_data/nucleus/stage1_train',
        type=str,
        help="Data directory")

    parser.add_argument(
        '--mask_dir',
        default='eval_mask',
        # default='solution_eval_mask',
        type=str,
        help="mask directory name")

    parser.add_argument(
        '--gt_mask_dir',
        default='eval_gt_mask',
        # default='solution_eval_gt_mask',
        type=str,
        help="gt_mask directory name")

    # FLAGS is a module-level global read by the functions of this script.
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)


--------------------------------------------------------------------------------