├── README.md
├── ckpt_to_dd.py
├── compose.py
├── convert.py
├── datasets
│   ├── __init__.py
│   ├── balanced_val.py
│   ├── common.py
│   ├── loadvoc.py
│   └── utils.py
├── deeplab_resnet
│   ├── __init__.py
│   ├── model.py
│   ├── model_pytorch.py
│   └── network_pytorch.py
├── environment.yml
├── eval_pytorch.py
├── kaffe
│   ├── __init__.py
│   ├── caffe
│   │   ├── __init__.py
│   │   ├── caffepb.py
│   │   └── resolver.py
│   ├── errors.py
│   ├── graph.py
│   ├── layers.py
│   ├── shapes.py
│   ├── tensorflow
│   │   ├── __init__.py
│   │   ├── network.py
│   │   └── transformer.py
│   └── transformers.py
├── losses.py
├── settings.py
└── train_pytorch.py

/README.md:
--------------------------------------------------------------------------------
1 | # Deeplab-resnet-101 Pytorch with Lovász hinge loss
2 | 
3 | Train deeplab-resnet-101 with the binary Jaccard loss surrogate, the Lovász hinge, as described in [http://arxiv.org/abs/1705.08790](http://arxiv.org/abs/1705.08790).
4 | 
5 | Parts of the code are adapted from [tensorflow-deeplab-resnet](https://github.com/DrSleep/tensorflow-deeplab-resnet) (in particular the conversion from caffe to tensorflow with kaffe).
6 | 
7 | The code has not been tested for full training of Deeplab-Resnet yet. Refer to [tensorflow-deeplab-resnet](https://github.com/DrSleep/tensorflow-deeplab-resnet) and possibly extract the weights after training with that framework.
8 | 
9 | ## Code status
10 | The code is at an early stage. Pull requests welcome.
11 | 
12 | ## Citation
13 | Please cite
14 | ```
15 | @ARTICLE{2017arXiv170508790B,
16 |    author = {{Berman}, M. and {Blaschko}, M.~B.},
17 |     title = "{Optimization of the Jaccard index for image segmentation with the Lov\'asz hinge}",
18 |   journal = {ArXiv e-prints},
19 | archivePrefix = "arXiv",
20 |    eprint = {1705.08790},
21 |  primaryClass = "cs.CV",
22 |  keywords = {Computer Science - Computer Vision and Pattern Recognition},
23 |      year = 2017,
24 |     month = may,
25 |    adsurl = {http://adsabs.harvard.edu/abs/2017arXiv170508790B},
26 | }
27 | ```
28 | if you use the code.
29 | 
30 | ## Dependencies and weights
31 | Relies notably on [Pytorch](http://pytorch.org/) and the standalone [tensorboard](https://github.com/dmlc/tensorboard/tree/master/python) package.
32 | 
33 | Using anaconda, install the full requirements using the provided conda environment file:
34 | ```
35 | conda env create -f environment.yml
36 | source activate jaccard-segment
37 | ```
38 | 
39 | Convert the Deeplab Caffe weights to a tensorflow ckpt using [caffe-tensorflow](https://github.com/ethereon/caffe-tensorflow), then convert them to hdf5 using `ckpt_to_dd.py`, and use our wrapper to load them in Pytorch.
40 | 
41 | ## Important switches in the settings
42 | By default, the training finetunes with the cross-entropy loss. Use the `--binary class` switch to select a particular class in the binary case, `--jaccard` to train with the Jaccard hinge loss described in the arxiv paper, `--hinge` to use the standard hinge loss, and `--proximal` to use the proximal-operator optimization variant for the Jaccard loss described in the arxiv paper.
43 | 
44 | For the proximal operator, use a learning rate of `1.` and set an equivalent regularization of `1/lr` instead.
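45 | 
46 | For illustration, the weight-conversion step described above amounts to the following minimal sketch (the file names here are hypothetical; `ckpt_to_dd.py` infers the output name when it is omitted):
47 | ```
48 | # same as: python ckpt_to_dd.py deeplab_resnet.ckpt deeplab_resnet.h5
49 | import deepdish as dd
50 | from ckpt_to_dd import read_ckpt
51 | 
52 | weights = read_ckpt('deeplab_resnet.ckpt')  # dict of numpy arrays, transposed to Pytorch layout
53 | dd.io.save('deeplab_resnet.h5', weights)
54 | ```
55 | A hypothetical training invocation combining the switches would be `python train_pytorch.py --binary boat --jaccard` (the exact argument list is defined in `settings.py`).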
56 | 
--------------------------------------------------------------------------------
/ckpt_to_dd.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import deepdish as dd
3 | import argparse
4 | import os
5 | import numpy as np
6 | 
7 | def tr(v):
8 |     # tensorflow weights to pytorch weights
9 |     if v.ndim == 4:
10 |         return np.ascontiguousarray(v.transpose(3,2,0,1))
11 |     elif v.ndim == 2:
12 |         return np.ascontiguousarray(v.transpose())
13 |     return v
14 | 
15 | def read_ckpt(ckpt):
16 |     # https://github.com/tensorflow/tensorflow/issues/1823
17 |     reader = tf.train.NewCheckpointReader(ckpt)
18 |     weights = {n: reader.get_tensor(n) for (n, _) in reader.get_variable_to_shape_map().items()}
19 |     pyweights = {k: tr(v) for (k, v) in weights.items()}
20 |     return pyweights
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser(description="Converts ckpt weights to deepdish hdf5")
24 |     parser.add_argument("infile", type=str,
25 |                         help="Path to the ckpt.")
26 |     parser.add_argument("outfile", type=str, nargs='?', default='',
27 |                         help="Output file (inferred if missing).")
28 |     args = parser.parse_args()
29 |     if args.outfile == '':
30 |         args.outfile = os.path.splitext(args.infile)[0] + '.h5'
31 |     outdir = os.path.dirname(args.outfile)
32 |     if outdir and not os.path.exists(outdir):
33 |         os.makedirs(outdir)
34 |     weights = read_ckpt(args.infile)
35 |     dd.io.save(args.outfile, weights)
36 |     weights2 = dd.io.load(args.outfile)  # sanity check: reload the saved weights
37 | 
--------------------------------------------------------------------------------
/compose.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/pytorch/vision/issues/9
3 | 
4 | joint transforms for input and target,
5 | applied to sequences of images
6 | 
7 | transform = JointCompose([
8 |     ElasticTransform(),
9 |     RandomRotate(),
10 |     [CenterCropNumpy(size=input_shape), CenterCropNumpy(size=target_shape)],
11 |     [NormalizeNumpy(), None],
12 |     [Lambda(to_tensor), Lambda(to_tensor)]
13 | ])
14 | 
15 | """
16 | from __future__ import division, print_function
17 | import math
18 | import random
19 | from PIL import Image, ImageOps
20 | import numpy as np
21 | import numbers
22 | import types
23 | import collections
24 | import torch
25 | 
26 | class JointCompose(object):
27 |     """Composes several transforms together, with support for separate transformations of multiple inputs.
28 |     """
29 | 
30 |     def __init__(self, transforms):
31 |         self.transforms = transforms
32 | 
33 |     def __call__(self, img):
34 |         for t in self.transforms:
35 |             if isinstance(t, collections.Sequence):
36 |                 assert isinstance(img, collections.Sequence) and len(img) == len(t), "sizes of image group and transform group do not match"
37 |                 tmp_ = []
38 |                 for i, im_ in enumerate(img):
39 |                     if callable(t[i]):
40 |                         tmp_.append(t[i](im_))
41 |                     else:
42 |                         tmp_.append(im_)
43 |                 img = tmp_
44 |             elif callable(t):
45 |                 img = t(img)
46 |             elif t is None:
47 |                 continue
48 |             else:
49 |                 raise Exception('unexpected type')
50 |         return img
51 | 
52 | class RandomScale(object):
53 |     """Randomly resize the given PIL.Image(s).
54 |     low: ratio of minimum size to original size
55 |     high: ratio of maximum size to original size
56 |     interpolation(s): interpolations used.
57 |     If 'auto', uses NEAREST neighbour for the second input.
58 |     """
59 | 
60 |     def __init__(self, low, high, interpolations='auto'):
61 |         self.low = low
62 |         self.high = high
63 |         self.interpolations = interpolations
64 | 
65 |     def __call__(self, images):
66 |         single = False
67 |         if not isinstance(images, collections.Sequence):
68 |             images = [images]
69 |             single = True
70 |         interps = self.interpolations
71 |         if interps == 'auto':
72 |             interps = Image.BILINEAR
73 |             if len(images) == 2:
74 |                 interps = [Image.BILINEAR, Image.NEAREST]
75 |         if not isinstance(interps, collections.Sequence):
76 |             interps = [interps] * len(images)
77 |         resized = []
78 |         ratio = random.uniform(self.low, self.high)
79 |         for img, interp in zip(images, interps):
80 |             w, h = img.size  # PIL size is (width, height)
81 |             w2, h2 = (int(ratio * w), int(ratio * h))
82 |             img2 = img.resize((w2, h2), interp)
83 |             resized.append(img2)
84 |         if single:
85 |             resized = resized[0]
86 |         return resized
87 | 
88 | class Scale(object):
89 |     # MONOCHANNEL FOR NOW # fixme
90 |     """Rescales the input PIL.Image to the given 'size'.
91 |     If 'size' is a 2-element tuple or list in the order of (width, height), it will be the exact size to scale to.
92 |     If 'size' is a number, it will indicate the size of the smaller edge.
93 |     For example, if height > width, then the image will be
94 |     rescaled to (size * height / width, size).
95 |     size: the exact output size or the size of the smaller edge
96 |     interpolation: Default: PIL.Image.BILINEAR
97 |     """
98 | 
99 |     def __init__(self, size, interpolation=Image.BILINEAR):
100 |         assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)
101 |         self.size = size
102 |         self.interpolation = interpolation
103 | 
104 |     def __call__(self, img):
105 |         if isinstance(self.size, int):
106 |             w, h = img.size
107 |             if (w <= h and w == self.size) or (h <= w and h == self.size):
108 |                 return img
109 |             if w < h:
110 |                 ow = self.size
111 |                 oh = int(self.size * h / w)
112 |                 return img.resize((ow, oh), self.interpolation)
113 |             else:
114 |                 oh = self.size
115 |                 ow = int(self.size * w / h)
116 |                 return img.resize((ow, oh), self.interpolation)
117 |         else:
118 |             return img.resize(self.size, self.interpolation)
119 | 
120 | 
121 | class RandomHorizontalFlip(object):
122 |     """Randomly horizontally flips the given PIL.Image(s) with a probability of 0.5.
123 |     """
124 | 
125 |     def __call__(self, images):
126 |         if random.random() < 0.5:
127 |             single = False
128 |             if not isinstance(images, collections.Sequence):
129 |                 images = [images]
130 |                 single = True
131 |             images = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in images]
132 |             if single:
133 |                 images = images[0]
134 |         return images
135 | 
136 | 
137 | def ensuretuple(inp, n=2):
138 |     # duplicate the value n times if needed
139 |     if not isinstance(inp, collections.Sequence):
140 |         inp = (inp,) * n
141 |     assert len(inp) == n, "Expected input of size " + str(n)
142 |     return inp
143 | 
144 | def pad_to_target(img, target_height, target_width, label=0):
145 |     # Pad the image with `label` pixels to the specified height and width if needed.
146 |     # This op does nothing if the image is already at least target_height by target_width.
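    # Worked example (hypothetical sizes, for illustration): padding a 200x151
    # image to 256x256 gives delta = 56 -> left = 28, right = 28 for the width,
    # and delta = 105 -> top = 52, bottom = 53 for the height; any odd
    # remainder goes to the right/bottom edge.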
147 |     w, h = img.size
148 |     left = top = right = bottom = 0
149 |     doit = False
150 |     if target_width > w:
151 |         delta = target_width - w
152 |         left = delta // 2
153 |         right = delta - left
154 |         doit = True
155 |     if target_height > h:
156 |         delta = target_height - h
157 |         top = delta // 2
158 |         bottom = delta - top
159 |         doit = True
160 |     if doit:
161 |         img = ImageOps.expand(img, border=(left, top, right, bottom), fill=label)
162 |     assert img.size[0] >= target_width
163 |     assert img.size[1] >= target_height
164 |     return img
165 | 
166 | 
167 | class RandomCropPad(object):
168 |     """Crops the given PIL.Image at a random location to have a region of
169 |     the given size. size can be a tuple (target_height, target_width)
170 |     or an integer, in which case the target will be of a square shape (size, size).
171 |     Pads with pad_label if needed; 'auto' -> (0 for the image, 255 for the mask).
172 |     """
173 | 
174 |     def __init__(self, size, pad_label='auto'):
175 |         self.target_height, self.target_width = ensuretuple(size)
176 |         self.pad_label = pad_label
177 | 
178 |     def __call__(self, images):
179 |         th, tw = self.target_height, self.target_width
180 |         single = False
181 |         if not isinstance(images, collections.Sequence):
182 |             images = [images]
183 |             single = True
184 |         pad_label = self.pad_label
185 |         if pad_label == 'auto':
186 |             pad_label = [0, 255] if len(images) == 2 else 0
187 |         if not isinstance(pad_label, collections.Sequence):
188 |             pad_label = [pad_label] * len(images)  # broadcast a scalar label to all inputs
189 |         returns = []
190 |         for image, pad in zip(images, pad_label):
191 |             image = pad_to_target(image, th, tw, pad)
192 |             returns.append(image)
193 |         w, h = returns[0].size
194 |         for ret in returns[1:]:
195 |             assert (w, h) == ret.size, "all input images must have the same size"
196 |         if w == tw and h == th:
197 |             crops = returns  # already at the target size, nothing to crop
198 |         else:
199 |             x1 = random.randint(0, w - tw)
200 |             y1 = random.randint(0, h - th)
201 |             crops = [ret.crop((x1, y1, x1 + tw, y1 + th)) for ret in returns]
202 |         return crops[0] if single else crops
203 | 
204 | class Normalize(object):
205 |     """Given mean: (R, G, B) and std: (R, G, B),
206 |     will normalize each channel of the torch.*Tensor, i.e.
207 |     channel = (channel - mean) / std
208 |     std is optional
209 |     """
210 |     def __init__(self, mean, std=None):
211 |         self.mean = mean
212 |         self.std = std
213 | 
214 |     def __call__(self, tensor):
215 |         if self.std is None:
216 |             for t, m in zip(tensor, self.mean):
217 |                 t.sub_(m)
218 |         else:
219 |             for t, m, s in zip(tensor, self.mean, self.std):
220 |                 t.sub_(m).div_(s)
221 |         return tensor
222 | 
223 | class PILtoTensor(object):
224 |     """Puts channels first and converts to float, except for palette ('P') images.
225 |     """
226 | 
227 |     def __init__(self):
228 |         pass
229 | 
230 |     def __call__(self, inputs):
231 |         single = False
232 |         if not isinstance(inputs, collections.Sequence):
233 |             inputs = [inputs]
234 |             single = True
235 |         res = []
236 |         for im in inputs:
237 |             if im.mode == 'P':
238 |                 dest = torch.from_numpy( np.array(im) )
239 |                 res.append( dest )
240 |             else:
241 |                 dest = torch.from_numpy( np.array(im).transpose(2, 0, 1) )
242 |                 res.append( dest.float() )
243 |         if single:
244 |             res = res[0]
245 |         return res
246 | 
247 | class TensortoPIL(object):
248 |     """Converts tensors to PIL images:
249 |     flat (2D) arrays become palette label images,
250 |     3D arrays become images with the channel axis moved last.
251 |     """
252 | 
253 |     def __init__(self, color_map=None):
254 |         self.color_map = color_map
255 | 
256 |     def __call__(self, inputs):
257 |         single = False
258 |         if not isinstance(inputs, collections.Sequence):
259 |             inputs = [inputs]
260 |             single = True
261 |         res = []
262 |         for tens in inputs:
263 |             dest = tens.cpu().numpy()
264 |             if dest.ndim == 3:
265 |                 dest = dest.transpose(1, 2, 0).astype(np.uint8)
266 |                 dest = Image.fromarray(dest)
267 |             elif dest.ndim == 2:
268 |                 dest = dest.astype(np.uint8)
269 |                 dest = Image.fromarray(dest, "P")
270 |                 if self.color_map is not None:
271 |                     cmap = [k for l in self.color_map for k in l]
272 |                     dest.putpalette(cmap)
273 |             res.append(dest)
274 |         if single:
275 |             res = res[0]
276 |         return res
277 | 
--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # This script belongs to https://github.com/ethereon/caffe-tensorflow
4 | import os
5 | import sys
6 | import numpy as np
7 | import argparse
8 | from kaffe import KaffeError, print_stderr
9 | from kaffe.tensorflow import TensorFlowTransformer
10 | 
11 | 
12 | def fatal_error(msg):
13 |     print_stderr(msg)
14 |     exit(-1)
15 | 
16 | 
17 | def validate_arguments(args):
18 |     if (args.data_output_path is not None) and (args.caffemodel is None):
19 |         fatal_error('No input data path provided.')
20 |     if (args.caffemodel is not None) and (args.data_output_path is None):
21 |         fatal_error('No output data path provided.')
22 |     if (args.code_output_path is None) and (args.data_output_path is None):
23 |         fatal_error('No output path specified.')
24 | 
25 | 
26 | def convert(def_path, caffemodel_path, data_output_path, code_output_path, phase):
27 |     try:
28 |         transformer = TensorFlowTransformer(def_path, caffemodel_path, phase=phase)
29 |         print_stderr('Converting data...')
30 |         if caffemodel_path is not None:
31 |             data = transformer.transform_data()
32 |             print_stderr('Saving data...')
33 |             with open(data_output_path, 'wb') as data_out:
34 |                 np.save(data_out, data)
35 |         if code_output_path:
36 |             print_stderr('Saving source...')
37 |             with open(code_output_path, 'wb') as src_out:
38 |                 src_out.write(transformer.transform_source())
39 |         print_stderr('Done.')
40 |     except KaffeError as err:
41 |         fatal_error('Error 
encountered: {}'.format(err)) 42 | 43 | 44 | def main(): 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument('def_path', help='Model definition (.prototxt) path') 47 | parser.add_argument('--caffemodel', help='Model data (.caffemodel) path') 48 | parser.add_argument('--data-output-path', help='Converted data output path') 49 | parser.add_argument('--code-output-path', help='Save generated source to this path') 50 | parser.add_argument('-p', 51 | '--phase', 52 | default='test', 53 | help='The phase to convert: test (default) or train') 54 | args = parser.parse_args() 55 | validate_arguments(args) 56 | convert(args.def_path, args.caffemodel, args.data_output_path, args.code_output_path, 57 | args.phase) 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # from loadvoc import load_extended_voc#, load_voc 2 | # from common import Example, SegSet, BinarizedSegSet 3 | # from utils import array_to_segmentation 4 | 5 | 6 | -------------------------------------------------------------------------------- /datasets/balanced_val.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from random import Random 3 | from datasets.loadvoc import load_extended_voc 4 | 5 | random = Random(1234) 6 | 7 | train, val, test = load_extended_voc() 8 | 9 | # create balanced binary datasets for experimenting 10 | 11 | balanced = {} 12 | 13 | for c in sorted(train.classes, key=train.classes.get): 14 | if c != 'void': 15 | pos = [] 16 | neg = [] 17 | for i, ex in enumerate(val): 18 | if c in ex.classes: 19 | pos.append(i) 20 | else: 21 | neg.append(i) 22 | random.shuffle(neg) 23 | balanced[c] = pos + neg[:len(pos)] 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /datasets/common.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | from scipy.misc import imread 3 | from PIL import Image 4 | import numpy as np 5 | import os 6 | 7 | class SegSet(object): 8 | # Collection of segmentation Examples 9 | def __init__(self, name, examples, imagespath, classes, maskspath=None): 10 | self.name = name 11 | self.imagespath = imagespath 12 | self.maskspath = maskspath 13 | self.examples = examples 14 | self.classes = classes 15 | super(SegSet, self).__init__() 16 | def __len__(self): 17 | return len(self.examples) 18 | def __getitem__(self, key): 19 | if isinstance( key, slice ) : 20 | if key.start == key.stop == key.step == None: 21 | token = "" 22 | else: 23 | token = "[" 24 | if key.start != None: token += str(key.start) 25 | token += ":" 26 | if key.stop != None: token += str(key.stop) 27 | if key.step != None: token += ":" + str(key.step) 28 | token += "]" 29 | return SegSet(self.name + token, self.examples[key], self.imagespath, self.classes, self.maskspath) 30 | if isinstance( key, list ) : 31 | # list of indices 32 | token = "[]" 33 | return SegSet(self.name + token, 34 | [self.examples[i] for i in key], 35 | self.imagespath, self.classes, self.maskspath) 36 | elif isinstance( key, int ) : 37 | return self.examples[key] 38 | elif isinstance( key, str ): # select a category 39 | token = "[" + key + "]" 40 | if key[0] == '~': # select complementary 41 | key = key[1:] 42 | selected 
= [ex for ex in self.examples if key not in ex.classes]
43 |             else:
44 |                 selected = [ex for ex in self.examples if key in ex.classes]
45 |             return SegSet(self.name + token, selected, self.imagespath, self.classes, self.maskspath)
46 |         else:
47 |             raise TypeError("Invalid argument type.")
48 |     def __repr__(self):
49 |         return ("<{}: collection of {} examples>".format(
50 |             self.name,
51 |             len(self.examples))
52 |         )
53 |     def __add__(self, other):
54 |         assert self.imagespath == other.imagespath
55 |         if self.maskspath and other.maskspath:
56 |             assert self.maskspath == other.maskspath
57 |         maskspath = self.maskspath if self.maskspath else other.maskspath
58 |         return SegSet(self.name + "+" + other.name,
59 |                       self.examples + other.examples,
60 |                       self.imagespath,
61 |                       self.classes,
62 |                       maskspath)
63 |     def impath(self, example):
64 |         example = self.examples[example] if isinstance(example, int) else example
65 |         return os.path.join(self.imagespath, example.name + ".jpg")
66 |     def maskpath(self, example):
67 |         example = self.examples[example] if isinstance(example, int) else example
68 |         return os.path.join(self.maskspath, example.name + ".png")
69 |     def imread(self, example, kind="scipy"):
70 |         ipath = self.impath(example)
71 |         if kind == "scipy":
72 |             return imread(ipath)
73 |         im = Image.open(ipath)
74 |         if kind == "PIL":
75 |             return im
76 |         if kind == "array":
77 |             return np.array(im)
78 |     def binarize(self, cls):
79 |         token = ".binarize({})".format(cls)
80 |         binarizedset = BinarizedSegSet(self.name + token, self.examples,
81 |                                        self.imagespath, self.classes, cls, self.maskspath)
82 |         return binarizedset
83 |     def maskread(self, example, kind="array"):
84 |         mpath = self.maskpath(example)
85 |         im = Image.open(mpath)
86 |         if kind == "PIL":
87 |             return im
88 |         elif kind == "array":
89 |             return np.array(im)
90 |         else:
91 |             raise NotImplementedError("Unknown return kind {}".format(kind))
92 |         # return imread(mpath)
93 |     def read(self, example, kind="array"):
94 |         return self.imread(example, kind), self.maskread(example, kind)
95 | 
96 | 
97 | class BinarizedSegSet(SegSet):
98 |     def __init__(self, name, examples, imagespath, classes, target, maskspath=None):
99 |         self.target = target
100 |         super(BinarizedSegSet, self).__init__(name, examples, imagespath, classes, maskspath)
101 |     def maskread(self, example, kind="array", withvoid=True):
102 |         example = self.examples[example] if isinstance(example, int) else example
103 |         mpath = self.maskpath(example)
104 |         im = Image.open(mpath)
105 |         if self.target in example.classes:
106 |             target_idx = self.classes[self.target]
107 |             arr = np.array(im)
108 |             mask = arr == target_idx
109 |             voidmask = arr == self.classes['void']
110 |             arr[mask] = 1
111 |             arr[~mask] = 0
112 |             arr[voidmask] = self.classes['void']
113 |         else:
114 |             # return 0 labels
115 |             arr = np.array(im)
116 |             arr.fill(0)
117 |         if kind == "array":
118 |             return arr
119 |         elif kind == "PIL":
120 |             im = Image.fromarray(arr, "P")
121 |             im.putpalette([0, 0, 0, 255, 255, 255]
122 |                           + [255, 255, 178] * 253
123 |                           + [255, 178, 253])
124 |             return im
125 |         else:
126 |             raise NotImplementedError("Unknown return kind {}".format(kind))
127 |     def binarize(self, cls):
128 |         raise NotImplementedError("Already binarized to", self.target)
129 |     def __add__(self, other):
130 |         assert self.imagespath == other.imagespath
131 |         if self.maskspath and other.maskspath:
132 |             assert self.maskspath == other.maskspath
133 |         maskspath = self.maskspath if self.maskspath else other.maskspath
134 |         assert self.target == other.target
135 |         return BinarizedSegSet(self.name + "+" + other.name,
136 |                                self.examples + other.examples,
137 |                                self.imagespath,
138 |                                self.classes,
139 |                                self.target,
140 |                                maskspath)
141 | 
142 | 
143 | 
144 | class Example(object):
145 |     def __init__(self, name, source, classes=[]):
146 |         self.name = name
147 |         self.source = source
148 |         self.classes = classes
149 |     def __repr__(self):
150 |         return ("<Example {}>".format(self.name))
151 | 
152 | 
--------------------------------------------------------------------------------
/datasets/loadvoc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Maxim Berman, bermanmaxim@gmail.com
3 | # Load Pascal VOC + Berkeley extended annotations datasets
4 | 
5 | from __future__ import absolute_import, division, print_function
6 | import os, sys
7 | import scipy.io
8 | from .common import Example, SegSet
9 | if 'ipykernel' in sys.modules:
10 |     from tqdm import tqdm_notebook as tqdm
11 | else:
12 |     from tqdm import tqdm
13 | join = os.path.join
14 | import warnings
15 | from skimage.io import imread, imsave
16 | from .utils import mat2png_hariharan, convert_from_color_segmentation, pascal_classes, color_map
17 | import numpy as np
18 | from shutil import copyfile
19 | from PIL import Image
20 | import platform
21 | 
22 | VOC_DIR = './VOCdevkit/VOC2012'
23 | EXTENDED_DIR = './VOCdevkit/VOC2012/berkeley'
24 | 
25 | VOCVER = 'voc12'
26 | CACHE_FUSE = join(VOC_DIR, 'FusedSets')  # where to store the fused image lists
27 | #
28 | MASKS_DIR = join(VOC_DIR, 'SegmentationClassPalette')
29 | pascal_classes = pascal_classes()
30 | pascal_classes_inv = {v: k for k, v in pascal_classes.items()}
31 | 
32 | def load_extended_voc(voc_dir=VOC_DIR, extended_dir=EXTENDED_DIR,
33 |                       masks_dir=MASKS_DIR, cache_fuse=CACHE_FUSE, vocver=VOCVER):
34 |     """
35 |     Fuse VOC and Berkeley annotations
36 |     Convert annotations to common 21-class + void palette png format
37 |     Copy labels into the same folder MASKS_DIR
38 |     Returns train/val/test lists and classes (classes present on each image)
39 |     Caches results in cache_fuse
40 |     """
41 | 
42 |     trainaugf = join(cache_fuse, 'trainaug.txt')
43 |     valaugf = join(cache_fuse, 'valaug.txt')
44 |     testaugf = join(cache_fuse, 'testaug.txt')
45 |     dirsf = join(cache_fuse, 'dirs.txt')
46 |     infof = join(cache_fuse, 'info.txt')
47 |     if (os.path.exists(cache_fuse)
48 |             and os.path.isfile(trainaugf) and os.path.isfile(valaugf)
49 |             and os.path.isfile(testaugf) and os.path.isfile(dirsf)
50 |             and os.path.isfile(infof)):
51 |         # load from cache if files exist
52 |         train = [l.strip() for l in open(trainaugf)]
53 |         val = [l.strip() for l in open(valaugf)]
54 |         test = [l.strip() for l in open(testaugf)]
55 |         [vocjpg, masks_dir] = [l.strip() for l in open(dirsf)]
56 |         source = {}
57 |         classes = {}
58 |         with open(infof) as f:
59 |             next(f)  # skip header line
60 |             for line in f:
61 |                 m = line.split()
62 |                 source[m[0]] = m[1]
63 |                 classes[m[0]] = m[2:]
64 |         print("Loaded dataset from cache " + cache_fuse)
65 |     else:  # no cache, do the computations
66 |         # fuse image lists
67 |         vocsets = join(voc_dir, 'ImageSets', 'Segmentation')
68 |         augmented_root = join(extended_dir, 'benchmark_RELEASE', 'dataset')
69 | 
70 |         voctrainF = join(vocsets, 'train.txt')
71 |         vocvalF = join(vocsets, 'val.txt')
72 |         voctestF = join(vocsets, 'test.txt')
73 | 
74 |         augtrainF = join(augmented_root, 'train.txt')
75 |         augvalF = join(augmented_root, 'val.txt')
76 | 
77 |         voctrain = [l.strip() for l in open(voctrainF)]
78 |         val = [l.strip() for l in open(vocvalF)]
79 |         test = [l.strip() for l in open(voctestF)]
80 |         augtrain = [l.strip() for l in open(augtrainF)]
81 |         augval = [l.strip() for l in open(augvalF)]
82 | 
83 |         source = {}
84 |         for im in augtrain + augval:
85 |             source[im] = 'aug'
86 |         for im in voctrain + val + test:
87 |             source[im] = str(vocver)
88 | 
89 |         train = sorted(set(augtrain + voctrain + augval) - set(val) - set(test))
90 |         print("Loaded image sets, {} train / {} val / {} test"
91 |               .format(len(train), len(val), len(test)))
92 | 
93 |         # convert to common format
94 |         vocjpg = join(voc_dir, 'JPEGImages')
95 |         vocseg = join(voc_dir, 'SegmentationClass')
96 |         augseg = join(augmented_root, 'cls')
97 | 
98 |         if not os.path.exists(masks_dir):
99 |             os.makedirs(masks_dir)
100 | 
101 |         print('Converting masks to common format...')
102 |         classes = {}
103 |         # just copy voc labels and scan classes
104 |         for im in tqdm([im for im in train + val if source[im] != 'aug'],
105 |                        desc='VOC: copy labels...'):
106 |             srcf = join(vocseg, im+'.png')
107 |             copyfile(srcf, join(masks_dir, im + '.png'))
108 |             array = np.array(Image.open(srcf))
109 |             clsuniques = np.unique(array)
110 |             classes[im] = [pascal_classes_inv[k] for k in clsuniques]
111 |             # src = imread(srcf)
112 |             # img = convert_from_color_segmentation(src, use_void=True)
113 |             # clsuniques = np.unique(img)
114 |             # classes[im] = [pascal_classes_inv[k] for k in clsuniques]
115 |             # with warnings.catch_warnings():
116 |             #     warnings.simplefilter("ignore")
117 |             #     imsave(join(masks_dir, im + '.png'), img)
118 | 
119 |         # MAT labels to png
120 |         cmap = color_map(255)
121 |         flat_cmap = [i for l in cmap for i in l]
122 |         for im in tqdm([im for im in train + val if source[im] == 'aug'],
123 |                        desc='AUG: MAT to 1D PNG...'):
124 |             srcf = join(augseg, im+'.mat')
125 |             img = mat2png_hariharan(srcf)
126 |             clsuniques = np.unique(img)
127 |             classes[im] = [pascal_classes_inv[k] for k in clsuniques]
128 |             newimg = Image.fromarray(img, mode="P")
129 |             newimg.putpalette(flat_cmap)
130 |             newimg.save(join(masks_dir, im + '.png'))
131 |             # with warnings.catch_warnings():
132 |             #     warnings.simplefilter("ignore")
133 |             #     imsave(join(masks_dir, im + '.png'), img)
134 | 
135 |         # memoize everything (the *augf paths already include cache_fuse)
136 |         if not os.path.exists(cache_fuse):
137 |             os.makedirs(cache_fuse)
138 |         open(trainaugf, 'w').write("\n".join(train))
139 |         open(valaugf, 'w').write("\n".join(val))
140 |         open(testaugf, 'w').write("\n".join(test))
141 |         dirs = [vocjpg, masks_dir]
142 |         open(dirsf, 'w').write("\n".join(dirs))
143 |         with open(infof, 'w') as f:
144 |             f.write('\t'.join(['name', 'source', 'classes...']) + '\n')
145 |             for im in train + val:
146 |                 f.write(im + '\t' + source[im] + '\t' + '\t'.join(classes[im]) + '\n')
147 |         print("Saved cache in " + cache_fuse)
148 | 
149 |     train = SegSet('AugVocTrain',
150 |                    [Example(im, source[im], classes[im]) for im in train],
151 |                    vocjpg,
152 |                    pascal_classes,
153 |                    masks_dir,
154 |                    )
155 |     val = SegSet('AugVocVal',
156 |                  [Example(im, source[im], classes[im]) for im in val],
157 |                  vocjpg,
158 |                  pascal_classes,
159 |                  masks_dir,
160 |                  )
161 |     test = SegSet('AugVocTest',
162 |                   [Example(im, vocver) for im in test],
163 |                   vocjpg,
164 |                   pascal_classes,
165 |                   )
166 |     return train, val, test
167 | 
168 | 
169 | if __name__ == '__main__':
170 |     train, val, test = load_extended_voc()
171 | 
172 | 
173 | 
--------------------------------------------------------------------------------
/datasets/utils.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Martin Kersner, m.kersner@gmail.com 3 | # 2016/03/11 4 | #** Maxim Berman ** modified from https://github.com/martinkersner/train-DeepLab/ 5 | 6 | import scipy.io 7 | import struct 8 | import numpy as np 9 | from PIL import Image 10 | 11 | import sys 12 | 13 | if 'ipykernel' in sys.modules: 14 | from tqdm import tqdm_notebook as tqdm 15 | else: 16 | from tqdm import tqdm 17 | from concurrent.futures import ProcessPoolExecutor, as_completed 18 | 19 | def pascal_classes(with_background=True, with_void=True, reverse=False): 20 | classes = {'aeroplane' : 1, 'bicycle' : 2, 'bird' : 3, 'boat' : 4, 21 | 'bottle' : 5, 'bus' : 6, 'car' : 7, 'cat' : 8, 22 | 'chair' : 9, 'cow' : 10, 'diningtable' : 11, 'dog' : 12, 23 | 'horse' : 13, 'motorbike' : 14, 'person' : 15, 'potted-plant' : 16, 24 | 'sheep' : 17, 'sofa' : 18, 'train' : 19, 'tv/monitor' : 20} 25 | if with_background: classes['background'] = 0 26 | if with_void: classes['void'] = 255 27 | if reverse: 28 | return {v: k for k, v in classes.iteritems()} 29 | return classes 30 | 31 | def pascal_palette(void=False): 32 | palette = {( 0, 0, 0) : 0 , 33 | (128, 0, 0) : 1 , 34 | ( 0, 128, 0) : 2 , 35 | (128, 128, 0) : 3 , 36 | ( 0, 0, 128) : 4 , 37 | (128, 0, 128) : 5 , 38 | ( 0, 128, 128) : 6 , 39 | (128, 128, 128) : 7 , 40 | ( 64, 0, 0) : 8 , 41 | (192, 0, 0) : 9 , 42 | ( 64, 128, 0) : 10, 43 | (192, 128, 0) : 11, 44 | ( 64, 0, 128) : 12, 45 | (192, 0, 128) : 13, 46 | ( 64, 128, 128) : 14, 47 | (192, 128, 128) : 15, 48 | ( 0, 64, 0) : 16, 49 | (128, 64, 0) : 17, 50 | ( 0, 192, 0) : 18, 51 | (128, 192, 0) : 19, 52 | ( 0, 64, 128) : 20 } 53 | if void: 54 | palette[( 224, 224, 192)] = 255 55 | 56 | return palette 57 | 58 | def array_to_segmentation(array): 59 | array = array.astype(np.uint8) 60 | lab = Image.fromarray(array, "P") 61 | cmap = [k for l in color_map() for k in l] 62 | lab.putpalette(cmap) 63 | return lab 64 | 65 | def pascal_palette_invert(): 66 | palette_list = pascal_palette().keys() 67 | palette = () 68 | 69 | for color in palette_list: 70 | palette += color 71 | 72 | return palette 73 | 74 | def color_map(N=256, normalized=False): 75 | def bitget(byteval, idx): 76 | return ((byteval & (1 << idx)) != 0) 77 | 78 | dtype = 'float32' if normalized else 'uint8' 79 | cmap = np.zeros((N, 3), dtype=dtype) 80 | for i in range(N): 81 | r = g = b = 0 82 | c = i 83 | for j in range(8): 84 | r = r | (bitget(c, 0) << 7-j) 85 | g = g | (bitget(c, 1) << 7-j) 86 | b = b | (bitget(c, 2) << 7-j) 87 | c = c >> 3 88 | 89 | cmap[i] = np.array([r, g, b]) 90 | 91 | cmap = cmap/255 if normalized else cmap 92 | return cmap 93 | 94 | def pascal_mean_values(): 95 | return np.array([103.939, 116.779, 123.68], dtype=np.float32) 96 | 97 | def strstr(str1, str2): 98 | if str1.find(str2) != -1: 99 | return True 100 | else: 101 | return False 102 | 103 | # Mat to png conversion for http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html 104 | # 'GTcls' key is for class segmentation 105 | # 'GTinst' key is for instance segmentation 106 | def mat2png_hariharan(mat_file, key='GTcls'): 107 | mat = scipy.io.loadmat(mat_file, mat_dtype=True, squeeze_me=True, struct_as_record=False) 108 | return mat[key].Segmentation 109 | 110 | def convert_segmentation_mat2numpy(mat_file): 111 | np_segm = load_mat(mat_file) 112 | return np.rot90(np.fliplr(np.argmax(np_segm, axis=2))) 113 | 114 | def load_mat(mat_file, key='data'): 115 | mat = scipy.io.loadmat(mat_file, 
                          mat_dtype=True, squeeze_me=True, struct_as_record=False)
116 |     return mat[key]
117 | 
118 | # Python version of script in code/densecrf/my_script/LoadBinFile.m
119 | def load_binary_segmentation(bin_file, dtype='int16'):
120 |     with open(bin_file, 'rb') as bf:
121 |         rows = struct.unpack('i', bf.read(4))[0]
122 |         cols = struct.unpack('i', bf.read(4))[0]
123 |         channels = struct.unpack('i', bf.read(4))[0]
124 | 
125 |         num_values = rows * cols  # expect only one channel in segmentation output
126 |         out = np.zeros(num_values, dtype=np.uint8)  # expect only values between 0 and 255
127 | 
128 |         for i in range(num_values):
129 |             out[i] = np.uint8(struct.unpack('h', bf.read(2))[0])
130 | 
131 |         return np.rot90(np.fliplr(out.reshape((cols, rows))))
132 | 
133 | def convert_from_color_segmentation(arr_3d, use_void=False):
134 |     arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8)
135 |     palette = pascal_palette(use_void)
136 | 
137 |     for c, i in palette.items():
138 |         m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2)
139 |         arr_2d[m] = i
140 | 
141 |     return arr_2d
142 | 
143 | def create_lut(class_ids, max_id=256):
144 |     # Index 0 is the first index used in caffe for denoting labels.
145 |     # Therefore, index 0 is considered as default.
146 |     lut = np.zeros(max_id, dtype=np.uint8)
147 | 
148 |     new_index = 1
149 |     for i in class_ids:
150 |         lut[i] = new_index
151 |         new_index += 1
152 | 
153 |     return lut
154 | 
155 | def get_id_classes(classes):
156 |     all_classes = pascal_classes()
157 |     id_classes = [all_classes[c] for c in classes]
158 |     return id_classes
159 | 
160 | def parallel_process(array, function, n_jobs=8, use_kwargs=False, front_num=3):
161 |     """
162 |     A parallel version of the map function with a progress bar.
163 | 
164 |     Args:
165 |         array (array-like): An array to iterate over.
166 |         function (function): A python function to apply to the elements of array
167 |         n_jobs (int, default=8): The number of cores to use
168 |         use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of
169 |             keyword arguments to function
170 |         front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job.
171 |             Useful for catching bugs
172 |     Returns:
173 |         [function(array[0]), function(array[1]), ...]
174 |     """
175 |     # We run the first few iterations serially to catch bugs
176 |     # (an empty slice handles front_num == 0)
177 |     front = [function(**a) if use_kwargs else function(a) for a in array[:front_num]]
178 |     # If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging.
179 |     if n_jobs == 1:
180 |         return front + [function(**a) if use_kwargs else function(a) for a in tqdm(array[front_num:])]
181 |     # Assemble the workers
182 |     with ProcessPoolExecutor(max_workers=n_jobs) as pool:
183 |         # Pass the elements of array into function
184 |         if use_kwargs:
185 |             futures = [pool.submit(function, **a) for a in array[front_num:]]
186 |         else:
187 |             futures = [pool.submit(function, a) for a in array[front_num:]]
188 |         kwargs = {
189 |             'total': len(futures),
190 |             'unit': 'it',
191 |             'unit_scale': True,
192 |             'leave': True,
193 |             'smoothing': 0.1,
194 |         }
195 |         # Print out the progress as tasks complete
196 |         for f in tqdm(as_completed(futures), **kwargs):
197 |             pass
198 |     out = []
199 |     # Get the results from the futures.
200 |     for i, future in tqdm(enumerate(futures)):
201 |         try:
202 |             out.append(future.result())
203 |         except Exception as e:
204 |             out.append(e)
205 |     return front + out
--------------------------------------------------------------------------------
/deeplab_resnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bermanmaxim/jaccardSegment/d6cb4036805911a7cff80b6ab8eab7b4e54f3a7a/deeplab_resnet/__init__.py
--------------------------------------------------------------------------------
/deeplab_resnet/model.py:
--------------------------------------------------------------------------------
1 | # Converted to TensorFlow from the Caffe .caffemodel,
2 | # with the DeepLab-ResNet configuration.
3 | # The batch normalisation layer is provided by
4 | # the slim library (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim).
5 | 
6 | from kaffe.tensorflow import Network
7 | import tensorflow as tf
8 | 
9 | class DeepLabResNetModel(Network):
10 |     def setup(self, is_training):
11 |         '''Network definition.
12 | 
13 |         Args:
14 |           is_training: whether to update the running mean and variance of the batch normalisation layer.
15 |                        If the batch size is small, it is better to keep the running mean and variance of
16 |                        the pretrained model frozen.
17 |         '''
18 |         (self.feed('data')
19 |              .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1')
20 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn_conv1')
21 |              .max_pool(3, 3, 2, 2, name='pool1')
22 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1')
23 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch1'))
24 | 
25 |         (self.feed('pool1')
26 |              .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a')
27 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2a')
28 |              .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b')
29 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2b')
30 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c')
31 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch2c'))
32 | 
33 |         (self.feed('bn2a_branch1',
34 |                    'bn2a_branch2c')
35 |              .add(name='res2a')
36 |              .relu(name='res2a_relu')
37 |              .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a')
38 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2a')
39 |              .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b')
40 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2b')
41 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c')
42 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2b_branch2c'))
43 | 
44 |         (self.feed('res2a_relu',
45 |                    'bn2b_branch2c')
46 |              .add(name='res2b')
47 |              .relu(name='res2b_relu')
48 |              .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a')
49 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2a')
50 |              .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b')
51 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2b')
52 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c')
53 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2c_branch2c'))
54 | 55 | (self.feed('res2b_relu', 56 | 'bn2c_branch2c') 57 | .add(name='res2c') 58 | .relu(name='res2c_relu') 59 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') 60 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch1')) 61 | 62 | (self.feed('res2c_relu') 63 | .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') 64 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2a') 65 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') 66 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2b') 67 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') 68 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch2c')) 69 | 70 | (self.feed('bn3a_branch1', 71 | 'bn3a_branch2c') 72 | .add(name='res3a') 73 | .relu(name='res3a_relu') 74 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2a') 75 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2a') 76 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2b') 77 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2b') 78 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b1_branch2c') 79 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b1_branch2c')) 80 | 81 | (self.feed('res3a_relu', 82 | 'bn3b1_branch2c') 83 | .add(name='res3b1') 84 | .relu(name='res3b1_relu') 85 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2a') 86 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2a') 87 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2b') 88 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2b') 89 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b2_branch2c') 90 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b2_branch2c')) 91 | 92 | (self.feed('res3b1_relu', 93 | 'bn3b2_branch2c') 94 | .add(name='res3b2') 95 | .relu(name='res3b2_relu') 96 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2a') 97 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2a') 98 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2b') 99 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2b') 100 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b3_branch2c') 101 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b3_branch2c')) 102 | 103 | (self.feed('res3b2_relu', 104 | 'bn3b3_branch2c') 105 | .add(name='res3b3') 106 | .relu(name='res3b3_relu') 107 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch1') 108 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch1')) 109 | 110 | (self.feed('res3b3_relu') 111 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4a_branch2a') 112 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2a') 113 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4a_branch2b') 114 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2b') 115 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') 
116 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch2c')) 117 | 118 | (self.feed('bn4a_branch1', 119 | 'bn4a_branch2c') 120 | .add(name='res4a') 121 | .relu(name='res4a_relu') 122 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b1_branch2a') 123 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2a') 124 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b1_branch2b') 125 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2b') 126 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b1_branch2c') 127 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b1_branch2c')) 128 | 129 | (self.feed('res4a_relu', 130 | 'bn4b1_branch2c') 131 | .add(name='res4b1') 132 | .relu(name='res4b1_relu') 133 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b2_branch2a') 134 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2a') 135 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b2_branch2b') 136 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2b') 137 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b2_branch2c') 138 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b2_branch2c')) 139 | 140 | (self.feed('res4b1_relu', 141 | 'bn4b2_branch2c') 142 | .add(name='res4b2') 143 | .relu(name='res4b2_relu') 144 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b3_branch2a') 145 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2a') 146 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b3_branch2b') 147 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2b') 148 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b3_branch2c') 149 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b3_branch2c')) 150 | 151 | (self.feed('res4b2_relu', 152 | 'bn4b3_branch2c') 153 | .add(name='res4b3') 154 | .relu(name='res4b3_relu') 155 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b4_branch2a') 156 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2a') 157 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b4_branch2b') 158 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2b') 159 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b4_branch2c') 160 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b4_branch2c')) 161 | 162 | (self.feed('res4b3_relu', 163 | 'bn4b4_branch2c') 164 | .add(name='res4b4') 165 | .relu(name='res4b4_relu') 166 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b5_branch2a') 167 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2a') 168 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b5_branch2b') 169 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2b') 170 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b5_branch2c') 171 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b5_branch2c')) 172 | 173 | (self.feed('res4b4_relu', 174 | 'bn4b5_branch2c') 175 | 
.add(name='res4b5') 176 | .relu(name='res4b5_relu') 177 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b6_branch2a') 178 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2a') 179 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b6_branch2b') 180 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2b') 181 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b6_branch2c') 182 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b6_branch2c')) 183 | 184 | (self.feed('res4b5_relu', 185 | 'bn4b6_branch2c') 186 | .add(name='res4b6') 187 | .relu(name='res4b6_relu') 188 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b7_branch2a') 189 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2a') 190 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b7_branch2b') 191 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2b') 192 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b7_branch2c') 193 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b7_branch2c')) 194 | 195 | (self.feed('res4b6_relu', 196 | 'bn4b7_branch2c') 197 | .add(name='res4b7') 198 | .relu(name='res4b7_relu') 199 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b8_branch2a') 200 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2a') 201 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b8_branch2b') 202 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2b') 203 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b8_branch2c') 204 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b8_branch2c')) 205 | 206 | (self.feed('res4b7_relu', 207 | 'bn4b8_branch2c') 208 | .add(name='res4b8') 209 | .relu(name='res4b8_relu') 210 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b9_branch2a') 211 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2a') 212 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b9_branch2b') 213 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2b') 214 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b9_branch2c') 215 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b9_branch2c')) 216 | 217 | (self.feed('res4b8_relu', 218 | 'bn4b9_branch2c') 219 | .add(name='res4b9') 220 | .relu(name='res4b9_relu') 221 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b10_branch2a') 222 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2a') 223 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b10_branch2b') 224 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2b') 225 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b10_branch2c') 226 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b10_branch2c')) 227 | 228 | (self.feed('res4b9_relu', 229 | 'bn4b10_branch2c') 230 | .add(name='res4b10') 231 | .relu(name='res4b10_relu') 232 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b11_branch2a') 233 | 
.batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2a') 234 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b11_branch2b') 235 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2b') 236 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b11_branch2c') 237 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b11_branch2c')) 238 | 239 | (self.feed('res4b10_relu', 240 | 'bn4b11_branch2c') 241 | .add(name='res4b11') 242 | .relu(name='res4b11_relu') 243 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b12_branch2a') 244 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2a') 245 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b12_branch2b') 246 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2b') 247 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b12_branch2c') 248 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b12_branch2c')) 249 | 250 | (self.feed('res4b11_relu', 251 | 'bn4b12_branch2c') 252 | .add(name='res4b12') 253 | .relu(name='res4b12_relu') 254 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b13_branch2a') 255 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2a') 256 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b13_branch2b') 257 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2b') 258 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b13_branch2c') 259 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b13_branch2c')) 260 | 261 | (self.feed('res4b12_relu', 262 | 'bn4b13_branch2c') 263 | .add(name='res4b13') 264 | .relu(name='res4b13_relu') 265 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b14_branch2a') 266 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2a') 267 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b14_branch2b') 268 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2b') 269 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b14_branch2c') 270 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b14_branch2c')) 271 | 272 | (self.feed('res4b13_relu', 273 | 'bn4b14_branch2c') 274 | .add(name='res4b14') 275 | .relu(name='res4b14_relu') 276 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b15_branch2a') 277 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2a') 278 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b15_branch2b') 279 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2b') 280 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b15_branch2c') 281 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b15_branch2c')) 282 | 283 | (self.feed('res4b14_relu', 284 | 'bn4b15_branch2c') 285 | .add(name='res4b15') 286 | .relu(name='res4b15_relu') 287 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b16_branch2a') 288 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2a') 289 | .atrous_conv(3, 3, 
256, 2, padding='SAME', biased=False, relu=False, name='res4b16_branch2b') 290 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2b') 291 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b16_branch2c') 292 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b16_branch2c')) 293 | 294 | (self.feed('res4b15_relu', 295 | 'bn4b16_branch2c') 296 | .add(name='res4b16') 297 | .relu(name='res4b16_relu') 298 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b17_branch2a') 299 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2a') 300 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b17_branch2b') 301 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2b') 302 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b17_branch2c') 303 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b17_branch2c')) 304 | 305 | (self.feed('res4b16_relu', 306 | 'bn4b17_branch2c') 307 | .add(name='res4b17') 308 | .relu(name='res4b17_relu') 309 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b18_branch2a') 310 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2a') 311 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b18_branch2b') 312 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2b') 313 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b18_branch2c') 314 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b18_branch2c')) 315 | 316 | (self.feed('res4b17_relu', 317 | 'bn4b18_branch2c') 318 | .add(name='res4b18') 319 | .relu(name='res4b18_relu') 320 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b19_branch2a') 321 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2a') 322 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b19_branch2b') 323 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2b') 324 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b19_branch2c') 325 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b19_branch2c')) 326 | 327 | (self.feed('res4b18_relu', 328 | 'bn4b19_branch2c') 329 | .add(name='res4b19') 330 | .relu(name='res4b19_relu') 331 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b20_branch2a') 332 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2a') 333 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b20_branch2b') 334 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2b') 335 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b20_branch2c') 336 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b20_branch2c')) 337 | 338 | (self.feed('res4b19_relu', 339 | 'bn4b20_branch2c') 340 | .add(name='res4b20') 341 | .relu(name='res4b20_relu') 342 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b21_branch2a') 343 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2a') 344 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b21_branch2b') 345 | 
.batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2b') 346 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b21_branch2c') 347 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b21_branch2c')) 348 | 349 | (self.feed('res4b20_relu', 350 | 'bn4b21_branch2c') 351 | .add(name='res4b21') 352 | .relu(name='res4b21_relu') 353 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b22_branch2a') 354 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2a') 355 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b22_branch2b') 356 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2b') 357 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b22_branch2c') 358 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b22_branch2c')) 359 | 360 | (self.feed('res4b21_relu', 361 | 'bn4b22_branch2c') 362 | .add(name='res4b22') 363 | .relu(name='res4b22_relu') 364 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1') 365 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1')) 366 | 367 | (self.feed('res4b22_relu') 368 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5a_branch2a') 369 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2a') 370 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5a_branch2b') 371 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2b') 372 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') 373 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch2c')) 374 | 375 | (self.feed('bn5a_branch1', 376 | 'bn5a_branch2c') 377 | .add(name='res5a') 378 | .relu(name='res5a_relu') 379 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') 380 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2a') 381 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5b_branch2b') 382 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2b') 383 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') 384 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5b_branch2c')) 385 | 386 | (self.feed('res5a_relu', 387 | 'bn5b_branch2c') 388 | .add(name='res5b') 389 | .relu(name='res5b_relu') 390 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') 391 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5c_branch2a') 392 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5c_branch2b') 393 | .batch_normalization(activation_fn=tf.nn.relu, name='bn5c_branch2b', is_training=is_training) 394 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') 395 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5c_branch2c')) 396 | 397 | (self.feed('res5b_relu', 398 | 'bn5c_branch2c') 399 | .add(name='res5c') 400 | .relu(name='res5c_relu') 401 | .atrous_conv(3, 3, 21, 6, padding='SAME', relu=False, name='fc1_voc12_c0')) 402 | 403 | (self.feed('res5c_relu') 404 | .atrous_conv(3, 3, 21, 12, padding='SAME', relu=False, name='fc1_voc12_c1')) 405 | 406 | (self.feed('res5c_relu') 407 | 
.atrous_conv(3, 3, 21, 18, padding='SAME', relu=False, name='fc1_voc12_c2')) 408 | 409 | (self.feed('res5c_relu') 410 | .atrous_conv(3, 3, 21, 24, padding='SAME', relu=False, name='fc1_voc12_c3')) 411 | 412 | (self.feed('fc1_voc12_c0', 413 | 'fc1_voc12_c1', 414 | 'fc1_voc12_c2', 415 | 'fc1_voc12_c3') 416 | .add(name='fc1_voc12')) 417 | -------------------------------------------------------------------------------- /deeplab_resnet/model_pytorch.py: -------------------------------------------------------------------------------- 1 | # Converted by kaffe to tensorflow 2 | # removed tf references 3 | # for loading in pytorch. 4 | 5 | # from kaffe.tensorflow import Network 6 | 7 | from network_pytorch import Network 8 | 9 | class DeepLabResNetModel(Network): 10 | def setup(self, is_training): 11 | '''Network definition. 12 | 13 | Args: 14 | is_training: whether to update the running mean and variance of the batch normalisation layer. 15 | If the batch size is small, it is better to keep the running mean and variance of 16 | the-pretrained model frozen. 17 | ''' 18 | (self.feed('data') 19 | .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1') 20 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn_conv1') 21 | .max_pool(3, 3, 2, 2, name='pool1') 22 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1') 23 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch1')) 24 | 25 | (self.feed('pool1') 26 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a') 27 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2a_branch2a') 28 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b') 29 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2a_branch2b') 30 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c') 31 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch2c')) 32 | 33 | (self.feed('bn2a_branch1', 34 | 'bn2a_branch2c') 35 | .add(name='res2a') 36 | .relu(name='res2a_relu') 37 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a') 38 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2b_branch2a') 39 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b') 40 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2b_branch2b') 41 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c') 42 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2b_branch2c')) 43 | 44 | (self.feed('res2a_relu', 45 | 'bn2b_branch2c') 46 | .add(name='res2b') 47 | .relu(name='res2b_relu') 48 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a') 49 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2c_branch2a') 50 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b') 51 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2c_branch2b') 52 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c') 53 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2c_branch2c')) 54 | 55 | (self.feed('res2b_relu', 56 | 'bn2c_branch2c') 57 | .add(name='res2c') 58 | .relu(name='res2c_relu') 59 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') 60 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch1')) 61 | 62 | 
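        # Every residual unit below repeats the same bottleneck pattern: a 1x1
        # conv shrinks the channel count, a 3x3 conv (atrous with dilation 2 in
        # the res4 blocks and dilation 4 in the res5 blocks) processes the
        # features, and a 1x1 conv restores the width. Batch norm follows each
        # conv (with a fused ReLU on the first two), and the branch is summed
        # with the identity (or 1x1-projected) shortcut before the final ReLU.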
(self.feed('res2c_relu') 63 | .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') 64 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3a_branch2a') 65 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') 66 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3a_branch2b') 67 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') 68 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch2c')) 69 | 70 | (self.feed('bn3a_branch1', 71 | 'bn3a_branch2c') 72 | .add(name='res3a') 73 | .relu(name='res3a_relu') 74 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2a') 75 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b1_branch2a') 76 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2b') 77 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b1_branch2b') 78 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b1_branch2c') 79 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b1_branch2c')) 80 | 81 | (self.feed('res3a_relu', 82 | 'bn3b1_branch2c') 83 | .add(name='res3b1') 84 | .relu(name='res3b1_relu') 85 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2a') 86 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b2_branch2a') 87 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2b') 88 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b2_branch2b') 89 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b2_branch2c') 90 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b2_branch2c')) 91 | 92 | (self.feed('res3b1_relu', 93 | 'bn3b2_branch2c') 94 | .add(name='res3b2') 95 | .relu(name='res3b2_relu') 96 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2a') 97 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b3_branch2a') 98 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2b') 99 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b3_branch2b') 100 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b3_branch2c') 101 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b3_branch2c')) 102 | 103 | (self.feed('res3b2_relu', 104 | 'bn3b3_branch2c') 105 | .add(name='res3b3') 106 | .relu(name='res3b3_relu') 107 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch1') 108 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch1')) 109 | 110 | (self.feed('res3b3_relu') 111 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4a_branch2a') 112 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4a_branch2a') 113 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4a_branch2b') 114 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4a_branch2b') 115 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') 116 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch2c')) 117 | 118 | (self.feed('bn4a_branch1', 119 | 'bn4a_branch2c') 120 | .add(name='res4a') 121 | .relu(name='res4a_relu') 122 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b1_branch2a') 123 | 
.batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b1_branch2a') 124 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b1_branch2b') 125 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b1_branch2b') 126 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b1_branch2c') 127 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b1_branch2c')) 128 | 129 | (self.feed('res4a_relu', 130 | 'bn4b1_branch2c') 131 | .add(name='res4b1') 132 | .relu(name='res4b1_relu') 133 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b2_branch2a') 134 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b2_branch2a') 135 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b2_branch2b') 136 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b2_branch2b') 137 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b2_branch2c') 138 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b2_branch2c')) 139 | 140 | (self.feed('res4b1_relu', 141 | 'bn4b2_branch2c') 142 | .add(name='res4b2') 143 | .relu(name='res4b2_relu') 144 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b3_branch2a') 145 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b3_branch2a') 146 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b3_branch2b') 147 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b3_branch2b') 148 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b3_branch2c') 149 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b3_branch2c')) 150 | 151 | (self.feed('res4b2_relu', 152 | 'bn4b3_branch2c') 153 | .add(name='res4b3') 154 | .relu(name='res4b3_relu') 155 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b4_branch2a') 156 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b4_branch2a') 157 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b4_branch2b') 158 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b4_branch2b') 159 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b4_branch2c') 160 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b4_branch2c')) 161 | 162 | (self.feed('res4b3_relu', 163 | 'bn4b4_branch2c') 164 | .add(name='res4b4') 165 | .relu(name='res4b4_relu') 166 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b5_branch2a') 167 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b5_branch2a') 168 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b5_branch2b') 169 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b5_branch2b') 170 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b5_branch2c') 171 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b5_branch2c')) 172 | 173 | (self.feed('res4b4_relu', 174 | 'bn4b5_branch2c') 175 | .add(name='res4b5') 176 | .relu(name='res4b5_relu') 177 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b6_branch2a') 178 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b6_branch2a') 179 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b6_branch2b') 180 | 
.batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b6_branch2b') 181 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b6_branch2c') 182 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b6_branch2c')) 183 | 184 | (self.feed('res4b5_relu', 185 | 'bn4b6_branch2c') 186 | .add(name='res4b6') 187 | .relu(name='res4b6_relu') 188 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b7_branch2a') 189 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b7_branch2a') 190 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b7_branch2b') 191 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b7_branch2b') 192 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b7_branch2c') 193 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b7_branch2c')) 194 | 195 | (self.feed('res4b6_relu', 196 | 'bn4b7_branch2c') 197 | .add(name='res4b7') 198 | .relu(name='res4b7_relu') 199 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b8_branch2a') 200 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b8_branch2a') 201 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b8_branch2b') 202 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b8_branch2b') 203 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b8_branch2c') 204 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b8_branch2c')) 205 | 206 | (self.feed('res4b7_relu', 207 | 'bn4b8_branch2c') 208 | .add(name='res4b8') 209 | .relu(name='res4b8_relu') 210 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b9_branch2a') 211 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b9_branch2a') 212 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b9_branch2b') 213 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b9_branch2b') 214 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b9_branch2c') 215 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b9_branch2c')) 216 | 217 | (self.feed('res4b8_relu', 218 | 'bn4b9_branch2c') 219 | .add(name='res4b9') 220 | .relu(name='res4b9_relu') 221 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b10_branch2a') 222 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b10_branch2a') 223 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b10_branch2b') 224 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b10_branch2b') 225 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b10_branch2c') 226 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b10_branch2c')) 227 | 228 | (self.feed('res4b9_relu', 229 | 'bn4b10_branch2c') 230 | .add(name='res4b10') 231 | .relu(name='res4b10_relu') 232 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b11_branch2a') 233 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b11_branch2a') 234 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b11_branch2b') 235 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b11_branch2b') 236 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b11_branch2c') 237 | 
.batch_normalization(is_training=is_training, activation_fn=None, name='bn4b11_branch2c')) 238 | 239 | (self.feed('res4b10_relu', 240 | 'bn4b11_branch2c') 241 | .add(name='res4b11') 242 | .relu(name='res4b11_relu') 243 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b12_branch2a') 244 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b12_branch2a') 245 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b12_branch2b') 246 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b12_branch2b') 247 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b12_branch2c') 248 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b12_branch2c')) 249 | 250 | (self.feed('res4b11_relu', 251 | 'bn4b12_branch2c') 252 | .add(name='res4b12') 253 | .relu(name='res4b12_relu') 254 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b13_branch2a') 255 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b13_branch2a') 256 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b13_branch2b') 257 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b13_branch2b') 258 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b13_branch2c') 259 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b13_branch2c')) 260 | 261 | (self.feed('res4b12_relu', 262 | 'bn4b13_branch2c') 263 | .add(name='res4b13') 264 | .relu(name='res4b13_relu') 265 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b14_branch2a') 266 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b14_branch2a') 267 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b14_branch2b') 268 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b14_branch2b') 269 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b14_branch2c') 270 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b14_branch2c')) 271 | 272 | (self.feed('res4b13_relu', 273 | 'bn4b14_branch2c') 274 | .add(name='res4b14') 275 | .relu(name='res4b14_relu') 276 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b15_branch2a') 277 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b15_branch2a') 278 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b15_branch2b') 279 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b15_branch2b') 280 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b15_branch2c') 281 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b15_branch2c')) 282 | 283 | (self.feed('res4b14_relu', 284 | 'bn4b15_branch2c') 285 | .add(name='res4b15') 286 | .relu(name='res4b15_relu') 287 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b16_branch2a') 288 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b16_branch2a') 289 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b16_branch2b') 290 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b16_branch2b') 291 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b16_branch2c') 292 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b16_branch2c')) 293 | 294 | (self.feed('res4b15_relu', 295 | 'bn4b16_branch2c') 296 | 
.add(name='res4b16') 297 | .relu(name='res4b16_relu') 298 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b17_branch2a') 299 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b17_branch2a') 300 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b17_branch2b') 301 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b17_branch2b') 302 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b17_branch2c') 303 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b17_branch2c')) 304 | 305 | (self.feed('res4b16_relu', 306 | 'bn4b17_branch2c') 307 | .add(name='res4b17') 308 | .relu(name='res4b17_relu') 309 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b18_branch2a') 310 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b18_branch2a') 311 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b18_branch2b') 312 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b18_branch2b') 313 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b18_branch2c') 314 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b18_branch2c')) 315 | 316 | (self.feed('res4b17_relu', 317 | 'bn4b18_branch2c') 318 | .add(name='res4b18') 319 | .relu(name='res4b18_relu') 320 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b19_branch2a') 321 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b19_branch2a') 322 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b19_branch2b') 323 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b19_branch2b') 324 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b19_branch2c') 325 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b19_branch2c')) 326 | 327 | (self.feed('res4b18_relu', 328 | 'bn4b19_branch2c') 329 | .add(name='res4b19') 330 | .relu(name='res4b19_relu') 331 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b20_branch2a') 332 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b20_branch2a') 333 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b20_branch2b') 334 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b20_branch2b') 335 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b20_branch2c') 336 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b20_branch2c')) 337 | 338 | (self.feed('res4b19_relu', 339 | 'bn4b20_branch2c') 340 | .add(name='res4b20') 341 | .relu(name='res4b20_relu') 342 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b21_branch2a') 343 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b21_branch2a') 344 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b21_branch2b') 345 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b21_branch2b') 346 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b21_branch2c') 347 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b21_branch2c')) 348 | 349 | (self.feed('res4b20_relu', 350 | 'bn4b21_branch2c') 351 | .add(name='res4b21') 352 | .relu(name='res4b21_relu') 353 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b22_branch2a') 354 | 
.batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b22_branch2a') 355 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b22_branch2b') 356 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b22_branch2b') 357 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b22_branch2c') 358 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b22_branch2c')) 359 | 360 | (self.feed('res4b21_relu', 361 | 'bn4b22_branch2c') 362 | .add(name='res4b22') 363 | .relu(name='res4b22_relu') 364 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1') 365 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1')) 366 | 367 | (self.feed('res4b22_relu') 368 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5a_branch2a') 369 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5a_branch2a') 370 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5a_branch2b') 371 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5a_branch2b') 372 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') 373 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch2c')) 374 | 375 | (self.feed('bn5a_branch1', 376 | 'bn5a_branch2c') 377 | .add(name='res5a') 378 | .relu(name='res5a_relu') 379 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') 380 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5b_branch2a') 381 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5b_branch2b') 382 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5b_branch2b') 383 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') 384 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5b_branch2c')) 385 | 386 | (self.feed('res5a_relu', 387 | 'bn5b_branch2c') 388 | .add(name='res5b') 389 | .relu(name='res5b_relu') 390 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') 391 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5c_branch2a') 392 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5c_branch2b') 393 | .batch_normalization(activation_fn='relu', name='bn5c_branch2b', is_training=is_training) 394 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') 395 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5c_branch2c')) 396 | 397 | (self.feed('res5b_relu', 398 | 'bn5c_branch2c') 399 | .add(name='res5c') 400 | .relu(name='res5c_relu') 401 | .atrous_conv(3, 3, 21, 6, padding='SAME', relu=False, name='fc1_voc12_c0')) 402 | 403 | (self.feed('res5c_relu') 404 | .atrous_conv(3, 3, 21, 12, padding='SAME', relu=False, name='fc1_voc12_c1')) 405 | 406 | (self.feed('res5c_relu') 407 | .atrous_conv(3, 3, 21, 18, padding='SAME', relu=False, name='fc1_voc12_c2')) 408 | 409 | (self.feed('res5c_relu') 410 | .atrous_conv(3, 3, 21, 24, padding='SAME', relu=False, name='fc1_voc12_c3')) 411 | 412 | (self.feed('fc1_voc12_c0', 413 | 'fc1_voc12_c1', 414 | 'fc1_voc12_c2', 415 | 'fc1_voc12_c3') 416 | .add(name='fc1_voc12')) 417 | -------------------------------------------------------------------------------- /deeplab_resnet/network_pytorch.py: 
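The `DeepLabResNetModel` above never allocates parameters itself: the `Network` base class listed next looks every weight up by layer name in a plain dict, so constructing the model object *is* the forward pass, and every named intermediate ends up in `.layers`. A minimal inference sketch along the lines of `eval_pytorch.py` follows; the `.h5` path and the 321x321 crop size are illustrative, not fixed by the code.

```python
import torch
import deepdish as dd
from torch.autograd import Variable
import deeplab_resnet.model_pytorch as modelpy

weights = dd.io.load('deeplab_resnet.h5')  # produced by ckpt_to_dd.py
var = {}
for k, v in weights.items():
    v = torch.from_numpy(v).cuda()
    # Running batch-norm statistics stay plain tensors; everything else
    # becomes a Variable so it can later receive gradients.
    if not (k.endswith('moving_mean') or k.endswith('moving_variance')):
        v = Variable(v)
    var[k] = v

image = Variable(torch.rand(1, 3, 321, 321).cuda(), volatile=True)
# Instantiating the model runs setup(); 'fc1_voc12' is the sum of the four
# dilated classifier branches, i.e. the fused 21-class logit map.
logits = modelpy.DeepLabResNetModel({'data': image}, var).layers['fc1_voc12']
print(logits.size())  # (1, 21, 41, 41): the network has an output stride of 8
```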
-------------------------------------------------------------------------------- 1 | # adapted from kaffe to load pytorch functional functions 2 | 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | DEFAULT_PADDING = 'SAME' 9 | DEBUG_SIZES = False 10 | DEBUG_NAMES = False 11 | 12 | def layer(op): 13 | '''Decorator for composable network layers.''' 14 | 15 | def layer_decorated(self, *args, **kwargs): 16 | # Automatically set a name if not provided. 17 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 18 | # Figure out the layer inputs. 19 | if len(self.terminals) == 0: 20 | raise RuntimeError('No input variables found for layer %s.' % name) 21 | elif len(self.terminals) == 1: 22 | layer_input = self.terminals[0] 23 | if DEBUG_SIZES: print(layer_input.size()) 24 | else: 25 | layer_input = list(self.terminals) 26 | if DEBUG_SIZES: print([i.size() for i in layer_input]) 27 | if DEBUG_NAMES: print(name) 28 | # Perform the operation and get the output. 29 | layer_output = op(self, layer_input, *args, **kwargs) 30 | # Add to layer LUT. 31 | self.layers[name] = layer_output 32 | # This output is now the input for the next layer. 33 | self.feed(layer_output) 34 | # Return self for chained calls. 35 | return self 36 | 37 | return layer_decorated 38 | 39 | def pad_if_needed(input, padding, kind, k_h, k_w, s_h=1, s_w=1, dilation=1): 40 | if padding == 'VALID': 41 | return input 42 | elif padding == 'SAME' and kind in ('conv2d', 'pool2d'): 43 | in_height, in_width = input.size(2), input.size(3) 44 | out_height = int(np.ceil(float(in_height) / float(s_h))) 45 | out_width = int(np.ceil(float(in_width) / float(s_w))) 46 | 47 | pad_along_height = max((out_height - 1) * s_h + k_h - in_height, 0) 48 | pad_along_width = max((out_width - 1) * s_w + k_w - in_width, 0) 49 | pad_top = pad_along_height // 2 50 | pad_bottom = pad_along_height - pad_top 51 | pad_left = pad_along_width // 2 52 | pad_right = pad_along_width - pad_left 53 | input = F.pad(input, (pad_left, pad_right, pad_top, pad_bottom)) 54 | return input 55 | elif kind in ('atrous_conv2d',): 56 | effective_height = k_h + (k_h - 1) * (dilation - 1) 57 | effective_width = k_w + (k_w - 1) * (dilation - 1) 58 | return pad_if_needed(input, padding, 'conv2d', effective_height, effective_width, s_h, s_w, dilation=1) 59 | else: 60 | raise NotImplementedError 61 | 62 | 63 | 64 | class Network(object): 65 | 66 | def __init__(self, inputs, weights, trainable=True, is_training=False): 67 | # The input nodes for this network 68 | self.inputs = inputs 69 | self.weights = weights 70 | # The current list of terminal nodes 71 | self.terminals = [] 72 | # Mapping from layer names to layers 73 | self.layers = dict(inputs) 74 | # If true, the resulting variables are set as trainable 75 | self.trainable = trainable 76 | # Switch variable for dropout 77 | self.use_dropout = 1.0 78 | self.setup(is_training) 79 | 80 | def feed(self, *args): 81 | '''Set the input(s) for the next operation by replacing the terminal nodes. 82 | The arguments can be either layer names or the actual layers. 
83 | ''' 84 | assert len(args) != 0 85 | self.terminals = [] 86 | for fed_layer in args: 87 | if isinstance(fed_layer, basestring): 88 | try: 89 | fed_layer = self.layers[fed_layer] 90 | except KeyError: 91 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 92 | self.terminals.append(fed_layer) 93 | return self 94 | 95 | def get_output(self): 96 | '''Returns the current network output.''' 97 | return self.terminals[-1] 98 | 99 | def get_unique_name(self, prefix): 100 | '''Returns an index-suffixed unique name for the given prefix. 101 | This is used for auto-generating layer names based on the type-prefix. 102 | ''' 103 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 104 | return '%s_%d' % (prefix, ident) 105 | 106 | def validate_padding(self, padding): 107 | '''Verifies that the padding is one of the supported ones.''' 108 | assert padding in ('SAME', 'VALID') 109 | 110 | @layer 111 | def conv(self, 112 | input, 113 | k_h, 114 | k_w, 115 | c_o, 116 | s_h, 117 | s_w, 118 | name, 119 | relu=True, 120 | padding=DEFAULT_PADDING, 121 | group=1, 122 | biased=True): 123 | input = pad_if_needed(input, padding, 'conv2d', k_h, k_w, s_h, s_w) 124 | 125 | result = F.conv2d(input, 126 | self.weights[name + '/weights'], 127 | bias=self.weights[name + '/biases'] if biased else None, 128 | padding=0, 129 | groups=group, 130 | stride=(s_h, s_w)) 131 | if relu: 132 | result = F.relu(result) 133 | return result 134 | 135 | @layer 136 | def atrous_conv(self, 137 | input, 138 | k_h, 139 | k_w, 140 | c_o, 141 | dilation, 142 | name, 143 | relu=True, 144 | padding=DEFAULT_PADDING, 145 | group=1, 146 | biased=True): 147 | if group != 1: 148 | raise NotImplementedError 149 | input = pad_if_needed(input, padding, 'atrous_conv2d', k_h, k_w, dilation=dilation) 150 | 151 | result = F.conv2d(input, 152 | self.weights[name + '/weights'], 153 | bias=self.weights[name + '/biases'] if biased else None, 154 | padding=0, 155 | dilation=dilation, 156 | groups=group, 157 | stride=1) 158 | if relu: 159 | result = F.relu(result) 160 | return result 161 | 162 | @layer 163 | def relu(self, input, name): 164 | return F.relu(input) 165 | 166 | @layer 167 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 168 | input = pad_if_needed(input, padding, 'pool2d', k_h, k_w, s_h, s_w) 169 | 170 | return F.max_pool2d(input, 171 | kernel_size=(k_h, k_w), 172 | stride=(s_h, s_w), 173 | padding=0) 174 | 175 | @layer 176 | def add(self, inputs, name): 177 | return sum(inputs) 178 | 179 | @layer 180 | def batch_normalization(self, input, # other arguments are ignored 181 | name, is_training, activation_fn=None, scale=True, eps=0.001): 182 | output = F.batch_norm(input, self.weights[name + '/moving_mean'], self.weights[name + '/moving_variance'], 183 | weight=self.weights[name + '/gamma'], bias=self.weights[name + '/beta'], eps=eps) 184 | if activation_fn is not None: 185 | if activation_fn == 'relu': 186 | output = F.relu(output) 187 | else: 188 | raise NotImplementedError 189 | return output -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # conda requirements file 2 | name: jaccard-segment 3 | channels: 4 | - soumith 5 | - defaults 6 | dependencies: 7 | - python=2 8 | - ipykernel 9 | - matplotlib 10 | - numpy 11 | - scikit-image 12 | - scipy 13 | - cuda80 14 | - pytorch 15 | - torchvision 16 | - pyyaml 17 | - pip: 18 | - hickle 19 | - pillow-simd 20 | 
- https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.0.1-cp27-none-linux_x86_64.whl; 'linux' in sys_platform 21 | - tqdm 22 | - futures 23 | - tensorboard 24 | 25 | -------------------------------------------------------------------------------- /eval_pytorch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import argparse 4 | from datetime import datetime 5 | import os, sys 6 | from os.path import join 7 | import time 8 | import re 9 | import platform 10 | 11 | import numpy as np 12 | 13 | import torch 14 | from torch import optim 15 | from torch.autograd import Variable 16 | import torch.utils.data as data 17 | import torch.nn.functional as F 18 | 19 | import random 20 | # WARNING: if multiple worker threads, the seeds are useless. 21 | random.seed(1857) 22 | torch.manual_seed(1857) 23 | torch.cuda.manual_seed(1857) 24 | 25 | from settings import get_arguments 26 | import datasets 27 | from datasets.loadvoc import load_extended_voc 28 | from compose import (JointCompose, RandomScale, Normalize, 29 | RandomHorizontalFlip, RandomCropPad, PILtoTensor, Scale, TensortoPIL) 30 | from PIL.Image import NEAREST 31 | 32 | from losses import * 33 | 34 | import deepdish as dd 35 | import deeplab_resnet.model_pytorch as modelpy 36 | from collections import defaultdict 37 | import yaml 38 | 39 | IGNORE_LABEL = 255 40 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 41 | 42 | def create_variables(weights, cuda=True): 43 | var = dict() 44 | for k, v in weights.items(): 45 | v = torch.from_numpy(v) 46 | if cuda: 47 | v = v.cuda() 48 | if not (k.endswith('moving_mean') or k.endswith('moving_variance')): 49 | v = Variable(v) 50 | var[k] = v 51 | return var 52 | 53 | def snapshot_variables(weights, dest): 54 | out = {} 55 | for (k, v) in weights.items(): 56 | if isinstance(v, Variable): 57 | v = v.data 58 | out[k] = v.cpu().numpy() 59 | dd.io.save(dest, out) 60 | 61 | def training_groups(weights, base_lr, multipliers=[0.1, 1.0, 1.0], train_last=-1, hybrid=False): # multipliers=[1.0, 10.0, 20.0] 62 | """ 63 | get training groups and activates requires_grad for variables 64 | train_last: last: only train last ... 
layers 65 | hybrid: if hybrid, train all layers but set momentum to 0 on last layers 66 | """ 67 | fixed = ['moving_mean', 'moving_variance', 'beta', 'gamma'] 68 | # get training variables, with their lr 69 | trained = {k: v for (k, v) in weights.iteritems() if not any([k.endswith(s) for s in fixed])} 70 | for v in trained.values(): 71 | v.requires_grad = True 72 | fc_vars = {k: v for (k, v) in trained.iteritems() if 'fc' in k} 73 | conv_vars = [v for (k, v) in trained.items() if 'fc' not in k] # lr * 1.0 74 | fc_w_vars = [v for (k, v) in fc_vars.items() if 'weights' in k] # lr * 10.0 75 | fc_b_vars = [v for (k, v) in fc_vars.items() if 'biases' in k] # lr * 20.0 76 | assert(len(trained) == len(fc_vars) + len(conv_vars)) 77 | assert(len(fc_vars) == len(fc_w_vars) + len(fc_b_vars)) 78 | if train_last == -1: 79 | print("train all layers") 80 | groups = [{'params': conv_vars, 'lr': multipliers[0] * base_lr}, 81 | {'params': fc_w_vars, 'lr': multipliers[1] * base_lr}, 82 | {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}] 83 | elif train_last == 1: 84 | print("train last layer only") 85 | for v in conv_vars: 86 | v.requires_grad = False 87 | groups = [{'params': fc_w_vars, 'lr': multipliers[1] * base_lr}, 88 | {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}] 89 | return groups 90 | 91 | class SegsetWrap(data.Dataset): 92 | def __init__(self, segset, transform=None): 93 | self.name = segset.name 94 | self.segset = segset 95 | self.transform = transform 96 | def __repr__(self): 97 | return "" 98 | def __getitem__(self, i): 99 | inputs = self.segset.read(i, kind="PIL") 100 | if self.transform is not None: 101 | inputs = self.transform(inputs) 102 | return inputs 103 | def __len__(self): 104 | return len(self.segset) 105 | 106 | def main(args): 107 | 108 | print(os.path.basename(__file__), 'arguments:') 109 | print(yaml.dump(vars(args), default_flow_style=False)) 110 | 111 | weights = dd.io.load(args.restore_from) 112 | print('Loaded weights from {}'.format(args.restore_from)) 113 | weights = create_variables(weights, cuda=True) 114 | forward = lambda input: modelpy.DeepLabResNetModel({'data': input}, weights).layers['fc1_voc12'] 115 | train, val, test = load_extended_voc() 116 | input_size = map(int, args.input_size.split(',')) if args.input_size is not None else None 117 | print ('========') 118 | 119 | if args.proximal: 120 | assert args.jaccard 121 | 122 | if args.binary == -1: 123 | print("Multiclass: loss set to cross-entropy") 124 | lossfn, lossname = crossentropyloss, 'xloss' 125 | otherlossfn = None 126 | else: 127 | print("Binary: loss set to hingeloss") 128 | if args.jaccard: 129 | lossfn, lossname = lovaszloss, 'lovaszloss' 130 | otherlossfn, otherlossname = hingeloss, 'hingeloss' 131 | elif args.softmax: 132 | lossfn, lossname = binaryXloss, 'binxloss' 133 | otherlossfn = None 134 | else: 135 | lossfn, lossname = hingeloss, 'hingeloss' 136 | otherlossfn, otherlossname = lovaszloss, 'lovaszloss' 137 | train, val = train.binarize(args.binary_str), val.binarize(args.binary_str) 138 | 139 | 140 | # get network output size 141 | dummy_input = torch.rand((1, 3, input_size[0], input_size[1])).cuda() 142 | dummy_out = forward(Variable(dummy_input, volatile=True)) 143 | output_size = (dummy_out.size(2), dummy_out.size(3)) 144 | 145 | transforms_val = JointCompose([PILtoTensor(), 146 | [Normalize(torch.from_numpy(IMG_MEAN)), None], 147 | ]) 148 | invtransf_val = JointCompose([[Normalize(-torch.from_numpy(IMG_MEAN)), None], 149 | TensortoPIL( datasets.utils.color_map() ), 150 | ]) 151 
| 152 | if args.sampling == 'balanced': 153 | from datasets.balanced_val import balanced 154 | inds = balanced[args.binary_str] 155 | val.examples = [val[i] for i in inds] 156 | print('Subsampled val. to balanced set of {:d} examples'.format(len(val))) 157 | elif args.sampling == 'exclusive': 158 | val = val[args.binary_str] 159 | print('Subsampled val. to balanced set of {:d} examples'.format(len(val))) 160 | 161 | update_every = args.grad_update_every 162 | global_batch_size = args.batch_size * update_every 163 | 164 | valset = SegsetWrap(val, transforms_val) 165 | valloader = data.DataLoader(valset, 166 | batch_size=1, 167 | shuffle=False, 168 | num_workers=1, 169 | pin_memory=True) 170 | 171 | def do_val(): 172 | valiter = iter(valloader) 173 | stats = defaultdict(list) 174 | # extract some images spreak evenly in the validation set 175 | tosee = [int(0.05 * i * len(valiter)) for i in range(1, 20)] 176 | for valstep, (inputs, labels) in enumerate(valiter): 177 | start_time = time.time() 178 | inputs, labels = Variable(inputs.cuda(), volatile=True), labels.cuda().long() 179 | logits = forward(inputs) 180 | logits = F.upsample_bilinear(logits, size=labels.size()[1:]) 181 | if args.binary == -1: 182 | xloss = crossentropyloss(logits, labels) 183 | stats['xloss'].append(xloss.data[0]) 184 | print('[Validation {}-{:d}], xloss {:.5f} - mean {:.5f} ({:.3f} sec/step {})'.format( 185 | step, valstep, xloss, np.mean(stats['xloss']), time.time() - start_time)) 186 | # conf, pred = logits.max(1) 187 | else: 188 | conf, multipred = logits.max(1) 189 | multipred = multipred.squeeze(1) 190 | multipred = (multipred == args.binary).long() 191 | imageiou_multi = iouloss(multipred.data.squeeze(0), labels.squeeze(0)) 192 | stats['imageiou_multi'].append(imageiou_multi) 193 | 194 | logits = logits[:, args.binary, :, :] # select only 1 output 195 | pred = (logits > 0.).long() 196 | 197 | # image output 198 | if valstep in tosee: 199 | inputim, inputlab = invtransf_val([inputs.data[0, :, :, :], labels[0, :, :]]) 200 | _, predim = invtransf_val([inputs.data[0, :, :, :], pred.data[0, :, :]]) 201 | inputim.save("imout/{}_{}in.png".format(args.nickname, valstep),"PNG") 202 | inputlab.save("imout/{}_{}inlab.png".format(args.nickname, valstep),"PNG") 203 | predim.save("imout/{}_{}out.png".format(args.nickname, valstep),"PNG") 204 | 205 | imageiou = iouloss(pred.data.squeeze(0), labels.squeeze(0)) 206 | stats['imageiou'].append(imageiou) 207 | hloss = hingeloss(logits, labels).data[0] 208 | stats['hingeloss'].append(hloss) 209 | jloss = lovaszloss(logits, labels).data[0] 210 | stats['lovaszloss'].append(jloss) 211 | binxloss = binaryXloss(logits, labels).data[0] 212 | stats['binxloss'].append(binxloss) 213 | 214 | print( 'hloss {:.5f} - mean {:.5f}, '.format(hloss, np.mean(stats['hingeloss'])) 215 | + 'lovaszloss {:.5f} - mean {:.5f}, '.format(jloss, np.mean(stats['lovaszloss'])) 216 | + 'iou {:.5f} - mean {:.5f}, '.format(imageiou, np.mean(stats['imageiou'])) 217 | + 'iou_multi {:.5f} - mean {:.5f}, '.format(imageiou_multi, np.mean(stats['imageiou_multi'])) 218 | ) 219 | 220 | do_val() 221 | 222 | 223 | 224 | if __name__ == '__main__': 225 | args = get_arguments(sys.argv[1:], 'train') 226 | main(args) -------------------------------------------------------------------------------- /kaffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import GraphBuilder, NodeMapper 2 | from .errors import KaffeError, print_stderr 3 | 4 | from . 
import tensorflow 5 | -------------------------------------------------------------------------------- /kaffe/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .resolver import get_caffe_resolver, has_pycaffe 2 | -------------------------------------------------------------------------------- /kaffe/caffe/resolver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | SHARED_CAFFE_RESOLVER = None 4 | 5 | class CaffeResolver(object): 6 | def __init__(self): 7 | self.import_caffe() 8 | 9 | def import_caffe(self): 10 | self.caffe = None 11 | try: 12 | # Try to import PyCaffe first 13 | import caffe 14 | self.caffe = caffe 15 | except ImportError: 16 | # Fall back to the protobuf implementation 17 | from . import caffepb 18 | self.caffepb = caffepb 19 | show_fallback_warning() 20 | if self.caffe: 21 | # Use the protobuf code from the imported distribution. 22 | # This way, Caffe variants with custom layers will work. 23 | self.caffepb = self.caffe.proto.caffe_pb2 24 | self.NetParameter = self.caffepb.NetParameter 25 | 26 | def has_pycaffe(self): 27 | return self.caffe is not None 28 | 29 | def get_caffe_resolver(): 30 | global SHARED_CAFFE_RESOLVER 31 | if SHARED_CAFFE_RESOLVER is None: 32 | SHARED_CAFFE_RESOLVER = CaffeResolver() 33 | return SHARED_CAFFE_RESOLVER 34 | 35 | def has_pycaffe(): 36 | return get_caffe_resolver().has_pycaffe() 37 | 38 | def show_fallback_warning(): 39 | msg = ''' 40 | ------------------------------------------------------------ 41 | WARNING: PyCaffe not found! 42 | Falling back to a pure protocol buffer implementation. 43 | * Conversions will be drastically slower. 44 | * This backend is UNTESTED! 45 | ------------------------------------------------------------ 46 | 47 | ''' 48 | sys.stderr.write(msg) 49 | -------------------------------------------------------------------------------- /kaffe/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class KaffeError(Exception): 4 | pass 5 | 6 | def print_stderr(msg): 7 | sys.stderr.write('%s\n' % msg) 8 | -------------------------------------------------------------------------------- /kaffe/graph.py: -------------------------------------------------------------------------------- 1 | from google.protobuf import text_format 2 | 3 | from .caffe import get_caffe_resolver 4 | from .errors import KaffeError, print_stderr 5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch 6 | from .shapes import TensorShape 7 | 8 | class Node(object): 9 | 10 | def __init__(self, name, kind, layer=None): 11 | self.name = name 12 | self.kind = kind 13 | self.layer = LayerAdapter(layer, kind) if layer else None 14 | self.parents = [] 15 | self.children = [] 16 | self.data = None 17 | self.output_shape = None 18 | self.metadata = {} 19 | 20 | def add_parent(self, parent_node): 21 | assert parent_node not in self.parents 22 | self.parents.append(parent_node) 23 | if self not in parent_node.children: 24 | parent_node.children.append(self) 25 | 26 | def add_child(self, child_node): 27 | assert child_node not in self.children 28 | self.children.append(child_node) 29 | if self not in child_node.parents: 30 | child_node.parents.append(self) 31 | 32 | def get_only_parent(self): 33 | if len(self.parents) != 1: 34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' 
% 35 | (self, len(self.parents))) 36 | return self.parents[0] 37 | 38 | @property 39 | def parameters(self): 40 | if self.layer is not None: 41 | return self.layer.parameters 42 | return None 43 | 44 | def __str__(self): 45 | return '[%s] %s' % (self.kind, self.name) 46 | 47 | def __repr__(self): 48 | return '%s (0x%x)' % (self.name, id(self)) 49 | 50 | 51 | class Graph(object): 52 | 53 | def __init__(self, nodes=None, name=None): 54 | self.nodes = nodes or [] 55 | self.node_lut = {node.name: node for node in self.nodes} 56 | self.name = name 57 | 58 | def add_node(self, node): 59 | self.nodes.append(node) 60 | self.node_lut[node.name] = node 61 | 62 | def get_node(self, name): 63 | try: 64 | return self.node_lut[name] 65 | except KeyError: 66 | raise KaffeError('Layer not found: %s' % name) 67 | 68 | def get_input_nodes(self): 69 | return [node for node in self.nodes if len(node.parents) == 0] 70 | 71 | def get_output_nodes(self): 72 | return [node for node in self.nodes if len(node.children) == 0] 73 | 74 | def topologically_sorted(self): 75 | sorted_nodes = [] 76 | unsorted_nodes = list(self.nodes) 77 | temp_marked = set() 78 | perm_marked = set() 79 | 80 | def visit(node): 81 | if node in temp_marked: 82 | raise KaffeError('Graph is not a DAG.') 83 | if node in perm_marked: 84 | return 85 | temp_marked.add(node) 86 | for child in node.children: 87 | visit(child) 88 | perm_marked.add(node) 89 | temp_marked.remove(node) 90 | sorted_nodes.insert(0, node) 91 | 92 | while len(unsorted_nodes): 93 | visit(unsorted_nodes.pop()) 94 | return sorted_nodes 95 | 96 | def compute_output_shapes(self): 97 | sorted_nodes = self.topologically_sorted() 98 | for node in sorted_nodes: 99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node)) 100 | 101 | def replaced(self, new_nodes): 102 | return Graph(nodes=new_nodes, name=self.name) 103 | 104 | def transformed(self, transformers): 105 | graph = self 106 | for transformer in transformers: 107 | graph = transformer(graph) 108 | if graph is None: 109 | raise KaffeError('Transformer failed: {}'.format(transformer)) 110 | assert isinstance(graph, Graph) 111 | return graph 112 | 113 | def __contains__(self, key): 114 | return key in self.node_lut 115 | 116 | def __str__(self): 117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output') 118 | s = [hdr, '-' * 94] 119 | for node in self.topologically_sorted(): 120 | # If the node has learned parameters, display the first one's shape. 121 | # In case of convolutions, this corresponds to the weights. 122 | data_shape = node.data[0].shape if node.data else '--' 123 | out_shape = node.output_shape or '--' 124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape, 125 | tuple(out_shape))) 126 | return '\n'.join(s) 127 | 128 | 129 | class GraphBuilder(object): 130 | '''Constructs a model graph from a Caffe protocol buffer definition.''' 131 | 132 | def __init__(self, def_path, phase='test'): 133 | ''' 134 | def_path: Path to the model definition (.prototxt) 135 | data_path: Path to the model data (.caffemodel) 136 | phase: Either 'test' or 'train'. Used for filtering phase-specific nodes. 
137 | ''' 138 | self.def_path = def_path 139 | self.phase = phase 140 | self.load() 141 | 142 | def load(self): 143 | '''Load the layer definitions from the prototxt.''' 144 | self.params = get_caffe_resolver().NetParameter() 145 | with open(self.def_path, 'rb') as def_file: 146 | text_format.Merge(def_file.read(), self.params) 147 | 148 | def filter_layers(self, layers): 149 | '''Filter out layers based on the current phase.''' 150 | phase_map = {0: 'train', 1: 'test'} 151 | filtered_layer_names = set() 152 | filtered_layers = [] 153 | for layer in layers: 154 | phase = self.phase 155 | if len(layer.include): 156 | phase = phase_map[layer.include[0].phase] 157 | if len(layer.exclude): 158 | phase = phase_map[1 - layer.include[0].phase] 159 | exclude = (phase != self.phase) 160 | # Dropout layers appear in a fair number of Caffe 161 | # test-time networks. These are just ignored. We'll 162 | # filter them out here. 163 | if (not exclude) and (phase == 'test'): 164 | exclude = (layer.type == LayerType.Dropout) 165 | if not exclude: 166 | filtered_layers.append(layer) 167 | # Guard against dupes. 168 | assert layer.name not in filtered_layer_names 169 | filtered_layer_names.add(layer.name) 170 | return filtered_layers 171 | 172 | def make_node(self, layer): 173 | '''Create a graph node for the given layer.''' 174 | kind = NodeKind.map_raw_kind(layer.type) 175 | if kind is None: 176 | raise KaffeError('Unknown layer type encountered: %s' % layer.type) 177 | # We want to use the layer's top names (the "output" names), rather than the 178 | # name attribute, which is more of readability thing than a functional one. 179 | # Other layers will refer to a node by its "top name". 180 | return Node(layer.name, kind, layer=layer) 181 | 182 | def make_input_nodes(self): 183 | ''' 184 | Create data input nodes. 185 | 186 | This method is for old-style inputs, where the input specification 187 | was not treated as a first-class layer in the prototext. 188 | Newer models use the "Input layer" type. 189 | ''' 190 | nodes = [Node(name, NodeKind.Data) for name in self.params.input] 191 | if len(nodes): 192 | input_dim = map(int, self.params.input_dim) 193 | if not input_dim: 194 | if len(self.params.input_shape) > 0: 195 | input_dim = map(int, self.params.input_shape[0].dim) 196 | else: 197 | raise KaffeError('Dimensions for input not specified.') 198 | for node in nodes: 199 | node.output_shape = tuple(input_dim) 200 | return nodes 201 | 202 | def build(self): 203 | ''' 204 | Builds the graph from the Caffe layer definitions. 205 | ''' 206 | # Get the layers 207 | layers = self.params.layers or self.params.layer 208 | # Filter out phase-excluded layers 209 | layers = self.filter_layers(layers) 210 | # Get any separately-specified input layers 211 | nodes = self.make_input_nodes() 212 | nodes += [self.make_node(layer) for layer in layers] 213 | # Initialize the graph 214 | graph = Graph(nodes=nodes, name=self.params.name) 215 | # Connect the nodes 216 | # 217 | # A note on layers and outputs: 218 | # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs 219 | # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom 220 | # (in case of in-place operations). Note that the layer's name is not used for establishing 221 | # any connectivity. It's only used for data association. By convention, a layer with a 222 | # single top will often use the same name (although this is not required). 
223 | # 224 | # The current implementation only supports single-output nodes (note that a node can still 225 | # have multiple children, since multiple child nodes can refer to the single top's name). 226 | node_outputs = {} 227 | for layer in layers: 228 | node = graph.get_node(layer.name) 229 | for input_name in layer.bottom: 230 | assert input_name != layer.name 231 | parent_node = node_outputs.get(input_name) 232 | if (parent_node is None) or (parent_node == node): 233 | parent_node = graph.get_node(input_name) 234 | node.add_parent(parent_node) 235 | if len(layer.top)>1: 236 | raise KaffeError('Multiple top nodes are not supported.') 237 | for output_name in layer.top: 238 | if output_name == layer.name: 239 | # Output is named the same as the node. No further action required. 240 | continue 241 | # There are two possibilities here: 242 | # 243 | # Case 1: output_name refers to another node in the graph. 244 | # This is an "in-place operation" that overwrites an existing node. 245 | # This would create a cycle in the graph. We'll undo the in-placing 246 | # by substituting this node wherever the overwritten node is referenced. 247 | # 248 | # Case 2: output_name violates the convention layer.name == output_name. 249 | # Since we are working in the single-output regime, we will can rename it to 250 | # match the layer name. 251 | # 252 | # For both cases, future references to this top re-routes to this node. 253 | node_outputs[output_name] = node 254 | 255 | graph.compute_output_shapes() 256 | return graph 257 | 258 | 259 | class NodeMapper(NodeDispatch): 260 | 261 | def __init__(self, graph): 262 | self.graph = graph 263 | 264 | def map(self): 265 | nodes = self.graph.topologically_sorted() 266 | # Remove input nodes - we'll handle them separately. 267 | input_nodes = self.graph.get_input_nodes() 268 | nodes = [t for t in nodes if t not in input_nodes] 269 | # Decompose DAG into chains. 270 | chains = [] 271 | for node in nodes: 272 | attach_to_chain = None 273 | if len(node.parents) == 1: 274 | parent = node.get_only_parent() 275 | for chain in chains: 276 | if chain[-1] == parent: 277 | # Node is part of an existing chain. 278 | attach_to_chain = chain 279 | break 280 | if attach_to_chain is None: 281 | # Start a new chain for this node. 282 | attach_to_chain = [] 283 | chains.append(attach_to_chain) 284 | attach_to_chain.append(node) 285 | # Map each chain. 
286 | mapped_chains = [] 287 | for chain in chains: 288 | mapped_chains.append(self.map_chain(chain)) 289 | return self.commit(mapped_chains) 290 | 291 | def map_chain(self, chain): 292 | return [self.map_node(node) for node in chain] 293 | 294 | def map_node(self, node): 295 | map_func = self.get_handler(node.kind, 'map') 296 | mapped_node = map_func(node) 297 | assert mapped_node is not None 298 | mapped_node.node = node 299 | return mapped_node 300 | 301 | def commit(self, mapped_chains): 302 | raise NotImplementedError('Must be implemented by subclass.') 303 | -------------------------------------------------------------------------------- /kaffe/layers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numbers 3 | from collections import namedtuple 4 | 5 | from .shapes import * 6 | 7 | LAYER_DESCRIPTORS = { 8 | 9 | # Caffe Types 10 | 'AbsVal': shape_identity, 11 | 'Accuracy': shape_scalar, 12 | 'ArgMax': shape_not_implemented, 13 | 'BatchNorm': shape_identity, 14 | 'BNLL': shape_not_implemented, 15 | 'Concat': shape_concat, 16 | 'ContrastiveLoss': shape_scalar, 17 | 'Convolution': shape_convolution, 18 | 'Deconvolution': shape_not_implemented, 19 | 'Data': shape_data, 20 | 'Dropout': shape_identity, 21 | 'DummyData': shape_data, 22 | 'EuclideanLoss': shape_scalar, 23 | 'Eltwise': shape_identity, 24 | 'Exp': shape_identity, 25 | 'Flatten': shape_not_implemented, 26 | 'HDF5Data': shape_data, 27 | 'HDF5Output': shape_identity, 28 | 'HingeLoss': shape_scalar, 29 | 'Im2col': shape_not_implemented, 30 | 'ImageData': shape_data, 31 | 'InfogainLoss': shape_scalar, 32 | 'InnerProduct': shape_inner_product, 33 | 'Input': shape_data, 34 | 'LRN': shape_identity, 35 | 'MemoryData': shape_mem_data, 36 | 'MultinomialLogisticLoss': shape_scalar, 37 | 'MVN': shape_not_implemented, 38 | 'Pooling': shape_pool, 39 | 'Power': shape_identity, 40 | 'ReLU': shape_identity, 41 | 'Scale': shape_identity, 42 | 'Sigmoid': shape_identity, 43 | 'SigmoidCrossEntropyLoss': shape_scalar, 44 | 'Silence': shape_not_implemented, 45 | 'Softmax': shape_identity, 46 | 'SoftmaxWithLoss': shape_scalar, 47 | 'Split': shape_not_implemented, 48 | 'Slice': shape_not_implemented, 49 | 'TanH': shape_identity, 50 | 'WindowData': shape_not_implemented, 51 | 'Threshold': shape_identity, 52 | } 53 | 54 | LAYER_TYPES = LAYER_DESCRIPTORS.keys() 55 | 56 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES}) 57 | 58 | class NodeKind(LayerType): 59 | 60 | @staticmethod 61 | def map_raw_kind(kind): 62 | if kind in LAYER_TYPES: 63 | return kind 64 | return None 65 | 66 | @staticmethod 67 | def compute_output_shape(node): 68 | try: 69 | val = LAYER_DESCRIPTORS[node.kind](node) 70 | return val 71 | except NotImplementedError: 72 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind) 73 | 74 | 75 | class NodeDispatchError(KaffeError): 76 | 77 | pass 78 | 79 | 80 | class NodeDispatch(object): 81 | 82 | @staticmethod 83 | def get_handler_name(node_kind): 84 | if len(node_kind) <= 4: 85 | # A catch-all for things like ReLU and tanh 86 | return node_kind.lower() 87 | # Convert from CamelCase to under_scored 88 | name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind) 89 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 90 | 91 | def get_handler(self, node_kind, prefix): 92 | name = self.get_handler_name(node_kind) 93 | name = '_'.join((prefix, name)) 94 | try: 95 | return getattr(self, name) 96 | except AttributeError: 97 | raise 
NodeDispatchError('No handler found for node kind: %s (expected: %s)' %
98 |                                     (node_kind, name))
99 | 
100 | 
101 | class LayerAdapter(object):
102 | 
103 |     def __init__(self, layer, kind):
104 |         self.layer = layer
105 |         self.kind = kind
106 | 
107 |     @property
108 |     def parameters(self):
109 |         name = NodeDispatch.get_handler_name(self.kind)
110 |         name = '_'.join((name, 'param'))
111 |         try:
112 |             return getattr(self.layer, name)
113 |         except AttributeError:
114 |             raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind))
115 | 
116 |     @staticmethod
117 |     def get_kernel_value(scalar, repeated, idx, default=None):
118 |         if scalar:
119 |             return scalar
120 |         if repeated:
121 |             if isinstance(repeated, numbers.Number):
122 |                 return repeated
123 |             if len(repeated) == 1:
124 |                 # Same value applies to all spatial dimensions
125 |                 return int(repeated[0])
126 |             assert idx < len(repeated)
127 |             # Extract the value for the given spatial dimension
128 |             return repeated[idx]
129 |         if default is None:
130 |             raise ValueError('Unable to determine kernel parameter!')
131 |         return default
132 | 
133 |     @property
134 |     def kernel_parameters(self):
135 |         assert self.kind in (NodeKind.Convolution, NodeKind.Pooling)
136 |         params = self.parameters
137 |         k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
138 |         k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
139 |         s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1)
140 |         s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1)
141 |         p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
142 |         p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
143 |         return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
144 | 
145 | 
146 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w',
147 |                                                    'pad_h', 'pad_w'])
148 | 
--------------------------------------------------------------------------------
/kaffe/shapes.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import namedtuple
3 | 
4 | from .errors import KaffeError
5 | 
6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width'])
7 | 
8 | 
9 | def get_filter_output_shape(i_h, i_w, params, round_func):
10 |     o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1
11 |     o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1
12 |     return (int(round_func(o_h)), int(round_func(o_w)))
13 | 
14 | 
15 | def get_strided_kernel_output_shape(node, round_func):
16 |     assert node.layer is not None
17 |     input_shape = node.get_only_parent().output_shape
18 |     o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width,
19 |                                        node.layer.kernel_parameters, round_func)
20 |     params = node.layer.parameters
21 |     has_c_o = hasattr(params, 'num_output')
22 |     c = params.num_output if has_c_o else input_shape.channels
23 |     return TensorShape(input_shape.batch_size, c, o_h, o_w)
24 | 
25 | 
26 | def shape_not_implemented(node):
27 |     raise NotImplementedError
28 | 
29 | 
30 | def shape_identity(node):
31 |     assert len(node.parents) > 0
32 |     return node.parents[0].output_shape
33 | 
34 | 
35 | def shape_scalar(node):
36 |     return TensorShape(1, 1, 1, 1)
37 | 
38 | 
39 | def shape_data(node):
40 |     if node.output_shape:
41 |         # Old-style input specification
42 |         return node.output_shape
43 |     try:
44 |         # New-style input specification
45 |         return map(int, 
node.parameters.shape[0].dim) 46 | except: 47 | # We most likely have a data layer on our hands. The problem is, 48 | # Caffe infers the dimensions of the data from the source (eg: LMDB). 49 | # We want to avoid reading datasets here. Fail for now. 50 | # This can be temporarily fixed by transforming the data layer to 51 | # Caffe's "input" layer (as is usually used in the "deploy" version). 52 | # TODO: Find a better solution for this. 53 | raise KaffeError('Cannot determine dimensions of data layer.\n' 54 | 'See comments in function shape_data for more info.') 55 | 56 | 57 | def shape_mem_data(node): 58 | params = node.parameters 59 | return TensorShape(params.batch_size, params.channels, params.height, params.width) 60 | 61 | 62 | def shape_concat(node): 63 | axis = node.layer.parameters.axis 64 | output_shape = None 65 | for parent in node.parents: 66 | if output_shape is None: 67 | output_shape = list(parent.output_shape) 68 | else: 69 | output_shape[axis] += parent.output_shape[axis] 70 | return tuple(output_shape) 71 | 72 | 73 | def shape_convolution(node): 74 | return get_strided_kernel_output_shape(node, math.floor) 75 | 76 | 77 | def shape_pool(node): 78 | return get_strided_kernel_output_shape(node, math.ceil) 79 | 80 | 81 | def shape_inner_product(node): 82 | input_shape = node.get_only_parent().output_shape 83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1) 84 | -------------------------------------------------------------------------------- /kaffe/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import TensorFlowTransformer 2 | from .network import Network 3 | -------------------------------------------------------------------------------- /kaffe/tensorflow/network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | 5 | DEFAULT_PADDING = 'SAME' 6 | DEBUG_SIZES = False 7 | 8 | def layer(op): 9 | '''Decorator for composable network layers.''' 10 | 11 | def layer_decorated(self, *args, **kwargs): 12 | # Automatically set a name if not provided. 13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 14 | # Figure out the layer inputs. 15 | if len(self.terminals) == 0: 16 | raise RuntimeError('No input variables found for layer %s.' % name) 17 | elif len(self.terminals) == 1: 18 | layer_input = self.terminals[0] 19 | if DEBUG_SIZES: print(layer_input.get_shape()) 20 | else: 21 | layer_input = list(self.terminals) 22 | if DEBUG_SIZES: print([i.get_shape() for i in layer_input]) 23 | # Perform the operation and get the output. 24 | layer_output = op(self, layer_input, *args, **kwargs) 25 | # Add to layer LUT. 26 | self.layers[name] = layer_output 27 | # This output is now the input for the next layer. 28 | self.feed(layer_output) 29 | # Return self for chained calls. 
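        # Chaining is what lets generated models read as one fluent expression,
        # e.g. (an illustrative sketch, not a line from a generated file):
        #     (self.feed('data')
        #          .conv(7, 7, 64, 2, 2, name='conv1')
        #          .max_pool(3, 3, 2, 2, name='pool1'))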
30 | return self 31 | 32 | return layer_decorated 33 | 34 | 35 | class Network(object): 36 | 37 | def __init__(self, inputs, trainable=True, is_training=False): 38 | # The input nodes for this network 39 | self.inputs = inputs 40 | # The current list of terminal nodes 41 | self.terminals = [] 42 | # Mapping from layer names to layers 43 | self.layers = dict(inputs) 44 | # If true, the resulting variables are set as trainable 45 | self.trainable = trainable 46 | # Switch variable for dropout 47 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0), 48 | shape=[], 49 | name='use_dropout') 50 | self.setup(is_training) 51 | 52 | def setup(self, is_training): 53 | '''Construct the network. ''' 54 | raise NotImplementedError('Must be implemented by the subclass.') 55 | 56 | def load(self, data_path, session, ignore_missing=False): 57 | '''Load network weights. 58 | data_path: The path to the numpy-serialized network weights 59 | session: The current TensorFlow session 60 | ignore_missing: If true, serialized weights for missing layers are ignored. 61 | ''' 62 | data_dict = np.load(data_path).item() 63 | for op_name in data_dict: 64 | with tf.variable_scope(op_name, reuse=True): 65 | for param_name, data in data_dict[op_name].iteritems(): 66 | try: 67 | var = tf.get_variable(param_name) 68 | session.run(var.assign(data)) 69 | except ValueError: 70 | if not ignore_missing: 71 | raise 72 | 73 | def feed(self, *args): 74 | '''Set the input(s) for the next operation by replacing the terminal nodes. 75 | The arguments can be either layer names or the actual layers. 76 | ''' 77 | assert len(args) != 0 78 | self.terminals = [] 79 | for fed_layer in args: 80 | if isinstance(fed_layer, basestring): 81 | try: 82 | fed_layer = self.layers[fed_layer] 83 | except KeyError: 84 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 85 | self.terminals.append(fed_layer) 86 | return self 87 | 88 | def get_output(self): 89 | '''Returns the current network output.''' 90 | return self.terminals[-1] 91 | 92 | def get_unique_name(self, prefix): 93 | '''Returns an index-suffixed unique name for the given prefix. 94 | This is used for auto-generating layer names based on the type-prefix. 95 | ''' 96 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 97 | return '%s_%d' % (prefix, ident) 98 | 99 | def make_var(self, name, shape): 100 | '''Creates a new TensorFlow variable.''' 101 | return tf.get_variable(name, shape, trainable=self.trainable) 102 | 103 | def validate_padding(self, padding): 104 | '''Verifies that the padding is one of the supported ones.''' 105 | assert padding in ('SAME', 'VALID') 106 | 107 | @layer 108 | def conv(self, 109 | input, 110 | k_h, 111 | k_w, 112 | c_o, 113 | s_h, 114 | s_w, 115 | name, 116 | relu=True, 117 | padding=DEFAULT_PADDING, 118 | group=1, 119 | biased=True): 120 | # Verify that the padding is acceptable 121 | self.validate_padding(padding) 122 | # Get the number of channels in the input 123 | c_i = input.get_shape()[-1] 124 | # Verify that the grouping parameter is valid 125 | assert c_i % group == 0 126 | assert c_o % group == 0 127 | # Convolution for a given input and kernel 128 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 129 | with tf.variable_scope(name) as scope: 130 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 131 | if group == 1: 132 | # This is the common-case. Convolve the input without any further complications. 
133 | output = convolve(input, kernel) 134 | else: 135 | # Split the input into groups and then convolve each of them independently 136 | input_groups = tf.split(3, group, input) 137 | kernel_groups = tf.split(3, group, kernel) 138 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 139 | # Concatenate the groups 140 | output = tf.concat(3, output_groups) 141 | # Add the biases 142 | if biased: 143 | biases = self.make_var('biases', [c_o]) 144 | output = tf.nn.bias_add(output, biases) 145 | if relu: 146 | # ReLU non-linearity 147 | output = tf.nn.relu(output, name=scope.name) 148 | return output 149 | 150 | @layer 151 | def atrous_conv(self, 152 | input, 153 | k_h, 154 | k_w, 155 | c_o, 156 | dilation, 157 | name, 158 | relu=True, 159 | padding=DEFAULT_PADDING, 160 | group=1, 161 | biased=True): 162 | # Verify that the padding is acceptable 163 | self.validate_padding(padding) 164 | # Get the number of channels in the input 165 | c_i = input.get_shape()[-1] 166 | # Verify that the grouping parameter is valid 167 | assert c_i % group == 0 168 | assert c_o % group == 0 169 | # Convolution for a given input and kernel 170 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding) 171 | with tf.variable_scope(name) as scope: 172 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 173 | if group == 1: 174 | # This is the common-case. Convolve the input without any further complications. 175 | output = convolve(input, kernel) 176 | else: 177 | # Split the input into groups and then convolve each of them independently 178 | input_groups = tf.split(3, group, input) 179 | kernel_groups = tf.split(3, group, kernel) 180 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 181 | # Concatenate the groups 182 | output = tf.concat(3, output_groups) 183 | # Add the biases 184 | if biased: 185 | biases = self.make_var('biases', [c_o]) 186 | output = tf.nn.bias_add(output, biases) 187 | if relu: 188 | # ReLU non-linearity 189 | output = tf.nn.relu(output, name=scope.name) 190 | return output 191 | 192 | @layer 193 | def relu(self, input, name): 194 | return tf.nn.relu(input, name=name) 195 | 196 | @layer 197 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 198 | self.validate_padding(padding) 199 | return tf.nn.max_pool(input, 200 | ksize=[1, k_h, k_w, 1], 201 | strides=[1, s_h, s_w, 1], 202 | padding=padding, 203 | name=name) 204 | 205 | @layer 206 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 207 | self.validate_padding(padding) 208 | return tf.nn.avg_pool(input, 209 | ksize=[1, k_h, k_w, 1], 210 | strides=[1, s_h, s_w, 1], 211 | padding=padding, 212 | name=name) 213 | 214 | @layer 215 | def lrn(self, input, radius, alpha, beta, name, bias=1.0): 216 | return tf.nn.local_response_normalization(input, 217 | depth_radius=radius, 218 | alpha=alpha, 219 | beta=beta, 220 | bias=bias, 221 | name=name) 222 | 223 | @layer 224 | def concat(self, inputs, axis, name): 225 | return tf.concat(concat_dim=axis, values=inputs, name=name) 226 | 227 | @layer 228 | def add(self, inputs, name): 229 | return tf.add_n(inputs, name=name) 230 | 231 | @layer 232 | def fc(self, input, num_out, name, relu=True): 233 | with tf.variable_scope(name) as scope: 234 | input_shape = input.get_shape() 235 | if input_shape.ndims == 4: 236 | # The input is spatial. Vectorize it first. 
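                # e.g. an NHWC tensor of shape (N, 7, 7, 512) flattens to (N, 7 * 7 * 512) = (N, 25088)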
237 | dim = 1 238 | for d in input_shape[1:].as_list(): 239 | dim *= d 240 | feed_in = tf.reshape(input, [-1, dim]) 241 | else: 242 | feed_in, dim = (input, input_shape[-1].value) 243 | weights = self.make_var('weights', shape=[dim, num_out]) 244 | biases = self.make_var('biases', [num_out]) 245 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 246 | fc = op(feed_in, weights, biases, name=scope.name) 247 | return fc 248 | 249 | @layer 250 | def softmax(self, input, name): 251 | input_shape = map(lambda v: v.value, input.get_shape()) 252 | if len(input_shape) > 2: 253 | # For certain models (like NiN), the singleton spatial dimensions 254 | # need to be explicitly squeezed, since they're not broadcast-able 255 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). 256 | if input_shape[1] == 1 and input_shape[2] == 1: 257 | input = tf.squeeze(input, squeeze_dims=[1, 2]) 258 | else: 259 | raise ValueError('Rank 2 tensor input expected for softmax!') 260 | return tf.nn.softmax(input, name) 261 | 262 | @layer 263 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True): 264 | with tf.variable_scope(name) as scope: 265 | output = slim.batch_norm( 266 | input, 267 | activation_fn=activation_fn, 268 | is_training=is_training, 269 | updates_collections=None, 270 | scale=scale, 271 | scope=scope) 272 | return output 273 | 274 | @layer 275 | def dropout(self, input, keep_prob, name): 276 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob) 277 | return tf.nn.dropout(input, keep, name=name) 278 | -------------------------------------------------------------------------------- /kaffe/tensorflow/transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..errors import KaffeError, print_stderr 4 | from ..graph import GraphBuilder, NodeMapper 5 | from ..layers import NodeKind 6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, 7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer) 8 | 9 | from . import network 10 | 11 | 12 | def get_padding_type(kernel_params, input_shape, output_shape): 13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID'). 14 | Caffe supports arbitrary padding values, while TensorFlow only 15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings 16 | can be translated to TensorFlow. There are some subtleties to 17 | how the padding edge-cases are handled. 
These are described here: 18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto 19 | ''' 20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params 21 | s_o_h = np.ceil(input_shape.height / float(s_h)) 22 | s_o_w = np.ceil(input_shape.width / float(s_w)) 23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w): 24 | return 'SAME' 25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h)) 26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w)) 27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w): 28 | return 'VALID' 29 | return None 30 | 31 | 32 | class TensorFlowNode(object): 33 | '''An intermediate representation for TensorFlow operations.''' 34 | 35 | def __init__(self, op, *args, **kwargs): 36 | # A string corresponding to the TensorFlow operation 37 | self.op = op 38 | # Positional arguments for the operation 39 | self.args = args 40 | # Keyword arguments for the operation 41 | self.kwargs = list(kwargs.items()) 42 | # The source Caffe node 43 | self.node = None 44 | 45 | def format(self, arg): 46 | '''Returns a string representation for the given value.''' 47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg) 48 | 49 | def pair(self, key, value): 50 | '''Returns key=formatted(value).''' 51 | return '%s=%s' % (key, self.format(value)) 52 | 53 | def emit(self): 54 | '''Emits the Python source for this node.''' 55 | # Format positional arguments 56 | args = map(self.format, self.args) 57 | # Format any keyword arguments 58 | if self.kwargs: 59 | args += [self.pair(k, v) for k, v in self.kwargs] 60 | # Set the node name 61 | args.append(self.pair('name', self.node.name)) 62 | args = ', '.join(args) 63 | return '%s(%s)' % (self.op, args) 64 | 65 | 66 | class MaybeActivated(object): 67 | 68 | def __init__(self, node, default=True): 69 | self.inject_kwargs = {} 70 | if node.metadata.get('relu', False) != default: 71 | self.inject_kwargs['relu'] = not default 72 | 73 | def __call__(self, *args, **kwargs): 74 | kwargs.update(self.inject_kwargs) 75 | return TensorFlowNode(*args, **kwargs) 76 | 77 | 78 | class TensorFlowMapper(NodeMapper): 79 | 80 | def get_kernel_params(self, node): 81 | kernel_params = node.layer.kernel_parameters 82 | input_shape = node.get_only_parent().output_shape 83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape) 84 | # Only emit the padding if it's not the default value. 85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {} 86 | return (kernel_params, padding) 87 | 88 | def map_convolution(self, node): 89 | (kernel_params, kwargs) = self.get_kernel_params(node) 90 | h = kernel_params.kernel_h 91 | w = kernel_params.kernel_w 92 | c_o = node.output_shape[1] 93 | c_i = node.parents[0].output_shape[1] 94 | group = node.parameters.group 95 | if group != 1: 96 | kwargs['group'] = group 97 | if not node.parameters.bias_term: 98 | kwargs['biased'] = False 99 | assert kernel_params.kernel_h == h 100 | assert kernel_params.kernel_w == w 101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, 102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs) 103 | 104 | def map_relu(self, node): 105 | return TensorFlowNode('relu') 106 | 107 | def map_pooling(self, node): 108 | pool_type = node.parameters.pool 109 | if pool_type == 0: 110 | pool_op = 'max_pool' 111 | elif pool_type == 1: 112 | pool_op = 'avg_pool' 113 | else: 114 | # Stochastic pooling, for instance. 
115 |             raise KaffeError('Unsupported pooling type.')
116 |         (kernel_params, padding) = self.get_kernel_params(node)
117 |         return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w,
118 |                               kernel_params.stride_h, kernel_params.stride_w, **padding)
119 | 
120 |     def map_inner_product(self, node):
121 |         #TODO: Axis
122 |         assert node.parameters.axis == 1
123 |         #TODO: Unbiased
124 |         assert node.parameters.bias_term == True
125 |         return MaybeActivated(node)('fc', node.parameters.num_output)
126 | 
127 |     def map_softmax(self, node):
128 |         return TensorFlowNode('softmax')
129 | 
130 |     def map_lrn(self, node):
131 |         params = node.parameters
132 |         # The window size must be an odd value. For a window
133 |         # size of (2*n+1), TensorFlow defines depth_radius = n.
134 |         assert params.local_size % 2 == 1
135 |         # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow
136 |         # just scales by alpha (as does Krizhevsky's paper).
137 |         # We'll account for that here.
138 |         alpha = params.alpha / float(params.local_size)
139 |         return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta)
140 | 
141 |     def map_concat(self, node):
142 |         axis = (2, 3, 1, 0)[node.parameters.axis]
143 |         return TensorFlowNode('concat', axis)
144 | 
145 |     def map_dropout(self, node):
146 |         return TensorFlowNode('dropout', node.parameters.dropout_ratio)
147 | 
148 |     def map_batch_norm(self, node):
149 |         scale_offset = len(node.data) == 4
150 |         kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False}
151 |         return MaybeActivated(node, default=False)('batch_normalization', **kwargs)
152 | 
153 |     def map_eltwise(self, node):
154 |         operations = {0: 'multiply', 1: 'add', 2: 'max'}
155 |         op_code = node.parameters.operation
156 |         try:
157 |             return TensorFlowNode(operations[op_code])
158 |         except KeyError:
159 |             raise KaffeError('Unknown elementwise operation: {}'.format(op_code))
160 | 
161 |     def commit(self, chains):
162 |         return chains
163 | 
164 | 
165 | class TensorFlowEmitter(object):
166 | 
167 |     def __init__(self, tab=None):
168 |         self.tab = tab or ' ' * 4
169 |         self.prefix = ''
170 | 
171 |     def indent(self):
172 |         self.prefix += self.tab
173 | 
174 |     def outdent(self):
175 |         self.prefix = self.prefix[:-len(self.tab)]
176 | 
177 |     def statement(self, s):
178 |         return self.prefix + s + '\n'
179 | 
180 |     def emit_imports(self):
181 |         return self.statement('from kaffe.tensorflow import Network\n')
182 | 
183 |     def emit_class_def(self, name):
184 |         return self.statement('class %s(Network):' % (name))
185 | 
186 |     def emit_setup_def(self):
187 |         return self.statement('def setup(self, is_training):')
188 | 
189 |     def emit_parents(self, chain):
190 |         assert len(chain)
191 |         s = '(self.feed('
192 |         sep = ', \n' + self.prefix + (' ' * len(s))
193 |         s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents])
194 |         return self.statement(s + ')')
195 | 
196 |     def emit_node(self, node):
197 |         return self.statement(' ' * 5 + '.' 
+ node.emit()) 198 | 199 | def emit(self, name, chains): 200 | s = self.emit_imports() 201 | s += self.emit_class_def(name) 202 | self.indent() 203 | s += self.emit_setup_def() 204 | self.indent() 205 | blocks = [] 206 | for chain in chains: 207 | b = '' 208 | b += self.emit_parents(chain) 209 | for node in chain: 210 | b += self.emit_node(node) 211 | blocks.append(b[:-1] + ')') 212 | s = s + '\n\n'.join(blocks) 213 | return s 214 | 215 | 216 | class TensorFlowTransformer(object): 217 | 218 | def __init__(self, def_path, data_path, verbose=True, phase='test'): 219 | self.verbose = verbose 220 | self.phase = phase 221 | self.load(def_path, data_path, phase) 222 | self.params = None 223 | self.source = None 224 | 225 | def load(self, def_path, data_path, phase): 226 | # Build the graph 227 | graph = GraphBuilder(def_path, phase).build() 228 | 229 | if data_path is not None: 230 | # Load and associate learned parameters 231 | graph = DataInjector(def_path, data_path)(graph) 232 | 233 | # Transform the graph 234 | transformers = [ 235 | # Fuse split batch normalization layers 236 | BatchNormScaleBiasFuser(), 237 | 238 | # Fuse ReLUs 239 | # TODO: Move non-linearity application to layer wrapper, allowing 240 | # any arbitrary operation to be optionally activated. 241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, 242 | NodeKind.BatchNorm]), 243 | 244 | # Rename nodes 245 | # Slashes are used for scoping in TensorFlow. Replace slashes 246 | # in node names with underscores. 247 | # (Caffe's GoogLeNet implementation uses slashes) 248 | NodeRenamer(lambda node: node.name.replace('/', '_')) 249 | ] 250 | self.graph = graph.transformed(transformers) 251 | 252 | # Display the graph 253 | if self.verbose: 254 | print_stderr(self.graph) 255 | 256 | def transform_data(self): 257 | if self.params is None: 258 | transformers = [ 259 | 260 | # Reshape the parameters to TensorFlow's ordering 261 | DataReshaper({ 262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o) 263 | NodeKind.Convolution: (2, 3, 1, 0), 264 | 265 | # (c_o, c_i) -> (c_i, c_o) 266 | NodeKind.InnerProduct: (1, 0) 267 | }), 268 | 269 | # Pre-process batch normalization data 270 | BatchNormPreprocessor(), 271 | 272 | # Convert parameters to dictionaries 273 | ParameterNamer(), 274 | ] 275 | self.graph = self.graph.transformed(transformers) 276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data} 277 | return self.params 278 | 279 | def transform_source(self): 280 | if self.source is None: 281 | mapper = TensorFlowMapper(self.graph) 282 | chains = mapper.map() 283 | emitter = TensorFlowEmitter() 284 | self.source = emitter.emit(self.graph.name, chains) 285 | return self.source 286 | -------------------------------------------------------------------------------- /kaffe/transformers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A collection of graph transforms. 3 | 4 | A transformer is a callable that accepts a graph and returns a transformed version. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | from .caffe import get_caffe_resolver, has_pycaffe 10 | from .errors import KaffeError, print_stderr 11 | from .layers import NodeKind 12 | 13 | 14 | class DataInjector(object): 15 | ''' 16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
17 | ''' 18 | 19 | def __init__(self, def_path, data_path): 20 | # The .prototxt file defining the graph 21 | self.def_path = def_path 22 | # The .caffemodel file containing the learned parameters 23 | self.data_path = data_path 24 | # Set to true if the fallback protocol-buffer based backend was used 25 | self.did_use_pb = False 26 | # A list containing (layer name, parameters) tuples 27 | self.params = None 28 | # Load the parameters 29 | self.load() 30 | 31 | def load(self): 32 | if has_pycaffe(): 33 | self.load_using_caffe() 34 | else: 35 | self.load_using_pb() 36 | 37 | def load_using_caffe(self): 38 | caffe = get_caffe_resolver().caffe 39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST) 40 | data = lambda blob: blob.data 41 | self.params = [(k, map(data, v)) for k, v in net.params.items()] 42 | 43 | def load_using_pb(self): 44 | data = get_caffe_resolver().NetParameter() 45 | data.MergeFromString(open(self.data_path, 'rb').read()) 46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer)) 47 | layers = data.layers or data.layer 48 | self.params = [pair(layer) for layer in layers if layer.blobs] 49 | self.did_use_pb = True 50 | 51 | def normalize_pb_data(self, layer): 52 | transformed = [] 53 | for blob in layer.blobs: 54 | if len(blob.shape.dim): 55 | dims = blob.shape.dim 56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) 57 | else: 58 | c_o = blob.num 59 | c_i = blob.channels 60 | h = blob.height 61 | w = blob.width 62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w) 63 | transformed.append(data) 64 | return transformed 65 | 66 | def adjust_parameters(self, node, data): 67 | if not self.did_use_pb: 68 | return data 69 | # When using the protobuf-backend, each parameter initially has four dimensions. 70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions. 71 | # This implementation takes care of the common cases. However, it does leave the 72 | # potential for future issues. 73 | # The Caffe-backend does not suffer from this problem. 74 | data = list(data) 75 | squeeze_indices = [1] # Squeeze biases. 76 | if node.kind == NodeKind.InnerProduct: 77 | squeeze_indices.append(0) # Squeeze FC. 78 | for idx in squeeze_indices: 79 | data[idx] = np.squeeze(data[idx]) 80 | return data 81 | 82 | def __call__(self, graph): 83 | for layer_name, data in self.params: 84 | if layer_name in graph: 85 | node = graph.get_node(layer_name) 86 | node.data = self.adjust_parameters(node, data) 87 | else: 88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name) 89 | return graph 90 | 91 | 92 | class DataReshaper(object): 93 | 94 | def __init__(self, mapping, replace=True): 95 | # A dictionary mapping NodeKind to the transposed order. 96 | self.mapping = mapping 97 | # The node kinds eligible for reshaping 98 | self.reshaped_node_types = self.mapping.keys() 99 | # If true, the reshaped data will replace the old one. 100 | # Otherwise, it's set to the reshaped_data attribute. 
101 |         self.replace = replace
102 | 
103 |     def has_spatial_parent(self, node):
104 |         try:
105 |             parent = node.get_only_parent()
106 |             s = parent.output_shape
107 |             return s.height > 1 or s.width > 1
108 |         except KaffeError:
109 |             return False
110 | 
111 |     def map(self, node_kind):
112 |         try:
113 |             return self.mapping[node_kind]
114 |         except KeyError:
115 |             raise KaffeError('Ordering not found for node kind: {}'.format(node_kind))
116 | 
117 |     def __call__(self, graph):
118 |         for node in graph.nodes:
119 |             if node.data is None:
120 |                 continue
121 |             if node.kind not in self.reshaped_node_types:
122 |                 # Check for 2+ dimensional data
123 |                 if any(len(tensor.shape) > 1 for tensor in node.data):
124 |                     print_stderr('Warning: parameters not reshaped for node: {}'.format(node))
125 |                 continue
126 |             transpose_order = self.map(node.kind)
127 |             weights = node.data[0]
128 |             if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node):
129 |                 # The FC layer connected to the spatial layer needs to be
130 |                 # re-wired to match the new spatial ordering.
131 |                 in_shape = node.get_only_parent().output_shape
132 |                 fc_shape = weights.shape
133 |                 output_channels = fc_shape[0]
134 |                 weights = weights.reshape((output_channels, in_shape.channels, in_shape.height,
135 |                                            in_shape.width))
136 |                 weights = weights.transpose(self.map(NodeKind.Convolution))
137 |                 node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]],
138 |                                                      fc_shape[transpose_order[1]])
139 |             else:
140 |                 node.reshaped_data = weights.transpose(transpose_order)
141 | 
142 |         if self.replace:
143 |             for node in graph.nodes:
144 |                 if hasattr(node, 'reshaped_data'):
145 |                     # Set the weights
146 |                     node.data[0] = node.reshaped_data
147 |                     del node.reshaped_data
148 |         return graph
149 | 
150 | 
151 | class SubNodeFuser(object):
152 |     '''
153 |     An abstract helper for merging a single-child node with its single parent.
154 |     '''
155 | 
156 |     def __call__(self, graph):
157 |         nodes = graph.nodes
158 |         fused_nodes = []
159 |         for node in nodes:
160 |             if len(node.parents) != 1:
161 |                 # We're only fusing nodes with single parents
162 |                 continue
163 |             parent = node.get_only_parent()
164 |             if len(parent.children) != 1:
165 |                 # We can only fuse a node if its parent's
166 |                 # value isn't used by any other node.
167 |                 continue
168 |             if not self.is_eligible_pair(parent, node):
169 |                 continue
170 |             # Rewrite the fused node's children to its parent.
171 |             for child in node.children:
172 |                 child.parents.remove(node)
173 |                 parent.add_child(child)
174 |             # Disconnect the fused node from the graph.
175 |             parent.children.remove(node)
176 |             fused_nodes.append(node)
177 |             # Let the sub-class merge the fused node in any arbitrary way.
178 |             self.merge(parent, node)
179 |         transformed_nodes = [node for node in nodes if node not in fused_nodes]
180 |         return graph.replaced(transformed_nodes)
181 | 
182 |     def is_eligible_pair(self, parent, child):
183 |         '''Returns true if this parent/child pair is eligible for fusion.'''
184 |         raise NotImplementedError('Must be implemented by subclass.')
185 | 
186 |     def merge(self, parent, child):
187 |         '''Merge the child node into the parent.'''
188 |         raise NotImplementedError('Must be implemented by subclass.')
189 | 
190 | 
191 | class ReLUFuser(SubNodeFuser):
192 |     '''
193 |     Fuses rectified linear units with their parent nodes.
194 |     '''
195 | 
196 |     def __init__(self, allowed_parent_types=None):
197 |         # Fuse ReLUs when the parent node is one of the given types.
198 |         # If None, all node types are eligible.
199 |         self.allowed_parent_types = allowed_parent_types
200 | 
201 |     def is_eligible_pair(self, parent, child):
202 |         return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and
203 |                 child.kind == NodeKind.ReLU)
204 | 
205 |     def merge(self, parent, _):
206 |         parent.metadata['relu'] = True
207 | 
208 | 
209 | class BatchNormScaleBiasFuser(SubNodeFuser):
210 |     '''
211 |     The original batch normalization paper includes two learned
212 |     parameters: a scaling factor \gamma and a bias \beta.
213 |     Caffe's implementation does not include these two. However, it is commonly
214 |     replicated by adding a scaling+bias layer immediately after the batch norm.
215 | 
216 |     This fuser merges the scaling+bias layer with the batch norm.
217 |     '''
218 | 
219 |     def is_eligible_pair(self, parent, child):
220 |         return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and
221 |                 child.parameters.axis == 1 and child.parameters.bias_term == True)
222 | 
223 |     def merge(self, parent, child):
224 |         parent.scale_bias_node = child
225 | 
226 | 
227 | class BatchNormPreprocessor(object):
228 |     '''
229 |     Prescale batch normalization parameters.
230 |     Concatenate gamma (scale) and beta (bias) terms if set.
231 |     '''
232 | 
233 |     def __call__(self, graph):
234 |         for node in graph.nodes:
235 |             if node.kind != NodeKind.BatchNorm:
236 |                 continue
237 |             assert node.data is not None
238 |             assert len(node.data) == 3
239 |             mean, variance, scale = node.data
240 |             # Prescale the stats
241 |             scaling_factor = 1.0 / scale if scale != 0 else 0
242 |             mean *= scaling_factor
243 |             variance *= scaling_factor
244 |             # Replace with the updated values
245 |             node.data = [mean, variance]
246 |             if hasattr(node, 'scale_bias_node'):
247 |                 # Include the scale and bias terms
248 |                 gamma, beta = node.scale_bias_node.data
249 |                 node.data += [gamma, beta]
250 |         return graph
251 | 
252 | 
253 | class NodeRenamer(object):
254 |     '''
255 |     Renames nodes in the graph using a given unary function that
256 |     accepts a node and returns its new name.
257 |     '''
258 | 
259 |     def __init__(self, renamer):
260 |         self.renamer = renamer
261 | 
262 |     def __call__(self, graph):
263 |         for node in graph.nodes:
264 |             node.name = self.renamer(node)
265 |         return graph
266 | 
267 | 
268 | class ParameterNamer(object):
269 |     '''
270 |     Convert layer data arrays to a dictionary mapping parameter names to their values.
271 |     '''
272 | 
273 |     def __call__(self, graph):
274 |         for node in graph.nodes:
275 |             if node.data is None:
276 |                 continue
277 |             if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct):
278 |                 names = ('weights',)
279 |                 if node.parameters.bias_term:
280 |                     names += ('biases',)
281 |             elif node.kind == NodeKind.BatchNorm:
282 |                 names = ('moving_mean', 'moving_variance')
283 |                 if len(node.data) == 4:
284 |                     names += ('gamma', 'beta')
285 |             else:
286 |                 print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind))
287 |                 continue
288 |             assert len(names) == len(node.data)
289 |             node.data = dict(zip(names, node.data))
290 |         return graph
291 | 
--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import torch
3 | from torch.autograd import Variable
4 | import torch.nn.functional as F
5 | import numpy as np
6 | 
7 | VOID_LABEL = 255
8 | N_CLASSES = 21
9 | 
10 | def crossentropyloss(logits, label):
11 |     mask = (label.view(-1) != VOID_LABEL)
12 |     nonvoid = mask.long().sum()
13 |     if nonvoid == 0:
14 |         # only void pixels, the gradients should be 0
15 |         return logits.sum() * 0.
16 |     # if nonvoid == mask.numel():
17 |     #     # no void pixel, use builtin
18 |     #     return F.cross_entropy(logits, Variable(label))
19 |     target = label.view(-1)[mask]
20 |     C = logits.size(1)
21 |     logits = logits.permute(0, 2, 3, 1)  # B, H, W, C
22 |     logits = logits.contiguous().view(-1, C)
23 |     mask2d = mask.unsqueeze(1).expand(mask.size(0), C).contiguous().view(-1)
24 |     logits = logits[mask2d].view(-1, C)
25 |     loss = F.cross_entropy(logits, Variable(target))
26 |     return loss
27 | 
28 | class StableBCELoss(torch.nn.modules.Module):
29 |     def __init__(self):
30 |         super(StableBCELoss, self).__init__()
31 |     def forward(self, input, target):
32 |         neg_abs = - input.abs()
33 |         loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
34 |         return loss.mean()
35 | 
36 | def binaryXloss(logits, label):
37 |     mask = (label.view(-1) != VOID_LABEL)
38 |     nonvoid = mask.long().sum()
39 |     if nonvoid == 0:
40 |         # only void pixels, the gradients should be 0
41 |         return logits.sum() * 0.
42 |     # if nonvoid == mask.numel():
43 |     #     # no void pixel, use builtin
44 |     #     return F.cross_entropy(logits, Variable(label))
45 |     target = label.contiguous().view(-1)[mask]
46 |     logits = logits.contiguous().view(-1)[mask]
47 |     # loss = F.binary_cross_entropy(logits, Variable(target.float()))
48 |     loss = StableBCELoss()(logits, Variable(target.float()))
49 |     return loss
50 | 
51 | def naive_single(logit, label):
52 |     # single image
53 |     mask = (label.view(-1) != 255)
54 |     num_preds = mask.long().sum()
55 |     if num_preds == 0:
56 |         # only void pixels, the gradients should be 0
57 |         return logit.sum() * 0.
58 |     target = Variable(label.contiguous().view(-1)[mask].float())
59 |     logit = logit.contiguous().view(-1)[mask]
60 |     prob = F.sigmoid(logit)
61 |     intersect = target * prob
62 |     union = target + prob - intersect
63 |     loss = (1. - intersect / union).sum()
64 |     return loss
65 | 
66 | def hingeloss(logits, label):
67 |     mask = (label.view(-1) != 255)
68 |     num_preds = mask.long().sum()
69 |     if num_preds == 0:
70 |         # only void pixels, the gradients should be 0
71 |         return logits.sum() * 0.
72 |     target = label.contiguous().view(-1)[mask]
73 |     target = 2. * target.float() - 1.  # [target == 0] = -1
74 |     logits = logits.contiguous().view(-1)[mask]
75 |     hinge = 1./num_preds * F.relu(1. - logits * Variable(target)).sum()
76 |     return hinge
77 | 
78 | def gamma_fast(gt, permutation):
79 |     p = len(permutation)
80 |     gt = gt.gather(0, permutation)
81 |     gts = gt.sum()
82 | 
83 |     intersection = gts - gt.float().cumsum(0)
84 |     union = gts + (1 - gt).float().cumsum(0)
85 |     jaccard = 1. - intersection / union
86 | 
87 |     jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
88 |     return jaccard
89 | 
90 | def lovaszloss(logits, labels, prox=False, max_steps=20, debug={}):
91 |     # image-level Lovasz hinge
92 |     if logits.size(0) == 1:
93 |         # single image case
94 |         loss = lovasz_single(logits.squeeze(0), labels.squeeze(0), prox, max_steps, debug)
95 |     else:
96 |         losses = []
97 |         for logit, label in zip(logits, labels):
98 |             loss = lovasz_single(logit, label, prox, max_steps, debug)
99 |             losses.append(loss)
100 |         loss = sum(losses) / len(losses)
101 |     return loss
102 | 
103 | def naiveloss(logits, labels):
104 |     # image-level naive (sigmoid-relaxed) Jaccard surrogate
105 |     if logits.size(0) == 1:
106 |         # single image case
107 |         loss = naive_single(logits.squeeze(0), labels.squeeze(0))
108 |     else:
109 |         losses = []
110 |         for logit, label in zip(logits, labels):
111 |             loss = naive_single(logit, label)
112 |             losses.append(loss)
113 |         loss = sum(losses) / len(losses)
114 |     return loss
115 | 
116 | def iouloss(pred, gt):
117 |     # works for one binary pred and associated target
118 |     # make byte tensors
119 |     pred = (pred == 1)
120 |     mask = (gt != 255)
121 |     gt = (gt == 1)
122 |     union = (gt | pred)[mask].long().sum()
123 |     if not union:
124 |         return 0.
125 |     else:
126 |         intersection = (gt & pred)[mask].long().sum()
127 |         return 1. - intersection / union
128 | 
129 | def compute_step_length(x, grad, active, eps=1e-6):
130 |     # compute next intersection with an edge in the direction grad
131 |     # OR next intersection with a 0 - border
132 |     # returns delta and ind such that:
133 |     # after a step delta in the direction grad, x[ind] and x[ind+1] will be equal
134 |     delta = np.inf
135 |     ind = -1
136 |     if active > 0:
137 |         numerator = (x[:active] - x[1:active+1])  # always positive (because x is sorted)
138 |         denominator = (grad[:active] - grad[1:active+1])
139 |         # indices corresponding to negative denominator won't intersect
140 |         # also, we are not interested in indices in x that are *already equal*
141 |         valid = (denominator > eps) & (numerator > eps)
142 |         valid_indices = valid.nonzero()
143 |         intersection_times = numerator[valid] / denominator[valid]
144 |         if intersection_times.size():
145 |             delta, ind = intersection_times.min(0)
146 |             ind = valid_indices[ind]
147 |             delta, ind = delta[0], ind[0, 0]
148 |     if grad[active] > 0:
149 |         intersect_zero = x[active] / grad[active]
150 |         if intersect_zero > 0. and intersect_zero < delta:
151 |             return intersect_zero, -1
152 |     return delta, ind
153 | 
154 | def project(gam, active, members):
155 |     tovisit = set(range(active + 1))
156 |     while tovisit:
157 |         v = tovisit.pop()
158 |         if len(members[v]) > 1:
159 |             avg = 0.
160 |             for k in members[v]:
161 |                 if k != v: tovisit.remove(k)
162 |                 avg += gam[k] / len(members[v])
163 |             for k in members[v]:
164 |                 gam[k] = avg
165 |     if active + 1 < len(gam):
166 |         gam[active + 1:] = 0.
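# A worked sanity check for gamma_fast (a sketch, assuming the identity
# permutation): with gt = [1, 1, 0, 1], gts = 3, so
#   intersection = [2, 1, 1, 0], union = [3, 3, 4, 4],
#   cumulative Jaccard losses = [1/3, 2/3, 3/4, 1],
# and gamma_fast returns the increments [1/3, 1/3, 1/12, 1/4],
# which sum to the full Jaccard loss of 1.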
167 | 
168 | def find_proximal(x0, gam, lam, eps=1e-6, max_steps=20, debug={}):
169 |     # x0: sorted margins data
170 |     # gam: initial gamma_fast(target, perm)
171 |     # regularisation parameter lam
172 |     x = x0.clone()
173 |     act = (x >= eps).nonzero()
174 |     finished = False
175 |     if not act.size():
176 |         finished = True
177 |     else:
178 |         active = act[-1, 0]
179 |         members = {i: {i} for i in range(active + 1)}
180 |         if active > 0:
181 |             equal = (x[:active] - x[1:active+1]) < eps
182 |             for i, e in enumerate(equal):
183 |                 if e:
184 |                     members[i].update(members[i + 1])
185 |                     members[i + 1] = members[i]
186 |             project(gam, active, members)
187 |     step = 0
188 |     while not finished and step < max_steps and active > -1:
189 |         step += 1
190 |         res = compute_step_length(x, gam, active, eps)
191 |         delta, ind = res
192 | 
193 |         if ind == -1:
194 |             active = active - len(members[active])
195 | 
196 |         stop = torch.dot(x - x0, gam) / torch.dot(gam, gam) + 1. / lam
197 |         if 0 <= stop < delta:
198 |             delta = stop
199 |             finished = True
200 | 
201 |         x = x - delta * gam
202 |         if not finished:
203 |             if ind >= 0:
204 |                 repr = min(members[ind])
205 |                 members[repr].update(members[ind + 1])
206 |                 for m in members[ind]:
207 |                     if m != repr:
208 |                         members[m] = members[repr]
209 |                 project(gam, active, members)
210 |         if "path" in debug:
211 |             debug["path"].append(x.numpy())
212 | 
213 |     if "step" in debug:
214 |         debug["step"] = step
215 |     if "finished" in debug:
216 |         debug["finished"] = finished
217 |     return x, gam
218 | 
219 | 
220 | def lovasz_binary(margins, label, prox=False, max_steps=20, debug={}):
221 |     # 1d vector inputs
222 |     # Workaround: can't sort Variable bug
223 |     # prox: False or lambda regularization value
224 |     _, perm = torch.sort(margins.data, dim=0, descending=True)
225 |     margins_sorted = margins[perm]
226 |     grad = gamma_fast(label, perm)
227 |     loss = torch.dot(F.relu(margins_sorted), Variable(grad))
228 |     if prox is not False:
229 |         xp, gam = find_proximal(margins_sorted.data, grad, prox, max_steps=max_steps, eps=1e-6, debug=debug)
230 |         hook = margins_sorted.register_hook(lambda grad: Variable(margins_sorted.data - xp))
231 |         return loss, hook, gam
232 |     else:
233 |         return loss
234 | 
235 | 
236 | def lovasz_single(logit, label, prox=False, max_steps=20, debug={}):
237 |     # single image
238 |     mask = (label.view(-1) != 255)
239 |     num_preds = mask.long().sum()
240 |     if num_preds == 0:
241 |         # only void pixels, the gradients should be 0
242 |         return logit.sum() * 0.
243 |     target = label.contiguous().view(-1)[mask]
244 |     signs = 2. * target.float() - 1.
245 |     logit = logit.contiguous().view(-1)[mask]
246 |     margins = (1. - logit * Variable(signs))
247 |     loss = lovasz_binary(margins, target, prox, max_steps, debug=debug)
248 |     return loss
249 | 
250 | 
251 | 
--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import platform
3 | import os
4 | from os.path import join
5 | from copy import deepcopy
6 | import argparse
7 | from datasets.utils import pascal_classes
8 | 
9 | # --- settings common to train and eval ---
10 | defaults = argparse.Namespace()
11 | defaults.OUTPUT_DIR = './weights'
12 | 
13 | # --- train settings ---
14 | 
15 | defaults_train = deepcopy(defaults)
16 | defaults_train.BATCH_SIZE = 1
17 | defaults_train.GRAD_UPDATE_EVERY = 10  # gradient accumulation
18 | defaults_train.INPUT_SIZE = '321,321'
19 | defaults_train.LEARNING_RATE = 5e-4
20 | defaults_train.MOMENTUM = 0.9
21 | defaults_train.NUM_STEPS = 1000
22 | defaults_train.RANDOM_SEED = 1234
23 | defaults_train.SAVE_NUM_IMAGES = 1
24 | defaults_train.SAVE_PRED_EVERY = 2000
25 | defaults_train.DO_VAL_EVERY = 300
26 | 
27 | # --- eval settings ---
28 | defaults_eval = deepcopy(defaults)
29 | 
30 | def get_arguments(argv, mode='eval'):
31 |     """Parse all the arguments provided from the CLI.
32 | 
33 |     Returns:
34 |       The parsed arguments as an argparse.Namespace.
35 |     """
36 |     classes = pascal_classes(with_void=False)
37 |     inv_classes = pascal_classes(with_void=False, reverse=True)
38 |     def pascal_cls(s):
39 |         n_classes = len(classes)
40 |         if s in classes:
41 |             return classes[s]
42 |         elif 0 <= int(s) < n_classes:
43 |             return int(s)
44 |         raise argparse.ArgumentTypeError('Pascal classes: {}'.format(classes))
45 | 
46 |     parser = argparse.ArgumentParser(description="Deeplab-resnet-multiscale")
47 |     if mode == 'eval':
48 |         defaults = defaults_eval
49 |     elif mode == 'train':
50 |         defaults = defaults_train
51 |     parser.add_argument("--expname", type=str, required=True,
52 |                         help="Name of the experiment.")
53 |     parser.add_argument("--nickname", type=str, required=True,
54 |                         help="Name given to the run (useful for output paths and logging).")
55 |     parser.add_argument("--restore-from", type=str, required=True,
56 |                         help="Where to restore model parameters from.")
57 |     parser.add_argument("--binary", type=pascal_cls, metavar="[0-20]", default=-1,
58 |                         help="Binary classifier with specified class (class name or id).")
59 |     parser.add_argument("--sampling", type=str, choices=['sequential', 'shuffle', 'balanced', 'exclusive'],
60 |                         default='shuffle', help="Trainset sampling (balanced applies to binary)")
61 |     if mode == 'eval':
62 |         parser.add_argument("--num-steps", type=int, default=defaults.NUM_STEPS,
63 |                             help="Number of images in the validation set.")
64 |     if mode == 'train':
65 |         parser.add_argument("--threads", type=int, default=4,
66 |                             help="Number of data fetcher threads")
67 |         parser.add_argument("--epochs", action="store_true",
68 |                             help="Count steps in epochs (1 step is 1 epoch)")
69 |         parser.add_argument("--proximal", action="store_true",
70 |                             help="Use proximal variant")
71 |         parser.add_argument("--proxreg", type=float, default=0.5,
72 |                             help="proximal parameter")
73 |         parser.add_argument("--maxproxsteps", type=int, default=30,
74 |                             help="maximal prox. computation steps")
computation steps") 75 | parser.add_argument("--no-startval", action="store_true", 76 | help="Don't start with a validation run") 77 | parser.add_argument("--batch-size", type=int, default=defaults.BATCH_SIZE, 78 | help="Number of images sent to the network in one step.") 79 | parser.add_argument("--grad-update-every", type=int, default=defaults.GRAD_UPDATE_EVERY, 80 | help="Number of steps after which gradient update is applied.") 81 | parser.add_argument("--input-size", type=str, default=defaults.INPUT_SIZE, 82 | help="Comma-separated string with height and width of images.") 83 | parser.add_argument("-lr", "--learning-rate", type=float, default=defaults.LEARNING_RATE, 84 | help="Base learning rate for training with polynomial decay.") 85 | parser.add_argument("--momentum", type=float, default=defaults.MOMENTUM, 86 | help="Momentum component of the optimiser.") 87 | parser.add_argument("--no-random-mirror", action="store_false", 88 | help="No random mirror of the inputs during the training.") 89 | parser.add_argument("--no-random-scale", action="store_false", 90 | help="No random scale of the inputs during the training.") 91 | parser.add_argument("--save-pred-every", type=int, default=defaults.SAVE_PRED_EVERY, 92 | help="Save summaries and checkpoint every often.") 93 | parser.add_argument("--do-val-every", type=int, default=defaults.DO_VAL_EVERY, 94 | help="Do validation every...") 95 | parser.add_argument("--jaccard", action="store_true", 96 | help="Use lovasz hinge in the binary case.") 97 | parser.add_argument("--hinge", action="store_true", 98 | help="Use hinge loss.") 99 | parser.add_argument("--num-steps", type=float, default=defaults.NUM_STEPS, 100 | help="Number of training steps.") 101 | parser.add_argument("--start-step", type=int, default=0, 102 | help="Start from this training set.") 103 | parser.add_argument("--train-last", type=int, default=-1, 104 | help="Train last .. 
layers (default -1 -> all).") 105 | parser.add_argument("--schedule", action="store_true", 106 | help="Use learning rate schedule.") 107 | parser.add_argument("--delete-previous", action="store_true", 108 | help="Delete previous logdir if exists.") 109 | args = parser.parse_args(argv) 110 | args.snapshot_dir = join(args.output_dir, args.expname) 111 | if not os.path.exists(args.snapshot_dir): 112 | os.makedirs(args.snapshot_dir) 113 | if args.binary != -1: 114 | args.binary_str = inv_classes[args.binary] 115 | print('binary selected, class ' + args.binary_str) 116 | if mode == 'train': 117 | args.random_mirror = not args.no_random_mirror 118 | args.random_scale = not args.no_random_scale 119 | if args.sampling == 'exclusive': 120 | if args.binary == -1: 121 | parser.error('The --exclusive flag requires --binary set.') 122 | if args.sampling == 'balanced': 123 | if args.binary == -1: 124 | parser.error('The --balanced flag requires --binary set.') 125 | if args.jaccard: 126 | if args.binary == -1: 127 | parser.error('Jaccard loss requires --binary set.') 128 | if args.hinge: 129 | if args.binary == -1: 130 | parser.error('Hinge loss requires --binary set.') 131 | 132 | return args -------------------------------------------------------------------------------- /train_pytorch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import argparse 4 | from datetime import datetime 5 | import os, sys 6 | from os.path import join 7 | import time 8 | import re 9 | import platform 10 | 11 | import numpy as np 12 | 13 | import torch 14 | from torch import optim 15 | from torch.autograd import Variable 16 | import torch.utils.data as data 17 | import torch.nn.functional as F 18 | 19 | import random 20 | # WARNING: if multiple worker threads, the seeds are useless (no warranty on the execution order) 21 | random.seed(1857) 22 | torch.manual_seed(1857) 23 | torch.cuda.manual_seed(1857) 24 | 25 | from settings import get_arguments 26 | import datasets 27 | from datasets.loadvoc import load_extended_voc 28 | from compose import (JointCompose, RandomScale, Normalize, 29 | RandomHorizontalFlip, RandomCropPad, PILtoTensor, Scale, TensortoPIL) 30 | from PIL.Image import NEAREST 31 | 32 | from losses import * 33 | 34 | import deepdish as dd 35 | import deeplab_resnet.model_pytorch as modelpy 36 | from collections import defaultdict 37 | import yaml 38 | 39 | IGNORE_LABEL = 255 40 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 41 | 42 | def create_variables(weights, cuda=True): 43 | var = dict() 44 | for k, v in weights.items(): 45 | v = torch.from_numpy(v) 46 | if cuda: 47 | v = v.cuda() 48 | if not (k.endswith('moving_mean') or k.endswith('moving_variance')): 49 | v = Variable(v) 50 | var[k] = v 51 | return var 52 | 53 | def snapshot_variables(weights, dest): 54 | out = {} 55 | for (k, v) in weights.items(): 56 | if isinstance(v, Variable): 57 | v = v.data 58 | out[k] = v.cpu().numpy() 59 | dd.io.save(dest, out) 60 | 61 | def training_groups(weights, base_lr, multipliers=[0.1, 1.0, 1.0], train_last=-1): 62 | """ 63 | get training groups and activates requires_grad for variables 64 | train_last: last: only train last ... 
65 |     multipliers: learning rate multipliers for the [conv, fc weights, fc biases] groups
66 |     """
67 |     fixed = ['moving_mean', 'moving_variance', 'beta', 'gamma']
68 |     # get training variables, with their lr
69 |     trained = {k: v for (k, v) in weights.iteritems() if not any([k.endswith(s) for s in fixed])}
70 |     for v in trained.values():
71 |         v.requires_grad = True
72 |     fc_vars = {k: v for (k, v) in trained.iteritems() if 'fc' in k}
73 |     conv_vars = [v for (k, v) in trained.items() if 'fc' not in k]   # lr * multipliers[0]
74 |     fc_w_vars = [v for (k, v) in fc_vars.items() if 'weights' in k]  # lr * multipliers[1]
75 |     fc_b_vars = [v for (k, v) in fc_vars.items() if 'biases' in k]   # lr * multipliers[2]
76 |     assert(len(trained) == len(fc_vars) + len(conv_vars))
77 |     assert(len(fc_vars) == len(fc_w_vars) + len(fc_b_vars))
78 |     if train_last == -1:
79 |         print("train all layers")
80 |         groups = [{'params': conv_vars, 'lr': multipliers[0] * base_lr},
81 |                   {'params': fc_w_vars, 'lr': multipliers[1] * base_lr},
82 |                   {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}]
83 |     elif train_last == 1:
84 |         print("train last layer")
85 |         for v in conv_vars:
86 |             v.requires_grad = False
87 |         groups = [{'params': fc_w_vars, 'lr': multipliers[1] * base_lr},
88 |                   {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}]
89 |     return groups
90 | 
91 | class SegsetWrap(data.Dataset):
92 |     def __init__(self, segset, transform=None):
93 |         self.name = segset.name
94 |         self.segset = segset
95 |         self.transform = transform
96 |     def __repr__(self):
97 |         return "<SegsetWrap '{}'>".format(self.name)
98 |     def __getitem__(self, i):
99 |         inputs = self.segset.read(i, kind="PIL")
100 |         if self.transform is not None:
101 |             inputs = self.transform(inputs)
102 |         return inputs
103 |     def __len__(self):
104 |         return len(self.segset)
105 | 
106 | def main(args):
107 | 
108 |     print(os.path.basename(__file__), 'arguments:')
109 |     print(yaml.dump(vars(args), default_flow_style=False))
110 | 
111 |     weights = dd.io.load(args.restore_from)
112 |     print('Loaded weights from {}'.format(args.restore_from))
113 |     weights = create_variables(weights, cuda=True)
114 |     forward = lambda input: modelpy.DeepLabResNetModel({'data': input}, weights).layers['fc1_voc12']
115 |     train, val, test = load_extended_voc()
116 |     input_size = map(int, args.input_size.split(',')) if args.input_size is not None else None
117 |     print ('========')
118 | 
119 |     if args.proximal:
120 |         assert args.jaccard
121 | 
122 |     if args.binary == -1:
123 |         print("Multiclass: loss set to cross-entropy")
124 |         lossfn, lossname = crossentropyloss, 'xloss'
125 |     else:
126 |         if args.jaccard:
127 |             print("loss set to jaccard hinge")
128 |             lossfn, lossname = lovaszloss, 'lovaszloss'
129 |         elif args.hinge:
130 |             print("loss set to hinge loss")
131 |             lossfn, lossname = hingeloss, 'hingeloss'
132 |         else:
133 |             print("loss set to binary cross-entropy")
134 |             lossfn, lossname = binaryXloss, 'binxloss'
135 |         train, val = train.binarize(args.binary_str), val.binarize(args.binary_str)
136 | 
137 |     # get network output size
138 |     def get_size():
139 |         dummy_input = torch.rand((1, 3, input_size[0], input_size[1])).cuda()
140 |         dummy_out = forward(Variable(dummy_input, volatile=True))
141 |         output_size = (dummy_out.size(2), dummy_out.size(3))
142 |         return output_size
143 |     output_size = get_size()
144 | 
145 |     base_lr = args.learning_rate
146 |     groups = training_groups(weights, base_lr, train_last=args.train_last)
147 |     optimizer = optim.SGD(groups, lr=base_lr, momentum=args.momentum)
148 |     groups_lr = [group['lr'] for group in optimizer.param_groups]
149 | 
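    # With the default multipliers, the pretrained conv body trains at
    # 0.1 * base_lr while the 'fc' classifier weights and biases train at
    # base_lr; groups_lr records these initial per-group rates so that the
    # optional --schedule step in train_step below can rescale each group
    # consistently at the halfway point of training.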
150 | transforms_train = JointCompose([RandomScale(0.5, 1.5) if args.random_scale else None, 151 | RandomHorizontalFlip() if args.random_mirror else None, 152 | RandomCropPad(input_size, (0, IGNORE_LABEL)), 153 | [None, Scale((output_size[1], output_size[0]), NEAREST)], 154 | PILtoTensor(), 155 | [Normalize(torch.from_numpy(IMG_MEAN)), None], 156 | ]) 157 | transforms_val = JointCompose([PILtoTensor(), 158 | [Normalize(torch.from_numpy(IMG_MEAN)), None], 159 | ]) 160 | invtransf_val = JointCompose([[Normalize(-torch.from_numpy(IMG_MEAN)), None], 161 | TensortoPIL( datasets.utils.color_map() ), 162 | ]) 163 | 164 | if args.sampling == 'sequential': 165 | trainset = SegsetWrap(train, transforms_train) 166 | sampler = data.sampler.SequentialSampler(trainset) 167 | elif args.sampling == 'shuffle': 168 | trainset = SegsetWrap(train, transforms_train) 169 | sampler = data.sampler.RandomSampler(trainset) 170 | elif args.sampling == 'balanced': 171 | trainset = SegsetWrap(train, transforms_train) 172 | positives = np.array([(args.binary_str in ex.classes) for ex in train]) 173 | sample_weights = np.zeros(len(positives)) 174 | sample_weights[positives] = 0.5 / positives.sum() 175 | sample_weights[~positives] = 0.5 / (~positives).sum() 176 | sampler = data.sampler.WeightedRandomSampler(sample_weights, len(train)) 177 | from datasets.balanced_val import balanced 178 | inds = balanced[args.binary_str] 179 | val.examples = [val[i] for i in inds] 180 | print('Subsampled val. to balanced set of {:d} examples'.format(len(val))) 181 | elif args.sampling == 'exclusive': 182 | train, val = train[args.binary_str], val[args.binary_str] 183 | trainset = SegsetWrap(train, transforms_train) 184 | sampler = data.sampler.RandomSampler(trainset) 185 | print('Subsampled train, val. to balanced set of {}, {} examples'.format(len(train), len(val))) 186 | 187 | update_every = args.grad_update_every 188 | global_batch_size = args.batch_size * update_every 189 | 190 | trainloader = data.DataLoader(trainset, 191 | batch_size=global_batch_size, 192 | sampler=sampler, 193 | num_workers=args.threads, 194 | pin_memory=True) 195 | 196 | valset = SegsetWrap(val, transforms_val) 197 | valloader = data.DataLoader(valset, 198 | batch_size=1, 199 | shuffle=False, 200 | num_workers=1, 201 | pin_memory=True) 202 | 203 | step = args.start_step 204 | finished = False 205 | epoch = 0 206 | 207 | from tensorboard import SummaryWriter 208 | logdir = join(args.expname + '_logs', args.nickname) 209 | if os.path.exists(logdir): 210 | if args.delete_previous: 211 | var = 'y' 212 | else: 213 | var = raw_input(logdir + " already exists. Delete (y/n)? 
") 214 | if var == 'n': 215 | raise ValueError(logdir + " already exists") 216 | elif var == 'y': 217 | import shutil 218 | shutil.rmtree(logdir) 219 | log_writer = SummaryWriter(logdir) 220 | # train_writer = SummaryWriter(log_train) 221 | 222 | def snapshot(): 223 | dest = join(args.snapshot_dir, '{}-{}-{:02d}.h5'.format(args.expname, args.nickname, step)) 224 | snapshot_variables(weights, dest) 225 | print("[{}] step {:d}: saved weights under {}".format(dt, step, dest)) 226 | 227 | def do_val(): 228 | valiter = iter(valloader) 229 | stats = defaultdict(list) 230 | tosee = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # export some outputs images of the validation set 231 | for valstep, (inputs, labels) in enumerate(valiter): 232 | start_time = time.time() 233 | inputs, labels = Variable(inputs.cuda(), volatile=True), labels.cuda().long() 234 | logits = forward(inputs) 235 | logits = F.upsample_bilinear(logits, size=labels.size()[1:]) 236 | if args.binary == -1: 237 | xloss = crossentropyloss(logits, labels) 238 | stats['xloss'].append(xloss.data[0]) 239 | print('[Validation {}-{:d}], xloss {:.5f} - mean {:.5f} ({:.3f} sec/step {})'.format( 240 | step, valstep, xloss, np.mean(stats['xloss']), time.time() - start_time)) 241 | # conf, pred = logits.max(1) 242 | else: 243 | conf, multipred = logits.max(1) 244 | multipred = multipred.squeeze(1) 245 | multipred = (multipred == args.binary).long() 246 | imageiou_multi = iouloss(multipred.data.squeeze(0), labels.squeeze(0)) 247 | stats['imageiou_multi'].append(imageiou_multi) 248 | 249 | logits = logits[:, args.binary, :, :] # select only 1 output 250 | pred = (logits > 0.).long() 251 | 252 | # image output 253 | if valstep in tosee: 254 | inputim, inputlab = invtransf_val([inputs.data[0, :, :, :], labels[0, :, :]]) 255 | _, predim = invtransf_val([inputs.data[0, :, :, :], pred.data[0, :, :]]) 256 | log_writer.add_image(str(valstep)+'im', np.array(inputim.convert("RGB"))) 257 | log_writer.add_image(str(valstep)+'lab', np.array(inputlab.convert("RGB"))) 258 | log_writer.add_image(str(valstep)+'pred', np.array(predim.convert("RGB"))) 259 | 260 | imageiou = iouloss(pred.data.squeeze(0), labels.squeeze(0)) 261 | stats['imageiou'].append(imageiou) 262 | hloss = hingeloss(logits, labels).data[0] 263 | stats['hingeloss'].append(hloss) 264 | jloss = lovaszloss(logits, labels).data[0] 265 | stats['lovaszloss'].append(jloss) 266 | binxloss = binaryXloss(logits, labels).data[0] 267 | stats['binxloss'].append(binxloss) 268 | 269 | print( '[Validation {}-{:d}], '.format(step, valstep) 270 | + 'hloss {:.5f} - mean {:.5f}, '.format(hloss, np.mean(stats['hingeloss'])) 271 | + 'lovaszloss {:.5f} - mean {:.5f}, '.format(jloss, np.mean(stats['lovaszloss'])) 272 | + 'iou {:.5f} - mean {:.5f}, '.format(imageiou, np.mean(stats['imageiou'])) 273 | + 'iou_multi {:.5f} - mean {:.5f}, '.format(imageiou_multi, np.mean(stats['imageiou_multi'])) 274 | + '({:.3f} sec/step)'.format(time.time() - start_time) 275 | ) 276 | for key in stats: 277 | log_writer.add_scalar(key + '_val', np.mean(stats[key]), step) 278 | 279 | if not args.no_startval: 280 | do_val() 281 | 282 | num_steps = args.num_steps 283 | if args.epochs: 284 | num_steps *= len(trainloader) 285 | num_steps = int(num_steps) 286 | if args.new_schedule: 287 | half_step = num_steps // 2 288 | 289 | while not finished: # new epoch 290 | trainiter = iter(trainloader) 291 | def train_step(): 292 | if args.new_schedule and step == half_step: 293 | print("==== HALF STEP ====") 294 | for group, group_base in 
    while not finished:  # new epoch
        trainiter = iter(trainloader)

        def train_step():
            if args.new_schedule and step == half_step:
                print("==== HALF STEP ====")
                for group, group_base in zip(optimizer.param_groups, groups_lr):
                    if ('fix_lr' not in group) or not group['fix_lr']:
                        group['lr'] = group_base / 5

            inputs, labels = next(trainiter)
            inputs, labels = Variable(inputs.cuda()), labels.cuda().long()
            chunk_inp = torch.split(inputs, args.batch_size, dim=0)
            chunk_lab = torch.split(labels, args.batch_size, dim=0)
            optimizer.zero_grad()
            lossacc = 0.
            # start gradient accumulation
            for inp, lab in zip(chunk_inp, chunk_lab):
                logits = forward(inp)
                if args.binary != -1:
                    logits = logits[:, args.binary, :, :]  # select only 1 output
                if args.proximal:
                    debug = {"step": -1, "finished": False}
                    proxreg = args.proxreg
                    if args.power_prox > 0:
                        proxreg = proxreg / (1. - step / (num_steps + 0.1)) ** args.power_prox
                    if args.new_schedule:
                        if step >= half_step:
                            proxreg *= 5.
                    loss, hook, gam = lossfn(logits, lab, prox=proxreg, max_steps=args.maxproxsteps, debug=debug)
                    print(str(debug["step"]) + ('' if debug["finished"] else 'E'), end=' ')
                else:
                    loss = lossfn(logits, lab)
                loss.backward(torch.Tensor([1. / len(chunk_inp)]).cuda())  # rescale gradient
                if args.proximal:
                    hook.remove()  # remove hook to free memory
                lossacc += loss.data[0] / len(chunk_inp)
            optimizer.step()
            return lossacc

        for substep in range(len(trainloader)):
            start_time = time.time()
            step += 1
            if step > num_steps:
                finished = True
                break
            lossacc = train_step()

            duration = time.time() - start_time
            (dt, micro) = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f').split('.')
            dt = "%s.%03d" % (dt, int(micro) // 1000)
            print('[{}] step {:d} \t loss = {:.5f} ({:.3f} sec/step, epoch {})'.format(
                dt, step, lossacc, duration, epoch))

            log_writer.add_scalar(lossname, lossacc, step)

            if step % args.save_pred_every == 0:
                snapshot()
            if step % args.do_val_every == 0:
                do_val()

        epoch += 1

    # end of main: save weights and do val
    snapshot()
    do_val()


if __name__ == '__main__':
    args = get_arguments(sys.argv[1:], 'train')
    main(args)

--------------------------------------------------------------------------------