├── README.md
├── ckpt_to_dd.py
├── compose.py
├── convert.py
├── datasets
│   ├── __init__.py
│   ├── balanced_val.py
│   ├── common.py
│   ├── loadvoc.py
│   └── utils.py
├── deeplab_resnet
│   ├── __init__.py
│   ├── model.py
│   ├── model_pytorch.py
│   └── network_pytorch.py
├── environment.yml
├── eval_pytorch.py
├── kaffe
│   ├── __init__.py
│   ├── caffe
│   │   ├── __init__.py
│   │   ├── caffepb.py
│   │   └── resolver.py
│   ├── errors.py
│   ├── graph.py
│   ├── layers.py
│   ├── shapes.py
│   ├── tensorflow
│   │   ├── __init__.py
│   │   ├── network.py
│   │   └── transformer.py
│   └── transformers.py
├── losses.py
├── settings.py
└── train_pytorch.py

/README.md:
--------------------------------------------------------------------------------
1 | # Deeplab-resnet-101 Pytorch with Lovász hinge loss
2 | 
3 | Train deeplab-resnet-101 with the binary Jaccard loss surrogate, the Lovász hinge, as described in [http://arxiv.org/abs/1705.08790](http://arxiv.org/abs/1705.08790).
4 | 
5 | Parts of the code are adapted from [tensorflow-deeplab-resnet](https://github.com/DrSleep/tensorflow-deeplab-resnet) (in particular the conversion from caffe to tensorflow with kaffe).
6 | 
7 | The code has not been tested for full training of Deeplab-Resnet yet. Refer to [tensorflow-deeplab-resnet](https://github.com/DrSleep/tensorflow-deeplab-resnet) and possibly extract the weights after training with that framework.
8 | 
9 | ## Code status
10 | The code is at an early stage. Pull requests welcome.
11 | 
12 | ## Citation
13 | Please cite
14 | ```
15 | @ARTICLE{2017arXiv170508790B,
16 |    author = {{Berman}, M. and {Blaschko}, M.~B.},
17 |     title = "{Optimization of the Jaccard index for image segmentation with the Lov\'asz hinge}",
18 |   journal = {ArXiv e-prints},
19 | archivePrefix = "arXiv",
20 |    eprint = {1705.08790},
21 |  primaryClass = "cs.CV",
22 |  keywords = {Computer Science - Computer Vision and Pattern Recognition},
23 |      year = 2017,
24 |     month = may,
25 |    adsurl = {http://adsabs.harvard.edu/abs/2017arXiv170508790B},
26 | }
27 | ```
28 | if you use the code.
29 | 
30 | ## Dependencies and weights
31 | Relies notably on [Pytorch](http://pytorch.org/) and the standalone [tensorboard](https://github.com/dmlc/tensorboard/tree/master/python) package.
32 | 
33 | Using anaconda, install the full requirements using the provided conda environment file:
34 | ```
35 | conda env create -f environment.yml
36 | source activate jaccard-segment
37 | ```
38 | 
39 | Convert the Deeplab Caffe weights to a tensorflow ckpt using [caffe-tensorflow](https://github.com/ethereon/caffe-tensorflow), then convert them to hdf5 using `ckpt_to_dd.py`, and use our wrapper to load them in Pytorch.
40 | 
41 | ## Important switches in the settings
42 | By default, the training finetunes with the cross-entropy loss. Use the `--binary class` switch to select a particular class in the binary case, `--jaccard` to train with the Jaccard hinge loss described in the arxiv paper, `--hinge` to use the standard hinge loss, and `--proximal` to use the proximal-operator optimization variant for the Jaccard loss described in the arxiv paper.
43 | 
44 | For the proximal operator, use a learning rate of `1.` and set an equivalent regularization of `1/lr` instead.
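45 | 
46 | For illustration, the weight-conversion step described above amounts to the following minimal sketch (the file names here are hypothetical; `ckpt_to_dd.py` infers the output name when it is omitted):
47 | ```
48 | # same as: python ckpt_to_dd.py deeplab_resnet.ckpt deeplab_resnet.h5
49 | import deepdish as dd
50 | from ckpt_to_dd import read_ckpt
51 | 
52 | weights = read_ckpt('deeplab_resnet.ckpt')  # dict of numpy arrays, transposed to Pytorch layout
53 | dd.io.save('deeplab_resnet.h5', weights)
54 | ```
55 | A hypothetical training invocation combining the switches would be `python train_pytorch.py --binary boat --jaccard` (the exact argument list is defined in `settings.py`).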
56 | 
--------------------------------------------------------------------------------
/ckpt_to_dd.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import deepdish as dd
3 | import argparse
4 | import os
5 | import numpy as np
6 | 
7 | def tr(v):
8 |     # tensorflow weights to pytorch weights
9 |     if v.ndim == 4:
10 |         return np.ascontiguousarray(v.transpose(3,2,0,1))
11 |     elif v.ndim == 2:
12 |         return np.ascontiguousarray(v.transpose())
13 |     return v
14 | 
15 | def read_ckpt(ckpt):
16 |     # https://github.com/tensorflow/tensorflow/issues/1823
17 |     reader = tf.train.NewCheckpointReader(ckpt)
18 |     weights = {n: reader.get_tensor(n) for (n, _) in reader.get_variable_to_shape_map().items()}
19 |     pyweights = {k: tr(v) for (k, v) in weights.items()}
20 |     return pyweights
21 | 
22 | if __name__ == '__main__':
23 |     parser = argparse.ArgumentParser(description="Converts ckpt weights to deepdish hdf5")
24 |     parser.add_argument("infile", type=str,
25 |                         help="Path to the ckpt.")
26 |     parser.add_argument("outfile", type=str, nargs='?', default='',
27 |                         help="Output file (inferred if missing).")
28 |     args = parser.parse_args()
29 |     if args.outfile == '':
30 |         args.outfile = os.path.splitext(args.infile)[0] + '.h5'
31 |     outdir = os.path.dirname(args.outfile)
32 |     if outdir and not os.path.exists(outdir):
33 |         os.makedirs(outdir)
34 |     weights = read_ckpt(args.infile)
35 |     dd.io.save(args.outfile, weights)
36 |     weights2 = dd.io.load(args.outfile)  # sanity check: reload the saved weights
37 | 
--------------------------------------------------------------------------------
/compose.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/pytorch/vision/issues/9
3 | 
4 | joint transforms for input and target,
5 | applied to sequences of images
6 | 
7 | transform = JointCompose([
8 |     ElasticTransform(),
9 |     RandomRotate(),
10 |     [CenterCropNumpy(size=input_shape), CenterCropNumpy(size=target_shape)],
11 |     [NormalizeNumpy(), None],
12 |     [Lambda(to_tensor), Lambda(to_tensor)]
13 | ])
14 | 
15 | """
16 | from __future__ import division, print_function
17 | import math
18 | import random
19 | from PIL import Image, ImageOps
20 | import numpy as np
21 | import numbers
22 | import types
23 | import collections
24 | import torch
25 | 
26 | class JointCompose(object):
27 |     """Composes several transforms together, with support for separate transformations of multiple inputs.
28 |     """
29 | 
30 |     def __init__(self, transforms):
31 |         self.transforms = transforms
32 | 
33 |     def __call__(self, img):
34 |         for t in self.transforms:
35 |             if isinstance(t, collections.Sequence):
36 |                 assert isinstance(img, collections.Sequence) and len(img) == len(t), "sizes of image group and transform group do not match"
37 |                 tmp_ = []
38 |                 for i, im_ in enumerate(img):
39 |                     if callable(t[i]):
40 |                         tmp_.append(t[i](im_))
41 |                     else:
42 |                         tmp_.append(im_)
43 |                 img = tmp_
44 |             elif callable(t):
45 |                 img = t(img)
46 |             elif t is None:
47 |                 continue
48 |             else:
49 |                 raise Exception('unexpected type')
50 |         return img
51 | 
52 | class RandomScale(object):
53 |     """Randomly resize the given PIL.Image(s).
54 |     low: ratio of minimum size to original size
55 |     high: ratio of maximum size to original size
56 |     interpolation(s): interpolations used.
57 |     If 'auto', uses NEAREST neighbour for the second input.
58 |     """
59 | 
60 |     def __init__(self, low, high, interpolations='auto'):
61 |         self.low = low
62 |         self.high = high
63 |         self.interpolations = interpolations
64 | 
65 |     def __call__(self, images):
66 |         single = False
67 |         if not isinstance(images, collections.Sequence):
68 |             images = [images]
69 |             single = True
70 |         interps = self.interpolations
71 |         if interps == 'auto':
72 |             interps = Image.BILINEAR
73 |             if len(images) == 2:
74 |                 interps = [Image.BILINEAR, Image.NEAREST]
75 |         if not isinstance(interps, collections.Sequence):
76 |             interps = [interps] * len(images)
77 |         resized = []
78 |         ratio = random.uniform(self.low, self.high)
79 |         for img, interp in zip(images, interps):
80 |             w, h = img.size  # PIL size is (width, height)
81 |             w2, h2 = (int(ratio * w), int(ratio * h))
82 |             img2 = img.resize((w2, h2), interp)
83 |             resized.append(img2)
84 |         if single:
85 |             resized = resized[0]
86 |         return resized
87 | 
88 | class Scale(object):
89 |     # MONOCHANNEL FOR NOW # fixme
90 |     """Rescales the input PIL.Image to the given 'size'.
91 |     If 'size' is a 2-element tuple or list in the order of (width, height), it will be the exact size to scale to.
92 |     If 'size' is a number, it will indicate the size of the smaller edge.
93 |     For example, if height > width, then the image will be
94 |     rescaled to (size * height / width, size).
95 |     size: the exact output size or the size of the smaller edge
96 |     interpolation: Default: PIL.Image.BILINEAR
97 |     """
98 | 
99 |     def __init__(self, size, interpolation=Image.BILINEAR):
100 |         assert isinstance(size, int) or (isinstance(size, collections.Iterable) and len(size) == 2)
101 |         self.size = size
102 |         self.interpolation = interpolation
103 | 
104 |     def __call__(self, img):
105 |         if isinstance(self.size, int):
106 |             w, h = img.size
107 |             if (w <= h and w == self.size) or (h <= w and h == self.size):
108 |                 return img
109 |             if w < h:
110 |                 ow = self.size
111 |                 oh = int(self.size * h / w)
112 |                 return img.resize((ow, oh), self.interpolation)
113 |             else:
114 |                 oh = self.size
115 |                 ow = int(self.size * w / h)
116 |                 return img.resize((ow, oh), self.interpolation)
117 |         else:
118 |             return img.resize(self.size, self.interpolation)
119 | 
120 | 
121 | class RandomHorizontalFlip(object):
122 |     """Randomly horizontally flips the given PIL.Image(s) with a probability of 0.5.
123 |     """
124 | 
125 |     def __call__(self, images):
126 |         if random.random() < 0.5:
127 |             single = False
128 |             if not isinstance(images, collections.Sequence):
129 |                 images = [images]
130 |                 single = True
131 |             images = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in images]
132 |             if single:
133 |                 images = images[0]
134 |         return images
135 | 
136 | 
137 | def ensuretuple(inp, n=2):
138 |     # duplicate the value n times if needed
139 |     if not isinstance(inp, collections.Sequence):
140 |         inp = (inp,) * n
141 |     assert len(inp) == n, "Expected input of size " + str(n)
142 |     return inp
143 | 
144 | def pad_to_target(img, target_height, target_width, label=0):
145 |     # Pad the image with `label` pixels to the specified height and width if needed.
146 |     # This op does nothing if the image is already at least target_height by target_width.
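    # Worked example (hypothetical sizes, for illustration): padding a 200x151
    # image to 256x256 gives delta = 56 -> left = 28, right = 28 for the width,
    # and delta = 105 -> top = 52, bottom = 53 for the height; any odd
    # remainder goes to the right/bottom edge.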
147 |     w, h = img.size
148 |     left = top = right = bottom = 0
149 |     doit = False
150 |     if target_width > w:
151 |         delta = target_width - w
152 |         left = delta // 2
153 |         right = delta - left
154 |         doit = True
155 |     if target_height > h:
156 |         delta = target_height - h
157 |         top = delta // 2
158 |         bottom = delta - top
159 |         doit = True
160 |     if doit:
161 |         img = ImageOps.expand(img, border=(left, top, right, bottom), fill=label)
162 |     assert img.size[0] >= target_width
163 |     assert img.size[1] >= target_height
164 |     return img
165 | 
166 | 
167 | class RandomCropPad(object):
168 |     """Crops the given PIL.Image at a random location to have a region of
169 |     the given size. size can be a tuple (target_height, target_width)
170 |     or an integer, in which case the target will be of a square shape (size, size).
171 |     Pads with pad_label if needed; 'auto' -> (0 for the image, 255 for the mask).
172 |     """
173 | 
174 |     def __init__(self, size, pad_label='auto'):
175 |         self.target_height, self.target_width = ensuretuple(size)
176 |         self.pad_label = pad_label
177 | 
178 |     def __call__(self, images):
179 |         th, tw = self.target_height, self.target_width
180 |         single = False
181 |         if not isinstance(images, collections.Sequence):
182 |             images = [images]
183 |             single = True
184 |         pad_label = self.pad_label
185 |         if pad_label == 'auto':
186 |             pad_label = [0, 255] if len(images) == 2 else 0
187 |         if not isinstance(pad_label, collections.Sequence):
188 |             pad_label = [pad_label] * len(images)  # broadcast a scalar label to all inputs
189 |         returns = []
190 |         for image, pad in zip(images, pad_label):
191 |             image = pad_to_target(image, th, tw, pad)
192 |             returns.append(image)
193 |         w, h = returns[0].size
194 |         for ret in returns[1:]:
195 |             assert (w, h) == ret.size, "all input images must have the same size"
196 |         if w == tw and h == th:
197 |             crops = returns  # already at the target size, nothing to crop
198 |         else:
199 |             x1 = random.randint(0, w - tw)
200 |             y1 = random.randint(0, h - th)
201 |             crops = [ret.crop((x1, y1, x1 + tw, y1 + th)) for ret in returns]
202 |         return crops[0] if single else crops
203 | 
204 | class Normalize(object):
205 |     """Given mean: (R, G, B) and std: (R, G, B),
206 |     will normalize each channel of the torch.*Tensor, i.e.
207 |     channel = (channel - mean) / std
208 |     std is optional
209 |     """
210 |     def __init__(self, mean, std=None):
211 |         self.mean = mean
212 |         self.std = std
213 | 
214 |     def __call__(self, tensor):
215 |         if self.std is None:
216 |             for t, m in zip(tensor, self.mean):
217 |                 t.sub_(m)
218 |         else:
219 |             for t, m, s in zip(tensor, self.mean, self.std):
220 |                 t.sub_(m).div_(s)
221 |         return tensor
222 | 
223 | class PILtoTensor(object):
224 |     """Puts channels first and converts to float, except for palette ('P') images.
225 |     """
226 | 
227 |     def __init__(self):
228 |         pass
229 | 
230 |     def __call__(self, inputs):
231 |         single = False
232 |         if not isinstance(inputs, collections.Sequence):
233 |             inputs = [inputs]
234 |             single = True
235 |         res = []
236 |         for im in inputs:
237 |             if im.mode == 'P':
238 |                 dest = torch.from_numpy( np.array(im) )
239 |                 res.append( dest )
240 |             else:
241 |                 dest = torch.from_numpy( np.array(im).transpose(2, 0, 1) )
242 |                 res.append( dest.float() )
243 |         if single:
244 |             res = res[0]
245 |         return res
246 | 
247 | class TensortoPIL(object):
248 |     """Converts tensors to PIL images:
249 |     flat (2D) arrays become palette label images,
250 |     3D arrays become images with the channel axis moved last.
251 |     """
252 | 
253 |     def __init__(self, color_map=None):
254 |         self.color_map = color_map
255 | 
256 |     def __call__(self, inputs):
257 |         single = False
258 |         if not isinstance(inputs, collections.Sequence):
259 |             inputs = [inputs]
260 |             single = True
261 |         res = []
262 |         for tens in inputs:
263 |             dest = tens.cpu().numpy()
264 |             if dest.ndim == 3:
265 |                 dest = dest.transpose(1, 2, 0).astype(np.uint8)
266 |                 dest = Image.fromarray(dest)
267 |             elif dest.ndim == 2:
268 |                 dest = dest.astype(np.uint8)
269 |                 dest = Image.fromarray(dest, "P")
270 |                 if self.color_map is not None:
271 |                     cmap = [k for l in self.color_map for k in l]
272 |                     dest.putpalette(cmap)
273 |             res.append(dest)
274 |         if single:
275 |             res = res[0]
276 |         return res
277 | 
--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # This script belongs to https://github.com/ethereon/caffe-tensorflow
4 | import os
5 | import sys
6 | import numpy as np
7 | import argparse
8 | from kaffe import KaffeError, print_stderr
9 | from kaffe.tensorflow import TensorFlowTransformer
10 | 
11 | 
12 | def fatal_error(msg):
13 |     print_stderr(msg)
14 |     exit(-1)
15 | 
16 | 
17 | def validate_arguments(args):
18 |     if (args.data_output_path is not None) and (args.caffemodel is None):
19 |         fatal_error('No input data path provided.')
20 |     if (args.caffemodel is not None) and (args.data_output_path is None):
21 |         fatal_error('No output data path provided.')
22 |     if (args.code_output_path is None) and (args.data_output_path is None):
23 |         fatal_error('No output path specified.')
24 | 
25 | 
26 | def convert(def_path, caffemodel_path, data_output_path, code_output_path, phase):
27 |     try:
28 |         transformer = TensorFlowTransformer(def_path, caffemodel_path, phase=phase)
29 |         print_stderr('Converting data...')
30 |         if caffemodel_path is not None:
31 |             data = transformer.transform_data()
32 |             print_stderr('Saving data...')
33 |             with open(data_output_path, 'wb') as data_out:
34 |                 np.save(data_out, data)
35 |         if code_output_path:
36 |             print_stderr('Saving source...')
37 |             with open(code_output_path, 'wb') as src_out:
38 |                 src_out.write(transformer.transform_source())
39 |         print_stderr('Done.')
40 |     except KaffeError as err:
41 |         fatal_error('Error 
encountered: {}'.format(err)) 42 | 43 | 44 | def main(): 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument('def_path', help='Model definition (.prototxt) path') 47 | parser.add_argument('--caffemodel', help='Model data (.caffemodel) path') 48 | parser.add_argument('--data-output-path', help='Converted data output path') 49 | parser.add_argument('--code-output-path', help='Save generated source to this path') 50 | parser.add_argument('-p', 51 | '--phase', 52 | default='test', 53 | help='The phase to convert: test (default) or train') 54 | args = parser.parse_args() 55 | validate_arguments(args) 56 | convert(args.def_path, args.caffemodel, args.data_output_path, args.code_output_path, 57 | args.phase) 58 | 59 | 60 | if __name__ == '__main__': 61 | main() 62 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # from loadvoc import load_extended_voc#, load_voc 2 | # from common import Example, SegSet, BinarizedSegSet 3 | # from utils import array_to_segmentation 4 | 5 | 6 | -------------------------------------------------------------------------------- /datasets/balanced_val.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | from random import Random 3 | from datasets.loadvoc import load_extended_voc 4 | 5 | random = Random(1234) 6 | 7 | train, val, test = load_extended_voc() 8 | 9 | # create balanced binary datasets for experimenting 10 | 11 | balanced = {} 12 | 13 | for c in sorted(train.classes, key=train.classes.get): 14 | if c != 'void': 15 | pos = [] 16 | neg = [] 17 | for i, ex in enumerate(val): 18 | if c in ex.classes: 19 | pos.append(i) 20 | else: 21 | neg.append(i) 22 | random.shuffle(neg) 23 | balanced[c] = pos + neg[:len(pos)] 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /datasets/common.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | from scipy.misc import imread 3 | from PIL import Image 4 | import numpy as np 5 | import os 6 | 7 | class SegSet(object): 8 | # Collection of segmentation Examples 9 | def __init__(self, name, examples, imagespath, classes, maskspath=None): 10 | self.name = name 11 | self.imagespath = imagespath 12 | self.maskspath = maskspath 13 | self.examples = examples 14 | self.classes = classes 15 | super(SegSet, self).__init__() 16 | def __len__(self): 17 | return len(self.examples) 18 | def __getitem__(self, key): 19 | if isinstance( key, slice ) : 20 | if key.start == key.stop == key.step == None: 21 | token = "" 22 | else: 23 | token = "[" 24 | if key.start != None: token += str(key.start) 25 | token += ":" 26 | if key.stop != None: token += str(key.stop) 27 | if key.step != None: token += ":" + str(key.step) 28 | token += "]" 29 | return SegSet(self.name + token, self.examples[key], self.imagespath, self.classes, self.maskspath) 30 | if isinstance( key, list ) : 31 | # list of indices 32 | token = "[]" 33 | return SegSet(self.name + token, 34 | [self.examples[i] for i in key], 35 | self.imagespath, self.classes, self.maskspath) 36 | elif isinstance( key, int ) : 37 | return self.examples[key] 38 | elif isinstance( key, str ): # select a category 39 | token = "[" + key + "]" 40 | if key[0] == '~': # select complementary 41 | key = key[1:] 42 | selected 
= [ex for ex in self.examples if key not in ex.classes]
43 |             else:
44 |                 selected = [ex for ex in self.examples if key in ex.classes]
45 |             return SegSet(self.name + token, selected, self.imagespath, self.classes, self.maskspath)
46 |         else:
47 |             raise TypeError("Invalid argument type.")
48 |     def __repr__(self):
49 |         return ("<{}: collection of {} examples>".format(
50 |             self.name,
51 |             len(self.examples))
52 |         )
53 |     def __add__(self, other):
54 |         assert self.imagespath == other.imagespath
55 |         if self.maskspath and other.maskspath:
56 |             assert self.maskspath == other.maskspath
57 |         maskspath = self.maskspath if self.maskspath else other.maskspath
58 |         return SegSet(self.name + "+" + other.name,
59 |                       self.examples + other.examples,
60 |                       self.imagespath,
61 |                       self.classes,
62 |                       maskspath)
63 |     def impath(self, example):
64 |         example = self.examples[example] if isinstance(example, int) else example
65 |         return os.path.join(self.imagespath, example.name + ".jpg")
66 |     def maskpath(self, example):
67 |         example = self.examples[example] if isinstance(example, int) else example
68 |         return os.path.join(self.maskspath, example.name + ".png")
69 |     def imread(self, example, kind="scipy"):
70 |         ipath = self.impath(example)
71 |         if kind == "scipy":
72 |             return imread(ipath)
73 |         im = Image.open(ipath)
74 |         if kind == "PIL":
75 |             return im
76 |         if kind == "array":
77 |             return np.array(im)
78 |     def binarize(self, cls):
79 |         token = ".binarize({})".format(cls)
80 |         binarizedset = BinarizedSegSet(self.name + token, self.examples,
81 |                                        self.imagespath, self.classes, cls, self.maskspath)
82 |         return binarizedset
83 |     def maskread(self, example, kind="array"):
84 |         mpath = self.maskpath(example)
85 |         im = Image.open(mpath)
86 |         if kind == "PIL":
87 |             return im
88 |         elif kind == "array":
89 |             return np.array(im)
90 |         else:
91 |             raise NotImplementedError("Unknown return kind {}".format(kind))
92 |         # return imread(mpath)
93 |     def read(self, example, kind="array"):
94 |         return self.imread(example, kind), self.maskread(example, kind)
95 | 
96 | 
97 | class BinarizedSegSet(SegSet):
98 |     def __init__(self, name, examples, imagespath, classes, target, maskspath=None):
99 |         self.target = target
100 |         super(BinarizedSegSet, self).__init__(name, examples, imagespath, classes, maskspath)
101 |     def maskread(self, example, kind="array", withvoid=True):
102 |         example = self.examples[example] if isinstance(example, int) else example
103 |         mpath = self.maskpath(example)
104 |         im = Image.open(mpath)
105 |         if self.target in example.classes:
106 |             target_idx = self.classes[self.target]
107 |             arr = np.array(im)
108 |             mask = arr == target_idx
109 |             voidmask = arr == self.classes['void']
110 |             arr[mask] = 1
111 |             arr[~mask] = 0
112 |             arr[voidmask] = self.classes['void']
113 |         else:
114 |             # return 0 labels
115 |             arr = np.array(im)
116 |             arr.fill(0)
117 |         if kind == "array":
118 |             return arr
119 |         elif kind == "PIL":
120 |             im = Image.fromarray(arr, "P")
121 |             im.putpalette([0, 0, 0, 255, 255, 255]
122 |                           + [255, 255, 178] * 253
123 |                           + [255, 178, 253])
124 |             return im
125 |         else:
126 |             raise NotImplementedError("Unknown return kind {}".format(kind))
127 |     def binarize(self, cls):
128 |         raise NotImplementedError("Already binarized to", self.target)
129 |     def __add__(self, other):
130 |         assert self.imagespath == other.imagespath
131 |         if self.maskspath and other.maskspath:
132 |             assert self.maskspath == other.maskspath
133 |         maskspath = self.maskspath if self.maskspath else other.maskspath
134 |         assert self.target == other.target
135 |         return BinarizedSegSet(self.name + "+" + other.name,
136 |                                self.examples + other.examples,
137 |                                self.imagespath,
138 |                                self.classes,
139 |                                self.target,
140 |                                maskspath)
141 | 
142 | 
143 | 
144 | class Example(object):
145 |     def __init__(self, name, source, classes=[]):
146 |         self.name = name
147 |         self.source = source
148 |         self.classes = classes
149 |     def __repr__(self):
150 |         return ("<Example {}>".format(self.name))
151 | 
152 | 
--------------------------------------------------------------------------------
/datasets/loadvoc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Maxim Berman, bermanmaxim@gmail.com
3 | # Load Pascal VOC + Berkeley extended annotations datasets
4 | 
5 | from __future__ import absolute_import, division, print_function
6 | import os, sys
7 | import scipy.io
8 | from .common import Example, SegSet
9 | if 'ipykernel' in sys.modules:
10 |     from tqdm import tqdm_notebook as tqdm
11 | else:
12 |     from tqdm import tqdm
13 | join = os.path.join
14 | import warnings
15 | from skimage.io import imread, imsave
16 | from .utils import mat2png_hariharan, convert_from_color_segmentation, pascal_classes, color_map
17 | import numpy as np
18 | from shutil import copyfile
19 | from PIL import Image
20 | import platform
21 | 
22 | VOC_DIR = './VOCdevkit/VOC2012'
23 | EXTENDED_DIR = './VOCdevkit/VOC2012/berkeley'
24 | 
25 | VOCVER = 'voc12'
26 | CACHE_FUSE = join(VOC_DIR, 'FusedSets')  # where to store the fused image lists
27 | #
28 | MASKS_DIR = join(VOC_DIR, 'SegmentationClassPalette')
29 | pascal_classes = pascal_classes()
30 | pascal_classes_inv = {v: k for k, v in pascal_classes.items()}
31 | 
32 | def load_extended_voc(voc_dir=VOC_DIR, extended_dir=EXTENDED_DIR,
33 |                       masks_dir=MASKS_DIR, cache_fuse=CACHE_FUSE, vocver=VOCVER):
34 |     """
35 |     Fuse VOC and Berkeley annotations
36 |     Convert annotations to common 21-class + void palette png format
37 |     Copy labels into the same folder MASKS_DIR
38 |     Returns train/val/test lists and classes (classes present on each image)
39 |     Caches results in cache_fuse
40 |     """
41 | 
42 |     trainaugf = join(cache_fuse, 'trainaug.txt')
43 |     valaugf = join(cache_fuse, 'valaug.txt')
44 |     testaugf = join(cache_fuse, 'testaug.txt')
45 |     dirsf = join(cache_fuse, 'dirs.txt')
46 |     infof = join(cache_fuse, 'info.txt')
47 |     if (os.path.exists(cache_fuse)
48 |             and os.path.isfile(trainaugf) and os.path.isfile(valaugf)
49 |             and os.path.isfile(testaugf) and os.path.isfile(dirsf)
50 |             and os.path.isfile(infof)):
51 |         # load from cache if files exist
52 |         train = [l.strip() for l in open(trainaugf)]
53 |         val = [l.strip() for l in open(valaugf)]
54 |         test = [l.strip() for l in open(testaugf)]
55 |         [vocjpg, masks_dir] = [l.strip() for l in open(dirsf)]
56 |         source = {}
57 |         classes = {}
58 |         with open(infof) as f:
59 |             next(f)  # skip header line
60 |             for line in f:
61 |                 m = line.split()
62 |                 source[m[0]] = m[1]
63 |                 classes[m[0]] = m[2:]
64 |         print("Loaded dataset from cache " + cache_fuse)
65 |     else:  # no cache, do the computations
66 |         # fuse image lists
67 |         vocsets = join(voc_dir, 'ImageSets', 'Segmentation')
68 |         augmented_root = join(extended_dir, 'benchmark_RELEASE', 'dataset')
69 | 
70 |         voctrainF = join(vocsets, 'train.txt')
71 |         vocvalF = join(vocsets, 'val.txt')
72 |         voctestF = join(vocsets, 'test.txt')
73 | 
74 |         augtrainF = join(augmented_root, 'train.txt')
75 |         augvalF = join(augmented_root, 'val.txt')
76 | 
77 |         voctrain = [l.strip() for l in open(voctrainF)]
78 |         val = [l.strip() for l in open(vocvalF)]
79 |         test = [l.strip() for l in open(voctestF)]
80 |         augtrain = [l.strip() for l in open(augtrainF)]
81 |         augval = [l.strip() for l in open(augvalF)]
82 | 
83 |         source = {}
84 |         for im in augtrain + augval:
85 |             source[im] = 'aug'
86 |         for im in voctrain + val + test:
87 |             source[im] = str(vocver)
88 | 
89 |         train = sorted(set(augtrain + voctrain + augval) - set(val) - set(test))
90 |         print("Loaded image sets, {} train / {} val / {} test"
91 |               .format(len(train), len(val), len(test)))
92 | 
93 |         # convert to common format
94 |         vocjpg = join(voc_dir, 'JPEGImages')
95 |         vocseg = join(voc_dir, 'SegmentationClass')
96 |         augseg = join(augmented_root, 'cls')
97 | 
98 |         if not os.path.exists(masks_dir):
99 |             os.makedirs(masks_dir)
100 | 
101 |         print('Converting masks to common format...')
102 |         classes = {}
103 |         # just copy voc labels and scan classes
104 |         for im in tqdm([im for im in train + val if source[im] != 'aug'],
105 |                        desc='VOC: copy labels...'):
106 |             srcf = join(vocseg, im+'.png')
107 |             copyfile(srcf, join(masks_dir, im + '.png'))
108 |             array = np.array(Image.open(srcf))
109 |             clsuniques = np.unique(array)
110 |             classes[im] = [pascal_classes_inv[k] for k in clsuniques]
111 |             # src = imread(srcf)
112 |             # img = convert_from_color_segmentation(src, use_void=True)
113 |             # clsuniques = np.unique(img)
114 |             # classes[im] = [pascal_classes_inv[k] for k in clsuniques]
115 |             # with warnings.catch_warnings():
116 |             #     warnings.simplefilter("ignore")
117 |             #     imsave(join(masks_dir, im + '.png'), img)
118 | 
119 |         # MAT labels to png
120 |         cmap = color_map(255)
121 |         flat_cmap = [i for l in cmap for i in l]
122 |         for im in tqdm([im for im in train + val if source[im] == 'aug'],
123 |                        desc='AUG: MAT to 1D PNG...'):
124 |             srcf = join(augseg, im+'.mat')
125 |             img = mat2png_hariharan(srcf)
126 |             clsuniques = np.unique(img)
127 |             classes[im] = [pascal_classes_inv[k] for k in clsuniques]
128 |             newimg = Image.fromarray(img, mode="P")
129 |             newimg.putpalette(flat_cmap)
130 |             newimg.save(join(masks_dir, im + '.png'))
131 |             # with warnings.catch_warnings():
132 |             #     warnings.simplefilter("ignore")
133 |             #     imsave(join(masks_dir, im + '.png'), img)
134 | 
135 |         # memoize everything (the *augf paths already include cache_fuse)
136 |         if not os.path.exists(cache_fuse):
137 |             os.makedirs(cache_fuse)
138 |         open(trainaugf, 'w').write("\n".join(train))
139 |         open(valaugf, 'w').write("\n".join(val))
140 |         open(testaugf, 'w').write("\n".join(test))
141 |         dirs = [vocjpg, masks_dir]
142 |         open(dirsf, 'w').write("\n".join(dirs))
143 |         with open(infof, 'w') as f:
144 |             f.write('\t'.join(['name', 'source', 'classes...']) + '\n')
145 |             for im in train + val:
146 |                 f.write(im + '\t' + source[im] + '\t' + '\t'.join(classes[im]) + '\n')
147 |         print("Saved cache in " + cache_fuse)
148 | 
149 |     train = SegSet('AugVocTrain',
150 |                    [Example(im, source[im], classes[im]) for im in train],
151 |                    vocjpg,
152 |                    pascal_classes,
153 |                    masks_dir,
154 |                    )
155 |     val = SegSet('AugVocVal',
156 |                  [Example(im, source[im], classes[im]) for im in val],
157 |                  vocjpg,
158 |                  pascal_classes,
159 |                  masks_dir,
160 |                  )
161 |     test = SegSet('AugVocTest',
162 |                   [Example(im, vocver) for im in test],
163 |                   vocjpg,
164 |                   pascal_classes,
165 |                   )
166 |     return train, val, test
167 | 
168 | 
169 | if __name__ == '__main__':
170 |     train, val, test = load_extended_voc()
171 | 
172 | 
173 | 
--------------------------------------------------------------------------------
/datasets/utils.py:
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Martin Kersner, m.kersner@gmail.com 3 | # 2016/03/11 4 | #** Maxim Berman ** modified from https://github.com/martinkersner/train-DeepLab/ 5 | 6 | import scipy.io 7 | import struct 8 | import numpy as np 9 | from PIL import Image 10 | 11 | import sys 12 | 13 | if 'ipykernel' in sys.modules: 14 | from tqdm import tqdm_notebook as tqdm 15 | else: 16 | from tqdm import tqdm 17 | from concurrent.futures import ProcessPoolExecutor, as_completed 18 | 19 | def pascal_classes(with_background=True, with_void=True, reverse=False): 20 | classes = {'aeroplane' : 1, 'bicycle' : 2, 'bird' : 3, 'boat' : 4, 21 | 'bottle' : 5, 'bus' : 6, 'car' : 7, 'cat' : 8, 22 | 'chair' : 9, 'cow' : 10, 'diningtable' : 11, 'dog' : 12, 23 | 'horse' : 13, 'motorbike' : 14, 'person' : 15, 'potted-plant' : 16, 24 | 'sheep' : 17, 'sofa' : 18, 'train' : 19, 'tv/monitor' : 20} 25 | if with_background: classes['background'] = 0 26 | if with_void: classes['void'] = 255 27 | if reverse: 28 | return {v: k for k, v in classes.iteritems()} 29 | return classes 30 | 31 | def pascal_palette(void=False): 32 | palette = {( 0, 0, 0) : 0 , 33 | (128, 0, 0) : 1 , 34 | ( 0, 128, 0) : 2 , 35 | (128, 128, 0) : 3 , 36 | ( 0, 0, 128) : 4 , 37 | (128, 0, 128) : 5 , 38 | ( 0, 128, 128) : 6 , 39 | (128, 128, 128) : 7 , 40 | ( 64, 0, 0) : 8 , 41 | (192, 0, 0) : 9 , 42 | ( 64, 128, 0) : 10, 43 | (192, 128, 0) : 11, 44 | ( 64, 0, 128) : 12, 45 | (192, 0, 128) : 13, 46 | ( 64, 128, 128) : 14, 47 | (192, 128, 128) : 15, 48 | ( 0, 64, 0) : 16, 49 | (128, 64, 0) : 17, 50 | ( 0, 192, 0) : 18, 51 | (128, 192, 0) : 19, 52 | ( 0, 64, 128) : 20 } 53 | if void: 54 | palette[( 224, 224, 192)] = 255 55 | 56 | return palette 57 | 58 | def array_to_segmentation(array): 59 | array = array.astype(np.uint8) 60 | lab = Image.fromarray(array, "P") 61 | cmap = [k for l in color_map() for k in l] 62 | lab.putpalette(cmap) 63 | return lab 64 | 65 | def pascal_palette_invert(): 66 | palette_list = pascal_palette().keys() 67 | palette = () 68 | 69 | for color in palette_list: 70 | palette += color 71 | 72 | return palette 73 | 74 | def color_map(N=256, normalized=False): 75 | def bitget(byteval, idx): 76 | return ((byteval & (1 << idx)) != 0) 77 | 78 | dtype = 'float32' if normalized else 'uint8' 79 | cmap = np.zeros((N, 3), dtype=dtype) 80 | for i in range(N): 81 | r = g = b = 0 82 | c = i 83 | for j in range(8): 84 | r = r | (bitget(c, 0) << 7-j) 85 | g = g | (bitget(c, 1) << 7-j) 86 | b = b | (bitget(c, 2) << 7-j) 87 | c = c >> 3 88 | 89 | cmap[i] = np.array([r, g, b]) 90 | 91 | cmap = cmap/255 if normalized else cmap 92 | return cmap 93 | 94 | def pascal_mean_values(): 95 | return np.array([103.939, 116.779, 123.68], dtype=np.float32) 96 | 97 | def strstr(str1, str2): 98 | if str1.find(str2) != -1: 99 | return True 100 | else: 101 | return False 102 | 103 | # Mat to png conversion for http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html 104 | # 'GTcls' key is for class segmentation 105 | # 'GTinst' key is for instance segmentation 106 | def mat2png_hariharan(mat_file, key='GTcls'): 107 | mat = scipy.io.loadmat(mat_file, mat_dtype=True, squeeze_me=True, struct_as_record=False) 108 | return mat[key].Segmentation 109 | 110 | def convert_segmentation_mat2numpy(mat_file): 111 | np_segm = load_mat(mat_file) 112 | return np.rot90(np.fliplr(np.argmax(np_segm, axis=2))) 113 | 114 | def load_mat(mat_file, key='data'): 115 | mat = scipy.io.loadmat(mat_file, 
                          mat_dtype=True, squeeze_me=True, struct_as_record=False)
116 |     return mat[key]
117 | 
118 | # Python version of script in code/densecrf/my_script/LoadBinFile.m
119 | def load_binary_segmentation(bin_file, dtype='int16'):
120 |     with open(bin_file, 'rb') as bf:
121 |         rows = struct.unpack('i', bf.read(4))[0]
122 |         cols = struct.unpack('i', bf.read(4))[0]
123 |         channels = struct.unpack('i', bf.read(4))[0]
124 | 
125 |         num_values = rows * cols  # expect only one channel in segmentation output
126 |         out = np.zeros(num_values, dtype=np.uint8)  # expect only values between 0 and 255
127 | 
128 |         for i in range(num_values):
129 |             out[i] = np.uint8(struct.unpack('h', bf.read(2))[0])
130 | 
131 |         return np.rot90(np.fliplr(out.reshape((cols, rows))))
132 | 
133 | def convert_from_color_segmentation(arr_3d, use_void=False):
134 |     arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8)
135 |     palette = pascal_palette(use_void)
136 | 
137 |     for c, i in palette.items():
138 |         m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2)
139 |         arr_2d[m] = i
140 | 
141 |     return arr_2d
142 | 
143 | def create_lut(class_ids, max_id=256):
144 |     # Index 0 is the first index used in caffe for denoting labels.
145 |     # Therefore, index 0 is considered as default.
146 |     lut = np.zeros(max_id, dtype=np.uint8)
147 | 
148 |     new_index = 1
149 |     for i in class_ids:
150 |         lut[i] = new_index
151 |         new_index += 1
152 | 
153 |     return lut
154 | 
155 | def get_id_classes(classes):
156 |     all_classes = pascal_classes()
157 |     id_classes = [all_classes[c] for c in classes]
158 |     return id_classes
159 | 
160 | def parallel_process(array, function, n_jobs=8, use_kwargs=False, front_num=3):
161 |     """
162 |     A parallel version of the map function with a progress bar.
163 | 
164 |     Args:
165 |         array (array-like): An array to iterate over.
166 |         function (function): A python function to apply to the elements of array
167 |         n_jobs (int, default=8): The number of cores to use
168 |         use_kwargs (boolean, default=False): Whether to consider the elements of array as dictionaries of
169 |             keyword arguments to function
170 |         front_num (int, default=3): The number of iterations to run serially before kicking off the parallel job.
171 |             Useful for catching bugs
172 |     Returns:
173 |         [function(array[0]), function(array[1]), ...]
174 |     """
175 |     # We run the first few iterations serially to catch bugs
176 |     # (an empty slice handles front_num == 0)
177 |     front = [function(**a) if use_kwargs else function(a) for a in array[:front_num]]
178 |     # If we set n_jobs to 1, just run a list comprehension. This is useful for benchmarking and debugging.
179 |     if n_jobs == 1:
180 |         return front + [function(**a) if use_kwargs else function(a) for a in tqdm(array[front_num:])]
181 |     # Assemble the workers
182 |     with ProcessPoolExecutor(max_workers=n_jobs) as pool:
183 |         # Pass the elements of array into function
184 |         if use_kwargs:
185 |             futures = [pool.submit(function, **a) for a in array[front_num:]]
186 |         else:
187 |             futures = [pool.submit(function, a) for a in array[front_num:]]
188 |         kwargs = {
189 |             'total': len(futures),
190 |             'unit': 'it',
191 |             'unit_scale': True,
192 |             'leave': True,
193 |             'smoothing': 0.1,
194 |         }
195 |         # Print out the progress as tasks complete
196 |         for f in tqdm(as_completed(futures), **kwargs):
197 |             pass
198 |     out = []
199 |     # Get the results from the futures.
200 |     for i, future in tqdm(enumerate(futures)):
201 |         try:
202 |             out.append(future.result())
203 |         except Exception as e:
204 |             out.append(e)
205 |     return front + out
--------------------------------------------------------------------------------
/deeplab_resnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bermanmaxim/jaccardSegment/d6cb4036805911a7cff80b6ab8eab7b4e54f3a7a/deeplab_resnet/__init__.py
--------------------------------------------------------------------------------
/deeplab_resnet/model.py:
--------------------------------------------------------------------------------
1 | # Converted to TensorFlow from the Caffe .caffemodel,
2 | # with the DeepLab-ResNet configuration.
3 | # The batch normalisation layer is provided by
4 | # the slim library (https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim).
5 | 
6 | from kaffe.tensorflow import Network
7 | import tensorflow as tf
8 | 
9 | class DeepLabResNetModel(Network):
10 |     def setup(self, is_training):
11 |         '''Network definition.
12 | 
13 |         Args:
14 |           is_training: whether to update the running mean and variance of the batch normalisation layer.
15 |                        If the batch size is small, it is better to keep the running mean and variance of
16 |                        the pretrained model frozen.
17 |         '''
18 |         (self.feed('data')
19 |              .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1')
20 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn_conv1')
21 |              .max_pool(3, 3, 2, 2, name='pool1')
22 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1')
23 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch1'))
24 | 
25 |         (self.feed('pool1')
26 |              .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a')
27 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2a')
28 |              .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b')
29 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2a_branch2b')
30 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c')
31 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch2c'))
32 | 
33 |         (self.feed('bn2a_branch1',
34 |                    'bn2a_branch2c')
35 |              .add(name='res2a')
36 |              .relu(name='res2a_relu')
37 |              .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a')
38 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2a')
39 |              .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b')
40 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2b_branch2b')
41 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c')
42 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2b_branch2c'))
43 | 
44 |         (self.feed('res2a_relu',
45 |                    'bn2b_branch2c')
46 |              .add(name='res2b')
47 |              .relu(name='res2b_relu')
48 |              .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a')
49 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2a')
50 |              .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b')
51 |              .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn2c_branch2b')
52 |              .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c')
53 |              .batch_normalization(is_training=is_training, activation_fn=None, name='bn2c_branch2c'))
54 | 55 | (self.feed('res2b_relu', 56 | 'bn2c_branch2c') 57 | .add(name='res2c') 58 | .relu(name='res2c_relu') 59 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') 60 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch1')) 61 | 62 | (self.feed('res2c_relu') 63 | .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') 64 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2a') 65 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') 66 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3a_branch2b') 67 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') 68 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch2c')) 69 | 70 | (self.feed('bn3a_branch1', 71 | 'bn3a_branch2c') 72 | .add(name='res3a') 73 | .relu(name='res3a_relu') 74 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2a') 75 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2a') 76 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2b') 77 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b1_branch2b') 78 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b1_branch2c') 79 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b1_branch2c')) 80 | 81 | (self.feed('res3a_relu', 82 | 'bn3b1_branch2c') 83 | .add(name='res3b1') 84 | .relu(name='res3b1_relu') 85 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2a') 86 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2a') 87 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2b') 88 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b2_branch2b') 89 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b2_branch2c') 90 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b2_branch2c')) 91 | 92 | (self.feed('res3b1_relu', 93 | 'bn3b2_branch2c') 94 | .add(name='res3b2') 95 | .relu(name='res3b2_relu') 96 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2a') 97 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2a') 98 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2b') 99 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn3b3_branch2b') 100 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b3_branch2c') 101 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b3_branch2c')) 102 | 103 | (self.feed('res3b2_relu', 104 | 'bn3b3_branch2c') 105 | .add(name='res3b3') 106 | .relu(name='res3b3_relu') 107 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch1') 108 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch1')) 109 | 110 | (self.feed('res3b3_relu') 111 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4a_branch2a') 112 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2a') 113 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4a_branch2b') 114 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4a_branch2b') 115 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') 
116 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch2c')) 117 | 118 | (self.feed('bn4a_branch1', 119 | 'bn4a_branch2c') 120 | .add(name='res4a') 121 | .relu(name='res4a_relu') 122 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b1_branch2a') 123 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2a') 124 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b1_branch2b') 125 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b1_branch2b') 126 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b1_branch2c') 127 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b1_branch2c')) 128 | 129 | (self.feed('res4a_relu', 130 | 'bn4b1_branch2c') 131 | .add(name='res4b1') 132 | .relu(name='res4b1_relu') 133 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b2_branch2a') 134 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2a') 135 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b2_branch2b') 136 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b2_branch2b') 137 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b2_branch2c') 138 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b2_branch2c')) 139 | 140 | (self.feed('res4b1_relu', 141 | 'bn4b2_branch2c') 142 | .add(name='res4b2') 143 | .relu(name='res4b2_relu') 144 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b3_branch2a') 145 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2a') 146 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b3_branch2b') 147 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b3_branch2b') 148 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b3_branch2c') 149 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b3_branch2c')) 150 | 151 | (self.feed('res4b2_relu', 152 | 'bn4b3_branch2c') 153 | .add(name='res4b3') 154 | .relu(name='res4b3_relu') 155 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b4_branch2a') 156 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2a') 157 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b4_branch2b') 158 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b4_branch2b') 159 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b4_branch2c') 160 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b4_branch2c')) 161 | 162 | (self.feed('res4b3_relu', 163 | 'bn4b4_branch2c') 164 | .add(name='res4b4') 165 | .relu(name='res4b4_relu') 166 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b5_branch2a') 167 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2a') 168 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b5_branch2b') 169 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b5_branch2b') 170 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b5_branch2c') 171 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b5_branch2c')) 172 | 173 | (self.feed('res4b4_relu', 174 | 'bn4b5_branch2c') 175 | 
.add(name='res4b5') 176 | .relu(name='res4b5_relu') 177 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b6_branch2a') 178 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2a') 179 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b6_branch2b') 180 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b6_branch2b') 181 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b6_branch2c') 182 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b6_branch2c')) 183 | 184 | (self.feed('res4b5_relu', 185 | 'bn4b6_branch2c') 186 | .add(name='res4b6') 187 | .relu(name='res4b6_relu') 188 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b7_branch2a') 189 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2a') 190 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b7_branch2b') 191 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b7_branch2b') 192 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b7_branch2c') 193 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b7_branch2c')) 194 | 195 | (self.feed('res4b6_relu', 196 | 'bn4b7_branch2c') 197 | .add(name='res4b7') 198 | .relu(name='res4b7_relu') 199 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b8_branch2a') 200 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2a') 201 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b8_branch2b') 202 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b8_branch2b') 203 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b8_branch2c') 204 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b8_branch2c')) 205 | 206 | (self.feed('res4b7_relu', 207 | 'bn4b8_branch2c') 208 | .add(name='res4b8') 209 | .relu(name='res4b8_relu') 210 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b9_branch2a') 211 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2a') 212 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b9_branch2b') 213 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b9_branch2b') 214 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b9_branch2c') 215 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b9_branch2c')) 216 | 217 | (self.feed('res4b8_relu', 218 | 'bn4b9_branch2c') 219 | .add(name='res4b9') 220 | .relu(name='res4b9_relu') 221 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b10_branch2a') 222 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2a') 223 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b10_branch2b') 224 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b10_branch2b') 225 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b10_branch2c') 226 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b10_branch2c')) 227 | 228 | (self.feed('res4b9_relu', 229 | 'bn4b10_branch2c') 230 | .add(name='res4b10') 231 | .relu(name='res4b10_relu') 232 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b11_branch2a') 233 | 
.batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2a') 234 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b11_branch2b') 235 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b11_branch2b') 236 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b11_branch2c') 237 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b11_branch2c')) 238 | 239 | (self.feed('res4b10_relu', 240 | 'bn4b11_branch2c') 241 | .add(name='res4b11') 242 | .relu(name='res4b11_relu') 243 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b12_branch2a') 244 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2a') 245 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b12_branch2b') 246 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b12_branch2b') 247 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b12_branch2c') 248 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b12_branch2c')) 249 | 250 | (self.feed('res4b11_relu', 251 | 'bn4b12_branch2c') 252 | .add(name='res4b12') 253 | .relu(name='res4b12_relu') 254 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b13_branch2a') 255 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2a') 256 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b13_branch2b') 257 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b13_branch2b') 258 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b13_branch2c') 259 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b13_branch2c')) 260 | 261 | (self.feed('res4b12_relu', 262 | 'bn4b13_branch2c') 263 | .add(name='res4b13') 264 | .relu(name='res4b13_relu') 265 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b14_branch2a') 266 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2a') 267 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b14_branch2b') 268 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b14_branch2b') 269 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b14_branch2c') 270 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b14_branch2c')) 271 | 272 | (self.feed('res4b13_relu', 273 | 'bn4b14_branch2c') 274 | .add(name='res4b14') 275 | .relu(name='res4b14_relu') 276 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b15_branch2a') 277 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2a') 278 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b15_branch2b') 279 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b15_branch2b') 280 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b15_branch2c') 281 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b15_branch2c')) 282 | 283 | (self.feed('res4b14_relu', 284 | 'bn4b15_branch2c') 285 | .add(name='res4b15') 286 | .relu(name='res4b15_relu') 287 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b16_branch2a') 288 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2a') 289 | .atrous_conv(3, 3, 
256, 2, padding='SAME', biased=False, relu=False, name='res4b16_branch2b') 290 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b16_branch2b') 291 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b16_branch2c') 292 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b16_branch2c')) 293 | 294 | (self.feed('res4b15_relu', 295 | 'bn4b16_branch2c') 296 | .add(name='res4b16') 297 | .relu(name='res4b16_relu') 298 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b17_branch2a') 299 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2a') 300 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b17_branch2b') 301 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b17_branch2b') 302 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b17_branch2c') 303 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b17_branch2c')) 304 | 305 | (self.feed('res4b16_relu', 306 | 'bn4b17_branch2c') 307 | .add(name='res4b17') 308 | .relu(name='res4b17_relu') 309 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b18_branch2a') 310 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2a') 311 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b18_branch2b') 312 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b18_branch2b') 313 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b18_branch2c') 314 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b18_branch2c')) 315 | 316 | (self.feed('res4b17_relu', 317 | 'bn4b18_branch2c') 318 | .add(name='res4b18') 319 | .relu(name='res4b18_relu') 320 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b19_branch2a') 321 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2a') 322 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b19_branch2b') 323 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b19_branch2b') 324 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b19_branch2c') 325 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b19_branch2c')) 326 | 327 | (self.feed('res4b18_relu', 328 | 'bn4b19_branch2c') 329 | .add(name='res4b19') 330 | .relu(name='res4b19_relu') 331 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b20_branch2a') 332 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2a') 333 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b20_branch2b') 334 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b20_branch2b') 335 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b20_branch2c') 336 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b20_branch2c')) 337 | 338 | (self.feed('res4b19_relu', 339 | 'bn4b20_branch2c') 340 | .add(name='res4b20') 341 | .relu(name='res4b20_relu') 342 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b21_branch2a') 343 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2a') 344 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b21_branch2b') 345 | 
.batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b21_branch2b') 346 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b21_branch2c') 347 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b21_branch2c')) 348 | 349 | (self.feed('res4b20_relu', 350 | 'bn4b21_branch2c') 351 | .add(name='res4b21') 352 | .relu(name='res4b21_relu') 353 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b22_branch2a') 354 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2a') 355 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b22_branch2b') 356 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn4b22_branch2b') 357 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b22_branch2c') 358 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b22_branch2c')) 359 | 360 | (self.feed('res4b21_relu', 361 | 'bn4b22_branch2c') 362 | .add(name='res4b22') 363 | .relu(name='res4b22_relu') 364 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1') 365 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1')) 366 | 367 | (self.feed('res4b22_relu') 368 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5a_branch2a') 369 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2a') 370 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5a_branch2b') 371 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5a_branch2b') 372 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') 373 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch2c')) 374 | 375 | (self.feed('bn5a_branch1', 376 | 'bn5a_branch2c') 377 | .add(name='res5a') 378 | .relu(name='res5a_relu') 379 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') 380 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2a') 381 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5b_branch2b') 382 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5b_branch2b') 383 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') 384 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5b_branch2c')) 385 | 386 | (self.feed('res5a_relu', 387 | 'bn5b_branch2c') 388 | .add(name='res5b') 389 | .relu(name='res5b_relu') 390 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') 391 | .batch_normalization(is_training=is_training, activation_fn=tf.nn.relu, name='bn5c_branch2a') 392 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5c_branch2b') 393 | .batch_normalization(activation_fn=tf.nn.relu, name='bn5c_branch2b', is_training=is_training) 394 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') 395 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5c_branch2c')) 396 | 397 | (self.feed('res5b_relu', 398 | 'bn5c_branch2c') 399 | .add(name='res5c') 400 | .relu(name='res5c_relu') 401 | .atrous_conv(3, 3, 21, 6, padding='SAME', relu=False, name='fc1_voc12_c0')) 402 | 403 | (self.feed('res5c_relu') 404 | .atrous_conv(3, 3, 21, 12, padding='SAME', relu=False, name='fc1_voc12_c1')) 405 | 406 | (self.feed('res5c_relu') 407 | 
.atrous_conv(3, 3, 21, 18, padding='SAME', relu=False, name='fc1_voc12_c2')) 408 | 409 | (self.feed('res5c_relu') 410 | .atrous_conv(3, 3, 21, 24, padding='SAME', relu=False, name='fc1_voc12_c3')) 411 | 412 | (self.feed('fc1_voc12_c0', 413 | 'fc1_voc12_c1', 414 | 'fc1_voc12_c2', 415 | 'fc1_voc12_c3') 416 | .add(name='fc1_voc12')) 417 | -------------------------------------------------------------------------------- /deeplab_resnet/model_pytorch.py: -------------------------------------------------------------------------------- 1 | # Converted by kaffe to tensorflow 2 | # removed tf references 3 | # for loading in pytorch. 4 | 5 | # from kaffe.tensorflow import Network 6 | 7 | from network_pytorch import Network 8 | 9 | class DeepLabResNetModel(Network): 10 | def setup(self, is_training): 11 | '''Network definition. 12 | 13 | Args: 14 | is_training: whether to update the running mean and variance of the batch normalisation layer. 15 | If the batch size is small, it is better to keep the running mean and variance of 16 | the-pretrained model frozen. 17 | ''' 18 | (self.feed('data') 19 | .conv(7, 7, 64, 2, 2, biased=False, relu=False, name='conv1') 20 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn_conv1') 21 | .max_pool(3, 3, 2, 2, name='pool1') 22 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch1') 23 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch1')) 24 | 25 | (self.feed('pool1') 26 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2a_branch2a') 27 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2a_branch2a') 28 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2a_branch2b') 29 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2a_branch2b') 30 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2a_branch2c') 31 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2a_branch2c')) 32 | 33 | (self.feed('bn2a_branch1', 34 | 'bn2a_branch2c') 35 | .add(name='res2a') 36 | .relu(name='res2a_relu') 37 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2b_branch2a') 38 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2b_branch2a') 39 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2b_branch2b') 40 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2b_branch2b') 41 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2b_branch2c') 42 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2b_branch2c')) 43 | 44 | (self.feed('res2a_relu', 45 | 'bn2b_branch2c') 46 | .add(name='res2b') 47 | .relu(name='res2b_relu') 48 | .conv(1, 1, 64, 1, 1, biased=False, relu=False, name='res2c_branch2a') 49 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2c_branch2a') 50 | .conv(3, 3, 64, 1, 1, biased=False, relu=False, name='res2c_branch2b') 51 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn2c_branch2b') 52 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res2c_branch2c') 53 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn2c_branch2c')) 54 | 55 | (self.feed('res2b_relu', 56 | 'bn2c_branch2c') 57 | .add(name='res2c') 58 | .relu(name='res2c_relu') 59 | .conv(1, 1, 512, 2, 2, biased=False, relu=False, name='res3a_branch1') 60 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch1')) 61 | 62 | 
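        # Every residual unit below repeats the same bottleneck pattern: a 1x1
        # conv shrinks the channel count, a 3x3 conv (atrous with dilation 2 in
        # the res4 blocks and dilation 4 in the res5 blocks) processes the
        # features, and a 1x1 conv restores the width. Batch norm follows each
        # conv (with a fused ReLU on the first two), and the branch is summed
        # with the identity (or 1x1-projected) shortcut before the final ReLU.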
(self.feed('res2c_relu') 63 | .conv(1, 1, 128, 2, 2, biased=False, relu=False, name='res3a_branch2a') 64 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3a_branch2a') 65 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3a_branch2b') 66 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3a_branch2b') 67 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3a_branch2c') 68 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3a_branch2c')) 69 | 70 | (self.feed('bn3a_branch1', 71 | 'bn3a_branch2c') 72 | .add(name='res3a') 73 | .relu(name='res3a_relu') 74 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2a') 75 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b1_branch2a') 76 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b1_branch2b') 77 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b1_branch2b') 78 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b1_branch2c') 79 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b1_branch2c')) 80 | 81 | (self.feed('res3a_relu', 82 | 'bn3b1_branch2c') 83 | .add(name='res3b1') 84 | .relu(name='res3b1_relu') 85 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2a') 86 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b2_branch2a') 87 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b2_branch2b') 88 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b2_branch2b') 89 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b2_branch2c') 90 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b2_branch2c')) 91 | 92 | (self.feed('res3b1_relu', 93 | 'bn3b2_branch2c') 94 | .add(name='res3b2') 95 | .relu(name='res3b2_relu') 96 | .conv(1, 1, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2a') 97 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b3_branch2a') 98 | .conv(3, 3, 128, 1, 1, biased=False, relu=False, name='res3b3_branch2b') 99 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn3b3_branch2b') 100 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res3b3_branch2c') 101 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn3b3_branch2c')) 102 | 103 | (self.feed('res3b2_relu', 104 | 'bn3b3_branch2c') 105 | .add(name='res3b3') 106 | .relu(name='res3b3_relu') 107 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch1') 108 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch1')) 109 | 110 | (self.feed('res3b3_relu') 111 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4a_branch2a') 112 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4a_branch2a') 113 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4a_branch2b') 114 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4a_branch2b') 115 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4a_branch2c') 116 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4a_branch2c')) 117 | 118 | (self.feed('bn4a_branch1', 119 | 'bn4a_branch2c') 120 | .add(name='res4a') 121 | .relu(name='res4a_relu') 122 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b1_branch2a') 123 | 
.batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b1_branch2a') 124 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b1_branch2b') 125 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b1_branch2b') 126 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b1_branch2c') 127 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b1_branch2c')) 128 | 129 | (self.feed('res4a_relu', 130 | 'bn4b1_branch2c') 131 | .add(name='res4b1') 132 | .relu(name='res4b1_relu') 133 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b2_branch2a') 134 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b2_branch2a') 135 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b2_branch2b') 136 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b2_branch2b') 137 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b2_branch2c') 138 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b2_branch2c')) 139 | 140 | (self.feed('res4b1_relu', 141 | 'bn4b2_branch2c') 142 | .add(name='res4b2') 143 | .relu(name='res4b2_relu') 144 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b3_branch2a') 145 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b3_branch2a') 146 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b3_branch2b') 147 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b3_branch2b') 148 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b3_branch2c') 149 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b3_branch2c')) 150 | 151 | (self.feed('res4b2_relu', 152 | 'bn4b3_branch2c') 153 | .add(name='res4b3') 154 | .relu(name='res4b3_relu') 155 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b4_branch2a') 156 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b4_branch2a') 157 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b4_branch2b') 158 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b4_branch2b') 159 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b4_branch2c') 160 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b4_branch2c')) 161 | 162 | (self.feed('res4b3_relu', 163 | 'bn4b4_branch2c') 164 | .add(name='res4b4') 165 | .relu(name='res4b4_relu') 166 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b5_branch2a') 167 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b5_branch2a') 168 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b5_branch2b') 169 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b5_branch2b') 170 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b5_branch2c') 171 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b5_branch2c')) 172 | 173 | (self.feed('res4b4_relu', 174 | 'bn4b5_branch2c') 175 | .add(name='res4b5') 176 | .relu(name='res4b5_relu') 177 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b6_branch2a') 178 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b6_branch2a') 179 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b6_branch2b') 180 | 
.batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b6_branch2b') 181 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b6_branch2c') 182 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b6_branch2c')) 183 | 184 | (self.feed('res4b5_relu', 185 | 'bn4b6_branch2c') 186 | .add(name='res4b6') 187 | .relu(name='res4b6_relu') 188 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b7_branch2a') 189 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b7_branch2a') 190 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b7_branch2b') 191 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b7_branch2b') 192 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b7_branch2c') 193 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b7_branch2c')) 194 | 195 | (self.feed('res4b6_relu', 196 | 'bn4b7_branch2c') 197 | .add(name='res4b7') 198 | .relu(name='res4b7_relu') 199 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b8_branch2a') 200 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b8_branch2a') 201 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b8_branch2b') 202 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b8_branch2b') 203 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b8_branch2c') 204 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b8_branch2c')) 205 | 206 | (self.feed('res4b7_relu', 207 | 'bn4b8_branch2c') 208 | .add(name='res4b8') 209 | .relu(name='res4b8_relu') 210 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b9_branch2a') 211 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b9_branch2a') 212 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b9_branch2b') 213 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b9_branch2b') 214 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b9_branch2c') 215 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b9_branch2c')) 216 | 217 | (self.feed('res4b8_relu', 218 | 'bn4b9_branch2c') 219 | .add(name='res4b9') 220 | .relu(name='res4b9_relu') 221 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b10_branch2a') 222 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b10_branch2a') 223 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b10_branch2b') 224 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b10_branch2b') 225 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b10_branch2c') 226 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b10_branch2c')) 227 | 228 | (self.feed('res4b9_relu', 229 | 'bn4b10_branch2c') 230 | .add(name='res4b10') 231 | .relu(name='res4b10_relu') 232 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b11_branch2a') 233 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b11_branch2a') 234 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b11_branch2b') 235 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b11_branch2b') 236 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b11_branch2c') 237 | 
.batch_normalization(is_training=is_training, activation_fn=None, name='bn4b11_branch2c')) 238 | 239 | (self.feed('res4b10_relu', 240 | 'bn4b11_branch2c') 241 | .add(name='res4b11') 242 | .relu(name='res4b11_relu') 243 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b12_branch2a') 244 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b12_branch2a') 245 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b12_branch2b') 246 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b12_branch2b') 247 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b12_branch2c') 248 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b12_branch2c')) 249 | 250 | (self.feed('res4b11_relu', 251 | 'bn4b12_branch2c') 252 | .add(name='res4b12') 253 | .relu(name='res4b12_relu') 254 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b13_branch2a') 255 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b13_branch2a') 256 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b13_branch2b') 257 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b13_branch2b') 258 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b13_branch2c') 259 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b13_branch2c')) 260 | 261 | (self.feed('res4b12_relu', 262 | 'bn4b13_branch2c') 263 | .add(name='res4b13') 264 | .relu(name='res4b13_relu') 265 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b14_branch2a') 266 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b14_branch2a') 267 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b14_branch2b') 268 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b14_branch2b') 269 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b14_branch2c') 270 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b14_branch2c')) 271 | 272 | (self.feed('res4b13_relu', 273 | 'bn4b14_branch2c') 274 | .add(name='res4b14') 275 | .relu(name='res4b14_relu') 276 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b15_branch2a') 277 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b15_branch2a') 278 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b15_branch2b') 279 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b15_branch2b') 280 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b15_branch2c') 281 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b15_branch2c')) 282 | 283 | (self.feed('res4b14_relu', 284 | 'bn4b15_branch2c') 285 | .add(name='res4b15') 286 | .relu(name='res4b15_relu') 287 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b16_branch2a') 288 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b16_branch2a') 289 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b16_branch2b') 290 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b16_branch2b') 291 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b16_branch2c') 292 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b16_branch2c')) 293 | 294 | (self.feed('res4b15_relu', 295 | 'bn4b16_branch2c') 296 | 
.add(name='res4b16') 297 | .relu(name='res4b16_relu') 298 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b17_branch2a') 299 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b17_branch2a') 300 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b17_branch2b') 301 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b17_branch2b') 302 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b17_branch2c') 303 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b17_branch2c')) 304 | 305 | (self.feed('res4b16_relu', 306 | 'bn4b17_branch2c') 307 | .add(name='res4b17') 308 | .relu(name='res4b17_relu') 309 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b18_branch2a') 310 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b18_branch2a') 311 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b18_branch2b') 312 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b18_branch2b') 313 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b18_branch2c') 314 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b18_branch2c')) 315 | 316 | (self.feed('res4b17_relu', 317 | 'bn4b18_branch2c') 318 | .add(name='res4b18') 319 | .relu(name='res4b18_relu') 320 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b19_branch2a') 321 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b19_branch2a') 322 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b19_branch2b') 323 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b19_branch2b') 324 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b19_branch2c') 325 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b19_branch2c')) 326 | 327 | (self.feed('res4b18_relu', 328 | 'bn4b19_branch2c') 329 | .add(name='res4b19') 330 | .relu(name='res4b19_relu') 331 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b20_branch2a') 332 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b20_branch2a') 333 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b20_branch2b') 334 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b20_branch2b') 335 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b20_branch2c') 336 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b20_branch2c')) 337 | 338 | (self.feed('res4b19_relu', 339 | 'bn4b20_branch2c') 340 | .add(name='res4b20') 341 | .relu(name='res4b20_relu') 342 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b21_branch2a') 343 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b21_branch2a') 344 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b21_branch2b') 345 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b21_branch2b') 346 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b21_branch2c') 347 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b21_branch2c')) 348 | 349 | (self.feed('res4b20_relu', 350 | 'bn4b21_branch2c') 351 | .add(name='res4b21') 352 | .relu(name='res4b21_relu') 353 | .conv(1, 1, 256, 1, 1, biased=False, relu=False, name='res4b22_branch2a') 354 | 
.batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b22_branch2a') 355 | .atrous_conv(3, 3, 256, 2, padding='SAME', biased=False, relu=False, name='res4b22_branch2b') 356 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn4b22_branch2b') 357 | .conv(1, 1, 1024, 1, 1, biased=False, relu=False, name='res4b22_branch2c') 358 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn4b22_branch2c')) 359 | 360 | (self.feed('res4b21_relu', 361 | 'bn4b22_branch2c') 362 | .add(name='res4b22') 363 | .relu(name='res4b22_relu') 364 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch1') 365 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch1')) 366 | 367 | (self.feed('res4b22_relu') 368 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5a_branch2a') 369 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5a_branch2a') 370 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5a_branch2b') 371 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5a_branch2b') 372 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5a_branch2c') 373 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5a_branch2c')) 374 | 375 | (self.feed('bn5a_branch1', 376 | 'bn5a_branch2c') 377 | .add(name='res5a') 378 | .relu(name='res5a_relu') 379 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5b_branch2a') 380 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5b_branch2a') 381 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5b_branch2b') 382 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5b_branch2b') 383 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5b_branch2c') 384 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5b_branch2c')) 385 | 386 | (self.feed('res5a_relu', 387 | 'bn5b_branch2c') 388 | .add(name='res5b') 389 | .relu(name='res5b_relu') 390 | .conv(1, 1, 512, 1, 1, biased=False, relu=False, name='res5c_branch2a') 391 | .batch_normalization(is_training=is_training, activation_fn='relu', name='bn5c_branch2a') 392 | .atrous_conv(3, 3, 512, 4, padding='SAME', biased=False, relu=False, name='res5c_branch2b') 393 | .batch_normalization(activation_fn='relu', name='bn5c_branch2b', is_training=is_training) 394 | .conv(1, 1, 2048, 1, 1, biased=False, relu=False, name='res5c_branch2c') 395 | .batch_normalization(is_training=is_training, activation_fn=None, name='bn5c_branch2c')) 396 | 397 | (self.feed('res5b_relu', 398 | 'bn5c_branch2c') 399 | .add(name='res5c') 400 | .relu(name='res5c_relu') 401 | .atrous_conv(3, 3, 21, 6, padding='SAME', relu=False, name='fc1_voc12_c0')) 402 | 403 | (self.feed('res5c_relu') 404 | .atrous_conv(3, 3, 21, 12, padding='SAME', relu=False, name='fc1_voc12_c1')) 405 | 406 | (self.feed('res5c_relu') 407 | .atrous_conv(3, 3, 21, 18, padding='SAME', relu=False, name='fc1_voc12_c2')) 408 | 409 | (self.feed('res5c_relu') 410 | .atrous_conv(3, 3, 21, 24, padding='SAME', relu=False, name='fc1_voc12_c3')) 411 | 412 | (self.feed('fc1_voc12_c0', 413 | 'fc1_voc12_c1', 414 | 'fc1_voc12_c2', 415 | 'fc1_voc12_c3') 416 | .add(name='fc1_voc12')) 417 | -------------------------------------------------------------------------------- /deeplab_resnet/network_pytorch.py: 
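The `DeepLabResNetModel` above never allocates parameters itself: the `Network` base class listed next looks every weight up by layer name in a plain dict, so constructing the model object *is* the forward pass, and every named intermediate ends up in `.layers`. A minimal inference sketch along the lines of `eval_pytorch.py` follows; the `.h5` path and the 321x321 crop size are illustrative, not fixed by the code.

```python
import torch
import deepdish as dd
from torch.autograd import Variable
import deeplab_resnet.model_pytorch as modelpy

weights = dd.io.load('deeplab_resnet.h5')  # produced by ckpt_to_dd.py
var = {}
for k, v in weights.items():
    v = torch.from_numpy(v).cuda()
    # Running batch-norm statistics stay plain tensors; everything else
    # becomes a Variable so it can later receive gradients.
    if not (k.endswith('moving_mean') or k.endswith('moving_variance')):
        v = Variable(v)
    var[k] = v

image = Variable(torch.rand(1, 3, 321, 321).cuda(), volatile=True)
# Instantiating the model runs setup(); 'fc1_voc12' is the sum of the four
# dilated classifier branches, i.e. the fused 21-class logit map.
logits = modelpy.DeepLabResNetModel({'data': image}, var).layers['fc1_voc12']
print(logits.size())  # (1, 21, 41, 41): the network has an output stride of 8
```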
-------------------------------------------------------------------------------- 1 | # adapted from kaffe to load pytorch functional functions 2 | 3 | 4 | import numpy as np 5 | import torch 6 | import torch.nn.functional as F 7 | 8 | DEFAULT_PADDING = 'SAME' 9 | DEBUG_SIZES = False 10 | DEBUG_NAMES = False 11 | 12 | def layer(op): 13 | '''Decorator for composable network layers.''' 14 | 15 | def layer_decorated(self, *args, **kwargs): 16 | # Automatically set a name if not provided. 17 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 18 | # Figure out the layer inputs. 19 | if len(self.terminals) == 0: 20 | raise RuntimeError('No input variables found for layer %s.' % name) 21 | elif len(self.terminals) == 1: 22 | layer_input = self.terminals[0] 23 | if DEBUG_SIZES: print(layer_input.size()) 24 | else: 25 | layer_input = list(self.terminals) 26 | if DEBUG_SIZES: print([i.size() for i in layer_input]) 27 | if DEBUG_NAMES: print(name) 28 | # Perform the operation and get the output. 29 | layer_output = op(self, layer_input, *args, **kwargs) 30 | # Add to layer LUT. 31 | self.layers[name] = layer_output 32 | # This output is now the input for the next layer. 33 | self.feed(layer_output) 34 | # Return self for chained calls. 35 | return self 36 | 37 | return layer_decorated 38 | 39 | def pad_if_needed(input, padding, kind, k_h, k_w, s_h=1, s_w=1, dilation=1): 40 | if padding == 'VALID': 41 | return input 42 | elif padding == 'SAME' and kind in ('conv2d', 'pool2d'): 43 | in_height, in_width = input.size(2), input.size(3) 44 | out_height = int(np.ceil(float(in_height) / float(s_h))) 45 | out_width = int(np.ceil(float(in_width) / float(s_w))) 46 | 47 | pad_along_height = max((out_height - 1) * s_h + k_h - in_height, 0) 48 | pad_along_width = max((out_width - 1) * s_w + k_w - in_width, 0) 49 | pad_top = pad_along_height // 2 50 | pad_bottom = pad_along_height - pad_top 51 | pad_left = pad_along_width // 2 52 | pad_right = pad_along_width - pad_left 53 | input = F.pad(input, (pad_left, pad_right, pad_top, pad_bottom)) 54 | return input 55 | elif kind in ('atrous_conv2d',): 56 | effective_height = k_h + (k_h - 1) * (dilation - 1) 57 | effective_width = k_w + (k_w - 1) * (dilation - 1) 58 | return pad_if_needed(input, padding, 'conv2d', effective_height, effective_width, s_h, s_w, dilation=1) 59 | else: 60 | raise NotImplementedError 61 | 62 | 63 | 64 | class Network(object): 65 | 66 | def __init__(self, inputs, weights, trainable=True, is_training=False): 67 | # The input nodes for this network 68 | self.inputs = inputs 69 | self.weights = weights 70 | # The current list of terminal nodes 71 | self.terminals = [] 72 | # Mapping from layer names to layers 73 | self.layers = dict(inputs) 74 | # If true, the resulting variables are set as trainable 75 | self.trainable = trainable 76 | # Switch variable for dropout 77 | self.use_dropout = 1.0 78 | self.setup(is_training) 79 | 80 | def feed(self, *args): 81 | '''Set the input(s) for the next operation by replacing the terminal nodes. 82 | The arguments can be either layer names or the actual layers. 
83 | ''' 84 | assert len(args) != 0 85 | self.terminals = [] 86 | for fed_layer in args: 87 | if isinstance(fed_layer, basestring): 88 | try: 89 | fed_layer = self.layers[fed_layer] 90 | except KeyError: 91 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 92 | self.terminals.append(fed_layer) 93 | return self 94 | 95 | def get_output(self): 96 | '''Returns the current network output.''' 97 | return self.terminals[-1] 98 | 99 | def get_unique_name(self, prefix): 100 | '''Returns an index-suffixed unique name for the given prefix. 101 | This is used for auto-generating layer names based on the type-prefix. 102 | ''' 103 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 104 | return '%s_%d' % (prefix, ident) 105 | 106 | def validate_padding(self, padding): 107 | '''Verifies that the padding is one of the supported ones.''' 108 | assert padding in ('SAME', 'VALID') 109 | 110 | @layer 111 | def conv(self, 112 | input, 113 | k_h, 114 | k_w, 115 | c_o, 116 | s_h, 117 | s_w, 118 | name, 119 | relu=True, 120 | padding=DEFAULT_PADDING, 121 | group=1, 122 | biased=True): 123 | input = pad_if_needed(input, padding, 'conv2d', k_h, k_w, s_h, s_w) 124 | 125 | result = F.conv2d(input, 126 | self.weights[name + '/weights'], 127 | bias=self.weights[name + '/biases'] if biased else None, 128 | padding=0, 129 | groups=group, 130 | stride=(s_h, s_w)) 131 | if relu: 132 | result = F.relu(result) 133 | return result 134 | 135 | @layer 136 | def atrous_conv(self, 137 | input, 138 | k_h, 139 | k_w, 140 | c_o, 141 | dilation, 142 | name, 143 | relu=True, 144 | padding=DEFAULT_PADDING, 145 | group=1, 146 | biased=True): 147 | if group != 1: 148 | raise NotImplementedError 149 | input = pad_if_needed(input, padding, 'atrous_conv2d', k_h, k_w, dilation=dilation) 150 | 151 | result = F.conv2d(input, 152 | self.weights[name + '/weights'], 153 | bias=self.weights[name + '/biases'] if biased else None, 154 | padding=0, 155 | dilation=dilation, 156 | groups=group, 157 | stride=1) 158 | if relu: 159 | result = F.relu(result) 160 | return result 161 | 162 | @layer 163 | def relu(self, input, name): 164 | return F.relu(input) 165 | 166 | @layer 167 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 168 | input = pad_if_needed(input, padding, 'pool2d', k_h, k_w, s_h, s_w) 169 | 170 | return F.max_pool2d(input, 171 | kernel_size=(k_h, k_w), 172 | stride=(s_h, s_w), 173 | padding=0) 174 | 175 | @layer 176 | def add(self, inputs, name): 177 | return sum(inputs) 178 | 179 | @layer 180 | def batch_normalization(self, input, # other arguments are ignored 181 | name, is_training, activation_fn=None, scale=True, eps=0.001): 182 | output = F.batch_norm(input, self.weights[name + '/moving_mean'], self.weights[name + '/moving_variance'], 183 | weight=self.weights[name + '/gamma'], bias=self.weights[name + '/beta'], eps=eps) 184 | if activation_fn is not None: 185 | if activation_fn == 'relu': 186 | output = F.relu(output) 187 | else: 188 | raise NotImplementedError 189 | return output -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | # conda requirements file 2 | name: jaccard-segment 3 | channels: 4 | - soumith 5 | - defaults 6 | dependencies: 7 | - python=2 8 | - ipykernel 9 | - matplotlib 10 | - numpy 11 | - scikit-image 12 | - scipy 13 | - cuda80 14 | - pytorch 15 | - torchvision 16 | - pyyaml 17 | - pip: 18 | - hickle 19 | - pillow-simd 20 | 
- https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.0.1-cp27-none-linux_x86_64.whl; 'linux' in sys_platform 21 | - tqdm 22 | - futures 23 | - tensorboard 24 | 25 | -------------------------------------------------------------------------------- /eval_pytorch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import argparse 4 | from datetime import datetime 5 | import os, sys 6 | from os.path import join 7 | import time 8 | import re 9 | import platform 10 | 11 | import numpy as np 12 | 13 | import torch 14 | from torch import optim 15 | from torch.autograd import Variable 16 | import torch.utils.data as data 17 | import torch.nn.functional as F 18 | 19 | import random 20 | # WARNING: if multiple worker threads, the seeds are useless. 21 | random.seed(1857) 22 | torch.manual_seed(1857) 23 | torch.cuda.manual_seed(1857) 24 | 25 | from settings import get_arguments 26 | import datasets 27 | from datasets.loadvoc import load_extended_voc 28 | from compose import (JointCompose, RandomScale, Normalize, 29 | RandomHorizontalFlip, RandomCropPad, PILtoTensor, Scale, TensortoPIL) 30 | from PIL.Image import NEAREST 31 | 32 | from losses import * 33 | 34 | import deepdish as dd 35 | import deeplab_resnet.model_pytorch as modelpy 36 | from collections import defaultdict 37 | import yaml 38 | 39 | IGNORE_LABEL = 255 40 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 41 | 42 | def create_variables(weights, cuda=True): 43 | var = dict() 44 | for k, v in weights.items(): 45 | v = torch.from_numpy(v) 46 | if cuda: 47 | v = v.cuda() 48 | if not (k.endswith('moving_mean') or k.endswith('moving_variance')): 49 | v = Variable(v) 50 | var[k] = v 51 | return var 52 | 53 | def snapshot_variables(weights, dest): 54 | out = {} 55 | for (k, v) in weights.items(): 56 | if isinstance(v, Variable): 57 | v = v.data 58 | out[k] = v.cpu().numpy() 59 | dd.io.save(dest, out) 60 | 61 | def training_groups(weights, base_lr, multipliers=[0.1, 1.0, 1.0], train_last=-1, hybrid=False): # multipliers=[1.0, 10.0, 20.0] 62 | """ 63 | get training groups and activates requires_grad for variables 64 | train_last: last: only train last ... 
layers 65 | hybrid: if hybrid, train all layers but set momentum to 0 on last layers 66 | """ 67 | fixed = ['moving_mean', 'moving_variance', 'beta', 'gamma'] 68 | # get training variables, with their lr 69 | trained = {k: v for (k, v) in weights.iteritems() if not any([k.endswith(s) for s in fixed])} 70 | for v in trained.values(): 71 | v.requires_grad = True 72 | fc_vars = {k: v for (k, v) in trained.iteritems() if 'fc' in k} 73 | conv_vars = [v for (k, v) in trained.items() if 'fc' not in k] # lr * 1.0 74 | fc_w_vars = [v for (k, v) in fc_vars.items() if 'weights' in k] # lr * 10.0 75 | fc_b_vars = [v for (k, v) in fc_vars.items() if 'biases' in k] # lr * 20.0 76 | assert(len(trained) == len(fc_vars) + len(conv_vars)) 77 | assert(len(fc_vars) == len(fc_w_vars) + len(fc_b_vars)) 78 | if train_last == -1: 79 | print("train all layers") 80 | groups = [{'params': conv_vars, 'lr': multipliers[0] * base_lr}, 81 | {'params': fc_w_vars, 'lr': multipliers[1] * base_lr}, 82 | {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}] 83 | elif train_last == 1: 84 | print("train last layer only") 85 | for v in conv_vars: 86 | v.requires_grad = False 87 | groups = [{'params': fc_w_vars, 'lr': multipliers[1] * base_lr}, 88 | {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}] 89 | return groups 90 | 91 | class SegsetWrap(data.Dataset): 92 | def __init__(self, segset, transform=None): 93 | self.name = segset.name 94 | self.segset = segset 95 | self.transform = transform 96 | def __repr__(self): 97 | return "" 98 | def __getitem__(self, i): 99 | inputs = self.segset.read(i, kind="PIL") 100 | if self.transform is not None: 101 | inputs = self.transform(inputs) 102 | return inputs 103 | def __len__(self): 104 | return len(self.segset) 105 | 106 | def main(args): 107 | 108 | print(os.path.basename(__file__), 'arguments:') 109 | print(yaml.dump(vars(args), default_flow_style=False)) 110 | 111 | weights = dd.io.load(args.restore_from) 112 | print('Loaded weights from {}'.format(args.restore_from)) 113 | weights = create_variables(weights, cuda=True) 114 | forward = lambda input: modelpy.DeepLabResNetModel({'data': input}, weights).layers['fc1_voc12'] 115 | train, val, test = load_extended_voc() 116 | input_size = map(int, args.input_size.split(',')) if args.input_size is not None else None 117 | print ('========') 118 | 119 | if args.proximal: 120 | assert args.jaccard 121 | 122 | if args.binary == -1: 123 | print("Multiclass: loss set to cross-entropy") 124 | lossfn, lossname = crossentropyloss, 'xloss' 125 | otherlossfn = None 126 | else: 127 | print("Binary: loss set to hingeloss") 128 | if args.jaccard: 129 | lossfn, lossname = lovaszloss, 'lovaszloss' 130 | otherlossfn, otherlossname = hingeloss, 'hingeloss' 131 | elif args.softmax: 132 | lossfn, lossname = binaryXloss, 'binxloss' 133 | otherlossfn = None 134 | else: 135 | lossfn, lossname = hingeloss, 'hingeloss' 136 | otherlossfn, otherlossname = lovaszloss, 'lovaszloss' 137 | train, val = train.binarize(args.binary_str), val.binarize(args.binary_str) 138 | 139 | 140 | # get network output size 141 | dummy_input = torch.rand((1, 3, input_size[0], input_size[1])).cuda() 142 | dummy_out = forward(Variable(dummy_input, volatile=True)) 143 | output_size = (dummy_out.size(2), dummy_out.size(3)) 144 | 145 | transforms_val = JointCompose([PILtoTensor(), 146 | [Normalize(torch.from_numpy(IMG_MEAN)), None], 147 | ]) 148 | invtransf_val = JointCompose([[Normalize(-torch.from_numpy(IMG_MEAN)), None], 149 | TensortoPIL( datasets.utils.color_map() ), 150 | ]) 151 
| 152 | if args.sampling == 'balanced': 153 | from datasets.balanced_val import balanced 154 | inds = balanced[args.binary_str] 155 | val.examples = [val[i] for i in inds] 156 | print('Subsampled val. to balanced set of {:d} examples'.format(len(val))) 157 | elif args.sampling == 'exclusive': 158 | val = val[args.binary_str] 159 | print('Subsampled val. to balanced set of {:d} examples'.format(len(val))) 160 | 161 | update_every = args.grad_update_every 162 | global_batch_size = args.batch_size * update_every 163 | 164 | valset = SegsetWrap(val, transforms_val) 165 | valloader = data.DataLoader(valset, 166 | batch_size=1, 167 | shuffle=False, 168 | num_workers=1, 169 | pin_memory=True) 170 | 171 | def do_val(): 172 | valiter = iter(valloader) 173 | stats = defaultdict(list) 174 | # extract some images spreak evenly in the validation set 175 | tosee = [int(0.05 * i * len(valiter)) for i in range(1, 20)] 176 | for valstep, (inputs, labels) in enumerate(valiter): 177 | start_time = time.time() 178 | inputs, labels = Variable(inputs.cuda(), volatile=True), labels.cuda().long() 179 | logits = forward(inputs) 180 | logits = F.upsample_bilinear(logits, size=labels.size()[1:]) 181 | if args.binary == -1: 182 | xloss = crossentropyloss(logits, labels) 183 | stats['xloss'].append(xloss.data[0]) 184 | print('[Validation {}-{:d}], xloss {:.5f} - mean {:.5f} ({:.3f} sec/step {})'.format( 185 | step, valstep, xloss, np.mean(stats['xloss']), time.time() - start_time)) 186 | # conf, pred = logits.max(1) 187 | else: 188 | conf, multipred = logits.max(1) 189 | multipred = multipred.squeeze(1) 190 | multipred = (multipred == args.binary).long() 191 | imageiou_multi = iouloss(multipred.data.squeeze(0), labels.squeeze(0)) 192 | stats['imageiou_multi'].append(imageiou_multi) 193 | 194 | logits = logits[:, args.binary, :, :] # select only 1 output 195 | pred = (logits > 0.).long() 196 | 197 | # image output 198 | if valstep in tosee: 199 | inputim, inputlab = invtransf_val([inputs.data[0, :, :, :], labels[0, :, :]]) 200 | _, predim = invtransf_val([inputs.data[0, :, :, :], pred.data[0, :, :]]) 201 | inputim.save("imout/{}_{}in.png".format(args.nickname, valstep),"PNG") 202 | inputlab.save("imout/{}_{}inlab.png".format(args.nickname, valstep),"PNG") 203 | predim.save("imout/{}_{}out.png".format(args.nickname, valstep),"PNG") 204 | 205 | imageiou = iouloss(pred.data.squeeze(0), labels.squeeze(0)) 206 | stats['imageiou'].append(imageiou) 207 | hloss = hingeloss(logits, labels).data[0] 208 | stats['hingeloss'].append(hloss) 209 | jloss = lovaszloss(logits, labels).data[0] 210 | stats['lovaszloss'].append(jloss) 211 | binxloss = binaryXloss(logits, labels).data[0] 212 | stats['binxloss'].append(binxloss) 213 | 214 | print( 'hloss {:.5f} - mean {:.5f}, '.format(hloss, np.mean(stats['hingeloss'])) 215 | + 'lovaszloss {:.5f} - mean {:.5f}, '.format(jloss, np.mean(stats['lovaszloss'])) 216 | + 'iou {:.5f} - mean {:.5f}, '.format(imageiou, np.mean(stats['imageiou'])) 217 | + 'iou_multi {:.5f} - mean {:.5f}, '.format(imageiou_multi, np.mean(stats['imageiou_multi'])) 218 | ) 219 | 220 | do_val() 221 | 222 | 223 | 224 | if __name__ == '__main__': 225 | args = get_arguments(sys.argv[1:], 'train') 226 | main(args) -------------------------------------------------------------------------------- /kaffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .graph import GraphBuilder, NodeMapper 2 | from .errors import KaffeError, print_stderr 3 | 4 | from . 
import tensorflow 5 | -------------------------------------------------------------------------------- /kaffe/caffe/__init__.py: -------------------------------------------------------------------------------- 1 | from .resolver import get_caffe_resolver, has_pycaffe 2 | -------------------------------------------------------------------------------- /kaffe/caffe/resolver.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | SHARED_CAFFE_RESOLVER = None 4 | 5 | class CaffeResolver(object): 6 | def __init__(self): 7 | self.import_caffe() 8 | 9 | def import_caffe(self): 10 | self.caffe = None 11 | try: 12 | # Try to import PyCaffe first 13 | import caffe 14 | self.caffe = caffe 15 | except ImportError: 16 | # Fall back to the protobuf implementation 17 | from . import caffepb 18 | self.caffepb = caffepb 19 | show_fallback_warning() 20 | if self.caffe: 21 | # Use the protobuf code from the imported distribution. 22 | # This way, Caffe variants with custom layers will work. 23 | self.caffepb = self.caffe.proto.caffe_pb2 24 | self.NetParameter = self.caffepb.NetParameter 25 | 26 | def has_pycaffe(self): 27 | return self.caffe is not None 28 | 29 | def get_caffe_resolver(): 30 | global SHARED_CAFFE_RESOLVER 31 | if SHARED_CAFFE_RESOLVER is None: 32 | SHARED_CAFFE_RESOLVER = CaffeResolver() 33 | return SHARED_CAFFE_RESOLVER 34 | 35 | def has_pycaffe(): 36 | return get_caffe_resolver().has_pycaffe() 37 | 38 | def show_fallback_warning(): 39 | msg = ''' 40 | ------------------------------------------------------------ 41 | WARNING: PyCaffe not found! 42 | Falling back to a pure protocol buffer implementation. 43 | * Conversions will be drastically slower. 44 | * This backend is UNTESTED! 45 | ------------------------------------------------------------ 46 | 47 | ''' 48 | sys.stderr.write(msg) 49 | -------------------------------------------------------------------------------- /kaffe/errors.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | class KaffeError(Exception): 4 | pass 5 | 6 | def print_stderr(msg): 7 | sys.stderr.write('%s\n' % msg) 8 | -------------------------------------------------------------------------------- /kaffe/graph.py: -------------------------------------------------------------------------------- 1 | from google.protobuf import text_format 2 | 3 | from .caffe import get_caffe_resolver 4 | from .errors import KaffeError, print_stderr 5 | from .layers import LayerAdapter, LayerType, NodeKind, NodeDispatch 6 | from .shapes import TensorShape 7 | 8 | class Node(object): 9 | 10 | def __init__(self, name, kind, layer=None): 11 | self.name = name 12 | self.kind = kind 13 | self.layer = LayerAdapter(layer, kind) if layer else None 14 | self.parents = [] 15 | self.children = [] 16 | self.data = None 17 | self.output_shape = None 18 | self.metadata = {} 19 | 20 | def add_parent(self, parent_node): 21 | assert parent_node not in self.parents 22 | self.parents.append(parent_node) 23 | if self not in parent_node.children: 24 | parent_node.children.append(self) 25 | 26 | def add_child(self, child_node): 27 | assert child_node not in self.children 28 | self.children.append(child_node) 29 | if self not in child_node.parents: 30 | child_node.parents.append(self) 31 | 32 | def get_only_parent(self): 33 | if len(self.parents) != 1: 34 | raise KaffeError('Node (%s) expected to have 1 parent. Found %s.' 
% 35 | (self, len(self.parents))) 36 | return self.parents[0] 37 | 38 | @property 39 | def parameters(self): 40 | if self.layer is not None: 41 | return self.layer.parameters 42 | return None 43 | 44 | def __str__(self): 45 | return '[%s] %s' % (self.kind, self.name) 46 | 47 | def __repr__(self): 48 | return '%s (0x%x)' % (self.name, id(self)) 49 | 50 | 51 | class Graph(object): 52 | 53 | def __init__(self, nodes=None, name=None): 54 | self.nodes = nodes or [] 55 | self.node_lut = {node.name: node for node in self.nodes} 56 | self.name = name 57 | 58 | def add_node(self, node): 59 | self.nodes.append(node) 60 | self.node_lut[node.name] = node 61 | 62 | def get_node(self, name): 63 | try: 64 | return self.node_lut[name] 65 | except KeyError: 66 | raise KaffeError('Layer not found: %s' % name) 67 | 68 | def get_input_nodes(self): 69 | return [node for node in self.nodes if len(node.parents) == 0] 70 | 71 | def get_output_nodes(self): 72 | return [node for node in self.nodes if len(node.children) == 0] 73 | 74 | def topologically_sorted(self): 75 | sorted_nodes = [] 76 | unsorted_nodes = list(self.nodes) 77 | temp_marked = set() 78 | perm_marked = set() 79 | 80 | def visit(node): 81 | if node in temp_marked: 82 | raise KaffeError('Graph is not a DAG.') 83 | if node in perm_marked: 84 | return 85 | temp_marked.add(node) 86 | for child in node.children: 87 | visit(child) 88 | perm_marked.add(node) 89 | temp_marked.remove(node) 90 | sorted_nodes.insert(0, node) 91 | 92 | while len(unsorted_nodes): 93 | visit(unsorted_nodes.pop()) 94 | return sorted_nodes 95 | 96 | def compute_output_shapes(self): 97 | sorted_nodes = self.topologically_sorted() 98 | for node in sorted_nodes: 99 | node.output_shape = TensorShape(*NodeKind.compute_output_shape(node)) 100 | 101 | def replaced(self, new_nodes): 102 | return Graph(nodes=new_nodes, name=self.name) 103 | 104 | def transformed(self, transformers): 105 | graph = self 106 | for transformer in transformers: 107 | graph = transformer(graph) 108 | if graph is None: 109 | raise KaffeError('Transformer failed: {}'.format(transformer)) 110 | assert isinstance(graph, Graph) 111 | return graph 112 | 113 | def __contains__(self, key): 114 | return key in self.node_lut 115 | 116 | def __str__(self): 117 | hdr = '{:<20} {:<30} {:>20} {:>20}'.format('Type', 'Name', 'Param', 'Output') 118 | s = [hdr, '-' * 94] 119 | for node in self.topologically_sorted(): 120 | # If the node has learned parameters, display the first one's shape. 121 | # In case of convolutions, this corresponds to the weights. 122 | data_shape = node.data[0].shape if node.data else '--' 123 | out_shape = node.output_shape or '--' 124 | s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape, 125 | tuple(out_shape))) 126 | return '\n'.join(s) 127 | 128 | 129 | class GraphBuilder(object): 130 | '''Constructs a model graph from a Caffe protocol buffer definition.''' 131 | 132 | def __init__(self, def_path, phase='test'): 133 | ''' 134 | def_path: Path to the model definition (.prototxt) 135 | data_path: Path to the model data (.caffemodel) 136 | phase: Either 'test' or 'train'. Used for filtering phase-specific nodes. 
137 | ''' 138 | self.def_path = def_path 139 | self.phase = phase 140 | self.load() 141 | 142 | def load(self): 143 | '''Load the layer definitions from the prototxt.''' 144 | self.params = get_caffe_resolver().NetParameter() 145 | with open(self.def_path, 'rb') as def_file: 146 | text_format.Merge(def_file.read(), self.params) 147 | 148 | def filter_layers(self, layers): 149 | '''Filter out layers based on the current phase.''' 150 | phase_map = {0: 'train', 1: 'test'} 151 | filtered_layer_names = set() 152 | filtered_layers = [] 153 | for layer in layers: 154 | phase = self.phase 155 | if len(layer.include): 156 | phase = phase_map[layer.include[0].phase] 157 | if len(layer.exclude): 158 | phase = phase_map[1 - layer.include[0].phase] 159 | exclude = (phase != self.phase) 160 | # Dropout layers appear in a fair number of Caffe 161 | # test-time networks. These are just ignored. We'll 162 | # filter them out here. 163 | if (not exclude) and (phase == 'test'): 164 | exclude = (layer.type == LayerType.Dropout) 165 | if not exclude: 166 | filtered_layers.append(layer) 167 | # Guard against dupes. 168 | assert layer.name not in filtered_layer_names 169 | filtered_layer_names.add(layer.name) 170 | return filtered_layers 171 | 172 | def make_node(self, layer): 173 | '''Create a graph node for the given layer.''' 174 | kind = NodeKind.map_raw_kind(layer.type) 175 | if kind is None: 176 | raise KaffeError('Unknown layer type encountered: %s' % layer.type) 177 | # We want to use the layer's top names (the "output" names), rather than the 178 | # name attribute, which is more of readability thing than a functional one. 179 | # Other layers will refer to a node by its "top name". 180 | return Node(layer.name, kind, layer=layer) 181 | 182 | def make_input_nodes(self): 183 | ''' 184 | Create data input nodes. 185 | 186 | This method is for old-style inputs, where the input specification 187 | was not treated as a first-class layer in the prototext. 188 | Newer models use the "Input layer" type. 189 | ''' 190 | nodes = [Node(name, NodeKind.Data) for name in self.params.input] 191 | if len(nodes): 192 | input_dim = map(int, self.params.input_dim) 193 | if not input_dim: 194 | if len(self.params.input_shape) > 0: 195 | input_dim = map(int, self.params.input_shape[0].dim) 196 | else: 197 | raise KaffeError('Dimensions for input not specified.') 198 | for node in nodes: 199 | node.output_shape = tuple(input_dim) 200 | return nodes 201 | 202 | def build(self): 203 | ''' 204 | Builds the graph from the Caffe layer definitions. 205 | ''' 206 | # Get the layers 207 | layers = self.params.layers or self.params.layer 208 | # Filter out phase-excluded layers 209 | layers = self.filter_layers(layers) 210 | # Get any separately-specified input layers 211 | nodes = self.make_input_nodes() 212 | nodes += [self.make_node(layer) for layer in layers] 213 | # Initialize the graph 214 | graph = Graph(nodes=nodes, name=self.params.name) 215 | # Connect the nodes 216 | # 217 | # A note on layers and outputs: 218 | # In Caffe, each layer can produce multiple outputs ("tops") from a set of inputs 219 | # ("bottoms"). The bottoms refer to other layers' tops. The top can rewrite a bottom 220 | # (in case of in-place operations). Note that the layer's name is not used for establishing 221 | # any connectivity. It's only used for data association. By convention, a layer with a 222 | # single top will often use the same name (although this is not required). 
223 | # 224 | # The current implementation only supports single-output nodes (note that a node can still 225 | # have multiple children, since multiple child nodes can refer to the single top's name). 226 | node_outputs = {} 227 | for layer in layers: 228 | node = graph.get_node(layer.name) 229 | for input_name in layer.bottom: 230 | assert input_name != layer.name 231 | parent_node = node_outputs.get(input_name) 232 | if (parent_node is None) or (parent_node == node): 233 | parent_node = graph.get_node(input_name) 234 | node.add_parent(parent_node) 235 | if len(layer.top)>1: 236 | raise KaffeError('Multiple top nodes are not supported.') 237 | for output_name in layer.top: 238 | if output_name == layer.name: 239 | # Output is named the same as the node. No further action required. 240 | continue 241 | # There are two possibilities here: 242 | # 243 | # Case 1: output_name refers to another node in the graph. 244 | # This is an "in-place operation" that overwrites an existing node. 245 | # This would create a cycle in the graph. We'll undo the in-placing 246 | # by substituting this node wherever the overwritten node is referenced. 247 | # 248 | # Case 2: output_name violates the convention layer.name == output_name. 249 | # Since we are working in the single-output regime, we will can rename it to 250 | # match the layer name. 251 | # 252 | # For both cases, future references to this top re-routes to this node. 253 | node_outputs[output_name] = node 254 | 255 | graph.compute_output_shapes() 256 | return graph 257 | 258 | 259 | class NodeMapper(NodeDispatch): 260 | 261 | def __init__(self, graph): 262 | self.graph = graph 263 | 264 | def map(self): 265 | nodes = self.graph.topologically_sorted() 266 | # Remove input nodes - we'll handle them separately. 267 | input_nodes = self.graph.get_input_nodes() 268 | nodes = [t for t in nodes if t not in input_nodes] 269 | # Decompose DAG into chains. 270 | chains = [] 271 | for node in nodes: 272 | attach_to_chain = None 273 | if len(node.parents) == 1: 274 | parent = node.get_only_parent() 275 | for chain in chains: 276 | if chain[-1] == parent: 277 | # Node is part of an existing chain. 278 | attach_to_chain = chain 279 | break 280 | if attach_to_chain is None: 281 | # Start a new chain for this node. 282 | attach_to_chain = [] 283 | chains.append(attach_to_chain) 284 | attach_to_chain.append(node) 285 | # Map each chain. 
286 | mapped_chains = [] 287 | for chain in chains: 288 | mapped_chains.append(self.map_chain(chain)) 289 | return self.commit(mapped_chains) 290 | 291 | def map_chain(self, chain): 292 | return [self.map_node(node) for node in chain] 293 | 294 | def map_node(self, node): 295 | map_func = self.get_handler(node.kind, 'map') 296 | mapped_node = map_func(node) 297 | assert mapped_node is not None 298 | mapped_node.node = node 299 | return mapped_node 300 | 301 | def commit(self, mapped_chains): 302 | raise NotImplementedError('Must be implemented by subclass.') 303 | -------------------------------------------------------------------------------- /kaffe/layers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numbers 3 | from collections import namedtuple 4 | 5 | from .shapes import * 6 | 7 | LAYER_DESCRIPTORS = { 8 | 9 | # Caffe Types 10 | 'AbsVal': shape_identity, 11 | 'Accuracy': shape_scalar, 12 | 'ArgMax': shape_not_implemented, 13 | 'BatchNorm': shape_identity, 14 | 'BNLL': shape_not_implemented, 15 | 'Concat': shape_concat, 16 | 'ContrastiveLoss': shape_scalar, 17 | 'Convolution': shape_convolution, 18 | 'Deconvolution': shape_not_implemented, 19 | 'Data': shape_data, 20 | 'Dropout': shape_identity, 21 | 'DummyData': shape_data, 22 | 'EuclideanLoss': shape_scalar, 23 | 'Eltwise': shape_identity, 24 | 'Exp': shape_identity, 25 | 'Flatten': shape_not_implemented, 26 | 'HDF5Data': shape_data, 27 | 'HDF5Output': shape_identity, 28 | 'HingeLoss': shape_scalar, 29 | 'Im2col': shape_not_implemented, 30 | 'ImageData': shape_data, 31 | 'InfogainLoss': shape_scalar, 32 | 'InnerProduct': shape_inner_product, 33 | 'Input': shape_data, 34 | 'LRN': shape_identity, 35 | 'MemoryData': shape_mem_data, 36 | 'MultinomialLogisticLoss': shape_scalar, 37 | 'MVN': shape_not_implemented, 38 | 'Pooling': shape_pool, 39 | 'Power': shape_identity, 40 | 'ReLU': shape_identity, 41 | 'Scale': shape_identity, 42 | 'Sigmoid': shape_identity, 43 | 'SigmoidCrossEntropyLoss': shape_scalar, 44 | 'Silence': shape_not_implemented, 45 | 'Softmax': shape_identity, 46 | 'SoftmaxWithLoss': shape_scalar, 47 | 'Split': shape_not_implemented, 48 | 'Slice': shape_not_implemented, 49 | 'TanH': shape_identity, 50 | 'WindowData': shape_not_implemented, 51 | 'Threshold': shape_identity, 52 | } 53 | 54 | LAYER_TYPES = LAYER_DESCRIPTORS.keys() 55 | 56 | LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES}) 57 | 58 | class NodeKind(LayerType): 59 | 60 | @staticmethod 61 | def map_raw_kind(kind): 62 | if kind in LAYER_TYPES: 63 | return kind 64 | return None 65 | 66 | @staticmethod 67 | def compute_output_shape(node): 68 | try: 69 | val = LAYER_DESCRIPTORS[node.kind](node) 70 | return val 71 | except NotImplementedError: 72 | raise KaffeError('Output shape computation not implemented for type: %s' % node.kind) 73 | 74 | 75 | class NodeDispatchError(KaffeError): 76 | 77 | pass 78 | 79 | 80 | class NodeDispatch(object): 81 | 82 | @staticmethod 83 | def get_handler_name(node_kind): 84 | if len(node_kind) <= 4: 85 | # A catch-all for things like ReLU and tanh 86 | return node_kind.lower() 87 | # Convert from CamelCase to under_scored 88 | name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', node_kind) 89 | return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower() 90 | 91 | def get_handler(self, node_kind, prefix): 92 | name = self.get_handler_name(node_kind) 93 | name = '_'.join((prefix, name)) 94 | try: 95 | return getattr(self, name) 96 | except AttributeError: 97 | raise 
NodeDispatchError('No handler found for node kind: %s (expected: %s)' %
98 |                                     (node_kind, name))
99 | 
100 | 
101 | class LayerAdapter(object):
102 | 
103 |     def __init__(self, layer, kind):
104 |         self.layer = layer
105 |         self.kind = kind
106 | 
107 |     @property
108 |     def parameters(self):
109 |         name = NodeDispatch.get_handler_name(self.kind)
110 |         name = '_'.join((name, 'param'))
111 |         try:
112 |             return getattr(self.layer, name)
113 |         except AttributeError:
114 |             raise NodeDispatchError('Caffe parameters not found for layer kind: %s' % (self.kind))
115 | 
116 |     @staticmethod
117 |     def get_kernel_value(scalar, repeated, idx, default=None):
118 |         if scalar:
119 |             return scalar
120 |         if repeated:
121 |             if isinstance(repeated, numbers.Number):
122 |                 return repeated
123 |             if len(repeated) == 1:
124 |                 # Same value applies to all spatial dimensions
125 |                 return int(repeated[0])
126 |             assert idx < len(repeated)
127 |             # Extract the value for the given spatial dimension
128 |             return repeated[idx]
129 |         if default is None:
130 |             raise ValueError('Unable to determine kernel parameter!')
131 |         return default
132 | 
133 |     @property
134 |     def kernel_parameters(self):
135 |         assert self.kind in (NodeKind.Convolution, NodeKind.Pooling)
136 |         params = self.parameters
137 |         k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
138 |         k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
139 |         s_h = self.get_kernel_value(params.stride_h, params.stride, 0, default=1)
140 |         s_w = self.get_kernel_value(params.stride_w, params.stride, 1, default=1)
141 |         p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
142 |         p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
143 |         return KernelParameters(k_h, k_w, s_h, s_w, p_h, p_w)
144 | 
145 | 
146 | KernelParameters = namedtuple('KernelParameters', ['kernel_h', 'kernel_w', 'stride_h', 'stride_w',
147 |                                                    'pad_h', 'pad_w'])
148 | 
--------------------------------------------------------------------------------
/kaffe/shapes.py:
--------------------------------------------------------------------------------
1 | import math
2 | from collections import namedtuple
3 | 
4 | from .errors import KaffeError
5 | 
6 | TensorShape = namedtuple('TensorShape', ['batch_size', 'channels', 'height', 'width'])
7 | 
8 | 
9 | def get_filter_output_shape(i_h, i_w, params, round_func):
10 |     o_h = (i_h + 2 * params.pad_h - params.kernel_h) / float(params.stride_h) + 1
11 |     o_w = (i_w + 2 * params.pad_w - params.kernel_w) / float(params.stride_w) + 1
12 |     return (int(round_func(o_h)), int(round_func(o_w)))
13 | 
14 | 
15 | def get_strided_kernel_output_shape(node, round_func):
16 |     assert node.layer is not None
17 |     input_shape = node.get_only_parent().output_shape
18 |     o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width,
19 |                                        node.layer.kernel_parameters, round_func)
20 |     params = node.layer.parameters
21 |     has_c_o = hasattr(params, 'num_output')
22 |     c = params.num_output if has_c_o else input_shape.channels
23 |     return TensorShape(input_shape.batch_size, c, o_h, o_w)
24 | 
25 | 
26 | def shape_not_implemented(node):
27 |     raise NotImplementedError
28 | 
29 | 
30 | def shape_identity(node):
31 |     assert len(node.parents) > 0
32 |     return node.parents[0].output_shape
33 | 
34 | 
35 | def shape_scalar(node):
36 |     return TensorShape(1, 1, 1, 1)
37 | 
38 | 
39 | def shape_data(node):
40 |     if node.output_shape:
41 |         # Old-style input specification
42 |         return node.output_shape
43 |     try:
44 |         # New-style input specification
45 |         return map(int, 
node.parameters.shape[0].dim) 46 | except: 47 | # We most likely have a data layer on our hands. The problem is, 48 | # Caffe infers the dimensions of the data from the source (eg: LMDB). 49 | # We want to avoid reading datasets here. Fail for now. 50 | # This can be temporarily fixed by transforming the data layer to 51 | # Caffe's "input" layer (as is usually used in the "deploy" version). 52 | # TODO: Find a better solution for this. 53 | raise KaffeError('Cannot determine dimensions of data layer.\n' 54 | 'See comments in function shape_data for more info.') 55 | 56 | 57 | def shape_mem_data(node): 58 | params = node.parameters 59 | return TensorShape(params.batch_size, params.channels, params.height, params.width) 60 | 61 | 62 | def shape_concat(node): 63 | axis = node.layer.parameters.axis 64 | output_shape = None 65 | for parent in node.parents: 66 | if output_shape is None: 67 | output_shape = list(parent.output_shape) 68 | else: 69 | output_shape[axis] += parent.output_shape[axis] 70 | return tuple(output_shape) 71 | 72 | 73 | def shape_convolution(node): 74 | return get_strided_kernel_output_shape(node, math.floor) 75 | 76 | 77 | def shape_pool(node): 78 | return get_strided_kernel_output_shape(node, math.ceil) 79 | 80 | 81 | def shape_inner_product(node): 82 | input_shape = node.get_only_parent().output_shape 83 | return TensorShape(input_shape.batch_size, node.layer.parameters.num_output, 1, 1) 84 | -------------------------------------------------------------------------------- /kaffe/tensorflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import TensorFlowTransformer 2 | from .network import Network 3 | -------------------------------------------------------------------------------- /kaffe/tensorflow/network.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | slim = tf.contrib.slim 4 | 5 | DEFAULT_PADDING = 'SAME' 6 | DEBUG_SIZES = False 7 | 8 | def layer(op): 9 | '''Decorator for composable network layers.''' 10 | 11 | def layer_decorated(self, *args, **kwargs): 12 | # Automatically set a name if not provided. 13 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 14 | # Figure out the layer inputs. 15 | if len(self.terminals) == 0: 16 | raise RuntimeError('No input variables found for layer %s.' % name) 17 | elif len(self.terminals) == 1: 18 | layer_input = self.terminals[0] 19 | if DEBUG_SIZES: print(layer_input.get_shape()) 20 | else: 21 | layer_input = list(self.terminals) 22 | if DEBUG_SIZES: print([i.get_shape() for i in layer_input]) 23 | # Perform the operation and get the output. 24 | layer_output = op(self, layer_input, *args, **kwargs) 25 | # Add to layer LUT. 26 | self.layers[name] = layer_output 27 | # This output is now the input for the next layer. 28 | self.feed(layer_output) 29 | # Return self for chained calls. 
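        # Chaining is what lets generated models read as one fluent expression,
        # e.g. (an illustrative sketch, not a line from a generated file):
        #     (self.feed('data')
        #          .conv(7, 7, 64, 2, 2, name='conv1')
        #          .max_pool(3, 3, 2, 2, name='pool1'))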
30 | return self 31 | 32 | return layer_decorated 33 | 34 | 35 | class Network(object): 36 | 37 | def __init__(self, inputs, trainable=True, is_training=False): 38 | # The input nodes for this network 39 | self.inputs = inputs 40 | # The current list of terminal nodes 41 | self.terminals = [] 42 | # Mapping from layer names to layers 43 | self.layers = dict(inputs) 44 | # If true, the resulting variables are set as trainable 45 | self.trainable = trainable 46 | # Switch variable for dropout 47 | self.use_dropout = tf.placeholder_with_default(tf.constant(1.0), 48 | shape=[], 49 | name='use_dropout') 50 | self.setup(is_training) 51 | 52 | def setup(self, is_training): 53 | '''Construct the network. ''' 54 | raise NotImplementedError('Must be implemented by the subclass.') 55 | 56 | def load(self, data_path, session, ignore_missing=False): 57 | '''Load network weights. 58 | data_path: The path to the numpy-serialized network weights 59 | session: The current TensorFlow session 60 | ignore_missing: If true, serialized weights for missing layers are ignored. 61 | ''' 62 | data_dict = np.load(data_path).item() 63 | for op_name in data_dict: 64 | with tf.variable_scope(op_name, reuse=True): 65 | for param_name, data in data_dict[op_name].iteritems(): 66 | try: 67 | var = tf.get_variable(param_name) 68 | session.run(var.assign(data)) 69 | except ValueError: 70 | if not ignore_missing: 71 | raise 72 | 73 | def feed(self, *args): 74 | '''Set the input(s) for the next operation by replacing the terminal nodes. 75 | The arguments can be either layer names or the actual layers. 76 | ''' 77 | assert len(args) != 0 78 | self.terminals = [] 79 | for fed_layer in args: 80 | if isinstance(fed_layer, basestring): 81 | try: 82 | fed_layer = self.layers[fed_layer] 83 | except KeyError: 84 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 85 | self.terminals.append(fed_layer) 86 | return self 87 | 88 | def get_output(self): 89 | '''Returns the current network output.''' 90 | return self.terminals[-1] 91 | 92 | def get_unique_name(self, prefix): 93 | '''Returns an index-suffixed unique name for the given prefix. 94 | This is used for auto-generating layer names based on the type-prefix. 95 | ''' 96 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 97 | return '%s_%d' % (prefix, ident) 98 | 99 | def make_var(self, name, shape): 100 | '''Creates a new TensorFlow variable.''' 101 | return tf.get_variable(name, shape, trainable=self.trainable) 102 | 103 | def validate_padding(self, padding): 104 | '''Verifies that the padding is one of the supported ones.''' 105 | assert padding in ('SAME', 'VALID') 106 | 107 | @layer 108 | def conv(self, 109 | input, 110 | k_h, 111 | k_w, 112 | c_o, 113 | s_h, 114 | s_w, 115 | name, 116 | relu=True, 117 | padding=DEFAULT_PADDING, 118 | group=1, 119 | biased=True): 120 | # Verify that the padding is acceptable 121 | self.validate_padding(padding) 122 | # Get the number of channels in the input 123 | c_i = input.get_shape()[-1] 124 | # Verify that the grouping parameter is valid 125 | assert c_i % group == 0 126 | assert c_o % group == 0 127 | # Convolution for a given input and kernel 128 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 129 | with tf.variable_scope(name) as scope: 130 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 131 | if group == 1: 132 | # This is the common-case. Convolve the input without any further complications. 
133 | output = convolve(input, kernel) 134 | else: 135 | # Split the input into groups and then convolve each of them independently 136 | input_groups = tf.split(3, group, input) 137 | kernel_groups = tf.split(3, group, kernel) 138 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 139 | # Concatenate the groups 140 | output = tf.concat(3, output_groups) 141 | # Add the biases 142 | if biased: 143 | biases = self.make_var('biases', [c_o]) 144 | output = tf.nn.bias_add(output, biases) 145 | if relu: 146 | # ReLU non-linearity 147 | output = tf.nn.relu(output, name=scope.name) 148 | return output 149 | 150 | @layer 151 | def atrous_conv(self, 152 | input, 153 | k_h, 154 | k_w, 155 | c_o, 156 | dilation, 157 | name, 158 | relu=True, 159 | padding=DEFAULT_PADDING, 160 | group=1, 161 | biased=True): 162 | # Verify that the padding is acceptable 163 | self.validate_padding(padding) 164 | # Get the number of channels in the input 165 | c_i = input.get_shape()[-1] 166 | # Verify that the grouping parameter is valid 167 | assert c_i % group == 0 168 | assert c_o % group == 0 169 | # Convolution for a given input and kernel 170 | convolve = lambda i, k: tf.nn.atrous_conv2d(i, k, dilation, padding=padding) 171 | with tf.variable_scope(name) as scope: 172 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o]) 173 | if group == 1: 174 | # This is the common-case. Convolve the input without any further complications. 175 | output = convolve(input, kernel) 176 | else: 177 | # Split the input into groups and then convolve each of them independently 178 | input_groups = tf.split(3, group, input) 179 | kernel_groups = tf.split(3, group, kernel) 180 | output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)] 181 | # Concatenate the groups 182 | output = tf.concat(3, output_groups) 183 | # Add the biases 184 | if biased: 185 | biases = self.make_var('biases', [c_o]) 186 | output = tf.nn.bias_add(output, biases) 187 | if relu: 188 | # ReLU non-linearity 189 | output = tf.nn.relu(output, name=scope.name) 190 | return output 191 | 192 | @layer 193 | def relu(self, input, name): 194 | return tf.nn.relu(input, name=name) 195 | 196 | @layer 197 | def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 198 | self.validate_padding(padding) 199 | return tf.nn.max_pool(input, 200 | ksize=[1, k_h, k_w, 1], 201 | strides=[1, s_h, s_w, 1], 202 | padding=padding, 203 | name=name) 204 | 205 | @layer 206 | def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): 207 | self.validate_padding(padding) 208 | return tf.nn.avg_pool(input, 209 | ksize=[1, k_h, k_w, 1], 210 | strides=[1, s_h, s_w, 1], 211 | padding=padding, 212 | name=name) 213 | 214 | @layer 215 | def lrn(self, input, radius, alpha, beta, name, bias=1.0): 216 | return tf.nn.local_response_normalization(input, 217 | depth_radius=radius, 218 | alpha=alpha, 219 | beta=beta, 220 | bias=bias, 221 | name=name) 222 | 223 | @layer 224 | def concat(self, inputs, axis, name): 225 | return tf.concat(concat_dim=axis, values=inputs, name=name) 226 | 227 | @layer 228 | def add(self, inputs, name): 229 | return tf.add_n(inputs, name=name) 230 | 231 | @layer 232 | def fc(self, input, num_out, name, relu=True): 233 | with tf.variable_scope(name) as scope: 234 | input_shape = input.get_shape() 235 | if input_shape.ndims == 4: 236 | # The input is spatial. Vectorize it first. 
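                # e.g. an NHWC tensor of shape (N, 7, 7, 512) flattens to (N, 7 * 7 * 512) = (N, 25088)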
237 | dim = 1 238 | for d in input_shape[1:].as_list(): 239 | dim *= d 240 | feed_in = tf.reshape(input, [-1, dim]) 241 | else: 242 | feed_in, dim = (input, input_shape[-1].value) 243 | weights = self.make_var('weights', shape=[dim, num_out]) 244 | biases = self.make_var('biases', [num_out]) 245 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 246 | fc = op(feed_in, weights, biases, name=scope.name) 247 | return fc 248 | 249 | @layer 250 | def softmax(self, input, name): 251 | input_shape = map(lambda v: v.value, input.get_shape()) 252 | if len(input_shape) > 2: 253 | # For certain models (like NiN), the singleton spatial dimensions 254 | # need to be explicitly squeezed, since they're not broadcast-able 255 | # in TensorFlow's NHWC ordering (unlike Caffe's NCHW). 256 | if input_shape[1] == 1 and input_shape[2] == 1: 257 | input = tf.squeeze(input, squeeze_dims=[1, 2]) 258 | else: 259 | raise ValueError('Rank 2 tensor input expected for softmax!') 260 | return tf.nn.softmax(input, name) 261 | 262 | @layer 263 | def batch_normalization(self, input, name, is_training, activation_fn=None, scale=True): 264 | with tf.variable_scope(name) as scope: 265 | output = slim.batch_norm( 266 | input, 267 | activation_fn=activation_fn, 268 | is_training=is_training, 269 | updates_collections=None, 270 | scale=scale, 271 | scope=scope) 272 | return output 273 | 274 | @layer 275 | def dropout(self, input, keep_prob, name): 276 | keep = 1 - self.use_dropout + (self.use_dropout * keep_prob) 277 | return tf.nn.dropout(input, keep, name=name) 278 | -------------------------------------------------------------------------------- /kaffe/tensorflow/transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..errors import KaffeError, print_stderr 4 | from ..graph import GraphBuilder, NodeMapper 5 | from ..layers import NodeKind 6 | from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, 7 | BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer) 8 | 9 | from . import network 10 | 11 | 12 | def get_padding_type(kernel_params, input_shape, output_shape): 13 | '''Translates Caffe's numeric padding to one of ('SAME', 'VALID'). 14 | Caffe supports arbitrary padding values, while TensorFlow only 15 | supports 'SAME' and 'VALID' modes. So, not all Caffe paddings 16 | can be translated to TensorFlow. There are some subtleties to 17 | how the padding edge-cases are handled. 
These are described here: 18 | https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto 19 | ''' 20 | k_h, k_w, s_h, s_w, p_h, p_w = kernel_params 21 | s_o_h = np.ceil(input_shape.height / float(s_h)) 22 | s_o_w = np.ceil(input_shape.width / float(s_w)) 23 | if (output_shape.height == s_o_h) and (output_shape.width == s_o_w): 24 | return 'SAME' 25 | v_o_h = np.ceil((input_shape.height - k_h + 1.0) / float(s_h)) 26 | v_o_w = np.ceil((input_shape.width - k_w + 1.0) / float(s_w)) 27 | if (output_shape.height == v_o_h) and (output_shape.width == v_o_w): 28 | return 'VALID' 29 | return None 30 | 31 | 32 | class TensorFlowNode(object): 33 | '''An intermediate representation for TensorFlow operations.''' 34 | 35 | def __init__(self, op, *args, **kwargs): 36 | # A string corresponding to the TensorFlow operation 37 | self.op = op 38 | # Positional arguments for the operation 39 | self.args = args 40 | # Keyword arguments for the operation 41 | self.kwargs = list(kwargs.items()) 42 | # The source Caffe node 43 | self.node = None 44 | 45 | def format(self, arg): 46 | '''Returns a string representation for the given value.''' 47 | return "'%s'" % arg if isinstance(arg, basestring) else str(arg) 48 | 49 | def pair(self, key, value): 50 | '''Returns key=formatted(value).''' 51 | return '%s=%s' % (key, self.format(value)) 52 | 53 | def emit(self): 54 | '''Emits the Python source for this node.''' 55 | # Format positional arguments 56 | args = map(self.format, self.args) 57 | # Format any keyword arguments 58 | if self.kwargs: 59 | args += [self.pair(k, v) for k, v in self.kwargs] 60 | # Set the node name 61 | args.append(self.pair('name', self.node.name)) 62 | args = ', '.join(args) 63 | return '%s(%s)' % (self.op, args) 64 | 65 | 66 | class MaybeActivated(object): 67 | 68 | def __init__(self, node, default=True): 69 | self.inject_kwargs = {} 70 | if node.metadata.get('relu', False) != default: 71 | self.inject_kwargs['relu'] = not default 72 | 73 | def __call__(self, *args, **kwargs): 74 | kwargs.update(self.inject_kwargs) 75 | return TensorFlowNode(*args, **kwargs) 76 | 77 | 78 | class TensorFlowMapper(NodeMapper): 79 | 80 | def get_kernel_params(self, node): 81 | kernel_params = node.layer.kernel_parameters 82 | input_shape = node.get_only_parent().output_shape 83 | padding = get_padding_type(kernel_params, input_shape, node.output_shape) 84 | # Only emit the padding if it's not the default value. 85 | padding = {'padding': padding} if padding != network.DEFAULT_PADDING else {} 86 | return (kernel_params, padding) 87 | 88 | def map_convolution(self, node): 89 | (kernel_params, kwargs) = self.get_kernel_params(node) 90 | h = kernel_params.kernel_h 91 | w = kernel_params.kernel_w 92 | c_o = node.output_shape[1] 93 | c_i = node.parents[0].output_shape[1] 94 | group = node.parameters.group 95 | if group != 1: 96 | kwargs['group'] = group 97 | if not node.parameters.bias_term: 98 | kwargs['biased'] = False 99 | assert kernel_params.kernel_h == h 100 | assert kernel_params.kernel_w == w 101 | return MaybeActivated(node)('conv', kernel_params.kernel_h, kernel_params.kernel_w, c_o, 102 | kernel_params.stride_h, kernel_params.stride_w, **kwargs) 103 | 104 | def map_relu(self, node): 105 | return TensorFlowNode('relu') 106 | 107 | def map_pooling(self, node): 108 | pool_type = node.parameters.pool 109 | if pool_type == 0: 110 | pool_op = 'max_pool' 111 | elif pool_type == 1: 112 | pool_op = 'avg_pool' 113 | else: 114 | # Stochastic pooling, for instance. 
115 |             raise KaffeError('Unsupported pooling type.')
116 |         (kernel_params, padding) = self.get_kernel_params(node)
117 |         return TensorFlowNode(pool_op, kernel_params.kernel_h, kernel_params.kernel_w,
118 |                               kernel_params.stride_h, kernel_params.stride_w, **padding)
119 | 
120 |     def map_inner_product(self, node):
121 |         #TODO: Axis
122 |         assert node.parameters.axis == 1
123 |         #TODO: Unbiased
124 |         assert node.parameters.bias_term == True
125 |         return MaybeActivated(node)('fc', node.parameters.num_output)
126 | 
127 |     def map_softmax(self, node):
128 |         return TensorFlowNode('softmax')
129 | 
130 |     def map_lrn(self, node):
131 |         params = node.parameters
132 |         # The window size must be an odd value. For a window
133 |         # size of (2*n+1), TensorFlow defines depth_radius = n.
134 |         assert params.local_size % 2 == 1
135 |         # Caffe scales by (alpha/(2*n+1)), whereas TensorFlow
136 |         # just scales by alpha (as does Krizhevsky's paper).
137 |         # We'll account for that here.
138 |         alpha = params.alpha / float(params.local_size)
139 |         return TensorFlowNode('lrn', int(params.local_size / 2), alpha, params.beta)
140 | 
141 |     def map_concat(self, node):
142 |         axis = (2, 3, 1, 0)[node.parameters.axis]
143 |         return TensorFlowNode('concat', axis)
144 | 
145 |     def map_dropout(self, node):
146 |         return TensorFlowNode('dropout', node.parameters.dropout_ratio)
147 | 
148 |     def map_batch_norm(self, node):
149 |         scale_offset = len(node.data) == 4
150 |         kwargs = {'is_training': True} if scale_offset else {'is_training': True, 'scale': False}
151 |         return MaybeActivated(node, default=False)('batch_normalization', **kwargs)
152 | 
153 |     def map_eltwise(self, node):
154 |         operations = {0: 'multiply', 1: 'add', 2: 'max'}
155 |         op_code = node.parameters.operation
156 |         try:
157 |             return TensorFlowNode(operations[op_code])
158 |         except KeyError:
159 |             raise KaffeError('Unknown elementwise operation: {}'.format(op_code))
160 | 
161 |     def commit(self, chains):
162 |         return chains
163 | 
164 | 
165 | class TensorFlowEmitter(object):
166 | 
167 |     def __init__(self, tab=None):
168 |         self.tab = tab or ' ' * 4
169 |         self.prefix = ''
170 | 
171 |     def indent(self):
172 |         self.prefix += self.tab
173 | 
174 |     def outdent(self):
175 |         self.prefix = self.prefix[:-len(self.tab)]
176 | 
177 |     def statement(self, s):
178 |         return self.prefix + s + '\n'
179 | 
180 |     def emit_imports(self):
181 |         return self.statement('from kaffe.tensorflow import Network\n')
182 | 
183 |     def emit_class_def(self, name):
184 |         return self.statement('class %s(Network):' % (name))
185 | 
186 |     def emit_setup_def(self):
187 |         return self.statement('def setup(self, is_training):')
188 | 
189 |     def emit_parents(self, chain):
190 |         assert len(chain)
191 |         s = '(self.feed('
192 |         sep = ', \n' + self.prefix + (' ' * len(s))
193 |         s += sep.join(["'%s'" % parent.name for parent in chain[0].node.parents])
194 |         return self.statement(s + ')')
195 | 
196 |     def emit_node(self, node):
197 |         return self.statement(' ' * 5 + '.' 
+ node.emit()) 198 | 199 | def emit(self, name, chains): 200 | s = self.emit_imports() 201 | s += self.emit_class_def(name) 202 | self.indent() 203 | s += self.emit_setup_def() 204 | self.indent() 205 | blocks = [] 206 | for chain in chains: 207 | b = '' 208 | b += self.emit_parents(chain) 209 | for node in chain: 210 | b += self.emit_node(node) 211 | blocks.append(b[:-1] + ')') 212 | s = s + '\n\n'.join(blocks) 213 | return s 214 | 215 | 216 | class TensorFlowTransformer(object): 217 | 218 | def __init__(self, def_path, data_path, verbose=True, phase='test'): 219 | self.verbose = verbose 220 | self.phase = phase 221 | self.load(def_path, data_path, phase) 222 | self.params = None 223 | self.source = None 224 | 225 | def load(self, def_path, data_path, phase): 226 | # Build the graph 227 | graph = GraphBuilder(def_path, phase).build() 228 | 229 | if data_path is not None: 230 | # Load and associate learned parameters 231 | graph = DataInjector(def_path, data_path)(graph) 232 | 233 | # Transform the graph 234 | transformers = [ 235 | # Fuse split batch normalization layers 236 | BatchNormScaleBiasFuser(), 237 | 238 | # Fuse ReLUs 239 | # TODO: Move non-linearity application to layer wrapper, allowing 240 | # any arbitrary operation to be optionally activated. 241 | ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct, 242 | NodeKind.BatchNorm]), 243 | 244 | # Rename nodes 245 | # Slashes are used for scoping in TensorFlow. Replace slashes 246 | # in node names with underscores. 247 | # (Caffe's GoogLeNet implementation uses slashes) 248 | NodeRenamer(lambda node: node.name.replace('/', '_')) 249 | ] 250 | self.graph = graph.transformed(transformers) 251 | 252 | # Display the graph 253 | if self.verbose: 254 | print_stderr(self.graph) 255 | 256 | def transform_data(self): 257 | if self.params is None: 258 | transformers = [ 259 | 260 | # Reshape the parameters to TensorFlow's ordering 261 | DataReshaper({ 262 | # (c_o, c_i, h, w) -> (h, w, c_i, c_o) 263 | NodeKind.Convolution: (2, 3, 1, 0), 264 | 265 | # (c_o, c_i) -> (c_i, c_o) 266 | NodeKind.InnerProduct: (1, 0) 267 | }), 268 | 269 | # Pre-process batch normalization data 270 | BatchNormPreprocessor(), 271 | 272 | # Convert parameters to dictionaries 273 | ParameterNamer(), 274 | ] 275 | self.graph = self.graph.transformed(transformers) 276 | self.params = {node.name: node.data for node in self.graph.nodes if node.data} 277 | return self.params 278 | 279 | def transform_source(self): 280 | if self.source is None: 281 | mapper = TensorFlowMapper(self.graph) 282 | chains = mapper.map() 283 | emitter = TensorFlowEmitter() 284 | self.source = emitter.emit(self.graph.name, chains) 285 | return self.source 286 | -------------------------------------------------------------------------------- /kaffe/transformers.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A collection of graph transforms. 3 | 4 | A transformer is a callable that accepts a graph and returns a transformed version. 5 | ''' 6 | 7 | import numpy as np 8 | 9 | from .caffe import get_caffe_resolver, has_pycaffe 10 | from .errors import KaffeError, print_stderr 11 | from .layers import NodeKind 12 | 13 | 14 | class DataInjector(object): 15 | ''' 16 | Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
17 | ''' 18 | 19 | def __init__(self, def_path, data_path): 20 | # The .prototxt file defining the graph 21 | self.def_path = def_path 22 | # The .caffemodel file containing the learned parameters 23 | self.data_path = data_path 24 | # Set to true if the fallback protocol-buffer based backend was used 25 | self.did_use_pb = False 26 | # A list containing (layer name, parameters) tuples 27 | self.params = None 28 | # Load the parameters 29 | self.load() 30 | 31 | def load(self): 32 | if has_pycaffe(): 33 | self.load_using_caffe() 34 | else: 35 | self.load_using_pb() 36 | 37 | def load_using_caffe(self): 38 | caffe = get_caffe_resolver().caffe 39 | net = caffe.Net(self.def_path, self.data_path, caffe.TEST) 40 | data = lambda blob: blob.data 41 | self.params = [(k, map(data, v)) for k, v in net.params.items()] 42 | 43 | def load_using_pb(self): 44 | data = get_caffe_resolver().NetParameter() 45 | data.MergeFromString(open(self.data_path, 'rb').read()) 46 | pair = lambda layer: (layer.name, self.normalize_pb_data(layer)) 47 | layers = data.layers or data.layer 48 | self.params = [pair(layer) for layer in layers if layer.blobs] 49 | self.did_use_pb = True 50 | 51 | def normalize_pb_data(self, layer): 52 | transformed = [] 53 | for blob in layer.blobs: 54 | if len(blob.shape.dim): 55 | dims = blob.shape.dim 56 | c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) 57 | else: 58 | c_o = blob.num 59 | c_i = blob.channels 60 | h = blob.height 61 | w = blob.width 62 | data = np.array(blob.data, dtype=np.float32).reshape(c_o, c_i, h, w) 63 | transformed.append(data) 64 | return transformed 65 | 66 | def adjust_parameters(self, node, data): 67 | if not self.did_use_pb: 68 | return data 69 | # When using the protobuf-backend, each parameter initially has four dimensions. 70 | # In certain cases (like FC layers), we want to eliminate the singleton dimensions. 71 | # This implementation takes care of the common cases. However, it does leave the 72 | # potential for future issues. 73 | # The Caffe-backend does not suffer from this problem. 74 | data = list(data) 75 | squeeze_indices = [1] # Squeeze biases. 76 | if node.kind == NodeKind.InnerProduct: 77 | squeeze_indices.append(0) # Squeeze FC. 78 | for idx in squeeze_indices: 79 | data[idx] = np.squeeze(data[idx]) 80 | return data 81 | 82 | def __call__(self, graph): 83 | for layer_name, data in self.params: 84 | if layer_name in graph: 85 | node = graph.get_node(layer_name) 86 | node.data = self.adjust_parameters(node, data) 87 | else: 88 | print_stderr('Ignoring parameters for non-existent layer: %s' % layer_name) 89 | return graph 90 | 91 | 92 | class DataReshaper(object): 93 | 94 | def __init__(self, mapping, replace=True): 95 | # A dictionary mapping NodeKind to the transposed order. 96 | self.mapping = mapping 97 | # The node kinds eligible for reshaping 98 | self.reshaped_node_types = self.mapping.keys() 99 | # If true, the reshaped data will replace the old one. 100 | # Otherwise, it's set to the reshaped_data attribute. 
101 |         self.replace = replace
102 | 
103 |     def has_spatial_parent(self, node):
104 |         try:
105 |             parent = node.get_only_parent()
106 |             s = parent.output_shape
107 |             return s.height > 1 or s.width > 1
108 |         except KaffeError:
109 |             return False
110 | 
111 |     def map(self, node_kind):
112 |         try:
113 |             return self.mapping[node_kind]
114 |         except KeyError:
115 |             raise KaffeError('Ordering not found for node kind: {}'.format(node_kind))
116 | 
117 |     def __call__(self, graph):
118 |         for node in graph.nodes:
119 |             if node.data is None:
120 |                 continue
121 |             if node.kind not in self.reshaped_node_types:
122 |                 # Check for 2+ dimensional data
123 |                 if any(len(tensor.shape) > 1 for tensor in node.data):
124 |                     print_stderr('Warning: parameters not reshaped for node: {}'.format(node))
125 |                 continue
126 |             transpose_order = self.map(node.kind)
127 |             weights = node.data[0]
128 |             if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node):
129 |                 # The FC layer connected to the spatial layer needs to be
130 |                 # re-wired to match the new spatial ordering.
131 |                 in_shape = node.get_only_parent().output_shape
132 |                 fc_shape = weights.shape
133 |                 output_channels = fc_shape[0]
134 |                 weights = weights.reshape((output_channels, in_shape.channels, in_shape.height,
135 |                                            in_shape.width))
136 |                 weights = weights.transpose(self.map(NodeKind.Convolution))
137 |                 node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]],
138 |                                                      fc_shape[transpose_order[1]])
139 |             else:
140 |                 node.reshaped_data = weights.transpose(transpose_order)
141 | 
142 |         if self.replace:
143 |             for node in graph.nodes:
144 |                 if hasattr(node, 'reshaped_data'):
145 |                     # Set the weights
146 |                     node.data[0] = node.reshaped_data
147 |                     del node.reshaped_data
148 |         return graph
149 | 
150 | 
151 | class SubNodeFuser(object):
152 |     '''
153 |     An abstract helper for merging a single-child node with its single parent.
154 |     '''
155 | 
156 |     def __call__(self, graph):
157 |         nodes = graph.nodes
158 |         fused_nodes = []
159 |         for node in nodes:
160 |             if len(node.parents) != 1:
161 |                 # We're only fusing nodes with single parents
162 |                 continue
163 |             parent = node.get_only_parent()
164 |             if len(parent.children) != 1:
165 |                 # We can only fuse a node if its parent's
166 |                 # value isn't used by any other node.
167 |                 continue
168 |             if not self.is_eligible_pair(parent, node):
169 |                 continue
170 |             # Rewrite the fused node's children to its parent.
171 |             for child in node.children:
172 |                 child.parents.remove(node)
173 |                 parent.add_child(child)
174 |             # Disconnect the fused node from the graph.
175 |             parent.children.remove(node)
176 |             fused_nodes.append(node)
177 |             # Let the sub-class merge the fused node in any arbitrary way.
178 |             self.merge(parent, node)
179 |         transformed_nodes = [node for node in nodes if node not in fused_nodes]
180 |         return graph.replaced(transformed_nodes)
181 | 
182 |     def is_eligible_pair(self, parent, child):
183 |         '''Returns true if this parent/child pair is eligible for fusion.'''
184 |         raise NotImplementedError('Must be implemented by subclass.')
185 | 
186 |     def merge(self, parent, child):
187 |         '''Merge the child node into the parent.'''
188 |         raise NotImplementedError('Must be implemented by subclass.')
189 | 
190 | 
191 | class ReLUFuser(SubNodeFuser):
192 |     '''
193 |     Fuses rectified linear units with their parent nodes.
194 |     '''
195 | 
196 |     def __init__(self, allowed_parent_types=None):
197 |         # Fuse ReLUs when the parent node is one of the given types.
198 |         # If None, all node types are eligible.
199 |         self.allowed_parent_types = allowed_parent_types
200 | 
201 |     def is_eligible_pair(self, parent, child):
202 |         return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and
203 |                 child.kind == NodeKind.ReLU)
204 | 
205 |     def merge(self, parent, _):
206 |         parent.metadata['relu'] = True
207 | 
208 | 
209 | class BatchNormScaleBiasFuser(SubNodeFuser):
210 |     '''
211 |     The original batch normalization paper includes two learned
212 |     parameters: a scaling factor \gamma and a bias \beta.
213 |     Caffe's implementation does not include these two. However, it is commonly
214 |     replicated by adding a scaling+bias layer immediately after the batch norm.
215 | 
216 |     This fuser merges the scaling+bias layer with the batch norm.
217 |     '''
218 | 
219 |     def is_eligible_pair(self, parent, child):
220 |         return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and
221 |                 child.parameters.axis == 1 and child.parameters.bias_term == True)
222 | 
223 |     def merge(self, parent, child):
224 |         parent.scale_bias_node = child
225 | 
226 | 
227 | class BatchNormPreprocessor(object):
228 |     '''
229 |     Prescale batch normalization parameters.
230 |     Concatenate gamma (scale) and beta (bias) terms if set.
231 |     '''
232 | 
233 |     def __call__(self, graph):
234 |         for node in graph.nodes:
235 |             if node.kind != NodeKind.BatchNorm:
236 |                 continue
237 |             assert node.data is not None
238 |             assert len(node.data) == 3
239 |             mean, variance, scale = node.data
240 |             # Prescale the stats
241 |             scaling_factor = 1.0 / scale if scale != 0 else 0
242 |             mean *= scaling_factor
243 |             variance *= scaling_factor
244 |             # Replace with the updated values
245 |             node.data = [mean, variance]
246 |             if hasattr(node, 'scale_bias_node'):
247 |                 # Include the scale and bias terms
248 |                 gamma, beta = node.scale_bias_node.data
249 |                 node.data += [gamma, beta]
250 |         return graph
251 | 
252 | 
253 | class NodeRenamer(object):
254 |     '''
255 |     Renames nodes in the graph using a given unary function that
256 |     accepts a node and returns its new name.
257 |     '''
258 | 
259 |     def __init__(self, renamer):
260 |         self.renamer = renamer
261 | 
262 |     def __call__(self, graph):
263 |         for node in graph.nodes:
264 |             node.name = self.renamer(node)
265 |         return graph
266 | 
267 | 
268 | class ParameterNamer(object):
269 |     '''
270 |     Convert layer data arrays to a dictionary mapping parameter names to their values.
271 |     '''
272 | 
273 |     def __call__(self, graph):
274 |         for node in graph.nodes:
275 |             if node.data is None:
276 |                 continue
277 |             if node.kind in (NodeKind.Convolution, NodeKind.InnerProduct):
278 |                 names = ('weights',)
279 |                 if node.parameters.bias_term:
280 |                     names += ('biases',)
281 |             elif node.kind == NodeKind.BatchNorm:
282 |                 names = ('moving_mean', 'moving_variance')
283 |                 if len(node.data) == 4:
284 |                     names += ('gamma', 'beta')
285 |             else:
286 |                 print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind))
287 |                 continue
288 |             assert len(names) == len(node.data)
289 |             node.data = dict(zip(names, node.data))
290 |         return graph
291 | 
--------------------------------------------------------------------------------
/losses.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import torch
3 | from torch.autograd import Variable
4 | import torch.nn.functional as F
5 | import numpy as np
6 | 
7 | VOID_LABEL = 255
8 | N_CLASSES = 21
9 | 
10 | def crossentropyloss(logits, label):
11 |     mask = (label.view(-1) != VOID_LABEL)
12 |     nonvoid = mask.long().sum()
13 |     if nonvoid == 0:
14 |         # only void pixels, the gradients should be 0
15 |         return logits.sum() * 0.
16 |     # if nonvoid == mask.numel():
17 |     #     # no void pixel, use builtin
18 |     #     return F.cross_entropy(logits, Variable(label))
19 |     target = label.view(-1)[mask]
20 |     C = logits.size(1)
21 |     logits = logits.permute(0, 2, 3, 1)  # B, H, W, C
22 |     logits = logits.contiguous().view(-1, C)
23 |     mask2d = mask.unsqueeze(1).expand(mask.size(0), C).contiguous().view(-1)
24 |     logits = logits[mask2d].view(-1, C)
25 |     loss = F.cross_entropy(logits, Variable(target))
26 |     return loss
27 | 
28 | class StableBCELoss(torch.nn.modules.Module):
29 |     def __init__(self):
30 |         super(StableBCELoss, self).__init__()
31 |     def forward(self, input, target):
32 |         neg_abs = - input.abs()
33 |         loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log()
34 |         return loss.mean()
35 | 
36 | def binaryXloss(logits, label):
37 |     mask = (label.view(-1) != VOID_LABEL)
38 |     nonvoid = mask.long().sum()
39 |     if nonvoid == 0:
40 |         # only void pixels, the gradients should be 0
41 |         return logits.sum() * 0.
42 |     # if nonvoid == mask.numel():
43 |     #     # no void pixel, use builtin
44 |     #     return F.cross_entropy(logits, Variable(label))
45 |     target = label.contiguous().view(-1)[mask]
46 |     logits = logits.contiguous().view(-1)[mask]
47 |     # loss = F.binary_cross_entropy(logits, Variable(target.float()))
48 |     loss = StableBCELoss()(logits, Variable(target.float()))
49 |     return loss
50 | 
51 | def naive_single(logit, label):
52 |     # single image
53 |     mask = (label.view(-1) != 255)
54 |     num_preds = mask.long().sum()
55 |     if num_preds == 0:
56 |         # only void pixels, the gradients should be 0
57 |         return logit.sum() * 0.
58 |     target = Variable(label.contiguous().view(-1)[mask].float())
59 |     logit = logit.contiguous().view(-1)[mask]
60 |     prob = F.sigmoid(logit)
61 |     intersect = target * prob
62 |     union = target + prob - intersect
63 |     loss = (1. - intersect / union).sum()
64 |     return loss
65 | 
66 | def hingeloss(logits, label):
67 |     mask = (label.view(-1) != 255)
68 |     num_preds = mask.long().sum()
69 |     if num_preds == 0:
70 |         # only void pixels, the gradients should be 0
71 |         return logits.sum() * 0.
72 |     target = label.contiguous().view(-1)[mask]
73 |     target = 2. * target.float() - 1.  # [target == 0] = -1
74 |     logits = logits.contiguous().view(-1)[mask]
75 |     hinge = 1./num_preds * F.relu(1. - logits * Variable(target)).sum()
76 |     return hinge
77 | 
78 | def gamma_fast(gt, permutation):
79 |     p = len(permutation)
80 |     gt = gt.gather(0, permutation)
81 |     gts = gt.sum()
82 | 
83 |     intersection = gts - gt.float().cumsum(0)
84 |     union = gts + (1 - gt).float().cumsum(0)
85 |     jaccard = 1. - intersection / union
86 | 
87 |     jaccard[1:p] = jaccard[1:p] - jaccard[0:-1]
88 |     return jaccard
89 | 
90 | def lovaszloss(logits, labels, prox=False, max_steps=20, debug={}):
91 |     # image-level Lovasz hinge
92 |     if logits.size(0) == 1:
93 |         # single image case
94 |         loss = lovasz_single(logits.squeeze(0), labels.squeeze(0), prox, max_steps, debug)
95 |     else:
96 |         losses = []
97 |         for logit, label in zip(logits, labels):
98 |             loss = lovasz_single(logit, label, prox, max_steps, debug)
99 |             losses.append(loss)
100 |         loss = sum(losses) / len(losses)
101 |     return loss
102 | 
103 | def naiveloss(logits, labels):
104 |     # image-level naive (sigmoid-relaxed) Jaccard surrogate
105 |     if logits.size(0) == 1:
106 |         # single image case
107 |         loss = naive_single(logits.squeeze(0), labels.squeeze(0))
108 |     else:
109 |         losses = []
110 |         for logit, label in zip(logits, labels):
111 |             loss = naive_single(logit, label)
112 |             losses.append(loss)
113 |         loss = sum(losses) / len(losses)
114 |     return loss
115 | 
116 | def iouloss(pred, gt):
117 |     # works for one binary pred and associated target
118 |     # make byte tensors
119 |     pred = (pred == 1)
120 |     mask = (gt != 255)
121 |     gt = (gt == 1)
122 |     union = (gt | pred)[mask].long().sum()
123 |     if not union:
124 |         return 0.
125 |     else:
126 |         intersection = (gt & pred)[mask].long().sum()
127 |         return 1. - intersection / union
128 | 
129 | def compute_step_length(x, grad, active, eps=1e-6):
130 |     # compute next intersection with an edge in the direction grad
131 |     # OR next intersection with a 0 - border
132 |     # returns delta and ind such that:
133 |     # after a step delta in the direction grad, x[ind] and x[ind+1] will be equal
134 |     delta = np.inf
135 |     ind = -1
136 |     if active > 0:
137 |         numerator = (x[:active] - x[1:active+1])  # always positive (because x is sorted)
138 |         denominator = (grad[:active] - grad[1:active+1])
139 |         # indices corresponding to negative denominator won't intersect
140 |         # also, we are not interested in indices in x that are *already equal*
141 |         valid = (denominator > eps) & (numerator > eps)
142 |         valid_indices = valid.nonzero()
143 |         intersection_times = numerator[valid] / denominator[valid]
144 |         if intersection_times.size():
145 |             delta, ind = intersection_times.min(0)
146 |             ind = valid_indices[ind]
147 |             delta, ind = delta[0], ind[0, 0]
148 |     if grad[active] > 0:
149 |         intersect_zero = x[active] / grad[active]
150 |         if intersect_zero > 0. and intersect_zero < delta:
151 |             return intersect_zero, -1
152 |     return delta, ind
153 | 
154 | def project(gam, active, members):
155 |     tovisit = set(range(active + 1))
156 |     while tovisit:
157 |         v = tovisit.pop()
158 |         if len(members[v]) > 1:
159 |             avg = 0.
160 |             for k in members[v]:
161 |                 if k != v: tovisit.remove(k)
162 |                 avg += gam[k] / len(members[v])
163 |             for k in members[v]:
164 |                 gam[k] = avg
165 |     if active + 1 < len(gam):
166 |         gam[active + 1:] = 0.
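# A worked sanity check for gamma_fast (a sketch, assuming the identity
# permutation): with gt = [1, 1, 0, 1], gts = 3, so
#   intersection = [2, 1, 1, 0], union = [3, 3, 4, 4],
#   cumulative Jaccard losses = [1/3, 2/3, 3/4, 1],
# and gamma_fast returns the increments [1/3, 1/3, 1/12, 1/4],
# which sum to the full Jaccard loss of 1.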
167 | 
168 | def find_proximal(x0, gam, lam, eps=1e-6, max_steps=20, debug={}):
169 |     # x0: sorted margins data
170 |     # gam: initial gamma_fast(target, perm)
171 |     # regularisation parameter lam
172 |     x = x0.clone()
173 |     act = (x >= eps).nonzero()
174 |     finished = False
175 |     if not act.size():
176 |         finished = True
177 |     else:
178 |         active = act[-1, 0]
179 |         members = {i: {i} for i in range(active + 1)}
180 |         if active > 0:
181 |             equal = (x[:active] - x[1:active+1]) < eps
182 |             for i, e in enumerate(equal):
183 |                 if e:
184 |                     members[i].update(members[i + 1])
185 |                     members[i + 1] = members[i]
186 |             project(gam, active, members)
187 |     step = 0
188 |     while not finished and step < max_steps and active > -1:
189 |         step += 1
190 |         res = compute_step_length(x, gam, active, eps)
191 |         delta, ind = res
192 | 
193 |         if ind == -1:
194 |             active = active - len(members[active])
195 | 
196 |         stop = torch.dot(x - x0, gam) / torch.dot(gam, gam) + 1. / lam
197 |         if 0 <= stop < delta:
198 |             delta = stop
199 |             finished = True
200 | 
201 |         x = x - delta * gam
202 |         if not finished:
203 |             if ind >= 0:
204 |                 repr = min(members[ind])
205 |                 members[repr].update(members[ind + 1])
206 |                 for m in members[ind]:
207 |                     if m != repr:
208 |                         members[m] = members[repr]
209 |                 project(gam, active, members)
210 |         if "path" in debug:
211 |             debug["path"].append(x.numpy())
212 | 
213 |     if "step" in debug:
214 |         debug["step"] = step
215 |     if "finished" in debug:
216 |         debug["finished"] = finished
217 |     return x, gam
218 | 
219 | 
220 | def lovasz_binary(margins, label, prox=False, max_steps=20, debug={}):
221 |     # 1d vector inputs
222 |     # Workaround: can't sort Variable bug
223 |     # prox: False or lambda regularization value
224 |     _, perm = torch.sort(margins.data, dim=0, descending=True)
225 |     margins_sorted = margins[perm]
226 |     grad = gamma_fast(label, perm)
227 |     loss = torch.dot(F.relu(margins_sorted), Variable(grad))
228 |     if prox is not False:
229 |         xp, gam = find_proximal(margins_sorted.data, grad, prox, max_steps=max_steps, eps=1e-6, debug=debug)
230 |         hook = margins_sorted.register_hook(lambda grad: Variable(margins_sorted.data - xp))
231 |         return loss, hook, gam
232 |     else:
233 |         return loss
234 | 
235 | 
236 | def lovasz_single(logit, label, prox=False, max_steps=20, debug={}):
237 |     # single image
238 |     mask = (label.view(-1) != 255)
239 |     num_preds = mask.long().sum()
240 |     if num_preds == 0:
241 |         # only void pixels, the gradients should be 0
242 |         return logit.sum() * 0.
243 |     target = label.contiguous().view(-1)[mask]
244 |     signs = 2. * target.float() - 1.
245 |     logit = logit.contiguous().view(-1)[mask]
246 |     margins = (1. - logit * Variable(signs))
247 |     loss = lovasz_binary(margins, target, prox, max_steps, debug=debug)
248 |     return loss
249 | 
250 | 
251 | 
--------------------------------------------------------------------------------
/settings.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 | import platform
3 | import os
4 | from os.path import join
5 | from copy import deepcopy
6 | import argparse
7 | from datasets.utils import pascal_classes
8 | 
9 | # --- settings common to train and eval ---
10 | defaults = argparse.Namespace()
11 | defaults.OUTPUT_DIR = './weights'
12 | 
13 | # --- train settings ---
14 | 
15 | defaults_train = deepcopy(defaults)
16 | defaults_train.BATCH_SIZE = 1
17 | defaults_train.GRAD_UPDATE_EVERY = 10  # gradient accumulation
18 | defaults_train.INPUT_SIZE = '321,321'
19 | defaults_train.LEARNING_RATE = 5e-4
20 | defaults_train.MOMENTUM = 0.9
21 | defaults_train.NUM_STEPS = 1000
22 | defaults_train.RANDOM_SEED = 1234
23 | defaults_train.SAVE_NUM_IMAGES = 1
24 | defaults_train.SAVE_PRED_EVERY = 2000
25 | defaults_train.DO_VAL_EVERY = 300
26 | 
27 | # --- eval settings ---
28 | defaults_eval = deepcopy(defaults)
29 | 
30 | def get_arguments(argv, mode='eval'):
31 |     """Parse all the arguments provided from the CLI.
32 | 
33 |     Returns:
34 |       The parsed arguments as an argparse.Namespace.
35 |     """
36 |     classes = pascal_classes(with_void=False)
37 |     inv_classes = pascal_classes(with_void=False, reverse=True)
38 |     def pascal_cls(s):
39 |         n_classes = len(classes)
40 |         if s in classes:
41 |             return classes[s]
42 |         elif 0 <= int(s) < n_classes:
43 |             return int(s)
44 |         raise argparse.ArgumentTypeError('Pascal classes: {}'.format(classes))
45 | 
46 |     parser = argparse.ArgumentParser(description="Deeplab-resnet-multiscale")
47 |     if mode == 'eval':
48 |         defaults = defaults_eval
49 |     elif mode == 'train':
50 |         defaults = defaults_train
51 |     parser.add_argument("--expname", type=str, required=True,
52 |                         help="Name of the experiment.")
53 |     parser.add_argument("--nickname", type=str, required=True,
54 |                         help="Name given to the run (useful for output paths and logging).")
55 |     parser.add_argument("--restore-from", type=str, required=True,
56 |                         help="Where to restore model parameters from.")
57 |     parser.add_argument("--binary", type=pascal_cls, metavar="[0-20]", default=-1,
58 |                         help="Binary classifier with specified class (class name or id).")
59 |     parser.add_argument("--sampling", type=str, choices=['sequential', 'shuffle', 'balanced', 'exclusive'],
60 |                         default='shuffle', help="Trainset sampling (balanced applies to binary)")
61 |     if mode == 'eval':
62 |         parser.add_argument("--num-steps", type=int, default=defaults.NUM_STEPS,
63 |                             help="Number of images in the validation set.")
64 |     if mode == 'train':
65 |         parser.add_argument("--threads", type=int, default=4,
66 |                             help="Number of data fetcher threads")
67 |         parser.add_argument("--epochs", action="store_true",
68 |                             help="Count steps in epochs (1 step is 1 epoch)")
69 |         parser.add_argument("--proximal", action="store_true",
70 |                             help="Use proximal variant")
71 |         parser.add_argument("--proxreg", type=float, default=0.5,
72 |                             help="proximal parameter")
73 |         parser.add_argument("--maxproxsteps", type=int, default=30,
74 |                             help="maximal prox. computation steps")
computation steps") 75 | parser.add_argument("--no-startval", action="store_true", 76 | help="Don't start with a validation run") 77 | parser.add_argument("--batch-size", type=int, default=defaults.BATCH_SIZE, 78 | help="Number of images sent to the network in one step.") 79 | parser.add_argument("--grad-update-every", type=int, default=defaults.GRAD_UPDATE_EVERY, 80 | help="Number of steps after which gradient update is applied.") 81 | parser.add_argument("--input-size", type=str, default=defaults.INPUT_SIZE, 82 | help="Comma-separated string with height and width of images.") 83 | parser.add_argument("-lr", "--learning-rate", type=float, default=defaults.LEARNING_RATE, 84 | help="Base learning rate for training with polynomial decay.") 85 | parser.add_argument("--momentum", type=float, default=defaults.MOMENTUM, 86 | help="Momentum component of the optimiser.") 87 | parser.add_argument("--no-random-mirror", action="store_false", 88 | help="No random mirror of the inputs during the training.") 89 | parser.add_argument("--no-random-scale", action="store_false", 90 | help="No random scale of the inputs during the training.") 91 | parser.add_argument("--save-pred-every", type=int, default=defaults.SAVE_PRED_EVERY, 92 | help="Save summaries and checkpoint every often.") 93 | parser.add_argument("--do-val-every", type=int, default=defaults.DO_VAL_EVERY, 94 | help="Do validation every...") 95 | parser.add_argument("--jaccard", action="store_true", 96 | help="Use lovasz hinge in the binary case.") 97 | parser.add_argument("--hinge", action="store_true", 98 | help="Use hinge loss.") 99 | parser.add_argument("--num-steps", type=float, default=defaults.NUM_STEPS, 100 | help="Number of training steps.") 101 | parser.add_argument("--start-step", type=int, default=0, 102 | help="Start from this training set.") 103 | parser.add_argument("--train-last", type=int, default=-1, 104 | help="Train last .. 
layers (default -1 -> all).") 105 | parser.add_argument("--schedule", action="store_true", 106 | help="Use learning rate schedule.") 107 | parser.add_argument("--delete-previous", action="store_true", 108 | help="Delete previous logdir if exists.") 109 | args = parser.parse_args(argv) 110 | args.snapshot_dir = join(args.output_dir, args.expname) 111 | if not os.path.exists(args.snapshot_dir): 112 | os.makedirs(args.snapshot_dir) 113 | if args.binary != -1: 114 | args.binary_str = inv_classes[args.binary] 115 | print('binary selected, class ' + args.binary_str) 116 | if mode == 'train': 117 | args.random_mirror = not args.no_random_mirror 118 | args.random_scale = not args.no_random_scale 119 | if args.sampling == 'exclusive': 120 | if args.binary == -1: 121 | parser.error('The --exclusive flag requires --binary set.') 122 | if args.sampling == 'balanced': 123 | if args.binary == -1: 124 | parser.error('The --balanced flag requires --binary set.') 125 | if args.jaccard: 126 | if args.binary == -1: 127 | parser.error('Jaccard loss requires --binary set.') 128 | if args.hinge: 129 | if args.binary == -1: 130 | parser.error('Hinge loss requires --binary set.') 131 | 132 | return args -------------------------------------------------------------------------------- /train_pytorch.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import argparse 4 | from datetime import datetime 5 | import os, sys 6 | from os.path import join 7 | import time 8 | import re 9 | import platform 10 | 11 | import numpy as np 12 | 13 | import torch 14 | from torch import optim 15 | from torch.autograd import Variable 16 | import torch.utils.data as data 17 | import torch.nn.functional as F 18 | 19 | import random 20 | # WARNING: if multiple worker threads, the seeds are useless (no warranty on the execution order) 21 | random.seed(1857) 22 | torch.manual_seed(1857) 23 | torch.cuda.manual_seed(1857) 24 | 25 | from settings import get_arguments 26 | import datasets 27 | from datasets.loadvoc import load_extended_voc 28 | from compose import (JointCompose, RandomScale, Normalize, 29 | RandomHorizontalFlip, RandomCropPad, PILtoTensor, Scale, TensortoPIL) 30 | from PIL.Image import NEAREST 31 | 32 | from losses import * 33 | 34 | import deepdish as dd 35 | import deeplab_resnet.model_pytorch as modelpy 36 | from collections import defaultdict 37 | import yaml 38 | 39 | IGNORE_LABEL = 255 40 | IMG_MEAN = np.array((104.00698793,116.66876762,122.67891434), dtype=np.float32) 41 | 42 | def create_variables(weights, cuda=True): 43 | var = dict() 44 | for k, v in weights.items(): 45 | v = torch.from_numpy(v) 46 | if cuda: 47 | v = v.cuda() 48 | if not (k.endswith('moving_mean') or k.endswith('moving_variance')): 49 | v = Variable(v) 50 | var[k] = v 51 | return var 52 | 53 | def snapshot_variables(weights, dest): 54 | out = {} 55 | for (k, v) in weights.items(): 56 | if isinstance(v, Variable): 57 | v = v.data 58 | out[k] = v.cpu().numpy() 59 | dd.io.save(dest, out) 60 | 61 | def training_groups(weights, base_lr, multipliers=[0.1, 1.0, 1.0], train_last=-1): 62 | """ 63 | get training groups and activates requires_grad for variables 64 | train_last: last: only train last ... 
65 |     multipliers: learning rate multipliers for the [conv, fc weights, fc biases] groups
66 |     """
67 |     fixed = ['moving_mean', 'moving_variance', 'beta', 'gamma']
68 |     # get training variables, with their lr
69 |     trained = {k: v for (k, v) in weights.iteritems() if not any([k.endswith(s) for s in fixed])}
70 |     for v in trained.values():
71 |         v.requires_grad = True
72 |     fc_vars = {k: v for (k, v) in trained.iteritems() if 'fc' in k}
73 |     conv_vars = [v for (k, v) in trained.items() if 'fc' not in k]   # lr * multipliers[0]
74 |     fc_w_vars = [v for (k, v) in fc_vars.items() if 'weights' in k]  # lr * multipliers[1]
75 |     fc_b_vars = [v for (k, v) in fc_vars.items() if 'biases' in k]   # lr * multipliers[2]
76 |     assert(len(trained) == len(fc_vars) + len(conv_vars))
77 |     assert(len(fc_vars) == len(fc_w_vars) + len(fc_b_vars))
78 |     if train_last == -1:
79 |         print("train all layers")
80 |         groups = [{'params': conv_vars, 'lr': multipliers[0] * base_lr},
81 |                   {'params': fc_w_vars, 'lr': multipliers[1] * base_lr},
82 |                   {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}]
83 |     elif train_last == 1:
84 |         print("train last layer")
85 |         for v in conv_vars:
86 |             v.requires_grad = False
87 |         groups = [{'params': fc_w_vars, 'lr': multipliers[1] * base_lr},
88 |                   {'params': fc_b_vars, 'lr': multipliers[2] * base_lr}]
89 |     return groups
90 | 
91 | class SegsetWrap(data.Dataset):
92 |     def __init__(self, segset, transform=None):
93 |         self.name = segset.name
94 |         self.segset = segset
95 |         self.transform = transform
96 |     def __repr__(self):
97 |         return "<SegsetWrap '{}'>".format(self.name)
98 |     def __getitem__(self, i):
99 |         inputs = self.segset.read(i, kind="PIL")
100 |         if self.transform is not None:
101 |             inputs = self.transform(inputs)
102 |         return inputs
103 |     def __len__(self):
104 |         return len(self.segset)
105 | 
106 | def main(args):
107 | 
108 |     print(os.path.basename(__file__), 'arguments:')
109 |     print(yaml.dump(vars(args), default_flow_style=False))
110 | 
111 |     weights = dd.io.load(args.restore_from)
112 |     print('Loaded weights from {}'.format(args.restore_from))
113 |     weights = create_variables(weights, cuda=True)
114 |     forward = lambda input: modelpy.DeepLabResNetModel({'data': input}, weights).layers['fc1_voc12']
115 |     train, val, test = load_extended_voc()
116 |     input_size = map(int, args.input_size.split(',')) if args.input_size is not None else None
117 |     print ('========')
118 | 
119 |     if args.proximal:
120 |         assert args.jaccard
121 | 
122 |     if args.binary == -1:
123 |         print("Multiclass: loss set to cross-entropy")
124 |         lossfn, lossname = crossentropyloss, 'xloss'
125 |     else:
126 |         if args.jaccard:
127 |             print("loss set to jaccard hinge")
128 |             lossfn, lossname = lovaszloss, 'lovaszloss'
129 |         elif args.hinge:
130 |             print("loss set to hinge loss")
131 |             lossfn, lossname = hingeloss, 'hingeloss'
132 |         else:
133 |             print("loss set to binary cross-entropy")
134 |             lossfn, lossname = binaryXloss, 'binxloss'
135 |         train, val = train.binarize(args.binary_str), val.binarize(args.binary_str)
136 | 
137 |     # get network output size
138 |     def get_size():
139 |         dummy_input = torch.rand((1, 3, input_size[0], input_size[1])).cuda()
140 |         dummy_out = forward(Variable(dummy_input, volatile=True))
141 |         output_size = (dummy_out.size(2), dummy_out.size(3))
142 |         return output_size
143 |     output_size = get_size()
144 | 
145 |     base_lr = args.learning_rate
146 |     groups = training_groups(weights, base_lr, train_last=args.train_last)
147 |     optimizer = optim.SGD(groups, lr=base_lr, momentum=args.momentum)
148 |     groups_lr = [group['lr'] for group in optimizer.param_groups]
149 | 
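    # With the default multipliers, the pretrained conv body trains at
    # 0.1 * base_lr while the 'fc' classifier weights and biases train at
    # base_lr; groups_lr records these initial per-group rates so that the
    # optional --schedule step in train_step below can rescale each group
    # consistently at the halfway point of training.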
150 | transforms_train = JointCompose([RandomScale(0.5, 1.5) if args.random_scale else None, 151 | RandomHorizontalFlip() if args.random_mirror else None, 152 | RandomCropPad(input_size, (0, IGNORE_LABEL)), 153 | [None, Scale((output_size[1], output_size[0]), NEAREST)], 154 | PILtoTensor(), 155 | [Normalize(torch.from_numpy(IMG_MEAN)), None], 156 | ]) 157 | transforms_val = JointCompose([PILtoTensor(), 158 | [Normalize(torch.from_numpy(IMG_MEAN)), None], 159 | ]) 160 | invtransf_val = JointCompose([[Normalize(-torch.from_numpy(IMG_MEAN)), None], 161 | TensortoPIL( datasets.utils.color_map() ), 162 | ]) 163 | 164 | if args.sampling == 'sequential': 165 | trainset = SegsetWrap(train, transforms_train) 166 | sampler = data.sampler.SequentialSampler(trainset) 167 | elif args.sampling == 'shuffle': 168 | trainset = SegsetWrap(train, transforms_train) 169 | sampler = data.sampler.RandomSampler(trainset) 170 | elif args.sampling == 'balanced': 171 | trainset = SegsetWrap(train, transforms_train) 172 | positives = np.array([(args.binary_str in ex.classes) for ex in train]) 173 | sample_weights = np.zeros(len(positives)) 174 | sample_weights[positives] = 0.5 / positives.sum() 175 | sample_weights[~positives] = 0.5 / (~positives).sum() 176 | sampler = data.sampler.WeightedRandomSampler(sample_weights, len(train)) 177 | from datasets.balanced_val import balanced 178 | inds = balanced[args.binary_str] 179 | val.examples = [val[i] for i in inds] 180 | print('Subsampled val. to balanced set of {:d} examples'.format(len(val))) 181 | elif args.sampling == 'exclusive': 182 | train, val = train[args.binary_str], val[args.binary_str] 183 | trainset = SegsetWrap(train, transforms_train) 184 | sampler = data.sampler.RandomSampler(trainset) 185 | print('Subsampled train, val. to balanced set of {}, {} examples'.format(len(train), len(val))) 186 | 187 | update_every = args.grad_update_every 188 | global_batch_size = args.batch_size * update_every 189 | 190 | trainloader = data.DataLoader(trainset, 191 | batch_size=global_batch_size, 192 | sampler=sampler, 193 | num_workers=args.threads, 194 | pin_memory=True) 195 | 196 | valset = SegsetWrap(val, transforms_val) 197 | valloader = data.DataLoader(valset, 198 | batch_size=1, 199 | shuffle=False, 200 | num_workers=1, 201 | pin_memory=True) 202 | 203 | step = args.start_step 204 | finished = False 205 | epoch = 0 206 | 207 | from tensorboard import SummaryWriter 208 | logdir = join(args.expname + '_logs', args.nickname) 209 | if os.path.exists(logdir): 210 | if args.delete_previous: 211 | var = 'y' 212 | else: 213 | var = raw_input(logdir + " already exists. Delete (y/n)? 
") 214 | if var == 'n': 215 | raise ValueError(logdir + " already exists") 216 | elif var == 'y': 217 | import shutil 218 | shutil.rmtree(logdir) 219 | log_writer = SummaryWriter(logdir) 220 | # train_writer = SummaryWriter(log_train) 221 | 222 | def snapshot(): 223 | dest = join(args.snapshot_dir, '{}-{}-{:02d}.h5'.format(args.expname, args.nickname, step)) 224 | snapshot_variables(weights, dest) 225 | print("[{}] step {:d}: saved weights under {}".format(dt, step, dest)) 226 | 227 | def do_val(): 228 | valiter = iter(valloader) 229 | stats = defaultdict(list) 230 | tosee = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # export some outputs images of the validation set 231 | for valstep, (inputs, labels) in enumerate(valiter): 232 | start_time = time.time() 233 | inputs, labels = Variable(inputs.cuda(), volatile=True), labels.cuda().long() 234 | logits = forward(inputs) 235 | logits = F.upsample_bilinear(logits, size=labels.size()[1:]) 236 | if args.binary == -1: 237 | xloss = crossentropyloss(logits, labels) 238 | stats['xloss'].append(xloss.data[0]) 239 | print('[Validation {}-{:d}], xloss {:.5f} - mean {:.5f} ({:.3f} sec/step {})'.format( 240 | step, valstep, xloss, np.mean(stats['xloss']), time.time() - start_time)) 241 | # conf, pred = logits.max(1) 242 | else: 243 | conf, multipred = logits.max(1) 244 | multipred = multipred.squeeze(1) 245 | multipred = (multipred == args.binary).long() 246 | imageiou_multi = iouloss(multipred.data.squeeze(0), labels.squeeze(0)) 247 | stats['imageiou_multi'].append(imageiou_multi) 248 | 249 | logits = logits[:, args.binary, :, :] # select only 1 output 250 | pred = (logits > 0.).long() 251 | 252 | # image output 253 | if valstep in tosee: 254 | inputim, inputlab = invtransf_val([inputs.data[0, :, :, :], labels[0, :, :]]) 255 | _, predim = invtransf_val([inputs.data[0, :, :, :], pred.data[0, :, :]]) 256 | log_writer.add_image(str(valstep)+'im', np.array(inputim.convert("RGB"))) 257 | log_writer.add_image(str(valstep)+'lab', np.array(inputlab.convert("RGB"))) 258 | log_writer.add_image(str(valstep)+'pred', np.array(predim.convert("RGB"))) 259 | 260 | imageiou = iouloss(pred.data.squeeze(0), labels.squeeze(0)) 261 | stats['imageiou'].append(imageiou) 262 | hloss = hingeloss(logits, labels).data[0] 263 | stats['hingeloss'].append(hloss) 264 | jloss = lovaszloss(logits, labels).data[0] 265 | stats['lovaszloss'].append(jloss) 266 | binxloss = binaryXloss(logits, labels).data[0] 267 | stats['binxloss'].append(binxloss) 268 | 269 | print( '[Validation {}-{:d}], '.format(step, valstep) 270 | + 'hloss {:.5f} - mean {:.5f}, '.format(hloss, np.mean(stats['hingeloss'])) 271 | + 'lovaszloss {:.5f} - mean {:.5f}, '.format(jloss, np.mean(stats['lovaszloss'])) 272 | + 'iou {:.5f} - mean {:.5f}, '.format(imageiou, np.mean(stats['imageiou'])) 273 | + 'iou_multi {:.5f} - mean {:.5f}, '.format(imageiou_multi, np.mean(stats['imageiou_multi'])) 274 | + '({:.3f} sec/step)'.format(time.time() - start_time) 275 | ) 276 | for key in stats: 277 | log_writer.add_scalar(key + '_val', np.mean(stats[key]), step) 278 | 279 | if not args.no_startval: 280 | do_val() 281 | 282 | num_steps = args.num_steps 283 | if args.epochs: 284 | num_steps *= len(trainloader) 285 | num_steps = int(num_steps) 286 | if args.new_schedule: 287 | half_step = num_steps // 2 288 | 289 | while not finished: # new epoch 290 | trainiter = iter(trainloader) 291 | def train_step(): 292 | if args.new_schedule and step == half_step: 293 | print("==== HALF STEP ====") 294 | for group, group_base in 
    while not finished:  # new epoch
        trainiter = iter(trainloader)

        def train_step():
            if args.new_schedule and step == half_step:
                print("==== HALF STEP ====")
                for group, group_base in zip(optimizer.param_groups, groups_lr):
                    if ('fix_lr' not in group) or not group['fix_lr']:
                        group['lr'] = group_base / 5

            inputs, labels = next(trainiter)
            inputs, labels = Variable(inputs.cuda()), labels.cuda().long()
            chunk_inp = torch.split(inputs, args.batch_size, dim=0)
            chunk_lab = torch.split(labels, args.batch_size, dim=0)
            optimizer.zero_grad()
            lossacc = 0.
            # start gradient accumulation
            for inp, lab in zip(chunk_inp, chunk_lab):
                logits = forward(inp)
                if args.binary != -1:
                    logits = logits[:, args.binary, :, :]  # select only 1 output
                if args.proximal:
                    debug = {"step": -1, "finished": False}
                    proxreg = args.proxreg
                    if args.power_prox > 0:
                        proxreg = proxreg / (1. - step / (num_steps + 0.1)) ** args.power_prox
                    if args.new_schedule:
                        if step >= half_step:
                            proxreg *= 5.
                    loss, hook, gam = lossfn(logits, lab, prox=proxreg, max_steps=args.maxproxsteps, debug=debug)
                    print(str(debug["step"]) + ('' if debug["finished"] else 'E'), end=' ')
                else:
                    loss = lossfn(logits, lab)
                loss.backward(torch.Tensor([1. / len(chunk_inp)]).cuda())  # rescale gradient
                if args.proximal:
                    hook.remove()  # remove hook to free memory
                lossacc += loss.data[0] / len(chunk_inp)
            optimizer.step()
            return lossacc

        for substep in range(len(trainloader)):
            start_time = time.time()
            step += 1
            if step > num_steps:
                finished = True
                break
            lossacc = train_step()

            duration = time.time() - start_time
            (dt, micro) = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S.%f').split('.')
            dt = "%s.%03d" % (dt, int(micro) // 1000)
            print('[{}] step {:d} \t loss = {:.5f} ({:.3f} sec/step, epoch {})'.format(
                dt, step, lossacc, duration, epoch))

            log_writer.add_scalar(lossname, lossacc, step)

            if step % args.save_pred_every == 0:
                snapshot()
            if step % args.do_val_every == 0:
                do_val()

        epoch += 1

    # end of main: save weights and do val
    snapshot()
    do_val()


if __name__ == '__main__':
    args = get_arguments(sys.argv[1:], 'train')
    main(args)

--------------------------------------------------------------------------------