├── lib
│   ├── __init__.py
│   ├── .DS_Store
│   ├── data
│   │   ├── class_idx.pkl
│   │   ├── prior_factor.npy
│   │   ├── pts_in_hull.npy
│   │   ├── selected_class.npy
│   │   ├── semseg_avg_inv.npy
│   │   ├── camera_mean_and_std.pkl
│   │   ├── semseg_prior_factor.npy
│   │   ├── DejaVuSerifCondensed.ttf
│   │   ├── jigsaw_max_hamming_set.npy
│   │   ├── prior_factor_in_door.npy
│   │   ├── places_class_to_keep.txt
│   │   ├── places_class_names.txt
│   │   └── class_list.txt
│   ├── models
│   │   ├── architectures.py
│   │   ├── base_net.py
│   │   ├── resnet_config.py
│   │   ├── utils.py
│   │   ├── encoder_decoder_segmentation_semantic.py
│   │   ├── gan_discriminators.py
│   │   └── encoder_decoder_segmentation.py
│   ├── general_utils.py
│   ├── losses
│   │   └── all.py
│   └── optimizers
│       └── train_steps.py
├── .gitattributes
├── dataset
│   └── README.md
├── .DS_Store
├── demo
│   ├── .DS_Store
│   ├── illus.png
│   ├── tree.png
│   └── precision.png
├── tools
│   ├── .DS_Store
│   ├── init_paths.py
│   ├── task_similarity_tree.m
│   ├── download_model.sh
│   ├── plot_dendrogram.m
│   ├── affinity.py
│   └── utils.py
├── vipa-logo.png
├── explain_result
│   └── sort_gt.npy
├── requirement.txt
├── LICENSE
├── experiments
│   └── final
│       ├── class_1000
│       │   └── config.py
│       ├── class_places
│       │   └── config.py
│       ├── room_layout
│       │   └── config.py
│       ├── segment25d
│       │   └── config.py
│       ├── curvature
│       │   └── config.py
│       ├── ego_motion
│       │   └── config.py
│       ├── fix_pose
│       │   └── config.py
│       ├── segmentsemantic
│       │   └── config.py
│       ├── segment2d
│       │   └── config.py
│       ├── point_match
│       │   └── config.py
│       ├── vanishing_point
│       │   └── config.py
│       ├── non_fixated_pose
│       │   └── config.py
│       ├── denoise
│       │   └── config.py
│       ├── inpainting_whole
│       │   └── config.py
│       ├── autoencoder
│       │   └── config.py
│       ├── edge2d
│       │   └── config.py
│       ├── jigsaw
│       │   └── config.py
│       ├── reshade
│       │   └── config.py
│       ├── rgb2sfnorm
│       │   └── config.py
│       ├── keypoint3d
│       │   └── config.py
│       ├── colorization
│       │   └── config.py
│       ├── rgb2mist
│       │   └── config.py
│       ├── keypoint2d
│       │   └── config.py
│       ├── edge3d
│       │   └── config.py
│       └── rgb2depth
│           └── config.py
└── README.md

/lib/__init__.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/dataset/README.md:
--------------------------------------------------------------------------------
### Dataset
- Taskonomy Tiny
- MS COCO
- Indoor Scene
--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/.DS_Store
--------------------------------------------------------------------------------
/demo/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/demo/.DS_Store
--------------------------------------------------------------------------------
/demo/illus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/demo/illus.png
--------------------------------------------------------------------------------
/demo/tree.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/demo/tree.png
--------------------------------------------------------------------------------
/lib/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/.DS_Store
--------------------------------------------------------------------------------
/tools/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/tools/.DS_Store
--------------------------------------------------------------------------------
/vipa-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/vipa-logo.png
--------------------------------------------------------------------------------
/demo/precision.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/demo/precision.png
--------------------------------------------------------------------------------
/lib/data/class_idx.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/class_idx.pkl
--------------------------------------------------------------------------------
/lib/data/prior_factor.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/prior_factor.npy
--------------------------------------------------------------------------------
/lib/data/pts_in_hull.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/pts_in_hull.npy
--------------------------------------------------------------------------------
/explain_result/sort_gt.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/explain_result/sort_gt.npy
--------------------------------------------------------------------------------
/lib/data/selected_class.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/selected_class.npy
--------------------------------------------------------------------------------
/lib/data/semseg_avg_inv.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/semseg_avg_inv.npy
--------------------------------------------------------------------------------
/lib/data/camera_mean_and_std.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/camera_mean_and_std.pkl
--------------------------------------------------------------------------------
/lib/data/semseg_prior_factor.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/semseg_prior_factor.npy
--------------------------------------------------------------------------------
/lib/data/DejaVuSerifCondensed.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/DejaVuSerifCondensed.ttf
--------------------------------------------------------------------------------
/lib/data/jigsaw_max_hamming_set.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/jigsaw_max_hamming_set.npy
--------------------------------------------------------------------------------
/lib/data/prior_factor_in_door.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zju-vipa/TransferbilityFromAttributionMaps/HEAD/lib/data/prior_factor_in_door.npy
--------------------------------------------------------------------------------
/tools/init_paths.py:
--------------------------------------------------------------------------------
import os.path as osp
import sys

cur_dir = osp.dirname( __file__ )
# Search lib first
lib_path = osp.join( cur_dir, '..', 'lib' )
sys.path.insert( 0, lib_path )

# Then elsewhere
root_path = osp.join( cur_dir, '..' )
sys.path.insert( 1, root_path )
--------------------------------------------------------------------------------
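Scripts in tools/ (for example tools/affinity.py below) rely on this module purely for its sys.path side effect. A minimal sketch of the intended pattern for a hypothetical new script in tools/ — the load_config import is an illustrative assumption, chosen because general_utils.py lives in lib/:

# Hypothetical scaffold for a new script in tools/ (illustration only):
import init_paths  # noqa: F401 -- imported only for its sys.path side effect
from general_utils import load_config  # resolves because ../lib is now on sys.path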
/tools/task_similarity_tree.m:
--------------------------------------------------------------------------------
clear,clc

%load('../explain_result/taskonomy/affinity.mat');
load('~/Downloads/affinity.mat')
affinity_saliency = squeeze(affinity(1, :, :));
affinity_gradxinput = squeeze(affinity(2, :, :));
affinity_elrp = squeeze(affinity(3, :, :));

affinity_saliency([8,20],:) = [];
affinity_gradxinput([8,20],:) = [];
affinity_elrp([8,20],:) = [];

task_list = {'Autoencoder', 'Curvature', 'Denoise', 'Edge 2D', 'Edge 3D', ...
    'Keypoint 2D','Keypoint 3D', ...
    'Reshade' ,'Rgb2depth' ,'Rgb2mist','Rgb2sfnorm', ...
    'Room Layout', 'Segment 25D', 'Segment 2D', 'Vanishing Point', ...
    'Segment Semantic' ,'Class 1000' ,'Class Places'};

plot_dendrogram(affinity_saliency, task_list);
plot_dendrogram(affinity_gradxinput, task_list);
plot_dendrogram(affinity_elrp, task_list);
--------------------------------------------------------------------------------
/tools/download_model.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

CURRDIR=$(pwd)
BASEDIR=$(dirname "$0")

TASKS="autoencoder \
class_1000 \
class_places \
colorization \
curvature \
denoise \
edge2d \
edge3d \
inpainting_whole \
jigsaw \
keypoint2d \
keypoint3d \
reshade \
rgb2depth \
rgb2mist \
rgb2sfnorm \
room_layout \
segment25d \
segment2d \
segmentsemantic \
vanishing_point"
mkdir -p "$CURRDIR/$BASEDIR/../temp"

SUBFIX="data-00000-of-00001 meta index"

for t in $TASKS; do
    mkdir -p "$CURRDIR/$BASEDIR/../temp/${t}"
    for s in $SUBFIX; do
        echo "Downloading ${t}'s model.${s}"
        wget "https://s3-us-west-2.amazonaws.com/taskonomy-unpacked-oregon/\
model_log_final/${t}/logs/model.permanent-ckpt.${s}" -P $CURRDIR/$BASEDIR/../temp/${t}
    done
done
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
absl-py==0.1.10
awsebcli==3.12.3
bleach==1.5.0
blessed==1.14.2
boto3==1.5.32
botocore==1.9.0
cement==2.8.2
certifi==2018.1.18
click==6.7
colorama==0.3.7
cycler==0.10.0
decorator==4.2.1
dockerpty==0.4.1
docopt==0.6.2
docutils==0.14
Flask==0.12.2
gunicorn==19.7.1
html5lib==0.9999999
itsdangerous==0.24
Jinja2==2.10
jmespath==0.9.3
Markdown==2.6.11
MarkupSafe==1.0
matplotlib==2.0.2
mock==2.0.0
networkx==2.1
numpy==1.12.1
olefile==0.45.1
pathspec==0.5.5
pbr==3.1.1
Pillow==4.2.1
protobuf==3.4.0
pyparsing==2.2.0
python-dateutil==2.6.1
pytz==2018.3
PyYAML==3.12
requests==2.9.1
s3transfer==0.1.11
scikit-image==0.12.3
scikit-learn==0.19.0
scipy==1.0.0
semantic-version==2.5.0
six==1.11.0
tabulate==0.7.5
tensorflow==1.5.0
tensorflow-tensorboard==1.5.1
termcolor==1.1.0
transforms3d==0.3.1
wcwidth==0.1.7
websocket-client==0.47.0
Werkzeug==0.14.1

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 yxchen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/lib/models/architectures.py:
--------------------------------------------------------------------------------
''' architectures.py

    Contains high-level model architectures assembled from smaller parts
'''
from __future__ import absolute_import, division, print_function

import argparse
import os
import tensorflow as tf
import tensorflow.contrib.slim as slim

from models.encoder_decoder import StandardED as EncoderDecoder
from models.encoder_decoder_cgan import EDWithCGAN as EncoderDecoderWithCGAN
from models.encoder_decoder_cgan_softmax import EDWithSoftmaxRegenerationCGAN as EDSoftmaxRegenCGAN
from models.siamese_nets import StandardSiamese as Siamese
from models.cycle_siamese_nets import CycleSiamese as CycleSiamese
from models.basic_feedforward import StandardFeedforward as BasicFF
from models.encoder_decoder_segmentation import SegmentationED as SegmentationEncoderDecoder
from models.encoder_decoder_segmentation_semantic import SemSegED
from models.encoder_decoder_softmax import SoftmaxED as EDSoftmax

ALLOWABLE_TYPES = [
    BasicFF,
    CycleSiamese,
    EncoderDecoderWithCGAN,
    EncoderDecoder,
    EDSoftmax,
    EDSoftmaxRegenCGAN,
    SegmentationEncoderDecoder,
    SemSegED,
    Siamese,
    'empty' ]
--------------------------------------------------------------------------------
/tools/plot_dendrogram.m:
--------------------------------------------------------------------------------
function plot_dendrogram(affinity_matrix,task_list_labels)
semantic = {'Class 1000', 'Class Places','Segment Semantic'};
geometrix = {'Vanishing Point','Room Layout'};
dimension2 = {'Edge 2D','Keypoint 2D','Colorization','Inpainting Whole','Autoencoder','Segment 2D','Denoise'};
dimension3 = {'Curvature','Edge 3D','Keypoint 3D','Reshade','Rgb2depth','Rgb2sfnorm','Segment 25D','Rgb2mist'};

figure
Z = linkage(affinity_matrix);
D = pdist(affinity_matrix);
leafOrder = optimalleaforder(Z,D);

for i = 1:length(task_list_labels)
    if ismember(task_list_labels{i}, semantic)
        task_list_labels{i} = ['\color{magenta} ' task_list_labels{i}];
    elseif ismember(task_list_labels{i}, geometrix)
        task_list_labels{i} = ['\color{red} ' task_list_labels{i}];
    elseif ismember(task_list_labels{i}, dimension2)
        task_list_labels{i} = ['\color{blue} ' task_list_labels{i}];
    elseif ismember(task_list_labels{i}, dimension3)
        task_list_labels{i} = ['\color{green} ' task_list_labels{i}];
    else
        task_list_labels{i} = ['\color{black} ' task_list_labels{i}];
    end
end
dendrogram(Z,20,'Orientation','left','ColorThreshold','default','Labels',task_list_labels,'Reorder',leafOrder)
end
--------------------------------------------------------------------------------
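For readers without MATLAB, the same task-similarity tree can be reproduced in Python. A minimal sketch with scipy/matplotlib, under stated assumptions: the affinity.npy path is illustrative, the dropped rows mirror MATLAB indices [8,20] (colorization, inpainting_whole), linkage() defaults match MATLAB's single-linkage/Euclidean defaults, and optimal_leaf_ordering requires a reasonably recent SciPy (drop that call otherwise):

# Python counterpart to plot_dendrogram.m (a sketch, not part of the repo).
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram, optimal_leaf_ordering
from scipy.spatial.distance import pdist

affinity = np.load('explain_result/taskonomy/affinity.npy')   # shape (3, 20, 20)
saliency = np.delete(affinity[0], [7, 19], axis=0)             # drop rows 8 and 20 (1-based), as in the .m file

labels = ['Autoencoder', 'Curvature', 'Denoise', 'Edge 2D', 'Edge 3D',
          'Keypoint 2D', 'Keypoint 3D', 'Reshade', 'Rgb2depth', 'Rgb2mist',
          'Rgb2sfnorm', 'Room Layout', 'Segment 25D', 'Segment 2D',
          'Vanishing Point', 'Segment Semantic', 'Class 1000', 'Class Places']

Z = optimal_leaf_ordering(linkage(saliency), pdist(saliency))  # single linkage + optimal leaf order
dendrogram(Z, labels=labels, orientation='left')
plt.tight_layout()
plt.show()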
/lib/models/base_net.py:
--------------------------------------------------------------------------------
'''
General config.py options that can be used for all models.
'''
import tensorflow as tf
import tensorflow.contrib.slim as slim

from optimizers import ops as optimizers

class BaseNet(object):

    def __init__(self, global_step, cfg):
        self.cfg = cfg
        self.decoder_only = False
        self.extended_summaries = False
        # Defaults checked by build_train_op(); subclasses are expected to set
        # these once build_model()/get_losses() have run.
        self.model_built = False
        self.total_loss = None
        if 'extended_summaries' in cfg:
            self.extended_summaries = cfg['extended_summaries']

    def build_model(self):
        raise NotImplementedError( 'build_model not implemented')

    def get_losses(self):
        raise NotImplementedError( 'get_losses not implemented')

    def build_train_op( self, global_step ):
        if not self.model_built or self.total_loss is None:
            raise RuntimeError( "Cannot build optimizers until 'build_model' ({0}) and 'get_losses' {1} are run".format(
                self.model_built, self.total_loss is not None ) )
        self.global_step = global_step
        self.optimizer = optimizers.build_optimizer( global_step=global_step, cfg=self.cfg )
        self.train_op = slim.learning.create_train_op( self.total_loss, self.optimizer )

    def train_step(self):
        raise NotImplementedError( 'train_step not implemented' )

    def get_train_step_fn(self):
        return slim.learning.train_step
--------------------------------------------------------------------------------
/lib/data/places_class_to_keep.txt:
--------------------------------------------------------------------------------
1 | 0 2 | 0 3 | 0 4 | 1 5 | 0 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 0 18 | 0 19 | 0 20 | 0 21 | 0 22 | 0 23 | 0 24 | 0 25 | 0 26 | 0 27 | 1 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 0 34 | 0 35 | 0 36 | 0 37 | 0 38 | 0 39 | 0 40 | 0 41 | 0 42 | 0 43 | 0 44 | 1 45 | 0 46 | 1 47 | 0 48 | 0 49 | 0 50 | 0 51 | 0 52 | 1 53 | 1 54 | 0 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 0 61 | 0 62 | 0 63 | 0 64 | 1 65 | 0 66 | 0 67 | 0 68 | 0 69 | 0 70 | 0 71 | 0 72 | 0 73 | 0 74 | 0 75 | 0 76 | 0 77 | 0 78 | 0 79 | 0 80 | 0 81 | 0 82 | 0 83 | 0 84 | 0 85 | 0 86 | 0 87 | 0 88 | 0 89 | 0 90 | 1 91 | 0 92 | 0 93 | 0 94 | 0 95 | 0 96 | 1 97 | 0 98 | 0 99 | 0 100 | 0 101 | 0 102 | 0 103 | 0 104 | 0 105 | 0 106 | 0 107 | 0 108 | 0 109 | 0 110 | 0 111 | 0 112 | 0 113 | 0 114 | 0 115 | 0 116 | 0 117 | 0 118 | 0 119 | 0 120 | 0 121 | 0 122 | 1 123 | 0 124 | 0 125 | 1 126 | 0 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 0 135 | 0 136 | 0 137 | 0 138 | 0 139 | 0 140 | 0 141 | 0 142 | 0 143 | 0 144 | 0 145 | 0 146 | 0 147 | 0 148 | 0 149 | 0 150 | 0 151 | 0 152 | 0 153 | 0 154 | 0 155 | 0 156 | 0 157 | 1 158 | 0 159 | 0 160 | 0 161 | 0 162 | 0 163 | 0 164 | 0 165 | 0 166 | 0 167 | 0 168 | 0 169 | 0 170 | 0 171 | 0 172 | 0 173 | 0 174 | 0 175 | 0 176 | 0 177 | 1 178 | 1 179 | 0 180 | 0 181 | 0 182 | 0 183 | 1 184 | 0 185 | 0 186 | 0 187 | 0 188 | 0 189 | 0 190 | 0 191 | 0 192 | 0 193 | 0 194 | 0 195 | 0 196 | 0 197 | 0 198 | 0 199 | 0 200 | 0 201 | 0 202 | 0 203 | 0 204 | 1 205 | 0 206 | 0 207 | 0 208 | 0 209 | 1 210 | 0 211 | 0 212 | 0 213 | 0 214 | 0 215 | 0 216 | 1 217 | 0 218 | 0 219 | 0 220 | 0 221 | 0 222 | 0 223 | 0 224 | 0 225 | 0 226 | 0 227 | 0 228 | 0 229 | 0 230 | 0 231 | 0 232 | 0 233 | 0 234 | 0 235 | 0 236 | 0 237 | 0 238 | 0 239 | 0 240 | 0 241 | 1 242 | 1 243 | 0 244 | 0 245 | 0 246 | 0 247 | 0 248 | 0 249 | 0 250 | 0 251 | 0 252 | 0 253 | 0 254 | 1 255 | 0 256 | 0 257 | 0 258 | 0 259 | 0
260 | 0 261 | 0 262 | 0 263 | 0 264 | 0 265 | 0 266 | 0 267 | 0 268 | 0 269 | 0 270 | 1 271 | 0 272 | 0 273 | 0 274 | 0 275 | 0 276 | 0 277 | 0 278 | 0 279 | 0 280 | 0 281 | 0 282 | 1 283 | 0 284 | 0 285 | 0 286 | 1 287 | 0 288 | 0 289 | 0 290 | 0 291 | 0 292 | 0 293 | 0 294 | 0 295 | 0 296 | 0 297 | 0 298 | 0 299 | 0 300 | 0 301 | 0 302 | 0 303 | 0 304 | 1 305 | 0 306 | 0 307 | 0 308 | 0 309 | 0 310 | 0 311 | 0 312 | 1 313 | 0 314 | 0 315 | 0 316 | 0 317 | 0 318 | 1 319 | 0 320 | 0 321 | 0 322 | 0 323 | 0 324 | 0 325 | 0 326 | 0 327 | 0 328 | 0 329 | 1 330 | 0 331 | 0 332 | 0 333 | 0 334 | 0 335 | 0 336 | 0 337 | 0 338 | 0 339 | 0 340 | 0 341 | 0 342 | 0 343 | 0 344 | 1 345 | 0 346 | 0 347 | 0 348 | 0 349 | 0 350 | 0 351 | 0 352 | 0 353 | 0 354 | 0 355 | 0 356 | 0 357 | 0 358 | 0 359 | 1 360 | 0 361 | 0 362 | 0 363 | 0 364 | 1 365 | 0 366 |
--------------------------------------------------------------------------------
/lib/models/resnet_config.py:
--------------------------------------------------------------------------------
# This is a variable scope aware configuration object for TensorFlow

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS

class Config:
    def __init__(self):
        root = self.Scope('')
        for k, v in FLAGS.__dict__['__flags'].items():
            root[k] = v
        self.stack = [ root ]

    def iteritems(self):
        return self.to_dict().items()

    def to_dict(self):
        self._pop_stale()
        out = {}
        # Work backwards from the flags to the top of the stack,
        # overwriting keys that were found earlier.
        for cs in reversed(self.stack):
            for name in cs:
                out[name] = cs[name]
        return out

    def _pop_stale(self):
        var_scope_name = tf.get_variable_scope().name
        top = self.stack[0]
        while not top.contains(var_scope_name):
            # We aren't in this scope anymore
            self.stack.pop(0)
            top = self.stack[0]

    def __getitem__(self, name):
        self._pop_stale()
        # Recursively extract value
        for i in range(len(self.stack)):
            cs = self.stack[i]
            if name in cs:
                return cs[name]

        raise KeyError(name)

    def set_default(self, name, value):
        if not name in self:
            self[name] = value

    def __contains__(self, name):
        self._pop_stale()
        for i in range(len(self.stack)):
            cs = self.stack[i]
            if name in cs:
                return True
        return False

    def __setitem__(self, name, value):
        self._pop_stale()
        top = self.stack[0]
        var_scope_name = tf.get_variable_scope().name
        assert top.contains(var_scope_name)

        if top.name != var_scope_name:
            top = self.Scope(var_scope_name)
            self.stack.insert(0, top)

        top[name] = value

    class Scope(dict):
        def __init__(self, name):
            self.name = name

        def contains(self, var_scope_name):
            return var_scope_name.startswith(self.name)



# Test
if __name__ == '__main__':

    def assert_raises(exception, fn):
        try:
            fn()
        except exception:
            pass
        else:
            assert False, "Expected exception"

    c = Config()

    c['hello'] = 1
    assert c['hello'] == 1

    with tf.variable_scope('foo'):
        c.set_default("bar", 10)
        c['bar'] = 2
        assert c['bar'] == 2
        assert c['hello'] == 1

        c.set_default("mario", True)

        with tf.variable_scope('meow'):
            c['dog'] = 3
            assert c['dog'] == 3
            assert c['bar'] == 2
            assert c['hello'] == 1
        assert c['mario'] == True

        assert_raises(KeyError, lambda: c['dog'])
        assert c['bar'] == 2
        assert c['hello'] == 1


--------------------------------------------------------------------------------
/tools/affinity.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
import numpy as np
import os
import init_paths
import argparse
import time
import scipy.io

parser = argparse.ArgumentParser()

parser.add_argument('--explain-result-root', dest='explain_result_root', type=str)
parser.set_defaults(explain_result_root='explain_result')

parser.add_argument('--dataset', dest='dataset', type=str)
parser.set_defaults(dataset='taskonomy')

parser.add_argument('--imlist-size', dest='imlist_size', type=int)
parser.set_defaults(imlist_size=1000)

args = parser.parse_args()

prj_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
result_root = os.path.join(prj_dir, args.explain_result_root, args.dataset)
explain_methods = {'saliency': 'saliency', 'grad*input': 'gradXinput', 'elrp': 'elrp'}
method_index_mapping = {'saliency': 0, 'grad*input': 1, 'elrp': 2}

list_of_tasks = 'autoencoder curvature denoise edge2d edge3d \
keypoint2d keypoint3d colorization \
reshade rgb2depth rgb2mist rgb2sfnorm \
room_layout segment25d segment2d vanishing_point \
segmentsemantic class_1000 class_places inpainting_whole'
task_list = list_of_tasks.split(' ')

affinity = np.zeros((3, len(task_list), len(task_list)), float)
attribution_all = np.zeros((3, len(task_list), args.imlist_size, 256*256))

for method_key, explain_method in explain_methods.items():
    for task_i in range(len(task_list)):
        attribution = np.load(os.path.join(result_root, task_list[task_i], explain_method + '.npy'))
        attribution = np.mean(attribution, axis=3)
        attribution = attribution.reshape(attribution.shape[0], -1)
        attribution_all[method_index_mapping[method_key], task_i] = attribution

for method_key, explain_method in explain_methods.items():
    for target_task_i in range(len(task_list)):
        for source_task_i in range(len(task_list)):
            if source_task_i == target_task_i:
                affinity[method_index_mapping[method_key], target_task_i, source_task_i] = args.imlist_size
                continue
            if source_task_i > target_task_i:
                continue

            target_attribution = attribution_all[method_index_mapping[method_key], target_task_i]
            source_attribution = attribution_all[method_index_mapping[method_key], source_task_i]

            affinity_sum = 0
            for ind in range(target_attribution.shape[0]):
                affinity_sum += np.inner(target_attribution[ind], source_attribution[ind]) / \
                    (np.linalg.norm(target_attribution[ind])*np.linalg.norm(source_attribution[ind]))

            affinity[method_index_mapping[method_key], target_task_i, source_task_i] = affinity_sum
            affinity[method_index_mapping[method_key], source_task_i, target_task_i] = affinity_sum

        print('Target task {} done.'.format(task_list[target_task_i]))

np.save(os.path.join(prj_dir, args.explain_result_root, args.dataset, 'affinity.npy'), affinity / args.imlist_size)
scipy.io.savemat(os.path.join(prj_dir, args.explain_result_root, args.dataset, 'affinity.mat'),
                 {'affinity': affinity / args.imlist_size})

for task_i in range(len(task_list)):
    print('---------------- For task {} ----------------'.format(task_list[task_i]))
    for method, ind in method_index_mapping.items():
        print('-----> Method: {}'.format(method))
        affinity_ = affinity[ind][task_i]
        ind_sort = np.argsort(affinity_)
        print([task_list[t] for t in ind_sort])

--------------------------------------------------------------------------------
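The inner loop above is simply the mean cosine similarity between the two tasks' per-image attribution maps. For intuition, a vectorized sketch of the same score — A_t and A_s are hypothetical stand-ins for two rows of attribution_all, i.e. arrays of shape (imlist_size, 256*256), and the eps guard is an addition not present in the script:

import numpy as np

def affinity_score(A_t, A_s, eps=1e-12):
    """Mean cosine similarity between corresponding rows (one row = one image's
    flattened attribution map). Equals affinity_sum / imlist_size in affinity.py."""
    num = np.sum(A_t * A_s, axis=1)                                  # per-image inner products
    den = np.linalg.norm(A_t, axis=1) * np.linalg.norm(A_s, axis=1)  # per-image norm products
    return float(np.mean(num / np.maximum(den, eps)))                # eps guards all-zero maps

With this definition, the diagonal entries the script sets to imlist_size become exactly 1 after the final division by args.imlist_size.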
/lib/general_utils.py:
--------------------------------------------------------------------------------
'''
Defines general utility functions
'''
from models.architectures import ALLOWABLE_TYPES as ALLOWABLE_MODEL_TYPES

import os


#############################
# Dynamically set variables
#############################
class RuntimeDeterminedEnviromentVars( object ):
    '''
    Example use:
    inputs = { 'num_samples_epoch': 100 }
    cfg = { 'batch_size': 5, 'epoch_steps': [ '', 'steps_per_epoch' ] }

    for key, value in cfg.items():
        if isinstance( value, list ) and len( value ) == 2 and value[0] == '':
            RuntimeDeterminedEnviromentVars.register( cfg, key, value[1] )

    RuntimeDeterminedEnviromentVars.load_dynamic_variables( inputs, cfg )
    RuntimeDeterminedEnviromentVars.populate_registered_variables()
    print( cfg ) # epoch_steps = 20
    '''
    registered_variables = []
    is_loaded = False
    # These are initialized in load_dynamic_variables
    steps_per_epoch = '' # An int that contains the number of steps the network will take per epoch

    @classmethod
    def load_dynamic_variables( cls, inputs, cfg ):
        '''
        Args:
            inputs: a dict from train.py
            cfg: a dict from a config.py
        '''
        cls.steps_per_epoch = inputs[ 'num_samples_epoch' ] // cfg[ 'batch_size' ]
        cls.is_loaded = True

    @classmethod
    def register( cls, dict_containing_field_to_populate, field_name, attr_name ):
        cls.registered_variables.append( [dict_containing_field_to_populate, field_name, attr_name] )

    @classmethod
    def register_dict( cls, dict_to_register ):
        '''
        Registers any fields in the dict that should be dynamically loaded.
        Such fields should have value: [ '', attr_name ]
        '''
        for key, value in dict_to_register.items():
            if isinstance( value, list ) and len( value ) == 2 and value[0] == '':
                cls.register( dict_to_register, key, value[1] )
            elif isinstance( value, dict ):
                cls.register_dict( value )

    @classmethod
    def populate_registered_variables( cls ):
        for dict_containing_field_to_populate, field_name, attr_name in cls.registered_variables:
            dict_containing_field_to_populate[field_name] = getattr( cls, attr_name )
            print( "\t{0}={1}".format( field_name, getattr( cls, attr_name ) ) )


###########################
# Utility functions
###########################
def validate_config( cfg ):
    '''
    Catches many general cfg errors.
    '''
    if cfg[ 'model_type' ] not in ALLOWABLE_MODEL_TYPES:
        raise ValueError( "'model_type' in config.py must be one of {0}".format( ALLOWABLE_MODEL_TYPES ))
    if cfg[ 'model_type' ] != 'empty' and 'optimizer' not in cfg:
        raise ValueError( "an 'optimizer' must be specified" )
    if 'optimizer' in cfg and 'optimizer_kwargs' not in cfg:
        raise ValueError( "The arguments for the optimizer {0} must be given, named, in 'optimizer_kwargs'".format( cfg[ 'optimizer' ] ))


def load_config( cfg_dir, nopause=False ):
    '''
    Raises:
        ImportError if 'config.py' doesn't exist in cfg_dir
    '''
    if not os.path.isfile( os.path.join( cfg_dir, 'config.py' ) ):
        raise ImportError( 'config.py not found in {0}'.format( cfg_dir ) )
    import sys
    try:
        del sys.modules[ 'config' ]
    except KeyError:
        pass

    sys.path.insert( 0, cfg_dir )
    import config as loading_config
    # cleanup
    # print([ v for v in sys.modules if "config" in v])
    # return
    cfg = loading_config.get_cfg( nopause )

    try:
        del sys.modules[ 'config' ]
    except KeyError:
        pass
    sys.path.remove(cfg_dir)

    return cfg

def update_keys(old_dict, key_starts_with, new_dict):
    for k, v in new_dict.items():
        if k.startswith(key_starts_with):
            old_dict[k] = v
    return old_dict
--------------------------------------------------------------------------------
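For orientation, this is how the per-task config.py files under experiments/final/ (listed below) are consumed: load_config temporarily puts the experiment directory on sys.path, imports its config module, and calls get_cfg(). A minimal sketch — the experiment path and the driver context are illustrative assumptions, not a file in this repo:

# Hypothetical driver snippet using the two functions above:
from general_utils import load_config, validate_config

cfg = load_config('experiments/final/autoencoder', nopause=True)  # imports that dir's config.py, runs get_cfg()
validate_config(cfg)   # sanity-checks model_type / optimizer fields
print(cfg['batch_size'])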
/experiments/final/class_1000/config.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.BasicFF 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | batch_size = 32 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': batch_size 51 | } 52 | 53 | cfg['return_accuracy']=True 54 | 55 | # learning 56 | cfg['initial_learning_rate'] = 1e-4 57 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 58 | cfg[ 'optimizer_kwargs' ] = {} 59 | 60 | 61 | def pwc(initial_lr, **kwargs): 62 | global_step =
kwargs['global_step'] 63 | del kwargs['global_step'] 64 | return tf.train.piecewise_constant(global_step, **kwargs) 65 | cfg['learning_rate_schedule'] = pwc 66 | cfg['learning_rate_schedule_kwargs' ] = { 67 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 68 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 69 | } 70 | # inputs 71 | cfg['input_dim'] = (256, 256) # (1024, 1024) 72 | cfg['input_num_channels'] = 3 73 | cfg['input_dtype'] = tf.float32 74 | cfg['input_domain_name'] = 'rgb' 75 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 76 | cfg['input_preprocessing_fn_kwargs'] = { 77 | 'new_dims': cfg['input_dim'], 78 | 'new_scale': [-1, 1] 79 | } 80 | cfg['single_filename_to_multiple']=True 81 | 82 | # outputs 83 | cfg['target_dim'] = 1000 # (1024, 1024) 84 | cfg['target_dtype'] = tf.float32 85 | cfg['target_from_filenames'] = load_ops.class_1000_softmax 86 | cfg['mask_by_target_func'] = True 87 | 88 | # Post processing 89 | cfg['metric_net'] = encoder_multilayers_fc_bn 90 | cfg['metric_kwargs'] = { 91 | 'hidden_size': 2048, 92 | 'layer_num': 2, 93 | 'output_size': cfg['target_dim'] 94 | } 95 | 96 | # input pipeline 97 | cfg['preprocess_fn'] = load_and_specify_preprocessors 98 | cfg['randomize'] = False 99 | cfg['num_read_threads'] = 300 100 | cfg['batch_size'] = batch_size 101 | cfg['inputs_queue_capacity'] = 4096 102 | 103 | # Checkpoints and summaries 104 | cfg['summary_save_every_secs'] = 300 105 | cfg['checkpoint_save_every_secs'] = 600 106 | 107 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 108 | print_cfg( cfg, nopause=nopause ) 109 | return cfg 110 | 111 | def print_cfg( cfg, nopause=False ): 112 | print('-------------------------------------------------') 113 | print('config:') 114 | template = '\t{0:30}{1}' 115 | for key in sorted( cfg.keys() ): 116 | print(template.format(key, cfg[key])) 117 | print('-------------------------------------------------') 118 | 119 | if nopause: 120 | return 121 | raw_input('Press Enter to continue...') 122 | print('-------------------------------------------------') 123 | -------------------------------------------------------------------------------- /experiments/final/class_places/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.BasicFF 35 | cfg['weight_decay'] = 1e-5 36 | 
cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | batch_size = 32 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': batch_size 51 | } 52 | 53 | cfg['return_accuracy']=True 54 | 55 | # learning 56 | cfg['initial_learning_rate'] = 1e-4 57 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 58 | cfg[ 'optimizer_kwargs' ] = {} 59 | 60 | 61 | def pwc(initial_lr, **kwargs): 62 | global_step = kwargs['global_step'] 63 | del kwargs['global_step'] 64 | return tf.train.piecewise_constant(global_step, **kwargs) 65 | cfg['learning_rate_schedule'] = pwc 66 | cfg['learning_rate_schedule_kwargs' ] = { 67 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 68 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 69 | } 70 | # inputs 71 | cfg['input_dim'] = (256, 256) # (1024, 1024) 72 | cfg['input_num_channels'] = 3 73 | cfg['input_dtype'] = tf.float32 74 | cfg['input_domain_name'] = 'rgb' 75 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 76 | cfg['input_preprocessing_fn_kwargs'] = { 77 | 'new_dims': cfg['input_dim'], 78 | 'new_scale': [-1, 1] 79 | } 80 | cfg['single_filename_to_multiple']=True 81 | 82 | # outputs 83 | cfg['target_dim'] = 63 # (1024, 1024) 84 | cfg['target_dtype'] = tf.float32 85 | cfg['target_from_filenames'] = load_ops.class_places_workspace_and_home 86 | cfg['mask_by_target_func'] = True 87 | 88 | # Post processing 89 | cfg['metric_net'] = encoder_multilayers_fc_bn 90 | cfg['metric_kwargs'] = { 91 | 'hidden_size': 2048, 92 | 'layer_num': 2, 93 | 'output_size': cfg['target_dim'] 94 | } 95 | 96 | # input pipeline 97 | cfg['preprocess_fn'] = load_and_specify_preprocessors 98 | cfg['randomize'] = True 99 | cfg['num_read_threads'] = 300 100 | cfg['batch_size'] = batch_size 101 | cfg['inputs_queue_capacity'] = 4096 102 | 103 | # Checkpoints and summaries 104 | cfg['summary_save_every_secs'] = 300 105 | cfg['checkpoint_save_every_secs'] = 600 106 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 107 | print_cfg( cfg, nopause=nopause ) 108 | return cfg 109 | 110 | def print_cfg( cfg, nopause=False ): 111 | print('-------------------------------------------------') 112 | print('config:') 113 | template = '\t{0:30}{1}' 114 | for key in sorted( cfg.keys() ): 115 | print(template.format(key, cfg[key])) 116 | print('-------------------------------------------------') 117 | 118 | if nopause: 119 | return 120 | raw_input('Press Enter to continue...') 121 | print('-------------------------------------------------') 122 | -------------------------------------------------------------------------------- /experiments/final/room_layout/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from 
data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.BasicFF 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | batch_size = 32 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': batch_size 51 | } 52 | 53 | cfg['return_accuracy']=False 54 | 55 | # learning 56 | cfg['initial_learning_rate'] = 1e-4 57 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 58 | cfg[ 'optimizer_kwargs' ] = {} 59 | 60 | cfg['clip_norm'] = 1 61 | 62 | def pwc(initial_lr, **kwargs): 63 | global_step = kwargs['global_step'] 64 | del kwargs['global_step'] 65 | return tf.train.piecewise_constant(global_step, **kwargs) 66 | cfg['learning_rate_schedule'] = pwc 67 | cfg['learning_rate_schedule_kwargs' ] = { 68 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
69 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 70 | } 71 | # inputs 72 | cfg['input_dim'] = (256, 256) # (1024, 1024) 73 | cfg['input_num_channels'] = 3 74 | cfg['input_dtype'] = tf.float32 75 | cfg['input_domain_name'] = 'rgb' 76 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 77 | cfg['input_preprocessing_fn_kwargs'] = { 78 | 'new_dims': cfg['input_dim'], 79 | 'new_scale': [-1, 1] 80 | } 81 | cfg['single_filename_to_multiple']=True 82 | 83 | # outputs 84 | cfg['target_dim'] = 9 # (1024, 1024) 85 | cfg['target_dtype'] = tf.float32 86 | cfg['target_from_filenames'] = load_ops.room_layout 87 | 88 | # Post processing 89 | cfg['metric_net'] = encoder_multilayers_fc_bn 90 | cfg['metric_kwargs'] = { 91 | 'hidden_size': 2048, 92 | 'layer_num': 2, 93 | 'output_size': cfg['target_dim'] 94 | } 95 | 96 | cfg['l2_loss']=True 97 | cfg['loss_threshold']=1.0 98 | # input pipeline 99 | cfg['preprocess_fn'] = load_and_specify_preprocessors 100 | cfg['randomize'] = False 101 | cfg['num_read_threads'] = 300 102 | cfg['batch_size'] = batch_size 103 | cfg['inputs_queue_capacity'] = 4096 104 | 105 | # Checkpoints and summaries 106 | cfg['summary_save_every_secs'] = 300 107 | cfg['checkpoint_save_every_secs'] = 600 108 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 109 | print_cfg( cfg, nopause=nopause ) 110 | return cfg 111 | 112 | def print_cfg( cfg, nopause=False ): 113 | print('-------------------------------------------------') 114 | print('config:') 115 | template = '\t{0:30}{1}' 116 | for key in sorted( cfg.keys() ): 117 | print(template.format(key, cfg[key])) 118 | print('-------------------------------------------------') 119 | 120 | if nopause: 121 | return 122 | raw_input('Press Enter to continue...') 123 | print('-------------------------------------------------') 124 | -------------------------------------------------------------------------------- /experiments/final/segment25d/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.SegmentationEncoderDecoder 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 44 | cfg['hidden_size'] = 1024 45 | 
cfg['encoder_kwargs'] = { 46 | 'resnet_build_fn' : resnet_v1_50_16x16, 47 | 'weight_decay': cfg['weight_decay'] 48 | } 49 | 50 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 51 | cfg['decoder_kwargs'] = { 52 | 'activation_fn': leaky_relu( 0.2 ), 53 | 'weight_decay': cfg['weight_decay'] 54 | } 55 | 56 | # learning 57 | cfg['initial_learning_rate'] = 1e-4 58 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 59 | cfg[ 'optimizer_kwargs' ] = {} 60 | def pwc(initial_lr, **kwargs): 61 | global_step = kwargs['global_step'] 62 | del kwargs['global_step'] 63 | return tf.train.piecewise_constant(global_step, **kwargs) 64 | cfg['learning_rate_schedule'] = pwc 65 | cfg['learning_rate_schedule_kwargs' ] = { 66 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 67 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 68 | } 69 | # inputs 70 | cfg['input_dim'] = (256, 256) # (1024, 1024) 71 | cfg['input_num_channels'] = 3 72 | cfg['input_dtype'] = tf.float32 73 | cfg['input_domain_name'] = 'rgb' 74 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 75 | cfg['input_preprocessing_fn_kwargs'] = { 76 | 'new_dims': cfg['input_dim'], 77 | 'new_scale': [-1, 1] 78 | } 79 | 80 | # outputs 81 | cfg['output_dim'] = (256,256) 82 | cfg['num_pixels'] = 300 83 | cfg['only_target_discriminative'] = True 84 | cfg['target_num_channels'] = 64 85 | cfg['target_dim'] = (cfg['num_pixels'], 3) # (1024, 1024) 86 | cfg['target_dtype'] = tf.int32 87 | cfg['target_domain_name'] = 'segment25d' 88 | 89 | cfg['target_from_filenames'] = load_ops.segment_pixel_sample 90 | cfg['target_from_filenames_kwargs'] = { 91 | 'new_dims': cfg['output_dim'], 92 | 'num_pixels': cfg['num_pixels'], 93 | 'domain': cfg['target_domain_name'] 94 | } 95 | 96 | cfg['return_accuracy'] = False 97 | 98 | # input pipeline 99 | cfg['preprocess_fn'] = load_and_specify_preprocessors 100 | cfg['randomize'] = False 101 | cfg['num_read_threads'] = 300 102 | cfg['batch_size'] = 32 103 | cfg['inputs_queue_capacity'] = 4096 104 | 105 | # Checkpoints and summaries 106 | cfg['summary_save_every_secs'] = 300 107 | cfg['checkpoint_save_every_secs'] = 600 108 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 109 | print_cfg( cfg, nopause=nopause ) 110 | return cfg 111 | 112 | def print_cfg( cfg, nopause=False ): 113 | print('-------------------------------------------------') 114 | print('config:') 115 | template = '\t{0:30}{1}' 116 | for key in sorted( cfg.keys() ): 117 | print(template.format(key, cfg[key])) 118 | print('-------------------------------------------------') 119 | 120 | if nopause: 121 | return 122 | raw_input('Press Enter to continue...') 123 | print('-------------------------------------------------') 124 | -------------------------------------------------------------------------------- /experiments/final/curvature/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import 
RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 1e-6 # 1e-7, 1 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | 82 | # inputs 83 | cfg['input_dim'] = (256, 256) # (1024, 1024) 84 | cfg['input_num_channels'] = 3 85 | cfg['input_dtype'] = tf.float32 86 | cfg['input_domain_name'] = 'rgb' 87 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 88 | cfg['input_preprocessing_fn_kwargs'] = { 89 | 'new_dims': cfg['input_dim'], 90 | 'new_scale': [-1, 1] 91 | } 92 | 93 | # outputs 94 | cfg['target_num_channels'] = 2 95 | cfg['target_dim'] = (256, 256) # (1024, 1024) 96 | cfg['target_dtype'] = tf.float32 97 | cfg['target_domain_name'] = 'curvature' 98 | cfg['target_preprocessing_fn'] = load_ops.curvature_preprocess 99 | cfg['target_preprocessing_fn_kwargs'] = { 100 | 'new_dims': cfg['target_dim'] 101 | } 102 | 103 | # masks 104 | cfg['depth_mask'] = True 105 | cfg['l2_loss'] = True 106 | 107 | # input pipeline 108 | cfg['preprocess_fn'] = load_and_specify_preprocessors 109 | cfg['randomize'] = False 110 | cfg['num_read_threads'] = 300 111 | cfg['batch_size'] = 32 112 | cfg['inputs_queue_capacity'] = 4096 113 | 114 | # Checkpoints and summaries 115 | cfg['summary_save_every_secs'] = 300 116 | cfg['checkpoint_save_every_secs'] = 600 117 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 118 | print_cfg( cfg, nopause=nopause ) 119 | return cfg 120 | 121 | def print_cfg( cfg, nopause=False ): 122 | print('-------------------------------------------------') 123 | print('config:') 124 | template = 
'\t{0:30}{1}' 125 | for key in sorted( cfg.keys() ): 126 | print(template.format(key, cfg[key])) 127 | print('-------------------------------------------------') 128 | 129 | if nopause: 130 | return 131 | raw_input('Press Enter to continue...') 132 | print('-------------------------------------------------') 133 | -------------------------------------------------------------------------------- /experiments/final/ego_motion/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors, load_and_specify_preprocessors_for_single_filename_to_imgs 15 | 16 | from general_utils import RuntimeDeterminedEnviromentVars 17 | import models.architectures as architectures 18 | from models.gan_discriminators import pix2pix_discriminator 19 | from models.resnet_v1 import resnet_v1_50_16x16 20 | from models.sample_models import * 21 | from models.utils import leaky_relu 22 | 23 | def get_cfg( nopause=False ): 24 | cfg = {} 25 | cfg['is_discriminative'] = True 26 | # params 27 | cfg['num_epochs'] = 30 28 | cfg['model_path'] = None 29 | 30 | # logging 31 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 32 | task_name = os.path.basename( config_dir ) 33 | 34 | # model 35 | cfg['model_type'] = architectures.CycleSiamese 36 | cfg['weight_decay'] = 2e-6 37 | cfg['instance_noise_sigma'] = 0.1 38 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 39 | cfg['instance_noise_anneal_fn_kwargs'] = { 40 | 'decay_rate': 0.2, 41 | 'decay_steps': 1000 42 | } 43 | 44 | batch_size = 8 45 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 46 | cfg['hidden_size'] = 1024 47 | cfg['encoder_kwargs'] = { 48 | 'resnet_build_fn' : resnet_v1_50_16x16, 49 | 'weight_decay': cfg['weight_decay'], 50 | 'flatten': True, 51 | 'batch_size': batch_size 52 | 53 | } 54 | 55 | cfg['return_accuracy']=False 56 | 57 | # learning 58 | cfg['initial_learning_rate'] = 1e-4 59 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 60 | cfg[ 'optimizer_kwargs' ] = {} 61 | cfg[ 'discriminator_learning_args' ] = { 62 | 'initial_learning_rate':1e-5, 63 | 'optimizer': tf.train.GradientDescentOptimizer, 64 | 'optimizer_kwargs': {} 65 | } 66 | def pwc(initial_lr, **kwargs): 67 | global_step = kwargs['global_step'] 68 | del kwargs['global_step'] 69 | return tf.train.piecewise_constant(global_step, **kwargs) 70 | cfg['learning_rate_schedule'] = pwc 71 | cfg['learning_rate_schedule_kwargs' ] = { 72 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
73 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 74 | } 75 | #cfg['clip_norm'] = 1 76 | 77 | # inputs 78 | cfg['input_dim'] = (256, 256) # (1024, 1024) 79 | cfg['input_num_channels'] = 3 80 | cfg['input_dtype'] = tf.float32 81 | cfg['num_input'] = 3 82 | cfg['input_domain_name'] = 'rgb' 83 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 84 | cfg['input_preprocessing_fn_kwargs'] = { 85 | 'new_dims': cfg['input_dim'], 86 | 'new_scale': [-1, 1] 87 | } 88 | cfg['single_filename_to_multiple']=True 89 | 90 | # outputs 91 | cfg['target_dim'] = 18 # (1024, 1024) 92 | cfg['target_dtype'] = tf.float32 93 | cfg['target_from_filenames'] = load_ops.triplet_fixated_egomotion 94 | 95 | # Post processing 96 | cfg['metric_net'] = encoder_multilayers_fc 97 | cfg['metric_kwargs'] = { 98 | 'hidden_size': 1024, 99 | 'layer_num': 3, 100 | 'output_size': 6 101 | } 102 | cfg['l2_loss']=True 103 | 104 | # input pipeline 105 | cfg['preprocess_fn'] = load_and_specify_preprocessors_for_single_filename_to_imgs 106 | cfg['randomize'] = False 107 | cfg['num_read_threads'] = 30 108 | cfg['batch_size'] = 8 109 | cfg['inputs_queue_capacity'] = 4096 110 | 111 | # Checkpoints and summaries 112 | cfg['summary_save_every_secs'] = 0 113 | cfg['checkpoint_save_every_secs'] = 600 114 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 115 | print_cfg( cfg, nopause=nopause ) 116 | return cfg 117 | 118 | def print_cfg( cfg, nopause=False ): 119 | print('-------------------------------------------------') 120 | print('config:') 121 | template = '\t{0:30}{1}' 122 | for key in sorted( cfg.keys() ): 123 | print(template.format(key, cfg[key])) 124 | print('-------------------------------------------------') 125 | 126 | if nopause: 127 | return 128 | raw_input('Press Enter to continue...') 129 | print('-------------------------------------------------') 130 | -------------------------------------------------------------------------------- /experiments/final/fix_pose/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors, load_and_specify_preprocessors_for_single_filename_to_imgs 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.Siamese 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 
1000 41 | } 42 | 43 | batch_size = 16 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': batch_size 51 | } 52 | 53 | cfg['return_accuracy']=False 54 | 55 | # learning 56 | cfg['initial_learning_rate'] = 1e-4 57 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 58 | cfg[ 'optimizer_kwargs' ] = {} 59 | def pwc(initial_lr, **kwargs): 60 | global_step = kwargs['global_step'] 61 | del kwargs['global_step'] 62 | return tf.train.piecewise_constant(global_step, **kwargs) 63 | cfg['learning_rate_schedule'] = pwc 64 | cfg['learning_rate_schedule_kwargs' ] = { 65 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 66 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 67 | } 68 | #cfg['clip_norm'] = 1 69 | 70 | 71 | # inputs 72 | cfg['input_dim'] = (256, 256) # (1024, 1024) 73 | cfg['input_num_channels'] = 3 74 | cfg['input_dtype'] = tf.float32 75 | cfg['num_input'] = 2 76 | cfg['input_domain_name'] = 'rgb' 77 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 78 | cfg['input_preprocessing_fn_kwargs'] = { 79 | 'new_dims': cfg['input_dim'], 80 | 'new_scale': [-1, 1] 81 | } 82 | cfg['single_filename_to_multiple']=True 83 | 84 | # outputs 85 | cfg['target_dim'] = 6 # (1024, 1024) 86 | cfg['target_dtype'] = tf.float64 87 | cfg['target_from_filenames'] = load_ops.fixated_camera_pose 88 | 89 | # Post processing 90 | cfg['metric_net'] = encoder_multilayers_fc_bn 91 | cfg['metric_kwargs'] = { 92 | 'hidden_size': 2048, 93 | 'layer_num': 2, 94 | 'output_size': cfg['target_dim'], 95 | 'batch_norm_decay' : 0.9, 96 | 'batch_norm_epsilon' : 1e-5, 97 | 'batch_norm_scale' : True, 98 | 'batch_norm_center' : True 99 | } 100 | cfg['l2_loss']=True 101 | cfg['loss_threshold']=1.0 102 | 103 | # input pipeline 104 | cfg['preprocess_fn'] = load_and_specify_preprocessors_for_single_filename_to_imgs 105 | cfg['randomize'] = False 106 | cfg['num_read_threads'] = 100 107 | cfg['batch_size'] = batch_size 108 | cfg['inputs_queue_capacity'] = 4096 109 | 110 | # Checkpoints and summaries 111 | cfg['summary_save_every_secs'] = 300 112 | cfg['checkpoint_save_every_secs'] = 600 113 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 114 | print_cfg( cfg, nopause=nopause ) 115 | return cfg 116 | 117 | def print_cfg( cfg, nopause=False ): 118 | print('-------------------------------------------------') 119 | print('config:') 120 | template = '\t{0:30}{1}' 121 | for key in sorted( cfg.keys() ): 122 | print(template.format(key, cfg[key])) 123 | print('-------------------------------------------------') 124 | 125 | if nopause: 126 | return 127 | raw_input('Press Enter to continue...') 128 | print('-------------------------------------------------') 129 | -------------------------------------------------------------------------------- /experiments/final/segmentsemantic/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import 
mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EDSoftmax 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 44 | cfg['hidden_size'] = 1024 45 | cfg['encoder_kwargs'] = { 46 | 'resnet_build_fn' : resnet_v1_50_16x16, 47 | 'weight_decay': cfg['weight_decay'] 48 | } 49 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 50 | cfg['decoder_kwargs'] = { 51 | 'activation_fn': leaky_relu( 0.2 ), 52 | 'weight_decay': cfg['weight_decay'] 53 | } 54 | 55 | # learning 56 | cfg['initial_learning_rate'] = 1e-4 57 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 58 | cfg[ 'optimizer_kwargs' ] = {} 59 | cfg[ 'discriminator_learning_args' ] = { 60 | 'initial_learning_rate':1e-5, 61 | 'optimizer': tf.train.GradientDescentOptimizer, 62 | 'optimizer_kwargs': {} 63 | } 64 | def pwc(initial_lr, **kwargs): 65 | global_step = kwargs['global_step'] 66 | del kwargs['global_step'] 67 | return tf.train.piecewise_constant(global_step, **kwargs) 68 | cfg['learning_rate_schedule'] = pwc 69 | cfg['learning_rate_schedule_kwargs' ] = { 70 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
71 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 72 | } 73 | # inputs 74 | cfg['input_dim'] = (256, 256) # (1024, 1024) 75 | cfg['input_num_channels'] = 3 76 | cfg['input_dtype'] = tf.float32 77 | cfg['input_domain_name'] = 'rgb' 78 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 79 | cfg['input_preprocessing_fn_kwargs'] = { 80 | 'new_dims': cfg['input_dim'], 81 | 'new_scale': [-1, 1] 82 | } 83 | 84 | # outputs 85 | cfg['only_target_discriminative'] = True 86 | cfg['target_domain_name'] = 'segmentsemantic' 87 | cfg['return_accuracy'] = True 88 | cfg['target_from_filenames'] = load_ops.semantic_segment_rebalanced 89 | 90 | # outputs 91 | cfg['target_num_channels'] = 17 92 | cfg['target_dim'] = (256, 256) # (1024, 1024) 93 | cfg['target_dtype'] = tf.int64 94 | cfg['target_from_filenames_kwargs'] = { 95 | 'new_dims': (256,256), 96 | 'domain' : 'segmentsemantic' 97 | } 98 | cfg['mask_by_target_func'] = True 99 | 100 | # masks 101 | 102 | # input pipeline 103 | cfg['preprocess_fn'] = load_and_specify_preprocessors 104 | cfg['randomize'] = False 105 | cfg['num_read_threads'] = 300 106 | cfg['batch_size'] = 32 107 | cfg['inputs_queue_capacity'] = 4096 108 | 109 | # Checkpoints and summaries 110 | cfg['summary_save_every_secs'] = 300 111 | cfg['checkpoint_save_every_secs'] = 600 112 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 113 | print_cfg( cfg, nopause=nopause ) 114 | return cfg 115 | 116 | def print_cfg( cfg, nopause=False ): 117 | print('-------------------------------------------------') 118 | print('config:') 119 | template = '\t{0:30}{1}' 120 | for key in sorted( cfg.keys() ): 121 | print(template.format(key, cfg[key])) 122 | print('-------------------------------------------------') 123 | 124 | if nopause: 125 | return 126 | raw_input('Press Enter to continue...') 127 | print('-------------------------------------------------') 128 | -------------------------------------------------------------------------------- /experiments/final/segment2d/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.SegmentationEncoderDecoder 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | cfg['encoder'] = 
resnet_encoder_fully_convolutional_16x16x8 44 | cfg['hidden_size'] = 1024 45 | cfg['encoder_kwargs'] = { 46 | 'resnet_build_fn' : resnet_v1_50_16x16, 47 | 'weight_decay': cfg['weight_decay'] 48 | } 49 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 50 | cfg['decoder_kwargs'] = { 51 | 'activation_fn': leaky_relu( 0.2 ), 52 | 'weight_decay': cfg['weight_decay'] 53 | } 54 | 55 | # learning 56 | cfg['initial_learning_rate'] = 1e-4 57 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 58 | cfg[ 'optimizer_kwargs' ] = {} 59 | cfg[ 'discriminator_learning_args' ] = { 60 | 'initial_learning_rate':1e-5, 61 | 'optimizer': tf.train.GradientDescentOptimizer, 62 | 'optimizer_kwargs': {} 63 | } 64 | def pwc(initial_lr, **kwargs): 65 | global_step = kwargs['global_step'] 66 | del kwargs['global_step'] 67 | return tf.train.piecewise_constant(global_step, **kwargs) 68 | cfg['learning_rate_schedule'] = pwc 69 | cfg['learning_rate_schedule_kwargs' ] = { 70 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 71 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 72 | } 73 | # inputs 74 | cfg['input_dim'] = (256, 256) # (1024, 1024) 75 | cfg['input_num_channels'] = 3 76 | cfg['input_dtype'] = tf.float32 77 | cfg['input_domain_name'] = 'rgb' 78 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 79 | cfg['input_preprocessing_fn_kwargs'] = { 80 | 'new_dims': cfg['input_dim'], 81 | 'new_scale': [-1, 1] 82 | } 83 | 84 | # outputs 85 | cfg['num_pixels'] = 300 86 | cfg['only_target_discriminative'] = True 87 | cfg['target_num_channels'] = 64 88 | cfg['target_dim'] = (cfg['num_pixels'], 3) # (1024, 1024) 89 | cfg['target_dtype'] = tf.int32 90 | cfg['target_domain_name'] = 'segment2d' 91 | 92 | cfg['target_from_filenames'] = load_ops.segment_pixel_sample 93 | cfg['target_from_filenames_kwargs'] = { 94 | 'new_dims': (256,256), 95 | 'num_pixels': cfg['num_pixels'], 96 | 'domain': cfg['target_domain_name'] 97 | } 98 | 99 | cfg['return_accuracy'] = False 100 | 101 | # input pipeline 102 | cfg['preprocess_fn'] = load_and_specify_preprocessors 103 | cfg['randomize'] = False 104 | cfg['num_read_threads'] = 300 105 | cfg['batch_size'] = 32 106 | cfg['inputs_queue_capacity'] = 4096 107 | 108 | # Checkpoints and summaries 109 | cfg['summary_save_every_secs'] = 300 110 | cfg['checkpoint_save_every_secs'] = 600 111 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 112 | print_cfg( cfg, nopause=nopause ) 113 | return cfg 114 | 115 | def print_cfg( cfg, nopause=False ): 116 | print('-------------------------------------------------') 117 | print('config:') 118 | template = '\t{0:30}{1}' 119 | for key in sorted( cfg.keys() ): 120 | print(template.format(key, cfg[key])) 121 | print('-------------------------------------------------') 122 | 123 | if nopause: 124 | return 125 | raw_input('Press Enter to continue...') 126 | print('-------------------------------------------------') 127 | -------------------------------------------------------------------------------- /experiments/final/point_match/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import 
data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors, load_and_specify_preprocessors_for_single_filename_to_imgs 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.Siamese 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | cfg['batch_size'] = 32 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': cfg['batch_size'] 51 | 52 | } 53 | 54 | cfg['return_accuracy']=True 55 | 56 | # learning 57 | cfg['initial_learning_rate'] = 1e-4 58 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 59 | cfg[ 'optimizer_kwargs' ] = {} 60 | cfg[ 'discriminator_learning_args' ] = { 61 | 'initial_learning_rate':1e-5, 62 | 'optimizer': tf.train.GradientDescentOptimizer, 63 | 'optimizer_kwargs': {} 64 | } 65 | def pwc(initial_lr, **kwargs): 66 | global_step = kwargs['global_step'] 67 | del kwargs['global_step'] 68 | return tf.train.piecewise_constant(global_step, **kwargs) 69 | cfg['learning_rate_schedule'] = pwc 70 | cfg['learning_rate_schedule_kwargs' ] = { 71 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
72 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 73 | } 74 | # inputs 75 | cfg['input_dim'] = (256, 256) # (1024, 1024) 76 | cfg['input_num_channels'] = 3 77 | cfg['input_dtype'] = tf.float32 78 | cfg['num_input'] = 2 79 | cfg['input_domain_name'] = 'rgb' 80 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 81 | cfg['input_preprocessing_fn_kwargs'] = { 82 | 'new_dims': cfg['input_dim'], 83 | 'new_scale': [-1, 1] 84 | } 85 | cfg['single_filename_to_multiple']=True 86 | 87 | # outputs 88 | cfg['target_dim'] = 1 # (1024, 1024) 89 | cfg['target_dtype'] = tf.int64 90 | cfg['target_from_filenames'] = load_ops.point_match_new 91 | 92 | # Post processing 93 | cfg['metric_net'] = encoder_multilayers_fc_bn 94 | cfg['metric_kwargs'] = { 95 | 'hidden_size': 256, 96 | 'layer_num': 2, 97 | 'output_size': 2, 98 | 'batch_norm_decay' : 0.9, 99 | 'batch_norm_epsilon' : 1e-5, 100 | 'batch_norm_scale' : True, 101 | 'batch_norm_center' : True 102 | } 103 | 104 | # input pipeline 105 | cfg['preprocess_fn'] = load_and_specify_preprocessors_for_single_filename_to_imgs 106 | cfg['randomize'] = False 107 | cfg['num_read_threads'] = 300 108 | cfg['inputs_queue_capacity'] = 4096 109 | 110 | # Checkpoints and summaries 111 | cfg['summary_save_every_secs'] = 300 112 | cfg['checkpoint_save_every_secs'] = 600 113 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 114 | print_cfg( cfg, nopause=nopause ) 115 | return cfg 116 | 117 | def print_cfg( cfg, nopause=False ): 118 | print('-------------------------------------------------') 119 | print('config:') 120 | template = '\t{0:30}{1}' 121 | for key in sorted( cfg.keys() ): 122 | print(template.format(key, cfg[key])) 123 | print('-------------------------------------------------') 124 | 125 | if nopause: 126 | return 127 | raw_input('Press Enter to continue...') 128 | print('-------------------------------------------------') 129 | -------------------------------------------------------------------------------- /experiments/final/vanishing_point/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.BasicFF 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 
| cfg['batch_size'] = 32 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': cfg['batch_size'] 51 | 52 | } 53 | 54 | cfg['return_accuracy']=False 55 | 56 | # learning 57 | cfg['initial_learning_rate'] = 1e-4 58 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 59 | cfg[ 'optimizer_kwargs' ] = {} 60 | cfg[ 'discriminator_learning_args' ] = { 61 | 'initial_learning_rate':1e-5, 62 | 'optimizer': tf.train.GradientDescentOptimizer, 63 | 'optimizer_kwargs': {} 64 | } 65 | cfg['clip_norm'] = 1 66 | def pwc(initial_lr, **kwargs): 67 | global_step = kwargs['global_step'] 68 | del kwargs['global_step'] 69 | return tf.train.piecewise_constant(global_step, **kwargs) 70 | cfg['learning_rate_schedule'] = pwc 71 | cfg['learning_rate_schedule_kwargs' ] = { 72 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 73 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 74 | } 75 | 76 | # inputs 77 | cfg['input_dim'] = (256, 256) # (1024, 1024) 78 | cfg['input_num_channels'] = 3 79 | cfg['input_dtype'] = tf.float32 80 | cfg['input_domain_name'] = 'rgb' 81 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 82 | cfg['input_preprocessing_fn_kwargs'] = { 83 | 'new_dims': cfg['input_dim'], 84 | 'new_scale': [-1, 1] 85 | } 86 | cfg['single_filename_to_multiple']=True 87 | 88 | # outputs 89 | cfg['target_dim'] = 9 # (1024, 1024) 90 | cfg['target_dtype'] = tf.float32 91 | cfg['target_from_filenames'] = load_ops.vanishing_point_well_defined 92 | 93 | # Post processing 94 | cfg['metric_net'] = encoder_multilayers_fc_bn 95 | cfg['metric_kwargs'] = { 96 | 'hidden_size': 2048, 97 | 'layer_num': 2, 98 | 'output_size': cfg['target_dim'], 99 | 'batch_norm_decay' : 0.9, 100 | 'batch_norm_epsilon' : 1e-5, 101 | 'batch_norm_scale' : True, 102 | 'batch_norm_center' : True, 103 | 'weight_decay': cfg['weight_decay'] 104 | } 105 | 106 | cfg['l2_loss']=True 107 | cfg['loss_threshold']=1.0 108 | # input pipeline 109 | cfg['preprocess_fn'] = load_and_specify_preprocessors 110 | cfg['randomize'] = False 111 | cfg['num_read_threads'] = 300 112 | cfg['inputs_queue_capacity'] = 4096 113 | 114 | # Checkpoints and summaries 115 | cfg['summary_save_every_secs'] = 300 116 | cfg['checkpoint_save_every_secs'] = 600 117 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 118 | print_cfg( cfg, nopause=nopause ) 119 | return cfg 120 | 121 | def print_cfg( cfg, nopause=False ): 122 | print('-------------------------------------------------') 123 | print('config:') 124 | template = '\t{0:30}{1}' 125 | for key in sorted( cfg.keys() ): 126 | print(template.format(key, cfg[key])) 127 | print('-------------------------------------------------') 128 | 129 | if nopause: 130 | return 131 | raw_input('Press Enter to continue...') 132 | print('-------------------------------------------------') 133 | -------------------------------------------------------------------------------- /experiments/final/non_fixated_pose/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | 
sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors, load_and_specify_preprocessors_for_single_filename_to_imgs 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.Siamese 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | batch_size = 8 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': batch_size 51 | 52 | } 53 | 54 | cfg['return_accuracy']=False 55 | 56 | # learning 57 | cfg['initial_learning_rate'] = 1e-4 58 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 59 | cfg[ 'optimizer_kwargs' ] = {} 60 | cfg[ 'discriminator_learning_args' ] = { 61 | 'initial_learning_rate':1e-5, 62 | 'optimizer': tf.train.GradientDescentOptimizer, 63 | 'optimizer_kwargs': {} 64 | } 65 | def pwc(initial_lr, **kwargs): 66 | global_step = kwargs['global_step'] 67 | del kwargs['global_step'] 68 | return tf.train.piecewise_constant(global_step, **kwargs) 69 | cfg['learning_rate_schedule'] = pwc 70 | cfg['learning_rate_schedule_kwargs' ] = { 71 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
72 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 73 | } 74 | 75 | # inputs 76 | cfg['input_dim'] = (256, 256) # (1024, 1024) 77 | cfg['input_num_channels'] = 3 78 | cfg['input_dtype'] = tf.float32 79 | cfg['num_input'] = 2 80 | cfg['input_domain_name'] = 'rgb' 81 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 82 | cfg['input_preprocessing_fn_kwargs'] = { 83 | 'new_dims': cfg['input_dim'], 84 | 'new_scale': [-1, 1] 85 | } 86 | cfg['single_filename_to_multiple']=True 87 | 88 | # outputs 89 | cfg['target_dim'] = 6 # (1024, 1024) 90 | cfg['target_dtype'] = tf.float64 91 | cfg['target_from_filenames'] = load_ops.nonfixated_camera_pose 92 | 93 | # Post processing 94 | cfg['metric_net'] = encoder_multilayers_fc_bn 95 | cfg['metric_kwargs'] = { 96 | 'hidden_size': 2048, 97 | 'layer_num': 3, 98 | 'output_size': cfg['target_dim'], 99 | 'batch_norm_decay' : 0.9, 100 | 'batch_norm_epsilon' : 1e-5, 101 | 'batch_norm_scale' : True, 102 | 'batch_norm_center' : True 103 | } 104 | cfg['l2_loss']=True 105 | cfg['loss_threshold']=1.0 106 | # input pipeline 107 | cfg['preprocess_fn'] = load_and_specify_preprocessors_for_single_filename_to_imgs 108 | cfg['randomize'] = False 109 | cfg['num_read_threads'] = 30 110 | cfg['batch_size'] = 8 111 | cfg['inputs_queue_capacity'] = 4096 112 | 113 | # Checkpoints and summaries 114 | cfg['summary_save_every_secs'] = 300 115 | cfg['checkpoint_save_every_secs'] = 600 116 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 117 | print_cfg( cfg, nopause=nopause ) 118 | return cfg 119 | 120 | def print_cfg( cfg, nopause=False ): 121 | print('-------------------------------------------------') 122 | print('config:') 123 | template = '\t{0:30}{1}' 124 | for key in sorted( cfg.keys() ): 125 | print(template.format(key, cfg[key])) 126 | print('-------------------------------------------------') 127 | 128 | if nopause: 129 | return 130 | raw_input('Press Enter to continue...') 131 | print('-------------------------------------------------') 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Model Transferability from Attribution Maps 2 | 3 | - [*"Paper: Deep Model Transferability from Attribution Maps"*](https:), NeurIPS 2019. (released soon) 4 | 5 | J. Song, Y. Chen, X. Wang, C. Shen, M. Song 6 | 7 | [Homepage of VIPA Group](https://www.vipazoo.cn/index_en.html), Zhejiang University, China 8 | 9 | ![](demo/illus.png) 10 | 11 | ## Getting Started 12 | 13 | The instructions below will get you a copy of the project up and running on your local machine for development and testing. 14 | 15 | ### Prerequisites 16 | 17 | Install the following: 18 | 19 | ``` 20 | - Python >= 3.6 21 | - Tensorflow >= 1.10.0 22 | - Matlab R2019a 23 | ``` 24 | 25 | Then install the required Python packages: 26 | 27 | ``` 28 | pip install -r requirement.txt 29 | ``` 30 | 31 | To generate attribution maps from the deep models, you also need to download [DeepExplain](https://github.com/marcoancona/DeepExplain), which this project uses to compute attributions, and copy it into your project directory **$DIR**.
32 | 33 | ``` 34 | cp -r DeepExplain-master/deepexplain $DIR/lib/ 35 | ``` 36 | 37 | ### Probe datasets 38 | 39 | The datasets used in this project are: 40 | 41 | - [Taskonomy Tiny](https://github.com/StanfordVL/taskonomy/tree/master/data#downloading-the-dataset) 42 | - [MS COCO Val 2014](http://images.cocodataset.org/zips/val2014.zip) 43 | - [Indoor Scene](http://groups.csail.mit.edu/vision/LabelMe/NewImages/indoorCVPR_09.tar) 44 | 45 | Download them and move them to **$DIR/dataset**. 46 | 47 | The datasets need to be arranged in the following layout: 48 | 49 | ``` 50 | |- dataset 51 | | |---taskonomy 52 | | | |---collierville_rgb 53 | | | | |---point_0_view_0_domain_rgb.png 54 | | | | |---... 55 | | | |---corozal_rgb 56 | | | |---darden_rgb 57 | | | |---markleeville_rgb 58 | | | |---wiconisco_rgb 59 | | |---coco 60 | | | |---COCO_val2014_000000000042.jpg 61 | | | |---... 62 | | |---indoor 63 | | | |---Images 64 | | | | |---airport_inside 65 | | | | | |---airport_inside_0001.jpg 66 | | | | |---bowling 67 | | | | |---... 68 | ``` 69 | 70 | You can also check **$DIR/explain_result/name_of_dataset/imlist.txt** to see how the images are arranged (the images listed in *imlist.txt* are randomly selected from each dataset; feel free to test with more images). 71 | 72 | ### Pre-trained Models 73 | 74 | Download the pre-trained models: 75 | 76 | ``` 77 | sh tools/download_model.sh 78 | ``` 79 | 80 | ## Running the tests 81 | 82 | First, generate the attribution maps and save them to the explain-result directory (an illustrative sketch of this step appears after the Acknowledgement section below): 83 | 84 | ``` 85 | cd tools 86 | python deep_attribution.py --explain_result_root explain_result --dataset taskonomy --imlist-size 1000 87 | python deep_attribution.py --explain_result_root explain_result --dataset coco --imlist-size 1000 88 | python deep_attribution.py --explain_result_root explain_result --dataset indoor --imlist-size 1005 89 | ``` 90 | 91 | Then calculate the affinity matrix of the tasks from their attribution maps (also sketched after the Acknowledgement section): 92 | 93 | ``` 94 | python affinity.py --dataset taskonomy --imlist-size 1000 95 | python affinity.py --dataset coco --imlist-size 1000 96 | python affinity.py --dataset indoor --imlist-size 1005 97 | ``` 98 | 99 | ## Visualization 100 | 101 | Plot the P@K and R@K curves; make sure you have already completed the tests on all three datasets: 102 | 103 | ``` 104 | python plot.py --fig-save fig 105 | ``` 106 | 107 | e.g. 108 | 109 | ![](demo/precision.png) 110 | 111 | Plot the task similarity tree: 112 | 113 | ``` 114 | matlab -nosplash -nodesktop -r task_similarity_tree 115 | ``` 116 | 117 | e.g. 118 | 119 | ![](demo/tree.png) 120 | 121 | ## Todo 122 | 123 | Release the code for generating attribution maps from other kinds of pre-trained models (different tasks, different architectures, etc.). 124 | 125 | Apply more attribution methods and see how they work. 126 | 127 | ## Acknowledgement 128 | 129 | This repo is built upon the code from [Taskonomy](http://taskonomy.stanford.edu/). 130 | 131 | We also thank [DeepExplain](https://github.com/marcoancona/DeepExplain) for providing the attribution tools.
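As referenced above, here are rough sketches of the two test steps. The first block shows how an attribution map can be produced with DeepExplain; the toy network, tensor names, and the choice of the built-in 'grad*input' method are illustrative stand-ins, not the exact setup used by *deep_attribution.py*:

```python
import numpy as np
import tensorflow as tf
from deepexplain.tensorflow import DeepExplain

xs = np.random.rand(4, 256, 256, 3).astype(np.float32)  # a toy probe batch

with tf.Session() as sess:
    with DeepExplain(session=sess) as de:
        # Stand-in network; the real pipeline restores a pre-trained task model.
        input_ph = tf.placeholder(tf.float32, (None, 256, 256, 3))
        hidden = tf.layers.flatten(tf.layers.conv2d(input_ph, 8, 3, strides=8))
        logits = tf.layers.dense(hidden, 10)
        sess.run(tf.global_variables_initializer())
        # 'grad*input' is one of DeepExplain's built-in attribution methods.
        attributions = de.explain('grad*input', logits, input_ph, xs)

print(attributions.shape)  # one attribution map per probe image
```

The second block illustrates one simple way to turn saved attribution maps into a task-affinity score (mean cosine similarity over the same probe images); *affinity.py* may use a different measure, so treat this only as an intuition pump:

```python
import numpy as np

def toy_affinity(maps_a, maps_b):
    """Mean cosine similarity between two tasks' attribution maps,
    computed over the same probe images. maps_*: arrays of shape (N, H, W, C)."""
    a = maps_a.reshape(len(maps_a), -1)
    b = maps_b.reshape(len(maps_b), -1)
    a = a / (np.linalg.norm(a, axis=1, keepdims=True) + 1e-12)
    b = b / (np.linalg.norm(b, axis=1, keepdims=True) + 1e-12)
    return float(np.mean(np.sum(a * b, axis=1)))
```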
132 | 133 | ## Citation 134 | 135 | If you find this code useful, please cite the following: 136 | 137 | ``` 138 | @inproceedings{ModelTransfer_NeurIPS2019, 139 | title={Deep Model Transferability from Attribution Maps}, 140 | author={Jie Song and Yixin Chen and Xinchao Wang and Chengchao Shen and Mingli Song}, 141 | booktitle={NeurIPS}, 142 | year={2019} 143 | } 144 | ``` 145 | 146 | ## Contact 147 | 148 | If you have any questions, please feel free to contact: 149 | 150 | Jie Song, sjie@zju.edu.cn; 151 | 152 | Yixin Chen, chenyix@zju.edu.cn. 153 | 154 | [Homepage of VIPA Group](https://www.vipazoo.cn/index_en.html), Zhejiang University, China 155 | 156 |
157 | icon 158 |
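A note on the per-task configuration modules that make up the rest of this repository: each `experiments/final/<task>/config.py` exposes a `get_cfg(nopause=False)` function that builds a plain Python dict, registers it with `RuntimeDeterminedEnviromentVars`, and then pauses on `raw_input`, a Python 2 builtin that no longer exists under the Python >= 3.6 prerequisite above. The sketch below is our illustrative helper, not part of the repo, and assumes TensorFlow and the repo's `lib`/`models` packages are importable; it aliases `raw_input` and then loads one of these configs:

```python
import builtins
import importlib.util
import os

# The configs call raw_input('Press Enter to continue...'); on Python 3,
# alias the missing builtin to input() before importing any of them.
if not hasattr(builtins, 'raw_input'):
    builtins.raw_input = input

def load_task_cfg(task_name, root='experiments/final', nopause=True):
    """Import experiments/final/<task_name>/config.py and return its cfg dict."""
    path = os.path.join(root, task_name, 'config.py')
    spec = importlib.util.spec_from_file_location('task_config', path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)          # runs the config module top to bottom
    return module.get_cfg(nopause=nopause)   # nopause=True skips the Enter prompt

# e.g. cfg = load_task_cfg('autoencoder'); print(cfg['batch_size'])
```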
159 | 160 | 161 | -------------------------------------------------------------------------------- /experiments/final/denoise/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.random_noise_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 3 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'rgb' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1] 110 | } 111 | 112 | # masks 113 | 114 | # input pipeline 115 | cfg['preprocess_fn'] = load_and_specify_preprocessors 116 | cfg['randomize'] = False 117 | cfg['num_read_threads'] = 300 118 | cfg['batch_size'] = 32 119 | cfg['inputs_queue_capacity'] = 4096 120 | 121 | # Checkpoints and summaries 122 | cfg['summary_save_every_secs'] = 300 123 | cfg['checkpoint_save_every_secs'] = 600 124 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 125 | print_cfg( cfg, nopause=nopause ) 126 | return cfg 127 | 128 | def print_cfg( cfg, nopause=False ): 129 | print('-------------------------------------------------') 130 | print('config:') 131 | template = '\t{0:30}{1}' 132 | for key in sorted( cfg.keys() ): 133 | print(template.format(key, cfg[key])) 134 | print('-------------------------------------------------') 135 | 136 | if nopause: 137 | return 138 | raw_input('Press Enter to continue...') 139 | print('-------------------------------------------------') 140 | -------------------------------------------------------------------------------- /experiments/final/inpainting_whole/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = 
resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.context_encoder_input 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 3 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'rgb' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1] 110 | } 111 | 112 | # masks 113 | 114 | # input pipeline 115 | cfg['preprocess_fn'] = load_and_specify_preprocessors 116 | cfg['randomize'] = False 117 | cfg['num_read_threads'] = 300 118 | cfg['batch_size'] = 32 119 | cfg['inputs_queue_capacity'] = 4096 120 | 121 | # Checkpoints and summaries 122 | cfg['summary_save_every_secs'] = 300 123 | cfg['checkpoint_save_every_secs'] = 600 124 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 125 | print_cfg( cfg, nopause=nopause ) 126 | return cfg 127 | 128 | def print_cfg( cfg, nopause=False ): 129 | print('-------------------------------------------------') 130 | print('config:') 131 | template = '\t{0:30}{1}' 132 | for key in sorted( cfg.keys() ): 133 | print(template.format(key, cfg[key])) 134 | print('-------------------------------------------------') 135 | 136 | if nopause: 137 | return 138 | raw_input('Press Enter to continue...') 139 | print('-------------------------------------------------') 140 | -------------------------------------------------------------------------------- /experiments/final/autoencoder/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | 
import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | 91 | # inputs 92 | cfg['input_dim'] = (256, 256) # (1024, 1024) 93 | cfg['input_num_channels'] = 3 94 | cfg['input_dtype'] = tf.float32 95 | cfg['input_domain_name'] = 'rgb' 96 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 97 | cfg['input_preprocessing_fn_kwargs'] = { 98 | 'new_dims': cfg['input_dim'], 99 | 'new_scale': [-1, 1] 100 | } 101 | 102 | # outputs 103 | cfg['target_num_channels'] = 3 104 | cfg['target_dim'] = (256, 256) # (1024, 1024) 105 | cfg['target_dtype'] = tf.float32 106 | cfg['target_domain_name'] = 'rgb' 107 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 108 | cfg['target_preprocessing_fn_kwargs'] = { 109 | 'new_dims': cfg['target_dim'], 110 | 'new_scale': [-1, 1] 111 | } 112 | 113 | # masks 114 | 115 | # input pipeline 116 | cfg['preprocess_fn'] = load_and_specify_preprocessors 117 | cfg['randomize'] = False 118 | cfg['num_read_threads'] = 300 119 | cfg['batch_size'] = 32 120 | cfg['inputs_queue_capacity'] = 4096 121 | 122 | # Checkpoints and summaries 123 | cfg['summary_save_every_secs'] = 300 124 | cfg['checkpoint_save_every_secs'] = 600 125 | 126 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 127 | print_cfg( cfg, nopause=nopause ) 128 | return cfg 129 | 130 | def print_cfg( cfg, nopause=False ): 131 | print('-------------------------------------------------') 132 | print('config:') 133 | template = '\t{0:30}{1}' 134 | for key in sorted( cfg.keys() ): 135 | print(template.format(key, cfg[key])) 136 | print('-------------------------------------------------') 137 | 138 | if nopause: 139 | return 140 | raw_input('Press Enter to continue...') 141 | print('-------------------------------------------------') 142 | -------------------------------------------------------------------------------- /experiments/final/edge2d/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | 
cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 1 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'edge2d' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1], 110 | 'current_scale': [0,0.08] 111 | } 112 | 113 | # input pipeline 114 | cfg['preprocess_fn'] = load_and_specify_preprocessors 115 | cfg['randomize'] = False 116 | cfg['num_read_threads'] = 300 117 | cfg['batch_size'] = 32 118 | cfg['inputs_queue_capacity'] = 4096 119 | 120 | # Checkpoints and summaries 121 | cfg['summary_save_every_secs'] = 300 122 | cfg['checkpoint_save_every_secs'] = 600 123 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 124 | print_cfg( cfg, nopause=nopause ) 125 | return cfg 126 | 127 | def print_cfg( cfg, nopause=False ): 128 | print('-------------------------------------------------') 129 | print('config:') 130 | template = '\t{0:30}{1}' 131 | for key in sorted( cfg.keys() ): 132 | print(template.format(key, cfg[key])) 133 | print('-------------------------------------------------') 134 | 135 | if nopause: 136 | return 137 | raw_input('Press Enter to continue...') 138 | print('-------------------------------------------------') 139 | -------------------------------------------------------------------------------- /experiments/final/jigsaw/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 
4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors, load_and_specify_preprocessors_for_input_depends_on_target 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | cfg['is_discriminative'] = True 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.Siamese 35 | cfg['weight_decay'] = 2e-6 36 | cfg['instance_noise_sigma'] = 0.1 37 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 38 | cfg['instance_noise_anneal_fn_kwargs'] = { 39 | 'decay_rate': 0.2, 40 | 'decay_steps': 1000 41 | } 42 | 43 | batch_size = 4 44 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 45 | cfg['hidden_size'] = 1024 46 | cfg['encoder_kwargs'] = { 47 | 'resnet_build_fn' : resnet_v1_50_16x16, 48 | 'weight_decay': cfg['weight_decay'], 49 | 'flatten': True, 50 | 'batch_size': batch_size 51 | 52 | } 53 | cfg['return_accuracy']=True 54 | 55 | # learning 56 | cfg['initial_learning_rate'] = 1e-4 57 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 58 | cfg[ 'optimizer_kwargs' ] = {} 59 | cfg[ 'discriminator_learning_args' ] = { 60 | 'initial_learning_rate':1e-5, 61 | 'optimizer': tf.train.GradientDescentOptimizer, 62 | 'optimizer_kwargs': {} 63 | } 64 | #cfg['clip_norm'] = 1 65 | def pwc(initial_lr, **kwargs): 66 | global_step = kwargs['global_step'] 67 | del kwargs['global_step'] 68 | return tf.train.piecewise_constant(global_step, **kwargs) 69 | cfg['learning_rate_schedule'] = pwc 70 | cfg['learning_rate_schedule_kwargs' ] = { 71 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
72 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 73 | } 74 | # inputs 75 | cfg['input_dim'] = (256, 256) # (1024, 1024) 76 | cfg['input_num_channels'] = 3 77 | cfg['input_dtype'] = tf.float32 78 | cfg['num_input'] = 9 79 | cfg['input_domain_name'] = 'rgb' 80 | cfg['input_preprocessing_fn'] = load_ops.generate_jigsaw_input 81 | # cfg['input_preprocessing_fn'] = load_ops.generate_jigsaw_input_for_representation_extraction 82 | cfg['input_preprocessing_fn_kwargs'] = { 83 | 'new_dims': cfg['input_dim'], 84 | 'new_scale': [-1, 1] 85 | } 86 | 87 | cfg['single_filename_to_multiple']=True 88 | 89 | num_jigsaw_class = 100 90 | # outputs 91 | cfg['target_dim'] = 1 # (1024, 1024) 92 | cfg['target_dtype'] = tf.int64 93 | cfg['target_from_filenames'] = load_ops.jigsaw_rand_index 94 | cfg['find_target_in_config'] = True 95 | cfg['target_dict'] = np.load(os.path.join(os.path.dirname(os.path.realpath(__file__)), 96 | '../../../lib/data', 'jigsaw_max_hamming_set.npy')) 97 | 98 | # masks 99 | cfg['metric_net'] = encoder_multilayers_fc_bn 100 | cfg['metric_kwargs'] = { 101 | 'hidden_size': 2048, 102 | 'layer_num': 3, 103 | 'output_size': num_jigsaw_class, 104 | 'batch_norm_decay' : 0.9, 105 | 'batch_norm_epsilon' : 1e-5, 106 | 'batch_norm_scale' : True, 107 | 'batch_norm_center' : True 108 | } 109 | 110 | # input pipeline 111 | cfg['preprocess_fn'] = load_and_specify_preprocessors_for_input_depends_on_target 112 | cfg['randomize'] = False 113 | cfg['num_read_threads'] = 300 114 | cfg['batch_size'] = batch_size 115 | cfg['inputs_queue_capacity'] = 4096 116 | 117 | # Checkpoints and summaries 118 | cfg['summary_save_every_secs'] = 300 119 | cfg['checkpoint_save_every_secs'] = 600 120 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 121 | print_cfg( cfg, nopause=nopause ) 122 | return cfg 123 | 124 | def print_cfg( cfg, nopause=False ): 125 | print('-------------------------------------------------') 126 | print('config:') 127 | template = '\t{0:30}{1}' 128 | for key in sorted( cfg.keys() ): 129 | print(template.format(key, cfg[key])) 130 | print('-------------------------------------------------') 131 | 132 | if nopause: 133 | return 134 | raw_input('Press Enter to continue...') 135 | print('-------------------------------------------------') 136 | -------------------------------------------------------------------------------- /experiments/final/reshade/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name 
= os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
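# (Editor's note -- hedged sketch, not repo code.) These kwargs describe a step
# schedule that holds 1e-4 until step 80000 and then drops to 1e-5. A minimal
# standalone equivalent, assuming a TF 1.x-style API where
# len(values) == len(boundaries) + 1:
#   gs = tf.train.get_or_create_global_step()
#   lr = tf.train.piecewise_constant(tf.cast(gs, tf.int64),
#                                    boundaries=[np.int64(80000)],
#                                    values=[1e-4, 1e-5])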
88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 1 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'reshade' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1] 110 | } 111 | 112 | # masks 113 | 114 | cfg['depth_mask'] = True 115 | 116 | # input pipeline 117 | cfg['preprocess_fn'] = load_and_specify_preprocessors 118 | cfg['randomize'] = False 119 | cfg['num_read_threads'] = 300 120 | cfg['batch_size'] = 32 121 | cfg['inputs_queue_capacity'] = 4096 122 | 123 | # Checkpoints and summaries 124 | cfg['summary_save_every_secs'] = 300 125 | cfg['checkpoint_save_every_secs'] = 600 126 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 127 | print_cfg( cfg, nopause=nopause ) 128 | return cfg 129 | 130 | def print_cfg( cfg, nopause=False ): 131 | print('-------------------------------------------------') 132 | print('config:') 133 | template = '\t{0:30}{1}' 134 | for key in sorted( cfg.keys() ): 135 | print(template.format(key, cfg[key])) 136 | print('-------------------------------------------------') 137 | 138 | if nopause: 139 | return 140 | raw_input('Press Enter to continue...') 141 | print('-------------------------------------------------') 142 | -------------------------------------------------------------------------------- /experiments/final/rgb2sfnorm/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 
'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 3 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'normal' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1] 110 | } 111 | 112 | # masks 113 | cfg['depth_mask'] = True 114 | 115 | # input pipeline 116 | cfg['preprocess_fn'] = load_and_specify_preprocessors 117 | cfg['randomize'] = False 118 | cfg['num_read_threads'] = 300 119 | cfg['batch_size'] = 32 120 | cfg['inputs_queue_capacity'] = 4096 121 | 122 | # Checkpoints and summaries 123 | cfg['summary_save_every_secs'] = 300 124 | cfg['checkpoint_save_every_secs'] = 600 125 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 126 | print_cfg( cfg, nopause=nopause ) 127 | return cfg 128 | 129 | def print_cfg( cfg, nopause=False ): 130 | print('-------------------------------------------------') 131 | print('config:') 132 | template = '\t{0:30}{1}' 133 | for key in sorted( cfg.keys() ): 134 | print(template.format(key, cfg[key])) 135 | print('-------------------------------------------------') 136 | 137 | if nopause: 138 | return 139 | raw_input('Press Enter to continue...') 140 | print('-------------------------------------------------') 141 | -------------------------------------------------------------------------------- /experiments/final/keypoint3d/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
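# (Editor's note, hedged) The np.int64 casts above match the dtype contract of
# tf.train.piecewise_constant: the boundaries must share the global step tensor's
# dtype (int64 here), otherwise graph construction raises a dtype error.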
88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 1 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'keypoint' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1] 110 | } 111 | 112 | # masks 113 | 114 | cfg['depth_mask'] = True 115 | 116 | # input pipeline 117 | cfg['preprocess_fn'] = load_and_specify_preprocessors 118 | cfg['randomize'] = False 119 | cfg['num_read_threads'] = 300 120 | cfg['batch_size'] = 32 121 | cfg['inputs_queue_capacity'] = 4096 122 | 123 | # Checkpoints and summaries 124 | cfg['summary_save_every_secs'] = 300 125 | cfg['checkpoint_save_every_secs'] = 600 126 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 127 | print_cfg( cfg, nopause=nopause ) 128 | return cfg 129 | 130 | def print_cfg( cfg, nopause=False ): 131 | print('-------------------------------------------------') 132 | print('config:') 133 | template = '\t{0:30}{1}' 134 | for key in sorted( cfg.keys() ): 135 | print(template.format(key, cfg[key])) 136 | print('-------------------------------------------------') 137 | 138 | if nopause: 139 | return 140 | raw_input('Press Enter to continue...') 141 | print('-------------------------------------------------') 142 | -------------------------------------------------------------------------------- /experiments/final/colorization/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EDSoftmaxRegenCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.99 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 
| } 46 | 47 | cfg['initial_learning_rate'] = 1e-4 48 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 49 | cfg['hidden_size'] = 1024 50 | cfg['encoder_kwargs'] = { 51 | 'resnet_build_fn' : resnet_v1_50_16x16, 52 | 'weight_decay': cfg['weight_decay'] 53 | } 54 | cfg['decoder'] = decoder_fc_15_layer_64_resolution_16x16 55 | cfg['decoder_kwargs'] = { 56 | 'activation_fn': leaky_relu( 0.2 ), 57 | 'weight_decay': cfg['weight_decay'] 58 | } 59 | cfg['discriminator'] = pix2pix_discriminator 60 | cfg['discriminator_kwargs'] = { 61 | 'n_layers': 5, 62 | 'stride': 4, 63 | 'n_channel_multiplier': 64, 64 | 'weight_decay': 10.*cfg['weight_decay'] 65 | } 66 | def pwc(initial_lr, **kwargs): 67 | global_step = kwargs['global_step'] 68 | del kwargs['global_step'] 69 | return tf.train.piecewise_constant(global_step, **kwargs) 70 | cfg['learning_rate_schedule'] = pwc 71 | cfg['learning_rate_schedule_kwargs' ] = { 72 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 73 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 74 | } 75 | # loss 76 | cfg['gan_loss_kwargs'] = { 77 | 'real_label': 0.9, # Positive labels 1 -> 0.9 78 | 'fake_label': 0.0 79 | } 80 | 81 | # learning 82 | cfg['initial_learning_rate'] = 1e-4 83 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 84 | cfg[ 'optimizer_kwargs' ] = {} 85 | cfg[ 'discriminator_learning_args' ] = { 86 | 'initial_learning_rate':1e-5, 87 | 'optimizer': tf.train.GradientDescentOptimizer, 88 | 'optimizer_kwargs': {} 89 | } 90 | 91 | # inputs 92 | cfg['input_dim'] = (256, 256) # (1024, 1024) 93 | cfg['input_num_channels'] = 1 94 | cfg['input_dtype'] = tf.float32 95 | cfg['input_domain_name'] = 'rgb' 96 | cfg['input_preprocessing_fn'] = load_ops.to_light 97 | cfg['input_preprocessing_fn_kwargs'] = { 98 | 'new_dims': cfg['input_dim'], 99 | 'new_scale': [-1, 1] 100 | } 101 | 102 | cfg['return_accuracy'] = True 103 | 104 | # outputs 105 | cfg['target_num_channels'] = 313 106 | cfg['target_dim'] = (64, 64) # (1024, 1024) 107 | cfg['target_dtype'] = tf.float32 108 | cfg['target_domain_name'] = 'rgb' 109 | cfg['target_preprocessing_fn'] = load_ops.ab_image_to_prob 110 | cfg['target_preprocessing_fn_kwargs'] = { 111 | 'new_dims': cfg['target_dim'] 112 | } 113 | cfg['mask_by_target_func'] = True 114 | 115 | # masks 116 | 117 | # input pipeline 118 | cfg['preprocess_fn'] = load_and_specify_preprocessors 119 | cfg['randomize'] = False 120 | cfg['num_read_threads'] = 300 121 | cfg['batch_size'] = 32 122 | cfg['inputs_queue_capacity'] = 4096 123 | 124 | # Checkpoints and summaries 125 | cfg['summary_save_every_secs'] = 300 126 | cfg['checkpoint_save_every_secs'] = 600 127 | 128 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 129 | print_cfg( cfg, nopause=nopause ) 130 | return cfg 131 | 132 | def print_cfg( cfg, nopause=False ): 133 | print('-------------------------------------------------') 134 | print('config:') 135 | template = '\t{0:30}{1}' 136 | for key in sorted( cfg.keys() ): 137 | print(template.format(key, cfg[key])) 138 | print('-------------------------------------------------') 139 | 140 | if nopause: 141 | return 142 | raw_input('Press Enter to continue...') 143 | print('-------------------------------------------------') 144 | -------------------------------------------------------------------------------- /experiments/final/rgb2mist/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_ge 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.03 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
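# (Editor's sketch, not repo code) How pwc above consumes these kwargs when the
# training loop builds the learning-rate tensor; 'gs' is a hypothetical int64
# global-step tensor:
#   schedule_kwargs = dict(cfg['learning_rate_schedule_kwargs'], global_step=gs)
#   lr_tensor = cfg['learning_rate_schedule'](cfg['initial_learning_rate'], **schedule_kwargs)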
88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 1 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'mist' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_and_rescale_image_log 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'offset': 1., 110 | 'normalizer': np.log( 2. ** 16.0 ) 111 | } 112 | 113 | # masks 114 | cfg['depth_mask'] = True 115 | 116 | # input pipeline 117 | cfg['preprocess_fn'] = load_and_specify_preprocessors 118 | cfg['randomize'] = False 119 | cfg['num_read_threads'] = 300 120 | cfg['batch_size'] = 32 121 | cfg['inputs_queue_capacity'] = 4096 122 | 123 | # Checkpoints and summaries 124 | cfg['summary_save_every_secs'] = 300 125 | cfg['checkpoint_save_every_secs'] = 600 126 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 127 | print_cfg( cfg, nopause=nopause ) 128 | return cfg 129 | 130 | def print_cfg( cfg, nopause=False ): 131 | print('-------------------------------------------------') 132 | print('config:') 133 | template = '\t{0:30}{1}' 134 | for key in sorted( cfg.keys() ): 135 | print(template.format(key, cfg[key])) 136 | print('-------------------------------------------------') 137 | 138 | if nopause: 139 | return 140 | raw_input('Press Enter to continue...') 141 | print('-------------------------------------------------') 142 | -------------------------------------------------------------------------------- /experiments/final/keypoint2d/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 
'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 1 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'keypoint2d' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1], 110 | 'current_scale': [0.0, 0.005] 111 | } 112 | 113 | # masks 114 | 115 | #cfg['depth_mask'] = True 116 | 117 | # input pipeline 118 | cfg['preprocess_fn'] = load_and_specify_preprocessors 119 | cfg['randomize'] = False 120 | cfg['num_read_threads'] = 300 121 | cfg['batch_size'] = 32 122 | cfg['inputs_queue_capacity'] = 4096 123 | 124 | # Checkpoints and summaries 125 | cfg['summary_save_every_secs'] = 300 126 | cfg['checkpoint_save_every_secs'] = 600 127 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 128 | print_cfg( cfg, nopause=nopause ) 129 | return cfg 130 | 131 | def print_cfg( cfg, nopause=False ): 132 | print('-------------------------------------------------') 133 | print('config:') 134 | template = '\t{0:30}{1}' 135 | for key in sorted( cfg.keys() ): 136 | print(template.format(key, cfg[key])) 137 | print('-------------------------------------------------') 138 | 139 | if nopause: 140 | return 141 | raw_input('Press Enter to continue...') 142 | print('-------------------------------------------------') 143 | -------------------------------------------------------------------------------- /experiments/final/edge3d/config.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_le 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.1 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since global step is... 
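# (Editor's sketch, hedged) The 'discriminator_learning_args' above are presumably
# expanded by the training code into a separate, slower optimizer for D, roughly:
#   d_args = cfg['discriminator_learning_args']
#   d_opt = d_args['optimizer'](d_args['initial_learning_rate'],
#                               **d_args['optimizer_kwargs'])  # plain SGD at 1e-5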
88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 1 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'edge' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_rescale_image_gaussian_blur 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'new_scale': [-1, 1], 110 | 'current_scale' : [0.0, 0.00625], 111 | 'no_clip' : True 112 | } 113 | 114 | cfg['depth_mask'] = True 115 | 116 | # input pipeline 117 | cfg['preprocess_fn'] = load_and_specify_preprocessors 118 | cfg['randomize'] = False 119 | cfg['num_read_threads'] =300 120 | cfg['batch_size'] = 32 121 | cfg['inputs_queue_capacity'] = 4096 122 | 123 | # Checkpoints and summaries 124 | cfg['summary_save_every_secs'] = 300 125 | cfg['checkpoint_save_every_secs'] = 600 126 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 127 | print_cfg( cfg, nopause=nopause ) 128 | return cfg 129 | 130 | def print_cfg( cfg, nopause=False ): 131 | print('-------------------------------------------------') 132 | print('config:') 133 | template = '\t{0:30}{1}' 134 | for key in sorted( cfg.keys() ): 135 | print(template.format(key, cfg[key])) 136 | print('-------------------------------------------------') 137 | 138 | if nopause: 139 | return 140 | raw_input('Press Enter to continue...') 141 | print('-------------------------------------------------') 142 | -------------------------------------------------------------------------------- /lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import tensorflow as tf 5 | import tensorflow.contrib.slim as slim 6 | 7 | # ------layers------- 8 | @slim.add_arg_scope 9 | def add_conv_layer( *args, **kwargs ): 10 | # net, out_channels, kernel, stride, scope ): 11 | net = slim.conv2d( *args, **kwargs ) 12 | tf.add_to_collection( tf.GraphKeys.ACTIVATIONS, net ) 13 | #if 'scope' in kwargs: 14 | # print( '\t\t{scope}'.format( scope=kwargs['scope'] ), net.get_shape() ) 15 | return net 16 | 17 | @slim.add_arg_scope 18 | def add_conv_transpose_layer( *args, **kwargs ): 19 | net = slim.conv2d_transpose( *args, **kwargs ) 20 | tf.add_to_collection( tf.GraphKeys.ACTIVATIONS, net ) 21 | #if 'scope' in kwargs: 22 | # print( '\t\t{scope}'.format( scope=kwargs['scope'] ), net.get_shape() ) 23 | return net 24 | 25 | @slim.add_arg_scope 26 | def add_flatten_layer( net, batch_size, scope ): 27 | net = tf.reshape(net, shape=[batch_size, -1], 28 | name=scope) 29 | #print('\t\t{scope}'.format( scope=scope ), net.get_shape() ) 30 | return net 31 | 32 | @slim.add_arg_scope 33 | def add_gaussian_noise_layer( input_layer, std, scope ): 34 | with tf.variable_scope( scope ) as sc: 35 | noise = tf.random_normal( shape=input_layer.get_shape(), mean=0.0, stddev=std, 36 | dtype=tf.float32 ) 37 | #print('\t\t{scope}'.format( scope=scope ), noise.get_shape() ) 38 | return input_layer + noise 39 | 40 | @slim.add_arg_scope 
41 | def add_reshape_layer( net, shape, scope ): 42 | net = tf.reshape(net, shape=shape, name=scope) 43 | #print('\t\t{scope}'.format( scope=scope ), net.get_shape() ) 44 | return net 45 | 46 | @slim.add_arg_scope 47 | def add_squeeze_layer( net, scope ): 48 | net = tf.squeeze(net, squeeze_dims=[1,2] , name=scope) # tf 0.12.0rc: squeeze_dims -> axis 49 | #print('\t\t{scope}'.format( scope=scope ), net.get_shape() ) 50 | return net 51 | 52 | @slim.add_arg_scope 53 | def add_conv_fc_layer( *args, **kwargs ): 54 | ''' 55 | Sets up a FC-Conv layer using the args passed in 56 | ''' 57 | net = args[ 0 ] 58 | pre_fc_shape = [ int( x ) for x in net.get_shape() ] 59 | kwargs[ 'kernel_size' ] = pre_fc_shape[1:3] 60 | kwargs[ 'stride' ] = 1 61 | kwargs[ 'padding' ] = 'VALID' 62 | net = add_conv_layer( *args, **kwargs ) 63 | net = add_squeeze_layer( net, scope='squeeze' ) 64 | tf.add_to_collection( tf.GraphKeys.ACTIVATIONS, net ) 65 | return net 66 | 67 | @slim.add_arg_scope 68 | def add_fc_with_dropout_layer( net, is_training, num_outputs, dropout=0.8, activation_fn=None, reuse=None, scope=None ): 69 | ''' 70 | Sets up a FC layer with dropout using the args passed in 71 | ''' 72 | #print(activation_fn) 73 | net = slim.fully_connected(net, num_outputs, 74 | activation_fn=activation_fn, 75 | reuse=reuse, 76 | scope=scope) 77 | net = slim.dropout(net, keep_prob=dropout, is_training=is_training) 78 | tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, net) 79 | #if 'scope' is not None: 80 | # print( '\t\t{scope}'.format( scope=scope ), net.get_shape() ) 81 | return net 82 | 83 | 84 | @slim.add_arg_scope 85 | def add_fc_layer( net, is_training, num_outputs, activation_fn=None, reuse=None, scope=None ): 86 | ''' 87 | Sets up a FC layer using the args passed in 88 | ''' 89 | net = slim.fully_connected(net, num_outputs, 90 | activation_fn=activation_fn, 91 | reuse=reuse, 92 | scope=scope) 93 | tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, net) 94 | #if 'scope' is not None: 95 | # print( '\t\t{scope}'.format( scope=scope ), net.get_shape() ) 96 | return net 97 | 98 | 99 | # ------activation fns------- 100 | # These are not activation functions because they have additional parameters. 101 | # However, they return an activation function with the specified parameters 102 | # 'baked in'. 103 | def leaky_relu( leak=0.2, name='leaky_relu' ): 104 | return lambda x: tf.maximum( x, leak * x, name=name ) # use the passed-in name rather than a hardcoded one 105 | 106 | 107 | # ------normalization fns------- 108 | 109 | # ------utils from tf 0.12.0rc------- 110 | if tf.__version__ == '0.10.0': 111 | print( "Building for Tensorflow version {0}".format( tf.__version__ ) ) 112 | def convert_collection_to_dict(collection): 113 | """Returns a dict of Tensors using get_tensor_alias as key. 114 | Args: 115 | collection: A collection. 116 | Returns: 117 | A dictionary of {get_tensor_alias(tensor): tensor} 118 | """ 119 | return {get_tensor_alias(t): t for t in tf.get_collection(collection)} 120 | 121 | def get_tensor_alias(tensor): 122 | """Given a tensor, gather its alias: its op.name or its name. 123 | If the tensor does not have an alias, it defaults to its name. 124 | Args: 125 | tensor: A `Tensor`. 126 | Returns: 127 | A string with the alias of the tensor. 
128 | """ 129 | if hasattr(tensor, 'alias'): 130 | alias = tensor.alias 131 | else: 132 | if tensor.name[-2:] == ':0': 133 | # Use op.name for tensor ending in :0 134 | alias = tensor.op.name 135 | else: 136 | alias = tensor.name 137 | return alias 138 | else: 139 | print( "Building for Tensorflow version {0}".format( tf.__version__ ) ) 140 | convert_collection_to_dict = slim.utils.convert_collection_to_dict 141 | -------------------------------------------------------------------------------- /experiments/final/rgb2depth/config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import functools 4 | import numpy as np 5 | import os 6 | import sys 7 | import tensorflow as tf 8 | 9 | sys.path.insert( 1, os.path.realpath( '../../../../models' ) ) 10 | sys.path.insert( 1, os.path.realpath( '../../../../lib' ) ) 11 | 12 | import data.load_ops as load_ops 13 | from data.load_ops import mask_if_channel_ge 14 | from data.task_data_loading import load_and_specify_preprocessors 15 | from general_utils import RuntimeDeterminedEnviromentVars 16 | import models.architectures as architectures 17 | from models.gan_discriminators import pix2pix_discriminator 18 | from models.resnet_v1 import resnet_v1_50_16x16 19 | from models.sample_models import * 20 | from models.utils import leaky_relu 21 | 22 | def get_cfg( nopause=False ): 23 | cfg = {} 24 | 25 | # params 26 | cfg['num_epochs'] = 30 27 | cfg['model_path'] = None 28 | 29 | # logging 30 | config_dir = os.path.dirname(os.path.realpath( __file__ )) 31 | task_name = os.path.basename( config_dir ) 32 | 33 | # model 34 | cfg['model_type'] = architectures.EncoderDecoderWithCGAN 35 | cfg['n_g_steps_before_d'] = 1 36 | cfg['n_d_steps_after_g'] = 1 37 | cfg['init_g_steps'] = 25000 38 | cfg['l_norm_weight_prop'] = 0.996 39 | cfg['weight_decay'] = 2e-6 40 | cfg['instance_noise_sigma'] = 0.03 41 | cfg['instance_noise_anneal_fn'] = tf.train.inverse_time_decay 42 | cfg['instance_noise_anneal_fn_kwargs'] = { 43 | 'decay_rate': 0.2, 44 | 'decay_steps': 1000 45 | } 46 | 47 | cfg['encoder'] = resnet_encoder_fully_convolutional_16x16x8 48 | cfg['hidden_size'] = 1024 49 | cfg['encoder_kwargs'] = { 50 | 'resnet_build_fn' : resnet_v1_50_16x16, 51 | 'weight_decay': cfg['weight_decay'] 52 | } 53 | cfg['decoder'] = decoder_fc_15_layer_256_resolution_fully_convolutional_16x16x4 54 | cfg['decoder_kwargs'] = { 55 | 'activation_fn': leaky_relu( 0.2 ), 56 | 'weight_decay': cfg['weight_decay'] 57 | } 58 | cfg['discriminator'] = pix2pix_discriminator 59 | cfg['discriminator_kwargs'] = { 60 | 'n_layers': 5, 61 | 'stride': 4, 62 | 'n_channel_multiplier': 64, 63 | 'weight_decay': 10.*cfg['weight_decay'] 64 | } 65 | 66 | # loss 67 | cfg['gan_loss_kwargs'] = { 68 | 'real_label': 0.9, # Positive labels 1 -> 0.9 69 | 'fake_label': 0.0 70 | } 71 | 72 | # learning 73 | cfg['initial_learning_rate'] = 1e-4 74 | cfg[ 'optimizer' ] = tf.train.AdamOptimizer 75 | cfg[ 'optimizer_kwargs' ] = {} 76 | cfg[ 'discriminator_learning_args' ] = { 77 | 'initial_learning_rate':1e-5, 78 | 'optimizer': tf.train.GradientDescentOptimizer, 79 | 'optimizer_kwargs': {} 80 | } 81 | def pwc(initial_lr, **kwargs): 82 | global_step = kwargs['global_step'] 83 | del kwargs['global_step'] 84 | return tf.train.piecewise_constant(global_step, **kwargs) 85 | cfg['learning_rate_schedule'] = pwc 86 | cfg['learning_rate_schedule_kwargs' ] = { 87 | 'boundaries': [np.int64(0), np.int64(80000)], # need to be int64 since 
global step is... 88 | 'values': [cfg['initial_learning_rate'], cfg['initial_learning_rate']/10] 89 | } 90 | # inputs 91 | cfg['input_dim'] = (256, 256) # (1024, 1024) 92 | cfg['input_num_channels'] = 3 93 | cfg['input_dtype'] = tf.float32 94 | cfg['input_domain_name'] = 'rgb' 95 | cfg['input_preprocessing_fn'] = load_ops.resize_rescale_image 96 | cfg['input_preprocessing_fn_kwargs'] = { 97 | 'new_dims': cfg['input_dim'], 98 | 'new_scale': [-1, 1] 99 | } 100 | 101 | # outputs 102 | cfg['target_num_channels'] = 1 103 | cfg['target_dim'] = (256, 256) # (1024, 1024) 104 | cfg['target_dtype'] = tf.float32 105 | cfg['target_domain_name'] = 'depth' 106 | cfg['target_preprocessing_fn'] = load_ops.resize_and_rescale_image_log 107 | cfg['target_preprocessing_fn_kwargs'] = { 108 | 'new_dims': cfg['target_dim'], 109 | 'offset': 1., 110 | 'normalizer': np.log( 2. ** 16.0 ) 111 | } 112 | 113 | # masks 114 | cfg['mask_fn'] = mask_if_channel_ge # given target image as input 115 | cfg['mask_fn_kwargs'] = { 116 | 'img': '', 117 | 'channel_idx': 0, 118 | 'threshhold': 64500, # roughly max value - 1000. This margin is for interpolation errors 119 | 'broadcast_to_dim': cfg['target_num_channels'] 120 | } 121 | 122 | #cfg['depth_mask'] = True 123 | 124 | # input pipeline 125 | cfg['preprocess_fn'] = load_and_specify_preprocessors 126 | cfg['randomize'] = False 127 | cfg['num_read_threads'] = 300 128 | cfg['batch_size'] = 32 129 | cfg['inputs_queue_capacity'] = 4096 130 | 131 | # Checkpoints and summaries 132 | cfg['summary_save_every_secs'] = 300 133 | cfg['checkpoint_save_every_secs'] = 600 134 | RuntimeDeterminedEnviromentVars.register_dict( cfg ) # These will be loaded at runtime 135 | print_cfg( cfg, nopause=nopause ) 136 | return cfg 137 | 138 | def print_cfg( cfg, nopause=False ): 139 | print('-------------------------------------------------') 140 | print('config:') 141 | template = '\t{0:30}{1}' 142 | for key in sorted( cfg.keys() ): 143 | print(template.format(key, cfg[key])) 144 | print('-------------------------------------------------') 145 | 146 | if nopause: 147 | return 148 | raw_input('Press Enter to continue...') 149 | print('-------------------------------------------------') 150 | -------------------------------------------------------------------------------- /lib/models/encoder_decoder_segmentation_semantic.py: -------------------------------------------------------------------------------- 1 | '''Segmentation encoder-decoder model 2 | 3 | Assumes there is one input and one output. 4 | 5 | The output is an embedding vector for each pixel, the size of the embedding vector is 6 | the number of channels for target specified by the config file, aka 'target_num_channel'. 7 | 8 | The target is a vector of pixel location & segmentation ID. The number of pixels is specified 9 | by 'num_pixels' in config file. 10 | 11 | Model-specific config.py options: (inherits from models.base_net) 12 | 'batch_size': An int. The number of images to use in a batch 13 | 'hidden_size': An int. The number of hidden neurons to use. 14 | 'target_num_channels': The number of channels to output from the decoder 15 | 'num_pixels': The number of pixels sampled for metric learning. 16 | 17 | Encoder: 18 | 'encoder': A function that will build take 'input_placeholder', 'is_training', 19 | 'hidden_size', and returns a representation. 20 | -'encoder_kwargs': A Dict of all the args to pass to 'encoder'. The Dict should 21 | not include the mandatory arguments given above. 
({}) 22 | Decoder: 23 | 'decoder': A function that takes 'encoder_output', 'is_training', 24 | 'num_output_channels' (value from 'target_num_channels'), and returns a 25 | batch of representation vectors. 26 | -'decoder_kwargs': A Dict of all the args to pass to 'decoder'. The Dict should 27 | not include the mandatory arguments given above. ({}) 28 | 29 | ''' 30 | from __future__ import absolute_import, division, print_function 31 | 32 | from models.encoder_decoder_segmentation import SegmentationED 33 | import losses.all as losses_lib 34 | import tensorflow as tf 35 | import tensorflow.contrib.slim as slim 36 | import numpy as np 37 | import pdb 38 | import optimizers.train_steps as train_steps 39 | import optimizers.ops as optimize 40 | from models.utils import add_fc_layer 41 | from functools import partial 42 | 43 | class SemSegED(SegmentationED): 44 | ''' Segmentation encoder decoder model 45 | Encodes an input into a low-dimensional representation and reconstructs 46 | the input from the low-dimensional representation. Uses metric loss. 47 | 48 | The metric loss follows Equation 1 of the paper 'Semantic Instance Segmentation 49 | via Deep Metric Learning'. 50 | 51 | Assumes inputs are scaled to [0, 1] (which will be rescaled to [-1, 1]). 52 | ''' 53 | 54 | def __init__(self, global_step, cfg): 55 | ''' 56 | Args: 57 | cfg: Configuration. 58 | ''' 59 | super(SemSegED, self).__init__(global_step, cfg) 60 | if 'hidden_size' not in cfg: 61 | raise ValueError( "config.py for encoder-decoder must specify 'hidden_size'" ) 62 | if 'num_pixels' not in cfg: 63 | raise ValueError( "config.py for segmentation must specify 'num_pixels' (how many pixels to sample)") 64 | 65 | self.batch_size = cfg['batch_size'] 66 | self.num_pixels = cfg['num_pixels'] 67 | 68 | idxes = np.asarray([range(self.batch_size)] * self.num_pixels).T 69 | self.batch_index_slice = tf.cast(tf.stack(idxes), cfg['target_dtype']) 70 | 71 | self.input_type = cfg['input_dtype'] 72 | 73 | self.cfg = cfg 74 | 75 | 76 | def get_losses( self, output_vectors, idx_segments, masks ): 77 | '''Returns the metric loss for 'num_pixels' embedding vectors. 78 | 79 | Args: 80 | output_imgs: Tensor of images output by the decoder. 81 | desired_imgs: Tensor of target images to be output by the decoder. 82 | masks: Tensor of masks to be applied when computing sum of squares 83 | loss. 
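Note (editor's sketch of the math implemented below, for reference): for the sampled pixel embeddings e_i, the code forms squared pairwise distances d_ij, converts them to match probabilities sigma_ij = 2 / (1 + exp(d_ij)) (clipped for numerical stability), and minimizes a binary cross-entropy over pixel pairs: loss = -mean( same_ij * log(sigma_ij) + (1 - same_ij) * log(1 - sigma_ij) ), where same_ij = 1 iff pixels i and j share a segmentation ID.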
84 | 85 | Returns: 86 | losses: list of tensors representing each loss component 87 | ''' 88 | print('setting up losses...') 89 | self.output_images = output_vectors 90 | self.target_images = idx_segments 91 | self.masks = masks 92 | 93 | with tf.variable_scope('losses'): 94 | last_axis = 2 95 | fir, sec, seg_id, weights = tf.unstack(idx_segments, axis=last_axis) 96 | 97 | idxes = tf.stack([self.batch_index_slice, fir, sec], axis=last_axis) 98 | self.embed = tf.gather_nd( output_vectors, idxes ) 99 | embed = self.embed 100 | self.class_logits = add_fc_layer(output_vectors, self.is_training, self.cfg['num_classes']) 101 | self.selected_logits = tf.gather_nd( self.class_logits, idxes ) 102 | class_logits_flat = tf.reshape(self.selected_logits, [-1, self.cfg['num_classes']]) 103 | seg_id_flat = tf.reshape(seg_id, [-1]) 104 | weights_flat = tf.reshape(weights, [-1]) 105 | temp = losses_lib.get_sparse_softmax_loss( 106 | class_logits_flat, seg_id_flat, weights_flat) 107 | self.softmax_loss = tf.reduce_mean(temp) 108 | tf.add_to_collection(tf.GraphKeys.LOSSES, self.softmax_loss) 109 | 110 | square = tf.reduce_sum( embed*embed, axis=-1 ) 111 | square_t = tf.expand_dims(square, axis=-1) 112 | square = tf.expand_dims(square, axis=1) 113 | 114 | pairwise_dist = square - 2 * tf.matmul(embed, tf.transpose(embed, perm=[0,2,1])) + square_t 115 | pairwise_dist = tf.clip_by_value( pairwise_dist, 0, 80) 116 | #pairwise_dist = 0 - pairwise_dist 117 | self.pairwise_dist = pairwise_dist 118 | pairwise_exp = tf.exp(pairwise_dist) + 1 119 | sigma = tf.divide(2 , pairwise_exp) 120 | sigma = tf.clip_by_value(sigma,1e-7,1.0 - 1e-7) 121 | self.sigma = sigma 122 | same = tf.log(sigma) 123 | diff = tf.log(1 - sigma) 124 | 125 | self.same = same 126 | self.diff = diff 127 | 128 | seg_id_i = tf.tile(tf.expand_dims(seg_id, -1), [1, 1, self.num_pixels]) 129 | seg_id_j = tf.transpose(seg_id_i, perm=[0,2,1]) 130 | 131 | seg_comp = tf.equal(seg_id_i, seg_id_j) 132 | seg_same = tf.cast(seg_comp, self.input_type) 133 | seg_diff = 1 - seg_same 134 | 135 | loss_matrix = seg_same * same + seg_diff * diff 136 | reduced_loss = 0 - tf.reduce_mean(loss_matrix) # / self.num_pixels 137 | 138 | tf.add_to_collection(tf.GraphKeys.LOSSES, reduced_loss) 139 | self.metric_loss = reduced_loss 140 | losses = [reduced_loss] 141 | return losses 142 | 143 | 144 | -------------------------------------------------------------------------------- /lib/losses/all.py: -------------------------------------------------------------------------------- 1 | '''Losses used in encoder/decoder and d2 encoder. 2 | 3 | from: KChen @ https://github.com/kchen92/joint-representation/blob/master/lib/losses.py 4 | ''' 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import tensorflow as tf 10 | import tensorflow.contrib.slim as slim 11 | 12 | 13 | def get_gan_loss( discriminator_predictions_real, discriminator_predictions_fake, 14 | real_label=1.0, fake_label=0.0, epsilon=1e-7, scope=None, self=None ): 15 | ''' 16 | Returns the loss from the output of a discriminator 17 | Warnings: When building the train_op, make sure to have them update 18 | only the discriminator/generator variables as appropriate! 
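Note (editor's summary, not original doc text): with the one-sided label smoothing used in the configs (real_label=0.9), the losses built below are plain sigmoid cross-entropies: L_D = CE(D(real), 0.9) + CE(D(fake), 0.0) and L_G = CE(D(fake), 1.0), i.e. the generator pushes fakes toward the real label.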
19 | 20 | Args: 21 | discriminator_predictions_real: A Tensor of [batch_size,] of discriminator 22 | results on real data 23 | discriminator_predictions_fake: A Tensor of [batch_size,] of discriminator 24 | results on fake data 25 | real_label: The label to use for real images 26 | fake_label: Label to use for fake images 27 | scope: The scope that all variables will be declared under 28 | 29 | Returns: 30 | generator_loss, discriminator_loss_real, discriminator_loss_fake 31 | ''' 32 | if scope is not None: 33 | scope = scope + '_gan_loss' 34 | else: 35 | scope = '' 36 | 37 | # create labels 38 | labels_real = tf.constant( real_label, dtype=tf.float32, 39 | shape=discriminator_predictions_real.get_shape(), name='real_labels' ) 40 | labels_fake = tf.constant( fake_label, dtype=tf.float32, 41 | shape=discriminator_predictions_fake.get_shape(), name='fake_labels' ) 42 | self.labels_real = labels_real 43 | self.labels_fake = labels_fake 44 | with tf.variable_scope( scope ) as sc: 45 | # raise NotImplementedError('make log loss') 46 | loss_d_real = slim.losses.sigmoid_cross_entropy( discriminator_predictions_real, labels_real, 47 | scope='discriminator/real' ) 48 | loss_d_fake = slim.losses.sigmoid_cross_entropy( discriminator_predictions_fake, labels_fake, 49 | scope='discriminator/fake' ) 50 | loss_g = slim.losses.sigmoid_cross_entropy( discriminator_predictions_fake, 1. - labels_fake, 51 | scope='generator' ) # Generator should make images look real 52 | 53 | # loss_d_real = slim.losses.log_loss( discriminator_predictions_real, labels_real, 54 | # epsilon=epsilon, scope='discriminator/real' ) 55 | # loss_d_fake = slim.losses.log_loss( discriminator_predictions_fake, labels_fake, 56 | # epsilon=epsilon, scope='discriminator/fake' ) 57 | # loss_g = slim.losses.log_loss( discriminator_predictions_fake, 1. - labels_fake, # Generator should make images look real 58 | # epsilon=epsilon, scope='generator' ) 59 | return loss_g, loss_d_real, loss_d_fake 60 | 61 | # Common L-norm losses 62 | def get_l1_loss(predictions, targets, scope=None): 63 | '''Return mean absolute difference (L1) loss. 64 | ''' 65 | if scope is not None: 66 | scope = scope + '_abs_diff_loss' 67 | return slim.losses.absolute_difference( predictions, targets, scope=scope ) 68 | 69 | def get_l1_loss_with_mask(predictions, targets, mask, scope=None): 70 | '''Return mean absolute difference (L1) loss after masking. 71 | ''' 72 | if scope is not None: 73 | scope = scope + '_abs_diff_loss' 74 | if tf.__version__ == '0.10.0': 75 | return slim.losses.absolute_difference(predictions, targets, weight=mask, scope=scope) 76 | else: 77 | return slim.losses.absolute_difference(predictions, targets, weights=mask, scope=scope) 78 | 79 | def get_l2_loss(predictions, targets, scope=None): 80 | '''Return mean squared error (L2) loss. 81 | ''' 82 | if scope is not None: 83 | scope = scope + '_mse_loss' 84 | # will be renamed to mean_squared_error in next tensorflow version. weight->weights 85 | if tf.__version__ == '0.10.0': 86 | return slim.losses.sum_of_squares(predictions, targets, scope=scope) 87 | else: 88 | return slim.losses.mean_squared_error(predictions, targets, scope=scope) 89 | 90 | 91 | def get_l2_loss_with_mask(output_img, desired_img, mask, scope=None): 92 | '''Return mean squared error (L2) loss after masking. 93 | ''' 94 | if scope is not None: 95 | scope = scope + '_mse_loss' 96 | # will be renamed to mean_squared_error in next tensorflow version. 
weight->weights 97 | if tf.__version__ == '0.10.0': 98 | return slim.losses.sum_of_squares(output_img, desired_img, 99 | weight=mask, scope=scope) 100 | else: 101 | return slim.losses.mean_squared_error(output_img, desired_img, 102 | weights=mask, scope=scope) 103 | 104 | def get_cosine_distance_loss(predictions, targets, dim=1, scope=None): 105 | '''Assume predictions and targets are vectors 106 | ''' 107 | if scope is not None: 108 | scope = scope + '_cos_dist_loss' 109 | # unit-normalize 110 | normalized_predictions = tf.nn.l2_normalize(predictions, dim=1, 111 | name='normalize_predictions') 112 | normalized_targets = tf.nn.l2_normalize(targets, dim=1, 113 | name='normalize_targets') 114 | return slim.losses.cosine_distance(normalized_predictions, 115 | normalized_targets, dim, 116 | scope=scope) 117 | 118 | def get_sparse_softmax_loss(predictions, targets, mask, scope=None): 119 | ''' Compute Softmax Cross Entropy losses between predictions and targets 120 | Can leverage mask as either pure masking or weight 121 | ''' 122 | if scope is not None: 123 | scope = scope + '_softmax_loss' 124 | #return tf.nn.softmax_cross_entropy_with_logits(logits=predictions, 125 | # labels=targets, name='softmax_loss') 126 | return tf.losses.sparse_softmax_cross_entropy( targets, predictions, weights=mask, scope=scope) 127 | 128 | def get_softmax_loss(predictions, targets, mask, scope=None): 129 | ''' Compute Softmax Cross Entropy losses between predictions and targets 130 | Can leverage mask as either pure masking or weight 131 | ''' 132 | if scope is not None: 133 | scope = scope + '_softmax_loss' 134 | #return tf.nn.softmax_cross_entropy_with_logits(logits=predictions, 135 | # labels=targets, name='softmax_loss') 136 | return tf.losses.softmax_cross_entropy( targets, predictions, weights=mask, 137 | scope=scope) 138 | 139 | -------------------------------------------------------------------------------- /lib/optimizers/train_steps.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Defines train_step_fns which allow fancy training regimens 3 | ''' 4 | from __future__ import absolute_import, division, print_function 5 | import numpy as np 6 | import time 7 | import tensorflow as tf 8 | import tensorflow.contrib.slim as slim 9 | ######################## 10 | # Train step functions 11 | ######################## 12 | def gan_train_step_fn( sess, g_and_d_train_ops, global_step, return_accuracy=False, 13 | n_g_steps_before_d=1, n_d_steps_after_g=1, init_g_steps=0, train_step_kwargs={} ): 14 | ''' 15 | Executes a training step for a GAN. This may have multiple 16 | generator updates and may have multiple discriminator 17 | updates. The global_step may be updated several times, but 18 | this is all considered one step. 19 | 20 | Args: 21 | sess 22 | g_and_d_train_ops: A Tuple of ( g_train_op, d_train_op ) 23 | global_step: A Tensor that will be incremented 24 | n_g_steps_before_d + n_d_steps_after_g times. 25 | n_g_steps_before_d: Run g_train_op this many times 26 | n_d_steps_after_g: Run d_train_op this many times after running 27 | g_train_op 28 | train_step_kwargs: Currently only 'should_log' fn is used. 
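(Editor's sketch, hedged) A typical call site, assuming the train ops are built elsewhere and return their losses when run: g_loss, should_stop = gan_train_step_fn(sess, (g_train_op, d_train_op, g_lnorm_op), global_step, n_g_steps_before_d=cfg['n_g_steps_before_d'], n_d_steps_after_g=cfg['n_d_steps_after_g'], init_g_steps=cfg['init_g_steps'], train_step_kwargs=get_default_train_step_kwargs(global_step, max_steps))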
29 | 
30 |     Returns:
31 |         mean_g_loss, should_stop ( or mean_g_loss, mean_accuracy, should_stop when return_accuracy is True )
32 |     '''
33 |     start_time = time.time()
34 |     if return_accuracy:
35 |         g_train_op, d_train_op, g_lnorm_op, accuracy_op = g_and_d_train_ops
36 |         accuracy = sess.run(accuracy_op)
37 |     else:
38 |         g_train_op, d_train_op, g_lnorm_op = g_and_d_train_ops
39 |     curr_global_step = sess.run( global_step )
40 | 
41 |     if init_g_steps > 0 and curr_global_step < init_g_steps:
42 |         g_losses = sess.run( g_lnorm_op )
43 |         d_losses = 0
44 |     else:
45 |         if n_g_steps_before_d >= 1 and n_d_steps_after_g == 1:
46 |             g_losses = [ sess.run( g_train_op ) for i in range( n_g_steps_before_d - 1 ) ]
47 |             last_g_loss, d_loss = sess.run( [g_train_op, d_train_op] )  # results come back in op order
48 |             g_losses.append( last_g_loss )
49 |             d_losses = [ d_loss ]
50 |         else:
51 |             g_losses = [ sess.run( g_train_op ) for i in range( n_g_steps_before_d ) ]
52 |             d_losses = [ sess.run( d_train_op ) for i in range( n_d_steps_after_g ) ]
53 |     np_global_step = sess.run( global_step )  #train_step_kwargs[ 'increment_global_step_op' ] )
54 |     time_elapsed = time.time() - start_time
55 | 
56 |     # Logging
57 |     if 'should_log' in train_step_kwargs:
58 |         if train_step_kwargs['should_log']( np_global_step ):
59 |             if return_accuracy:
60 |                 tf.logging.info('global step %d: g_loss = %.4f, d_loss = %.4f (%.2f sec/step), accuracy = %.4f',
61 |                         np_global_step, np.mean( g_losses ), np.mean( d_losses ), time_elapsed, accuracy)
62 |             else:
63 |                 tf.logging.info('global step %d: g_loss = %.4f, d_loss = %.4f (%.2f sec/step)',
64 |                         np_global_step, np.mean( g_losses ), np.mean( d_losses ), time_elapsed)
65 | 
66 |     should_stop = should_stop_fn( np_global_step, **train_step_kwargs )
67 |     if return_accuracy:
68 |         return np.mean( g_losses ), np.mean(accuracy), should_stop
69 |     return np.mean( g_losses ), should_stop
70 | 
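# A sketch of how a caller might drive gan_train_step_fn manually, assuming
# the train ops were built elsewhere; this mirrors a slim-style training
# loop and is illustrative, not part of the original file.
def _example_gan_training_loop( sess, g_train_op, d_train_op, g_lnorm_op, global_step, max_steps=1000 ):
    kwargs = get_default_train_step_kwargs( global_step, max_steps, log_every_n_steps=10 )
    should_stop = False
    while not should_stop:
        # Each call runs the generator update(s) followed by the discriminator update(s)
        _, should_stop = gan_train_step_fn(
                sess, (g_train_op, d_train_op, g_lnorm_op), global_step,
                train_step_kwargs=kwargs )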
71 | def discriminative_train_step_fn( sess, train_ops, global_step, return_accuracy=False, train_step_kwargs={} ):
72 |     '''
73 |     Executes one training step for a discriminative network. train_ops is a tuple of ( loss_op, accuracy_op ).
74 |     Args:
75 |         sess
76 |         global_step: A Tensor that will be incremented
77 |         train_step_kwargs: Currently only 'should_log' fn is used.
78 | 
79 |     Returns:
80 |         loss, should_stop ( or loss, accuracy, should_stop when return_accuracy is True )
81 |     '''
82 |     start_time = time.time()
83 |     loss_op, accuracy_op = train_ops
84 |     if return_accuracy:
85 |         accuracy = sess.run(accuracy_op)
86 |     discriminative_loss = sess.run(loss_op)
87 |     np_global_step = sess.run( global_step )
88 |     time_elapsed = time.time() - start_time
89 | 
90 |     # Logging
91 |     if 'should_log' in train_step_kwargs:
92 |         if train_step_kwargs['should_log']( np_global_step ):
93 |             if return_accuracy:
94 |                 tf.logging.info('global step %d: loss = %.4f, accuracy = %.4f (%.2f sec/step)',
95 |                         np_global_step, np.mean(discriminative_loss), np.mean(accuracy), time_elapsed)
96 |             else:
97 |                 tf.logging.info('global step %d: loss = %.4f (%.2f sec/step)',
98 |                         np_global_step, np.mean(discriminative_loss), time_elapsed)
99 | 
100 |     should_stop = should_stop_fn( np_global_step, **train_step_kwargs )
101 |     if return_accuracy:
102 |         return np.mean( discriminative_loss ), np.mean(accuracy), should_stop
103 |     return np.mean( discriminative_loss ), should_stop
104 | 
105 | 
106 | ######################################
107 | # Generating args for train_step_fns
108 | ######################################
109 | def get_default_train_step_kwargs( global_step, max_steps, log_every_n_steps=1, trace_every_n_steps=None, logdir=None ):
110 |     ''' Sets some default arguments for any train_step_fn '''
111 |     with tf.name_scope('train_step'):
112 |         train_step_kwargs = { 'max_steps': max_steps }
113 | 
114 |         if max_steps:
115 |             should_stop_op = tf.greater_equal(global_step, max_steps)
116 |         else:
117 |             should_stop_op = tf.constant(False)
118 |         train_step_kwargs['should_stop'] = should_stop_op
119 |         train_step_kwargs['should_log'] = lambda x: ( x % log_every_n_steps == 0 )
120 |         if trace_every_n_steps is not None:
121 |             train_step_kwargs['should_trace'] = tf.equal(
122 |                     tf.mod(global_step, trace_every_n_steps), 0)
123 |             train_step_kwargs['logdir'] = logdir  # logdir must be supplied when tracing is enabled
124 | 
125 |         train_step_kwargs[ 'global_step_copy' ] = tf.identity( global_step, name='global_step_copy' )
126 |         train_step_kwargs[ 'increment_global_step_op' ] = tf.assign( global_step, global_step+1 )
127 | 
128 |     return train_step_kwargs
129 | 
130 | 
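# A hedged sketch of the loss-based early stopping that should_stop_fn's
# docstring (below) alludes to; `recent_losses` and `patience` are
# illustrative additions, not part of the original interface.
def _example_should_stop_early( np_global_step, recent_losses, patience=5, max_steps=None ):
    if max_steps and np_global_step >= max_steps:
        return True
    if len( recent_losses ) <= patience:
        return False
    # Stop once the last `patience` losses show no improvement over the
    # best loss seen before them.
    return min( recent_losses[-patience:] ) >= min( recent_losses[:-patience] )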
131 | ######################
132 | # should_stop_fn
133 | ######################
134 | def should_stop_fn( np_global_step, max_steps=None, **kwargs ):
135 |     '''
136 |     Determines whether training/testing should stop. Currently
137 |     works only based on max_steps, but this could also use forms of
138 |     early stopping based on loss.
139 | 
140 |     Args:
141 |         np_global_step: An int or np.int
142 |         max_steps: An int
143 |         **kwargs: Currently unused
144 |     Returns:
145 |         should_stop: A bool
146 |     '''
147 |     if max_steps and np_global_step >= max_steps:
148 |         return True
149 |     else:
150 |         return False
151 | 
--------------------------------------------------------------------------------
/lib/models/gan_discriminators.py:
--------------------------------------------------------------------------------
1 | ''' gan_discriminators.py
2 | 
3 | Contains discriminators that can be used for a GAN or cGAN loss
4 | '''
5 | from __future__ import absolute_import, division, print_function
6 | from models.utils import *
7 | import tensorflow as tf
8 | import tensorflow.contrib.slim as slim
9 | 
10 | 
11 | @slim.add_arg_scope
12 | def pix2pix_discriminator( decoder_output, is_training, n_layers=3, n_channel_multiplier=64, stride=2,
13 |         weight_decay=0.0001, activation_fn=leaky_relu( leak=0.2 ),
14 |         batch_norm_decay=0.9, batch_norm_epsilon=1e-5, batch_norm_scale=True, batch_norm_center=True,
15 |         scope='discriminator', reuse=None, **kwargs):
16 |     '''
17 |     Builds the discriminator from the pix2pix paper. This does not contain any dropout layers.
18 | 
19 |     Structure:
20 |         input                    -> [ B x H x W x C ]
21 |         conv - relu              -> [ B x (H/2) x (W/2) x n_channel_multiplier ]
22 |         conv - batchnorm - relu  -> [ B x (H/4) x (W/4) x n_channel_multiplier*2^k ]
23 |         ... Depends on n_layers
24 |         conv - batchnorm - relu  -> [ B x H' x W' x n_channel_multiplier*8 ]
25 |         conv - sigmoid           -> [ B x H' x W' x 1 ]
26 |         avg_pool                 -> [ B ]
27 | 
28 |     Args:
29 |         decoder_output: input to the discriminator. Before being given to the fn, this may be concatenated
30 |             channel-wise with the input_imgs so that the discriminator can also see the input imgs.
31 |         is_training: A bool
32 |         n_layers: The number of conv layers to use. At least 3.
33 |         n_channel_multiplier: Proportional to how many channels to use in the discriminator.
34 |         weight_decay: Value to use for L2 weight regularization.
35 |         batch_norm_decay: passed through
36 |         batch_norm_epsilon: passed through
37 |         batch_norm_scale: passed through
38 |         batch_norm_center: passed through
39 |         scope: Scope that all variables will be under
40 |         reuse: Whether to reuse variables in this scope
41 |         **kwargs: Allows other args to be passed in. Unused.
42 | 
43 |     Returns:
44 |         ( net, end_points ): the final conv logits and an end-points dict whose 'probabilities' entry holds the avg-pooled sigmoid outputs.
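    Example (illustrative shapes; `imgs` is a hypothetical input batch):
        imgs = tf.random_normal( [4, 64, 64, 3] )
        logits, end_points = pix2pix_discriminator( imgs, is_training=True )
        probs = end_points[ 'probabilities' ]  # shape [4], avg-pooled sigmoids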
45 |     '''
46 |     batch_norm_params = {
47 |         'is_training': is_training,
48 |         'decay': batch_norm_decay,
49 |         'epsilon': batch_norm_epsilon,
50 |         'scale': batch_norm_scale,
51 |         'center': batch_norm_center,
52 |         'updates_collections': tf.GraphKeys.UPDATE_OPS,
53 |     }
54 | 
55 |     # print('\tbuilding discriminator')
56 |     with tf.variable_scope(scope, reuse=reuse) as sc:
57 |         if reuse:
58 |             sc.reuse_variables()
59 |         end_points_collection = sc.original_name_scope + '_end_points'
60 |         with slim.arg_scope([slim.conv2d, slim.fully_connected],
61 |                 activation_fn=activation_fn,
62 |                 normalizer_fn=slim.batch_norm,
63 |                 normalizer_params=batch_norm_params,
64 |                 outputs_collections=end_points_collection,
65 |                 weights_regularizer=slim.l2_regularizer(weight_decay)):
66 |             with slim.arg_scope([slim.conv2d],
67 |                     padding='SAME',
68 |                     stride=stride):
69 |                 net = decoder_output
70 |                 # print('\t\tinput', net.get_shape())
71 | 
72 |                 # First layer: conv2d-ReLU
73 |                 net = add_conv_layer( net, n_channel_multiplier, [5, 5], scope='conv1', normalizer_fn=None )
74 | 
75 |                 # Central layers: conv2d-batchNorm-ReLU
76 |                 for k_layer in range(1, n_layers-2 ):
77 |                     nf_mult = min( 2**k_layer, 8 )
78 |                     net = add_conv_layer( net, n_channel_multiplier * nf_mult, [5, 5],
79 |                             scope='conv{0}'.format( k_layer + 1 ) )
80 | 
81 |                 # Last two layers: conv2d-batchnorm-relu, but with stride=1
82 |                 nf_mult = min( 2**(n_layers-1), 8 )
83 |                 net = add_conv_layer( net, n_channel_multiplier * nf_mult, [4, 4],
84 |                         stride=1, scope='conv{0}'.format( n_layers - 1 ) )
85 |                 net = add_conv_layer( net, 1, [4, 4],
86 |                         stride=1, normalizer_fn=None,
87 |                         activation_fn=None, scope='conv{0}'.format( n_layers ) )
88 |                 probs = tf.reduce_mean( tf.nn.sigmoid( net ), reduction_indices=[1,2,3], name='avg_pool' )
89 |                 # print('\t\tlogits', net.get_shape())
90 |                 end_points = convert_collection_to_dict(end_points_collection)
91 |                 end_points[ 'probabilities' ] = probs
92 |                 return net, end_points
93 | 
94 | 
95 | @slim.add_arg_scope
96 | def pixelGAN_dsicriminator( decoder_output, is_training, n_channel_multiplier=64,
97 |         weight_decay=0.0001,
98 |         batch_norm_decay=0.997, batch_norm_epsilon=1e-5, batch_norm_scale=True, batch_norm_center=True,
99 |         scope='discriminator', reuse=None, **kwargs):
100 |     '''
101 |     Builds a discriminator that only looks at pixels - no spatial information is used!
102 |     Probably weak, but it appeared in the code for https://arxiv.org/pdf/1611.07004v1.pdf,
103 |     and it's very easy to implement.
104 | 105 | 106 | Structure: 107 | conv2d - ReLU -> [H, W, n_channel_multiplier] 108 | conv2d - BatchNorm - ReLU -> [H, W, 2 * n_channel_multiplier] 109 | conv2d -> [H, W, 1] 110 | 111 | ''' 112 | batch_norm_params = { 113 | 'is_training': is_training, 114 | 'decay': batch_norm_decay, 115 | 'epsilon': batch_norm_epsilon, 116 | 'scale': batch_norm_scale, 117 | 'center': batch_norm_center, 118 | 'updates_collections': tf.GraphKeys.UPDATE_OPS, 119 | } 120 | 121 | print('\tbuilding discriminator') 122 | with tf.variable_scope(scope, reuse=reuse) as sc: 123 | end_points_collection = sc.original_name_scope + '_end_points' 124 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 125 | activation_fn=tf.nn.relu, 126 | normalizer_fn=slim.batch_norm, 127 | normalizer_params=batch_norm_params, 128 | outputs_collections=end_points_collection, 129 | weights_regularizer=slim.l2_regularizer(weight_decay)): 130 | with slim.arg_scope([slim.conv2d], 131 | padding='VALID', 132 | stride=1): 133 | net = decoder_output 134 | print('\t\tinput', net.get_shape()) 135 | net = add_conv_layer( net, n_channel_multiplier, [1, 1], scope='conv1', normalizer_fn=None ) 136 | net = add_conv_layer( net, n_channel_multiplier * 2, [1, 1], scope='conv2' ) 137 | net = add_conv_layer( net, n_channel_multiplier, [1, 1], scope='conv3', 138 | normalizer_fn=None, activation_fn=tf.sigmoid ) 139 | net = tf.reduce_mean( net, reduction_indices=[1,2,3], name='avg_pool' ) 140 | print('\t\tprobabilities', net.get_shape()) 141 | end_points = convert_collection_to_dict(end_points_collection) 142 | return net, end_points 143 | 144 | -------------------------------------------------------------------------------- /lib/data/places_class_names.txt: -------------------------------------------------------------------------------- 1 | '/a/airfield' 2 | '/a/airplane_cabin' 3 | '/a/airport_terminal' 4 | '/a/alcove' 5 | '/a/alley' 6 | '/a/amphitheater' 7 | '/a/amusement_arcade' 8 | '/a/amusement_park' 9 | '/a/apartment_building/outdoor' 10 | '/a/aquarium' 11 | '/a/aqueduct' 12 | '/a/arcade' 13 | '/a/arch' 14 | '/a/archaelogical_excavation' 15 | '/a/archive' 16 | '/a/arena/hockey' 17 | '/a/arena/performance' 18 | '/a/arena/rodeo' 19 | '/a/army_base' 20 | '/a/art_gallery' 21 | '/a/art_school' 22 | '/a/art_studio' 23 | '/a/artists_loft' 24 | '/a/assembly_line' 25 | '/a/athletic_field/outdoor' 26 | '/a/atrium/public' 27 | '/a/attic' 28 | '/a/auditorium' 29 | '/a/auto_factory' 30 | '/a/auto_showroom' 31 | '/b/badlands' 32 | '/b/bakery/shop' 33 | '/b/balcony/exterior' 34 | '/b/balcony/interior' 35 | '/b/ball_pit' 36 | '/b/ballroom' 37 | '/b/bamboo_forest' 38 | '/b/bank_vault' 39 | '/b/banquet_hall' 40 | '/b/bar' 41 | '/b/barn' 42 | '/b/barndoor' 43 | '/b/baseball_field' 44 | '/b/basement' 45 | '/b/basketball_court/indoor' 46 | '/b/bathroom' 47 | '/b/bazaar/indoor' 48 | '/b/bazaar/outdoor' 49 | '/b/beach' 50 | '/b/beach_house' 51 | '/b/beauty_salon' 52 | '/b/bedchamber' 53 | '/b/bedroom' 54 | '/b/beer_garden' 55 | '/b/beer_hall' 56 | '/b/berth' 57 | '/b/biology_laboratory' 58 | '/b/boardwalk' 59 | '/b/boat_deck' 60 | '/b/boathouse' 61 | '/b/bookstore' 62 | '/b/booth/indoor' 63 | '/b/botanical_garden' 64 | '/b/bow_window/indoor' 65 | '/b/bowling_alley' 66 | '/b/boxing_ring' 67 | '/b/bridge' 68 | '/b/building_facade' 69 | '/b/bullring' 70 | '/b/burial_chamber' 71 | '/b/bus_interior' 72 | '/b/bus_station/indoor' 73 | '/b/butchers_shop' 74 | '/b/butte' 75 | '/c/cabin/outdoor' 76 | '/c/cafeteria' 77 | '/c/campsite' 78 | '/c/campus' 79 | '/c/canal/natural' 80 
| '/c/canal/urban' 81 | '/c/candy_store' 82 | '/c/canyon' 83 | '/c/car_interior' 84 | '/c/carrousel' 85 | '/c/castle' 86 | '/c/catacomb' 87 | '/c/cemetery' 88 | '/c/chalet' 89 | '/c/chemistry_lab' 90 | '/c/childs_room' 91 | '/c/church/indoor' 92 | '/c/church/outdoor' 93 | '/c/classroom' 94 | '/c/clean_room' 95 | '/c/cliff' 96 | '/c/closet' 97 | '/c/clothing_store' 98 | '/c/coast' 99 | '/c/cockpit' 100 | '/c/coffee_shop' 101 | '/c/computer_room' 102 | '/c/conference_center' 103 | '/c/conference_room' 104 | '/c/construction_site' 105 | '/c/corn_field' 106 | '/c/corral' 107 | '/c/corridor' 108 | '/c/cottage' 109 | '/c/courthouse' 110 | '/c/courtyard' 111 | '/c/creek' 112 | '/c/crevasse' 113 | '/c/crosswalk' 114 | '/d/dam' 115 | '/d/delicatessen' 116 | '/d/department_store' 117 | '/d/desert/sand' 118 | '/d/desert/vegetation' 119 | '/d/desert_road' 120 | '/d/diner/outdoor' 121 | '/d/dining_hall' 122 | '/d/dining_room' 123 | '/d/discotheque' 124 | '/d/doorway/outdoor' 125 | '/d/dorm_room' 126 | '/d/downtown' 127 | '/d/dressing_room' 128 | '/d/driveway' 129 | '/d/drugstore' 130 | '/e/elevator/door' 131 | '/e/elevator_lobby' 132 | '/e/elevator_shaft' 133 | '/e/embassy' 134 | '/e/engine_room' 135 | '/e/entrance_hall' 136 | '/e/escalator/indoor' 137 | '/e/excavation' 138 | '/f/fabric_store' 139 | '/f/farm' 140 | '/f/fastfood_restaurant' 141 | '/f/field/cultivated' 142 | '/f/field/wild' 143 | '/f/field_road' 144 | '/f/fire_escape' 145 | '/f/fire_station' 146 | '/f/fishpond' 147 | '/f/flea_market/indoor' 148 | '/f/florist_shop/indoor' 149 | '/f/food_court' 150 | '/f/football_field' 151 | '/f/forest/broadleaf' 152 | '/f/forest_path' 153 | '/f/forest_road' 154 | '/f/formal_garden' 155 | '/f/fountain' 156 | '/g/galley' 157 | '/g/garage/indoor' 158 | '/g/garage/outdoor' 159 | '/g/gas_station' 160 | '/g/gazebo/exterior' 161 | '/g/general_store/indoor' 162 | '/g/general_store/outdoor' 163 | '/g/gift_shop' 164 | '/g/glacier' 165 | '/g/golf_course' 166 | '/g/greenhouse/indoor' 167 | '/g/greenhouse/outdoor' 168 | '/g/grotto' 169 | '/g/gymnasium/indoor' 170 | '/h/hangar/indoor' 171 | '/h/hangar/outdoor' 172 | '/h/harbor' 173 | '/h/hardware_store' 174 | '/h/hayfield' 175 | '/h/heliport' 176 | '/h/highway' 177 | '/h/home_office' 178 | '/h/home_theater' 179 | '/h/hospital' 180 | '/h/hospital_room' 181 | '/h/hot_spring' 182 | '/h/hotel/outdoor' 183 | '/h/hotel_room' 184 | '/h/house' 185 | '/h/hunting_lodge/outdoor' 186 | '/i/ice_cream_parlor' 187 | '/i/ice_floe' 188 | '/i/ice_shelf' 189 | '/i/ice_skating_rink/indoor' 190 | '/i/ice_skating_rink/outdoor' 191 | '/i/iceberg' 192 | '/i/igloo' 193 | '/i/industrial_area' 194 | '/i/inn/outdoor' 195 | '/i/islet' 196 | '/j/jacuzzi/indoor' 197 | '/j/jail_cell' 198 | '/j/japanese_garden' 199 | '/j/jewelry_shop' 200 | '/j/junkyard' 201 | '/k/kasbah' 202 | '/k/kennel/outdoor' 203 | '/k/kindergarden_classroom' 204 | '/k/kitchen' 205 | '/l/lagoon' 206 | '/l/lake/natural' 207 | '/l/landfill' 208 | '/l/landing_deck' 209 | '/l/laundromat' 210 | '/l/lawn' 211 | '/l/lecture_room' 212 | '/l/legislative_chamber' 213 | '/l/library/indoor' 214 | '/l/library/outdoor' 215 | '/l/lighthouse' 216 | '/l/living_room' 217 | '/l/loading_dock' 218 | '/l/lobby' 219 | '/l/lock_chamber' 220 | '/l/locker_room' 221 | '/m/mansion' 222 | '/m/manufactured_home' 223 | '/m/market/indoor' 224 | '/m/market/outdoor' 225 | '/m/marsh' 226 | '/m/martial_arts_gym' 227 | '/m/mausoleum' 228 | '/m/medina' 229 | '/m/mezzanine' 230 | '/m/moat/water' 231 | '/m/mosque/outdoor' 232 | '/m/motel' 233 | '/m/mountain' 234 | 
'/m/mountain_path' 235 | '/m/mountain_snowy' 236 | '/m/movie_theater/indoor' 237 | '/m/museum/indoor' 238 | '/m/museum/outdoor' 239 | '/m/music_studio' 240 | '/n/natural_history_museum' 241 | '/n/nursery' 242 | '/n/nursing_home' 243 | '/o/oast_house' 244 | '/o/ocean' 245 | '/o/office' 246 | '/o/office_building' 247 | '/o/office_cubicles' 248 | '/o/oilrig' 249 | '/o/operating_room' 250 | '/o/orchard' 251 | '/o/orchestra_pit' 252 | '/p/pagoda' 253 | '/p/palace' 254 | '/p/pantry' 255 | '/p/park' 256 | '/p/parking_garage/indoor' 257 | '/p/parking_garage/outdoor' 258 | '/p/parking_lot' 259 | '/p/pasture' 260 | '/p/patio' 261 | '/p/pavilion' 262 | '/p/pet_shop' 263 | '/p/pharmacy' 264 | '/p/phone_booth' 265 | '/p/physics_laboratory' 266 | '/p/picnic_area' 267 | '/p/pier' 268 | '/p/pizzeria' 269 | '/p/playground' 270 | '/p/playroom' 271 | '/p/plaza' 272 | '/p/pond' 273 | '/p/porch' 274 | '/p/promenade' 275 | '/p/pub/indoor' 276 | '/r/racecourse' 277 | '/r/raceway' 278 | '/r/raft' 279 | '/r/railroad_track' 280 | '/r/rainforest' 281 | '/r/reception' 282 | '/r/recreation_room' 283 | '/r/repair_shop' 284 | '/r/residential_neighborhood' 285 | '/r/restaurant' 286 | '/r/restaurant_kitchen' 287 | '/r/restaurant_patio' 288 | '/r/rice_paddy' 289 | '/r/river' 290 | '/r/rock_arch' 291 | '/r/roof_garden' 292 | '/r/rope_bridge' 293 | '/r/ruin' 294 | '/r/runway' 295 | '/s/sandbox' 296 | '/s/sauna' 297 | '/s/schoolhouse' 298 | '/s/science_museum' 299 | '/s/server_room' 300 | '/s/shed' 301 | '/s/shoe_shop' 302 | '/s/shopfront' 303 | '/s/shopping_mall/indoor' 304 | '/s/shower' 305 | '/s/ski_resort' 306 | '/s/ski_slope' 307 | '/s/sky' 308 | '/s/skyscraper' 309 | '/s/slum' 310 | '/s/snowfield' 311 | '/s/soccer_field' 312 | '/s/stable' 313 | '/s/stadium/baseball' 314 | '/s/stadium/football' 315 | '/s/stadium/soccer' 316 | '/s/stage/indoor' 317 | '/s/stage/outdoor' 318 | '/s/staircase' 319 | '/s/storage_room' 320 | '/s/street' 321 | '/s/subway_station/platform' 322 | '/s/supermarket' 323 | '/s/sushi_bar' 324 | '/s/swamp' 325 | '/s/swimming_hole' 326 | '/s/swimming_pool/indoor' 327 | '/s/swimming_pool/outdoor' 328 | '/s/synagogue/outdoor' 329 | '/t/television_room' 330 | '/t/television_studio' 331 | '/t/temple/asia' 332 | '/t/throne_room' 333 | '/t/ticket_booth' 334 | '/t/topiary_garden' 335 | '/t/tower' 336 | '/t/toyshop' 337 | '/t/train_interior' 338 | '/t/train_station/platform' 339 | '/t/tree_farm' 340 | '/t/tree_house' 341 | '/t/trench' 342 | '/t/tundra' 343 | '/u/underwater/ocean_deep' 344 | '/u/utility_room' 345 | '/v/valley' 346 | '/v/vegetable_garden' 347 | '/v/veterinarians_office' 348 | '/v/viaduct' 349 | '/v/village' 350 | '/v/vineyard' 351 | '/v/volcano' 352 | '/v/volleyball_court/outdoor' 353 | '/w/waiting_room' 354 | '/w/water_park' 355 | '/w/water_tower' 356 | '/w/waterfall' 357 | '/w/watering_hole' 358 | '/w/wave' 359 | '/w/wet_bar' 360 | '/w/wheat_field' 361 | '/w/wind_farm' 362 | '/w/windmill' 363 | '/y/yard' 364 | '/y/youth_hostel' 365 | '/z/zen_garden' 366 | -------------------------------------------------------------------------------- /lib/models/encoder_decoder_segmentation.py: -------------------------------------------------------------------------------- 1 | '''Segmentation encoder-decoder model 2 | 3 | Assumes there is one input and one output. 4 | 5 | The output is an embedding vector for each pixel, the size of the embedding vector is 6 | the number of channels for target specified by the config file, aka 'target_num_channel'. 
7 | 
8 | The target is a vector of pixel location & segmentation ID. The number of pixels is specified
9 | by 'num_pixels' in the config file.
10 | 
11 | Model-specific config.py options: (inherits from models.base_net)
12 |     'batch_size': An int. The number of images to use in a batch
13 |     'hidden_size': An int. The number of hidden neurons to use.
14 |     'target_num_channels': The number of channels to output from the decoder
15 |     'num_pixels': The number of pixels sampled for metric learning.
16 | 
17 |     Encoder:
18 |         'encoder': A function that takes 'input_placeholder', 'is_training',
19 |             and 'hidden_size', and returns a representation.
20 |         -'encoder_kwargs': A Dict of all the args to pass to 'encoder'. The Dict should
21 |             not include the mandatory arguments given above. ({})
22 |     Decoder:
23 |         'decoder': A function that takes 'encoder_output', 'is_training', and
24 |             'num_output_channels' (value from 'target_num_channels'), and returns a
25 |             batch of representation vectors.
26 |         -'decoder_kwargs': A Dict of all the args to pass to 'decoder'. The Dict should
27 |             not include the mandatory arguments given above. ({})
28 | 
29 | '''
30 | from __future__ import absolute_import, division, print_function
31 | 
32 | from models.encoder_decoder import StandardED
33 | import losses.all as losses_lib
34 | import tensorflow as tf
35 | import tensorflow.contrib.slim as slim
36 | import numpy as np
37 | import pdb
38 | import optimizers.train_steps as train_steps
39 | import optimizers.ops as optimize
40 | from functools import partial
41 | 
42 | class SegmentationED(StandardED):
43 |     ''' Segmentation encoder decoder model
44 |     Encodes an input into a low-dimensional representation and reconstructs
45 |     the input from the low-dimensional representation. Uses a metric loss.
46 | 
47 |     The metric loss follows Equation 1 of the paper
48 |     'Semantic Instance Segmentation via Deep Metric Learning'.
49 | 
50 |     Assumes inputs are scaled to [0, 1] (they will be rescaled to [-1, 1]).
51 |     '''
52 | 
53 |     def __init__(self, global_step, cfg):
54 |         '''
55 |         Args:
56 |             cfg: Configuration.
57 |         '''
58 |         super(SegmentationED, self).__init__(global_step, cfg)
59 |         if 'hidden_size' not in cfg:
60 |             raise ValueError( "config.py for encoder-decoder must specify 'hidden_size'" )
61 |         if 'num_pixels' not in cfg:
62 |             raise ValueError( "config.py for segmentation must specify 'num_pixels' (how many pixels to sample)" )
63 | 
64 |         self.batch_size = cfg['batch_size']
65 |         self.num_pixels = cfg['num_pixels']
66 | 
67 |         idxes = np.asarray([range(self.batch_size)] * self.num_pixels).T
68 |         self.batch_index_slice = tf.cast(tf.stack(idxes), cfg['target_dtype'])
69 | 
70 |         self.input_type = cfg['input_dtype']
71 | 
72 |         self.cfg = cfg
73 | 
74 |     def build_ones_mask(self):
75 |         '''Build a mask of ones which has the same size as the input.
76 |         '''
77 |         cfg = self.cfg
78 |         C = cfg['target_num_channels']
79 |         batch_size = cfg['batch_size']
80 |         mask = tf.constant(1.0, tf.float32, shape=[batch_size, 256, 256, C],
81 |                 name='identity_mask')
82 |         return mask
83 | 
84 |     def get_losses( self, output_vectors, idx_segments, masks ):
85 |         '''Returns the metric loss for 'num_pixels' embedding vectors.
86 | 
87 |         Args:
88 |             output_vectors: Tensor of per-pixel embedding vectors output by the decoder.
89 |             idx_segments: Tensor of (row, col, segment_id) triples for the sampled pixels.
90 |             masks: Tensor of masks to be applied when computing the
91 |                 loss.
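        The pairwise objective implemented below is, for sampled pixels i and j
        with squared embedding distance d_ij:
            sigma_ij = 2 / (1 + exp(d_ij))
            loss = -mean( 1[same id] * log(sigma_ij) + 1[diff id] * log(1 - sigma_ij) )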
92 | 
93 |         Returns:
94 |             losses: list of tensors representing each loss component
95 |         '''
96 |         print('setting up losses...')
97 |         self.output_images = output_vectors
98 |         self.target_images = idx_segments
99 |         # self.targets = idx_segments
100 |         self.masks = masks
101 | 
102 |         with tf.variable_scope('losses'):
103 |             last_axis = 2
104 |             fir, sec, seg_id = tf.unstack(idx_segments, axis=last_axis)
105 | 
106 |             idxes = tf.stack([self.batch_index_slice, fir, sec], axis=last_axis)
107 |             self.embed = tf.gather_nd( output_vectors, idxes )
108 |             embed = self.embed
109 |             square = tf.reduce_sum( embed*embed, axis=-1 )
110 |             square_t = tf.expand_dims(square, axis=-1)
111 |             square = tf.expand_dims(square, axis=1)
112 | 
113 |             pairwise_dist = square - 2 * tf.matmul(embed, tf.transpose(embed, perm=[0,2,1])) + square_t
114 |             pairwise_dist = tf.clip_by_value( pairwise_dist, 0, 80)
115 |             #pairwise_dist = 0 - pairwise_dist
116 |             self.pairwise_dist = pairwise_dist
117 |             pairwise_exp = tf.exp(pairwise_dist) + 1
118 |             sigma = tf.divide(2, pairwise_exp)
119 |             sigma = tf.clip_by_value(sigma, 1e-7, 1.0 - 1e-7)
120 |             self.sigma = sigma
121 |             same = tf.log(sigma)
122 |             diff = tf.log(1 - sigma)
123 | 
124 |             self.same = same
125 |             self.diff = diff
126 | 
127 |             seg_id_i = tf.tile(tf.expand_dims(seg_id, -1), [1, 1, self.num_pixels])
128 |             seg_id_j = tf.transpose(seg_id_i, perm=[0,2,1])
129 | 
130 |             seg_comp = tf.equal(seg_id_i, seg_id_j)
131 |             seg_same = tf.cast(seg_comp, self.input_type)
132 |             seg_diff = 1 - seg_same
133 | 
134 |             loss_matrix = seg_same * same + seg_diff * diff
135 |             reduced_loss = -tf.reduce_mean(loss_matrix)  # / self.num_pixels
136 | 
137 |         tf.add_to_collection(tf.GraphKeys.LOSSES, reduced_loss)
138 |         self.metric_loss = reduced_loss
139 |         losses = [reduced_loss]
140 |         return losses
141 | 
142 |     def get_train_step_fn( self ):
143 |         '''
144 |         Returns:
145 |             A train_step function which takes args:
146 |                 (sess, train_ops, global_step)
147 |         '''
148 |         return partial( train_steps.discriminative_train_step_fn,
149 |                 return_accuracy=self.cfg['return_accuracy'] )
150 | 
151 |     def build_train_op( self, global_step ):
152 |         '''
153 |         Builds train ops for the discriminative task
154 | 
155 |         Args:
156 |             global_step: A Tensor to be incremented
157 |         Returns:
158 |             [ loss_op, accuracy ]
159 |         '''
160 |         if not self.model_built or self.total_loss is None:
161 |             raise RuntimeError( "Cannot build optimizers until 'build_model' ({0}) and 'get_losses' ({1}) are run".format(
162 |                     self.model_built, self.losses_built ) )
163 |         self.global_step = global_step
164 | 
165 |         t_vars = tf.trainable_variables()
166 | 
167 |         # Create the optimizer and the train_op
168 | 
169 |         self.optimizer = optimize.build_optimizer( global_step=self.global_step, cfg=self.cfg )
170 |         if 'clip_norm' in self.cfg:
171 |             self.loss_op = optimize.create_train_op( self.total_loss, self.optimizer, update_global_step=True, clip_gradient_norm=self.cfg['clip_norm'])
172 |         else:
173 |             if self.is_training:
174 |                 self.loss_op = optimize.create_train_op( self.total_loss, self.optimizer, update_global_step=True )
175 |             else:
176 |                 self.loss_op = optimize.create_train_op( self.total_loss, self.optimizer, is_training=False, update_global_step=True )
177 | 
178 |         # The second element stands in for an accuracy op (unused here)
179 | 
180 |         self.train_op = [ self.loss_op, 0 ]
181 |         self.train_op_built = True
182 |         return self.train_op
183 | 
--------------------------------------------------------------------------------
/lib/data/class_list.txt:
-------------------------------------------------------------------------------- 1 | 0 2 | 0 3 | 0 4 | 0 5 | 0 6 | 0 7 | 0 8 | 0 9 | 0 10 | 0 11 | 0 12 | 0 13 | 0 14 | 0 15 | 0 16 | 0 17 | 0 18 | 0 19 | 0 20 | 0 21 | 0 22 | 0 23 | 0 24 | 0 25 | 0 26 | 0 27 | 0 28 | 0 29 | 0 30 | 0 31 | 0 32 | 0 33 | 0 34 | 0 35 | 0 36 | 0 37 | 0 38 | 0 39 | 0 40 | 0 41 | 0 42 | 0 43 | 0 44 | 0 45 | 0 46 | 0 47 | 0 48 | 0 49 | 0 50 | 0 51 | 0 52 | 0 53 | 0 54 | 0 55 | 0 56 | 0 57 | 0 58 | 0 59 | 0 60 | 0 61 | 0 62 | 0 63 | 0 64 | 0 65 | 0 66 | 0 67 | 0 68 | 0 69 | 0 70 | 0 71 | 0 72 | 0 73 | 0 74 | 0 75 | 0 76 | 0 77 | 0 78 | 0 79 | 0 80 | 0 81 | 0 82 | 0 83 | 0 84 | 0 85 | 0 86 | 0 87 | 0 88 | 0 89 | 0 90 | 0 91 | 0 92 | 0 93 | 0 94 | 0 95 | 0 96 | 0 97 | 0 98 | 0 99 | 0 100 | 0 101 | 0 102 | 0 103 | 0 104 | 0 105 | 0 106 | 0 107 | 0 108 | 0 109 | 0 110 | 0 111 | 0 112 | 0 113 | 0 114 | 0 115 | 0 116 | 0 117 | 0 118 | 0 119 | 0 120 | 0 121 | 0 122 | 0 123 | 0 124 | 0 125 | 0 126 | 0 127 | 0 128 | 0 129 | 0 130 | 0 131 | 0 132 | 0 133 | 0 134 | 0 135 | 0 136 | 0 137 | 0 138 | 0 139 | 0 140 | 0 141 | 0 142 | 0 143 | 0 144 | 0 145 | 0 146 | 0 147 | 0 148 | 0 149 | 0 150 | 0 151 | 0 152 | 0 153 | 0 154 | 0 155 | 0 156 | 0 157 | 0 158 | 0 159 | 0 160 | 0 161 | 0 162 | 0 163 | 0 164 | 0 165 | 0 166 | 0 167 | 0 168 | 0 169 | 0 170 | 0 171 | 0 172 | 0 173 | 0 174 | 0 175 | 0 176 | 0 177 | 0 178 | 0 179 | 0 180 | 0 181 | 0 182 | 0 183 | 0 184 | 0 185 | 0 186 | 0 187 | 0 188 | 0 189 | 0 190 | 0 191 | 0 192 | 0 193 | 0 194 | 0 195 | 0 196 | 0 197 | 0 198 | 0 199 | 0 200 | 0 201 | 0 202 | 0 203 | 0 204 | 0 205 | 0 206 | 0 207 | 0 208 | 0 209 | 0 210 | 0 211 | 0 212 | 0 213 | 0 214 | 0 215 | 0 216 | 0 217 | 0 218 | 0 219 | 0 220 | 0 221 | 0 222 | 0 223 | 0 224 | 0 225 | 0 226 | 0 227 | 0 228 | 0 229 | 0 230 | 0 231 | 0 232 | 0 233 | 0 234 | 0 235 | 0 236 | 0 237 | 0 238 | 0 239 | 0 240 | 0 241 | 0 242 | 0 243 | 0 244 | 0 245 | 0 246 | 0 247 | 0 248 | 0 249 | 0 250 | 0 251 | 0 252 | 0 253 | 0 254 | 0 255 | 0 256 | 0 257 | 0 258 | 0 259 | 0 260 | 0 261 | 0 262 | 0 263 | 0 264 | 0 265 | 0 266 | 0 267 | 0 268 | 0 269 | 0 270 | 0 271 | 0 272 | 0 273 | 0 274 | 0 275 | 0 276 | 0 277 | 0 278 | 0 279 | 0 280 | 0 281 | 0 282 | 0 283 | 0 284 | 0 285 | 0 286 | 0 287 | 0 288 | 0 289 | 0 290 | 0 291 | 0 292 | 0 293 | 0 294 | 0 295 | 0 296 | 0 297 | 0 298 | 0 299 | 0 300 | 0 301 | 0 302 | 0 303 | 0 304 | 0 305 | 0 306 | 0 307 | 0 308 | 0 309 | 0 310 | 0 311 | 0 312 | 0 313 | 0 314 | 0 315 | 0 316 | 0 317 | 0 318 | 0 319 | 0 320 | 0 321 | 0 322 | 0 323 | 0 324 | 0 325 | 0 326 | 0 327 | 0 328 | 0 329 | 0 330 | 0 331 | 0 332 | 0 333 | 0 334 | 0 335 | 0 336 | 0 337 | 0 338 | 0 339 | 0 340 | 0 341 | 0 342 | 0 343 | 0 344 | 0 345 | 0 346 | 0 347 | 0 348 | 0 349 | 0 350 | 0 351 | 0 352 | 0 353 | 0 354 | 0 355 | 0 356 | 0 357 | 0 358 | 0 359 | 0 360 | 0 361 | 0 362 | 0 363 | 0 364 | 0 365 | 0 366 | 0 367 | 0 368 | 0 369 | 0 370 | 0 371 | 0 372 | 0 373 | 0 374 | 0 375 | 0 376 | 0 377 | 0 378 | 0 379 | 0 380 | 0 381 | 0 382 | 0 383 | 0 384 | 0 385 | 0 386 | 0 387 | 0 388 | 0 389 | 0 390 | 0 391 | 0 392 | 0 393 | 0 394 | 0 395 | 0 396 | 0 397 | 0 398 | 0 399 | 0 400 | 0 401 | 0 402 | 1 403 | 1 404 | 0 405 | 0 406 | 0 407 | 0 408 | 0 409 | 0 410 | 1 411 | 0 412 | 0 413 | 1 414 | 0 415 | 1 416 | 0 417 | 0 418 | 0 419 | 0 420 | 0 421 | 0 422 | 0 423 | 0 424 | 0 425 | 0 426 | 0 427 | 0 428 | 0 429 | 0 430 | 0 431 | 1 432 | 1 433 | 0 434 | 0 435 | 0 436 | 1 437 | 0 438 | 0 439 | 0 440 | 0 441 | 0 442 | 0 443 | 0 444 | 0 445 | 0 446 | 0 447 | 0 448 | 
0 449 | 0 450 | 0 451 | 0 452 | 0 453 | 0 454 | 1 455 | 0 456 | 0 457 | 0 458 | 0 459 | 0 460 | 0 461 | 0 462 | 0 463 | 1 464 | 1 465 | 0 466 | 0 467 | 0 468 | 0 469 | 0 470 | 0 471 | 1 472 | 0 473 | 0 474 | 0 475 | 0 476 | 0 477 | 0 478 | 0 479 | 1 480 | 0 481 | 0 482 | 0 483 | 0 484 | 0 485 | 0 486 | 0 487 | 0 488 | 1 489 | 1 490 | 0 491 | 0 492 | 0 493 | 1 494 | 0 495 | 0 496 | 1 497 | 0 498 | 0 499 | 0 500 | 0 501 | 0 502 | 0 503 | 0 504 | 0 505 | 1 506 | 1 507 | 0 508 | 0 509 | 1 510 | 0 511 | 0 512 | 0 513 | 0 514 | 0 515 | 0 516 | 0 517 | 0 518 | 0 519 | 0 520 | 1 521 | 1 522 | 1 523 | 0 524 | 0 525 | 0 526 | 0 527 | 1 528 | 1 529 | 1 530 | 0 531 | 1 532 | 0 533 | 1 534 | 1 535 | 1 536 | 0 537 | 0 538 | 0 539 | 0 540 | 1 541 | 0 542 | 0 543 | 0 544 | 0 545 | 0 546 | 1 547 | 1 548 | 0 549 | 1 550 | 0 551 | 1 552 | 0 553 | 0 554 | 1 555 | 0 556 | 0 557 | 0 558 | 0 559 | 0 560 | 1 561 | 0 562 | 0 563 | 0 564 | 0 565 | 1 566 | 0 567 | 0 568 | 0 569 | 0 570 | 0 571 | 0 572 | 0 573 | 0 574 | 0 575 | 0 576 | 0 577 | 0 578 | 0 579 | 0 580 | 1 581 | 0 582 | 0 583 | 0 584 | 0 585 | 0 586 | 0 587 | 0 588 | 0 589 | 1 590 | 0 591 | 0 592 | 0 593 | 0 594 | 0 595 | 0 596 | 0 597 | 0 598 | 0 599 | 1 600 | 0 601 | 0 602 | 0 603 | 0 604 | 0 605 | 0 606 | 0 607 | 1 608 | 0 609 | 0 610 | 0 611 | 0 612 | 0 613 | 0 614 | 0 615 | 0 616 | 0 617 | 0 618 | 0 619 | 0 620 | 1 621 | 1 622 | 0 623 | 0 624 | 0 625 | 0 626 | 0 627 | 0 628 | 0 629 | 0 630 | 0 631 | 0 632 | 0 633 | 1 634 | 0 635 | 0 636 | 0 637 | 0 638 | 0 639 | 0 640 | 0 641 | 0 642 | 0 643 | 0 644 | 0 645 | 0 646 | 0 647 | 0 648 | 0 649 | 1 650 | 0 651 | 0 652 | 1 653 | 0 654 | 0 655 | 0 656 | 0 657 | 0 658 | 0 659 | 0 660 | 0 661 | 0 662 | 0 663 | 1 664 | 0 665 | 1 666 | 0 667 | 0 668 | 0 669 | 0 670 | 0 671 | 0 672 | 0 673 | 0 674 | 1 675 | 0 676 | 0 677 | 0 678 | 0 679 | 0 680 | 0 681 | 0 682 | 1 683 | 0 684 | 0 685 | 0 686 | 0 687 | 0 688 | 0 689 | 0 690 | 0 691 | 0 692 | 0 693 | 0 694 | 0 695 | 0 696 | 0 697 | 0 698 | 0 699 | 0 700 | 0 701 | 1 702 | 0 703 | 0 704 | 0 705 | 0 706 | 0 707 | 1 708 | 0 709 | 0 710 | 0 711 | 0 712 | 1 713 | 0 714 | 1 715 | 0 716 | 0 717 | 1 718 | 0 719 | 0 720 | 0 721 | 0 722 | 1 723 | 0 724 | 0 725 | 0 726 | 0 727 | 0 728 | 0 729 | 1 730 | 1 731 | 0 732 | 0 733 | 0 734 | 1 735 | 0 736 | 0 737 | 1 738 | 1 739 | 1 740 | 0 741 | 0 742 | 0 743 | 1 744 | 0 745 | 0 746 | 0 747 | 0 748 | 0 749 | 1 750 | 0 751 | 1 752 | 0 753 | 1 754 | 1 755 | 1 756 | 0 757 | 0 758 | 0 759 | 0 760 | 0 761 | 1 762 | 1 763 | 0 764 | 0 765 | 0 766 | 1 767 | 0 768 | 0 769 | 0 770 | 0 771 | 1 772 | 0 773 | 0 774 | 1 775 | 0 776 | 0 777 | 0 778 | 0 779 | 1 780 | 0 781 | 0 782 | 0 783 | 0 784 | 0 785 | 0 786 | 0 787 | 0 788 | 0 789 | 0 790 | 0 791 | 0 792 | 0 793 | 0 794 | 0 795 | 1 796 | 0 797 | 0 798 | 0 799 | 0 800 | 1 801 | 0 802 | 0 803 | 0 804 | 0 805 | 1 806 | 1 807 | 0 808 | 0 809 | 0 810 | 1 811 | 0 812 | 0 813 | 0 814 | 0 815 | 0 816 | 0 817 | 0 818 | 0 819 | 0 820 | 0 821 | 0 822 | 0 823 | 0 824 | 0 825 | 0 826 | 1 827 | 0 828 | 1 829 | 0 830 | 0 831 | 0 832 | 1 833 | 0 834 | 0 835 | 0 836 | 0 837 | 0 838 | 0 839 | 0 840 | 0 841 | 0 842 | 0 843 | 0 844 | 0 845 | 1 846 | 0 847 | 1 848 | 0 849 | 0 850 | 1 851 | 0 852 | 1 853 | 0 854 | 0 855 | 0 856 | 0 857 | 0 858 | 0 859 | 0 860 | 1 861 | 0 862 | 1 863 | 0 864 | 0 865 | 0 866 | 0 867 | 0 868 | 0 869 | 0 870 | 0 871 | 0 872 | 0 873 | 0 874 | 0 875 | 0 876 | 0 877 | 0 878 | 0 879 | 0 880 | 0 881 | 0 882 | 0 883 | 1 884 | 1 885 | 0 886 | 0 887 | 0 888 | 0 889 | 0 890 | 0 891 | 0 892 | 0 
893 | 1 894 | 0 895 | 1 896 | 0 897 | 0 898 | 1 899 | 0 900 | 0 901 | 0 902 | 0 903 | 0 904 | 0 905 | 1 906 | 1 907 | 0 908 | 1 909 | 0 910 | 0 911 | 0 912 | 0 913 | 0 914 | 0 915 | 0 916 | 0 917 | 0 918 | 0 919 | 0 920 | 0 921 | 0 922 | 0 923 | 0 924 | 1 925 | 0 926 | 0 927 | 0 928 | 0 929 | 0 930 | 0 931 | 0 932 | 0 933 | 0 934 | 0 935 | 0 936 | 0 937 | 0 938 | 0 939 | 0 940 | 0 941 | 0 942 | 0 943 | 0 944 | 0 945 | 0 946 | 0 947 | 0 948 | 0 949 | 0 950 | 0 951 | 0 952 | 0 953 | 0 954 | 0 955 | 0 956 | 0 957 | 0 958 | 0 959 | 0 960 | 0 961 | 0 962 | 0 963 | 0 964 | 0 965 | 0 966 | 0 967 | 0 968 | 0 969 | 1 970 | 0 971 | 0 972 | 0 973 | 0 974 | 0 975 | 0 976 | 0 977 | 0 978 | 0 979 | 0 980 | 0 981 | 0 982 | 0 983 | 0 984 | 0 985 | 0 986 | 0 987 | 0 988 | 0 989 | 0 990 | 0 991 | 0 992 | 0 993 | 0 994 | 0 995 | 0 996 | 0 997 | 0 998 | 0 999 | 0 1000 | 1 1001 | -------------------------------------------------------------------------------- /tools/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | utils.py 3 | 4 | Contains some useful functions for creating models 5 | """ 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import numpy as np 9 | import os 10 | import pickle 11 | import random 12 | import tensorflow as tf 13 | import tensorflow.contrib.slim as slim 14 | import threading 15 | import concurrent.futures 16 | 17 | 18 | import init_paths 19 | import data.load_ops as load_ops 20 | from data.load_ops import create_input_placeholders_and_ops, get_filepaths_list 21 | import general_utils 22 | import optimizers.train_steps as train_steps 23 | import models.architectures as architectures 24 | 25 | from skimage import feature, transform 26 | import matplotlib.pyplot as plt 27 | 28 | def get_available_devices(): 29 | from tensorflow.python.client import device_lib 30 | gpu_options = tf.GPUOptions(allow_growth=True) 31 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 32 | return device_lib.list_local_devices() 33 | 34 | def get_max_steps( num_samples_epoch, cfg , is_training=True): 35 | if cfg['num_epochs']: 36 | max_steps = num_samples_epoch * cfg['num_epochs'] // cfg['batch_size'] 37 | else: 38 | max_steps = None 39 | if not is_training: 40 | max_steps = num_samples_epoch // cfg['batch_size'] 41 | print( 'number of steps per epoch:', 42 | num_samples_epoch // cfg['batch_size'] ) 43 | print( 'max steps:', max_steps ) 44 | return max_steps 45 | 46 | def load_config( cfg_dir, nopause=False ): 47 | ''' 48 | Raises: 49 | FileNotFoundError if 'config.py' doesn't exist in cfg_dir 50 | ''' 51 | if not os.path.isfile( os.path.join( cfg_dir, 'config.py' ) ): 52 | raise ImportError( 'config.py not found in {0}'.format( cfg_dir ) ) 53 | import sys 54 | sys.path.insert( 0, cfg_dir ) 55 | from config import get_cfg 56 | cfg = get_cfg( nopause ) 57 | # cleanup 58 | try: 59 | del sys.modules[ 'config' ] 60 | except: 61 | pass 62 | sys.path.remove(cfg_dir) 63 | 64 | return cfg 65 | 66 | def print_start_info( cfg, max_steps, is_training=False ): 67 | model_type = 'training' if is_training else 'testing' 68 | print("--------------- begin {0} ---------------".format( model_type )) 69 | print('number of epochs', cfg['num_epochs']) 70 | print('batch size', cfg['batch_size']) 71 | print('total number of training steps:', max_steps) 72 | 73 | 74 | ################## 75 | # Model building 76 | ################## 77 | def create_init_fn( cfg, model ): 78 | # restore model 79 | if cfg['model_path'] is not None: 80 | 
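        # Build an assign op and feed dict that restore all restorable
        # variables from the checkpoint at cfg['model_path'].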
print('******* USING SAVED MODEL *******')
81 |         checkpoint_path = cfg['model_path']
82 |         init_assign_op, init_feed_dict = slim.assign_from_checkpoint( checkpoint_path, slim.get_variables_to_restore() )
83 |         # Create an initial assignment function.
84 |         def InitAssignFn(sess):
85 |             print('restoring model...')
86 |             sess.run(init_assign_op, init_feed_dict)
87 |             print('model restored')
88 | 
89 |         init_fn = InitAssignFn
90 |     else:
91 |         print('******* TRAINING FROM SCRATCH *******')
92 |         init_fn = None
93 |     return init_fn
94 | 
95 | def setup_and_restore_model( sess, inputs, cfg, is_training=False ):
96 |     model = setup_model( inputs, cfg, is_training=is_training )
97 |     model[ 'saver_op' ].restore( sess, cfg[ 'model_path' ] )
98 |     return model
99 | 
100 | def setup_input( cfg, is_training=False, use_filename_queue=False ):
101 |     '''
102 |     Builds input tensors from the config.
103 |     '''
104 |     inputs = {}
105 |     # Generate placeholder input tensors
106 |     placeholders, batches, load_and_enqueue, enqueue_op = create_input_placeholders_and_ops( cfg )
107 | 
108 |     input_batches = list( batches )  # [ inputs, targets, mask, data_idx ]
109 | 
110 |     inputs[ 'enqueue_op' ] = enqueue_op
111 |     inputs[ 'load_and_enqueue' ] = load_and_enqueue
112 |     inputs[ 'max_steps' ] = 6666  # hard-coded placeholder
113 |     inputs[ 'num_samples_epoch' ] = 6666  # hard-coded placeholder
114 | 
115 |     inputs[ 'input_batches' ] = input_batches
116 |     inputs[ 'input_batch' ] = input_batches[0]
117 |     inputs[ 'target_batch' ] = input_batches[1]
118 |     inputs[ 'mask_batch' ] = input_batches[2]
119 |     inputs[ 'data_idxs' ] = input_batches[3]
120 |     inputs[ 'placeholders' ] = placeholders
121 |     inputs[ 'input_placeholder' ] = placeholders[0]
122 |     inputs[ 'target_placeholder' ] = placeholders[1]
123 |     inputs[ 'mask_placeholder' ] = placeholders[2]
124 |     inputs[ 'data_idx_placeholder' ] = placeholders[3]
125 |     return inputs
126 | 
127 | 
128 | def setup_model( inputs, cfg, is_training=False ):
129 |     '''
130 |     Sets up the `model` dict, and instantiates a model in 'model',
131 |     and then calls model['model'].build_model
132 | 
133 |     Args:
134 |         inputs: A dict, the result of setup_input
135 |         cfg: A dict from config.py
136 |         is_training: Bool, used for batch norm and the like
137 | 
138 |     Returns:
139 |         model: A dict with 'model': cfg['model_type']( cfg ), and other
140 |             useful attributes like 'global_step'
141 |     '''
142 |     validate_model( inputs, cfg )
143 |     model = {}
144 |     model[ 'global_step' ] = slim.get_or_create_global_step()
145 | 
146 |     model[ 'input_batch' ] = tf.identity( inputs[ 'input_batch' ] )
147 |     if 'representation_batch' in inputs:
148 |         model[ 'representation_batch' ] = tf.identity( inputs[ 'representation_batch' ] )
149 |     model[ 'target_batch' ] = tf.identity( inputs[ 'target_batch' ] )
150 |     model[ 'mask_batch' ] = tf.identity( inputs[ 'mask_batch' ] )
151 |     model[ 'data_idxs' ] = tf.identity( inputs[ 'data_idxs' ] )
152 | 
153 |     # instantiate the model
154 |     if cfg[ 'model_type' ] == 'empty':
155 |         return model
156 |     else:
157 |         model[ 'model' ] = cfg[ 'model_type' ]( global_step=model[ 'global_step' ], cfg=cfg )
158 | 
159 |     # build the model
160 |     if 'representation_batch' in inputs:
161 |         input_imgs = (inputs[ 'input_batch' ], inputs[ 'representation_batch' ])
162 |     else:
163 |         input_imgs = inputs[ 'input_batch' ]
164 |     model[ 'model' ].build_model(
165 |             input_imgs=input_imgs,
166 |             targets=inputs[ 'target_batch' ],
167 |             masks=inputs[ 'mask_batch' ],
168 |             is_training=is_training )
169 | 
170 |     if is_training:
171 |         model[ 'model' ].build_train_op( global_step=model[ 'global_step' ] )
172 |         model[ 'train_op' ] = model[ 'model' ].train_op
173 |         model[ 'train_step_fn' ] = model[
'model' ].get_train_step_fn() 174 | model[ 'train_step_kwargs' ] = train_steps.get_default_train_step_kwargs( 175 | global_step=model[ 'global_step' ], 176 | max_steps=inputs[ 'max_steps' ], 177 | log_every_n_steps=10 ) 178 | 179 | #model[ 'init_op' ] = model[ 'model' ].init_op 180 | if hasattr( model['model'], 'init_fn' ): 181 | model[ 'init_fn' ] = model['model'].init_fn 182 | else: 183 | model[ 'init_fn' ] = None 184 | 185 | max_to_keep = cfg['num_epochs'] * 2 186 | if 'max_ckpts_to_keep' in cfg: 187 | max_to_keep = cfg['max_ckpts_to_keep'] 188 | model[ 'saver_op' ] = tf.train.Saver(max_to_keep=max_to_keep) 189 | return model 190 | 191 | def validate_model( inputs, cfg ): 192 | general_utils.validate_config( cfg ) 193 | 194 | 195 | def plot(data, xi=None, cmap='RdBu_r', axis=plt, percentile=100, dilation=3.0, alpha=0.8): 196 | dx, dy = 0.05, 0.05 197 | xx = np.arange(0.0, data.shape[1], dx) 198 | yy = np.arange(0.0, data.shape[0], dy) 199 | xmin, xmax, ymin, ymax = np.amin(xx), np.amax(xx), np.amin(yy), np.amax(yy) 200 | extent = xmin, xmax, ymin, ymax 201 | cmap_xi = plt.get_cmap('Greys_r') 202 | cmap_xi.set_bad(alpha=0) 203 | overlay = None 204 | if xi is not None: 205 | # Compute edges (to overlay to heatmaps later) 206 | xi_greyscale = xi if len(xi.shape) == 2 else np.mean(xi, axis=-1) 207 | in_image_upscaled = transform.rescale(xi_greyscale, dilation, mode='constant') 208 | edges = feature.canny(in_image_upscaled).astype(float) 209 | edges[edges < 0.5] = np.nan 210 | edges[:5, :] = np.nan 211 | edges[-5:, :] = np.nan 212 | edges[:, :5] = np.nan 213 | edges[:, -5:] = np.nan 214 | overlay = edges 215 | 216 | abs_max = np.percentile(np.abs(data), percentile) 217 | abs_min = abs_max 218 | 219 | if len(data.shape) == 3: 220 | data = np.mean(data, 2) 221 | axis.imshow(data, extent=extent, interpolation='none', cmap=cmap, vmin=-abs_min, vmax=abs_max) 222 | if overlay is not None: 223 | axis.imshow(overlay, extent=extent, interpolation='none', cmap=cmap_xi, alpha=alpha) 224 | axis.axis('off') 225 | return axis 226 | 227 | 228 | 229 | --------------------------------------------------------------------------------
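A minimal usage sketch for the `plot` helper in tools/utils.py (illustrative only; the arrays below are random stand-ins, and the import assumes tools/ is on sys.path). `plot` renders a signed attribution heatmap, clipping the color range at the given percentile, and overlays Canny edges extracted from the upscaled source image:

import numpy as np
import matplotlib.pyplot as plt
from utils import plot

attr = np.random.randn(256, 256)   # stand-in attribution map
img = np.random.rand(256, 256, 3)  # stand-in source image in [0, 1]
plot(attr, xi=img, percentile=99)  # heatmap with edge overlay, drawn on plt
plt.show()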