├── .gitattributes
├── .github
│   └── ISSUE_TEMPLATE
│       ├── bug_report.md
│       ├── custom.md
│       └── feature_request.md
├── LICENSE
├── README.md
├── SiamMask.py
├── automask.py
├── figures
│   ├── install.gif
│   └── showcase.gif
├── mask_spline.py
├── requirements.txt
├── trackers
│   ├── SiamMask
│   │   ├── net.py
│   │   ├── resnet.py
│   │   ├── siammask.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── anchors.py
│   │       ├── bbox_helper.py
│   │       ├── config_helper.py
│   │       ├── load_helper.py
│   │       ├── tracker_config.py
│   │       └── tracking_utils.py
│   ├── THOR_modules
│   │   ├── modules.py
│   │   ├── utils.py
│   │   └── wrapper.py
│   ├── __init__.py
│   └── tracker.py
└── utils
    ├── __init__.py
    └── bbox_helper.py
/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. Windows] 28 | - Parameters 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Lukas Blecher 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoMask - a Blender Add-on for automatic rotoscoping 2 | This repository contains a Blender 2.8x Add-on that can mask an object in the `Movie Clip Editor` with the help of machine learning. 3 | AutoMask handles the communication between Blender and the SiamMask tracking network. 4 | 5 | Here is an example of AutoMask in use. 6 | ![Example](https://github.com/lukas-blecher/AutoMask/blob/master/figures/showcase.gif?raw=true) 7 | 8 | ## Usage 9 | First, select a bounding box for the object. Then hit one of the track buttons. 10 | The mask for every frame is saved in a separate, newly created Mask Layer. If you are using the single-step masking as a starting point for your mask, you can also refine the generated mask by hand before tracking further. 11 | ### Parameters 12 | * **Max. Length** is roughly the maximum number of pixels a mask segment can trace. The lower this value, the closer the final mask will be to the network output. Bear in mind that the network output is by no means a perfect mask, but it can be a great starting point. 13 | * **Directions** is the number of directions (out of 8 possible) one mask segment can cover. A value of 2 will produce more control points but a closer match to the mask; the [S-Curves](https://docs.blender.org/manual/en/latest/movie_clip/masking/scurve.html) can, however, also handle 3 different directions. 14 | * **Threshold** is the number of pixels that may point in a different direction than the rest of a given segment. How these three settings reach the code is sketched below. 15 | 16 |
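All three parameters are forwarded unchanged to the curve-fitting step in `mask_spline.py`. A minimal sketch of that call with the add-on's default values (`next_mask` stands for the binary mask predicted by SiamMask):

```python
# how the UI parameters map to the fitting routine (defaults shown)
success, crl = fit2mask(
    next_mask,
    maxnum=3,      # "Directions"
    threshold=10,  # "Threshold"
    maxlen=150,    # "Max. Length"
)
```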
17 | ## Installation 18 | 1. Download the repository: 19 | Download the repository as a `.zip` file and extract it, or clone it to your computer. 20 | 2. Python: 21 | To use this Add-on, several third-party libraries are required (see `requirements.txt`). You can either install the dependencies into 22 | the Python bundled with Blender, or, if a compatible Python version is already installed on your system, install the dependencies there. 23 | 3. Adding the Project to the Python path: 24 | Open `automask.py` in your favorite text editor and replace `PYTHON_PATH` with the path to your Python site-packages (if needed) and `PROJECT_DIR` with the path to the directory you downloaded this repository to. 25 | 4. Install Dependencies: 26 | * PyTorch 27 | The neural network that does the heavy lifting is written for [PyTorch](https://pytorch.org/). 28 | An Nvidia GPU is recommended but not strictly required; it does, however, speed up the process significantly. If you have a supported GPU, install PyTorch with GPU support. 29 | * Other Requirements 30 | Most of the required libraries are standard and easy to install (a quick import check is shown below this list). 31 | ```pip install -r requirements.txt``` 32 | 5. Model: 33 | The model weights for SiamMask can be downloaded from [http://www.robots.ox.ac.uk/~qwang/SiamMask_VOT.pth](http://www.robots.ox.ac.uk/~qwang/SiamMask_VOT.pth). Save the file to the subfolder `trackers/SiamMask` under the name `model.pth`. 34 | 6. Add to Blender: 35 | ![Installation](https://github.com/lukas-blecher/AutoMask/blob/master/figures/install.gif?raw=true)
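To confirm that the dependencies resolve in the interpreter Blender actually uses, you can run a few imports in Blender's Python Console; this is only a sanity check, not part of the add-on:

```python
# run in Blender's Python Console (or in the Python you pointed the add-on to)
import torch, cv2, geomdl, skimage
print('torch', torch.__version__, '| OpenCV', cv2.__version__)
print('CUDA available:', torch.cuda.is_available())
```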
36 | ## Acknowledgements 37 | [SiamMask](https://github.com/foolwood/SiamMask), [THOR](https://github.com/xl-sr/THOR) 38 | -------------------------------------------------------------------------------- /SiamMask.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import numpy as np 4 | import sys 5 | from PIL import Image 6 | 7 | from trackers.tracker import SiamMask_Tracker, cfg 8 | from mask_spline import mask2rect 9 | 10 | 11 | class ImageReader: 12 | is_video = True 13 | 14 | def __init__(self, movie_details, framenum): 15 | print(movie_details) 16 | if movie_details['source'] == 'SEQUENCE': 17 | self.is_video = False 18 | dirname = os.path.dirname(movie_details['path']) 19 | imgs = sorted(os.listdir(dirname))  # sort so the frame order is deterministic 20 | ind = imgs.index(os.path.basename(movie_details['path'])) 21 | self.imgs = [os.path.join(dirname, i) for i in imgs[ind:]] 22 | self.i = framenum-1 23 | else: 24 | self.vs = cv2.VideoCapture(movie_details['path']) 25 | self.vs.set(cv2.CAP_PROP_POS_FRAMES, framenum-1) 26 | print(self.is_video) 27 | 28 | def read(self): 29 | if self.is_video: 30 | return self.vs.read() 31 | else: 32 | if self.i+1 > len(self.imgs): 33 | return False, cv2.imread(self.imgs[-1]) 34 | else: 35 | self.i += 1 36 | return True, cv2.imread(self.imgs[self.i-1]) 37 | 38 | 39 | def create_model(path): 40 | return SiamMask_Tracker(cfg, path) 41 | 42 | 43 | def track_object(model, state, mask, movie_details, framenum): 44 | vs = ImageReader(movie_details, framenum) 45 | ret, im = vs.read() 46 | im = im[..., :3]  # drop a possible alpha channel 47 | if type(state) == str: 48 | model = create_model(state) 49 | state = model.setup(im, *mask2rect(mask)) 50 | state['mask'] = mask 51 | if not ret: 52 | return None, state, model 53 | _, im = vs.read() 54 | im = im[..., :3] 55 | state = model.track(im, state) 56 | new_mask = state['mask'] > state['p'].seg_thr 57 | return new_mask, state, model 58 | -------------------------------------------------------------------------------- /automask.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import bpy 4 | from bpy.types import Operator, Panel, PropertyGroup, WindowManager 5 | from bpy.props import PointerProperty, StringProperty, IntProperty, FloatProperty, BoolProperty 6 | import sys 7 | paths = [ 8 | #PYTHON_PATH, 9 | PROJECT_DIR 10 | ] 11 | for p in paths: 12 | sys.path.insert(0, p) 13 | from mask_spline import * 14 | from SiamMask import * 15 | 16 | bl_info = { 17 | 'blender': (2, 80, 0), 18 | 'name': 'AutoMask', 19 | 'category': 'Motion Tracking', 20 | 'location': 'Masking > Movie Clip Editor > AutoMask', 21 | 'author': 'Lukas Blecher' 22 | } 23 | 24 | 25 | class Settings(PropertyGroup): 26 | 27 | maxnum: IntProperty( 28 | name="Directions", 29 | description="The lower this value, the more points will be created", 30 | default=3, 31 | min=1, 32 | max=5 33 | ) 34 | 35 | maxlen: IntProperty( 36 | name="Max. Length", 37 | description="The maximum number of pixels a mask line segment traces", 38 | default=150, 39 | min=1 40 | ) 41 | 42 | threshold: IntProperty( 43 | name="Threshold", 44 | description="The number of points that can point in a different direction\nbefore a new segment is created", 45 | default=10, 46 | min=0 47 | ) 48 | 49 | my_float: FloatProperty( 50 | name="Float Value", 51 | description="A float property", 52 | default=23.7, 53 | min=0.01, 54 | max=30.0 55 | ) 56 | 57 | change_layer: BoolProperty( 58 | name="Change Layer", 59 | description="Change the active Mask Layer according to the frame\nwhen moving along the timeline", 60 | default=True 61 | ) 62 | 63 | 64 | class AutoMask_helper: 65 | def big_trans(self, inv=False): 66 | return lambda x: x 67 | 68 | def small_trans(self, inv=False): 69 | frac = max(self.hw)/float(min(self.hw)) 70 | off = .5-1/(2.*frac) 71 | if not inv: 72 | return lambda x: (x-off)*frac 73 | else: 74 | return lambda x: (x/frac)+off 75 | 76 | def copy_point_attributes(self, point, new_point): 77 | attributes = ['co', 'handle_left', 'handle_left_type', 'handle_right', 'handle_right_type', 'handle_type', 'weight'] 78 | for attr in attributes: 79 | setattr(new_point, attr, getattr(point, attr)) 80 | 81 | def absolute_coord(self, coordinate): 82 | width, height = self.hw 83 | coord = coordinate.copy() 84 | return [self.xtrans(coord.x)*width, (1-self.ytrans(coord.y))*height] 85 | 86 | def relative_coord(self, coordinate): 87 | width, height = self.hw 88 | return [self.xinvt(coordinate[0]/float(width)), self.yinvt(1-(coordinate[1]/float(height)))] 89 | 90 | def set_coordinate_transform(self): 91 | if self.hw[0] < self.hw[1]: 92 | self.xtrans = self.small_trans() 93 | self.xinvt = self.small_trans(True) 94 | self.ytrans = self.big_trans() 95 | self.yinvt = self.big_trans() 96 | elif self.hw[0] > self.hw[1]: 97 | self.ytrans = self.small_trans() 98 | self.yinvt = self.small_trans(True) 99 | self.xtrans = self.big_trans() 100 | self.xinvt = self.big_trans() 101 | else: 102 | self.xtrans = self.big_trans() 103 | self.xinvt = self.big_trans() 104 | self.ytrans = self.big_trans() 105 | self.yinvt = self.big_trans() 106 | 107 | def hide_layer(self, layer, hide=True): 108 | layer.hide = hide 109 | layer.hide_render = hide 110 | layer.hide_select = hide 111 | layer.keyframe_insert('hide') 112 | layer.keyframe_insert('hide_render') 113 | layer.keyframe_insert('hide_select') 114 | 115 | def automask(self, context, model, state, movpath): 116 | mask = context.space_data.mask 117 | if mask is None: 118 | raise ValueError 119 | settings = context.scene.settings 120 | layer = mask.layers.active 121 | if layer is None: 122 | raise ValueError 123 | maskSplines = layer.splines 124 | co_tot, lhand_tot, rhand_tot = [], [], [] 125 | framenum = bpy.context.scene.frame_current 126 | try: 127 | _ = int(layer.name.split('.f')[-1]) 128 | except ValueError: 129 | # no frame identification in the masklayer name 130 | layer.name = layer.name + '.f%i' % framenum 131
| for i, maskSpline in enumerate(maskSplines): 132 | points = maskSpline.points 133 | maskSpline.use_cyclic = True 134 | co, lhand, rhand = [], [], [] 135 | for p in points: 136 | # need types to be free as it is the most general type 137 | p.handle_left_type = 'FREE' 138 | p.handle_right_type = 'FREE' 139 | co.append(self.absolute_coord(p.co)) 140 | lhand.append(self.absolute_coord(p.handle_left)) 141 | rhand.append(self.absolute_coord(p.handle_right)) 142 | # collection of coordinates and handles 143 | crl = [co, rhand, lhand] 144 | # get mask from the point coordinates 145 | curr_mask = crl2mask(crl, int(self.hw[0]), int(self.hw[1])) 146 | 147 | # load model 148 | next_mask, state, model = track_object(model, state, curr_mask, movpath, framenum) 149 | if next_mask is None: 150 | return {'CANCELLED'} 151 | # trace mask returned by SiamMask 152 | success, crl = fit2mask(next_mask, maxnum=settings.maxnum, threshold=settings.threshold, maxlen=settings.maxlen) 153 | success = success and state['score'] > .8 154 | if not success: 155 | return {'CANCELLED'} 156 | # save handle positions for each spline, so we can change the position later 157 | co, rh, lh = crl 158 | co_tot.append(co) 159 | rhand_tot.append(rh) 160 | lhand_tot.append(lh) 161 | #propagate in time 162 | bpy.ops.ed.undo_push() 163 | self.hide_layer(layer, False) 164 | name = '%s.f%i' % (layer.name.split('.f')[0], framenum+1) 165 | new_layer = mask.layers.get(name) 166 | if new_layer is None: 167 | new_layer = mask.layers.new(name=name) 168 | else: 169 | for spline in new_layer.splines: 170 | new_layer.splines.remove(spline) 171 | self.hide_layer(new_layer, True) 172 | mask.layers.active = new_layer 173 | bpy.ops.clip.change_frame(frame=framenum+1) 174 | self.hide_layer(layer, True) 175 | self.hide_layer(new_layer, False) 176 | for i in range(len(maskSplines)): 177 | maskSpline = new_layer.splines.new() 178 | points = maskSpline.points 179 | maskSpline.use_cyclic = True 180 | co, rh, lh = co_tot[i], rhand_tot[i], lhand_tot[i] 181 | # create points in the mask if needed 182 | N, newN = len(points), len(co) 183 | if newN > N: 184 | points.add(newN-N) 185 | for i in range(1, newN-N+1): 186 | self.copy_point_attributes(p, points[-i]) 187 | 188 | # change handles to the found optimum position 189 | for i, p in enumerate(points): 190 | p.co.x, p.co.y = self.relative_coord(co[i]) 191 | p.handle_left.x, p.handle_left.y = self.relative_coord(lh[i]) 192 | p.handle_right.x, p.handle_right.y = self.relative_coord(rh[i]) 193 | return model, state 194 | 195 | 196 | class OBJECT_OT_automask_single(Operator): 197 | bl_idname = "object.automask_single" 198 | bl_label = "" 199 | bl_description = "Track the selected mask \nto the next frame" 200 | 201 | def execute(self, context): 202 | clip = context.space_data.clip 203 | movpath = bpy.path.abspath(clip.filepath) 204 | movie_details = {'path': movpath, 'source': clip.source} 205 | if clip.source == 'SEQUENCE': 206 | movie_details['duration'] = clip.frame_duration 207 | amh = AutoMask_helper() 208 | amh.hw = clip.size 209 | amh.set_coordinate_transform() 210 | proj_dir = paths[-1] 211 | if proj_dir == '': 212 | raise ValueError('AutoMask path is empty.') 213 | state = proj_dir # set first state to proj_dir 214 | model = None 215 | ret = amh.automask(context, model, state, movie_details) 216 | if type(ret) == set: 217 | return ret 218 | del ret 219 | return {'FINISHED'} 220 | 221 | 222 | class OBJECT_OT_automask(Operator): 223 | bl_idname = "object.automask" 224 | bl_label = "" 225 | bl_description = 
"Track the selected mask\nunitl it is lost" 226 | 227 | _updating = False 228 | _calcs_done = True 229 | _timer = None 230 | 231 | def modal(self, context, event): 232 | if event.type in {'RIGHTMOUSE', 'ESC'}: 233 | self._calcs_done = True 234 | elif event.type == 'TIMER' and not self._updating and not self._calcs_done: 235 | self._updating = True 236 | frame_end = context.scene.frame_end 237 | if bpy.context.scene.frame_current < frame_end: 238 | try: 239 | ret = self.amh.automask(context, self.model, self.state, self.amh.movpath) 240 | except ValueError: 241 | self.report({'ERROR'}, 'No Mask is selected') 242 | return self.cancel(context) 243 | if type(ret) == set: 244 | self._calcs_done = True 245 | else: 246 | self.model = ret[0] 247 | self.state = ret[1] 248 | self._updating = False 249 | if self._calcs_done: 250 | return self.cancel(context) 251 | 252 | return {'PASS_THROUGH'} 253 | 254 | def execute(self, context): 255 | clip = context.space_data.clip 256 | self.amh = AutoMask_helper() 257 | movie_details = {'path': bpy.path.abspath(clip.filepath), 'source': clip.source} 258 | if clip.source == 'SEQUENCE': 259 | movie_details['duration'] = clip.frame_duration 260 | self.amh.movpath = movie_details 261 | self.amh.hw = clip.size 262 | self.amh.set_coordinate_transform() 263 | proj_dir = paths[-1] 264 | if proj_dir == '': 265 | raise ValueError('AutoMask path is empty.') 266 | self.state = proj_dir # set first state to proj_dir 267 | self.model = None 268 | self._calcs_done = False 269 | context.window_manager.modal_handler_add(self) 270 | self._updating = False 271 | self._timer = context.window_manager.event_timer_add(.05, window=context.window) 272 | return {'RUNNING_MODAL'} 273 | 274 | def cancel(self, context): 275 | if self._timer is not None: 276 | context.window_manager.event_timer_remove(self._timer) 277 | self._timer = None 278 | del self.model 279 | del self.state 280 | return {'CANCELLED'} 281 | 282 | 283 | def delete_layer_keyframes(layer): 284 | layer.keyframe_delete('hide') 285 | layer.keyframe_delete('hide_render') 286 | layer.keyframe_delete('hide_select') 287 | 288 | 289 | def clear_masks(context, forwards=True): 290 | f = context.scene.frame_current 291 | mask = context.space_data.mask 292 | layers = mask.layers 293 | for l in layers: 294 | try: 295 | l_num = int(l.name.split('.f')[-1]) 296 | except ValueError: 297 | continue 298 | if (forwards and f < l_num) or (not forwards and f > l_num): 299 | delete_layer_keyframes(l) 300 | layers.remove(l) 301 | 302 | 303 | def MaskLayerActivater(scene): 304 | if scene.settings.change_layer: 305 | f = scene.frame_current 306 | masks = bpy.data.masks 307 | for m in masks: 308 | layers = m.layers 309 | for l in layers: 310 | try: 311 | l_num = int(l.name.split('.f')[-1]) 312 | except ValueError: 313 | continue 314 | if f == l_num: 315 | layers.active = l 316 | break 317 | 318 | 319 | class OBJECT_OT_clear_forwards(Operator): 320 | bl_idname = "object.clear_forwards" 321 | bl_label = "" 322 | bl_description = "Delete all the masks after this frame" 323 | 324 | def execute(self, context): 325 | bpy.ops.ed.undo_push() 326 | clear_masks(context) 327 | MaskLayerActivater(context.scene) 328 | return {'FINISHED'} 329 | 330 | 331 | class OBJECT_OT_clear_backwards(Operator): 332 | bl_idname = "object.clear_backwards" 333 | bl_label = "" 334 | bl_description = "Delete all the masks before this frame" 335 | 336 | def execute(self, context): 337 | bpy.ops.ed.undo_push() 338 | clear_masks(context, False) 339 | 
MaskLayerActivater(context.scene) 340 | return {'FINISHED'} 341 | 342 | 343 | class PANEL0_PT_automask(Panel): 344 | bl_label = "Mask Tracking" 345 | bl_idname = "PANEL0_PT_automask" 346 | bl_space_type = 'CLIP_EDITOR' 347 | bl_region_type = 'UI' 348 | bl_category = "AutoMask" 349 | 350 | @classmethod 351 | def poll(cls, context): 352 | return (context.area.spaces.active.clip is not None) 353 | 354 | # Draw UI 355 | def draw(self, context): 356 | settings = context.scene.settings 357 | layout = self.layout 358 | layout.use_property_split = True # Active single-column layout 359 | # track masks operators 360 | c = layout.column() 361 | row = c.row() 362 | split = row.split(factor=0.3) 363 | c = split.column() 364 | c.label(text="Track:") 365 | split = split.split() 366 | c = split.row() 367 | c.operator("object.automask", icon="TRACKING_FORWARDS") 368 | c.operator("object.automask_single", icon="TRACKING_FORWARDS_SINGLE") 369 | # clear mask operators (crashes blender as of now) 370 | #c = layout.column() 371 | #row = c.row() 372 | #split = row.split(factor=0.3) 373 | #c = split.column() 374 | #c.label(text="Clear:") 375 | #split = split.split() 376 | #c = split.row() 377 | #c.operator("object.clear_backwards", icon="TRACKING_CLEAR_BACKWARDS") 378 | #c.operator("object.clear_forwards", icon="TRACKING_CLEAR_FORWARDS") 379 | row = layout.column() 380 | layout.prop(settings, 'maxlen') 381 | layout.prop(settings, 'threshold') 382 | layout.prop(settings, 'maxnum') 383 | layout.prop(settings, 'change_layer') 384 | 385 | layout.separator() 386 | 387 | 388 | classes = (OBJECT_OT_automask_single, OBJECT_OT_automask, OBJECT_OT_clear_forwards, OBJECT_OT_clear_backwards, PANEL0_PT_automask, Settings) 389 | 390 | 391 | def remove_handler(): 392 | my_handler_list = bpy.app.handlers.frame_change_pre 393 | fin = len(my_handler_list) 394 | for idx, func in enumerate(reversed(my_handler_list)): 395 | if func.__name__ == 'MaskLayerActivater': 396 | my_handler_list.pop(fin-1-idx) 397 | 398 | 399 | def register(): 400 | from bpy.utils import register_class 401 | for cls in classes: 402 | register_class(cls) 403 | bpy.types.Scene.settings = PointerProperty(type=Settings) 404 | remove_handler() 405 | bpy.app.handlers.frame_change_pre.append(MaskLayerActivater) 406 | 407 | 408 | def unregister(): 409 | from bpy.utils import unregister_class 410 | for cls in reversed(classes): 411 | unregister_class(cls) 412 | del bpy.types.Scene.settings 413 | remove_handler() 414 | 415 | 416 | if __name__ == "__main__": 417 | register() 418 | -------------------------------------------------------------------------------- /figures/install.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukas-blecher/AutoMask/b301dd0d69c782a16f5ead1abc81a63f1be383f4/figures/install.gif -------------------------------------------------------------------------------- /figures/showcase.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukas-blecher/AutoMask/b301dd0d69c782a16f5ead1abc81a63f1be383f4/figures/showcase.gif -------------------------------------------------------------------------------- /mask_spline.py: -------------------------------------------------------------------------------- 1 | from geomdl import BSpline 2 | from geomdl import utilities 3 | from geomdl import fitting 4 | from PIL import Image, ImageDraw 5 | from skimage import measure 6 | import numpy as np 7 | import logging 8 | logger = 
logging.getLogger('global') 9 | 10 | mapping = {0: np.array([-1., -1.]), 11 | 1: np.array([-1., 0.]), 12 | 2: np.array([0., -1.]), 13 | 3: np.array([-1., 1.]), 14 | 4: np.array([0., 0.]), 15 | 5: np.array([1., 1.]), 16 | 6: np.array([1., 0.]), 17 | 7: np.array([0., 1.]), 18 | 8: np.array([1., -1.])} 19 | 20 | 21 | def dir2num(x): 22 | length = np.sqrt((x**2).sum()) 23 | if length > 0: 24 | x = np.round(x/length) 25 | x = np.round(np.sign(x)) 26 | for i in range(9): 27 | if (x == mapping[i]).all(): 28 | return i 29 | return 4 30 | 31 | 32 | def bspline2mask(cps, width, height, delta=0.05, scaling=5): 33 | connecs = [] 34 | for i in range(len(cps)): 35 | curve = BSpline.Curve() 36 | curve.degree = 3 37 | curve.ctrlpts = cps[i] 38 | curve.knotvector = utilities.generate_knot_vector(curve.degree, len(curve.ctrlpts)) 39 | # print('delta',delta) 40 | curve.delta = delta 41 | curve.evaluate() 42 | connecs.append(curve.evalpts) 43 | 44 | polygon = np.array(connecs).flatten().tolist() 45 | img = Image.new('L', (width, height), 255) 46 | ImageDraw.Draw(img).polygon(polygon, outline=0, fill=0) 47 | mask = np.array(img.resize((width//scaling, height//scaling), Image.NEAREST)) 48 | return mask == 0 49 | 50 | 51 | def crl2mask(crl, width, height, delta=.05, scaling=1): 52 | c, r, l = crl if type(crl) == list else crl.tolist() 53 | cps = [] 54 | for i in range(len(c)): 55 | ip = (i+1) % len(c) 56 | cps.append([c[i], r[i], l[ip], c[ip]]) 57 | return bspline2mask(cps, width, height, delta=delta, scaling=scaling) 58 | 59 | 60 | def mask2rect(mask): 61 | y, x = np.where(mask == 1) 62 | mi, ma = np.array((x.min(), y.min())), np.array((x.max(), y.max())) 63 | return (mi+ma)/2, (ma-mi) 64 | 65 | 66 | def make_circular(cps, distance=5): 67 | # takes control points (cps), returns center/right/left handle arrays (crl) 68 | cps = np.array(cps) 69 | start, end = cps[:, 0, :], cps[:, -1, :] 70 | # first test for approximate circularity 71 | absmask = [np.abs(start[:, i].T[None, ...]-end[:, None, i]) for i in (0, 1)] 72 | for i in range(2): 73 | matrix = absmask[i] < distance 74 | assert matrix.sum(0).all() and matrix.sum(1).all(), 'Mask is not circular' 75 | e = np.arange(len(cps)) 76 | s = np.roll(e, -1) 77 | c = np.mean(np.array([end[e], start[s]]), axis=0) 78 | r = cps[s, 1, :] 79 | l = cps[e, 2, :] 80 | return np.array([c, r, l])[..., [1, 0]] 81 | 82 | 83 | def fit2mask(target, maxnum=4, distance=3, threshold=5, maxlen=150): 84 | contours = measure.find_contours(target, .8) 85 | # choose contour with highest point count 86 | c = contours[np.argmax([len(c) for c in contours])] 87 | # convert to directions and remove unnecessary points 88 | direction = [] 89 | last = c[0] 90 | del_inds = [] 91 | for i in range(-1, len(c)): 92 | number = dir2num(last-c[i]) 93 | if number == 4: 94 | del_inds.append(i % len(c)) 95 | continue 96 | direction.append(number) 97 | last = c[i] 98 | c = np.delete(c, del_inds, axis=0) 99 | direction = np.array(direction) 100 | # split curve into segments 101 | breaks = [0] 102 | count, i = 0, 0 103 | max_pixel = len(direction) 104 | while count < max_pixel: 105 | i = count % len(direction) 106 | if i >= breaks[-1]: 107 | dirs = direction[breaks[-1]:i] 108 | else: 109 | dirs = direction[np.concatenate([np.arange(breaks[-1], len(direction)), np.arange(i)])] 110 | bindirs = np.bincount(dirs) 111 | difdir = np.diff(dirs) 112 | if (np.diff(np.where(difdir != 0)[0]) > threshold).sum() > (maxnum-2) or (len(np.unique(bindirs)) > maxnum and sorted(bindirs)[-maxnum-1] >= threshold): 113 | delta = (difdir[-threshold-1:] != 0).sum() 114 | new_break =
(i-1-delta) % len(direction) 115 | i -= delta 116 | if breaks[0] == 0: 117 | breaks[0] = new_break 118 | max_pixel += new_break 119 | else: 120 | breaks.append(new_break) 121 | elif i-breaks[-1] >= maxlen and breaks[0] != 0: 122 | breaks.append((i-1) % len(direction)) 123 | count += 1 124 | # refine break points to always have 4 or more points for fitting reasons 125 | perm = np.argsort(breaks) 126 | diffs = np.diff([*sorted(breaks), len(direction)+min(breaks)]) 127 | if np.count_nonzero(diffs < 4): 128 | bad_inds = np.where(diffs < 4)[0] 129 | for j in bad_inds: 130 | breaks[perm[(j+1) % len(perm)]] -= int(round(diffs[j]/2)) 131 | for j in sorted(perm[bad_inds])[::-1]: 132 | del breaks[j] 133 | # sort points into found segments 134 | segments = [] 135 | split_ind = np.split(np.arange(len(direction)), sorted(breaks)) 136 | split_ind[0] = np.concatenate((split_ind[-1], split_ind[0])) 137 | del split_ind[-1] 138 | for ind in split_ind: 139 | segments.append(c[ind % len(c)]) 140 | succ = True 141 | crl = None 142 | try: 143 | # check that we have all points 144 | assert sum([len(s) for s in segments]) >= len(c), '%i points were given instead of %i' % (sum([len(s) for s in segments]), len(c)) 145 | # fit bspline curves to the segments 146 | final_cps = [] 147 | for i in range(len(segments)): 148 | points = segments[i].tolist() 149 | if len(points) == 0: 150 | continue 151 | assert len(points) >= 4, "%i points to fit were given. At least 4 points are needed." % len(points) 152 | curve = fitting.approximate_curve(points, 3, ctrlpts_size=4) 153 | final_cps.append(curve.ctrlpts) 154 | 155 | crl = make_circular(final_cps, distance).tolist() 156 | except AssertionError as e: 157 | succ = False 158 | logger.info('No approximation to the mask could be found. Try again with other parameters. %s' % e) 159 | return succ, crl
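The two halves of `mask_spline.py` are near-inverses: `fit2mask` turns a binary mask into spline control data (`crl`), and `crl2mask` rasterizes such data back into a mask. That makes the fitting parameters easy to test outside Blender. A minimal round-trip sketch (the rectangle and values are illustrative, not taken from the add-on):

```python
import numpy as np
from mask_spline import fit2mask, crl2mask

# synthetic binary mask: a filled rectangle
m = np.zeros((256, 256), dtype=bool)
m[64:192, 80:176] = True

ok, crl = fit2mask(m, maxnum=3, threshold=10, maxlen=150)
if ok:
    recon = crl2mask(crl, 256, 256)  # rasterize the fitted spline again
    iou = (m & recon).sum() / float((m | recon).sum())
    print('round-trip IoU: %.3f' % iou)
```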
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scipy>=1.2.1 2 | scikit_image>=0.14.2 3 | opencv_python_headless>=4.1.1.26 4 | imutils>=0.5.3 5 | geomdl>=5.2.9 6 | torch>=1.3.1 7 | numpy>=1.18.0 8 | Pillow>=6.2.1
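The file that follows, `trackers/SiamMask/net.py`, builds the Siamese network; its central primitive is the depthwise cross-correlation in `conv2d_dw_group` (wrapped by `DepthCorr`), where each channel of the search features is correlated with the matching channel of the template features. A self-contained sketch of that operation for a batch of one (the tensor sizes are illustrative):

```python
import torch
import torch.nn.functional as F

x = torch.randn(1, 256, 31, 31)  # search-region features
k = torch.randn(1, 256, 7, 7)    # template ("exemplar") features

# fold the channels into the group dimension: one 7x7 kernel per channel
out = F.conv2d(x.view(1, -1, 31, 31),
               k.view(-1, 1, 7, 7),
               groups=256)
out = out.view(1, 256, out.size(2), out.size(3))
print(out.shape)  # torch.Size([1, 256, 25, 25])
```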
-------------------------------------------------------------------------------- /trackers/SiamMask/net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.load_helper import load_pretrain 6 | from .utils.anchors import Anchors 7 | from .resnet import resnet50 8 | 9 | # basic model 10 | 11 | class SiamMask(nn.Module): 12 | def __init__(self, anchors=None, o_sz=127, g_sz=127): 13 | super(SiamMask, self).__init__() 14 | self.anchors = anchors # anchor_cfg 15 | self.anchor_num = len(self.anchors["ratios"]) * len(self.anchors["scales"]) 16 | self.anchor = Anchors(anchors) 17 | self.features = None 18 | self.rpn_model = None 19 | self.mask_model = None 20 | self.o_sz = o_sz 21 | self.g_sz = g_sz 22 | self.all_anchors = None 23 | 24 | def feature_extractor(self, x): 25 | return self.features(x) 26 | 27 | def rpn(self, template, search): 28 | pred_cls, pred_loc = self.rpn_model(template, search) 29 | return pred_cls, pred_loc 30 | 31 | def mask(self, template, search): 32 | pred_mask = self.mask_model(template, search) 33 | return pred_mask 34 | 35 | def template(self, z): 36 | self.zf = self.feature_extractor(z) 37 | cls_kernel, loc_kernel = self.rpn_model.template(self.zf) 38 | return cls_kernel, loc_kernel 39 | 40 | def track(self, x, cls_kernel=None, loc_kernel=None, softmax=False): 41 | xf = self.feature_extractor(x) 42 | rpn_pred_cls, rpn_pred_loc = self.rpn_model.track(xf, cls_kernel, loc_kernel) 43 | if softmax: 44 | rpn_pred_cls = self.softmax(rpn_pred_cls) 45 | return rpn_pred_cls, rpn_pred_loc 46 | 47 | # rpn 48 | 49 | class RPN(nn.Module): 50 | def __init__(self): 51 | super(RPN, self).__init__() 52 | 53 | def forward(self, z_f, x_f): 54 | raise NotImplementedError 55 | 56 | def template(self, template): 57 | raise NotImplementedError 58 | 59 | def track(self, search): 60 | raise NotImplementedError 61 | 62 | def conv2d_dw_group(x, kernel): 63 | batch, channel = kernel.shape[:2] 64 | ## WRAPPER: changed, otherwise it does not work with batches 65 | # x = x.view(1, batch*channel, x.size(2), x.size(3)) # 1 * (b*c) * k * k 66 | x = x.expand(batch, *x.shape[1:]) 67 | x = x.contiguous().view(1, batch*channel, x.size(2), x.size(3)) # 1 * (b*c) * k * k 68 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) # (b*c) * 1 * H * W 69 | out = F.conv2d(x, kernel, groups=batch*channel) 70 | out = out.view(batch, channel, out.size(2), out.size(3)) 71 | 72 | return out 73 | 74 | class DepthCorr(nn.Module): 75 | def __init__(self, in_channels, hidden, out_channels, kernel_size=3): 76 | super(DepthCorr, self).__init__() 77 | # adjust layer for asymmetrical features 78 | self.conv_kernel = nn.Sequential( 79 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 80 | nn.BatchNorm2d(hidden), 81 | nn.ReLU(inplace=True), 82 | ) 83 | self.conv_search = nn.Sequential( 84 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 85 | nn.BatchNorm2d(hidden), 86 | nn.ReLU(inplace=True), 87 | ) 88 | 89 | self.head = nn.Sequential( 90 | nn.Conv2d(hidden, hidden, kernel_size=1, bias=False), 91 | nn.BatchNorm2d(hidden), 92 | nn.ReLU(inplace=True), 93 | nn.Conv2d(hidden, out_channels, kernel_size=1) 94 | ) 95 | 96 | def forward_corr(self, kernel, input): 97 | kernel = self.conv_kernel(kernel) 98 | input = self.conv_search(input) 99 | feature = conv2d_dw_group(input, kernel) 100 | return feature 101 | 102 | def forward(self, kernel, search): 103 | feature = self.forward_corr(kernel, search) 104 | out = self.head(feature) 105 | return out 106 | 107 | # mask 108 | 109 | class Mask(nn.Module): 110 | def __init__(self): 111 | super(Mask, self).__init__() 112 | 113 | def forward(self, z_f, x_f): 114 | raise NotImplementedError 115 | 116 | def template(self, template): 117 | raise NotImplementedError 118 | 119 | def track(self, search): 120 | raise NotImplementedError 121 | 122 | # additional modules 123 | 124 | class Features(nn.Module): 125 | def __init__(self): 126 | super(Features, self).__init__() 127 | self.feature_size = -1 128 | 129 | def forward(self, x): 130 | raise NotImplementedError 131 | 132 | class ResDownS(nn.Module): 133 | def __init__(self, inplane, outplane): 134 | super(ResDownS, self).__init__() 135 | self.downsample = nn.Sequential( 136 | nn.Conv2d(inplane, outplane, kernel_size=1, bias=False), 137 | nn.BatchNorm2d(outplane)) 138 | 139 | def forward(self, x): 140 | x = self.downsample(x) 141 | if x.size(3) < 20: 142 | l, r = 4, -4 143 | x = x[:, :, l:r, l:r] 144 | return x 145 | 146 | class ResDown(Features): 147 | def __init__(self, pretrain=False): 148 | super(ResDown, self).__init__() 149 | self.features = resnet50(layer3=True, layer4=False) 150 | if pretrain: 151 | load_pretrain(self.features, 'resnet.model') 152 | 153 | self.downsample = ResDownS(1024, 256) 154 | 155 | def forward(self, x): 156 | output = self.features(x) 157 | p3 = self.downsample(output[-1]) 158 | return p3 159 | 160 | def forward_all(self, x): 161 | output = self.features(x) 162 | p3 = self.downsample(output[-1]) 163 | return output, p3 164 | 165 | class UP(RPN): 166 | def __init__(self, anchor_num=5, feature_in=256, feature_out=256): 167 | super(UP, self).__init__() 168 | 169 | self.anchor_num = anchor_num 170 | self.feature_in = feature_in 171 | self.feature_out = feature_out 172 | 173 | self.cls_output = 2 * self.anchor_num 174 | self.loc_output = 4 * self.anchor_num 175 | 176 | self.cls = DepthCorr(feature_in, feature_out, self.cls_output) 177 | self.loc = DepthCorr(feature_in, feature_out, self.loc_output) 178 | 179 | def forward(self, z_f, x_f): 180 | cls = self.cls(z_f, x_f) 181 | loc = self.loc(z_f, x_f) 182 | return cls, loc 183 | 184 | class MaskCorr(Mask): 185 | def __init__(self, oSz=63): 186 | super(MaskCorr, self).__init__() 187 | self.oSz = oSz 188 | self.mask = DepthCorr(256, 256, self.oSz**2) 189 | 190 | def forward(self, z, x): 191 | return self.mask(z, x) 192 | 193 | class Refine(nn.Module): 194 | def __init__(self): 195 | """ 196 | Mask refinement module 197 | Please refer to SiamMask (Appendix A) 198 | https://arxiv.org/abs/1812.05050 199 | """ 200 | super(Refine, self).__init__() 201 | self.v0 = nn.Sequential(nn.Conv2d(64, 16, 3, padding=1), nn.ReLU(), 202 | nn.Conv2d(16, 4, 3, padding=1), nn.ReLU()) 203 | 204 | self.v1 = nn.Sequential(nn.Conv2d(256, 64, 3, padding=1), nn.ReLU(), 205 | nn.Conv2d(64, 16, 3, padding=1), nn.ReLU()) 206 | 207 | self.v2 = nn.Sequential(nn.Conv2d(512, 128, 3, padding=1), nn.ReLU(), 208 | nn.Conv2d(128, 32, 3, padding=1), nn.ReLU()) 209 | 210 | self.h2 = nn.Sequential(nn.Conv2d(32, 32, 3,
padding=1), nn.ReLU(), 211 | nn.Conv2d(32, 32, 3, padding=1), nn.ReLU()) 212 | 213 | self.h1 = nn.Sequential(nn.Conv2d(16, 16, 3, padding=1), nn.ReLU(), 214 | nn.Conv2d(16, 16, 3, padding=1), nn.ReLU()) 215 | 216 | self.h0 = nn.Sequential(nn.Conv2d(4, 4, 3, padding=1), nn.ReLU(), 217 | nn.Conv2d(4, 4, 3, padding=1), nn.ReLU()) 218 | 219 | self.deconv = nn.ConvTranspose2d(256, 32, 15, 15) 220 | 221 | self.post0 = nn.Conv2d(32, 16, 3, padding=1) 222 | self.post1 = nn.Conv2d(16, 4, 3, padding=1) 223 | self.post2 = nn.Conv2d(4, 1, 3, padding=1) 224 | 225 | def forward(self, f, corr_feature, pos=None): 226 | pos = [int(i) for i in pos] 227 | p0 = torch.nn.functional.pad(f[0], [16,16,16,16])[:, :, 4*pos[0]:4*pos[0]+61, 4*pos[1]:4*pos[1]+61] 228 | p1 = torch.nn.functional.pad(f[1], [8,8,8,8])[:, :, 2*pos[0]:2*pos[0]+31, 2*pos[1]:2*pos[1]+31] 229 | p2 = torch.nn.functional.pad(f[2], [4,4,4,4])[:, :, pos[0]:pos[0]+15, pos[1]:pos[1]+15] 230 | 231 | p3 = corr_feature[:, :, pos[0], pos[1]].view(-1, 256, 1, 1) 232 | 233 | out = self.deconv(p3) 234 | out = self.post0(F.interpolate(self.h2(out) + self.v2(p2), size=(31, 31))) 235 | out = self.post1(F.interpolate(self.h1(out) + self.v1(p1), size=(61, 61))) 236 | out = self.post2(F.interpolate(self.h0(out) + self.v0(p0), size=(127, 127))) 237 | out = out.view(-1, 127*127) 238 | return out 239 | 240 | # final siammask model 241 | 242 | class SiamMaskCustom(SiamMask): 243 | def __init__(self, pretrain=False, **kwargs): 244 | super(SiamMaskCustom, self).__init__(**kwargs) 245 | self.features = ResDown(pretrain=pretrain) 246 | self.rpn_model = UP(anchor_num=self.anchor_num, feature_in=256, feature_out=256) 247 | self.mask_model = MaskCorr() 248 | self.refine_model = Refine() 249 | self.best_temp = 0 250 | 251 | def refine(self, f, pos=None): 252 | return self.refine_model(f, pos) 253 | 254 | def template(self, template): 255 | self.zf = self.features(template) 256 | return self.zf 257 | 258 | def track(self, search): 259 | search = self.features(search) 260 | rpn_pred_cls, rpn_pred_loc = self.rpn(self.zf, search) 261 | return rpn_pred_cls, rpn_pred_loc 262 | 263 | def track_mask(self, search): 264 | self.feature, self.search = self.features.forward_all(search) 265 | rpn_pred_cls, rpn_pred_loc = self.rpn(self.zf, self.search) 266 | self.corr_feature = self.mask_model.mask.forward_corr(self.zf, self.search) 267 | pred_mask = self.mask_model.mask.head(self.corr_feature) 268 | return rpn_pred_cls, rpn_pred_loc, pred_mask 269 | 270 | def track_refine(self, pos): 271 | ### WRAPPER 272 | self.corr_feature = self.corr_feature[self.best_temp].unsqueeze(0) 273 | ### 274 | pred_mask = self.refine_model(self.feature, self.corr_feature, pos=pos) 275 | return pred_mask 276 | -------------------------------------------------------------------------------- /trackers/SiamMask/resnet.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | 7 | import torch.nn as nn 8 | import torch 9 | from torch.autograd import Variable 10 | import math 11 | import torch.utils.model_zoo as model_zoo 12 | 13 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 14 | 'resnet152'] 15 | 16 | 17 | model_urls = { 18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 19 | 'resnet34': 
'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 23 | } 24 | 25 | class Features(nn.Module): 26 | def __init__(self): 27 | super(Features, self).__init__() 28 | self.feature_size = -1 29 | 30 | def forward(self, x): 31 | raise NotImplementedError 32 | 33 | def conv3x3(in_planes, out_planes, stride=1): 34 | "3x3 convolution with padding" 35 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 36 | padding=1, bias=False) 37 | 38 | 39 | class BasicBlock(nn.Module): 40 | expansion = 1 41 | 42 | def __init__(self, inplanes, planes, stride=1, downsample=None): 43 | super(BasicBlock, self).__init__() 44 | self.conv1 = conv3x3(inplanes, planes, stride) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.conv2 = conv3x3(planes, planes) 48 | self.bn2 = nn.BatchNorm2d(planes) 49 | self.downsample = downsample 50 | self.stride = stride 51 | 52 | def forward(self, x): 53 | residual = x 54 | 55 | out = self.conv1(x) 56 | out = self.bn1(out) 57 | out = self.relu(out) 58 | 59 | out = self.conv2(out) 60 | out = self.bn2(out) 61 | 62 | if self.downsample is not None: 63 | residual = self.downsample(x) 64 | 65 | out += residual 66 | out = self.relu(out) 67 | 68 | return out 69 | 70 | 71 | class Bottleneck(Features): 72 | expansion = 4 73 | 74 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1): 75 | super(Bottleneck, self).__init__() 76 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 77 | self.bn1 = nn.BatchNorm2d(planes) 78 | # padding = (2 - stride) + (dilation // 2 - 1) 79 | padding = 2 - stride 80 | assert stride==1 or dilation==1, "stride and dilation must have one equals to zero at least" 81 | if dilation > 1: 82 | padding = dilation 83 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 84 | padding=padding, bias=False, dilation=dilation) 85 | self.bn2 = nn.BatchNorm2d(planes) 86 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 87 | self.bn3 = nn.BatchNorm2d(planes * 4) 88 | self.relu = nn.ReLU(inplace=True) 89 | self.downsample = downsample 90 | self.stride = stride 91 | 92 | def forward(self, x): 93 | residual = x 94 | 95 | out = self.conv1(x) 96 | out = self.bn1(out) 97 | out = self.relu(out) 98 | 99 | out = self.conv2(out) 100 | out = self.bn2(out) 101 | out = self.relu(out) 102 | 103 | out = self.conv3(out) 104 | out = self.bn3(out) 105 | 106 | if self.downsample is not None: 107 | residual = self.downsample(x) 108 | 109 | if out.size() != residual.size(): 110 | print(out.size(), residual.size()) 111 | out += residual 112 | 113 | out = self.relu(out) 114 | 115 | return out 116 | 117 | 118 | 119 | class Bottleneck_nop(nn.Module): 120 | expansion = 4 121 | 122 | def __init__(self, inplanes, planes, stride=1, downsample=None): 123 | super(Bottleneck_nop, self).__init__() 124 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 125 | self.bn1 = nn.BatchNorm2d(planes) 126 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 127 | padding=0, bias=False) 128 | self.bn2 = nn.BatchNorm2d(planes) 129 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 130 | self.bn3 = nn.BatchNorm2d(planes * 4) 131 | self.relu = nn.ReLU(inplace=True) 132 | self.downsample = 
downsample 133 | self.stride = stride 134 | 135 | def forward(self, x): 136 | residual = x 137 | 138 | out = self.conv1(x) 139 | out = self.bn1(out) 140 | out = self.relu(out) 141 | 142 | out = self.conv2(out) 143 | out = self.bn2(out) 144 | out = self.relu(out) 145 | 146 | out = self.conv3(out) 147 | out = self.bn3(out) 148 | 149 | if self.downsample is not None: 150 | residual = self.downsample(x) 151 | 152 | s = residual.size(3) 153 | residual = residual[:, :, 1:s-1, 1:s-1] 154 | 155 | out += residual 156 | out = self.relu(out) 157 | 158 | return out 159 | 160 | 161 | class ResNet(nn.Module): 162 | 163 | def __init__(self, block, layers, layer4=False, layer3=False): 164 | self.inplanes = 64 165 | super(ResNet, self).__init__() 166 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=0, # 3 167 | bias=False) 168 | self.bn1 = nn.BatchNorm2d(64) 169 | self.relu = nn.ReLU(inplace=True) 170 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 171 | self.layer1 = self._make_layer(block, 64, layers[0]) 172 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) # 31x31, 15x15 173 | 174 | self.feature_size = 128 * block.expansion 175 | 176 | if layer3: 177 | self.layer3 = self._make_layer(block, 256, layers[2], stride=1, dilation=2) # 15x15, 7x7 178 | self.feature_size = (256 + 128) * block.expansion 179 | else: 180 | self.layer3 = lambda x:x # identity 181 | 182 | if layer4: 183 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=4) # 7x7, 3x3 184 | self.feature_size = 512 * block.expansion 185 | else: 186 | self.layer4 = lambda x:x # identity 187 | 188 | for m in self.modules(): 189 | if isinstance(m, nn.Conv2d): 190 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 191 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 192 | elif isinstance(m, nn.BatchNorm2d): 193 | m.weight.data.fill_(1) 194 | m.bias.data.zero_() 195 | 196 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 197 | downsample = None 198 | dd = dilation 199 | if stride != 1 or self.inplanes != planes * block.expansion: 200 | if stride == 1 and dilation == 1: 201 | downsample = nn.Sequential( 202 | nn.Conv2d(self.inplanes, planes * block.expansion, 203 | kernel_size=1, stride=stride, bias=False), 204 | nn.BatchNorm2d(planes * block.expansion), 205 | ) 206 | else: 207 | if dilation > 1: 208 | dd = dilation // 2 209 | padding = dd 210 | else: 211 | dd = 1 212 | padding = 0 213 | downsample = nn.Sequential( 214 | nn.Conv2d(self.inplanes, planes * block.expansion, 215 | kernel_size=3, stride=stride, bias=False, 216 | padding=padding, dilation=dd), 217 | nn.BatchNorm2d(planes * block.expansion), 218 | ) 219 | 220 | layers = [] 221 | # layers.append(block(self.inplanes, planes, stride, downsample, dilation=dilation)) 222 | layers.append(block(self.inplanes, planes, stride, downsample, dilation=dd)) 223 | self.inplanes = planes * block.expansion 224 | for i in range(1, blocks): 225 | layers.append(block(self.inplanes, planes, dilation=dilation)) 226 | 227 | return nn.Sequential(*layers) 228 | 229 | def forward(self, x): 230 | x = self.conv1(x) 231 | x = self.bn1(x) 232 | p0 = self.relu(x) 233 | x = self.maxpool(p0) 234 | 235 | p1 = self.layer1(x) 236 | p2 = self.layer2(p1) 237 | p3 = self.layer3(p2) 238 | 239 | return p0, p1, p2, p3 240 | 241 | 242 | class ResAdjust(nn.Module): 243 | def __init__(self, 244 | block=Bottleneck, 245 | out_channels=256, 246 | adjust_number=1, 247 | fuse_layers=[2,3,4]): 248 | super(ResAdjust, self).__init__() 249 | self.fuse_layers = set(fuse_layers) 250 | 251 | if 2 in self.fuse_layers: 252 | self.layer2 = self._make_layer(block, 128, 1, out_channels, adjust_number) 253 | if 3 in self.fuse_layers: 254 | self.layer3 = self._make_layer(block, 256, 2, out_channels, adjust_number) 255 | if 4 in self.fuse_layers: 256 | self.layer4 = self._make_layer(block, 512, 4, out_channels, adjust_number) 257 | 258 | self.feature_size = out_channels * len(self.fuse_layers) 259 | 260 | 261 | def _make_layer(self, block, plances, dilation, out, number=1): 262 | 263 | layers = [] 264 | 265 | for _ in range(number): 266 | layer = block(plances * block.expansion, plances, dilation=dilation) 267 | layers.append(layer) 268 | 269 | downsample = nn.Sequential( 270 | nn.Conv2d(plances * block.expansion, out, kernel_size=3, padding=1, bias=False), 271 | nn.BatchNorm2d(out) 272 | ) 273 | layers.append(downsample) 274 | 275 | return nn.Sequential(*layers) 276 | 277 | def forward(self, p2, p3, p4): 278 | 279 | outputs = [] 280 | 281 | if 2 in self.fuse_layers: 282 | outputs.append(self.layer2(p2)) 283 | if 3 in self.fuse_layers: 284 | outputs.append(self.layer3(p3)) 285 | if 4 in self.fuse_layers: 286 | outputs.append(self.layer4(p4)) 287 | # return torch.cat(outputs, 1) 288 | return outputs 289 | 290 | 291 | def resnet18(pretrained=False, **kwargs): 292 | """Constructs a ResNet-18 model. 293 | 294 | Args: 295 | pretrained (bool): If True, returns a model pre-trained on ImageNet 296 | """ 297 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 298 | if pretrained: 299 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 300 | return model 301 | 302 | 303 | def resnet34(pretrained=False, **kwargs): 304 | """Constructs a ResNet-34 model. 
305 | 306 | Args: 307 | pretrained (bool): If True, returns a model pre-trained on ImageNet 308 | """ 309 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 310 | if pretrained: 311 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 312 | return model 313 | 314 | 315 | def resnet50(pretrained=False, **kwargs): 316 | """Constructs a ResNet-50 model. 317 | 318 | Args: 319 | pretrained (bool): If True, returns a model pre-trained on ImageNet 320 | """ 321 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 322 | if pretrained: 323 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 324 | return model 325 | 326 | 327 | def resnet101(pretrained=False, **kwargs): 328 | """Constructs a ResNet-101 model. 329 | 330 | Args: 331 | pretrained (bool): If True, returns a model pre-trained on ImageNet 332 | """ 333 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 334 | if pretrained: 335 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 336 | return model 337 | 338 | 339 | def resnet152(pretrained=False, **kwargs): 340 | """Constructs a ResNet-152 model. 341 | 342 | Args: 343 | pretrained (bool): If True, returns a model pre-trained on ImageNet 344 | """ 345 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 346 | if pretrained: 347 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 348 | return model 349 | 350 | if __name__ == '__main__': 351 | net = resnet50() 352 | print(net) 353 | net = net.cuda() 354 | 355 | var = torch.FloatTensor(1,3,127,127).cuda() 356 | var = Variable(var) 357 | 358 | net(var) 359 | print('*************') 360 | var = torch.FloatTensor(1,3,255,255).cuda() 361 | var = Variable(var) 362 | 363 | net(var) 364 | 365 | -------------------------------------------------------------------------------- /trackers/SiamMask/siammask.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import division 9 | import argparse 10 | import logging 11 | import numpy as np 12 | import cv2 13 | from PIL import Image 14 | from os import makedirs 15 | from os.path import join, isdir, isfile 16 | 17 | import torch 18 | from torch.autograd import Variable 19 | import torch.nn.functional as F 20 | 21 | # relative imports 22 | from .utils.log_helper import init_log, add_file_handler 23 | from .utils.bbox_helper import get_axis_aligned_bbox, cxy_wh_2_rect 24 | from .utils.anchors import Anchors, generate_anchor 25 | from .utils.tracker_config import TrackerConfig 26 | from .utils.tracking_utils import get_subwindow_tracking 27 | 28 | def SiamMask_init(im, target_pos, target_sz, model, hp=None): 29 | state = dict() 30 | state['im_h'] = im.shape[0] 31 | state['im_w'] = im.shape[1] 32 | 33 | p = TrackerConfig() 34 | p.update(hp, model.anchors) 35 | p.renew() 36 | 37 | p.scales = model.anchors['scales'] 38 | p.ratios = model.anchors['ratios'] 39 | p.anchor_num = len(p.ratios) * len(p.scales) 40 | p.anchor = generate_anchor(model.anchors, p.score_size) 41 | 42 | avg_chans = np.mean(im, axis=(0, 1)) 43 | 44 | if p.windowing == 'cosine': 45 | window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size)) 46 | elif p.windowing == 'uniform': 47 | window = np.ones((p.score_size, p.score_size)) 
48 | window = np.tile(window.flatten(), p.anchor_num) 49 | 50 | use_cuda = torch.cuda.is_available() 51 | state['device'] = torch.device("cuda" if use_cuda else "cpu") 52 | state['p'] = p 53 | state['model'] = model 54 | state['avg_chans'] = avg_chans 55 | state['window'] = window 56 | state['score'] = 1.0 57 | state['target_pos'] = target_pos 58 | state['target_sz'] = target_sz 59 | return state 60 | 61 | def SiamMask_track(state, im, temp_mem): 62 | p = state['p'] 63 | avg_chans = state['avg_chans'] 64 | window = state['window'] 65 | old_pos = state['target_pos'] 66 | old_sz = state['target_sz'] 67 | dev = state['device'] 68 | 69 | # get search area 70 | wc_x = old_sz[1] + p.context_amount * sum(old_sz) 71 | hc_x = old_sz[0] + p.context_amount * sum(old_sz) 72 | s_z = np.sqrt(wc_x * hc_x) 73 | 74 | scale_x = p.exemplar_size / s_z 75 | d_search = (p.instance_size - p.exemplar_size) / 2 76 | pad = d_search / scale_x 77 | s_x = s_z + 2 * pad 78 | crop_box = [old_pos[0] - round(s_x) / 2, old_pos[1] - round(s_x) / 2, round(s_x), round(s_x)] 79 | 80 | # extract scaled crops for search region x at previous target position 81 | x_crop = Variable(get_subwindow_tracking(im, old_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0)) 82 | 83 | # track 84 | target_pos, target_sz, score, best_id = temp_mem.batch_evaluate(x_crop.to(dev), old_pos, 85 | old_sz, window, 86 | scale_x, p) 87 | 88 | # mask refinement 89 | best_pscore_id_mask = np.unravel_index(best_id, (5, p.score_size, p.score_size)) 90 | delta_x, delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1] 91 | mask = state['model'].track_refine((delta_y, delta_x)).to(dev).sigmoid().squeeze().view( 92 | p.out_size, p.out_size).cpu().data.numpy() 93 | 94 | def crop_back(image, bbox, out_sz, padding=-1): 95 | a = (out_sz[0] - 1) / bbox[2] 96 | b = (out_sz[1] - 1) / bbox[3] 97 | c = -a * bbox[0] 98 | d = -b * bbox[1] 99 | mapping = np.array([[a, 0, c], 100 | [0, b, d]]).astype(float) 101 | crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]), 102 | flags=cv2.INTER_LINEAR, 103 | borderMode=cv2.BORDER_CONSTANT, 104 | borderValue=padding) 105 | return crop 106 | 107 | s = crop_box[2] / p.instance_size 108 | sub_box = [crop_box[0] + (delta_x - p.base_size / 2) * p.total_stride * s, 109 | crop_box[1] + (delta_y - p.base_size / 2) * p.total_stride * s, 110 | s * p.exemplar_size, s * p.exemplar_size] 111 | s = p.out_size / sub_box[2] 112 | back_box = [-sub_box[0] * s, -sub_box[1] * s, state['im_w'] * s, state['im_h'] * s] 113 | mask_in_img = crop_back(mask, back_box, (state['im_w'], state['im_h'])) 114 | 115 | target_mask = (mask_in_img > p.seg_thr).astype(np.uint8) 116 | if cv2.__version__.split('.')[0] == '4': 117 | contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 118 | else: 119 | _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 120 | cnt_area = [cv2.contourArea(cnt) for cnt in contours] 121 | if len(contours) != 0 and np.max(cnt_area) > 100: 122 | contour = contours[np.argmax(cnt_area)] # use max area polygon 123 | polygon = contour.reshape(-1, 2) 124 | prbox = cv2.boxPoints(cv2.minAreaRect(polygon)) # Rotated Rectangle 125 | rbox_in_img = prbox 126 | else: # empty mask 127 | location = cxy_wh_2_rect(target_pos, target_sz) 128 | rbox_in_img = np.array([[location[0], location[1]], 129 | [location[0] + location[2], location[1]], 130 | [location[0] + location[2], location[1] + location[3]], 131 | [location[0], location[1] + location[3]]]) 132 | 133 | state['mask'] =
mask_in_img 134 | state['polygon'] = rbox_in_img 135 | 136 | # clip in min and max of the bb 137 | target_pos[0] = max(0, min(state['im_w'], target_pos[0])) 138 | target_pos[1] = max(0, min(state['im_h'], target_pos[1])) 139 | target_sz[0] = max(10, min(state['im_w'], target_sz[0])) 140 | target_sz[1] = max(10, min(state['im_h'], target_sz[1])) 141 | 142 | state['target_pos'] = target_pos 143 | state['target_sz'] = target_sz 144 | state['score'] = score 145 | state['crop'] = x_crop 146 | 147 | return state 148 | -------------------------------------------------------------------------------- /trackers/SiamMask/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukas-blecher/AutoMask/b301dd0d69c782a16f5ead1abc81a63f1be383f4/trackers/SiamMask/utils/__init__.py -------------------------------------------------------------------------------- /trackers/SiamMask/utils/anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | from .bbox_helper import center2corner, corner2center 11 | 12 | def generate_anchor(cfg, score_size): 13 | anchors = Anchors(cfg) 14 | anchor = anchors.anchors 15 | x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3] 16 | anchor = np.stack([(x1+x2)*0.5, (y1+y2)*0.5, x2-x1, y2-y1], 1) 17 | 18 | total_stride = anchors.stride 19 | anchor_num = anchor.shape[0] 20 | 21 | anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) 22 | ori = - (score_size // 2) * total_stride 23 | xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], 24 | [ori + total_stride * dy for dy in range(score_size)]) 25 | xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ 26 | np.tile(yy.flatten(), (anchor_num, 1)).flatten() 27 | anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) 28 | return anchor 29 | 30 | class Anchors: 31 | def __init__(self, cfg): 32 | self.stride = 8 33 | self.ratios = [0.33, 0.5, 1, 2, 3] 34 | self.scales = [8] 35 | self.round_dight = 0 36 | self.image_center = 0 37 | self.size = 0 38 | 39 | self.__dict__.update(cfg) 40 | 41 | self.anchor_num = len(self.scales) * len(self.ratios) 42 | self.anchors = None # in single position (anchor_num*4) 43 | self.all_anchors = None # in all position 2*(4*anchor_num*h*w) 44 | self.generate_anchors() 45 | 46 | def generate_anchors(self): 47 | self.anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) 48 | 49 | size = self.stride * self.stride 50 | count = 0 51 | for r in self.ratios: 52 | if self.round_dight > 0: 53 | ws = round(math.sqrt(size*1. / r), self.round_dight) 54 | hs = round(ws * r, self.round_dight) 55 | else: 56 | ws = int(math.sqrt(size*1. 
/ r)) 57 | hs = int(ws * r) 58 | 59 | for s in self.scales: 60 | w = ws * s 61 | h = hs * s 62 | self.anchors[count][:] = [-w*0.5, -h*0.5, w*0.5, h*0.5][:] 63 | count += 1 64 | 65 | def generate_all_anchors(self, im_c, size): 66 | if self.image_center == im_c and self.size == size: 67 | return False 68 | self.image_center = im_c 69 | self.size = size 70 | 71 | a0x = im_c - size // 2 * self.stride 72 | ori = np.array([a0x] * 4, dtype=np.float32) 73 | zero_anchors = self.anchors + ori 74 | 75 | x1 = zero_anchors[:, 0] 76 | y1 = zero_anchors[:, 1] 77 | x2 = zero_anchors[:, 2] 78 | y2 = zero_anchors[:, 3] 79 | 80 | x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1), [x1, y1, x2, y2]) 81 | cx, cy, w, h = corner2center([x1, y1, x2, y2]) 82 | 83 | disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride 84 | disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride 85 | 86 | cx = cx + disp_x 87 | cy = cy + disp_y 88 | 89 | # broadcast 90 | zero = np.zeros((self.anchor_num, size, size), dtype=np.float32) 91 | cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h]) 92 | x1, y1, x2, y2 = center2corner([cx, cy, w, h]) 93 | 94 | self.all_anchors = np.stack([x1, y1, x2, y2]), np.stack([cx, cy, w, h]) 95 | return True 96 | 97 | 98 | -------------------------------------------------------------------------------- /trackers/SiamMask/utils/bbox_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | from collections import namedtuple 10 | 11 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 12 | BBox = Corner 13 | Center = namedtuple('Center', 'x y w h') 14 | 15 | 16 | def corner2center(corner): 17 | """ 18 | :param corner: Corner or np.array 4*N 19 | :return: Center or 4 np.array N 20 | """ 21 | if isinstance(corner, Corner): 22 | x1, y1, x2, y2 = corner 23 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 24 | else: 25 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 26 | x = (x1 + x2) * 0.5 27 | y = (y1 + y2) * 0.5 28 | w = x2 - x1 29 | h = y2 - y1 30 | return x, y, w, h 31 | 32 | 33 | def center2corner(center): 34 | """ 35 | :param center: Center or np.array 4*N 36 | :return: Corner or np.array 4*N 37 | """ 38 | if isinstance(center, Center): 39 | x, y, w, h = center 40 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 41 | else: 42 | x, y, w, h = center[0], center[1], center[2], center[3] 43 | x1 = x - w * 0.5 44 | y1 = y - h * 0.5 45 | x2 = x + w * 0.5 46 | y2 = y + h * 0.5 47 | return x1, y1, x2, y2 48 | 49 | 50 | def cxy_wh_2_rect(pos, sz): 51 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 52 | 53 | 54 | def get_axis_aligned_bbox(region): 55 | nv = region.size 56 | if nv == 8: 57 | cx = np.mean(region[0::2]) 58 | cy = np.mean(region[1::2]) 59 | x1 = min(region[0::2]) 60 | x2 = max(region[0::2]) 61 | y1 = min(region[1::2]) 62 | y2 = max(region[1::2]) 63 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) 64 | A2 = (x2 - x1) * (y2 - y1) 65 | s = np.sqrt(A1 / A2) 66 | w = s * (x2 - x1) + 1 67 | h = s * (y2 - y1) + 1 68 | else: 69 | x = region[0] 70 | y = region[1] 71 | w = region[2] 72 | h = region[3] 73 | cx = 
x+w/2 74 | cy = y+h/2 75 | 76 | return cx, cy, w, h 77 | 78 | 79 | -------------------------------------------------------------------------------- /trackers/SiamMask/utils/config_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import json 9 | from os.path import exists 10 | 11 | 12 | def load_config(config, arch): 13 | assert exists(config), '"{}" does not exist'.format(config) 14 | config = json.load(open(config)) 15 | 16 | # deal with network 17 | if 'network' not in config: 18 | print('Warning: "network" missing in config. This will be an error in the next version') 19 | 20 | config['network'] = {} 21 | 22 | if not arch: 23 | raise Exception('no arch provided') 24 | 25 | config['network']['arch'] = arch 26 | 27 | return config 28 | 29 | -------------------------------------------------------------------------------- /trackers/SiamMask/utils/load_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | logger = logging.getLogger('global') 4 | 5 | 6 | def check_keys(model, pretrained_state_dict): 7 | ckpt_keys = set(pretrained_state_dict.keys()) 8 | model_keys = set(model.state_dict().keys()) 9 | used_pretrained_keys = model_keys & ckpt_keys 10 | unused_pretrained_keys = ckpt_keys - model_keys 11 | missing_keys = model_keys - ckpt_keys 12 | if len(missing_keys) > 0: 13 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 14 | logger.info('missing keys:{}'.format(len(missing_keys))) 15 | if len(unused_pretrained_keys) > 0: 16 | logger.info('[Warning] unused_pretrained_keys: {}'.format(unused_pretrained_keys)) 17 | logger.info('unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 18 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 19 | assert len(used_pretrained_keys) > 0, 'loaded no keys from the pretrained checkpoint' 20 | return True 21 | 22 | 23 | def remove_prefix(state_dict, prefix): 24 | ''' Old-style models store all parameter names with the shared prefix 'module.' ''' 25 | logger.info('remove prefix \'{}\''.format(prefix)) 26 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 27 | return {f(key): value for key, value in state_dict.items()} 28 | 29 | 30 | def load_pretrain(model, pretrained_path): 31 | logger.info('load pretrained model from {}'.format(pretrained_path)) 32 | 33 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | pretrained_dict = torch.load(pretrained_path, map_location=device) 35 | if "state_dict" in pretrained_dict.keys(): 36 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 37 | else: 38 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 39 | 40 | try: 41 | check_keys(model, pretrained_dict) 42 | except AssertionError: 43 | logger.info('[Warning]: using pretrain as features. Adding "features." as prefix') 44 | new_dict = {} 45 | for k, v in pretrained_dict.items(): 46 | k = 'features.' 
+ k 47 | new_dict[k] = v 48 | pretrained_dict = new_dict 49 | check_keys(model, pretrained_dict) 50 | model.load_state_dict(pretrained_dict, strict=False) 51 | return model 52 | -------------------------------------------------------------------------------- /trackers/SiamMask/utils/log_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from __future__ import division 7 | 8 | import os 9 | import logging 10 | import sys 11 | 12 | if hasattr(sys, 'frozen'): # support for py2exe 13 | _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:]) 14 | elif __file__[-4:].lower() in ['.pyc', '.pyo']: 15 | _srcfile = __file__[:-4] + '.py' 16 | else: 17 | _srcfile = __file__ 18 | _srcfile = os.path.normcase(_srcfile) 19 | 20 | 21 | logs = set() 22 | 23 | 24 | class Filter: 25 | def __init__(self, flag): 26 | self.flag = flag 27 | 28 | def filter(self, x): return self.flag 29 | 30 | 31 | class Dummy: 32 | def __init__(self, *arg, **kwargs): 33 | pass 34 | 35 | def __getattr__(self, arg): 36 | def dummy(*args, **kwargs): pass 37 | return dummy 38 | 39 | 40 | def get_format(logger, level): 41 | if 'SLURM_PROCID' in os.environ: 42 | rank = int(os.environ['SLURM_PROCID']) 43 | 44 | if level == logging.INFO: 45 | logger.addFilter(Filter(rank == 0)) 46 | else: 47 | rank = 0 48 | format_str = '[%(asctime)s-rk{}-%(filename)s#%(lineno)3d] %(message)s'.format(rank) 49 | formatter = logging.Formatter(format_str) 50 | return formatter 51 | 52 | 53 | def init_log(name, level = logging.INFO, format_func=get_format): 54 | if (name, level) in logs: return 55 | logs.add((name, level)) 56 | logger = logging.getLogger(name) 57 | logger.setLevel(level) 58 | ch = logging.StreamHandler() 59 | ch.setLevel(level) 60 | formatter = format_func(logger, level) 61 | ch.setFormatter(formatter) 62 | logger.addHandler(ch) 63 | return logger 64 | 65 | 66 | def add_file_handler(name, log_file, level = logging.INFO): 67 | logger = logging.getLogger(name) 68 | fh = logging.FileHandler(log_file) 69 | fh.setFormatter(get_format(logger, level)) 70 | logger.addHandler(fh) 71 | 72 | 73 | init_log('global') 74 | 75 | -------------------------------------------------------------------------------- /trackers/SiamMask/utils/tracker_config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from __future__ import division 7 | from .anchors import Anchors 8 | 9 | 10 | class TrackerConfig(object): 11 | # These are the default hyper-params for SiamMask 12 | penalty_k = 0.04 13 | window_influence = 0.42 14 | lr = 0.25 15 | seg_thr = 0.3 # for mask 16 | windowing = 'cosine' # to penalize large displacements [cosine/uniform] 17 | # Params from the network architecture, have to be consistent with the training 18 | exemplar_size = 127 # input z size 19 | instance_size = 255 # input x size (search region) 20 | instance_size_glob = 550 # input x size (search region) 21 | total_stride = 8 22 | out_size = 63 # for mask 23 | base_size = 8 24 | score_size = (instance_size-exemplar_size)//total_stride+1+base_size 25 | 
score_size_glob = (instance_size_glob-exemplar_size)//total_stride+1+base_size 26 | context_amount = 0.5 # context amount for the exemplar 27 | ratios = [0.33, 0.5, 1, 2, 3] 28 | scales = [8, ] 29 | anchor_num = len(ratios) * len(scales) 30 | round_dight = 0 31 | anchor = [] 32 | 33 | def update(self, newparam=None, anchors=None): 34 | if newparam: 35 | for key, value in newparam.items(): 36 | setattr(self, key, value) 37 | if anchors is not None: 38 | if isinstance(anchors, dict): 39 | anchors = Anchors(anchors) 40 | if isinstance(anchors, Anchors): 41 | self.total_stride = anchors.stride 42 | self.ratios = anchors.ratios 43 | self.scales = anchors.scales 44 | self.round_dight = anchors.round_dight 45 | self.renew() 46 | 47 | def renew(self): 48 | self.score_size = (self.instance_size - self.exemplar_size) // self.total_stride + 1 + self.base_size 49 | self.anchor_num = len(self.ratios) * len(self.scales) 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /trackers/SiamMask/utils/tracking_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import cv2 4 | 5 | def to_torch(ndarray): 6 | if type(ndarray).__module__ == 'numpy': 7 | return torch.from_numpy(ndarray) 8 | elif not torch.is_tensor(ndarray): 9 | raise ValueError("Cannot convert {} to torch tensor" 10 | .format(type(ndarray))) 11 | return ndarray 12 | 13 | 14 | def im_to_torch(img): 15 | img = np.transpose(img, (2, 0, 1)) # C*H*W 16 | img = to_torch(img).float() 17 | return img 18 | 19 | 20 | 21 | def get_subwindow_tracking(im, pos, model_sz, original_sz, avg_chans, out_mode='torch'): 22 | if isinstance(pos, float): 23 | pos = [pos, pos] 24 | sz = original_sz 25 | im_sz = im.shape 26 | c = (original_sz + 1) / 2 27 | context_xmin = round(pos[0] - c) 28 | context_xmax = context_xmin + sz - 1 29 | context_ymin = round(pos[1] - c) 30 | context_ymax = context_ymin + sz - 1 31 | left_pad = int(max(0., -context_xmin)) 32 | top_pad = int(max(0., -context_ymin)) 33 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 34 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 35 | 36 | context_xmin = context_xmin + left_pad 37 | context_xmax = context_xmax + left_pad 38 | context_ymin = context_ymin + top_pad 39 | context_ymax = context_ymax + top_pad 40 | 41 | r, c, k = im_sz 42 | if any([top_pad, bottom_pad, left_pad, right_pad]): 43 | te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) 44 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 45 | if top_pad: 46 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 47 | if bottom_pad: 48 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 49 | if left_pad: 50 | te_im[:, 0:left_pad, :] = avg_chans 51 | if right_pad: 52 | te_im[:, c + left_pad:, :] = avg_chans 53 | im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 54 | else: 55 | im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 56 | 57 | if not np.array_equal(model_sz, original_sz): 58 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) 59 | else: 60 | im_patch = im_patch_original 61 | 62 | return im_to_torch(im_patch) 63 | -------------------------------------------------------------------------------- /trackers/THOR_modules/modules.py: -------------------------------------------------------------------------------- 1 | # 
-------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import os 8 | from math import ceil, floor 9 | from collections import deque 10 | import abc 11 | 12 | import numpy as np 13 | import torch 14 | import torch.nn.functional as F 15 | 16 | from .utils import to_numpy, print_color 17 | 18 | TEMPLATE_SIZE = 127 # for visualization 19 | 20 | class TemplateModule(): 21 | def __init__(self, K, verbose, viz): 22 | self.verbose = verbose 23 | self.viz = viz 24 | self.is_full = False 25 | 26 | self._K = K 27 | self._templates_stack = None 28 | self._base_sim = 0 29 | self._gram_matrix = None 30 | 31 | if viz: 32 | # canvas to visualize the templates 33 | self._canvas_break = 5 # each row shows 5 templates max 34 | rows, cols = ceil(K/self._canvas_break), min(K, self._canvas_break) 35 | self.canvas = np.zeros((TEMPLATE_SIZE*rows, TEMPLATE_SIZE*cols, 3), dtype=np.uint8) 36 | 37 | def __len__(self): 38 | return self._K 39 | 40 | def pairwise_similarities(self, T_n, to_cpu=True): 41 | """ 42 | calculate similarity of given template to all templates in memory 43 | """ 44 | assert isinstance(T_n, torch.Tensor) 45 | sims = F.conv2d(T_n, self._templates_stack) 46 | if to_cpu: 47 | return np.squeeze(to_numpy(sims.data)) 48 | else: 49 | return sims 50 | 51 | def _calculate_gram_matrix(self): 52 | dists = [self.pairwise_similarities(T, False) for T in self.templates['compare']] 53 | return [np.squeeze(to_numpy(d.data)) for d in dists] 54 | 55 | def _append_temp(self, temp): 56 | """ 57 | append the given template to current memory 58 | """ 59 | for k in temp.keys(): 60 | self.templates[k].append(temp[k]) 61 | 62 | def _set_temp(self, temp, idx): 63 | """ 64 | switch out the template at idx 65 | """ 66 | for k in temp.keys(): 67 | self.templates[k][idx] = temp[k] 68 | self._templates_stack[idx, :, :, :] = temp['compare'] 69 | 70 | def _update_canvas(self, temp, idx): 71 | """ 72 | insert the template at given idx (transformed to row & col of canvas) 73 | """ 74 | s_z = TEMPLATE_SIZE 75 | row, col = floor(idx/self._canvas_break), idx%self._canvas_break 76 | self.canvas[row*s_z:(row+1)*s_z, 77 | s_z*col:s_z*(col + 1), :] = temp['im'] 78 | 79 | @abc.abstractmethod 80 | def update(self, temp): 81 | """ 82 | check if template should be taken into memory 83 | """ 84 | raise NotImplementedError("Must be implemented in subclass.") 85 | 86 | @abc.abstractmethod 87 | def fill(self, temp): 88 | """ 89 | fill all slots in the memory with the given template 90 | """ 91 | raise NotImplementedError("Must be implemented in subclass.") 92 | 93 | class ST_Module(TemplateModule): 94 | def __init__(self, K, template_keys, calc_div, verbose, viz): 95 | assert isinstance(K, int) 96 | if not K: return None 97 | super(ST_Module, self).__init__(K=K, verbose=verbose, viz=viz) 98 | self.templates = {key: deque(maxlen=K) for key in template_keys} 99 | self.calc_div = calc_div 100 | 101 | def _rebuild_canvas(self): 102 | """ 103 | rebuild the full canvas with the current templates 104 | """ 105 | self.canvas = np.concatenate(list(self.templates['im']), axis=1).astype(np.uint8) 106 | 107 | @staticmethod 108 | def normed_div_measure(t): 109 | """ calculate the normed diversity measure of t, the lower the more diverse """ 110 | assert t.shape[0]==t.shape[1] 111 | dim = t.shape[0] - 1 112 | triu_no = int(dim/2*(dim + 1)) 113 | return np.sum(np.triu(t, 1)) / (t[0,0] * 
triu_no) 114 | 115 | def _update_gram_matrix(self, temp): 116 | # calculate the current distance 117 | curr_sims = self.pairwise_similarities(temp['compare']) 118 | curr_sims = np.expand_dims(curr_sims, axis=1) 119 | 120 | # update gram matrix 121 | all_dists_new = np.block([[self._gram_matrix, curr_sims], [0, curr_sims.T]]) 122 | 123 | # delete the row & col with idx 0 - the oldest template 124 | self._gram_matrix = all_dists_new[1:,1:] 125 | 126 | 127 | def fill(self, temp): 128 | for _ in range(self._K): 129 | self._append_temp(temp) 130 | 131 | def update(self, temp): 132 | """ 133 | append to the current memory and rebuild canvas 134 | return div_scale (diversity of the current memory) 135 | """ 136 | self._append_temp(temp) 137 | if self.viz: self._rebuild_canvas() 138 | 139 | # calculate diversity measure for the dynamic lower bound 140 | if self.calc_div: 141 | self._templates_stack = torch.cat(list(self.templates['compare'])) 142 | 143 | # calulate base dist_mat 144 | if not self.is_full: 145 | self._gram_matrix = np.squeeze(self._calculate_gram_matrix()) 146 | self.is_full = True 147 | 148 | # update distance matrix and calculate the div scale 149 | self._update_gram_matrix(temp) 150 | 151 | return self.normed_div_measure(t=self._gram_matrix) 152 | else: 153 | return 1.0 154 | 155 | class LT_Module(TemplateModule): 156 | def __init__(self, K, template_keys, lb, lb_type, verbose, viz): 157 | assert isinstance(K, int) 158 | super(LT_Module, self).__init__(K=K, verbose=verbose, viz=viz) 159 | 160 | self._K = K 161 | self._lb = lb 162 | self._lb_type = lb_type 163 | self._filled_idx = 0 164 | self.templates = {key: [] for key in template_keys} 165 | # self.save_det(np.array([np.nan])) 166 | 167 | def _throw_away_or_keep(self, curr_sims, self_sim, div_scale): 168 | """ 169 | determine if we keep the template or not 170 | if the template is rejected: return -1 (not better) or -2 (rejected by lower bound) 171 | if we keep the template: return idx where to switch 172 | """ 173 | base_sim = self._base_sim 174 | curr_sims = np.expand_dims(curr_sims, axis=1) 175 | 176 | # normalize the gram_matrix, otherwise determinants are huge 177 | gram_matrix_norm = self._gram_matrix/base_sim 178 | curr_sims_norm = curr_sims/base_sim 179 | 180 | # check if distance to base template is below lower bound 181 | if self._lb_type=='static': 182 | reject = (curr_sims[0] < self._lb*base_sim) 183 | 184 | elif self._lb_type=='dynamic': 185 | lb = self._lb - (1 - div_scale) 186 | lb = np.clip(lb, 0.0, 1.0) 187 | reject = (curr_sims[0] < lb*base_sim) 188 | 189 | elif self._lb_type=='ensemble': 190 | reject = not all(curr_sims_norm > self._lb) 191 | 192 | else: 193 | raise TypeError(f"lower boundary type {self._lb_type} not known.") 194 | 195 | if reject: return -2 196 | 197 | # fill the memory with adjacent frames if they are not 198 | # populated with something different than the base frame yet 199 | if self._filled_idx < (self._K-1): 200 | self._filled_idx += 1 201 | throwaway_idx = self._filled_idx 202 | 203 | # determine if and in which spot the template increases the current gram determinant 204 | else: 205 | curr_det = np.linalg.det(gram_matrix_norm) 206 | 207 | # start at 1 so we never throwaway the base template 208 | dets = np.zeros((self._K - 1)) 209 | for i in range(self._K - 1): 210 | mat = np.copy(gram_matrix_norm) 211 | mat[i + 1, :] = curr_sims_norm.T 212 | mat[:, i + 1] = curr_sims_norm.T 213 | mat[i + 1, i + 1] = self_sim/base_sim 214 | dets[i] = np.linalg.det(mat) 215 | 216 | # check if any of 
the new combinations is better than the prev. gram_matrix 217 | max_idx = np.argmax(dets) 218 | if curr_det > dets[max_idx]: 219 | throwaway_idx = -1 220 | else: 221 | throwaway_idx = max_idx + 1 222 | 223 | assert throwaway_idx != 0 224 | return throwaway_idx if throwaway_idx != self._K else -1 225 | 226 | @staticmethod 227 | def save_det(d, p): 228 | if os.path.exists(p): 229 | old_det = np.load(p) 230 | else: 231 | old_det = np.array([]) 232 | np.save(p, np.concatenate([old_det, d.reshape(-1)])) 233 | 234 | def _update_gram_matrix(self, curr_sims, self_sim, idx): 235 | """ 236 | update the current gram matrix 237 | """ 238 | curr_sims = np.expand_dims(curr_sims, axis=1) 239 | # add the self similarity at throwaway_idx spot 240 | curr_sims[idx] = self_sim 241 | 242 | self._gram_matrix[idx, :] = curr_sims.T 243 | self._gram_matrix[:, idx] = curr_sims.T 244 | 245 | # gram_matrix_norm = self._gram_matrix/self._base_sim 246 | # curr_det = np.linalg.det(gram_matrix_norm) 247 | # self.save_det(curr_det, 'determinants_dyn.npy') 248 | 249 | def fill(self, temp): 250 | for i in range(self._K): 251 | self._append_temp(temp) 252 | if self.viz: self._update_canvas(temp=temp, idx=i) 253 | 254 | def update(self, temp, div_scale): 255 | """ 256 | decide if the templates is taken into the lt module 257 | """ 258 | if not self.is_full: 259 | self._templates_stack = torch.cat(self.templates['compare']) 260 | self._gram_matrix = np.squeeze(self._calculate_gram_matrix()) 261 | self._base_sim = self._gram_matrix[0, 0] 262 | self.is_full = True 263 | 264 | # calculate the "throwaway_idx", the spot that the new template will take 265 | curr_sims = self.pairwise_similarities(temp['compare']) 266 | self_sim = F.conv2d(temp['compare'], temp['compare']).squeeze().item() 267 | throwaway_idx = self._throw_away_or_keep(curr_sims=curr_sims, self_sim=self_sim, 268 | div_scale=div_scale) 269 | 270 | # if the idx is -2 or -1, the template is rejected, otherwise we update 271 | if throwaway_idx == -2: 272 | pass 273 | elif throwaway_idx == -1: 274 | pass 275 | else: 276 | self._set_temp(temp=temp, idx=throwaway_idx) 277 | self._update_gram_matrix(curr_sims=curr_sims, self_sim=self_sim, idx=throwaway_idx) 278 | if self.viz: 279 | self._update_canvas(temp=temp, idx=throwaway_idx) 280 | 281 | class Dummy_Module(): 282 | def __init__(self, template_keys): 283 | self.templates = {key: [] for key in template_keys} 284 | 285 | def __len__(self): 286 | return 0 287 | 288 | def fill(self, temp): 289 | return False 290 | 291 | def update(self, temp): 292 | return 1.0 293 | -------------------------------------------------------------------------------- /trackers/THOR_modules/utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import cv2 8 | import torch 9 | import numpy as np 10 | 11 | # numpy - torch conversions 12 | 13 | 14 | def to_torch(ndarray): 15 | if type(ndarray).__module__ == 'numpy': 16 | return torch.from_numpy(ndarray) 17 | elif not torch.is_tensor(ndarray): 18 | raise ValueError("Cannot convert {} to torch tensor" 19 | .format(type(ndarray))) 20 | return ndarray 21 | 22 | 23 | def to_numpy(tensor): 24 | if torch.is_tensor(tensor): 25 | return tensor.cpu().numpy() 26 | elif type(tensor).__module__ != 'numpy': 27 | raise ValueError("Cannot convert {} to 
numpy array" 28 | .format(type(tensor))) 29 | return tensor 30 | 31 | 32 | def torch_to_img(img): 33 | img = to_numpy(torch.squeeze(img, 0)) 34 | img = np.transpose(img, (1, 2, 0)) # H*W*C 35 | return img 36 | 37 | 38 | def im_to_torch(img): 39 | img = np.transpose(img, (2, 0, 1)) # C*H*W 40 | img = to_torch(img).float() 41 | return img 42 | 43 | # tracker specific functions 44 | 45 | 46 | def get_subwindow_tracking_SiamRPN(im, pos, model_sz, original_sz, avg_chans): 47 | if isinstance(pos, float): 48 | pos = [pos, pos] 49 | sz = original_sz 50 | im_sz = im.shape 51 | c = (original_sz + 1) / 2 52 | context_xmin = round(pos[0] - c) 53 | context_xmax = context_xmin + sz - 1 54 | context_ymin = round(pos[1] - c) 55 | context_ymax = context_ymin + sz - 1 56 | left_pad = int(max(0., -context_xmin)) 57 | top_pad = int(max(0., -context_ymin)) 58 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 59 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 60 | 61 | context_xmin = context_xmin + left_pad 62 | context_xmax = context_xmax + left_pad 63 | context_ymin = context_ymin + top_pad 64 | context_ymax = context_ymax + top_pad 65 | 66 | r, c, k = im.shape 67 | if any([top_pad, bottom_pad, left_pad, right_pad]): 68 | te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) 69 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 70 | if top_pad: 71 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 72 | if bottom_pad: 73 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 74 | if left_pad: 75 | te_im[:, 0:left_pad, :] = avg_chans 76 | if right_pad: 77 | te_im[:, c + left_pad:, :] = avg_chans 78 | im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 79 | else: 80 | im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 81 | 82 | if not np.array_equal(model_sz, original_sz): 83 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) 84 | else: 85 | im_patch = im_patch_original 86 | 87 | return im_to_torch(im_patch) 88 | 89 | 90 | # Misc 91 | LIMIT = 99999999 92 | 93 | 94 | def xywh_to_xyxy(bboxes, clipMin=-LIMIT, clipWidth=LIMIT, clipHeight=LIMIT, 95 | round=False): 96 | addedAxis = False 97 | if isinstance(bboxes, list): 98 | bboxes = np.array(bboxes).astype(np.float32) 99 | if len(bboxes.shape) == 1: 100 | addedAxis = True 101 | bboxes = bboxes[:, np.newaxis] 102 | bboxesOut = np.zeros(bboxes.shape) 103 | xMid = bboxes[0, ...] 104 | yMid = bboxes[1, ...] 105 | width = bboxes[2, ...] 106 | height = bboxes[3, ...] 107 | bboxesOut[0, ...] = xMid - width / 2.0 108 | bboxesOut[1, ...] = yMid - height / 2.0 109 | bboxesOut[2, ...] = xMid + width / 2.0 110 | bboxesOut[3, ...] = yMid + height / 2.0 111 | if clipMin != -LIMIT or clipWidth != LIMIT or clipHeight != LIMIT: 112 | bboxesOut = clip_bbox(bboxesOut, clipMin, clipWidth, clipHeight) 113 | if bboxesOut.shape[0] > 4: 114 | bboxesOut[4:, ...] = bboxes[4:, ...] 
115 | if addedAxis: 116 | bboxesOut = bboxesOut[:, 0] 117 | if round: 118 | bboxesOut = np.round(bboxesOut).astype(int) 119 | return bboxesOut 120 | 121 | 122 | def print_color(str_, color="Fore.RED"): 123 | print(color + str_) 124 | 125 | 126 | def IOU_numpy(rect1, rect2): 127 | x1s = np.fmax(rect1[0], rect2[0]) 128 | x2s = np.fmin(rect1[2], rect2[2]) 129 | y1s = np.fmax(rect1[1], rect2[1]) 130 | y2s = np.fmin(rect1[3], rect2[3]) 131 | ws = np.fmax(x2s - x1s, 0) 132 | hs = np.fmax(y2s - y1s, 0) 133 | intersection = ws * hs 134 | rects1Area = (rect1[2] - rect1[0]) * (rect1[3] - rect1[1]) 135 | rect2Area = (rect2[2] - rect2[0]) * (rect2[3] - rect2[1]) 136 | union = np.fmax(rects1Area + rect2Area - intersection, .00001) 137 | return intersection * 1.0 / union 138 | -------------------------------------------------------------------------------- /trackers/THOR_modules/wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import abc 8 | from types import SimpleNamespace 9 | 10 | import numpy as np 11 | import torch 12 | import torch.nn.functional as F 13 | import cv2 14 | from scipy.signal import tukey 15 | 16 | from .utils import * 17 | from .modules import ST_Module, LT_Module, Dummy_Module 18 | 19 | MEDIATE_SIZE = 255 20 | 21 | class THOR_Wrapper(): 22 | def __init__(self, cfg, net): 23 | use_cuda = torch.cuda.is_available() 24 | self.device = torch.device("cuda" if use_cuda else "cpu") 25 | self._cfg = SimpleNamespace(**cfg) 26 | 27 | self._mem_len_total = self._cfg.K_st + self._cfg.K_lt 28 | assert self._cfg.K_lt > 0 29 | 30 | self.do_full_init = True 31 | self._net = net 32 | self._curr_type = 'lt' 33 | self.score_viz = None 34 | self.template_keys = ['im', 'raw', 'kernel', 'compare'] 35 | 36 | def setup(self, im, pos, sz): 37 | """ 38 | initialize the short-term and long-term module 39 | """ 40 | self.avg_chans = np.mean(im, axis=(0, 1)) 41 | self._frame_no = 0 42 | 43 | # make the template 44 | crop = self._get_crop(im, pos, sz) 45 | temp = self._make_template(crop) 46 | 47 | # initialize the short term module 48 | if self._cfg.K_st: 49 | self.st_module = ST_Module(K=self._cfg.K_st, template_keys=self.template_keys, 50 | calc_div=(self._cfg.lb_type=='dynamic'), 51 | verbose=self._cfg.verbose, viz=self._cfg.viz) 52 | else: 53 | self.st_module = Dummy_Module(self.template_keys) 54 | self.st_module.fill(temp) 55 | 56 | # initialize the long term module 57 | if self.do_full_init or self._cfg.vanilla: 58 | self.lt_module = LT_Module(K=self._cfg.K_lt, template_keys=self.template_keys, 59 | lb=self._cfg.lb, lb_type=self._cfg.lb_type, 60 | verbose=self._cfg.verbose, viz=self._cfg.viz) 61 | self.lt_module.fill(temp) 62 | self.do_full_init = False 63 | else: 64 | # reinitialize long term only at the beginning of the episode 65 | self.lt_module.update(temp, div_scale=0) 66 | 67 | def update(self, im, curr_crop, pos, sz): 68 | """ 69 | update the short-term and long-term module and 70 | update the shown templates and activations (score_viz) 71 | """ 72 | self._frame_no += 1 73 | 74 | # only update according to dilation steps 75 | if not self._frame_no%self._cfg.dilation: 76 | crop = self._get_crop(im, pos, sz) 77 | temp = self.crop_to_mem(crop) 78 | 79 | # reset st if it drifted 80 | if self._cfg.K_st and self._curr_type=='lt': 81 | 
self.st_module.fill(temp) 82 | 83 | if self._cfg.viz: 84 | self._show_modulate(torch_to_img(curr_crop), self.score_viz) 85 | self._show_templates('st') 86 | self._show_templates('lt') 87 | 88 | def crop_to_mem(self, crop): 89 | """ 90 | make the template and insert into modules 91 | """ 92 | temp = self._make_template(crop) 93 | 94 | # temp to st and lt module 95 | div_scale = self.st_module.update(temp) 96 | if self._cfg.K_lt > 1: 97 | self.lt_module.update(temp, div_scale=div_scale) 98 | 99 | return temp 100 | 101 | def _get_best_temp(self, pos, sz, score): 102 | """ 103 | determine the best template and return the prediction and the 104 | score of the best long-term template 105 | """ 106 | # get the best score in st and lt memory 107 | score_st, score_lt = np.split(score, [self._cfg.K_st]) 108 | best_st = [] if not len(score_st) else np.argmax(score_st) 109 | best_lt = np.argmax(score_lt) + self._cfg.K_st 110 | 111 | # calculate iou and switch to lt if iou too low 112 | iou = self.get_IoU(pos.T[best_st], sz.T[best_st], pos.T[best_lt], sz.T[best_lt]) 113 | self._curr_type = 'lt' if iou < self._cfg.iou_tresh else 'st' 114 | 115 | return (best_lt if self._curr_type=='lt' else best_st), score[best_lt] 116 | 117 | def _show_templates(self, mode='lt'): 118 | if mode=='st' and not self._cfg.K_st: return 119 | mem = self.st_module if mode=='st' else self.lt_module 120 | y_plot = 50 if mode=='st' else 300 121 | 122 | temp_canvas = mem.canvas.copy() 123 | cv2.imshow(f"Templates {mode}", temp_canvas) 124 | cv2.moveWindow(f"Templates {mode}", 1200, y_plot) 125 | 126 | @staticmethod 127 | def get_IoU(pos_1, sz_1, pos_2, sz_2): 128 | if not len(pos_1): return 0.0 # st memory is empty 129 | if not len(pos_2): return 1.0 # lt memory is empy 130 | return IOU_numpy(xywh_to_xyxy(np.append(pos_1, sz_1)), \ 131 | xywh_to_xyxy(np.append(pos_2, sz_2))) 132 | 133 | @staticmethod 134 | def modulate(score, mem_len, out_sz): 135 | """ 136 | modulate the prediction of each template with a mean activation map of all templates 137 | """ 138 | score_per_temp = int(np.prod(score.shape) / (mem_len * np.prod(out_sz))) 139 | score_im = score.reshape(mem_len, score_per_temp, *out_sz) 140 | score_mean = np.mean(score_im, axis=1) 141 | 142 | #modulation according to score: 143 | weights = np.max(score_mean, axis=(1, 2)) 144 | weights = weights.reshape(len(weights), 1, 1) 145 | score_mean *= weights 146 | # modulate the mean with the weights 147 | score_mean_all = np.mean(score_mean, axis=0).reshape(1, *out_sz) 148 | score_mean_norm = score_mean_all/np.max(score_mean_all) 149 | 150 | # modulate: multiply with the mean 151 | mean_tiled = np.tile(score_mean_norm.reshape(1, -1), score_per_temp) 152 | score = score*mean_tiled 153 | return score, score_mean_norm 154 | 155 | @staticmethod 156 | def _show_modulate(im, score_viz): 157 | """ 158 | show the current activations on top of the current crop 159 | """ 160 | if score_viz is None: return # modulation is not active 161 | 162 | im = cv2.resize(im, (MEDIATE_SIZE, MEDIATE_SIZE)).astype(np.uint8) 163 | canvas = np.zeros([im.shape[0], im.shape[1], 3], dtype=np.uint8) 164 | 165 | # calculate the color map 166 | score_im_base = cv2.resize(score_viz[0], im.shape[:2]) 167 | score_im_base = (255*score_im_base).astype(np.uint8) 168 | im_color = cv2.applyColorMap(score_im_base, cv2.COLORMAP_JET) 169 | 170 | # show the image 171 | overlayed_im = cv2.addWeighted(im, 0.8, im_color, 0.7, 0) 172 | canvas[:, :im.shape[1], :] = overlayed_im 173 | cv2.imshow('modulated', canvas) 174 | 
cv2.moveWindow('modulated', 1200, 800) 175 | 176 | @abc.abstractmethod 177 | def custom_forward(self, x): 178 | """ 179 | implements the forward pass through the network of the tracker 180 | with an added batch dimension [tracker specific] 181 | """ 182 | raise NotImplementedError("Must be implemented in subclass.") 183 | 184 | @abc.abstractmethod 185 | def _get_crop(self, im, pos, sz): 186 | """ 187 | get the crop from the search window [tracker specific] 188 | """ 189 | raise NotImplementedError("Must be implemented in subclass.") 190 | 191 | @abc.abstractmethod 192 | def _make_template(self, crop): 193 | """ 194 | given a crop, make a template [tracker specific] 195 | """ 196 | raise NotImplementedError("Must be implemented in subclass.") 197 | 198 | @abc.abstractmethod 199 | def batch_evaluate(self, crop): 200 | """ 201 | take evalue method from original tracker and add batch processing for all 202 | templates in memory and add modulating [tracker specific] 203 | """ 204 | raise NotImplementedError("Must be implemented in subclass.") 205 | 206 | class THOR_SiamMask(THOR_Wrapper): 207 | def __init__(self, cfg, net): 208 | super(THOR_SiamMask, self).__init__(cfg, net) 209 | self.template_sz = 127 210 | self.kernel_sz = 7 211 | 212 | def _get_crop(self, im, pos, sz): 213 | wc_z = sz[0] + self._cfg.context_temp * sum(sz) 214 | hc_z = sz[1] + self._cfg.context_temp * sum(sz) 215 | context_size = round(np.sqrt(wc_z * hc_z)) 216 | 217 | crop = get_subwindow_tracking_SiamRPN(im=im, pos=pos, model_sz=self.template_sz, 218 | original_sz=context_size, 219 | avg_chans=self.avg_chans) 220 | return crop.unsqueeze(0) 221 | 222 | def _make_template(self, crop): 223 | temp = {} 224 | temp['raw'] = crop.to(self.device) 225 | temp['im'] = torch_to_img(crop) 226 | temp['kernel'] = self._net.template(temp['raw']) 227 | 228 | # add the tukey window to the temp for comparison 229 | alpha = self._cfg.tukey_alpha 230 | win = np.outer(tukey(self.kernel_sz, alpha), tukey(self.kernel_sz, alpha)) 231 | temp['compare'] = temp['kernel'] * torch.Tensor(win).to(self.device) 232 | return temp 233 | 234 | def custom_forward(self, x): 235 | self._net.zf = torch.cat(list(self.st_module.templates['kernel']) + \ 236 | list(self.lt_module.templates['kernel'])) 237 | pred_cls, pred_loc, _ = self._net.track_mask(x) 238 | return pred_loc, pred_cls 239 | 240 | def batch_evaluate(self, crop, pos, size, window, scale_x, p): 241 | """ 242 | adapted from SiamRPNs tracker_evaluate 243 | """ 244 | delta, score = self.custom_forward(crop) 245 | 246 | out_sz = score.shape[-2:] 247 | batch_sz = self._mem_len_total 248 | 249 | delta = delta.contiguous().view(batch_sz, 4, -1).data.cpu().numpy() 250 | score = F.softmax(score.contiguous().view(batch_sz, 2, -1), dim=1).data[:, 1, :].cpu().numpy() 251 | 252 | # delta regression 253 | anc = np.tile(p.anchor, (batch_sz, 1, 1)) 254 | delta[:, 0, :] = delta[:, 0, :] * anc[:, :, 2] + anc[:, :, 0] 255 | delta[:, 1, :] = delta[:, 1, :] * anc[:, :, 3] + anc[:, :, 1] 256 | delta[:, 2, :] = np.exp(delta[:, 2, :]) * anc[:, :, 2] 257 | delta[:, 3, :] = np.exp(delta[:, 3, :]) *anc[:, :, 3] 258 | 259 | # penalizing 260 | def change(r): 261 | return np.maximum(r, 1./r) 262 | 263 | def sz(w, h): 264 | pad = (w + h) * 0.5 265 | sz2 = (w + pad) * (h + pad) 266 | return np.sqrt(sz2) 267 | 268 | def sz_wh(wh): 269 | pad = (wh[0] + wh[1]) * 0.5 270 | sz2 = (wh[0] + pad) * (wh[1] + pad) 271 | return np.sqrt(sz2) 272 | 273 | # scale penalty 274 | target_sz_in_crop = size*scale_x 275 | s_c = change(sz(delta[:, 2, :], 
delta[:, 3, :]) / (sz_wh(target_sz_in_crop))) 276 | # ratio penalty 277 | r_c = change((size[0] / size[1]) / (delta[:, 2, :] / delta[:, 3, :])) 278 | 279 | penalty = np.exp(-(r_c * s_c - 1.) * p.penalty_k) 280 | pscore = penalty * score 281 | pscore = pscore * (1 - p.window_influence) + window * p.window_influence 282 | 283 | # mediating 284 | if self._cfg.modulate: 285 | pscore, self.score_viz = self.modulate(pscore, self._mem_len_total, out_sz) 286 | 287 | # target regression 288 | best_pscore_id = np.argmax(pscore, axis=1) 289 | # arange is needed for correct indexing 290 | target = (delta[np.arange(batch_sz), :, best_pscore_id] / scale_x) 291 | lr = penalty[np.arange(batch_sz), best_pscore_id] *\ 292 | score[np.arange(batch_sz), best_pscore_id] * p.lr 293 | target, lr = target.astype(np.float64), lr.astype(np.float64) 294 | 295 | res_x = target[:, 0] + pos[0] 296 | res_y = target[:, 1] + pos[1] 297 | res_w = size[0] * (1 - lr) + target[:, 2] * lr 298 | res_h = size[1] * (1 - lr) + target[:, 3] * lr 299 | 300 | target_pos = np.array([res_x, res_y]) 301 | target_sz = np.array([res_w, res_h]) 302 | best_scores = pscore[np.arange(batch_sz), best_pscore_id] 303 | 304 | # determine the currently best template 305 | best_temp, lt_score = self._get_best_temp(target_pos, target_sz, best_scores) 306 | self._net.best_temp = best_temp 307 | 308 | return np.squeeze(target_pos[:, best_temp]), np.squeeze(target_sz[:, best_temp]), \ 309 | lt_score, best_pscore_id[best_temp] 310 | -------------------------------------------------------------------------------- /trackers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukas-blecher/AutoMask/b301dd0d69c782a16f5ead1abc81a63f1be383f4/trackers/__init__.py -------------------------------------------------------------------------------- /trackers/tracker.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # Modified by Lukas Blecher 6 | # -------------------------------------------------------- 7 | 8 | from os.path import dirname, abspath, join 9 | import torch 10 | from trackers.THOR_modules.wrapper import THOR_SiamMask 11 | 12 | # SiamMask Imports 13 | from trackers.SiamMask.net import SiamMaskCustom 14 | from trackers.SiamMask.siammask import SiamMask_init, SiamMask_track 15 | from trackers.SiamMask.utils.load_helper import load_pretrain 16 | 17 | cfg = {'tracker': {'window_influence': 0.42, 18 | 'instance_size': 255, 19 | 'base_size': 8, 20 | 'out_size': 127, 21 | 'seg_thr': 0.3, 22 | 'penalty_k': 0.04, 23 | 'lr': 0.25}, 24 | 'anchors': {'stride': 8, 25 | 'ratios': [0.33, 0.5, 1, 2, 3], 26 | 'scales': [8], 27 | 'round_dight': 0}, 28 | 'THOR': {'K_st': 6, 29 | 'K_lt': 3, 30 | 'iou_tresh': 0.742568, 31 | 'lb': 0.27996, 32 | 'tukey_alpha': 0.697998, 33 | 'lb_type': 'ensemble', 34 | 'modulate': True, 35 | 'dilation': 10, 36 | 'context_temp': 0.5, 37 | 'viz': False, 38 | 'verbose': False, 39 | 'vanilla': False}} 40 | 41 | 42 | class Tracker(): 43 | def __init__(self): 44 | use_cuda = torch.cuda.is_available() 45 | self.device = torch.device("cuda" if use_cuda else "cpu") 46 | self.mask = False 47 | self.temp_mem = None 48 | 49 | def init_func(self, im, pos, sz): 50 | raise NotImplementedError 51 | 52 | def track_func(self, state, im): 53 | raise NotImplementedError 54 | 55 | def 
setup(self, im, target_pos, target_sz): 56 | state = self.init_func(im, target_pos, target_sz) 57 | self.temp_mem.setup(im, target_pos, target_sz) 58 | return state 59 | 60 | def track(self, im, state): 61 | state = self.track_func(state, im) 62 | self.temp_mem.update(im, state['crop'], state['target_pos'], state['target_sz']) 63 | return state 64 | 65 | 66 | class SiamMask_Tracker(Tracker): 67 | def __init__(self, cfg, proj_path=dirname(abspath(__file__))): 68 | super(SiamMask_Tracker, self).__init__() 69 | self.cfg = cfg 70 | self.mask = True 71 | 72 | # setting up the model 73 | model = SiamMaskCustom(anchors=cfg['anchors']) 74 | model = load_pretrain(model, join(proj_path, 'trackers/SiamMask/model.pth')) 75 | self.model = model.eval().to(self.device) 76 | 77 | # set up template memory 78 | self.temp_mem = THOR_SiamMask(cfg=cfg['THOR'], net=self.model) 79 | 80 | def init_func(self, im, pos, sz): 81 | return SiamMask_init(im, pos, sz, self.model, self.cfg['tracker']) 82 | 83 | def track_func(self, state, im): 84 | return SiamMask_track(state, im, self.temp_mem) 85 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lukas-blecher/AutoMask/b301dd0d69c782a16f5ead1abc81a63f1be383f4/utils/__init__.py -------------------------------------------------------------------------------- /utils/bbox_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | import numpy as np 7 | from collections import namedtuple 8 | 9 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 10 | BBox = Corner 11 | Center = namedtuple('Center', 'x y w h') 12 | 13 | 14 | def corner2center(corner): 15 | """ 16 | :param corner: Corner or np.array 4*N 17 | :return: Center or 4 np.array N 18 | """ 19 | if isinstance(corner, Corner): 20 | x1, y1, x2, y2 = corner 21 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 22 | else: 23 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 24 | x = (x1 + x2) * 0.5 25 | y = (y1 + y2) * 0.5 26 | w = x2 - x1 27 | h = y2 - y1 28 | return x, y, w, h 29 | 30 | 31 | def center2corner(center): 32 | """ 33 | :param center: Center or np.array 4*N 34 | :return: Corner or np.array 4*N 35 | """ 36 | if isinstance(center, Center): 37 | x, y, w, h = center 38 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 39 | else: 40 | x, y, w, h = center[0], center[1], center[2], center[3] 41 | x1 = x - w * 0.5 42 | y1 = y - h * 0.5 43 | x2 = x + w * 0.5 44 | y2 = y + h * 0.5 45 | return x1, y1, x2, y2 46 | 47 | 48 | def cxy_wh_2_rect(pos, sz): 49 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 50 | 51 | 52 | def rect_2_cxy_wh(rect): 53 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index 54 | 55 | 56 | def get_axis_aligned_bbox(region): 57 | nv = region.size 58 | if nv == 8: 59 | cx = np.mean(region[0::2]) 60 | cy = np.mean(region[1::2]) 61 | x1 = min(region[0::2]) 62 | x2 = max(region[0::2]) 63 | y1 = min(region[1::2]) 64 | y2 = max(region[1::2]) 65 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) 66 | A2 = (x2 - x1) * (y2 
- y1) 67 | s = np.sqrt(A1 / A2) 68 | w = s * (x2 - x1) + 1 69 | h = s * (y2 - y1) + 1 70 | else: 71 | x = region[0] 72 | y = region[1] 73 | w = region[2] 74 | h = region[3] 75 | cx = x+w/2 76 | cy = y+h/2 77 | 78 | return cx, cy, w, h 79 | 80 | 81 | LIMIT = 99999999 82 | def xyxy_to_xywh(bboxes, clipMin=-LIMIT, clipWidth=LIMIT, clipHeight=LIMIT, 83 | round=False): 84 | addedAxis = False 85 | if isinstance(bboxes, list): 86 | bboxes = np.array(bboxes).astype(np.float32) 87 | if len(bboxes.shape) == 1: 88 | addedAxis = True 89 | bboxes = bboxes[:,np.newaxis] 90 | bboxesOut = np.zeros(bboxes.shape) 91 | x1 = bboxes[0,...] 92 | y1 = bboxes[1,...] 93 | x2 = bboxes[2,...] 94 | y2 = bboxes[3,...] 95 | bboxesOut[0,...] = (x1 + x2) / 2.0 96 | bboxesOut[1,...] = (y1 + y2) / 2.0 97 | bboxesOut[2,...] = x2 - x1 98 | bboxesOut[3,...] = y2 - y1 99 | if clipMin != -LIMIT or clipWidth != LIMIT or clipHeight != LIMIT: 100 | bboxesOut = clip_bbox(bboxesOut, clipMin, clipWidth, clipHeight) 101 | if bboxesOut.shape[0] > 4: 102 | bboxesOut[4:,...] = bboxes[4:,...] 103 | if addedAxis: 104 | bboxesOut = bboxesOut[:,0] 105 | if round: 106 | bboxesOut = np.round(bboxesOut).astype(int) 107 | return bboxesOut 108 | --------------------------------------------------------------------------------
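A few illustrative sketches follow; none of them are part of the repository snapshot above.

To make the call flow of the snapshot concrete, here is a minimal usage sketch. It assumes the repo root is on `PYTHONPATH`, that the pretrained weights sit at `<repo>/trackers/SiamMask/model.pth` (the path `SiamMask_Tracker` resolves from `proj_path`), and that the video path and initial box are placeholders you supply yourself. Only the public calls from `trackers/tracker.py` (`setup`, `track`) and `rect_2_cxy_wh` from `utils/bbox_helper.py` are exercised.

```python
# Illustrative only: drive SiamMask_Tracker frame by frame (paths and the
# initial box are placeholder assumptions, not repository defaults).
import cv2
from trackers.tracker import SiamMask_Tracker, cfg
from utils.bbox_helper import rect_2_cxy_wh

cap = cv2.VideoCapture('video.mp4')                 # hypothetical input clip
ok, frame = cap.read()
assert ok, 'could not read the first frame'

# initial axis-aligned box (x, y, w, h) -> (center, size) as the tracker expects
target_pos, target_sz = rect_2_cxy_wh([200, 150, 80, 120])

# proj_path must point at the repo root so the weights resolve to
# <repo>/trackers/SiamMask/model.pth
tracker = SiamMask_Tracker(cfg, proj_path='/path/to/AutoMask')
state = tracker.setup(frame, target_pos, target_sz)

while True:
    ok, frame = cap.read()
    if not ok:
        break
    state = tracker.track(frame, state)
    # state['mask'] is the raw mask in image coordinates,
    # state['polygon'] the rotated box around its largest contour
    binary = (state['mask'] > cfg['tracker']['seg_thr']).astype('uint8') * 255
    cv2.imshow('mask', binary)
    if cv2.waitKey(1) == 27:                        # Esc quits
        break
cap.release()
```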
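In `SiamMask_track`, the exemplar side `s_z` is the geometric mean of the context-padded target, and the search side `s_x` is just `s_z` rescaled by `instance_size / exemplar_size`. A small numeric check under the default `TrackerConfig` values makes the geometry concrete:

```python
# Worked example of the search-region arithmetic in SiamMask_track, using the
# TrackerConfig defaults (exemplar 127, instance 255, context_amount 0.5).
import numpy as np

exemplar_size, instance_size, context_amount = 127, 255, 0.5
old_sz = np.array([80.0, 120.0])                  # previous target (w, h)

wc_x = old_sz[1] + context_amount * old_sz.sum()  # 220.0
hc_x = old_sz[0] + context_amount * old_sz.sum()  # 180.0
s_z = np.sqrt(wc_x * hc_x)                        # ~199.0, exemplar crop side
scale_x = exemplar_size / s_z
pad = (instance_size - exemplar_size) / 2 / scale_x
s_x = s_z + 2 * pad                               # search crop side in pixels

# algebraically, s_x is s_z rescaled by instance_size / exemplar_size
assert np.isclose(s_x, s_z * instance_size / exemplar_size)
print(round(s_z, 1), round(s_x, 1))               # 199.0 399.6
```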
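`get_subwindow_tracking` pads out-of-frame regions with the per-channel image mean before resizing to the model input. A self-contained check on a random image (the sizes are arbitrary assumptions):

```python
# get_subwindow_tracking near the image border: the 160 px context window
# around (20, 20) sticks out of the frame, so the overhang is filled with
# avg_chans before resizing to the 127 px model input.
import numpy as np
from trackers.SiamMask.utils.tracking_utils import get_subwindow_tracking

im = np.random.randint(0, 255, (240, 320, 3), dtype=np.uint8)
avg_chans = np.mean(im, axis=(0, 1))
patch = get_subwindow_tracking(im, pos=[20.0, 20.0], model_sz=127,
                               original_sz=160, avg_chans=avg_chans)
print(patch.shape)   # torch.Size([3, 127, 127])
```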
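`generate_anchor` tiles the base anchors over every position of the score map. With the anchor settings from `trackers/tracker.py` and the default `score_size` of 25, the result is one `(cx, cy, w, h)` row per anchor per position:

```python
# Shape check for generate_anchor: 5 base anchors tiled over a 25x25 score
# map yield 3125 (cx, cy, w, h) rows, matching what SiamMask_track indexes.
from trackers.SiamMask.utils.anchors import generate_anchor

anchor_cfg = {'stride': 8, 'ratios': [0.33, 0.5, 1, 2, 3],
              'scales': [8], 'round_dight': 0}
score_size = (255 - 127) // 8 + 1 + 8      # TrackerConfig.score_size == 25
anchor = generate_anchor(anchor_cfg, score_size)
print(anchor.shape)                        # (3125, 4)
```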
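`TrackerConfig.update` accepts plain hyperparameter overrides and, separately, an anchor description; passing a dict routes through `Anchors`, and `renew()` keeps the derived fields `score_size` and `anchor_num` consistent:

```python
# TrackerConfig.update with an anchors dict: Anchors(...) is built internally
# and renew() recomputes the derived fields.
from trackers.SiamMask.utils.tracker_config import TrackerConfig

p = TrackerConfig()
p.update(newparam={'seg_thr': 0.35},
         anchors={'stride': 8, 'ratios': [0.33, 0.5, 1, 2, 3],
                  'scales': [8], 'round_dight': 0})
print(p.seg_thr, p.score_size, p.anchor_num)   # 0.35 25 5
```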
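`ST_Module.normed_div_measure` divides the summed strictly-upper-triangular similarities by `K(K-1)/2` times the self-similarity, so identical templates score exactly 1 and the value drops toward 0 as the memory becomes more diverse (per its docstring, lower means more diverse). A toy check:

```python
# Toy check of ST_Module.normed_div_measure (a @staticmethod, so no memory
# instance is needed): identical templates give exactly 1, orthogonal ones 0.
import numpy as np
from trackers.THOR_modules.modules import ST_Module

identical = np.full((4, 4), 2.5)                  # every pairwise sim == self-sim
print(ST_Module.normed_div_measure(identical))    # 1.0

orthogonal = np.eye(4) * 2.5                      # all off-diagonal sims == 0
print(ST_Module.normed_div_measure(orthogonal))   # 0.0
```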
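`THOR_Wrapper.modulate` reweights every template's score map by a mean activation map shared across the memory; since it is a static method, its shape contract can be checked in isolation. Note that importing `wrapper.py` runs `from scipy.signal import tukey`, which assumes an older SciPy (newer releases moved `tukey` to `scipy.signal.windows`), so this sketch only runs with the pinned dependencies from `requirements.txt`:

```python
# Shape check for THOR's score modulation: 9 templates (K_st=6 + K_lt=3),
# 5 anchors on a 25x25 map. Values are random; only the contract matters.
import numpy as np
from trackers.THOR_modules.wrapper import THOR_Wrapper

mem_len, out_sz = 9, (25, 25)
pscore = np.random.rand(mem_len, 5 * out_sz[0] * out_sz[1])
modulated, mean_map = THOR_Wrapper.modulate(pscore, mem_len, out_sz)
print(modulated.shape, mean_map.shape)   # (9, 3125) (1, 25, 25)
```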
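One inconsistency worth flagging: both `xywh_to_xyxy` (in `trackers/THOR_modules/utils.py`) and `xyxy_to_xywh` (in `utils/bbox_helper.py`) call `clip_bbox`, but no definition appears anywhere in the snapshot. With the default `LIMIT` sentinels the clipping branch is never entered, which is presumably why the `NameError` never surfaces. A plausible reconstruction, an assumption rather than the repository's actual code:

```python
# Hypothetical clip_bbox (missing from the snapshot): clamps coordinates to
# [clipMin, clipWidth] on x and [clipMin, clipHeight] on y, written for the
# corner-format (x1, y1, x2, y2) caller in xywh_to_xyxy.
import numpy as np

def clip_bbox(bboxes, clipMin, clipWidth, clipHeight):
    bboxes = np.asarray(bboxes, dtype=np.float64)
    bboxes[[0, 2], ...] = np.clip(bboxes[[0, 2], ...], clipMin, clipWidth)
    bboxes[[1, 3], ...] = np.clip(bboxes[[1, 3], ...], clipMin, clipHeight)
    return bboxes
```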
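Finally, the two conversion helpers are inverses of each other, keeping in mind that "xywh" throughout this codebase means center-based `(cx, cy, w, h)` rather than top-left:

```python
# Round trip between the two box formats used throughout the trackers.
from trackers.THOR_modules.utils import xywh_to_xyxy
from utils.bbox_helper import xyxy_to_xywh

box = [100.0, 50.0, 40.0, 20.0]      # (cx, cy, w, h)
corners = xywh_to_xyxy(box)
print(corners)                       # [ 80.  40. 120.  60.]
print(xyxy_to_xywh(corners))         # [100.  50.  40.  20.]
```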