├── LICENSE
├── README.md
├── configs
│   ├── _base_
│   │   ├── faster_rcnn_r50_fpn.py
│   │   └── qdtrack_faster_rcnn_r50_fpn.py
│   ├── bdd100k
│   │   └── cem_bdd.py
│   └── tao
│       ├── cem_r101_lvis.py
│       ├── cem_swinB_lvis.py
│       ├── cem_swinL_lvis.py
│       ├── cem_swinS_lvis.py
│       ├── cem_swinT_lvis.py
│       ├── tracker_r101_tao.py
│       ├── tracker_swinB_tao.py
│       ├── tracker_swinL_tao.py
│       ├── tracker_swinS_tao.py
│       └── tracker_swinT_tao.py
├── docs
│   ├── GET_STARTED.md
│   └── INSTALL.md
├── figures
│   ├── teaser-teter.png
│   └── teta-teaser.png
├── requirements.txt
├── setup.cfg
├── setup.py
├── teta
│   ├── LICENSE
│   ├── README.md
│   ├── docs
│   │   └── TAO-format.txt
│   ├── figures
│   │   ├── figure_1.png
│   │   └── teta-teaser.png
│   ├── requirements.txt
│   ├── scripts
│   │   ├── run_coco.py
│   │   └── run_tao.py
│   ├── setup.py
│   └── teta
│       ├── __init__.py
│       ├── _timing.py
│       ├── config.py
│       ├── datasets
│       │   ├── __init__.py
│       │   ├── _base_dataset.py
│       │   ├── bdd.py
│       │   ├── bdd_mots.py
│       │   ├── coco.py
│       │   ├── coco_mots.py
│       │   └── tao.py
│       ├── eval.py
│       ├── metrics
│       │   ├── __init__.py
│       │   ├── _base_metric.py
│       │   └── teta.py
│       └── utils.py
├── teter
│   ├── VERSION
│   ├── __init__.py
│   ├── apis
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   ├── test.py
│   │   └── train.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── evaluation
│   │   │   ├── __init__.py
│   │   │   ├── box_track.py
│   │   │   ├── eval_hooks.py
│   │   │   └── mot.py
│   │   ├── to_bdd100k
│   │   │   ├── __init__.py
│   │   │   ├── transforms.py
│   │   │   └── utils.py
│   │   ├── track
│   │   │   ├── __init__.py
│   │   │   ├── similarity.py
│   │   │   └── transforms.py
│   │   └── utils
│   │       ├── __init__.py
│   │       └── visualization.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── bdd_video_dataset.py
│   │   ├── builder.py
│   │   ├── coco_video_dataset.py
│   │   ├── parsers
│   │   │   ├── __init__.py
│   │   │   ├── coco_api.py
│   │   │   └── coco_video_parser.py
│   │   ├── pipelines
│   │   │   ├── __init__.py
│   │   │   ├── formatting.py
│   │   │   ├── h5backend.py
│   │   │   ├── loading.py
│   │   │   └── transforms.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   └── distributed_video_sampler.py
│   │   └── tao_dataset.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── builder.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── l2_loss.py
│   │   │   ├── multipos_cross_entropy_loss.py
│   │   │   └── unbiased_supcontrast.py
│   │   ├── mot
│   │   │   ├── __init__.py
│   │   │   └── teter.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── teter_roi_head.py
│   │   │   └── track_heads
│   │   │       ├── __init__.py
│   │   │       ├── cem_head.py
│   │   │       └── quasi_dense_embed_head.py
│   │   └── trackers
│   │       ├── __init__.py
│   │       ├── teter_bdd.py
│   │       └── teter_tao.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── collect_env.py
│   │   └── logger.py
│   └── version.py
└── tools
    ├── convert_datasets
    │   └── tao2coco.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── to_bdd100k.py
    └── train.py
/configs/_base_/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 |
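# With use_sigmoid=True the RPN scores objectness as a single binary logit
# per anchor, while the RoI head further down keeps use_sigmoid=False, i.e.
# an (num_classes + 1)-way softmax over the 80 classes plus background. In
# mmdet this flag only switches the classification activation/loss; the box
# regression branches are unaffected.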
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /configs/_base_/qdtrack_faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn.py' 2 | model = dict( 3 | type='QDTrack', 4 | rpn_head=dict( 5 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 6 | roi_head=dict( 7 | type='QuasiDenseRoIHead', 8 | track_roi_extractor=dict( 9 | type='SingleRoIExtractor', 10 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 11 | out_channels=256, 12 | featmap_strides=[4, 8, 16, 32]), 13 | track_head=dict( 14 | type='QuasiDenseEmbedHead', 15 | num_convs=4, 16 | num_fcs=1, 17 | embed_channels=256, 18 | norm_cfg=dict(type='GN', num_groups=32), 19 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), 20 | loss_track_aux=dict( 21 | type='L2Loss', 22 | neg_pos_ub=3, 23 | pos_margin=0, 24 | neg_margin=0.1, 25 | hard_mining=True, 26 | loss_weight=1.0))), 27 | train_cfg=dict( 28 | embed=dict( 29 | assigner=dict( 30 | type='MaxIoUAssigner', 31 | pos_iou_thr=0.7, 32 | neg_iou_thr=0.3, 33 | min_pos_iou=0.5, 34 | match_low_quality=False, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='CombinedSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=3, 41 | add_gt_as_proposals=True, 42 | 
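# This embed sampler drives QDTrack's quasi-dense matching: 256 RoIs per
# image, half of them positives, with negatives capped at 3x the positives
# (neg_pos_ub=3). InstanceBalancedPosSampler spreads the positives evenly
# over ground-truth instances so one large object cannot dominate the
# contrastive pairs; roughly (an illustrative sketch, not the mmdet code):
#   per_gt = num_expected // num_gts
#   picks = [np.random.choice(np.flatnonzero(assigned_gt == g), per_gt)
#            for g in range(num_gts)]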
pos_sampler=dict(type='InstanceBalancedPosSampler'), 43 | neg_sampler=dict(type='RandomSampler'))))) -------------------------------------------------------------------------------- /configs/bdd100k/cem_bdd.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | model = dict( 4 | type='TETer', 5 | freeze_detector=True, 6 | freeze_qd=True, 7 | method='teter', 8 | roi_head=dict( 9 | type='TETerRoIHead', 10 | finetune_cem=True, 11 | bbox_head=dict(num_classes=8), 12 | cem_roi_extractor=dict( 13 | type='SingleRoIExtractor', 14 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 15 | out_channels=256, 16 | featmap_strides=[4, 8, 16, 32]), 17 | cem_head=dict( 18 | type='ClsExemplarHead', 19 | num_convs=4, 20 | num_fcs=3, 21 | embed_channels=256, 22 | norm_cfg=dict(type='GN', num_groups=32), 23 | loss_track=dict(type='UnbiasedSupConLoss', temperature=0.07, contrast_mode='all', 24 | pos_normalize=True, 25 | loss_weight=0.25) 26 | , softmax_temp=-1), 27 | 28 | track_head=dict( 29 | type='QuasiDenseEmbedHead', 30 | num_convs=4, 31 | num_fcs=1, 32 | embed_channels=256, 33 | norm_cfg=dict(type='GN', num_groups=32), 34 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), 35 | loss_track_aux=dict( 36 | type='L2Loss', 37 | neg_pos_ub=3, 38 | pos_margin=0, 39 | neg_margin=0.1, 40 | hard_mining=True, 41 | loss_weight=1.0)) 42 | ), 43 | tracker=dict( 44 | type='TETerBDD', 45 | init_score_thr=0.7, 46 | obj_score_thr=0.3, 47 | match_score_thr=0.5, 48 | memo_tracklet_frames=10, 49 | memo_backdrop_frames=1, 50 | memo_momentum=0.8, 51 | nms_conf_thr=0.5, 52 | nms_backdrop_iou_thr=0.3, 53 | nms_class_iou_thr=0.7, 54 | contrastive_thr=0.5, 55 | match_metric='bisoftmax'), 56 | 57 | # model training and testing settings 58 | train_cfg=dict( 59 | embed=dict( 60 | sampler=dict( 61 | type='CombinedSampler', 62 | num=256, 63 | pos_fraction=0.5, 64 | neg_pos_ub=3, 65 | add_gt_as_proposals=True, 66 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 67 | neg_sampler=dict( 68 | type='IoUBalancedNegSampler', 69 | floor_thr=-1, 70 | floor_fraction=0, 71 | num_bins=3))))) 72 | # dataset settings 73 | dataset_type = 'BDDVideoDataset' 74 | data_root = 'data/bdd/bdd100k/' 75 | ann_root = 'data/bdd/' 76 | img_norm_cfg = dict( 77 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 78 | train_pipeline = [ 79 | dict(type='LoadMultiImagesFromFile'), 80 | # comment out the line above and uncomment the lines below to load images from an hdf5 file.
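# The hdf5 path exists because BDD100K training touches millions of small
# JPEGs; packing them into one file turns image loading into cheap keyed
# reads. A minimal sketch of such a lookup (plain h5py usage; the key
# layout here is an assumption, not the repo's actual backend):
#   import h5py, mmcv
#   with h5py.File('data/bdd/hdf5s/100k_train.hdf5', 'r') as db:
#       img = mmcv.imfrombytes(db[img_name][()])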
81 | # dict(type='LoadMultiImagesFromFile', 82 | # file_client_args=dict( 83 | # img_db_path='data/bdd/hdf5s/100k_train.hdf5', 84 | # # vid_db_path='data/bdd/hdf5s/track_train.hdf5', 85 | # backend='hdf5', 86 | # type='bdd')), 87 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 88 | dict( 89 | type='SeqResize', 90 | img_scale=[(1296, 640), (1296, 672), (1296, 704), (1296, 736), 91 | (1296, 768), (1296, 800), (1296, 720)], 92 | share_params=False, 93 | multiscale_mode='value', 94 | keep_ratio=True), 95 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 96 | dict(type='SeqNormalize', **img_norm_cfg), 97 | dict(type='SeqPad', size_divisor=32), 98 | dict(type='SeqDefaultFormatBundle'), 99 | dict( 100 | type='SeqCollect', 101 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 102 | ref_prefix='ref'), 103 | ] 104 | test_pipeline = [ 105 | dict(type='LoadImageFromFile'), 106 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 107 | # dict(type='LoadImageFromFile', 108 | # file_client_args=dict( 109 | # vid_db_path='data/bdd/hdf5s/track_val.hdf5', 110 | # backend='hdf5', 111 | # type='bdd')), 112 | dict( 113 | type='MultiScaleFlipAug', 114 | img_scale=(1296, 720), 115 | flip=False, 116 | transforms=[ 117 | dict(type='Resize', keep_ratio=True), 118 | dict(type='RandomFlip'), 119 | dict(type='Normalize', **img_norm_cfg), 120 | dict(type='Pad', size_divisor=32), 121 | dict(type='ImageToTensor', keys=['img']), 122 | dict(type='VideoCollect', keys=['img']) 123 | ]) 124 | ] 125 | data = dict( 126 | samples_per_gpu=16, 127 | workers_per_gpu=2, 128 | train=[ 129 | dict( 130 | type=dataset_type, 131 | load_as_video=False, 132 | ann_file=ann_root + 133 | 'annotations/det_20/det_train_cocofmt.json', 134 | img_prefix=data_root + 'images/100k/train/', 135 | pipeline=train_pipeline) 136 | ], 137 | val=dict( 138 | type=dataset_type, 139 | ann_file=ann_root + 140 | 'annotations/box_track_20/box_track_val_cocofmt.json', 141 | scalabel_gt=ann_root + 'annotations/scalabel_gt/box_track_20/val/', 142 | img_prefix=data_root + 'images/track/val/', 143 | pipeline=test_pipeline), 144 | test=dict( 145 | type=dataset_type, 146 | ann_file=ann_root + 147 | 'annotations/box_track_20/box_track_val_cocofmt.json', 148 | scalabel_gt=ann_root + 'annotations/scalabel_gt/box_track_20/val/', 149 | img_prefix=data_root + 'images/track/val/', 150 | pipeline=test_pipeline)) 151 | # optimizer 152 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 153 | optimizer_config = dict(grad_clip=None) 154 | # learning policy 155 | lr_config = dict( 156 | policy='step', 157 | warmup='linear', 158 | warmup_iters=1000, 159 | warmup_ratio=1.0 / 1000, 160 | step=[8, 11]) 161 | # checkpoint saving 162 | checkpoint_config = dict(interval=1) 163 | # yapf:disable 164 | log_config = dict( 165 | interval=50, 166 | hooks=[ 167 | dict(type='TextLoggerHook'), 168 | # dict(type='TensorboardLoggerHook') 169 | ]) 170 | # yapf:enable 171 | # runtime settings 172 | total_epochs = 12 173 | dist_params = dict(backend='nccl') 174 | log_level = 'INFO' 175 | load_from = None 176 | resume_from = None 177 | workflow = [('train', 1)] 178 | evaluation = dict(metric=['bbox', 'track'], interval=1) -------------------------------------------------------------------------------- /configs/tao/cem_r101_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | model =
dict( 4 | type='TETer', 5 | freeze_detector=False, 6 | backbone=dict( 7 | depth=101, 8 | init_cfg=dict(type='Pretrained', 9 | checkpoint='torchvision://resnet101')), 10 | roi_head=dict( 11 | type='TETerRoIHead', 12 | bbox_head=dict(num_classes=1230), 13 | cem_roi_extractor=dict( 14 | type='SingleRoIExtractor', 15 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 16 | out_channels=256, 17 | featmap_strides=[4, 8, 16, 32]), 18 | cem_head=dict( 19 | type='ClsExemplarHead', 20 | num_convs=4, 21 | num_fcs=3, 22 | embed_channels=1230, 23 | norm_cfg=dict(type='GN', num_groups=32), 24 | loss_track=dict(type='UnbiasedSupConLoss', 25 | temperature=0.07, 26 | contrast_mode='all', 27 | pos_normalize=True, 28 | loss_weight=0.25) 29 | , softmax_temp=-1), 30 | 31 | track_head=dict( 32 | type='QuasiDenseEmbedHead', 33 | num_convs=4, 34 | num_fcs=1, 35 | embed_channels=256, 36 | norm_cfg=dict(type='GN', num_groups=32), 37 | loss_track=dict(type='MultiPosCrossEntropyLoss', 38 | loss_weight=0.25, 39 | version='unbiased'), 40 | loss_track_aux=dict( 41 | type='L2Loss', 42 | neg_pos_ub=3, 43 | pos_margin=0, 44 | neg_margin=0.1, 45 | hard_mining=True, 46 | loss_weight=1.0)) 47 | ), 48 | 49 | tracker=dict( 50 | type='TETerTAO', 51 | init_score_thr=0.0001, 52 | obj_score_thr=0.0001, 53 | match_score_thr=0.5, 54 | memo_frames=10, 55 | momentum_embed=0.8, 56 | momentum_obj_score=0.5, 57 | match_metric='bisoftmax', 58 | match_with_cosine=True, 59 | contrastive_thr=0.5), 60 | 61 | train_cfg=dict( 62 | cem=dict( 63 | assigner=dict( 64 | type='MaxIoUAssigner', 65 | pos_iou_thr=0.7, 66 | neg_iou_thr=0.3, 67 | min_pos_iou=0.5, 68 | match_low_quality=False, 69 | ignore_iof_thr=-1), 70 | sampler=dict( 71 | type='CombinedSampler', 72 | num=256, 73 | pos_fraction=1, 74 | neg_pos_ub=0, 75 | add_gt_as_proposals=True, 76 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 77 | neg_sampler=dict(type='RandomSampler')) 78 | ) 79 | ), 80 | 81 | test_cfg=dict( 82 | rcnn=dict( 83 | score_thr=0.0001, 84 | nms=dict(type='nms', iou_threshold=0.5), 85 | max_per_img=300) 86 | ) 87 | ) 88 | # dataset settings 89 | img_norm_cfg = dict( 90 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 91 | train_pipeline = [ 92 | dict(type='LoadMultiImagesFromFile'), 93 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 94 | # dict( 95 | # type='LoadMultiImagesFromFile', 96 | # file_client_args=dict( 97 | # img_db_path='data/lvis/train_imgs.hdf5', 98 | # backend='hdf5', 99 | # type='lvis')), 100 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 101 | dict( 102 | type='SeqResize', 103 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 104 | (1333, 768), (1333, 800)], 105 | share_params=False, 106 | multiscale_mode='value', 107 | keep_ratio=True), 108 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 109 | dict(type='SeqNormalize', **img_norm_cfg), 110 | dict(type='SeqPad', size_divisor=32), 111 | dict(type='SeqDefaultFormatBundle'), 112 | dict( 113 | type='SeqCollect', 114 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 115 | ref_prefix='ref'), 116 | ] 117 | 118 | test_pipeline = [ 119 | dict(type='LoadImageFromFile'), 120 | # comment out the line above and uncomment the lines below to load images from an hdf5 file.
121 | # dict(type='LoadImageFromFile', 122 | # file_client_args=dict( 123 | # img_db_path='data/tao/tao_val_imgs.hdf5', 124 | # backend='hdf5', 125 | # type='tao')), 126 | dict( 127 | type='MultiScaleFlipAug', 128 | img_scale=(1333, 800), 129 | flip=False, 130 | transforms=[ 131 | dict(type='Resize', keep_ratio=True), 132 | dict(type='RandomFlip'), 133 | dict(type='Normalize', **img_norm_cfg), 134 | dict(type='Pad', size_divisor=32), 135 | dict(type='ImageToTensor', keys=['img']), 136 | dict(type='VideoCollect', keys=['img']) 137 | ]) 138 | ] 139 | 140 | dataset_type = 'TaoDataset' 141 | data = dict( 142 | samples_per_gpu=2, 143 | workers_per_gpu=2, 144 | train=dict( 145 | _delete_=True, 146 | type='ClassBalancedDataset', 147 | oversample_thr=1e-3, 148 | dataset=dict( 149 | type=dataset_type, 150 | classes='data/lvis/annotations/lvis_classes.txt', 151 | load_as_video=False, 152 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 153 | img_prefix='data/lvis/train2017/', 154 | key_img_sampler=dict(interval=1), 155 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 156 | pipeline=train_pipeline) 157 | ), 158 | val=dict( 159 | type=dataset_type, 160 | classes='data/lvis/annotations/lvis_classes.txt', 161 | ann_file='data/tao/annotations/validation_ours.json', 162 | img_prefix='data/tao/frames/', 163 | ref_img_sampler=None, 164 | pipeline=test_pipeline), 165 | test=dict( 166 | type=dataset_type, 167 | classes='data/lvis/annotations/lvis_classes.txt', 168 | ann_file='data/tao/annotations/validation_ours.json', 169 | img_prefix='data/tao/frames/', 170 | ref_img_sampler=None, 171 | pipeline=test_pipeline) 172 | 173 | ) 174 | 175 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 176 | optimizer_config = dict(grad_clip=None) 177 | # learning policy 178 | lr_config = dict( 179 | policy='step', 180 | warmup='linear', 181 | warmup_iters=1000, 182 | warmup_ratio=1.0 / 1000, 183 | step=[16, 22]) 184 | total_epochs = 24 185 | 186 | # checkpoint saving 187 | checkpoint_config = dict(interval=1) 188 | # yapf:disable 189 | log_config = dict( 190 | interval=50, 191 | hooks=[ 192 | dict(type='TextLoggerHook'), 193 | # dict(type='TensorboardLoggerHook') 194 | ]) 195 | 196 | dist_params = dict(backend='nccl') 197 | log_level = 'INFO' 198 | load_from = None 199 | resume_from = None 200 | workflow = [('train', 1)] 201 | evaluation = dict(metric=['bbox'], start=1, interval=1, resfile_path='/scratch/cem_lvis/') 202 | work_dir = './saved_models/cem_lvis/' 203 | -------------------------------------------------------------------------------- /configs/tao/cem_swinB_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=128, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[4, 8, 16, 32], 13 | window_size=12, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[128, 256, 512, 1024]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | 
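# TETerRoIHead augments the QDTrack heads with a Class Exemplar Module
# (CEM): cem_head learns per-RoI class-exemplar embeddings with an
# unbiased supervised-contrastive loss (note embed_channels=1230, one
# dimension per LVIS v0.5 class, so the exemplar reads as soft class
# evidence), and at test time the tracker first gates association
# candidates by exemplar similarity (contrastive_thr) before matching the
# instance embeddings from track_head. A rough sketch of that gate,
# assuming (N, D) detection and (M, D) memory exemplars (illustrative
# only, not the repo's exact code):
#   sim = F.cosine_similarity(det_cem[None], memo_cem[:, None], dim=-1)
#   allowed = sim > contrastive_thr  # (M, N) candidate mask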
bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | 47 | track_head=dict( 48 | type='QuasiDenseEmbedHead', 49 | num_convs=4, 50 | num_fcs=1, 51 | embed_channels=256, 52 | norm_cfg=dict(type='GN', num_groups=32), 53 | loss_track=dict(type='MultiPosCrossEntropyLoss', 54 | loss_weight=0.25, 55 | version='unbiased'), 56 | loss_track_aux=dict( 57 | type='L2Loss', 58 | neg_pos_ub=3, 59 | pos_margin=0, 60 | neg_margin=0.1, 61 | hard_mining=True, 62 | loss_weight=1.0)) 63 | ), 64 | tracker=dict( 65 | type='TETerTAO', 66 | init_score_thr=0.0001, 67 | obj_score_thr=0.0001, 68 | match_score_thr=0.5, 69 | memo_frames=10, 70 | momentum_embed=0.8, 71 | momentum_obj_score=0.5, 72 | match_metric='bisoftmax', 73 | match_with_cosine=True, 74 | contrastive_thr=0.5, 75 | ), 76 | train_cfg=dict( 77 | cem=dict( 78 | assigner=dict( 79 | type='MaxIoUAssigner', 80 | pos_iou_thr=0.7, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.5, 83 | match_low_quality=False, 84 | ignore_iof_thr=-1), 85 | sampler=dict( 86 | type='CombinedSampler', 87 | num=256, 88 | pos_fraction=1, 89 | neg_pos_ub=0, 90 | add_gt_as_proposals=True, 91 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 92 | neg_sampler=dict(type='RandomSampler')) 93 | ) 94 | ), 95 | 96 | test_cfg=dict( 97 | rcnn=dict( 98 | score_thr=0.0001, 99 | nms=dict(type='nms', iou_threshold=0.5), 100 | max_per_img=300) 101 | ) 102 | ) 103 | # dataset settings 104 | img_norm_cfg = dict( 105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 106 | train_pipeline = [ 107 | dict(type='LoadMultiImagesFromFile'), 108 | # comment out the line above and uncomment the lines below to load images from an hdf5 file.
109 | # dict( 110 | # type='LoadMultiImagesFromFile', 111 | # file_client_args=dict( 112 | # img_db_path='data/lvis/train_imgs.hdf5', 113 | # backend='hdf5', 114 | # type='lvis')), 115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 116 | dict( 117 | type='SeqResize', 118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 119 | (1333, 768), (1333, 800)], 120 | share_params=False, 121 | multiscale_mode='value', 122 | keep_ratio=True), 123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 124 | dict(type='SeqNormalize', **img_norm_cfg), 125 | dict(type='SeqPad', size_divisor=32), 126 | dict(type='SeqDefaultFormatBundle'), 127 | dict( 128 | type='SeqCollect', 129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 130 | ref_prefix='ref'), 131 | ] 132 | 133 | test_pipeline = [ 134 | dict(type='LoadImageFromFile'), 135 | # dict(type='LoadImageFromFile', 136 | # file_client_args=dict( 137 | # img_db_path='data/tao/tao_val_imgs.hdf5', 138 | # backend='hdf5', 139 | # type='tao')), 140 | dict( 141 | type='MultiScaleFlipAug', 142 | img_scale=(1333, 800), 143 | flip=False, 144 | transforms=[ 145 | dict(type='Resize', keep_ratio=True), 146 | dict(type='RandomFlip'), 147 | dict(type='Normalize', **img_norm_cfg), 148 | dict(type='Pad', size_divisor=32), 149 | dict(type='ImageToTensor', keys=['img']), 150 | dict(type='VideoCollect', keys=['img']) 151 | ]) 152 | ] 153 | 154 | ## dataset settings 155 | dataset_type = 'TaoDataset' 156 | data = dict( 157 | samples_per_gpu=2, 158 | workers_per_gpu=2, 159 | train=dict( 160 | _delete_=True, 161 | type='ClassBalancedDataset', 162 | oversample_thr=1e-3, 163 | dataset=dict( 164 | type=dataset_type, 165 | classes='data/lvis/annotations/lvis_classes.txt', 166 | load_as_video=False, 167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 168 | img_prefix='data/lvis/train2017/', 169 | key_img_sampler=dict(interval=1), 170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 171 | pipeline=train_pipeline) 172 | ), 173 | val=dict( 174 | type=dataset_type, 175 | classes='data/lvis/annotations/lvis_classes.txt', 176 | ann_file='data/tao/annotations/validation_ours.json', 177 | img_prefix='data/tao/frames/', 178 | ref_img_sampler=None, 179 | pipeline=test_pipeline), 180 | test=dict( 181 | type=dataset_type, 182 | classes='data/lvis/annotations/lvis_classes.txt', 183 | ann_file='data/tao/annotations/validation_ours.json', 184 | img_prefix='data/tao/frames/', 185 | ref_img_sampler=None, 186 | pipeline=test_pipeline) 187 | 188 | ) 189 | # optimizer 190 | optimizer = dict( 191 | # _delete_=True, 192 | type='AdamW', 193 | lr=0.0001, 194 | betas=(0.9, 0.999), 195 | weight_decay=0.05, 196 | paramwise_cfg=dict( 197 | custom_keys={ 198 | 'absolute_pos_embed': dict(decay_mult=0.), 199 | 'relative_position_bias_table': dict(decay_mult=0.), 200 | 'norm': dict(decay_mult=0.) 
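# decay_mult=0. exempts the Swin position tables and all norm parameters
# from the 0.05 AdamW weight decay, the standard recipe when fine-tuning
# Swin backbones. mmcv's optimizer constructor matches each custom key as
# a substring of the parameter name and scales that parameter's decay,
# roughly:
#   this_decay = weight_decay * custom_keys[k]['decay_mult']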
201 | })) 202 | optimizer_config = dict(grad_clip=None) 203 | # learning policy 204 | lr_config = dict( 205 | policy='step', 206 | warmup='linear', 207 | warmup_iters=1000, 208 | warmup_ratio=0.001, 209 | step=[27, 33]) 210 | runner = dict(type='EpochBasedRunner', max_epochs=36) 211 | 212 | 213 | # checkpoint saving 214 | checkpoint_config = dict(interval=1) 215 | # yapf:disable 216 | log_config = dict( 217 | interval=50, 218 | hooks=[ 219 | dict(type='TextLoggerHook'), 220 | ]) 221 | # yapf:enable 222 | # runtime settings 223 | total_epochs = 36 224 | dist_params = dict(backend='nccl') 225 | log_level = 'INFO' 226 | load_from = None 227 | resume_from = None 228 | workflow = [('train', 1)] 229 | evaluation = dict(metric=['bbox'], start=2, interval=2) 230 | -------------------------------------------------------------------------------- /configs/tao/cem_swinL_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=192, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[6, 12, 24, 48], 13 | window_size=12, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[192, 384, 768, 1536]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | 47 | track_head=dict( 48 | type='QuasiDenseEmbedHead', 49 | num_convs=4, 50 | num_fcs=1, 51 | embed_channels=256, 52 | norm_cfg=dict(type='GN', num_groups=32), 53 | loss_track=dict(type='MultiPosCrossEntropyLoss', 54 | loss_weight=0.25, 55 | version='unbiased'), 56 | loss_track_aux=dict( 57 | type='L2Loss', 58 | neg_pos_ub=3, 59 | pos_margin=0, 60 | neg_margin=0.1, 61 | hard_mining=True, 62 | loss_weight=1.0)) 63 | ), 64 | tracker=dict( 65 | type='TETerTAO', 66 | init_score_thr=0.0001, 67 | obj_score_thr=0.0001, 68 | match_score_thr=0.5, 69 | memo_frames=10, 70 | momentum_embed=0.8, 71 | momentum_obj_score=0.5, 72 | match_metric='bisoftmax', 73 | match_with_cosine=True, 74 | contrastive_thr=0.5, 75 | ), 76 | train_cfg=dict( 77 | cem=dict( 78 | assigner=dict( 79 | type='MaxIoUAssigner', 80 | pos_iou_thr=0.7, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.5, 83 | match_low_quality=False, 84 | ignore_iof_thr=-1), 85 | sampler=dict( 86 | type='CombinedSampler', 87 | num=256, 88 | pos_fraction=1, 89 | neg_pos_ub=0, 90 | add_gt_as_proposals=True, 91 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 92 | neg_sampler=dict(type='RandomSampler')) 93 | ) 94 | ), 95 | 96 | test_cfg=dict( 97 | 
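# score_thr is deliberately near zero: with 1230 mostly-rare LVIS classes,
# almost no correct box clears a conventional 0.05 cut. Up to 300
# low-confidence detections per image are kept, and the tracker smooths
# their scores over time instead; a sketch of the momentum update implied
# by momentum_obj_score=0.5 (an assumption, not the exact tracker code):
#   fused = momentum_obj_score * memo_score + (1 - momentum_obj_score) * new_score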
rcnn=dict( 98 | score_thr=0.0001, 99 | nms=dict(type='nms', iou_threshold=0.5), 100 | max_per_img=300) 101 | ) 102 | ) 103 | # dataset settings 104 | img_norm_cfg = dict( 105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 106 | train_pipeline = [ 107 | dict(type='LoadMultiImagesFromFile'), 108 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 109 | # dict( 110 | # type='LoadMultiImagesFromFile', 111 | # file_client_args=dict( 112 | # img_db_path='data/lvis/train_imgs.hdf5', 113 | # backend='hdf5', 114 | # type='lvis')), 115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 116 | dict( 117 | type='SeqResize', 118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 119 | (1333, 768), (1333, 800)], 120 | share_params=False, 121 | multiscale_mode='value', 122 | keep_ratio=True), 123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 124 | dict(type='SeqNormalize', **img_norm_cfg), 125 | dict(type='SeqPad', size_divisor=32), 126 | dict(type='SeqDefaultFormatBundle'), 127 | dict( 128 | type='SeqCollect', 129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 130 | ref_prefix='ref'), 131 | ] 132 | 133 | test_pipeline = [ 134 | dict(type='LoadImageFromFile'), 135 | # dict(type='LoadImageFromFile', 136 | # file_client_args=dict( 137 | # img_db_path='data/tao/tao_val_imgs.hdf5', 138 | # backend='hdf5', 139 | # type='tao')), 140 | dict( 141 | type='MultiScaleFlipAug', 142 | img_scale=(1333, 800), 143 | flip=False, 144 | transforms=[ 145 | dict(type='Resize', keep_ratio=True), 146 | dict(type='RandomFlip'), 147 | dict(type='Normalize', **img_norm_cfg), 148 | dict(type='Pad', size_divisor=32), 149 | dict(type='ImageToTensor', keys=['img']), 150 | dict(type='VideoCollect', keys=['img']) 151 | ]) 152 | ] 153 | 154 | ## dataset settings 155 | dataset_type = 'TaoDataset' 156 | data = dict( 157 | samples_per_gpu=2, 158 | workers_per_gpu=2, 159 | train=dict( 160 | _delete_=True, 161 | type='ClassBalancedDataset', 162 | oversample_thr=1e-3, 163 | dataset=dict( 164 | type=dataset_type, 165 | classes='data/lvis/annotations/lvis_classes.txt', 166 | load_as_video=False, 167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 168 | img_prefix='data/lvis/train2017/', 169 | key_img_sampler=dict(interval=1), 170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 171 | pipeline=train_pipeline) 172 | ), 173 | val=dict( 174 | type=dataset_type, 175 | classes='data/lvis/annotations/lvis_classes.txt', 176 | ann_file='data/tao/annotations/validation_ours.json', 177 | img_prefix='data/tao/frames/', 178 | ref_img_sampler=None, 179 | pipeline=test_pipeline), 180 | test=dict( 181 | type=dataset_type, 182 | classes='data/lvis/annotations/lvis_classes.txt', 183 | ann_file='data/tao/annotations/validation_ours.json', 184 | img_prefix='data/tao/frames/', 185 | ref_img_sampler=None, 186 | pipeline=test_pipeline) 187 | 188 | ) 189 | # optimizer 190 | optimizer = dict( 191 | # _delete_=True, 192 | type='AdamW', 193 | lr=0.0001, 194 | betas=(0.9, 0.999), 195 | weight_decay=0.05, 196 | paramwise_cfg=dict( 197 | custom_keys={ 198 | 'absolute_pos_embed': dict(decay_mult=0.), 199 | 'relative_position_bias_table': dict(decay_mult=0.), 200 | 'norm': dict(decay_mult=0.)
201 | })) 202 | optimizer_config = dict(grad_clip=None) 203 | # learning policy 204 | lr_config = dict( 205 | policy='step', 206 | warmup='linear', 207 | warmup_iters=1000, 208 | warmup_ratio=0.001, 209 | step=[27, 33]) 210 | runner = dict(type='EpochBasedRunner', max_epochs=36) 211 | 212 | 213 | # checkpoint saving 214 | checkpoint_config = dict(interval=1) 215 | # yapf:disable 216 | log_config = dict( 217 | interval=50, 218 | hooks=[ 219 | dict(type='TextLoggerHook'), 220 | ]) 221 | # yapf:enable 222 | # runtime settings 223 | total_epochs = 36 224 | dist_params = dict(backend='nccl') 225 | log_level = 'INFO' 226 | load_from = None 227 | resume_from = None 228 | workflow = [('train', 1)] 229 | evaluation = dict(metric=['bbox'], start=2, interval=2) 230 | -------------------------------------------------------------------------------- /configs/tao/cem_swinS_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=96, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[3, 6, 12, 24], 13 | window_size=7, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[96, 192, 384, 768]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | 47 | track_head=dict( 48 | type='QuasiDenseEmbedHead', 49 | num_convs=4, 50 | num_fcs=1, 51 | embed_channels=256, 52 | norm_cfg=dict(type='GN', num_groups=32), 53 | loss_track=dict(type='MultiPosCrossEntropyLoss', 54 | loss_weight=0.25, 55 | version='unbiased'), 56 | loss_track_aux=dict( 57 | type='L2Loss', 58 | neg_pos_ub=3, 59 | pos_margin=0, 60 | neg_margin=0.1, 61 | hard_mining=True, 62 | loss_weight=1.0)) 63 | ), 64 | tracker=dict( 65 | type='TETerTAO', 66 | init_score_thr=0.0001, 67 | obj_score_thr=0.0001, 68 | match_score_thr=0.5, 69 | memo_frames=10, 70 | momentum_embed=0.8, 71 | momentum_obj_score=0.5, 72 | match_metric='bisoftmax', 73 | match_with_cosine=True, 74 | contrastive_thr=0.5, 75 | ), 76 | train_cfg=dict( 77 | cem=dict( 78 | assigner=dict( 79 | type='MaxIoUAssigner', 80 | pos_iou_thr=0.7, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.5, 83 | match_low_quality=False, 84 | ignore_iof_thr=-1), 85 | sampler=dict( 86 | type='CombinedSampler', 87 | num=256, 88 | pos_fraction=1, 89 | neg_pos_ub=0, 90 | add_gt_as_proposals=True, 91 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 92 | neg_sampler=dict(type='RandomSampler')) 93 | ) 94 | ), 95 | 96 | test_cfg=dict( 97 | rcnn=dict( 98 | 
score_thr=0.0001, 99 | nms=dict(type='nms', iou_threshold=0.5), 100 | max_per_img=300) 101 | ) 102 | ) 103 | # dataset settings 104 | img_norm_cfg = dict( 105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 106 | train_pipeline = [ 107 | dict(type='LoadMultiImagesFromFile'), 108 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 109 | # dict( 110 | # type='LoadMultiImagesFromFile', 111 | # file_client_args=dict( 112 | # img_db_path='data/lvis/train_imgs.hdf5', 113 | # backend='hdf5', 114 | # type='lvis')), 115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 116 | dict( 117 | type='SeqResize', 118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 119 | (1333, 768), (1333, 800)], 120 | share_params=False, 121 | multiscale_mode='value', 122 | keep_ratio=True), 123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 124 | dict(type='SeqNormalize', **img_norm_cfg), 125 | dict(type='SeqPad', size_divisor=32), 126 | dict(type='SeqDefaultFormatBundle'), 127 | dict( 128 | type='SeqCollect', 129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 130 | ref_prefix='ref'), 131 | ] 132 | 133 | test_pipeline = [ 134 | dict(type='LoadImageFromFile'), 135 | # dict(type='LoadImageFromFile', 136 | # file_client_args=dict( 137 | # img_db_path='data/tao/tao_val_imgs.hdf5', 138 | # backend='hdf5', 139 | # type='tao')), 140 | dict( 141 | type='MultiScaleFlipAug', 142 | img_scale=(1333, 800), 143 | flip=False, 144 | transforms=[ 145 | dict(type='Resize', keep_ratio=True), 146 | dict(type='RandomFlip'), 147 | dict(type='Normalize', **img_norm_cfg), 148 | dict(type='Pad', size_divisor=32), 149 | dict(type='ImageToTensor', keys=['img']), 150 | dict(type='VideoCollect', keys=['img']) 151 | ]) 152 | ] 153 | 154 | ## dataset settings 155 | dataset_type = 'TaoDataset' 156 | data = dict( 157 | samples_per_gpu=2, 158 | workers_per_gpu=2, 159 | train=dict( 160 | _delete_=True, 161 | type='ClassBalancedDataset', 162 | oversample_thr=1e-3, 163 | dataset=dict( 164 | type=dataset_type, 165 | classes='data/lvis/annotations/lvis_classes.txt', 166 | load_as_video=False, 167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 168 | img_prefix='data/lvis/train2017/', 169 | key_img_sampler=dict(interval=1), 170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 171 | pipeline=train_pipeline) 172 | ), 173 | val=dict( 174 | type=dataset_type, 175 | classes='data/lvis/annotations/lvis_classes.txt', 176 | ann_file='data/tao/annotations/validation_ours.json', 177 | img_prefix='data/tao/frames/', 178 | ref_img_sampler=None, 179 | pipeline=test_pipeline), 180 | test=dict( 181 | type=dataset_type, 182 | classes='data/lvis/annotations/lvis_classes.txt', 183 | ann_file='data/tao/annotations/validation_ours.json', 184 | img_prefix='data/tao/frames/', 185 | ref_img_sampler=None, 186 | pipeline=test_pipeline) 187 | 188 | ) 189 | # optimizer 190 | optimizer = dict( 191 | # _delete_=True, 192 | type='AdamW', 193 | lr=0.0001, 194 | betas=(0.9, 0.999), 195 | weight_decay=0.05, 196 | paramwise_cfg=dict( 197 | custom_keys={ 198 | 'absolute_pos_embed': dict(decay_mult=0.), 199 | 'relative_position_bias_table': dict(decay_mult=0.), 200 | 'norm': dict(decay_mult=0.)
201 | })) 202 | optimizer_config = dict(grad_clip=None) 203 | # learning policy 204 | lr_config = dict( 205 | policy='step', 206 | warmup='linear', 207 | warmup_iters=1000, 208 | warmup_ratio=0.001, 209 | step=[27, 33]) 210 | runner = dict(type='EpochBasedRunner', max_epochs=36) 211 | 212 | 213 | # checkpoint saving 214 | checkpoint_config = dict(interval=1) 215 | # yapf:disable 216 | log_config = dict( 217 | interval=50, 218 | hooks=[ 219 | dict(type='TextLoggerHook'), 220 | ]) 221 | # yapf:enable 222 | # runtime settings 223 | total_epochs = 36 224 | dist_params = dict(backend='nccl') 225 | log_level = 'INFO' 226 | load_from = None 227 | resume_from = None 228 | workflow = [('train', 1)] 229 | evaluation = dict(metric=['bbox'], start=2, interval=2) 230 | -------------------------------------------------------------------------------- /configs/tao/cem_swinT_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=96, 11 | depths=[2, 2, 6, 2], 12 | num_heads=[3, 6, 12, 24], 13 | window_size=7, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[96, 192, 384, 768]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | track_head=dict( 47 | type='QuasiDenseEmbedHead', 48 | num_convs=4, 49 | num_fcs=1, 50 | embed_channels=256, 51 | norm_cfg=dict(type='GN', num_groups=32), 52 | loss_track=dict(type='MultiPosCrossEntropyLoss', 53 | loss_weight=0.25, 54 | version='unbiased'), 55 | loss_track_aux=dict( 56 | type='L2Loss', 57 | neg_pos_ub=3, 58 | pos_margin=0, 59 | neg_margin=0.1, 60 | hard_mining=True, 61 | loss_weight=1.0)) 62 | ), 63 | tracker=dict( 64 | type='TETerTAO', 65 | init_score_thr=0.0001, 66 | obj_score_thr=0.0001, 67 | match_score_thr=0.5, 68 | memo_frames=10, 69 | momentum_embed=0.8, 70 | momentum_obj_score=0.5, 71 | match_metric='bisoftmax', 72 | match_with_cosine=True, 73 | contrastive_thr=0.5, 74 | ), 75 | train_cfg=dict( 76 | cem=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | pos_iou_thr=0.7, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.5, 82 | match_low_quality=False, 83 | ignore_iof_thr=-1), 84 | sampler=dict( 85 | type='CombinedSampler', 86 | num=256, 87 | pos_fraction=1, 88 | neg_pos_ub=0, 89 | add_gt_as_proposals=True, 90 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 91 | neg_sampler=dict(type='RandomSampler')) 92 | ) 93 | ), 94 | 95 | test_cfg=dict( 96 | rcnn=dict( 97 | 
score_thr=0.0001, 98 | nms=dict(type='nms', iou_threshold=0.5), 99 | max_per_img=300) 100 | ) 101 | ) 102 | # dataset settings 103 | img_norm_cfg = dict( 104 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 105 | train_pipeline = [ 106 | dict(type='LoadMultiImagesFromFile'), 107 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 108 | # dict( 109 | # type='LoadMultiImagesFromFile', 110 | # file_client_args=dict( 111 | # img_db_path='data/lvis/train_imgs.hdf5', 112 | # backend='hdf5', 113 | # type='lvis')), 114 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 115 | dict( 116 | type='SeqResize', 117 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 118 | (1333, 768), (1333, 800)], 119 | share_params=False, 120 | multiscale_mode='value', 121 | keep_ratio=True), 122 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 123 | dict(type='SeqNormalize', **img_norm_cfg), 124 | dict(type='SeqPad', size_divisor=32), 125 | dict(type='SeqDefaultFormatBundle'), 126 | dict( 127 | type='SeqCollect', 128 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 129 | ref_prefix='ref'), 130 | ] 131 | 132 | test_pipeline = [ 133 | dict(type='LoadImageFromFile'), 134 | # dict(type='LoadImageFromFile', 135 | # file_client_args=dict( 136 | # img_db_path='data/tao/tao_val_imgs.hdf5', 137 | # backend='hdf5', 138 | # type='tao')), 139 | dict( 140 | type='MultiScaleFlipAug', 141 | img_scale=(1333, 800), 142 | flip=False, 143 | transforms=[ 144 | dict(type='Resize', keep_ratio=True), 145 | dict(type='RandomFlip'), 146 | dict(type='Normalize', **img_norm_cfg), 147 | dict(type='Pad', size_divisor=32), 148 | dict(type='ImageToTensor', keys=['img']), 149 | dict(type='VideoCollect', keys=['img']) 150 | ]) 151 | ] 152 | 153 | ## dataset settings 154 | dataset_type = 'TaoDataset' 155 | data = dict( 156 | samples_per_gpu=2, 157 | workers_per_gpu=2, 158 | train=dict( 159 | _delete_=True, 160 | type='ClassBalancedDataset', 161 | oversample_thr=1e-3, 162 | dataset=dict( 163 | type=dataset_type, 164 | classes='data/lvis/annotations/lvis_classes.txt', 165 | load_as_video=False, 166 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 167 | img_prefix='data/lvis/train2017/', 168 | key_img_sampler=dict(interval=1), 169 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 170 | pipeline=train_pipeline) 171 | ), 172 | val=dict( 173 | type=dataset_type, 174 | classes='data/lvis/annotations/lvis_classes.txt', 175 | ann_file='data/tao/annotations/validation_ours.json', 176 | img_prefix='data/tao/frames/', 177 | ref_img_sampler=None, 178 | pipeline=test_pipeline), 179 | test=dict( 180 | type=dataset_type, 181 | classes='data/lvis/annotations/lvis_classes.txt', 182 | ann_file='data/tao/annotations/validation_ours.json', 183 | img_prefix='data/tao/frames/', 184 | ref_img_sampler=None, 185 | pipeline=test_pipeline) 186 | 187 | ) 188 | # optimizer 189 | optimizer = dict( 190 | # _delete_=True, 191 | type='AdamW', 192 | lr=0.0001, 193 | betas=(0.9, 0.999), 194 | weight_decay=0.05, 195 | paramwise_cfg=dict( 196 | custom_keys={ 197 | 'absolute_pos_embed': dict(decay_mult=0.), 198 | 'relative_position_bias_table': dict(decay_mult=0.), 199 | 'norm': dict(decay_mult=0.)
200 | })) 201 | optimizer_config = dict(grad_clip=None) 202 | # learning policy 203 | lr_config = dict( 204 | policy='step', 205 | warmup='linear', 206 | warmup_iters=1000, 207 | warmup_ratio=0.001, 208 | step=[27, 33]) 209 | runner = dict(type='EpochBasedRunner', max_epochs=36) 210 | 211 | 212 | # checkpoint saving 213 | checkpoint_config = dict(interval=1) 214 | # yapf:disable 215 | log_config = dict( 216 | interval=50, 217 | hooks=[ 218 | dict(type='TextLoggerHook'), 219 | ]) 220 | # yapf:enable 221 | # runtime settings 222 | total_epochs = 36 223 | dist_params = dict(backend='nccl') 224 | log_level = 'INFO' 225 | load_from = None 226 | resume_from = None 227 | workflow = [('train', 1)] 228 | evaluation = dict(metric=['bbox'], start=2, interval=2) 229 | -------------------------------------------------------------------------------- /configs/tao/tracker_r101_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_r101_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict(bbox_head=dict(num_classes=1230), 8 | track_head=dict( 9 | type='QuasiDenseEmbedHead', 10 | num_convs=4, 11 | num_fcs=1, 12 | embed_channels=256, 13 | norm_cfg=dict(type='GN', num_groups=32), 14 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), 15 | loss_track_aux=dict( 16 | type='L2Loss', 17 | neg_pos_ub=3, 18 | pos_margin=0, 19 | neg_margin=0.1, 20 | hard_mining=True, 21 | loss_weight=1.0)) 22 | ), 23 | 24 | test_cfg=dict( 25 | rcnn=dict( 26 | score_thr=0.0001, 27 | nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True, split_thr=100000), 28 | max_per_img=50) 29 | ) 30 | ) 31 | 32 | # dataset settings 33 | img_norm_cfg = dict( 34 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 35 | train_pipeline = [ 36 | dict(type='LoadMultiImagesFromFile'), 37 | # dict( 38 | # type='LoadMultiImagesFromFile', 39 | # file_client_args=dict( 40 | # img_db_path='data/tao/tao_train_imgs.hdf5', 41 | # backend='hdf5', 42 | # type='tao')), 43 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 44 | dict( 45 | type='SeqResize', 46 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 47 | (1333, 768), (1333, 800)], 48 | share_params=True, 49 | multiscale_mode='value', 50 | keep_ratio=True), 51 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 52 | dict(type='SeqNormalize', **img_norm_cfg), 53 | dict(type='SeqPad', size_divisor=32), 54 | dict(type='SeqDefaultFormatBundle'), 55 | dict( 56 | type='SeqCollect', 57 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 58 | ref_prefix='ref'), 59 | ] 60 | 61 | test_pipeline = [ 62 | dict(type='LoadImageFromFile'), 63 | # dict(type='LoadImageFromFile', 64 | # file_client_args=dict( 65 | # img_db_path='data/tao/tao_val_imgs.hdf5', 66 | # backend='hdf5', 67 | # type='tao')), 68 | dict( 69 | type='MultiScaleFlipAug', 70 | img_scale=(1333, 800), 71 | flip=False, 72 | transforms=[ 73 | dict(type='Resize', keep_ratio=True), 74 | dict(type='RandomFlip'), 75 | dict(type='Normalize', **img_norm_cfg), 76 | dict(type='Pad', size_divisor=32), 77 | dict(type='ImageToTensor', keys=['img']), 78 | dict(type='VideoCollect', keys=['img']) 79 | ]) 80 | ] 81 | dataset_type = 'TaoDataset' 82 | data = dict( 83 | samples_per_gpu=2, 84 | workers_per_gpu=2, 85 | train=dict( 86 | _delete_=True, 87 | type='ClassBalancedDataset', 88 | oversample_thr=1e-3, 89 | dataset=dict( 90 | 
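# ClassBalancedDataset applies LVIS-style repeat-factor sampling: each
# image is repeated according to its rarest category,
#   r(I) = max_{c in I} max(1, sqrt(oversample_thr / f(c))),
# where f(c) is the fraction of images containing category c. With
# oversample_thr=1e-3, only categories present in fewer than 0.1% of
# images are oversampled; everything else keeps r = 1.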
type=dataset_type, 91 | classes='data/lvis/annotations/lvis_classes.txt', 92 | ann_file='data/tao/annotations/train_ours.json', 93 | img_prefix='data/tao/frames/', 94 | key_img_sampler=dict(interval=1), 95 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 96 | pipeline=train_pipeline)), 97 | val=dict( 98 | type=dataset_type, 99 | classes='data/lvis/annotations/lvis_classes.txt', 100 | ann_file='data/tao/annotations/validation_ours.json', 101 | img_prefix='data/tao/frames/', 102 | ref_img_sampler=None, 103 | pipeline=test_pipeline), 104 | test=dict( 105 | type=dataset_type, 106 | classes='data/lvis/annotations/lvis_classes.txt', 107 | ann_file='data/tao/annotations/validation_ours.json', 108 | img_prefix='data/tao/frames/', 109 | ref_img_sampler=None, 110 | pipeline=test_pipeline) 111 | ) 112 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 113 | lr_config = dict( 114 | policy='step', 115 | warmup='linear', 116 | warmup_iters=1000, 117 | warmup_ratio=1.0 / 1000, 118 | step=[8, 11]) 119 | total_epochs = 12 120 | load_from = None 121 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 122 | work_dir = './saved_models/teter_r101/' 123 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinB_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinB_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | # type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[
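# MultiScaleFlipAug is mmdet's test-time-augmentation wrapper; with a
# single img_scale and flip=False it reduces to one deterministic pass
# (resize to fit within 1333x800, no flip), but the transforms listed
# below would be re-run once per scale/flip combination if more were
# configured.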
79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinB/' 132 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinL_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinL_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | # type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | 
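# SeqNormalize whitens every frame of the clip with the ImageNet
# statistics above, x' = (x - mean) / std per channel after the BGR->RGB
# swap (to_rgb=True); SeqPad then pads each image to a multiple of 32 so
# all FPN strides divide the padded size evenly.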
dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[ 79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinL/' 132 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinS_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinS_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | #
type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[ 79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinS/' 132 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinT_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinT_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | 
type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | # type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[ 79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinT/' 132 | -------------------------------------------------------------------------------- /docs/INSTALL.md: 
--------------------------------------------------------------------------------
1 | ## Installation
2 | TETer builds upon the mmdetection framework.
3 | Please install the following packages.
4 |
5 | ### Requirements
6 | - [pytorch >= 1.10](https://pytorch.org/get-started/locally/)
7 | - [mmcv-full == 1.4.4](https://github.com/open-mmlab/mmcv)
8 | - [mmdetection == 2.23.0](https://github.com/open-mmlab/mmdetection)
9 |
10 |
11 | ### Install TETA
12 |
13 | Please refer to [TETA](../teta/README.md)
14 |
15 |
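One possible way to satisfy these pins (a sketch only: the torch/CUDA combination, and the matching mmcv find-links URL, depend on your machine):

```shell
# Example only: pick the torch/CUDA build that matches your system.
pip install torch==1.10.1 torchvision==0.11.2
pip install mmcv-full==1.4.4 -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10/index.html
pip install mmdet==2.23.0
pip install -r requirements.txt
```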
-------------------------------------------------------------------------------- /figures/teaser-teter.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/figures/teaser-teter.png
-------------------------------------------------------------------------------- /figures/teta-teaser.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/figures/teta-teaser.png
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | lvis
2 | motmetrics
3 | numpy
4 | pycocotools
5 | seaborn
6 | tqdm
7 | timm
8 | h5py
9 | git+https://github.com/bdd100k/bdd100k.git
10 |
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
1 | [isort]
2 | line_length = 79
3 | multi_line_output = 0
4 | known_standard_library = setuptools
5 | known_first_party = teter
6 | known_third_party = cv2,mmcv,mmdet,motmetrics,numpy,pandas,pycocotools,torch,torchvision,tqdm
7 | no_lines_before = STDLIB,LOCALFOLDER
8 | default_section = THIRDPARTY
9 |
10 | [yapf]
11 | BASED_ON_STYLE = pep8
12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
14 |
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import time
4 | from setuptools import find_packages, setup
5 |
6 |
7 | def readme():
8 | with open('README.md', encoding='utf-8') as f:
9 | content = f.read()
10 | return content
11 |
12 |
13 | version_file = 'teter/version.py'
14 |
15 |
16 | def get_git_hash():
17 |
18 | def _minimal_ext_cmd(cmd):
19 | # construct minimal environment
20 | env = {}
21 | for k in ['SYSTEMROOT', 'PATH', 'HOME']:
22 | v = os.environ.get(k)
23 | if v is not None:
24 | env[k] = v
25 | # LANGUAGE is used on win32
26 | env['LANGUAGE'] = 'C'
27 | env['LANG'] = 'C'
28 | env['LC_ALL'] = 'C'
29 | out = subprocess.Popen(
30 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
31 | return out
32 |
33 | try:
34 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
35 | sha = out.strip().decode('ascii')
36 | except OSError:
37 | sha = 'unknown'
38 |
39 | return sha
40 |
41 |
42 | def get_hash():
43 | if os.path.exists('.git'):
44 | sha = get_git_hash()[:7]
45 | elif os.path.exists(version_file):
46 | try:
47 | from teter.version import __version__
48 | sha = __version__.split('+')[-1]
49 | except ImportError:
50 | raise ImportError('Unable to get git version')
51 | else:
52 | sha = 'unknown'
53 |
54 | return sha
55 |
56 |
57 | def write_version_py():
58 | content = """# GENERATED VERSION FILE
59 | # TIME: {}
60 | __version__ = '{}'
61 | short_version = '{}'
62 | version_info = ({})
63 | """
64 | sha = get_hash()
65 | with open('teter/VERSION', 'r') as f:
66 | SHORT_VERSION = f.read().strip()
67 | VERSION_INFO = ', '.join(SHORT_VERSION.split('.'))
68 | VERSION = SHORT_VERSION + '+' + sha
69 |
70 | version_file_str = content.format(time.asctime(), VERSION, SHORT_VERSION,
71 | VERSION_INFO)
72 | with open(version_file, 'w') as f:
73 | f.write(version_file_str)
74 |
75 |
76 | def get_version():
77 | with open(version_file, 'r') as f:
78 | exec(compile(f.read(), version_file, 'exec'))
79 | return locals()['__version__']
80 |
81 |
82 | def get_requirements(filename='requirements.txt'):
83 | here = os.path.dirname(os.path.realpath(__file__))
84 | with open(os.path.join(here, filename), 'r') as f:
85 | requires = [line.replace('\n', '') for line in f.readlines()]
86 | for i, req in enumerate(requires):
87 | if req.startswith("git"):
88 | pkg_name = req.split("/")[-1].split(".")[0]
89 | req = pkg_name
90 | requires[i] = req
91 | return requires
92 |
93 |
94 | if __name__ == '__main__':
95 | write_version_py()
96 | setup(
97 | name='teter',
98 | version=get_version(),
99 | description='TETer: Tracking Every Thing in the Wild.',
100 | long_description=readme(),
101 | packages=find_packages(exclude=('configs', 'tools', 'demo')),
102 | package_data={'teter.ops': ['*/*.so']},
103 | classifiers=[
104 | 'Development Status :: 4 - Beta',
105 | 'License :: OSI Approved :: Apache Software License',
106 | 'Operating System :: OS Independent',
107 | 'Programming Language :: Python :: 3',
108 | 'Programming Language :: Python :: 3.5',
109 | 'Programming Language :: Python :: 3.6',
110 | 'Programming Language :: Python :: 3.7',
111 | ],
112 | license='Apache License 2.0',
113 | setup_requires=['pytest-runner', 'cython', 'numpy'],
114 | tests_require=['pytest', 'xdoctest'],
115 | install_requires=get_requirements(),
116 | zip_safe=False)
117 |
-------------------------------------------------------------------------------- /teta/README.md: --------------------------------------------------------------------------------
1 | # Track Every Thing Accuracy
2 | [Track Every Thing in the Wild](https://arxiv.org/abs/2207.12978) [ECCV 2022].
3 |
4 | This is the official implementation of the TETA metric described in the paper.
5 |
6 |
7 |
8 | The proposed TETA metric disentangles classification performance from tracking.
9 | Instead of using the predicted class labels to group per-class tracking results, we use location with the help of local cluster evaluation.
10 | We treat each ground truth bounding box of the target class as the anchor of each cluster and group prediction results inside each cluster to evaluate the localization and association performance.
11 | Our local clusters enable us to evaluate tracks even when the class prediction is wrong.
12 |
13 |
14 |
15 | ## Install
16 | Install the TETA environment using pip.
17 | ```
18 | pip install -r requirements.txt
19 | ```
20 | Go to the root of the teta folder and install it with
21 | ```
22 | pip install -e .
23 | ```
24 | ## Supported data format
25 | The result format follows the COCO-VID format. We describe it in detail [here](./docs/TAO-format.txt).
26 |
27 | ## How to Run
28 | Run on TAO.
29 | ```
30 | python scripts/run_tao.py --METRICS TETA --TRACKERS_TO_EVAL TETer --GT_FOLDER ${GT_JSON_PATH}.json --TRACKER_SUB_FOLDER ${RESULT_JSON_PATH}.json
31 | ```
32 | Run on BDD100K.
33 | ```
34 | python scripts/run_coco.py --METRICS TETA --TRACKERS_TO_EVAL TETer --GT_FOLDER ${GT_JSON_PATH}.json --TRACKER_SUB_FOLDER ${RESULT_JSON_PATH}.json
35 | ```
36 |
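The run scripts can also be driven from Python; a minimal sketch mirroring `scripts/run_tao.py` (the upper-case keys accepted by `parse_configs` are exactly the CLI flags shown above):

```python
# Minimal sketch based on scripts/run_tao.py; see teta/config.py for all options.
from teta.config import parse_configs
from teta.datasets import TAO
from teta.eval import Evaluator
from teta.metrics import TETA

eval_cfg, dataset_cfg, metrics_cfg = parse_configs()  # parses the flags above
evaluator = Evaluator(eval_cfg)
evaluator.evaluate([TAO(dataset_cfg)], [TETA(exhaustive=False)])
```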
37 | ## Citation
38 |
39 | ```
40 | @InProceedings{trackeverything,
41 | title = {Tracking Every Thing in the Wild},
42 | author = {Li, Siyuan and Danelljan, Martin and Ding, Henghui and Huang, Thomas E. and Yu, Fisher},
43 | booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
44 | month = {Oct},
45 | year = {2022}
46 | }
47 | ```
-------------------------------------------------------------------------------- /teta/docs/TAO-format.txt: --------------------------------------------------------------------------------
1 | Taken from: https://github.com/TAO-Dataset/tao/blob/master/tao/toolkit/tao/tao.py
2 |
3 | Annotation file format:
4 | {
5 | "info" : info,
6 | "images" : [image],
7 | "videos": [video],
8 | "tracks": [track],
9 | "annotations" : [annotation],
10 | "categories": [category],
11 | "licenses" : [license],
12 | }
13 | info: As in MS COCO
14 | image: {
15 | "id" : int,
16 | "video_id": int,
17 | "file_name" : str,
18 | "license" : int,
19 | # Redundant fields for COCO-compatibility
20 | "width": int,
21 | "height": int,
22 | "frame_index": int
23 | }
24 | video: {
25 | "id": int,
26 | "name": str,
27 | "width" : int,
28 | "height" : int,
29 | "neg_category_ids": [int],
30 | "not_exhaustive_category_ids": [int],
31 | "metadata": dict, # Metadata about the video
32 | }
33 | track: {
34 | "id": int,
35 | "category_id": int,
36 | "video_id": int
37 | }
38 | category: {
39 | "id": int,
40 | "name": str,
41 | "synset": str, # For non-LVIS objects, this is "unknown"
42 | ... [other fields copied from LVIS v0.5 and unused]
43 | }
44 | annotation: {
45 | "image_id": int,
46 | "track_id": int,
47 | "bbox": [x,y,width,height],
48 | "area": float,
49 | # Redundant field for compatibility with COCO scripts
50 | "category_id": int
51 | }
52 | license: {
53 | "id" : int,
54 | "name" : str,
55 | "url" : str,
56 | }
57 |
58 | Prediction format:
59 |
60 | [{
61 | "image_id" : int,
62 | "category_id" : int,
63 | "bbox" : [x,y,width,height],
64 | "score" : float,
65 | "track_id": int,
66 | "video_id": int
67 | }]
-------------------------------------------------------------------------------- /teta/figures/figure_1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/teta/figures/figure_1.png
-------------------------------------------------------------------------------- /teta/figures/teta-teaser.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/teta/figures/teta-teaser.png
-------------------------------------------------------------------------------- /teta/requirements.txt: --------------------------------------------------------------------------------
1 | scipy
2 | numpy
-------------------------------------------------------------------------------- /teta/scripts/run_coco.py: --------------------------------------------------------------------------------
1 | """ evaluate.py
2 |
3 | Run example:
4 | evaluate.py --USE_PARALLEL False --METRICS TETA --TRACKERS_TO_EVAL qdtrack
5 |
6 | Command Line Arguments: Defaults, # Comments
7 | Eval arguments:
8 | 'USE_PARALLEL': False,
9 | 'NUM_PARALLEL_CORES': 8,
10 | 'BREAK_ON_ERROR': True, # Raises
exception and exits with error 11 | 'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error 12 | 'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file. 13 | 'PRINT_RESULTS': True, 14 | 'PRINT_ONLY_COMBINED': False, 15 | 'PRINT_CONFIG': True, 16 | 'TIME_PROGRESS': True, 17 | 'DISPLAY_LESS_PROGRESS': True, 18 | 'OUTPUT_SUMMARY': True, 19 | 'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections 20 | 'OUTPUT_TEM_RAW_DATA': True, 21 | Dataset arguments: 22 | 'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data 23 | 'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location 24 | 'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER) 25 | 'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder) 26 | 'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes) 27 | 'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val' 28 | 'PRINT_CONFIG': True, # Whether to print current config 29 | 'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER 30 | 'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER 31 | 'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL 32 | 'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited) 33 | Metric arguments: 34 | 'METRICS': ['HOTA', 'CLEAR', 'Identity', 'TrackMAP'] 35 | """ 36 | 37 | import sys 38 | import os 39 | import argparse 40 | from multiprocessing import freeze_support 41 | 42 | from teta.config import parse_configs 43 | from teta.datasets import COCO 44 | from teta.eval import Evaluator 45 | from teta.metrics import TETA 46 | 47 | 48 | def evaluate(): 49 | """Evaluate with TETA.""" 50 | eval_config, dataset_config, metrics_config = parse_configs() 51 | evaluator = Evaluator(eval_config) 52 | dataset_list = [COCO(dataset_config)] 53 | metrics_list = [] 54 | metric = TETA(exhaustive=True) 55 | if metric.get_name() in metrics_config["METRICS"]: 56 | metrics_list.append(metric) 57 | if len(metrics_list) == 0: 58 | raise Exception("No metrics selected for evaluation") 59 | evaluator.evaluate(dataset_list, metrics_list) 60 | 61 | 62 | if __name__ == "__main__": 63 | freeze_support() 64 | evaluate() 65 | -------------------------------------------------------------------------------- /teta/scripts/run_tao.py: -------------------------------------------------------------------------------- 1 | """ evaluate.py 2 | 3 | Run example: 4 | evaluate.py --USE_PARALLEL False --METRICS TETA --TRACKERS_TO_EVAL qdtrack 5 | 6 | Command Line Arguments: Defaults, # Comments 7 | Eval arguments: 8 | 'USE_PARALLEL': False, 9 | 'NUM_PARALLEL_CORES': 8, 10 | 'BREAK_ON_ERROR': True, # Raises exception and exits with error 11 | 'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error 12 | 'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file. 
13 | 'PRINT_RESULTS': True, 14 | 'PRINT_ONLY_COMBINED': False, 15 | 'PRINT_CONFIG': True, 16 | 'TIME_PROGRESS': True, 17 | 'DISPLAY_LESS_PROGRESS': True, 18 | 'OUTPUT_SUMMARY': True, 19 | 'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections 20 | 'OUTPUT_TEM_RAW_DATA': True, 21 | Dataset arguments: 22 | 'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data 23 | 'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location 24 | 'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER) 25 | 'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder) 26 | 'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes) 27 | 'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val' 28 | 'PRINT_CONFIG': True, # Whether to print current config 29 | 'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER 30 | 'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER 31 | 'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL 32 | 'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited) 33 | Metric arguments: 34 | 'METRICS': ['HOTA', 'CLEAR', 'Identity', 'TrackMAP'] 35 | """ 36 | 37 | import sys 38 | import os 39 | import argparse 40 | from multiprocessing import freeze_support 41 | 42 | from teta.config import parse_configs 43 | from teta.datasets import TAO 44 | from teta.eval import Evaluator 45 | from teta.metrics import TETA 46 | 47 | 48 | def evaluate(): 49 | """Evaluate with TETA.""" 50 | eval_config, dataset_config, metrics_config = parse_configs() 51 | evaluator = Evaluator(eval_config) 52 | dataset_list = [TAO(dataset_config)] 53 | metrics_list = [] 54 | metric = TETA(exhaustive=False) 55 | if metric.get_name() in metrics_config["METRICS"]: 56 | metrics_list.append(metric) 57 | if len(metrics_list) == 0: 58 | raise Exception("No metrics selected for evaluation") 59 | evaluator.evaluate(dataset_list, metrics_list) 60 | 61 | 62 | if __name__ == "__main__": 63 | freeze_support() 64 | evaluate() 65 | -------------------------------------------------------------------------------- /teta/setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import sys 4 | from shutil import rmtree 5 | 6 | from setuptools import find_packages, setup, Command 7 | 8 | # Package meta-data. 9 | NAME = 'teta' 10 | DESCRIPTION = 'Track Every Thing Accuracy (TETA metric)' 11 | EMAIL = 'siyuan.li@vision.ee.ethz.ch' 12 | AUTHOR = 'Siyuan Li' 13 | REQUIRES_PYTHON = '>=3.6.0' 14 | VERSION = '0.1.0' 15 | 16 | # What packages are required for this module to be executed? 17 | REQUIRED = [ 18 | 'script_utils @ git+https://github.com/achalddave/python-script-utils.git@v0.0.2#egg=script_utils', 19 | 'numpy', 'scipy' 20 | ] 21 | 22 | # What packages are optional? 23 | EXTRAS = { 24 | } 25 | 26 | here = os.path.abspath(os.path.dirname(__file__)) 27 | 28 | # Import the README and use it as the long-description. 29 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 
30 | try: 31 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 32 | long_description = '\n' + f.read() 33 | except FileNotFoundError: 34 | long_description = DESCRIPTION 35 | 36 | # Load the package's __version__.py module as a dictionary. 37 | about = {} 38 | if not VERSION: 39 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_") 40 | with open(os.path.join(here, project_slug, '__version__.py')) as f: 41 | exec(f.read(), about) 42 | else: 43 | about['__version__'] = VERSION 44 | 45 | 46 | class UploadCommand(Command): 47 | """Support setup.py upload.""" 48 | 49 | description = 'Build and publish the package.' 50 | user_options = [] 51 | 52 | @staticmethod 53 | def status(s): 54 | """Prints things in bold.""" 55 | print('\033[1m{0}\033[0m'.format(s)) 56 | 57 | def initialize_options(self): 58 | pass 59 | 60 | def finalize_options(self): 61 | pass 62 | 63 | def run(self): 64 | try: 65 | self.status('Removing previous builds…') 66 | rmtree(os.path.join(here, 'dist')) 67 | except OSError: 68 | pass 69 | 70 | self.status('Building Source and Wheel (universal) distribution…') 71 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 72 | 73 | self.status('Uploading the package to PyPI via Twine…') 74 | os.system('twine upload dist/*') 75 | 76 | self.status('Pushing git tags…') 77 | os.system('git tag v{0}'.format(about['__version__'])) 78 | os.system('git push --tags') 79 | 80 | sys.exit() 81 | 82 | 83 | # Where the magic happens: 84 | setup( 85 | name=NAME, 86 | version=about['__version__'], 87 | description=DESCRIPTION, 88 | long_description=long_description, 89 | long_description_content_type='text/markdown', 90 | author=AUTHOR, 91 | author_email=EMAIL, 92 | python_requires=REQUIRES_PYTHON, 93 | # url=URL, 94 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), 95 | # If your package is a single module, use this instead of 'packages': 96 | # py_modules=['tao'], 97 | 98 | # entry_points={ 99 | # 'console_scripts': ['mycli=mymodule:cli'], 100 | # }, 101 | install_requires=REQUIRED, 102 | extras_require=EXTRAS, 103 | include_package_data=True, 104 | license='MIT', 105 | classifiers=[ 106 | # Trove classifiers 107 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 108 | 'License :: OSI Approved :: MIT License', 109 | 'Programming Language :: Python', 110 | 'Programming Language :: Python :: 3', 111 | 'Programming Language :: Python :: 3.6', 112 | 'Programming Language :: Python :: Implementation :: CPython', 113 | 'Programming Language :: Python :: Implementation :: PyPy' 114 | ], 115 | # $ setup.py publish support. 116 | cmdclass={ 117 | 'upload': UploadCommand, 118 | }, 119 | ) -------------------------------------------------------------------------------- /teta/teta/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import config, datasets, metrics, utils 2 | from .eval import Evaluator 3 | -------------------------------------------------------------------------------- /teta/teta/_timing.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from functools import wraps 3 | from time import perf_counter 4 | 5 | DO_TIMING = False 6 | DISPLAY_LESS_PROGRESS = False 7 | timer_dict = {} 8 | counter = 0 9 | 10 | 11 | def time(f): 12 | @wraps(f) 13 | def wrap(*args, **kw): 14 | if DO_TIMING: 15 | # Run function with timing 16 | ts = perf_counter() 17 | result = f(*args, **kw) 18 | te = perf_counter() 19 | tt = te - ts 20 | 21 | # Get function name 22 | arg_names = inspect.getfullargspec(f)[0] 23 | if arg_names[0] == "self" and DISPLAY_LESS_PROGRESS: 24 | return result 25 | elif arg_names[0] == "self": 26 | method_name = type(args[0]).__name__ + "." + f.__name__ 27 | else: 28 | method_name = f.__name__ 29 | 30 | # Record accumulative time in each function for analysis 31 | if method_name in timer_dict.keys(): 32 | timer_dict[method_name] += tt 33 | else: 34 | timer_dict[method_name] = tt 35 | 36 | # If code is finished, display timing summary 37 | if method_name == "Evaluator.evaluate": 38 | print("") 39 | print("Timing analysis:") 40 | for key, value in timer_dict.items(): 41 | print("%-70s %2.4f sec" % (key, value)) 42 | else: 43 | # Get function argument values for printing special arguments of interest 44 | arg_titles = ["tracker", "seq", "cls"] 45 | arg_vals = [] 46 | for i, a in enumerate(arg_names): 47 | if a in arg_titles: 48 | arg_vals.append(args[i]) 49 | arg_text = "(" + ", ".join(arg_vals) + ")" 50 | 51 | # Display methods and functions with different indentation. 52 | if arg_names[0] == "self": 53 | print("%-74s %2.4f sec" % (" " * 4 + method_name + arg_text, tt)) 54 | elif arg_names[0] == "test": 55 | pass 56 | else: 57 | global counter 58 | counter += 1 59 | print("%i %-70s %2.4f sec" % (counter, method_name + arg_text, tt)) 60 | 61 | return result 62 | else: 63 | # If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing. 
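# (DO_TIMING and DISPLAY_LESS_PROGRESS are module-level flags; the evaluator is
# expected to set them from the TIME_PROGRESS / USE_PARALLEL settings before
# any timed function runs.)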
64 | return f(*args, **kw) 65 | 66 | return wrap 67 | -------------------------------------------------------------------------------- /teta/teta/config.py: -------------------------------------------------------------------------------- 1 | """Config.""" 2 | import argparse 3 | import os 4 | 5 | 6 | def parse_configs(): 7 | """Parse command line.""" 8 | default_eval_config = get_default_eval_config() 9 | default_eval_config["DISPLAY_LESS_PROGRESS"] = True 10 | default_dataset_config = get_default_dataset_config() 11 | default_metrics_config = {"METRICS": ["TETA"]} 12 | config = { 13 | **default_eval_config, 14 | **default_dataset_config, 15 | **default_metrics_config, 16 | } 17 | parser = argparse.ArgumentParser() 18 | for setting in config.keys(): 19 | if type(config[setting]) == list or type(config[setting]) == type(None): 20 | parser.add_argument("--" + setting, nargs="+") 21 | else: 22 | parser.add_argument("--" + setting) 23 | args = parser.parse_args().__dict__ 24 | for setting in args.keys(): 25 | if args[setting] is not None: 26 | if type(config[setting]) == type(True): 27 | if args[setting] == "True": 28 | x = True 29 | elif args[setting] == "False": 30 | x = False 31 | else: 32 | raise Exception( 33 | f"Command line parameter {setting} must be True/False" 34 | ) 35 | elif type(config[setting]) == type(1): 36 | x = int(args[setting]) 37 | elif type(args[setting]) == type(None): 38 | x = None 39 | else: 40 | x = args[setting] 41 | config[setting] = x 42 | eval_config = {k: v for k, v in config.items() if k in default_eval_config.keys()} 43 | dataset_config = { 44 | k: v for k, v in config.items() if k in default_dataset_config.keys() 45 | } 46 | metrics_config = { 47 | k: v for k, v in config.items() if k in default_metrics_config.keys() 48 | } 49 | 50 | return eval_config, dataset_config, metrics_config 51 | 52 | 53 | def get_default_eval_config(): 54 | """Returns the default config values for evaluation.""" 55 | code_path = get_code_path() 56 | default_config = { 57 | "USE_PARALLEL": True, 58 | "NUM_PARALLEL_CORES": 8, 59 | "BREAK_ON_ERROR": True, 60 | "RETURN_ON_ERROR": False, 61 | "LOG_ON_ERROR": os.path.join(code_path, "error_log.txt"), 62 | "PRINT_RESULTS": True, 63 | "PRINT_ONLY_COMBINED": True, 64 | "PRINT_CONFIG": True, 65 | "TIME_PROGRESS": True, 66 | "DISPLAY_LESS_PROGRESS": True, 67 | "OUTPUT_SUMMARY": True, 68 | "OUTPUT_EMPTY_CLASSES": True, 69 | "OUTPUT_TEM_RAW_DATA": True, 70 | "OUTPUT_PER_SEQ_RES": True, 71 | } 72 | return default_config 73 | 74 | 75 | def get_default_dataset_config(): 76 | """Default class config values""" 77 | code_path = get_code_path() 78 | default_config = { 79 | "GT_FOLDER": os.path.join( 80 | code_path, "data/gt/tao/tao_training" 81 | ), # Location of GT data 82 | "TRACKERS_FOLDER": os.path.join( 83 | code_path, "data/trackers/tao/tao_training" 84 | ), # Trackers location 85 | "OUTPUT_FOLDER": None, # Where to save eval results (if None, same as TRACKERS_FOLDER) 86 | "TRACKERS_TO_EVAL": ['TETer'], # Filenames of trackers to eval (if None, all in folder) 87 | "CLASSES_TO_EVAL": None, # Classes to eval (if None, all classes) 88 | "SPLIT_TO_EVAL": "training", # Valid: 'training', 'val' 89 | "PRINT_CONFIG": True, # Whether to print current config 90 | "TRACKER_SUB_FOLDER": "data", # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER 91 | "OUTPUT_SUB_FOLDER": "", # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER 92 | "TRACKER_DISPLAY_NAMES": None, # Names of trackers to display, if None: TRACKERS_TO_EVAL 93 
| "MAX_DETECTIONS": 0, # Number of maximal allowed detections per image (0 for unlimited)
94 | }
95 | return default_config
96 |
97 |
98 | def init_config(config, default_config, name=None):
99 | """Initialize non-given config values with defaults."""
100 | if config is None:
101 | config = default_config
102 | else:
103 | for k in default_config.keys():
104 | if k not in config.keys():
105 | config[k] = default_config[k]
106 | if name and config["PRINT_CONFIG"]:
107 | print("\n%s Config:" % name)
108 | for c in config.keys():
109 | print("%-20s : %-30s" % (c, config[c]))
110 | return config
111 |
112 |
113 | def update_config(config):
114 | """
115 | Parse the command-line arguments of a script and update the given config with any values specified there.
116 | :param config: the config to update
117 | :return: the updated config
118 | """
119 | parser = argparse.ArgumentParser()
120 | for setting in config.keys():
121 | if type(config[setting]) == list or type(config[setting]) == type(None):
122 | parser.add_argument("--" + setting, nargs="+")
123 | else:
124 | parser.add_argument("--" + setting)
125 | args = parser.parse_args().__dict__
126 | for setting in args.keys():
127 | if args[setting] is not None:
128 | if type(config[setting]) == type(True):
129 | if args[setting] == "True":
130 | x = True
131 | elif args[setting] == "False":
132 | x = False
133 | else:
134 | raise Exception(
135 | "Command line parameter " + setting + " must be True or False"
136 | )
137 | elif type(config[setting]) == type(1):
138 | x = int(args[setting])
139 | elif type(args[setting]) == type(None):
140 | x = None
141 | else:
142 | x = args[setting]
143 | config[setting] = x
144 | return config
145 |
146 |
147 | def get_code_path():
148 | """Get the base path where the code is."""
149 | return os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
150 |
-------------------------------------------------------------------------------- /teta/teta/datasets/__init__.py: --------------------------------------------------------------------------------
1 | """Datasets."""
2 | from .coco import COCO
3 | from .tao import TAO
4 | from .bdd import BDD
5 | from .coco_mots import COCOMOTS
6 | from .bdd_mots import BDDMOTS
-------------------------------------------------------------------------------- /teta/teta/metrics/__init__.py: --------------------------------------------------------------------------------
1 | from .teta import TETA
2 |
-------------------------------------------------------------------------------- /teta/teta/metrics/_base_metric.py: --------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | import numpy as np
4 |
5 | from .. import _timing
6 | from ..utils import TrackEvalException
7 |
8 |
9 | class _BaseMetric(ABC):
10 | @abstractmethod
11 | def __init__(self):
12 | self.plottable = False
13 | self.integer_fields = []
14 | self.float_fields = []
15 | self.array_labels = []
16 | self.integer_array_fields = []
17 | self.float_array_fields = []
18 | self.fields = []
19 | self.summary_fields = []
20 | self.registered = False
21 |
22 | #####################################################################
23 | # Abstract functions for subclasses to implement
24 |
25 | @_timing.time
26 | @abstractmethod
27 | def eval_sequence(self, data):
28 | ...
29 |
30 | @abstractmethod
31 | def combine_sequences(self, all_res):
32 | ...
33 |
34 | @abstractmethod
35 | def combine_classes_class_averaged(self, all_res, ignore_empty=False):
36 | ...
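# (Intended contract, judging from how the evaluator is invoked: eval_sequence
# produces one result dict per sequence and class, and the combine_* hooks fold
# those dicts across sequences and classes; TETA in metrics/teta.py is the
# concrete subclass shipped here.)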
37 | 38 | @abstractmethod 39 | def combine_classes_det_averaged(self, all_res): 40 | ... 41 | 42 | def plot_single_tracker_results(self, all_res, tracker, output_folder, cls): 43 | """Plot results, only valid for metrics with self.plottable.""" 44 | if self.plottable: 45 | raise NotImplementedError( 46 | f"plot_results is not implemented for metric {self.get_name()}" 47 | ) 48 | else: 49 | pass 50 | 51 | ##################################################################### 52 | # Helper functions which are useful for all metrics: 53 | 54 | @classmethod 55 | def get_name(cls): 56 | return cls.__name__ 57 | 58 | @staticmethod 59 | def _combine_sum(all_res, field): 60 | """Combine sequence results via sum""" 61 | return sum([all_res[k][field] for k in all_res.keys()]) 62 | 63 | @staticmethod 64 | def _combine_weighted_av(all_res, field, comb_res, weight_field): 65 | """Combine sequence results via weighted average.""" 66 | return sum( 67 | [all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()] 68 | ) / np.maximum(1.0, comb_res[weight_field]) 69 | 70 | def print_table(self, table_res, tracker, cls): 71 | """Print table of results for all sequences.""" 72 | print("") 73 | metric_name = self.get_name() 74 | self._row_print( 75 | [metric_name + ": " + tracker + "-" + cls] + self.summary_fields 76 | ) 77 | for seq, results in sorted(table_res.items()): 78 | if seq == "COMBINED_SEQ": 79 | continue 80 | summary_res = self._summary_row(results) 81 | self._row_print([seq] + summary_res) 82 | summary_res = self._summary_row(table_res["COMBINED_SEQ"]) 83 | self._row_print(["COMBINED"] + summary_res) 84 | 85 | def _summary_row(self, results_): 86 | vals = [] 87 | for h in self.summary_fields: 88 | if h in self.float_array_fields: 89 | vals.append("{0:1.5g}".format(100 * np.mean(results_[h]))) 90 | elif h in self.float_fields: 91 | vals.append("{0:1.5g}".format(100 * float(results_[h]))) 92 | elif h in self.integer_fields: 93 | vals.append("{0:d}".format(int(results_[h]))) 94 | else: 95 | raise NotImplementedError( 96 | "Summary function not implemented for this field type." 
97 | ) 98 | return vals 99 | 100 | @staticmethod 101 | def _row_print(*argv): 102 | """Print results in evenly spaced rows, with more space in first row.""" 103 | if len(argv) == 1: 104 | argv = argv[0] 105 | to_print = "%-35s" % argv[0] 106 | for v in argv[1:]: 107 | to_print += "%-10s" % str(v) 108 | print(to_print) 109 | 110 | def summary_results(self, table_res): 111 | """Return a simple summary of final results for a tracker.""" 112 | return dict( 113 | zip(self.summary_fields, self._summary_row(table_res["COMBINED_SEQ"]),) 114 | ) 115 | 116 | def detailed_results(self, table_res): 117 | """Return detailed final results for a tracker.""" 118 | # Get detailed field information 119 | detailed_fields = self.float_fields + self.integer_fields 120 | for h in self.float_array_fields + self.integer_array_fields: 121 | for alpha in [int(100 * x) for x in self.array_labels]: 122 | detailed_fields.append(h + "___" + str(alpha)) 123 | detailed_fields.append(h + "___AUC") 124 | 125 | # Get detailed results 126 | detailed_results = {} 127 | for seq, res in table_res.items(): 128 | detailed_row = self._detailed_row(res) 129 | if len(detailed_row) != len(detailed_fields): 130 | raise TrackEvalException( 131 | f"Field names and data have different sizes " 132 | f"({len(detailed_row)} and {len(detailed_fields)})" 133 | ) 134 | detailed_results[seq] = dict(zip(detailed_fields, detailed_row)) 135 | return detailed_results 136 | 137 | def _detailed_row(self, res): 138 | detailed_row = [] 139 | for h in self.float_fields + self.integer_fields: 140 | detailed_row.append(res[h]) 141 | for h in self.float_array_fields + self.integer_array_fields: 142 | for i, _ in enumerate([int(100 * x) for x in self.array_labels]): 143 | detailed_row.append(res[h][i]) 144 | detailed_row.append(np.mean(res[h])) 145 | return detailed_row 146 | -------------------------------------------------------------------------------- /teta/teta/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | from collections import OrderedDict 4 | 5 | 6 | def validate_metrics_list(metrics_list): 7 | """Get names of metric class and ensures they are unique, further checks that the fields within each metric class 8 | do not have overlapping names. 9 | """ 10 | metric_names = [metric.get_name() for metric in metrics_list] 11 | # check metric names are unique 12 | if len(metric_names) != len(set(metric_names)): 13 | raise TrackEvalException( 14 | "Code being run with multiple metrics of the same name" 15 | ) 16 | fields = [] 17 | for m in metrics_list: 18 | fields += m.fields 19 | # check metric fields are unique 20 | if len(fields) != len(set(fields)): 21 | raise TrackEvalException( 22 | "Code being run with multiple metrics with fields of the same name" 23 | ) 24 | return metric_names 25 | 26 | 27 | def get_track_id_str(ann): 28 | """Get name of track ID in annotation.""" 29 | if "track_id" in ann: 30 | tk_str = "track_id" 31 | elif "instance_id" in ann: 32 | tk_str = "instance_id" 33 | elif "scalabel_id" in ann: 34 | tk_str = "scalabel_id" 35 | else: 36 | assert False, "No track/instance ID." 37 | return tk_str 38 | 39 | 40 | class TrackEvalException(Exception): 41 | """Custom exception for catching expected errors.""" 42 | 43 | ... 
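# A quick illustration of get_track_id_str (the annotation dict is hypothetical;
# any of the three supported ID keys works):
#
#     ann = {"image_id": 3, "instance_id": 17, "bbox": [10.0, 20.0, 30.0, 40.0]}
#     key = get_track_id_str(ann)  # -> "instance_id"
#     track_id = ann[key]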
44 | -------------------------------------------------------------------------------- /teter/VERSION: -------------------------------------------------------------------------------- 1 | 0.1.0 2 | -------------------------------------------------------------------------------- /teter/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ["__version__", "short_version"] 4 | -------------------------------------------------------------------------------- /teter/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import inference_model, init_model 2 | from .test import multi_gpu_test, single_gpu_test 3 | from .train import train_model 4 | 5 | __all__ = [ 6 | "init_model", 7 | "inference_model", 8 | "multi_gpu_test", 9 | "single_gpu_test", 10 | "train_model", 11 | ] 12 | -------------------------------------------------------------------------------- /teter/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | import warnings 5 | from mmcv.ops import RoIPool 6 | from mmcv.parallel import collate, scatter 7 | from mmcv.runner import load_checkpoint 8 | from mmdet.core import get_classes 9 | from mmdet.datasets import replace_ImageToTensor 10 | from mmdet.datasets.pipelines import Compose 11 | 12 | from teter.models import build_model 13 | 14 | 15 | def init_model(config, checkpoint=None, device="cuda:0", cfg_options=None): 16 | """Initialize a detector from config file. 17 | 18 | Args: 19 | config (str or :obj:`mmcv.Config`): Config file path or the config 20 | object. 21 | checkpoint (str, optional): Checkpoint path. If left as None, the model 22 | will not load any weights. 23 | cfg_options (dict): Options to override some settings in the used 24 | config. 25 | 26 | Returns: 27 | nn.Module: The constructed detector. 28 | """ 29 | if isinstance(config, str): 30 | config = mmcv.Config.fromfile(config) 31 | elif not isinstance(config, mmcv.Config): 32 | raise TypeError( 33 | "config must be a filename or Config object, " f"but got {type(config)}" 34 | ) 35 | if cfg_options is not None: 36 | config.merge_from_dict(cfg_options) 37 | config.model.pretrained = None 38 | config.model.train_cfg = None 39 | model = build_model(config.model, test_cfg=config.get("test_cfg")) 40 | if checkpoint is not None: 41 | map_loc = "cpu" if device == "cpu" else None 42 | checkpoint = load_checkpoint(model, checkpoint, map_location=map_loc) 43 | if "CLASSES" in checkpoint["meta"]: 44 | model.CLASSES = checkpoint["meta"]["CLASSES"] 45 | else: 46 | warnings.simplefilter("once") 47 | warnings.warn( 48 | "Class names are not saved in the checkpoint's " 49 | "meta data, use COCO classes by default." 
50 | ) 51 | model.CLASSES = get_classes("coco") 52 | model.cfg = config # save the config in the model for convenience 53 | model.to(device) 54 | model.eval() 55 | return model 56 | 57 | 58 | def inference_model(model, imgs, frame_id): 59 | if isinstance(imgs, (list, tuple)): 60 | is_batch = True 61 | else: 62 | imgs = [imgs] 63 | is_batch = False 64 | 65 | cfg = model.cfg 66 | device = next(model.parameters()).device # model device 67 | 68 | if isinstance(imgs[0], np.ndarray): 69 | cfg = cfg.copy() 70 | # set loading pipeline type 71 | cfg.data.test.pipeline[0].type = "LoadImageFromWebcam" 72 | 73 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 74 | test_pipeline = Compose(cfg.data.test.pipeline) 75 | 76 | datas = [] 77 | for img in imgs: 78 | # prepare data 79 | if isinstance(img, np.ndarray): 80 | # directly add img 81 | data = dict(img=img, frame_id=frame_id) 82 | else: 83 | # add information into dict 84 | data = dict(img_info=dict(filename=img, frame_id=frame_id), img_prefix=None) 85 | # build the data pipeline 86 | 87 | data = test_pipeline(data) 88 | datas.append(data) 89 | 90 | data = collate(datas, samples_per_gpu=len(imgs)) 91 | # just get the actual data from DataContainer 92 | data["img_metas"] = [img_metas.data[0] for img_metas in data["img_metas"]] 93 | data["img"] = [img.data[0] for img in data["img"]] 94 | if next(model.parameters()).is_cuda: 95 | # scatter to specified GPU 96 | data = scatter(data, [device])[0] 97 | else: 98 | for m in model.modules(): 99 | assert not isinstance( 100 | m, RoIPool 101 | ), "CPU inference with RoIPool is not supported currently." 102 | 103 | # forward the model 104 | with torch.no_grad(): 105 | results = model(return_loss=False, rescale=True, detection_only=True, **data) 106 | 107 | if not is_batch: 108 | return results[0] 109 | else: 110 | return results 111 | 112 | 113 | def show_result_pyplot( 114 | model, 115 | img, 116 | result, 117 | score_thr=0.3, 118 | fig_size=(15, 10), 119 | title="result", 120 | block=True, 121 | wait_time=0, 122 | ): 123 | """Visualize the detection results on the image. 124 | 125 | Args: 126 | model (nn.Module): The loaded detector. 127 | img (str or np.ndarray): Image filename or loaded image. 128 | result (tuple[list] or list): The detection result, can be either 129 | (bbox, segm) or just bbox. 130 | score_thr (float): The threshold to visualize the bboxes and masks. 131 | fig_size (tuple): Figure size of the pyplot figure. 132 | title (str): Title of the pyplot figure. 133 | block (bool): Whether to block GUI. Default: True 134 | wait_time (float): Value of waitKey param. 135 | Default: 0. 
136 | """
137 | warnings.warn('"block" will be deprecated in v2.9.0, ' 'Please use "wait_time"')
138 | warnings.warn('"fig_size" is deprecated and takes no effect.')
139 | if hasattr(model, "module"):
140 | model = model.module
141 | model.show_result(
142 | img,
143 | result,
144 | score_thr=score_thr,
145 | show=True,
146 | wait_time=wait_time,
147 | win_name=title,
148 | bbox_color=(72, 101, 241),
149 | text_color=(72, 101, 241),
150 | )
151 |
-------------------------------------------------------------------------------- /teter/apis/test.py: --------------------------------------------------------------------------------
1 | import mmcv
2 | import os.path as osp
3 | import shutil
4 | import tempfile
5 | import time
6 | import torch
7 | import torch.distributed as dist
8 | from collections import defaultdict
9 | from mmcv.runner import get_dist_info
10 |
11 |
12 | def single_gpu_test(model, data_loader, show=False, out_dir=None, show_score_thr=0.3):
13 | model.eval()
14 | results = defaultdict(list)
15 | dataset = data_loader.dataset
16 | prog_bar = mmcv.ProgressBar(len(dataset))
17 | for i, data in enumerate(data_loader):
18 | with torch.no_grad():
19 | result = model(return_loss=False, rescale=True, **data)
20 | for k, v in result.items():
21 | results[k].append(v)
22 |
23 | if show or out_dir:
24 | pass # TODO
25 |
26 | batch_size = data["img"][0].size(0)
27 | for _ in range(batch_size):
28 | prog_bar.update()
29 | return results
30 |
31 |
32 | def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
33 | """Test model with multiple gpus.
34 |
35 | This method tests the model with multiple gpus and collects the results
36 | under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
37 | it encodes results to gpu tensors and uses gpu communication for results
38 | collection. In cpu mode it saves the results on different gpus to 'tmpdir'
39 | and the rank 0 worker collects them.
40 |
41 | Args:
42 | model (nn.Module): Model to be tested.
43 | data_loader (nn.Dataloader): Pytorch data loader.
44 | tmpdir (str): Path of directory to save the temporary results from
45 | different gpus under cpu mode.
46 | gpu_collect (bool): Option to use either gpu or cpu to collect results.
47 |
48 | Returns:
49 | list: The prediction results.
50 | """
51 | model.eval()
52 | results = defaultdict(list)
53 | dataset = data_loader.dataset
54 | rank, world_size = get_dist_info()
55 | if rank == 0:
56 | prog_bar = mmcv.ProgressBar(len(dataset))
57 | time.sleep(2) # This line can prevent deadlock problem in some cases.
58 | for i, data in enumerate(data_loader): 59 | with torch.no_grad(): 60 | result = model(return_loss=False, rescale=True, **data) 61 | for k, v in result.items(): 62 | results[k].append(v) 63 | 64 | if rank == 0: 65 | batch_size = ( 66 | len(data["img_meta"]._data) 67 | if "img_meta" in data 68 | else data["img"][0].size(0) 69 | ) 70 | for _ in range(batch_size * world_size): 71 | prog_bar.update() 72 | 73 | # collect results from all ranks 74 | if gpu_collect: 75 | raise NotImplementedError 76 | else: 77 | results = collect_results_cpu(results, len(dataset), tmpdir) 78 | return results 79 | 80 | 81 | def collect_results_cpu(result_part, size, tmpdir=None): 82 | rank, world_size = get_dist_info() 83 | # create a tmp dir if it is not specified 84 | if tmpdir is None: 85 | MAX_LEN = 512 86 | # 32 is whitespace 87 | dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device="cuda") 88 | if rank == 0: 89 | tmpdir = tempfile.mkdtemp() 90 | tmpdir = torch.tensor( 91 | bytearray(tmpdir.encode()), dtype=torch.uint8, device="cuda" 92 | ) 93 | dir_tensor[: len(tmpdir)] = tmpdir 94 | dist.broadcast(dir_tensor, 0) 95 | tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() 96 | else: 97 | mmcv.mkdir_or_exist(tmpdir) 98 | # dump the part result to the dir 99 | mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl")) 100 | dist.barrier() 101 | # collect all parts 102 | if rank != 0: 103 | return None 104 | else: 105 | # load results of all parts from tmp dir 106 | part_list = defaultdict(list) 107 | for i in range(world_size): 108 | part_file = osp.join(tmpdir, f"part_{i}.pkl") 109 | part_file = mmcv.load(part_file) 110 | for k, v in part_file.items(): 111 | part_list[k].extend(v) 112 | shutil.rmtree(tmpdir) 113 | return part_list 114 | -------------------------------------------------------------------------------- /teter/apis/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 3 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, 4 | Fp16OptimizerHook, OptimizerHook, build_optimizer) 5 | from mmcv.utils import build_from_cfg 6 | # from mmdet.core import Fp16OptimizerHook 7 | from mmdet.datasets import build_dataset 8 | 9 | from teter.core import DistEvalHook, EvalHook 10 | from teter.datasets import build_dataloader 11 | from teter.utils import get_root_logger 12 | 13 | 14 | def train_model( 15 | model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None 16 | ): 17 | logger = get_root_logger(cfg.log_level) 18 | 19 | # prepare data loaders 20 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 21 | if "imgs_per_gpu" in cfg.data: 22 | logger.warning( 23 | '"imgs_per_gpu" is deprecated in MMDet V2.0. 
'
24 | 'Please use "samples_per_gpu" instead'
25 | )
26 | if "samples_per_gpu" in cfg.data:
27 | logger.warning(
28 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
29 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
30 | f"={cfg.data.imgs_per_gpu} is used in this experiment"
31 | )
32 | else:
33 | logger.warning(
34 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"='
35 | f"{cfg.data.imgs_per_gpu} in this experiment"
36 | )
37 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
38 |
39 | data_loaders = [
40 | build_dataloader(
41 | ds,
42 | cfg.data.samples_per_gpu,
43 | cfg.data.workers_per_gpu,
44 | # cfg.gpus will be ignored if distributed
45 | len(cfg.gpu_ids),
46 | dist=distributed,
47 | seed=cfg.seed,
48 | )
49 | for ds in dataset
50 | ]
51 |
52 | # put model on gpus
53 | if distributed:
54 | find_unused_parameters = cfg.get("find_unused_parameters", False)
55 | # Sets the `find_unused_parameters` parameter in
56 | # torch.nn.parallel.DistributedDataParallel
57 | model = MMDistributedDataParallel(
58 | model.cuda(),
59 | device_ids=[torch.cuda.current_device()],
60 | broadcast_buffers=False,
61 | find_unused_parameters=find_unused_parameters,
62 | )
63 | else:
64 | model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
65 |
66 | # build runner
67 | optimizer = build_optimizer(model, cfg.optimizer)
68 | runner = EpochBasedRunner(
69 | model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta
70 | )
71 | # an ugly workaround to make .log and .log.json filenames the same
72 | runner.timestamp = timestamp
73 |
74 | # fp16 setting
75 | fp16_cfg = cfg.get("fp16", None)
76 | if fp16_cfg is not None:
77 | optimizer_config = Fp16OptimizerHook(
78 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed
79 | )
80 | elif distributed and "type" not in cfg.optimizer_config:
81 | optimizer_config = OptimizerHook(**cfg.optimizer_config)
82 | else:
83 | optimizer_config = cfg.optimizer_config
84 |
85 | # register hooks
86 | runner.register_training_hooks(
87 | cfg.lr_config,
88 | optimizer_config,
89 | cfg.checkpoint_config,
90 | cfg.log_config,
91 | cfg.get("momentum_config", None),
92 | )
93 | if distributed:
94 | runner.register_hook(DistSamplerSeedHook())
95 |
96 | # register eval hooks
97 | if validate:
98 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
99 | val_dataloader = build_dataloader(
100 | val_dataset,
101 | samples_per_gpu=1,
102 | workers_per_gpu=cfg.data.workers_per_gpu,
103 | dist=distributed,
104 | shuffle=False,
105 | )
106 | eval_cfg = cfg.get("evaluation", {})
107 | eval_hook = DistEvalHook if distributed else EvalHook
108 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
109 |
110 | # user-defined hooks
111 | if cfg.get("custom_hooks", None):
112 | custom_hooks = cfg.custom_hooks
113 | assert isinstance(
114 | custom_hooks, list
115 | ), f"custom_hooks expects list type, but got {type(custom_hooks)}"
116 | for hook_cfg in cfg.custom_hooks:
117 | assert isinstance(hook_cfg, dict), (
118 | "Each item in custom_hooks expects dict type, but got "
119 | f"{type(hook_cfg)}"
120 | )
121 | hook_cfg = hook_cfg.copy()
122 | priority = hook_cfg.pop("priority", "NORMAL")
123 | hook = build_from_cfg(hook_cfg, HOOKS)
124 | runner.register_hook(hook, priority=priority)
125 |
126 | if cfg.resume_from:
127 | runner.resume(cfg.resume_from)
128 | elif cfg.load_from:
129 | runner.load_checkpoint(cfg.load_from)
130 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
131 |
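# A condensed sketch of how train_model is typically driven by a
# tools/train.py-style entry point (cfg fields such as gpu_ids, seed, workflow
# and total_epochs are assumed to be filled in by that script or the config):
#
#     import mmcv
#     from mmdet.datasets import build_dataset
#     from teter.apis import train_model
#     from teter.models import build_model
#
#     cfg = mmcv.Config.fromfile('configs/tao/tracker_swinT_tao.py')
#     cfg.gpu_ids, cfg.seed = [0], None
#     model = build_model(cfg.model)  # cf. the build_model call in apis/inference.py
#     datasets = [build_dataset(cfg.data.train)]
#     train_model(model, datasets, cfg, distributed=False, validate=True)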
-------------------------------------------------------------------------------- /teter/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * # noqa: F401, F403 2 | from .track import * # noqa: F401, F403 3 | from .utils import * # noqa: F401, F403 4 | -------------------------------------------------------------------------------- /teter/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import DistEvalHook, EvalHook 2 | from .mot import eval_mot 3 | 4 | __all__ = ["EvalHook", "DistEvalHook", "eval_mot"] 5 | -------------------------------------------------------------------------------- /teter/core/evaluation/box_track.toml: -------------------------------------------------------------------------------- 1 | remove_ignored = false 2 | ignored_as_class = false 3 | 4 | [ignored_mapping] 5 | "other person" = "pedestrian" 6 | "other vehicle" = "car" 7 | "trailer" = "truck" 8 | 9 | [name_mapping] 10 | bike = "bicycle" 11 | caravan = "car" 12 | motor = "motorcycle" 13 | person = "pedestrian" 14 | van = "car" 15 | 16 | [scalabel] 17 | [scalabel.imageSize] 18 | height = 720 19 | width = 1280 20 | 21 | [[scalabel.attributes]] 22 | name = "crowd" 23 | type = "switch" 24 | tag = "c" 25 | 26 | [[scalabel.categories]] 27 | name = "human" 28 | [[scalabel.categories.subcategories]] 29 | name = "pedestrian" 30 | 31 | [[scalabel.categories.subcategories]] 32 | name = "rider" 33 | 34 | [[scalabel.categories]] 35 | name = "vehicle" 36 | [[scalabel.categories.subcategories]] 37 | name = "car" 38 | 39 | [[scalabel.categories.subcategories]] 40 | name = "truck" 41 | 42 | [[scalabel.categories.subcategories]] 43 | name = "bus" 44 | 45 | [[scalabel.categories.subcategories]] 46 | name = "train" 47 | 48 | [[scalabel.categories]] 49 | name = "bike" 50 | [[scalabel.categories.subcategories]] 51 | name = "motorcycle" 52 | 53 | [[scalabel.categories.subcategories]] 54 | name = "bicycle" -------------------------------------------------------------------------------- /teter/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch.distributed as dist 3 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 4 | from mmcv.runner import EvalHook as BaseEvalHook 5 | from torch.nn.modules.batchnorm import _BatchNorm 6 | 7 | 8 | class EvalHook(BaseEvalHook): 9 | """Please refer to `mmcv.runner.hooks.evaluation.py:EvalHook` for detailed 10 | docstring.""" 11 | 12 | def _do_evaluate(self, runner): 13 | """perform evaluation and save ckpt.""" 14 | if not self._should_evaluate(runner): 15 | return 16 | 17 | if ( 18 | hasattr(self.dataloader.dataset, "load_as_video") 19 | and self.dataloader.dataset.load_as_video 20 | ): 21 | from teter.apis import single_gpu_test 22 | else: 23 | from mmdet.apis import single_gpu_test 24 | results = single_gpu_test(runner.model, self.dataloader, show=False) 25 | runner.log_buffer.output["eval_iter_num"] = len(self.dataloader) 26 | key_score = self.evaluate(runner, results) 27 | if self.save_best: 28 | self._save_ckpt(runner, key_score) 29 | 30 | 31 | class DistEvalHook(BaseDistEvalHook): 32 | """Please refer to `mmcv.runner.hooks.evaluation.py:DistEvalHook` for 33 | detailed docstring.""" 34 | 35 | def _do_evaluate(self, runner): 36 | """perform evaluation and save ckpt.""" 37 | # Synchronization of BatchNorm's buffer (running_mean 38 | # and 
running_var) is not supported in the DDP of pytorch, 39 | # which may cause the inconsistent performance of models in 40 | # different ranks, so we broadcast BatchNorm's buffers 41 | # of rank 0 to other ranks to avoid this. 42 | if self.broadcast_bn_buffer: 43 | model = runner.model 44 | for name, module in model.named_modules(): 45 | if isinstance(module, _BatchNorm) and module.track_running_stats: 46 | dist.broadcast(module.running_var, 0) 47 | dist.broadcast(module.running_mean, 0) 48 | 49 | if not self._should_evaluate(runner): 50 | return 51 | 52 | tmpdir = self.tmpdir 53 | if tmpdir is None: 54 | tmpdir = osp.join(runner.work_dir, ".eval_hook") 55 | 56 | if ( 57 | hasattr(self.dataloader.dataset, "load_as_video") 58 | and self.dataloader.dataset.load_as_video 59 | ): 60 | from teter.apis import multi_gpu_test 61 | else: 62 | from mmdet.apis import multi_gpu_test 63 | results = multi_gpu_test( 64 | runner.model, self.dataloader, tmpdir=tmpdir, gpu_collect=self.gpu_collect 65 | ) 66 | if runner.rank == 0: 67 | print("\n") 68 | runner.log_buffer.output["eval_iter_num"] = len(self.dataloader) 69 | key_score = self.evaluate(runner, results) 70 | 71 | if self.save_best: 72 | self._save_ckpt(runner, key_score) 73 | -------------------------------------------------------------------------------- /teter/core/to_bdd100k/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import preds2bdd100k 2 | 3 | __all__ = ["preds2bdd100k"] 4 | -------------------------------------------------------------------------------- /teter/core/to_bdd100k/transforms.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | from scalabel.label.io import save 4 | from scalabel.label.transforms import bbox_to_box2d 5 | from scalabel.label.typing import Frame, Label 6 | from tqdm import tqdm 7 | 8 | from ..evaluation import xyxy2xywh 9 | from .utils import mask_merge_parallel 10 | 11 | CATEGORIES = [ 12 | "", 13 | "pedestrian", 14 | "rider", 15 | "car", 16 | "truck", 17 | "bus", 18 | "train", 19 | "motorcycle", 20 | "bicycle", 21 | "traffic light", 22 | "traffic sign", 23 | ] 24 | 25 | 26 | def det_to_bdd100k(dataset, results, out_base, nproc): 27 | bdd100k = [] 28 | ann_id = 0 29 | print(f"\nStart converting to BDD100K detection format") 30 | if "bbox_results" in results: 31 | results = results["bbox_results"] 32 | for idx, bboxes_list in tqdm(enumerate(results)): 33 | img_name = dataset.data_infos[idx]["file_name"] 34 | frame = Frame(name=img_name, labels=[]) 35 | 36 | for cls_, bboxes in enumerate(bboxes_list): 37 | for bbox in bboxes: 38 | ann_id += 1 39 | label = Label( 40 | id=ann_id, 41 | score=bbox[-1], 42 | box2d=bbox_to_box2d(xyxy2xywh(bbox)), 43 | category=CATEGORIES[cls_ + 1], 44 | ) 45 | frame.labels.append(label) 46 | bdd100k.append(frame) 47 | 48 | print(f"\nWriting the converted json") 49 | out_path = osp.join(out_base, "det.json") 50 | save(out_path, bdd100k) 51 | 52 | 53 | def ins_seg_to_bdd100k(dataset, results, out_base, nproc=4): 54 | bdd100k = [] 55 | bitmask_base = osp.join(out_base, "ins_seg") 56 | if not osp.exists(bitmask_base): 57 | os.makedirs(bitmask_base) 58 | 59 | if "bbox_results" in results and "segm_results" in results: 60 | results = [ 61 | [bbox, segm] 62 | for bbox, segm in zip(results["bbox_results"], results["segm_results"]) 63 | ] 64 | 65 | track_dicts = [] 66 | img_names = [dataset.data_infos[idx]["file_name"] for idx in range(len(results))] 67 | 
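# Each image yields one scalabel Frame plus a per-index instance dict below;
# the instance dicts are rendered into bitmask PNGs by mask_merge_parallel
# at the end of this function.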
68 | print(f"\nStart converting to BDD100K instance segmentation format") 69 | ann_id = 0 70 | for idx, [bboxes_list, segms_list] in enumerate(results): 71 | index = 0 72 | frame = Frame(name=img_names[idx], labels=[]) 73 | track_dict = {} 74 | for cls_, (bboxes, segms) in enumerate(zip(bboxes_list, segms_list)): 75 | for bbox, segm in zip(bboxes, segms): 76 | ann_id += 1 77 | index += 1 78 | label = Label(id=str(ann_id), index=index, score=bbox[-1]) 79 | frame.labels.append(label) 80 | instance = {"bbox": bbox, "segm": segm, "label": cls_} 81 | track_dict[index] = instance 82 | 83 | bdd100k.append(frame) 84 | track_dicts.append(track_dict) 85 | 86 | print(f"\nWriting the converted json") 87 | out_path = osp.join(out_base, "ins_seg.json") 88 | save(out_path, bdd100k) 89 | 90 | mask_merge_parallel(track_dicts, img_names, bitmask_base, nproc) 91 | 92 | 93 | def box_track_to_bdd100k(dataset, results, out_base, nproc): 94 | bdd100k = [] 95 | track_base = osp.join(out_base, "box_track") 96 | if not osp.exists(track_base): 97 | os.makedirs(track_base) 98 | 99 | print(f"\nStart converting to BDD100K box tracking format") 100 | for idx, track_dict in tqdm(enumerate(results["track_results"])): 101 | img_name = dataset.data_infos[idx]["file_name"] 102 | frame_index = dataset.data_infos[idx]["frame_id"] 103 | vid_name = os.path.split(img_name)[0] 104 | frame = Frame( 105 | name=img_name, video_name=vid_name, frame_index=frame_index, labels=[] 106 | ) 107 | 108 | for id_, instance in track_dict.items(): 109 | bbox = instance["bbox"] 110 | cls_ = instance["label"] 111 | label = Label( 112 | id=id_, 113 | score=bbox[-1], 114 | box2d=bbox_to_box2d(xyxy2xywh(bbox)), 115 | category=CATEGORIES[cls_ + 1], 116 | ) 117 | frame.labels.append(label) 118 | bdd100k.append(frame) 119 | 120 | print(f"\nWriting the converted json") 121 | out_path = osp.join(out_base, "box_track.json") 122 | save(out_path, bdd100k) 123 | 124 | 125 | def seg_track_to_bdd100k(dataset, results, out_base, nproc=4): 126 | bitmask_base = osp.join(out_base, "seg_track") 127 | if not osp.exists(bitmask_base): 128 | os.makedirs(bitmask_base) 129 | 130 | print(f"\nStart converting to BDD100K seg tracking format") 131 | img_names = [ 132 | dataset.data_infos[idx]["file_name"] 133 | for idx in range(len(results["track_results"])) 134 | ] 135 | mask_merge_parallel(results["track_results"], img_names, bitmask_base, nproc) 136 | 137 | 138 | def preds2bdd100k(dataset, results, tasks, out_base, *args, **kwargs): 139 | metric2func = dict( 140 | det=det_to_bdd100k, 141 | ins_seg=ins_seg_to_bdd100k, 142 | box_track=box_track_to_bdd100k, 143 | seg_track=seg_track_to_bdd100k, 144 | ) 145 | 146 | for task in tasks: 147 | metric2func[task](dataset, results, out_base, *args, **kwargs) 148 | -------------------------------------------------------------------------------- /teter/core/to_bdd100k/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import os.path as osp 4 | import pycocotools.mask as mask_utils 5 | from functools import partial 6 | from multiprocessing import Pool 7 | from PIL import Image 8 | from tqdm import tqdm 9 | 10 | SHAPE = [720, 1280] 11 | 12 | 13 | def mask_prepare(track_dict): 14 | scores, colors, masks = [], [], [] 15 | for id_, instance in track_dict.items(): 16 | masks.append(mask_utils.decode(instance["segm"])) 17 | colors.append([instance["label"] + 1, 0, id_ >> 8, id_ & 255]) 18 | scores.append(instance["bbox"][-1]) 19 | return scores, colors, masks 20 
| 21 | 22 | def mask_merge(mask_infor, img_name, bitmask_base): 23 | scores, colors, masks = mask_infor 24 | bitmask = np.zeros((*SHAPE, 4), dtype=np.uint8) 25 | sorted_idxs = np.argsort(scores) 26 | for idx in sorted_idxs: 27 | for i in range(4): 28 | bitmask[..., i] = ( 29 | bitmask[..., i] * (1 - masks[idx]) + masks[idx] * colors[idx][i] 30 | ) 31 | bitmask_path = osp.join(bitmask_base, img_name.replace(".jpg", ".png")) 32 | bitmask_dir = osp.split(bitmask_path)[0] 33 | if not osp.exists(bitmask_dir): 34 | os.makedirs(bitmask_dir) 35 | bitmask = Image.fromarray(bitmask) 36 | bitmask.save(bitmask_path) 37 | 38 | 39 | def mask_merge_parallel(track_dicts, img_names, bitmask_base, nproc): 40 | with Pool(nproc) as pool: 41 | print("\nCollecting mask information") 42 | mask_infors = pool.map(mask_prepare, tqdm(track_dicts)) 43 | print("\nMerging overlapped masks.") 44 | pool.starmap( 45 | partial(mask_merge, bitmask_base=bitmask_base), 46 | tqdm(zip(mask_infors, img_names), total=len(mask_infors)), 47 | ) 48 | -------------------------------------------------------------------------------- /teter/core/track/__init__.py: -------------------------------------------------------------------------------- 1 | from .similarity import cal_similarity 2 | from .transforms import restore_result, track2result 3 | 4 | __all__ = ["cal_similarity", "track2result", "restore_result"] 5 | -------------------------------------------------------------------------------- /teter/core/track/similarity.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | 6 | def cal_similarity(key_embeds, 7 | ref_embeds, 8 | method='dot_product', 9 | temperature=-1): 10 | 11 | assert method in ['dot_product', 'cosine'] 12 | 13 | if key_embeds.size(0) == 0 or ref_embeds.size(0) == 0: 14 | return torch.zeros((key_embeds.size(0), ref_embeds.size(0)), 15 | device=key_embeds.device) 16 | 17 | if method == 'cosine': 18 | key_embeds = F.normalize(key_embeds, p=2, dim=1) 19 | ref_embeds = F.normalize(ref_embeds, p=2, dim=1) 20 | dists = torch.mm(key_embeds, ref_embeds.t()) 21 | if temperature > 0 and temperature <= 1: 22 | dists /= temperature 23 | return dists 24 | 25 | elif method == 'dot_product': 26 | 27 | if temperature>1: 28 | dists = torch.mm(key_embeds, ref_embeds.t()) 29 | dists *= temperature 30 | else: 31 | dists = torch.mm(key_embeds, ref_embeds.t()) 32 | 33 | return dists 34 | -------------------------------------------------------------------------------- /teter/core/track/transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def track2result(bboxes, labels, ids, num_classes): 6 | valid_inds = ids > -1 7 | bboxes = bboxes[valid_inds] 8 | labels = labels[valid_inds] 9 | ids = ids[valid_inds] 10 | 11 | if bboxes.shape[0] == 0: 12 | return [np.zeros((0, 6), dtype=np.float32) for i in range(num_classes)] 13 | else: 14 | if isinstance(bboxes, torch.Tensor): 15 | bboxes = bboxes.cpu().numpy() 16 | labels = labels.cpu().numpy() 17 | ids = ids.cpu().numpy() 18 | return [ 19 | np.concatenate((ids[labels == i, None], bboxes[labels == i, :]), axis=1) 20 | for i in range(num_classes) 21 | ] 22 | 23 | 24 | def restore_result(result, return_ids=False): 25 | labels = [] 26 | for i, bbox in enumerate(result): 27 | labels.extend([i] * bbox.shape[0]) 28 | bboxes = np.concatenate(result, axis=0).astype(np.float32) 29 | labels = np.array(labels, 
dtype=np.int64) 30 | if return_ids: 31 | ids = bboxes[:, 0].astype(np.int64) 32 | bboxes = bboxes[:, 1:] 33 | return bboxes, labels, ids 34 | else: 35 | return bboxes, labels 36 | -------------------------------------------------------------------------------- /teter/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .visualization import imshow_mot_errors, imshow_tracks 2 | 3 | __all__ = ["imshow_tracks", "imshow_mot_errors"] 4 | -------------------------------------------------------------------------------- /teter/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.builder import DATASETS, PIPELINES, build_dataset 2 | 3 | from .bdd_video_dataset import BDDVideoDataset 4 | from .builder import build_dataloader 5 | from .coco_video_dataset import CocoVideoDataset 6 | from .parsers import CocoVID 7 | from .pipelines import (LoadMultiImagesFromFile, SeqCollect, 8 | SeqDefaultFormatBundle, SeqLoadAnnotations, 9 | SeqNormalize, SeqPad, SeqRandomFlip, SeqResize) 10 | from .tao_dataset import TaoDataset 11 | 12 | __all__ = [ 13 | "DATASETS", 14 | "PIPELINES", 15 | "build_dataloader", 16 | "build_dataset", 17 | "CocoVID", 18 | "BDDVideoDataset", 19 | "CocoVideoDataset", 20 | "LoadMultiImagesFromFile", 21 | "SeqLoadAnnotations", 22 | "SeqResize", 23 | "SeqNormalize", 24 | "SeqRandomFlip", 25 | "SeqPad", 26 | "SeqDefaultFormatBundle", 27 | "SeqCollect", 28 | "TaoDataset", 29 | ] 30 | -------------------------------------------------------------------------------- /teter/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from functools import partial 4 | from mmcv.parallel import collate 5 | from mmcv.runner import get_dist_info 6 | from mmdet.datasets.samplers import DistributedGroupSampler, GroupSampler 7 | from torch.utils.data import DataLoader 8 | 9 | from .samplers import DistributedVideoSampler 10 | 11 | 12 | def build_dataloader( 13 | dataset, 14 | samples_per_gpu, 15 | workers_per_gpu, 16 | num_gpus=1, 17 | dist=True, 18 | shuffle=True, 19 | seed=None, 20 | **kwargs 21 | ): 22 | """Build PyTorch DataLoader. 23 | 24 | In distributed training, each GPU/process has a dataloader. 25 | In non-distributed training, there is only one dataloader for all GPUs. 26 | 27 | Args: 28 | dataset (Dataset): A PyTorch dataset. 29 | samples_per_gpu (int): Number of training samples on each GPU, i.e., 30 | batch size of each GPU. 31 | workers_per_gpu (int): How many subprocesses to use for data loading 32 | for each GPU. 33 | num_gpus (int): Number of GPUs. Only used in non-distributed training. 34 | dist (bool): Distributed training/test or not. Default: True. 35 | shuffle (bool): Whether to shuffle the data at every epoch. 36 | Default: True. 37 | kwargs: any keyword argument to be used to initialize DataLoader 38 | 39 | Returns: 40 | DataLoader: A PyTorch dataloader. 
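    Example (a minimal sketch; ``video_dataset`` is assumed to have been
    created with ``build_dataset`` beforehand):
        >>> loader = build_dataloader(
        ...     video_dataset, samples_per_gpu=2, workers_per_gpu=2,
        ...     num_gpus=1, dist=False, shuffle=True, seed=0)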
41 | """ 42 | rank, world_size = get_dist_info() 43 | if dist: 44 | if shuffle: 45 | sampler = DistributedGroupSampler( 46 | dataset, samples_per_gpu, world_size, rank 47 | ) 48 | else: 49 | sampler = DistributedVideoSampler(dataset, world_size, rank, shuffle=False) 50 | batch_size = samples_per_gpu 51 | num_workers = workers_per_gpu 52 | else: 53 | sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None 54 | batch_size = num_gpus * samples_per_gpu 55 | num_workers = num_gpus * workers_per_gpu 56 | 57 | init_fn = ( 58 | partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) 59 | if seed is not None 60 | else None 61 | ) 62 | 63 | data_loader = DataLoader( 64 | dataset, 65 | batch_size=batch_size, 66 | sampler=sampler, 67 | num_workers=num_workers, 68 | collate_fn=partial(collate, samples_per_gpu=samples_per_gpu), 69 | pin_memory=False, 70 | worker_init_fn=init_fn, 71 | **kwargs 72 | ) 73 | 74 | return data_loader 75 | 76 | 77 | def worker_init_fn(worker_id, num_workers, rank, seed): 78 | # The seed of each worker equals to 79 | # num_worker * rank + worker_id + user_seed 80 | worker_seed = num_workers * rank + worker_id + seed 81 | np.random.seed(worker_seed) 82 | random.seed(worker_seed) 83 | -------------------------------------------------------------------------------- /teter/datasets/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_api import COCO, COCOeval 2 | from .coco_video_parser import CocoVID 3 | 4 | __all__ = ["COCO", "COCOeval", "CocoVID"] 5 | -------------------------------------------------------------------------------- /teter/datasets/parsers/coco_api.py: -------------------------------------------------------------------------------- 1 | # This file add snake case alias for coco api 2 | 3 | import pycocotools 4 | import warnings 5 | from pycocotools.coco import COCO as _COCO 6 | from pycocotools.cocoeval import COCOeval as _COCOeval 7 | 8 | 9 | class COCO(_COCO): 10 | """This class is almost the same as official pycocotools package. 11 | 12 | It implements some snake case function aliases. So that the COCO class has 13 | the same interface as LVIS class. 14 | """ 15 | 16 | def __init__(self, annotation_file=None): 17 | if getattr(pycocotools, "__version__", "0") >= "12.0.2": 18 | warnings.warn( 19 | 'mmpycocotools is deprecated. 
Please install official pycocotools by "pip install pycocotools"', # noqa: E501
20 | UserWarning,
21 | )
22 | super().__init__(annotation_file=annotation_file)
23 | self.img_ann_map = self.imgToAnns
24 | self.cat_img_map = self.catToImgs
25 | 
26 | def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None):
27 | return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd)
28 | 
29 | def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]):
30 | return self.getCatIds(cat_names, sup_names, cat_ids)
31 | 
32 | def get_img_ids(self, img_ids=[], cat_ids=[]):
33 | return self.getImgIds(img_ids, cat_ids)
34 | 
35 | def load_anns(self, ids):
36 | return self.loadAnns(ids)
37 | 
38 | def load_cats(self, ids):
39 | return self.loadCats(ids)
40 | 
41 | def load_imgs(self, ids):
42 | return self.loadImgs(ids)
43 | 
44 | 
45 | # just for the ease of import
46 | COCOeval = _COCOeval
47 | -------------------------------------------------------------------------------- /teter/datasets/parsers/coco_video_parser.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import defaultdict
3 | from pycocotools.coco import _isArrayLike
4 | 
5 | from .coco_api import COCO
6 | 
7 | 
8 | class CocoVID(COCO):
9 | def __init__(self, annotation_file=None):
10 | assert annotation_file, "Annotation file must be provided."
11 | super(CocoVID, self).__init__(annotation_file=annotation_file)
12 | 
13 | def createIndex(self):
14 | print("creating index...")
15 | anns, cats, imgs, vids = {}, {}, {}, {}
16 | imgToAnns, catToImgs, vidToImgs = (
17 | defaultdict(list),
18 | defaultdict(list),
19 | defaultdict(list),
20 | )
21 | 
22 | if "videos" in self.dataset:
23 | for video in self.dataset["videos"]:
24 | vids[video["id"]] = video
25 | 
26 | if "annotations" in self.dataset:
27 | for ann in self.dataset["annotations"]:
28 | imgToAnns[ann["image_id"]].append(ann)
29 | anns[ann["id"]] = ann
30 | 
31 | if "images" in self.dataset:
32 | for img in self.dataset["images"]:
33 | vidToImgs[img["video_id"]].append(img)
34 | imgs[img["id"]] = img
35 | 
36 | if "categories" in self.dataset:
37 | for cat in self.dataset["categories"]:
38 | cats[cat["id"]] = cat
39 | 
40 | if "annotations" in self.dataset and "categories" in self.dataset:
41 | for ann in self.dataset["annotations"]:
42 | catToImgs[ann["category_id"]].append(ann["image_id"])
43 | 
44 | print("index created!")
45 | 
46 | self.anns = anns
47 | self.imgToAnns = imgToAnns
48 | self.catToImgs = catToImgs
49 | self.imgs = imgs
50 | self.cats = cats
51 | self.videos = vids
52 | self.vidToImgs = vidToImgs
53 | 
54 | def get_vid_ids(self, vidIds=[]):
55 | vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
56 | 
57 | if len(vidIds) == 0:
58 | ids = self.videos.keys()
59 | else:
60 | ids = set(vidIds)
61 | 
62 | return list(ids)
63 | 
64 | def get_img_ids_from_vid(self, vidId):
65 | img_infos = self.vidToImgs[vidId]
66 | ids = list(np.zeros([len(img_infos)], dtype=np.int64))  # np.int was removed in NumPy 1.24
67 | for img_info in img_infos:
68 | ids[img_info["frame_id"]] = img_info["id"]
69 | return ids
70 | 
71 | def load_vids(self, ids=[]):
72 | if _isArrayLike(ids):
73 | return [self.videos[id] for id in ids]
74 | elif isinstance(ids, int):
75 | return [self.videos[ids]]
76 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/__init__.py: --------------------------------------------------------------------------------
1 | from .formatting import SeqCollect, SeqDefaultFormatBundle, VideoCollect
2 | 
from .h5backend import HDF5Backend 3 | from .loading import LoadMultiImagesFromFile, SeqLoadAnnotations 4 | from .transforms import (SeqNormalize, SeqPad, SeqPhotoMetricDistortion, 5 | SeqRandomCrop, SeqRandomFlip, SeqResize) 6 | 7 | __all__ = [ 8 | "LoadMultiImagesFromFile", 9 | "SeqLoadAnnotations", 10 | "SeqResize", 11 | "SeqNormalize", 12 | "SeqRandomFlip", 13 | "SeqPad", 14 | "SeqDefaultFormatBundle", 15 | "SeqCollect", 16 | "VideoCollect", 17 | "SeqPhotoMetricDistortion", 18 | "SeqRandomCrop", 19 | "HDF5Backend", 20 | ] 21 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/formatting.py: -------------------------------------------------------------------------------- 1 | from mmcv.parallel import DataContainer as DC 2 | from mmdet.datasets.builder import PIPELINES 3 | from mmdet.datasets.pipelines import Collect, DefaultFormatBundle, to_tensor 4 | 5 | 6 | @PIPELINES.register_module() 7 | class SeqDefaultFormatBundle(DefaultFormatBundle): 8 | def __call__(self, results): 9 | outs = [] 10 | for _results in results: 11 | _results = super().__call__(_results) 12 | _results["gt_match_indices"] = DC(to_tensor(_results["gt_match_indices"])) 13 | outs.append(_results) 14 | return outs 15 | 16 | 17 | @PIPELINES.register_module() 18 | class VideoCollect(Collect): 19 | """Collect data from the loader relevant to the specific task. 20 | 21 | This is usually the last stage of the data loader pipeline. Typically keys 22 | is set to some subset of "img", "proposals", "gt_bboxes", 23 | "gt_bboxes_ignore", "gt_labels", and/or "gt_masks". 24 | 25 | The "img_meta" item is always populated. The contents of the "img_meta" 26 | dictionary depends on "meta_keys". By default this includes: 27 | 28 | - "img_shape": shape of the image input to the network as a tuple \ 29 | (h, w, c). Note that images may be zero padded on the \ 30 | bottom/right if the batch tensor is larger than this shape. 31 | 32 | - "scale_factor": a float indicating the preprocessing scale 33 | 34 | - "flip": a boolean indicating if image flip transform was used 35 | 36 | - "filename": path to the image file 37 | 38 | - "ori_shape": original shape of the image as a tuple (h, w, c) 39 | 40 | - "pad_shape": image shape after padding 41 | 42 | - "img_norm_cfg": a dict of normalization information: 43 | 44 | - mean - per channel mean subtraction 45 | - std - per channel std divisor 46 | - to_rgb - bool indicating if bgr was converted to rgb 47 | 48 | Args: 49 | keys (Sequence[str]): Keys of results to be collected in ``data``. 50 | meta_keys (Sequence[str], optional): Meta keys to be converted to 51 | ``mmcv.DataContainer`` and collected in ``data[img_metas]``. 
52 | Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', 53 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 54 | 'img_norm_cfg')`` 55 | """ 56 | 57 | def __init__( 58 | self, 59 | keys, 60 | meta_keys=( 61 | "filename", 62 | "ori_filename", 63 | "ori_shape", 64 | "img_shape", 65 | "pad_shape", 66 | "scale_factor", 67 | "flip", 68 | "flip_direction", 69 | "img_norm_cfg", 70 | "frame_id", 71 | ), 72 | ): 73 | self.keys = keys 74 | self.meta_keys = meta_keys 75 | 76 | 77 | @PIPELINES.register_module(force=True) 78 | class SeqCollect(VideoCollect): 79 | def __init__( 80 | self, 81 | keys, 82 | ref_prefix="ref", 83 | meta_keys=( 84 | "filename", 85 | "ori_filename", 86 | "ori_shape", 87 | "img_shape", 88 | "pad_shape", 89 | "scale_factor", 90 | "flip", 91 | "flip_direction", 92 | "img_norm_cfg", 93 | ), 94 | ): 95 | self.keys = keys 96 | self.ref_prefix = ref_prefix 97 | self.meta_keys = meta_keys 98 | 99 | def __call__(self, results): 100 | outs = [] 101 | for _results in results: 102 | _results = super().__call__(_results) 103 | outs.append(_results) 104 | 105 | assert len(outs) == 2 106 | data = {} 107 | data.update(outs[0]) 108 | for k, v in outs[1].items(): 109 | data[f"{self.ref_prefix}_{k}"] = v 110 | 111 | return data 112 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/h5backend.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | from mmcv import BaseStorageBackend, FileClient 5 | 6 | 7 | @FileClient.register_backend("hdf5", force=True) 8 | class HDF5Backend(BaseStorageBackend): 9 | def __init__(self, img_db_path=None, vid_db_path=None, type="tao", **kwargs): 10 | 11 | # h5 file path 12 | self.img_db_path = img_db_path 13 | self.vid_db_path = vid_db_path 14 | 15 | self.img_client = None 16 | self.vid_client = None 17 | self.type = type 18 | 19 | def get(self, filepath): 20 | """Get values according to the filepath. 21 | Args: 22 | filepath (str | obj:`Path`): Here, filepath is the lmdb key. 
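        Example (a hedged sketch; the .h5 path and key layout are
        hypothetical):
            >>> from mmcv import FileClient
            >>> client = FileClient(backend="hdf5",
            ...                     img_db_path="data/tao/tao_train.h5",
            ...                     type="tao")
            >>> buf = client.get("frames/train/YFCC100M/v_abc/frame0001.jpg")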
23 | # """ 24 | 25 | filepath = str(filepath) 26 | if self.type == "tao": 27 | if self.img_client is None and self.img_db_path is not None: 28 | self.img_client = h5py.File(self.img_db_path, "r") 29 | key_list = filepath.split("/") 30 | value_buf = np.array( 31 | self.img_client[key_list[-4]][key_list[-3]][key_list[-2]][key_list[-1]] 32 | ) 33 | elif self.type == "key": 34 | if self.img_client is None and self.img_db_path is not None: 35 | self.img_client = h5py.File(self.img_db_path, "r") 36 | value_buf = self.img_client[filepath] 37 | elif self.type == "lvis": 38 | if self.img_client is None and self.img_db_path is not None: 39 | self.img_client = h5py.File(self.img_db_path, "r") 40 | filefolder, filename = os.path.split(filepath) 41 | value_buf = np.array(self.img_client[filename]) 42 | elif self.type == "lasot": 43 | if self.img_client is None and self.img_db_path is not None: 44 | self.img_client = h5py.File(self.img_db_path, "r") 45 | key_list = filepath.split("/") 46 | value_buf = np.array( 47 | self.img_client[key_list[-4]][key_list[-3]][key_list[-2]][key_list[-1]][ 48 | "raw" 49 | ] 50 | )[0] 51 | elif self.type == "bdd": 52 | filefolder, filename = os.path.split(filepath) 53 | path, group_name = os.path.split(filefolder) 54 | 55 | if self.vid_client is None and self.vid_db_path is not None: 56 | self.vid_client = h5py.File(self.vid_db_path, "r") 57 | if self.img_client is None and self.img_db_path is not None: 58 | self.img_client = h5py.File(self.img_db_path, "r") 59 | if "/100k/" in filefolder: 60 | value_buf = np.array(self.img_client[filename]) 61 | else: 62 | group = self.vid_client[group_name] 63 | value_buf = np.array(group[filename]) 64 | 65 | return value_buf 66 | 67 | def get_text(self, filepath): 68 | raise NotImplementedError 69 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.builder import PIPELINES 2 | from mmdet.datasets.pipelines import LoadAnnotations, LoadImageFromFile 3 | 4 | 5 | @PIPELINES.register_module() 6 | class LoadMultiImagesFromFile(LoadImageFromFile): 7 | def __init__(self, *args, **kwargs): 8 | super().__init__(*args, **kwargs) 9 | 10 | def __call__(self, results): 11 | outs = [] 12 | for _results in results: 13 | _results = super().__call__(_results) 14 | outs.append(_results) 15 | return outs 16 | 17 | 18 | @PIPELINES.register_module() 19 | class SeqLoadAnnotations(LoadAnnotations): 20 | def __init__(self, with_ins_id=False, *args, **kwargs): 21 | super().__init__(*args, **kwargs) 22 | self.with_ins_id = with_ins_id 23 | 24 | def _load_ins_ids(self, results): 25 | """Private function to load label annotations. 26 | 27 | Args: 28 | results (dict): Result dict from :obj:`mmdet.CustomDataset`. 29 | 30 | Returns: 31 | dict: The dict contains loaded label annotations. 
32 | """ 33 | 34 | results["gt_match_indices"] = results["ann_info"]["match_indices"].copy() 35 | 36 | return results 37 | 38 | def __call__(self, results): 39 | outs = [] 40 | for _results in results: 41 | _results = super().__call__(_results) 42 | if self.with_ins_id: 43 | _results = self._load_ins_ids(_results) 44 | outs.append(_results) 45 | return outs 46 | -------------------------------------------------------------------------------- /teter/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_video_sampler import DistributedVideoSampler 2 | 3 | __all__ = ["DistributedVideoSampler"] 4 | -------------------------------------------------------------------------------- /teter/datasets/samplers/distributed_video_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import DistributedSampler as _DistributedSampler 3 | 4 | 5 | class DistributedVideoSampler(_DistributedSampler): 6 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=False): 7 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 8 | self.shuffle = shuffle 9 | assert not self.shuffle, "Specific for video sequential testing." 10 | self.num_samples = len(dataset) 11 | 12 | first_frame_indices = [] 13 | for i, img_info in enumerate(self.dataset.data_infos): 14 | if img_info["frame_id"] == 0: 15 | first_frame_indices.append(i) 16 | 17 | chunks = np.array_split(first_frame_indices, num_replicas) 18 | split_flags = [c[0] for c in chunks] 19 | split_flags.append(self.num_samples) 20 | 21 | self.indices = [ 22 | list(range(split_flags[i], split_flags[i + 1])) 23 | for i in range(self.num_replicas) 24 | ] 25 | 26 | def __iter__(self): 27 | indices = self.indices[self.rank] 28 | return iter(indices) 29 | -------------------------------------------------------------------------------- /teter/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import MODELS, TRACKERS, build_model, build_tracker 2 | from .losses import * # noqa: F401,F403 3 | from .mot import * # noqa: F401,F403 4 | from .roi_heads import * # noqa: F401,F403 5 | from .trackers import * # noqa: F401,F403 6 | 7 | __all__ = ["MODELS", "TRACKERS", "build_model", "build_tracker"] 8 | -------------------------------------------------------------------------------- /teter/models/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_model_from_cfg as build 2 | from mmcv.utils import Registry 3 | 4 | MODELS = Registry("model") 5 | TRACKERS = Registry("tracker") 6 | 7 | 8 | def build_tracker(cfg): 9 | """Build tracker.""" 10 | return build(cfg, TRACKERS) 11 | 12 | 13 | def build_model(cfg, train_cfg=None, test_cfg=None): 14 | """Build model.""" 15 | return build(cfg, MODELS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 16 | -------------------------------------------------------------------------------- /teter/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .l2_loss import L2Loss 2 | from .multipos_cross_entropy_loss import MultiPosCrossEntropyLoss 3 | from .unbiased_supcontrat import UnbiasedSupConLoss 4 | 5 | __all__ = ["L2Loss", "MultiPosCrossEntropyLoss", "UnbiasedSupConLoss"] 6 | -------------------------------------------------------------------------------- 
/teter/models/losses/l2_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from mmdet.models import LOSSES, weighted_loss 5 | 6 | 7 | @weighted_loss 8 | def l2_loss(pred, target): 9 | """L2 loss. 10 | 11 | Args: 12 | pred (torch.Tensor): The prediction. 13 | target (torch.Tensor): The learning target of the prediction. 14 | 15 | Returns: 16 | torch.Tensor: Calculated loss 17 | """ 18 | assert pred.size() == target.size() and target.numel() > 0 19 | loss = torch.abs(pred - target) ** 2 20 | return loss 21 | 22 | 23 | @LOSSES.register_module(force=True) 24 | class L2Loss(nn.Module): 25 | """L2 loss. 26 | 27 | Args: 28 | reduction (str, optional): The method to reduce the loss. 29 | Options are "none", "mean" and "sum". 30 | loss_weight (float, optional): The weight of loss. 31 | """ 32 | 33 | def __init__( 34 | self, 35 | neg_pos_ub=-1, 36 | pos_margin=-1, 37 | neg_margin=-1, 38 | hard_mining=False, 39 | reduction="mean", 40 | loss_weight=1.0, 41 | ): 42 | super(L2Loss, self).__init__() 43 | self.neg_pos_ub = neg_pos_ub 44 | self.pos_margin = pos_margin 45 | self.neg_margin = neg_margin 46 | self.hard_mining = hard_mining 47 | self.reduction = reduction 48 | self.loss_weight = loss_weight 49 | 50 | def forward( 51 | self, pred, target, weight=None, avg_factor=None, reduction_override=None 52 | ): 53 | """Forward function. 54 | 55 | Args: 56 | pred (torch.Tensor): The prediction. 57 | target (torch.Tensor): The learning target of the prediction. 58 | weight (torch.Tensor, optional): The weight of loss for each 59 | prediction. Defaults to None. 60 | avg_factor (int, optional): Average factor that is used to average 61 | the loss. Defaults to None. 62 | reduction_override (str, optional): The reduction method used to 63 | override the original reduction method of the loss. 64 | Defaults to None. 
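            Example (a shape sketch with explicit positive/negative targets):
                >>> pred = torch.rand(2, 4)
                >>> target = torch.tensor([[1., 0., 0., 1.],
                ...                        [0., 1., 0., 0.]])
                >>> loss = L2Loss(neg_pos_ub=3, hard_mining=True)(pred, target)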
65 | """ 66 | assert reduction_override in (None, "none", "mean", "sum") 67 | reduction = reduction_override if reduction_override else self.reduction 68 | pred, weight, avg_factor = self.update_weight(pred, target, weight, avg_factor) 69 | loss_bbox = self.loss_weight * l2_loss( 70 | pred, target, weight, reduction=reduction, avg_factor=avg_factor 71 | ) 72 | return loss_bbox 73 | 74 | def update_weight(self, pred, target, weight, avg_factor): 75 | if weight is None: 76 | weight = target.new_ones(target.size()) 77 | invalid_inds = weight <= 0 78 | target[invalid_inds] = -1 79 | pos_inds = target == 1 80 | neg_inds = target == 0 81 | 82 | if self.pos_margin > 0: 83 | pred[pos_inds] -= self.pos_margin 84 | if self.neg_margin > 0: 85 | pred[neg_inds] -= self.neg_margin 86 | pred = torch.clamp(pred, min=0, max=1) 87 | 88 | num_pos = int((target == 1).sum()) 89 | num_neg = int((target == 0).sum()) 90 | if self.neg_pos_ub > 0 and num_neg / num_pos > self.neg_pos_ub: 91 | num_neg = num_pos * self.neg_pos_ub 92 | neg_idx = torch.nonzero(target == 0, as_tuple=False) 93 | 94 | if self.hard_mining: 95 | costs = l2_loss(pred, target, reduction="none")[ 96 | neg_idx[:, 0], neg_idx[:, 1] 97 | ].detach() 98 | neg_idx = neg_idx[costs.topk(num_neg)[1], :] 99 | else: 100 | neg_idx = self.random_choice(neg_idx, num_neg) 101 | 102 | new_neg_inds = neg_inds.new_zeros(neg_inds.size()).bool() 103 | new_neg_inds[neg_idx[:, 0], neg_idx[:, 1]] = True 104 | 105 | invalid_neg_inds = torch.logical_xor(neg_inds, new_neg_inds) 106 | weight[invalid_neg_inds] = 0 107 | 108 | avg_factor = (weight > 0).sum() 109 | return pred, weight, avg_factor 110 | 111 | @staticmethod 112 | def random_choice(gallery, num): 113 | """Random select some elements from the gallery. 114 | 115 | It seems that Pytorch's implementation is slower than numpy so we use 116 | numpy to randperm the indices. 117 | """ 118 | assert len(gallery) >= num 119 | if isinstance(gallery, list): 120 | gallery = np.array(gallery) 121 | cands = np.arange(len(gallery)) 122 | np.random.shuffle(cands) 123 | rand_inds = cands[:num] 124 | if not isinstance(gallery, np.ndarray): 125 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 126 | return gallery[rand_inds] 127 | -------------------------------------------------------------------------------- /teter/models/losses/multipos_cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from mmdet.models import LOSSES, weight_reduce_loss 4 | 5 | 6 | def multi_pos_cross_entropy( 7 | pred, label, 8 | weight=None, 9 | reduction="mean", 10 | avg_factor=None, 11 | version="ori", 12 | pos_normalize=True 13 | ): 14 | 15 | if version == "unbiased": 16 | 17 | valid_mask = label.sum(1) != 0 18 | pred = pred[valid_mask] 19 | label = label[valid_mask] 20 | weight = weight[valid_mask] 21 | logits_max, _ = torch.max(pred, dim=1, keepdim=True) 22 | logits = pred - logits_max.detach() 23 | 24 | if pos_normalize: 25 | pos_norm = torch.div(label, label.sum(1).reshape(-1, 1)) 26 | exp_logits = (torch.exp(logits)) * pos_norm + ( 27 | torch.exp(logits) 28 | ) * torch.logical_not(label) 29 | else: 30 | exp_logits = torch.exp(logits) 31 | exp_logits_input = exp_logits.sum(1, keepdim=True) 32 | log_prob = logits - torch.log(exp_logits_input) 33 | 34 | mean_log_prob_pos = (label * log_prob).sum(1) / label.sum(1) 35 | loss = -mean_log_prob_pos 36 | 37 | elif version == "ori": 38 | # a more numerical stable implementation. 
39 | pos_inds = label == 1 40 | neg_inds = label == 0 41 | pred_pos = pred * pos_inds.float() 42 | pred_neg = pred * neg_inds.float() 43 | # use -inf to mask out unwanted elements. 44 | pred_pos[neg_inds] = pred_pos[neg_inds] + float("inf") 45 | pred_neg[pos_inds] = pred_neg[pos_inds] + float("-inf") 46 | 47 | _pos_expand = torch.repeat_interleave(pred_pos, pred.shape[1], dim=1) 48 | _neg_expand = pred_neg.repeat(1, pred.shape[1]) 49 | 50 | x = torch.nn.functional.pad((_neg_expand - _pos_expand), (0, 1), "constant", 0) 51 | loss = torch.logsumexp(x, dim=1) 52 | 53 | # apply weights and do the reduction 54 | if weight is not None: 55 | weight = weight.float() 56 | loss = weight_reduce_loss( 57 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor 58 | ) 59 | 60 | return loss 61 | 62 | 63 | @LOSSES.register_module(force=True) 64 | class MultiPosCrossEntropyLoss(nn.Module): 65 | def __init__(self, reduction="mean", loss_weight=1.0, version="v3"): 66 | super(MultiPosCrossEntropyLoss, self).__init__() 67 | self.reduction = reduction 68 | self.loss_weight = loss_weight 69 | self.version = version 70 | 71 | def forward( 72 | self, 73 | cls_score, 74 | label, 75 | weight=None, 76 | avg_factor=None, 77 | reduction_override=None, 78 | **kwargs 79 | ): 80 | assert cls_score.size() == label.size() 81 | assert reduction_override in (None, "none", "mean", "sum") 82 | reduction = reduction_override if reduction_override else self.reduction 83 | loss_cls = self.loss_weight * multi_pos_cross_entropy( 84 | cls_score, 85 | label, 86 | weight, 87 | reduction=reduction, 88 | avg_factor=avg_factor, 89 | version=self.version, 90 | **kwargs 91 | ) 92 | return loss_cls 93 | -------------------------------------------------------------------------------- /teter/models/losses/unbiased_supcontrat.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmdet.models import LOSSES 6 | 7 | 8 | @LOSSES.register_module() 9 | class UnbiasedSupConLoss(nn.Module): 10 | def __init__( 11 | self, 12 | temperature=0.07, 13 | contrast_mode="all", 14 | base_temperature=0.07, 15 | pos_normalize=True, 16 | loss_weight=1, 17 | ): 18 | super(UnbiasedSupConLoss, self).__init__() 19 | self.temperature = temperature 20 | self.contrast_mode = contrast_mode 21 | self.base_temperature = base_temperature 22 | self.pos_normalize = pos_normalize 23 | self.loss_weight = loss_weight 24 | 25 | def forward(self, features, labels=None, mask=None): 26 | """Compute loss for model. If both `labels` and `mask` are None, 27 | Args: 28 | features: hidden vector of shape [bsz, n_views, ...]. 29 | labels: ground truth of shape [bsz]. 30 | mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j 31 | has the same class as sample i. Can be asymmetric. 32 | Returns: 33 | A loss scalar. 
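        Example (a shape sketch; the labels are hypothetical):
            >>> feats = torch.randn(8, 2, 128)   # [bsz, n_views, dim]
            >>> labels = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3])
            >>> loss = UnbiasedSupConLoss(temperature=0.07)(feats, labels)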
34 | """ 35 | device = torch.device("cuda") if features.is_cuda else torch.device("cpu") 36 | 37 | if len(features.shape) < 3: 38 | raise ValueError( 39 | "`features` needs to be [bsz, n_views, ...]," 40 | "at least 3 dimensions are required" 41 | ) 42 | if len(features.shape) > 3: 43 | features = features.view(features.shape[0], features.shape[1], -1) 44 | 45 | batch_size = features.shape[0] 46 | if labels is not None and mask is not None: 47 | raise ValueError("Cannot define both `labels` and `mask`") 48 | elif labels is None and mask is None: 49 | mask = torch.eye(batch_size, dtype=torch.float32).to(device) 50 | elif labels is not None: 51 | labels = labels.contiguous().view(-1, 1) 52 | if labels.shape[0] != batch_size: 53 | raise ValueError("Num of labels does not match num of features") 54 | mask = torch.eq(labels, labels.T).float().to(device) 55 | valid_mask = mask.sum(1) != 1 56 | labels = labels[valid_mask] 57 | features = features[valid_mask] 58 | mask = torch.eq(labels, labels.T).float().to(device) 59 | batch_size = features.shape[0] 60 | if batch_size == 0: 61 | return torch.tensor([0.0], requires_grad=True) 62 | else: 63 | mask = mask.float().to(device) 64 | 65 | contrast_count = features.shape[1] 66 | contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0) 67 | if self.contrast_mode == "one": 68 | anchor_feature = features[:, 0] 69 | anchor_count = 1 70 | elif self.contrast_mode == "all": 71 | anchor_feature = contrast_feature 72 | anchor_count = contrast_count 73 | else: 74 | raise ValueError("Unknown mode: {}".format(self.contrast_mode)) 75 | 76 | # compute logits 77 | anchor_dot_contrast = torch.div( 78 | torch.matmul(anchor_feature, contrast_feature.T), self.temperature 79 | ) 80 | # for numerical stability 81 | if min(anchor_dot_contrast.shape) != 0: 82 | # return torch.tensor(0.0).to(anchor_dot_contrast.device) 83 | logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True) 84 | logits = anchor_dot_contrast - logits_max.detach() 85 | else: 86 | logits = anchor_dot_contrast 87 | # tile mask 88 | mask = mask.repeat(anchor_count, contrast_count) 89 | 90 | # mask-out self-contrast cases 91 | logits_mask = torch.scatter( 92 | torch.ones_like(mask), 93 | 1, 94 | torch.arange(batch_size * anchor_count).view(-1, 1).to(device), 95 | 0, 96 | ) 97 | mask = mask * logits_mask 98 | 99 | # compute log_prob 100 | if self.pos_normalize: 101 | pos_norm = torch.div(mask, mask.sum(1).reshape(-1, 1)) 102 | exp_logits = (torch.exp(logits) * logits_mask) * pos_norm + ( 103 | torch.exp(logits) * logits_mask 104 | ) * torch.logical_not(mask) 105 | else: 106 | exp_logits = torch.exp(logits) * logits_mask 107 | exp_logits_input = exp_logits.sum(1, keepdim=True) 108 | log_prob = logits - torch.log(exp_logits_input) 109 | 110 | # compute mean of log-likelihood over positive 111 | mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1) 112 | 113 | # loss 114 | 115 | loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos 116 | loss = loss.view(anchor_count, batch_size).mean() 117 | 118 | return loss * self.loss_weight 119 | -------------------------------------------------------------------------------- /teter/models/mot/__init__.py: -------------------------------------------------------------------------------- 1 | from .teter import TETer 2 | 3 | __all__ = ["TETer"] 4 | -------------------------------------------------------------------------------- /teter/models/mot/teter.py: -------------------------------------------------------------------------------- 1 
| import copy 2 | import numpy as np 3 | from mmdet.core import bbox2result 4 | from mmdet.models import TwoStageDetector 5 | 6 | from teter.core import imshow_tracks, restore_result, track2result 7 | from ..builder import MODELS, build_tracker 8 | 9 | 10 | @MODELS.register_module() 11 | class TETer(TwoStageDetector): 12 | def __init__( 13 | self, 14 | tracker=None, 15 | freeze_detector=False, 16 | freeze_cem=False, 17 | freeze_qd=False, 18 | method="teter", 19 | *args, 20 | **kwargs 21 | ): 22 | self.prepare_cfg(kwargs) 23 | super().__init__(*args, **kwargs) 24 | self.tracker_cfg = tracker 25 | self.method = method 26 | print(self.method) 27 | self.freeze_detector = freeze_detector 28 | self.freeze_cem = freeze_cem 29 | self.freeze_qd = freeze_qd 30 | if self.freeze_detector: 31 | self._freeze_detector() 32 | 33 | def _freeze_detector(self): 34 | 35 | self.detector = [ 36 | self.backbone, 37 | self.neck, 38 | self.rpn_head, 39 | self.roi_head.bbox_head, 40 | ] 41 | if self.freeze_cem: 42 | self.detector.append(self.roi_head.cem_head) 43 | 44 | if self.freeze_qd: 45 | self.detector.append(self.roi_head.track_head) 46 | 47 | for model in self.detector: 48 | model.eval() 49 | for param in model.parameters(): 50 | param.requires_grad = False 51 | 52 | def prepare_cfg(self, kwargs): 53 | if kwargs.get("train_cfg", False): 54 | if kwargs["train_cfg"].get("embed", None): 55 | kwargs["roi_head"]["track_train_cfg"] = kwargs["train_cfg"].get( 56 | "embed", None 57 | ) 58 | if kwargs["train_cfg"].get("cem", None): 59 | kwargs["roi_head"]["cem_train_cfg"] = kwargs["train_cfg"].get( 60 | "cem", None 61 | ) 62 | 63 | def init_tracker(self): 64 | self.tracker = build_tracker(self.tracker_cfg) 65 | 66 | def forward_train( 67 | self, 68 | img, 69 | img_metas, 70 | gt_bboxes, 71 | gt_labels, 72 | gt_match_indices, 73 | ref_img, 74 | ref_img_metas, 75 | ref_gt_bboxes, 76 | ref_gt_labels, 77 | ref_gt_match_indices, 78 | gt_bboxes_ignore=None, 79 | gt_masks=None, 80 | ref_gt_bboxes_ignore=None, 81 | ref_gt_masks=None, 82 | **kwargs 83 | ): 84 | x = self.extract_feat(img) 85 | 86 | losses = dict() 87 | 88 | # RPN forward and loss 89 | proposal_cfg = self.train_cfg.get("rpn_proposal", self.test_cfg.rpn) 90 | rpn_losses, proposal_list = self.rpn_head.forward_train( 91 | x, 92 | img_metas, 93 | gt_bboxes, 94 | gt_labels=None, 95 | gt_bboxes_ignore=gt_bboxes_ignore, 96 | proposal_cfg=proposal_cfg, 97 | ) 98 | losses.update(rpn_losses) 99 | 100 | ref_x = self.extract_feat(ref_img) 101 | ref_proposals = self.rpn_head.simple_test_rpn(ref_x, ref_img_metas) 102 | 103 | roi_losses = self.roi_head.forward_train( 104 | x, 105 | img_metas, 106 | proposal_list, 107 | gt_bboxes, 108 | gt_labels, 109 | gt_match_indices, 110 | ref_x, 111 | ref_img_metas, 112 | ref_proposals, 113 | ref_gt_bboxes, 114 | ref_gt_labels, 115 | gt_bboxes_ignore, 116 | gt_masks, 117 | ref_gt_bboxes_ignore, 118 | **kwargs 119 | ) 120 | losses.update(roi_losses) 121 | 122 | return losses 123 | 124 | def simple_test(self, img, img_metas, rescale=False): 125 | 126 | assert self.roi_head.with_track, "Track head must be implemented." 
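        # Test-time frames of one video arrive sequentially (the
        # DistributedVideoSampler splits data at video boundaries), so
        # frame_id == 0 marks a new video and triggers the tracker reset below.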
127 | frame_id = img_metas[0].get("frame_id", -1) 128 | if frame_id == 0: 129 | self.init_tracker() 130 | 131 | x = self.extract_feat(img) 132 | proposal_list = self.rpn_head.simple_test_rpn(x, img_metas) 133 | 134 | outputs = self.roi_head.simple_test(x, img_metas, proposal_list, rescale) 135 | if len(outputs) == 4: 136 | det_bboxes, det_labels, cem_feats, track_feats = outputs 137 | elif len(outputs) == 3: 138 | det_bboxes, det_labels, track_feats = outputs 139 | cem_feats = copy.deepcopy(track_feats) 140 | 141 | if track_feats is not None: 142 | 143 | bboxes, labels, ids = self.tracker.match( 144 | bboxes=det_bboxes, 145 | labels=det_labels, 146 | embeds=track_feats, 147 | cls_embeds=cem_feats, 148 | frame_id=frame_id, 149 | method=self.method, 150 | ) 151 | 152 | bbox_result = bbox2result( 153 | det_bboxes, det_labels, self.roi_head.bbox_head.num_classes 154 | ) 155 | 156 | if track_feats is not None: 157 | track_result = track2result( 158 | bboxes, labels, ids, self.roi_head.bbox_head.num_classes 159 | ) 160 | else: 161 | track_result = [ 162 | np.zeros((0, 6), dtype=np.float32) 163 | for i in range(self.roi_head.bbox_head.num_classes) 164 | ] 165 | return dict(bbox_results=bbox_result, track_results=track_result) 166 | 167 | def show_result( 168 | self, 169 | img, 170 | result, 171 | thickness=1, 172 | font_scale=0.5, 173 | show=False, 174 | out_file=None, 175 | wait_time=0, 176 | backend="cv2", 177 | **kwargs 178 | ): 179 | """Visualize tracking results. 180 | 181 | Args: 182 | img (str | ndarray): Filename of loaded image. 183 | result (dict): Tracking result. 184 | The value of key 'track_results' is ndarray with shape (n, 6) 185 | in [id, tl_x, tl_y, br_x, br_y, score] format. 186 | The value of key 'bbox_results' is ndarray with shape (n, 5) 187 | in [tl_x, tl_y, br_x, br_y, score] format. 188 | thickness (int, optional): Thickness of lines. Defaults to 1. 189 | font_scale (float, optional): Font scales of texts. Defaults 190 | to 0.5. 191 | show (bool, optional): Whether show the visualizations on the 192 | fly. Defaults to False. 193 | out_file (str | None, optional): Output filename. Defaults to None. 194 | backend (str, optional): Backend to draw the bounding boxes, 195 | options are `cv2` and `plt`. Defaults to 'cv2'. 196 | 197 | Returns: 198 | ndarray: Visualized image. 
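        Example (a sketch; the frame path and a ``result`` dict returned by
        ``simple_test`` are assumed):
            >>> vis = model.show_result(
            ...     "video/frame_0001.jpg", result, show=False,
            ...     out_file="vis/frame_0001.jpg")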
199 | """ 200 | assert isinstance(result, dict) 201 | track_result = result.get("track_results", None) 202 | bboxes, labels, ids = restore_result(track_result, return_ids=True) 203 | img = imshow_tracks( 204 | img, 205 | bboxes, 206 | labels, 207 | ids, 208 | classes=self.CLASSES, 209 | thickness=thickness, 210 | font_scale=font_scale, 211 | show=show, 212 | out_file=out_file, 213 | wait_time=wait_time, 214 | backend=backend, 215 | ) 216 | return img 217 | -------------------------------------------------------------------------------- /teter/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .teter_roi_head import TETerRoIHead 2 | from .track_heads import QuasiDenseEmbedHead 3 | 4 | __all__ = ["QuasiDenseEmbedHead", "TETerRoIHead"] 5 | -------------------------------------------------------------------------------- /teter/models/roi_heads/track_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .cem_head import ClsExemplarHead 2 | from .quasi_dense_embed_head import QuasiDenseEmbedHead 3 | 4 | __all__ = ["QuasiDenseEmbedHead", "ClsExemplarHead"] 5 | -------------------------------------------------------------------------------- /teter/models/roi_heads/track_heads/cem_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | from mmdet.models import HEADS, build_loss 6 | 7 | from teter.core import cal_similarity 8 | 9 | 10 | @HEADS.register_module(force=True) 11 | class ClsExemplarHead(nn.Module): 12 | def __init__( 13 | self, 14 | num_convs=4, 15 | num_fcs=1, 16 | roi_feat_size=7, 17 | in_channels=256, 18 | conv_out_channels=256, 19 | fc_out_channels=1024, 20 | embed_channels=256, 21 | conv_cfg=None, 22 | norm_cfg=None, 23 | softmax_temp=-1, 24 | loss_track=dict(type="MultiPosCrossEntropyLoss", loss_weight=1), 25 | ): 26 | super(ClsExemplarHead, self).__init__() 27 | 28 | self.num_convs = num_convs 29 | self.num_fcs = num_fcs 30 | self.roi_feat_size = roi_feat_size 31 | self.in_channels = in_channels 32 | self.conv_out_channels = conv_out_channels 33 | self.fc_out_channels = fc_out_channels 34 | self.embed_channels = embed_channels 35 | self.conv_cfg = conv_cfg 36 | self.norm_cfg = norm_cfg 37 | self.relu = nn.ReLU(inplace=True) 38 | self.convs, self.fcs, last_layer_dim = self._add_conv_fc_branch( 39 | self.num_convs, self.num_fcs, self.in_channels 40 | ) 41 | self.fc_embed = nn.Linear(last_layer_dim, embed_channels) 42 | 43 | self.softmax_temp = softmax_temp 44 | self.loss_track = build_loss(loss_track) 45 | 46 | def _add_conv_fc_branch(self, num_convs, num_fcs, in_channels): 47 | last_layer_dim = in_channels 48 | # add branch specific conv layers 49 | convs = nn.ModuleList() 50 | if num_convs > 0: 51 | for i in range(num_convs): 52 | conv_in_channels = last_layer_dim if i == 0 else self.conv_out_channels 53 | convs.append( 54 | ConvModule( 55 | conv_in_channels, 56 | self.conv_out_channels, 57 | 3, 58 | padding=1, 59 | conv_cfg=self.conv_cfg, 60 | norm_cfg=self.norm_cfg, 61 | ) 62 | ) 63 | last_layer_dim = self.conv_out_channels 64 | # add branch specific fc layers 65 | fcs = nn.ModuleList() 66 | if num_fcs > 0: 67 | last_layer_dim *= self.roi_feat_size * self.roi_feat_size 68 | for i in range(num_fcs): 69 | fc_in_channels = last_layer_dim if i == 0 else self.fc_out_channels 70 | fcs.append(nn.Linear(fc_in_channels, 
self.fc_out_channels)) 71 | last_layer_dim = self.fc_out_channels 72 | return convs, fcs, last_layer_dim 73 | 74 | def init_weights(self): 75 | 76 | for m in self.fcs: 77 | if isinstance(m, nn.Linear): 78 | nn.init.xavier_uniform_(m.weight) 79 | nn.init.constant_(m.bias, 0) 80 | nn.init.normal_(self.fc_embed.weight, 0, 0.01) 81 | nn.init.constant_(self.fc_embed.bias, 0) 82 | 83 | def forward(self, x): 84 | 85 | if self.num_convs > 0: 86 | for i, conv in enumerate(self.convs): 87 | x = conv(x) 88 | x = x.view(x.size(0), -1) 89 | if self.num_fcs > 0: 90 | for i, fc in enumerate(self.fcs): 91 | x = self.relu(fc(x)) 92 | x = self.fc_embed(x) 93 | 94 | return x 95 | 96 | def sup_contra_loss(self, features, labels): 97 | 98 | losses = dict() 99 | loss_track = self.loss_track(features, labels) 100 | losses["loss_cem"] = loss_track 101 | 102 | return losses 103 | -------------------------------------------------------------------------------- /teter/models/roi_heads/track_heads/quasi_dense_embed_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | from mmdet.models import HEADS, build_loss 6 | 7 | from teter.core import cal_similarity 8 | 9 | 10 | @HEADS.register_module(force=True) 11 | class QuasiDenseEmbedHead(nn.Module): 12 | def __init__( 13 | self, 14 | num_convs=4, 15 | num_fcs=1, 16 | roi_feat_size=7, 17 | in_channels=256, 18 | conv_out_channels=256, 19 | fc_out_channels=1024, 20 | embed_channels=256, 21 | conv_cfg=None, 22 | norm_cfg=None, 23 | softmax_temp=-1, 24 | loss_track=dict(type="MultiPosCrossEntropyLoss", loss_weight=0.25), 25 | loss_track_aux=dict( 26 | type="L2Loss", sample_ratio=3, margin=0.3, loss_weight=1.0, hard_mining=True 27 | ), 28 | ): 29 | super(QuasiDenseEmbedHead, self).__init__() 30 | self.num_convs = num_convs 31 | self.num_fcs = num_fcs 32 | self.roi_feat_size = roi_feat_size 33 | self.in_channels = in_channels 34 | self.conv_out_channels = conv_out_channels 35 | self.fc_out_channels = fc_out_channels 36 | self.embed_channels = embed_channels 37 | self.conv_cfg = conv_cfg 38 | self.norm_cfg = norm_cfg 39 | self.relu = nn.ReLU(inplace=True) 40 | self.convs, self.fcs, last_layer_dim = self._add_conv_fc_branch( 41 | self.num_convs, self.num_fcs, self.in_channels 42 | ) 43 | self.fc_embed = nn.Linear(last_layer_dim, embed_channels) 44 | 45 | self.softmax_temp = softmax_temp 46 | self.loss_track = build_loss(loss_track) 47 | if loss_track_aux is not None: 48 | self.loss_track_aux = build_loss(loss_track_aux) 49 | else: 50 | self.loss_track_aux = None 51 | 52 | def _add_conv_fc_branch(self, num_convs, num_fcs, in_channels): 53 | last_layer_dim = in_channels 54 | # add branch specific conv layers 55 | convs = nn.ModuleList() 56 | if num_convs > 0: 57 | for i in range(num_convs): 58 | conv_in_channels = last_layer_dim if i == 0 else self.conv_out_channels 59 | convs.append( 60 | ConvModule( 61 | conv_in_channels, 62 | self.conv_out_channels, 63 | 3, 64 | padding=1, 65 | conv_cfg=self.conv_cfg, 66 | norm_cfg=self.norm_cfg, 67 | ) 68 | ) 69 | last_layer_dim = self.conv_out_channels 70 | # add branch specific fc layers 71 | fcs = nn.ModuleList() 72 | if num_fcs > 0: 73 | last_layer_dim *= self.roi_feat_size * self.roi_feat_size 74 | for i in range(num_fcs): 75 | fc_in_channels = last_layer_dim if i == 0 else self.fc_out_channels 76 | fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels)) 77 | last_layer_dim = self.fc_out_channels 
78 | return convs, fcs, last_layer_dim 79 | 80 | def init_weights(self): 81 | for m in self.fcs: 82 | if isinstance(m, nn.Linear): 83 | nn.init.xavier_uniform_(m.weight) 84 | nn.init.constant_(m.bias, 0) 85 | nn.init.normal_(self.fc_embed.weight, 0, 0.01) 86 | nn.init.constant_(self.fc_embed.bias, 0) 87 | 88 | def forward(self, x): 89 | if self.num_convs > 0: 90 | for i, conv in enumerate(self.convs): 91 | x = conv(x) 92 | x = x.view(x.size(0), -1) 93 | if self.num_fcs > 0: 94 | for i, fc in enumerate(self.fcs): 95 | x = self.relu(fc(x)) 96 | x = self.fc_embed(x) 97 | return x 98 | 99 | def get_track_targets( 100 | self, gt_match_indices, key_sampling_results, ref_sampling_results 101 | ): 102 | track_targets = [] 103 | track_weights = [] 104 | for _gt_match_indices, key_res, ref_res in zip( 105 | gt_match_indices, key_sampling_results, ref_sampling_results 106 | ): 107 | targets = _gt_match_indices.new_zeros( 108 | (key_res.pos_bboxes.size(0), ref_res.bboxes.size(0)), dtype=torch.int 109 | ) 110 | _match_indices = _gt_match_indices[key_res.pos_assigned_gt_inds] 111 | pos2pos = ( 112 | _match_indices.view(-1, 1) == ref_res.pos_assigned_gt_inds.view(1, -1) 113 | ).int() 114 | targets[:, : pos2pos.size(1)] = pos2pos 115 | weights = (targets.sum(dim=1) > 0).float() 116 | track_targets.append(targets) 117 | track_weights.append(weights) 118 | return track_targets, track_weights 119 | 120 | def match(self, key_embeds, ref_embeds, key_sampling_results, ref_sampling_results): 121 | num_key_rois = [res.pos_bboxes.size(0) for res in key_sampling_results] 122 | key_embeds = torch.split(key_embeds, num_key_rois) 123 | num_ref_rois = [res.bboxes.size(0) for res in ref_sampling_results] 124 | ref_embeds = torch.split(ref_embeds, num_ref_rois) 125 | 126 | dists, cos_dists = [], [] 127 | for key_embed, ref_embed in zip(key_embeds, ref_embeds): 128 | dist = cal_similarity( 129 | key_embed, 130 | ref_embed, 131 | method="dot_product", 132 | temperature=self.softmax_temp, 133 | ) 134 | dists.append(dist) 135 | if self.loss_track_aux is not None: 136 | cos_dist = cal_similarity(key_embed, ref_embed, method="cosine") 137 | cos_dists.append(cos_dist) 138 | else: 139 | cos_dists.append(None) 140 | return dists, cos_dists 141 | 142 | def loss(self, dists, cos_dists, targets, weights): 143 | losses = dict() 144 | 145 | loss_track = 0.0 146 | loss_track_aux = 0.0 147 | for _dists, _cos_dists, _targets, _weights in zip( 148 | dists, cos_dists, targets, weights 149 | ): 150 | loss_track += self.loss_track( 151 | _dists, _targets, _weights, avg_factor=_weights.sum() 152 | ) 153 | if self.loss_track_aux is not None: 154 | loss_track_aux += self.loss_track_aux(_cos_dists, _targets) 155 | losses["loss_track"] = loss_track / len(dists) 156 | 157 | if self.loss_track_aux is not None: 158 | losses["loss_track_aux"] = loss_track_aux / len(dists) 159 | 160 | return losses 161 | 162 | @staticmethod 163 | def random_choice(gallery, num): 164 | """Random select some elements from the gallery. 165 | 166 | It seems that Pytorch's implementation is slower than numpy so we use 167 | numpy to randperm the indices. 
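        Example:
            >>> gallery = torch.arange(10)
            >>> QuasiDenseEmbedHead.random_choice(gallery, num=3).shape
            torch.Size([3])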
--------------------------------------------------------------------------------
/teter/models/trackers/__init__.py:
--------------------------------------------------------------------------------
1 | from .teter_bdd import TETerBDD
2 | from .teter_tao import TETerTAO
3 |
4 | __all__ = ["TETerTAO", "TETerBDD"]
5 |
--------------------------------------------------------------------------------
/teter/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .collect_env import collect_env
2 | from .logger import get_root_logger
3 |
4 | __all__ = ["collect_env", "get_root_logger"]
5 |
--------------------------------------------------------------------------------
/teter/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import mmcv
3 | import os.path as osp
4 | import subprocess
5 | import sys
6 | import torch
7 | import torchvision
8 | from collections import defaultdict
9 |
10 | import teter
11 |
12 |
13 | def collect_env():
14 |     env_info = {}
15 |     env_info["sys.platform"] = sys.platform
16 |     env_info["Python"] = sys.version.replace("\n", "")
17 |
18 |     cuda_available = torch.cuda.is_available()
19 |     env_info["CUDA available"] = cuda_available
20 |
21 |     if cuda_available:
22 |         from torch.utils.cpp_extension import CUDA_HOME
23 |
24 |         env_info["CUDA_HOME"] = CUDA_HOME
25 |
26 |         if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
27 |             try:
28 |                 nvcc = osp.join(CUDA_HOME, "bin/nvcc")
29 |                 nvcc = subprocess.check_output(f'"{nvcc}" -V | tail -n1', shell=True)
30 |                 nvcc = nvcc.decode("utf-8").strip()
31 |             except subprocess.SubprocessError:
32 |                 nvcc = "Not Available"
33 |             env_info["NVCC"] = nvcc
34 |
35 |         devices = defaultdict(list)
36 |         for k in range(torch.cuda.device_count()):
37 |             devices[torch.cuda.get_device_name(k)].append(str(k))
38 |         for name, devids in devices.items():
39 |             env_info["GPU " + ",".join(devids)] = name
40 |
41 |     gcc = subprocess.check_output("gcc --version | head -n1", shell=True)
42 |     gcc = gcc.decode("utf-8").strip()
43 |     env_info["GCC"] = gcc
44 |
45 |     env_info["PyTorch"] = torch.__version__
46 |     env_info["PyTorch compiling details"] = torch.__config__.show()
47 |
48 |     env_info["TorchVision"] = torchvision.__version__
49 |
50 |     env_info["OpenCV"] = cv2.__version__
51 |
52 |     env_info["MMCV"] = mmcv.__version__
53 |     env_info["teter"] = teter.__version__
54 |
55 |     return env_info
56 |
57 |
58 | if __name__ == "__main__":
59 |     for name, val in collect_env().items():
60 |         print(f"{name}: {val}")
61 |
--------------------------------------------------------------------------------
/teter/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from mmcv.utils import get_logger
3 |
4 |
5 | def get_root_logger(log_file=None, log_level=logging.INFO):
6 |     return get_logger("teter", log_file, log_level)
7 |
--------------------------------------------------------------------------------
/teter/version.py:
--------------------------------------------------------------------------------
1 | # GENERATED VERSION FILE
2 | __version__ = "dev-0.1.0"
3 | short_version = "0.1.0"
4 | version_info = (0, 1, 0)
5 |
"dev-0.1.0" 3 | short_version = "0.1.0" 4 | version_info = (0, 1, 0) 5 | -------------------------------------------------------------------------------- /tools/convert_datasets/tao2coco.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | from collections import defaultdict 4 | 5 | import mmcv 6 | from tao.toolkit.tao import Tao 7 | from tqdm import tqdm 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Make annotation files for TAO') 13 | parser.add_argument('-t', '--tao', help='path of TAO json file') 14 | parser.add_argument( 15 | '--filter-classes', 16 | action='store_true', 17 | help='whether filter 1230 classes to 482.') 18 | return parser.parse_args() 19 | 20 | 21 | def get_classes(tao_path, filter_classes=True): 22 | train = mmcv.load(osp.join(tao_path, 'train.json')) 23 | 24 | train_classes = list(set([_['category_id'] for _ in train['annotations']])) 25 | print(f'TAO train set contains {len(train_classes)} categories.') 26 | 27 | val = mmcv.load(osp.join(tao_path, 'validation.json')) 28 | val_classes = list(set([_['category_id'] for _ in val['annotations']])) 29 | print(f'TAO val set contains {len(val_classes)} categories.') 30 | 31 | test = mmcv.load(osp.join(tao_path, 'test_categories.json')) 32 | test_classes = list(set([_['id'] for _ in test['categories']])) 33 | print(f'TAO test set contains {len(test_classes)} categories.') 34 | 35 | tao_classes = set(train_classes + val_classes + test_classes) 36 | print(f'TAO totally contains {len(tao_classes)} categories.') 37 | 38 | tao_classes = [_ for _ in train['categories'] if _['id'] in tao_classes] 39 | 40 | with open(osp.join(tao_path, 'tao_classes.txt'), 'wt') as f: 41 | for c in tao_classes: 42 | name = c['name'] 43 | f.writelines(f'{name}\n') 44 | 45 | if filter_classes: 46 | return tao_classes 47 | else: 48 | return train['categories'] 49 | 50 | 51 | def convert_tao(file, classes): 52 | tao = Tao(file) 53 | raw = mmcv.load(file) 54 | 55 | out = defaultdict(list) 56 | out['tracks'] = raw['tracks'].copy() 57 | out['info'] = raw['info'].copy() 58 | out['licenses'] = raw['licenses'].copy() 59 | out['categories'] = classes 60 | 61 | for video in tqdm(raw['videos']): 62 | img_infos = tao.vid_img_map[video['id']] 63 | img_infos = sorted(img_infos, key=lambda x: x['frame_index']) 64 | frame_range = img_infos[1]['frame_index'] - img_infos[0]['frame_index'] 65 | video['frame_range'] = frame_range 66 | out['videos'].append(video) 67 | for i, img_info in enumerate(img_infos): 68 | img_info['frame_id'] = i 69 | img_info['neg_category_ids'] = video['neg_category_ids'] 70 | img_info['not_exhaustive_category_ids'] = video[ 71 | 'not_exhaustive_category_ids'] 72 | out['images'].append(img_info) 73 | ann_infos = tao.img_ann_map[img_info['id']] 74 | for ann_info in ann_infos: 75 | ann_info['instance_id'] = ann_info['track_id'] 76 | out['annotations'].append(ann_info) 77 | 78 | assert len(out['videos']) == len(raw['videos']) 79 | assert len(out['images']) == len(raw['images']) 80 | assert len(out['annotations']) == len(raw['annotations']) 81 | return out 82 | 83 | 84 | def main(): 85 | args = parse_args() 86 | 87 | classes = get_classes(args.tao, args.filter_classes) 88 | print(f'convert with {len(classes)} classes') 89 | 90 | for file in [ 91 | 'train.json', 'validation.json', 'test_without_annotations.json' 92 | ]: 93 | print(f'convert {file}') 94 | out = convert_tao(osp.join(args.tao, file), classes) 95 | c = '_482' if 
--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | PORT=$4
7 | if [ -z "$4" ]; then
8 |     PORT=33333
9 | fi
10 |
11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
12 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
13 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:5}
14 |
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | GPUS=$2
5 | PORT=$3
6 |
7 | if [ -z "$3" ]; then
8 |     PORT=29533
9 | fi
10 |
11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
12 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
13 |     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:4}
14 |
--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 |
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 |
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | WORK_DIR=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${@:5}
14 |
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
25 |
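For reference, typical launches with the scripts above; the GPU counts and checkpoint path are placeholders, and the config names come from this repo's configs/ tree:

# multi-GPU training/testing (ports default to 29533 / 33333 when omitted)
bash tools/dist_train.sh configs/tao/cem_r101_lvis.py 8
bash tools/dist_test.sh configs/tao/tracker_r101_tao.py checkpoint.pth 8

# the SLURM equivalents take a partition and a job name first
GPUS=8 bash tools/slurm_train.sh my_partition teter configs/tao/cem_r101_lvis.py work_dirs/cem_r101_lvis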
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import mmcv
5 | import torch
6 | from mmcv import Config, DictAction
7 | from mmcv.cnn import fuse_conv_bn
8 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
9 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint
10 | # from mmdet.core import wrap_fp16_model
11 | from mmdet.datasets import build_dataset
12 |
13 |
14 | def parse_args():
15 |     parser = argparse.ArgumentParser(description='teter test model')
16 |     parser.add_argument('config', help='test config file path')
17 |     parser.add_argument('checkpoint', help='checkpoint file')
18 |     parser.add_argument('--out', help='output result file')
19 |     parser.add_argument(
20 |         '--fuse-conv-bn',
21 |         action='store_true',
22 |         help='Whether to fuse conv and bn; this will slightly increase '
23 |         'the inference speed')
24 |     parser.add_argument(
25 |         '--format-only',
26 |         action='store_true',
27 |         help='Format the output results without performing evaluation. It is '
28 |         'useful when you want to format the result to a specific format and '
29 |         'submit it to the test server')
30 |     parser.add_argument('--eval', type=str, nargs='+', help='eval types')
31 |     parser.add_argument('--show', action='store_true', help='show results')
32 |     parser.add_argument(
33 |         '--show-dir', help='directory where painted images will be saved')
34 |     parser.add_argument(
35 |         '--gpu-collect',
36 |         action='store_true',
37 |         help='whether to use gpu to collect results.')
38 |     parser.add_argument(
39 |         '--tmpdir',
40 |         help='tmp directory used for collecting results from multiple '
41 |         'workers, available when gpu-collect is not specified')
42 |     parser.add_argument(
43 |         '--show_score_thr', default=0.3, type=float, help='score threshold for shown or saved results')
44 |     parser.add_argument(
45 |         '--cfg-options',
46 |         nargs='+',
47 |         action=DictAction,
48 |         help='override some settings in the used config, the key-value pair '
49 |         'in xxx=yyy format will be merged into config file.')
50 |     parser.add_argument(
51 |         '--eval-options',
52 |         nargs='+',
53 |         action=DictAction,
54 |         help='custom options for evaluation, the key-value pair in xxx=yyy '
55 |         'format will be kwargs for dataset.evaluate() function')
56 |     parser.add_argument(
57 |         '--launcher',
58 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
59 |         default='none',
60 |         help='job launcher')
61 |     parser.add_argument('--local_rank', type=int, default=0)
62 |     args = parser.parse_args()
63 |     if 'LOCAL_RANK' not in os.environ:
64 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
65 |     return args
66 |
67 |
68 | def main():
69 |     args = parse_args()
70 |
71 |     assert args.out or args.eval or args.format_only or args.show \
72 |         or args.show_dir, \
73 |         ('Please specify at least one operation (save/eval/format/show the '
74 |          'results) with the argument "--out", "--eval", '
75 |          '"--format-only", "--show" or "--show-dir"')
76 |
77 |     if args.eval and args.format_only:
78 |         raise ValueError('--eval and --format-only cannot both be specified')
79 |
80 |     if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
81 |         raise ValueError('The output file must be a pkl file.')
82 |
83 |     cfg = Config.fromfile(args.config)
84 |
85 |     if args.cfg_options is not None:
86 |         cfg.merge_from_dict(args.cfg_options)
87 |
88 |     if cfg.get('USE_MMDET', False):
89 |         from mmdet.apis import multi_gpu_test, single_gpu_test
90 |         from mmdet.models import build_detector as build_model
91 |         from mmdet.datasets import build_dataloader
92 |     else:
93 |         from teter.apis import multi_gpu_test, single_gpu_test
94 |         from teter.models import build_model
95 |         from teter.datasets import build_dataloader
96 |
97 |     # set cudnn_benchmark
98 |     if cfg.get('cudnn_benchmark', False):
99 |         torch.backends.cudnn.benchmark = True
100 |     cfg.model.pretrained = None
101 |     cfg.data.test.test_mode = True
102 |
103 |     # init distributed env first, since logger depends on the dist info.
104 |     if args.launcher == 'none':
105 |         distributed = False
106 |     else:
107 |         distributed = True
108 |         init_dist(args.launcher, **cfg.dist_params)
109 |
110 |     # build the dataloader
111 |     dataset = build_dataset(cfg.data.test)
112 |     data_loader = build_dataloader(
113 |         dataset,
114 |         samples_per_gpu=1,
115 |         workers_per_gpu=cfg.data.workers_per_gpu,
116 |         dist=distributed,
117 |         shuffle=False)
118 |
119 |     # build the model and load checkpoint
120 |     model = build_model(cfg.model, train_cfg=None, test_cfg=None)
121 |     # fp16_cfg = cfg.get('fp16', None)
122 |     # if fp16_cfg is not None:
123 |     #     wrap_fp16_model(model)
124 |     checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
125 |
126 |     if args.fuse_conv_bn:
127 |         model = fuse_conv_bn(model)
128 |
129 |     if 'CLASSES' in checkpoint['meta']:
130 |         model.CLASSES = checkpoint['meta']['CLASSES']
131 |     else:
132 |         model.CLASSES = dataset.CLASSES
133 |
134 |     if not distributed:
135 |         model = MMDataParallel(model, device_ids=[0])
136 |         outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
137 |                                   args.show_score_thr)
138 |     else:
139 |         model = MMDistributedDataParallel(
140 |             model.cuda(),
141 |             device_ids=[torch.cuda.current_device()],
142 |             broadcast_buffers=False)
143 |         outputs = multi_gpu_test(model, data_loader, args.tmpdir,
144 |                                  args.gpu_collect)
145 |
146 |     rank, _ = get_dist_info()
147 |     if rank == 0:
148 |         if args.out:
149 |             print(f'\nwriting results to {args.out}')
150 |             mmcv.dump(outputs, args.out)
151 |         kwargs = {} if args.eval_options is None else args.eval_options
152 |         if args.format_only:
153 |             dataset.format_results(outputs, **kwargs)
154 |         if args.eval:
155 |             eval_kwargs = cfg.get('evaluation', {}).copy()
156 |             # hard-coded way to remove EvalHook args
157 |             for key in ['interval', 'tmpdir', 'start', 'gpu_collect']:
158 |                 eval_kwargs.pop(key, None)
159 |             eval_kwargs.update(dict(metric=args.eval, **kwargs))
160 |             print(dataset.evaluate(outputs, **eval_kwargs))
161 |
162 |
163 | if __name__ == '__main__':
164 |     main()
165 |
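A hedged single-GPU example of the entry point above; the checkpoint path is a placeholder, and the metric name depends on what the configured dataset's evaluate() accepts (the tracking datasets in this repo evaluate tracking-style metrics):

python tools/test.py configs/tao/tracker_r101_tao.py checkpoint.pth \
    --out results.pkl --eval track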
--------------------------------------------------------------------------------
/tools/to_bdd100k.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import mmcv
5 | from mmcv import Config, DictAction
6 | from mmdet.datasets import build_dataset
7 | from teter.core.to_bdd100k import preds2bdd100k
8 |
9 |
10 | def parse_args():
11 |     parser = argparse.ArgumentParser(description='convert teter results to BDD100K format')
12 |     parser.add_argument('config', help='test config file path')
13 |     parser.add_argument('--res', help='path to the result file to convert')
14 |     parser.add_argument(
15 |         '--bdd-dir',
16 |         type=str,
17 |         help='path to the folder that will contain files in bdd100k format')
18 |     parser.add_argument(
19 |         '--coco-file',
20 |         type=str,
21 |         help='path to the json file in COCO submission format')
22 |     parser.add_argument(
23 |         '--task',
24 |         type=str,
25 |         nargs='+',
26 |         help='task types',
27 |         choices=['det', 'ins_seg', 'box_track', 'seg_track'])
28 |     parser.add_argument(
29 |         '--nproc',
30 |         type=int,
31 |         help='number of processes for mask merging')
32 |     parser.add_argument(
33 |         '--cfg-options',
34 |         nargs='+',
35 |         action=DictAction,
36 |         help='override some settings in the used config, the key-value pair '
37 |         'in xxx=yyy format will be merged into config file.')
38 |     args = parser.parse_args()
39 |     return args
40 |
41 |
42 | def main():
43 |     args = parse_args()
44 |
45 |     if not os.path.isfile(args.res):
46 |         raise ValueError('The result file does not exist.')
47 |
48 |     cfg = Config.fromfile(args.config)
49 |
50 |     if args.cfg_options is not None:
51 |         cfg.merge_from_dict(args.cfg_options)
52 |
53 |     if cfg.get('USE_MMDET', False):
54 |         from mmdet.datasets import build_dataloader
55 |     else:
56 |         from teter.datasets import build_dataloader
57 |
58 |     # build the dataloader
59 |     cfg.data.test.test_mode = True
60 |     dataset = build_dataset(cfg.data.test)
61 |
62 |     print(f'\nLoading results from {args.res}')
63 |     results = mmcv.load(args.res)
64 |
65 |     if args.coco_file:
66 |         dataset.format_results(results, jsonfile_prefix=args.coco_file)
67 |     if args.bdd_dir:
68 |         preds2bdd100k(
69 |             dataset, results, args.task, out_base=args.bdd_dir, nproc=args.nproc)
70 |
71 | if __name__ == '__main__':
72 |     main()
73 |
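Typical use of the converter above, fed with a result file written by tools/test.py --out; all paths here are placeholders:

python tools/to_bdd100k.py configs/bdd100k/cem_bdd.py --res results.pkl \
    --task box_track --bdd-dir bdd100k_out --nproc 4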
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import os
4 | import os.path as osp
5 | import time
6 |
7 | import mmcv
8 | import torch
9 | from mmcv import Config, DictAction
10 | from mmcv.runner import init_dist
11 | from mmdet.apis import set_random_seed
12 | from mmdet.datasets import build_dataset
13 |
14 | from teter import __version__
15 | from teter.utils import collect_env, get_root_logger
16 |
17 |
18 | def parse_args():
19 |     parser = argparse.ArgumentParser(description='Train a model')
20 |     parser.add_argument('config', help='train config file path')
21 |     parser.add_argument('--work-dir', help='the dir to save logs and models')
22 |     parser.add_argument(
23 |         '--resume-from', help='the checkpoint file to resume from')
24 |     parser.add_argument(
25 |         '--no-validate',
26 |         action='store_true',
27 |         help='whether not to evaluate the checkpoint during training')
28 |     group_gpus = parser.add_mutually_exclusive_group()
29 |     group_gpus.add_argument(
30 |         '--gpus',
31 |         type=int,
32 |         help='number of gpus to use '
33 |         '(only applicable to non-distributed training)')
34 |     group_gpus.add_argument(
35 |         '--gpu-ids',
36 |         type=int,
37 |         nargs='+',
38 |         help='ids of gpus to use '
39 |         '(only applicable to non-distributed training)')
40 |     parser.add_argument('--seed', type=int, default=None, help='random seed')
41 |     parser.add_argument(
42 |         '--deterministic',
43 |         action='store_true',
44 |         help='whether to set deterministic options for CUDNN backend.')
45 |     parser.add_argument(
46 |         '--cfg-options',
47 |         nargs='+',
48 |         action=DictAction,
49 |         help='override some settings in the used config, the key-value pair '
50 |         'in xxx=yyy format will be merged into config file.')
51 |     parser.add_argument(
52 |         '--launcher',
53 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
54 |         default='none',
55 |         help='job launcher')
56 |     parser.add_argument('--local_rank', type=int, default=0)
57 |     args = parser.parse_args()
58 |     if 'LOCAL_RANK' not in os.environ:
59 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
60 |
61 |     return args
62 |
63 |
64 | def main():
65 |     args = parse_args()
66 |
67 |     cfg = Config.fromfile(args.config)
68 |
69 |     if args.cfg_options is not None:
70 |         cfg.merge_from_dict(args.cfg_options)
71 |
72 |     if cfg.get('USE_MMDET', False):
73 |         from mmdet.apis import train_detector as train_model
74 |         from mmdet.models import build_detector as build_model
75 |     else:
76 |         from teter.apis import train_model
77 |         from teter.models import build_model
78 |
79 |     # set cudnn_benchmark
80 |     if cfg.get('cudnn_benchmark', False):
81 |         torch.backends.cudnn.benchmark = True
82 |
83 |     # work_dir is determined in this priority: CLI > segment in file > filename
84 |     if args.work_dir is not None:
85 |         # update configs according to CLI args if args.work_dir is not None
86 |         cfg.work_dir = args.work_dir
87 |     elif cfg.get('work_dir', None) is None:
88 |         # use config filename as default work_dir if cfg.work_dir is None
89 |         cfg.work_dir = osp.join('./work_dirs',
90 |                                 osp.splitext(osp.basename(args.config))[0])
91 |     if args.resume_from is not None:
92 |         cfg.resume_from = args.resume_from
93 |     if args.gpu_ids is not None:
94 |         cfg.gpu_ids = args.gpu_ids
95 |     else:
96 |         cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
97 |
98 |     # init distributed env first, since logger depends on the dist info.
99 |     if args.launcher == 'none':
100 |         distributed = False
101 |     else:
102 |         distributed = True
103 |         init_dist(args.launcher, **cfg.dist_params)
104 |
105 |     # create work_dir
106 |     mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
107 |     # dump config
108 |     cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
109 |     # init the logger before other steps
110 |     timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
111 |     log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
112 |     logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
113 |
114 |     # init the meta dict to record some important information such as
115 |     # environment info and seed, which will be logged
116 |     meta = dict()
117 |     # log env info
118 |     env_info_dict = collect_env()
119 |     env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
120 |     dash_line = '-' * 60 + '\n'
121 |     logger.info('Environment info:\n' + dash_line + env_info + '\n' +
122 |                 dash_line)
123 |     meta['env_info'] = env_info
124 |
125 |     # log some basic info
126 |     logger.info(f'Distributed training: {distributed}')
127 |     logger.info(f'Config:\n{cfg.pretty_text}')
128 |
129 |     # set random seeds
130 |     if args.seed is not None:
131 |         logger.info(f'Set random seed to {args.seed}, '
132 |                     f'deterministic: {args.deterministic}')
133 |         set_random_seed(args.seed, deterministic=args.deterministic)
134 |     cfg.seed = args.seed
135 |     meta['seed'] = args.seed
136 |
137 |     model = build_model(
138 |         cfg.model,
139 |         train_cfg=cfg.get('train_cfg'),
140 |         test_cfg=cfg.get('test_cfg'))
141 |     model.init_weights()
142 |
143 |     datasets = [build_dataset(cfg.data.train)]
144 |     if len(cfg.workflow) == 2:
145 |         val_dataset = copy.deepcopy(cfg.data.val)
146 |         val_dataset.pipeline = cfg.data.train.pipeline
147 |         datasets.append(build_dataset(val_dataset))
148 |     if cfg.checkpoint_config is not None:
149 |         # save teter version, config file content and class names in
150 |         # checkpoints as meta data
151 |         cfg.checkpoint_config.meta = dict(
152 |             qdtrack_version=__version__,
153 |             config=cfg.pretty_text,
154 |             CLASSES=datasets[0].CLASSES)
155 |         # add an attribute for visualization convenience
156 |         model.CLASSES = datasets[0].CLASSES
157 |     train_model(
158 |         model,
159 |         datasets,
160 |         cfg,
161 |         distributed=distributed,
162 |         validate=(not args.no_validate),
163 |         timestamp=timestamp,
164 |         meta=meta)
165 |
166 |
167 | if __name__ == '__main__':
168 |     main()
169 |
--------------------------------------------------------------------------------
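Finally, the non-distributed counterpart of the launch scripts, calling the trainer above directly; the config comes from this repo's configs/ tree and the work-dir is a placeholder:

python tools/train.py configs/bdd100k/cem_bdd.py \
    --work-dir work_dirs/cem_bdd --seed 0 --deterministic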