├── LICENSE
├── README.md
├── configs
│   ├── _base_
│   │   ├── faster_rcnn_r50_fpn.py
│   │   └── qdtrack_faster_rcnn_r50_fpn.py
│   ├── bdd100k
│   │   └── cem_bdd.py
│   └── tao
│       ├── cem_r101_lvis.py
│       ├── cem_swinB_lvis.py
│       ├── cem_swinL_lvis.py
│       ├── cem_swinS_lvis.py
│       ├── cem_swinT_lvis.py
│       ├── tracker_r101_tao.py
│       ├── tracker_swinB_tao.py
│       ├── tracker_swinL_tao.py
│       ├── tracker_swinS_tao.py
│       └── tracker_swinT_tao.py
├── docs
│   ├── GET_STARTED.md
│   └── INSTALL.md
├── figures
│   ├── teaser-teter.png
│   └── teta-teaser.png
├── requirements.txt
├── setup.cfg
├── setup.py
├── teta
│   ├── LICENSE
│   ├── README.md
│   ├── docs
│   │   └── TAO-format.txt
│   ├── figures
│   │   ├── figure_1.png
│   │   └── teta-teaser.png
│   ├── requirements.txt
│   ├── scripts
│   │   ├── run_coco.py
│   │   └── run_tao.py
│   ├── setup.py
│   └── teta
│       ├── __init__.py
│       ├── _timing.py
│       ├── config.py
│       ├── datasets
│       │   ├── __init__.py
│       │   ├── _base_dataset.py
│       │   ├── bdd.py
│       │   ├── bdd_mots.py
│       │   ├── coco.py
│       │   ├── coco_mots.py
│       │   └── tao.py
│       ├── eval.py
│       ├── metrics
│       │   ├── __init__.py
│       │   ├── _base_metric.py
│       │   └── teta.py
│       └── utils.py
├── teter
│   ├── VERSION
│   ├── __init__.py
│   ├── apis
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   ├── test.py
│   │   └── train.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── evaluation
│   │   │   ├── __init__.py
│   │   │   ├── box_track.py
│   │   │   ├── eval_hooks.py
│   │   │   └── mot.py
│   │   ├── to_bdd100k
│   │   │   ├── __init__.py
│   │   │   ├── transforms.py
│   │   │   └── utils.py
│   │   ├── track
│   │   │   ├── __init__.py
│   │   │   ├── similarity.py
│   │   │   └── transforms.py
│   │   └── utils
│   │       ├── __init__.py
│   │       └── visualization.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── bdd_video_dataset.py
│   │   ├── builder.py
│   │   ├── coco_video_dataset.py
│   │   ├── parsers
│   │   │   ├── __init__.py
│   │   │   ├── coco_api.py
│   │   │   └── coco_video_parser.py
│   │   ├── pipelines
│   │   │   ├── __init__.py
│   │   │   ├── formatting.py
│   │   │   ├── h5backend.py
│   │   │   ├── loading.py
│   │   │   └── transforms.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   └── distributed_video_sampler.py
│   │   └── tao_dataset.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── builder.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── l2_loss.py
│   │   │   ├── multipos_cross_entropy_loss.py
│   │   │   └── unbiased_supcontrast.py
│   │   ├── mot
│   │   │   ├── __init__.py
│   │   │   └── teter.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── teter_roi_head.py
│   │   │   └── track_heads
│   │   │       ├── __init__.py
│   │   │       ├── cem_head.py
│   │   │       └── quasi_dense_embed_head.py
│   │   └── trackers
│   │       ├── __init__.py
│   │       ├── teter_bdd.py
│   │       └── teter_tao.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── collect_env.py
│   │   └── logger.py
│   └── version.py
└── tools
    ├── convert_datasets
    │   └── tao2coco.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── to_bdd100k.py
    └── train.py
/configs/_base_/faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FasterRCNN', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=50, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=True), 11 | norm_eval=True, 12 | style='pytorch', 13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 |
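# With use_sigmoid=True the RPN scores objectness as a single binary logit
# per anchor, while the RoI head further down keeps use_sigmoid=False, i.e.
# an (num_classes + 1)-way softmax over the 80 classes plus background. In
# mmdet this flag only switches the classification activation/loss; the box
# regression branches are unaffected.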
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 56 | # model training and testing settings 57 | train_cfg=dict( 58 | rpn=dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.7, 62 | neg_iou_thr=0.3, 63 | min_pos_iou=0.3, 64 | match_low_quality=True, 65 | ignore_iof_thr=-1), 66 | sampler=dict( 67 | type='RandomSampler', 68 | num=256, 69 | pos_fraction=0.5, 70 | neg_pos_ub=-1, 71 | add_gt_as_proposals=False), 72 | allowed_border=-1, 73 | pos_weight=-1, 74 | debug=False), 75 | rpn_proposal=dict( 76 | nms_pre=2000, 77 | max_per_img=1000, 78 | nms=dict(type='nms', iou_threshold=0.7), 79 | min_bbox_size=0), 80 | rcnn=dict( 81 | assigner=dict( 82 | type='MaxIoUAssigner', 83 | pos_iou_thr=0.5, 84 | neg_iou_thr=0.5, 85 | min_pos_iou=0.5, 86 | match_low_quality=False, 87 | ignore_iof_thr=-1), 88 | sampler=dict( 89 | type='RandomSampler', 90 | num=512, 91 | pos_fraction=0.25, 92 | neg_pos_ub=-1, 93 | add_gt_as_proposals=True), 94 | pos_weight=-1, 95 | debug=False)), 96 | test_cfg=dict( 97 | rpn=dict( 98 | nms_pre=1000, 99 | max_per_img=1000, 100 | nms=dict(type='nms', iou_threshold=0.7), 101 | min_bbox_size=0), 102 | rcnn=dict( 103 | score_thr=0.05, 104 | nms=dict(type='nms', iou_threshold=0.5), 105 | max_per_img=100) 106 | # soft-nms is also supported for rcnn testing 107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05) 108 | )) 109 | -------------------------------------------------------------------------------- /configs/_base_/qdtrack_faster_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | _base_ = './faster_rcnn_r50_fpn.py' 2 | model = dict( 3 | type='QDTrack', 4 | rpn_head=dict( 5 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)), 6 | roi_head=dict( 7 | type='QuasiDenseRoIHead', 8 | track_roi_extractor=dict( 9 | type='SingleRoIExtractor', 10 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 11 | out_channels=256, 12 | featmap_strides=[4, 8, 16, 32]), 13 | track_head=dict( 14 | type='QuasiDenseEmbedHead', 15 | num_convs=4, 16 | num_fcs=1, 17 | embed_channels=256, 18 | norm_cfg=dict(type='GN', num_groups=32), 19 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), 20 | loss_track_aux=dict( 21 | type='L2Loss', 22 | neg_pos_ub=3, 23 | pos_margin=0, 24 | neg_margin=0.1, 25 | hard_mining=True, 26 | loss_weight=1.0))), 27 | train_cfg=dict( 28 | embed=dict( 29 | assigner=dict( 30 | type='MaxIoUAssigner', 31 | pos_iou_thr=0.7, 32 | neg_iou_thr=0.3, 33 | min_pos_iou=0.5, 34 | match_low_quality=False, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='CombinedSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=3, 41 | add_gt_as_proposals=True, 42 | 
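# This embed sampler drives QDTrack's quasi-dense matching: 256 RoIs per
# image, half of them positives, with negatives capped at 3x the positives
# (neg_pos_ub=3). InstanceBalancedPosSampler spreads the positives evenly
# over ground-truth instances so one large object cannot dominate the
# contrastive pairs; roughly (an illustrative sketch, not the mmdet code):
#   per_gt = num_expected // num_gts
#   picks = [np.random.choice(np.flatnonzero(assigned_gt == g), per_gt)
#            for g in range(num_gts)]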
pos_sampler=dict(type='InstanceBalancedPosSampler'), 43 | neg_sampler=dict(type='RandomSampler'))))) -------------------------------------------------------------------------------- /configs/bdd100k/cem_bdd.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | model = dict( 4 | type='TETer', 5 | freeze_detector=True, 6 | freeze_qd=True, 7 | method='teter', 8 | roi_head=dict( 9 | type='TETerRoIHead', 10 | finetune_cem=True, 11 | bbox_head=dict(num_classes=8), 12 | cem_roi_extractor=dict( 13 | type='SingleRoIExtractor', 14 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 15 | out_channels=256, 16 | featmap_strides=[4, 8, 16, 32]), 17 | cem_head=dict( 18 | type='ClsExemplarHead', 19 | num_convs=4, 20 | num_fcs=3, 21 | embed_channels=256, 22 | norm_cfg=dict(type='GN', num_groups=32), 23 | loss_track=dict(type='UnbiasedSupConLoss', temperature=0.07, contrast_mode='all', 24 | pos_normalize=True, 25 | loss_weight=0.25) 26 | , softmax_temp=-1), 27 | 28 | track_head=dict( 29 | type='QuasiDenseEmbedHead', 30 | num_convs=4, 31 | num_fcs=1, 32 | embed_channels=256, 33 | norm_cfg=dict(type='GN', num_groups=32), 34 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), 35 | loss_track_aux=dict( 36 | type='L2Loss', 37 | neg_pos_ub=3, 38 | pos_margin=0, 39 | neg_margin=0.1, 40 | hard_mining=True, 41 | loss_weight=1.0)) 42 | ), 43 | tracker=dict( 44 | type='TETerBDD', 45 | init_score_thr=0.7, 46 | obj_score_thr=0.3, 47 | match_score_thr=0.5, 48 | memo_tracklet_frames=10, 49 | memo_backdrop_frames=1, 50 | memo_momentum=0.8, 51 | nms_conf_thr=0.5, 52 | nms_backdrop_iou_thr=0.3, 53 | nms_class_iou_thr=0.7, 54 | contrastive_thr=0.5, 55 | match_metric='bisoftmax'), 56 | 57 | # model training and testing settings 58 | train_cfg=dict( 59 | embed=dict( 60 | sampler=dict( 61 | type='CombinedSampler', 62 | num=256, 63 | pos_fraction=0.5, 64 | neg_pos_ub=3, 65 | add_gt_as_proposals=True, 66 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 67 | neg_sampler=dict( 68 | type='IoUBalancedNegSampler', 69 | floor_thr=-1, 70 | floor_fraction=0, 71 | num_bins=3))))) 72 | # dataset settings 73 | dataset_type = 'BDDVideoDataset' 74 | data_root = 'data/bdd/bdd100k/' 75 | ann_root = 'data/bdd/' 76 | img_norm_cfg = dict( 77 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 78 | train_pipeline = [ 79 | dict(type='LoadMultiImagesFromFile'), 80 | # comment out the line above and uncomment the lines below to load images from an hdf5 file.
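# The hdf5 path exists because BDD100K training touches millions of small
# JPEGs; packing them into one file turns image loading into cheap keyed
# reads. A minimal sketch of such a lookup (plain h5py usage; the key
# layout here is an assumption, not the repo's actual backend):
#   import h5py, mmcv
#   with h5py.File('data/bdd/hdf5s/100k_train.hdf5', 'r') as db:
#       img = mmcv.imfrombytes(db[img_name][()])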
81 | # dict(type='LoadMultiImagesFromFile', 82 | # file_client_args=dict( 83 | # img_db_path='data/bdd/hdf5s/100k_train.hdf5', 84 | # # vid_db_path='data/bdd/hdf5s/track_train.hdf5', 85 | # backend='hdf5', 86 | # type='bdd')), 87 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 88 | dict( 89 | type='SeqResize', 90 | img_scale=[(1296, 640), (1296, 672), (1296, 704), (1296, 736), 91 | (1296, 768), (1296, 800), (1296, 720)], 92 | share_params=False, 93 | multiscale_mode='value', 94 | keep_ratio=True), 95 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 96 | dict(type='SeqNormalize', **img_norm_cfg), 97 | dict(type='SeqPad', size_divisor=32), 98 | dict(type='SeqDefaultFormatBundle'), 99 | dict( 100 | type='SeqCollect', 101 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 102 | ref_prefix='ref'), 103 | ] 104 | test_pipeline = [ 105 | dict(type='LoadImageFromFile'), 106 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 107 | # dict(type='LoadImageFromFile', 108 | # file_client_args=dict( 109 | # vid_db_path='data/bdd/hdf5s/track_val.hdf5', 110 | # backend='hdf5', 111 | # type='bdd')), 112 | dict( 113 | type='MultiScaleFlipAug', 114 | img_scale=(1296, 720), 115 | flip=False, 116 | transforms=[ 117 | dict(type='Resize', keep_ratio=True), 118 | dict(type='RandomFlip'), 119 | dict(type='Normalize', **img_norm_cfg), 120 | dict(type='Pad', size_divisor=32), 121 | dict(type='ImageToTensor', keys=['img']), 122 | dict(type='VideoCollect', keys=['img']) 123 | ]) 124 | ] 125 | data = dict( 126 | samples_per_gpu=16, 127 | workers_per_gpu=2, 128 | train=[ 129 | dict( 130 | type=dataset_type, 131 | load_as_video=False, 132 | ann_file=ann_root + 133 | 'annotations/det_20/det_train_cocofmt.json', 134 | img_prefix=data_root + 'images/100k/train/', 135 | pipeline=train_pipeline) 136 | ], 137 | val=dict( 138 | type=dataset_type, 139 | ann_file=ann_root + 140 | 'annotations/box_track_20/box_track_val_cocofmt.json', 141 | scalabel_gt=ann_root + 'annotations/scalabel_gt/box_track_20/val/', 142 | img_prefix=data_root + 'images/track/val/', 143 | pipeline=test_pipeline), 144 | test=dict( 145 | type=dataset_type, 146 | ann_file=ann_root + 147 | 'annotations/box_track_20/box_track_val_cocofmt.json', 148 | scalabel_gt=ann_root + 'annotations/scalabel_gt/box_track_20/val/', 149 | img_prefix=data_root + 'images/track/val/', 150 | pipeline=test_pipeline)) 151 | # optimizer 152 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 153 | optimizer_config = dict(grad_clip=None) 154 | # learning policy 155 | lr_config = dict( 156 | policy='step', 157 | warmup='linear', 158 | warmup_iters=1000, 159 | warmup_ratio=1.0 / 1000, 160 | step=[8, 11]) 161 | # checkpoint saving 162 | checkpoint_config = dict(interval=1) 163 | # yapf:disable 164 | log_config = dict( 165 | interval=50, 166 | hooks=[ 167 | dict(type='TextLoggerHook'), 168 | # dict(type='TensorboardLoggerHook') 169 | ]) 170 | # yapf:enable 171 | # runtime settings 172 | total_epochs = 12 173 | dist_params = dict(backend='nccl') 174 | log_level = 'INFO' 175 | load_from = None 176 | resume_from = None 177 | workflow = [('train', 1)] 178 | evaluation = dict(metric=['bbox', 'track'], interval=1) -------------------------------------------------------------------------------- /configs/tao/cem_r101_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | model =
dict( 4 | type='TETer', 5 | freeze_detector=False, 6 | backbone=dict( 7 | depth=101, 8 | init_cfg=dict(type='Pretrained', 9 | checkpoint='torchvision://resnet101')), 10 | roi_head=dict( 11 | type='TETerRoIHead', 12 | bbox_head=dict(num_classes=1230), 13 | cem_roi_extractor=dict( 14 | type='SingleRoIExtractor', 15 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 16 | out_channels=256, 17 | featmap_strides=[4, 8, 16, 32]), 18 | cem_head=dict( 19 | type='ClsExemplarHead', 20 | num_convs=4, 21 | num_fcs=3, 22 | embed_channels=1230, 23 | norm_cfg=dict(type='GN', num_groups=32), 24 | loss_track=dict(type='UnbiasedSupConLoss', 25 | temperature=0.07, 26 | contrast_mode='all', 27 | pos_normalize=True, 28 | loss_weight=0.25) 29 | , softmax_temp=-1), 30 | 31 | track_head=dict( 32 | type='QuasiDenseEmbedHead', 33 | num_convs=4, 34 | num_fcs=1, 35 | embed_channels=256, 36 | norm_cfg=dict(type='GN', num_groups=32), 37 | loss_track=dict(type='MultiPosCrossEntropyLoss', 38 | loss_weight=0.25, 39 | version='unbiased'), 40 | loss_track_aux=dict( 41 | type='L2Loss', 42 | neg_pos_ub=3, 43 | pos_margin=0, 44 | neg_margin=0.1, 45 | hard_mining=True, 46 | loss_weight=1.0)) 47 | ), 48 | 49 | tracker=dict( 50 | type='TETerTAO', 51 | init_score_thr=0.0001, 52 | obj_score_thr=0.0001, 53 | match_score_thr=0.5, 54 | memo_frames=10, 55 | momentum_embed=0.8, 56 | momentum_obj_score=0.5, 57 | match_metric='bisoftmax', 58 | match_with_cosine=True, 59 | contrastive_thr=0.5), 60 | 61 | train_cfg=dict( 62 | cem=dict( 63 | assigner=dict( 64 | type='MaxIoUAssigner', 65 | pos_iou_thr=0.7, 66 | neg_iou_thr=0.3, 67 | min_pos_iou=0.5, 68 | match_low_quality=False, 69 | ignore_iof_thr=-1), 70 | sampler=dict( 71 | type='CombinedSampler', 72 | num=256, 73 | pos_fraction=1, 74 | neg_pos_ub=0, 75 | add_gt_as_proposals=True, 76 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 77 | neg_sampler=dict(type='RandomSampler')) 78 | ) 79 | ), 80 | 81 | test_cfg=dict( 82 | rcnn=dict( 83 | score_thr=0.0001, 84 | nms=dict(type='nms', iou_threshold=0.5), 85 | max_per_img=300) 86 | ) 87 | ) 88 | # dataset settings 89 | img_norm_cfg = dict( 90 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 91 | train_pipeline = [ 92 | dict(type='LoadMultiImagesFromFile'), 93 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 94 | # dict( 95 | # type='LoadMultiImagesFromFile', 96 | # file_client_args=dict( 97 | # img_db_path='data/lvis/train_imgs.hdf5', 98 | # backend='hdf5', 99 | # type='lvis')), 100 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 101 | dict( 102 | type='SeqResize', 103 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 104 | (1333, 768), (1333, 800)], 105 | share_params=False, 106 | multiscale_mode='value', 107 | keep_ratio=True), 108 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 109 | dict(type='SeqNormalize', **img_norm_cfg), 110 | dict(type='SeqPad', size_divisor=32), 111 | dict(type='SeqDefaultFormatBundle'), 112 | dict( 113 | type='SeqCollect', 114 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 115 | ref_prefix='ref'), 116 | ] 117 | 118 | test_pipeline = [ 119 | dict(type='LoadImageFromFile'), 120 | # comment out the line above and uncomment the lines below to load images from an hdf5 file.
121 | # dict(type='LoadImageFromFile', 122 | # file_client_args=dict( 123 | # img_db_path='data/tao/tao_val_imgs.hdf5', 124 | # backend='hdf5', 125 | # type='tao')), 126 | dict( 127 | type='MultiScaleFlipAug', 128 | img_scale=(1333, 800), 129 | flip=False, 130 | transforms=[ 131 | dict(type='Resize', keep_ratio=True), 132 | dict(type='RandomFlip'), 133 | dict(type='Normalize', **img_norm_cfg), 134 | dict(type='Pad', size_divisor=32), 135 | dict(type='ImageToTensor', keys=['img']), 136 | dict(type='VideoCollect', keys=['img']) 137 | ]) 138 | ] 139 | 140 | dataset_type = 'TaoDataset' 141 | data = dict( 142 | samples_per_gpu=2, 143 | workers_per_gpu=2, 144 | train=dict( 145 | _delete_=True, 146 | type='ClassBalancedDataset', 147 | oversample_thr=1e-3, 148 | dataset=dict( 149 | type=dataset_type, 150 | classes='data/lvis/annotations/lvis_classes.txt', 151 | load_as_video=False, 152 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 153 | img_prefix='data/lvis/train2017/', 154 | key_img_sampler=dict(interval=1), 155 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 156 | pipeline=train_pipeline) 157 | ), 158 | val=dict( 159 | type=dataset_type, 160 | classes='data/lvis/annotations/lvis_classes.txt', 161 | ann_file='data/tao/annotations/validation_ours.json', 162 | img_prefix='data/tao/frames/', 163 | ref_img_sampler=None, 164 | pipeline=test_pipeline), 165 | test=dict( 166 | type=dataset_type, 167 | classes='data/lvis/annotations/lvis_classes.txt', 168 | ann_file='data/tao/annotations/validation_ours.json', 169 | img_prefix='data/tao/frames/', 170 | ref_img_sampler=None, 171 | pipeline=test_pipeline) 172 | 173 | ) 174 | 175 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 176 | optimizer_config = dict(grad_clip=None) 177 | # learning policy 178 | lr_config = dict( 179 | policy='step', 180 | warmup='linear', 181 | warmup_iters=1000, 182 | warmup_ratio=1.0 / 1000, 183 | step=[16, 22]) 184 | total_epochs = 24 185 | 186 | # checkpoint saving 187 | checkpoint_config = dict(interval=1) 188 | # yapf:disable 189 | log_config = dict( 190 | interval=50, 191 | hooks=[ 192 | dict(type='TextLoggerHook'), 193 | # dict(type='TensorboardLoggerHook') 194 | ]) 195 | 196 | dist_params = dict(backend='nccl') 197 | log_level = 'INFO' 198 | load_from = None 199 | resume_from = None 200 | workflow = [('train', 1)] 201 | evaluation = dict(metric=['bbox'], start=1, interval=1, resfile_path='/scratch/cem_lvis/') 202 | work_dir = './saved_models/cem_lvis/' 203 | -------------------------------------------------------------------------------- /configs/tao/cem_swinB_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=128, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[4, 8, 16, 32], 13 | window_size=12, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[128, 256, 512, 1024]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | 
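# TETerRoIHead augments the QDTrack heads with a Class Exemplar Module
# (CEM): cem_head learns per-RoI class-exemplar embeddings with an
# unbiased supervised-contrastive loss (note embed_channels=1230, one
# dimension per LVIS v0.5 class, so the exemplar reads as soft class
# evidence), and at test time the tracker first gates association
# candidates by exemplar similarity (contrastive_thr) before matching the
# instance embeddings from track_head. A rough sketch of that gate,
# assuming (N, D) detection and (M, D) memory exemplars (illustrative
# only, not the repo's exact code):
#   sim = F.cosine_similarity(det_cem[None], memo_cem[:, None], dim=-1)
#   allowed = sim > contrastive_thr  # (M, N) candidate mask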
bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | 47 | track_head=dict( 48 | type='QuasiDenseEmbedHead', 49 | num_convs=4, 50 | num_fcs=1, 51 | embed_channels=256, 52 | norm_cfg=dict(type='GN', num_groups=32), 53 | loss_track=dict(type='MultiPosCrossEntropyLoss', 54 | loss_weight=0.25, 55 | version='unbiased'), 56 | loss_track_aux=dict( 57 | type='L2Loss', 58 | neg_pos_ub=3, 59 | pos_margin=0, 60 | neg_margin=0.1, 61 | hard_mining=True, 62 | loss_weight=1.0)) 63 | ), 64 | tracker=dict( 65 | type='TETerTAO', 66 | init_score_thr=0.0001, 67 | obj_score_thr=0.0001, 68 | match_score_thr=0.5, 69 | memo_frames=10, 70 | momentum_embed=0.8, 71 | momentum_obj_score=0.5, 72 | match_metric='bisoftmax', 73 | match_with_cosine=True, 74 | contrastive_thr=0.5, 75 | ), 76 | train_cfg=dict( 77 | cem=dict( 78 | assigner=dict( 79 | type='MaxIoUAssigner', 80 | pos_iou_thr=0.7, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.5, 83 | match_low_quality=False, 84 | ignore_iof_thr=-1), 85 | sampler=dict( 86 | type='CombinedSampler', 87 | num=256, 88 | pos_fraction=1, 89 | neg_pos_ub=0, 90 | add_gt_as_proposals=True, 91 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 92 | neg_sampler=dict(type='RandomSampler')) 93 | ) 94 | ), 95 | 96 | test_cfg=dict( 97 | rcnn=dict( 98 | score_thr=0.0001, 99 | nms=dict(type='nms', iou_threshold=0.5), 100 | max_per_img=300) 101 | ) 102 | ) 103 | # dataset settings 104 | img_norm_cfg = dict( 105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 106 | train_pipeline = [ 107 | dict(type='LoadMultiImagesFromFile'), 108 | # comment out the line above and uncomment the lines below to load images from an hdf5 file.
109 | # dict( 110 | # type='LoadMultiImagesFromFile', 111 | # file_client_args=dict( 112 | # img_db_path='data/lvis/train_imgs.hdf5', 113 | # backend='hdf5', 114 | # type='lvis')), 115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 116 | dict( 117 | type='SeqResize', 118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 119 | (1333, 768), (1333, 800)], 120 | share_params=False, 121 | multiscale_mode='value', 122 | keep_ratio=True), 123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 124 | dict(type='SeqNormalize', **img_norm_cfg), 125 | dict(type='SeqPad', size_divisor=32), 126 | dict(type='SeqDefaultFormatBundle'), 127 | dict( 128 | type='SeqCollect', 129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 130 | ref_prefix='ref'), 131 | ] 132 | 133 | test_pipeline = [ 134 | dict(type='LoadImageFromFile'), 135 | # dict(type='LoadImageFromFile', 136 | # file_client_args=dict( 137 | # img_db_path='data/tao/tao_val_imgs.hdf5', 138 | # backend='hdf5', 139 | # type='tao')), 140 | dict( 141 | type='MultiScaleFlipAug', 142 | img_scale=(1333, 800), 143 | flip=False, 144 | transforms=[ 145 | dict(type='Resize', keep_ratio=True), 146 | dict(type='RandomFlip'), 147 | dict(type='Normalize', **img_norm_cfg), 148 | dict(type='Pad', size_divisor=32), 149 | dict(type='ImageToTensor', keys=['img']), 150 | dict(type='VideoCollect', keys=['img']) 151 | ]) 152 | ] 153 | 154 | ## dataset settings 155 | dataset_type = 'TaoDataset' 156 | data = dict( 157 | samples_per_gpu=2, 158 | workers_per_gpu=2, 159 | train=dict( 160 | _delete_=True, 161 | type='ClassBalancedDataset', 162 | oversample_thr=1e-3, 163 | dataset=dict( 164 | type=dataset_type, 165 | classes='data/lvis/annotations/lvis_classes.txt', 166 | load_as_video=False, 167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 168 | img_prefix='data/lvis/train2017/', 169 | key_img_sampler=dict(interval=1), 170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 171 | pipeline=train_pipeline) 172 | ), 173 | val=dict( 174 | type=dataset_type, 175 | classes='data/lvis/annotations/lvis_classes.txt', 176 | ann_file='data/tao/annotations/validation_ours.json', 177 | img_prefix='data/tao/frames/', 178 | ref_img_sampler=None, 179 | pipeline=test_pipeline), 180 | test=dict( 181 | type=dataset_type, 182 | classes='data/lvis/annotations/lvis_classes.txt', 183 | ann_file='data/tao/annotations/validation_ours.json', 184 | img_prefix='data/tao/frames/', 185 | ref_img_sampler=None, 186 | pipeline=test_pipeline) 187 | 188 | ) 189 | # optimizer 190 | optimizer = dict( 191 | # _delete_=True, 192 | type='AdamW', 193 | lr=0.0001, 194 | betas=(0.9, 0.999), 195 | weight_decay=0.05, 196 | paramwise_cfg=dict( 197 | custom_keys={ 198 | 'absolute_pos_embed': dict(decay_mult=0.), 199 | 'relative_position_bias_table': dict(decay_mult=0.), 200 | 'norm': dict(decay_mult=0.) 
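# decay_mult=0. exempts the Swin position tables and all norm parameters
# from the 0.05 AdamW weight decay, the standard recipe when fine-tuning
# Swin backbones. mmcv's optimizer constructor matches each custom key as
# a substring of the parameter name and scales that parameter's decay,
# roughly:
#   this_decay = weight_decay * custom_keys[k]['decay_mult']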
201 | })) 202 | optimizer_config = dict(grad_clip=None) 203 | # learning policy 204 | lr_config = dict( 205 | policy='step', 206 | warmup='linear', 207 | warmup_iters=1000, 208 | warmup_ratio=0.001, 209 | step=[27, 33]) 210 | runner = dict(type='EpochBasedRunner', max_epochs=36) 211 | 212 | 213 | # checkpoint saving 214 | checkpoint_config = dict(interval=1) 215 | # yapf:disable 216 | log_config = dict( 217 | interval=50, 218 | hooks=[ 219 | dict(type='TextLoggerHook'), 220 | ]) 221 | # yapf:enable 222 | # runtime settings 223 | total_epochs = 36 224 | dist_params = dict(backend='nccl') 225 | log_level = 'INFO' 226 | load_from = None 227 | resume_from = None 228 | workflow = [('train', 1)] 229 | evaluation = dict(metric=['bbox'], start=2, interval=2) 230 | -------------------------------------------------------------------------------- /configs/tao/cem_swinL_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=192, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[6, 12, 24, 48], 13 | window_size=12, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[192, 384, 768, 1536]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | 47 | track_head=dict( 48 | type='QuasiDenseEmbedHead', 49 | num_convs=4, 50 | num_fcs=1, 51 | embed_channels=256, 52 | norm_cfg=dict(type='GN', num_groups=32), 53 | loss_track=dict(type='MultiPosCrossEntropyLoss', 54 | loss_weight=0.25, 55 | version='unbiased'), 56 | loss_track_aux=dict( 57 | type='L2Loss', 58 | neg_pos_ub=3, 59 | pos_margin=0, 60 | neg_margin=0.1, 61 | hard_mining=True, 62 | loss_weight=1.0)) 63 | ), 64 | tracker=dict( 65 | type='TETerTAO', 66 | init_score_thr=0.0001, 67 | obj_score_thr=0.0001, 68 | match_score_thr=0.5, 69 | memo_frames=10, 70 | momentum_embed=0.8, 71 | momentum_obj_score=0.5, 72 | match_metric='bisoftmax', 73 | match_with_cosine=True, 74 | contrastive_thr=0.5, 75 | ), 76 | train_cfg=dict( 77 | cem=dict( 78 | assigner=dict( 79 | type='MaxIoUAssigner', 80 | pos_iou_thr=0.7, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.5, 83 | match_low_quality=False, 84 | ignore_iof_thr=-1), 85 | sampler=dict( 86 | type='CombinedSampler', 87 | num=256, 88 | pos_fraction=1, 89 | neg_pos_ub=0, 90 | add_gt_as_proposals=True, 91 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 92 | neg_sampler=dict(type='RandomSampler')) 93 | ) 94 | ), 95 | 96 | test_cfg=dict( 97 | 
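# score_thr is deliberately near zero: with 1230 mostly-rare LVIS classes,
# almost no correct box clears a conventional 0.05 cut. Up to 300
# low-confidence detections per image are kept, and the tracker smooths
# their scores over time instead; a sketch of the momentum update implied
# by momentum_obj_score=0.5 (an assumption, not the exact tracker code):
#   fused = momentum_obj_score * memo_score + (1 - momentum_obj_score) * new_score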
rcnn=dict( 98 | score_thr=0.0001, 99 | nms=dict(type='nms', iou_threshold=0.5), 100 | max_per_img=300) 101 | ) 102 | ) 103 | # dataset settings 104 | img_norm_cfg = dict( 105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 106 | train_pipeline = [ 107 | dict(type='LoadMultiImagesFromFile'), 108 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 109 | # dict( 110 | # type='LoadMultiImagesFromFile', 111 | # file_client_args=dict( 112 | # img_db_path='data/lvis/train_imgs.hdf5', 113 | # backend='hdf5', 114 | # type='lvis')), 115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 116 | dict( 117 | type='SeqResize', 118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 119 | (1333, 768), (1333, 800)], 120 | share_params=False, 121 | multiscale_mode='value', 122 | keep_ratio=True), 123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 124 | dict(type='SeqNormalize', **img_norm_cfg), 125 | dict(type='SeqPad', size_divisor=32), 126 | dict(type='SeqDefaultFormatBundle'), 127 | dict( 128 | type='SeqCollect', 129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 130 | ref_prefix='ref'), 131 | ] 132 | 133 | test_pipeline = [ 134 | dict(type='LoadImageFromFile'), 135 | # dict(type='LoadImageFromFile', 136 | # file_client_args=dict( 137 | # img_db_path='data/tao/tao_val_imgs.hdf5', 138 | # backend='hdf5', 139 | # type='tao')), 140 | dict( 141 | type='MultiScaleFlipAug', 142 | img_scale=(1333, 800), 143 | flip=False, 144 | transforms=[ 145 | dict(type='Resize', keep_ratio=True), 146 | dict(type='RandomFlip'), 147 | dict(type='Normalize', **img_norm_cfg), 148 | dict(type='Pad', size_divisor=32), 149 | dict(type='ImageToTensor', keys=['img']), 150 | dict(type='VideoCollect', keys=['img']) 151 | ]) 152 | ] 153 | 154 | ## dataset settings 155 | dataset_type = 'TaoDataset' 156 | data = dict( 157 | samples_per_gpu=2, 158 | workers_per_gpu=2, 159 | train=dict( 160 | _delete_=True, 161 | type='ClassBalancedDataset', 162 | oversample_thr=1e-3, 163 | dataset=dict( 164 | type=dataset_type, 165 | classes='data/lvis/annotations/lvis_classes.txt', 166 | load_as_video=False, 167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 168 | img_prefix='data/lvis/train2017/', 169 | key_img_sampler=dict(interval=1), 170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 171 | pipeline=train_pipeline) 172 | ), 173 | val=dict( 174 | type=dataset_type, 175 | classes='data/lvis/annotations/lvis_classes.txt', 176 | ann_file='data/tao/annotations/validation_ours.json', 177 | img_prefix='data/tao/frames/', 178 | ref_img_sampler=None, 179 | pipeline=test_pipeline), 180 | test=dict( 181 | type=dataset_type, 182 | classes='data/lvis/annotations/lvis_classes.txt', 183 | ann_file='data/tao/annotations/validation_ours.json', 184 | img_prefix='data/tao/frames/', 185 | ref_img_sampler=None, 186 | pipeline=test_pipeline) 187 | 188 | ) 189 | # optimizer 190 | optimizer = dict( 191 | # _delete_=True, 192 | type='AdamW', 193 | lr=0.0001, 194 | betas=(0.9, 0.999), 195 | weight_decay=0.05, 196 | paramwise_cfg=dict( 197 | custom_keys={ 198 | 'absolute_pos_embed': dict(decay_mult=0.), 199 | 'relative_position_bias_table': dict(decay_mult=0.), 200 | 'norm': dict(decay_mult=0.)
201 | })) 202 | optimizer_config = dict(grad_clip=None) 203 | # learning policy 204 | lr_config = dict( 205 | policy='step', 206 | warmup='linear', 207 | warmup_iters=1000, 208 | warmup_ratio=0.001, 209 | step=[27, 33]) 210 | runner = dict(type='EpochBasedRunner', max_epochs=36) 211 | 212 | 213 | # checkpoint saving 214 | checkpoint_config = dict(interval=1) 215 | # yapf:disable 216 | log_config = dict( 217 | interval=50, 218 | hooks=[ 219 | dict(type='TextLoggerHook'), 220 | ]) 221 | # yapf:enable 222 | # runtime settings 223 | total_epochs = 36 224 | dist_params = dict(backend='nccl') 225 | log_level = 'INFO' 226 | load_from = None 227 | resume_from = None 228 | workflow = [('train', 1)] 229 | evaluation = dict(metric=['bbox'], start=2, interval=2) 230 | -------------------------------------------------------------------------------- /configs/tao/cem_swinS_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=96, 11 | depths=[2, 2, 18, 2], 12 | num_heads=[3, 6, 12, 24], 13 | window_size=7, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[96, 192, 384, 768]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | 47 | track_head=dict( 48 | type='QuasiDenseEmbedHead', 49 | num_convs=4, 50 | num_fcs=1, 51 | embed_channels=256, 52 | norm_cfg=dict(type='GN', num_groups=32), 53 | loss_track=dict(type='MultiPosCrossEntropyLoss', 54 | loss_weight=0.25, 55 | version='unbiased'), 56 | loss_track_aux=dict( 57 | type='L2Loss', 58 | neg_pos_ub=3, 59 | pos_margin=0, 60 | neg_margin=0.1, 61 | hard_mining=True, 62 | loss_weight=1.0)) 63 | ), 64 | tracker=dict( 65 | type='TETerTAO', 66 | init_score_thr=0.0001, 67 | obj_score_thr=0.0001, 68 | match_score_thr=0.5, 69 | memo_frames=10, 70 | momentum_embed=0.8, 71 | momentum_obj_score=0.5, 72 | match_metric='bisoftmax', 73 | match_with_cosine=True, 74 | contrastive_thr=0.5, 75 | ), 76 | train_cfg=dict( 77 | cem=dict( 78 | assigner=dict( 79 | type='MaxIoUAssigner', 80 | pos_iou_thr=0.7, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.5, 83 | match_low_quality=False, 84 | ignore_iof_thr=-1), 85 | sampler=dict( 86 | type='CombinedSampler', 87 | num=256, 88 | pos_fraction=1, 89 | neg_pos_ub=0, 90 | add_gt_as_proposals=True, 91 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 92 | neg_sampler=dict(type='RandomSampler')) 93 | ) 94 | ), 95 | 96 | test_cfg=dict( 97 | rcnn=dict( 98 | 
score_thr=0.0001, 99 | nms=dict(type='nms', iou_threshold=0.5), 100 | max_per_img=300) 101 | ) 102 | ) 103 | # dataset settings 104 | img_norm_cfg = dict( 105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 106 | train_pipeline = [ 107 | dict(type='LoadMultiImagesFromFile'), 108 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 109 | # dict( 110 | # type='LoadMultiImagesFromFile', 111 | # file_client_args=dict( 112 | # img_db_path='data/lvis/train_imgs.hdf5', 113 | # backend='hdf5', 114 | # type='lvis')), 115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 116 | dict( 117 | type='SeqResize', 118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 119 | (1333, 768), (1333, 800)], 120 | share_params=False, 121 | multiscale_mode='value', 122 | keep_ratio=True), 123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 124 | dict(type='SeqNormalize', **img_norm_cfg), 125 | dict(type='SeqPad', size_divisor=32), 126 | dict(type='SeqDefaultFormatBundle'), 127 | dict( 128 | type='SeqCollect', 129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 130 | ref_prefix='ref'), 131 | ] 132 | 133 | test_pipeline = [ 134 | dict(type='LoadImageFromFile'), 135 | # dict(type='LoadImageFromFile', 136 | # file_client_args=dict( 137 | # img_db_path='data/tao/tao_val_imgs.hdf5', 138 | # backend='hdf5', 139 | # type='tao')), 140 | dict( 141 | type='MultiScaleFlipAug', 142 | img_scale=(1333, 800), 143 | flip=False, 144 | transforms=[ 145 | dict(type='Resize', keep_ratio=True), 146 | dict(type='RandomFlip'), 147 | dict(type='Normalize', **img_norm_cfg), 148 | dict(type='Pad', size_divisor=32), 149 | dict(type='ImageToTensor', keys=['img']), 150 | dict(type='VideoCollect', keys=['img']) 151 | ]) 152 | ] 153 | 154 | ## dataset settings 155 | dataset_type = 'TaoDataset' 156 | data = dict( 157 | samples_per_gpu=2, 158 | workers_per_gpu=2, 159 | train=dict( 160 | _delete_=True, 161 | type='ClassBalancedDataset', 162 | oversample_thr=1e-3, 163 | dataset=dict( 164 | type=dataset_type, 165 | classes='data/lvis/annotations/lvis_classes.txt', 166 | load_as_video=False, 167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 168 | img_prefix='data/lvis/train2017/', 169 | key_img_sampler=dict(interval=1), 170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 171 | pipeline=train_pipeline) 172 | ), 173 | val=dict( 174 | type=dataset_type, 175 | classes='data/lvis/annotations/lvis_classes.txt', 176 | ann_file='data/tao/annotations/validation_ours.json', 177 | img_prefix='data/tao/frames/', 178 | ref_img_sampler=None, 179 | pipeline=test_pipeline), 180 | test=dict( 181 | type=dataset_type, 182 | classes='data/lvis/annotations/lvis_classes.txt', 183 | ann_file='data/tao/annotations/validation_ours.json', 184 | img_prefix='data/tao/frames/', 185 | ref_img_sampler=None, 186 | pipeline=test_pipeline) 187 | 188 | ) 189 | # optimizer 190 | optimizer = dict( 191 | # _delete_=True, 192 | type='AdamW', 193 | lr=0.0001, 194 | betas=(0.9, 0.999), 195 | weight_decay=0.05, 196 | paramwise_cfg=dict( 197 | custom_keys={ 198 | 'absolute_pos_embed': dict(decay_mult=0.), 199 | 'relative_position_bias_table': dict(decay_mult=0.), 200 | 'norm': dict(decay_mult=0.)
201 | })) 202 | optimizer_config = dict(grad_clip=None) 203 | # learning policy 204 | lr_config = dict( 205 | policy='step', 206 | warmup='linear', 207 | warmup_iters=1000, 208 | warmup_ratio=0.001, 209 | step=[27, 33]) 210 | runner = dict(type='EpochBasedRunner', max_epochs=36) 211 | 212 | 213 | # checkpoint saving 214 | checkpoint_config = dict(interval=1) 215 | # yapf:disable 216 | log_config = dict( 217 | interval=50, 218 | hooks=[ 219 | dict(type='TextLoggerHook'), 220 | ]) 221 | # yapf:enable 222 | # runtime settings 223 | total_epochs = 36 224 | dist_params = dict(backend='nccl') 225 | log_level = 'INFO' 226 | load_from = None 227 | resume_from = None 228 | workflow = [('train', 1)] 229 | evaluation = dict(metric=['bbox'], start=2, interval=2) 230 | -------------------------------------------------------------------------------- /configs/tao/cem_swinT_lvis.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py' 3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa 4 | model = dict( 5 | type='TETer', 6 | freeze_detector=False, 7 | backbone=dict( 8 | _delete_=True, 9 | type='SwinTransformer', 10 | embed_dims=96, 11 | depths=[2, 2, 6, 2], 12 | num_heads=[3, 6, 12, 24], 13 | window_size=7, 14 | mlp_ratio=4, 15 | qkv_bias=True, 16 | qk_scale=None, 17 | drop_rate=0., 18 | attn_drop_rate=0., 19 | drop_path_rate=0.2, 20 | patch_norm=True, 21 | out_indices=(0, 1, 2, 3), 22 | with_cp=False, 23 | convert_weights=True, 24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)), 25 | neck=dict(in_channels=[96, 192, 384, 768]), 26 | roi_head=dict( 27 | type='TETerRoIHead', 28 | bbox_head=dict(num_classes=1230), 29 | cem_roi_extractor=dict( 30 | type='SingleRoIExtractor', 31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 32 | out_channels=256, 33 | featmap_strides=[4, 8, 16, 32]), 34 | cem_head=dict( 35 | type='ClsExemplarHead', 36 | num_convs=4, 37 | num_fcs=3, 38 | embed_channels=1230, 39 | norm_cfg=dict(type='GN', num_groups=32), 40 | loss_track=dict(type='UnbiasedSupConLoss', 41 | temperature=0.07, 42 | contrast_mode='all', 43 | pos_normalize=True, 44 | loss_weight=0.25) 45 | , softmax_temp=-1), 46 | track_head=dict( 47 | type='QuasiDenseEmbedHead', 48 | num_convs=4, 49 | num_fcs=1, 50 | embed_channels=256, 51 | norm_cfg=dict(type='GN', num_groups=32), 52 | loss_track=dict(type='MultiPosCrossEntropyLoss', 53 | loss_weight=0.25, 54 | version='unbiased'), 55 | loss_track_aux=dict( 56 | type='L2Loss', 57 | neg_pos_ub=3, 58 | pos_margin=0, 59 | neg_margin=0.1, 60 | hard_mining=True, 61 | loss_weight=1.0)) 62 | ), 63 | tracker=dict( 64 | type='TETerTAO', 65 | init_score_thr=0.0001, 66 | obj_score_thr=0.0001, 67 | match_score_thr=0.5, 68 | memo_frames=10, 69 | momentum_embed=0.8, 70 | momentum_obj_score=0.5, 71 | match_metric='bisoftmax', 72 | match_with_cosine=True, 73 | contrastive_thr=0.5, 74 | ), 75 | train_cfg=dict( 76 | cem=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | pos_iou_thr=0.7, 80 | neg_iou_thr=0.3, 81 | min_pos_iou=0.5, 82 | match_low_quality=False, 83 | ignore_iof_thr=-1), 84 | sampler=dict( 85 | type='CombinedSampler', 86 | num=256, 87 | pos_fraction=1, 88 | neg_pos_ub=0, 89 | add_gt_as_proposals=True, 90 | pos_sampler=dict(type='InstanceBalancedPosSampler'), 91 | neg_sampler=dict(type='RandomSampler')) 92 | ) 93 | ), 94 | 95 | test_cfg=dict( 96 | rcnn=dict( 97 | 
score_thr=0.0001, 98 | nms=dict(type='nms', iou_threshold=0.5), 99 | max_per_img=300) 100 | ) 101 | ) 102 | # dataset settings 103 | img_norm_cfg = dict( 104 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 105 | train_pipeline = [ 106 | dict(type='LoadMultiImagesFromFile'), 107 | # comment out the line above and uncomment the lines below to load images from an hdf5 file. 108 | # dict( 109 | # type='LoadMultiImagesFromFile', 110 | # file_client_args=dict( 111 | # img_db_path='data/lvis/train_imgs.hdf5', 112 | # backend='hdf5', 113 | # type='lvis')), 114 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 115 | dict( 116 | type='SeqResize', 117 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 118 | (1333, 768), (1333, 800)], 119 | share_params=False, 120 | multiscale_mode='value', 121 | keep_ratio=True), 122 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5), 123 | dict(type='SeqNormalize', **img_norm_cfg), 124 | dict(type='SeqPad', size_divisor=32), 125 | dict(type='SeqDefaultFormatBundle'), 126 | dict( 127 | type='SeqCollect', 128 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 129 | ref_prefix='ref'), 130 | ] 131 | 132 | test_pipeline = [ 133 | dict(type='LoadImageFromFile'), 134 | # dict(type='LoadImageFromFile', 135 | # file_client_args=dict( 136 | # img_db_path='data/tao/tao_val_imgs.hdf5', 137 | # backend='hdf5', 138 | # type='tao')), 139 | dict( 140 | type='MultiScaleFlipAug', 141 | img_scale=(1333, 800), 142 | flip=False, 143 | transforms=[ 144 | dict(type='Resize', keep_ratio=True), 145 | dict(type='RandomFlip'), 146 | dict(type='Normalize', **img_norm_cfg), 147 | dict(type='Pad', size_divisor=32), 148 | dict(type='ImageToTensor', keys=['img']), 149 | dict(type='VideoCollect', keys=['img']) 150 | ]) 151 | ] 152 | 153 | ## dataset settings 154 | dataset_type = 'TaoDataset' 155 | data = dict( 156 | samples_per_gpu=2, 157 | workers_per_gpu=2, 158 | train=dict( 159 | _delete_=True, 160 | type='ClassBalancedDataset', 161 | oversample_thr=1e-3, 162 | dataset=dict( 163 | type=dataset_type, 164 | classes='data/lvis/annotations/lvis_classes.txt', 165 | load_as_video=False, 166 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json', 167 | img_prefix='data/lvis/train2017/', 168 | key_img_sampler=dict(interval=1), 169 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 170 | pipeline=train_pipeline) 171 | ), 172 | val=dict( 173 | type=dataset_type, 174 | classes='data/lvis/annotations/lvis_classes.txt', 175 | ann_file='data/tao/annotations/validation_ours.json', 176 | img_prefix='data/tao/frames/', 177 | ref_img_sampler=None, 178 | pipeline=test_pipeline), 179 | test=dict( 180 | type=dataset_type, 181 | classes='data/lvis/annotations/lvis_classes.txt', 182 | ann_file='data/tao/annotations/validation_ours.json', 183 | img_prefix='data/tao/frames/', 184 | ref_img_sampler=None, 185 | pipeline=test_pipeline) 186 | 187 | ) 188 | # optimizer 189 | optimizer = dict( 190 | # _delete_=True, 191 | type='AdamW', 192 | lr=0.0001, 193 | betas=(0.9, 0.999), 194 | weight_decay=0.05, 195 | paramwise_cfg=dict( 196 | custom_keys={ 197 | 'absolute_pos_embed': dict(decay_mult=0.), 198 | 'relative_position_bias_table': dict(decay_mult=0.), 199 | 'norm': dict(decay_mult=0.)
200 | })) 201 | optimizer_config = dict(grad_clip=None) 202 | # learning policy 203 | lr_config = dict( 204 | policy='step', 205 | warmup='linear', 206 | warmup_iters=1000, 207 | warmup_ratio=0.001, 208 | step=[27, 33]) 209 | runner = dict(type='EpochBasedRunner', max_epochs=36) 210 | 211 | 212 | # checkpoint saving 213 | checkpoint_config = dict(interval=1) 214 | # yapf:disable 215 | log_config = dict( 216 | interval=50, 217 | hooks=[ 218 | dict(type='TextLoggerHook'), 219 | ]) 220 | # yapf:enable 221 | # runtime settings 222 | total_epochs = 36 223 | dist_params = dict(backend='nccl') 224 | log_level = 'INFO' 225 | load_from = None 226 | resume_from = None 227 | workflow = [('train', 1)] 228 | evaluation = dict(metric=['bbox'], start=2, interval=2) 229 | -------------------------------------------------------------------------------- /configs/tao/tracker_r101_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_r101_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict(bbox_head=dict(num_classes=1230), 8 | track_head=dict( 9 | type='QuasiDenseEmbedHead', 10 | num_convs=4, 11 | num_fcs=1, 12 | embed_channels=256, 13 | norm_cfg=dict(type='GN', num_groups=32), 14 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25), 15 | loss_track_aux=dict( 16 | type='L2Loss', 17 | neg_pos_ub=3, 18 | pos_margin=0, 19 | neg_margin=0.1, 20 | hard_mining=True, 21 | loss_weight=1.0)) 22 | ), 23 | 24 | test_cfg=dict( 25 | rcnn=dict( 26 | score_thr=0.0001, 27 | nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True, split_thr=100000), 28 | max_per_img=50) 29 | ) 30 | ) 31 | 32 | # dataset settings 33 | img_norm_cfg = dict( 34 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 35 | train_pipeline = [ 36 | dict(type='LoadMultiImagesFromFile'), 37 | # dict( 38 | # type='LoadMultiImagesFromFile', 39 | # file_client_args=dict( 40 | # img_db_path='data/tao/tao_train_imgs.hdf5', 41 | # backend='hdf5', 42 | # type='tao')), 43 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 44 | dict( 45 | type='SeqResize', 46 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 47 | (1333, 768), (1333, 800)], 48 | share_params=True, 49 | multiscale_mode='value', 50 | keep_ratio=True), 51 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 52 | dict(type='SeqNormalize', **img_norm_cfg), 53 | dict(type='SeqPad', size_divisor=32), 54 | dict(type='SeqDefaultFormatBundle'), 55 | dict( 56 | type='SeqCollect', 57 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 58 | ref_prefix='ref'), 59 | ] 60 | 61 | test_pipeline = [ 62 | dict(type='LoadImageFromFile'), 63 | # dict(type='LoadImageFromFile', 64 | # file_client_args=dict( 65 | # img_db_path='data/tao/tao_val_imgs.hdf5', 66 | # backend='hdf5', 67 | # type='tao')), 68 | dict( 69 | type='MultiScaleFlipAug', 70 | img_scale=(1333, 800), 71 | flip=False, 72 | transforms=[ 73 | dict(type='Resize', keep_ratio=True), 74 | dict(type='RandomFlip'), 75 | dict(type='Normalize', **img_norm_cfg), 76 | dict(type='Pad', size_divisor=32), 77 | dict(type='ImageToTensor', keys=['img']), 78 | dict(type='VideoCollect', keys=['img']) 79 | ]) 80 | ] 81 | dataset_type = 'TaoDataset' 82 | data = dict( 83 | samples_per_gpu=2, 84 | workers_per_gpu=2, 85 | train=dict( 86 | _delete_=True, 87 | type='ClassBalancedDataset', 88 | oversample_thr=1e-3, 89 | dataset=dict( 90 | 
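# ClassBalancedDataset applies LVIS-style repeat-factor sampling: each
# image is repeated according to its rarest category,
#   r(I) = max_{c in I} max(1, sqrt(oversample_thr / f(c))),
# where f(c) is the fraction of images containing category c. With
# oversample_thr=1e-3, only categories present in fewer than 0.1% of
# images are oversampled; everything else keeps r = 1.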
type=dataset_type, 91 | classes='data/lvis/annotations/lvis_classes.txt', 92 | ann_file='data/tao/annotations/train_ours.json', 93 | img_prefix='data/tao/frames/', 94 | key_img_sampler=dict(interval=1), 95 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 96 | pipeline=train_pipeline)), 97 | val=dict( 98 | type=dataset_type, 99 | classes='data/lvis/annotations/lvis_classes.txt', 100 | ann_file='data/tao/annotations/validation_ours.json', 101 | img_prefix='data/tao/frames/', 102 | ref_img_sampler=None, 103 | pipeline=test_pipeline), 104 | test=dict( 105 | type=dataset_type, 106 | classes='data/lvis/annotations/lvis_classes.txt', 107 | ann_file='data/tao/annotations/validation_ours.json', 108 | img_prefix='data/tao/frames/', 109 | ref_img_sampler=None, 110 | pipeline=test_pipeline) 111 | ) 112 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 113 | lr_config = dict( 114 | policy='step', 115 | warmup='linear', 116 | warmup_iters=1000, 117 | warmup_ratio=1.0 / 1000, 118 | step=[8, 11]) 119 | total_epochs = 12 120 | load_from = None 121 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 122 | work_dir = './saved_models/teter_r101/' 123 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinB_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinB_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | # type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[
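# MultiScaleFlipAug is mmdet's test-time-augmentation wrapper; with a
# single img_scale and flip=False it reduces to one deterministic pass
# (resize to fit within 1333x800, no flip), but the transforms listed
# below would be re-run once per scale/flip combination if more were
# configured.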
79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinB/' 132 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinL_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinL_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | # type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | 
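# SeqNormalize whitens every frame of the clip with the ImageNet
# statistics above, x' = (x - mean) / std per channel after the BGR->RGB
# swap (to_rgb=True); SeqPad then pads each image to a multiple of 32 so
# all FPN strides divide the padded size evenly.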
dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[ 79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinL/' 132 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinS_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinS_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | #
type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[ 79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinS/' 132 | -------------------------------------------------------------------------------- /configs/tao/tracker_swinT_tao.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | _base_ = './cem_swinT_lvis.py' 3 | model = dict( 4 | freeze_detector=True, 5 | freeze_cem=True, 6 | method='teter', 7 | roi_head=dict( 8 | bbox_head=dict(num_classes=1230), 9 | track_head = dict( 10 | type='QuasiDenseEmbedHead', 11 | num_convs=4, 12 | num_fcs=1, 13 | embed_channels=256, 14 | norm_cfg=dict(type='GN', num_groups=32), 15 | loss_track=dict(type='MultiPosCrossEntropyLoss', 16 | loss_weight=0.25), 17 | loss_track_aux=dict( 18 | 
type='L2Loss', 19 | neg_pos_ub=3, 20 | pos_margin=0, 21 | neg_margin=0.1, 22 | hard_mining=True, 23 | loss_weight=1.0) 24 | )), 25 | 26 | 27 | test_cfg=dict( 28 | rcnn=dict( 29 | score_thr=0.0001, 30 | nms=dict(type='nms', 31 | iou_threshold=0.5, 32 | class_agnostic=True, 33 | split_thr=100000), 34 | max_per_img=50) 35 | ) 36 | ) 37 | 38 | # dataset settings 39 | img_norm_cfg = dict( 40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 41 | train_pipeline = [ 42 | dict(type='LoadMultiImagesFromFile'), 43 | # dict( 44 | # type='LoadMultiImagesFromFile', 45 | # file_client_args=dict( 46 | # img_db_path='data/tao/tao_train_imgs.hdf5', 47 | # backend='hdf5', 48 | # type='tao')), 49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True), 50 | dict( 51 | type='SeqResize', 52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), 53 | (1333, 768), (1333, 800)], 54 | share_params=True, 55 | multiscale_mode='value', 56 | keep_ratio=True), 57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5), 58 | dict(type='SeqNormalize', **img_norm_cfg), 59 | dict(type='SeqPad', size_divisor=32), 60 | dict(type='SeqDefaultFormatBundle'), 61 | dict( 62 | type='SeqCollect', 63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'], 64 | ref_prefix='ref'), 65 | ] 66 | 67 | test_pipeline = [ 68 | dict(type='LoadImageFromFile'), 69 | # dict(type='LoadImageFromFile', 70 | # file_client_args=dict( 71 | # img_db_path='data/tao/tao_val_imgs.hdf5', 72 | # backend='hdf5', 73 | # type='tao')), 74 | dict( 75 | type='MultiScaleFlipAug', 76 | img_scale=(1333, 800), 77 | flip=False, 78 | transforms=[ 79 | dict(type='Resize', keep_ratio=True), 80 | dict(type='RandomFlip'), 81 | dict(type='Normalize', **img_norm_cfg), 82 | dict(type='Pad', size_divisor=32), 83 | dict(type='ImageToTensor', keys=['img']), 84 | dict(type='VideoCollect', keys=['img']) 85 | ]) 86 | ] 87 | dataset_type = 'TaoDataset' 88 | data = dict( 89 | samples_per_gpu=2, 90 | workers_per_gpu=2, 91 | train=dict( 92 | _delete_=True, 93 | type='ClassBalancedDataset', 94 | oversample_thr=1e-3, 95 | dataset=dict( 96 | type=dataset_type, 97 | classes='data/lvis/annotations/lvis_classes.txt', 98 | ann_file='data/tao/annotations/train_ours.json', 99 | img_prefix='data/tao/frames/', 100 | key_img_sampler=dict(interval=1), 101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'), 102 | pipeline=train_pipeline)), 103 | val=dict( 104 | type=dataset_type, 105 | classes='data/lvis/annotations/lvis_classes.txt', 106 | ann_file='data/tao/annotations/validation_ours.json', 107 | img_prefix='data/tao/frames/', 108 | ref_img_sampler=None, 109 | pipeline=test_pipeline), 110 | test=dict( 111 | type=dataset_type, 112 | classes='data/lvis/annotations/lvis_classes.txt', 113 | ann_file='data/tao/annotations/validation_ours.json', 114 | img_prefix='data/tao/frames/', 115 | ref_img_sampler=None, 116 | pipeline=test_pipeline) 117 | ) 118 | optimizer = dict( 119 | _delete_=True, 120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001) 121 | lr_config = dict( 122 | _delete_=True, 123 | policy='step', 124 | warmup='linear', 125 | warmup_iters=1000, 126 | warmup_ratio=1.0 / 1000, 127 | step=[8, 11]) 128 | total_epochs = 12 129 | load_from = None 130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/') 131 | work_dir = './saved_models/teter_swinT/' 132 | -------------------------------------------------------------------------------- /docs/INSTALL.md: 
--------------------------------------------------------------------------------
1 | ## Installation
2 | TETer builds upon the mmdetection framework.
3 | Please install the following packages.
4 |
5 | ### Requirements
6 | - [pytorch >= 1.10](https://pytorch.org/get-started/locally/)
7 | - [mmcv-full == 1.4.4](https://github.com/open-mmlab/mmcv)
8 | - [mmdetection == 2.23.0](https://github.com/open-mmlab/mmdetection)
9 |
10 |
11 | ### Install TETA
12 |
13 | Please refer to [TETA](../teta/README.md)
14 |
15 |
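One possible way to satisfy these pins (a sketch only: the torch/CUDA combination, and the matching mmcv find-links URL, depend on your machine):

```shell
# Example only: pick the torch/CUDA build that matches your system.
pip install torch==1.10.1 torchvision==0.11.2
pip install mmcv-full==1.4.4 -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10/index.html
pip install mmdet==2.23.0
pip install -r requirements.txt
```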
-------------------------------------------------------------------------------- /figures/teaser-teter.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/figures/teaser-teter.png
-------------------------------------------------------------------------------- /figures/teta-teaser.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/figures/teta-teaser.png
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | lvis
2 | motmetrics
3 | numpy
4 | pycocotools
5 | seaborn
6 | tqdm
7 | timm
8 | h5py
9 | git+https://github.com/bdd100k/bdd100k.git
10 |
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
1 | [isort]
2 | line_length = 79
3 | multi_line_output = 0
4 | known_standard_library = setuptools
5 | known_first_party = teter
6 | known_third_party = cv2,mmcv,mmdet,motmetrics,numpy,pandas,pycocotools,torch,torchvision,tqdm
7 | no_lines_before = STDLIB,LOCALFOLDER
8 | default_section = THIRDPARTY
9 |
10 | [yapf]
11 | BASED_ON_STYLE = pep8
12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
14 |
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import time
4 | from setuptools import find_packages, setup
5 |
6 |
7 | def readme():
8 | with open('README.md', encoding='utf-8') as f:
9 | content = f.read()
10 | return content
11 |
12 |
13 | version_file = 'teter/version.py'
14 |
15 |
16 | def get_git_hash():
17 |
18 | def _minimal_ext_cmd(cmd):
19 | # construct minimal environment
20 | env = {}
21 | for k in ['SYSTEMROOT', 'PATH', 'HOME']:
22 | v = os.environ.get(k)
23 | if v is not None:
24 | env[k] = v
25 | # LANGUAGE is used on win32
26 | env['LANGUAGE'] = 'C'
27 | env['LANG'] = 'C'
28 | env['LC_ALL'] = 'C'
29 | out = subprocess.Popen(
30 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
31 | return out
32 |
33 | try:
34 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
35 | sha = out.strip().decode('ascii')
36 | except OSError:
37 | sha = 'unknown'
38 |
39 | return sha
40 |
41 |
42 | def get_hash():
43 | if os.path.exists('.git'):
44 | sha = get_git_hash()[:7]
45 | elif os.path.exists(version_file):
46 | try:
47 | from teter.version import __version__
48 | sha = __version__.split('+')[-1]
49 | except ImportError:
50 | raise ImportError('Unable to get git version')
51 | else:
52 | sha = 'unknown'
53 |
54 | return sha
55 |
56 |
57 | def write_version_py():
58 | content = """# GENERATED VERSION FILE
59 | # TIME: {}
60 | __version__ = '{}'
61 | short_version = '{}'
62 | version_info = ({})
63 | """
64 | sha = get_hash()
65 | with open('teter/VERSION', 'r') as f:
66 | SHORT_VERSION = f.read().strip()
67 | VERSION_INFO = ', '.join(SHORT_VERSION.split('.'))
68 | VERSION = SHORT_VERSION + '+' + sha
69 |
70 | version_file_str = content.format(time.asctime(), VERSION, SHORT_VERSION,
71 | VERSION_INFO)
72 | with open(version_file, 'w') as f:
73 | f.write(version_file_str)
74 |
75 |
76 | def get_version():
77 | with open(version_file, 'r') as f:
78 | exec(compile(f.read(), version_file, 'exec'))
79 | return locals()['__version__']
80 |
81 |
82 | def get_requirements(filename='requirements.txt'):
83 | here = os.path.dirname(os.path.realpath(__file__))
84 | with open(os.path.join(here, filename), 'r') as f:
85 | requires = [line.replace('\n', '') for line in f.readlines()]
86 | for i, req in enumerate(requires):
87 | if req.startswith("git"):
88 | pkg_name = req.split("/")[-1].split(".")[0]
89 | req = pkg_name
90 | requires[i] = req
91 | return requires
92 |
93 |
94 | if __name__ == '__main__':
95 | write_version_py()
96 | setup(
97 | name='teter',
98 | version=get_version(),
99 | description='TETer: Tracking Every Thing in the Wild.',
100 | long_description=readme(),
101 | packages=find_packages(exclude=('configs', 'tools', 'demo')),
102 | package_data={'teter.ops': ['*/*.so']},
103 | classifiers=[
104 | 'Development Status :: 4 - Beta',
105 | 'License :: OSI Approved :: Apache Software License',
106 | 'Operating System :: OS Independent',
107 | 'Programming Language :: Python :: 3',
108 | 'Programming Language :: Python :: 3.5',
109 | 'Programming Language :: Python :: 3.6',
110 | 'Programming Language :: Python :: 3.7',
111 | ],
112 | license='Apache License 2.0',
113 | setup_requires=['pytest-runner', 'cython', 'numpy'],
114 | tests_require=['pytest', 'xdoctest'],
115 | install_requires=get_requirements(),
116 | zip_safe=False)
117 |
-------------------------------------------------------------------------------- /teta/README.md: --------------------------------------------------------------------------------
1 | # Track Every Thing Accuracy
2 | [Track Every Thing in the Wild](https://arxiv.org/abs/2207.12978) [ECCV 2022].
3 |
4 | This is the official implementation of the TETA metric described in the paper.
5 |
6 |
7 |
8 | The proposed TETA metric disentangles classification performance from tracking.
9 | Instead of using the predicted class labels to group per-class tracking results, we use location with the help of local cluster evaluation.
10 | We treat each ground truth bounding box of the target class as the anchor of each cluster and group prediction results inside each cluster to evaluate the localization and association performance.
11 | Our local clusters enable us to evaluate tracks even when the class prediction is wrong.
12 |
13 |
14 |
15 | ## Install
16 | Install the TETA environment using pip.
17 | ```
18 | pip install -r requirements.txt
19 | ```
20 | Go to the root of the teta folder and install it with
21 | ```
22 | pip install -e .
23 | ```
24 | ## Supported data format
25 | The result format follows the COCO-VID format. We describe it in detail [here](./docs/TAO-format.txt).
26 |
27 | ## How to Run
28 | Run on TAO.
29 | ```
30 | python scripts/run_tao.py --METRICS TETA --TRACKERS_TO_EVAL TETer --GT_FOLDER ${GT_JSON_PATH}.json --TRACKER_SUB_FOLDER ${RESULT_JSON_PATH}.json
31 | ```
32 | Run on BDD100K.
33 | ```
34 | python scripts/run_coco.py --METRICS TETA --TRACKERS_TO_EVAL TETer --GT_FOLDER ${GT_JSON_PATH}.json --TRACKER_SUB_FOLDER ${RESULT_JSON_PATH}.json
35 | ```
36 |
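The run scripts can also be driven from Python; a minimal sketch mirroring `scripts/run_tao.py` (the upper-case keys accepted by `parse_configs` are exactly the CLI flags shown above):

```python
# Minimal sketch based on scripts/run_tao.py; see teta/config.py for all options.
from teta.config import parse_configs
from teta.datasets import TAO
from teta.eval import Evaluator
from teta.metrics import TETA

eval_cfg, dataset_cfg, metrics_cfg = parse_configs()  # parses the flags above
evaluator = Evaluator(eval_cfg)
evaluator.evaluate([TAO(dataset_cfg)], [TETA(exhaustive=False)])
```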
37 | ## Citation
38 |
39 | ```
40 | @InProceedings{trackeverything,
41 | title = {Tracking Every Thing in the Wild},
42 | author = {Li, Siyuan and Danelljan, Martin and Ding, Henghui and Huang, Thomas E. and Yu, Fisher},
43 | booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
44 | month = {Oct},
45 | year = {2022}
46 | }
47 | ```
-------------------------------------------------------------------------------- /teta/docs/TAO-format.txt: --------------------------------------------------------------------------------
1 | Taken from: https://github.com/TAO-Dataset/tao/blob/master/tao/toolkit/tao/tao.py
2 |
3 | Annotation file format:
4 | {
5 | "info" : info,
6 | "images" : [image],
7 | "videos": [video],
8 | "tracks": [track],
9 | "annotations" : [annotation],
10 | "categories": [category],
11 | "licenses" : [license],
12 | }
13 | info: As in MS COCO
14 | image: {
15 | "id" : int,
16 | "video_id": int,
17 | "file_name" : str,
18 | "license" : int,
19 | # Redundant fields for COCO-compatibility
20 | "width": int,
21 | "height": int,
22 | "frame_index": int
23 | }
24 | video: {
25 | "id": int,
26 | "name": str,
27 | "width" : int,
28 | "height" : int,
29 | "neg_category_ids": [int],
30 | "not_exhaustive_category_ids": [int],
31 | "metadata": dict, # Metadata about the video
32 | }
33 | track: {
34 | "id": int,
35 | "category_id": int,
36 | "video_id": int
37 | }
38 | category: {
39 | "id": int,
40 | "name": str,
41 | "synset": str, # For non-LVIS objects, this is "unknown"
42 | ... [other fields copied from LVIS v0.5 and unused]
43 | }
44 | annotation: {
45 | "image_id": int,
46 | "track_id": int,
47 | "bbox": [x,y,width,height],
48 | "area": float,
49 | # Redundant field for compatibility with COCO scripts
50 | "category_id": int
51 | }
52 | license: {
53 | "id" : int,
54 | "name" : str,
55 | "url" : str,
56 | }
57 |
58 | Prediction format:
59 |
60 | [{
61 | "image_id" : int,
62 | "category_id" : int,
63 | "bbox" : [x,y,width,height],
64 | "score" : float,
65 | "track_id": int,
66 | "video_id": int
67 | }]
-------------------------------------------------------------------------------- /teta/figures/figure_1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/teta/figures/figure_1.png
-------------------------------------------------------------------------------- /teta/figures/teta-teaser.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/teta/figures/teta-teaser.png
-------------------------------------------------------------------------------- /teta/requirements.txt: --------------------------------------------------------------------------------
1 | scipy
2 | numpy
-------------------------------------------------------------------------------- /teta/scripts/run_coco.py: --------------------------------------------------------------------------------
1 | """ evaluate.py
2 |
3 | Run example:
4 | evaluate.py --USE_PARALLEL False --METRICS TETA --TRACKERS_TO_EVAL qdtrack
5 |
6 | Command Line Arguments: Defaults, # Comments
7 | Eval arguments:
8 | 'USE_PARALLEL': False,
9 | 'NUM_PARALLEL_CORES': 8,
10 | 'BREAK_ON_ERROR': True, # Raises
exception and exits with error 11 | 'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error 12 | 'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file. 13 | 'PRINT_RESULTS': True, 14 | 'PRINT_ONLY_COMBINED': False, 15 | 'PRINT_CONFIG': True, 16 | 'TIME_PROGRESS': True, 17 | 'DISPLAY_LESS_PROGRESS': True, 18 | 'OUTPUT_SUMMARY': True, 19 | 'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections 20 | 'OUTPUT_TEM_RAW_DATA': True, 21 | Dataset arguments: 22 | 'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data 23 | 'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location 24 | 'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER) 25 | 'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder) 26 | 'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes) 27 | 'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val' 28 | 'PRINT_CONFIG': True, # Whether to print current config 29 | 'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER 30 | 'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER 31 | 'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL 32 | 'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited) 33 | Metric arguments: 34 | 'METRICS': ['HOTA', 'CLEAR', 'Identity', 'TrackMAP'] 35 | """ 36 | 37 | import sys 38 | import os 39 | import argparse 40 | from multiprocessing import freeze_support 41 | 42 | from teta.config import parse_configs 43 | from teta.datasets import COCO 44 | from teta.eval import Evaluator 45 | from teta.metrics import TETA 46 | 47 | 48 | def evaluate(): 49 | """Evaluate with TETA.""" 50 | eval_config, dataset_config, metrics_config = parse_configs() 51 | evaluator = Evaluator(eval_config) 52 | dataset_list = [COCO(dataset_config)] 53 | metrics_list = [] 54 | metric = TETA(exhaustive=True) 55 | if metric.get_name() in metrics_config["METRICS"]: 56 | metrics_list.append(metric) 57 | if len(metrics_list) == 0: 58 | raise Exception("No metrics selected for evaluation") 59 | evaluator.evaluate(dataset_list, metrics_list) 60 | 61 | 62 | if __name__ == "__main__": 63 | freeze_support() 64 | evaluate() 65 | -------------------------------------------------------------------------------- /teta/scripts/run_tao.py: -------------------------------------------------------------------------------- 1 | """ evaluate.py 2 | 3 | Run example: 4 | evaluate.py --USE_PARALLEL False --METRICS TETA --TRACKERS_TO_EVAL qdtrack 5 | 6 | Command Line Arguments: Defaults, # Comments 7 | Eval arguments: 8 | 'USE_PARALLEL': False, 9 | 'NUM_PARALLEL_CORES': 8, 10 | 'BREAK_ON_ERROR': True, # Raises exception and exits with error 11 | 'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error 12 | 'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file. 
13 | 'PRINT_RESULTS': True, 14 | 'PRINT_ONLY_COMBINED': False, 15 | 'PRINT_CONFIG': True, 16 | 'TIME_PROGRESS': True, 17 | 'DISPLAY_LESS_PROGRESS': True, 18 | 'OUTPUT_SUMMARY': True, 19 | 'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections 20 | 'OUTPUT_TEM_RAW_DATA': True, 21 | Dataset arguments: 22 | 'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data 23 | 'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location 24 | 'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER) 25 | 'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder) 26 | 'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes) 27 | 'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val' 28 | 'PRINT_CONFIG': True, # Whether to print current config 29 | 'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER 30 | 'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER 31 | 'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL 32 | 'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited) 33 | Metric arguments: 34 | 'METRICS': ['HOTA', 'CLEAR', 'Identity', 'TrackMAP'] 35 | """ 36 | 37 | import sys 38 | import os 39 | import argparse 40 | from multiprocessing import freeze_support 41 | 42 | from teta.config import parse_configs 43 | from teta.datasets import TAO 44 | from teta.eval import Evaluator 45 | from teta.metrics import TETA 46 | 47 | 48 | def evaluate(): 49 | """Evaluate with TETA.""" 50 | eval_config, dataset_config, metrics_config = parse_configs() 51 | evaluator = Evaluator(eval_config) 52 | dataset_list = [TAO(dataset_config)] 53 | metrics_list = [] 54 | metric = TETA(exhaustive=False) 55 | if metric.get_name() in metrics_config["METRICS"]: 56 | metrics_list.append(metric) 57 | if len(metrics_list) == 0: 58 | raise Exception("No metrics selected for evaluation") 59 | evaluator.evaluate(dataset_list, metrics_list) 60 | 61 | 62 | if __name__ == "__main__": 63 | freeze_support() 64 | evaluate() 65 | -------------------------------------------------------------------------------- /teta/setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import sys 4 | from shutil import rmtree 5 | 6 | from setuptools import find_packages, setup, Command 7 | 8 | # Package meta-data. 9 | NAME = 'teta' 10 | DESCRIPTION = 'Track Every Thing Accuracy (TETA metric)' 11 | EMAIL = 'siyuan.li@vision.ee.ethz.ch' 12 | AUTHOR = 'Siyuan Li' 13 | REQUIRES_PYTHON = '>=3.6.0' 14 | VERSION = '0.1.0' 15 | 16 | # What packages are required for this module to be executed? 17 | REQUIRED = [ 18 | 'script_utils @ git+https://github.com/achalddave/python-script-utils.git@v0.0.2#egg=script_utils', 19 | 'numpy', 'scipy' 20 | ] 21 | 22 | # What packages are optional? 23 | EXTRAS = { 24 | } 25 | 26 | here = os.path.abspath(os.path.dirname(__file__)) 27 | 28 | # Import the README and use it as the long-description. 29 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 
30 | try: 31 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 32 | long_description = '\n' + f.read() 33 | except FileNotFoundError: 34 | long_description = DESCRIPTION 35 | 36 | # Load the package's __version__.py module as a dictionary. 37 | about = {} 38 | if not VERSION: 39 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_") 40 | with open(os.path.join(here, project_slug, '__version__.py')) as f: 41 | exec(f.read(), about) 42 | else: 43 | about['__version__'] = VERSION 44 | 45 | 46 | class UploadCommand(Command): 47 | """Support setup.py upload.""" 48 | 49 | description = 'Build and publish the package.' 50 | user_options = [] 51 | 52 | @staticmethod 53 | def status(s): 54 | """Prints things in bold.""" 55 | print('\033[1m{0}\033[0m'.format(s)) 56 | 57 | def initialize_options(self): 58 | pass 59 | 60 | def finalize_options(self): 61 | pass 62 | 63 | def run(self): 64 | try: 65 | self.status('Removing previous builds…') 66 | rmtree(os.path.join(here, 'dist')) 67 | except OSError: 68 | pass 69 | 70 | self.status('Building Source and Wheel (universal) distribution…') 71 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 72 | 73 | self.status('Uploading the package to PyPI via Twine…') 74 | os.system('twine upload dist/*') 75 | 76 | self.status('Pushing git tags…') 77 | os.system('git tag v{0}'.format(about['__version__'])) 78 | os.system('git push --tags') 79 | 80 | sys.exit() 81 | 82 | 83 | # Where the magic happens: 84 | setup( 85 | name=NAME, 86 | version=about['__version__'], 87 | description=DESCRIPTION, 88 | long_description=long_description, 89 | long_description_content_type='text/markdown', 90 | author=AUTHOR, 91 | author_email=EMAIL, 92 | python_requires=REQUIRES_PYTHON, 93 | # url=URL, 94 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), 95 | # If your package is a single module, use this instead of 'packages': 96 | # py_modules=['tao'], 97 | 98 | # entry_points={ 99 | # 'console_scripts': ['mycli=mymodule:cli'], 100 | # }, 101 | install_requires=REQUIRED, 102 | extras_require=EXTRAS, 103 | include_package_data=True, 104 | license='MIT', 105 | classifiers=[ 106 | # Trove classifiers 107 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 108 | 'License :: OSI Approved :: MIT License', 109 | 'Programming Language :: Python', 110 | 'Programming Language :: Python :: 3', 111 | 'Programming Language :: Python :: 3.6', 112 | 'Programming Language :: Python :: Implementation :: CPython', 113 | 'Programming Language :: Python :: Implementation :: PyPy' 114 | ], 115 | # $ setup.py publish support. 116 | cmdclass={ 117 | 'upload': UploadCommand, 118 | }, 119 | ) -------------------------------------------------------------------------------- /teta/teta/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import config, datasets, metrics, utils 2 | from .eval import Evaluator 3 | -------------------------------------------------------------------------------- /teta/teta/_timing.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from functools import wraps 3 | from time import perf_counter 4 | 5 | DO_TIMING = False 6 | DISPLAY_LESS_PROGRESS = False 7 | timer_dict = {} 8 | counter = 0 9 | 10 | 11 | def time(f): 12 | @wraps(f) 13 | def wrap(*args, **kw): 14 | if DO_TIMING: 15 | # Run function with timing 16 | ts = perf_counter() 17 | result = f(*args, **kw) 18 | te = perf_counter() 19 | tt = te - ts 20 | 21 | # Get function name 22 | arg_names = inspect.getfullargspec(f)[0] 23 | if arg_names[0] == "self" and DISPLAY_LESS_PROGRESS: 24 | return result 25 | elif arg_names[0] == "self": 26 | method_name = type(args[0]).__name__ + "." + f.__name__ 27 | else: 28 | method_name = f.__name__ 29 | 30 | # Record accumulative time in each function for analysis 31 | if method_name in timer_dict.keys(): 32 | timer_dict[method_name] += tt 33 | else: 34 | timer_dict[method_name] = tt 35 | 36 | # If code is finished, display timing summary 37 | if method_name == "Evaluator.evaluate": 38 | print("") 39 | print("Timing analysis:") 40 | for key, value in timer_dict.items(): 41 | print("%-70s %2.4f sec" % (key, value)) 42 | else: 43 | # Get function argument values for printing special arguments of interest 44 | arg_titles = ["tracker", "seq", "cls"] 45 | arg_vals = [] 46 | for i, a in enumerate(arg_names): 47 | if a in arg_titles: 48 | arg_vals.append(args[i]) 49 | arg_text = "(" + ", ".join(arg_vals) + ")" 50 | 51 | # Display methods and functions with different indentation. 52 | if arg_names[0] == "self": 53 | print("%-74s %2.4f sec" % (" " * 4 + method_name + arg_text, tt)) 54 | elif arg_names[0] == "test": 55 | pass 56 | else: 57 | global counter 58 | counter += 1 59 | print("%i %-70s %2.4f sec" % (counter, method_name + arg_text, tt)) 60 | 61 | return result 62 | else: 63 | # If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing. 
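# (DO_TIMING and DISPLAY_LESS_PROGRESS are module-level flags; the evaluator is
# expected to set them from the TIME_PROGRESS / USE_PARALLEL settings before
# any timed function runs.)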
64 | return f(*args, **kw) 65 | 66 | return wrap 67 | -------------------------------------------------------------------------------- /teta/teta/config.py: -------------------------------------------------------------------------------- 1 | """Config.""" 2 | import argparse 3 | import os 4 | 5 | 6 | def parse_configs(): 7 | """Parse command line.""" 8 | default_eval_config = get_default_eval_config() 9 | default_eval_config["DISPLAY_LESS_PROGRESS"] = True 10 | default_dataset_config = get_default_dataset_config() 11 | default_metrics_config = {"METRICS": ["TETA"]} 12 | config = { 13 | **default_eval_config, 14 | **default_dataset_config, 15 | **default_metrics_config, 16 | } 17 | parser = argparse.ArgumentParser() 18 | for setting in config.keys(): 19 | if type(config[setting]) == list or type(config[setting]) == type(None): 20 | parser.add_argument("--" + setting, nargs="+") 21 | else: 22 | parser.add_argument("--" + setting) 23 | args = parser.parse_args().__dict__ 24 | for setting in args.keys(): 25 | if args[setting] is not None: 26 | if type(config[setting]) == type(True): 27 | if args[setting] == "True": 28 | x = True 29 | elif args[setting] == "False": 30 | x = False 31 | else: 32 | raise Exception( 33 | f"Command line parameter {setting} must be True/False" 34 | ) 35 | elif type(config[setting]) == type(1): 36 | x = int(args[setting]) 37 | elif type(args[setting]) == type(None): 38 | x = None 39 | else: 40 | x = args[setting] 41 | config[setting] = x 42 | eval_config = {k: v for k, v in config.items() if k in default_eval_config.keys()} 43 | dataset_config = { 44 | k: v for k, v in config.items() if k in default_dataset_config.keys() 45 | } 46 | metrics_config = { 47 | k: v for k, v in config.items() if k in default_metrics_config.keys() 48 | } 49 | 50 | return eval_config, dataset_config, metrics_config 51 | 52 | 53 | def get_default_eval_config(): 54 | """Returns the default config values for evaluation.""" 55 | code_path = get_code_path() 56 | default_config = { 57 | "USE_PARALLEL": True, 58 | "NUM_PARALLEL_CORES": 8, 59 | "BREAK_ON_ERROR": True, 60 | "RETURN_ON_ERROR": False, 61 | "LOG_ON_ERROR": os.path.join(code_path, "error_log.txt"), 62 | "PRINT_RESULTS": True, 63 | "PRINT_ONLY_COMBINED": True, 64 | "PRINT_CONFIG": True, 65 | "TIME_PROGRESS": True, 66 | "DISPLAY_LESS_PROGRESS": True, 67 | "OUTPUT_SUMMARY": True, 68 | "OUTPUT_EMPTY_CLASSES": True, 69 | "OUTPUT_TEM_RAW_DATA": True, 70 | "OUTPUT_PER_SEQ_RES": True, 71 | } 72 | return default_config 73 | 74 | 75 | def get_default_dataset_config(): 76 | """Default class config values""" 77 | code_path = get_code_path() 78 | default_config = { 79 | "GT_FOLDER": os.path.join( 80 | code_path, "data/gt/tao/tao_training" 81 | ), # Location of GT data 82 | "TRACKERS_FOLDER": os.path.join( 83 | code_path, "data/trackers/tao/tao_training" 84 | ), # Trackers location 85 | "OUTPUT_FOLDER": None, # Where to save eval results (if None, same as TRACKERS_FOLDER) 86 | "TRACKERS_TO_EVAL": ['TETer'], # Filenames of trackers to eval (if None, all in folder) 87 | "CLASSES_TO_EVAL": None, # Classes to eval (if None, all classes) 88 | "SPLIT_TO_EVAL": "training", # Valid: 'training', 'val' 89 | "PRINT_CONFIG": True, # Whether to print current config 90 | "TRACKER_SUB_FOLDER": "data", # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER 91 | "OUTPUT_SUB_FOLDER": "", # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER 92 | "TRACKER_DISPLAY_NAMES": None, # Names of trackers to display, if None: TRACKERS_TO_EVAL 93 
| "MAX_DETECTIONS": 0, # Number of maximal allowed detections per image (0 for unlimited)
94 | }
95 | return default_config
96 |
97 |
98 | def init_config(config, default_config, name=None):
99 | """Initialize non-given config values with defaults."""
100 | if config is None:
101 | config = default_config
102 | else:
103 | for k in default_config.keys():
104 | if k not in config.keys():
105 | config[k] = default_config[k]
106 | if name and config["PRINT_CONFIG"]:
107 | print("\n%s Config:" % name)
108 | for c in config.keys():
109 | print("%-20s : %-30s" % (c, config[c]))
110 | return config
111 |
112 |
113 | def update_config(config):
114 | """
115 | Parse the command-line arguments of a script and update the given config with any values specified there.
116 | :param config: the config to update
117 | :return: the updated config
118 | """
119 | parser = argparse.ArgumentParser()
120 | for setting in config.keys():
121 | if type(config[setting]) == list or type(config[setting]) == type(None):
122 | parser.add_argument("--" + setting, nargs="+")
123 | else:
124 | parser.add_argument("--" + setting)
125 | args = parser.parse_args().__dict__
126 | for setting in args.keys():
127 | if args[setting] is not None:
128 | if type(config[setting]) == type(True):
129 | if args[setting] == "True":
130 | x = True
131 | elif args[setting] == "False":
132 | x = False
133 | else:
134 | raise Exception(
135 | "Command line parameter " + setting + " must be True or False"
136 | )
137 | elif type(config[setting]) == type(1):
138 | x = int(args[setting])
139 | elif type(args[setting]) == type(None):
140 | x = None
141 | else:
142 | x = args[setting]
143 | config[setting] = x
144 | return config
145 |
146 |
147 | def get_code_path():
148 | """Get the base path where the code is."""
149 | return os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
150 |
-------------------------------------------------------------------------------- /teta/teta/datasets/__init__.py: --------------------------------------------------------------------------------
1 | """Datasets."""
2 | from .coco import COCO
3 | from .tao import TAO
4 | from .bdd import BDD
5 | from .coco_mots import COCOMOTS
6 | from .bdd_mots import BDDMOTS
-------------------------------------------------------------------------------- /teta/teta/metrics/__init__.py: --------------------------------------------------------------------------------
1 | from .teta import TETA
2 |
-------------------------------------------------------------------------------- /teta/teta/metrics/_base_metric.py: --------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | import numpy as np
4 |
5 | from .. import _timing
6 | from ..utils import TrackEvalException
7 |
8 |
9 | class _BaseMetric(ABC):
10 | @abstractmethod
11 | def __init__(self):
12 | self.plottable = False
13 | self.integer_fields = []
14 | self.float_fields = []
15 | self.array_labels = []
16 | self.integer_array_fields = []
17 | self.float_array_fields = []
18 | self.fields = []
19 | self.summary_fields = []
20 | self.registered = False
21 |
22 | #####################################################################
23 | # Abstract functions for subclasses to implement
24 |
25 | @_timing.time
26 | @abstractmethod
27 | def eval_sequence(self, data):
28 | ...
29 |
30 | @abstractmethod
31 | def combine_sequences(self, all_res):
32 | ...
33 |
34 | @abstractmethod
35 | def combine_classes_class_averaged(self, all_res, ignore_empty=False):
36 | ...
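# (Intended contract, judging from how the evaluator is invoked: eval_sequence
# produces one result dict per sequence and class, and the combine_* hooks fold
# those dicts across sequences and classes; TETA in metrics/teta.py is the
# concrete subclass shipped here.)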
37 | 38 | @abstractmethod 39 | def combine_classes_det_averaged(self, all_res): 40 | ... 41 | 42 | def plot_single_tracker_results(self, all_res, tracker, output_folder, cls): 43 | """Plot results, only valid for metrics with self.plottable.""" 44 | if self.plottable: 45 | raise NotImplementedError( 46 | f"plot_results is not implemented for metric {self.get_name()}" 47 | ) 48 | else: 49 | pass 50 | 51 | ##################################################################### 52 | # Helper functions which are useful for all metrics: 53 | 54 | @classmethod 55 | def get_name(cls): 56 | return cls.__name__ 57 | 58 | @staticmethod 59 | def _combine_sum(all_res, field): 60 | """Combine sequence results via sum""" 61 | return sum([all_res[k][field] for k in all_res.keys()]) 62 | 63 | @staticmethod 64 | def _combine_weighted_av(all_res, field, comb_res, weight_field): 65 | """Combine sequence results via weighted average.""" 66 | return sum( 67 | [all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()] 68 | ) / np.maximum(1.0, comb_res[weight_field]) 69 | 70 | def print_table(self, table_res, tracker, cls): 71 | """Print table of results for all sequences.""" 72 | print("") 73 | metric_name = self.get_name() 74 | self._row_print( 75 | [metric_name + ": " + tracker + "-" + cls] + self.summary_fields 76 | ) 77 | for seq, results in sorted(table_res.items()): 78 | if seq == "COMBINED_SEQ": 79 | continue 80 | summary_res = self._summary_row(results) 81 | self._row_print([seq] + summary_res) 82 | summary_res = self._summary_row(table_res["COMBINED_SEQ"]) 83 | self._row_print(["COMBINED"] + summary_res) 84 | 85 | def _summary_row(self, results_): 86 | vals = [] 87 | for h in self.summary_fields: 88 | if h in self.float_array_fields: 89 | vals.append("{0:1.5g}".format(100 * np.mean(results_[h]))) 90 | elif h in self.float_fields: 91 | vals.append("{0:1.5g}".format(100 * float(results_[h]))) 92 | elif h in self.integer_fields: 93 | vals.append("{0:d}".format(int(results_[h]))) 94 | else: 95 | raise NotImplementedError( 96 | "Summary function not implemented for this field type." 
97 | ) 98 | return vals 99 | 100 | @staticmethod 101 | def _row_print(*argv): 102 | """Print results in evenly spaced rows, with more space in first row.""" 103 | if len(argv) == 1: 104 | argv = argv[0] 105 | to_print = "%-35s" % argv[0] 106 | for v in argv[1:]: 107 | to_print += "%-10s" % str(v) 108 | print(to_print) 109 | 110 | def summary_results(self, table_res): 111 | """Return a simple summary of final results for a tracker.""" 112 | return dict( 113 | zip(self.summary_fields, self._summary_row(table_res["COMBINED_SEQ"]),) 114 | ) 115 | 116 | def detailed_results(self, table_res): 117 | """Return detailed final results for a tracker.""" 118 | # Get detailed field information 119 | detailed_fields = self.float_fields + self.integer_fields 120 | for h in self.float_array_fields + self.integer_array_fields: 121 | for alpha in [int(100 * x) for x in self.array_labels]: 122 | detailed_fields.append(h + "___" + str(alpha)) 123 | detailed_fields.append(h + "___AUC") 124 | 125 | # Get detailed results 126 | detailed_results = {} 127 | for seq, res in table_res.items(): 128 | detailed_row = self._detailed_row(res) 129 | if len(detailed_row) != len(detailed_fields): 130 | raise TrackEvalException( 131 | f"Field names and data have different sizes " 132 | f"({len(detailed_row)} and {len(detailed_fields)})" 133 | ) 134 | detailed_results[seq] = dict(zip(detailed_fields, detailed_row)) 135 | return detailed_results 136 | 137 | def _detailed_row(self, res): 138 | detailed_row = [] 139 | for h in self.float_fields + self.integer_fields: 140 | detailed_row.append(res[h]) 141 | for h in self.float_array_fields + self.integer_array_fields: 142 | for i, _ in enumerate([int(100 * x) for x in self.array_labels]): 143 | detailed_row.append(res[h][i]) 144 | detailed_row.append(np.mean(res[h])) 145 | return detailed_row 146 | -------------------------------------------------------------------------------- /teta/teta/utils.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | from collections import OrderedDict 4 | 5 | 6 | def validate_metrics_list(metrics_list): 7 | """Get names of metric class and ensures they are unique, further checks that the fields within each metric class 8 | do not have overlapping names. 9 | """ 10 | metric_names = [metric.get_name() for metric in metrics_list] 11 | # check metric names are unique 12 | if len(metric_names) != len(set(metric_names)): 13 | raise TrackEvalException( 14 | "Code being run with multiple metrics of the same name" 15 | ) 16 | fields = [] 17 | for m in metrics_list: 18 | fields += m.fields 19 | # check metric fields are unique 20 | if len(fields) != len(set(fields)): 21 | raise TrackEvalException( 22 | "Code being run with multiple metrics with fields of the same name" 23 | ) 24 | return metric_names 25 | 26 | 27 | def get_track_id_str(ann): 28 | """Get name of track ID in annotation.""" 29 | if "track_id" in ann: 30 | tk_str = "track_id" 31 | elif "instance_id" in ann: 32 | tk_str = "instance_id" 33 | elif "scalabel_id" in ann: 34 | tk_str = "scalabel_id" 35 | else: 36 | assert False, "No track/instance ID." 37 | return tk_str 38 | 39 | 40 | class TrackEvalException(Exception): 41 | """Custom exception for catching expected errors.""" 42 | 43 | ... 
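# A quick illustration of get_track_id_str (the annotation dict is hypothetical;
# any of the three supported ID keys works):
#
#     ann = {"image_id": 3, "instance_id": 17, "bbox": [10.0, 20.0, 30.0, 40.0]}
#     key = get_track_id_str(ann)  # -> "instance_id"
#     track_id = ann[key]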
44 | -------------------------------------------------------------------------------- /teter/VERSION: -------------------------------------------------------------------------------- 1 | 0.1.0 2 | -------------------------------------------------------------------------------- /teter/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ["__version__", "short_version"] 4 | -------------------------------------------------------------------------------- /teter/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import inference_model, init_model 2 | from .test import multi_gpu_test, single_gpu_test 3 | from .train import train_model 4 | 5 | __all__ = [ 6 | "init_model", 7 | "inference_model", 8 | "multi_gpu_test", 9 | "single_gpu_test", 10 | "train_model", 11 | ] 12 | -------------------------------------------------------------------------------- /teter/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | import warnings 5 | from mmcv.ops import RoIPool 6 | from mmcv.parallel import collate, scatter 7 | from mmcv.runner import load_checkpoint 8 | from mmdet.core import get_classes 9 | from mmdet.datasets import replace_ImageToTensor 10 | from mmdet.datasets.pipelines import Compose 11 | 12 | from teter.models import build_model 13 | 14 | 15 | def init_model(config, checkpoint=None, device="cuda:0", cfg_options=None): 16 | """Initialize a detector from config file. 17 | 18 | Args: 19 | config (str or :obj:`mmcv.Config`): Config file path or the config 20 | object. 21 | checkpoint (str, optional): Checkpoint path. If left as None, the model 22 | will not load any weights. 23 | cfg_options (dict): Options to override some settings in the used 24 | config. 25 | 26 | Returns: 27 | nn.Module: The constructed detector. 28 | """ 29 | if isinstance(config, str): 30 | config = mmcv.Config.fromfile(config) 31 | elif not isinstance(config, mmcv.Config): 32 | raise TypeError( 33 | "config must be a filename or Config object, " f"but got {type(config)}" 34 | ) 35 | if cfg_options is not None: 36 | config.merge_from_dict(cfg_options) 37 | config.model.pretrained = None 38 | config.model.train_cfg = None 39 | model = build_model(config.model, test_cfg=config.get("test_cfg")) 40 | if checkpoint is not None: 41 | map_loc = "cpu" if device == "cpu" else None 42 | checkpoint = load_checkpoint(model, checkpoint, map_location=map_loc) 43 | if "CLASSES" in checkpoint["meta"]: 44 | model.CLASSES = checkpoint["meta"]["CLASSES"] 45 | else: 46 | warnings.simplefilter("once") 47 | warnings.warn( 48 | "Class names are not saved in the checkpoint's " 49 | "meta data, use COCO classes by default." 
50 | ) 51 | model.CLASSES = get_classes("coco") 52 | model.cfg = config # save the config in the model for convenience 53 | model.to(device) 54 | model.eval() 55 | return model 56 | 57 | 58 | def inference_model(model, imgs, frame_id): 59 | if isinstance(imgs, (list, tuple)): 60 | is_batch = True 61 | else: 62 | imgs = [imgs] 63 | is_batch = False 64 | 65 | cfg = model.cfg 66 | device = next(model.parameters()).device # model device 67 | 68 | if isinstance(imgs[0], np.ndarray): 69 | cfg = cfg.copy() 70 | # set loading pipeline type 71 | cfg.data.test.pipeline[0].type = "LoadImageFromWebcam" 72 | 73 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline) 74 | test_pipeline = Compose(cfg.data.test.pipeline) 75 | 76 | datas = [] 77 | for img in imgs: 78 | # prepare data 79 | if isinstance(img, np.ndarray): 80 | # directly add img 81 | data = dict(img=img, frame_id=frame_id) 82 | else: 83 | # add information into dict 84 | data = dict(img_info=dict(filename=img, frame_id=frame_id), img_prefix=None) 85 | # build the data pipeline 86 | 87 | data = test_pipeline(data) 88 | datas.append(data) 89 | 90 | data = collate(datas, samples_per_gpu=len(imgs)) 91 | # just get the actual data from DataContainer 92 | data["img_metas"] = [img_metas.data[0] for img_metas in data["img_metas"]] 93 | data["img"] = [img.data[0] for img in data["img"]] 94 | if next(model.parameters()).is_cuda: 95 | # scatter to specified GPU 96 | data = scatter(data, [device])[0] 97 | else: 98 | for m in model.modules(): 99 | assert not isinstance( 100 | m, RoIPool 101 | ), "CPU inference with RoIPool is not supported currently." 102 | 103 | # forward the model 104 | with torch.no_grad(): 105 | results = model(return_loss=False, rescale=True, detection_only=True, **data) 106 | 107 | if not is_batch: 108 | return results[0] 109 | else: 110 | return results 111 | 112 | 113 | def show_result_pyplot( 114 | model, 115 | img, 116 | result, 117 | score_thr=0.3, 118 | fig_size=(15, 10), 119 | title="result", 120 | block=True, 121 | wait_time=0, 122 | ): 123 | """Visualize the detection results on the image. 124 | 125 | Args: 126 | model (nn.Module): The loaded detector. 127 | img (str or np.ndarray): Image filename or loaded image. 128 | result (tuple[list] or list): The detection result, can be either 129 | (bbox, segm) or just bbox. 130 | score_thr (float): The threshold to visualize the bboxes and masks. 131 | fig_size (tuple): Figure size of the pyplot figure. 132 | title (str): Title of the pyplot figure. 133 | block (bool): Whether to block GUI. Default: True 134 | wait_time (float): Value of waitKey param. 135 | Default: 0. 
136 | """
137 | warnings.warn('"block" will be deprecated in v2.9.0, ' 'Please use "wait_time"')
138 | warnings.warn('"fig_size" is deprecated and takes no effect.')
139 | if hasattr(model, "module"):
140 | model = model.module
141 | model.show_result(
142 | img,
143 | result,
144 | score_thr=score_thr,
145 | show=True,
146 | wait_time=wait_time,
147 | win_name=title,
148 | bbox_color=(72, 101, 241),
149 | text_color=(72, 101, 241),
150 | )
151 |
-------------------------------------------------------------------------------- /teter/apis/test.py: --------------------------------------------------------------------------------
1 | import mmcv
2 | import os.path as osp
3 | import shutil
4 | import tempfile
5 | import time
6 | import torch
7 | import torch.distributed as dist
8 | from collections import defaultdict
9 | from mmcv.runner import get_dist_info
10 |
11 |
12 | def single_gpu_test(model, data_loader, show=False, out_dir=None, show_score_thr=0.3):
13 | model.eval()
14 | results = defaultdict(list)
15 | dataset = data_loader.dataset
16 | prog_bar = mmcv.ProgressBar(len(dataset))
17 | for i, data in enumerate(data_loader):
18 | with torch.no_grad():
19 | result = model(return_loss=False, rescale=True, **data)
20 | for k, v in result.items():
21 | results[k].append(v)
22 |
23 | if show or out_dir:
24 | pass # TODO
25 |
26 | batch_size = data["img"][0].size(0)
27 | for _ in range(batch_size):
28 | prog_bar.update()
29 | return results
30 |
31 |
32 | def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
33 | """Test model with multiple gpus.
34 |
35 | This method tests the model with multiple gpus and collects the results
36 | under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'
37 | it encodes results to gpu tensors and uses gpu communication for results
38 | collection. In cpu mode it saves the results on different gpus to 'tmpdir'
39 | and the rank 0 worker collects them.
40 |
41 | Args:
42 | model (nn.Module): Model to be tested.
43 | data_loader (nn.Dataloader): Pytorch data loader.
44 | tmpdir (str): Path of directory to save the temporary results from
45 | different gpus under cpu mode.
46 | gpu_collect (bool): Option to use either gpu or cpu to collect results.
47 |
48 | Returns:
49 | list: The prediction results.
50 | """
51 | model.eval()
52 | results = defaultdict(list)
53 | dataset = data_loader.dataset
54 | rank, world_size = get_dist_info()
55 | if rank == 0:
56 | prog_bar = mmcv.ProgressBar(len(dataset))
57 | time.sleep(2) # This line can prevent deadlock problem in some cases.
58 | for i, data in enumerate(data_loader): 59 | with torch.no_grad(): 60 | result = model(return_loss=False, rescale=True, **data) 61 | for k, v in result.items(): 62 | results[k].append(v) 63 | 64 | if rank == 0: 65 | batch_size = ( 66 | len(data["img_meta"]._data) 67 | if "img_meta" in data 68 | else data["img"][0].size(0) 69 | ) 70 | for _ in range(batch_size * world_size): 71 | prog_bar.update() 72 | 73 | # collect results from all ranks 74 | if gpu_collect: 75 | raise NotImplementedError 76 | else: 77 | results = collect_results_cpu(results, len(dataset), tmpdir) 78 | return results 79 | 80 | 81 | def collect_results_cpu(result_part, size, tmpdir=None): 82 | rank, world_size = get_dist_info() 83 | # create a tmp dir if it is not specified 84 | if tmpdir is None: 85 | MAX_LEN = 512 86 | # 32 is whitespace 87 | dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device="cuda") 88 | if rank == 0: 89 | tmpdir = tempfile.mkdtemp() 90 | tmpdir = torch.tensor( 91 | bytearray(tmpdir.encode()), dtype=torch.uint8, device="cuda" 92 | ) 93 | dir_tensor[: len(tmpdir)] = tmpdir 94 | dist.broadcast(dir_tensor, 0) 95 | tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() 96 | else: 97 | mmcv.mkdir_or_exist(tmpdir) 98 | # dump the part result to the dir 99 | mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl")) 100 | dist.barrier() 101 | # collect all parts 102 | if rank != 0: 103 | return None 104 | else: 105 | # load results of all parts from tmp dir 106 | part_list = defaultdict(list) 107 | for i in range(world_size): 108 | part_file = osp.join(tmpdir, f"part_{i}.pkl") 109 | part_file = mmcv.load(part_file) 110 | for k, v in part_file.items(): 111 | part_list[k].extend(v) 112 | shutil.rmtree(tmpdir) 113 | return part_list 114 | -------------------------------------------------------------------------------- /teter/apis/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 3 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, 4 | Fp16OptimizerHook, OptimizerHook, build_optimizer) 5 | from mmcv.utils import build_from_cfg 6 | # from mmdet.core import Fp16OptimizerHook 7 | from mmdet.datasets import build_dataset 8 | 9 | from teter.core import DistEvalHook, EvalHook 10 | from teter.datasets import build_dataloader 11 | from teter.utils import get_root_logger 12 | 13 | 14 | def train_model( 15 | model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None 16 | ): 17 | logger = get_root_logger(cfg.log_level) 18 | 19 | # prepare data loaders 20 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 21 | if "imgs_per_gpu" in cfg.data: 22 | logger.warning( 23 | '"imgs_per_gpu" is deprecated in MMDet V2.0. 
'
24 | 'Please use "samples_per_gpu" instead'
25 | )
26 | if "samples_per_gpu" in cfg.data:
27 | logger.warning(
28 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
29 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
30 | f"={cfg.data.imgs_per_gpu} is used in this experiment"
31 | )
32 | else:
33 | logger.warning(
34 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"='
35 | f"{cfg.data.imgs_per_gpu} in this experiment"
36 | )
37 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
38 |
39 | data_loaders = [
40 | build_dataloader(
41 | ds,
42 | cfg.data.samples_per_gpu,
43 | cfg.data.workers_per_gpu,
44 | # cfg.gpus will be ignored if distributed
45 | len(cfg.gpu_ids),
46 | dist=distributed,
47 | seed=cfg.seed,
48 | )
49 | for ds in dataset
50 | ]
51 |
52 | # put model on gpus
53 | if distributed:
54 | find_unused_parameters = cfg.get("find_unused_parameters", False)
55 | # Sets the `find_unused_parameters` parameter in
56 | # torch.nn.parallel.DistributedDataParallel
57 | model = MMDistributedDataParallel(
58 | model.cuda(),
59 | device_ids=[torch.cuda.current_device()],
60 | broadcast_buffers=False,
61 | find_unused_parameters=find_unused_parameters,
62 | )
63 | else:
64 | model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
65 |
66 | # build runner
67 | optimizer = build_optimizer(model, cfg.optimizer)
68 | runner = EpochBasedRunner(
69 | model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta
70 | )
71 | # an ugly workaround to make .log and .log.json filenames the same
72 | runner.timestamp = timestamp
73 |
74 | # fp16 setting
75 | fp16_cfg = cfg.get("fp16", None)
76 | if fp16_cfg is not None:
77 | optimizer_config = Fp16OptimizerHook(
78 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed
79 | )
80 | elif distributed and "type" not in cfg.optimizer_config:
81 | optimizer_config = OptimizerHook(**cfg.optimizer_config)
82 | else:
83 | optimizer_config = cfg.optimizer_config
84 |
85 | # register hooks
86 | runner.register_training_hooks(
87 | cfg.lr_config,
88 | optimizer_config,
89 | cfg.checkpoint_config,
90 | cfg.log_config,
91 | cfg.get("momentum_config", None),
92 | )
93 | if distributed:
94 | runner.register_hook(DistSamplerSeedHook())
95 |
96 | # register eval hooks
97 | if validate:
98 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
99 | val_dataloader = build_dataloader(
100 | val_dataset,
101 | samples_per_gpu=1,
102 | workers_per_gpu=cfg.data.workers_per_gpu,
103 | dist=distributed,
104 | shuffle=False,
105 | )
106 | eval_cfg = cfg.get("evaluation", {})
107 | eval_hook = DistEvalHook if distributed else EvalHook
108 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
109 |
110 | # user-defined hooks
111 | if cfg.get("custom_hooks", None):
112 | custom_hooks = cfg.custom_hooks
113 | assert isinstance(
114 | custom_hooks, list
115 | ), f"custom_hooks expects list type, but got {type(custom_hooks)}"
116 | for hook_cfg in cfg.custom_hooks:
117 | assert isinstance(hook_cfg, dict), (
118 | "Each item in custom_hooks expects dict type, but got "
119 | f"{type(hook_cfg)}"
120 | )
121 | hook_cfg = hook_cfg.copy()
122 | priority = hook_cfg.pop("priority", "NORMAL")
123 | hook = build_from_cfg(hook_cfg, HOOKS)
124 | runner.register_hook(hook, priority=priority)
125 |
126 | if cfg.resume_from:
127 | runner.resume(cfg.resume_from)
128 | elif cfg.load_from:
129 | runner.load_checkpoint(cfg.load_from)
130 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
131 |
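# A condensed sketch of how train_model is typically driven by a
# tools/train.py-style entry point (cfg fields such as gpu_ids, seed, workflow
# and total_epochs are assumed to be filled in by that script or the config):
#
#     import mmcv
#     from mmdet.datasets import build_dataset
#     from teter.apis import train_model
#     from teter.models import build_model
#
#     cfg = mmcv.Config.fromfile('configs/tao/tracker_swinT_tao.py')
#     cfg.gpu_ids, cfg.seed = [0], None
#     model = build_model(cfg.model)  # cf. the build_model call in apis/inference.py
#     datasets = [build_dataset(cfg.data.train)]
#     train_model(model, datasets, cfg, distributed=False, validate=True)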
-------------------------------------------------------------------------------- /teter/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * # noqa: F401, F403 2 | from .track import * # noqa: F401, F403 3 | from .utils import * # noqa: F401, F403 4 | -------------------------------------------------------------------------------- /teter/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import DistEvalHook, EvalHook 2 | from .mot import eval_mot 3 | 4 | __all__ = ["EvalHook", "DistEvalHook", "eval_mot"] 5 | -------------------------------------------------------------------------------- /teter/core/evaluation/box_track.toml: -------------------------------------------------------------------------------- 1 | remove_ignored = false 2 | ignored_as_class = false 3 | 4 | [ignored_mapping] 5 | "other person" = "pedestrian" 6 | "other vehicle" = "car" 7 | "trailer" = "truck" 8 | 9 | [name_mapping] 10 | bike = "bicycle" 11 | caravan = "car" 12 | motor = "motorcycle" 13 | person = "pedestrian" 14 | van = "car" 15 | 16 | [scalabel] 17 | [scalabel.imageSize] 18 | height = 720 19 | width = 1280 20 | 21 | [[scalabel.attributes]] 22 | name = "crowd" 23 | type = "switch" 24 | tag = "c" 25 | 26 | [[scalabel.categories]] 27 | name = "human" 28 | [[scalabel.categories.subcategories]] 29 | name = "pedestrian" 30 | 31 | [[scalabel.categories.subcategories]] 32 | name = "rider" 33 | 34 | [[scalabel.categories]] 35 | name = "vehicle" 36 | [[scalabel.categories.subcategories]] 37 | name = "car" 38 | 39 | [[scalabel.categories.subcategories]] 40 | name = "truck" 41 | 42 | [[scalabel.categories.subcategories]] 43 | name = "bus" 44 | 45 | [[scalabel.categories.subcategories]] 46 | name = "train" 47 | 48 | [[scalabel.categories]] 49 | name = "bike" 50 | [[scalabel.categories.subcategories]] 51 | name = "motorcycle" 52 | 53 | [[scalabel.categories.subcategories]] 54 | name = "bicycle" -------------------------------------------------------------------------------- /teter/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch.distributed as dist 3 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 4 | from mmcv.runner import EvalHook as BaseEvalHook 5 | from torch.nn.modules.batchnorm import _BatchNorm 6 | 7 | 8 | class EvalHook(BaseEvalHook): 9 | """Please refer to `mmcv.runner.hooks.evaluation.py:EvalHook` for detailed 10 | docstring.""" 11 | 12 | def _do_evaluate(self, runner): 13 | """perform evaluation and save ckpt.""" 14 | if not self._should_evaluate(runner): 15 | return 16 | 17 | if ( 18 | hasattr(self.dataloader.dataset, "load_as_video") 19 | and self.dataloader.dataset.load_as_video 20 | ): 21 | from teter.apis import single_gpu_test 22 | else: 23 | from mmdet.apis import single_gpu_test 24 | results = single_gpu_test(runner.model, self.dataloader, show=False) 25 | runner.log_buffer.output["eval_iter_num"] = len(self.dataloader) 26 | key_score = self.evaluate(runner, results) 27 | if self.save_best: 28 | self._save_ckpt(runner, key_score) 29 | 30 | 31 | class DistEvalHook(BaseDistEvalHook): 32 | """Please refer to `mmcv.runner.hooks.evaluation.py:DistEvalHook` for 33 | detailed docstring.""" 34 | 35 | def _do_evaluate(self, runner): 36 | """perform evaluation and save ckpt.""" 37 | # Synchronization of BatchNorm's buffer (running_mean 38 | # and 
running_var) is not supported in the DDP of pytorch, 39 | # which may cause the inconsistent performance of models in 40 | # different ranks, so we broadcast BatchNorm's buffers 41 | # of rank 0 to other ranks to avoid this. 42 | if self.broadcast_bn_buffer: 43 | model = runner.model 44 | for name, module in model.named_modules(): 45 | if isinstance(module, _BatchNorm) and module.track_running_stats: 46 | dist.broadcast(module.running_var, 0) 47 | dist.broadcast(module.running_mean, 0) 48 | 49 | if not self._should_evaluate(runner): 50 | return 51 | 52 | tmpdir = self.tmpdir 53 | if tmpdir is None: 54 | tmpdir = osp.join(runner.work_dir, ".eval_hook") 55 | 56 | if ( 57 | hasattr(self.dataloader.dataset, "load_as_video") 58 | and self.dataloader.dataset.load_as_video 59 | ): 60 | from teter.apis import multi_gpu_test 61 | else: 62 | from mmdet.apis import multi_gpu_test 63 | results = multi_gpu_test( 64 | runner.model, self.dataloader, tmpdir=tmpdir, gpu_collect=self.gpu_collect 65 | ) 66 | if runner.rank == 0: 67 | print("\n") 68 | runner.log_buffer.output["eval_iter_num"] = len(self.dataloader) 69 | key_score = self.evaluate(runner, results) 70 | 71 | if self.save_best: 72 | self._save_ckpt(runner, key_score) 73 | -------------------------------------------------------------------------------- /teter/core/to_bdd100k/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import preds2bdd100k 2 | 3 | __all__ = ["preds2bdd100k"] 4 | -------------------------------------------------------------------------------- /teter/core/to_bdd100k/transforms.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | from scalabel.label.io import save 4 | from scalabel.label.transforms import bbox_to_box2d 5 | from scalabel.label.typing import Frame, Label 6 | from tqdm import tqdm 7 | 8 | from ..evaluation import xyxy2xywh 9 | from .utils import mask_merge_parallel 10 | 11 | CATEGORIES = [ 12 | "", 13 | "pedestrian", 14 | "rider", 15 | "car", 16 | "truck", 17 | "bus", 18 | "train", 19 | "motorcycle", 20 | "bicycle", 21 | "traffic light", 22 | "traffic sign", 23 | ] 24 | 25 | 26 | def det_to_bdd100k(dataset, results, out_base, nproc): 27 | bdd100k = [] 28 | ann_id = 0 29 | print(f"\nStart converting to BDD100K detection format") 30 | if "bbox_results" in results: 31 | results = results["bbox_results"] 32 | for idx, bboxes_list in tqdm(enumerate(results)): 33 | img_name = dataset.data_infos[idx]["file_name"] 34 | frame = Frame(name=img_name, labels=[]) 35 | 36 | for cls_, bboxes in enumerate(bboxes_list): 37 | for bbox in bboxes: 38 | ann_id += 1 39 | label = Label( 40 | id=ann_id, 41 | score=bbox[-1], 42 | box2d=bbox_to_box2d(xyxy2xywh(bbox)), 43 | category=CATEGORIES[cls_ + 1], 44 | ) 45 | frame.labels.append(label) 46 | bdd100k.append(frame) 47 | 48 | print(f"\nWriting the converted json") 49 | out_path = osp.join(out_base, "det.json") 50 | save(out_path, bdd100k) 51 | 52 | 53 | def ins_seg_to_bdd100k(dataset, results, out_base, nproc=4): 54 | bdd100k = [] 55 | bitmask_base = osp.join(out_base, "ins_seg") 56 | if not osp.exists(bitmask_base): 57 | os.makedirs(bitmask_base) 58 | 59 | if "bbox_results" in results and "segm_results" in results: 60 | results = [ 61 | [bbox, segm] 62 | for bbox, segm in zip(results["bbox_results"], results["segm_results"]) 63 | ] 64 | 65 | track_dicts = [] 66 | img_names = [dataset.data_infos[idx]["file_name"] for idx in range(len(results))] 67 | 
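# Each image yields one scalabel Frame plus a per-index instance dict below;
# the instance dicts are rendered into bitmask PNGs by mask_merge_parallel
# at the end of this function.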
68 | print(f"\nStart converting to BDD100K instance segmentation format") 69 | ann_id = 0 70 | for idx, [bboxes_list, segms_list] in enumerate(results): 71 | index = 0 72 | frame = Frame(name=img_names[idx], labels=[]) 73 | track_dict = {} 74 | for cls_, (bboxes, segms) in enumerate(zip(bboxes_list, segms_list)): 75 | for bbox, segm in zip(bboxes, segms): 76 | ann_id += 1 77 | index += 1 78 | label = Label(id=str(ann_id), index=index, score=bbox[-1]) 79 | frame.labels.append(label) 80 | instance = {"bbox": bbox, "segm": segm, "label": cls_} 81 | track_dict[index] = instance 82 | 83 | bdd100k.append(frame) 84 | track_dicts.append(track_dict) 85 | 86 | print(f"\nWriting the converted json") 87 | out_path = osp.join(out_base, "ins_seg.json") 88 | save(out_path, bdd100k) 89 | 90 | mask_merge_parallel(track_dicts, img_names, bitmask_base, nproc) 91 | 92 | 93 | def box_track_to_bdd100k(dataset, results, out_base, nproc): 94 | bdd100k = [] 95 | track_base = osp.join(out_base, "box_track") 96 | if not osp.exists(track_base): 97 | os.makedirs(track_base) 98 | 99 | print(f"\nStart converting to BDD100K box tracking format") 100 | for idx, track_dict in tqdm(enumerate(results["track_results"])): 101 | img_name = dataset.data_infos[idx]["file_name"] 102 | frame_index = dataset.data_infos[idx]["frame_id"] 103 | vid_name = os.path.split(img_name)[0] 104 | frame = Frame( 105 | name=img_name, video_name=vid_name, frame_index=frame_index, labels=[] 106 | ) 107 | 108 | for id_, instance in track_dict.items(): 109 | bbox = instance["bbox"] 110 | cls_ = instance["label"] 111 | label = Label( 112 | id=id_, 113 | score=bbox[-1], 114 | box2d=bbox_to_box2d(xyxy2xywh(bbox)), 115 | category=CATEGORIES[cls_ + 1], 116 | ) 117 | frame.labels.append(label) 118 | bdd100k.append(frame) 119 | 120 | print(f"\nWriting the converted json") 121 | out_path = osp.join(out_base, "box_track.json") 122 | save(out_path, bdd100k) 123 | 124 | 125 | def seg_track_to_bdd100k(dataset, results, out_base, nproc=4): 126 | bitmask_base = osp.join(out_base, "seg_track") 127 | if not osp.exists(bitmask_base): 128 | os.makedirs(bitmask_base) 129 | 130 | print(f"\nStart converting to BDD100K seg tracking format") 131 | img_names = [ 132 | dataset.data_infos[idx]["file_name"] 133 | for idx in range(len(results["track_results"])) 134 | ] 135 | mask_merge_parallel(results["track_results"], img_names, bitmask_base, nproc) 136 | 137 | 138 | def preds2bdd100k(dataset, results, tasks, out_base, *args, **kwargs): 139 | metric2func = dict( 140 | det=det_to_bdd100k, 141 | ins_seg=ins_seg_to_bdd100k, 142 | box_track=box_track_to_bdd100k, 143 | seg_track=seg_track_to_bdd100k, 144 | ) 145 | 146 | for task in tasks: 147 | metric2func[task](dataset, results, out_base, *args, **kwargs) 148 | -------------------------------------------------------------------------------- /teter/core/to_bdd100k/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import os.path as osp 4 | import pycocotools.mask as mask_utils 5 | from functools import partial 6 | from multiprocessing import Pool 7 | from PIL import Image 8 | from tqdm import tqdm 9 | 10 | SHAPE = [720, 1280] 11 | 12 | 13 | def mask_prepare(track_dict): 14 | scores, colors, masks = [], [], [] 15 | for id_, instance in track_dict.items(): 16 | masks.append(mask_utils.decode(instance["segm"])) 17 | colors.append([instance["label"] + 1, 0, id_ >> 8, id_ & 255]) 18 | scores.append(instance["bbox"][-1]) 19 | return scores, colors, masks 20 
| 21 | 22 | def mask_merge(mask_infor, img_name, bitmask_base): 23 | scores, colors, masks = mask_infor 24 | bitmask = np.zeros((*SHAPE, 4), dtype=np.uint8) 25 | sorted_idxs = np.argsort(scores) 26 | for idx in sorted_idxs: 27 | for i in range(4): 28 | bitmask[..., i] = ( 29 | bitmask[..., i] * (1 - masks[idx]) + masks[idx] * colors[idx][i] 30 | ) 31 | bitmask_path = osp.join(bitmask_base, img_name.replace(".jpg", ".png")) 32 | bitmask_dir = osp.split(bitmask_path)[0] 33 | if not osp.exists(bitmask_dir): 34 | os.makedirs(bitmask_dir) 35 | bitmask = Image.fromarray(bitmask) 36 | bitmask.save(bitmask_path) 37 | 38 | 39 | def mask_merge_parallel(track_dicts, img_names, bitmask_base, nproc): 40 | with Pool(nproc) as pool: 41 | print("\nCollecting mask information") 42 | mask_infors = pool.map(mask_prepare, tqdm(track_dicts)) 43 | print("\nMerging overlapped masks.") 44 | pool.starmap( 45 | partial(mask_merge, bitmask_base=bitmask_base), 46 | tqdm(zip(mask_infors, img_names), total=len(mask_infors)), 47 | ) 48 | -------------------------------------------------------------------------------- /teter/core/track/__init__.py: -------------------------------------------------------------------------------- 1 | from .similarity import cal_similarity 2 | from .transforms import restore_result, track2result 3 | 4 | __all__ = ["cal_similarity", "track2result", "restore_result"] 5 | -------------------------------------------------------------------------------- /teter/core/track/similarity.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | 6 | def cal_similarity(key_embeds, 7 | ref_embeds, 8 | method='dot_product', 9 | temperature=-1): 10 | 11 | assert method in ['dot_product', 'cosine'] 12 | 13 | if key_embeds.size(0) == 0 or ref_embeds.size(0) == 0: 14 | return torch.zeros((key_embeds.size(0), ref_embeds.size(0)), 15 | device=key_embeds.device) 16 | 17 | if method == 'cosine': 18 | key_embeds = F.normalize(key_embeds, p=2, dim=1) 19 | ref_embeds = F.normalize(ref_embeds, p=2, dim=1) 20 | dists = torch.mm(key_embeds, ref_embeds.t()) 21 | if temperature > 0 and temperature <= 1: 22 | dists /= temperature 23 | return dists 24 | 25 | elif method == 'dot_product': 26 | 27 | if temperature>1: 28 | dists = torch.mm(key_embeds, ref_embeds.t()) 29 | dists *= temperature 30 | else: 31 | dists = torch.mm(key_embeds, ref_embeds.t()) 32 | 33 | return dists 34 | -------------------------------------------------------------------------------- /teter/core/track/transforms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def track2result(bboxes, labels, ids, num_classes): 6 | valid_inds = ids > -1 7 | bboxes = bboxes[valid_inds] 8 | labels = labels[valid_inds] 9 | ids = ids[valid_inds] 10 | 11 | if bboxes.shape[0] == 0: 12 | return [np.zeros((0, 6), dtype=np.float32) for i in range(num_classes)] 13 | else: 14 | if isinstance(bboxes, torch.Tensor): 15 | bboxes = bboxes.cpu().numpy() 16 | labels = labels.cpu().numpy() 17 | ids = ids.cpu().numpy() 18 | return [ 19 | np.concatenate((ids[labels == i, None], bboxes[labels == i, :]), axis=1) 20 | for i in range(num_classes) 21 | ] 22 | 23 | 24 | def restore_result(result, return_ids=False): 25 | labels = [] 26 | for i, bbox in enumerate(result): 27 | labels.extend([i] * bbox.shape[0]) 28 | bboxes = np.concatenate(result, axis=0).astype(np.float32) 29 | labels = np.array(labels, 
dtype=np.int64) 30 | if return_ids: 31 | ids = bboxes[:, 0].astype(np.int64) 32 | bboxes = bboxes[:, 1:] 33 | return bboxes, labels, ids 34 | else: 35 | return bboxes, labels 36 | -------------------------------------------------------------------------------- /teter/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .visualization import imshow_mot_errors, imshow_tracks 2 | 3 | __all__ = ["imshow_tracks", "imshow_mot_errors"] 4 | -------------------------------------------------------------------------------- /teter/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.builder import DATASETS, PIPELINES, build_dataset 2 | 3 | from .bdd_video_dataset import BDDVideoDataset 4 | from .builder import build_dataloader 5 | from .coco_video_dataset import CocoVideoDataset 6 | from .parsers import CocoVID 7 | from .pipelines import (LoadMultiImagesFromFile, SeqCollect, 8 | SeqDefaultFormatBundle, SeqLoadAnnotations, 9 | SeqNormalize, SeqPad, SeqRandomFlip, SeqResize) 10 | from .tao_dataset import TaoDataset 11 | 12 | __all__ = [ 13 | "DATASETS", 14 | "PIPELINES", 15 | "build_dataloader", 16 | "build_dataset", 17 | "CocoVID", 18 | "BDDVideoDataset", 19 | "CocoVideoDataset", 20 | "LoadMultiImagesFromFile", 21 | "SeqLoadAnnotations", 22 | "SeqResize", 23 | "SeqNormalize", 24 | "SeqRandomFlip", 25 | "SeqPad", 26 | "SeqDefaultFormatBundle", 27 | "SeqCollect", 28 | "TaoDataset", 29 | ] 30 | -------------------------------------------------------------------------------- /teter/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from functools import partial 4 | from mmcv.parallel import collate 5 | from mmcv.runner import get_dist_info 6 | from mmdet.datasets.samplers import DistributedGroupSampler, GroupSampler 7 | from torch.utils.data import DataLoader 8 | 9 | from .samplers import DistributedVideoSampler 10 | 11 | 12 | def build_dataloader( 13 | dataset, 14 | samples_per_gpu, 15 | workers_per_gpu, 16 | num_gpus=1, 17 | dist=True, 18 | shuffle=True, 19 | seed=None, 20 | **kwargs 21 | ): 22 | """Build PyTorch DataLoader. 23 | 24 | In distributed training, each GPU/process has a dataloader. 25 | In non-distributed training, there is only one dataloader for all GPUs. 26 | 27 | Args: 28 | dataset (Dataset): A PyTorch dataset. 29 | samples_per_gpu (int): Number of training samples on each GPU, i.e., 30 | batch size of each GPU. 31 | workers_per_gpu (int): How many subprocesses to use for data loading 32 | for each GPU. 33 | num_gpus (int): Number of GPUs. Only used in non-distributed training. 34 | dist (bool): Distributed training/test or not. Default: True. 35 | shuffle (bool): Whether to shuffle the data at every epoch. 36 | Default: True. 37 | kwargs: any keyword argument to be used to initialize DataLoader 38 | 39 | Returns: 40 | DataLoader: A PyTorch dataloader. 
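    Example (a minimal sketch; ``video_dataset`` is assumed to have been
    created with ``build_dataset`` beforehand):
        >>> loader = build_dataloader(
        ...     video_dataset, samples_per_gpu=2, workers_per_gpu=2,
        ...     num_gpus=1, dist=False, shuffle=True, seed=0)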
41 | """ 42 | rank, world_size = get_dist_info() 43 | if dist: 44 | if shuffle: 45 | sampler = DistributedGroupSampler( 46 | dataset, samples_per_gpu, world_size, rank 47 | ) 48 | else: 49 | sampler = DistributedVideoSampler(dataset, world_size, rank, shuffle=False) 50 | batch_size = samples_per_gpu 51 | num_workers = workers_per_gpu 52 | else: 53 | sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None 54 | batch_size = num_gpus * samples_per_gpu 55 | num_workers = num_gpus * workers_per_gpu 56 | 57 | init_fn = ( 58 | partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed) 59 | if seed is not None 60 | else None 61 | ) 62 | 63 | data_loader = DataLoader( 64 | dataset, 65 | batch_size=batch_size, 66 | sampler=sampler, 67 | num_workers=num_workers, 68 | collate_fn=partial(collate, samples_per_gpu=samples_per_gpu), 69 | pin_memory=False, 70 | worker_init_fn=init_fn, 71 | **kwargs 72 | ) 73 | 74 | return data_loader 75 | 76 | 77 | def worker_init_fn(worker_id, num_workers, rank, seed): 78 | # The seed of each worker equals to 79 | # num_worker * rank + worker_id + user_seed 80 | worker_seed = num_workers * rank + worker_id + seed 81 | np.random.seed(worker_seed) 82 | random.seed(worker_seed) 83 | -------------------------------------------------------------------------------- /teter/datasets/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_api import COCO, COCOeval 2 | from .coco_video_parser import CocoVID 3 | 4 | __all__ = ["COCO", "COCOeval", "CocoVID"] 5 | -------------------------------------------------------------------------------- /teter/datasets/parsers/coco_api.py: -------------------------------------------------------------------------------- 1 | # This file add snake case alias for coco api 2 | 3 | import pycocotools 4 | import warnings 5 | from pycocotools.coco import COCO as _COCO 6 | from pycocotools.cocoeval import COCOeval as _COCOeval 7 | 8 | 9 | class COCO(_COCO): 10 | """This class is almost the same as official pycocotools package. 11 | 12 | It implements some snake case function aliases. So that the COCO class has 13 | the same interface as LVIS class. 14 | """ 15 | 16 | def __init__(self, annotation_file=None): 17 | if getattr(pycocotools, "__version__", "0") >= "12.0.2": 18 | warnings.warn( 19 | 'mmpycocotools is deprecated. 
Please install official pycocotools by "pip install pycocotools"', # noqa: E501
20 | UserWarning,
21 | )
22 | super().__init__(annotation_file=annotation_file)
23 | self.img_ann_map = self.imgToAnns
24 | self.cat_img_map = self.catToImgs
25 | 
26 | def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None):
27 | return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd)
28 | 
29 | def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]):
30 | return self.getCatIds(cat_names, sup_names, cat_ids)
31 | 
32 | def get_img_ids(self, img_ids=[], cat_ids=[]):
33 | return self.getImgIds(img_ids, cat_ids)
34 | 
35 | def load_anns(self, ids):
36 | return self.loadAnns(ids)
37 | 
38 | def load_cats(self, ids):
39 | return self.loadCats(ids)
40 | 
41 | def load_imgs(self, ids):
42 | return self.loadImgs(ids)
43 | 
44 | 
45 | # just for the ease of import
46 | COCOeval = _COCOeval
47 | -------------------------------------------------------------------------------- /teter/datasets/parsers/coco_video_parser.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import defaultdict
3 | from pycocotools.coco import _isArrayLike
4 | 
5 | from .coco_api import COCO
6 | 
7 | 
8 | class CocoVID(COCO):
9 | def __init__(self, annotation_file=None):
10 | assert annotation_file, "Annotation file must be provided."
11 | super(CocoVID, self).__init__(annotation_file=annotation_file)
12 | 
13 | def createIndex(self):
14 | print("creating index...")
15 | anns, cats, imgs, vids = {}, {}, {}, {}
16 | imgToAnns, catToImgs, vidToImgs = (
17 | defaultdict(list),
18 | defaultdict(list),
19 | defaultdict(list),
20 | )
21 | 
22 | if "videos" in self.dataset:
23 | for video in self.dataset["videos"]:
24 | vids[video["id"]] = video
25 | 
26 | if "annotations" in self.dataset:
27 | for ann in self.dataset["annotations"]:
28 | imgToAnns[ann["image_id"]].append(ann)
29 | anns[ann["id"]] = ann
30 | 
31 | if "images" in self.dataset:
32 | for img in self.dataset["images"]:
33 | vidToImgs[img["video_id"]].append(img)
34 | imgs[img["id"]] = img
35 | 
36 | if "categories" in self.dataset:
37 | for cat in self.dataset["categories"]:
38 | cats[cat["id"]] = cat
39 | 
40 | if "annotations" in self.dataset and "categories" in self.dataset:
41 | for ann in self.dataset["annotations"]:
42 | catToImgs[ann["category_id"]].append(ann["image_id"])
43 | 
44 | print("index created!")
45 | 
46 | self.anns = anns
47 | self.imgToAnns = imgToAnns
48 | self.catToImgs = catToImgs
49 | self.imgs = imgs
50 | self.cats = cats
51 | self.videos = vids
52 | self.vidToImgs = vidToImgs
53 | 
54 | def get_vid_ids(self, vidIds=[]):
55 | vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
56 | 
57 | if len(vidIds) == 0:
58 | ids = self.videos.keys()
59 | else:
60 | ids = set(vidIds)
61 | 
62 | return list(ids)
63 | 
64 | def get_img_ids_from_vid(self, vidId):
65 | img_infos = self.vidToImgs[vidId]
66 | ids = list(np.zeros([len(img_infos)], dtype=np.int64))  # np.int was removed in NumPy 1.24
67 | for img_info in img_infos:
68 | ids[img_info["frame_id"]] = img_info["id"]
69 | return ids
70 | 
71 | def load_vids(self, ids=[]):
72 | if _isArrayLike(ids):
73 | return [self.videos[id] for id in ids]
74 | elif isinstance(ids, int):
75 | return [self.videos[ids]]
76 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/__init__.py: --------------------------------------------------------------------------------
1 | from .formatting import SeqCollect, SeqDefaultFormatBundle, VideoCollect
2 | 
from .h5backend import HDF5Backend 3 | from .loading import LoadMultiImagesFromFile, SeqLoadAnnotations 4 | from .transforms import (SeqNormalize, SeqPad, SeqPhotoMetricDistortion, 5 | SeqRandomCrop, SeqRandomFlip, SeqResize) 6 | 7 | __all__ = [ 8 | "LoadMultiImagesFromFile", 9 | "SeqLoadAnnotations", 10 | "SeqResize", 11 | "SeqNormalize", 12 | "SeqRandomFlip", 13 | "SeqPad", 14 | "SeqDefaultFormatBundle", 15 | "SeqCollect", 16 | "VideoCollect", 17 | "SeqPhotoMetricDistortion", 18 | "SeqRandomCrop", 19 | "HDF5Backend", 20 | ] 21 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/formatting.py: -------------------------------------------------------------------------------- 1 | from mmcv.parallel import DataContainer as DC 2 | from mmdet.datasets.builder import PIPELINES 3 | from mmdet.datasets.pipelines import Collect, DefaultFormatBundle, to_tensor 4 | 5 | 6 | @PIPELINES.register_module() 7 | class SeqDefaultFormatBundle(DefaultFormatBundle): 8 | def __call__(self, results): 9 | outs = [] 10 | for _results in results: 11 | _results = super().__call__(_results) 12 | _results["gt_match_indices"] = DC(to_tensor(_results["gt_match_indices"])) 13 | outs.append(_results) 14 | return outs 15 | 16 | 17 | @PIPELINES.register_module() 18 | class VideoCollect(Collect): 19 | """Collect data from the loader relevant to the specific task. 20 | 21 | This is usually the last stage of the data loader pipeline. Typically keys 22 | is set to some subset of "img", "proposals", "gt_bboxes", 23 | "gt_bboxes_ignore", "gt_labels", and/or "gt_masks". 24 | 25 | The "img_meta" item is always populated. The contents of the "img_meta" 26 | dictionary depends on "meta_keys". By default this includes: 27 | 28 | - "img_shape": shape of the image input to the network as a tuple \ 29 | (h, w, c). Note that images may be zero padded on the \ 30 | bottom/right if the batch tensor is larger than this shape. 31 | 32 | - "scale_factor": a float indicating the preprocessing scale 33 | 34 | - "flip": a boolean indicating if image flip transform was used 35 | 36 | - "filename": path to the image file 37 | 38 | - "ori_shape": original shape of the image as a tuple (h, w, c) 39 | 40 | - "pad_shape": image shape after padding 41 | 42 | - "img_norm_cfg": a dict of normalization information: 43 | 44 | - mean - per channel mean subtraction 45 | - std - per channel std divisor 46 | - to_rgb - bool indicating if bgr was converted to rgb 47 | 48 | Args: 49 | keys (Sequence[str]): Keys of results to be collected in ``data``. 50 | meta_keys (Sequence[str], optional): Meta keys to be converted to 51 | ``mmcv.DataContainer`` and collected in ``data[img_metas]``. 
52 | Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape', 53 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 54 | 'img_norm_cfg')`` 55 | """ 56 | 57 | def __init__( 58 | self, 59 | keys, 60 | meta_keys=( 61 | "filename", 62 | "ori_filename", 63 | "ori_shape", 64 | "img_shape", 65 | "pad_shape", 66 | "scale_factor", 67 | "flip", 68 | "flip_direction", 69 | "img_norm_cfg", 70 | "frame_id", 71 | ), 72 | ): 73 | self.keys = keys 74 | self.meta_keys = meta_keys 75 | 76 | 77 | @PIPELINES.register_module(force=True) 78 | class SeqCollect(VideoCollect): 79 | def __init__( 80 | self, 81 | keys, 82 | ref_prefix="ref", 83 | meta_keys=( 84 | "filename", 85 | "ori_filename", 86 | "ori_shape", 87 | "img_shape", 88 | "pad_shape", 89 | "scale_factor", 90 | "flip", 91 | "flip_direction", 92 | "img_norm_cfg", 93 | ), 94 | ): 95 | self.keys = keys 96 | self.ref_prefix = ref_prefix 97 | self.meta_keys = meta_keys 98 | 99 | def __call__(self, results): 100 | outs = [] 101 | for _results in results: 102 | _results = super().__call__(_results) 103 | outs.append(_results) 104 | 105 | assert len(outs) == 2 106 | data = {} 107 | data.update(outs[0]) 108 | for k, v in outs[1].items(): 109 | data[f"{self.ref_prefix}_{k}"] = v 110 | 111 | return data 112 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/h5backend.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import numpy as np 3 | import os 4 | from mmcv import BaseStorageBackend, FileClient 5 | 6 | 7 | @FileClient.register_backend("hdf5", force=True) 8 | class HDF5Backend(BaseStorageBackend): 9 | def __init__(self, img_db_path=None, vid_db_path=None, type="tao", **kwargs): 10 | 11 | # h5 file path 12 | self.img_db_path = img_db_path 13 | self.vid_db_path = vid_db_path 14 | 15 | self.img_client = None 16 | self.vid_client = None 17 | self.type = type 18 | 19 | def get(self, filepath): 20 | """Get values according to the filepath. 21 | Args: 22 | filepath (str | obj:`Path`): Here, filepath is the lmdb key. 
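        Example (a hedged sketch; the .h5 path and key layout are
        hypothetical):
            >>> from mmcv import FileClient
            >>> client = FileClient(backend="hdf5",
            ...                     img_db_path="data/tao/tao_train.h5",
            ...                     type="tao")
            >>> buf = client.get("frames/train/YFCC100M/v_abc/frame0001.jpg")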
23 | # """ 24 | 25 | filepath = str(filepath) 26 | if self.type == "tao": 27 | if self.img_client is None and self.img_db_path is not None: 28 | self.img_client = h5py.File(self.img_db_path, "r") 29 | key_list = filepath.split("/") 30 | value_buf = np.array( 31 | self.img_client[key_list[-4]][key_list[-3]][key_list[-2]][key_list[-1]] 32 | ) 33 | elif self.type == "key": 34 | if self.img_client is None and self.img_db_path is not None: 35 | self.img_client = h5py.File(self.img_db_path, "r") 36 | value_buf = self.img_client[filepath] 37 | elif self.type == "lvis": 38 | if self.img_client is None and self.img_db_path is not None: 39 | self.img_client = h5py.File(self.img_db_path, "r") 40 | filefolder, filename = os.path.split(filepath) 41 | value_buf = np.array(self.img_client[filename]) 42 | elif self.type == "lasot": 43 | if self.img_client is None and self.img_db_path is not None: 44 | self.img_client = h5py.File(self.img_db_path, "r") 45 | key_list = filepath.split("/") 46 | value_buf = np.array( 47 | self.img_client[key_list[-4]][key_list[-3]][key_list[-2]][key_list[-1]][ 48 | "raw" 49 | ] 50 | )[0] 51 | elif self.type == "bdd": 52 | filefolder, filename = os.path.split(filepath) 53 | path, group_name = os.path.split(filefolder) 54 | 55 | if self.vid_client is None and self.vid_db_path is not None: 56 | self.vid_client = h5py.File(self.vid_db_path, "r") 57 | if self.img_client is None and self.img_db_path is not None: 58 | self.img_client = h5py.File(self.img_db_path, "r") 59 | if "/100k/" in filefolder: 60 | value_buf = np.array(self.img_client[filename]) 61 | else: 62 | group = self.vid_client[group_name] 63 | value_buf = np.array(group[filename]) 64 | 65 | return value_buf 66 | 67 | def get_text(self, filepath): 68 | raise NotImplementedError 69 | -------------------------------------------------------------------------------- /teter/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | from mmdet.datasets.builder import PIPELINES 2 | from mmdet.datasets.pipelines import LoadAnnotations, LoadImageFromFile 3 | 4 | 5 | @PIPELINES.register_module() 6 | class LoadMultiImagesFromFile(LoadImageFromFile): 7 | def __init__(self, *args, **kwargs): 8 | super().__init__(*args, **kwargs) 9 | 10 | def __call__(self, results): 11 | outs = [] 12 | for _results in results: 13 | _results = super().__call__(_results) 14 | outs.append(_results) 15 | return outs 16 | 17 | 18 | @PIPELINES.register_module() 19 | class SeqLoadAnnotations(LoadAnnotations): 20 | def __init__(self, with_ins_id=False, *args, **kwargs): 21 | super().__init__(*args, **kwargs) 22 | self.with_ins_id = with_ins_id 23 | 24 | def _load_ins_ids(self, results): 25 | """Private function to load label annotations. 26 | 27 | Args: 28 | results (dict): Result dict from :obj:`mmdet.CustomDataset`. 29 | 30 | Returns: 31 | dict: The dict contains loaded label annotations. 
32 | """ 33 | 34 | results["gt_match_indices"] = results["ann_info"]["match_indices"].copy() 35 | 36 | return results 37 | 38 | def __call__(self, results): 39 | outs = [] 40 | for _results in results: 41 | _results = super().__call__(_results) 42 | if self.with_ins_id: 43 | _results = self._load_ins_ids(_results) 44 | outs.append(_results) 45 | return outs 46 | -------------------------------------------------------------------------------- /teter/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_video_sampler import DistributedVideoSampler 2 | 3 | __all__ = ["DistributedVideoSampler"] 4 | -------------------------------------------------------------------------------- /teter/datasets/samplers/distributed_video_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data import DistributedSampler as _DistributedSampler 3 | 4 | 5 | class DistributedVideoSampler(_DistributedSampler): 6 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=False): 7 | super().__init__(dataset, num_replicas=num_replicas, rank=rank) 8 | self.shuffle = shuffle 9 | assert not self.shuffle, "Specific for video sequential testing." 10 | self.num_samples = len(dataset) 11 | 12 | first_frame_indices = [] 13 | for i, img_info in enumerate(self.dataset.data_infos): 14 | if img_info["frame_id"] == 0: 15 | first_frame_indices.append(i) 16 | 17 | chunks = np.array_split(first_frame_indices, num_replicas) 18 | split_flags = [c[0] for c in chunks] 19 | split_flags.append(self.num_samples) 20 | 21 | self.indices = [ 22 | list(range(split_flags[i], split_flags[i + 1])) 23 | for i in range(self.num_replicas) 24 | ] 25 | 26 | def __iter__(self): 27 | indices = self.indices[self.rank] 28 | return iter(indices) 29 | -------------------------------------------------------------------------------- /teter/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import MODELS, TRACKERS, build_model, build_tracker 2 | from .losses import * # noqa: F401,F403 3 | from .mot import * # noqa: F401,F403 4 | from .roi_heads import * # noqa: F401,F403 5 | from .trackers import * # noqa: F401,F403 6 | 7 | __all__ = ["MODELS", "TRACKERS", "build_model", "build_tracker"] 8 | -------------------------------------------------------------------------------- /teter/models/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_model_from_cfg as build 2 | from mmcv.utils import Registry 3 | 4 | MODELS = Registry("model") 5 | TRACKERS = Registry("tracker") 6 | 7 | 8 | def build_tracker(cfg): 9 | """Build tracker.""" 10 | return build(cfg, TRACKERS) 11 | 12 | 13 | def build_model(cfg, train_cfg=None, test_cfg=None): 14 | """Build model.""" 15 | return build(cfg, MODELS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 16 | -------------------------------------------------------------------------------- /teter/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .l2_loss import L2Loss 2 | from .multipos_cross_entropy_loss import MultiPosCrossEntropyLoss 3 | from .unbiased_supcontrat import UnbiasedSupConLoss 4 | 5 | __all__ = ["L2Loss", "MultiPosCrossEntropyLoss", "UnbiasedSupConLoss"] 6 | -------------------------------------------------------------------------------- 
/teter/models/losses/l2_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from mmdet.models import LOSSES, weighted_loss 5 | 6 | 7 | @weighted_loss 8 | def l2_loss(pred, target): 9 | """L2 loss. 10 | 11 | Args: 12 | pred (torch.Tensor): The prediction. 13 | target (torch.Tensor): The learning target of the prediction. 14 | 15 | Returns: 16 | torch.Tensor: Calculated loss 17 | """ 18 | assert pred.size() == target.size() and target.numel() > 0 19 | loss = torch.abs(pred - target) ** 2 20 | return loss 21 | 22 | 23 | @LOSSES.register_module(force=True) 24 | class L2Loss(nn.Module): 25 | """L2 loss. 26 | 27 | Args: 28 | reduction (str, optional): The method to reduce the loss. 29 | Options are "none", "mean" and "sum". 30 | loss_weight (float, optional): The weight of loss. 31 | """ 32 | 33 | def __init__( 34 | self, 35 | neg_pos_ub=-1, 36 | pos_margin=-1, 37 | neg_margin=-1, 38 | hard_mining=False, 39 | reduction="mean", 40 | loss_weight=1.0, 41 | ): 42 | super(L2Loss, self).__init__() 43 | self.neg_pos_ub = neg_pos_ub 44 | self.pos_margin = pos_margin 45 | self.neg_margin = neg_margin 46 | self.hard_mining = hard_mining 47 | self.reduction = reduction 48 | self.loss_weight = loss_weight 49 | 50 | def forward( 51 | self, pred, target, weight=None, avg_factor=None, reduction_override=None 52 | ): 53 | """Forward function. 54 | 55 | Args: 56 | pred (torch.Tensor): The prediction. 57 | target (torch.Tensor): The learning target of the prediction. 58 | weight (torch.Tensor, optional): The weight of loss for each 59 | prediction. Defaults to None. 60 | avg_factor (int, optional): Average factor that is used to average 61 | the loss. Defaults to None. 62 | reduction_override (str, optional): The reduction method used to 63 | override the original reduction method of the loss. 64 | Defaults to None. 
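            Example (a shape sketch with explicit positive/negative targets):
                >>> pred = torch.rand(2, 4)
                >>> target = torch.tensor([[1., 0., 0., 1.],
                ...                        [0., 1., 0., 0.]])
                >>> loss = L2Loss(neg_pos_ub=3, hard_mining=True)(pred, target)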
65 | """ 66 | assert reduction_override in (None, "none", "mean", "sum") 67 | reduction = reduction_override if reduction_override else self.reduction 68 | pred, weight, avg_factor = self.update_weight(pred, target, weight, avg_factor) 69 | loss_bbox = self.loss_weight * l2_loss( 70 | pred, target, weight, reduction=reduction, avg_factor=avg_factor 71 | ) 72 | return loss_bbox 73 | 74 | def update_weight(self, pred, target, weight, avg_factor): 75 | if weight is None: 76 | weight = target.new_ones(target.size()) 77 | invalid_inds = weight <= 0 78 | target[invalid_inds] = -1 79 | pos_inds = target == 1 80 | neg_inds = target == 0 81 | 82 | if self.pos_margin > 0: 83 | pred[pos_inds] -= self.pos_margin 84 | if self.neg_margin > 0: 85 | pred[neg_inds] -= self.neg_margin 86 | pred = torch.clamp(pred, min=0, max=1) 87 | 88 | num_pos = int((target == 1).sum()) 89 | num_neg = int((target == 0).sum()) 90 | if self.neg_pos_ub > 0 and num_neg / num_pos > self.neg_pos_ub: 91 | num_neg = num_pos * self.neg_pos_ub 92 | neg_idx = torch.nonzero(target == 0, as_tuple=False) 93 | 94 | if self.hard_mining: 95 | costs = l2_loss(pred, target, reduction="none")[ 96 | neg_idx[:, 0], neg_idx[:, 1] 97 | ].detach() 98 | neg_idx = neg_idx[costs.topk(num_neg)[1], :] 99 | else: 100 | neg_idx = self.random_choice(neg_idx, num_neg) 101 | 102 | new_neg_inds = neg_inds.new_zeros(neg_inds.size()).bool() 103 | new_neg_inds[neg_idx[:, 0], neg_idx[:, 1]] = True 104 | 105 | invalid_neg_inds = torch.logical_xor(neg_inds, new_neg_inds) 106 | weight[invalid_neg_inds] = 0 107 | 108 | avg_factor = (weight > 0).sum() 109 | return pred, weight, avg_factor 110 | 111 | @staticmethod 112 | def random_choice(gallery, num): 113 | """Random select some elements from the gallery. 114 | 115 | It seems that Pytorch's implementation is slower than numpy so we use 116 | numpy to randperm the indices. 117 | """ 118 | assert len(gallery) >= num 119 | if isinstance(gallery, list): 120 | gallery = np.array(gallery) 121 | cands = np.arange(len(gallery)) 122 | np.random.shuffle(cands) 123 | rand_inds = cands[:num] 124 | if not isinstance(gallery, np.ndarray): 125 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 126 | return gallery[rand_inds] 127 | -------------------------------------------------------------------------------- /teter/models/losses/multipos_cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from mmdet.models import LOSSES, weight_reduce_loss 4 | 5 | 6 | def multi_pos_cross_entropy( 7 | pred, label, 8 | weight=None, 9 | reduction="mean", 10 | avg_factor=None, 11 | version="ori", 12 | pos_normalize=True 13 | ): 14 | 15 | if version == "unbiased": 16 | 17 | valid_mask = label.sum(1) != 0 18 | pred = pred[valid_mask] 19 | label = label[valid_mask] 20 | weight = weight[valid_mask] 21 | logits_max, _ = torch.max(pred, dim=1, keepdim=True) 22 | logits = pred - logits_max.detach() 23 | 24 | if pos_normalize: 25 | pos_norm = torch.div(label, label.sum(1).reshape(-1, 1)) 26 | exp_logits = (torch.exp(logits)) * pos_norm + ( 27 | torch.exp(logits) 28 | ) * torch.logical_not(label) 29 | else: 30 | exp_logits = torch.exp(logits) 31 | exp_logits_input = exp_logits.sum(1, keepdim=True) 32 | log_prob = logits - torch.log(exp_logits_input) 33 | 34 | mean_log_prob_pos = (label * log_prob).sum(1) / label.sum(1) 35 | loss = -mean_log_prob_pos 36 | 37 | elif version == "ori": 38 | # a more numerical stable implementation. 
39 | pos_inds = label == 1 40 | neg_inds = label == 0 41 | pred_pos = pred * pos_inds.float() 42 | pred_neg = pred * neg_inds.float() 43 | # use -inf to mask out unwanted elements. 44 | pred_pos[neg_inds] = pred_pos[neg_inds] + float("inf") 45 | pred_neg[pos_inds] = pred_neg[pos_inds] + float("-inf") 46 | 47 | _pos_expand = torch.repeat_interleave(pred_pos, pred.shape[1], dim=1) 48 | _neg_expand = pred_neg.repeat(1, pred.shape[1]) 49 | 50 | x = torch.nn.functional.pad((_neg_expand - _pos_expand), (0, 1), "constant", 0) 51 | loss = torch.logsumexp(x, dim=1) 52 | 53 | # apply weights and do the reduction 54 | if weight is not None: 55 | weight = weight.float() 56 | loss = weight_reduce_loss( 57 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor 58 | ) 59 | 60 | return loss 61 | 62 | 63 | @LOSSES.register_module(force=True) 64 | class MultiPosCrossEntropyLoss(nn.Module): 65 | def __init__(self, reduction="mean", loss_weight=1.0, version="v3"): 66 | super(MultiPosCrossEntropyLoss, self).__init__() 67 | self.reduction = reduction 68 | self.loss_weight = loss_weight 69 | self.version = version 70 | 71 | def forward( 72 | self, 73 | cls_score, 74 | label, 75 | weight=None, 76 | avg_factor=None, 77 | reduction_override=None, 78 | **kwargs 79 | ): 80 | assert cls_score.size() == label.size() 81 | assert reduction_override in (None, "none", "mean", "sum") 82 | reduction = reduction_override if reduction_override else self.reduction 83 | loss_cls = self.loss_weight * multi_pos_cross_entropy( 84 | cls_score, 85 | label, 86 | weight, 87 | reduction=reduction, 88 | avg_factor=avg_factor, 89 | version=self.version, 90 | **kwargs 91 | ) 92 | return loss_cls 93 | -------------------------------------------------------------------------------- /teter/models/losses/unbiased_supcontrat.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmdet.models import LOSSES 6 | 7 | 8 | @LOSSES.register_module() 9 | class UnbiasedSupConLoss(nn.Module): 10 | def __init__( 11 | self, 12 | temperature=0.07, 13 | contrast_mode="all", 14 | base_temperature=0.07, 15 | pos_normalize=True, 16 | loss_weight=1, 17 | ): 18 | super(UnbiasedSupConLoss, self).__init__() 19 | self.temperature = temperature 20 | self.contrast_mode = contrast_mode 21 | self.base_temperature = base_temperature 22 | self.pos_normalize = pos_normalize 23 | self.loss_weight = loss_weight 24 | 25 | def forward(self, features, labels=None, mask=None): 26 | """Compute loss for model. If both `labels` and `mask` are None, 27 | Args: 28 | features: hidden vector of shape [bsz, n_views, ...]. 29 | labels: ground truth of shape [bsz]. 30 | mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j 31 | has the same class as sample i. Can be asymmetric. 32 | Returns: 33 | A loss scalar. 
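        Example (a shape sketch; the labels are hypothetical):
            >>> feats = torch.randn(8, 2, 128)   # [bsz, n_views, dim]
            >>> labels = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3])
            >>> loss = UnbiasedSupConLoss(temperature=0.07)(feats, labels)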
34 | """ 35 | device = torch.device("cuda") if features.is_cuda else torch.device("cpu") 36 | 37 | if len(features.shape) < 3: 38 | raise ValueError( 39 | "`features` needs to be [bsz, n_views, ...]," 40 | "at least 3 dimensions are required" 41 | ) 42 | if len(features.shape) > 3: 43 | features = features.view(features.shape[0], features.shape[1], -1) 44 | 45 | batch_size = features.shape[0] 46 | if labels is not None and mask is not None: 47 | raise ValueError("Cannot define both `labels` and `mask`") 48 | elif labels is None and mask is None: 49 | mask = torch.eye(batch_size, dtype=torch.float32).to(device) 50 | elif labels is not None: 51 | labels = labels.contiguous().view(-1, 1) 52 | if labels.shape[0] != batch_size: 53 | raise ValueError("Num of labels does not match num of features") 54 | mask = torch.eq(labels, labels.T).float().to(device) 55 | valid_mask = mask.sum(1) != 1 56 | labels = labels[valid_mask] 57 | features = features[valid_mask] 58 | mask = torch.eq(labels, labels.T).float().to(device) 59 | batch_size = features.shape[0] 60 | if batch_size == 0: 61 | return torch.tensor([0.0], requires_grad=True) 62 | else: 63 | mask = mask.float().to(device) 64 | 65 | contrast_count = features.shape[1] 66 | contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0) 67 | if self.contrast_mode == "one": 68 | anchor_feature = features[:, 0] 69 | anchor_count = 1 70 | elif self.contrast_mode == "all": 71 | anchor_feature = contrast_feature 72 | anchor_count = contrast_count 73 | else: 74 | raise ValueError("Unknown mode: {}".format(self.contrast_mode)) 75 | 76 | # compute logits 77 | anchor_dot_contrast = torch.div( 78 | torch.matmul(anchor_feature, contrast_feature.T), self.temperature 79 | ) 80 | # for numerical stability 81 | if min(anchor_dot_contrast.shape) != 0: 82 | # return torch.tensor(0.0).to(anchor_dot_contrast.device) 83 | logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True) 84 | logits = anchor_dot_contrast - logits_max.detach() 85 | else: 86 | logits = anchor_dot_contrast 87 | # tile mask 88 | mask = mask.repeat(anchor_count, contrast_count) 89 | 90 | # mask-out self-contrast cases 91 | logits_mask = torch.scatter( 92 | torch.ones_like(mask), 93 | 1, 94 | torch.arange(batch_size * anchor_count).view(-1, 1).to(device), 95 | 0, 96 | ) 97 | mask = mask * logits_mask 98 | 99 | # compute log_prob 100 | if self.pos_normalize: 101 | pos_norm = torch.div(mask, mask.sum(1).reshape(-1, 1)) 102 | exp_logits = (torch.exp(logits) * logits_mask) * pos_norm + ( 103 | torch.exp(logits) * logits_mask 104 | ) * torch.logical_not(mask) 105 | else: 106 | exp_logits = torch.exp(logits) * logits_mask 107 | exp_logits_input = exp_logits.sum(1, keepdim=True) 108 | log_prob = logits - torch.log(exp_logits_input) 109 | 110 | # compute mean of log-likelihood over positive 111 | mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1) 112 | 113 | # loss 114 | 115 | loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos 116 | loss = loss.view(anchor_count, batch_size).mean() 117 | 118 | return loss * self.loss_weight 119 | -------------------------------------------------------------------------------- /teter/models/mot/__init__.py: -------------------------------------------------------------------------------- 1 | from .teter import TETer 2 | 3 | __all__ = ["TETer"] 4 | -------------------------------------------------------------------------------- /teter/models/mot/teter.py: -------------------------------------------------------------------------------- 1 
| import copy 2 | import numpy as np 3 | from mmdet.core import bbox2result 4 | from mmdet.models import TwoStageDetector 5 | 6 | from teter.core import imshow_tracks, restore_result, track2result 7 | from ..builder import MODELS, build_tracker 8 | 9 | 10 | @MODELS.register_module() 11 | class TETer(TwoStageDetector): 12 | def __init__( 13 | self, 14 | tracker=None, 15 | freeze_detector=False, 16 | freeze_cem=False, 17 | freeze_qd=False, 18 | method="teter", 19 | *args, 20 | **kwargs 21 | ): 22 | self.prepare_cfg(kwargs) 23 | super().__init__(*args, **kwargs) 24 | self.tracker_cfg = tracker 25 | self.method = method 26 | print(self.method) 27 | self.freeze_detector = freeze_detector 28 | self.freeze_cem = freeze_cem 29 | self.freeze_qd = freeze_qd 30 | if self.freeze_detector: 31 | self._freeze_detector() 32 | 33 | def _freeze_detector(self): 34 | 35 | self.detector = [ 36 | self.backbone, 37 | self.neck, 38 | self.rpn_head, 39 | self.roi_head.bbox_head, 40 | ] 41 | if self.freeze_cem: 42 | self.detector.append(self.roi_head.cem_head) 43 | 44 | if self.freeze_qd: 45 | self.detector.append(self.roi_head.track_head) 46 | 47 | for model in self.detector: 48 | model.eval() 49 | for param in model.parameters(): 50 | param.requires_grad = False 51 | 52 | def prepare_cfg(self, kwargs): 53 | if kwargs.get("train_cfg", False): 54 | if kwargs["train_cfg"].get("embed", None): 55 | kwargs["roi_head"]["track_train_cfg"] = kwargs["train_cfg"].get( 56 | "embed", None 57 | ) 58 | if kwargs["train_cfg"].get("cem", None): 59 | kwargs["roi_head"]["cem_train_cfg"] = kwargs["train_cfg"].get( 60 | "cem", None 61 | ) 62 | 63 | def init_tracker(self): 64 | self.tracker = build_tracker(self.tracker_cfg) 65 | 66 | def forward_train( 67 | self, 68 | img, 69 | img_metas, 70 | gt_bboxes, 71 | gt_labels, 72 | gt_match_indices, 73 | ref_img, 74 | ref_img_metas, 75 | ref_gt_bboxes, 76 | ref_gt_labels, 77 | ref_gt_match_indices, 78 | gt_bboxes_ignore=None, 79 | gt_masks=None, 80 | ref_gt_bboxes_ignore=None, 81 | ref_gt_masks=None, 82 | **kwargs 83 | ): 84 | x = self.extract_feat(img) 85 | 86 | losses = dict() 87 | 88 | # RPN forward and loss 89 | proposal_cfg = self.train_cfg.get("rpn_proposal", self.test_cfg.rpn) 90 | rpn_losses, proposal_list = self.rpn_head.forward_train( 91 | x, 92 | img_metas, 93 | gt_bboxes, 94 | gt_labels=None, 95 | gt_bboxes_ignore=gt_bboxes_ignore, 96 | proposal_cfg=proposal_cfg, 97 | ) 98 | losses.update(rpn_losses) 99 | 100 | ref_x = self.extract_feat(ref_img) 101 | ref_proposals = self.rpn_head.simple_test_rpn(ref_x, ref_img_metas) 102 | 103 | roi_losses = self.roi_head.forward_train( 104 | x, 105 | img_metas, 106 | proposal_list, 107 | gt_bboxes, 108 | gt_labels, 109 | gt_match_indices, 110 | ref_x, 111 | ref_img_metas, 112 | ref_proposals, 113 | ref_gt_bboxes, 114 | ref_gt_labels, 115 | gt_bboxes_ignore, 116 | gt_masks, 117 | ref_gt_bboxes_ignore, 118 | **kwargs 119 | ) 120 | losses.update(roi_losses) 121 | 122 | return losses 123 | 124 | def simple_test(self, img, img_metas, rescale=False): 125 | 126 | assert self.roi_head.with_track, "Track head must be implemented." 
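        # Test-time frames of one video arrive sequentially (the
        # DistributedVideoSampler splits data at video boundaries), so
        # frame_id == 0 marks a new video and triggers the tracker reset below.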
127 | frame_id = img_metas[0].get("frame_id", -1) 128 | if frame_id == 0: 129 | self.init_tracker() 130 | 131 | x = self.extract_feat(img) 132 | proposal_list = self.rpn_head.simple_test_rpn(x, img_metas) 133 | 134 | outputs = self.roi_head.simple_test(x, img_metas, proposal_list, rescale) 135 | if len(outputs) == 4: 136 | det_bboxes, det_labels, cem_feats, track_feats = outputs 137 | elif len(outputs) == 3: 138 | det_bboxes, det_labels, track_feats = outputs 139 | cem_feats = copy.deepcopy(track_feats) 140 | 141 | if track_feats is not None: 142 | 143 | bboxes, labels, ids = self.tracker.match( 144 | bboxes=det_bboxes, 145 | labels=det_labels, 146 | embeds=track_feats, 147 | cls_embeds=cem_feats, 148 | frame_id=frame_id, 149 | method=self.method, 150 | ) 151 | 152 | bbox_result = bbox2result( 153 | det_bboxes, det_labels, self.roi_head.bbox_head.num_classes 154 | ) 155 | 156 | if track_feats is not None: 157 | track_result = track2result( 158 | bboxes, labels, ids, self.roi_head.bbox_head.num_classes 159 | ) 160 | else: 161 | track_result = [ 162 | np.zeros((0, 6), dtype=np.float32) 163 | for i in range(self.roi_head.bbox_head.num_classes) 164 | ] 165 | return dict(bbox_results=bbox_result, track_results=track_result) 166 | 167 | def show_result( 168 | self, 169 | img, 170 | result, 171 | thickness=1, 172 | font_scale=0.5, 173 | show=False, 174 | out_file=None, 175 | wait_time=0, 176 | backend="cv2", 177 | **kwargs 178 | ): 179 | """Visualize tracking results. 180 | 181 | Args: 182 | img (str | ndarray): Filename of loaded image. 183 | result (dict): Tracking result. 184 | The value of key 'track_results' is ndarray with shape (n, 6) 185 | in [id, tl_x, tl_y, br_x, br_y, score] format. 186 | The value of key 'bbox_results' is ndarray with shape (n, 5) 187 | in [tl_x, tl_y, br_x, br_y, score] format. 188 | thickness (int, optional): Thickness of lines. Defaults to 1. 189 | font_scale (float, optional): Font scales of texts. Defaults 190 | to 0.5. 191 | show (bool, optional): Whether show the visualizations on the 192 | fly. Defaults to False. 193 | out_file (str | None, optional): Output filename. Defaults to None. 194 | backend (str, optional): Backend to draw the bounding boxes, 195 | options are `cv2` and `plt`. Defaults to 'cv2'. 196 | 197 | Returns: 198 | ndarray: Visualized image. 
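        Example (a sketch; the frame path and a ``result`` dict returned by
        ``simple_test`` are assumed):
            >>> vis = model.show_result(
            ...     "video/frame_0001.jpg", result, show=False,
            ...     out_file="vis/frame_0001.jpg")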
199 | """ 200 | assert isinstance(result, dict) 201 | track_result = result.get("track_results", None) 202 | bboxes, labels, ids = restore_result(track_result, return_ids=True) 203 | img = imshow_tracks( 204 | img, 205 | bboxes, 206 | labels, 207 | ids, 208 | classes=self.CLASSES, 209 | thickness=thickness, 210 | font_scale=font_scale, 211 | show=show, 212 | out_file=out_file, 213 | wait_time=wait_time, 214 | backend=backend, 215 | ) 216 | return img 217 | -------------------------------------------------------------------------------- /teter/models/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .teter_roi_head import TETerRoIHead 2 | from .track_heads import QuasiDenseEmbedHead 3 | 4 | __all__ = ["QuasiDenseEmbedHead", "TETerRoIHead"] 5 | -------------------------------------------------------------------------------- /teter/models/roi_heads/track_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .cem_head import ClsExemplarHead 2 | from .quasi_dense_embed_head import QuasiDenseEmbedHead 3 | 4 | __all__ = ["QuasiDenseEmbedHead", "ClsExemplarHead"] 5 | -------------------------------------------------------------------------------- /teter/models/roi_heads/track_heads/cem_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | from mmdet.models import HEADS, build_loss 6 | 7 | from teter.core import cal_similarity 8 | 9 | 10 | @HEADS.register_module(force=True) 11 | class ClsExemplarHead(nn.Module): 12 | def __init__( 13 | self, 14 | num_convs=4, 15 | num_fcs=1, 16 | roi_feat_size=7, 17 | in_channels=256, 18 | conv_out_channels=256, 19 | fc_out_channels=1024, 20 | embed_channels=256, 21 | conv_cfg=None, 22 | norm_cfg=None, 23 | softmax_temp=-1, 24 | loss_track=dict(type="MultiPosCrossEntropyLoss", loss_weight=1), 25 | ): 26 | super(ClsExemplarHead, self).__init__() 27 | 28 | self.num_convs = num_convs 29 | self.num_fcs = num_fcs 30 | self.roi_feat_size = roi_feat_size 31 | self.in_channels = in_channels 32 | self.conv_out_channels = conv_out_channels 33 | self.fc_out_channels = fc_out_channels 34 | self.embed_channels = embed_channels 35 | self.conv_cfg = conv_cfg 36 | self.norm_cfg = norm_cfg 37 | self.relu = nn.ReLU(inplace=True) 38 | self.convs, self.fcs, last_layer_dim = self._add_conv_fc_branch( 39 | self.num_convs, self.num_fcs, self.in_channels 40 | ) 41 | self.fc_embed = nn.Linear(last_layer_dim, embed_channels) 42 | 43 | self.softmax_temp = softmax_temp 44 | self.loss_track = build_loss(loss_track) 45 | 46 | def _add_conv_fc_branch(self, num_convs, num_fcs, in_channels): 47 | last_layer_dim = in_channels 48 | # add branch specific conv layers 49 | convs = nn.ModuleList() 50 | if num_convs > 0: 51 | for i in range(num_convs): 52 | conv_in_channels = last_layer_dim if i == 0 else self.conv_out_channels 53 | convs.append( 54 | ConvModule( 55 | conv_in_channels, 56 | self.conv_out_channels, 57 | 3, 58 | padding=1, 59 | conv_cfg=self.conv_cfg, 60 | norm_cfg=self.norm_cfg, 61 | ) 62 | ) 63 | last_layer_dim = self.conv_out_channels 64 | # add branch specific fc layers 65 | fcs = nn.ModuleList() 66 | if num_fcs > 0: 67 | last_layer_dim *= self.roi_feat_size * self.roi_feat_size 68 | for i in range(num_fcs): 69 | fc_in_channels = last_layer_dim if i == 0 else self.fc_out_channels 70 | fcs.append(nn.Linear(fc_in_channels, 
self.fc_out_channels)) 71 | last_layer_dim = self.fc_out_channels 72 | return convs, fcs, last_layer_dim 73 | 74 | def init_weights(self): 75 | 76 | for m in self.fcs: 77 | if isinstance(m, nn.Linear): 78 | nn.init.xavier_uniform_(m.weight) 79 | nn.init.constant_(m.bias, 0) 80 | nn.init.normal_(self.fc_embed.weight, 0, 0.01) 81 | nn.init.constant_(self.fc_embed.bias, 0) 82 | 83 | def forward(self, x): 84 | 85 | if self.num_convs > 0: 86 | for i, conv in enumerate(self.convs): 87 | x = conv(x) 88 | x = x.view(x.size(0), -1) 89 | if self.num_fcs > 0: 90 | for i, fc in enumerate(self.fcs): 91 | x = self.relu(fc(x)) 92 | x = self.fc_embed(x) 93 | 94 | return x 95 | 96 | def sup_contra_loss(self, features, labels): 97 | 98 | losses = dict() 99 | loss_track = self.loss_track(features, labels) 100 | losses["loss_cem"] = loss_track 101 | 102 | return losses 103 | -------------------------------------------------------------------------------- /teter/models/roi_heads/track_heads/quasi_dense_embed_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from mmcv.cnn import ConvModule 5 | from mmdet.models import HEADS, build_loss 6 | 7 | from teter.core import cal_similarity 8 | 9 | 10 | @HEADS.register_module(force=True) 11 | class QuasiDenseEmbedHead(nn.Module): 12 | def __init__( 13 | self, 14 | num_convs=4, 15 | num_fcs=1, 16 | roi_feat_size=7, 17 | in_channels=256, 18 | conv_out_channels=256, 19 | fc_out_channels=1024, 20 | embed_channels=256, 21 | conv_cfg=None, 22 | norm_cfg=None, 23 | softmax_temp=-1, 24 | loss_track=dict(type="MultiPosCrossEntropyLoss", loss_weight=0.25), 25 | loss_track_aux=dict( 26 | type="L2Loss", sample_ratio=3, margin=0.3, loss_weight=1.0, hard_mining=True 27 | ), 28 | ): 29 | super(QuasiDenseEmbedHead, self).__init__() 30 | self.num_convs = num_convs 31 | self.num_fcs = num_fcs 32 | self.roi_feat_size = roi_feat_size 33 | self.in_channels = in_channels 34 | self.conv_out_channels = conv_out_channels 35 | self.fc_out_channels = fc_out_channels 36 | self.embed_channels = embed_channels 37 | self.conv_cfg = conv_cfg 38 | self.norm_cfg = norm_cfg 39 | self.relu = nn.ReLU(inplace=True) 40 | self.convs, self.fcs, last_layer_dim = self._add_conv_fc_branch( 41 | self.num_convs, self.num_fcs, self.in_channels 42 | ) 43 | self.fc_embed = nn.Linear(last_layer_dim, embed_channels) 44 | 45 | self.softmax_temp = softmax_temp 46 | self.loss_track = build_loss(loss_track) 47 | if loss_track_aux is not None: 48 | self.loss_track_aux = build_loss(loss_track_aux) 49 | else: 50 | self.loss_track_aux = None 51 | 52 | def _add_conv_fc_branch(self, num_convs, num_fcs, in_channels): 53 | last_layer_dim = in_channels 54 | # add branch specific conv layers 55 | convs = nn.ModuleList() 56 | if num_convs > 0: 57 | for i in range(num_convs): 58 | conv_in_channels = last_layer_dim if i == 0 else self.conv_out_channels 59 | convs.append( 60 | ConvModule( 61 | conv_in_channels, 62 | self.conv_out_channels, 63 | 3, 64 | padding=1, 65 | conv_cfg=self.conv_cfg, 66 | norm_cfg=self.norm_cfg, 67 | ) 68 | ) 69 | last_layer_dim = self.conv_out_channels 70 | # add branch specific fc layers 71 | fcs = nn.ModuleList() 72 | if num_fcs > 0: 73 | last_layer_dim *= self.roi_feat_size * self.roi_feat_size 74 | for i in range(num_fcs): 75 | fc_in_channels = last_layer_dim if i == 0 else self.fc_out_channels 76 | fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels)) 77 | last_layer_dim = self.fc_out_channels 
78 | return convs, fcs, last_layer_dim 79 | 80 | def init_weights(self): 81 | for m in self.fcs: 82 | if isinstance(m, nn.Linear): 83 | nn.init.xavier_uniform_(m.weight) 84 | nn.init.constant_(m.bias, 0) 85 | nn.init.normal_(self.fc_embed.weight, 0, 0.01) 86 | nn.init.constant_(self.fc_embed.bias, 0) 87 | 88 | def forward(self, x): 89 | if self.num_convs > 0: 90 | for i, conv in enumerate(self.convs): 91 | x = conv(x) 92 | x = x.view(x.size(0), -1) 93 | if self.num_fcs > 0: 94 | for i, fc in enumerate(self.fcs): 95 | x = self.relu(fc(x)) 96 | x = self.fc_embed(x) 97 | return x 98 | 99 | def get_track_targets( 100 | self, gt_match_indices, key_sampling_results, ref_sampling_results 101 | ): 102 | track_targets = [] 103 | track_weights = [] 104 | for _gt_match_indices, key_res, ref_res in zip( 105 | gt_match_indices, key_sampling_results, ref_sampling_results 106 | ): 107 | targets = _gt_match_indices.new_zeros( 108 | (key_res.pos_bboxes.size(0), ref_res.bboxes.size(0)), dtype=torch.int 109 | ) 110 | _match_indices = _gt_match_indices[key_res.pos_assigned_gt_inds] 111 | pos2pos = ( 112 | _match_indices.view(-1, 1) == ref_res.pos_assigned_gt_inds.view(1, -1) 113 | ).int() 114 | targets[:, : pos2pos.size(1)] = pos2pos 115 | weights = (targets.sum(dim=1) > 0).float() 116 | track_targets.append(targets) 117 | track_weights.append(weights) 118 | return track_targets, track_weights 119 | 120 | def match(self, key_embeds, ref_embeds, key_sampling_results, ref_sampling_results): 121 | num_key_rois = [res.pos_bboxes.size(0) for res in key_sampling_results] 122 | key_embeds = torch.split(key_embeds, num_key_rois) 123 | num_ref_rois = [res.bboxes.size(0) for res in ref_sampling_results] 124 | ref_embeds = torch.split(ref_embeds, num_ref_rois) 125 | 126 | dists, cos_dists = [], [] 127 | for key_embed, ref_embed in zip(key_embeds, ref_embeds): 128 | dist = cal_similarity( 129 | key_embed, 130 | ref_embed, 131 | method="dot_product", 132 | temperature=self.softmax_temp, 133 | ) 134 | dists.append(dist) 135 | if self.loss_track_aux is not None: 136 | cos_dist = cal_similarity(key_embed, ref_embed, method="cosine") 137 | cos_dists.append(cos_dist) 138 | else: 139 | cos_dists.append(None) 140 | return dists, cos_dists 141 | 142 | def loss(self, dists, cos_dists, targets, weights): 143 | losses = dict() 144 | 145 | loss_track = 0.0 146 | loss_track_aux = 0.0 147 | for _dists, _cos_dists, _targets, _weights in zip( 148 | dists, cos_dists, targets, weights 149 | ): 150 | loss_track += self.loss_track( 151 | _dists, _targets, _weights, avg_factor=_weights.sum() 152 | ) 153 | if self.loss_track_aux is not None: 154 | loss_track_aux += self.loss_track_aux(_cos_dists, _targets) 155 | losses["loss_track"] = loss_track / len(dists) 156 | 157 | if self.loss_track_aux is not None: 158 | losses["loss_track_aux"] = loss_track_aux / len(dists) 159 | 160 | return losses 161 | 162 | @staticmethod 163 | def random_choice(gallery, num): 164 | """Random select some elements from the gallery. 165 | 166 | It seems that Pytorch's implementation is slower than numpy so we use 167 | numpy to randperm the indices. 
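        Example:
            >>> gallery = torch.arange(10)
            >>> QuasiDenseEmbedHead.random_choice(gallery, num=3).shape
            torch.Size([3])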
--------------------------------------------------------------------------------
/teter/models/trackers/__init__.py:
--------------------------------------------------------------------------------
1 | from .teter_bdd import TETerBDD
2 | from .teter_tao import TETerTAO
3 |
4 | __all__ = ["TETerTAO", "TETerBDD"]
5 |
--------------------------------------------------------------------------------
/teter/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .collect_env import collect_env
2 | from .logger import get_root_logger
3 |
4 | __all__ = ["collect_env", "get_root_logger"]
5 |
--------------------------------------------------------------------------------
/teter/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import mmcv
3 | import os.path as osp
4 | import subprocess
5 | import sys
6 | import torch
7 | import torchvision
8 | from collections import defaultdict
9 |
10 | import teter
11 |
12 |
13 | def collect_env():
14 |     env_info = {}
15 |     env_info["sys.platform"] = sys.platform
16 |     env_info["Python"] = sys.version.replace("\n", "")
17 |
18 |     cuda_available = torch.cuda.is_available()
19 |     env_info["CUDA available"] = cuda_available
20 |
21 |     if cuda_available:
22 |         from torch.utils.cpp_extension import CUDA_HOME
23 |
24 |         env_info["CUDA_HOME"] = CUDA_HOME
25 |
26 |         if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
27 |             try:
28 |                 nvcc = osp.join(CUDA_HOME, "bin/nvcc")
29 |                 nvcc = subprocess.check_output(f'"{nvcc}" -V | tail -n1', shell=True)
30 |                 nvcc = nvcc.decode("utf-8").strip()
31 |             except subprocess.SubprocessError:
32 |                 nvcc = "Not Available"
33 |             env_info["NVCC"] = nvcc
34 |
35 |         devices = defaultdict(list)
36 |         for k in range(torch.cuda.device_count()):
37 |             devices[torch.cuda.get_device_name(k)].append(str(k))
38 |         for name, devids in devices.items():
39 |             env_info["GPU " + ",".join(devids)] = name
40 |
41 |     gcc = subprocess.check_output("gcc --version | head -n1", shell=True)
42 |     gcc = gcc.decode("utf-8").strip()
43 |     env_info["GCC"] = gcc
44 |
45 |     env_info["PyTorch"] = torch.__version__
46 |     env_info["PyTorch compiling details"] = torch.__config__.show()
47 |
48 |     env_info["TorchVision"] = torchvision.__version__
49 |
50 |     env_info["OpenCV"] = cv2.__version__
51 |
52 |     env_info["MMCV"] = mmcv.__version__
53 |     env_info["teter"] = teter.__version__
54 |
55 |     return env_info
56 |
57 |
58 | if __name__ == "__main__":
59 |     for name, val in collect_env().items():
60 |         print(f"{name}: {val}")
61 |
--------------------------------------------------------------------------------
/teter/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from mmcv.utils import get_logger
3 |
4 |
5 | def get_root_logger(log_file=None, log_level=logging.INFO):
6 |     return get_logger("teter", log_file, log_level)
7 |
--------------------------------------------------------------------------------
/teter/version.py:
--------------------------------------------------------------------------------
1 | # GENERATED VERSION FILE
2 | __version__ = "dev-0.1.0"
3 | short_version = "0.1.0"
4 | version_info = (0, 1, 0)
5 |
"dev-0.1.0" 3 | short_version = "0.1.0" 4 | version_info = (0, 1, 0) 5 | -------------------------------------------------------------------------------- /tools/convert_datasets/tao2coco.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os.path as osp 3 | from collections import defaultdict 4 | 5 | import mmcv 6 | from tao.toolkit.tao import Tao 7 | from tqdm import tqdm 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='Make annotation files for TAO') 13 | parser.add_argument('-t', '--tao', help='path of TAO json file') 14 | parser.add_argument( 15 | '--filter-classes', 16 | action='store_true', 17 | help='whether filter 1230 classes to 482.') 18 | return parser.parse_args() 19 | 20 | 21 | def get_classes(tao_path, filter_classes=True): 22 | train = mmcv.load(osp.join(tao_path, 'train.json')) 23 | 24 | train_classes = list(set([_['category_id'] for _ in train['annotations']])) 25 | print(f'TAO train set contains {len(train_classes)} categories.') 26 | 27 | val = mmcv.load(osp.join(tao_path, 'validation.json')) 28 | val_classes = list(set([_['category_id'] for _ in val['annotations']])) 29 | print(f'TAO val set contains {len(val_classes)} categories.') 30 | 31 | test = mmcv.load(osp.join(tao_path, 'test_categories.json')) 32 | test_classes = list(set([_['id'] for _ in test['categories']])) 33 | print(f'TAO test set contains {len(test_classes)} categories.') 34 | 35 | tao_classes = set(train_classes + val_classes + test_classes) 36 | print(f'TAO totally contains {len(tao_classes)} categories.') 37 | 38 | tao_classes = [_ for _ in train['categories'] if _['id'] in tao_classes] 39 | 40 | with open(osp.join(tao_path, 'tao_classes.txt'), 'wt') as f: 41 | for c in tao_classes: 42 | name = c['name'] 43 | f.writelines(f'{name}\n') 44 | 45 | if filter_classes: 46 | return tao_classes 47 | else: 48 | return train['categories'] 49 | 50 | 51 | def convert_tao(file, classes): 52 | tao = Tao(file) 53 | raw = mmcv.load(file) 54 | 55 | out = defaultdict(list) 56 | out['tracks'] = raw['tracks'].copy() 57 | out['info'] = raw['info'].copy() 58 | out['licenses'] = raw['licenses'].copy() 59 | out['categories'] = classes 60 | 61 | for video in tqdm(raw['videos']): 62 | img_infos = tao.vid_img_map[video['id']] 63 | img_infos = sorted(img_infos, key=lambda x: x['frame_index']) 64 | frame_range = img_infos[1]['frame_index'] - img_infos[0]['frame_index'] 65 | video['frame_range'] = frame_range 66 | out['videos'].append(video) 67 | for i, img_info in enumerate(img_infos): 68 | img_info['frame_id'] = i 69 | img_info['neg_category_ids'] = video['neg_category_ids'] 70 | img_info['not_exhaustive_category_ids'] = video[ 71 | 'not_exhaustive_category_ids'] 72 | out['images'].append(img_info) 73 | ann_infos = tao.img_ann_map[img_info['id']] 74 | for ann_info in ann_infos: 75 | ann_info['instance_id'] = ann_info['track_id'] 76 | out['annotations'].append(ann_info) 77 | 78 | assert len(out['videos']) == len(raw['videos']) 79 | assert len(out['images']) == len(raw['images']) 80 | assert len(out['annotations']) == len(raw['annotations']) 81 | return out 82 | 83 | 84 | def main(): 85 | args = parse_args() 86 | 87 | classes = get_classes(args.tao, args.filter_classes) 88 | print(f'convert with {len(classes)} classes') 89 | 90 | for file in [ 91 | 'train.json', 'validation.json', 'test_without_annotations.json' 92 | ]: 93 | print(f'convert {file}') 94 | out = convert_tao(osp.join(args.tao, file), classes) 95 | c = '_482' if 
--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | PORT=$4
7 | if [ -z "$4" ]; then
8 |     PORT=33333
9 | fi
10 |
11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
12 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
13 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:5}
14 |
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | GPUS=$2
5 | PORT=$3
6 |
7 | if [ -z "$3" ]; then
8 |     PORT=29533
9 | fi
10 |
11 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
12 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
13 |     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:4}
14 |
--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 |
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 |
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | WORK_DIR=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${@:5}
14 |
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 |     --job-name=${JOB_NAME} \
18 |     --gres=gpu:${GPUS_PER_NODE} \
19 |     --ntasks=${GPUS} \
20 |     --ntasks-per-node=${GPUS_PER_NODE} \
21 |     --cpus-per-task=${CPUS_PER_TASK} \
22 |     --kill-on-bad-exit=1 \
23 |     ${SRUN_ARGS} \
24 |     python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
25 |
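For reference, typical launches with the scripts above; the GPU counts and checkpoint path are placeholders, and the config names come from this repo's configs/ tree:

# multi-GPU training/testing (ports default to 29533 / 33333 when omitted)
bash tools/dist_train.sh configs/tao/cem_r101_lvis.py 8
bash tools/dist_test.sh configs/tao/tracker_r101_tao.py checkpoint.pth 8

# the SLURM equivalents take a partition and a job name first
GPUS=8 bash tools/slurm_train.sh my_partition teter configs/tao/cem_r101_lvis.py work_dirs/cem_r101_lvis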
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import mmcv
5 | import torch
6 | from mmcv import Config, DictAction
7 | from mmcv.cnn import fuse_conv_bn
8 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
9 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint
10 | # from mmdet.core import wrap_fp16_model
11 | from mmdet.datasets import build_dataset
12 |
13 |
14 | def parse_args():
15 |     parser = argparse.ArgumentParser(description='teter test model')
16 |     parser.add_argument('config', help='test config file path')
17 |     parser.add_argument('checkpoint', help='checkpoint file')
18 |     parser.add_argument('--out', help='output result file')
19 |     parser.add_argument(
20 |         '--fuse-conv-bn',
21 |         action='store_true',
22 |         help='Whether to fuse conv and bn; this will slightly increase '
23 |         'the inference speed')
24 |     parser.add_argument(
25 |         '--format-only',
26 |         action='store_true',
27 |         help='Format the output results without performing evaluation. It is '
28 |         'useful when you want to format the result to a specific format and '
29 |         'submit it to the test server')
30 |     parser.add_argument('--eval', type=str, nargs='+', help='eval types')
31 |     parser.add_argument('--show', action='store_true', help='show results')
32 |     parser.add_argument(
33 |         '--show-dir', help='directory where painted images will be saved')
34 |     parser.add_argument(
35 |         '--gpu-collect',
36 |         action='store_true',
37 |         help='whether to use gpu to collect results.')
38 |     parser.add_argument(
39 |         '--tmpdir',
40 |         help='tmp directory used for collecting results from multiple '
41 |         'workers, available when gpu-collect is not specified')
42 |     parser.add_argument(
43 |         '--show_score_thr', default=0.3, type=float, help='score threshold for shown or saved results')
44 |     parser.add_argument(
45 |         '--cfg-options',
46 |         nargs='+',
47 |         action=DictAction,
48 |         help='override some settings in the used config, the key-value pair '
49 |         'in xxx=yyy format will be merged into config file.')
50 |     parser.add_argument(
51 |         '--eval-options',
52 |         nargs='+',
53 |         action=DictAction,
54 |         help='custom options for evaluation, the key-value pair in xxx=yyy '
55 |         'format will be kwargs for dataset.evaluate() function')
56 |     parser.add_argument(
57 |         '--launcher',
58 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
59 |         default='none',
60 |         help='job launcher')
61 |     parser.add_argument('--local_rank', type=int, default=0)
62 |     args = parser.parse_args()
63 |     if 'LOCAL_RANK' not in os.environ:
64 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
65 |     return args
66 |
67 |
68 | def main():
69 |     args = parse_args()
70 |
71 |     assert args.out or args.eval or args.format_only or args.show \
72 |         or args.show_dir, \
73 |         ('Please specify at least one operation (save/eval/format/show the '
74 |          'results) with the argument "--out", "--eval", '
75 |          '"--format-only", "--show" or "--show-dir"')
76 |
77 |     if args.eval and args.format_only:
78 |         raise ValueError('--eval and --format-only cannot both be specified')
79 |
80 |     if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
81 |         raise ValueError('The output file must be a pkl file.')
82 |
83 |     cfg = Config.fromfile(args.config)
84 |
85 |     if args.cfg_options is not None:
86 |         cfg.merge_from_dict(args.cfg_options)
87 |
88 |     if cfg.get('USE_MMDET', False):
89 |         from mmdet.apis import multi_gpu_test, single_gpu_test
90 |         from mmdet.models import build_detector as build_model
91 |         from mmdet.datasets import build_dataloader
92 |     else:
93 |         from teter.apis import multi_gpu_test, single_gpu_test
94 |         from teter.models import build_model
95 |         from teter.datasets import build_dataloader
96 |
97 |     # set cudnn_benchmark
98 |     if cfg.get('cudnn_benchmark', False):
99 |         torch.backends.cudnn.benchmark = True
100 |     cfg.model.pretrained = None
101 |     cfg.data.test.test_mode = True
102 |
103 |     # init distributed env first, since logger depends on the dist info.
104 |     if args.launcher == 'none':
105 |         distributed = False
106 |     else:
107 |         distributed = True
108 |         init_dist(args.launcher, **cfg.dist_params)
109 |
110 |     # build the dataloader
111 |     dataset = build_dataset(cfg.data.test)
112 |     data_loader = build_dataloader(
113 |         dataset,
114 |         samples_per_gpu=1,
115 |         workers_per_gpu=cfg.data.workers_per_gpu,
116 |         dist=distributed,
117 |         shuffle=False)
118 |
119 |     # build the model and load checkpoint
120 |     model = build_model(cfg.model, train_cfg=None, test_cfg=None)
121 |     # fp16_cfg = cfg.get('fp16', None)
122 |     # if fp16_cfg is not None:
123 |     #     wrap_fp16_model(model)
124 |     checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
125 |
126 |     if args.fuse_conv_bn:
127 |         model = fuse_conv_bn(model)
128 |
129 |     if 'CLASSES' in checkpoint['meta']:
130 |         model.CLASSES = checkpoint['meta']['CLASSES']
131 |     else:
132 |         model.CLASSES = dataset.CLASSES
133 |
134 |     if not distributed:
135 |         model = MMDataParallel(model, device_ids=[0])
136 |         outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
137 |                                   args.show_score_thr)
138 |     else:
139 |         model = MMDistributedDataParallel(
140 |             model.cuda(),
141 |             device_ids=[torch.cuda.current_device()],
142 |             broadcast_buffers=False)
143 |         outputs = multi_gpu_test(model, data_loader, args.tmpdir,
144 |                                  args.gpu_collect)
145 |
146 |     rank, _ = get_dist_info()
147 |     if rank == 0:
148 |         if args.out:
149 |             print(f'\nwriting results to {args.out}')
150 |             mmcv.dump(outputs, args.out)
151 |         kwargs = {} if args.eval_options is None else args.eval_options
152 |         if args.format_only:
153 |             dataset.format_results(outputs, **kwargs)
154 |         if args.eval:
155 |             eval_kwargs = cfg.get('evaluation', {}).copy()
156 |             # hard-coded way to remove EvalHook args
157 |             for key in ['interval', 'tmpdir', 'start', 'gpu_collect']:
158 |                 eval_kwargs.pop(key, None)
159 |             eval_kwargs.update(dict(metric=args.eval, **kwargs))
160 |             print(dataset.evaluate(outputs, **eval_kwargs))
161 |
162 |
163 | if __name__ == '__main__':
164 |     main()
165 |
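A hedged single-GPU example of the entry point above; the checkpoint path is a placeholder, and the metric name depends on what the configured dataset's evaluate() accepts (the tracking datasets in this repo evaluate tracking-style metrics):

python tools/test.py configs/tao/tracker_r101_tao.py checkpoint.pth \
    --out results.pkl --eval track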
--------------------------------------------------------------------------------
/tools/to_bdd100k.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import mmcv
5 | from mmcv import Config, DictAction
6 | from mmdet.datasets import build_dataset
7 | from teter.core.to_bdd100k import preds2bdd100k
8 |
9 |
10 | def parse_args():
11 |     parser = argparse.ArgumentParser(description='convert teter results to BDD100K format')
12 |     parser.add_argument('config', help='test config file path')
13 |     parser.add_argument('--res', help='path to the result file to convert')
14 |     parser.add_argument(
15 |         '--bdd-dir',
16 |         type=str,
17 |         help='path to the folder that will contain files in bdd100k format')
18 |     parser.add_argument(
19 |         '--coco-file',
20 |         type=str,
21 |         help='path to the json file in COCO submission format')
22 |     parser.add_argument(
23 |         '--task',
24 |         type=str,
25 |         nargs='+',
26 |         help='task types',
27 |         choices=['det', 'ins_seg', 'box_track', 'seg_track'])
28 |     parser.add_argument(
29 |         '--nproc',
30 |         type=int,
31 |         help='number of processes for mask merging')
32 |     parser.add_argument(
33 |         '--cfg-options',
34 |         nargs='+',
35 |         action=DictAction,
36 |         help='override some settings in the used config, the key-value pair '
37 |         'in xxx=yyy format will be merged into config file.')
38 |     args = parser.parse_args()
39 |     return args
40 |
41 |
42 | def main():
43 |     args = parse_args()
44 |
45 |     if not os.path.isfile(args.res):
46 |         raise ValueError('The result file does not exist.')
47 |
48 |     cfg = Config.fromfile(args.config)
49 |
50 |     if args.cfg_options is not None:
51 |         cfg.merge_from_dict(args.cfg_options)
52 |
53 |     if cfg.get('USE_MMDET', False):
54 |         from mmdet.datasets import build_dataloader
55 |     else:
56 |         from teter.datasets import build_dataloader
57 |
58 |     # build the dataloader
59 |     cfg.data.test.test_mode = True
60 |     dataset = build_dataset(cfg.data.test)
61 |
62 |     print(f'\nLoading results from {args.res}')
63 |     results = mmcv.load(args.res)
64 |
65 |     if args.coco_file:
66 |         dataset.format_results(results, jsonfile_prefix=args.coco_file)
67 |     if args.bdd_dir:
68 |         preds2bdd100k(
69 |             dataset, results, args.task, out_base=args.bdd_dir, nproc=args.nproc)
70 |
71 | if __name__ == '__main__':
72 |     main()
73 |
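Typical use of the converter above, fed with a result file written by tools/test.py --out; all paths here are placeholders:

python tools/to_bdd100k.py configs/bdd100k/cem_bdd.py --res results.pkl \
    --task box_track --bdd-dir bdd100k_out --nproc 4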
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import os
4 | import os.path as osp
5 | import time
6 |
7 | import mmcv
8 | import torch
9 | from mmcv import Config, DictAction
10 | from mmcv.runner import init_dist
11 | from mmdet.apis import set_random_seed
12 | from mmdet.datasets import build_dataset
13 |
14 | from teter import __version__
15 | from teter.utils import collect_env, get_root_logger
16 |
17 |
18 | def parse_args():
19 |     parser = argparse.ArgumentParser(description='Train a model')
20 |     parser.add_argument('config', help='train config file path')
21 |     parser.add_argument('--work-dir', help='the dir to save logs and models')
22 |     parser.add_argument(
23 |         '--resume-from', help='the checkpoint file to resume from')
24 |     parser.add_argument(
25 |         '--no-validate',
26 |         action='store_true',
27 |         help='whether not to evaluate the checkpoint during training')
28 |     group_gpus = parser.add_mutually_exclusive_group()
29 |     group_gpus.add_argument(
30 |         '--gpus',
31 |         type=int,
32 |         help='number of gpus to use '
33 |         '(only applicable to non-distributed training)')
34 |     group_gpus.add_argument(
35 |         '--gpu-ids',
36 |         type=int,
37 |         nargs='+',
38 |         help='ids of gpus to use '
39 |         '(only applicable to non-distributed training)')
40 |     parser.add_argument('--seed', type=int, default=None, help='random seed')
41 |     parser.add_argument(
42 |         '--deterministic',
43 |         action='store_true',
44 |         help='whether to set deterministic options for CUDNN backend.')
45 |     parser.add_argument(
46 |         '--cfg-options',
47 |         nargs='+',
48 |         action=DictAction,
49 |         help='override some settings in the used config, the key-value pair '
50 |         'in xxx=yyy format will be merged into config file.')
51 |     parser.add_argument(
52 |         '--launcher',
53 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
54 |         default='none',
55 |         help='job launcher')
56 |     parser.add_argument('--local_rank', type=int, default=0)
57 |     args = parser.parse_args()
58 |     if 'LOCAL_RANK' not in os.environ:
59 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
60 |
61 |     return args
62 |
63 |
64 | def main():
65 |     args = parse_args()
66 |
67 |     cfg = Config.fromfile(args.config)
68 |
69 |     if args.cfg_options is not None:
70 |         cfg.merge_from_dict(args.cfg_options)
71 |
72 |     if cfg.get('USE_MMDET', False):
73 |         from mmdet.apis import train_detector as train_model
74 |         from mmdet.models import build_detector as build_model
75 |     else:
76 |         from teter.apis import train_model
77 |         from teter.models import build_model
78 |
79 |     # set cudnn_benchmark
80 |     if cfg.get('cudnn_benchmark', False):
81 |         torch.backends.cudnn.benchmark = True
82 |
83 |     # work_dir is determined in this priority: CLI > segment in file > filename
84 |     if args.work_dir is not None:
85 |         # update configs according to CLI args if args.work_dir is not None
86 |         cfg.work_dir = args.work_dir
87 |     elif cfg.get('work_dir', None) is None:
88 |         # use config filename as default work_dir if cfg.work_dir is None
89 |         cfg.work_dir = osp.join('./work_dirs',
90 |                                 osp.splitext(osp.basename(args.config))[0])
91 |     if args.resume_from is not None:
92 |         cfg.resume_from = args.resume_from
93 |     if args.gpu_ids is not None:
94 |         cfg.gpu_ids = args.gpu_ids
95 |     else:
96 |         cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
97 |
98 |     # init distributed env first, since logger depends on the dist info.
99 |     if args.launcher == 'none':
100 |         distributed = False
101 |     else:
102 |         distributed = True
103 |         init_dist(args.launcher, **cfg.dist_params)
104 |
105 |     # create work_dir
106 |     mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
107 |     # dump config
108 |     cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
109 |     # init the logger before other steps
110 |     timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
111 |     log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
112 |     logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
113 |
114 |     # init the meta dict to record some important information such as
115 |     # environment info and seed, which will be logged
116 |     meta = dict()
117 |     # log env info
118 |     env_info_dict = collect_env()
119 |     env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
120 |     dash_line = '-' * 60 + '\n'
121 |     logger.info('Environment info:\n' + dash_line + env_info + '\n' +
122 |                 dash_line)
123 |     meta['env_info'] = env_info
124 |
125 |     # log some basic info
126 |     logger.info(f'Distributed training: {distributed}')
127 |     logger.info(f'Config:\n{cfg.pretty_text}')
128 |
129 |     # set random seeds
130 |     if args.seed is not None:
131 |         logger.info(f'Set random seed to {args.seed}, '
132 |                     f'deterministic: {args.deterministic}')
133 |         set_random_seed(args.seed, deterministic=args.deterministic)
134 |     cfg.seed = args.seed
135 |     meta['seed'] = args.seed
136 |
137 |     model = build_model(
138 |         cfg.model,
139 |         train_cfg=cfg.get('train_cfg'),
140 |         test_cfg=cfg.get('test_cfg'))
141 |     model.init_weights()
142 |
143 |     datasets = [build_dataset(cfg.data.train)]
144 |     if len(cfg.workflow) == 2:
145 |         val_dataset = copy.deepcopy(cfg.data.val)
146 |         val_dataset.pipeline = cfg.data.train.pipeline
147 |         datasets.append(build_dataset(val_dataset))
148 |     if cfg.checkpoint_config is not None:
149 |         # save teter version, config file content and class names in
150 |         # checkpoints as meta data
151 |         cfg.checkpoint_config.meta = dict(
152 |             qdtrack_version=__version__,
153 |             config=cfg.pretty_text,
154 |             CLASSES=datasets[0].CLASSES)
155 |         # add an attribute for visualization convenience
156 |         model.CLASSES = datasets[0].CLASSES
157 |     train_model(
158 |         model,
159 |         datasets,
160 |         cfg,
161 |         distributed=distributed,
162 |         validate=(not args.no_validate),
163 |         timestamp=timestamp,
164 |         meta=meta)
165 |
166 |
167 | if __name__ == '__main__':
168 |     main()
169 |
--------------------------------------------------------------------------------
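Finally, the non-distributed counterpart of the launch scripts, calling the trainer above directly; the config comes from this repo's configs/ tree and the work-dir is a placeholder:

python tools/train.py configs/bdd100k/cem_bdd.py \
    --work-dir work_dirs/cem_bdd --seed 0 --deterministic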