├── LICENSE
├── README.md
├── configs
│   ├── _base_
│   │   ├── faster_rcnn_r50_fpn.py
│   │   └── qdtrack_faster_rcnn_r50_fpn.py
│   ├── bdd100k
│   │   └── cem_bdd.py
│   └── tao
│       ├── cem_r101_lvis.py
│       ├── cem_swinB_lvis.py
│       ├── cem_swinL_lvis.py
│       ├── cem_swinS_lvis.py
│       ├── cem_swinT_lvis.py
│       ├── tracker_r101_tao.py
│       ├── tracker_swinB_tao.py
│       ├── tracker_swinL_tao.py
│       ├── tracker_swinS_tao.py
│       └── tracker_swinT_tao.py
├── docs
│   ├── GET_STARTED.md
│   └── INSTALL.md
├── figures
│   ├── teaser-teter.png
│   └── teta-teaser.png
├── requirements.txt
├── setup.cfg
├── setup.py
├── teta
│   ├── LICENSE
│   ├── README.md
│   ├── docs
│   │   └── TAO-format.txt
│   ├── figures
│   │   ├── figure_1.png
│   │   └── teta-teaser.png
│   ├── requirements.txt
│   ├── scripts
│   │   ├── run_coco.py
│   │   └── run_tao.py
│   ├── setup.py
│   └── teta
│       ├── __init__.py
│       ├── _timing.py
│       ├── config.py
│       ├── datasets
│       │   ├── __init__.py
│       │   ├── _base_dataset.py
│       │   ├── bdd.py
│       │   ├── bdd_mots.py
│       │   ├── coco.py
│       │   ├── coco_mots.py
│       │   └── tao.py
│       ├── eval.py
│       ├── metrics
│       │   ├── __init__.py
│       │   ├── _base_metric.py
│       │   └── teta.py
│       └── utils.py
├── teter
│   ├── VERSION
│   ├── __init__.py
│   ├── apis
│   │   ├── __init__.py
│   │   ├── inference.py
│   │   ├── test.py
│   │   └── train.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── evaluation
│   │   │   ├── __init__.py
│   │   │   ├── box_track.toml
│   │   │   ├── eval_hooks.py
│   │   │   └── mot.py
│   │   ├── to_bdd100k
│   │   │   ├── __init__.py
│   │   │   ├── transforms.py
│   │   │   └── utils.py
│   │   ├── track
│   │   │   ├── __init__.py
│   │   │   ├── similarity.py
│   │   │   └── transforms.py
│   │   └── utils
│   │       ├── __init__.py
│   │       └── visualization.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── bdd_video_dataset.py
│   │   ├── builder.py
│   │   ├── coco_video_dataset.py
│   │   ├── parsers
│   │   │   ├── __init__.py
│   │   │   ├── coco_api.py
│   │   │   └── coco_video_parser.py
│   │   ├── pipelines
│   │   │   ├── __init__.py
│   │   │   ├── formatting.py
│   │   │   ├── h5backend.py
│   │   │   ├── loading.py
│   │   │   └── transforms.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   └── distributed_video_sampler.py
│   │   └── tao_dataset.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── builder.py
│   │   ├── losses
│   │   │   ├── __init__.py
│   │   │   ├── l2_loss.py
│   │   │   ├── multipos_cross_entropy_loss.py
│   │   │   └── unbiased_supcontrat.py
│   │   ├── mot
│   │   │   ├── __init__.py
│   │   │   └── teter.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── teter_roi_head.py
│   │   │   └── track_heads
│   │   │       ├── __init__.py
│   │   │       ├── cem_head.py
│   │   │       └── quasi_dense_embed_head.py
│   │   └── trackers
│   │       ├── __init__.py
│   │       ├── teter_bdd.py
│   │       └── teter_tao.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── collect_env.py
│   │   └── logger.py
│   └── version.py
└── tools
    ├── convert_datasets
    │   └── tao2coco.py
    ├── dist_test.sh
    ├── dist_train.sh
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── to_bdd100k.py
    └── train.py
/configs/_base_/faster_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='FasterRCNN',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=50,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=True),
11 | norm_eval=True,
12 | style='pytorch',
13 | init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
56 | # model training and testing settings
57 | train_cfg=dict(
58 | rpn=dict(
59 | assigner=dict(
60 | type='MaxIoUAssigner',
61 | pos_iou_thr=0.7,
62 | neg_iou_thr=0.3,
63 | min_pos_iou=0.3,
64 | match_low_quality=True,
65 | ignore_iof_thr=-1),
66 | sampler=dict(
67 | type='RandomSampler',
68 | num=256,
69 | pos_fraction=0.5,
70 | neg_pos_ub=-1,
71 | add_gt_as_proposals=False),
72 | allowed_border=-1,
73 | pos_weight=-1,
74 | debug=False),
75 | rpn_proposal=dict(
76 | nms_pre=2000,
77 | max_per_img=1000,
78 | nms=dict(type='nms', iou_threshold=0.7),
79 | min_bbox_size=0),
80 | rcnn=dict(
81 | assigner=dict(
82 | type='MaxIoUAssigner',
83 | pos_iou_thr=0.5,
84 | neg_iou_thr=0.5,
85 | min_pos_iou=0.5,
86 | match_low_quality=False,
87 | ignore_iof_thr=-1),
88 | sampler=dict(
89 | type='RandomSampler',
90 | num=512,
91 | pos_fraction=0.25,
92 | neg_pos_ub=-1,
93 | add_gt_as_proposals=True),
94 | pos_weight=-1,
95 | debug=False)),
96 | test_cfg=dict(
97 | rpn=dict(
98 | nms_pre=1000,
99 | max_per_img=1000,
100 | nms=dict(type='nms', iou_threshold=0.7),
101 | min_bbox_size=0),
102 | rcnn=dict(
103 | score_thr=0.05,
104 | nms=dict(type='nms', iou_threshold=0.5),
105 | max_per_img=100)
106 | # soft-nms is also supported for rcnn testing
107 | # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)
108 | ))
109 |
--------------------------------------------------------------------------------
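
This base file follows the mmdetection config convention, so it can be materialized directly. A minimal sketch, assuming mmcv and mmdet 2.x are installed and the working directory is the repository root:

from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/_base_/faster_rcnn_r50_fpn.py')
model = build_detector(cfg.model)  # train_cfg/test_cfg live inside cfg.model here
print(type(model).__name__)        # FasterRCNN
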
/configs/_base_/qdtrack_faster_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | _base_ = './faster_rcnn_r50_fpn.py'
2 | model = dict(
3 | type='QDTrack',
4 | rpn_head=dict(
5 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
6 | roi_head=dict(
7 | type='QuasiDenseRoIHead',
8 | track_roi_extractor=dict(
9 | type='SingleRoIExtractor',
10 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
11 | out_channels=256,
12 | featmap_strides=[4, 8, 16, 32]),
13 | track_head=dict(
14 | type='QuasiDenseEmbedHead',
15 | num_convs=4,
16 | num_fcs=1,
17 | embed_channels=256,
18 | norm_cfg=dict(type='GN', num_groups=32),
19 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25),
20 | loss_track_aux=dict(
21 | type='L2Loss',
22 | neg_pos_ub=3,
23 | pos_margin=0,
24 | neg_margin=0.1,
25 | hard_mining=True,
26 | loss_weight=1.0))),
27 | train_cfg=dict(
28 | embed=dict(
29 | assigner=dict(
30 | type='MaxIoUAssigner',
31 | pos_iou_thr=0.7,
32 | neg_iou_thr=0.3,
33 | min_pos_iou=0.5,
34 | match_low_quality=False,
35 | ignore_iof_thr=-1),
36 | sampler=dict(
37 | type='CombinedSampler',
38 | num=256,
39 | pos_fraction=0.5,
40 | neg_pos_ub=3,
41 | add_gt_as_proposals=True,
42 | pos_sampler=dict(type='InstanceBalancedPosSampler'),
43 | neg_sampler=dict(type='RandomSampler')))))
--------------------------------------------------------------------------------
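
The `_base_` line pulls in the detector config and merges this dict over it key by key: dict values merge recursively, and a child dict containing `_delete_=True` replaces its base counterpart outright (the Swin configs below rely on this). A sketch to inspect the merged result, assuming mmcv is installed:

from mmcv import Config

cfg = Config.fromfile('configs/_base_/qdtrack_faster_rcnn_r50_fpn.py')
print(cfg.model.type)                     # QDTrack (set here)
print(cfg.model.backbone.depth)           # 50 (inherited from the base file)
print(cfg.model.rpn_head.loss_bbox.type)  # SmoothL1Loss (leaf-level override)
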
/configs/bdd100k/cem_bdd.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py'
3 | model = dict(
4 | type='TETer',
5 | freeze_detector=True,
6 | freeze_qd=True,
7 | method='teter',
8 | roi_head=dict(
9 | type='TETerRoIHead',
10 | finetune_cem=True,
11 | bbox_head=dict(num_classes=8),
12 | cem_roi_extractor=dict(
13 | type='SingleRoIExtractor',
14 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
15 | out_channels=256,
16 | featmap_strides=[4, 8, 16, 32]),
17 | cem_head=dict(
18 | type='ClsExemplarHead',
19 | num_convs=4,
20 | num_fcs=3,
21 | embed_channels=256,
22 | norm_cfg=dict(type='GN', num_groups=32),
23 | loss_track=dict(type='UnbiasedSupConLoss', temperature=0.07, contrast_mode='all',
24 | pos_normalize=True,
25 | loss_weight=0.25),
26 | softmax_temp=-1),
27 |
28 | track_head=dict(
29 | type='QuasiDenseEmbedHead',
30 | num_convs=4,
31 | num_fcs=1,
32 | embed_channels=256,
33 | norm_cfg=dict(type='GN', num_groups=32),
34 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25),
35 | loss_track_aux=dict(
36 | type='L2Loss',
37 | neg_pos_ub=3,
38 | pos_margin=0,
39 | neg_margin=0.1,
40 | hard_mining=True,
41 | loss_weight=1.0))
42 | ),
43 | tracker=dict(
44 | type='TETerBDD',
45 | init_score_thr=0.7,
46 | obj_score_thr=0.3,
47 | match_score_thr=0.5,
48 | memo_tracklet_frames=10,
49 | memo_backdrop_frames=1,
50 | memo_momentum=0.8,
51 | nms_conf_thr=0.5,
52 | nms_backdrop_iou_thr=0.3,
53 | nms_class_iou_thr=0.7,
54 | contrastive_thr=0.5,
55 | match_metric='bisoftmax'),
56 |
57 | # model training and testing settings
58 | train_cfg=dict(
59 | embed=dict(
60 | sampler=dict(
61 | type='CombinedSampler',
62 | num=256,
63 | pos_fraction=0.5,
64 | neg_pos_ub=3,
65 | add_gt_as_proposals=True,
66 | pos_sampler=dict(type='InstanceBalancedPosSampler'),
67 | neg_sampler=dict(
68 | type='IoUBalancedNegSampler',
69 | floor_thr=-1,
70 | floor_fraction=0,
71 | num_bins=3)))))
72 | # dataset settings
73 | dataset_type = 'BDDVideoDataset'
74 | data_root = 'data/bdd/bdd100k/'
75 | ann_root = 'data/bdd/'
76 | img_norm_cfg = dict(
77 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
78 | train_pipeline = [
79 | dict(type='LoadMultiImagesFromFile'),
80 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
81 | # dict(type='LoadMultiImagesFromFile',
82 | # file_client_args=dict(
83 | # img_db_path= 'data/bdd/hdf5s/100k_train.hdf5',
84 | # # vid_db_path='data/bdd/hdf5s/track_train.hdf5',
85 | # backend='hdf5',
86 | # type='bdd')),
87 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
88 | dict(
89 | type='SeqResize',
90 | img_scale=[(1296, 640), (1296, 672), (1296, 704), (1296, 736),
91 | (1296, 768), (1296, 800), (1296, 720)],
92 | share_params=False,
93 | multiscale_mode='value',
94 | keep_ratio=True),
95 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5),
96 | dict(type='SeqNormalize', **img_norm_cfg),
97 | dict(type='SeqPad', size_divisor=32),
98 | dict(type='SeqDefaultFormatBundle'),
99 | dict(
100 | type='SeqCollect',
101 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
102 | ref_prefix='ref'),
103 | ]
104 | test_pipeline = [
105 | dict(type='LoadImageFromFile'),
106 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
107 | # dict(type='LoadImageFromFile',
108 | # file_client_args=dict(
109 | # vid_db_path='data/bdd/hdf5s/track_val.hdf5',
110 | # backend='hdf5',
111 | # type='bdd')),
112 | dict(
113 | type='MultiScaleFlipAug',
114 | img_scale=(1296, 720),
115 | flip=False,
116 | transforms=[
117 | dict(type='Resize', keep_ratio=True),
118 | dict(type='RandomFlip'),
119 | dict(type='Normalize', **img_norm_cfg),
120 | dict(type='Pad', size_divisor=32),
121 | dict(type='ImageToTensor', keys=['img']),
122 | dict(type='VideoCollect', keys=['img'])
123 | ])
124 | ]
125 | data = dict(
126 | samples_per_gpu=16,
127 | workers_per_gpu=2,
128 | train=[
129 | dict(
130 | type=dataset_type,
131 | load_as_video=False,
132 | ann_file=ann_root +
133 | 'annotations/det_20/det_train_cocofmt.json',
134 | img_prefix=data_root + 'images/100k/train/',
135 | pipeline=train_pipeline)
136 | ],
137 | val=dict(
138 | type=dataset_type,
139 | ann_file=ann_root +
140 | 'annotations/box_track_20/box_track_val_cocofmt.json',
141 | scalabel_gt=ann_root + 'annotations/scalabel_gt/box_track_20/val/',
142 | img_prefix=data_root + 'images/track/val/',
143 | pipeline=test_pipeline),
144 | test=dict(
145 | type=dataset_type,
146 | ann_file=ann_root +
147 | 'annotations/box_track_20/box_track_val_cocofmt.json',
148 | scalabel_gt=ann_root + 'annotations/scalabel_gt/box_track_20/val/',
149 | img_prefix=data_root + 'images/track/val/',
150 | pipeline=test_pipeline))
151 | # optimizer
152 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
153 | optimizer_config = dict(grad_clip=None)
154 | # learning policy
155 | lr_config = dict(
156 | policy='step',
157 | warmup='linear',
158 | warmup_iters=1000,
159 | warmup_ratio=1.0 / 1000,
160 | step=[8, 11])
161 | # checkpoint saving
162 | checkpoint_config = dict(interval=1)
163 | # yapf:disable
164 | log_config = dict(
165 | interval=50,
166 | hooks=[
167 | dict(type='TextLoggerHook'),
168 | # dict(type='TensorboardLoggerHook')
169 | ])
170 | # yapf:enable
171 | # runtime settings
172 | total_epochs = 12
173 | dist_params = dict(backend='nccl')
174 | log_level = 'INFO'
175 | load_from = None
176 | resume_from = None
177 | workflow = [('train', 1)]
178 | evaluation = dict(metric=['bbox', 'track'], interval=1)
179 |
--------------------------------------------------------------------------------
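
Since `data.train` is a list in this config, each entry builds its own dataset. A minimal sketch, assuming the `teter` package is installed (importing it registers `BDDVideoDataset` and the `Seq*` pipeline ops with mmdet) and the BDD data is laid out under `data/bdd/` as referenced above:

import teter  # noqa: F401  # registers BDDVideoDataset and Seq* pipeline ops
from mmcv import Config
from mmdet.datasets import build_dataset

cfg = Config.fromfile('configs/bdd100k/cem_bdd.py')
datasets = [build_dataset(d) for d in cfg.data.train]
print(len(datasets[0]))  # number of training images
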
/configs/tao/cem_r101_lvis.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py'
3 | model = dict(
4 | type='TETer',
5 | freeze_detector=False,
6 | backbone=dict(
7 | depth=101,
8 | init_cfg=dict(type='Pretrained',
9 | checkpoint='torchvision://resnet101')),
10 | roi_head=dict(
11 | type='TETerRoIHead',
12 | bbox_head=dict(num_classes=1230),
13 | cem_roi_extractor=dict(
14 | type='SingleRoIExtractor',
15 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
16 | out_channels=256,
17 | featmap_strides=[4, 8, 16, 32]),
18 | cem_head=dict(
19 | type='ClsExemplarHead',
20 | num_convs=4,
21 | num_fcs=3,
22 | embed_channels=1230,
23 | norm_cfg=dict(type='GN', num_groups=32),
24 | loss_track=dict(type='UnbiasedSupConLoss',
25 | temperature=0.07,
26 | contrast_mode='all',
27 | pos_normalize=True,
28 | loss_weight=0.25),
29 | softmax_temp=-1),
30 |
31 | track_head=dict(
32 | type='QuasiDenseEmbedHead',
33 | num_convs=4,
34 | num_fcs=1,
35 | embed_channels=256,
36 | norm_cfg=dict(type='GN', num_groups=32),
37 | loss_track=dict(type='MultiPosCrossEntropyLoss',
38 | loss_weight=0.25,
39 | version='unbiased'),
40 | loss_track_aux=dict(
41 | type='L2Loss',
42 | neg_pos_ub=3,
43 | pos_margin=0,
44 | neg_margin=0.1,
45 | hard_mining=True,
46 | loss_weight=1.0))
47 | ),
48 |
49 | tracker=dict(
50 | type='TETerTAO',
51 | init_score_thr=0.0001,
52 | obj_score_thr=0.0001,
53 | match_score_thr=0.5,
54 | memo_frames=10,
55 | momentum_embed=0.8,
56 | momentum_obj_score=0.5,
57 | match_metric='bisoftmax',
58 | match_with_cosine=True,
59 | contrastive_thr=0.5),
60 |
61 | train_cfg=dict(
62 | cem=dict(
63 | assigner=dict(
64 | type='MaxIoUAssigner',
65 | pos_iou_thr=0.7,
66 | neg_iou_thr=0.3,
67 | min_pos_iou=0.5,
68 | match_low_quality=False,
69 | ignore_iof_thr=-1),
70 | sampler=dict(
71 | type='CombinedSampler',
72 | num=256,
73 | pos_fraction=1,
74 | neg_pos_ub=0,
75 | add_gt_as_proposals=True,
76 | pos_sampler=dict(type='InstanceBalancedPosSampler'),
77 | neg_sampler=dict(type='RandomSampler'))
78 | )
79 | ),
80 |
81 | test_cfg=dict(
82 | rcnn=dict(
83 | score_thr=0.0001,
84 | nms=dict(type='nms', iou_threshold=0.5),
85 | max_per_img=300)
86 | )
87 | )
88 | # dataset settings
89 | img_norm_cfg = dict(
90 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
91 | train_pipeline = [
92 | dict(type='LoadMultiImagesFromFile'),
93 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
94 | # dict(
95 | # type='LoadMultiImagesFromFile',
96 | # file_client_args=dict(
97 | # img_db_path='data/lvis/train_imgs.hdf5',
98 | # backend='hdf5',
99 | # type='lvis')),
100 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
101 | dict(
102 | type='SeqResize',
103 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
104 | (1333, 768), (1333, 800)],
105 | share_params=False,
106 | multiscale_mode='value',
107 | keep_ratio=True),
108 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5),
109 | dict(type='SeqNormalize', **img_norm_cfg),
110 | dict(type='SeqPad', size_divisor=32),
111 | dict(type='SeqDefaultFormatBundle'),
112 | dict(
113 | type='SeqCollect',
114 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
115 | ref_prefix='ref'),
116 | ]
117 |
118 | test_pipeline = [
119 | dict(type='LoadImageFromFile'),
120 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
121 | # dict(type='LoadImageFromFile',
122 | # file_client_args=dict(
123 | # img_db_path='data/tao/tao_val_imgs.hdf5',
124 | # backend='hdf5',
125 | # type='tao')),
126 | dict(
127 | type='MultiScaleFlipAug',
128 | img_scale=(1333, 800),
129 | flip=False,
130 | transforms=[
131 | dict(type='Resize', keep_ratio=True),
132 | dict(type='RandomFlip'),
133 | dict(type='Normalize', **img_norm_cfg),
134 | dict(type='Pad', size_divisor=32),
135 | dict(type='ImageToTensor', keys=['img']),
136 | dict(type='VideoCollect', keys=['img'])
137 | ])
138 | ]
139 |
140 | dataset_type = 'TaoDataset'
141 | data = dict(
142 | samples_per_gpu=2,
143 | workers_per_gpu=2,
144 | train=dict(
145 | _delete_=True,
146 | type='ClassBalancedDataset',
147 | oversample_thr=1e-3,
148 | dataset=dict(
149 | type=dataset_type,
150 | classes='data/lvis/annotations/lvis_classes.txt',
151 | load_as_video=False,
152 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json',
153 | img_prefix='data/lvis/train2017/',
154 | key_img_sampler=dict(interval=1),
155 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
156 | pipeline=train_pipeline)
157 | ),
158 | val=dict(
159 | type=dataset_type,
160 | classes='data/lvis/annotations/lvis_classes.txt',
161 | ann_file='data/tao/annotations/validation_ours.json',
162 | img_prefix='data/tao/frames/',
163 | ref_img_sampler=None,
164 | pipeline=test_pipeline),
165 | test=dict(
166 | type=dataset_type,
167 | classes='data/lvis/annotations/lvis_classes.txt',
168 | ann_file='data/tao/annotations/validation_ours.json',
169 | img_prefix='data/tao/frames/',
170 | ref_img_sampler=None,
171 | pipeline=test_pipeline)
172 |
173 | )
174 |
175 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
176 | optimizer_config = dict(grad_clip=None)
177 | # learning policy
178 | lr_config = dict(
179 | policy='step',
180 | warmup='linear',
181 | warmup_iters=1000,
182 | warmup_ratio=1.0 / 1000,
183 | step=[16, 22])
184 | total_epochs = 24
185 |
186 | # checkpoint saving
187 | checkpoint_config = dict(interval=1)
188 | # yapf:disable
189 | log_config = dict(
190 | interval=50,
191 | hooks=[
192 | dict(type='TextLoggerHook'),
193 | # dict(type='TensorboardLoggerHook')
194 | ])
195 |
196 | dist_params = dict(backend='nccl')
197 | log_level = 'INFO'
198 | load_from = None
199 | resume_from = None
200 | workflow = [('train', 1)]
201 | evaluation = dict(metric=['bbox'], start=1, interval=1, resfile_path='/scratch/cem_lvis/')
202 | work_dir = './saved_models/cem_lvis/'
203 |
--------------------------------------------------------------------------------
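
The CEM head above learns class exemplar embeddings contrastively. For intuition only, below is a minimal SupCon-style loss over L2-normalized embeddings with class labels as supervision; the repository's actual `UnbiasedSupConLoss` (teter/models/losses/unbiased_supcontrat.py) additionally applies the positive normalization toggled by `pos_normalize` above.

import torch
import torch.nn.functional as F

def supcon_style_loss(embeds, labels, temperature=0.07):
    """Sketch only: pull same-class embeddings together, push others apart."""
    embeds = F.normalize(embeds, dim=1)
    logits = embeds @ embeds.t() / temperature
    self_mask = torch.eye(len(labels), device=embeds.device)
    pos_mask = labels[:, None].eq(labels[None, :]).float() - self_mask
    logits = logits - logits.max(dim=1, keepdim=True).values.detach()
    exp_logits = logits.exp() * (1 - self_mask)  # denominator excludes self-pairs
    log_prob = logits - exp_logits.sum(dim=1, keepdim=True).log()
    mean_log_prob_pos = (pos_mask * log_prob).sum(1) / pos_mask.sum(1).clamp(min=1)
    return -mean_log_prob_pos.mean()
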
/configs/tao/cem_swinB_lvis.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py'
3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth' # noqa
4 | model = dict(
5 | type='TETer',
6 | freeze_detector=False,
7 | backbone=dict(
8 | _delete_=True,
9 | type='SwinTransformer',
10 | embed_dims=128,
11 | depths=[2, 2, 18, 2],
12 | num_heads=[4, 8, 16, 32],
13 | window_size=12,
14 | mlp_ratio=4,
15 | qkv_bias=True,
16 | qk_scale=None,
17 | drop_rate=0.,
18 | attn_drop_rate=0.,
19 | drop_path_rate=0.2,
20 | patch_norm=True,
21 | out_indices=(0, 1, 2, 3),
22 | with_cp=False,
23 | convert_weights=True,
24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
25 | neck=dict(in_channels=[128, 256, 512, 1024]),
26 | roi_head=dict(
27 | type='TETerRoIHead',
28 | bbox_head=dict(num_classes=1230),
29 | cem_roi_extractor=dict(
30 | type='SingleRoIExtractor',
31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
32 | out_channels=256,
33 | featmap_strides=[4, 8, 16, 32]),
34 | cem_head=dict(
35 | type='ClsExemplarHead',
36 | num_convs=4,
37 | num_fcs=3,
38 | embed_channels=1230,
39 | norm_cfg=dict(type='GN', num_groups=32),
40 | loss_track=dict(type='UnbiasedSupConLoss',
41 | temperature=0.07,
42 | contrast_mode='all',
43 | pos_normalize=True,
44 | loss_weight=0.25),
45 | softmax_temp=-1),
46 |
47 | track_head=dict(
48 | type='QuasiDenseEmbedHead',
49 | num_convs=4,
50 | num_fcs=1,
51 | embed_channels=256,
52 | norm_cfg=dict(type='GN', num_groups=32),
53 | loss_track=dict(type='MultiPosCrossEntropyLoss',
54 | loss_weight=0.25,
55 | version='unbiased'),
56 | loss_track_aux=dict(
57 | type='L2Loss',
58 | neg_pos_ub=3,
59 | pos_margin=0,
60 | neg_margin=0.1,
61 | hard_mining=True,
62 | loss_weight=1.0))
63 | ),
64 | tracker=dict(
65 | type='TETerTAO',
66 | init_score_thr=0.0001,
67 | obj_score_thr=0.0001,
68 | match_score_thr=0.5,
69 | memo_frames=10,
70 | momentum_embed=0.8,
71 | momentum_obj_score=0.5,
72 | match_metric='bisoftmax',
73 | match_with_cosine=True,
74 | contrastive_thr=0.5,
75 | ),
76 | train_cfg=dict(
77 | cem=dict(
78 | assigner=dict(
79 | type='MaxIoUAssigner',
80 | pos_iou_thr=0.7,
81 | neg_iou_thr=0.3,
82 | min_pos_iou=0.5,
83 | match_low_quality=False,
84 | ignore_iof_thr=-1),
85 | sampler=dict(
86 | type='CombinedSampler',
87 | num=256,
88 | pos_fraction=1,
89 | neg_pos_ub=0,
90 | add_gt_as_proposals=True,
91 | pos_sampler=dict(type='InstanceBalancedPosSampler'),
92 | neg_sampler=dict(type='RandomSampler'))
93 | )
94 | ),
95 |
96 | test_cfg=dict(
97 | rcnn=dict(
98 | score_thr=0.0001,
99 | nms=dict(type='nms', iou_threshold=0.5),
100 | max_per_img=300)
101 | )
102 | )
103 | # dataset settings
104 | img_norm_cfg = dict(
105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
106 | train_pipeline = [
107 | dict(type='LoadMultiImagesFromFile'),
108 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
109 | # dict(
110 | # type='LoadMultiImagesFromFile',
111 | # file_client_args=dict(
112 | # img_db_path='data/lvis/train_imgs.hdf5',
113 | # backend='hdf5',
114 | # type='lvis')),
115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
116 | dict(
117 | type='SeqResize',
118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
119 | (1333, 768), (1333, 800)],
120 | share_params=False,
121 | multiscale_mode='value',
122 | keep_ratio=True),
123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5),
124 | dict(type='SeqNormalize', **img_norm_cfg),
125 | dict(type='SeqPad', size_divisor=32),
126 | dict(type='SeqDefaultFormatBundle'),
127 | dict(
128 | type='SeqCollect',
129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
130 | ref_prefix='ref'),
131 | ]
132 |
133 | test_pipeline = [
134 | dict(type='LoadImageFromFile'),
135 | # dict(type='LoadImageFromFile',
136 | # file_client_args=dict(
137 | # img_db_path='data/tao/tao_val_imgs.hdf5',
138 | # backend='hdf5',
139 | # type='tao')),
140 | dict(
141 | type='MultiScaleFlipAug',
142 | img_scale=(1333, 800),
143 | flip=False,
144 | transforms=[
145 | dict(type='Resize', keep_ratio=True),
146 | dict(type='RandomFlip'),
147 | dict(type='Normalize', **img_norm_cfg),
148 | dict(type='Pad', size_divisor=32),
149 | dict(type='ImageToTensor', keys=['img']),
150 | dict(type='VideoCollect', keys=['img'])
151 | ])
152 | ]
153 |
154 | ## dataset settings
155 | dataset_type = 'TaoDataset'
156 | data = dict(
157 | samples_per_gpu=2,
158 | workers_per_gpu=2,
159 | train=dict(
160 | _delete_=True,
161 | type='ClassBalancedDataset',
162 | oversample_thr=1e-3,
163 | dataset=dict(
164 | type=dataset_type,
165 | classes='data/lvis/annotations/lvis_classes.txt',
166 | load_as_video=False,
167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json',
168 | img_prefix='data/lvis/train2017/',
169 | key_img_sampler=dict(interval=1),
170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
171 | pipeline=train_pipeline)
172 | ),
173 | val=dict(
174 | type=dataset_type,
175 | classes='data/lvis/annotations/lvis_classes.txt',
176 | ann_file='data/tao/annotations/validation_ours.json',
177 | img_prefix='data/tao/frames/',
178 | ref_img_sampler=None,
179 | pipeline=test_pipeline),
180 | test=dict(
181 | type=dataset_type,
182 | classes='data/lvis/annotations/lvis_classes.txt',
183 | ann_file='data/tao/annotations/validation_ours.json',
184 | img_prefix='data/tao/frames/',
185 | ref_img_sampler=None,
186 | pipeline=test_pipeline)
187 |
188 | )
189 | # optimizer
190 | optimizer = dict(
191 | # _delete_=True,
192 | type='AdamW',
193 | lr=0.0001,
194 | betas=(0.9, 0.999),
195 | weight_decay=0.05,
196 | paramwise_cfg=dict(
197 | custom_keys={
198 | 'absolute_pos_embed': dict(decay_mult=0.),
199 | 'relative_position_bias_table': dict(decay_mult=0.),
200 | 'norm': dict(decay_mult=0.)
201 | }))
202 | optimizer_config = dict(grad_clip=None)
203 | # learning policy
204 | lr_config = dict(
205 | policy='step',
206 | warmup='linear',
207 | warmup_iters=1000,
208 | warmup_ratio=0.001,
209 | step=[27, 33])
210 | runner = dict(type='EpochBasedRunner', max_epochs=36)
211 |
212 |
213 | # checkpoint saving
214 | checkpoint_config = dict(interval=1)
215 | # yapf:disable
216 | log_config = dict(
217 | interval=50,
218 | hooks=[
219 | dict(type='TextLoggerHook'),
220 | ])
221 | # yapf:enable
222 | # runtime settings
223 | total_epochs = 36
224 | dist_params = dict(backend='nccl')
225 | log_level = 'INFO'
226 | load_from = None
227 | resume_from = None
228 | workflow = [('train', 1)]
229 | evaluation = dict(metric=['bbox'], start=2, interval=2)
230 |
--------------------------------------------------------------------------------
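
Note the `_delete_=True` in the backbone dict: without it, the ResNet keys from the base config (`depth`, `num_stages`, `frozen_stages`, ...) would be merged into the `SwinTransformer` dict and break the build. A quick check, as a sketch:

from mmcv import Config

cfg = Config.fromfile('configs/tao/cem_swinB_lvis.py')
assert cfg.model.backbone.type == 'SwinTransformer'
assert 'depth' not in cfg.model.backbone  # ResNet keys were dropped
assert cfg.model.neck.in_channels == [128, 256, 512, 1024]
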
/configs/tao/cem_swinL_lvis.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py'
3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth' # noqa
4 | model = dict(
5 | type='TETer',
6 | freeze_detector=False,
7 | backbone=dict(
8 | _delete_=True,
9 | type='SwinTransformer',
10 | embed_dims=192,
11 | depths=[2, 2, 18, 2],
12 | num_heads=[6, 12, 24, 48],
13 | window_size=12,
14 | mlp_ratio=4,
15 | qkv_bias=True,
16 | qk_scale=None,
17 | drop_rate=0.,
18 | attn_drop_rate=0.,
19 | drop_path_rate=0.2,
20 | patch_norm=True,
21 | out_indices=(0, 1, 2, 3),
22 | with_cp=False,
23 | convert_weights=True,
24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
25 | neck=dict(in_channels=[192, 384, 768, 1536]),
26 | roi_head=dict(
27 | type='TETerRoIHead',
28 | bbox_head=dict(num_classes=1230),
29 | cem_roi_extractor=dict(
30 | type='SingleRoIExtractor',
31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
32 | out_channels=256,
33 | featmap_strides=[4, 8, 16, 32]),
34 | cem_head=dict(
35 | type='ClsExemplarHead',
36 | num_convs=4,
37 | num_fcs=3,
38 | embed_channels=1230,
39 | norm_cfg=dict(type='GN', num_groups=32),
40 | loss_track=dict(type='UnbiasedSupConLoss',
41 | temperature=0.07,
42 | contrast_mode='all',
43 | pos_normalize=True,
44 | loss_weight=0.25),
45 | softmax_temp=-1),
46 |
47 | track_head=dict(
48 | type='QuasiDenseEmbedHead',
49 | num_convs=4,
50 | num_fcs=1,
51 | embed_channels=256,
52 | norm_cfg=dict(type='GN', num_groups=32),
53 | loss_track=dict(type='MultiPosCrossEntropyLoss',
54 | loss_weight=0.25,
55 | version='unbiased'),
56 | loss_track_aux=dict(
57 | type='L2Loss',
58 | neg_pos_ub=3,
59 | pos_margin=0,
60 | neg_margin=0.1,
61 | hard_mining=True,
62 | loss_weight=1.0))
63 | ),
64 | tracker=dict(
65 | type='TETerTAO',
66 | init_score_thr=0.0001,
67 | obj_score_thr=0.0001,
68 | match_score_thr=0.5,
69 | memo_frames=10,
70 | momentum_embed=0.8,
71 | momentum_obj_score=0.5,
72 | match_metric='bisoftmax',
73 | match_with_cosine=True,
74 | contrastive_thr=0.5,
75 | ),
76 | train_cfg=dict(
77 | cem=dict(
78 | assigner=dict(
79 | type='MaxIoUAssigner',
80 | pos_iou_thr=0.7,
81 | neg_iou_thr=0.3,
82 | min_pos_iou=0.5,
83 | match_low_quality=False,
84 | ignore_iof_thr=-1),
85 | sampler=dict(
86 | type='CombinedSampler',
87 | num=256,
88 | pos_fraction=1,
89 | neg_pos_ub=0,
90 | add_gt_as_proposals=True,
91 | pos_sampler=dict(type='InstanceBalancedPosSampler'),
92 | neg_sampler=dict(type='RandomSampler'))
93 | )
94 | ),
95 |
96 | test_cfg=dict(
97 | rcnn=dict(
98 | score_thr=0.0001,
99 | nms=dict(type='nms', iou_threshold=0.5),
100 | max_per_img=300)
101 | )
102 | )
103 | # dataset settings
104 | img_norm_cfg = dict(
105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
106 | train_pipeline = [
107 | dict(type='LoadMultiImagesFromFile'),
108 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
109 | # dict(
110 | # type='LoadMultiImagesFromFile',
111 | # file_client_args=dict(
112 | # img_db_path='data/lvis/train_imgs.hdf5',
113 | # backend='hdf5',
114 | # type='lvis')),
115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
116 | dict(
117 | type='SeqResize',
118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
119 | (1333, 768), (1333, 800)],
120 | share_params=False,
121 | multiscale_mode='value',
122 | keep_ratio=True),
123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5),
124 | dict(type='SeqNormalize', **img_norm_cfg),
125 | dict(type='SeqPad', size_divisor=32),
126 | dict(type='SeqDefaultFormatBundle'),
127 | dict(
128 | type='SeqCollect',
129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
130 | ref_prefix='ref'),
131 | ]
132 |
133 | test_pipeline = [
134 | dict(type='LoadImageFromFile'),
135 | # dict(type='LoadImageFromFile',
136 | # file_client_args=dict(
137 | # img_db_path='data/tao/tao_val_imgs.hdf5',
138 | # backend='hdf5',
139 | # type='tao')),
140 | dict(
141 | type='MultiScaleFlipAug',
142 | img_scale=(1333, 800),
143 | flip=False,
144 | transforms=[
145 | dict(type='Resize', keep_ratio=True),
146 | dict(type='RandomFlip'),
147 | dict(type='Normalize', **img_norm_cfg),
148 | dict(type='Pad', size_divisor=32),
149 | dict(type='ImageToTensor', keys=['img']),
150 | dict(type='VideoCollect', keys=['img'])
151 | ])
152 | ]
153 |
154 | ## dataset settings
155 | dataset_type = 'TaoDataset'
156 | data = dict(
157 | samples_per_gpu=2,
158 | workers_per_gpu=2,
159 | train=dict(
160 | _delete_=True,
161 | type='ClassBalancedDataset',
162 | oversample_thr=1e-3,
163 | dataset=dict(
164 | type=dataset_type,
165 | classes='data/lvis/annotations/lvis_classes.txt',
166 | load_as_video=False,
167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json',
168 | img_prefix='data/lvis/train2017/',
169 | key_img_sampler=dict(interval=1),
170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
171 | pipeline=train_pipeline)
172 | ),
173 | val=dict(
174 | type=dataset_type,
175 | classes='data/lvis/annotations/lvis_classes.txt',
176 | ann_file='data/tao/annotations/validation_ours.json',
177 | img_prefix='data/tao/frames/',
178 | ref_img_sampler=None,
179 | pipeline=test_pipeline),
180 | test=dict(
181 | type=dataset_type,
182 | classes='data/lvis/annotations/lvis_classes.txt',
183 | ann_file='data/tao/annotations/validation_ours.json',
184 | img_prefix='data/tao/frames/',
185 | ref_img_sampler=None,
186 | pipeline=test_pipeline)
187 |
188 | )
189 | # optimizer
190 | optimizer = dict(
191 | # _delete_=True,
192 | type='AdamW',
193 | lr=0.0001,
194 | betas=(0.9, 0.999),
195 | weight_decay=0.05,
196 | paramwise_cfg=dict(
197 | custom_keys={
198 | 'absolute_pos_embed': dict(decay_mult=0.),
199 | 'relative_position_bias_table': dict(decay_mult=0.),
200 | 'norm': dict(decay_mult=0.)
201 | }))
202 | optimizer_config = dict(grad_clip=None)
203 | # learning policy
204 | lr_config = dict(
205 | policy='step',
206 | warmup='linear',
207 | warmup_iters=1000,
208 | warmup_ratio=0.001,
209 | step=[27, 33])
210 | runner = dict(type='EpochBasedRunner', max_epochs=36)
211 |
212 |
213 | # checkpoint saving
214 | checkpoint_config = dict(interval=1)
215 | # yapf:disable
216 | log_config = dict(
217 | interval=50,
218 | hooks=[
219 | dict(type='TextLoggerHook'),
220 | ])
221 | # yapf:enable
222 | # runtime settings
223 | total_epochs = 36
224 | dist_params = dict(backend='nccl')
225 | log_level = 'INFO'
226 | load_from = None
227 | resume_from = None
228 | workflow = [('train', 1)]
229 | evaluation = dict(metric=['bbox'], start=2, interval=2)
230 |
--------------------------------------------------------------------------------
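
The optimizer's `custom_keys` follow mmcv's DefaultOptimizerConstructor: any parameter whose name contains one of the keys gets that entry's multipliers, which here disables weight decay for position embeddings, relative-position bias tables, and all norm layers (standard practice for transformer backbones). A simplified sketch of the substring-matching rule:

def decay_mult_for(param_name, custom_keys):
    """Simplified sketch of mmcv's custom_keys matching (substring-based)."""
    for key, opts in custom_keys.items():
        if key in param_name:
            return opts.get('decay_mult', 1.0)
    return 1.0

custom_keys = {
    'absolute_pos_embed': dict(decay_mult=0.),
    'relative_position_bias_table': dict(decay_mult=0.),
    'norm': dict(decay_mult=0.),
}
assert decay_mult_for('backbone.stages.0.blocks.0.norm1.weight', custom_keys) == 0.0
assert decay_mult_for('backbone.stages.0.blocks.0.attn.qkv.weight', custom_keys) == 1.0
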
/configs/tao/cem_swinS_lvis.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py'
3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth' # noqa
4 | model = dict(
5 | type='TETer',
6 | freeze_detector=False,
7 | backbone=dict(
8 | _delete_=True,
9 | type='SwinTransformer',
10 | embed_dims=96,
11 | depths=[2, 2, 18, 2],
12 | num_heads=[3, 6, 12, 24],
13 | window_size=7,
14 | mlp_ratio=4,
15 | qkv_bias=True,
16 | qk_scale=None,
17 | drop_rate=0.,
18 | attn_drop_rate=0.,
19 | drop_path_rate=0.2,
20 | patch_norm=True,
21 | out_indices=(0, 1, 2, 3),
22 | with_cp=False,
23 | convert_weights=True,
24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
25 | neck=dict(in_channels=[96, 192, 384, 768]),
26 | roi_head=dict(
27 | type='TETerRoIHead',
28 | bbox_head=dict(num_classes=1230),
29 | cem_roi_extractor=dict(
30 | type='SingleRoIExtractor',
31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
32 | out_channels=256,
33 | featmap_strides=[4, 8, 16, 32]),
34 | cem_head=dict(
35 | type='ClsExemplarHead',
36 | num_convs=4,
37 | num_fcs=3,
38 | embed_channels=1230,
39 | norm_cfg=dict(type='GN', num_groups=32),
40 | loss_track=dict(type='UnbiasedSupConLoss',
41 | temperature=0.07,
42 | contrast_mode='all',
43 | pos_normalize=True,
44 | loss_weight=0.25),
45 | softmax_temp=-1),
46 |
47 | track_head=dict(
48 | type='QuasiDenseEmbedHead',
49 | num_convs=4,
50 | num_fcs=1,
51 | embed_channels=256,
52 | norm_cfg=dict(type='GN', num_groups=32),
53 | loss_track=dict(type='MultiPosCrossEntropyLoss',
54 | loss_weight=0.25,
55 | version='unbiased'),
56 | loss_track_aux=dict(
57 | type='L2Loss',
58 | neg_pos_ub=3,
59 | pos_margin=0,
60 | neg_margin=0.1,
61 | hard_mining=True,
62 | loss_weight=1.0))
63 | ),
64 | tracker=dict(
65 | type='TETerTAO',
66 | init_score_thr=0.0001,
67 | obj_score_thr=0.0001,
68 | match_score_thr=0.5,
69 | memo_frames=10,
70 | momentum_embed=0.8,
71 | momentum_obj_score=0.5,
72 | match_metric='bisoftmax',
73 | match_with_cosine=True,
74 | contrastive_thr=0.5,
75 | ),
76 | train_cfg=dict(
77 | cem=dict(
78 | assigner=dict(
79 | type='MaxIoUAssigner',
80 | pos_iou_thr=0.7,
81 | neg_iou_thr=0.3,
82 | min_pos_iou=0.5,
83 | match_low_quality=False,
84 | ignore_iof_thr=-1),
85 | sampler=dict(
86 | type='CombinedSampler',
87 | num=256,
88 | pos_fraction=1,
89 | neg_pos_ub=0,
90 | add_gt_as_proposals=True,
91 | pos_sampler=dict(type='InstanceBalancedPosSampler'),
92 | neg_sampler=dict(type='RandomSampler'))
93 | )
94 | ),
95 |
96 | test_cfg=dict(
97 | rcnn=dict(
98 | score_thr=0.0001,
99 | nms=dict(type='nms', iou_threshold=0.5),
100 | max_per_img=300)
101 | )
102 | )
103 | # dataset settings
104 | img_norm_cfg = dict(
105 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
106 | train_pipeline = [
107 | dict(type='LoadMultiImagesFromFile'),
108 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
109 | # dict(
110 | # type='LoadMultiImagesFromFile',
111 | # file_client_args=dict(
112 | # img_db_path='data/lvis/train_imgs.hdf5',
113 | # backend='hdf5',
114 | # type='lvis')),
115 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
116 | dict(
117 | type='SeqResize',
118 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
119 | (1333, 768), (1333, 800)],
120 | share_params=False,
121 | multiscale_mode='value',
122 | keep_ratio=True),
123 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5),
124 | dict(type='SeqNormalize', **img_norm_cfg),
125 | dict(type='SeqPad', size_divisor=32),
126 | dict(type='SeqDefaultFormatBundle'),
127 | dict(
128 | type='SeqCollect',
129 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
130 | ref_prefix='ref'),
131 | ]
132 |
133 | test_pipeline = [
134 | dict(type='LoadImageFromFile'),
135 | # dict(type='LoadImageFromFile',
136 | # file_client_args=dict(
137 | # img_db_path='data/tao/tao_val_imgs.hdf5',
138 | # backend='hdf5',
139 | # type='tao')),
140 | dict(
141 | type='MultiScaleFlipAug',
142 | img_scale=(1333, 800),
143 | flip=False,
144 | transforms=[
145 | dict(type='Resize', keep_ratio=True),
146 | dict(type='RandomFlip'),
147 | dict(type='Normalize', **img_norm_cfg),
148 | dict(type='Pad', size_divisor=32),
149 | dict(type='ImageToTensor', keys=['img']),
150 | dict(type='VideoCollect', keys=['img'])
151 | ])
152 | ]
153 |
154 | ## dataset settings
155 | dataset_type = 'TaoDataset'
156 | data = dict(
157 | samples_per_gpu=2,
158 | workers_per_gpu=2,
159 | train=dict(
160 | _delete_=True,
161 | type='ClassBalancedDataset',
162 | oversample_thr=1e-3,
163 | dataset=dict(
164 | type=dataset_type,
165 | classes='data/lvis/annotations/lvis_classes.txt',
166 | load_as_video=False,
167 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json',
168 | img_prefix='data/lvis/train2017/',
169 | key_img_sampler=dict(interval=1),
170 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
171 | pipeline=train_pipeline)
172 | ),
173 | val=dict(
174 | type=dataset_type,
175 | classes='data/lvis/annotations/lvis_classes.txt',
176 | ann_file='data/tao/annotations/validation_ours.json',
177 | img_prefix='data/tao/frames/',
178 | ref_img_sampler=None,
179 | pipeline=test_pipeline),
180 | test=dict(
181 | type=dataset_type,
182 | classes='data/lvis/annotations/lvis_classes.txt',
183 | ann_file='data/tao/annotations/validation_ours.json',
184 | img_prefix='data/tao/frames/',
185 | ref_img_sampler=None,
186 | pipeline=test_pipeline)
187 |
188 | )
189 | # optimizer
190 | optimizer = dict(
191 | # _delete_=True,
192 | type='AdamW',
193 | lr=0.0001,
194 | betas=(0.9, 0.999),
195 | weight_decay=0.05,
196 | paramwise_cfg=dict(
197 | custom_keys={
198 | 'absolute_pos_embed': dict(decay_mult=0.),
199 | 'relative_position_bias_table': dict(decay_mult=0.),
200 | 'norm': dict(decay_mult=0.)
201 | }))
202 | optimizer_config = dict(grad_clip=None)
203 | # learning policy
204 | lr_config = dict(
205 | policy='step',
206 | warmup='linear',
207 | warmup_iters=1000,
208 | warmup_ratio=0.001,
209 | step=[27, 33])
210 | runner = dict(type='EpochBasedRunner', max_epochs=36)
211 |
212 |
213 | # checkpoint saving
214 | checkpoint_config = dict(interval=1)
215 | # yapf:disable
216 | log_config = dict(
217 | interval=50,
218 | hooks=[
219 | dict(type='TextLoggerHook'),
220 | ])
221 | # yapf:enable
222 | # runtime settings
223 | total_epochs = 36
224 | dist_params = dict(backend='nccl')
225 | log_level = 'INFO'
226 | load_from = None
227 | resume_from = None
228 | workflow = [('train', 1)]
229 | evaluation = dict(metric=['bbox'], start=2, interval=2)
230 |
--------------------------------------------------------------------------------
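
These training configs wrap the dataset in `ClassBalancedDataset` with `oversample_thr=1e-3`, which is mmdetection's repeat-factor sampling from the LVIS paper: a category appearing in fraction f of images gets repeat factor max(1, sqrt(t / f)), and each image is repeated by the maximum factor over the categories it contains. A worked sketch:

import math

def repeat_factor(category_image_freq, oversample_thr=1e-3):
    return max(1.0, math.sqrt(oversample_thr / category_image_freq))

print(repeat_factor(1e-5))  # rare category   -> 10.0 (oversampled ~10x)
print(repeat_factor(1e-3))  # at threshold    -> 1.0
print(repeat_factor(0.2))   # common category -> 1.0 (never undersampled)
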
/configs/tao/cem_swinT_lvis.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = '../_base_/qdtrack_faster_rcnn_r50_fpn.py'
3 | pretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa
4 | model = dict(
5 | type='TETer',
6 | freeze_detector=False,
7 | backbone=dict(
8 | _delete_=True,
9 | type='SwinTransformer',
10 | embed_dims=96,
11 | depths=[2, 2, 6, 2],
12 | num_heads=[3, 6, 12, 24],
13 | window_size=7,
14 | mlp_ratio=4,
15 | qkv_bias=True,
16 | qk_scale=None,
17 | drop_rate=0.,
18 | attn_drop_rate=0.,
19 | drop_path_rate=0.2,
20 | patch_norm=True,
21 | out_indices=(0, 1, 2, 3),
22 | with_cp=False,
23 | convert_weights=True,
24 | init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
25 | neck=dict(in_channels=[96, 192, 384, 768]),
26 | roi_head=dict(
27 | type='TETerRoIHead',
28 | bbox_head=dict(num_classes=1230),
29 | cem_roi_extractor=dict(
30 | type='SingleRoIExtractor',
31 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
32 | out_channels=256,
33 | featmap_strides=[4, 8, 16, 32]),
34 | cem_head=dict(
35 | type='ClsExemplarHead',
36 | num_convs=4,
37 | num_fcs=3,
38 | embed_channels=1230,
39 | norm_cfg=dict(type='GN', num_groups=32),
40 | loss_track=dict(type='UnbiasedSupConLoss',
41 | temperature=0.07,
42 | contrast_mode='all',
43 | pos_normalize=True,
44 | loss_weight=0.25),
45 | softmax_temp=-1),
46 | track_head=dict(
47 | type='QuasiDenseEmbedHead',
48 | num_convs=4,
49 | num_fcs=1,
50 | embed_channels=256,
51 | norm_cfg=dict(type='GN', num_groups=32),
52 | loss_track=dict(type='MultiPosCrossEntropyLoss',
53 | loss_weight=0.25,
54 | version='unbiased'),
55 | loss_track_aux=dict(
56 | type='L2Loss',
57 | neg_pos_ub=3,
58 | pos_margin=0,
59 | neg_margin=0.1,
60 | hard_mining=True,
61 | loss_weight=1.0))
62 | ),
63 | tracker=dict(
64 | type='TETerTAO',
65 | init_score_thr=0.0001,
66 | obj_score_thr=0.0001,
67 | match_score_thr=0.5,
68 | memo_frames=10,
69 | momentum_embed=0.8,
70 | momentum_obj_score=0.5,
71 | match_metric='bisoftmax',
72 | match_with_cosine=True,
73 | contrastive_thr=0.5,
74 | ),
75 | train_cfg=dict(
76 | cem=dict(
77 | assigner=dict(
78 | type='MaxIoUAssigner',
79 | pos_iou_thr=0.7,
80 | neg_iou_thr=0.3,
81 | min_pos_iou=0.5,
82 | match_low_quality=False,
83 | ignore_iof_thr=-1),
84 | sampler=dict(
85 | type='CombinedSampler',
86 | num=256,
87 | pos_fraction=1,
88 | neg_pos_ub=0,
89 | add_gt_as_proposals=True,
90 | pos_sampler=dict(type='InstanceBalancedPosSampler'),
91 | neg_sampler=dict(type='RandomSampler'))
92 | )
93 | ),
94 |
95 | test_cfg=dict(
96 | rcnn=dict(
97 | score_thr=0.0001,
98 | nms=dict(type='nms', iou_threshold=0.5),
99 | max_per_img=300)
100 | )
101 | )
102 | # dataset settings
103 | img_norm_cfg = dict(
104 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
105 | train_pipeline = [
106 | dict(type='LoadMultiImagesFromFile'),
107 | # To read images from an hdf5 file instead, comment out the line above and uncomment the lines below.
108 | # dict(
109 | # type='LoadMultiImagesFromFile',
110 | # file_client_args=dict(
111 | # img_db_path='data/lvis/train_imgs.hdf5',
112 | # backend='hdf5',
113 | # type='lvis')),
114 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
115 | dict(
116 | type='SeqResize',
117 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
118 | (1333, 768), (1333, 800)],
119 | share_params=False,
120 | multiscale_mode='value',
121 | keep_ratio=True),
122 | dict(type='SeqRandomFlip', share_params=False, flip_ratio=0.5),
123 | dict(type='SeqNormalize', **img_norm_cfg),
124 | dict(type='SeqPad', size_divisor=32),
125 | dict(type='SeqDefaultFormatBundle'),
126 | dict(
127 | type='SeqCollect',
128 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
129 | ref_prefix='ref'),
130 | ]
131 |
132 | test_pipeline = [
133 | dict(type='LoadImageFromFile'),
134 | # dict(type='LoadImageFromFile',
135 | # file_client_args=dict(
136 | # img_db_path='data/tao/tao_val_imgs.hdf5',
137 | # backend='hdf5',
138 | # type='tao')),
139 | dict(
140 | type='MultiScaleFlipAug',
141 | img_scale=(1333, 800),
142 | flip=False,
143 | transforms=[
144 | dict(type='Resize', keep_ratio=True),
145 | dict(type='RandomFlip'),
146 | dict(type='Normalize', **img_norm_cfg),
147 | dict(type='Pad', size_divisor=32),
148 | dict(type='ImageToTensor', keys=['img']),
149 | dict(type='VideoCollect', keys=['img'])
150 | ])
151 | ]
152 |
153 | ## dataset settings
154 | dataset_type = 'TaoDataset'
155 | data = dict(
156 | samples_per_gpu=2,
157 | workers_per_gpu=2,
158 | train=dict(
159 | _delete_=True,
160 | type='ClassBalancedDataset',
161 | oversample_thr=1e-3,
162 | dataset=dict(
163 | type=dataset_type,
164 | classes='data/lvis/annotations/lvis_classes.txt',
165 | load_as_video=False,
166 | ann_file='data/lvis/annotations/lvisv0.5+coco_train.json',
167 | img_prefix='data/lvis/train2017/',
168 | key_img_sampler=dict(interval=1),
169 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
170 | pipeline=train_pipeline)
171 | ),
172 | val=dict(
173 | type=dataset_type,
174 | classes='data/lvis/annotations/lvis_classes.txt',
175 | ann_file='data/tao/annotations/validation_ours.json',
176 | img_prefix='data/tao/frames/',
177 | ref_img_sampler=None,
178 | pipeline=test_pipeline),
179 | test=dict(
180 | type=dataset_type,
181 | classes='data/lvis/annotations/lvis_classes.txt',
182 | ann_file='data/tao/annotations/validation_ours.json',
183 | img_prefix='data/tao/frames/',
184 | ref_img_sampler=None,
185 | pipeline=test_pipeline)
186 |
187 | )
188 | # optimizer
189 | optimizer = dict(
190 | # _delete_=True,
191 | type='AdamW',
192 | lr=0.0001,
193 | betas=(0.9, 0.999),
194 | weight_decay=0.05,
195 | paramwise_cfg=dict(
196 | custom_keys={
197 | 'absolute_pos_embed': dict(decay_mult=0.),
198 | 'relative_position_bias_table': dict(decay_mult=0.),
199 | 'norm': dict(decay_mult=0.)
200 | }))
201 | optimizer_config = dict(grad_clip=None)
202 | # learning policy
203 | lr_config = dict(
204 | policy='step',
205 | warmup='linear',
206 | warmup_iters=1000,
207 | warmup_ratio=0.001,
208 | step=[27, 33])
209 | runner = dict(type='EpochBasedRunner', max_epochs=36)
210 |
211 |
212 | # checkpoint saving
213 | checkpoint_config = dict(interval=1)
214 | # yapf:disable
215 | log_config = dict(
216 | interval=50,
217 | hooks=[
218 | dict(type='TextLoggerHook'),
219 | ])
220 | # yapf:enable
221 | # runtime settings
222 | total_epochs = 36
223 | dist_params = dict(backend='nccl')
224 | log_level = 'INFO'
225 | load_from = None
226 | resume_from = None
227 | workflow = [('train', 1)]
228 | evaluation = dict(metric=['bbox'], start=2, interval=2)
229 |
--------------------------------------------------------------------------------
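
The four cem_swin*_lvis.py files differ only in backbone capacity and pretrained checkpoint; everything else is shared. Summarizing the variants, with the FPN `in_channels` derived from `embed_dims` C as [C, 2C, 4C, 8C]:

# Backbone settings used across the cem_swin*_lvis.py configs.
swin_variants = {
    'tiny':  dict(embed_dims=96,  depths=[2, 2, 6, 2],  num_heads=[3, 6, 12, 24], window_size=7),
    'small': dict(embed_dims=96,  depths=[2, 2, 18, 2], num_heads=[3, 6, 12, 24], window_size=7),
    'base':  dict(embed_dims=128, depths=[2, 2, 18, 2], num_heads=[4, 8, 16, 32], window_size=12),
    'large': dict(embed_dims=192, depths=[2, 2, 18, 2], num_heads=[6, 12, 24, 48], window_size=12),
}

def fpn_in_channels(embed_dims):
    return [embed_dims * m for m in (1, 2, 4, 8)]

assert fpn_in_channels(128) == [128, 256, 512, 1024]  # cem_swinB_lvis.py
assert fpn_in_channels(192) == [192, 384, 768, 1536]  # cem_swinL_lvis.py
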
/configs/tao/tracker_r101_tao.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = './cem_r101_lvis.py'
3 | model = dict(
4 | freeze_detector=True,
5 | freeze_cem=True,
6 | method='teter',
7 | roi_head=dict(bbox_head=dict(num_classes=1230),
8 | track_head=dict(
9 | type='QuasiDenseEmbedHead',
10 | num_convs=4,
11 | num_fcs=1,
12 | embed_channels=256,
13 | norm_cfg=dict(type='GN', num_groups=32),
14 | loss_track=dict(type='MultiPosCrossEntropyLoss', loss_weight=0.25),
15 | loss_track_aux=dict(
16 | type='L2Loss',
17 | neg_pos_ub=3,
18 | pos_margin=0,
19 | neg_margin=0.1,
20 | hard_mining=True,
21 | loss_weight=1.0))
22 | ),
23 |
24 | test_cfg=dict(
25 | rcnn=dict(
26 | score_thr=0.0001,
27 | nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True, split_thr=100000),
28 | max_per_img=50)
29 | )
30 | )
31 |
32 | # dataset settings
33 | img_norm_cfg = dict(
34 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
35 | train_pipeline = [
36 | dict(type='LoadMultiImagesFromFile'),
37 | # dict(
38 | # type='LoadMultiImagesFromFile',
39 | # file_client_args=dict(
40 | # img_db_path='data/tao/tao_train_imgs.hdf5',
41 | # backend='hdf5',
42 | # type='tao')),
43 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
44 | dict(
45 | type='SeqResize',
46 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
47 | (1333, 768), (1333, 800)],
48 | share_params=True,
49 | multiscale_mode='value',
50 | keep_ratio=True),
51 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
52 | dict(type='SeqNormalize', **img_norm_cfg),
53 | dict(type='SeqPad', size_divisor=32),
54 | dict(type='SeqDefaultFormatBundle'),
55 | dict(
56 | type='SeqCollect',
57 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
58 | ref_prefix='ref'),
59 | ]
60 |
61 | test_pipeline = [
62 | dict(type='LoadImageFromFile'),
63 | # dict(type='LoadImageFromFile',
64 | # file_client_args=dict(
65 | # img_db_path='data/tao/tao_val_imgs.hdf5',
66 | # backend='hdf5',
67 | # type='tao')),
68 | dict(
69 | type='MultiScaleFlipAug',
70 | img_scale=(1333, 800),
71 | flip=False,
72 | transforms=[
73 | dict(type='Resize', keep_ratio=True),
74 | dict(type='RandomFlip'),
75 | dict(type='Normalize', **img_norm_cfg),
76 | dict(type='Pad', size_divisor=32),
77 | dict(type='ImageToTensor', keys=['img']),
78 | dict(type='VideoCollect', keys=['img'])
79 | ])
80 | ]
81 | dataset_type = 'TaoDataset'
82 | data = dict(
83 | samples_per_gpu=2,
84 | workers_per_gpu=2,
85 | train=dict(
86 | _delete_=True,
87 | type='ClassBalancedDataset',
88 | oversample_thr=1e-3,
89 | dataset=dict(
90 | type=dataset_type,
91 | classes='data/lvis/annotations/lvis_classes.txt',
92 | ann_file='data/tao/annotations/train_ours.json',
93 | img_prefix='data/tao/frames/',
94 | key_img_sampler=dict(interval=1),
95 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
96 | pipeline=train_pipeline)),
97 | val=dict(
98 | type=dataset_type,
99 | classes='data/lvis/annotations/lvis_classes.txt',
100 | ann_file='data/tao/annotations/validation_ours.json',
101 | img_prefix='data/tao/frames/',
102 | ref_img_sampler=None,
103 | pipeline=test_pipeline),
104 | test=dict(
105 | type=dataset_type,
106 | classes='data/lvis/annotations/lvis_classes.txt',
107 | ann_file='data/tao/annotations/validation_ours.json',
108 | img_prefix='data/tao/frames/',
109 | ref_img_sampler=None,
110 | pipeline=test_pipeline)
111 | )
112 | optimizer = dict(type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001)
113 | lr_config = dict(
114 | policy='step',
115 | warmup='linear',
116 | warmup_iters=1000,
117 | warmup_ratio=1.0 / 1000,
118 | step=[8, 11])
119 | total_epochs = 12
120 | load_from = None
121 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/')
122 | work_dir = './saved_models/teter_r101/'
123 |
--------------------------------------------------------------------------------
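
This tracker config freezes the detector and CEM weights from the matching cem_*_lvis.py stage (expected to be supplied via a checkpoint; `load_from` is left None here) and trains only the instance-association head on TAO. It also selects `match_metric='bisoftmax'` with `match_with_cosine=True`: in QDTrack-style trackers the detection-to-memory similarity matrix is softmaxed along both axes and averaged, optionally blended with plain cosine similarity. A sketch of the idea, not the repository's exact implementation (which lives under teter/models/trackers/):

import torch
import torch.nn.functional as F

def bisoftmax_match_scores(det_embeds, memo_embeds, with_cosine=True):
    """Sketch of bi-directional softmax matching."""
    dot = det_embeds @ memo_embeds.t()
    scores = (dot.softmax(dim=1) + dot.softmax(dim=0)) / 2  # det->memo, memo->det
    if with_cosine:
        cos = F.normalize(det_embeds, dim=1) @ F.normalize(memo_embeds, dim=1).t()
        scores = (scores + cos) / 2
    return scores  # entries above match_score_thr become track assignments
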
/configs/tao/tracker_swinB_tao.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = './cem_swinB_lvis.py'
3 | model = dict(
4 | freeze_detector=True,
5 | freeze_cem=True,
6 | method='teter',
7 | roi_head=dict(
8 | bbox_head=dict(num_classes=1230),
9 | track_head=dict(
10 | type='QuasiDenseEmbedHead',
11 | num_convs=4,
12 | num_fcs=1,
13 | embed_channels=256,
14 | norm_cfg=dict(type='GN', num_groups=32),
15 | loss_track=dict(type='MultiPosCrossEntropyLoss',
16 | loss_weight=0.25),
17 | loss_track_aux=dict(
18 | type='L2Loss',
19 | neg_pos_ub=3,
20 | pos_margin=0,
21 | neg_margin=0.1,
22 | hard_mining=True,
23 | loss_weight=1.0)
24 | )),
25 |
26 |
27 | test_cfg=dict(
28 | rcnn=dict(
29 | score_thr=0.0001,
30 | nms=dict(type='nms',
31 | iou_threshold=0.5,
32 | class_agnostic=True,
33 | split_thr=100000),
34 | max_per_img=50)
35 | )
36 | )
37 |
38 | # dataset settings
39 | img_norm_cfg = dict(
40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
41 | train_pipeline = [
42 | dict(type='LoadMultiImagesFromFile'),
43 | # dict(
44 | # type='LoadMultiImagesFromFile',
45 | # file_client_args=dict(
46 | # img_db_path='data/tao/tao_train_imgs.hdf5',
47 | # backend='hdf5',
48 | # type='tao')),
49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
50 | dict(
51 | type='SeqResize',
52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
53 | (1333, 768), (1333, 800)],
54 | share_params=True,
55 | multiscale_mode='value',
56 | keep_ratio=True),
57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
58 | dict(type='SeqNormalize', **img_norm_cfg),
59 | dict(type='SeqPad', size_divisor=32),
60 | dict(type='SeqDefaultFormatBundle'),
61 | dict(
62 | type='SeqCollect',
63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
64 | ref_prefix='ref'),
65 | ]
66 |
67 | test_pipeline = [
68 | dict(type='LoadImageFromFile'),
69 | # dict(type='LoadImageFromFile',
70 | # file_client_args=dict(
71 | # img_db_path='data/tao/tao_val_imgs.hdf5',
72 | # backend='hdf5',
73 | # type='tao')),
74 | dict(
75 | type='MultiScaleFlipAug',
76 | img_scale=(1333, 800),
77 | flip=False,
78 | transforms=[
79 | dict(type='Resize', keep_ratio=True),
80 | dict(type='RandomFlip'),
81 | dict(type='Normalize', **img_norm_cfg),
82 | dict(type='Pad', size_divisor=32),
83 | dict(type='ImageToTensor', keys=['img']),
84 | dict(type='VideoCollect', keys=['img'])
85 | ])
86 | ]
87 | dataset_type = 'TaoDataset'
88 | data = dict(
89 | samples_per_gpu=2,
90 | workers_per_gpu=2,
91 | train=dict(
92 | _delete_=True,
93 | type='ClassBalancedDataset',
94 | oversample_thr=1e-3,
95 | dataset=dict(
96 | type=dataset_type,
97 | classes='data/lvis/annotations/lvis_classes.txt',
98 | ann_file='data/tao/annotations/train_ours.json',
99 | img_prefix='data/tao/frames/',
100 | key_img_sampler=dict(interval=1),
101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
102 | pipeline=train_pipeline)),
103 | val=dict(
104 | type=dataset_type,
105 | classes='data/lvis/annotations/lvis_classes.txt',
106 | ann_file='data/tao/annotations/validation_ours.json',
107 | img_prefix='data/tao/frames/',
108 | ref_img_sampler=None,
109 | pipeline=test_pipeline),
110 | test=dict(
111 | type=dataset_type,
112 | classes='data/lvis/annotations/lvis_classes.txt',
113 | ann_file='data/tao/annotations/validation_ours.json',
114 | img_prefix='data/tao/frames/',
115 | ref_img_sampler=None,
116 | pipeline=test_pipeline)
117 | )
118 | optimizer = dict(
119 | _delete_=True,
120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001)
121 | lr_config = dict(
122 | _delete_=True,
123 | policy='step',
124 | warmup='linear',
125 | warmup_iters=1000,
126 | warmup_ratio=1.0 / 1000,
127 | step=[8, 11])
128 | total_epochs = 12
129 | load_from = None
130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/')
131 | work_dir = './saved_models/teter_swinB/'
132 |
--------------------------------------------------------------------------------
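
Unlike the LVIS training configs, the tracker test_cfg uses class-agnostic NMS (`class_agnostic=True`) and caps output at 50 boxes per image: with 1230 classes, per-class NMS would keep near-duplicate boxes whenever the detector is unsure of the label. A small sketch with mmcv:

import torch
from mmcv.ops import batched_nms

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [0.5, 0.5, 10.5, 10.5]])  # two boxes on the same object
scores = torch.tensor([0.9, 0.8])
labels = torch.tensor([3, 7])                   # different predicted classes

# Per-class NMS would keep both; class-agnostic NMS suppresses the duplicate.
_, keep = batched_nms(boxes, scores, labels,
                      dict(type='nms', iou_threshold=0.5, class_agnostic=True))
print(keep)  # tensor([0])
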
/configs/tao/tracker_swinL_tao.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = './cem_swinL_lvis.py'
3 | model = dict(
4 | freeze_detector=True,
5 | freeze_cem=True,
6 | method='teter',
7 | roi_head=dict(
8 | bbox_head=dict(num_classes=1230),
9 | track_head=dict(
10 | type='QuasiDenseEmbedHead',
11 | num_convs=4,
12 | num_fcs=1,
13 | embed_channels=256,
14 | norm_cfg=dict(type='GN', num_groups=32),
15 | loss_track=dict(type='MultiPosCrossEntropyLoss',
16 | loss_weight=0.25),
17 | loss_track_aux=dict(
18 | type='L2Loss',
19 | neg_pos_ub=3,
20 | pos_margin=0,
21 | neg_margin=0.1,
22 | hard_mining=True,
23 | loss_weight=1.0)
24 | )),
25 |
26 |
27 | test_cfg=dict(
28 | rcnn=dict(
29 | score_thr=0.0001,
30 | nms=dict(type='nms',
31 | iou_threshold=0.5,
32 | class_agnostic=True,
33 | split_thr=100000),
34 | max_per_img=50)
35 | )
36 | )
37 |
38 | # dataset settings
39 | img_norm_cfg = dict(
40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
41 | train_pipeline = [
42 | dict(type='LoadMultiImagesFromFile'),
43 | # dict(
44 | # type='LoadMultiImagesFromFile',
45 | # file_client_args=dict(
46 | # img_db_path='data/tao/tao_train_imgs.hdf5',
47 | # backend='hdf5',
48 | # type='tao')),
49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
50 | dict(
51 | type='SeqResize',
52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
53 | (1333, 768), (1333, 800)],
54 | share_params=True,
55 | multiscale_mode='value',
56 | keep_ratio=True),
57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
58 | dict(type='SeqNormalize', **img_norm_cfg),
59 | dict(type='SeqPad', size_divisor=32),
60 | dict(type='SeqDefaultFormatBundle'),
61 | dict(
62 | type='SeqCollect',
63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
64 | ref_prefix='ref'),
65 | ]
66 |
67 | test_pipeline = [
68 | dict(type='LoadImageFromFile'),
69 | # dict(type='LoadImageFromFile',
70 | # file_client_args=dict(
71 | # img_db_path='data/tao/tao_val_imgs.hdf5',
72 | # backend='hdf5',
73 | # type='tao')),
74 | dict(
75 | type='MultiScaleFlipAug',
76 | img_scale=(1333, 800),
77 | flip=False,
78 | transforms=[
79 | dict(type='Resize', keep_ratio=True),
80 | dict(type='RandomFlip'),
81 | dict(type='Normalize', **img_norm_cfg),
82 | dict(type='Pad', size_divisor=32),
83 | dict(type='ImageToTensor', keys=['img']),
84 | dict(type='VideoCollect', keys=['img'])
85 | ])
86 | ]
87 | dataset_type = 'TaoDataset'
88 | data = dict(
89 | samples_per_gpu=2,
90 | workers_per_gpu=2,
91 | train=dict(
92 | _delete_=True,
93 | type='ClassBalancedDataset',
94 | oversample_thr=1e-3,
95 | dataset=dict(
96 | type=dataset_type,
97 | classes='data/lvis/annotations/lvis_classes.txt',
98 | ann_file='data/tao/annotations/train_ours.json',
99 | img_prefix='data/tao/frames/',
100 | key_img_sampler=dict(interval=1),
101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
102 | pipeline=train_pipeline)),
103 | val=dict(
104 | type=dataset_type,
105 | classes='data/lvis/annotations/lvis_classes.txt',
106 | ann_file='data/tao/annotations/validation_ours.json',
107 | img_prefix='data/tao/frames/',
108 | ref_img_sampler=None,
109 | pipeline=test_pipeline),
110 | test=dict(
111 | type=dataset_type,
112 | classes='data/lvis/annotations/lvis_classes.txt',
113 | ann_file='data/tao/annotations/validation_ours.json',
114 | img_prefix='data/tao/frames/',
115 | ref_img_sampler=None,
116 | pipeline=test_pipeline)
117 | )
118 | optimizer = dict(
119 | _delete_=True,
120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001)
121 | lr_config = dict(
122 | _delete_=True,
123 | policy='step',
124 | warmup='linear',
125 | warmup_iters=1000,
126 | warmup_ratio=1.0 / 1000,
127 | step=[8, 11])
128 | total_epochs = 12
129 | load_from = None
130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/')
131 | work_dir = './saved_models/teter_swinL/'
132 |
--------------------------------------------------------------------------------
/configs/tao/tracker_swinS_tao.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = './cem_swinS_lvis.py'
3 | model = dict(
4 | freeze_detector=True,
5 | freeze_cem=True,
6 | method='teter',
7 | roi_head=dict(
8 | bbox_head=dict(num_classes=1230),
9 | track_head=dict(
10 | type='QuasiDenseEmbedHead',
11 | num_convs=4,
12 | num_fcs=1,
13 | embed_channels=256,
14 | norm_cfg=dict(type='GN', num_groups=32),
15 | loss_track=dict(type='MultiPosCrossEntropyLoss',
16 | loss_weight=0.25),
17 | loss_track_aux=dict(
18 | type='L2Loss',
19 | neg_pos_ub=3,
20 | pos_margin=0,
21 | neg_margin=0.1,
22 | hard_mining=True,
23 | loss_weight=1.0)
24 | )),
25 |
26 |
27 | test_cfg=dict(
28 | rcnn=dict(
29 | score_thr=0.0001,
30 | nms=dict(type='nms',
31 | iou_threshold=0.5,
32 | class_agnostic=True,
33 | split_thr=100000),
34 | max_per_img=50)
35 | )
36 | )
37 |
38 | # dataset settings
39 | img_norm_cfg = dict(
40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
41 | train_pipeline = [
42 | dict(type='LoadMultiImagesFromFile'),
43 | # dict(
44 | # type='LoadMultiImagesFromFile',
45 | # file_client_args=dict(
46 | # img_db_path='data/tao/tao_train_imgs.hdf5',
47 | # backend='hdf5',
48 | # type='tao')),
49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
50 | dict(
51 | type='SeqResize',
52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
53 | (1333, 768), (1333, 800)],
54 | share_params=True,
55 | multiscale_mode='value',
56 | keep_ratio=True),
57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
58 | dict(type='SeqNormalize', **img_norm_cfg),
59 | dict(type='SeqPad', size_divisor=32),
60 | dict(type='SeqDefaultFormatBundle'),
61 | dict(
62 | type='SeqCollect',
63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
64 | ref_prefix='ref'),
65 | ]
66 |
67 | test_pipeline = [
68 | dict(type='LoadImageFromFile'),
69 | # dict(type='LoadImageFromFile',
70 | # file_client_args=dict(
71 | # img_db_path='data/tao/tao_val_imgs.hdf5',
72 | # backend='hdf5',
73 | # type='tao')),
74 | dict(
75 | type='MultiScaleFlipAug',
76 | img_scale=(1333, 800),
77 | flip=False,
78 | transforms=[
79 | dict(type='Resize', keep_ratio=True),
80 | dict(type='RandomFlip'),
81 | dict(type='Normalize', **img_norm_cfg),
82 | dict(type='Pad', size_divisor=32),
83 | dict(type='ImageToTensor', keys=['img']),
84 | dict(type='VideoCollect', keys=['img'])
85 | ])
86 | ]
87 | dataset_type = 'TaoDataset'
88 | data = dict(
89 | samples_per_gpu=2,
90 | workers_per_gpu=2,
91 | train=dict(
92 | _delete_=True,
93 | type='ClassBalancedDataset',
94 | oversample_thr=1e-3,
95 | dataset=dict(
96 | type=dataset_type,
97 | classes='data/lvis/annotations/lvis_classes.txt',
98 | ann_file='data/tao/annotations/train_ours.json',
99 | img_prefix='data/tao/frames/',
100 | key_img_sampler=dict(interval=1),
101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
102 | pipeline=train_pipeline)),
103 | val=dict(
104 | type=dataset_type,
105 | classes='data/lvis/annotations/lvis_classes.txt',
106 | ann_file='data/tao/annotations/validation_ours.json',
107 | img_prefix='data/tao/frames/',
108 | ref_img_sampler=None,
109 | pipeline=test_pipeline),
110 | test=dict(
111 | type=dataset_type,
112 | classes='data/lvis/annotations/lvis_classes.txt',
113 | ann_file='data/tao/annotations/validation_ours.json',
114 | img_prefix='data/tao/frames/',
115 | ref_img_sampler=None,
116 | pipeline=test_pipeline)
117 | )
118 | optimizer = dict(
119 | _delete_=True,
120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001)
121 | lr_config = dict(
122 | _delete_=True,
123 | policy='step',
124 | warmup='linear',
125 | warmup_iters=1000,
126 | warmup_ratio=1.0 / 1000,
127 | step=[8, 11])
128 | total_epochs = 12
129 | load_from = None
130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/')
131 | work_dir = './saved_models/teter_swinS/'
132 |
--------------------------------------------------------------------------------
/configs/tao/tracker_swinT_tao.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | _base_ = './cem_swinT_lvis.py'
3 | model = dict(
4 | freeze_detector=True,
5 | freeze_cem=True,
6 | method='teter',
7 | roi_head=dict(
8 | bbox_head=dict(num_classes=1230),
9 | track_head=dict(
10 | type='QuasiDenseEmbedHead',
11 | num_convs=4,
12 | num_fcs=1,
13 | embed_channels=256,
14 | norm_cfg=dict(type='GN', num_groups=32),
15 | loss_track=dict(type='MultiPosCrossEntropyLoss',
16 | loss_weight=0.25),
17 | loss_track_aux=dict(
18 | type='L2Loss',
19 | neg_pos_ub=3,
20 | pos_margin=0,
21 | neg_margin=0.1,
22 | hard_mining=True,
23 | loss_weight=1.0)
24 | )),
25 |
26 |
27 | test_cfg=dict(
28 | rcnn=dict(
29 | score_thr=0.0001,
30 | nms=dict(type='nms',
31 | iou_threshold=0.5,
32 | class_agnostic=True,
33 | split_thr=100000),
34 | max_per_img=50)
35 | )
36 | )
37 |
38 | # dataset settings
39 | img_norm_cfg = dict(
40 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
41 | train_pipeline = [
42 | dict(type='LoadMultiImagesFromFile'),
43 | # dict(
44 | # type='LoadMultiImagesFromFile',
45 | # file_client_args=dict(
46 | # img_db_path='data/tao/tao_train_imgs.hdf5',
47 | # backend='hdf5',
48 | # type='tao')),
49 | dict(type='SeqLoadAnnotations', with_bbox=True, with_ins_id=True),
50 | dict(
51 | type='SeqResize',
52 | img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
53 | (1333, 768), (1333, 800)],
54 | share_params=True,
55 | multiscale_mode='value',
56 | keep_ratio=True),
57 | dict(type='SeqRandomFlip', share_params=True, flip_ratio=0.5),
58 | dict(type='SeqNormalize', **img_norm_cfg),
59 | dict(type='SeqPad', size_divisor=32),
60 | dict(type='SeqDefaultFormatBundle'),
61 | dict(
62 | type='SeqCollect',
63 | keys=['img', 'gt_bboxes', 'gt_labels', 'gt_match_indices'],
64 | ref_prefix='ref'),
65 | ]
66 |
67 | test_pipeline = [
68 | dict(type='LoadImageFromFile'),
69 | # dict(type='LoadImageFromFile',
70 | # file_client_args=dict(
71 | # img_db_path='data/tao/tao_val_imgs.hdf5',
72 | # backend='hdf5',
73 | # type='tao')),
74 | dict(
75 | type='MultiScaleFlipAug',
76 | img_scale=(1333, 800),
77 | flip=False,
78 | transforms=[
79 | dict(type='Resize', keep_ratio=True),
80 | dict(type='RandomFlip'),
81 | dict(type='Normalize', **img_norm_cfg),
82 | dict(type='Pad', size_divisor=32),
83 | dict(type='ImageToTensor', keys=['img']),
84 | dict(type='VideoCollect', keys=['img'])
85 | ])
86 | ]
87 | dataset_type = 'TaoDataset'
88 | data = dict(
89 | samples_per_gpu=2,
90 | workers_per_gpu=2,
91 | train=dict(
92 | _delete_=True,
93 | type='ClassBalancedDataset',
94 | oversample_thr=1e-3,
95 | dataset=dict(
96 | type=dataset_type,
97 | classes='data/lvis/annotations/lvis_classes.txt',
98 | ann_file='data/tao/annotations/train_ours.json',
99 | img_prefix='data/tao/frames/',
100 | key_img_sampler=dict(interval=1),
101 | ref_img_sampler=dict(num_ref_imgs=1, scope=1, method='uniform'),
102 | pipeline=train_pipeline)),
103 | val=dict(
104 | type=dataset_type,
105 | classes='data/lvis/annotations/lvis_classes.txt',
106 | ann_file='data/tao/annotations/validation_ours.json',
107 | img_prefix='data/tao/frames/',
108 | ref_img_sampler=None,
109 | pipeline=test_pipeline),
110 | test=dict(
111 | type=dataset_type,
112 | classes='data/lvis/annotations/lvis_classes.txt',
113 | ann_file='data/tao/annotations/validation_ours.json',
114 | img_prefix='data/tao/frames/',
115 | ref_img_sampler=None,
116 | pipeline=test_pipeline)
117 | )
118 | optimizer = dict(
119 | _delete_=True,
120 | type='SGD', lr=0.002, momentum=0.9, weight_decay=0.0001)
121 | lr_config = dict(
122 | _delete_=True,
123 | policy='step',
124 | warmup='linear',
125 | warmup_iters=1000,
126 | warmup_ratio=1.0 / 1000,
127 | step=[8, 11])
128 | total_epochs = 12
129 | load_from = None
130 | evaluation = dict(metric=['track'], start=8, interval=1, resfile_path='/scratch/tmp/')
131 | work_dir = './saved_models/teter_swinT/'
132 |
--------------------------------------------------------------------------------
/docs/INSTALL.md:
--------------------------------------------------------------------------------
1 | ## Installation
2 | TETer builds upon the mmdetection framework.
3 | Please install the following packages.
4 |
5 | ### Requirements
6 | - [pytorch >= 1.10](https://pytorch.org/get-started/locally/)
7 | - [mmcv-full == 1.4.4](https://github.com/open-mmlab/mmcv)
8 | - [mmdetection == 2.23.0](https://github.com/open-mmlab/mmdetection)
9 |
10 |
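A possible sequence (a sketch that assumes CUDA 11.3 and PyTorch 1.10; adjust the versions and the mmcv find-links URL to your setup):

```shell
# Install PyTorch first; see https://pytorch.org/get-started/locally/
# for the command matching your CUDA setup.
pip install torch==1.10.0 torchvision==0.11.1

# mmcv-full wheels are indexed per CUDA/PyTorch combination.
pip install mmcv-full==1.4.4 -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10/index.html

# mmdetection and the remaining dependencies.
pip install mmdet==2.23.0
pip install -r requirements.txt
```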
11 | ### Install TETA
12 |
13 | Please refer to [TETA](../teta/README.md)
14 |
15 |
--------------------------------------------------------------------------------
/figures/teaser-teter.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/figures/teaser-teter.png
--------------------------------------------------------------------------------
/figures/teta-teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/figures/teta-teaser.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | lvis
2 | motmetrics
3 | numpy
4 | pycocotools
5 | seaborn
6 | tqdm
7 | timm
8 | h5py
9 | git+https://github.com/bdd100k/bdd100k.git
10 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | line_length = 79
3 | multi_line_output = 0
4 | known_standard_library = setuptools
5 | known_first_party = teter
6 | known_third_party = cv2,mmcv,mmdet,motmetrics,numpy,pandas,pycocotools,torch,torchvision,tqdm
7 | no_lines_before = STDLIB,LOCALFOLDER
8 | default_section = THIRDPARTY
9 |
10 | [yapf]
11 | BASED_ON_STYLE = pep8
12 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
13 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
14 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import time
4 | from setuptools import find_packages, setup
5 |
6 |
7 | def readme():
8 | with open('README.md', encoding='utf-8') as f:
9 | content = f.read()
10 | return content
11 |
12 |
13 | version_file = 'teter/version.py'
14 |
15 |
16 | def get_git_hash():
17 |
18 | def _minimal_ext_cmd(cmd):
19 | # construct minimal environment
20 | env = {}
21 | for k in ['SYSTEMROOT', 'PATH', 'HOME']:
22 | v = os.environ.get(k)
23 | if v is not None:
24 | env[k] = v
25 | # LANGUAGE is used on win32
26 | env['LANGUAGE'] = 'C'
27 | env['LANG'] = 'C'
28 | env['LC_ALL'] = 'C'
29 | out = subprocess.Popen(
30 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
31 | return out
32 |
33 | try:
34 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
35 | sha = out.strip().decode('ascii')
36 | except OSError:
37 | sha = 'unknown'
38 |
39 | return sha
40 |
41 |
42 | def get_hash():
43 | if os.path.exists('.git'):
44 | sha = get_git_hash()[:7]
45 | elif os.path.exists(version_file):
46 | try:
47 | from teter.version import __version__
48 | sha = __version__.split('+')[-1]
49 | except ImportError:
50 | raise ImportError('Unable to get git version')
51 | else:
52 | sha = 'unknown'
53 |
54 | return sha
55 |
56 |
57 | def write_version_py():
58 | content = """# GENERATED VERSION FILE
59 | # TIME: {}
60 | __version__ = '{}'
61 | short_version = '{}'
62 | version_info = ({})
63 | """
64 | sha = get_hash()
65 | with open('teter/VERSION', 'r') as f:
66 | SHORT_VERSION = f.read().strip()
67 | VERSION_INFO = ', '.join(SHORT_VERSION.split('.'))
68 | VERSION = SHORT_VERSION + '+' + sha
69 |
70 | version_file_str = content.format(time.asctime(), VERSION, SHORT_VERSION,
71 | VERSION_INFO)
72 | with open(version_file, 'w') as f:
73 | f.write(version_file_str)
74 |
75 |
76 | def get_version():
77 | with open(version_file, 'r') as f:
78 | exec(compile(f.read(), version_file, 'exec'))
79 | return locals()['__version__']
80 |
81 |
82 | def get_requirements(filename='requirements.txt'):
83 | here = os.path.dirname(os.path.realpath(__file__))
84 | with open(os.path.join(here, filename), 'r') as f:
85 | requires = [line.replace('\n', '') for line in f.readlines()]
86 | for i, req in enumerate(requires):
87 | if req.startswith("git"):
88 | pkg_name = req.split("/")[-1].split(".")[0]
89 | req = pkg_name
90 | requires[i] = req
91 | return requires
92 |
93 |
94 | if __name__ == '__main__':
95 | write_version_py()
96 | setup(
97 | name='teter',
98 | version=get_version(),
99 | description='Tracking Every Thing in the Wild (TETer)',
100 | long_description=readme(),
101 | packages=find_packages(exclude=('configs', 'tools', 'demo')),
102 | package_data={'teter.ops': ['*/*.so']},
103 | classifiers=[
104 | 'Development Status :: 4 - Beta',
105 | 'License :: OSI Approved :: Apache Software License',
106 | 'Operating System :: OS Independent',
107 | 'Programming Language :: Python :: 3',
108 | 'Programming Language :: Python :: 3.5',
109 | 'Programming Language :: Python :: 3.6',
110 | 'Programming Language :: Python :: 3.7',
111 | ],
112 | license='Apache License 2.0',
113 | setup_requires=['pytest-runner', 'cython', 'numpy'],
114 | tests_require=['pytest', 'xdoctest'],
115 | install_requires=get_requirements(),
116 | zip_safe=False)
117 |
--------------------------------------------------------------------------------
/teta/README.md:
--------------------------------------------------------------------------------
1 | # Track Every Thing Accuracy
2 | [Tracking Every Thing in the Wild](https://arxiv.org/abs/2207.12978) [ECCV 2022].
3 |
4 | This is the official implementation of the TETA metric described in the paper.
5 |
6 |
7 |
8 | The proposed TETA metric disentangles classification performance from tracking performance.
9 | Instead of using the predicted class labels to group per-class tracking results, we group predictions by location with the help of local cluster evaluation.
10 | We treat each ground truth bounding box of the target class as the anchor of each cluster and group prediction results inside each cluster to evaluate the localization and association performance.
11 | Our local clusters enable us to evaluate tracks even when the class prediction is wrong.
12 |
13 |
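As a toy sketch of the grouping idea only (the matching details of the real metric live in `./teta/metrics/teta.py` and differ), assigning predictions to ground-truth anchors purely by location could look like:

```python
import numpy as np

def box_iou(a, b):
    """IoU of two boxes in [x1, y1, x2, y2] format."""
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    union = ((a[2] - a[0]) * (a[3] - a[1])
             + (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / (union + 1e-9)

def group_by_location(gt_boxes, pred_boxes, thr=0.5):
    """Attach each prediction to its best-overlapping GT anchor,
    ignoring the predicted class label entirely."""
    if not gt_boxes:
        return {}
    clusters = {i: [] for i in range(len(gt_boxes))}
    for j, pred in enumerate(pred_boxes):
        ious = [box_iou(gt, pred) for gt in gt_boxes]
        best = int(np.argmax(ious))
        if ious[best] >= thr:  # only group sufficiently overlapping predictions
            clusters[best].append(j)
    return clusters
```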
14 |
15 | ## Install
16 | Install the TETA environment using pip.
17 | ```shell
18 | pip install -r requirements.txt
19 | ```
20 | Go to the root of the teta folder and install the package in editable mode:
21 | ```shell
22 | pip install -e .
23 | ```
24 | ## Supported data format
25 | The result format follows the COCO-VID format. We describe it in detail [here](./docs/TAO-format.txt).
26 |
27 | ## How to Run
28 | Run on TAO.
29 | ```shell
30 | python scripts/run_tao.py --METRICS TETA --TRACKERS_TO_EVAL TETer --GT_FOLDER ${GT_JSON_PATH}.json --TRACKER_SUB_FOLDER ${RESULT_JSON_PATH}.json
31 | ```
32 | Run on BDD100K.
33 | ```shell
34 | python scripts/run_coco.py --METRICS TETA --TRACKERS_TO_EVAL TETer --GT_FOLDER ${GT_JSON_PATH}.json --TRACKER_SUB_FOLDER ${RESULT_JSON_PATH}.json
35 | ```
36 |
37 | ## Citation
38 |
39 | ```
40 | @InProceedings{trackeverything,
41 | title = {Tracking Every Thing in the Wild},
42 | author = {Li, Siyuan and Danelljan, Martin and Ding, Henghui and Huang, Thomas E. and Yu, Fisher},
43 | booktitle = {Proceedings of the European Conference on Computer Vision (ECCV)},
44 | month = {Oct},
45 | year = {2022}
46 | }
47 | ```
--------------------------------------------------------------------------------
/teta/docs/TAO-format.txt:
--------------------------------------------------------------------------------
1 | Taken from: https://github.com/TAO-Dataset/tao/blob/master/tao/toolkit/tao/tao.py
2 |
3 | Annotation file format:
4 | {
5 | "info" : info,
6 | "images" : [image],
7 | "videos": [video],
8 | "tracks": [track],
9 | "annotations" : [annotation],
10 | "categories": [category],
11 | "licenses" : [license],
12 | }
13 | info: As in MS COCO
14 | image: {
15 | "id" : int,
16 | "video_id": int,
17 | "file_name" : str,
18 | "license" : int,
19 | # Redundant fields for COCO-compatibility
20 | "width": int,
21 | "height": int,
22 | "frame_index": int
23 | }
24 | video: {
25 | "id": int,
26 | "name": str,
27 | "width" : int,
28 | "height" : int,
29 | "neg_category_ids": [int],
30 | "not_exhaustive_category_ids": [int],
31 | "metadata": dict, # Metadata about the video
32 | }
33 | track: {
34 | "id": int,
35 | "category_id": int,
36 | "video_id": int
37 | }
38 | category: {
39 | "id": int,
40 | "name": str,
41 | "synset": str, # For non-LVIS objects, this is "unknown"
42 | ... [other fields copied from LVIS v0.5 and unused]
43 | }
44 | annotation: {
45 | "image_id": int,
46 | "track_id": int,
47 | "bbox": [x,y,width,height],
48 | "area": float,
49 | # Redundant field for compatibility with COCO scripts
50 | "category_id": int
51 | }
52 | license: {
53 | "id" : int,
54 | "name" : str,
55 | "url" : str,
56 | }
57 |
58 | Prediction format:
59 |
60 | [{
61 | "image_id" : int,
62 | "category_id" : int,
63 | "bbox" : [x,y,width,height],
64 | "score" : float,
65 | "track_id": int,
66 | "video_id": int
67 | }]
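
Example prediction entry (illustrative values only):

[{
    "image_id": 12,
    "category_id": 805,
    "bbox": [367.4, 206.1, 120.0, 85.5],
    "score": 0.87,
    "track_id": 3,
    "video_id": 1
}]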
--------------------------------------------------------------------------------
/teta/figures/figure_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/teta/figures/figure_1.png
--------------------------------------------------------------------------------
/teta/figures/teta-teaser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SysCV/tet/a62a9c0affec3a97f2cd0263141c53bcfb9c79f7/teta/figures/teta-teaser.png
--------------------------------------------------------------------------------
/teta/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy
2 | numpy
3 |
--------------------------------------------------------------------------------
/teta/scripts/run_coco.py:
--------------------------------------------------------------------------------
1 | """ evaluate.py
2 |
3 | Run example:
4 | python scripts/run_coco.py --USE_PARALLEL False --METRICS TETA --TRACKERS_TO_EVAL qdtrack
5 |
6 | Command Line Arguments: Defaults, # Comments
7 | Eval arguments:
8 | 'USE_PARALLEL': False,
9 | 'NUM_PARALLEL_CORES': 8,
10 | 'BREAK_ON_ERROR': True, # Raises exception and exits with error
11 | 'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error
12 | 'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file.
13 | 'PRINT_RESULTS': True,
14 | 'PRINT_ONLY_COMBINED': False,
15 | 'PRINT_CONFIG': True,
16 | 'TIME_PROGRESS': True,
17 | 'DISPLAY_LESS_PROGRESS': True,
18 | 'OUTPUT_SUMMARY': True,
19 | 'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections
20 | 'OUTPUT_TEM_RAW_DATA': True,
21 | Dataset arguments:
22 | 'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
23 | 'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
24 | 'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
25 | 'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
26 | 'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
27 | 'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
28 | 'PRINT_CONFIG': True, # Whether to print current config
29 | 'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
30 | 'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
31 | 'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
32 | 'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
33 | Metric arguments:
34 | 'METRICS': ['TETA']
35 | """
36 |
37 | import sys
38 | import os
39 | import argparse
40 | from multiprocessing import freeze_support
41 |
42 | from teta.config import parse_configs
43 | from teta.datasets import COCO
44 | from teta.eval import Evaluator
45 | from teta.metrics import TETA
46 |
47 |
48 | def evaluate():
49 | """Evaluate with TETA."""
50 | eval_config, dataset_config, metrics_config = parse_configs()
51 | evaluator = Evaluator(eval_config)
52 | dataset_list = [COCO(dataset_config)]
53 | metrics_list = []
54 | metric = TETA(exhaustive=True)
55 | if metric.get_name() in metrics_config["METRICS"]:
56 | metrics_list.append(metric)
57 | if len(metrics_list) == 0:
58 | raise Exception("No metrics selected for evaluation")
59 | evaluator.evaluate(dataset_list, metrics_list)
60 |
61 |
62 | if __name__ == "__main__":
63 | freeze_support()
64 | evaluate()
65 |
--------------------------------------------------------------------------------
/teta/scripts/run_tao.py:
--------------------------------------------------------------------------------
1 | """ evaluate.py
2 |
3 | Run example:
4 | python scripts/run_tao.py --USE_PARALLEL False --METRICS TETA --TRACKERS_TO_EVAL qdtrack
5 |
6 | Command Line Arguments: Defaults, # Comments
7 | Eval arguments:
8 | 'USE_PARALLEL': False,
9 | 'NUM_PARALLEL_CORES': 8,
10 | 'BREAK_ON_ERROR': True, # Raises exception and exits with error
11 | 'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error
12 | 'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file.
13 | 'PRINT_RESULTS': True,
14 | 'PRINT_ONLY_COMBINED': False,
15 | 'PRINT_CONFIG': True,
16 | 'TIME_PROGRESS': True,
17 | 'DISPLAY_LESS_PROGRESS': True,
18 | 'OUTPUT_SUMMARY': True,
19 | 'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections
20 | 'OUTPUT_TEM_RAW_DATA': True,
21 | Dataset arguments:
22 | 'GT_FOLDER': os.path.join(code_path, 'data/gt/tao/tao_training'), # Location of GT data
23 | 'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/tao/tao_training'), # Trackers location
24 | 'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
25 | 'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
26 | 'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
27 | 'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
28 | 'PRINT_CONFIG': True, # Whether to print current config
29 | 'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
30 | 'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
31 | 'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
32 | 'MAX_DETECTIONS': 300, # Number of maximal allowed detections per image (0 for unlimited)
33 | Metric arguments:
34 | 'METRICS': ['TETA']
35 | """
36 |
37 | import sys
38 | import os
39 | import argparse
40 | from multiprocessing import freeze_support
41 |
42 | from teta.config import parse_configs
43 | from teta.datasets import TAO
44 | from teta.eval import Evaluator
45 | from teta.metrics import TETA
46 |
47 |
48 | def evaluate():
49 | """Evaluate with TETA."""
50 | eval_config, dataset_config, metrics_config = parse_configs()
51 | evaluator = Evaluator(eval_config)
52 | dataset_list = [TAO(dataset_config)]
53 | metrics_list = []
54 | metric = TETA(exhaustive=False)
55 | if metric.get_name() in metrics_config["METRICS"]:
56 | metrics_list.append(metric)
57 | if len(metrics_list) == 0:
58 | raise Exception("No metrics selected for evaluation")
59 | evaluator.evaluate(dataset_list, metrics_list)
60 |
61 |
62 | if __name__ == "__main__":
63 | freeze_support()
64 | evaluate()
65 |
--------------------------------------------------------------------------------
/teta/setup.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 | import sys
4 | from shutil import rmtree
5 |
6 | from setuptools import find_packages, setup, Command
7 |
8 | # Package meta-data.
9 | NAME = 'teta'
10 | DESCRIPTION = 'Track Every Thing Accuracy (TETA metric)'
11 | EMAIL = 'siyuan.li@vision.ee.ethz.ch'
12 | AUTHOR = 'Siyuan Li'
13 | REQUIRES_PYTHON = '>=3.6.0'
14 | VERSION = '0.1.0'
15 |
16 | # What packages are required for this module to be executed?
17 | REQUIRED = [
18 | 'script_utils @ git+https://github.com/achalddave/python-script-utils.git@v0.0.2#egg=script_utils',
19 | 'numpy', 'scipy'
20 | ]
21 |
22 | # What packages are optional?
23 | EXTRAS = {
24 | }
25 |
26 | here = os.path.abspath(os.path.dirname(__file__))
27 |
28 | # Import the README and use it as the long-description.
29 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file!
30 | try:
31 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
32 | long_description = '\n' + f.read()
33 | except FileNotFoundError:
34 | long_description = DESCRIPTION
35 |
36 | # Load the package's __version__.py module as a dictionary.
37 | about = {}
38 | if not VERSION:
39 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
40 | with open(os.path.join(here, project_slug, '__version__.py')) as f:
41 | exec(f.read(), about)
42 | else:
43 | about['__version__'] = VERSION
44 |
45 |
46 | class UploadCommand(Command):
47 | """Support setup.py upload."""
48 |
49 | description = 'Build and publish the package.'
50 | user_options = []
51 |
52 | @staticmethod
53 | def status(s):
54 | """Prints things in bold."""
55 | print('\033[1m{0}\033[0m'.format(s))
56 |
57 | def initialize_options(self):
58 | pass
59 |
60 | def finalize_options(self):
61 | pass
62 |
63 | def run(self):
64 | try:
65 | self.status('Removing previous builds…')
66 | rmtree(os.path.join(here, 'dist'))
67 | except OSError:
68 | pass
69 |
70 | self.status('Building Source and Wheel (universal) distribution…')
71 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
72 |
73 | self.status('Uploading the package to PyPI via Twine…')
74 | os.system('twine upload dist/*')
75 |
76 | self.status('Pushing git tags…')
77 | os.system('git tag v{0}'.format(about['__version__']))
78 | os.system('git push --tags')
79 |
80 | sys.exit()
81 |
82 |
83 | # Where the magic happens:
84 | setup(
85 | name=NAME,
86 | version=about['__version__'],
87 | description=DESCRIPTION,
88 | long_description=long_description,
89 | long_description_content_type='text/markdown',
90 | author=AUTHOR,
91 | author_email=EMAIL,
92 | python_requires=REQUIRES_PYTHON,
93 | # url=URL,
94 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
95 | # If your package is a single module, use this instead of 'packages':
96 | # py_modules=['tao'],
97 |
98 | # entry_points={
99 | # 'console_scripts': ['mycli=mymodule:cli'],
100 | # },
101 | install_requires=REQUIRED,
102 | extras_require=EXTRAS,
103 | include_package_data=True,
104 | license='MIT',
105 | classifiers=[
106 | # Trove classifiers
107 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
108 | 'License :: OSI Approved :: MIT License',
109 | 'Programming Language :: Python',
110 | 'Programming Language :: Python :: 3',
111 | 'Programming Language :: Python :: 3.6',
112 | 'Programming Language :: Python :: Implementation :: CPython',
113 | 'Programming Language :: Python :: Implementation :: PyPy'
114 | ],
115 | # $ setup.py publish support.
116 | cmdclass={
117 | 'upload': UploadCommand,
118 | },
119 | )
--------------------------------------------------------------------------------
/teta/teta/__init__.py:
--------------------------------------------------------------------------------
1 | from . import config, datasets, metrics, utils
2 | from .eval import Evaluator
3 |
--------------------------------------------------------------------------------
/teta/teta/_timing.py:
--------------------------------------------------------------------------------
1 | import inspect
2 | from functools import wraps
3 | from time import perf_counter
4 |
5 | DO_TIMING = False
6 | DISPLAY_LESS_PROGRESS = False
7 | timer_dict = {}
8 | counter = 0
9 |
10 |
11 | def time(f):
12 | @wraps(f)
13 | def wrap(*args, **kw):
14 | if DO_TIMING:
15 | # Run function with timing
16 | ts = perf_counter()
17 | result = f(*args, **kw)
18 | te = perf_counter()
19 | tt = te - ts
20 |
21 | # Get function name
22 | arg_names = inspect.getfullargspec(f)[0]
23 | if arg_names[0] == "self" and DISPLAY_LESS_PROGRESS:
24 | return result
25 | elif arg_names[0] == "self":
26 | method_name = type(args[0]).__name__ + "." + f.__name__
27 | else:
28 | method_name = f.__name__
29 |
30 | # Record the cumulative time spent in each function for analysis
31 | if method_name in timer_dict.keys():
32 | timer_dict[method_name] += tt
33 | else:
34 | timer_dict[method_name] = tt
35 |
36 | # If code is finished, display timing summary
37 | if method_name == "Evaluator.evaluate":
38 | print("")
39 | print("Timing analysis:")
40 | for key, value in timer_dict.items():
41 | print("%-70s %2.4f sec" % (key, value))
42 | else:
43 | # Get function argument values for printing special arguments of interest
44 | arg_titles = ["tracker", "seq", "cls"]
45 | arg_vals = []
46 | for i, a in enumerate(arg_names):
47 | if a in arg_titles:
48 | arg_vals.append(args[i])
49 | arg_text = "(" + ", ".join(arg_vals) + ")"
50 |
51 | # Display methods and functions with different indentation.
52 | if arg_names[0] == "self":
53 | print("%-74s %2.4f sec" % (" " * 4 + method_name + arg_text, tt))
54 | elif arg_names[0] == "test":
55 | pass
56 | else:
57 | global counter
58 | counter += 1
59 | print("%i %-70s %2.4f sec" % (counter, method_name + arg_text, tt))
60 |
61 | return result
62 | else:
63 | # If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing.
64 | return f(*args, **kw)
65 |
66 | return wrap
67 |
--------------------------------------------------------------------------------
/teta/teta/config.py:
--------------------------------------------------------------------------------
1 | """Config."""
2 | import argparse
3 | import os
4 |
5 |
6 | def parse_configs():
7 | """Parse command line."""
8 | default_eval_config = get_default_eval_config()
9 | default_eval_config["DISPLAY_LESS_PROGRESS"] = True
10 | default_dataset_config = get_default_dataset_config()
11 | default_metrics_config = {"METRICS": ["TETA"]}
12 | config = {
13 | **default_eval_config,
14 | **default_dataset_config,
15 | **default_metrics_config,
16 | }
17 | parser = argparse.ArgumentParser()
18 | for setting in config.keys():
19 | if type(config[setting]) == list or type(config[setting]) == type(None):
20 | parser.add_argument("--" + setting, nargs="+")
21 | else:
22 | parser.add_argument("--" + setting)
23 | args = parser.parse_args().__dict__
24 | for setting in args.keys():
25 | if args[setting] is not None:
26 | if type(config[setting]) == type(True):
27 | if args[setting] == "True":
28 | x = True
29 | elif args[setting] == "False":
30 | x = False
31 | else:
32 | raise Exception(
33 | f"Command line parameter {setting} must be True/False"
34 | )
35 | elif type(config[setting]) == type(1):
36 | x = int(args[setting])
37 | elif type(args[setting]) == type(None):
38 | x = None
39 | else:
40 | x = args[setting]
41 | config[setting] = x
42 | eval_config = {k: v for k, v in config.items() if k in default_eval_config.keys()}
43 | dataset_config = {
44 | k: v for k, v in config.items() if k in default_dataset_config.keys()
45 | }
46 | metrics_config = {
47 | k: v for k, v in config.items() if k in default_metrics_config.keys()
48 | }
49 |
50 | return eval_config, dataset_config, metrics_config
51 |
52 |
53 | def get_default_eval_config():
54 | """Returns the default config values for evaluation."""
55 | code_path = get_code_path()
56 | default_config = {
57 | "USE_PARALLEL": True,
58 | "NUM_PARALLEL_CORES": 8,
59 | "BREAK_ON_ERROR": True,
60 | "RETURN_ON_ERROR": False,
61 | "LOG_ON_ERROR": os.path.join(code_path, "error_log.txt"),
62 | "PRINT_RESULTS": True,
63 | "PRINT_ONLY_COMBINED": True,
64 | "PRINT_CONFIG": True,
65 | "TIME_PROGRESS": True,
66 | "DISPLAY_LESS_PROGRESS": True,
67 | "OUTPUT_SUMMARY": True,
68 | "OUTPUT_EMPTY_CLASSES": True,
69 | "OUTPUT_TEM_RAW_DATA": True,
70 | "OUTPUT_PER_SEQ_RES": True,
71 | }
72 | return default_config
73 |
74 |
75 | def get_default_dataset_config():
76 | """Default class config values"""
77 | code_path = get_code_path()
78 | default_config = {
79 | "GT_FOLDER": os.path.join(
80 | code_path, "data/gt/tao/tao_training"
81 | ), # Location of GT data
82 | "TRACKERS_FOLDER": os.path.join(
83 | code_path, "data/trackers/tao/tao_training"
84 | ), # Trackers location
85 | "OUTPUT_FOLDER": None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
86 | "TRACKERS_TO_EVAL": ['TETer'], # Filenames of trackers to eval (if None, all in folder)
87 | "CLASSES_TO_EVAL": None, # Classes to eval (if None, all classes)
88 | "SPLIT_TO_EVAL": "training", # Valid: 'training', 'val'
89 | "PRINT_CONFIG": True, # Whether to print current config
90 | "TRACKER_SUB_FOLDER": "data", # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
91 | "OUTPUT_SUB_FOLDER": "", # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
92 | "TRACKER_DISPLAY_NAMES": None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
93 | "MAX_DETECTIONS": 0, # Number of maximal allowed detections per image (0 for unlimited)
94 | }
95 | return default_config
96 |
97 |
98 | def init_config(config, default_config, name=None):
99 | """Initialize non-given config values with defaults."""
100 | if config is None:
101 | config = default_config
102 | else:
103 | for k in default_config.keys():
104 | if k not in config.keys():
105 | config[k] = default_config[k]
106 | if name and config["PRINT_CONFIG"]:
107 | print("\n%s Config:" % name)
108 | for c in config.keys():
109 | print("%-20s : %-30s" % (c, config[c]))
110 | return config
111 |
112 |
113 | def update_config(config):
114 | """
115 | Parse the command-line arguments of a script and update the config values that are specified in them.
116 | :param config: the config to update
117 | :return: the updated config
118 | """
119 | parser = argparse.ArgumentParser()
120 | for setting in config.keys():
121 | if type(config[setting]) == list or type(config[setting]) == type(None):
122 | parser.add_argument("--" + setting, nargs="+")
123 | else:
124 | parser.add_argument("--" + setting)
125 | args = parser.parse_args().__dict__
126 | for setting in args.keys():
127 | if args[setting] is not None:
128 | if type(config[setting]) == type(True):
129 | if args[setting] == "True":
130 | x = True
131 | elif args[setting] == "False":
132 | x = False
133 | else:
134 | raise Exception(
135 | "Command line parameter " + setting + "must be True or False"
136 | )
137 | elif type(config[setting]) == type(1):
138 | x = int(args[setting])
139 | elif type(args[setting]) == type(None):
140 | x = None
141 | else:
142 | x = args[setting]
143 | config[setting] = x
144 | return config
145 |
146 |
147 | def get_code_path():
148 | """Get base path where code is"""
149 | return os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
150 |
--------------------------------------------------------------------------------
/teta/teta/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | """Datasets."""
2 | from .coco import COCO
3 | from .tao import TAO
4 | from .bdd import BDD
5 | from .coco_mots import COCOMOTS
6 | from .bdd_mots import BDDMOTS
--------------------------------------------------------------------------------
/teta/teta/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | from .teta import TETA
2 |
--------------------------------------------------------------------------------
/teta/teta/metrics/_base_metric.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 | import numpy as np
4 |
5 | from .. import _timing
6 | from ..utils import TrackEvalException
7 |
8 |
9 | class _BaseMetric(ABC):
10 | @abstractmethod
11 | def __init__(self):
12 | self.plottable = False
13 | self.integer_fields = []
14 | self.float_fields = []
15 | self.array_labels = []
16 | self.integer_array_fields = []
17 | self.float_array_fields = []
18 | self.fields = []
19 | self.summary_fields = []
20 | self.registered = False
21 |
22 | #####################################################################
23 | # Abstract functions for subclasses to implement
24 |
25 | @_timing.time
26 | @abstractmethod
27 | def eval_sequence(self, data):
28 | ...
29 |
30 | @abstractmethod
31 | def combine_sequences(self, all_res):
32 | ...
33 |
34 | @abstractmethod
35 | def combine_classes_class_averaged(self, all_res, ignore_empty=False):
36 | ...
37 |
38 | @abstractmethod
39 | def combine_classes_det_averaged(self, all_res):
40 | ...
41 |
42 | def plot_single_tracker_results(self, all_res, tracker, output_folder, cls):
43 | """Plot results, only valid for metrics with self.plottable."""
44 | if self.plottable:
45 | raise NotImplementedError(
46 | f"plot_results is not implemented for metric {self.get_name()}"
47 | )
48 | else:
49 | pass
50 |
51 | #####################################################################
52 | # Helper functions which are useful for all metrics:
53 |
54 | @classmethod
55 | def get_name(cls):
56 | return cls.__name__
57 |
58 | @staticmethod
59 | def _combine_sum(all_res, field):
60 | """Combine sequence results via sum"""
61 | return sum([all_res[k][field] for k in all_res.keys()])
62 |
63 | @staticmethod
64 | def _combine_weighted_av(all_res, field, comb_res, weight_field):
65 | """Combine sequence results via weighted average."""
66 | return sum(
67 | [all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()]
68 | ) / np.maximum(1.0, comb_res[weight_field])
69 |
70 | def print_table(self, table_res, tracker, cls):
71 | """Print table of results for all sequences."""
72 | print("")
73 | metric_name = self.get_name()
74 | self._row_print(
75 | [metric_name + ": " + tracker + "-" + cls] + self.summary_fields
76 | )
77 | for seq, results in sorted(table_res.items()):
78 | if seq == "COMBINED_SEQ":
79 | continue
80 | summary_res = self._summary_row(results)
81 | self._row_print([seq] + summary_res)
82 | summary_res = self._summary_row(table_res["COMBINED_SEQ"])
83 | self._row_print(["COMBINED"] + summary_res)
84 |
85 | def _summary_row(self, results_):
86 | vals = []
87 | for h in self.summary_fields:
88 | if h in self.float_array_fields:
89 | vals.append("{0:1.5g}".format(100 * np.mean(results_[h])))
90 | elif h in self.float_fields:
91 | vals.append("{0:1.5g}".format(100 * float(results_[h])))
92 | elif h in self.integer_fields:
93 | vals.append("{0:d}".format(int(results_[h])))
94 | else:
95 | raise NotImplementedError(
96 | "Summary function not implemented for this field type."
97 | )
98 | return vals
99 |
100 | @staticmethod
101 | def _row_print(*argv):
102 | """Print results in evenly spaced rows, with more space in first row."""
103 | if len(argv) == 1:
104 | argv = argv[0]
105 | to_print = "%-35s" % argv[0]
106 | for v in argv[1:]:
107 | to_print += "%-10s" % str(v)
108 | print(to_print)
109 |
110 | def summary_results(self, table_res):
111 | """Return a simple summary of final results for a tracker."""
112 | return dict(
113 | zip(self.summary_fields, self._summary_row(table_res["COMBINED_SEQ"]),)
114 | )
115 |
116 | def detailed_results(self, table_res):
117 | """Return detailed final results for a tracker."""
118 | # Get detailed field information
119 | detailed_fields = self.float_fields + self.integer_fields
120 | for h in self.float_array_fields + self.integer_array_fields:
121 | for alpha in [int(100 * x) for x in self.array_labels]:
122 | detailed_fields.append(h + "___" + str(alpha))
123 | detailed_fields.append(h + "___AUC")
124 |
125 | # Get detailed results
126 | detailed_results = {}
127 | for seq, res in table_res.items():
128 | detailed_row = self._detailed_row(res)
129 | if len(detailed_row) != len(detailed_fields):
130 | raise TrackEvalException(
131 | f"Field names and data have different sizes "
132 | f"({len(detailed_row)} and {len(detailed_fields)})"
133 | )
134 | detailed_results[seq] = dict(zip(detailed_fields, detailed_row))
135 | return detailed_results
136 |
137 | def _detailed_row(self, res):
138 | detailed_row = []
139 | for h in self.float_fields + self.integer_fields:
140 | detailed_row.append(res[h])
141 | for h in self.float_array_fields + self.integer_array_fields:
142 | for i, _ in enumerate([int(100 * x) for x in self.array_labels]):
143 | detailed_row.append(res[h][i])
144 | detailed_row.append(np.mean(res[h]))
145 | return detailed_row
146 |
--------------------------------------------------------------------------------
/teta/teta/utils.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | from collections import OrderedDict
4 |
5 |
6 | def validate_metrics_list(metrics_list):
7 | """Get names of metric class and ensures they are unique, further checks that the fields within each metric class
8 | do not have overlapping names.
9 | """
10 | metric_names = [metric.get_name() for metric in metrics_list]
11 | # check metric names are unique
12 | if len(metric_names) != len(set(metric_names)):
13 | raise TrackEvalException(
14 | "Code being run with multiple metrics of the same name"
15 | )
16 | fields = []
17 | for m in metrics_list:
18 | fields += m.fields
19 | # check metric fields are unique
20 | if len(fields) != len(set(fields)):
21 | raise TrackEvalException(
22 | "Code being run with multiple metrics with fields of the same name"
23 | )
24 | return metric_names
25 |
26 |
27 | def get_track_id_str(ann):
28 | """Get name of track ID in annotation."""
29 | if "track_id" in ann:
30 | tk_str = "track_id"
31 | elif "instance_id" in ann:
32 | tk_str = "instance_id"
33 | elif "scalabel_id" in ann:
34 | tk_str = "scalabel_id"
35 | else:
36 | assert False, "No track/instance ID."
37 | return tk_str
38 |
39 |
40 | class TrackEvalException(Exception):
41 | """Custom exception for catching expected errors."""
42 |
43 | ...
44 |
--------------------------------------------------------------------------------
/teter/VERSION:
--------------------------------------------------------------------------------
1 | 0.1.0
2 |
--------------------------------------------------------------------------------
/teter/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 |
3 | __all__ = ["__version__", "short_version"]
4 |
--------------------------------------------------------------------------------
/teter/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .inference import inference_model, init_model
2 | from .test import multi_gpu_test, single_gpu_test
3 | from .train import train_model
4 |
5 | __all__ = [
6 | "init_model",
7 | "inference_model",
8 | "multi_gpu_test",
9 | "single_gpu_test",
10 | "train_model",
11 | ]
12 |
--------------------------------------------------------------------------------
/teter/apis/inference.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | import numpy as np
3 | import torch
4 | import warnings
5 | from mmcv.ops import RoIPool
6 | from mmcv.parallel import collate, scatter
7 | from mmcv.runner import load_checkpoint
8 | from mmdet.core import get_classes
9 | from mmdet.datasets import replace_ImageToTensor
10 | from mmdet.datasets.pipelines import Compose
11 |
12 | from teter.models import build_model
13 |
14 |
15 | def init_model(config, checkpoint=None, device="cuda:0", cfg_options=None):
16 | """Initialize a detector from config file.
17 |
18 | Args:
19 | config (str or :obj:`mmcv.Config`): Config file path or the config
20 | object.
21 | checkpoint (str, optional): Checkpoint path. If left as None, the model
22 | will not load any weights.
23 | cfg_options (dict): Options to override some settings in the used
24 | config.
25 |
26 | Returns:
27 | nn.Module: The constructed detector.
28 | """
29 | if isinstance(config, str):
30 | config = mmcv.Config.fromfile(config)
31 | elif not isinstance(config, mmcv.Config):
32 | raise TypeError(
33 | "config must be a filename or Config object, " f"but got {type(config)}"
34 | )
35 | if cfg_options is not None:
36 | config.merge_from_dict(cfg_options)
37 | config.model.pretrained = None
38 | config.model.train_cfg = None
39 | model = build_model(config.model, test_cfg=config.get("test_cfg"))
40 | if checkpoint is not None:
41 | map_loc = "cpu" if device == "cpu" else None
42 | checkpoint = load_checkpoint(model, checkpoint, map_location=map_loc)
43 | if "CLASSES" in checkpoint["meta"]:
44 | model.CLASSES = checkpoint["meta"]["CLASSES"]
45 | else:
46 | warnings.simplefilter("once")
47 | warnings.warn(
48 | "Class names are not saved in the checkpoint's "
49 | "meta data, use COCO classes by default."
50 | )
51 | model.CLASSES = get_classes("coco")
52 | model.cfg = config # save the config in the model for convenience
53 | model.to(device)
54 | model.eval()
55 | return model
56 |
57 |
58 | def inference_model(model, imgs, frame_id):
59 | if isinstance(imgs, (list, tuple)):
60 | is_batch = True
61 | else:
62 | imgs = [imgs]
63 | is_batch = False
64 |
65 | cfg = model.cfg
66 | device = next(model.parameters()).device # model device
67 |
68 | if isinstance(imgs[0], np.ndarray):
69 | cfg = cfg.copy()
70 | # set loading pipeline type
71 | cfg.data.test.pipeline[0].type = "LoadImageFromWebcam"
72 |
73 | cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
74 | test_pipeline = Compose(cfg.data.test.pipeline)
75 |
76 | datas = []
77 | for img in imgs:
78 | # prepare data
79 | if isinstance(img, np.ndarray):
80 | # directly add img
81 | data = dict(img=img, frame_id=frame_id)
82 | else:
83 | # add information into dict
84 | data = dict(img_info=dict(filename=img, frame_id=frame_id), img_prefix=None)
85 | # build the data pipeline
86 |
87 | data = test_pipeline(data)
88 | datas.append(data)
89 |
90 | data = collate(datas, samples_per_gpu=len(imgs))
91 | # just get the actual data from DataContainer
92 | data["img_metas"] = [img_metas.data[0] for img_metas in data["img_metas"]]
93 | data["img"] = [img.data[0] for img in data["img"]]
94 | if next(model.parameters()).is_cuda:
95 | # scatter to specified GPU
96 | data = scatter(data, [device])[0]
97 | else:
98 | for m in model.modules():
99 | assert not isinstance(
100 | m, RoIPool
101 | ), "CPU inference with RoIPool is not supported currently."
102 |
103 | # forward the model
104 | with torch.no_grad():
105 | results = model(return_loss=False, rescale=True, detection_only=True, **data)
106 |
107 | if not is_batch:
108 | return results[0]
109 | else:
110 | return results
111 |
112 |
113 | def show_result_pyplot(
114 | model,
115 | img,
116 | result,
117 | score_thr=0.3,
118 | fig_size=(15, 10),
119 | title="result",
120 | block=True,
121 | wait_time=0,
122 | ):
123 | """Visualize the detection results on the image.
124 |
125 | Args:
126 | model (nn.Module): The loaded detector.
127 | img (str or np.ndarray): Image filename or loaded image.
128 | result (tuple[list] or list): The detection result, can be either
129 | (bbox, segm) or just bbox.
130 | score_thr (float): The threshold to visualize the bboxes and masks.
131 | fig_size (tuple): Figure size of the pyplot figure.
132 | title (str): Title of the pyplot figure.
133 | block (bool): Whether to block GUI. Default: True
134 | wait_time (float): Value of waitKey param.
135 | Default: 0.
136 | """
137 | warnings.warn('"block" will be deprecated in v2.9.0, ' 'please use "wait_time" instead.')
138 | warnings.warn('"fig_size" is deprecated and has no effect.')
139 | if hasattr(model, "module"):
140 | model = model.module
141 | model.show_result(
142 | img,
143 | result,
144 | score_thr=score_thr,
145 | show=True,
146 | wait_time=wait_time,
147 | win_name=title,
148 | bbox_color=(72, 101, 241),
149 | text_color=(72, 101, 241),
150 | )
151 |
--------------------------------------------------------------------------------
/teter/apis/test.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | import os.path as osp
3 | import shutil
4 | import tempfile
5 | import time
6 | import torch
7 | import torch.distributed as dist
8 | from collections import defaultdict
9 | from mmcv.runner import get_dist_info
10 |
11 |
12 | def single_gpu_test(model, data_loader, show=False, out_dir=None, show_score_thr=0.3):
13 | model.eval()
14 | results = defaultdict(list)
15 | dataset = data_loader.dataset
16 | prog_bar = mmcv.ProgressBar(len(dataset))
17 | for i, data in enumerate(data_loader):
18 | with torch.no_grad():
19 | result = model(return_loss=False, rescale=True, **data)
20 | for k, v in result.items():
21 | results[k].append(v)
22 |
23 | if show or out_dir:
24 | pass # TODO
25 |
26 | batch_size = data["img"][0].size(0)
27 | for _ in range(batch_size):
28 | prog_bar.update()
29 | return results
30 |
31 |
32 | def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):
33 | """Test model with multiple gpus.
34 |
35 | This method tests the model with multiple GPUs and collects the results
36 | under two different modes: GPU and CPU. By setting 'gpu_collect=True'
37 | it encodes results to GPU tensors and uses GPU communication to collect
38 | them. In CPU mode it saves the results from the different GPUs to 'tmpdir'
39 | and collects them with the rank 0 worker.
40 |
41 | Args:
42 | model (nn.Module): Model to be tested.
43 | data_loader (nn.Dataloader): Pytorch data loader.
44 | tmpdir (str): Path of directory to save the temporary results from
45 | different gpus under cpu mode.
46 | gpu_collect (bool): Option to use either gpu or cpu to collect results.
47 |
48 | Returns:
49 | list: The prediction results.
50 | """
51 | model.eval()
52 | results = defaultdict(list)
53 | dataset = data_loader.dataset
54 | rank, world_size = get_dist_info()
55 | if rank == 0:
56 | prog_bar = mmcv.ProgressBar(len(dataset))
57 | time.sleep(2) # This line can prevent deadlock problem in some cases.
58 | for i, data in enumerate(data_loader):
59 | with torch.no_grad():
60 | result = model(return_loss=False, rescale=True, **data)
61 | for k, v in result.items():
62 | results[k].append(v)
63 |
64 | if rank == 0:
65 | batch_size = (
66 | len(data["img_meta"]._data)
67 | if "img_meta" in data
68 | else data["img"][0].size(0)
69 | )
70 | for _ in range(batch_size * world_size):
71 | prog_bar.update()
72 |
73 | # collect results from all ranks
74 | if gpu_collect:
75 | raise NotImplementedError
76 | else:
77 | results = collect_results_cpu(results, len(dataset), tmpdir)
78 | return results
79 |
80 |
81 | def collect_results_cpu(result_part, size, tmpdir=None):
82 | rank, world_size = get_dist_info()
83 | # create a tmp dir if it is not specified
84 | if tmpdir is None:
85 | MAX_LEN = 512
86 | # 32 is whitespace
87 | dir_tensor = torch.full((MAX_LEN,), 32, dtype=torch.uint8, device="cuda")
88 | if rank == 0:
89 | tmpdir = tempfile.mkdtemp()
90 | tmpdir = torch.tensor(
91 | bytearray(tmpdir.encode()), dtype=torch.uint8, device="cuda"
92 | )
93 | dir_tensor[: len(tmpdir)] = tmpdir
94 | dist.broadcast(dir_tensor, 0)
95 | tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()
96 | else:
97 | mmcv.mkdir_or_exist(tmpdir)
98 | # dump the part result to the dir
99 | mmcv.dump(result_part, osp.join(tmpdir, f"part_{rank}.pkl"))
100 | dist.barrier()
101 | # collect all parts
102 | if rank != 0:
103 | return None
104 | else:
105 | # load results of all parts from tmp dir
106 | part_list = defaultdict(list)
107 | for i in range(world_size):
108 | part_file = osp.join(tmpdir, f"part_{i}.pkl")
109 | part_file = mmcv.load(part_file)
110 | for k, v in part_file.items():
111 | part_list[k].extend(v)
112 | shutil.rmtree(tmpdir)
113 | return part_list
114 |
--------------------------------------------------------------------------------
/teter/apis/train.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
3 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
4 | Fp16OptimizerHook, OptimizerHook, build_optimizer)
5 | from mmcv.utils import build_from_cfg
6 | # from mmdet.core import Fp16OptimizerHook
7 | from mmdet.datasets import build_dataset
8 |
9 | from teter.core import DistEvalHook, EvalHook
10 | from teter.datasets import build_dataloader
11 | from teter.utils import get_root_logger
12 |
13 |
14 | def train_model(
15 | model, dataset, cfg, distributed=False, validate=False, timestamp=None, meta=None
16 | ):
17 | logger = get_root_logger(cfg.log_level)
18 |
19 | # prepare data loaders
20 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
21 | if "imgs_per_gpu" in cfg.data:
22 | logger.warning(
23 | '"imgs_per_gpu" is deprecated in MMDet V2.0. '
24 | 'Please use "samples_per_gpu" instead'
25 | )
26 | if "samples_per_gpu" in cfg.data:
27 | logger.warning(
28 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
29 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
30 | f"={cfg.data.imgs_per_gpu} is used in this experiments"
31 | )
32 | else:
33 | logger.warning(
34 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"='
35 | f"{cfg.data.imgs_per_gpu} in this experiments"
36 | )
37 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu
38 |
39 | data_loaders = [
40 | build_dataloader(
41 | ds,
42 | cfg.data.samples_per_gpu,
43 | cfg.data.workers_per_gpu,
44 | # cfg.gpus will be ignored if distributed
45 | len(cfg.gpu_ids),
46 | dist=distributed,
47 | seed=cfg.seed,
48 | )
49 | for ds in dataset
50 | ]
51 |
52 | # put model on gpus
53 | if distributed:
54 | find_unused_parameters = cfg.get("find_unused_parameters", False)
55 | # Sets the `find_unused_parameters` parameter in
56 | # torch.nn.parallel.DistributedDataParallel
57 | model = MMDistributedDataParallel(
58 | model.cuda(),
59 | device_ids=[torch.cuda.current_device()],
60 | broadcast_buffers=False,
61 | find_unused_parameters=find_unused_parameters,
62 | )
63 | else:
64 | model = MMDataParallel(model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)
65 |
66 | # build runner
67 | optimizer = build_optimizer(model, cfg.optimizer)
68 | runner = EpochBasedRunner(
69 | model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta
70 | )
71 | # an ugly workaround to make .log and .log.json filenames the same
72 | runner.timestamp = timestamp
73 |
74 | # fp16 setting
75 | fp16_cfg = cfg.get("fp16", None)
76 | if fp16_cfg is not None:
77 | optimizer_config = Fp16OptimizerHook(
78 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed
79 | )
80 | elif distributed and "type" not in cfg.optimizer_config:
81 | optimizer_config = OptimizerHook(**cfg.optimizer_config)
82 | else:
83 | optimizer_config = cfg.optimizer_config
84 |
85 | # register hooks
86 | runner.register_training_hooks(
87 | cfg.lr_config,
88 | optimizer_config,
89 | cfg.checkpoint_config,
90 | cfg.log_config,
91 | cfg.get("momentum_config", None),
92 | )
93 | if distributed:
94 | runner.register_hook(DistSamplerSeedHook())
95 |
96 | # register eval hooks
97 | if validate:
98 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
99 | val_dataloader = build_dataloader(
100 | val_dataset,
101 | samples_per_gpu=1,
102 | workers_per_gpu=cfg.data.workers_per_gpu,
103 | dist=distributed,
104 | shuffle=False,
105 | )
106 | eval_cfg = cfg.get("evaluation", {})
107 | eval_hook = DistEvalHook if distributed else EvalHook
108 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg))
109 |
110 | # user-defined hooks
111 | if cfg.get("custom_hooks", None):
112 | custom_hooks = cfg.custom_hooks
113 | assert isinstance(
114 | custom_hooks, list
115 | ), f"custom_hooks expect list type, but got {type(custom_hooks)}"
116 | for hook_cfg in cfg.custom_hooks:
117 | assert isinstance(hook_cfg, dict), (
118 | "Each item in custom_hooks expects dict type, but got "
119 | f"{type(hook_cfg)}"
120 | )
121 | hook_cfg = hook_cfg.copy()
122 | priority = hook_cfg.pop("priority", "NORMAL")
123 | hook = build_from_cfg(hook_cfg, HOOKS)
124 | runner.register_hook(hook, priority=priority)
125 |
126 | if cfg.resume_from:
127 | runner.resume(cfg.resume_from)
128 | elif cfg.load_from:
129 | runner.load_checkpoint(cfg.load_from)
130 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
131 |
--------------------------------------------------------------------------------
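A hedged sketch of driving `train_model` directly (normally `tools/train.py` does this). The config path is illustrative, and the loaded config is assumed to carry every field the function reads (`data`, `optimizer`, `lr_config`, `checkpoint_config`, `log_config`, `workflow`, `total_epochs`, ...):

    from mmcv import Config
    from mmdet.datasets import build_dataset

    from teter.apis.train import train_model
    from teter.models import build_model  # also registers the repo's modules

    cfg = Config.fromfile("configs/bdd100k/cem_bdd.py")  # any repo config
    cfg.work_dir = "./work_dirs/example"
    cfg.gpu_ids = [0]
    cfg.seed = 0

    model = build_model(
        cfg.model, train_cfg=cfg.get("train_cfg"), test_cfg=cfg.get("test_cfg")
    )
    dataset = build_dataset(cfg.data.train)
    train_model(model, dataset, cfg, distributed=False, validate=False)
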
/teter/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluation import * # noqa: F401, F403
2 | from .track import * # noqa: F401, F403
3 | from .utils import * # noqa: F401, F403
4 |
--------------------------------------------------------------------------------
/teter/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .eval_hooks import DistEvalHook, EvalHook
2 | from .mot import eval_mot
3 |
4 | __all__ = ["EvalHook", "DistEvalHook", "eval_mot"]
5 |
--------------------------------------------------------------------------------
/teter/core/evaluation/box_track.toml:
--------------------------------------------------------------------------------
1 | remove_ignored = false
2 | ignored_as_class = false
3 |
4 | [ignored_mapping]
5 | "other person" = "pedestrian"
6 | "other vehicle" = "car"
7 | "trailer" = "truck"
8 |
9 | [name_mapping]
10 | bike = "bicycle"
11 | caravan = "car"
12 | motor = "motorcycle"
13 | person = "pedestrian"
14 | van = "car"
15 |
16 | [scalabel]
17 | [scalabel.imageSize]
18 | height = 720
19 | width = 1280
20 |
21 | [[scalabel.attributes]]
22 | name = "crowd"
23 | type = "switch"
24 | tag = "c"
25 |
26 | [[scalabel.categories]]
27 | name = "human"
28 | [[scalabel.categories.subcategories]]
29 | name = "pedestrian"
30 |
31 | [[scalabel.categories.subcategories]]
32 | name = "rider"
33 |
34 | [[scalabel.categories]]
35 | name = "vehicle"
36 | [[scalabel.categories.subcategories]]
37 | name = "car"
38 |
39 | [[scalabel.categories.subcategories]]
40 | name = "truck"
41 |
42 | [[scalabel.categories.subcategories]]
43 | name = "bus"
44 |
45 | [[scalabel.categories.subcategories]]
46 | name = "train"
47 |
48 | [[scalabel.categories]]
49 | name = "bike"
50 | [[scalabel.categories.subcategories]]
51 | name = "motorcycle"
52 |
53 | [[scalabel.categories.subcategories]]
54 | name = "bicycle"
--------------------------------------------------------------------------------
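The TOML above drives the Scalabel-based BDD100K box-track evaluation: label aliases are folded into the evaluated classes and the `scalabel` block describes the dataset schema. A sketch of reading the mapping directly, assuming the third-party `toml` package (the repo itself consumes this file through the bdd100k/scalabel tooling instead):

    import toml  # third-party package; an assumption, not a repo dependency

    cfg = toml.load("teter/core/evaluation/box_track.toml")

    def canonical(category: str) -> str:
        # Fold BDD100K aliases (e.g. "van", "person") into evaluated classes.
        return cfg["name_mapping"].get(category, category)

    assert canonical("person") == "pedestrian"
    assert canonical("car") == "car"  # unmapped names pass through
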
/teter/core/evaluation/eval_hooks.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import torch.distributed as dist
3 | from mmcv.runner import DistEvalHook as BaseDistEvalHook
4 | from mmcv.runner import EvalHook as BaseEvalHook
5 | from torch.nn.modules.batchnorm import _BatchNorm
6 |
7 |
8 | class EvalHook(BaseEvalHook):
9 | """Please refer to `mmcv.runner.hooks.evaluation.py:EvalHook` for detailed
10 | docstring."""
11 |
12 | def _do_evaluate(self, runner):
13 |         """Perform evaluation and save the checkpoint."""
14 | if not self._should_evaluate(runner):
15 | return
16 |
17 | if (
18 | hasattr(self.dataloader.dataset, "load_as_video")
19 | and self.dataloader.dataset.load_as_video
20 | ):
21 | from teter.apis import single_gpu_test
22 | else:
23 | from mmdet.apis import single_gpu_test
24 | results = single_gpu_test(runner.model, self.dataloader, show=False)
25 | runner.log_buffer.output["eval_iter_num"] = len(self.dataloader)
26 | key_score = self.evaluate(runner, results)
27 | if self.save_best:
28 | self._save_ckpt(runner, key_score)
29 |
30 |
31 | class DistEvalHook(BaseDistEvalHook):
32 | """Please refer to `mmcv.runner.hooks.evaluation.py:DistEvalHook` for
33 | detailed docstring."""
34 |
35 | def _do_evaluate(self, runner):
36 |         """Perform evaluation and save the checkpoint."""
37 |         # Synchronizing BatchNorm buffers (running_mean and
38 |         # running_var) is not supported by PyTorch's DDP, which
39 |         # can leave model performance inconsistent across ranks,
40 |         # so we broadcast rank 0's BatchNorm buffers to the other
41 |         # ranks to avoid this.
42 | if self.broadcast_bn_buffer:
43 | model = runner.model
44 | for name, module in model.named_modules():
45 | if isinstance(module, _BatchNorm) and module.track_running_stats:
46 | dist.broadcast(module.running_var, 0)
47 | dist.broadcast(module.running_mean, 0)
48 |
49 | if not self._should_evaluate(runner):
50 | return
51 |
52 | tmpdir = self.tmpdir
53 | if tmpdir is None:
54 | tmpdir = osp.join(runner.work_dir, ".eval_hook")
55 |
56 | if (
57 | hasattr(self.dataloader.dataset, "load_as_video")
58 | and self.dataloader.dataset.load_as_video
59 | ):
60 | from teter.apis import multi_gpu_test
61 | else:
62 | from mmdet.apis import multi_gpu_test
63 | results = multi_gpu_test(
64 | runner.model, self.dataloader, tmpdir=tmpdir, gpu_collect=self.gpu_collect
65 | )
66 | if runner.rank == 0:
67 | print("\n")
68 | runner.log_buffer.output["eval_iter_num"] = len(self.dataloader)
69 | key_score = self.evaluate(runner, results)
70 |
71 | if self.save_best:
72 | self._save_ckpt(runner, key_score)
73 |
--------------------------------------------------------------------------------
/teter/core/to_bdd100k/__init__.py:
--------------------------------------------------------------------------------
1 | from .transforms import preds2bdd100k
2 |
3 | __all__ = ["preds2bdd100k"]
4 |
--------------------------------------------------------------------------------
/teter/core/to_bdd100k/transforms.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 | from scalabel.label.io import save
4 | from scalabel.label.transforms import bbox_to_box2d
5 | from scalabel.label.typing import Frame, Label
6 | from tqdm import tqdm
7 |
8 | from ..evaluation import xyxy2xywh
9 | from .utils import mask_merge_parallel
10 |
11 | CATEGORIES = [
12 | "",
13 | "pedestrian",
14 | "rider",
15 | "car",
16 | "truck",
17 | "bus",
18 | "train",
19 | "motorcycle",
20 | "bicycle",
21 | "traffic light",
22 | "traffic sign",
23 | ]
24 |
25 |
26 | def det_to_bdd100k(dataset, results, out_base, nproc):
27 | bdd100k = []
28 | ann_id = 0
29 |     print("\nStart converting to BDD100K detection format")
30 | if "bbox_results" in results:
31 | results = results["bbox_results"]
32 | for idx, bboxes_list in tqdm(enumerate(results)):
33 | img_name = dataset.data_infos[idx]["file_name"]
34 | frame = Frame(name=img_name, labels=[])
35 |
36 | for cls_, bboxes in enumerate(bboxes_list):
37 | for bbox in bboxes:
38 | ann_id += 1
39 | label = Label(
40 | id=ann_id,
41 | score=bbox[-1],
42 | box2d=bbox_to_box2d(xyxy2xywh(bbox)),
43 | category=CATEGORIES[cls_ + 1],
44 | )
45 | frame.labels.append(label)
46 | bdd100k.append(frame)
47 |
48 |     print("\nWriting the converted JSON")
49 | out_path = osp.join(out_base, "det.json")
50 | save(out_path, bdd100k)
51 |
52 |
53 | def ins_seg_to_bdd100k(dataset, results, out_base, nproc=4):
54 | bdd100k = []
55 | bitmask_base = osp.join(out_base, "ins_seg")
56 | if not osp.exists(bitmask_base):
57 | os.makedirs(bitmask_base)
58 |
59 | if "bbox_results" in results and "segm_results" in results:
60 | results = [
61 | [bbox, segm]
62 | for bbox, segm in zip(results["bbox_results"], results["segm_results"])
63 | ]
64 |
65 | track_dicts = []
66 | img_names = [dataset.data_infos[idx]["file_name"] for idx in range(len(results))]
67 |
68 |     print("\nStart converting to BDD100K instance segmentation format")
69 | ann_id = 0
70 | for idx, [bboxes_list, segms_list] in enumerate(results):
71 | index = 0
72 | frame = Frame(name=img_names[idx], labels=[])
73 | track_dict = {}
74 | for cls_, (bboxes, segms) in enumerate(zip(bboxes_list, segms_list)):
75 | for bbox, segm in zip(bboxes, segms):
76 | ann_id += 1
77 | index += 1
78 | label = Label(id=str(ann_id), index=index, score=bbox[-1])
79 | frame.labels.append(label)
80 | instance = {"bbox": bbox, "segm": segm, "label": cls_}
81 | track_dict[index] = instance
82 |
83 | bdd100k.append(frame)
84 | track_dicts.append(track_dict)
85 |
86 |     print("\nWriting the converted JSON")
87 | out_path = osp.join(out_base, "ins_seg.json")
88 | save(out_path, bdd100k)
89 |
90 | mask_merge_parallel(track_dicts, img_names, bitmask_base, nproc)
91 |
92 |
93 | def box_track_to_bdd100k(dataset, results, out_base, nproc):
94 | bdd100k = []
95 | track_base = osp.join(out_base, "box_track")
96 | if not osp.exists(track_base):
97 | os.makedirs(track_base)
98 |
99 |     print("\nStart converting to BDD100K box tracking format")
100 | for idx, track_dict in tqdm(enumerate(results["track_results"])):
101 | img_name = dataset.data_infos[idx]["file_name"]
102 | frame_index = dataset.data_infos[idx]["frame_id"]
103 | vid_name = os.path.split(img_name)[0]
104 | frame = Frame(
105 | name=img_name, video_name=vid_name, frame_index=frame_index, labels=[]
106 | )
107 |
108 | for id_, instance in track_dict.items():
109 | bbox = instance["bbox"]
110 | cls_ = instance["label"]
111 | label = Label(
112 | id=id_,
113 | score=bbox[-1],
114 | box2d=bbox_to_box2d(xyxy2xywh(bbox)),
115 | category=CATEGORIES[cls_ + 1],
116 | )
117 | frame.labels.append(label)
118 | bdd100k.append(frame)
119 |
120 |     print("\nWriting the converted JSON")
121 | out_path = osp.join(out_base, "box_track.json")
122 | save(out_path, bdd100k)
123 |
124 |
125 | def seg_track_to_bdd100k(dataset, results, out_base, nproc=4):
126 | bitmask_base = osp.join(out_base, "seg_track")
127 | if not osp.exists(bitmask_base):
128 | os.makedirs(bitmask_base)
129 |
130 |     print("\nStart converting to BDD100K seg tracking format")
131 | img_names = [
132 | dataset.data_infos[idx]["file_name"]
133 | for idx in range(len(results["track_results"]))
134 | ]
135 | mask_merge_parallel(results["track_results"], img_names, bitmask_base, nproc)
136 |
137 |
138 | def preds2bdd100k(dataset, results, tasks, out_base, *args, **kwargs):
139 | metric2func = dict(
140 | det=det_to_bdd100k,
141 | ins_seg=ins_seg_to_bdd100k,
142 | box_track=box_track_to_bdd100k,
143 | seg_track=seg_track_to_bdd100k,
144 | )
145 |
146 | for task in tasks:
147 | metric2func[task](dataset, results, out_base, *args, **kwargs)
148 |
--------------------------------------------------------------------------------
/teter/core/to_bdd100k/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | import os.path as osp
4 | import pycocotools.mask as mask_utils
5 | from functools import partial
6 | from multiprocessing import Pool
7 | from PIL import Image
8 | from tqdm import tqdm
9 |
10 | SHAPE = [720, 1280]
11 |
12 |
13 | def mask_prepare(track_dict):
14 | scores, colors, masks = [], [], []
15 | for id_, instance in track_dict.items():
16 | masks.append(mask_utils.decode(instance["segm"]))
17 | colors.append([instance["label"] + 1, 0, id_ >> 8, id_ & 255])
18 | scores.append(instance["bbox"][-1])
19 | return scores, colors, masks
20 |
21 |
22 | def mask_merge(mask_infor, img_name, bitmask_base):
23 | scores, colors, masks = mask_infor
24 | bitmask = np.zeros((*SHAPE, 4), dtype=np.uint8)
25 | sorted_idxs = np.argsort(scores)
26 | for idx in sorted_idxs:
27 | for i in range(4):
28 | bitmask[..., i] = (
29 | bitmask[..., i] * (1 - masks[idx]) + masks[idx] * colors[idx][i]
30 | )
31 | bitmask_path = osp.join(bitmask_base, img_name.replace(".jpg", ".png"))
32 | bitmask_dir = osp.split(bitmask_path)[0]
33 | if not osp.exists(bitmask_dir):
34 | os.makedirs(bitmask_dir)
35 | bitmask = Image.fromarray(bitmask)
36 | bitmask.save(bitmask_path)
37 |
38 |
39 | def mask_merge_parallel(track_dicts, img_names, bitmask_base, nproc):
40 | with Pool(nproc) as pool:
41 | print("\nCollecting mask information")
42 | mask_infors = pool.map(mask_prepare, tqdm(track_dicts))
43 | print("\nMerging overlapped masks.")
44 | pool.starmap(
45 | partial(mask_merge, bitmask_base=bitmask_base),
46 | tqdm(zip(mask_infors, img_names), total=len(mask_infors)),
47 | )
48 |
--------------------------------------------------------------------------------
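The `colors` entries built in `mask_prepare` pack each instance into the four bitmask channels: category id plus one, a zero byte, then the instance id split into high and low bytes. A sketch decoding one pixel back:

    import numpy as np

    # One RGBA-style bitmask pixel as written by mask_merge above.
    pixel = np.array([3, 0, 1, 44], dtype=np.uint8)

    category = int(pixel[0]) - 1                        # channel 0 is label + 1
    instance_id = (int(pixel[2]) << 8) | int(pixel[3])  # channels 2/3 hold the id
    assert (category, instance_id) == (2, 300)
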
/teter/core/track/__init__.py:
--------------------------------------------------------------------------------
1 | from .similarity import cal_similarity
2 | from .transforms import restore_result, track2result
3 |
4 | __all__ = ["cal_similarity", "track2result", "restore_result"]
5 |
--------------------------------------------------------------------------------
/teter/core/track/similarity.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 |
4 |
5 |
6 | def cal_similarity(key_embeds,
7 | ref_embeds,
8 | method='dot_product',
9 | temperature=-1):
10 |
11 | assert method in ['dot_product', 'cosine']
12 |
13 | if key_embeds.size(0) == 0 or ref_embeds.size(0) == 0:
14 | return torch.zeros((key_embeds.size(0), ref_embeds.size(0)),
15 | device=key_embeds.device)
16 |
17 | if method == 'cosine':
18 | key_embeds = F.normalize(key_embeds, p=2, dim=1)
19 | ref_embeds = F.normalize(ref_embeds, p=2, dim=1)
20 | dists = torch.mm(key_embeds, ref_embeds.t())
21 |         if 0 < temperature <= 1:
22 | dists /= temperature
23 | return dists
24 |
25 |     elif method == 'dot_product':
26 |         dists = torch.mm(key_embeds, ref_embeds.t())
27 |         # scale logits only when an explicit temperature > 1 is given
28 |         if temperature > 1:
29 |             dists *= temperature
30 |
31 |         return dists
34 |
--------------------------------------------------------------------------------
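A quick usage sketch for `cal_similarity`; the embedding counts and width are arbitrary:

    import torch

    from teter.core import cal_similarity

    key = torch.randn(4, 256)  # e.g. embeddings of 4 current detections
    ref = torch.randn(7, 256)  # e.g. embeddings of 7 remembered tracks

    scores = cal_similarity(key, ref, method="cosine")
    assert scores.shape == (4, 7)
    assert scores.abs().max() <= 1.0 + 1e-5  # cosine scores are bounded
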
/teter/core/track/transforms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | def track2result(bboxes, labels, ids, num_classes):
6 | valid_inds = ids > -1
7 | bboxes = bboxes[valid_inds]
8 | labels = labels[valid_inds]
9 | ids = ids[valid_inds]
10 |
11 | if bboxes.shape[0] == 0:
12 | return [np.zeros((0, 6), dtype=np.float32) for i in range(num_classes)]
13 | else:
14 | if isinstance(bboxes, torch.Tensor):
15 | bboxes = bboxes.cpu().numpy()
16 | labels = labels.cpu().numpy()
17 | ids = ids.cpu().numpy()
18 | return [
19 | np.concatenate((ids[labels == i, None], bboxes[labels == i, :]), axis=1)
20 | for i in range(num_classes)
21 | ]
22 |
23 |
24 | def restore_result(result, return_ids=False):
25 | labels = []
26 | for i, bbox in enumerate(result):
27 | labels.extend([i] * bbox.shape[0])
28 | bboxes = np.concatenate(result, axis=0).astype(np.float32)
29 | labels = np.array(labels, dtype=np.int64)
30 | if return_ids:
31 | ids = bboxes[:, 0].astype(np.int64)
32 | bboxes = bboxes[:, 1:]
33 | return bboxes, labels, ids
34 | else:
35 | return bboxes, labels
36 |
--------------------------------------------------------------------------------
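`track2result` and `restore_result` are inverses over the per-class layout, where each row is `[id, x1, y1, x2, y2, score]`. A toy round trip:

    import torch

    from teter.core import restore_result, track2result

    bboxes = torch.tensor([[0.0, 0.0, 10.0, 10.0, 0.9]])
    labels = torch.tensor([2])
    ids = torch.tensor([5])

    per_cls = track2result(bboxes, labels, ids, num_classes=3)
    assert per_cls[2].shape == (1, 6)  # the single track lands in class 2

    out_bboxes, out_labels, out_ids = restore_result(per_cls, return_ids=True)
    assert out_labels[0] == 2 and out_ids[0] == 5
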
/teter/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .visualization import imshow_mot_errors, imshow_tracks
2 |
3 | __all__ = ["imshow_tracks", "imshow_mot_errors"]
4 |
--------------------------------------------------------------------------------
/teter/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets.builder import DATASETS, PIPELINES, build_dataset
2 |
3 | from .bdd_video_dataset import BDDVideoDataset
4 | from .builder import build_dataloader
5 | from .coco_video_dataset import CocoVideoDataset
6 | from .parsers import CocoVID
7 | from .pipelines import (LoadMultiImagesFromFile, SeqCollect,
8 | SeqDefaultFormatBundle, SeqLoadAnnotations,
9 | SeqNormalize, SeqPad, SeqRandomFlip, SeqResize)
10 | from .tao_dataset import TaoDataset
11 |
12 | __all__ = [
13 | "DATASETS",
14 | "PIPELINES",
15 | "build_dataloader",
16 | "build_dataset",
17 | "CocoVID",
18 | "BDDVideoDataset",
19 | "CocoVideoDataset",
20 | "LoadMultiImagesFromFile",
21 | "SeqLoadAnnotations",
22 | "SeqResize",
23 | "SeqNormalize",
24 | "SeqRandomFlip",
25 | "SeqPad",
26 | "SeqDefaultFormatBundle",
27 | "SeqCollect",
28 | "TaoDataset",
29 | ]
30 |
--------------------------------------------------------------------------------
/teter/datasets/builder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | from functools import partial
4 | from mmcv.parallel import collate
5 | from mmcv.runner import get_dist_info
6 | from mmdet.datasets.samplers import DistributedGroupSampler, GroupSampler
7 | from torch.utils.data import DataLoader
8 |
9 | from .samplers import DistributedVideoSampler
10 |
11 |
12 | def build_dataloader(
13 | dataset,
14 | samples_per_gpu,
15 | workers_per_gpu,
16 | num_gpus=1,
17 | dist=True,
18 | shuffle=True,
19 | seed=None,
20 | **kwargs
21 | ):
22 | """Build PyTorch DataLoader.
23 |
24 | In distributed training, each GPU/process has a dataloader.
25 | In non-distributed training, there is only one dataloader for all GPUs.
26 |
27 | Args:
28 | dataset (Dataset): A PyTorch dataset.
29 | samples_per_gpu (int): Number of training samples on each GPU, i.e.,
30 | batch size of each GPU.
31 | workers_per_gpu (int): How many subprocesses to use for data loading
32 | for each GPU.
33 | num_gpus (int): Number of GPUs. Only used in non-distributed training.
34 | dist (bool): Distributed training/test or not. Default: True.
35 | shuffle (bool): Whether to shuffle the data at every epoch.
36 | Default: True.
37 | kwargs: any keyword argument to be used to initialize DataLoader
38 |
39 | Returns:
40 | DataLoader: A PyTorch dataloader.
41 | """
42 | rank, world_size = get_dist_info()
43 | if dist:
44 | if shuffle:
45 | sampler = DistributedGroupSampler(
46 | dataset, samples_per_gpu, world_size, rank
47 | )
48 | else:
49 | sampler = DistributedVideoSampler(dataset, world_size, rank, shuffle=False)
50 | batch_size = samples_per_gpu
51 | num_workers = workers_per_gpu
52 | else:
53 | sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
54 | batch_size = num_gpus * samples_per_gpu
55 | num_workers = num_gpus * workers_per_gpu
56 |
57 | init_fn = (
58 | partial(worker_init_fn, num_workers=num_workers, rank=rank, seed=seed)
59 | if seed is not None
60 | else None
61 | )
62 |
63 | data_loader = DataLoader(
64 | dataset,
65 | batch_size=batch_size,
66 | sampler=sampler,
67 | num_workers=num_workers,
68 | collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
69 | pin_memory=False,
70 | worker_init_fn=init_fn,
71 | **kwargs
72 | )
73 |
74 | return data_loader
75 |
76 |
77 | def worker_init_fn(worker_id, num_workers, rank, seed):
78 | # The seed of each worker equals to
79 | # num_worker * rank + worker_id + user_seed
80 | worker_seed = num_workers * rank + worker_id + seed
81 | np.random.seed(worker_seed)
82 | random.seed(worker_seed)
83 |
--------------------------------------------------------------------------------
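A non-distributed usage sketch of `build_dataloader`; the config path is illustrative:

    from mmcv import Config
    from mmdet.datasets import build_dataset

    from teter.datasets import build_dataloader

    cfg = Config.fromfile("configs/bdd100k/cem_bdd.py")
    dataset = build_dataset(cfg.data.train)
    loader = build_dataloader(
        dataset,
        samples_per_gpu=2,
        workers_per_gpu=2,
        num_gpus=1,
        dist=False,
        seed=42,
    )
    for data_batch in loader:
        break  # one collated batch, 2 samples per GPU
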
/teter/datasets/parsers/__init__.py:
--------------------------------------------------------------------------------
1 | from .coco_api import COCO, COCOeval
2 | from .coco_video_parser import CocoVID
3 |
4 | __all__ = ["COCO", "COCOeval", "CocoVID"]
5 |
--------------------------------------------------------------------------------
/teter/datasets/parsers/coco_api.py:
--------------------------------------------------------------------------------
1 | # This file adds snake-case aliases for the COCO API.
2 |
3 | import pycocotools
4 | import warnings
5 | from pycocotools.coco import COCO as _COCO
6 | from pycocotools.cocoeval import COCOeval as _COCOeval
7 |
8 |
9 | class COCO(_COCO):
10 | """This class is almost the same as official pycocotools package.
11 |
12 | It implements some snake case function aliases. So that the COCO class has
13 | the same interface as LVIS class.
14 | """
15 |
16 | def __init__(self, annotation_file=None):
17 | if getattr(pycocotools, "__version__", "0") >= "12.0.2":
18 | warnings.warn(
19 | 'mmpycocotools is deprecated. Please install official pycocotools by "pip install pycocotools"', # noqa: E501
20 | UserWarning,
21 | )
22 | super().__init__(annotation_file=annotation_file)
23 | self.img_ann_map = self.imgToAnns
24 | self.cat_img_map = self.catToImgs
25 |
26 | def get_ann_ids(self, img_ids=[], cat_ids=[], area_rng=[], iscrowd=None):
27 | return self.getAnnIds(img_ids, cat_ids, area_rng, iscrowd)
28 |
29 | def get_cat_ids(self, cat_names=[], sup_names=[], cat_ids=[]):
30 | return self.getCatIds(cat_names, sup_names, cat_ids)
31 |
32 | def get_img_ids(self, img_ids=[], cat_ids=[]):
33 | return self.getImgIds(img_ids, cat_ids)
34 |
35 | def load_anns(self, ids):
36 | return self.loadAnns(ids)
37 |
38 | def load_cats(self, ids):
39 | return self.loadCats(ids)
40 |
41 | def load_imgs(self, ids):
42 | return self.loadImgs(ids)
43 |
44 |
45 | # just for the ease of import
46 | COCOeval = _COCOeval
47 |
--------------------------------------------------------------------------------
/teter/datasets/parsers/coco_video_parser.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import defaultdict
3 | from pycocotools.coco import _isArrayLike
4 |
5 | from .coco_api import COCO
6 |
7 |
8 | class CocoVID(COCO):
9 | def __init__(self, annotation_file=None):
10 | assert annotation_file, "Annotation file must be provided."
11 | super(CocoVID, self).__init__(annotation_file=annotation_file)
12 |
13 | def createIndex(self):
14 | print("creating index...")
15 | anns, cats, imgs, vids = {}, {}, {}, {}
16 | imgToAnns, catToImgs, vidToImgs = (
17 | defaultdict(list),
18 | defaultdict(list),
19 | defaultdict(list),
20 | )
21 |
22 | if "videos" in self.dataset:
23 | for video in self.dataset["videos"]:
24 | vids[video["id"]] = video
25 |
26 | if "annotations" in self.dataset:
27 | for ann in self.dataset["annotations"]:
28 | imgToAnns[ann["image_id"]].append(ann)
29 | anns[ann["id"]] = ann
30 |
31 | if "images" in self.dataset:
32 | for img in self.dataset["images"]:
33 | vidToImgs[img["video_id"]].append(img)
34 | imgs[img["id"]] = img
35 |
36 | if "categories" in self.dataset:
37 | for cat in self.dataset["categories"]:
38 | cats[cat["id"]] = cat
39 |
40 | if "annotations" in self.dataset and "categories" in self.dataset:
41 | for ann in self.dataset["annotations"]:
42 | catToImgs[ann["category_id"]].append(ann["image_id"])
43 |
44 | print("index created!")
45 |
46 | self.anns = anns
47 | self.imgToAnns = imgToAnns
48 | self.catToImgs = catToImgs
49 | self.imgs = imgs
50 | self.cats = cats
51 | self.videos = vids
52 | self.vidToImgs = vidToImgs
53 |
54 | def get_vid_ids(self, vidIds=[]):
55 | vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
56 |
57 | if len(vidIds) == 0:
58 | ids = self.videos.keys()
59 | else:
60 | ids = set(vidIds)
61 |
62 | return list(ids)
63 |
64 | def get_img_ids_from_vid(self, vidId):
65 | img_infos = self.vidToImgs[vidId]
66 |         ids = list(np.zeros([len(img_infos)], dtype=int))  # np.int was removed in NumPy >= 1.24
67 | for img_info in img_infos:
68 | ids[img_info["frame_id"]] = img_info["id"]
69 | return ids
70 |
71 | def load_vids(self, ids=[]):
72 | if _isArrayLike(ids):
73 | return [self.videos[id] for id in ids]
74 |         elif isinstance(ids, int):
75 | return [self.videos[ids]]
76 |
--------------------------------------------------------------------------------
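A usage sketch for `CocoVID`; the annotation path is illustrative, and any COCO-VID style JSON with a `videos` section works:

    from teter.datasets.parsers import CocoVID

    coco_vid = CocoVID("data/tao/annotations/validation.json")  # illustrative

    for vid_id in coco_vid.get_vid_ids():
        img_ids = coco_vid.get_img_ids_from_vid(vid_id)  # ordered by frame_id
        ann_ids = coco_vid.get_ann_ids(img_ids=img_ids)
        anns = coco_vid.load_anns(ann_ids)
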
/teter/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from .formatting import SeqCollect, SeqDefaultFormatBundle, VideoCollect
2 | from .h5backend import HDF5Backend
3 | from .loading import LoadMultiImagesFromFile, SeqLoadAnnotations
4 | from .transforms import (SeqNormalize, SeqPad, SeqPhotoMetricDistortion,
5 | SeqRandomCrop, SeqRandomFlip, SeqResize)
6 |
7 | __all__ = [
8 | "LoadMultiImagesFromFile",
9 | "SeqLoadAnnotations",
10 | "SeqResize",
11 | "SeqNormalize",
12 | "SeqRandomFlip",
13 | "SeqPad",
14 | "SeqDefaultFormatBundle",
15 | "SeqCollect",
16 | "VideoCollect",
17 | "SeqPhotoMetricDistortion",
18 | "SeqRandomCrop",
19 | "HDF5Backend",
20 | ]
21 |
--------------------------------------------------------------------------------
/teter/datasets/pipelines/formatting.py:
--------------------------------------------------------------------------------
1 | from mmcv.parallel import DataContainer as DC
2 | from mmdet.datasets.builder import PIPELINES
3 | from mmdet.datasets.pipelines import Collect, DefaultFormatBundle, to_tensor
4 |
5 |
6 | @PIPELINES.register_module()
7 | class SeqDefaultFormatBundle(DefaultFormatBundle):
8 | def __call__(self, results):
9 | outs = []
10 | for _results in results:
11 | _results = super().__call__(_results)
12 | _results["gt_match_indices"] = DC(to_tensor(_results["gt_match_indices"]))
13 | outs.append(_results)
14 | return outs
15 |
16 |
17 | @PIPELINES.register_module()
18 | class VideoCollect(Collect):
19 | """Collect data from the loader relevant to the specific task.
20 |
21 | This is usually the last stage of the data loader pipeline. Typically keys
22 | is set to some subset of "img", "proposals", "gt_bboxes",
23 | "gt_bboxes_ignore", "gt_labels", and/or "gt_masks".
24 |
25 | The "img_meta" item is always populated. The contents of the "img_meta"
26 | dictionary depends on "meta_keys". By default this includes:
27 |
28 | - "img_shape": shape of the image input to the network as a tuple \
29 | (h, w, c). Note that images may be zero padded on the \
30 | bottom/right if the batch tensor is larger than this shape.
31 |
32 | - "scale_factor": a float indicating the preprocessing scale
33 |
34 | - "flip": a boolean indicating if image flip transform was used
35 |
36 | - "filename": path to the image file
37 |
38 | - "ori_shape": original shape of the image as a tuple (h, w, c)
39 |
40 | - "pad_shape": image shape after padding
41 |
42 | - "img_norm_cfg": a dict of normalization information:
43 |
44 | - mean - per channel mean subtraction
45 | - std - per channel std divisor
46 | - to_rgb - bool indicating if bgr was converted to rgb
47 |
48 | Args:
49 | keys (Sequence[str]): Keys of results to be collected in ``data``.
50 | meta_keys (Sequence[str], optional): Meta keys to be converted to
51 | ``mmcv.DataContainer`` and collected in ``data[img_metas]``.
52 | Default: ``('filename', 'ori_filename', 'ori_shape', 'img_shape',
53 | 'pad_shape', 'scale_factor', 'flip', 'flip_direction',
54 | 'img_norm_cfg')``
55 | """
56 |
57 | def __init__(
58 | self,
59 | keys,
60 | meta_keys=(
61 | "filename",
62 | "ori_filename",
63 | "ori_shape",
64 | "img_shape",
65 | "pad_shape",
66 | "scale_factor",
67 | "flip",
68 | "flip_direction",
69 | "img_norm_cfg",
70 | "frame_id",
71 | ),
72 | ):
73 | self.keys = keys
74 | self.meta_keys = meta_keys
75 |
76 |
77 | @PIPELINES.register_module(force=True)
78 | class SeqCollect(VideoCollect):
79 | def __init__(
80 | self,
81 | keys,
82 | ref_prefix="ref",
83 | meta_keys=(
84 | "filename",
85 | "ori_filename",
86 | "ori_shape",
87 | "img_shape",
88 | "pad_shape",
89 | "scale_factor",
90 | "flip",
91 | "flip_direction",
92 | "img_norm_cfg",
93 | ),
94 | ):
95 | self.keys = keys
96 | self.ref_prefix = ref_prefix
97 | self.meta_keys = meta_keys
98 |
99 | def __call__(self, results):
100 | outs = []
101 | for _results in results:
102 | _results = super().__call__(_results)
103 | outs.append(_results)
104 |
105 | assert len(outs) == 2
106 | data = {}
107 | data.update(outs[0])
108 | for k, v in outs[1].items():
109 | data[f"{self.ref_prefix}_{k}"] = v
110 |
111 | return data
112 |
--------------------------------------------------------------------------------
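What `SeqCollect.__call__` produces for a key/reference pair, replayed standalone on toy dicts (real pipeline outputs are `DataContainer`s, not strings):

    outs = [
        {"img": "key_img", "img_metas": "key_meta"},  # key frame
        {"img": "ref_img", "img_metas": "ref_meta"},  # reference frame
    ]

    data = {}
    data.update(outs[0])
    for k, v in outs[1].items():
        data[f"ref_{k}"] = v  # ref_prefix="ref"

    assert set(data) == {"img", "img_metas", "ref_img", "ref_img_metas"}
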
/teter/datasets/pipelines/h5backend.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import numpy as np
3 | import os
4 | from mmcv import BaseStorageBackend, FileClient
5 |
6 |
7 | @FileClient.register_backend("hdf5", force=True)
8 | class HDF5Backend(BaseStorageBackend):
9 | def __init__(self, img_db_path=None, vid_db_path=None, type="tao", **kwargs):
10 |
11 | # h5 file path
12 | self.img_db_path = img_db_path
13 | self.vid_db_path = vid_db_path
14 |
15 | self.img_client = None
16 | self.vid_client = None
17 | self.type = type
18 |
19 | def get(self, filepath):
20 |         """Get values according to the filepath.
21 |
22 |         Args:
23 |             filepath (str | obj:`Path`): The key into the HDF5 database.
24 |         """
25 | filepath = str(filepath)
26 | if self.type == "tao":
27 | if self.img_client is None and self.img_db_path is not None:
28 | self.img_client = h5py.File(self.img_db_path, "r")
29 | key_list = filepath.split("/")
30 | value_buf = np.array(
31 | self.img_client[key_list[-4]][key_list[-3]][key_list[-2]][key_list[-1]]
32 | )
33 | elif self.type == "key":
34 | if self.img_client is None and self.img_db_path is not None:
35 | self.img_client = h5py.File(self.img_db_path, "r")
36 | value_buf = self.img_client[filepath]
37 | elif self.type == "lvis":
38 | if self.img_client is None and self.img_db_path is not None:
39 | self.img_client = h5py.File(self.img_db_path, "r")
40 | filefolder, filename = os.path.split(filepath)
41 | value_buf = np.array(self.img_client[filename])
42 | elif self.type == "lasot":
43 | if self.img_client is None and self.img_db_path is not None:
44 | self.img_client = h5py.File(self.img_db_path, "r")
45 | key_list = filepath.split("/")
46 | value_buf = np.array(
47 | self.img_client[key_list[-4]][key_list[-3]][key_list[-2]][key_list[-1]][
48 | "raw"
49 | ]
50 | )[0]
51 | elif self.type == "bdd":
52 | filefolder, filename = os.path.split(filepath)
53 | path, group_name = os.path.split(filefolder)
54 |
55 | if self.vid_client is None and self.vid_db_path is not None:
56 | self.vid_client = h5py.File(self.vid_db_path, "r")
57 | if self.img_client is None and self.img_db_path is not None:
58 | self.img_client = h5py.File(self.img_db_path, "r")
59 | if "/100k/" in filefolder:
60 | value_buf = np.array(self.img_client[filename])
61 | else:
62 | group = self.vid_client[group_name]
63 | value_buf = np.array(group[filename])
64 |
65 | return value_buf
66 |
67 | def get_text(self, filepath):
68 | raise NotImplementedError
69 |
--------------------------------------------------------------------------------
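Reads go through `mmcv.FileClient`, which forwards its kwargs to the registered backend. The db path and key below are illustrative; the key must follow the per-dataset layout handled in `get` above (here, the `/100k/` image branch of the `bdd` type):

    from mmcv import FileClient

    client = FileClient(
        backend="hdf5",
        img_db_path="data/hdf5/bdd100k_100k_images.h5",  # illustrative path
        type="bdd",
    )
    # "/100k/" in the path routes the read to the image db, keyed by filename.
    buf = client.get("bdd100k/images/100k/train/0a0a0b1a-7c39d841.jpg")
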
/teter/datasets/pipelines/loading.py:
--------------------------------------------------------------------------------
1 | from mmdet.datasets.builder import PIPELINES
2 | from mmdet.datasets.pipelines import LoadAnnotations, LoadImageFromFile
3 |
4 |
5 | @PIPELINES.register_module()
6 | class LoadMultiImagesFromFile(LoadImageFromFile):
7 | def __init__(self, *args, **kwargs):
8 | super().__init__(*args, **kwargs)
9 |
10 | def __call__(self, results):
11 | outs = []
12 | for _results in results:
13 | _results = super().__call__(_results)
14 | outs.append(_results)
15 | return outs
16 |
17 |
18 | @PIPELINES.register_module()
19 | class SeqLoadAnnotations(LoadAnnotations):
20 | def __init__(self, with_ins_id=False, *args, **kwargs):
21 | super().__init__(*args, **kwargs)
22 | self.with_ins_id = with_ins_id
23 |
24 | def _load_ins_ids(self, results):
25 |         """Private function to load instance-id (track) annotations.
26 |
27 | Args:
28 | results (dict): Result dict from :obj:`mmdet.CustomDataset`.
29 |
30 | Returns:
31 |             dict: The dict containing the loaded ``gt_match_indices``.
32 | """
33 |
34 | results["gt_match_indices"] = results["ann_info"]["match_indices"].copy()
35 |
36 | return results
37 |
38 | def __call__(self, results):
39 | outs = []
40 | for _results in results:
41 | _results = super().__call__(_results)
42 | if self.with_ins_id:
43 | _results = self._load_ins_ids(_results)
44 | outs.append(_results)
45 | return outs
46 |
--------------------------------------------------------------------------------
/teter/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed_video_sampler import DistributedVideoSampler
2 |
3 | __all__ = ["DistributedVideoSampler"]
4 |
--------------------------------------------------------------------------------
/teter/datasets/samplers/distributed_video_sampler.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data import DistributedSampler as _DistributedSampler
3 |
4 |
5 | class DistributedVideoSampler(_DistributedSampler):
6 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=False):
7 | super().__init__(dataset, num_replicas=num_replicas, rank=rank)
8 | self.shuffle = shuffle
9 | assert not self.shuffle, "Specific for video sequential testing."
10 | self.num_samples = len(dataset)
11 |
12 | first_frame_indices = []
13 | for i, img_info in enumerate(self.dataset.data_infos):
14 | if img_info["frame_id"] == 0:
15 | first_frame_indices.append(i)
16 |
17 |         chunks = np.array_split(first_frame_indices, self.num_replicas)
18 | split_flags = [c[0] for c in chunks]
19 | split_flags.append(self.num_samples)
20 |
21 | self.indices = [
22 | list(range(split_flags[i], split_flags[i + 1]))
23 | for i in range(self.num_replicas)
24 | ]
25 |
26 | def __iter__(self):
27 | indices = self.indices[self.rank]
28 | return iter(indices)
29 |
--------------------------------------------------------------------------------
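The constructor's split logic, replayed on toy numbers: videos are assigned to ranks by chunking the first-frame indices, so each rank reads one contiguous, video-aligned slice of the dataset:

    import numpy as np

    first_frame_indices = [0, 30, 75, 120]  # four videos in a 150-frame dataset
    num_samples, num_replicas = 150, 2

    chunks = np.array_split(first_frame_indices, num_replicas)
    split_flags = [int(c[0]) for c in chunks] + [num_samples]
    indices = [
        list(range(split_flags[i], split_flags[i + 1]))
        for i in range(num_replicas)
    ]
    assert indices[0] == list(range(75))        # rank 0: videos 1-2
    assert indices[1] == list(range(75, 150))   # rank 1: videos 3-4
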
/teter/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import MODELS, TRACKERS, build_model, build_tracker
2 | from .losses import * # noqa: F401,F403
3 | from .mot import * # noqa: F401,F403
4 | from .roi_heads import * # noqa: F401,F403
5 | from .trackers import * # noqa: F401,F403
6 |
7 | __all__ = ["MODELS", "TRACKERS", "build_model", "build_tracker"]
8 |
--------------------------------------------------------------------------------
/teter/models/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.cnn import build_model_from_cfg as build
2 | from mmcv.utils import Registry
3 |
4 | MODELS = Registry("model")
5 | TRACKERS = Registry("tracker")
6 |
7 |
8 | def build_tracker(cfg):
9 | """Build tracker."""
10 | return build(cfg, TRACKERS)
11 |
12 |
13 | def build_model(cfg, train_cfg=None, test_cfg=None):
14 | """Build model."""
15 | return build(cfg, MODELS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
16 |
--------------------------------------------------------------------------------
/teter/models/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .l2_loss import L2Loss
2 | from .multipos_cross_entropy_loss import MultiPosCrossEntropyLoss
3 | from .unbiased_supcontrat import UnbiasedSupConLoss
4 |
5 | __all__ = ["L2Loss", "MultiPosCrossEntropyLoss", "UnbiasedSupConLoss"]
6 |
--------------------------------------------------------------------------------
/teter/models/losses/l2_loss.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from mmdet.models import LOSSES, weighted_loss
5 |
6 |
7 | @weighted_loss
8 | def l2_loss(pred, target):
9 | """L2 loss.
10 |
11 | Args:
12 | pred (torch.Tensor): The prediction.
13 | target (torch.Tensor): The learning target of the prediction.
14 |
15 | Returns:
16 | torch.Tensor: Calculated loss
17 | """
18 | assert pred.size() == target.size() and target.numel() > 0
19 |     loss = (pred - target) ** 2  # abs() is redundant for real tensors
20 | return loss
21 |
22 |
23 | @LOSSES.register_module(force=True)
24 | class L2Loss(nn.Module):
25 | """L2 loss.
26 |
27 | Args:
28 | reduction (str, optional): The method to reduce the loss.
29 | Options are "none", "mean" and "sum".
30 | loss_weight (float, optional): The weight of loss.
31 | """
32 |
33 | def __init__(
34 | self,
35 | neg_pos_ub=-1,
36 | pos_margin=-1,
37 | neg_margin=-1,
38 | hard_mining=False,
39 | reduction="mean",
40 | loss_weight=1.0,
41 | ):
42 | super(L2Loss, self).__init__()
43 | self.neg_pos_ub = neg_pos_ub
44 | self.pos_margin = pos_margin
45 | self.neg_margin = neg_margin
46 | self.hard_mining = hard_mining
47 | self.reduction = reduction
48 | self.loss_weight = loss_weight
49 |
50 | def forward(
51 | self, pred, target, weight=None, avg_factor=None, reduction_override=None
52 | ):
53 | """Forward function.
54 |
55 | Args:
56 | pred (torch.Tensor): The prediction.
57 | target (torch.Tensor): The learning target of the prediction.
58 | weight (torch.Tensor, optional): The weight of loss for each
59 | prediction. Defaults to None.
60 | avg_factor (int, optional): Average factor that is used to average
61 | the loss. Defaults to None.
62 | reduction_override (str, optional): The reduction method used to
63 | override the original reduction method of the loss.
64 | Defaults to None.
65 | """
66 | assert reduction_override in (None, "none", "mean", "sum")
67 | reduction = reduction_override if reduction_override else self.reduction
68 | pred, weight, avg_factor = self.update_weight(pred, target, weight, avg_factor)
69 | loss_bbox = self.loss_weight * l2_loss(
70 | pred, target, weight, reduction=reduction, avg_factor=avg_factor
71 | )
72 | return loss_bbox
73 |
74 | def update_weight(self, pred, target, weight, avg_factor):
75 | if weight is None:
76 | weight = target.new_ones(target.size())
77 | invalid_inds = weight <= 0
78 | target[invalid_inds] = -1
79 | pos_inds = target == 1
80 | neg_inds = target == 0
81 |
82 | if self.pos_margin > 0:
83 | pred[pos_inds] -= self.pos_margin
84 | if self.neg_margin > 0:
85 | pred[neg_inds] -= self.neg_margin
86 | pred = torch.clamp(pred, min=0, max=1)
87 |
88 | num_pos = int((target == 1).sum())
89 | num_neg = int((target == 0).sum())
90 | if self.neg_pos_ub > 0 and num_neg / num_pos > self.neg_pos_ub:
91 | num_neg = num_pos * self.neg_pos_ub
92 | neg_idx = torch.nonzero(target == 0, as_tuple=False)
93 |
94 | if self.hard_mining:
95 | costs = l2_loss(pred, target, reduction="none")[
96 | neg_idx[:, 0], neg_idx[:, 1]
97 | ].detach()
98 | neg_idx = neg_idx[costs.topk(num_neg)[1], :]
99 | else:
100 | neg_idx = self.random_choice(neg_idx, num_neg)
101 |
102 | new_neg_inds = neg_inds.new_zeros(neg_inds.size()).bool()
103 | new_neg_inds[neg_idx[:, 0], neg_idx[:, 1]] = True
104 |
105 | invalid_neg_inds = torch.logical_xor(neg_inds, new_neg_inds)
106 | weight[invalid_neg_inds] = 0
107 |
108 | avg_factor = (weight > 0).sum()
109 | return pred, weight, avg_factor
110 |
111 | @staticmethod
112 | def random_choice(gallery, num):
113 | """Random select some elements from the gallery.
114 |
115 | It seems that Pytorch's implementation is slower than numpy so we use
116 | numpy to randperm the indices.
117 | """
118 | assert len(gallery) >= num
119 | if isinstance(gallery, list):
120 | gallery = np.array(gallery)
121 | cands = np.arange(len(gallery))
122 | np.random.shuffle(cands)
123 | rand_inds = cands[:num]
124 | if not isinstance(gallery, np.ndarray):
125 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
126 | return gallery[rand_inds]
127 |
--------------------------------------------------------------------------------
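A toy call of `L2Loss` on a 2x2 match matrix; with the default margins and `neg_pos_ub=-1`, it reduces to a weighted mean squared error:

    import torch

    from teter.models.losses import L2Loss

    loss_fn = L2Loss(reduction="mean", loss_weight=1.0)
    pred = torch.tensor([[0.8, 0.1], [0.3, 0.9]])
    target = torch.tensor([[1.0, 0.0], [0.0, 1.0]])  # 1 = positive pair
    loss = loss_fn(pred, target)  # MSE over the match matrix
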
/teter/models/losses/multipos_cross_entropy_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from mmdet.models import LOSSES, weight_reduce_loss
4 |
5 |
6 | def multi_pos_cross_entropy(
7 | pred, label,
8 | weight=None,
9 | reduction="mean",
10 | avg_factor=None,
11 | version="ori",
12 | pos_normalize=True
13 | ):
14 |
15 | if version == "unbiased":
16 |
17 | valid_mask = label.sum(1) != 0
18 | pred = pred[valid_mask]
19 | label = label[valid_mask]
20 | weight = weight[valid_mask]
21 | logits_max, _ = torch.max(pred, dim=1, keepdim=True)
22 | logits = pred - logits_max.detach()
23 |
24 | if pos_normalize:
25 | pos_norm = torch.div(label, label.sum(1).reshape(-1, 1))
26 | exp_logits = (torch.exp(logits)) * pos_norm + (
27 | torch.exp(logits)
28 | ) * torch.logical_not(label)
29 | else:
30 | exp_logits = torch.exp(logits)
31 | exp_logits_input = exp_logits.sum(1, keepdim=True)
32 | log_prob = logits - torch.log(exp_logits_input)
33 |
34 | mean_log_prob_pos = (label * log_prob).sum(1) / label.sum(1)
35 | loss = -mean_log_prob_pos
36 |
37 | elif version == "ori":
38 |         # a more numerically stable implementation.
39 | pos_inds = label == 1
40 | neg_inds = label == 0
41 | pred_pos = pred * pos_inds.float()
42 | pred_neg = pred * neg_inds.float()
43 | # use -inf to mask out unwanted elements.
44 | pred_pos[neg_inds] = pred_pos[neg_inds] + float("inf")
45 | pred_neg[pos_inds] = pred_neg[pos_inds] + float("-inf")
46 |
47 | _pos_expand = torch.repeat_interleave(pred_pos, pred.shape[1], dim=1)
48 | _neg_expand = pred_neg.repeat(1, pred.shape[1])
49 |
50 | x = torch.nn.functional.pad((_neg_expand - _pos_expand), (0, 1), "constant", 0)
51 | loss = torch.logsumexp(x, dim=1)
52 |
53 | # apply weights and do the reduction
54 | if weight is not None:
55 | weight = weight.float()
56 | loss = weight_reduce_loss(
57 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor
58 | )
59 |
60 | return loss
61 |
62 |
63 | @LOSSES.register_module(force=True)
64 | class MultiPosCrossEntropyLoss(nn.Module):
65 |     def __init__(self, reduction="mean", loss_weight=1.0, version="ori"):  # only "ori" and "unbiased" are implemented
66 | super(MultiPosCrossEntropyLoss, self).__init__()
67 | self.reduction = reduction
68 | self.loss_weight = loss_weight
69 | self.version = version
70 |
71 | def forward(
72 | self,
73 | cls_score,
74 | label,
75 | weight=None,
76 | avg_factor=None,
77 | reduction_override=None,
78 | **kwargs
79 | ):
80 | assert cls_score.size() == label.size()
81 | assert reduction_override in (None, "none", "mean", "sum")
82 | reduction = reduction_override if reduction_override else self.reduction
83 | loss_cls = self.loss_weight * multi_pos_cross_entropy(
84 | cls_score,
85 | label,
86 | weight,
87 | reduction=reduction,
88 | avg_factor=avg_factor,
89 | version=self.version,
90 | **kwargs
91 | )
92 | return loss_cls
93 |
--------------------------------------------------------------------------------
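A toy call of the default `'ori'` formulation: rows are anchors, columns are candidate matches, and a label of 1 marks a positive; the loss shrinks as positives out-score negatives. Values are illustrative:

    import torch

    from teter.models.losses import MultiPosCrossEntropyLoss

    scores = torch.tensor([[2.0, 0.5, -1.0],
                           [0.1, 1.5, 0.2]])
    labels = torch.tensor([[1, 0, 0],
                           [0, 1, 1]])

    loss_fn = MultiPosCrossEntropyLoss(loss_weight=0.25, version="ori")
    loss = loss_fn(scores, labels, weight=torch.ones(2))
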
/teter/models/losses/unbiased_supcontrat.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import torch
4 | import torch.nn as nn
5 | from mmdet.models import LOSSES
6 |
7 |
8 | @LOSSES.register_module()
9 | class UnbiasedSupConLoss(nn.Module):
10 | def __init__(
11 | self,
12 | temperature=0.07,
13 | contrast_mode="all",
14 | base_temperature=0.07,
15 | pos_normalize=True,
16 | loss_weight=1,
17 | ):
18 | super(UnbiasedSupConLoss, self).__init__()
19 | self.temperature = temperature
20 | self.contrast_mode = contrast_mode
21 | self.base_temperature = base_temperature
22 | self.pos_normalize = pos_normalize
23 | self.loss_weight = loss_weight
24 |
25 | def forward(self, features, labels=None, mask=None):
26 |         """Compute the loss. If both `labels` and `mask` are None, it degenerates to SimCLR's unsupervised loss.
27 | Args:
28 | features: hidden vector of shape [bsz, n_views, ...].
29 | labels: ground truth of shape [bsz].
30 | mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
31 | has the same class as sample i. Can be asymmetric.
32 | Returns:
33 | A loss scalar.
34 | """
35 | device = torch.device("cuda") if features.is_cuda else torch.device("cpu")
36 |
37 | if len(features.shape) < 3:
38 | raise ValueError(
39 | "`features` needs to be [bsz, n_views, ...],"
40 | "at least 3 dimensions are required"
41 | )
42 | if len(features.shape) > 3:
43 | features = features.view(features.shape[0], features.shape[1], -1)
44 |
45 | batch_size = features.shape[0]
46 | if labels is not None and mask is not None:
47 | raise ValueError("Cannot define both `labels` and `mask`")
48 | elif labels is None and mask is None:
49 | mask = torch.eye(batch_size, dtype=torch.float32).to(device)
50 | elif labels is not None:
51 | labels = labels.contiguous().view(-1, 1)
52 | if labels.shape[0] != batch_size:
53 | raise ValueError("Num of labels does not match num of features")
54 | mask = torch.eq(labels, labels.T).float().to(device)
55 | valid_mask = mask.sum(1) != 1
56 | labels = labels[valid_mask]
57 | features = features[valid_mask]
58 | mask = torch.eq(labels, labels.T).float().to(device)
59 | batch_size = features.shape[0]
60 | if batch_size == 0:
61 | return torch.tensor([0.0], requires_grad=True)
62 | else:
63 | mask = mask.float().to(device)
64 |
65 | contrast_count = features.shape[1]
66 | contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
67 | if self.contrast_mode == "one":
68 | anchor_feature = features[:, 0]
69 | anchor_count = 1
70 | elif self.contrast_mode == "all":
71 | anchor_feature = contrast_feature
72 | anchor_count = contrast_count
73 | else:
74 | raise ValueError("Unknown mode: {}".format(self.contrast_mode))
75 |
76 | # compute logits
77 | anchor_dot_contrast = torch.div(
78 | torch.matmul(anchor_feature, contrast_feature.T), self.temperature
79 | )
80 | # for numerical stability
81 | if min(anchor_dot_contrast.shape) != 0:
83 | logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
84 | logits = anchor_dot_contrast - logits_max.detach()
85 | else:
86 | logits = anchor_dot_contrast
87 | # tile mask
88 | mask = mask.repeat(anchor_count, contrast_count)
89 |
90 | # mask-out self-contrast cases
91 | logits_mask = torch.scatter(
92 | torch.ones_like(mask),
93 | 1,
94 | torch.arange(batch_size * anchor_count).view(-1, 1).to(device),
95 | 0,
96 | )
97 | mask = mask * logits_mask
98 |
99 | # compute log_prob
100 | if self.pos_normalize:
101 | pos_norm = torch.div(mask, mask.sum(1).reshape(-1, 1))
102 | exp_logits = (torch.exp(logits) * logits_mask) * pos_norm + (
103 | torch.exp(logits) * logits_mask
104 | ) * torch.logical_not(mask)
105 | else:
106 | exp_logits = torch.exp(logits) * logits_mask
107 | exp_logits_input = exp_logits.sum(1, keepdim=True)
108 | log_prob = logits - torch.log(exp_logits_input)
109 |
110 | # compute mean of log-likelihood over positive
111 | mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)
112 |
113 | # loss
114 |
115 | loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos
116 | loss = loss.view(anchor_count, batch_size).mean()
117 |
118 | return loss * self.loss_weight
119 |
--------------------------------------------------------------------------------
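A usage sketch for `UnbiasedSupConLoss` with `[bsz, n_views, dim]` features; each class needs at least two members, otherwise its rows are dropped by the `valid_mask` filter above:

    import torch
    import torch.nn.functional as F

    from teter.models.losses import UnbiasedSupConLoss

    # [bsz, n_views, dim] features, L2-normalized as contrastive losses expect.
    feats = F.normalize(torch.randn(8, 2, 128), dim=-1)
    labels = torch.tensor([0, 0, 1, 1, 2, 2, 3, 3])  # two samples per class

    loss_fn = UnbiasedSupConLoss(temperature=0.07)
    loss = loss_fn(feats, labels=labels)
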
/teter/models/mot/__init__.py:
--------------------------------------------------------------------------------
1 | from .teter import TETer
2 |
3 | __all__ = ["TETer"]
4 |
--------------------------------------------------------------------------------
/teter/models/mot/teter.py:
--------------------------------------------------------------------------------
1 | import copy
2 | import numpy as np
3 | from mmdet.core import bbox2result
4 | from mmdet.models import TwoStageDetector
5 |
6 | from teter.core import imshow_tracks, restore_result, track2result
7 | from ..builder import MODELS, build_tracker
8 |
9 |
10 | @MODELS.register_module()
11 | class TETer(TwoStageDetector):
12 | def __init__(
13 | self,
14 | tracker=None,
15 | freeze_detector=False,
16 | freeze_cem=False,
17 | freeze_qd=False,
18 | method="teter",
19 | *args,
20 | **kwargs
21 | ):
22 | self.prepare_cfg(kwargs)
23 | super().__init__(*args, **kwargs)
24 | self.tracker_cfg = tracker
25 | self.method = method
27 | self.freeze_detector = freeze_detector
28 | self.freeze_cem = freeze_cem
29 | self.freeze_qd = freeze_qd
30 | if self.freeze_detector:
31 | self._freeze_detector()
32 |
33 | def _freeze_detector(self):
34 |
35 | self.detector = [
36 | self.backbone,
37 | self.neck,
38 | self.rpn_head,
39 | self.roi_head.bbox_head,
40 | ]
41 | if self.freeze_cem:
42 | self.detector.append(self.roi_head.cem_head)
43 |
44 | if self.freeze_qd:
45 | self.detector.append(self.roi_head.track_head)
46 |
47 | for model in self.detector:
48 | model.eval()
49 | for param in model.parameters():
50 | param.requires_grad = False
51 |
52 | def prepare_cfg(self, kwargs):
53 | if kwargs.get("train_cfg", False):
54 | if kwargs["train_cfg"].get("embed", None):
55 | kwargs["roi_head"]["track_train_cfg"] = kwargs["train_cfg"].get(
56 | "embed", None
57 | )
58 | if kwargs["train_cfg"].get("cem", None):
59 | kwargs["roi_head"]["cem_train_cfg"] = kwargs["train_cfg"].get(
60 | "cem", None
61 | )
62 |
63 | def init_tracker(self):
64 | self.tracker = build_tracker(self.tracker_cfg)
65 |
66 | def forward_train(
67 | self,
68 | img,
69 | img_metas,
70 | gt_bboxes,
71 | gt_labels,
72 | gt_match_indices,
73 | ref_img,
74 | ref_img_metas,
75 | ref_gt_bboxes,
76 | ref_gt_labels,
77 | ref_gt_match_indices,
78 | gt_bboxes_ignore=None,
79 | gt_masks=None,
80 | ref_gt_bboxes_ignore=None,
81 | ref_gt_masks=None,
82 | **kwargs
83 | ):
84 | x = self.extract_feat(img)
85 |
86 | losses = dict()
87 |
88 | # RPN forward and loss
89 | proposal_cfg = self.train_cfg.get("rpn_proposal", self.test_cfg.rpn)
90 | rpn_losses, proposal_list = self.rpn_head.forward_train(
91 | x,
92 | img_metas,
93 | gt_bboxes,
94 | gt_labels=None,
95 | gt_bboxes_ignore=gt_bboxes_ignore,
96 | proposal_cfg=proposal_cfg,
97 | )
98 | losses.update(rpn_losses)
99 |
100 | ref_x = self.extract_feat(ref_img)
101 | ref_proposals = self.rpn_head.simple_test_rpn(ref_x, ref_img_metas)
102 |
103 | roi_losses = self.roi_head.forward_train(
104 | x,
105 | img_metas,
106 | proposal_list,
107 | gt_bboxes,
108 | gt_labels,
109 | gt_match_indices,
110 | ref_x,
111 | ref_img_metas,
112 | ref_proposals,
113 | ref_gt_bboxes,
114 | ref_gt_labels,
115 | gt_bboxes_ignore,
116 | gt_masks,
117 | ref_gt_bboxes_ignore,
118 | **kwargs
119 | )
120 | losses.update(roi_losses)
121 |
122 | return losses
123 |
124 | def simple_test(self, img, img_metas, rescale=False):
125 |
126 | assert self.roi_head.with_track, "Track head must be implemented."
127 | frame_id = img_metas[0].get("frame_id", -1)
128 | if frame_id == 0:
129 | self.init_tracker()
130 |
131 | x = self.extract_feat(img)
132 | proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)
133 |
134 | outputs = self.roi_head.simple_test(x, img_metas, proposal_list, rescale)
135 | if len(outputs) == 4:
136 | det_bboxes, det_labels, cem_feats, track_feats = outputs
137 | elif len(outputs) == 3:
138 | det_bboxes, det_labels, track_feats = outputs
139 | cem_feats = copy.deepcopy(track_feats)
140 |
141 | if track_feats is not None:
142 |
143 | bboxes, labels, ids = self.tracker.match(
144 | bboxes=det_bboxes,
145 | labels=det_labels,
146 | embeds=track_feats,
147 | cls_embeds=cem_feats,
148 | frame_id=frame_id,
149 | method=self.method,
150 | )
151 |
152 | bbox_result = bbox2result(
153 | det_bboxes, det_labels, self.roi_head.bbox_head.num_classes
154 | )
155 |
156 | if track_feats is not None:
157 | track_result = track2result(
158 | bboxes, labels, ids, self.roi_head.bbox_head.num_classes
159 | )
160 | else:
161 | track_result = [
162 | np.zeros((0, 6), dtype=np.float32)
163 | for i in range(self.roi_head.bbox_head.num_classes)
164 | ]
165 | return dict(bbox_results=bbox_result, track_results=track_result)
166 |
167 | def show_result(
168 | self,
169 | img,
170 | result,
171 | thickness=1,
172 | font_scale=0.5,
173 | show=False,
174 | out_file=None,
175 | wait_time=0,
176 | backend="cv2",
177 | **kwargs
178 | ):
179 | """Visualize tracking results.
180 |
181 | Args:
182 | img (str | ndarray): Filename of loaded image.
183 | result (dict): Tracking result.
184 | The value of key 'track_results' is ndarray with shape (n, 6)
185 | in [id, tl_x, tl_y, br_x, br_y, score] format.
186 | The value of key 'bbox_results' is ndarray with shape (n, 5)
187 | in [tl_x, tl_y, br_x, br_y, score] format.
188 | thickness (int, optional): Thickness of lines. Defaults to 1.
189 | font_scale (float, optional): Font scales of texts. Defaults
190 | to 0.5.
191 | show (bool, optional): Whether show the visualizations on the
192 | fly. Defaults to False.
193 |             out_file (str | None, optional): Output filename. Defaults to None.
194 |             wait_time (int, optional): Value of waitKey param. Defaults to 0.
195 |             backend (str, optional): Backend to draw the bounding boxes,
196 |                 options are `cv2` and `plt`. Defaults to 'cv2'.
197 | Returns:
198 | ndarray: Visualized image.
199 | """
200 | assert isinstance(result, dict)
201 | track_result = result.get("track_results", None)
202 | bboxes, labels, ids = restore_result(track_result, return_ids=True)
203 | img = imshow_tracks(
204 | img,
205 | bboxes,
206 | labels,
207 | ids,
208 | classes=self.CLASSES,
209 | thickness=thickness,
210 | font_scale=font_scale,
211 | show=show,
212 | out_file=out_file,
213 | wait_time=wait_time,
214 | backend=backend,
215 | )
216 | return img
217 |
--------------------------------------------------------------------------------
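How `prepare_cfg` reroutes the top-level `train_cfg` sub-dicts into the RoI head before the parent constructor builds it, replayed on a toy kwargs dict (the sub-config contents are placeholders):

    kwargs = {
        "train_cfg": {"embed": {"assigner": "..."}, "cem": {"assigner": "..."}},
        "roi_head": {},
    }

    if kwargs.get("train_cfg", False):
        if kwargs["train_cfg"].get("embed", None):
            kwargs["roi_head"]["track_train_cfg"] = kwargs["train_cfg"]["embed"]
        if kwargs["train_cfg"].get("cem", None):
            kwargs["roi_head"]["cem_train_cfg"] = kwargs["train_cfg"]["cem"]

    assert set(kwargs["roi_head"]) == {"track_train_cfg", "cem_train_cfg"}
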
/teter/models/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .teter_roi_head import TETerRoIHead
2 | from .track_heads import QuasiDenseEmbedHead
3 |
4 | __all__ = ["QuasiDenseEmbedHead", "TETerRoIHead"]
5 |
--------------------------------------------------------------------------------
/teter/models/roi_heads/track_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .cem_head import ClsExemplarHead
2 | from .quasi_dense_embed_head import QuasiDenseEmbedHead
3 |
4 | __all__ = ["QuasiDenseEmbedHead", "ClsExemplarHead"]
5 |
--------------------------------------------------------------------------------
/teter/models/roi_heads/track_heads/cem_head.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from mmcv.cnn import ConvModule
5 | from mmdet.models import HEADS, build_loss
6 |
7 | from teter.core import cal_similarity
8 |
9 |
10 | @HEADS.register_module(force=True)
11 | class ClsExemplarHead(nn.Module):
12 | def __init__(
13 | self,
14 | num_convs=4,
15 | num_fcs=1,
16 | roi_feat_size=7,
17 | in_channels=256,
18 | conv_out_channels=256,
19 | fc_out_channels=1024,
20 | embed_channels=256,
21 | conv_cfg=None,
22 | norm_cfg=None,
23 | softmax_temp=-1,
24 | loss_track=dict(type="MultiPosCrossEntropyLoss", loss_weight=1),
25 | ):
26 | super(ClsExemplarHead, self).__init__()
27 |
28 | self.num_convs = num_convs
29 | self.num_fcs = num_fcs
30 | self.roi_feat_size = roi_feat_size
31 | self.in_channels = in_channels
32 | self.conv_out_channels = conv_out_channels
33 | self.fc_out_channels = fc_out_channels
34 | self.embed_channels = embed_channels
35 | self.conv_cfg = conv_cfg
36 | self.norm_cfg = norm_cfg
37 | self.relu = nn.ReLU(inplace=True)
38 | self.convs, self.fcs, last_layer_dim = self._add_conv_fc_branch(
39 | self.num_convs, self.num_fcs, self.in_channels
40 | )
41 | self.fc_embed = nn.Linear(last_layer_dim, embed_channels)
42 |
43 | self.softmax_temp = softmax_temp
44 | self.loss_track = build_loss(loss_track)
45 |
46 | def _add_conv_fc_branch(self, num_convs, num_fcs, in_channels):
47 | last_layer_dim = in_channels
48 | # add branch specific conv layers
49 | convs = nn.ModuleList()
50 | if num_convs > 0:
51 | for i in range(num_convs):
52 | conv_in_channels = last_layer_dim if i == 0 else self.conv_out_channels
53 | convs.append(
54 | ConvModule(
55 | conv_in_channels,
56 | self.conv_out_channels,
57 | 3,
58 | padding=1,
59 | conv_cfg=self.conv_cfg,
60 | norm_cfg=self.norm_cfg,
61 | )
62 | )
63 | last_layer_dim = self.conv_out_channels
64 | # add branch specific fc layers
65 | fcs = nn.ModuleList()
66 | if num_fcs > 0:
67 | last_layer_dim *= self.roi_feat_size * self.roi_feat_size
68 | for i in range(num_fcs):
69 | fc_in_channels = last_layer_dim if i == 0 else self.fc_out_channels
70 | fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
71 | last_layer_dim = self.fc_out_channels
72 | return convs, fcs, last_layer_dim
73 |
74 | def init_weights(self):
75 | # Xavier-init the fc branch; normal-init the embedding projection.
76 | for m in self.fcs:
77 | if isinstance(m, nn.Linear):
78 | nn.init.xavier_uniform_(m.weight)
79 | nn.init.constant_(m.bias, 0)
80 | nn.init.normal_(self.fc_embed.weight, 0, 0.01)
81 | nn.init.constant_(self.fc_embed.bias, 0)
82 |
83 | def forward(self, x):
84 | # convs -> flatten -> fcs (ReLU) -> linear projection to embeddings
85 | if self.num_convs > 0:
86 | for i, conv in enumerate(self.convs):
87 | x = conv(x)
88 | x = x.view(x.size(0), -1)
89 | if self.num_fcs > 0:
90 | for i, fc in enumerate(self.fcs):
91 | x = self.relu(fc(x))
92 | x = self.fc_embed(x)
93 |
94 | return x
95 |
96 | def sup_contra_loss(self, features, labels):
97 | # CEM loss over exemplar embeddings and their class labels
98 | losses = dict()
99 | loss_track = self.loss_track(features, labels)
100 | losses["loss_cem"] = loss_track
101 |
102 | return losses
103 |
--------------------------------------------------------------------------------
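As a quick shape sanity check, a minimal sketch (assuming mmcv/mmdet are importable so the module loads): with the defaults above, the head maps (N, 256, 7, 7) RoI features through four 3x3 convs and one fc into an N x 256 exemplar embedding.

    import torch
    from teter.models.roi_heads.track_heads import ClsExemplarHead

    head = ClsExemplarHead()          # defaults: 4 convs, 1 fc, 256-d embeddings
    head.init_weights()
    rois = torch.randn(8, 256, 7, 7)  # 8 sampled RoIs (batch size hypothetical)
    embeds = head(rois)
    assert embeds.shape == (8, 256)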
/teter/models/roi_heads/track_heads/quasi_dense_embed_head.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 | from mmcv.cnn import ConvModule
5 | from mmdet.models import HEADS, build_loss
6 |
7 | from teter.core import cal_similarity
8 |
9 |
10 | @HEADS.register_module(force=True)
11 | class QuasiDenseEmbedHead(nn.Module):
12 | def __init__(
13 | self,
14 | num_convs=4,
15 | num_fcs=1,
16 | roi_feat_size=7,
17 | in_channels=256,
18 | conv_out_channels=256,
19 | fc_out_channels=1024,
20 | embed_channels=256,
21 | conv_cfg=None,
22 | norm_cfg=None,
23 | softmax_temp=-1,
24 | loss_track=dict(type="MultiPosCrossEntropyLoss", loss_weight=0.25),
25 | loss_track_aux=dict(
26 | type="L2Loss", sample_ratio=3, margin=0.3, loss_weight=1.0, hard_mining=True
27 | ),
28 | ):
29 | super(QuasiDenseEmbedHead, self).__init__()
30 | self.num_convs = num_convs
31 | self.num_fcs = num_fcs
32 | self.roi_feat_size = roi_feat_size
33 | self.in_channels = in_channels
34 | self.conv_out_channels = conv_out_channels
35 | self.fc_out_channels = fc_out_channels
36 | self.embed_channels = embed_channels
37 | self.conv_cfg = conv_cfg
38 | self.norm_cfg = norm_cfg
39 | self.relu = nn.ReLU(inplace=True)
40 | self.convs, self.fcs, last_layer_dim = self._add_conv_fc_branch(
41 | self.num_convs, self.num_fcs, self.in_channels
42 | )
43 | self.fc_embed = nn.Linear(last_layer_dim, embed_channels)
44 |
45 | self.softmax_temp = softmax_temp
46 | self.loss_track = build_loss(loss_track)
47 | if loss_track_aux is not None:
48 | self.loss_track_aux = build_loss(loss_track_aux)
49 | else:
50 | self.loss_track_aux = None
51 |
52 | def _add_conv_fc_branch(self, num_convs, num_fcs, in_channels):
53 | last_layer_dim = in_channels
54 | # add branch specific conv layers
55 | convs = nn.ModuleList()
56 | if num_convs > 0:
57 | for i in range(num_convs):
58 | conv_in_channels = last_layer_dim if i == 0 else self.conv_out_channels
59 | convs.append(
60 | ConvModule(
61 | conv_in_channels,
62 | self.conv_out_channels,
63 | 3,
64 | padding=1,
65 | conv_cfg=self.conv_cfg,
66 | norm_cfg=self.norm_cfg,
67 | )
68 | )
69 | last_layer_dim = self.conv_out_channels
70 | # add branch specific fc layers
71 | fcs = nn.ModuleList()
72 | if num_fcs > 0:
73 | last_layer_dim *= self.roi_feat_size * self.roi_feat_size
74 | for i in range(num_fcs):
75 | fc_in_channels = last_layer_dim if i == 0 else self.fc_out_channels
76 | fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
77 | last_layer_dim = self.fc_out_channels
78 | return convs, fcs, last_layer_dim
79 |
80 | def init_weights(self):
81 | for m in self.fcs:
82 | if isinstance(m, nn.Linear):
83 | nn.init.xavier_uniform_(m.weight)
84 | nn.init.constant_(m.bias, 0)
85 | nn.init.normal_(self.fc_embed.weight, 0, 0.01)
86 | nn.init.constant_(self.fc_embed.bias, 0)
87 |
88 | def forward(self, x):
89 | if self.num_convs > 0:
90 | for i, conv in enumerate(self.convs):
91 | x = conv(x)
92 | x = x.view(x.size(0), -1)
93 | if self.num_fcs > 0:
94 | for i, fc in enumerate(self.fcs):
95 | x = self.relu(fc(x))
96 | x = self.fc_embed(x)
97 | return x
98 |
99 | def get_track_targets(
100 | self, gt_match_indices, key_sampling_results, ref_sampling_results
101 | ):
102 | track_targets = []
103 | track_weights = []
104 | for _gt_match_indices, key_res, ref_res in zip(
105 | gt_match_indices, key_sampling_results, ref_sampling_results
106 | ):
107 | targets = _gt_match_indices.new_zeros(
108 | (key_res.pos_bboxes.size(0), ref_res.bboxes.size(0)), dtype=torch.int
109 | )
110 | _match_indices = _gt_match_indices[key_res.pos_assigned_gt_inds]
111 | pos2pos = (
112 | _match_indices.view(-1, 1) == ref_res.pos_assigned_gt_inds.view(1, -1)
113 | ).int()
114 | targets[:, : pos2pos.size(1)] = pos2pos
115 | weights = (targets.sum(dim=1) > 0).float()
116 | track_targets.append(targets)
117 | track_weights.append(weights)
118 | return track_targets, track_weights
119 |
120 | def match(self, key_embeds, ref_embeds, key_sampling_results, ref_sampling_results):
121 | num_key_rois = [res.pos_bboxes.size(0) for res in key_sampling_results]
122 | key_embeds = torch.split(key_embeds, num_key_rois)
123 | num_ref_rois = [res.bboxes.size(0) for res in ref_sampling_results]
124 | ref_embeds = torch.split(ref_embeds, num_ref_rois)
125 |
126 | dists, cos_dists = [], []
127 | for key_embed, ref_embed in zip(key_embeds, ref_embeds):
128 | dist = cal_similarity(
129 | key_embed,
130 | ref_embed,
131 | method="dot_product",
132 | temperature=self.softmax_temp,
133 | )
134 | dists.append(dist)
135 | if self.loss_track_aux is not None:
136 | cos_dist = cal_similarity(key_embed, ref_embed, method="cosine")
137 | cos_dists.append(cos_dist)
138 | else:
139 | cos_dists.append(None)
140 | return dists, cos_dists
141 |
142 | def loss(self, dists, cos_dists, targets, weights):
143 | losses = dict()
144 |
145 | loss_track = 0.0
146 | loss_track_aux = 0.0
147 | for _dists, _cos_dists, _targets, _weights in zip(
148 | dists, cos_dists, targets, weights
149 | ):
150 | loss_track += self.loss_track(
151 | _dists, _targets, _weights, avg_factor=_weights.sum()
152 | )
153 | if self.loss_track_aux is not None:
154 | loss_track_aux += self.loss_track_aux(_cos_dists, _targets)
155 | losses["loss_track"] = loss_track / len(dists)
156 |
157 | if self.loss_track_aux is not None:
158 | losses["loss_track_aux"] = loss_track_aux / len(dists)
159 |
160 | return losses
161 |
162 | @staticmethod
163 | def random_choice(gallery, num):
164 | """Random select some elements from the gallery.
165 |
166 | It seems that Pytorch's implementation is slower than numpy so we use
167 | numpy to randperm the indices.
168 | """
169 | assert len(gallery) >= num
170 | if isinstance(gallery, list):
171 | gallery = np.array(gallery)
172 | cands = np.arange(len(gallery))
173 | np.random.shuffle(cands)
174 | rand_inds = cands[:num]
175 | if not isinstance(gallery, np.ndarray):
176 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
177 | return gallery[rand_inds]
178 |
--------------------------------------------------------------------------------
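To make the target construction in get_track_targets concrete, a worked toy example with hypothetical indices: three positive key RoIs against four reference RoIs, where (as in the sampling convention above) the reference positives occupy the first columns.

    import torch

    # gt_match_indices[k] is the reference-frame gt index matched to
    # key-frame gt k, or -1 if unmatched (values hypothetical).
    gt_match_indices = torch.tensor([1, 0, -1])
    key_pos_assigned_gt_inds = torch.tensor([0, 1, 2])  # gt index per key positive
    ref_pos_assigned_gt_inds = torch.tensor([1, 0])     # gt index per ref positive

    match = gt_match_indices[key_pos_assigned_gt_inds]
    pos2pos = (match.view(-1, 1) == ref_pos_assigned_gt_inds.view(1, -1)).int()

    targets = torch.zeros(3, 4, dtype=torch.int)  # 4 ref RoIs, positives first
    targets[:, :pos2pos.size(1)] = pos2pos
    weights = (targets.sum(dim=1) > 0).float()
    # targets = [[1, 0, 0, 0],   key RoI 0 <-> ref positive 0
    #            [0, 1, 0, 0],   key RoI 1 <-> ref positive 1
    #            [0, 0, 0, 0]]   key RoI 2 is unmatched
    # weights = [1., 1., 0.]     unmatched keys are masked out of the loss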
/teter/models/trackers/__init__.py:
--------------------------------------------------------------------------------
1 | from .teter_bdd import TETerBDD
2 | from .teter_tao import TETerTAO
3 |
4 | __all__ = ["TETerTAO", "TETerBDD"]
5 |
--------------------------------------------------------------------------------
/teter/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .collect_env import collect_env
2 | from .logger import get_root_logger
3 |
4 | __all__ = ["collect_env", "get_root_logger"]
5 |
--------------------------------------------------------------------------------
/teter/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import mmcv
3 | import os.path as osp
4 | import subprocess
5 | import sys
6 | import torch
7 | import torchvision
8 | from collections import defaultdict
9 |
10 | import teter
11 |
12 |
13 | def collect_env():
14 | env_info = {}
15 | env_info["sys.platform"] = sys.platform
16 | env_info["Python"] = sys.version.replace("\n", "")
17 |
18 | cuda_available = torch.cuda.is_available()
19 | env_info["CUDA available"] = cuda_available
20 |
21 | if cuda_available:
22 | from torch.utils.cpp_extension import CUDA_HOME
23 |
24 | env_info["CUDA_HOME"] = CUDA_HOME
25 |
26 | if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
27 | try:
28 | nvcc = osp.join(CUDA_HOME, "bin/nvcc")
29 | nvcc = subprocess.check_output(f'"{nvcc}" -V | tail -n1', shell=True)
30 | nvcc = nvcc.decode("utf-8").strip()
31 | except subprocess.SubprocessError:
32 | nvcc = "Not Available"
33 | env_info["NVCC"] = nvcc
34 |
35 | devices = defaultdict(list)
36 | for k in range(torch.cuda.device_count()):
37 | devices[torch.cuda.get_device_name(k)].append(str(k))
38 | for name, devids in devices.items():
39 | env_info["GPU " + ",".join(devids)] = name
40 |
41 | gcc = subprocess.check_output("gcc --version | head -n1", shell=True)
42 | gcc = gcc.decode("utf-8").strip()
43 | env_info["GCC"] = gcc
44 |
45 | env_info["PyTorch"] = torch.__version__
46 | env_info["PyTorch compiling details"] = torch.__config__.show()
47 |
48 | env_info["TorchVision"] = torchvision.__version__
49 |
50 | env_info["OpenCV"] = cv2.__version__
51 |
52 | env_info["MMCV"] = mmcv.__version__
53 | env_info["teter"] = teter.__version__
54 |
55 | return env_info
56 |
57 |
58 | if __name__ == "__main__":
59 | for name, val in collect_env().items():
60 | print(f"{name}: {val}")
61 |
--------------------------------------------------------------------------------
/teter/utils/logger.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from mmcv.utils import get_logger
3 |
4 |
5 | def get_root_logger(log_file=None, log_level=logging.INFO):
6 | return get_logger("teter", log_file, log_level)
7 |
--------------------------------------------------------------------------------
/teter/version.py:
--------------------------------------------------------------------------------
1 | # GENERATED VERSION FILE
2 | __version__ = "dev-0.1.0"
3 | short_version = "0.1.0"
4 | version_info = (0, 1, 0)
5 |
--------------------------------------------------------------------------------
/tools/convert_datasets/tao2coco.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os.path as osp
3 | from collections import defaultdict
4 |
5 | import mmcv
6 | from tao.toolkit.tao import Tao
7 | from tqdm import tqdm
8 |
9 |
10 | def parse_args():
11 | parser = argparse.ArgumentParser(
12 | description='Make annotation files for TAO')
13 | parser.add_argument('-t', '--tao', help='path of TAO json file')
14 | parser.add_argument(
15 | '--filter-classes',
16 | action='store_true',
17 | help='whether to filter the 1230 classes down to 482.')
18 | return parser.parse_args()
19 |
20 |
21 | def get_classes(tao_path, filter_classes=True):
22 | train = mmcv.load(osp.join(tao_path, 'train.json'))
23 |
24 | train_classes = list(set([_['category_id'] for _ in train['annotations']]))
25 | print(f'TAO train set contains {len(train_classes)} categories.')
26 |
27 | val = mmcv.load(osp.join(tao_path, 'validation.json'))
28 | val_classes = list(set([_['category_id'] for _ in val['annotations']]))
29 | print(f'TAO val set contains {len(val_classes)} categories.')
30 |
31 | test = mmcv.load(osp.join(tao_path, 'test_categories.json'))
32 | test_classes = list(set([_['id'] for _ in test['categories']]))
33 | print(f'TAO test set contains {len(test_classes)} categories.')
34 |
35 | tao_classes = set(train_classes + val_classes + test_classes)
36 | print(f'TAO contains {len(tao_classes)} categories in total.')
37 |
38 | tao_classes = [_ for _ in train['categories'] if _['id'] in tao_classes]
39 |
40 | with open(osp.join(tao_path, 'tao_classes.txt'), 'wt') as f:
41 | for c in tao_classes:
42 | name = c['name']
43 | f.write(f'{name}\n')
44 |
45 | if filter_classes:
46 | return tao_classes
47 | else:
48 | return train['categories']
49 |
50 |
51 | def convert_tao(file, classes):
52 | tao = Tao(file)
53 | raw = mmcv.load(file)
54 |
55 | out = defaultdict(list)
56 | out['tracks'] = raw['tracks'].copy()
57 | out['info'] = raw['info'].copy()
58 | out['licenses'] = raw['licenses'].copy()
59 | out['categories'] = classes
60 |
61 | for video in tqdm(raw['videos']):
62 | img_infos = tao.vid_img_map[video['id']]
63 | img_infos = sorted(img_infos, key=lambda x: x['frame_index'])
64 | frame_range = img_infos[1]['frame_index'] - img_infos[0]['frame_index']
65 | video['frame_range'] = frame_range
66 | out['videos'].append(video)
67 | for i, img_info in enumerate(img_infos):
68 | img_info['frame_id'] = i
69 | img_info['neg_category_ids'] = video['neg_category_ids']
70 | img_info['not_exhaustive_category_ids'] = video[
71 | 'not_exhaustive_category_ids']
72 | out['images'].append(img_info)
73 | ann_infos = tao.img_ann_map[img_info['id']]
74 | for ann_info in ann_infos:
75 | ann_info['instance_id'] = ann_info['track_id']
76 | out['annotations'].append(ann_info)
77 |
78 | assert len(out['videos']) == len(raw['videos'])
79 | assert len(out['images']) == len(raw['images'])
80 | assert len(out['annotations']) == len(raw['annotations'])
81 | return out
82 |
83 |
84 | def main():
85 | args = parse_args()
86 |
87 | classes = get_classes(args.tao, args.filter_classes)
88 | print(f'converting with {len(classes)} classes')
89 |
90 | for file in [
91 | 'train.json', 'validation.json', 'test_without_annotations.json'
92 | ]:
93 | print(f'converting {file}')
94 | out = convert_tao(osp.join(args.tao, file), classes)
95 | c = '_482' if args.filter_classes else ''
96 | prefix = file.split('.')[0].split('_')[0]
97 | out_file = f'{prefix}{c}_ours.json'
98 | mmcv.dump(out, osp.join(args.tao, out_file))
99 |
100 |
101 | if __name__ == '__main__':
102 | main()
103 |
--------------------------------------------------------------------------------
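For orientation, a worked trace of the output naming in main() (pure string logic, nothing beyond what the code above does):

    for file in ['train.json', 'validation.json', 'test_without_annotations.json']:
        prefix = file.split('.')[0].split('_')[0]  # 'train', 'validation', 'test'
        print(f'{prefix}_482_ours.json')           # with --filter-classes
        # without --filter-classes: train_ours.json, validation_ours.json, ...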
/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | PORT=${4:-33333}
7 |
8 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:5}
11 |
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | GPUS=$2
5 | PORT=${3:-29533}
6 |
7 | PYTHONPATH="$(dirname "$0")/..":$PYTHONPATH \
8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:4}
10 |
--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | CHECKPOINT=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 |
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 | --job-name=${JOB_NAME} \
18 | --gres=gpu:${GPUS_PER_NODE} \
19 | --ntasks=${GPUS} \
20 | --ntasks-per-node=${GPUS_PER_NODE} \
21 | --cpus-per-task=${CPUS_PER_TASK} \
22 | --kill-on-bad-exit=1 \
23 | ${SRUN_ARGS} \
24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
25 |
--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | set -x
4 |
5 | PARTITION=$1
6 | JOB_NAME=$2
7 | CONFIG=$3
8 | WORK_DIR=$4
9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${@:5}
14 |
15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
16 | srun -p ${PARTITION} \
17 | --job-name=${JOB_NAME} \
18 | --gres=gpu:${GPUS_PER_NODE} \
19 | --ntasks=${GPUS} \
20 | --ntasks-per-node=${GPUS_PER_NODE} \
21 | --cpus-per-task=${CPUS_PER_TASK} \
22 | --kill-on-bad-exit=1 \
23 | ${SRUN_ARGS} \
24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
25 |
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import mmcv
5 | import torch
6 | from mmcv import Config, DictAction
7 | from mmcv.cnn import fuse_conv_bn
8 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
9 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint
10 | # from mmdet.core import wrap_fp16_model
11 | from mmdet.datasets import build_dataset
12 |
13 |
14 | def parse_args():
15 | parser = argparse.ArgumentParser(description='teter test model')
16 | parser.add_argument('config', help='test config file path')
17 | parser.add_argument('checkpoint', help='checkpoint file')
18 | parser.add_argument('--out', help='output result file')
19 | parser.add_argument(
20 | '--fuse-conv-bn',
21 | action='store_true',
22 | help='Whether to fuse conv and bn; this will slightly increase '
23 | 'the inference speed')
24 | parser.add_argument(
25 | '--format-only',
26 | action='store_true',
27 | help='Format the output results without performing evaluation. It is '
28 | 'useful when you want to format the results in a specific format and '
29 | 'submit them to the test server')
30 | parser.add_argument('--eval', type=str, nargs='+', help='eval types')
31 | parser.add_argument('--show', action='store_true', help='show results')
32 | parser.add_argument(
33 | '--show-dir', help='directory where painted images will be saved')
34 | parser.add_argument(
35 | '--gpu-collect',
36 | action='store_true',
37 | help='whether to use gpu to collect results.')
38 | parser.add_argument(
39 | '--tmpdir',
40 | help='tmp directory used for collecting results from multiple '
41 | 'workers, available when gpu-collect is not specified')
42 | parser.add_argument(
43 | '--show-score-thr', default=0.3, type=float, help='score threshold for shown results')
44 | parser.add_argument(
45 | '--cfg-options',
46 | nargs='+',
47 | action=DictAction,
48 | help='override some settings in the used config, the key-value pair '
49 | 'in xxx=yyy format will be merged into config file.')
50 | parser.add_argument(
51 | '--eval-options',
52 | nargs='+',
53 | action=DictAction,
54 | help='custom options for evaluation, the key-value pair in xxx=yyy '
55 | 'format will be kwargs for dataset.evaluate() function')
56 | parser.add_argument(
57 | '--launcher',
58 | choices=['none', 'pytorch', 'slurm', 'mpi'],
59 | default='none',
60 | help='job launcher')
61 | parser.add_argument('--local_rank', type=int, default=0)
62 | args = parser.parse_args()
63 | if 'LOCAL_RANK' not in os.environ:
64 | os.environ['LOCAL_RANK'] = str(args.local_rank)
65 | return args
66 |
67 |
68 | def main():
69 | args = parse_args()
70 |
71 | assert args.out or args.eval or args.format_only or args.show \
72 | or args.show_dir, \
73 | ('Please specify at least one operation (save/eval/format/show the '
74 | 'results) with the argument "--out", "--eval", '
75 | '"--format-only", "--show" or "--show-dir"')
76 |
77 | if args.eval and args.format_only:
78 | raise ValueError('--eval and --format_only cannot be both specified')
79 |
80 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
81 | raise ValueError('The output file must be a pkl file.')
82 |
83 | cfg = Config.fromfile(args.config)
84 |
85 | if args.cfg_options is not None:
86 | cfg.merge_from_dict(args.cfg_options)
87 |
88 | if cfg.get('USE_MMDET', False):
89 | from mmdet.apis import multi_gpu_test, single_gpu_test
90 | from mmdet.models import build_detector as build_model
91 | from mmdet.datasets import build_dataloader
92 | else:
93 | from teter.apis import multi_gpu_test, single_gpu_test
94 | from teter.models import build_model
95 | from teter.datasets import build_dataloader
96 |
97 | # set cudnn_benchmark
98 | if cfg.get('cudnn_benchmark', False):
99 | torch.backends.cudnn.benchmark = True
100 | cfg.model.pretrained = None
101 | cfg.data.test.test_mode = True
102 |
103 | # init distributed env first, since logger depends on the dist info.
104 | if args.launcher == 'none':
105 | distributed = False
106 | else:
107 | distributed = True
108 | init_dist(args.launcher, **cfg.dist_params)
109 |
110 | # build the dataloader
111 | dataset = build_dataset(cfg.data.test)
112 | data_loader = build_dataloader(
113 | dataset,
114 | samples_per_gpu=1,
115 | workers_per_gpu=cfg.data.workers_per_gpu,
116 | dist=distributed,
117 | shuffle=False)
118 |
119 | # build the model and load checkpoint
120 | model = build_model(cfg.model, train_cfg=None, test_cfg=None)
121 | # fp16_cfg = cfg.get('fp16', None)
122 | # if fp16_cfg is not None:
123 | # wrap_fp16_model(model)
124 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')
125 |
126 | if args.fuse_conv_bn:
127 | model = fuse_conv_bn(model)
128 |
129 | if 'CLASSES' in checkpoint['meta']:
130 | model.CLASSES = checkpoint['meta']['CLASSES']
131 | else:
132 | model.CLASSES = dataset.CLASSES
133 |
134 | if not distributed:
135 | model = MMDataParallel(model, device_ids=[0])
136 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,
137 | args.show_score_thr)
138 | else:
139 | model = MMDistributedDataParallel(
140 | model.cuda(),
141 | device_ids=[torch.cuda.current_device()],
142 | broadcast_buffers=False)
143 | outputs = multi_gpu_test(model, data_loader, args.tmpdir,
144 | args.gpu_collect)
145 |
146 | rank, _ = get_dist_info()
147 | if rank == 0:
148 | if args.out:
149 | print(f'\nwriting results to {args.out}')
150 | mmcv.dump(outputs, args.out)
151 | kwargs = {} if args.eval_options is None else args.eval_options
152 | if args.format_only:
153 | dataset.format_results(outputs, **kwargs)
154 | if args.eval:
155 | eval_kwargs = cfg.get('evaluation', {}).copy()
156 | # hard-code way to remove EvalHook args
157 | for key in ['interval', 'tmpdir', 'start', 'gpu_collect']:
158 | eval_kwargs.pop(key, None)
159 | eval_kwargs.update(dict(metric=args.eval, **kwargs))
160 | print(dataset.evaluate(outputs, **eval_kwargs))
161 |
162 |
163 | if __name__ == '__main__':
164 | main()
165 |
--------------------------------------------------------------------------------
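A small sketch of what --cfg-options does under the hood, assuming only that mmcv is installed and the script runs from the repo root; the dotted keys are merged into the loaded config before the model or dataloader is built (the override value here is hypothetical).

    from mmcv import Config

    cfg = Config.fromfile('configs/tao/tracker_r101_tao.py')
    # Equivalent to passing `--cfg-options data.workers_per_gpu=2` on the CLI.
    cfg.merge_from_dict({'data.workers_per_gpu': 2})
    assert cfg.data.workers_per_gpu == 2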
/tools/to_bdd100k.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 |
4 | import mmcv
5 | from mmcv import Config, DictAction
6 | from mmdet.datasets import build_dataset
7 | from teter.core.to_bdd100k import preds2bdd100k
8 |
9 |
10 | def parse_args():
11 | parser = argparse.ArgumentParser(description='convert teter results to BDD100K/COCO submission formats')
12 | parser.add_argument('config', help='test config file path')
13 | parser.add_argument('--res', help='path to the result file to convert')
14 | parser.add_argument(
15 | '--bdd-dir',
16 | type=str,
17 | help='path to the folder that will contain files in bdd100k format')
18 | parser.add_argument(
19 | '--coco-file',
20 | type=str,
21 | help='path to the output json file in COCO submission format')
22 | parser.add_argument(
23 | '--task',
24 | type=str,
25 | nargs='+',
26 | help='task types',
27 | choices=['det', 'ins_seg', 'box_track', 'seg_track'])
28 | parser.add_argument(
29 | '--nproc',
30 | type=int,
31 | help='number of processes for mask merging')
32 | parser.add_argument(
33 | '--cfg-options',
34 | nargs='+',
35 | action=DictAction,
36 | help='override some settings in the used config, the key-value pair '
37 | 'in xxx=yyy format will be merged into config file.')
38 | args = parser.parse_args()
39 | return args
40 |
41 |
42 | def main():
43 | args = parse_args()
44 |
45 | if not os.path.isfile(args.res):
46 | raise ValueError('The result file does not exist.')
47 |
48 | cfg = Config.fromfile(args.config)
49 |
50 | if args.cfg_options is not None:
51 | cfg.merge_from_dict(args.cfg_options)
52 |
53 | if cfg.get('USE_MMDET', False):
54 | from mmdet.datasets import build_dataloader
55 | else:
56 | from teter.datasets import build_dataloader
57 |
58 | # build the dataloader
59 | cfg.data.test.test_mode = True
60 | dataset = build_dataset(cfg.data.test)
61 |
62 | print(f'\nLoading results from {args.res}')
63 | results = mmcv.load(args.res)
64 |
65 | if args.coco_file:
66 | dataset.format_results(results, jsonfile_prefix=args.coco_file)
67 | if args.bdd_dir:
68 | preds2bdd100k(
69 | dataset, results, args.task, out_base=args.bdd_dir, nproc=args.nproc)
70 |
71 | if __name__ == '__main__':
72 | main()
73 |
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import copy
3 | import os
4 | import os.path as osp
5 | import time
6 |
7 | import mmcv
8 | import torch
9 | from mmcv import Config, DictAction
10 | from mmcv.runner import init_dist
11 | from mmdet.apis import set_random_seed
12 | from mmdet.datasets import build_dataset
13 |
14 | from teter import __version__
15 | from teter.utils import collect_env, get_root_logger
16 |
17 |
18 | def parse_args():
19 | parser = argparse.ArgumentParser(description='Train a model')
20 | parser.add_argument('config', help='train config file path')
21 | parser.add_argument('--work-dir', help='the dir to save logs and models')
22 | parser.add_argument(
23 | '--resume-from', help='the checkpoint file to resume from')
24 | parser.add_argument(
25 | '--no-validate',
26 | action='store_true',
27 | help='whether to skip evaluating checkpoints during training')
28 | group_gpus = parser.add_mutually_exclusive_group()
29 | group_gpus.add_argument(
30 | '--gpus',
31 | type=int,
32 | help='number of gpus to use '
33 | '(only applicable to non-distributed training)')
34 | group_gpus.add_argument(
35 | '--gpu-ids',
36 | type=int,
37 | nargs='+',
38 | help='ids of gpus to use '
39 | '(only applicable to non-distributed training)')
40 | parser.add_argument('--seed', type=int, default=None, help='random seed')
41 | parser.add_argument(
42 | '--deterministic',
43 | action='store_true',
44 | help='whether to set deterministic options for CUDNN backend.')
45 | parser.add_argument(
46 | '--cfg-options',
47 | nargs='+',
48 | action=DictAction,
49 | help='override some settings in the used config, the key-value pair '
50 | 'in xxx=yyy format will be merged into config file.')
51 | parser.add_argument(
52 | '--launcher',
53 | choices=['none', 'pytorch', 'slurm', 'mpi'],
54 | default='none',
55 | help='job launcher')
56 | parser.add_argument('--local_rank', type=int, default=0)
57 | args = parser.parse_args()
58 | if 'LOCAL_RANK' not in os.environ:
59 | os.environ['LOCAL_RANK'] = str(args.local_rank)
60 |
61 | return args
62 |
63 |
64 | def main():
65 | args = parse_args()
66 |
67 | cfg = Config.fromfile(args.config)
68 |
69 | if args.cfg_options is not None:
70 | cfg.merge_from_dict(args.cfg_options)
71 |
72 | if cfg.get('USE_MMDET', False):
73 | from mmdet.apis import train_detector as train_model
74 | from mmdet.models import build_detector as build_model
75 | else:
76 | from teter.apis import train_model
77 | from teter.models import build_model
78 |
79 | # set cudnn_benchmark
80 | if cfg.get('cudnn_benchmark', False):
81 | torch.backends.cudnn.benchmark = True
82 |
83 | # work_dir priority: CLI > value in config file > derived from config filename
84 | if args.work_dir is not None:
85 | # update configs according to CLI args if args.work_dir is not None
86 | cfg.work_dir = args.work_dir
87 | elif cfg.get('work_dir', None) is None:
88 | # use config filename as default work_dir if cfg.work_dir is None
89 | cfg.work_dir = osp.join('./work_dirs',
90 | osp.splitext(osp.basename(args.config))[0])
91 | if args.resume_from is not None:
92 | cfg.resume_from = args.resume_from
93 | if args.gpu_ids is not None:
94 | cfg.gpu_ids = args.gpu_ids
95 | else:
96 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
97 |
98 | # init distributed env first, since logger depends on the dist info.
99 | if args.launcher == 'none':
100 | distributed = False
101 | else:
102 | distributed = True
103 | init_dist(args.launcher, **cfg.dist_params)
104 |
105 | # create work_dir
106 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
107 | # dump config
108 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
109 | # init the logger before other steps
110 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
111 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
112 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
113 |
114 | # init the meta dict to record some important information such as
115 | # environment info and seed, which will be logged
116 | meta = dict()
117 | # log env info
118 | env_info_dict = collect_env()
119 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
120 | dash_line = '-' * 60 + '\n'
121 | logger.info('Environment info:\n' + dash_line + env_info + '\n' +
122 | dash_line)
123 | meta['env_info'] = env_info
124 |
125 | # log some basic info
126 | logger.info(f'Distributed training: {distributed}')
127 | logger.info(f'Config:\n{cfg.pretty_text}')
128 |
129 | # set random seeds
130 | if args.seed is not None:
131 | logger.info(f'Set random seed to {args.seed}, '
132 | f'deterministic: {args.deterministic}')
133 | set_random_seed(args.seed, deterministic=args.deterministic)
134 | cfg.seed = args.seed
135 | meta['seed'] = args.seed
136 |
137 | model = build_model(
138 | cfg.model,
139 | train_cfg=cfg.get('train_cfg'),
140 | test_cfg=cfg.get('test_cfg'))
141 | model.init_weights()
142 |
143 | datasets = [build_dataset(cfg.data.train)]
144 | if len(cfg.workflow) == 2:
145 | val_dataset = copy.deepcopy(cfg.data.val)
146 | val_dataset.pipeline = cfg.data.train.pipeline
147 | datasets.append(build_dataset(val_dataset))
148 | if cfg.checkpoint_config is not None:
149 | # save the teter version (kept under the legacy `qdtrack_version`
150 | # key), config file content and class names in checkpoints as metadata
151 | cfg.checkpoint_config.meta = dict(
152 | qdtrack_version=__version__,
153 | config=cfg.pretty_text,
154 | CLASSES=datasets[0].CLASSES)
155 | # add an attribute for visualization convenience
156 | model.CLASSES = datasets[0].CLASSES
157 | train_model(
158 | model,
159 | datasets,
160 | cfg,
161 | distributed=distributed,
162 | validate=(not args.no_validate),
163 | timestamp=timestamp,
164 | meta=meta)
165 |
166 |
167 | if __name__ == '__main__':
168 | main()
169 |
--------------------------------------------------------------------------------
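The work_dir resolution above follows the priority CLI --work-dir > cfg.work_dir > a default derived from the config filename; a minimal illustration of the fallback path (the config name is taken from this repo, the rest is hypothetical):

    import os.path as osp

    config_path = 'configs/bdd100k/cem_bdd.py'
    work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(config_path))[0])
    print(work_dir)  # ./work_dirs/cem_bdd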