├── .gitattributes
├── README.assets
├── image-20230819130751450.png
├── image-20230819134931428.png
├── MOT20.gif
└── dancetrack.gif
├── yolox
├── __init__.py
├── core
│   └── __init__.py
├── layers
│   ├── __init__.py
│   ├── csrc
│   │   ├── vision.cpp
│   │   └── cocoeval
│   │   │   └── cocoeval.h
│   └── fast_coco_eval_api.py
├── exp
│   ├── __init__.py
│   ├── build.py
│   └── base_exp.py
├── evaluators
│   └── __init__.py
├── data
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── datasets_wrapper.py
│   │   └── mot.py
│   ├── __init__.py
│   ├── data_prefetcher.py
│   └── samplers.py
├── models
│   ├── __init__.py
│   ├── yolox.py
│   ├── yolo_fpn.py
│   ├── losses.py
│   └── yolo_pafpn.py
├── utils
│   ├── __init__.py
│   ├── checkpoint.py
│   ├── setup_env.py
│   ├── ema.py
│   ├── logger.py
│   ├── demo_utils.py
│   ├── allreduce_norm.py
│   ├── metric.py
│   ├── model_utils.py
│   ├── cluster_nms.py
│   ├── visualize.py
│   └── box_ops.py
├── tracking_utils
│   ├── timer.py
│   ├── io.py
│   └── evaluation.py
└── tracker
│   └── basetrack.py
├── requirements.txt
├── exps
├── default
│   ├── yolox_l.py
│   ├── yolox_m.py
│   ├── yolox_s.py
│   ├── yolox_x.py
│   ├── yolox_tiny.py
│   ├── nano.py
│   └── yolov3.py
└── example
│   └── mot
│   ├── yolox_x_diffusion_det_mot17.py
│   ├── yolox_x_diffusion_det_mot20.py
│   ├── yolox_x_diffusion_det_dancetrack.py
│   ├── yolox_x_diffusion_det_mot17_ablation.py
│   ├── yolox_x_diffusion_track_mot17.py
│   ├── yolox_x_diffusion_track_mot17_baseline.py
│   ├── yolox_x_diffusion_track_mot20.py
│   ├── yolox_x_diffusion_track_dancetrack.py
│   ├── yolox_x_diffusion_track_mot20_baseline.py
│   ├── yolox_x_diffusion_track_dancetrack_baseline.py
│   └── yolox_x_diffusion_track_mot17_ablation.py
├── tools
├── convert_video.py
├── mix_data_bdd100k.py
├── convert_crowdhuman_to_coco.py
├── convert_ethz_to_coco.py
├── convert_cityperson_to_coco.py
├── mix_data_test_mot20.py
├── mix_data_ablation.py
├── mota.py
├── train.py
├── mix_data_test_mot17.py
└── convert_bdd100k_to_coco.py
├── setup.py
├── .gitignore
└── diffusion
└── models
└── diffusionnet.py

/.gitattributes:
--------------------------------------------------------------------------------
 1 | README.assets/MOT20.gif filter=lfs diff=lfs merge=lfs -text
 2 | README.assets/dancetrack.gif filter=lfs diff=lfs merge=lfs -text
--------------------------------------------------------------------------------
/README.assets/image-20230819130751450.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainBowLuoCS/DiffusionTrack/HEAD/README.assets/image-20230819130751450.png
--------------------------------------------------------------------------------
/README.assets/image-20230819134931428.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RainBowLuoCS/DiffusionTrack/HEAD/README.assets/image-20230819134931428.png
--------------------------------------------------------------------------------
/README.assets/MOT20.gif:
--------------------------------------------------------------------------------
 1 | version https://git-lfs.github.com/spec/v1
 2 | oid sha256:d7944be05fb6e8f06b5b5c5f348febf24a738d9dc5267824dc49dff5cc56b101
 3 | size 83313257
 4 | 
--------------------------------------------------------------------------------
/README.assets/dancetrack.gif:
--------------------------------------------------------------------------------
 1 | version https://git-lfs.github.com/spec/v1
 2 | oid sha256:94c7e7accd4ff802dd8635834085b00ec9e9597c95c75580c0eacde22816ce17
 3 | size 76207196
 4 | 
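Note: the two GIFs above (README.assets/MOT20.gif and README.assets/dancetrack.gif) are stored as Git LFS pointer files, which is why only a short version/oid/size stanza appears here; the .gitattributes rules shown earlier route both GIFs through LFS, so a plain clone needs a git lfs pull before the animations are actually present. The snippet below is an illustrative sketch (not part of the repository) for reading the metadata out of such a pointer file; the path argument is just an example.

def read_lfs_pointer(path):
    # Each non-empty line of an LFS pointer file is "<key> <value>".
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

# read_lfs_pointer("README.assets/MOT20.gif")
# -> {'version': 'https://git-lfs.github.com/spec/v1', 'oid': 'sha256:d7944be0...', 'size': '83313257'}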
-------------------------------------------------------------------------------- /yolox/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .utils import configure_module 5 | 6 | configure_module() 7 | 8 | __version__ = "0.1.0" 9 | -------------------------------------------------------------------------------- /yolox/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .launch import launch 6 | from .trainer import Trainer 7 | -------------------------------------------------------------------------------- /yolox/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .fast_coco_eval_api import COCOeval_opt 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | torch>=1.7 3 | opencv_python 4 | loguru 5 | scikit-image 6 | tqdm 7 | torchvision>=0.10.0 8 | Pillow 9 | thop 10 | ninja 11 | tabulate 12 | tensorboard 13 | lap 14 | motmetrics 15 | filterpy 16 | h5py 17 | -------------------------------------------------------------------------------- /yolox/exp/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from .base_exp import BaseExp 6 | from .build import get_exp 7 | from .yolox_base import Exp 8 | -------------------------------------------------------------------------------- /yolox/evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .coco_evaluator import COCOEvaluator 6 | from .diffusion_mot_evaluator import DiffusionMOTEvaluator 7 | from .diffusion_mot_evaluator_kl import DiffusionMOTEvaluatorKL 8 | -------------------------------------------------------------------------------- /yolox/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset 6 | from .mosaicdetection import MosaicDetection,DiffusionMosaicDetection 7 | from .mot import MOTDataset 8 | -------------------------------------------------------------------------------- /yolox/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
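# [Editor's note, not part of the original file] This package simply re-exports the
# detector building blocks imported below (CSPDarknet/Darknet backbones, YOLOFPN and
# YOLOPAFPN necks, YOLOXHead, IOUloss and the top-level YOLOX module). As an illustrative
# sketch based only on the constructors visible elsewhere in this listing, a YOLOX-X
# sized detector could be assembled like this (num_classes=1 for a single person class
# is an assumption for the MOT setups):
#     from yolox.models import YOLOPAFPN, YOLOXHead, YOLOX
#     backbone = YOLOPAFPN(1.33, 1.25, in_channels=[256, 512, 1024])
#     head = YOLOXHead(1, 1.25, in_channels=[256, 512, 1024])
#     model = YOLOX(backbone, head)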
4 | 5 | from .darknet import CSPDarknet, Darknet 6 | from .losses import IOUloss 7 | from .yolo_fpn import YOLOFPN 8 | from .yolo_head import YOLOXHead 9 | from .yolo_pafpn import YOLOPAFPN 10 | from .yolox import YOLOX 11 | -------------------------------------------------------------------------------- /yolox/data/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from .data_augment import TrainTransform, ValTransform,DiffusionValTransform,DiffusionTrainTransform 6 | from .data_prefetcher import DataPrefetcher 7 | from .dataloading import DataLoader, get_yolox_datadir 8 | from .datasets import * 9 | from .samplers import InfiniteSampler, YoloBatchSampler 10 | -------------------------------------------------------------------------------- /exps/default/yolox_l.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.0 14 | self.width = 1.0 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_m.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.67 14 | self.width = 0.75 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_s.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.50 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /exps/default/yolox_x.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 1.33 14 | self.width = 1.25 15 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 16 | -------------------------------------------------------------------------------- /yolox/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
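# [Editor's note, not part of the original file] This module aggregates the utility
# helpers via the wildcard imports below (distributed/NCCL setup, checkpointing, EMA,
# LR scheduling, logging, metrics, visualization), so callers can write
# from yolox.utils import load_ckpt, save_checkpoint, setup_logger. A hedged example
# using the helpers defined in yolox/utils/checkpoint.py (the file name and the "model"
# key are assumptions, not fixed by this listing):
#     ckpt = torch.load("yolox_x_ckpt.pth.tar", map_location="cpu")
#     model = load_ckpt(model, ckpt["model"])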
4 | 5 | from .allreduce_norm import * 6 | from .boxes import * 7 | from .checkpoint import load_ckpt, save_checkpoint 8 | from .demo_utils import * 9 | from .dist import * 10 | from .ema import ModelEMA 11 | from .logger import setup_logger 12 | from .lr_scheduler import LRScheduler 13 | from .metric import * 14 | from .model_utils import * 15 | from .setup_env import * 16 | from .visualize import * 17 | -------------------------------------------------------------------------------- /exps/default/yolox_tiny.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | 7 | from yolox.exp import Exp as MyExp 8 | 9 | 10 | class Exp(MyExp): 11 | def __init__(self): 12 | super(Exp, self).__init__() 13 | self.depth = 0.33 14 | self.width = 0.375 15 | self.scale = (0.5, 1.5) 16 | self.random_size = (10, 20) 17 | self.test_size = (416, 416) 18 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 19 | self.enable_mixup = False 20 | -------------------------------------------------------------------------------- /yolox/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "cocoeval/cocoeval.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); 5 | m.def( 6 | "COCOevalEvaluateImages", 7 | &COCOeval::EvaluateImages, 8 | "COCOeval::EvaluateImages"); 9 | pybind11::class_(m, "InstanceAnnotation") 10 | .def(pybind11::init()); 11 | pybind11::class_(m, "ImageEvaluation") 12 | .def(pybind11::init<>()); 13 | } 14 | -------------------------------------------------------------------------------- /tools/convert_video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | def convert_video(video_path): 4 | cap = cv2.VideoCapture(video_path) 5 | width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float 6 | height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float 7 | fps = cap.get(cv2.CAP_PROP_FPS) 8 | video_name = video_path.split('/')[-1].split('.')[0] 9 | save_name = video_name + '_converted' 10 | save_path = video_path.replace(video_name, save_name) 11 | vid_writer = cv2.VideoWriter( 12 | save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) 13 | ) 14 | while True: 15 | ret_val, frame = cap.read() 16 | if ret_val: 17 | vid_writer.write(frame) 18 | ch = cv2.waitKey(1) 19 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 20 | break 21 | else: 22 | break 23 | 24 | if __name__ == "__main__": 25 | video_path = 'videos/palace.mp4' 26 | convert_video(video_path) -------------------------------------------------------------------------------- /yolox/tracking_utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class Timer(object): 5 | """A simple timer.""" 6 | def __init__(self): 7 | self.total_time = 0. 8 | self.calls = 0 9 | self.start_time = 0. 10 | self.diff = 0. 11 | self.average_time = 0. 12 | 13 | self.duration = 0. 
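        # [Editor's note, descriptive comment not in the original] duration caches whatever the
        # most recent toc() call returned. Typical usage is:
        #     timer.tic(); ...timed work...; dt = timer.toc()
        # toc(average=True), the default, returns the running mean over all tic/toc pairs,
        # while toc(average=False) returns only the latest interval; clear() resets everything.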
14 | 15 | def tic(self): 16 | # using time.time instead of time.clock because time time.clock 17 | # does not normalize for multithreading 18 | self.start_time = time.time() 19 | 20 | def toc(self, average=True): 21 | self.diff = time.time() - self.start_time 22 | self.total_time += self.diff 23 | self.calls += 1 24 | self.average_time = self.total_time / self.calls 25 | if average: 26 | self.duration = self.average_time 27 | else: 28 | self.duration = self.diff 29 | return self.duration 30 | 31 | def clear(self): 32 | self.total_time = 0. 33 | self.calls = 0 34 | self.start_time = 0. 35 | self.diff = 0. 36 | self.average_time = 0. 37 | self.duration = 0. -------------------------------------------------------------------------------- /yolox/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed 53 | -------------------------------------------------------------------------------- /exps/default/nano.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | import os 6 | import torch.nn as nn 7 | 8 | from yolox.exp import Exp as MyExp 9 | 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.depth = 0.33 15 | self.width = 0.25 16 | self.scale = (0.5, 1.5) 17 | self.random_size = (10, 20) 18 | self.test_size = (416, 416) 19 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 20 | self.enable_mixup = False 21 | 22 | def get_model(self, sublinear=False): 23 | 24 | def init_yolo(M): 25 | for m in M.modules(): 26 | if isinstance(m, nn.BatchNorm2d): 27 | m.eps = 1e-3 28 | m.momentum = 0.03 29 | if "model" not in self.__dict__: 30 | from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead 31 | in_channels = [256, 512, 1024] 32 | # NANO model use depthwise = True, which is main difference. 
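            # [Editor's note, descriptive comment not in the original] depthwise=True builds the
            # conv blocks as depthwise-separable convolutions (a per-channel spatial conv followed
            # by a 1x1 pointwise conv), which sharply reduces parameters and FLOPs; combined with
            # width=0.25 this is what makes the nano variant so small.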
33 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) 34 | head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) 35 | self.model = YOLOX(backbone, head) 36 | 37 | self.model.apply(init_yolo) 38 | self.model.head.initialize_biases(1e-2) 39 | return self.model 40 | -------------------------------------------------------------------------------- /yolox/utils/checkpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | from loguru import logger 5 | 6 | import torch 7 | 8 | import os 9 | import shutil 10 | 11 | 12 | def load_ckpt(model, ckpt): 13 | model_state_dict = model.state_dict() 14 | load_dict = {} 15 | for key_model, v in model_state_dict.items(): 16 | if key_model not in ckpt: 17 | logger.warning( 18 | "{} is not in the ckpt. Please double check and see if this is desired.".format( 19 | key_model 20 | ) 21 | ) 22 | continue 23 | v_ckpt = ckpt[key_model] 24 | if v.shape != v_ckpt.shape: 25 | logger.warning( 26 | "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( 27 | key_model, v_ckpt.shape, key_model, v.shape 28 | ) 29 | ) 30 | continue 31 | load_dict[key_model] = v_ckpt 32 | 33 | model.load_state_dict(load_dict, strict=False) 34 | return model 35 | 36 | 37 | def save_checkpoint(state, is_best, save_dir, model_name=""): 38 | if not os.path.exists(save_dir): 39 | os.makedirs(save_dir) 40 | filename = os.path.join(save_dir, model_name + "_ckpt.pth.tar") 41 | torch.save(state, filename) 42 | if is_best: 43 | best_filename = os.path.join(save_dir, "best_ckpt.pth.tar") 44 | shutil.copyfile(filename, best_filename) 45 | -------------------------------------------------------------------------------- /yolox/models/yolox.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch.nn as nn 6 | 7 | from .yolo_head import YOLOXHead 8 | from .yolo_pafpn import YOLOPAFPN 9 | 10 | class YOLOX(nn.Module): 11 | """ 12 | YOLOX model module. The module list is defined by create_yolov3_modules function. 13 | The network returns loss values from three YOLO layers during training 14 | and detection results during test. 15 | """ 16 | 17 | def __init__(self, backbone=None, head=None): 18 | super().__init__() 19 | if backbone is None: 20 | backbone = YOLOPAFPN() 21 | if head is None: 22 | head = YOLOXHead(80) 23 | 24 | self.backbone = backbone 25 | self.head = head 26 | 27 | def forward(self, x, targets=None): 28 | # fpn output content features of [dark3, dark4, dark5] 29 | fpn_outs = self.backbone(x) 30 | 31 | if self.training: 32 | assert targets is not None 33 | loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head( 34 | fpn_outs, targets, x 35 | ) 36 | outputs = { 37 | "total_loss": loss, 38 | "iou_loss": iou_loss, 39 | "l1_loss": l1_loss, 40 | "conf_loss": conf_loss, 41 | "cls_loss": cls_loss, 42 | "num_fg": num_fg, 43 | } 44 | else: 45 | outputs = self.head(fpn_outs) 46 | 47 | return outputs 48 | -------------------------------------------------------------------------------- /yolox/exp/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. 
All rights reserved. 4 | 5 | import importlib 6 | import os 7 | import sys 8 | 9 | 10 | def get_exp_by_file(exp_file): 11 | try: 12 | sys.path.append(os.path.dirname(exp_file)) 13 | current_exp = importlib.import_module(os.path.basename(exp_file).split(".")[0]) 14 | exp = current_exp.Exp() 15 | except Exception: 16 | raise ImportError("{} doesn't contains class named 'Exp'".format(exp_file)) 17 | return exp 18 | 19 | 20 | def get_exp_by_name(exp_name): 21 | import yolox 22 | 23 | yolox_path = os.path.dirname(os.path.dirname(yolox.__file__)) 24 | filedict = { 25 | "yolox-s": "yolox_s.py", 26 | "yolox-m": "yolox_m.py", 27 | "yolox-l": "yolox_l.py", 28 | "yolox-x": "yolox_x.py", 29 | "yolox-tiny": "yolox_tiny.py", 30 | "yolox-nano": "nano.py", 31 | "yolov3": "yolov3.py", 32 | } 33 | filename = filedict[exp_name] 34 | exp_path = os.path.join(yolox_path, "exps", "default", filename) 35 | return get_exp_by_file(exp_path) 36 | 37 | 38 | def get_exp(exp_file, exp_name): 39 | """ 40 | get Exp object by file or name. If exp_file and exp_name 41 | are both provided, get Exp by exp_file. 42 | 43 | Args: 44 | exp_file (str): file path of experiment. 45 | exp_name (str): name of experiment. "yolo-s", 46 | """ 47 | assert ( 48 | exp_file is not None or exp_name is not None 49 | ), "plz provide exp file or exp name." 50 | if exp_file is not None: 51 | return get_exp_by_file(exp_file) 52 | else: 53 | return get_exp_by_name(exp_name) 54 | -------------------------------------------------------------------------------- /yolox/utils/setup_env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import cv2 6 | 7 | import os 8 | import subprocess 9 | 10 | __all__ = ["configure_nccl", "configure_module"] 11 | 12 | 13 | def configure_nccl(): 14 | """Configure multi-machine environment variables of NCCL.""" 15 | os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL" 16 | os.environ["NCCL_IB_HCA"] = subprocess.getoutput( 17 | "pushd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; " 18 | "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null " 19 | "| grep v >/dev/null && echo $i ; done; popd > /dev/null" 20 | ) 21 | os.environ["NCCL_IB_GID_INDEX"] = "3" 22 | os.environ["NCCL_IB_TC"] = "106" 23 | 24 | 25 | def configure_module(ulimit_value=8192): 26 | """ 27 | Configure pytorch module environment. setting of ulimit and cv2 will be set. 28 | 29 | Args: 30 | ulimit_value(int): default open file number on linux. Default value: 8192. 31 | """ 32 | # system setting 33 | try: 34 | import resource 35 | 36 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 37 | resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, rlimit[1])) 38 | except Exception: 39 | # Exception might be raised in Windows OS or rlimit reaches max limit number. 40 | # However, set rlimit value might not be necessary. 41 | pass 42 | 43 | # cv2 44 | # multiprocess might be harmful on performance of torch dataloader 45 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 46 | try: 47 | cv2.setNumThreads(0) 48 | cv2.ocl.setUseOpenCL(False) 49 | except Exception: 50 | # cv2 version mismatch might rasie exceptions. 51 | pass 52 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Megvii, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import re 5 | import setuptools 6 | import glob 7 | from os import path 8 | import torch 9 | from torch.utils.cpp_extension import CppExtension 10 | 11 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 12 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 13 | 14 | 15 | def get_extensions(): 16 | this_dir = path.dirname(path.abspath(__file__)) 17 | extensions_dir = path.join(this_dir, "yolox", "layers", "csrc") 18 | 19 | main_source = path.join(extensions_dir, "vision.cpp") 20 | sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) 21 | 22 | sources = [main_source] + sources 23 | extension = CppExtension 24 | 25 | extra_compile_args = {"cxx": ["-O3"]} 26 | define_macros = [] 27 | 28 | include_dirs = [extensions_dir] 29 | 30 | ext_modules = [ 31 | extension( 32 | "yolox._C", 33 | sources, 34 | include_dirs=include_dirs, 35 | define_macros=define_macros, 36 | extra_compile_args=extra_compile_args, 37 | ) 38 | ] 39 | 40 | return ext_modules 41 | 42 | 43 | with open("yolox/__init__.py", "r") as f: 44 | version = re.search( 45 | r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', 46 | f.read(), re.MULTILINE 47 | ).group(1) 48 | 49 | 50 | # with open("README.md", "r") as f: 51 | # long_description = f.read() 52 | 53 | long_description="sss" 54 | setuptools.setup( 55 | name="yolox", 56 | version=version, 57 | author="basedet team", 58 | python_requires=">=3.6", 59 | long_description=long_description, 60 | ext_modules=get_extensions(), 61 | classifiers=["Programming Language :: Python :: 3", "Operating System :: OS Independent"], 62 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 63 | packages=setuptools.find_namespace_packages(), 64 | ) 65 | -------------------------------------------------------------------------------- /tools/mix_data_bdd100k.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_det/annotations 8 | cp mot/annotations/val_half.json mix_det/annotations/val_half.json 9 | cp mot/annotations/test.json mix_det/annotations/test.json 10 | cd mix_det 11 | ln -s ../mot/train mot_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | ln -s ../Cityscapes cp_train 15 | ln -s ../ETHZ ethz_train 16 | cd .. 
17 | """ 18 | 19 | bdd100ktrain_json = json.load(open('datasets/bdd100k/annotations/mix_train_val.json','r')) 20 | # need_index=np.random.choice(range(len(bdd100ktrain_json['images'])),len(bdd100ktrain_json['images'])//3,replace=False) 21 | # need_img_ids={} 22 | img_list = list() 23 | for img in bdd100ktrain_json['images']: 24 | img['is_video']=1 25 | img_list.append(img) 26 | # need_img_ids[bdd100ktrain_json['images'][img_idx]['id']]=1 27 | 28 | ann_list = list() 29 | for ann in bdd100ktrain_json['annotations']: 30 | # if ann['image_id'] in need_img_ids: 31 | ann_list.append(ann) 32 | 33 | video_list = bdd100ktrain_json['videos'] 34 | category_list = bdd100ktrain_json['categories'] 35 | 36 | 37 | print('bdd100ktrain') 38 | 39 | max_img = len(img_list) 40 | max_ann = len(ann_list) 41 | max_video = len(video_list) 42 | 43 | bdd100kval_json = json.load(open('datasets/bdd100k/annotations/val.json','r')) 44 | for img in bdd100kval_json['images']: 45 | img['prev_image_id'] = img['prev_image_id'] + max_img 46 | img['next_image_id'] = img['next_image_id'] + max_img 47 | img['id'] = img['id'] + max_img 48 | img['video_id']+= max_video 49 | img['is_video']=1 50 | img_list.append(img) 51 | 52 | for ann in bdd100kval_json['annotations']: 53 | ann['id'] = ann['id'] + max_ann 54 | ann['image_id'] = ann['image_id'] + max_img 55 | ann_list.append(ann) 56 | 57 | for vid in bdd100kval_json['videos']: 58 | vid['id']+=max_video 59 | video_list.append(vid) 60 | 61 | print('bdd100ktest') 62 | 63 | mix_json = dict() 64 | mix_json['images'] = img_list 65 | mix_json['annotations'] = ann_list 66 | mix_json['videos'] = video_list 67 | mix_json['categories'] = category_list 68 | json.dump(mix_json, open('datasets/bdd100k/annotations/mix_train_val.json','w')) 69 | -------------------------------------------------------------------------------- /yolox/exp/base_exp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
4 | 5 | import torch 6 | from torch.nn import Module 7 | 8 | from yolox.utils import LRScheduler 9 | 10 | import ast 11 | import pprint 12 | from abc import ABCMeta, abstractmethod 13 | from tabulate import tabulate 14 | from typing import Dict 15 | 16 | 17 | class BaseExp(metaclass=ABCMeta): 18 | """Basic class for any experiment.""" 19 | 20 | def __init__(self): 21 | self.seed = None 22 | self.output_dir = "./DiffusionTrack_outputs" 23 | self.print_interval = 100 24 | self.eval_interval = 10 25 | 26 | @abstractmethod 27 | def get_model(self) -> Module: 28 | pass 29 | 30 | @abstractmethod 31 | def get_data_loader( 32 | self, batch_size: int, is_distributed: bool 33 | ) -> Dict[str, torch.utils.data.DataLoader]: 34 | pass 35 | 36 | @abstractmethod 37 | def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: 38 | pass 39 | 40 | @abstractmethod 41 | def get_lr_scheduler( 42 | self, lr: float, iters_per_epoch: int, **kwargs 43 | ) -> LRScheduler: 44 | pass 45 | 46 | @abstractmethod 47 | def get_evaluator(self): 48 | pass 49 | 50 | @abstractmethod 51 | def eval(self, model, evaluator, weights): 52 | pass 53 | 54 | def __repr__(self): 55 | table_header = ["keys", "values"] 56 | exp_table = [ 57 | (str(k), pprint.pformat(v)) 58 | for k, v in vars(self).items() 59 | if not k.startswith("_") 60 | ] 61 | return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") 62 | 63 | def merge(self, cfg_list): 64 | assert len(cfg_list) % 2 == 0 65 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 66 | # only update value with same key 67 | if hasattr(self, k): 68 | src_value = getattr(self, k) 69 | src_type = type(src_value) 70 | if src_value is not None and src_type != type(v): 71 | try: 72 | v = src_type(v) 73 | except Exception: 74 | v = ast.literal_eval(v) 75 | setattr(self, k, v) 76 | -------------------------------------------------------------------------------- /tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/crowdhuman/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['val', 'train'] 9 | DEBUG = False 10 | 11 | def load_func(fpath): 12 | print('fpath', fpath) 13 | assert os.path.exists(fpath) 14 | with open(fpath,'r') as fid: 15 | lines = fid.readlines() 16 | records =[json.loads(line.strip('\n')) for line in lines] 17 | return records 18 | 19 | if __name__ == '__main__': 20 | if not os.path.exists(OUT_PATH): 21 | os.mkdir(OUT_PATH) 22 | for split in SPLITS: 23 | data_path = DATA_PATH + split 24 | out_path = OUT_PATH + '{}.json'.format(split) 25 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 26 | ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split) 27 | anns_data = load_func(ann_path) 28 | image_cnt = 0 29 | ann_cnt = 0 30 | video_cnt = 0 31 | for ann_data in anns_data: 32 | image_cnt += 1 33 | file_path = DATA_PATH + 'CrowdHuman_{}/Images/'.format(split) + '{}.jpg'.format(ann_data['ID']) 34 | im = Image.open(file_path) 35 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 36 | 'id': image_cnt, 37 | 'height': im.size[1], 38 | 'width': im.size[0]} 39 | out['images'].append(image_info) 40 | if split != 'test': 41 | anns = ann_data['gtboxes'] 42 | for i in range(len(anns)): 43 | ann_cnt += 1 44 | fbox = anns[i]['fbox'] 45 | ann = {'id': ann_cnt, 46 | 'category_id': 1, 47 | 'image_id': image_cnt, 48 | 'track_id': -1, 49 | 'bbox_vis': 
anns[i]['vbox'], 50 | 'bbox': fbox, 51 | 'area': fbox[2] * fbox[3], 52 | 'iscrowd': 1 if 'extra' in anns[i] and \ 53 | 'ignore' in anns[i]['extra'] and \ 54 | anns[i]['extra']['ignore'] == 1 else 0} 55 | out['annotations'].append(ann) 56 | print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) 57 | json.dump(out, open(out_path, 'w')) -------------------------------------------------------------------------------- /tools/convert_ethz_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/ETHZ/' 7 | DATA_FILE_PATH = 'datasets/data_path/eth.train' 8 | OUT_PATH = DATA_PATH + 'annotations/' 9 | 10 | def load_paths(data_path): 11 | with open(data_path, 'r') as file: 12 | img_files = file.readlines() 13 | img_files = [x.replace('\n', '') for x in img_files] 14 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 15 | label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] 16 | return img_files, label_files 17 | 18 | if __name__ == '__main__': 19 | if not os.path.exists(OUT_PATH): 20 | os.mkdir(OUT_PATH) 21 | 22 | out_path = OUT_PATH + 'train.json' 23 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 24 | img_paths, label_paths = load_paths(DATA_FILE_PATH) 25 | image_cnt = 0 26 | ann_cnt = 0 27 | video_cnt = 0 28 | for img_path, label_path in zip(img_paths, label_paths): 29 | image_cnt += 1 30 | im = Image.open(os.path.join("datasets", img_path)) 31 | image_info = {'file_name': img_path, 32 | 'id': image_cnt, 33 | 'height': im.size[1], 34 | 'width': im.size[0]} 35 | out['images'].append(image_info) 36 | # Load labels 37 | if os.path.isfile(os.path.join("datasets", label_path)): 38 | labels0 = np.loadtxt(os.path.join("datasets", label_path), dtype=np.float32).reshape(-1, 6) 39 | # Normalized xywh to pixel xyxy format 40 | labels = labels0.copy() 41 | labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) 42 | labels[:, 3] = image_info['height'] * (labels0[:, 3] - labels0[:, 5] / 2) 43 | labels[:, 4] = image_info['width'] * labels0[:, 4] 44 | labels[:, 5] = image_info['height'] * labels0[:, 5] 45 | else: 46 | labels = np.array([]) 47 | for i in range(len(labels)): 48 | ann_cnt += 1 49 | fbox = labels[i, 2:6].tolist() 50 | ann = {'id': ann_cnt, 51 | 'category_id': 1, 52 | 'image_id': image_cnt, 53 | 'track_id': -1, 54 | 'bbox': fbox, 55 | 'area': fbox[2] * fbox[3], 56 | 'iscrowd': 0} 57 | out['annotations'].append(ann) 58 | print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) 59 | json.dump(out, open(out_path, 'w')) 60 | -------------------------------------------------------------------------------- /tools/convert_cityperson_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | from PIL import Image 5 | 6 | DATA_PATH = 'datasets/Cityscapes/' 7 | DATA_FILE_PATH = 'datasets/data_path/citypersons.train' 8 | OUT_PATH = DATA_PATH + 'annotations/' 9 | 10 | def load_paths(data_path): 11 | with open(data_path, 'r') as file: 12 | img_files = file.readlines() 13 | img_files = [x.replace('\n', '') for x in img_files] 14 | img_files = list(filter(lambda x: len(x) > 0, img_files)) 15 | label_files = [x.replace('images', 
'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] 16 | return img_files, label_files 17 | 18 | if __name__ == '__main__': 19 | if not os.path.exists(OUT_PATH): 20 | os.mkdir(OUT_PATH) 21 | 22 | out_path = OUT_PATH + 'train.json' 23 | out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} 24 | img_paths, label_paths = load_paths(DATA_FILE_PATH) 25 | image_cnt = 0 26 | ann_cnt = 0 27 | video_cnt = 0 28 | for img_path, label_path in zip(img_paths, label_paths): 29 | image_cnt += 1 30 | im = Image.open(os.path.join("datasets", img_path)) 31 | image_info = {'file_name': img_path, 32 | 'id': image_cnt, 33 | 'height': im.size[1], 34 | 'width': im.size[0]} 35 | out['images'].append(image_info) 36 | # Load labels 37 | if os.path.isfile(os.path.join("datasets", label_path)): 38 | labels0 = np.loadtxt(os.path.join("datasets", label_path), dtype=np.float32).reshape(-1, 6) 39 | # Normalized xywh to pixel xyxy format 40 | labels = labels0.copy() 41 | labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) 42 | labels[:, 3] = image_info['height'] * (labels0[:, 3] - labels0[:, 5] / 2) 43 | labels[:, 4] = image_info['width'] * labels0[:, 4] 44 | labels[:, 5] = image_info['height'] * labels0[:, 5] 45 | else: 46 | labels = np.array([]) 47 | for i in range(len(labels)): 48 | ann_cnt += 1 49 | fbox = labels[i, 2:6].tolist() 50 | ann = {'id': ann_cnt, 51 | 'category_id': 1, 52 | 'image_id': image_cnt, 53 | 'track_id': -1, 54 | 'bbox': fbox, 55 | 'area': fbox[2] * fbox[3], 56 | 'iscrowd': 0} 57 | out['annotations'].append(ann) 58 | print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) 59 | json.dump(out, open(out_path, 'w')) 60 | -------------------------------------------------------------------------------- /tools/mix_data_test_mot20.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_mot20_ch/annotations 8 | cp MOT20/annotations/val_half.json mix_mot20_ch/annotations/val_half.json 9 | cp MOT20/annotations/test.json mix_mot20_ch/annotations/test.json 10 | cd mix_mot20_ch 11 | ln -s ../MOT20/train mot20_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | cd .. 
15 | """ 16 | 17 | mot_json = json.load(open('datasets/MOT20/annotations/train.json','r')) 18 | 19 | img_list = list() 20 | for img in mot_json['images']: 21 | img['file_name'] = 'mot20_train/' + img['file_name'] 22 | img_list.append(img) 23 | 24 | ann_list = list() 25 | for ann in mot_json['annotations']: 26 | ann_list.append(ann) 27 | 28 | video_list = mot_json['videos'] 29 | category_list = mot_json['categories'] 30 | 31 | 32 | max_img = 10000 33 | max_ann = 2000000 34 | max_video = 10 35 | 36 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 37 | img_id_count = 0 38 | for img in crowdhuman_json['images']: 39 | img_id_count += 1 40 | img['file_name'] = 'crowdhuman_train/Images/' + img['file_name'] 41 | img['frame_id'] = img_id_count 42 | img['prev_image_id'] = img['id'] + max_img 43 | img['next_image_id'] = img['id'] + max_img 44 | img['id'] = img['id'] + max_img 45 | img['video_id'] = max_video 46 | img_list.append(img) 47 | 48 | for ann in crowdhuman_json['annotations']: 49 | ann['id'] = ann['id'] + max_ann 50 | ann['image_id'] = ann['image_id'] + max_img 51 | ann_list.append(ann) 52 | 53 | video_list.append({ 54 | 'id': max_video, 55 | 'file_name': 'crowdhuman_train' 56 | }) 57 | 58 | 59 | max_img = 30000 60 | max_ann = 10000000 61 | 62 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 63 | img_id_count = 0 64 | for img in crowdhuman_val_json['images']: 65 | img_id_count += 1 66 | img['file_name'] = 'crowdhuman_val/Images/' + img['file_name'] 67 | img['frame_id'] = img_id_count 68 | img['prev_image_id'] = img['id'] + max_img 69 | img['next_image_id'] = img['id'] + max_img 70 | img['id'] = img['id'] + max_img 71 | img['video_id'] = max_video 72 | img_list.append(img) 73 | 74 | for ann in crowdhuman_val_json['annotations']: 75 | ann['id'] = ann['id'] + max_ann 76 | ann['image_id'] = ann['image_id'] + max_img 77 | ann_list.append(ann) 78 | 79 | video_list.append({ 80 | 'id': max_video, 81 | 'file_name': 'crowdhuman_val' 82 | }) 83 | 84 | mix_json = dict() 85 | mix_json['images'] = img_list 86 | mix_json['annotations'] = ann_list 87 | mix_json['videos'] = video_list 88 | mix_json['categories'] = category_list 89 | json.dump(mix_json, open('datasets/mix_mot20_ch/annotations/train.json','w')) -------------------------------------------------------------------------------- /yolox/utils/ema.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | import torch 5 | import torch.nn as nn 6 | 7 | import math 8 | from copy import deepcopy 9 | 10 | 11 | def is_parallel(model): 12 | """check if model is in parallel mode.""" 13 | 14 | parallel_type = ( 15 | nn.parallel.DataParallel, 16 | nn.parallel.DistributedDataParallel, 17 | ) 18 | return isinstance(model, parallel_type) 19 | 20 | 21 | def copy_attr(a, b, include=(), exclude=()): 22 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 23 | for k, v in b.__dict__.items(): 24 | if (len(include) and k not in include) or k.startswith("_") or k in exclude: 25 | continue 26 | else: 27 | setattr(a, k, v) 28 | 29 | 30 | class ModelEMA: 31 | """ 32 | Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 33 | Keep a moving average of everything in the model state_dict (parameters and buffers). 
34 | This is intended to allow functionality like 35 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 36 | A smoothed version of the weights is necessary for some training schemes to perform well. 37 | This class is sensitive where it is initialized in the sequence of model init, 38 | GPU assignment and distributed training wrappers. 39 | """ 40 | 41 | def __init__(self, model, decay=0.9999, updates=0): 42 | """ 43 | Args: 44 | model (nn.Module): model to apply EMA. 45 | decay (float): ema decay reate. 46 | updates (int): counter of EMA updates. 47 | """ 48 | # Create EMA(FP32) 49 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() 50 | self.updates = updates 51 | # decay exponential ramp (to help early epochs) 52 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) 53 | for p in self.ema.parameters(): 54 | p.requires_grad_(False) 55 | 56 | def update(self, model): 57 | # Update EMA parameters 58 | with torch.no_grad(): 59 | self.updates += 1 60 | d = self.decay(self.updates) 61 | 62 | msd = ( 63 | model.module.state_dict() if is_parallel(model) else model.state_dict() 64 | ) # model state_dict 65 | for k, v in self.ema.state_dict().items(): 66 | if v.dtype.is_floating_point: 67 | v *= d 68 | v += (1.0 - d) * msd[k].detach() 69 | 70 | def update_attr(self, model, include=(), exclude=("process_group", "reducer")): 71 | # Update EMA attributes 72 | copy_attr(self.ema, model, include, exclude) 73 | -------------------------------------------------------------------------------- /yolox/models/yolo_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import Darknet 9 | from .network_blocks import BaseConv 10 | 11 | 12 | class YOLOFPN(nn.Module): 13 | """ 14 | YOLOFPN module. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=53, 20 | in_features=["dark3", "dark4", "dark5"], 21 | ): 22 | super().__init__() 23 | 24 | self.backbone = Darknet(depth) 25 | self.in_features = in_features 26 | 27 | # out 1 28 | self.out1_cbl = self._make_cbl(512, 256, 1) 29 | self.out1 = self._make_embedding([256, 512], 512 + 256) 30 | 31 | # out 2 32 | self.out2_cbl = self._make_cbl(256, 128, 1) 33 | self.out2 = self._make_embedding([128, 256], 256 + 128) 34 | 35 | # upsample 36 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 37 | 38 | def _make_cbl(self, _in, _out, ks): 39 | return BaseConv(_in, _out, ks, stride=1, act="lrelu") 40 | 41 | def _make_embedding(self, filters_list, in_filters): 42 | m = nn.Sequential( 43 | *[ 44 | self._make_cbl(in_filters, filters_list[0], 1), 45 | self._make_cbl(filters_list[0], filters_list[1], 3), 46 | self._make_cbl(filters_list[1], filters_list[0], 1), 47 | self._make_cbl(filters_list[0], filters_list[1], 3), 48 | self._make_cbl(filters_list[1], filters_list[0], 1), 49 | ] 50 | ) 51 | return m 52 | 53 | def load_pretrained_model(self, filename="./weights/darknet53.mix.pth"): 54 | with open(filename, "rb") as f: 55 | state_dict = torch.load(f, map_location="cpu") 56 | print("loading pretrained weights...") 57 | self.backbone.load_state_dict(state_dict) 58 | 59 | def forward(self, inputs): 60 | """ 61 | Args: 62 | inputs (Tensor): input image. 63 | 64 | Returns: 65 | Tuple[Tensor]: FPN output features.. 
66 | """ 67 | # backbone 68 | out_features = self.backbone(inputs) 69 | x2, x1, x0 = [out_features[f] for f in self.in_features] 70 | 71 | # yolo branch 1 72 | x1_in = self.out1_cbl(x0) 73 | x1_in = self.upsample(x1_in) 74 | x1_in = torch.cat([x1_in, x1], 1) 75 | out_dark4 = self.out1(x1_in) 76 | 77 | # yolo branch 2 78 | x2_in = self.out2_cbl(out_dark4) 79 | x2_in = self.upsample(x2_in) 80 | x2_in = torch.cat([x2_in, x2], 1) 81 | out_dark3 = self.out2(x2_in) 82 | 83 | outputs = (out_dark3, out_dark4, x0) 84 | return outputs 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | datasets/* 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # output 132 | docs/api 133 | .code-workspace.code-workspace 134 | *.pkl 135 | *.npy 136 | *.pth 137 | *.onnx 138 | *.engine 139 | events.out.tfevents* 140 | pretrained 141 | *_outputs/ 142 | DiffusionTrack_*/ 143 | datasets/ 144 | *.pth.tar 145 | *.tar.gz 146 | src/* 147 | test.py 148 | id_rsa_cs 149 | module_test.py 150 | vis_fold -------------------------------------------------------------------------------- /tools/mix_data_ablation.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_mot_ch/annotations 8 | cp mot/annotations/val_half.json mix_mot_ch/annotations/val_half.json 9 | cp mot/annotations/test.json mix_mot_ch/annotations/test.json 10 | cd mix_mot_ch 11 | ln -s ../mot/train mot_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | cd .. 15 | """ 16 | 17 | mot_json = json.load(open('datasets/mot/annotations/train_half.json','r')) 18 | 19 | img_list = list() 20 | for img in mot_json['images']: 21 | img['file_name'] = 'mot_train/' + img['file_name'] 22 | img_list.append(img) 23 | 24 | ann_list = list() 25 | for ann in mot_json['annotations']: 26 | ann_list.append(ann) 27 | 28 | video_list = mot_json['videos'] 29 | category_list = mot_json['categories'] 30 | 31 | print('mot17') 32 | 33 | max_img = 10000 34 | max_ann = 2000000 35 | max_video = 10 36 | 37 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 38 | img_id_count = 0 39 | for img in crowdhuman_json['images']: 40 | img_id_count += 1 41 | img['file_name'] = 'crowdhuman_train/Images/' + img['file_name'] 42 | img['frame_id'] = img_id_count 43 | img['prev_image_id'] = img['id'] + max_img 44 | img['next_image_id'] = img['id'] + max_img 45 | img['id'] = img['id'] + max_img 46 | img['video_id'] = max_video 47 | img_list.append(img) 48 | 49 | for ann in crowdhuman_json['annotations']: 50 | ann['id'] = ann['id'] + max_ann 51 | ann['image_id'] = ann['image_id'] + max_img 52 | ann_list.append(ann) 53 | 54 | video_list.append({ 55 | 'id': max_video, 56 | 'file_name': 'crowdhuman_train' 57 | }) 58 | 59 | print('crowdhuman_train') 60 | 61 | max_img = 30000 62 | max_ann = 10000000 63 | 64 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 65 | img_id_count = 0 66 | for img in crowdhuman_val_json['images']: 67 | img_id_count += 1 68 | img['file_name'] = 'crowdhuman_val/Images/' + img['file_name'] 69 | img['frame_id'] = img_id_count 70 | img['prev_image_id'] = img['id'] + max_img 71 | img['next_image_id'] = img['id'] + max_img 72 | img['id'] = img['id'] + max_img 73 | img['video_id'] = max_video 74 | img_list.append(img) 75 | 76 | for ann in crowdhuman_val_json['annotations']: 77 | ann['id'] = ann['id'] + max_ann 78 | ann['image_id'] = ann['image_id'] + max_img 79 | ann_list.append(ann) 80 | 81 | 
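# [Editor's note, descriptive comment not in the original] The fixed offsets used above
# (max_img, max_ann, max_video) are added to every CrowdHuman image/annotation id so that
# the ids stay disjoint from the MOT half-train ids when the COCO-style dicts are simply
# concatenated; each CrowdHuman split is also registered below as one pseudo "video" entry,
# with sequential frame_id values, so loaders that expect video_id/frame_id keep working.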
video_list.append({ 82 | 'id': max_video, 83 | 'file_name': 'crowdhuman_val' 84 | }) 85 | 86 | print('crowdhuman_val') 87 | 88 | mix_json = dict() 89 | mix_json['images'] = img_list 90 | mix_json['annotations'] = ann_list 91 | mix_json['videos'] = video_list 92 | mix_json['categories'] = category_list 93 | json.dump(mix_json, open('datasets/mix_mot_ch/annotations/train.json','w')) -------------------------------------------------------------------------------- /yolox/utils/logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | from loguru import logger 6 | 7 | import inspect 8 | import os 9 | import sys 10 | 11 | 12 | def get_caller_name(depth=0): 13 | """ 14 | Args: 15 | depth (int): Depth of caller conext, use 0 for caller depth. Default value: 0. 16 | 17 | Returns: 18 | str: module name of the caller 19 | """ 20 | # the following logic is a little bit faster than inspect.stack() logic 21 | frame = inspect.currentframe().f_back 22 | for _ in range(depth): 23 | frame = frame.f_back 24 | 25 | return frame.f_globals["__name__"] 26 | 27 | 28 | class StreamToLoguru: 29 | """ 30 | stream object that redirects writes to a logger instance. 31 | """ 32 | 33 | def __init__(self, level="INFO", caller_names=("apex", "pycocotools")): 34 | """ 35 | Args: 36 | level(str): log level string of loguru. Default value: "INFO". 37 | caller_names(tuple): caller names of redirected module. 38 | Default value: (apex, pycocotools). 39 | """ 40 | self.level = level 41 | self.linebuf = "" 42 | self.caller_names = caller_names 43 | 44 | def write(self, buf): 45 | full_name = get_caller_name(depth=1) 46 | module_name = full_name.rsplit(".", maxsplit=-1)[0] 47 | if module_name in self.caller_names: 48 | for line in buf.rstrip().splitlines(): 49 | # use caller level log 50 | logger.opt(depth=2).log(self.level, line.rstrip()) 51 | else: 52 | sys.__stdout__.write(buf) 53 | 54 | def flush(self): 55 | pass 56 | 57 | 58 | def redirect_sys_output(log_level="INFO"): 59 | redirect_logger = StreamToLoguru(log_level) 60 | sys.stderr = redirect_logger 61 | sys.stdout = redirect_logger 62 | 63 | 64 | def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"): 65 | """setup logger for training and testing. 66 | Args: 67 | save_dir(str): location to save log file 68 | distributed_rank(int): device rank when multi-gpu environment 69 | filename (string): log save name. 70 | mode(str): log file write mode, `append` or `override`. default is `a`. 71 | 72 | Return: 73 | logger instance. 74 | """ 75 | loguru_format = ( 76 | "{time:YYYY-MM-DD HH:mm:ss} | " 77 | "{level: <8} | " 78 | "{name}:{line} - {message}" 79 | ) 80 | 81 | logger.remove() 82 | save_file = os.path.join(save_dir, filename) 83 | if mode == "o" and os.path.exists(save_file): 84 | os.remove(save_file) 85 | # only keep logger in rank0 process 86 | if distributed_rank == 0: 87 | logger.add( 88 | sys.stderr, 89 | format=loguru_format, 90 | level="INFO", 91 | enqueue=True, 92 | ) 93 | logger.add(save_file) 94 | 95 | # redirect stdout/stderr to loguru 96 | redirect_sys_output("INFO") 97 | -------------------------------------------------------------------------------- /yolox/utils/demo_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
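# [Editor's note, not part of the original file] These are pure-NumPy demo helpers:
# nms()/multiclass_nms() run hard non-maximum suppression on (x1, y1, x2, y2) boxes, and
# demo_postprocess() decodes raw YOLOX head outputs by adding the per-cell grid offsets and
# multiplying by the stride (8/16/32, plus 64 when p6=True), with exp() applied to the
# width/height channels. A tiny illustrative NMS call:
#     boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11]], dtype=np.float32)
#     scores = np.array([0.9, 0.8], dtype=np.float32)
#     nms(boxes, scores, nms_thr=0.5)   # -> [0]; the second box overlaps at ~0.70 IoU and is dropped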
4 | 5 | import numpy as np 6 | 7 | import os 8 | 9 | __all__ = ["mkdir", "nms", "multiclass_nms", "demo_postprocess"] 10 | 11 | 12 | def mkdir(path): 13 | if not os.path.exists(path): 14 | os.makedirs(path) 15 | 16 | 17 | def nms(boxes, scores, nms_thr): 18 | """Single class NMS implemented in Numpy.""" 19 | x1 = boxes[:, 0] 20 | y1 = boxes[:, 1] 21 | x2 = boxes[:, 2] 22 | y2 = boxes[:, 3] 23 | 24 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | order = scores.argsort()[::-1] 26 | 27 | keep = [] 28 | while order.size > 0: 29 | i = order[0] 30 | keep.append(i) 31 | xx1 = np.maximum(x1[i], x1[order[1:]]) 32 | yy1 = np.maximum(y1[i], y1[order[1:]]) 33 | xx2 = np.minimum(x2[i], x2[order[1:]]) 34 | yy2 = np.minimum(y2[i], y2[order[1:]]) 35 | 36 | w = np.maximum(0.0, xx2 - xx1 + 1) 37 | h = np.maximum(0.0, yy2 - yy1 + 1) 38 | inter = w * h 39 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 40 | 41 | inds = np.where(ovr <= nms_thr)[0] 42 | order = order[inds + 1] 43 | 44 | return keep 45 | 46 | 47 | def multiclass_nms(boxes, scores, nms_thr, score_thr): 48 | """Multiclass NMS implemented in Numpy""" 49 | final_dets = [] 50 | num_classes = scores.shape[1] 51 | for cls_ind in range(num_classes): 52 | cls_scores = scores[:, cls_ind] 53 | valid_score_mask = cls_scores > score_thr 54 | if valid_score_mask.sum() == 0: 55 | continue 56 | else: 57 | valid_scores = cls_scores[valid_score_mask] 58 | valid_boxes = boxes[valid_score_mask] 59 | keep = nms(valid_boxes, valid_scores, nms_thr) 60 | if len(keep) > 0: 61 | cls_inds = np.ones((len(keep), 1)) * cls_ind 62 | dets = np.concatenate( 63 | [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 64 | ) 65 | final_dets.append(dets) 66 | if len(final_dets) == 0: 67 | return None 68 | return np.concatenate(final_dets, 0) 69 | 70 | 71 | def demo_postprocess(outputs, img_size, p6=False): 72 | 73 | grids = [] 74 | expanded_strides = [] 75 | 76 | if not p6: 77 | strides = [8, 16, 32] 78 | else: 79 | strides = [8, 16, 32, 64] 80 | 81 | hsizes = [img_size[0] // stride for stride in strides] 82 | wsizes = [img_size[1] // stride for stride in strides] 83 | 84 | for hsize, wsize, stride in zip(hsizes, wsizes, strides): 85 | xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) 86 | grid = np.stack((xv, yv), 2).reshape(1, -1, 2) 87 | grids.append(grid) 88 | shape = grid.shape[:2] 89 | expanded_strides.append(np.full((*shape, 1), stride)) 90 | 91 | grids = np.concatenate(grids, 1) 92 | expanded_strides = np.concatenate(expanded_strides, 1) 93 | outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides 94 | outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides 95 | 96 | return outputs 97 | -------------------------------------------------------------------------------- /yolox/models/losses.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
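# [Editor's note, not part of the original file] IOUloss below expects boxes in
# (cx, cy, w, h) form; loss_type="iou" returns 1 - IoU**2, while loss_type="giou" subtracts
# an enclosing-box penalty, giou = IoU - (area_c - area_i) / area_c (note this listing uses
# the intersection area in that penalty, where the original GIoU formulation uses the union).
# sigmoid_focal_loss() is the RetinaNet focal loss,
#     FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t),
# summed over all elements and divided by num_boxes.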
4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class IOUloss(nn.Module): 11 | def __init__(self, reduction="none", loss_type="iou"): 12 | super(IOUloss, self).__init__() 13 | self.reduction = reduction 14 | self.loss_type = loss_type 15 | 16 | def forward(self, pred, target): 17 | assert pred.shape[0] == target.shape[0] 18 | 19 | pred = pred.view(-1, 4) 20 | target = target.view(-1, 4) 21 | tl = torch.max( 22 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 23 | ) 24 | br = torch.min( 25 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 26 | ) 27 | 28 | area_p = torch.prod(pred[:, 2:], 1) 29 | area_g = torch.prod(target[:, 2:], 1) 30 | 31 | en = (tl < br).type(tl.type()).prod(dim=1) 32 | area_i = torch.prod(br - tl, 1) * en 33 | iou = (area_i) / (area_p + area_g - area_i + 1e-16) 34 | 35 | if self.loss_type == "iou": 36 | loss = 1 - iou ** 2 37 | elif self.loss_type == "giou": 38 | c_tl = torch.min( 39 | (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) 40 | ) 41 | c_br = torch.max( 42 | (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) 43 | ) 44 | area_c = torch.prod(c_br - c_tl, 1) 45 | giou = iou - (area_c - area_i) / area_c.clamp(1e-16) 46 | loss = 1 - giou.clamp(min=-1.0, max=1.0) 47 | 48 | if self.reduction == "mean": 49 | loss = loss.mean() 50 | elif self.reduction == "sum": 51 | loss = loss.sum() 52 | 53 | return loss 54 | 55 | 56 | def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): 57 | """ 58 | Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. 59 | Args: 60 | inputs: A float tensor of arbitrary shape. 61 | The predictions for each example. 62 | targets: A float tensor with the same shape as inputs. Stores the binary 63 | classification label for each element in inputs 64 | (0 for the negative class and 1 for the positive class). 65 | alpha: (optional) Weighting factor in range (0,1) to balance 66 | positive vs negative examples. Default = -1 (no weighting). 67 | gamma: Exponent of the modulating factor (1 - p_t) to 68 | balance easy vs hard examples. 69 | Returns: 70 | Loss tensor 71 | """ 72 | prob = inputs.sigmoid() 73 | ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 74 | p_t = prob * targets + (1 - prob) * (1 - targets) 75 | loss = ce_loss * ((1 - p_t) ** gamma) 76 | 77 | if alpha >= 0: 78 | alpha_t = alpha * targets + (1 - alpha) * (1 - targets) 79 | loss = alpha_t * loss 80 | #return loss.mean(0).sum() / num_boxes 81 | return loss.sum() / num_boxes -------------------------------------------------------------------------------- /exps/default/yolov3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
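# Illustrative, commented-out sketch of how an experiment file like this one
# is usually consumed via `get_exp` (as done in tools/train.py); the path and
# batch size are placeholders.
#
# from yolox.exp import get_exp
#
# exp = get_exp("exps/default/yolov3.py", None)
# model = exp.get_model()                          # builds the YOLOX model described below
# # exp.get_data_loader(batch_size=8, is_distributed=False) then builds the loader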
4 | 5 | import os 6 | import torch 7 | import torch.nn as nn 8 | 9 | from yolox.exp import Exp as MyExp 10 | 11 | 12 | class Exp(MyExp): 13 | def __init__(self): 14 | super(Exp, self).__init__() 15 | self.depth = 1.0 16 | self.width = 1.0 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | 19 | def get_model(self, sublinear=False): 20 | def init_yolo(M): 21 | for m in M.modules(): 22 | if isinstance(m, nn.BatchNorm2d): 23 | m.eps = 1e-3 24 | m.momentum = 0.03 25 | if "model" not in self.__dict__: 26 | from yolox.models import YOLOX, YOLOFPN, YOLOXHead 27 | backbone = YOLOFPN() 28 | head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu") 29 | self.model = YOLOX(backbone, head) 30 | self.model.apply(init_yolo) 31 | self.model.head.initialize_biases(1e-2) 32 | 33 | return self.model 34 | 35 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 36 | from data.datasets.cocodataset import COCODataset 37 | from data.datasets.mosaicdetection import MosaicDetection 38 | from data.datasets.data_augment import TrainTransform 39 | from data.datasets.dataloading import YoloBatchSampler, DataLoader, InfiniteSampler 40 | import torch.distributed as dist 41 | 42 | dataset = COCODataset( 43 | data_dir='data/COCO/', 44 | json_file=self.train_ann, 45 | img_size=self.input_size, 46 | preproc=TrainTransform( 47 | rgb_means=(0.485, 0.456, 0.406), 48 | std=(0.229, 0.224, 0.225), 49 | max_labels=50 50 | ), 51 | ) 52 | 53 | dataset = MosaicDetection( 54 | dataset, 55 | mosaic=not no_aug, 56 | img_size=self.input_size, 57 | preproc=TrainTransform( 58 | rgb_means=(0.485, 0.456, 0.406), 59 | std=(0.229, 0.224, 0.225), 60 | max_labels=120 61 | ), 62 | degrees=self.degrees, 63 | translate=self.translate, 64 | scale=self.scale, 65 | shear=self.shear, 66 | perspective=self.perspective, 67 | ) 68 | 69 | self.dataset = dataset 70 | 71 | if is_distributed: 72 | batch_size = batch_size // dist.get_world_size() 73 | sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) 74 | else: 75 | sampler = torch.utils.data.RandomSampler(self.dataset) 76 | 77 | batch_sampler = YoloBatchSampler( 78 | sampler=sampler, 79 | batch_size=batch_size, 80 | drop_last=False, 81 | input_dimension=self.input_size, 82 | mosaic=not no_aug 83 | ) 84 | 85 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 86 | dataloader_kwargs["batch_sampler"] = batch_sampler 87 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 88 | 89 | return train_loader 90 | -------------------------------------------------------------------------------- /diffusion/models/diffusionnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | from typing import List 4 | from collections import namedtuple 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | from torch import nn 9 | from yolox.models.yolo_pafpn import YOLOPAFPN 10 | from .diffusion_head import DiffusionHead 11 | from yolox.models.network_blocks import BaseConv 12 | 13 | class DiffusionNet(nn.Module): 14 | """ 15 | Implement DiffusionNet 16 | """ 17 | 18 | def __init__(self, backbone=None, head=None, act="silu"): 19 | super().__init__() 20 | self.backbone=backbone 21 | self.head=head 22 | self.projs=nn.ModuleList() 23 | in_channels=backbone.in_channels 24 | for i in range(len(in_channels)): 25 | self.projs.append( 26 | BaseConv( 27 | in_channels=int(in_channels[i] * head.width), 28 | 
out_channels=int(head.hidden_dim), 29 | ksize=1, 30 | stride=1, 31 | act=act, 32 | )) 33 | 34 | def forward(self, x, targets=(None,None),random_flip=False,input_size=None): 35 | # fpn output content features of [dark3, dark4, dark5] 36 | # x format (pre_imgs,cur_imgs) (B,C,H,W) 37 | # targets format (pre_targets,cur_targets) (B,N,5) class cx cy w h 38 | pre_imgs,cur_imgs=x 39 | pre_targets,cur_targets=targets 40 | mate_info=(pre_imgs.shape,pre_imgs.device,pre_imgs.dtype) 41 | bs,_,_,_=mate_info[0] 42 | if cur_imgs is None: 43 | x_input=pre_imgs 44 | else: 45 | x_input=torch.cat([pre_imgs,cur_imgs],dim=0) 46 | 47 | fpn_outs = self.backbone(x_input) 48 | flip_mode=False 49 | if random_flip and torch.randn((1,1))[0]>0.5: 50 | flip_mode=True 51 | pre_features,cur_features=[],[] 52 | 53 | for proj,x_out in zip(self.projs,fpn_outs): 54 | l_feat=proj(x_out) 55 | if cur_imgs is None: 56 | pre_features.append(l_feat) 57 | if flip_mode: 58 | cur_features.append(torch.flip(l_feat,dims=[3])) 59 | else: 60 | cur_features.append(l_feat.clone()) 61 | else: 62 | pre_l_feat,cur_l_feat=l_feat.split(bs,dim=0) 63 | pre_features.append(pre_l_feat) 64 | cur_features.append(cur_l_feat) 65 | 66 | features=(pre_features,cur_features) 67 | 68 | if self.training: 69 | assert pre_targets is not None 70 | if cur_targets is None: 71 | cur_targets=pre_targets.clone() 72 | if flip_mode: 73 | nlabels=(cur_targets.sum(-1)>0).sum(-1) 74 | for idx,nlabel in enumerate(nlabels): 75 | cur_targets[idx,:nlabel,1]=input_size[1]-cur_targets[idx,:nlabel,1] 76 | loss_dict = self.head( 77 | features,mate_info,targets=torch.cat([pre_targets,cur_targets],dim=0)) 78 | if 'total_loss' not in loss_dict: 79 | loss_dict['total_loss']=sum(loss_dict.values()) 80 | outputs=loss_dict 81 | return outputs 82 | else: 83 | outputs = self.head(features,mate_info,targets=pre_targets) 84 | 85 | return outputs 86 | 87 | 88 | -------------------------------------------------------------------------------- /yolox/utils/allreduce_norm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
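# Illustrative, commented-out sketch: in a multi-GPU run, the normalization
# statistics gathered below are typically averaged across ranks before
# evaluation; `model` and `is_distributed` are placeholders from a training loop.
#
# from yolox.utils import all_reduce_norm
#
# if is_distributed:
#     all_reduce_norm(model)  # average BN/IN running stats over all ranks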
4 | 5 | import torch 6 | from torch import distributed as dist 7 | from torch import nn 8 | 9 | import pickle 10 | from collections import OrderedDict 11 | 12 | from .dist import _get_global_gloo_group, get_world_size 13 | 14 | ASYNC_NORM = ( 15 | nn.BatchNorm1d, 16 | nn.BatchNorm2d, 17 | nn.BatchNorm3d, 18 | nn.InstanceNorm1d, 19 | nn.InstanceNorm2d, 20 | nn.InstanceNorm3d, 21 | ) 22 | 23 | __all__ = [ 24 | "get_async_norm_states", 25 | "pyobj2tensor", 26 | "tensor2pyobj", 27 | "all_reduce", 28 | "all_reduce_norm", 29 | ] 30 | 31 | 32 | def get_async_norm_states(module): 33 | async_norm_states = OrderedDict() 34 | for name, child in module.named_modules(): 35 | if isinstance(child, ASYNC_NORM): 36 | for k, v in child.state_dict().items(): 37 | async_norm_states[".".join([name, k])] = v 38 | return async_norm_states 39 | 40 | 41 | def pyobj2tensor(pyobj, device="cuda"): 42 | """serialize picklable python object to tensor""" 43 | storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj)) 44 | return torch.ByteTensor(storage).to(device=device) 45 | 46 | 47 | def tensor2pyobj(tensor): 48 | """deserialize tensor to picklable python object""" 49 | return pickle.loads(tensor.cpu().numpy().tobytes()) 50 | 51 | 52 | def _get_reduce_op(op_name): 53 | return { 54 | "sum": dist.ReduceOp.SUM, 55 | "mean": dist.ReduceOp.SUM, 56 | }[op_name.lower()] 57 | 58 | 59 | def all_reduce(py_dict, op="sum", group=None): 60 | """ 61 | Apply all reduce function for python dict object. 62 | NOTE: make sure that every py_dict has the same keys and values are in the same shape. 63 | 64 | Args: 65 | py_dict (dict): dict to apply all reduce op. 66 | op (str): operator, could be "sum" or "mean". 67 | """ 68 | world_size = get_world_size() 69 | if world_size == 1: 70 | return py_dict 71 | if group is None: 72 | group = _get_global_gloo_group() 73 | if dist.get_world_size(group) == 1: 74 | return py_dict 75 | 76 | # all reduce logic across different devices. 77 | py_key = list(py_dict.keys()) 78 | py_key_tensor = pyobj2tensor(py_key) 79 | dist.broadcast(py_key_tensor, src=0) 80 | py_key = tensor2pyobj(py_key_tensor) 81 | 82 | tensor_shapes = [py_dict[k].shape for k in py_key] 83 | tensor_numels = [py_dict[k].numel() for k in py_key] 84 | 85 | flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key]) 86 | dist.all_reduce(flatten_tensor, op=_get_reduce_op(op)) 87 | if op == "mean": 88 | flatten_tensor /= world_size 89 | 90 | split_tensors = [ 91 | x.reshape(shape) 92 | for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes) 93 | ] 94 | return OrderedDict({k: v for k, v in zip(py_key, split_tensors)}) 95 | 96 | 97 | def all_reduce_norm(module): 98 | """ 99 | All reduce norm statistics in different devices. 100 | """ 101 | states = get_async_norm_states(module) 102 | states = all_reduce(states, op="mean") 103 | module.load_state_dict(states, strict=False) 104 | -------------------------------------------------------------------------------- /yolox/data/data_prefetcher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
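# Illustrative, commented-out sketch of the intended call pattern;
# `train_loader` and `iters_per_epoch` are placeholders from a training loop.
#
# prefetcher = DataPrefetcher(train_loader, task="tracking")
# for _ in range(iters_per_epoch):
#     inp_pre, tgt_pre, inp_cur, tgt_cur = prefetcher.next()
#     # inp_cur / tgt_cur stay None when task != "tracking"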
4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | from yolox.utils import synchronize 9 | 10 | import random 11 | 12 | 13 | class DataPrefetcher: 14 | """ 15 | DataPrefetcher is inspired by code of following file: 16 | https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py 17 | It could speedup your pytorch dataloader. For more information, please check 18 | https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789. 19 | """ 20 | 21 | def __init__(self, loader,task): 22 | self.loader = iter(loader) 23 | self.task=task 24 | self.stream = torch.cuda.Stream() 25 | self.record_stream = DataPrefetcher._record_stream_for_image 26 | self.preload() 27 | 28 | def preload(self): 29 | try: 30 | if self.task=="tracking": 31 | self.next_input_pre, self.next_target_pre,self.next_input_cur, self.next_target_cur,_, _ = next(self.loader) 32 | else: 33 | self.next_input_pre, self.next_target_pre, _, _ = next(self.loader) 34 | except StopIteration: 35 | self.next_input_pre = None 36 | self.next_target_pre = None 37 | if self.task=="tracking": 38 | self.next_input_cur = None 39 | self.next_target_cur = None 40 | return 41 | 42 | with torch.cuda.stream(self.stream): 43 | self.next_input_pre = self.next_input_pre.cuda(non_blocking=True) 44 | self.next_target_pre = self.next_target_pre.cuda(non_blocking=True) 45 | if self.task=="tracking": 46 | self.next_input_cur = self.next_input_cur.cuda(non_blocking=True) 47 | self.next_target_cur = self.next_target_cur.cuda(non_blocking=True) 48 | 49 | 50 | def next(self): 51 | torch.cuda.current_stream().wait_stream(self.stream) 52 | input_pre = self.next_input_pre 53 | target_pre = self.next_target_pre 54 | input_cur = None 55 | target_cur = None 56 | if self.task=="tracking": 57 | input_cur = self.next_input_cur 58 | target_cur = self.next_target_cur 59 | if input_pre is not None: 60 | self.record_stream(input_pre) 61 | if target_pre is not None: 62 | target_pre.record_stream(torch.cuda.current_stream()) 63 | if self.task=="tracking": 64 | if input_cur is not None: 65 | self.record_stream(input_cur) 66 | if target_cur is not None: 67 | target_cur.record_stream(torch.cuda.current_stream()) 68 | self.preload() 69 | return input_pre,target_pre,input_cur,target_cur 70 | 71 | 72 | @staticmethod 73 | def _record_stream_for_image(input): 74 | input.record_stream(torch.cuda.current_stream()) 75 | 76 | 77 | def random_resize(data_loader, exp, epoch, rank, is_distributed): 78 | tensor = torch.LongTensor(1).cuda() 79 | if is_distributed: 80 | synchronize() 81 | 82 | if rank == 0: 83 | if epoch > exp.max_epoch - 10: 84 | size = exp.input_size 85 | else: 86 | size = random.randint(*exp.random_size) 87 | size = int(32 * size) 88 | tensor.fill_(size) 89 | 90 | if is_distributed: 91 | synchronize() 92 | dist.broadcast(tensor, 0) 93 | 94 | input_size = data_loader.change_input_dim(multiple=tensor.item(), random_range=None) 95 | return -------------------------------------------------------------------------------- /yolox/data/samplers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 
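# Illustrative, commented-out sketch showing how the two samplers defined
# below are typically combined; `dataset`, the batch size and the input
# dimension are placeholders.
#
# import torch
#
# sampler = InfiniteSampler(len(dataset), shuffle=True, seed=0)
# batch_sampler = YoloBatchSampler(
#     sampler=sampler,
#     batch_size=8,
#     drop_last=False,
#     input_dimension=(800, 1440),
#     mosaic=True,
# )
# loader = torch.utils.data.DataLoader(
#     dataset, batch_sampler=batch_sampler, num_workers=4, pin_memory=True
# )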
4 | 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import BatchSampler as torchBatchSampler 8 | from torch.utils.data.sampler import Sampler 9 | 10 | import itertools 11 | from typing import Optional 12 | 13 | 14 | class YoloBatchSampler(torchBatchSampler): 15 | """ 16 | This batch sampler will generate mini-batches of (dim, index) tuples from another sampler. 17 | It works just like the :class:`torch.utils.data.sampler.BatchSampler`, 18 | but it will prepend a dimension, whilst ensuring it stays the same across one mini-batch. 19 | """ 20 | 21 | def __init__(self, *args, input_dimension=None, mosaic=True, **kwargs): 22 | super().__init__(*args, **kwargs) 23 | self.input_dim = input_dimension 24 | self.new_input_dim = None 25 | self.mosaic = mosaic 26 | 27 | def __iter__(self): 28 | self.__set_input_dim() 29 | for batch in super().__iter__(): 30 | yield [(self.input_dim, idx, self.mosaic) for idx in batch] 31 | self.__set_input_dim() 32 | 33 | def __set_input_dim(self): 34 | """ This function randomly changes the the input dimension of the dataset. """ 35 | if self.new_input_dim is not None: 36 | self.input_dim = (self.new_input_dim[0], self.new_input_dim[1]) 37 | self.new_input_dim = None 38 | 39 | 40 | class InfiniteSampler(Sampler): 41 | """ 42 | In training, we only care about the "infinite stream" of training data. 43 | So this sampler produces an infinite stream of indices and 44 | all workers cooperate to correctly shuffle the indices and sample different indices. 45 | The samplers in each worker effectively produces `indices[worker_id::num_workers]` 46 | where `indices` is an infinite stream of indices consisting of 47 | `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) 48 | or `range(size) + range(size) + ...` (if shuffle is False) 49 | """ 50 | 51 | def __init__( 52 | self, 53 | size: int, 54 | shuffle: bool = True, 55 | seed: Optional[int] = 0, 56 | rank=0, 57 | world_size=1, 58 | ): 59 | """ 60 | Args: 61 | size (int): the total number of data of the underlying dataset to sample from 62 | shuffle (bool): whether to shuffle the indices or not 63 | seed (int): the initial seed of the shuffle. Must be the same 64 | across all workers. If None, will use a random seed shared 65 | among workers (require synchronization among all workers). 66 | """ 67 | self._size = size 68 | assert size > 0 69 | self._shuffle = shuffle 70 | self._seed = int(seed) 71 | 72 | if dist.is_available() and dist.is_initialized(): 73 | self._rank = dist.get_rank() 74 | self._world_size = dist.get_world_size() 75 | else: 76 | self._rank = rank 77 | self._world_size = world_size 78 | 79 | def __iter__(self): 80 | start = self._rank 81 | yield from itertools.islice( 82 | self._infinite_indices(), start, None, self._world_size 83 | ) 84 | 85 | def _infinite_indices(self): 86 | g = torch.Generator() 87 | g.manual_seed(self._seed) 88 | while True: 89 | if self._shuffle: 90 | yield from torch.randperm(self._size, generator=g) 91 | else: 92 | yield from torch.arange(self._size) 93 | 94 | def __len__(self): 95 | return self._size // self._world_size 96 | -------------------------------------------------------------------------------- /yolox/utils/metric.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
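# Illustrative, commented-out sketch of the meter utilities defined below;
# the numbers are made up.
#
# meters = MeterBuffer(window_size=20)
# meters.update(iter_time=0.21, total_loss=3.4)
# meters.update(iter_time=0.19, total_loss=3.1)
# avg_loss = meters["total_loss"].avg          # windowed average
# avg_iter = meters["iter_time"].global_avg    # average over all updates
# time_meters = meters.get_filtered_meter("time")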
4 | import numpy as np 5 | 6 | import torch 7 | 8 | import functools 9 | import os 10 | import time 11 | from collections import defaultdict, deque 12 | 13 | __all__ = [ 14 | "AverageMeter", 15 | "MeterBuffer", 16 | "get_total_and_free_memory_in_Mb", 17 | "occupy_mem", 18 | "gpu_mem_usage", 19 | ] 20 | 21 | 22 | def get_total_and_free_memory_in_Mb(cuda_device): 23 | devices_info_str = os.popen( 24 | "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader" 25 | ) 26 | devices_info = devices_info_str.read().strip().split("\n") 27 | total, used = devices_info[int(cuda_device)].split(",") 28 | return int(total), int(used) 29 | 30 | 31 | def occupy_mem(cuda_device, mem_ratio=0.95): 32 | """ 33 | pre-allocate gpu memory for training to avoid memory Fragmentation. 34 | """ 35 | total, used = get_total_and_free_memory_in_Mb(cuda_device) 36 | max_mem = int(total * mem_ratio) 37 | block_mem = max_mem - used 38 | x = torch.cuda.FloatTensor(256, 1024, block_mem) 39 | del x 40 | time.sleep(5) 41 | 42 | 43 | def gpu_mem_usage(): 44 | """ 45 | Compute the GPU memory usage for the current device (MB). 46 | """ 47 | mem_usage_bytes = torch.cuda.max_memory_allocated() 48 | return mem_usage_bytes / (1024 * 1024) 49 | 50 | 51 | class AverageMeter: 52 | """Track a series of values and provide access to smoothed values over a 53 | window or the global series average. 54 | """ 55 | 56 | def __init__(self, window_size=50): 57 | self._deque = deque(maxlen=window_size) 58 | self._total = 0.0 59 | self._count = 0 60 | 61 | def update(self, value): 62 | self._deque.append(value) 63 | self._count += 1 64 | self._total += value 65 | 66 | @property 67 | def median(self): 68 | d = np.array(list(self._deque)) 69 | return np.median(d) 70 | 71 | @property 72 | def avg(self): 73 | # if deque is empty, nan will be returned. 74 | d = np.array(list(self._deque)) 75 | return d.mean() 76 | 77 | @property 78 | def global_avg(self): 79 | return self._total / max(self._count, 1e-5) 80 | 81 | @property 82 | def latest(self): 83 | return self._deque[-1] if len(self._deque) > 0 else None 84 | 85 | @property 86 | def total(self): 87 | return self._total 88 | 89 | def reset(self): 90 | self._deque.clear() 91 | self._total = 0.0 92 | self._count = 0 93 | 94 | def clear(self): 95 | self._deque.clear() 96 | 97 | 98 | class MeterBuffer(defaultdict): 99 | """Computes and stores the average and current value""" 100 | 101 | def __init__(self, window_size=20): 102 | factory = functools.partial(AverageMeter, window_size=window_size) 103 | super().__init__(factory) 104 | 105 | def reset(self): 106 | for v in self.values(): 107 | v.reset() 108 | 109 | def get_filtered_meter(self, filter_key="time"): 110 | return {k: v for k, v in self.items() if filter_key in k} 111 | 112 | def update(self, values=None, **kwargs): 113 | if values is None: 114 | values = {} 115 | values.update(kwargs) 116 | for k, v in values.items(): 117 | if isinstance(v, torch.Tensor): 118 | v = v.detach() 119 | self[k].update(v) 120 | 121 | def clear_meters(self): 122 | for v in self.values(): 123 | v.clear() 124 | -------------------------------------------------------------------------------- /yolox/layers/csrc/cocoeval/cocoeval.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved
2 | #pragma once
3 |
4 | #include <pybind11/numpy.h>
5 | #include <pybind11/pybind11.h>
6 | #include <pybind11/stl.h>
7 | #include <pybind11/stl_bind.h>
8 | #include <vector>
9 |
10 | namespace py = pybind11;
11 |
12 | namespace COCOeval {
13 |
14 | // Annotation data for a single object instance in an image
15 | struct InstanceAnnotation {
16 |   InstanceAnnotation(
17 |       uint64_t id,
18 |       double score,
19 |       double area,
20 |       bool is_crowd,
21 |       bool ignore)
22 |       : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {}
23 |   uint64_t id;
24 |   double score = 0.;
25 |   double area = 0.;
26 |   bool is_crowd = false;
27 |   bool ignore = false;
28 | };
29 |
30 | // Stores intermediate results for evaluating detection results for a single
31 | // image that has D detected instances and G ground truth instances. This stores
32 | // matches between detected and ground truth instances
33 | struct ImageEvaluation {
34 |   // For each of the D detected instances, the id of the matched ground truth
35 |   // instance, or 0 if unmatched
36 |   std::vector<uint64_t> detection_matches;
37 |
38 |   // The detection score of each of the D detected instances
39 |   std::vector<double> detection_scores;
40 |
41 |   // Marks whether or not each of G instances was ignored from evaluation (e.g.,
42 |   // because it's outside area_range)
43 |   std::vector<bool> ground_truth_ignores;
44 |
45 |   // Marks whether or not each of D instances was ignored from evaluation (e.g.,
46 |   // because it's outside aRng)
47 |   std::vector<bool> detection_ignores;
48 | };
49 |
50 | template <class T>
51 | using ImageCategoryInstances = std::vector<std::vector<std::vector<T>>>;
52 |
53 | // C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each
54 | // combination of image, category, area range settings, and IOU thresholds to
55 | // evaluate, it matches detected instances to ground truth instances and stores
56 | // the results into a vector of ImageEvaluation results, which will be
57 | // interpreted by the COCOeval::Accumulate() function to produce precision-recall
58 | // curves. The parameters of nested vectors have the following semantics:
59 | //   image_category_ious[i][c][d][g] is the intersection over union of the d'th
60 | //     detected instance and g'th ground truth instance of
61 | //     category category_ids[c] in image image_ids[i]
62 | //   image_category_ground_truth_instances[i][c] is a vector of ground truth
63 | //     instances in image image_ids[i] of category category_ids[c]
64 | //   image_category_detection_instances[i][c] is a vector of detected
65 | //     instances in image image_ids[i] of category category_ids[c]
66 | std::vector<ImageEvaluation> EvaluateImages(
67 |     const std::vector<std::array<double, 2>>& area_ranges, // vector of 2-tuples
68 |     int max_detections,
69 |     const std::vector<double>& iou_thresholds,
70 |     const ImageCategoryInstances<std::vector<double>>& image_category_ious,
71 |     const ImageCategoryInstances<InstanceAnnotation>&
72 |         image_category_ground_truth_instances,
73 |     const ImageCategoryInstances<InstanceAnnotation>&
74 |         image_category_detection_instances);
75 |
76 | // C++ implementation of COCOeval.accumulate(), which generates precision
77 | // recall curves for each set of category, IOU threshold, detection area range,
78 | // and max number of detections parameters.
It is assumed that the parameter
79 | // evaluations is the return value of the function COCOeval::EvaluateImages(),
80 | // which was called with the same parameter settings params
81 | py::dict Accumulate(
82 |     const py::object& params,
83 |     const std::vector<ImageEvaluation>& evaluations);
84 |
85 | } // namespace COCOeval
86 |
--------------------------------------------------------------------------------
/yolox/utils/model_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
4 |
5 | import torch
6 | import torch.nn as nn
7 | from thop import profile
8 |
9 | from copy import deepcopy
10 |
11 | __all__ = [
12 |     "fuse_conv_and_bn",
13 |     "fuse_model",
14 |     "get_model_info",
15 |     "replace_module",
16 | ]
17 |
18 |
19 | def get_model_info(model, tsize):
20 |
21 |     stride = 64
22 |     img = torch.zeros((2, 3, stride, stride), device=next(model.parameters()).device)
23 |     flops, params = profile(deepcopy(model), inputs=(img.split(1,dim=0),), verbose=False)
24 |     params /= 1e6
25 |     flops /= 1e9
26 |     flops *= tsize[0] * tsize[1] / stride / stride * 2  # Gflops
27 |     info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops)
28 |     return info
29 |
30 |
31 | def fuse_conv_and_bn(conv, bn):
32 |     # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
33 |     fusedconv = (
34 |         nn.Conv2d(
35 |             conv.in_channels,
36 |             conv.out_channels,
37 |             kernel_size=conv.kernel_size,
38 |             stride=conv.stride,
39 |             padding=conv.padding,
40 |             groups=conv.groups,
41 |             bias=True,
42 |         )
43 |         .requires_grad_(False)
44 |         .to(conv.weight.device)
45 |     )
46 |
47 |     # prepare filters
48 |     w_conv = conv.weight.clone().view(conv.out_channels, -1)
49 |     w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
50 |     fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
51 |
52 |     # prepare spatial bias
53 |     b_conv = (
54 |         torch.zeros(conv.weight.size(0), device=conv.weight.device)
55 |         if conv.bias is None
56 |         else conv.bias
57 |     )
58 |     b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(
59 |         torch.sqrt(bn.running_var + bn.eps)
60 |     )
61 |     fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
62 |
63 |     return fusedconv
64 |
65 |
66 | def fuse_model(model):
67 |     from yolox.models.network_blocks import BaseConv
68 |
69 |     for m in model.modules():
70 |         if type(m) is BaseConv and hasattr(m, "bn"):
71 |             m.conv = fuse_conv_and_bn(m.conv, m.bn)  # update conv
72 |             delattr(m, "bn")  # remove batchnorm
73 |             m.forward = m.fuseforward  # update forward
74 |     return model
75 |
76 |
77 | def replace_module(module, replaced_module_type, new_module_type, replace_func=None):
78 |     """
79 |     Replace given type in module to a new type. Mostly used in deploy.
80 |
81 |     Args:
82 |         module (nn.Module): model to apply replace operation.
83 |         replaced_module_type (Type): module type to be replaced.
84 |         new_module_type (Type): module type used as the replacement.
85 |         replace_func (function): python function to describe replace logic. Default value: None.
86 |
87 |     Returns:
88 |         model (nn.Module): module with the replacement already applied.
89 | """ 90 | 91 | def default_replace_func(replaced_module_type, new_module_type): 92 | return new_module_type() 93 | 94 | if replace_func is None: 95 | replace_func = default_replace_func 96 | 97 | model = module 98 | if isinstance(module, replaced_module_type): 99 | model = replace_func(replaced_module_type, new_module_type) 100 | else: # recurrsively replace 101 | for name, child in module.named_children(): 102 | new_child = replace_module(child, replaced_module_type, new_module_type) 103 | if new_child is not child: # child is already replaced 104 | model.add_module(name, new_child) 105 | 106 | return model 107 | -------------------------------------------------------------------------------- /tools/mota.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | import numpy as np 3 | np.float = float 4 | np.int = int 5 | np.object = object 6 | np.bool = bool 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | from torch.nn.parallel import DistributedDataParallel as DDP 10 | import sys 11 | import os 12 | 13 | prj_path = os.path.join(os.path.dirname(__file__), '..') 14 | if prj_path not in sys.path: 15 | sys.path.append(prj_path) 16 | 17 | from yolox.core import launch 18 | from yolox.exp import get_exp 19 | from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger 20 | 21 | import argparse 22 | import os 23 | import random 24 | import warnings 25 | import glob 26 | import motmetrics as mm 27 | from collections import OrderedDict 28 | from pathlib import Path 29 | 30 | 31 | def compare_dataframes(gts, ts): 32 | accs = [] 33 | names = [] 34 | for k, tsacc in ts.items(): 35 | if k in gts: 36 | logger.info('Comparing {}...'.format(k)) 37 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) 38 | names.append(k) 39 | else: 40 | logger.warning('No ground truth for {}, skipping.'.format(k)) 41 | 42 | return accs, names 43 | 44 | 45 | # evaluate MOTA 46 | 47 | results_folder = 'DiffusionTrack_outputs/yolox_x_diffusion_track_mot17_ablation/track_results_mot17_ablation_1_500' 48 | mm.lap.default_solver = 'lap' 49 | 50 | gt_type = '_val_half' 51 | #gt_type = '' 52 | print('gt_type', gt_type) 53 | gtfiles = glob.glob( 54 | os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) 55 | print('gt_files', gtfiles) 56 | tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] 57 | 58 | logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 59 | logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) 60 | logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) 61 | logger.info('Loading files.') 62 | 63 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) 64 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=-1.0)) for f in tsfiles]) 65 | 66 | mh = mm.metrics.create() 67 | accs, names = compare_dataframes(gt, ts) 68 | 69 | logger.info('Running metrics') 70 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', 71 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', 72 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] 73 | summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) 74 | # summary = mh.compute_many(accs, 
names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) 75 | # print(mm.io.render_summary( 76 | # summary, formatters=mh.formatters, 77 | # namemap=mm.io.motchallenge_metric_names)) 78 | div_dict = { 79 | 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], 80 | 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} 81 | for divisor in div_dict: 82 | for divided in div_dict[divisor]: 83 | summary[divided] = (summary[divided] / summary[divisor]) 84 | fmt = mh.formatters 85 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', 86 | 'partially_tracked', 'mostly_lost'] 87 | for k in change_fmt_list: 88 | fmt[k] = fmt['mota'] 89 | print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) 90 | 91 | metrics = mm.metrics.motchallenge_metrics + ['num_objects'] 92 | summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) 93 | print(mm.io.render_summary(summary, formatters=mh.formatters, namemap=mm.io.motchallenge_metric_names)) 94 | logger.info('Completed') 95 | -------------------------------------------------------------------------------- /yolox/models/yolo_pafpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from .darknet import CSPDarknet 9 | from .network_blocks import BaseConv, CSPLayer, DWConv 10 | 11 | 12 | class YOLOPAFPN(nn.Module): 13 | """ 14 | YOLOv3 model. Darknet 53 is the default backbone of this model. 15 | """ 16 | 17 | def __init__( 18 | self, 19 | depth=1.0, 20 | width=1.0, 21 | in_features=("dark3", "dark4", "dark5"), 22 | in_channels=[256, 512, 1024], 23 | depthwise=False, 24 | act="silu", 25 | ): 26 | super().__init__() 27 | self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) 28 | self.in_features = in_features 29 | self.in_channels = in_channels 30 | Conv = DWConv if depthwise else BaseConv 31 | 32 | self.upsample = nn.Upsample(scale_factor=2, mode="nearest") 33 | self.lateral_conv0 = BaseConv( 34 | int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act 35 | ) 36 | self.C3_p4 = CSPLayer( 37 | int(2 * in_channels[1] * width), 38 | int(in_channels[1] * width), 39 | round(3 * depth), 40 | False, 41 | depthwise=depthwise, 42 | act=act, 43 | ) # cat 44 | 45 | self.reduce_conv1 = BaseConv( 46 | int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act 47 | ) 48 | self.C3_p3 = CSPLayer( 49 | int(2 * in_channels[0] * width), 50 | int(in_channels[0] * width), 51 | round(3 * depth), 52 | False, 53 | depthwise=depthwise, 54 | act=act, 55 | ) 56 | 57 | # bottom-up conv 58 | self.bu_conv2 = Conv( 59 | int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act 60 | ) 61 | self.C3_n3 = CSPLayer( 62 | int(2 * in_channels[0] * width), 63 | int(in_channels[1] * width), 64 | round(3 * depth), 65 | False, 66 | depthwise=depthwise, 67 | act=act, 68 | ) 69 | 70 | # bottom-up conv 71 | self.bu_conv1 = Conv( 72 | int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act 73 | ) 74 | self.C3_n4 = CSPLayer( 75 | int(2 * in_channels[1] * width), 76 | int(in_channels[2] * width), 77 | round(3 * depth), 78 | False, 79 | depthwise=depthwise, 80 | act=act, 81 | ) 82 | 83 | 84 | def forward(self, input): 85 | """ 86 | 
Args: 87 | inputs: input images. 88 | 89 | Returns: 90 | Tuple[Tensor]: FPN feature. 91 | """ 92 | 93 | # backbone 94 | out_features = self.backbone(input) 95 | features = [out_features[f] for f in self.in_features] 96 | [x2, x1, x0] = features 97 | 98 | fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 99 | f_out0 = self.upsample(fpn_out0) # 512/16 100 | f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 101 | f_out0 = self.C3_p4(f_out0) # 1024->512/16 102 | 103 | fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 104 | f_out1 = self.upsample(fpn_out1) # 256/8 105 | f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 106 | pan_out2 = self.C3_p3(f_out1) # 512->256/8 107 | 108 | p_out1 = self.bu_conv2(pan_out2) # 256->256/16 109 | p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 110 | pan_out1 = self.C3_n3(p_out1) # 512->512/16 111 | 112 | p_out0 = self.bu_conv1(pan_out1) # 512->512/32 113 | p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 114 | pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 115 | 116 | outputs = (pan_out2, pan_out1, pan_out0) 117 | return outputs 118 | -------------------------------------------------------------------------------- /yolox/tracking_utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | 6 | def write_results(filename, results_dict: Dict, data_type: str): 7 | if not filename: 8 | return 9 | path = os.path.dirname(filename) 10 | if not os.path.exists(path): 11 | os.makedirs(path) 12 | 13 | if data_type in ('mot', 'mcmot', 'lab'): 14 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 15 | elif data_type == 'kitti': 16 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 17 | else: 18 | raise ValueError(data_type) 19 | 20 | with open(filename, 'w') as f: 21 | for frame_id, frame_data in results_dict.items(): 22 | if data_type == 'kitti': 23 | frame_id -= 1 24 | for tlwh, track_id in frame_data: 25 | if track_id < 0: 26 | continue 27 | x1, y1, w, h = tlwh 28 | x2, y2 = x1 + w, y1 + h 29 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 30 | f.write(line) 31 | 32 | 33 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 34 | if data_type in ('mot', 'lab'): 35 | read_fun = read_mot_results 36 | else: 37 | raise ValueError('Unknown data type: {}'.format(data_type)) 38 | 39 | return read_fun(filename, is_gt, is_ignore) 40 | 41 | 42 | """ 43 | labels={'ped', ... % 1 44 | 'person_on_vhcl', ... % 2 45 | 'car', ... % 3 46 | 'bicycle', ... % 4 47 | 'mbike', ... % 5 48 | 'non_mot_vhcl', ... % 6 49 | 'static_person', ... % 7 50 | 'distractor', ... % 8 51 | 'occluder', ... % 9 52 | 'occluder_on_grnd', ... %10 53 | 'occluder_full', ... % 11 54 | 'reflection', ... % 12 55 | 'crowd' ... 
% 13 56 | }; 57 | """ 58 | 59 | 60 | def read_mot_results(filename, is_gt, is_ignore): 61 | valid_labels = {1} 62 | ignore_labels = {2, 7, 8, 12} 63 | results_dict = dict() 64 | if os.path.isfile(filename): 65 | with open(filename, 'r') as f: 66 | for line in f.readlines(): 67 | linelist = line.split(',') 68 | if len(linelist) < 7: 69 | continue 70 | fid = int(linelist[0]) 71 | if fid < 1: 72 | continue 73 | results_dict.setdefault(fid, list()) 74 | 75 | box_size = float(linelist[4]) * float(linelist[5]) 76 | 77 | if is_gt: 78 | if 'MOT16-' in filename or 'MOT17-' in filename: 79 | label = int(float(linelist[7])) 80 | mark = int(float(linelist[6])) 81 | if mark == 0 or label not in valid_labels: 82 | continue 83 | score = 1 84 | elif is_ignore: 85 | if 'MOT16-' in filename or 'MOT17-' in filename: 86 | label = int(float(linelist[7])) 87 | vis_ratio = float(linelist[8]) 88 | if label not in ignore_labels and vis_ratio >= 0: 89 | continue 90 | else: 91 | continue 92 | score = 1 93 | else: 94 | score = float(linelist[6]) 95 | 96 | #if box_size > 7000: 97 | #if box_size <= 7000 or box_size >= 15000: 98 | #if box_size < 15000: 99 | #continue 100 | 101 | tlwh = tuple(map(float, linelist[2:6])) 102 | target_id = int(linelist[1]) 103 | 104 | results_dict[fid].append((tlwh, target_id, score)) 105 | 106 | return results_dict 107 | 108 | 109 | def unzip_objs(objs): 110 | if len(objs) > 0: 111 | tlwhs, ids, scores = zip(*objs) 112 | else: 113 | tlwhs, ids, scores = [], [], [] 114 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 115 | 116 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from loguru import logger 2 | import numpy as np 3 | np.float = float 4 | np.int = int 5 | np.object = object 6 | np.bool = bool 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import os 10 | # os.environ["CUDA_VISIBLE_DEVICES"]="2,3,4,5,6,7" 11 | import sys 12 | prj_path = os.path.join(os.path.dirname(__file__), '..') 13 | 14 | if prj_path not in sys.path: 15 | sys.path.append(prj_path) 16 | from yolox.core import Trainer, launch 17 | from yolox.exp import get_exp 18 | 19 | import argparse 20 | import random 21 | import warnings 22 | 23 | 24 | def make_parser(): 25 | parser = argparse.ArgumentParser("YOLOX train parser") 26 | parser.add_argument("-expn", "--experiment-name", type=str, default=None) 27 | parser.add_argument("-n", "--name", type=str, default=None, help="model name") 28 | 29 | # distributed 30 | parser.add_argument( 31 | "--dist-backend", default="nccl", type=str, help="distributed backend" 32 | ) 33 | parser.add_argument( 34 | "--dist-url", 35 | default=None, 36 | type=str, 37 | help="url used to set up distributed training", 38 | ) 39 | parser.add_argument("-b", "--batch-size", type=int, default=2*8, help="batch size") 40 | parser.add_argument( 41 | "-d", "--devices", default=8, type=int, help="device for training" 42 | ) 43 | parser.add_argument( 44 | "--local_rank", default=0, type=int, help="local rank for dist training" 45 | ) 46 | parser.add_argument( 47 | "-f", 48 | "--exp_file", 49 | default="exps/example/mot/yolox_x_diffusion_track_dancetrack_baseline.py", 50 | type=str, 51 | help="plz input your expriment description file", 52 | ) 53 | parser.add_argument( 54 | "--resume", default=False, action="store_true", help="resume training" 55 | ) 56 | parser.add_argument("-c", "--ckpt", 
default="diffusion_dancetrack_det.pth.tar", type=str, help="checkpoint file") 57 | parser.add_argument( 58 | "-e", 59 | "--start_epoch", 60 | default=None, 61 | type=int, 62 | help="resume training start epoch", 63 | ) 64 | parser.add_argument( 65 | "--num_machines", default=1, type=int, help="num of node for training" 66 | ) 67 | parser.add_argument( 68 | "--machine_rank", default=0, type=int, help="node rank for multi-node training" 69 | ) 70 | parser.add_argument( 71 | "--fp16", 72 | dest="fp16", 73 | default=False, 74 | action="store_true", 75 | help="Adopting mix precision training.", 76 | ) 77 | parser.add_argument( 78 | "-o", 79 | "--occupy", 80 | dest="occupy", 81 | default=False, 82 | action="store_true", 83 | help="occupy GPU memory first for training.", 84 | ) 85 | parser.add_argument( 86 | "opts", 87 | help="Modify config options using the command-line", 88 | default=None, 89 | nargs=argparse.REMAINDER, 90 | ) 91 | return parser 92 | 93 | 94 | @logger.catch 95 | def main(exp, args): 96 | if exp.seed is not None: 97 | random.seed(exp.seed) 98 | torch.manual_seed(exp.seed) 99 | cudnn.deterministic = True 100 | warnings.warn( 101 | "You have chosen to seed training. This will turn on the CUDNN deterministic setting, " 102 | "which can slow down your training considerably! You may see unexpected behavior " 103 | "when restarting from checkpoints." 104 | ) 105 | 106 | # set environment variables for distributed training 107 | cudnn.benchmark = True 108 | 109 | trainer = Trainer(exp, args) 110 | trainer.train() 111 | 112 | 113 | if __name__ == "__main__": 114 | args = make_parser().parse_args() 115 | # args.exp_file=f 116 | # args.ckpt=c 117 | exp = get_exp(args.exp_file, args.name) 118 | exp.merge(args.opts) 119 | 120 | if not args.experiment_name: 121 | args.experiment_name = exp.exp_name 122 | 123 | num_gpu = torch.cuda.device_count() if args.devices is None else args.devices 124 | assert num_gpu <= torch.cuda.device_count() 125 | 126 | launch( 127 | main, 128 | num_gpu, 129 | args.num_machines, 130 | args.machine_rank, 131 | backend=args.dist_backend, 132 | dist_url=args.dist_url, 133 | args=(exp, args), 134 | ) 135 | -------------------------------------------------------------------------------- /yolox/tracking_utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | 7 | from yolox.tracking_utils.io import read_results, unzip_objs 8 | 9 | 10 | class Evaluator(object): 11 | 12 | def __init__(self, data_root, seq_name, data_type): 13 | self.data_root = data_root 14 | self.seq_name = seq_name 15 | self.data_type = data_type 16 | 17 | self.load_annotations() 18 | self.reset_accumulator() 19 | 20 | def load_annotations(self): 21 | assert self.data_type == 'mot' 22 | 23 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 24 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 25 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 26 | 27 | def reset_accumulator(self): 28 | self.acc = mm.MOTAccumulator(auto_id=True) 29 | 30 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 31 | # results 32 | trk_tlwhs = np.copy(trk_tlwhs) 33 | trk_ids = np.copy(trk_ids) 34 | 35 | # gts 36 | gt_objs = self.gt_frame_dict.get(frame_id, []) 37 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 38 | 39 | # ignore boxes 40 | ignore_objs = 
self.gt_ignore_frame_dict.get(frame_id, []) 41 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 57 | #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 58 | #match_ious = iou_distance[match_is, match_js] 59 | 60 | #match_js = np.asarray(match_js, dtype=int) 61 | #match_js = match_js[np.logical_not(np.isnan(match_ious))] 62 | #keep[match_js] = False 63 | #trk_tlwhs = trk_tlwhs[keep] 64 | #trk_ids = trk_ids[keep] 65 | 66 | # get distance matrix 67 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 68 | 69 | # acc 70 | self.acc.update(gt_ids, trk_ids, iou_distance) 71 | 72 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 73 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 74 | else: 75 | events = None 76 | return events 77 | 78 | def eval_file(self, filename): 79 | self.reset_accumulator() 80 | 81 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 82 | #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 83 | frames = sorted(list(set(result_frame_dict.keys()))) 84 | for frame_id in frames: 85 | trk_objs = result_frame_dict.get(frame_id, []) 86 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 87 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 88 | 89 | return self.acc 90 | 91 | @staticmethod 92 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 93 | names = copy.deepcopy(names) 94 | if metrics is None: 95 | metrics = mm.metrics.motchallenge_metrics 96 | metrics = copy.deepcopy(metrics) 97 | 98 | mh = mm.metrics.create() 99 | summary = mh.compute_many( 100 | accs, 101 | metrics=metrics, 102 | names=names, 103 | generate_overall=True 104 | ) 105 | 106 | return summary 107 | 108 | @staticmethod 109 | def save_summary(summary, filename): 110 | import pandas as pd 111 | writer = pd.ExcelWriter(filename) 112 | summary.to_excel(writer) 113 | writer.save() -------------------------------------------------------------------------------- /tools/mix_data_test_mot17.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | 5 | """ 6 | cd datasets 7 | mkdir -p mix_det/annotations 8 | cp mot/annotations/val_half.json mix_det/annotations/val_half.json 9 | cp mot/annotations/test.json mix_det/annotations/test.json 10 | cd mix_det 11 | ln -s ../mot/train mot_train 12 | ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train 13 | ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val 14 | ln -s ../Cityscapes cp_train 15 | ln -s ../ETHZ ethz_train 16 | cd .. 
17 | """ 18 | 19 | mot_json = json.load(open('datasets/mot/annotations/train.json','r')) 20 | 21 | img_list = list() 22 | for img in mot_json['images']: 23 | img['file_name'] = 'mot_train/' + img['file_name'] 24 | img_list.append(img) 25 | 26 | ann_list = list() 27 | for ann in mot_json['annotations']: 28 | ann_list.append(ann) 29 | 30 | video_list = mot_json['videos'] 31 | category_list = mot_json['categories'] 32 | 33 | 34 | print('mot17') 35 | 36 | max_img = 10000 37 | max_ann = 2000000 38 | max_video = 10 39 | 40 | crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) 41 | img_id_count = 0 42 | for img in crowdhuman_json['images']: 43 | img_id_count += 1 44 | img['file_name'] = 'crowdhuman_train/Images/' + img['file_name'] 45 | img['frame_id'] = img_id_count 46 | img['prev_image_id'] = img['id'] + max_img 47 | img['next_image_id'] = img['id'] + max_img 48 | img['id'] = img['id'] + max_img 49 | img['video_id'] = max_video 50 | img_list.append(img) 51 | 52 | for ann in crowdhuman_json['annotations']: 53 | ann['id'] = ann['id'] + max_ann 54 | ann['image_id'] = ann['image_id'] + max_img 55 | ann_list.append(ann) 56 | 57 | print('crowdhuman_train') 58 | 59 | video_list.append({ 60 | 'id': max_video, 61 | 'file_name': 'crowdhuman_train' 62 | }) 63 | 64 | 65 | max_img = 30000 66 | max_ann = 10000000 67 | 68 | crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) 69 | img_id_count = 0 70 | for img in crowdhuman_val_json['images']: 71 | img_id_count += 1 72 | img['file_name'] = 'crowdhuman_val/Images/' + img['file_name'] 73 | img['frame_id'] = img_id_count 74 | img['prev_image_id'] = img['id'] + max_img 75 | img['next_image_id'] = img['id'] + max_img 76 | img['id'] = img['id'] + max_img 77 | img['video_id'] = max_video 78 | img_list.append(img) 79 | 80 | for ann in crowdhuman_val_json['annotations']: 81 | ann['id'] = ann['id'] + max_ann 82 | ann['image_id'] = ann['image_id'] + max_img 83 | ann_list.append(ann) 84 | 85 | print('crowdhuman_val') 86 | 87 | video_list.append({ 88 | 'id': max_video, 89 | 'file_name': 'crowdhuman_val' 90 | }) 91 | 92 | max_img = 40000 93 | max_ann = 20000000 94 | 95 | ethz_json = json.load(open('datasets/ETHZ/annotations/train.json','r')) 96 | img_id_count = 0 97 | for img in ethz_json['images']: 98 | img_id_count += 1 99 | img['file_name'] = 'ethz_train/' + img['file_name'][5:] 100 | img['frame_id'] = img_id_count 101 | img['prev_image_id'] = img['id'] + max_img 102 | img['next_image_id'] = img['id'] + max_img 103 | img['id'] = img['id'] + max_img 104 | img['video_id'] = max_video 105 | img_list.append(img) 106 | 107 | for ann in ethz_json['annotations']: 108 | ann['id'] = ann['id'] + max_ann 109 | ann['image_id'] = ann['image_id'] + max_img 110 | ann_list.append(ann) 111 | 112 | print('ETHZ') 113 | 114 | video_list.append({ 115 | 'id': max_video, 116 | 'file_name': 'ethz' 117 | }) 118 | 119 | max_img = 50000 120 | max_ann = 25000000 121 | 122 | cp_json = json.load(open('datasets/Cityscapes/annotations/train.json','r')) 123 | img_id_count = 0 124 | for img in cp_json['images']: 125 | img_id_count += 1 126 | img['file_name'] = 'cp_train/' + img['file_name'][11:] 127 | img['frame_id'] = img_id_count 128 | img['prev_image_id'] = img['id'] + max_img 129 | img['next_image_id'] = img['id'] + max_img 130 | img['id'] = img['id'] + max_img 131 | img['video_id'] = max_video 132 | img_list.append(img) 133 | 134 | for ann in cp_json['annotations']: 135 | ann['id'] = ann['id'] + max_ann 136 | ann['image_id'] = 
ann['image_id'] + max_img 137 | ann_list.append(ann) 138 | 139 | print('Cityscapes') 140 | 141 | video_list.append({ 142 | 'id': max_video, 143 | 'file_name': 'cityperson' 144 | }) 145 | 146 | mix_json = dict() 147 | mix_json['images'] = img_list 148 | mix_json['annotations'] = ann_list 149 | mix_json['videos'] = video_list 150 | mix_json['categories'] = category_list 151 | json.dump(mix_json, open('datasets/mix_det/annotations/train.json','w')) 152 | -------------------------------------------------------------------------------- /yolox/data/datasets/datasets_wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) Megvii, Inc. and its affiliates. 4 | 5 | from torch.utils.data.dataset import ConcatDataset as torchConcatDataset 6 | from torch.utils.data.dataset import Dataset as torchDataset 7 | 8 | import bisect 9 | from functools import wraps 10 | 11 | 12 | class ConcatDataset(torchConcatDataset): 13 | def __init__(self, datasets): 14 | super(ConcatDataset, self).__init__(datasets) 15 | if hasattr(self.datasets[0], "input_dim"): 16 | self._input_dim = self.datasets[0].input_dim 17 | self.input_dim = self.datasets[0].input_dim 18 | 19 | def pull_item(self, idx): 20 | if idx < 0: 21 | if -idx > len(self): 22 | raise ValueError( 23 | "absolute value of index should not exceed dataset length" 24 | ) 25 | idx = len(self) + idx 26 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 27 | if dataset_idx == 0: 28 | sample_idx = idx 29 | else: 30 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 31 | return self.datasets[dataset_idx].pull_item(sample_idx) 32 | 33 | 34 | class MixConcatDataset(torchConcatDataset): 35 | def __init__(self, datasets): 36 | super(MixConcatDataset, self).__init__(datasets) 37 | if hasattr(self.datasets[0], "input_dim"): 38 | self._input_dim = self.datasets[0].input_dim 39 | self.input_dim = self.datasets[0].input_dim 40 | 41 | def __getitem__(self, index): 42 | 43 | if not isinstance(index, int): 44 | idx = index[1] 45 | if idx < 0: 46 | if -idx > len(self): 47 | raise ValueError( 48 | "absolute value of index should not exceed dataset length" 49 | ) 50 | idx = len(self) + idx 51 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 52 | if dataset_idx == 0: 53 | sample_idx = idx 54 | else: 55 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 56 | if not isinstance(index, int): 57 | index = (index[0], sample_idx, index[2]) 58 | 59 | return self.datasets[dataset_idx][index] 60 | 61 | 62 | class Dataset(torchDataset): 63 | """ This class is a subclass of the base :class:`torch.utils.data.Dataset`, 64 | that enables on the fly resizing of the ``input_dim``. 65 | 66 | Args: 67 | input_dimension (tuple): (width,height) tuple with default dimensions of the network 68 | """ 69 | 70 | def __init__(self, input_dimension, mosaic=True): 71 | super().__init__() 72 | self.__input_dim = input_dimension[:2] 73 | self.enable_mosaic = mosaic 74 | 75 | @property 76 | def input_dim(self): 77 | """ 78 | Dimension that can be used by transforms to set the correct image size, etc. 79 | This allows transforms to have a single source of truth 80 | for the input dimension of the network. 
81 | 82 | Return: 83 | list: Tuple containing the current width,height 84 | """ 85 | if hasattr(self, "_input_dim"): 86 | return self._input_dim 87 | return self.__input_dim 88 | 89 | @staticmethod 90 | def resize_getitem(getitem_fn): 91 | """ 92 | Decorator method that needs to be used around the ``__getitem__`` method. |br| 93 | This decorator enables the on the fly resizing of 94 | the ``input_dim`` with our :class:`~lightnet.data.DataLoader` class. 95 | 96 | Example: 97 | >>> class CustomSet(ln.data.Dataset): 98 | ... def __len__(self): 99 | ... return 10 100 | ... @ln.data.Dataset.resize_getitem 101 | ... def __getitem__(self, index): 102 | ... # Should return (image, anno) but here we return input_dim 103 | ... return self.input_dim 104 | >>> data = CustomSet((200,200)) 105 | >>> data[0] 106 | (200, 200) 107 | >>> data[(480,320), 0] 108 | (480, 320) 109 | """ 110 | 111 | @wraps(getitem_fn) 112 | def wrapper(self, index): 113 | if not isinstance(index, int): 114 | has_dim = True 115 | self._input_dim = index[0] 116 | self.enable_mosaic = index[2] 117 | index = index[1] 118 | else: 119 | has_dim = False 120 | 121 | ret_val = getitem_fn(self, index) 122 | 123 | if has_dim: 124 | del self._input_dim 125 | 126 | return ret_val 127 | 128 | return wrapper 129 | -------------------------------------------------------------------------------- /yolox/utils/cluster_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | @torch.jit.script 4 | def intersect(box_a, box_b): 5 | """ We resize both tensors to [A,B,2] without new malloc: 6 | [A,2] -> [A,1,2] -> [A,B,2] 7 | [B,2] -> [1,B,2] -> [A,B,2] 8 | Then we compute the area of intersect between box_a and box_b. 9 | Args: 10 | box_a: (tensor) bounding boxes, Shape: [n,A,4]. 11 | box_b: (tensor) bounding boxes, Shape: [n,B,4]. 12 | Return: 13 | (tensor) intersection area, Shape: [n,A,B]. 14 | """ 15 | n = box_a.size(0) 16 | A = box_a.size(1) 17 | B = box_b.size(1) 18 | max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2), 19 | box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2)) 20 | min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2), 21 | box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2)) 22 | return torch.clamp(max_xy - min_xy, min=0).prod(3) # inter 23 | 24 | @torch.jit.script 25 | def garea(box_a, box_b): 26 | """ We resize both tensors to [A,B,2] without new malloc: 27 | [A,2] -> [A,1,2] -> [A,B,2] 28 | [B,2] -> [1,B,2] -> [A,B,2] 29 | Then we compute the area of intersect between box_a and box_b. 30 | Args: 31 | box_a: (tensor) bounding boxes, Shape: [n,A,4]. 32 | box_b: (tensor) bounding boxes, Shape: [n,B,4]. 33 | Return: 34 | (tensor) intersection area, Shape: [n,A,B]. 35 | """ 36 | n = box_a.size(0) 37 | A = box_a.size(1) 38 | B = box_b.size(1) 39 | max_xy = torch.max(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2), 40 | box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2)) 41 | min_xy = torch.min(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2), 42 | box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2)) 43 | return torch.clamp(max_xy - min_xy, min=0).prod(3) # inter 44 | 45 | @torch.jit.script 46 | def get_box_area(box): 47 | return (box[:, :, 2]-box[:, :, 0]) *(box[:, :, 3]-box[:, :, 1]) 48 | 49 | def giou_3d(box_a,box_b,box_c,box_d): 50 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 51 | is simply the intersection over union of two boxes. Here we operate on 52 | ground truth boxes and default boxes. 
two pairings are summed before forming the IoU term and the GIoU penalty. 53 | E.g.: 54 | GIoU = (I_ab + I_cd) / (U_ab + U_cd) - (C_ab + C_cd - U_ab - U_cd) / (C_ab + C_cd) 55 | Args: 56 | box_a, box_b: (tensor) boxes in the first frame, Shape: [A,4] / [B,4] or [n,A,4] / [n,B,4] 57 | box_c, box_d: (tensor) boxes in the second frame, with the same shapes as box_a / box_b 58 | Return: 59 | paired generalized IoU: (tensor) Shape: [A,B] or [n,A,B] 60 | """ 61 | use_batch = True 62 | if box_a.dim() == 2: 63 | use_batch = False 64 | box_a = box_a[None, ...] 65 | box_b = box_b[None, ...] 66 | box_c = box_c[None, ...] 67 | box_d = box_d[None, ...] 68 | 69 | interab = intersect(box_a,box_b) 70 | intercd = intersect(box_c,box_d) 71 | 72 | area_ab= garea(box_a,box_b) 73 | area_cd=garea(box_c,box_d) 74 | 75 | area_a = get_box_area(box_a).unsqueeze(2).expand_as(interab) # [A,B] 76 | area_b = get_box_area(box_b).unsqueeze(1).expand_as(interab) # [A,B] 77 | area_c = get_box_area(box_c).unsqueeze(2).expand_as(intercd) # [A,B] 78 | area_d = get_box_area(box_d).unsqueeze(1).expand_as(intercd) # [A,B] 79 | unionab = area_a + area_b - interab 80 | unioncd = area_c+area_d-intercd 81 | 82 | uiouabcd = (interab+intercd) / (unionab+unioncd) 83 | out=uiouabcd-(area_ab+area_cd-unionab-unioncd)/(area_ab+area_cd) 84 | return out if use_batch else out.squeeze(0) 85 | 86 | def cluster_nms(boxes_a,boxes_c,scores,iou_threshold:float=0.5, top_k:int=500): 87 | # Collapse all the classes into 1 88 | _, idx = scores.sort(0, descending=True) 89 | idx = idx[:top_k] 90 | boxes_a = boxes_a[idx] 91 | boxes_b = boxes_a 92 | boxes_c = boxes_c[idx] 93 | boxes_d = boxes_c 94 | iou = giou_3d(boxes_a,boxes_b,boxes_c,boxes_d).triu_(diagonal=1) 95 | B = iou 96 | for i in range(200): 97 | A=B 98 | maxA,_=torch.max(A, dim=0) 99 | E = (maxA<=iou_threshold).float().unsqueeze(1).expand_as(A) 100 | B=iou.mul(E) 101 | if A.equal(B): 102 | break 103 | idx_out = idx[maxA <= iou_threshold] 104 | return idx_out 105 | 106 | 107 | 108 | # ## test 109 | 110 | # boxes_a=[[100,100,200,200], 111 | # [110,110,210,210], 112 | # [50,50,150,150], 113 | # [100,100,200,200], 114 | # [90,90,190,190],] 115 | 116 | # boxes_c=[[100,100,200,200], 117 | # [110,110,210,210], 118 | # [150,150,250,250], 119 | # [0,0,100,100], 120 | # [10,10,110,110],] 121 | 122 | # scores=[0.91,0.9,0.95,0.9,0.8] 123 | 124 | # boxes_a=torch.tensor(boxes_a,dtype=torch.float) 125 | # boxes_c=torch.tensor(boxes_c,dtype=torch.float) 126 | # scores=torch.tensor(scores,dtype=torch.float) 127 | 128 | 129 | # indix=cluster_nms(boxes_a,boxes_c,scores) 130 | # print(indix) 131 | -------------------------------------------------------------------------------- /tools/convert_bdd100k_to_coco.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import json 4 | import tqdm 5 | import numpy as np 6 | 7 | labels_path = 'datasets/bdd100k/labels' 8 | img_path = 'datasets/bdd100k/images' 9 | # mot_labels_path = '/data/yourname/BDD100K-MOT/GT' 10 | 11 | out_path = 'datasets/bdd100k/annotations/' 12 | 13 | split = ['train'] 14 | categories = [ 15 | {"id": 1, "name": "pedestrian"}, 16 | {"id": 2, "name": "rider"}, 17 | {"id": 3, "name": "car"}, 18 | {"id": 4, "name": "truck"}, 19 | {"id": 5, "name": "bus"}, 20 | {"id": 6, "name": "train"}, 21 | {"id": 7, "name": "motorcycle"}, 22 | {"id": 8, "name": "bicycle"}, 23 | # {"id": 9, "name": "traffic light"}, 24 | # {"id": 10, "name": "traffic sign"}, 25 | ] 26 | 27 | # "traffic light":9, "traffic sign":10 28 | cat = {"pedestrian":1, "rider":2, "car":3, "truck":4, "bus":5, "train":6,
"motorcycle":7, "bicycle":8,} 29 | # 1: pedestrian 30 | # 2: rider 31 | # 3: car 32 | # 4: truck 33 | # 5: bus 34 | # 6: train 35 | # 7: motorcycle 36 | # 8: bicycle 37 | # 9: traffic light --- Don't need tracking 38 | # 10: traffic sign --- Don't need tracking 39 | # For MOT and MOTS, only the first 8 classes are used and evaluated 40 | 41 | def read_tid_num_per_video(video_ann_dir): 42 | anns = np.loadtxt(video_ann_dir, dtype=np.float32, delimiter=',') 43 | max_tid = max(anns[:, 1]) 44 | return int(max_tid) 45 | 46 | 47 | for s in split: 48 | img_id = 1; ann_id = 1; video_cnt = 0; 49 | tid_cnt = 0 50 | images = []; annotations=[]; videos = [] 51 | all_video=[d for d in os.listdir(os.path.join(labels_path, s)) if '.json' in d] 52 | need_index=np.random.choice(range(len(all_video)),len(all_video)//3,replace=False) 53 | video_labels_list = [all_video[i] for i in need_index] 54 | 55 | for v_label in tqdm.tqdm(video_labels_list): 56 | video_cnt += 1 57 | video = {'id': video_cnt, 'file_name':v_label[:-5]} 58 | videos.append(video) 59 | 60 | v_lab_path = os.path.join(os.path.join(labels_path, s, v_label)) 61 | with open(v_lab_path, 'r') as f: 62 | annos=json.load(f)# anns per video 63 | num_frames = len(annos)# the number of frames per video 64 | sign_cnt = 0 65 | for ann in annos:# ann --- the annotation info of one frame; frames with empty labels are not skipped here 66 | 67 | img_name = os.path.join(img_path, s, ann['videoName'], ann['name']) 68 | img=cv2.imread(img_name) 69 | h,w,_ = img.shape 70 | 71 | img_info = { 72 | 'file_name':img_name, 73 | 'width':w, 74 | 'height':h, 75 | 'id': img_id, 76 | 'frame_id': ann['frameIndex'] + 1,# order strictly by the frame index labelled in the dataset, which helps to judge the relation between adjacent frames 77 | 'prev_image_id': -1 if ann['frameIndex'] == 0 else img_id - 1, 78 | 'next_image_id': -1 if ann['frameIndex'] == num_frames-1 else img_id + 1, 79 | 'video_id': video_cnt 80 | }# every image's info goes into images; image info of frames with empty annotations is also added here 81 | images.append(img_info) 82 | 83 | for j, lab in enumerate(ann['labels']): 84 | # lab --- the annotation info of one instance; if the frame has empty annotations, ann['labels'] is empty and this loop does not run, otherwise it keeps executing 85 | if lab['category'] in cat:# to avoid classes such as 'other vehicle' 86 | pass 87 | else: 88 | continue 89 | 90 | track_id = lab['id'] 91 | 92 | if sign_cnt == 0 and j==0: 93 | firstid = track_id 94 | sign_cnt = 1 95 | 96 | tid_curr = int(track_id) - int(firstid) + 1 97 | tid_cnt+=1 98 | is_crowd = lab['attributes']['crowd'] 99 | x1, y1, x2, y2=lab['box2d']['x1'], lab['box2d']['y1'], lab['box2d']['x2'], lab['box2d']['y2'] 100 | 101 | annotation = { 102 | 'image_id': img_id, 103 | 'conf': 1, 104 | 'bbox': [x1, y1, x2-x1, y2-y1], 105 | 'category_id': cat[lab['category']], 106 | 'id': ann_id, 107 | 'iscrowd': 1 if is_crowd else 0, 108 | 'track_id': tid_curr + tid_cnt, 109 | 'segmentation': [], 110 | 'area': (x2-x1)*(y2-y1), 111 | 'box_id':int(track_id) 112 | } 113 | annotations.append(annotation) 114 | ann_id += 1 115 | 116 | img_id += 1 117 | 118 | # tid_cnt += read_tid_num_per_video(os.path.join(mot_labels_path, s, v_label[:-5]+'.txt')) 119 | 120 | dataset_dict = {} 121 | dataset_dict["images"] = images 122 | dataset_dict["annotations"] = annotations 123 | dataset_dict["categories"] = categories 124 | dataset_dict["videos"] = videos 125 | 126 | json_str = json.dumps(dataset_dict) 127 | print(f' The number of detection objects is {ann_id - 1}, The number of detection imgs is {img_id -1} .') 128 | with open(out_path+f'{s}.json', 'w') as json_file: 129 | json_file.write(json_str) -------------------------------------------------------------------------------- /yolox/data/datasets/mot.py: 
-------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from pycocotools.coco import COCO 4 | from collections import defaultdict 5 | import os 6 | 7 | from ..dataloading import get_yolox_datadir 8 | from .datasets_wrapper import Dataset 9 | 10 | 11 | class MOTDataset(Dataset): 12 | """ 13 | COCO dataset class. 14 | """ 15 | 16 | def __init__( 17 | self, 18 | data_dir=None, 19 | json_file="train_half.json", 20 | name="train", 21 | img_size=(608, 1088), 22 | preproc=None, 23 | ): 24 | """ 25 | COCO dataset initialization. Annotation data are read into memory by COCO API. 26 | Args: 27 | data_dir (str): dataset root directory 28 | json_file (str): COCO json file name 29 | name (str): COCO data name (e.g. 'train2017' or 'val2017') 30 | img_size (int): target image size after pre-processing 31 | preproc: data augmentation strategy 32 | """ 33 | super().__init__(img_size) 34 | if data_dir is None: 35 | data_dir = os.path.join(get_yolox_datadir(), "mot") 36 | self.data_dir = data_dir 37 | self.json_file = json_file 38 | 39 | self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file)) 40 | self.ids = self.coco.getImgIds() 41 | self.class_ids = sorted(self.coco.getCatIds()) 42 | cats = self.coco.loadCats(self.coco.getCatIds()) 43 | self._classes = tuple([c["name"] for c in cats]) 44 | self.video_info=defaultdict(list) 45 | self.annotations = self._load_coco_annotations() 46 | # "DanceTrack FRCNN" in self.coco.loadImgs(min(v))[0]["file_name"] or "MOT20" in self.coco.loadImgs(min(v))[0]["file_name"] 47 | self.video_info={k:(min(v),max(v),True) for k,v in self.video_info.items()} 48 | self.name = name 49 | self.img_size = img_size 50 | self.preproc = preproc 51 | 52 | def __len__(self): 53 | return len(self.ids) 54 | 55 | def _load_coco_annotations(self): 56 | return [self.load_anno_from_ids(index,_ids) for index,_ids in enumerate(self.ids)] 57 | 58 | def load_anno_from_ids(self,index,id_): 59 | im_ann = self.coco.loadImgs(id_)[0] 60 | width = im_ann["width"] 61 | height = im_ann["height"] 62 | frame_id = im_ann["frame_id"] 63 | video_id = im_ann["video_id"] 64 | self.video_info[video_id].append(index) 65 | anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False) 66 | annotations = self.coco.loadAnns(anno_ids) 67 | objs = [] 68 | for obj in annotations: 69 | x1 = obj["bbox"][0] 70 | y1 = obj["bbox"][1] 71 | x2 = x1 + obj["bbox"][2] 72 | y2 = y1 + obj["bbox"][3] 73 | if obj["area"] > 0 and x2 >= x1 and y2 >= y1: 74 | obj["clean_bbox"] = [x1, y1, x2, y2] 75 | objs.append(obj) 76 | 77 | num_objs = len(objs) 78 | 79 | res = np.zeros((num_objs, 6)) 80 | 81 | for ix, obj in enumerate(objs): 82 | cls = self.class_ids.index(obj["category_id"]) 83 | res[ix, 0:4] = obj["clean_bbox"] 84 | res[ix, 4] = cls 85 | res[ix, 5] = obj["track_id"] 86 | 87 | file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg" 88 | img_info = (height, width, frame_id, video_id, file_name) 89 | 90 | del im_ann, annotations 91 | 92 | return (res, img_info, file_name) 93 | 94 | def load_anno(self, index): 95 | return self.annotations[index][0] 96 | 97 | def pull_item(self, index): 98 | id_ = self.ids[index] 99 | 100 | res, img_info, file_name = self.annotations[index] 101 | # load image and preprocess 102 | img_file = os.path.join( 103 | self.data_dir, self.name, file_name 104 | ) 105 | # img_file=file_name 106 | img = cv2.imread(img_file) 107 | assert img is not None 108 | 109 | return img, res.copy(), 
img_info, np.array([id_]) 110 | 111 | @Dataset.resize_getitem 112 | def __getitem__(self, index): 113 | """ 114 | One image / label pair for the given index is picked up and pre-processed. 115 | 116 | Args: 117 | index (int): data index 118 | 119 | Returns: 120 | img (numpy.ndarray): pre-processed image 121 | padded_labels (torch.Tensor): pre-processed label data. 122 | The shape is :math:`[max_labels, 5]`. 123 | each label consists of [class, xc, yc, w, h]: 124 | class (float): class index. 125 | xc, yc (float) : center of bbox whose values range from 0 to 1. 126 | w, h (float) : size of bbox whose values range from 0 to 1. 127 | info_img : tuple of h, w, nh, nw, dx, dy. 128 | h, w (int): original shape of the image 129 | nh, nw (int): shape of the resized image without padding 130 | dx, dy (int): pad size 131 | img_id (int): same as the input index. Used for evaluation. 132 | """ 133 | img, target, img_info, img_id = self.pull_item(index) 134 | 135 | if self.preproc is not None: 136 | img, target = self.preproc(img, target, self.input_dim) 137 | return img,target,img_info,img_id 138 | -------------------------------------------------------------------------------- /yolox/utils/visualize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 4 | 5 | import cv2 6 | import numpy as np 7 | 8 | __all__ = ["vis"] 9 | 10 | 11 | def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): 12 | 13 | for i in range(len(boxes)): 14 | box = boxes[i] 15 | cls_id = int(cls_ids[i]) 16 | score = scores[i] 17 | if score < conf: 18 | continue 19 | x0 = int(box[0]) 20 | y0 = int(box[1]) 21 | x1 = int(box[2]) 22 | y1 = int(box[3]) 23 | 24 | color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() 25 | text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) 26 | txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) 27 | font = cv2.FONT_HERSHEY_SIMPLEX 28 | 29 | txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] 30 | cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) 31 | 32 | txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() 33 | cv2.rectangle( 34 | img, 35 | (x0, y0 + 1), 36 | (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])), 37 | txt_bk_color, 38 | -1 39 | ) 40 | cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) 41 | 42 | return img 43 | 44 | 45 | def get_color(idx): 46 | idx = idx * 3 47 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 48 | 49 | return color 50 | 51 | 52 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): 53 | im = np.ascontiguousarray(np.copy(image)) 54 | im_h, im_w = im.shape[:2] 55 | 56 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 57 | 58 | #text_scale = max(1, image.shape[1] / 1600.) 
59 | #text_thickness = 2 60 | #line_thickness = max(1, int(image.shape[1] / 500.)) 61 | text_scale = 2 62 | text_thickness = 2 63 | line_thickness = 3 64 | 65 | radius = max(5, int(im_w/140.)) 66 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), 67 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), thickness=2) 68 | 69 | for i, tlwh in enumerate(tlwhs): 70 | x1, y1, w, h = tlwh 71 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 72 | obj_id = int(obj_ids[i]) 73 | id_text = '{}'.format(int(obj_id)) 74 | if ids2 is not None: 75 | id_text = id_text + ', {}'.format(int(ids2[i])) 76 | color = get_color(abs(obj_id)) 77 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) 78 | cv2.putText(im, id_text, (intbox[0], intbox[1]), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), 79 | thickness=text_thickness) 80 | return im 81 | 82 | 83 | _COLORS = np.array( 84 | [ 85 | 0.000, 0.447, 0.741, 86 | 0.850, 0.325, 0.098, 87 | 0.929, 0.694, 0.125, 88 | 0.494, 0.184, 0.556, 89 | 0.466, 0.674, 0.188, 90 | 0.301, 0.745, 0.933, 91 | 0.635, 0.078, 0.184, 92 | 0.300, 0.300, 0.300, 93 | 0.600, 0.600, 0.600, 94 | 1.000, 0.000, 0.000, 95 | 1.000, 0.500, 0.000, 96 | 0.749, 0.749, 0.000, 97 | 0.000, 1.000, 0.000, 98 | 0.000, 0.000, 1.000, 99 | 0.667, 0.000, 1.000, 100 | 0.333, 0.333, 0.000, 101 | 0.333, 0.667, 0.000, 102 | 0.333, 1.000, 0.000, 103 | 0.667, 0.333, 0.000, 104 | 0.667, 0.667, 0.000, 105 | 0.667, 1.000, 0.000, 106 | 1.000, 0.333, 0.000, 107 | 1.000, 0.667, 0.000, 108 | 1.000, 1.000, 0.000, 109 | 0.000, 0.333, 0.500, 110 | 0.000, 0.667, 0.500, 111 | 0.000, 1.000, 0.500, 112 | 0.333, 0.000, 0.500, 113 | 0.333, 0.333, 0.500, 114 | 0.333, 0.667, 0.500, 115 | 0.333, 1.000, 0.500, 116 | 0.667, 0.000, 0.500, 117 | 0.667, 0.333, 0.500, 118 | 0.667, 0.667, 0.500, 119 | 0.667, 1.000, 0.500, 120 | 1.000, 0.000, 0.500, 121 | 1.000, 0.333, 0.500, 122 | 1.000, 0.667, 0.500, 123 | 1.000, 1.000, 0.500, 124 | 0.000, 0.333, 1.000, 125 | 0.000, 0.667, 1.000, 126 | 0.000, 1.000, 1.000, 127 | 0.333, 0.000, 1.000, 128 | 0.333, 0.333, 1.000, 129 | 0.333, 0.667, 1.000, 130 | 0.333, 1.000, 1.000, 131 | 0.667, 0.000, 1.000, 132 | 0.667, 0.333, 1.000, 133 | 0.667, 0.667, 1.000, 134 | 0.667, 1.000, 1.000, 135 | 1.000, 0.000, 1.000, 136 | 1.000, 0.333, 1.000, 137 | 1.000, 0.667, 1.000, 138 | 0.333, 0.000, 0.000, 139 | 0.500, 0.000, 0.000, 140 | 0.667, 0.000, 0.000, 141 | 0.833, 0.000, 0.000, 142 | 1.000, 0.000, 0.000, 143 | 0.000, 0.167, 0.000, 144 | 0.000, 0.333, 0.000, 145 | 0.000, 0.500, 0.000, 146 | 0.000, 0.667, 0.000, 147 | 0.000, 0.833, 0.000, 148 | 0.000, 1.000, 0.000, 149 | 0.000, 0.000, 0.167, 150 | 0.000, 0.000, 0.333, 151 | 0.000, 0.000, 0.500, 152 | 0.000, 0.000, 0.667, 153 | 0.000, 0.000, 0.833, 154 | 0.000, 0.000, 1.000, 155 | 0.000, 0.000, 0.000, 156 | 0.143, 0.143, 0.143, 157 | 0.286, 0.286, 0.286, 158 | 0.429, 0.429, 0.429, 159 | 0.571, 0.571, 0.571, 160 | 0.714, 0.714, 0.714, 161 | 0.857, 0.857, 0.857, 162 | 0.000, 0.447, 0.741, 163 | 0.314, 0.717, 0.741, 164 | 0.50, 0.5, 0 165 | ] 166 | ).astype(np.float32).reshape(-1, 3) 167 | -------------------------------------------------------------------------------- /yolox/utils/box_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Utilities for bounding box manipulation and GIoU. 
4 | """ 5 | import torch 6 | from torchvision.ops.boxes import box_area 7 | from yolox.utils.cluster_nms import giou_3d 8 | 9 | def box_cxcywh_to_xyxy(x): 10 | x_c, y_c, w, h = x.unbind(-1) 11 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 12 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 13 | return torch.stack(b, dim=-1) 14 | 15 | 16 | def box_xyxy_to_cxcywh(x): 17 | x0, y0, x1, y1 = x.unbind(-1) 18 | b = [(x0 + x1) / 2, (y0 + y1) / 2, 19 | (x1 - x0), (y1 - y0)] 20 | return torch.stack(b, dim=-1) 21 | 22 | 23 | # modified from torchvision to also return the union 24 | def box_iou(boxes1, boxes2): 25 | area1 = box_area(boxes1) 26 | area2 = box_area(boxes2) 27 | 28 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 29 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 30 | 31 | wh = (rb - lt).clamp(min=0) # [N,M,2] 32 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 33 | 34 | union = area1[:, None] + area2 - inter 35 | 36 | iou = inter / union 37 | return iou, union 38 | 39 | 40 | def generalized_box_iou(boxes1,boxes2,boxes3,boxes4): 41 | """ 42 | Generalized IoU from https://giou.stanford.edu/ 43 | 44 | The boxes should be in [x0, y0, x1, y1] format 45 | 46 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 47 | and M = len(boxes2) 48 | """ 49 | # degenerate boxes gives inf / nan results 50 | # so do an early check 51 | # boxes1=boxes1.float() 52 | # boxes2=boxes2.float() 53 | # boxes3=boxes3.float() 54 | # boxes4=boxes4.float() 55 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 56 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 57 | assert (boxes3[:, 2:] >= boxes3[:, :2]).all() 58 | assert (boxes4[:, 2:] >= boxes4[:, :2]).all() 59 | # iou1, union1 = box_iou(boxes1, boxes3) 60 | # iou2, union2 = box_iou(boxes2, boxes4) 61 | # lt = torch.min(boxes1[:, None, :2], boxes3[:, :2]) 62 | # rb = torch.max(boxes1[:, None, 2:], boxes3[:, 2:]) 63 | 64 | # wh = (rb - lt).clamp(min=0) # [N,M,2] 65 | # area1 = wh[:, :, 0] * wh[:, :, 1] 66 | 67 | # lt = torch.min(boxes2[:, None, :2], boxes4[:, :2]) 68 | # rb = torch.max(boxes2[:, None, 2:], boxes4[:, 2:]) 69 | 70 | # wh = (rb - lt).clamp(min=0) # [N,M,2] 71 | # area2 = wh[:, :, 0] * wh[:, :, 1] 72 | # uiou=(iou1*union1+iou2*union2)/(union1+union2) 73 | # uunion=union1+union2 74 | # uarea=area1+area2 75 | # return uiou- (uarea - uunion) / uarea 76 | 77 | return giou_3d(boxes1,boxes3,boxes2,boxes4) 78 | 79 | 80 | def masks_to_boxes(masks): 81 | """Compute the bounding boxes around the provided masks 82 | 83 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 
84 | 85 | Returns a [N, 4] tensors, with the boxes in xyxy format 86 | """ 87 | if masks.numel() == 0: 88 | return torch.zeros((0, 4), device=masks.device) 89 | 90 | h, w = masks.shape[-2:] 91 | 92 | y = torch.arange(0, h, dtype=torch.float) 93 | x = torch.arange(0, w, dtype=torch.float) 94 | y, x = torch.meshgrid(y, x) 95 | 96 | x_mask = (masks * x.unsqueeze(0)) 97 | x_max = x_mask.flatten(1).max(-1)[0] 98 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 99 | 100 | y_mask = (masks * y.unsqueeze(0)) 101 | y_max = y_mask.flatten(1).max(-1)[0] 102 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 103 | 104 | return torch.stack([x_min, y_min, x_max, y_max], 1) 105 | 106 | 107 | 108 | # boxes = targets[:, :4].copy() 109 | # labels = targets[:, 4].copy() 110 | # ids = targets[:, 5].copy() 111 | # if len(boxes) == 0: 112 | # targets = np.zeros((self.max_labels, 6), dtype=np.float32) 113 | # image, r_o = preproc(image, input_dim, self.means, self.std) 114 | # image = np.ascontiguousarray(image, dtype=np.float32) 115 | # return image, targets 116 | 117 | # image_o = image.copy() 118 | # targets_o = targets.copy() 119 | # height_o, width_o, _ = image_o.shape 120 | # boxes_o = targets_o[:, :4] 121 | # labels_o = targets_o[:, 4] 122 | # ids_o = targets_o[:, 5] 123 | # # bbox_o: [xyxy] to [c_x,c_y,w,h] 124 | # boxes_o = xyxy2cxcywh(boxes_o) 125 | 126 | # image_t = _distort(image) 127 | # image_t, boxes_t ,image_r,boxes_r= _mirror(image_t, boxes) 128 | # height, width, _ = image_t.shape 129 | # image_t, r_t = preproc(image_t, input_dim, self.means, self.std) 130 | # image_t, r_r = preproc(image_r, input_dim, self.means, self.std) 131 | # # boxes [xyxy] 2 [cx,cy,w,h] 132 | # boxes_t = xyxy2cxcywh(boxes_t) 133 | # boxes_t *= r_t 134 | 135 | # boxes_r = xyxy2cxcywh(boxes_r) 136 | # boxes_r *= r_r 137 | 138 | # mask_b = np.minimum(boxes_t[:, 2], boxes_t[:, 3]) > 1 139 | # boxes_t = boxes_t[mask_b] 140 | # boxes_r = boxes_r[mask_b] 141 | 142 | # labels_t = labels[mask_b] 143 | # ids_t = ids[mask_b] 144 | 145 | # if len(boxes_t) == 0: 146 | # image_t, r_o = preproc(image_o, input_dim, self.means, self.std) 147 | # boxes_o *= r_o 148 | # boxes_t = boxes_o 149 | # image_r=image_t 150 | # boxes_r=boxes_t 151 | # labels_t = labels_o 152 | # ids_t = ids_o 153 | 154 | # labels_t = np.expand_dims(labels_t, 1) 155 | # ids_t = np.expand_dims(ids_t, 1) 156 | 157 | # targets_t = np.hstack((labels_t, boxes_t, ids_t)) 158 | # padded_labels = np.zeros((self.max_labels, 6)) 159 | # padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[ 160 | # : self.max_labels 161 | # ] 162 | 163 | # targets_r = np.hstack((labels_t, boxes_r, ids_t)) 164 | # padded_labels_r = np.zeros((self.max_labels, 6)) 165 | # padded_labels_r[range(len(targets_r))[: self.max_labels]] = targets_r[ 166 | # : self.max_labels 167 | # ] 168 | # padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32) 169 | # image_t = np.ascontiguousarray(image_t, dtype=np.float32) 170 | # return image_t, padded_labels 171 | -------------------------------------------------------------------------------- /yolox/layers/fast_coco_eval_api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | # This file comes from 4 | # https://github.com/facebookresearch/detectron2/blob/master/detectron2/evaluation/fast_eval_api.py 5 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 6 | # Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 7 | 8 | import numpy as np 9 | from pycocotools.cocoeval import COCOeval 10 | 11 | # import torch first to make yolox._C work without ImportError of libc10.so 12 | # in YOLOX, env is already set in __init__.py. 13 | from yolox import _C 14 | 15 | import copy 16 | import time 17 | 18 | 19 | class COCOeval_opt(COCOeval): 20 | """ 21 | This is a slightly modified version of the original COCO API, where the functions evaluateImg() 22 | and accumulate() are implemented in C++ to speedup evaluation 23 | """ 24 | 25 | def evaluate(self): 26 | """ 27 | Run per image evaluation on given images and store results in self.evalImgs_cpp, a 28 | datastructure that isn't readable from Python but is used by a c++ implementation of 29 | accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure 30 | self.evalImgs because this datastructure is a computational bottleneck. 31 | :return: None 32 | """ 33 | tic = time.time() 34 | 35 | print("Running per image evaluation...") 36 | p = self.params 37 | # add backward compatibility if useSegm is specified in params 38 | if p.useSegm is not None: 39 | p.iouType = "segm" if p.useSegm == 1 else "bbox" 40 | print( 41 | "useSegm (deprecated) is not None. Running {} evaluation".format( 42 | p.iouType 43 | ) 44 | ) 45 | print("Evaluate annotation type *{}*".format(p.iouType)) 46 | p.imgIds = list(np.unique(p.imgIds)) 47 | if p.useCats: 48 | p.catIds = list(np.unique(p.catIds)) 49 | p.maxDets = sorted(p.maxDets) 50 | self.params = p 51 | 52 | self._prepare() 53 | 54 | # loop through images, area range, max detection number 55 | catIds = p.catIds if p.useCats else [-1] 56 | 57 | if p.iouType == "segm" or p.iouType == "bbox": 58 | computeIoU = self.computeIoU 59 | elif p.iouType == "keypoints": 60 | computeIoU = self.computeOks 61 | self.ious = { 62 | (imgId, catId): computeIoU(imgId, catId) 63 | for imgId in p.imgIds 64 | for catId in catIds 65 | } 66 | 67 | maxDet = p.maxDets[-1] 68 | 69 | # <<<< Beginning of code differences with original COCO API 70 | def convert_instances_to_cpp(instances, is_det=False): 71 | # Convert annotations for a list of instances in an image to a format that's fast 72 | # to access in C++ 73 | instances_cpp = [] 74 | for instance in instances: 75 | instance_cpp = _C.InstanceAnnotation( 76 | int(instance["id"]), 77 | instance["score"] if is_det else instance.get("score", 0.0), 78 | instance["area"], 79 | bool(instance.get("iscrowd", 0)), 80 | bool(instance.get("ignore", 0)), 81 | ) 82 | instances_cpp.append(instance_cpp) 83 | return instances_cpp 84 | 85 | # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ 86 | ground_truth_instances = [ 87 | [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] 88 | for imgId in p.imgIds 89 | ] 90 | detected_instances = [ 91 | [ 92 | convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) 93 | for catId in p.catIds 94 | ] 95 | for imgId in p.imgIds 96 | ] 97 | ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] 98 | 99 | if not p.useCats: 100 | # For each image, flatten per-category lists into a single list 101 | ground_truth_instances = [ 102 | [[o for c in i for o in c]] for i in ground_truth_instances 103 | ] 104 | detected_instances = [ 105 | [[o for c in i for o in c]] for i in detected_instances 106 | ] 107 | 108 | # Call C++ implementation of self.evaluateImgs() 109 | self._evalImgs_cpp = 
_C.COCOevalEvaluateImages( 110 | p.areaRng, 111 | maxDet, 112 | p.iouThrs, 113 | ious, 114 | ground_truth_instances, 115 | detected_instances, 116 | ) 117 | self._evalImgs = None 118 | 119 | self._paramsEval = copy.deepcopy(self.params) 120 | toc = time.time() 121 | print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) 122 | # >>>> End of code differences with original COCO API 123 | 124 | def accumulate(self): 125 | """ 126 | Accumulate per image evaluation results and store the result in self.eval. Does not 127 | support changing parameter settings from those used by self.evaluate() 128 | """ 129 | print("Accumulating evaluation results...") 130 | tic = time.time() 131 | if not hasattr(self, "_evalImgs_cpp"): 132 | print("Please run evaluate() first") 133 | 134 | self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) 135 | 136 | # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections 137 | self.eval["recall"] = np.array(self.eval["recall"]).reshape( 138 | self.eval["counts"][:1] + self.eval["counts"][2:] 139 | ) 140 | 141 | # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X 142 | # num_area_ranges X num_max_detections 143 | self.eval["precision"] = np.array(self.eval["precision"]).reshape( 144 | self.eval["counts"] 145 | ) 146 | self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) 147 | toc = time.time() 148 | print( 149 | "COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic) 150 | ) 151 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_det_mot17.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "val_half.json" 20 | self.input_size = (800, 1440) 21 | self.test_size = (800, 1440) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="detection" 30 | self.enable_mixup = True 31 | self.seed=8823 32 | self.conf_thresh=0.4 33 | self.det_thresh=0.7 34 | self.nms_thresh2d=0.75 35 | self.nms_thresh3d=0.7 36 | self.interval=5 37 | 38 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 39 | from yolox.data import ( 40 | MOTDataset, 41 | TrainTransform, 42 | YoloBatchSampler, 43 | DataLoader, 44 | InfiniteSampler, 45 | MosaicDetection, 46 | ) 47 | 48 | dataset = MOTDataset( 49 | data_dir=os.path.join(get_yolox_datadir(), "mix_det"), 50 | json_file=self.train_ann, 51 | name='', 52 | img_size=self.input_size, 53 | preproc=TrainTransform( 54 | rgb_means=(0.485, 0.456, 0.406), 55 | std=(0.229, 0.224, 0.225), 56 | max_labels=500, 57 | ), 58 | ) 59 | 60 | dataset = MosaicDetection( 61 | dataset, 62 | mosaic=not no_aug, 63 | img_size=self.input_size, 64 | preproc=TrainTransform( 65 | rgb_means=(0.485, 0.456, 0.406), 66 
| std=(0.229, 0.224, 0.225), 67 | max_labels=1000, 68 | ), 69 | degrees=self.degrees, 70 | translate=self.translate, 71 | scale=self.scale, 72 | shear=self.shear, 73 | perspective=self.perspective, 74 | enable_mixup=self.enable_mixup, 75 | ) 76 | 77 | self.dataset = dataset 78 | 79 | if is_distributed: 80 | batch_size = batch_size // dist.get_world_size() 81 | 82 | sampler = InfiniteSampler( 83 | len(self.dataset), seed=self.seed if self.seed else 0 84 | ) 85 | 86 | batch_sampler = YoloBatchSampler( 87 | sampler=sampler, 88 | batch_size=batch_size, 89 | drop_last=False, 90 | input_dimension=self.input_size, 91 | mosaic=not no_aug, 92 | ) 93 | 94 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 95 | dataloader_kwargs["batch_sampler"] = batch_sampler 96 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 97 | 98 | return train_loader 99 | 100 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 101 | from yolox.data import MOTDataset,DiffusionValTransform 102 | 103 | valdataset = MOTDataset( 104 | data_dir=os.path.join(get_yolox_datadir(), "mot"), 105 | json_file=self.val_ann, 106 | img_size=self.test_size, 107 | name='train', 108 | preproc=DiffusionValTransform( 109 | rgb_means=(0.485, 0.456, 0.406), 110 | std=(0.229, 0.224, 0.225), 111 | max_labels=1000, 112 | ) 113 | ) 114 | 115 | if is_distributed: 116 | batch_size = batch_size // dist.get_world_size() 117 | sampler = torch.utils.data.distributed.DistributedSampler( 118 | valdataset, shuffle=False 119 | ) 120 | else: 121 | sampler = torch.utils.data.SequentialSampler(valdataset) 122 | 123 | dataloader_kwargs = { 124 | "num_workers": self.data_num_workers, 125 | "pin_memory": True, 126 | "sampler": sampler, 127 | } 128 | dataloader_kwargs["batch_size"] = batch_size 129 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 130 | 131 | return val_loader 132 | 133 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 134 | from yolox.evaluators import COCOEvaluator 135 | 136 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 137 | evaluator = COCOEvaluator( 138 | dataloader=val_loader, 139 | img_size=self.test_size, 140 | confthre=self.conf_thresh, 141 | nmsthre3d=self.nms_thresh3d, 142 | detthre=self.det_thresh, 143 | nmsthre2d=self.nms_thresh2d, 144 | num_classes=self.num_classes, 145 | testdev=testdev, 146 | ) 147 | return evaluator 148 | 149 | def get_model(self): 150 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 151 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 152 | 153 | def init_yolo(M): 154 | for m in M.modules(): 155 | if isinstance(m, nn.BatchNorm2d): 156 | m.eps = 1e-3 157 | m.momentum = 0.03 158 | 159 | if getattr(self, "model", None) is None: 160 | in_channels = [256, 512, 1024] 161 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 162 | for value in backbone.parameters(): 163 | value.requires_grad=False 164 | head=DiffusionHead(self.num_classes,self.width) 165 | self.model = DiffusionNet(backbone, head) 166 | 167 | self.model.apply(init_yolo) 168 | # self.model.head.initialize_biases(1e-2) 169 | return self.model 170 | 171 | def get_optimizer(self, batch_size): 172 | lr=2.5e-05 173 | weight_decay = 0.0001 174 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 175 | return self.optimizer 176 | -------------------------------------------------------------------------------- 
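A minimal usage sketch for the experiment file above (not part of the repository sources): it assumes the standard YOLOX-style yolox.exp.get_exp helper is available and that the mix_det dataset has been prepared under the YOLOX data directory; the file path and batch size are illustrative only.

from yolox.exp import get_exp

# Load the MOT17 diffusion-detection experiment defined above (path assumed relative to the repo root).
exp = get_exp("exps/example/mot/yolox_x_diffusion_det_mot17.py", None)

model = exp.get_model()                      # frozen YOLOPAFPN backbone + DiffusionHead
optimizer = exp.get_optimizer(batch_size=4)  # AdamW, lr=2.5e-5, weight_decay=1e-4 (get_model must run first)
train_loader = exp.get_data_loader(batch_size=4, is_distributed=False, no_aug=False)
evaluator = exp.get_evaluator(batch_size=4, is_distributed=False)
print(exp.exp_name, exp.input_size, exp.task)

In practice tools/train.py presumably drives these Exp objects end to end; the snippet only illustrates the per-experiment API that the following configs repeat with different datasets, input sizes, and task settings.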
/exps/example/mot/yolox_x_diffusion_det_mot20.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "val_half.json" 20 | self.input_size = (896, 1600) 21 | self.test_size = (896, 1600) 22 | self.random_size = (20, 36) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="detection" 30 | self.enable_mixup = True 31 | self.seed=8823 32 | self.conf_thresh=0.4 33 | self.det_thresh=0.7 34 | self.nms_thresh2d=0.75 35 | self.nms_thresh3d=0.7 36 | self.interval=5 37 | 38 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 39 | from yolox.data import ( 40 | MOTDataset, 41 | TrainTransform, 42 | YoloBatchSampler, 43 | DataLoader, 44 | InfiniteSampler, 45 | MosaicDetection, 46 | ) 47 | 48 | dataset = MOTDataset( 49 | data_dir=os.path.join(get_yolox_datadir(), "mix_mot20_ch"), 50 | json_file=self.train_ann, 51 | name='', 52 | img_size=self.input_size, 53 | preproc=TrainTransform( 54 | rgb_means=(0.485, 0.456, 0.406), 55 | std=(0.229, 0.224, 0.225), 56 | max_labels=500, 57 | ), 58 | ) 59 | 60 | dataset = MosaicDetection( 61 | dataset, 62 | mosaic=not no_aug, 63 | img_size=self.input_size, 64 | preproc=TrainTransform( 65 | rgb_means=(0.485, 0.456, 0.406), 66 | std=(0.229, 0.224, 0.225), 67 | max_labels=1200, 68 | ), 69 | degrees=self.degrees, 70 | translate=self.translate, 71 | scale=self.scale, 72 | shear=self.shear, 73 | perspective=self.perspective, 74 | enable_mixup=self.enable_mixup, 75 | ) 76 | 77 | self.dataset = dataset 78 | 79 | if is_distributed: 80 | batch_size = batch_size // dist.get_world_size() 81 | 82 | sampler = InfiniteSampler( 83 | len(self.dataset), seed=self.seed if self.seed else 0 84 | ) 85 | 86 | batch_sampler = YoloBatchSampler( 87 | sampler=sampler, 88 | batch_size=batch_size, 89 | drop_last=False, 90 | input_dimension=self.input_size, 91 | mosaic=not no_aug, 92 | ) 93 | 94 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 95 | dataloader_kwargs["batch_sampler"] = batch_sampler 96 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 97 | 98 | return train_loader 99 | 100 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 101 | from yolox.data import MOTDataset,DiffusionValTransform 102 | 103 | valdataset = MOTDataset( 104 | data_dir=os.path.join(get_yolox_datadir(), "MOT20"), 105 | json_file=self.val_ann, 106 | img_size=self.test_size, 107 | name='train', 108 | preproc=DiffusionValTransform( 109 | rgb_means=(0.485, 0.456, 0.406), 110 | std=(0.229, 0.224, 0.225), 111 | max_labels=1200, 112 | ) 113 | ) 114 | 115 | if is_distributed: 116 | batch_size = batch_size // dist.get_world_size() 117 | sampler = torch.utils.data.distributed.DistributedSampler( 118 | valdataset, shuffle=False 119 | ) 120 | else: 121 | sampler = torch.utils.data.SequentialSampler(valdataset) 122 | 123 | dataloader_kwargs = 
{ 124 | "num_workers": self.data_num_workers, 125 | "pin_memory": True, 126 | "sampler": sampler, 127 | } 128 | dataloader_kwargs["batch_size"] = batch_size 129 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 130 | 131 | return val_loader 132 | 133 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 134 | from yolox.evaluators import COCOEvaluator 135 | 136 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 137 | evaluator = COCOEvaluator( 138 | dataloader=val_loader, 139 | img_size=self.test_size, 140 | confthre=self.conf_thresh, 141 | nmsthre3d=self.nms_thresh3d, 142 | detthre=self.det_thresh, 143 | nmsthre2d=self.nms_thresh2d, 144 | num_classes=self.num_classes, 145 | testdev=testdev, 146 | ) 147 | return evaluator 148 | 149 | def get_model(self): 150 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 151 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 152 | 153 | def init_yolo(M): 154 | for m in M.modules(): 155 | if isinstance(m, nn.BatchNorm2d): 156 | m.eps = 1e-3 157 | m.momentum = 0.03 158 | 159 | if getattr(self, "model", None) is None: 160 | in_channels = [256, 512, 1024] 161 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 162 | for value in backbone.parameters(): 163 | value.requires_grad=False 164 | head=DiffusionHead(self.num_classes,self.width) 165 | self.model = DiffusionNet(backbone, head) 166 | 167 | self.model.apply(init_yolo) 168 | # self.model.head.initialize_biases(1e-2) 169 | return self.model 170 | 171 | def get_optimizer(self, batch_size): 172 | lr=2.5e-05 173 | weight_decay = 0.0001 174 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 175 | return self.optimizer 176 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_det_dancetrack.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "train.json" 20 | self.input_size = (896, 1600) 21 | self.test_size = (896, 1600) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 20 24 | self.print_interval = 20 25 | self.eval_interval = 40 26 | self.no_aug_epochs = 5 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="detection" 30 | self.enable_mixup = True 31 | self.seed=8823 32 | self.conf_thresh=0.4 33 | self.det_thresh=0.7 34 | self.nms_thresh2d=0.75 35 | self.nms_thresh3d=0.7 36 | self.interval=5 37 | 38 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 39 | from yolox.data import ( 40 | MOTDataset, 41 | TrainTransform, 42 | YoloBatchSampler, 43 | DataLoader, 44 | InfiniteSampler, 45 | MosaicDetection, 46 | ) 47 | 48 | dataset = MOTDataset( 49 | data_dir=os.path.join(get_yolox_datadir(), "dancetrack"), 50 | json_file=self.train_ann, 51 | name='train', 52 | img_size=self.input_size, 53 | preproc=TrainTransform( 54 | rgb_means=(0.485, 0.456, 0.406), 55 | std=(0.229, 0.224, 0.225), 
56 | max_labels=500, 57 | ), 58 | ) 59 | 60 | dataset = MosaicDetection( 61 | dataset, 62 | mosaic=not no_aug, 63 | img_size=self.input_size, 64 | preproc=TrainTransform( 65 | rgb_means=(0.485, 0.456, 0.406), 66 | std=(0.229, 0.224, 0.225), 67 | max_labels=1000, 68 | ), 69 | degrees=self.degrees, 70 | translate=self.translate, 71 | scale=self.scale, 72 | shear=self.shear, 73 | perspective=self.perspective, 74 | enable_mixup=self.enable_mixup, 75 | ) 76 | 77 | self.dataset = dataset 78 | 79 | if is_distributed: 80 | batch_size = batch_size // dist.get_world_size() 81 | 82 | sampler = InfiniteSampler( 83 | len(self.dataset), seed=self.seed if self.seed else 0 84 | ) 85 | 86 | batch_sampler = YoloBatchSampler( 87 | sampler=sampler, 88 | batch_size=batch_size, 89 | drop_last=False, 90 | input_dimension=self.input_size, 91 | mosaic=not no_aug, 92 | ) 93 | 94 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 95 | dataloader_kwargs["batch_sampler"] = batch_sampler 96 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 97 | 98 | return train_loader 99 | 100 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 101 | from yolox.data import MOTDataset,DiffusionValTransform 102 | 103 | valdataset = MOTDataset( 104 | data_dir=os.path.join(get_yolox_datadir(), "dancetrack"), 105 | json_file=self.val_ann, 106 | img_size=self.test_size, 107 | name='train', 108 | preproc=DiffusionValTransform( 109 | rgb_means=(0.485, 0.456, 0.406), 110 | std=(0.229, 0.224, 0.225), 111 | max_labels=1000, 112 | ) 113 | ) 114 | 115 | if is_distributed: 116 | batch_size = batch_size // dist.get_world_size() 117 | sampler = torch.utils.data.distributed.DistributedSampler( 118 | valdataset, shuffle=False 119 | ) 120 | else: 121 | sampler = torch.utils.data.SequentialSampler(valdataset) 122 | 123 | dataloader_kwargs = { 124 | "num_workers": self.data_num_workers, 125 | "pin_memory": True, 126 | "sampler": sampler, 127 | } 128 | dataloader_kwargs["batch_size"] = batch_size 129 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 130 | 131 | return val_loader 132 | 133 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 134 | from yolox.evaluators import COCOEvaluator 135 | 136 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 137 | evaluator = COCOEvaluator( 138 | dataloader=val_loader, 139 | img_size=self.test_size, 140 | confthre=self.conf_thresh, 141 | nmsthre3d=self.nms_thresh3d, 142 | detthre=self.det_thresh, 143 | nmsthre2d=self.nms_thresh2d, 144 | num_classes=self.num_classes, 145 | testdev=testdev, 146 | ) 147 | return evaluator 148 | 149 | def get_model(self): 150 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 151 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 152 | 153 | def init_yolo(M): 154 | for m in M.modules(): 155 | if isinstance(m, nn.BatchNorm2d): 156 | m.eps = 1e-3 157 | m.momentum = 0.03 158 | 159 | if getattr(self, "model", None) is None: 160 | in_channels = [256, 512, 1024] 161 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 162 | for value in backbone.parameters(): 163 | value.requires_grad=False 164 | head=DiffusionHead(self.num_classes,self.width) 165 | self.model = DiffusionNet(backbone, head) 166 | 167 | self.model.apply(init_yolo) 168 | # self.model.head.initialize_biases(1e-2) 169 | return self.model 170 | 171 | def get_optimizer(self, batch_size): 172 | lr=2.5e-05 173 | weight_decay = 0.0001 174 | 
self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 175 | return self.optimizer 176 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_det_mot17_ablation.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train_half.json" 19 | self.val_ann = "val_half.json" 20 | self.input_size = (800, 1440) 21 | self.test_size = (800, 1440) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="detection" 30 | self.enable_mixup = True 31 | self.seed=8823 32 | self.conf_thresh=0.4 33 | self.det_thresh=0.7 34 | self.nms_thresh2d=0.75 35 | self.nms_thresh3d=0.7 36 | self.interval=5 37 | 38 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 39 | from yolox.data import ( 40 | MOTDataset, 41 | TrainTransform, 42 | YoloBatchSampler, 43 | DataLoader, 44 | InfiniteSampler, 45 | MosaicDetection, 46 | ) 47 | 48 | dataset = MOTDataset( 49 | data_dir=os.path.join(get_yolox_datadir(), "mix_mot_ch"), 50 | json_file=self.train_ann, 51 | name='', 52 | img_size=self.input_size, 53 | preproc=TrainTransform( 54 | rgb_means=(0.485, 0.456, 0.406), 55 | std=(0.229, 0.224, 0.225), 56 | max_labels=500, 57 | ), 58 | ) 59 | 60 | dataset = MosaicDetection( 61 | dataset, 62 | mosaic=not no_aug, 63 | img_size=self.input_size, 64 | preproc=TrainTransform( 65 | rgb_means=(0.485, 0.456, 0.406), 66 | std=(0.229, 0.224, 0.225), 67 | max_labels=1000, 68 | ), 69 | degrees=self.degrees, 70 | translate=self.translate, 71 | scale=self.scale, 72 | shear=self.shear, 73 | perspective=self.perspective, 74 | enable_mixup=self.enable_mixup, 75 | ) 76 | 77 | self.dataset = dataset 78 | 79 | if is_distributed: 80 | batch_size = batch_size // dist.get_world_size() 81 | 82 | sampler = InfiniteSampler( 83 | len(self.dataset), seed=self.seed if self.seed else 0 84 | ) 85 | 86 | batch_sampler = YoloBatchSampler( 87 | sampler=sampler, 88 | batch_size=batch_size, 89 | drop_last=False, 90 | input_dimension=self.input_size, 91 | mosaic=not no_aug, 92 | ) 93 | 94 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 95 | dataloader_kwargs["batch_sampler"] = batch_sampler 96 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 97 | 98 | return train_loader 99 | 100 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 101 | from yolox.data import MOTDataset,DiffusionValTransform 102 | 103 | valdataset = MOTDataset( 104 | data_dir=os.path.join(get_yolox_datadir(), "mot"), 105 | json_file=self.val_ann, 106 | img_size=self.test_size, 107 | name='train', 108 | preproc=DiffusionValTransform( 109 | rgb_means=(0.485, 0.456, 0.406), 110 | std=(0.229, 0.224, 0.225), 111 | max_labels=1000, 112 | ) 113 | ) 114 | 115 | if is_distributed: 116 | batch_size = batch_size // dist.get_world_size() 117 | 
sampler = torch.utils.data.distributed.DistributedSampler( 118 | valdataset, shuffle=False 119 | ) 120 | else: 121 | sampler = torch.utils.data.SequentialSampler(valdataset) 122 | 123 | dataloader_kwargs = { 124 | "num_workers": self.data_num_workers, 125 | "pin_memory": True, 126 | "sampler": sampler, 127 | } 128 | dataloader_kwargs["batch_size"] = batch_size 129 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 130 | 131 | return val_loader 132 | 133 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 134 | from yolox.evaluators import COCOEvaluator 135 | 136 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 137 | evaluator = COCOEvaluator( 138 | dataloader=val_loader, 139 | img_size=self.test_size, 140 | confthre=self.conf_thresh, 141 | nmsthre3d=self.nms_thresh3d, 142 | detthre=self.det_thresh, 143 | nmsthre2d=self.nms_thresh2d, 144 | num_classes=self.num_classes, 145 | testdev=testdev, 146 | ) 147 | return evaluator 148 | 149 | def get_model(self): 150 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 151 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 152 | 153 | def init_yolo(M): 154 | for m in M.modules(): 155 | if isinstance(m, nn.BatchNorm2d): 156 | m.eps = 1e-3 157 | m.momentum = 0.03 158 | 159 | if getattr(self, "model", None) is None: 160 | in_channels = [256, 512, 1024] 161 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 162 | for value in backbone.parameters(): 163 | value.requires_grad=False 164 | head=DiffusionHead(self.num_classes,self.width) 165 | self.model = DiffusionNet(backbone, head) 166 | 167 | self.model.apply(init_yolo) 168 | # self.model.head.initialize_biases(1e-2) 169 | return self.model 170 | 171 | def get_optimizer(self, batch_size): 172 | lr=2.5e-05 173 | weight_decay = 0.0001 174 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 175 | return self.optimizer 176 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_track_mot17.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "val_half.json" 20 | self.input_size = (800, 1440) 21 | self.test_size = (800, 1440) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="tracking" 30 | self.seed=8823 31 | self.conf_thresh=0.4 32 | self.det_thresh=0.7 33 | self.nms_thresh2d=0.75 34 | self.nms_thresh3d=0.7 35 | self.interval=5 36 | 37 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 38 | from yolox.data import ( 39 | MOTDataset, 40 | TrainTransform, 41 | YoloBatchSampler, 42 | DataLoader, 43 | InfiniteSampler, 44 | MosaicDetection, 45 | DiffusionMosaicDetection, 46 | DiffusionTrainTransform 47 | ) 48 | 49 | dataset = MOTDataset( 50 | 
data_dir=os.path.join(get_yolox_datadir(), "mot"), 51 | json_file=self.train_ann, 52 | name='train', 53 | img_size=self.input_size, 54 | preproc=TrainTransform( 55 | rgb_means=(0.485, 0.456, 0.406), 56 | std=(0.229, 0.224, 0.225), 57 | max_labels=500, 58 | ), 59 | ) 60 | 61 | dataset = DiffusionMosaicDetection( 62 | dataset, 63 | mosaic=not no_aug, 64 | img_size=self.input_size, 65 | preproc=DiffusionTrainTransform( 66 | rgb_means=(0.485, 0.456, 0.406), 67 | std=(0.229, 0.224, 0.225), 68 | max_labels=1000, 69 | ), 70 | degrees=self.degrees, 71 | translate=self.translate, 72 | scale=self.scale, 73 | shear=self.shear, 74 | perspective=self.perspective, 75 | enable_mixup=self.enable_mixup, 76 | ) 77 | 78 | self.dataset = dataset 79 | 80 | if is_distributed: 81 | batch_size = batch_size // dist.get_world_size() 82 | 83 | sampler = InfiniteSampler( 84 | len(self.dataset), seed=self.seed if self.seed else 0 85 | ) 86 | 87 | batch_sampler = YoloBatchSampler( 88 | sampler=sampler, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | input_dimension=self.input_size, 92 | mosaic=not no_aug, 93 | ) 94 | 95 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 96 | dataloader_kwargs["batch_sampler"] = batch_sampler 97 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 98 | 99 | return train_loader 100 | 101 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 102 | from yolox.data import MOTDataset,DiffusionValTransform 103 | 104 | valdataset = MOTDataset( 105 | data_dir=os.path.join(get_yolox_datadir(), "mot"), 106 | json_file=self.val_ann, 107 | img_size=self.test_size, 108 | name='train', 109 | preproc=DiffusionValTransform( 110 | rgb_means=(0.485, 0.456, 0.406), 111 | std=(0.229, 0.224, 0.225), 112 | max_labels=1000, 113 | ) 114 | ) 115 | 116 | if is_distributed: 117 | batch_size = batch_size // dist.get_world_size() 118 | sampler = torch.utils.data.distributed.DistributedSampler( 119 | valdataset, shuffle=False 120 | ) 121 | else: 122 | sampler = torch.utils.data.SequentialSampler(valdataset) 123 | 124 | dataloader_kwargs = { 125 | "num_workers": self.data_num_workers, 126 | "pin_memory": True, 127 | "sampler": sampler, 128 | } 129 | dataloader_kwargs["batch_size"] = batch_size 130 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 131 | 132 | return val_loader 133 | 134 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 135 | from yolox.evaluators import COCOEvaluator 136 | 137 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 138 | evaluator = COCOEvaluator( 139 | dataloader=val_loader, 140 | img_size=self.test_size, 141 | confthre=self.conf_thresh, 142 | nmsthre3d=self.nms_thresh3d, 143 | detthre=self.det_thresh, 144 | nmsthre2d=self.nms_thresh2d, 145 | num_classes=self.num_classes, 146 | testdev=testdev, 147 | ) 148 | return evaluator 149 | 150 | def get_model(self): 151 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 152 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 153 | 154 | def init_yolo(M): 155 | for m in M.modules(): 156 | if isinstance(m, nn.BatchNorm2d): 157 | m.eps = 1e-3 158 | m.momentum = 0.03 159 | 160 | if getattr(self, "model", None) is None: 161 | in_channels = [256, 512, 1024] 162 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 163 | for value in backbone.parameters(): 164 | value.requires_grad=False 165 | head=DiffusionHead(self.num_classes,self.width) 166 | self.model = 
DiffusionNet(backbone, head) 167 | 168 | self.model.apply(init_yolo) 169 | # self.model.head.initialize_biases(1e-2) 170 | return self.model 171 | 172 | def get_optimizer(self, batch_size): 173 | lr=2.5e-05 174 | weight_decay = 0.0001 175 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 176 | return self.optimizer 177 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_track_mot17_baseline.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "test.json" 20 | self.input_size = (800, 1440) 21 | self.test_size = (800, 1440) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="tracking" 30 | self.seed=8823 31 | self.conf_thresh=0.4 32 | self.det_thresh=0.7 33 | self.nms_thresh2d=0.75 34 | self.nms_thresh3d=0.7 35 | self.interval=5 36 | 37 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 38 | from yolox.data import ( 39 | MOTDataset, 40 | TrainTransform, 41 | YoloBatchSampler, 42 | DataLoader, 43 | InfiniteSampler, 44 | MosaicDetection, 45 | DiffusionMosaicDetection, 46 | DiffusionTrainTransform 47 | ) 48 | 49 | dataset = MOTDataset( 50 | data_dir=os.path.join(get_yolox_datadir(), "mot"), 51 | json_file=self.train_ann, 52 | name='train', 53 | img_size=self.input_size, 54 | preproc=TrainTransform( 55 | rgb_means=(0.485, 0.456, 0.406), 56 | std=(0.229, 0.224, 0.225), 57 | max_labels=500, 58 | ), 59 | ) 60 | 61 | dataset = DiffusionMosaicDetection( 62 | dataset, 63 | mosaic=not no_aug, 64 | img_size=self.input_size, 65 | preproc=DiffusionTrainTransform( 66 | rgb_means=(0.485, 0.456, 0.406), 67 | std=(0.229, 0.224, 0.225), 68 | max_labels=1000, 69 | ), 70 | degrees=self.degrees, 71 | translate=self.translate, 72 | scale=self.scale, 73 | shear=self.shear, 74 | perspective=self.perspective, 75 | enable_mixup=self.enable_mixup, 76 | ) 77 | 78 | self.dataset = dataset 79 | 80 | if is_distributed: 81 | batch_size = batch_size // dist.get_world_size() 82 | 83 | sampler = InfiniteSampler( 84 | len(self.dataset), seed=self.seed if self.seed else 0 85 | ) 86 | 87 | batch_sampler = YoloBatchSampler( 88 | sampler=sampler, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | input_dimension=self.input_size, 92 | mosaic=not no_aug, 93 | ) 94 | 95 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 96 | dataloader_kwargs["batch_sampler"] = batch_sampler 97 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 98 | 99 | return train_loader 100 | 101 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 102 | from yolox.data import MOTDataset,DiffusionValTransform 103 | 104 | valdataset = MOTDataset( 105 | data_dir=os.path.join(get_yolox_datadir(), "mot"), 106 | json_file=self.val_ann, 107 | 
img_size=self.test_size, 108 | name='test', 109 | preproc=DiffusionValTransform( 110 | rgb_means=(0.485, 0.456, 0.406), 111 | std=(0.229, 0.224, 0.225), 112 | max_labels=1000, 113 | ) 114 | ) 115 | 116 | if is_distributed: 117 | batch_size = batch_size // dist.get_world_size() 118 | sampler = torch.utils.data.distributed.DistributedSampler( 119 | valdataset, shuffle=False 120 | ) 121 | else: 122 | sampler = torch.utils.data.SequentialSampler(valdataset) 123 | 124 | dataloader_kwargs = { 125 | "num_workers": self.data_num_workers, 126 | "pin_memory": True, 127 | "sampler": sampler, 128 | } 129 | dataloader_kwargs["batch_size"] = batch_size 130 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 131 | 132 | return val_loader 133 | 134 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 135 | from yolox.evaluators import COCOEvaluator 136 | 137 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 138 | evaluator = COCOEvaluator( 139 | dataloader=val_loader, 140 | img_size=self.test_size, 141 | confthre=self.conf_thresh, 142 | nmsthre3d=self.nms_thresh3d, 143 | detthre=self.det_thresh, 144 | nmsthre2d=self.nms_thresh2d, 145 | num_classes=self.num_classes, 146 | testdev=testdev, 147 | ) 148 | return evaluator 149 | 150 | def get_model(self): 151 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 152 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 153 | 154 | def init_yolo(M): 155 | for m in M.modules(): 156 | if isinstance(m, nn.BatchNorm2d): 157 | m.eps = 1e-3 158 | m.momentum = 0.03 159 | 160 | if getattr(self, "model", None) is None: 161 | in_channels = [256, 512, 1024] 162 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 163 | for value in backbone.parameters(): 164 | value.requires_grad=False 165 | head=DiffusionHead(self.num_classes,self.width) 166 | self.model = DiffusionNet(backbone, head) 167 | 168 | self.model.apply(init_yolo) 169 | # self.model.head.initialize_biases(1e-2) 170 | return self.model 171 | 172 | def get_optimizer(self, batch_size): 173 | lr=2.5e-05 174 | weight_decay = 0.0001 175 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 176 | return self.optimizer 177 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_track_mot20.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "val_half.json" 20 | self.input_size = (896, 1600) 21 | self.test_size = (896, 1600) 22 | self.random_size = (20, 36) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="tracking" 30 | self.seed=8823 31 | self.conf_thresh=0.4 32 | self.det_thresh=0.7 33 | self.nms_thresh2d=0.75 34 | self.nms_thresh3d=0.7 35 | self.interval=5 36 | 37 | def get_data_loader(self, batch_size, 
is_distributed, no_aug=False): 38 | from yolox.data import ( 39 | MOTDataset, 40 | TrainTransform, 41 | YoloBatchSampler, 42 | DataLoader, 43 | InfiniteSampler, 44 | MosaicDetection, 45 | DiffusionMosaicDetection, 46 | DiffusionTrainTransform 47 | ) 48 | 49 | dataset = MOTDataset( 50 | data_dir=os.path.join(get_yolox_datadir(), "MOT20"), 51 | json_file=self.train_ann, 52 | name='train', 53 | img_size=self.input_size, 54 | preproc=TrainTransform( 55 | rgb_means=(0.485, 0.456, 0.406), 56 | std=(0.229, 0.224, 0.225), 57 | max_labels=500, 58 | ), 59 | ) 60 | 61 | dataset = DiffusionMosaicDetection( 62 | dataset, 63 | mosaic=not no_aug, 64 | img_size=self.input_size, 65 | preproc=DiffusionTrainTransform( 66 | rgb_means=(0.485, 0.456, 0.406), 67 | std=(0.229, 0.224, 0.225), 68 | max_labels=1200, 69 | ), 70 | degrees=self.degrees, 71 | translate=self.translate, 72 | scale=self.scale, 73 | shear=self.shear, 74 | perspective=self.perspective, 75 | enable_mixup=self.enable_mixup, 76 | ) 77 | 78 | self.dataset = dataset 79 | 80 | if is_distributed: 81 | batch_size = batch_size // dist.get_world_size() 82 | 83 | sampler = InfiniteSampler( 84 | len(self.dataset), seed=self.seed if self.seed else 0 85 | ) 86 | 87 | batch_sampler = YoloBatchSampler( 88 | sampler=sampler, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | input_dimension=self.input_size, 92 | mosaic=not no_aug, 93 | ) 94 | 95 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 96 | dataloader_kwargs["batch_sampler"] = batch_sampler 97 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 98 | 99 | return train_loader 100 | 101 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 102 | from yolox.data import MOTDataset,DiffusionValTransform 103 | 104 | valdataset = MOTDataset( 105 | data_dir=os.path.join(get_yolox_datadir(), "MOT20"), 106 | json_file=self.val_ann, 107 | img_size=self.test_size, 108 | name='train', 109 | preproc=DiffusionValTransform( 110 | rgb_means=(0.485, 0.456, 0.406), 111 | std=(0.229, 0.224, 0.225), 112 | max_labels=1200, 113 | ) 114 | ) 115 | 116 | if is_distributed: 117 | batch_size = batch_size // dist.get_world_size() 118 | sampler = torch.utils.data.distributed.DistributedSampler( 119 | valdataset, shuffle=False 120 | ) 121 | else: 122 | sampler = torch.utils.data.SequentialSampler(valdataset) 123 | 124 | dataloader_kwargs = { 125 | "num_workers": self.data_num_workers, 126 | "pin_memory": True, 127 | "sampler": sampler, 128 | } 129 | dataloader_kwargs["batch_size"] = batch_size 130 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 131 | 132 | return val_loader 133 | 134 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 135 | from yolox.evaluators import COCOEvaluator 136 | 137 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 138 | evaluator = COCOEvaluator( 139 | dataloader=val_loader, 140 | img_size=self.test_size, 141 | confthre=self.conf_thresh, 142 | nmsthre3d=self.nms_thresh3d, 143 | detthre=self.det_thresh, 144 | nmsthre2d=self.nms_thresh2d, 145 | num_classes=self.num_classes, 146 | testdev=testdev, 147 | ) 148 | return evaluator 149 | 150 | def get_model(self): 151 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 152 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 153 | 154 | def init_yolo(M): 155 | for m in M.modules(): 156 | if isinstance(m, nn.BatchNorm2d): 157 | m.eps = 1e-3 158 | m.momentum = 0.03 159 | 160 | if getattr(self, "model", 
None) is None: 161 | in_channels = [256, 512, 1024] 162 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 163 | for value in backbone.parameters(): 164 | value.requires_grad=False 165 | head=DiffusionHead(self.num_classes,self.width) 166 | self.model = DiffusionNet(backbone, head) 167 | 168 | self.model.apply(init_yolo) 169 | # self.model.head.initialize_biases(1e-2) 170 | return self.model 171 | 172 | def get_optimizer(self, batch_size): 173 | lr=2.5e-05 174 | weight_decay = 0.0001 175 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 176 | return self.optimizer 177 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_track_dancetrack.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "train.json" 20 | self.input_size = (896, 1600) 21 | self.test_size = (896, 1600) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 20 24 | self.print_interval = 20 25 | self.eval_interval = 40 26 | self.no_aug_epochs = 5 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="tracking" 30 | self.seed=8823 31 | self.conf_thresh=0.4 32 | self.det_thresh=0.7 33 | self.nms_thresh2d=0.75 34 | self.nms_thresh3d=0.7 35 | self.interval=5 36 | 37 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 38 | from yolox.data import ( 39 | MOTDataset, 40 | TrainTransform, 41 | YoloBatchSampler, 42 | DataLoader, 43 | InfiniteSampler, 44 | MosaicDetection, 45 | DiffusionMosaicDetection, 46 | DiffusionTrainTransform 47 | ) 48 | 49 | dataset = MOTDataset( 50 | data_dir=os.path.join(get_yolox_datadir(), "dancetrack"), 51 | json_file=self.train_ann, 52 | name='train', 53 | img_size=self.input_size, 54 | preproc=TrainTransform( 55 | rgb_means=(0.485, 0.456, 0.406), 56 | std=(0.229, 0.224, 0.225), 57 | max_labels=500, 58 | ), 59 | ) 60 | 61 | dataset = DiffusionMosaicDetection( 62 | dataset, 63 | mosaic=not no_aug, 64 | img_size=self.input_size, 65 | preproc=DiffusionTrainTransform( 66 | rgb_means=(0.485, 0.456, 0.406), 67 | std=(0.229, 0.224, 0.225), 68 | max_labels=1000, 69 | ), 70 | degrees=self.degrees, 71 | translate=self.translate, 72 | scale=self.scale, 73 | shear=self.shear, 74 | perspective=self.perspective, 75 | enable_mixup=self.enable_mixup, 76 | ) 77 | 78 | self.dataset = dataset 79 | 80 | if is_distributed: 81 | batch_size = batch_size // dist.get_world_size() 82 | 83 | sampler = InfiniteSampler( 84 | len(self.dataset), seed=self.seed if self.seed else 0 85 | ) 86 | 87 | batch_sampler = YoloBatchSampler( 88 | sampler=sampler, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | input_dimension=self.input_size, 92 | mosaic=not no_aug, 93 | ) 94 | 95 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 96 | dataloader_kwargs["batch_sampler"] = batch_sampler 97 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 98 | 99 | return train_loader 
100 | 101 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 102 | from yolox.data import MOTDataset,DiffusionValTransform 103 | 104 | valdataset = MOTDataset( 105 | data_dir=os.path.join(get_yolox_datadir(), "dancetrack"), 106 | json_file=self.val_ann, 107 | img_size=self.test_size, 108 | name='train', 109 | preproc=DiffusionValTransform( 110 | rgb_means=(0.485, 0.456, 0.406), 111 | std=(0.229, 0.224, 0.225), 112 | max_labels=1000, 113 | ) 114 | ) 115 | 116 | if is_distributed: 117 | batch_size = batch_size // dist.get_world_size() 118 | sampler = torch.utils.data.distributed.DistributedSampler( 119 | valdataset, shuffle=False 120 | ) 121 | else: 122 | sampler = torch.utils.data.SequentialSampler(valdataset) 123 | 124 | dataloader_kwargs = { 125 | "num_workers": self.data_num_workers, 126 | "pin_memory": True, 127 | "sampler": sampler, 128 | } 129 | dataloader_kwargs["batch_size"] = batch_size 130 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 131 | 132 | return val_loader 133 | 134 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 135 | from yolox.evaluators import COCOEvaluator 136 | 137 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 138 | evaluator = COCOEvaluator( 139 | dataloader=val_loader, 140 | img_size=self.test_size, 141 | confthre=self.conf_thresh, 142 | nmsthre3d=self.nms_thresh3d, 143 | detthre=self.det_thresh, 144 | nmsthre2d=self.nms_thresh2d, 145 | num_classes=self.num_classes, 146 | testdev=testdev, 147 | ) 148 | return evaluator 149 | 150 | def get_model(self): 151 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 152 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 153 | 154 | def init_yolo(M): 155 | for m in M.modules(): 156 | if isinstance(m, nn.BatchNorm2d): 157 | m.eps = 1e-3 158 | m.momentum = 0.03 159 | 160 | if getattr(self, "model", None) is None: 161 | in_channels = [256, 512, 1024] 162 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 163 | for value in backbone.parameters(): 164 | value.requires_grad=False 165 | head=DiffusionHead(self.num_classes,self.width) 166 | self.model = DiffusionNet(backbone, head) 167 | 168 | self.model.apply(init_yolo) 169 | # self.model.head.initialize_biases(1e-2) 170 | return self.model 171 | 172 | def get_optimizer(self, batch_size): 173 | lr=2.5e-05 174 | weight_decay = 0.0001 175 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 176 | return self.optimizer 177 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_track_mot20_baseline.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "val_half.json" 20 | self.input_size = (896, 1600) 21 | self.test_size = (896, 1600) 22 | self.random_size = (20, 36) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | 
self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="tracking" 30 | self.seed=8823 31 | self.conf_thresh=0.4 32 | self.det_thresh=0.7 33 | self.nms_thresh2d=0.75 34 | self.nms_thresh3d=0.7 35 | self.interval=5 36 | 37 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 38 | from yolox.data import ( 39 | MOTDataset, 40 | TrainTransform, 41 | YoloBatchSampler, 42 | DataLoader, 43 | InfiniteSampler, 44 | MosaicDetection, 45 | DiffusionMosaicDetection, 46 | DiffusionTrainTransform 47 | ) 48 | 49 | dataset = MOTDataset( 50 | data_dir=os.path.join(get_yolox_datadir(), "MOT20"), 51 | json_file=self.train_ann, 52 | name='train', 53 | img_size=self.input_size, 54 | preproc=TrainTransform( 55 | rgb_means=(0.485, 0.456, 0.406), 56 | std=(0.229, 0.224, 0.225), 57 | max_labels=500, 58 | ), 59 | ) 60 | 61 | dataset = DiffusionMosaicDetection( 62 | dataset, 63 | mosaic=not no_aug, 64 | img_size=self.input_size, 65 | preproc=DiffusionTrainTransform( 66 | rgb_means=(0.485, 0.456, 0.406), 67 | std=(0.229, 0.224, 0.225), 68 | max_labels=1200, 69 | ), 70 | degrees=self.degrees, 71 | translate=self.translate, 72 | scale=self.scale, 73 | shear=self.shear, 74 | perspective=self.perspective, 75 | enable_mixup=self.enable_mixup, 76 | ) 77 | 78 | self.dataset = dataset 79 | 80 | if is_distributed: 81 | batch_size = batch_size // dist.get_world_size() 82 | 83 | sampler = InfiniteSampler( 84 | len(self.dataset), seed=self.seed if self.seed else 0 85 | ) 86 | 87 | batch_sampler = YoloBatchSampler( 88 | sampler=sampler, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | input_dimension=self.input_size, 92 | mosaic=not no_aug, 93 | ) 94 | 95 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 96 | dataloader_kwargs["batch_sampler"] = batch_sampler 97 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 98 | 99 | return train_loader 100 | 101 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 102 | from yolox.data import MOTDataset,DiffusionValTransform 103 | 104 | valdataset = MOTDataset( 105 | data_dir=os.path.join(get_yolox_datadir(), "MOT20"), 106 | json_file=self.val_ann, 107 | img_size=self.test_size, 108 | name='train', 109 | preproc=DiffusionValTransform( 110 | rgb_means=(0.485, 0.456, 0.406), 111 | std=(0.229, 0.224, 0.225), 112 | max_labels=1200, 113 | ) 114 | ) 115 | 116 | if is_distributed: 117 | batch_size = batch_size // dist.get_world_size() 118 | sampler = torch.utils.data.distributed.DistributedSampler( 119 | valdataset, shuffle=False 120 | ) 121 | else: 122 | sampler = torch.utils.data.SequentialSampler(valdataset) 123 | 124 | dataloader_kwargs = { 125 | "num_workers": self.data_num_workers, 126 | "pin_memory": True, 127 | "sampler": sampler, 128 | } 129 | dataloader_kwargs["batch_size"] = batch_size 130 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 131 | 132 | return val_loader 133 | 134 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 135 | from yolox.evaluators import COCOEvaluator 136 | 137 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 138 | evaluator = COCOEvaluator( 139 | dataloader=val_loader, 140 | img_size=self.test_size, 141 | confthre=self.conf_thresh, 142 | nmsthre3d=self.nms_thresh3d, 143 | detthre=self.det_thresh, 144 | nmsthre2d=self.nms_thresh2d, 145 | num_classes=self.num_classes, 146 | testdev=testdev, 147 | ) 148 | return evaluator 149 | 150 | def get_model(self): 151 | from yolox.models 
import YOLOPAFPN, YOLOX, YOLOXHead 152 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 153 | 154 | def init_yolo(M): 155 | for m in M.modules(): 156 | if isinstance(m, nn.BatchNorm2d): 157 | m.eps = 1e-3 158 | m.momentum = 0.03 159 | 160 | if getattr(self, "model", None) is None: 161 | in_channels = [256, 512, 1024] 162 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 163 | for value in backbone.parameters(): 164 | value.requires_grad=False 165 | head=DiffusionHead(self.num_classes,self.width) 166 | self.model = DiffusionNet(backbone, head) 167 | 168 | self.model.apply(init_yolo) 169 | # self.model.head.initialize_biases(1e-2) 170 | return self.model 171 | 172 | def get_optimizer(self, batch_size): 173 | lr=2.5e-05 174 | weight_decay = 0.0001 175 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 176 | return self.optimizer 177 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_track_dancetrack_baseline.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train.json" 19 | self.val_ann = "test.json" 20 | self.input_size = (896, 1600) 21 | self.test_size = (896, 1600) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 20 24 | self.print_interval = 20 25 | self.eval_interval = 40 26 | self.no_aug_epochs = 5 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="tracking" 30 | self.seed=8823 31 | self.conf_thresh=0.4 32 | self.det_thresh=0.7 33 | self.nms_thresh2d=0.75 34 | self.nms_thresh3d=0.7 35 | self.interval=5 36 | 37 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 38 | from yolox.data import ( 39 | MOTDataset, 40 | TrainTransform, 41 | YoloBatchSampler, 42 | DataLoader, 43 | InfiniteSampler, 44 | MosaicDetection, 45 | DiffusionMosaicDetection, 46 | DiffusionTrainTransform 47 | ) 48 | 49 | dataset = MOTDataset( 50 | data_dir=os.path.join(get_yolox_datadir(), "dancetrack"), 51 | json_file=self.train_ann, 52 | name='train', 53 | img_size=self.input_size, 54 | preproc=TrainTransform( 55 | rgb_means=(0.485, 0.456, 0.406), 56 | std=(0.229, 0.224, 0.225), 57 | max_labels=500, 58 | ), 59 | ) 60 | 61 | dataset = DiffusionMosaicDetection( 62 | dataset, 63 | mosaic=not no_aug, 64 | img_size=self.input_size, 65 | preproc=DiffusionTrainTransform( 66 | rgb_means=(0.485, 0.456, 0.406), 67 | std=(0.229, 0.224, 0.225), 68 | max_labels=1000, 69 | ), 70 | degrees=self.degrees, 71 | translate=self.translate, 72 | scale=self.scale, 73 | shear=self.shear, 74 | perspective=self.perspective, 75 | enable_mixup=self.enable_mixup, 76 | ) 77 | 78 | self.dataset = dataset 79 | 80 | if is_distributed: 81 | batch_size = batch_size // dist.get_world_size() 82 | 83 | sampler = InfiniteSampler( 84 | len(self.dataset), seed=self.seed if self.seed else 0 85 | ) 86 | 87 | batch_sampler = YoloBatchSampler( 88 | sampler=sampler, 89 | batch_size=batch_size, 90 | drop_last=False, 91 | 
input_dimension=self.input_size, 92 | mosaic=not no_aug, 93 | ) 94 | 95 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 96 | dataloader_kwargs["batch_sampler"] = batch_sampler 97 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 98 | 99 | return train_loader 100 | 101 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 102 | from yolox.data import MOTDataset,DiffusionValTransform 103 | 104 | valdataset = MOTDataset( 105 | data_dir=os.path.join(get_yolox_datadir(), "dancetrack"), 106 | json_file=self.val_ann, 107 | img_size=self.test_size, 108 | name='test', 109 | preproc=DiffusionValTransform( 110 | rgb_means=(0.485, 0.456, 0.406), 111 | std=(0.229, 0.224, 0.225), 112 | max_labels=1000, 113 | ) 114 | ) 115 | 116 | if is_distributed: 117 | batch_size = batch_size // dist.get_world_size() 118 | sampler = torch.utils.data.distributed.DistributedSampler( 119 | valdataset, shuffle=False 120 | ) 121 | else: 122 | sampler = torch.utils.data.SequentialSampler(valdataset) 123 | 124 | dataloader_kwargs = { 125 | "num_workers": self.data_num_workers, 126 | "pin_memory": True, 127 | "sampler": sampler, 128 | } 129 | dataloader_kwargs["batch_size"] = batch_size 130 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 131 | 132 | return val_loader 133 | 134 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 135 | from yolox.evaluators import COCOEvaluator 136 | 137 | val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) 138 | evaluator = COCOEvaluator( 139 | dataloader=val_loader, 140 | img_size=self.test_size, 141 | confthre=self.conf_thresh, 142 | nmsthre3d=self.nms_thresh3d, 143 | detthre=self.det_thresh, 144 | nmsthre2d=self.nms_thresh2d, 145 | num_classes=self.num_classes, 146 | testdev=testdev, 147 | ) 148 | return evaluator 149 | 150 | def get_model(self): 151 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 152 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 153 | 154 | def init_yolo(M): 155 | for m in M.modules(): 156 | if isinstance(m, nn.BatchNorm2d): 157 | m.eps = 1e-3 158 | m.momentum = 0.03 159 | 160 | if getattr(self, "model", None) is None: 161 | in_channels = [256, 512, 1024] 162 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 163 | for value in backbone.parameters(): 164 | value.requires_grad=False 165 | head=DiffusionHead(self.num_classes,self.width) 166 | self.model = DiffusionNet(backbone, head) 167 | 168 | self.model.apply(init_yolo) 169 | # self.model.head.initialize_biases(1e-2) 170 | return self.model 171 | 172 | def get_optimizer(self, batch_size): 173 | lr=2.5e-05 174 | weight_decay = 0.0001 175 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 176 | return self.optimizer 177 | -------------------------------------------------------------------------------- /exps/example/mot/yolox_x_diffusion_track_mot17_ablation.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.distributed as dist 7 | from torch.optim import AdamW 8 | from yolox.exp import Exp as MyExp 9 | from yolox.data import get_yolox_datadir 10 | 11 | class Exp(MyExp): 12 | def __init__(self): 13 | super(Exp, self).__init__() 14 | self.num_classes = 1 15 | self.depth = 1.33 16 | self.width = 1.25 17 | self.exp_name = 
os.path.split(os.path.realpath(__file__))[1].split(".")[0] 18 | self.train_ann = "train_half.json" 19 | self.val_ann = "val_half.json" 20 | self.input_size = (800, 1440) 21 | self.test_size = (800, 1440) 22 | self.random_size = (18, 32) 23 | self.max_epoch = 30 24 | self.print_interval = 20 25 | self.eval_interval = 5 26 | self.no_aug_epochs = 10 27 | self.basic_lr_per_img = 0.001 / 64.0 28 | self.warmup_epochs = 1 29 | self.task="tracking" 30 | self.enable_mixup = True 31 | self.seed=8823 32 | self.conf_thresh=0.25 33 | self.det_thresh=0.7 34 | self.nms_thresh2d=0.75 35 | self.nms_thresh3d=0.7 36 | self.interval=5 37 | 38 | def get_data_loader(self, batch_size, is_distributed, no_aug=False): 39 | from yolox.data import ( 40 | MOTDataset, 41 | TrainTransform, 42 | YoloBatchSampler, 43 | DataLoader, 44 | InfiniteSampler, 45 | MosaicDetection, 46 | DiffusionMosaicDetection, 47 | DiffusionTrainTransform 48 | ) 49 | 50 | dataset = MOTDataset( 51 | data_dir=os.path.join(get_yolox_datadir(), "mot"), 52 | json_file=self.train_ann, 53 | name='train', 54 | img_size=self.input_size, 55 | preproc=TrainTransform( 56 | rgb_means=(0.485, 0.456, 0.406), 57 | std=(0.229, 0.224, 0.225), 58 | max_labels=500, 59 | ), 60 | ) 61 | 62 | dataset = DiffusionMosaicDetection( 63 | dataset, 64 | mosaic=not no_aug, 65 | img_size=self.input_size, 66 | preproc=DiffusionTrainTransform( 67 | rgb_means=(0.485, 0.456, 0.406), 68 | std=(0.229, 0.224, 0.225), 69 | max_labels=1000, 70 | ), 71 | degrees=self.degrees, 72 | translate=self.translate, 73 | scale=self.scale, 74 | shear=self.shear, 75 | perspective=self.perspective, 76 | enable_mixup=self.enable_mixup, 77 | ) 78 | 79 | self.dataset = dataset 80 | 81 | if is_distributed: 82 | batch_size = batch_size // dist.get_world_size() 83 | 84 | sampler = InfiniteSampler( 85 | len(self.dataset), seed=self.seed if self.seed else 0 86 | ) 87 | 88 | batch_sampler = YoloBatchSampler( 89 | sampler=sampler, 90 | batch_size=batch_size, 91 | drop_last=False, 92 | input_dimension=self.input_size, 93 | mosaic=not no_aug, 94 | ) 95 | 96 | dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} 97 | dataloader_kwargs["batch_sampler"] = batch_sampler 98 | train_loader = DataLoader(self.dataset, **dataloader_kwargs) 99 | 100 | return train_loader 101 | 102 | def get_eval_loader(self, batch_size, is_distributed, testdev=False): 103 | from yolox.data import MOTDataset,DiffusionValTransform 104 | 105 | valdataset = MOTDataset( 106 | data_dir=os.path.join(get_yolox_datadir(), "mot"), 107 | json_file=self.val_ann, 108 | img_size=self.test_size, 109 | name='train', 110 | preproc=DiffusionValTransform( 111 | rgb_means=(0.485, 0.456, 0.406), 112 | std=(0.229, 0.224, 0.225), 113 | max_labels=1000, 114 | ) 115 | ) 116 | 117 | if is_distributed: 118 | batch_size = batch_size // dist.get_world_size() 119 | sampler = torch.utils.data.distributed.DistributedSampler( 120 | valdataset, shuffle=False 121 | ) 122 | else: 123 | sampler = torch.utils.data.SequentialSampler(valdataset) 124 | 125 | dataloader_kwargs = { 126 | "num_workers": self.data_num_workers, 127 | "pin_memory": True, 128 | "sampler": sampler, 129 | } 130 | dataloader_kwargs["batch_size"] = batch_size 131 | val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) 132 | 133 | return val_loader 134 | 135 | def get_evaluator(self, batch_size, is_distributed, testdev=False): 136 | from yolox.evaluators import COCOEvaluator 137 | 138 | val_loader = self.get_eval_loader(batch_size, is_distributed, 
testdev=testdev) 139 | evaluator = COCOEvaluator( 140 | dataloader=val_loader, 141 | img_size=self.test_size, 142 | confthre=self.conf_thresh, 143 | nmsthre3d=self.nms_thresh3d, 144 | detthre=self.det_thresh, 145 | nmsthre2d=self.nms_thresh2d, 146 | num_classes=self.num_classes, 147 | testdev=testdev, 148 | ) 149 | return evaluator 150 | 151 | def get_model(self): 152 | from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead 153 | from diffusion.models.diffusionnet import DiffusionNet,DiffusionHead 154 | 155 | def init_yolo(M): 156 | for m in M.modules(): 157 | if isinstance(m, nn.BatchNorm2d): 158 | m.eps = 1e-3 159 | m.momentum = 0.03 160 | 161 | if getattr(self, "model", None) is None: 162 | in_channels = [256, 512, 1024] 163 | backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) 164 | for value in backbone.parameters(): 165 | value.requires_grad=False 166 | head=DiffusionHead(self.num_classes,self.width) 167 | self.model = DiffusionNet(backbone, head) 168 | 169 | self.model.apply(init_yolo) 170 | # self.model.head.initialize_biases(1e-2) 171 | return self.model 172 | 173 | def get_optimizer(self, batch_size): 174 | lr=2.5e-05 175 | weight_decay = 0.0001 176 | self.optimizer=AdamW(self.model.parameters(),lr=lr,weight_decay=weight_decay) 177 | return self.optimizer 178 | --------------------------------------------------------------------------------
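
All of the exps/example/mot experiment files above share one template: an Exp subclass that overrides get_data_loader, get_eval_loader, get_evaluator, get_model, and get_optimizer, differing only in dataset directory ("mot", "MOT20", "dancetrack"), annotation files, input/test size, per-image label budget, and the tracking thresholds. The short sketch below illustrates how such a config is typically consumed; it is a minimal, assumption-based example (the get_exp(exp_file=...) call follows the upstream YOLOX convention and the chosen exp path and batch size are placeholders, neither verified against this repository), and the actual training entry point is tools/train.py.

    # Sketch only: load one experiment config and build its components.
    # Assumptions: get_exp(exp_file=...) behaves as in upstream YOLOX's
    # yolox/exp/build.py, and the "mot" dataset folder already exists under
    # get_yolox_datadir().
    from yolox.exp import get_exp

    exp = get_exp(exp_file="exps/example/mot/yolox_x_diffusion_track_mot17_baseline.py")

    model = exp.get_model()           # frozen YOLOPAFPN backbone + DiffusionHead
    optimizer = exp.get_optimizer(4)  # AdamW, lr=2.5e-5, weight_decay=1e-4
    train_loader = exp.get_data_loader(batch_size=4, is_distributed=False, no_aug=False)
    evaluator = exp.get_evaluator(batch_size=4, is_distributed=False)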