├── .gitignore
├── .gitmodules
├── LICENSE
├── NOTICE
├── README.md
├── experiments
│   ├── coco_pose_tracking.sh
│   ├── coco_tracking.sh
│   ├── crowdhuman.sh
│   ├── kitti_fulltrain.sh
│   ├── kitti_half.sh
│   ├── kitti_half_sc.sh
│   ├── mot17_fulltrain.sh
│   ├── mot17_fulltrain_sc.sh
│   ├── mot17_half.sh
│   ├── mot17_half_sc.sh
│   ├── nuScenes_3Ddetection_e140.sh
│   └── nuScenes_3Dtracking.sh
├── readme
│   ├── DATA.md
│   ├── GETTING_STARTED.md
│   ├── INSTALL.md
│   ├── MODEL_ZOO.md
│   ├── coco_det.gif
│   ├── coco_pose.gif
│   ├── fig2.png
│   └── nuscenes_3d.gif
├── requirements.txt
├── src
│   ├── _init_paths.py
│   ├── convert_onnx.py
│   ├── demo.py
│   ├── lib
│   │   ├── dataset
│   │   │   ├── dataset_factory.py
│   │   │   ├── datasets
│   │   │   │   ├── coco.py
│   │   │   │   ├── coco_hp.py
│   │   │   │   ├── crowdhuman.py
│   │   │   │   ├── custom_dataset.py
│   │   │   │   ├── kitti.py
│   │   │   │   ├── kitti_tracking.py
│   │   │   │   ├── mot.py
│   │   │   │   └── nuscenes.py
│   │   │   └── generic_dataset.py
│   │   ├── detector.py
│   │   ├── external
│   │   │   ├── .gitignore
│   │   │   ├── Makefile
│   │   │   ├── __init__.py
│   │   │   ├── nms.pyx
│   │   │   └── setup.py
│   │   ├── logger.py
│   │   ├── model
│   │   │   ├── data_parallel.py
│   │   │   ├── decode.py
│   │   │   ├── losses.py
│   │   │   ├── model.py
│   │   │   ├── networks
│   │   │   │   ├── backbones
│   │   │   │   │   ├── dla.py
│   │   │   │   │   ├── mobilenet.py
│   │   │   │   │   └── resnet.py
│   │   │   │   ├── base_model.py
│   │   │   │   ├── dla.py
│   │   │   │   ├── dlav0.py
│   │   │   │   ├── generic_network.py
│   │   │   │   ├── necks
│   │   │   │   │   ├── dlaup.py
│   │   │   │   │   └── msraup.py
│   │   │   │   ├── resdcn.py
│   │   │   │   └── resnet.py
│   │   │   ├── scatter_gather.py
│   │   │   └── utils.py
│   │   ├── opts.py
│   │   ├── trainer.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── ddd_utils.py
│   │       ├── debugger.py
│   │       ├── image.py
│   │       ├── post_process.py
│   │       ├── tracker.py
│   │       └── utils.py
│   ├── main.py
│   ├── test.py
│   └── tools
│       ├── _init_paths.py
│       ├── annot_bbox.py
│       ├── convert_crowdhuman_to_coco.py
│       ├── convert_kittitrack_to_coco.py
│       ├── convert_mot_det_to_results.py
│       ├── convert_mot_to_coco.py
│       ├── convert_nuScenes.py
│       ├── eval_kitti_track
│       │   ├── data
│       │   │   └── tracking
│       │   │       ├── evaluate_tracking.seqmap
│       │   │       ├── evaluate_tracking.seqmap.test
│       │   │       ├── evaluate_tracking.seqmap.training
│       │   │       ├── evaluate_trackingtrain_1-2.seqmap
│       │   │       ├── evaluate_trackingtrain_2-2.seqmap
│       │   │       ├── evaluate_trackingval_half.seqmap
│       │   │       ├── label_02
│       │   │       │   ├── 0000.txt
│       │   │       │   ├── 0001.txt
│       │   │       │   ├── 0002.txt
│       │   │       │   ├── 0003.txt
│       │   │       │   ├── 0004.txt
│       │   │       │   ├── 0005.txt
│       │   │       │   ├── 0006.txt
│       │   │       │   ├── 0007.txt
│       │   │       │   ├── 0008.txt
│       │   │       │   ├── 0009.txt
│       │   │       │   ├── 0010.txt
│       │   │       │   ├── 0011.txt
│       │   │       │   ├── 0012.txt
│       │   │       │   ├── 0013.txt
│       │   │       │   ├── 0014.txt
│       │   │       │   ├── 0015.txt
│       │   │       │   ├── 0016.txt
│       │   │       │   ├── 0017.txt
│       │   │       │   ├── 0018.txt
│       │   │       │   ├── 0019.txt
│       │   │       │   └── 0020.txt
│       │   │       ├── label_02_train_half
│       │   │       │   ├── 0000.txt
│       │   │       │   ├── 0001.txt
│       │   │       │   ├── 0002.txt
│       │   │       │   ├── 0003.txt
│       │   │       │   ├── 0004.txt
│       │   │       │   ├── 0005.txt
│       │   │       │   ├── 0006.txt
│       │   │       │   ├── 0007.txt
│       │   │       │   ├── 0008.txt
│       │   │       │   ├── 0009.txt
│       │   │       │   ├── 0010.txt
│       │   │       │   ├── 0011.txt
│       │   │       │   ├── 0012.txt
│       │   │       │   ├── 0013.txt
│       │   │       │   ├── 0014.txt
│       │   │       │   ├── 0015.txt
│       │   │       │   ├── 0016.txt
│       │   │       │   ├── 0017.txt
│       │   │       │   ├── 0018.txt
│       │   │       │   ├── 0019.txt
│       │   │       │   └── 0020.txt
│       │   │       └── label_02_val_half
│       │   │           ├── 0000.txt
│       │   │           ├── 0001.txt
│       │   │           ├── 0002.txt
│       │   │           ├── 0003.txt
│       │   │           ├── 0004.txt
│       │   │           ├── 0005.txt
│       │   │           ├── 0006.txt
│       │   │           ├── 0007.txt
│       │   │           ├── 0008.txt
│       │   │           ├── 0009.txt
│       │   │           ├── 0010.txt
│       │   │           ├── 0011.txt
│       │   │           ├── 0012.txt
│       │   │           ├── 0013.txt
│       │   │           ├── 0014.txt
│       │   │           ├── 0015.txt
│       │   │           ├── 0016.txt
│       │   │           ├── 0017.txt
│       │   │           ├── 0018.txt
│       │   │           ├── 0019.txt
│       │   │           └── 0020.txt
│       │   ├── evaluate_tracking.py
│       │   ├── mailpy.py
│       │   └── munkres.py
│       ├── eval_motchallenge.py
│       ├── get_mot_17.sh
│       ├── nuScenes_lib
│       │   ├── export_kitti.py
│       │   └── utils_kitti.py
│       ├── remove_optimizers.py
│       ├── vis_tracking_kitti.py
│       └── vis_tracking_mot.py
└── videos
    └── nuscenes_mini.mp4
/.gitignore:
--------------------------------------------------------------------------------
1 | videos/
2 | *.zip
3 | centernet_models/*
4 | centertrack_models/*
5 | */slurm-*.out
6 | src/slurm/
7 | results/*
8 | src/lib/models/networks/DCNv2
9 | src/lib/models/networks/DCNv2_04
10 | src/lib/models/networks/DCNv2_10
11 | src/lib/model/networks/DCNv2
12 | src/lib/model/networks/DCNv2_04
13 | src/lib/model/networks/DCNv2_10
14 | .idea/
15 | legacy/*
16 | models/*
17 | .DS_Store
18 | debug/*
19 | *.DS_Store
20 | data
21 | !src/tools/eval_kitti_track/data
22 | exp
23 | exp/*
24 | *.json
25 | *.mat
26 | models/*
27 | model/*
28 | src/.vscode/*
29 | src/paths.py
30 | preds/*
31 | *.h5
32 | *.pth
33 | *.checkpoint
34 | # Byte-compiled / optimized / DLL files
35 | __pycache__/
36 | *.py[cod]
37 | *$py.class
38 |
39 | # C extensions
40 | *.so
41 |
42 | # Distribution / packaging
43 | .Python
44 | env/
45 | build/
46 | develop-eggs/
47 | dist/
48 | downloads/
49 | eggs/
50 | .eggs/
51 | lib64/
52 | parts/
53 | sdist/
54 | var/
55 | wheels/
56 | *.egg-info/
57 | .installed.cfg
58 | *.egg
59 |
60 | # PyInstaller
61 | # Usually these files are written by a python script from a template
62 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
63 | *.manifest
64 | *.spec
65 |
66 | # Installer logs
67 | pip-log.txt
68 | pip-delete-this-directory.txt
69 |
70 | # Unit test / coverage reports
71 | htmlcov/
72 | .tox/
73 | .coverage
74 | .coverage.*
75 | .cache
76 | nosetests.xml
77 | coverage.xml
78 | *.cover
79 | .hypothesis/
80 |
81 | # Translations
82 | *.mo
83 | *.pot
84 |
85 | # Django stuff:
86 | *.log
87 | local_settings.py
88 |
89 | # Flask stuff:
90 | instance/
91 | .webassets-cache
92 |
93 | # Scrapy stuff:
94 | .scrapy
95 |
96 | # Sphinx documentation
97 | docs/_build/
98 |
99 | # PyBuilder
100 | target/
101 |
102 | # Jupyter Notebook
103 | .ipynb_checkpoints
104 |
105 | # pyenv
106 | .python-version
107 |
108 | # celery beat schedule file
109 | celerybeat-schedule
110 |
111 | # SageMath parsed files
112 | *.sage.py
113 |
114 | # dotenv
115 | .env
116 |
117 | # virtualenv
118 | .venv
119 | venv/
120 | ENV/
121 |
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 |
126 | # Rope project settings
127 | .ropeproject
128 |
129 | # mkdocs documentation
130 | /site
131 |
132 | # mypy
133 | .mypy_cache/
134 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "nuscenes-devkit"]
2 | path = src/tools/nuscenes-devkit
3 | url = https://github.com/nutonomy/nuscenes-devkit
4 | branch = master
5 |
6 | [submodule "nuscenes-devkit-alpha02"]
7 | path = src/tools/nuscenes-devkit-alpha02
8 | url = https://github.com/nutonomy/nuscenes-devkit
9 | branch = e2d8c4b331567dc0bc36271dc21cdef65970eb7e
10 |
11 | [submodule "DCN-v2"]
12 | path = src/lib/model/networks/DCNv2
13 | url = https://github.com/CharlesShang/DCNv2/
14 | branch = master
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Xingyi Zhou
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Tracking Objects as Points
2 | Simultaneous object detection and tracking using center points:
3 | 
4 | > [**Tracking Objects as Points**](http://arxiv.org/abs/2004.01177),
5 | > Xingyi Zhou, Vladlen Koltun, Philipp Krähenbühl,
6 | > *arXiv technical report ([arXiv 2004.01177](http://arxiv.org/abs/2004.01177))*
7 |
8 |
9 | @article{zhou2020tracking,
10 | title={Tracking Objects as Points},
11 | author={Zhou, Xingyi and Koltun, Vladlen and Kr{\"a}henb{\"u}hl, Philipp},
12 | journal={ECCV},
13 | year={2020}
14 | }
15 |
16 | Contact: [zhouxy2017@gmail.com](mailto:zhouxy2017@gmail.com). Any questions or discussion are welcome!
17 |
18 | ## Abstract
19 | Tracking has traditionally been the art of following interest points through space and time. This changed with the rise of powerful deep networks. Nowadays, tracking is dominated by pipelines that perform object detection followed by temporal association, also known as tracking-by-detection. In this paper, we present a simultaneous detection and tracking algorithm that is simpler, faster, and more accurate than the state of the art. Our tracker, CenterTrack, applies a detection model to a pair of images and detections from the prior frame. Given this minimal input, CenterTrack localizes objects and predicts their associations with the previous frame. That's it. CenterTrack is simple, online (no peeking into the future), and real-time. It achieves 67.3% MOTA on the MOT17 challenge at 22 FPS and 89.4% MOTA on the KITTI tracking benchmark at 15 FPS, setting a new state of the art on both datasets. CenterTrack is easily extended to monocular 3D tracking by regressing additional 3D attributes. Using monocular video input, it achieves 28.3% AMOTA@0.2 on the newly released nuScenes 3D tracking benchmark, substantially outperforming the monocular baseline on this benchmark while running at 28 FPS.
20 |
21 |
22 | ## Features at a glance
23 |
24 | - One-sentence method summary: Our model takes the current frame, the previous frame, and a heatmap rendered from previous tracking results as input, and predicts the current detection heatmap as well as their offsets to centers in the previous frame.
25 |
26 | - The model can be trained on still **image datasets** if videos are not available.
27 |
28 | - Easily extends to monocular 3D object tracking, multi-category tracking, and pose tracking.
29 |
30 | - State-of-the-art performance on MOT17, KITTI, and nuScenes monocular tracking benchmarks.
31 |
32 | ## Main results
33 |
34 | ### Pedestrian tracking on MOT17 test set
35 |
36 | | Detection | MOTA | FPS |
37 | |--------------|-----------|--------|
38 | |Public | 61.5 | 22 |
39 | |Private | 67.8 | 22 |
40 |
41 | ### 2D vehicle tracking on KITTI test set (with flip test)
42 |
43 | | MOTA | FPS |
44 | |-------------|--------|
45 | | 89.44 | 15 |
46 |
47 | ### 3D tracking on nuScenes test set
48 |
49 | | AMOTA @ 0.2 | AMOTA | FPS |
50 | |---------------|---------|--------|
51 | | 27.8 | 4.6 | 28 |
52 |
53 | Besides benchmark evaluation, we also provide models for 80-category tracking and pose tracking trained on COCO. See the sample visual results below (Video files from [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) and [YOLO](https://pjreddie.com/darknet/yolov2/)).
54 |
55 |
56 |
57 |
58 |
59 | All models and details are available in our [Model zoo](readme/MODEL_ZOO.md).
60 |
61 | ## Installation
62 |
63 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions.
64 |
65 | ## Use CenterTrack
66 |
67 | We support demo for videos, webcam, and image folders.
68 |
69 | First, download the models (by default, [nuscenes\_3d\_tracking](https://drive.google.com/file/d/1gPQFzqneDtT_PjJRRuyskRsNTRHXovw1) for monocular 3D tracking, [coco_tracking](https://drive.google.com/file/d/11DEfWa0TKYzNqY3CXR51WVvjMb4oRl08) for 80-category detection, and
70 | [coco_pose_tracking](https://drive.google.com/file/d/1yGFC_Q9wzSHL1d4eZW_44EBB2H42YKYt) for pose tracking)
71 | from the [Model zoo](readme/MODEL_ZOO.md) and put them in `CenterTrack_ROOT/models/`.
72 |
73 | We provide a video clip from the [nuScenes dataset](https://www.nuscenes.org/?externalData=all&mapData=all&modalities=Any) in `videos/nuscenes_mini.mp4`.
74 | To test monocular 3D tracking on this video, run
75 |
76 | ~~~
77 | python demo.py tracking,ddd --load_model ../models/nuScenes_3Dtracking.pth --dataset nuscenes --pre_hm --track_thresh 0.1 --demo ../videos/nuscenes_mini.mp4 --test_focal_length 633
78 | ~~~
79 |
80 | You will need to specify `test_focal_length` for the monocular 3D tracking demo to convert the image coordinate system back to 3D.
81 | The value `633` is half of a typical focal length (`~1266`) in the nuScenes dataset at its original input resolution of `1600x900`.
82 | The mini demo video has an input resolution of `800x448`, so we use half the focal length (`1266 * 800 / 1600 ≈ 633`).
83 | You don't need to set `test_focal_length` when testing on the original nuScenes data.
84 |
85 | If setup correctly, you will see an output video like:
86 |
87 |
88 |
89 |
90 | Similarly, for 80-category tracking on images/ video, run:
91 |
92 | ~~~
93 | python demo.py tracking --load_model ../models/coco_tracking.pth --demo /path/to/image/or/folder/or/video
94 | ~~~
95 |
96 | If you want to test with person tracking models, you need to add `--num_class 1`:
97 |
98 | ~~~
99 | python demo.py tracking --load_model ../models/mot17_half.pth --num_class 1 --demo /path/to/image/or/folder/or/video
100 | ~~~
101 |
102 | For webcam demo, run
103 |
104 | ~~~
105 | python demo.py tracking --load_model ../models/coco_tracking.pth --demo webcam
106 | ~~~
107 |
108 | For monocular 3D tracking, run
109 |
110 | ~~~
111 | python demo.py tracking,ddd --load_model ../models/nuScenes_3Dtracking.pth --dataset nuscenes --pre_hm --track_thresh 0.1 --demo /path/to/image/or/folder/or/video/or/webcam
112 | ~~~
113 |
114 | Similarly, for pose tracking, run:
115 |
116 | ~~~
117 | python demo.py tracking,multi_pose --load_model ../models/coco_pose.pth --demo /path/to/image/or/folder/or/video/or/webcam
118 | ~~~
119 | The result for the example images should look like:
120 |
121 | You can add `--debug 2` to visualize the heatmap and offset predictions.
122 |
123 | To use CenterTrack in your own project, you can:
124 |
125 | ~~~
126 | import sys
127 | CENTERTRACK_PATH = '/path/to/CenterTrack/src/lib/'
128 | sys.path.insert(0, CENTERTRACK_PATH)
129 |
130 | from detector import Detector
131 | from opts import opts
132 |
133 | MODEL_PATH = '/path/to/model'
134 | TASK = 'tracking' # or 'tracking,multi_pose' for pose tracking and 'tracking,ddd' for monocular 3d tracking
135 | opt = opts().init('{} --load_model {}'.format(TASK, MODEL_PATH).split(' '))
136 | detector = Detector(opt)
137 |
138 | images = ['''image read from OpenCV or from a video''']
139 | for img in images:
140 |   ret = detector.run(img)['results']
141 | ~~~
142 | Each `ret` is a list of dicts: `[{'bbox': [x1, y1, x2, y2], 'tracking_id': id, ...}]`
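
For example, a minimal sketch of consuming these results (it assumes the `detector` built above and an OpenCV-readable video at a hypothetical path; only the `bbox`, `tracking_id`, and `score` keys documented above are used):

~~~
import cv2

cap = cv2.VideoCapture('/path/to/video.mp4')  # hypothetical input path
while True:
    ok, img = cap.read()
    if not ok:
        break
    # run detection + tracking on the current frame
    for det in detector.run(img)['results']:
        x1, y1, x2, y2 = map(int, det['bbox'])
        label = 'id {} ({:.2f})'.format(det['tracking_id'], det['score'])
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, label, (x1, max(0, y1 - 4)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
    cv2.imshow('tracking', img)
    if cv2.waitKey(1) == 27:  # esc to quit
        break
~~~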
143 |
144 | ## Training on custom dataset
145 |
146 | If you want to train CenterTrack on your own dataset, you can use `--dataset custom` and manually specify the annotation file, image path, input resolutions, and number of categories. You still need to create the annotation files in COCO format (referring to the many `convert_X_to_coco.py` examples in `tools`). For example, you can use the following command to train on our [mot17 experiment](experiments/mot17_half_sc.sh) without using the pre-defined mot dataset file:
147 |
148 | ~~~
149 | python main.py tracking --exp_id mot17_half_sc --dataset custom --custom_dataset_ann_path ../data/mot17/annotations/train_half.json --custom_dataset_img_path ../data/mot17/train/ --input_h 544 --input_w 960 --num_classes 1 --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1
150 |
151 | ~~~
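
For reference, a minimal sketch of the COCO-style annotation layout the converters produce. The tracking-specific fields (`video_id`, `frame_id`, `track_id`) are our reading of the `convert_X_to_coco.py` scripts; check those scripts for the exact schema before building your own converter:

~~~
{
  "images":      [{"id": 1, "file_name": "000001.jpg", "video_id": 1, "frame_id": 1}],
  "annotations": [{"id": 1, "image_id": 1, "category_id": 1,
                   "bbox": [100, 100, 50, 120], "track_id": 1}],
  "categories":  [{"id": 1, "name": "pedestrian"}]
}
~~~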
152 |
153 | ## Benchmark Evaluation and Training
154 |
155 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to setup the datasets. Then check [GETTING_STARTED.md](readme/GETTING_STARTED.md) to reproduce the results in the paper.
156 | We provide scripts for all the experiments in the [experiments](experiments) folder.
157 |
158 | ## License
159 |
160 | CenterTrack is developed upon [CenterNet](https://github.com/xingyizhou/CenterNet). Both codebases are released under the MIT License. Some of the CenterNet code comes from third parties with different licenses; please check the CenterNet repo for details. In addition, this repo uses [py-motmetrics](https://github.com/cheind/py-motmetrics) for MOT evaluation and [nuscenes-devkit](https://github.com/nutonomy/nuscenes-devkit) for nuScenes evaluation and preprocessing. See [NOTICE](NOTICE) for details. Please note the licenses of each dataset: most of the datasets we used in this project are under non-commercial licenses.
161 |
162 |
--------------------------------------------------------------------------------
/experiments/coco_pose_tracking.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train, the model is finetuned from a CenterNet detection model from the CenterNet model zoo.
3 | python main.py tracking,multi_pose --exp_id coco_pose_tracking --dataset coco_hp --load_model ../models/multi_pose_dla_3x.pth --gpus 0,1,2,3,4,5,6,7 --batch_size 128 --lr 5e-4 --num_workers 16 --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1
--------------------------------------------------------------------------------
/experiments/coco_tracking.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train, the model is finetuned from a CenterNet detection model from the CenterNet model zoo.
3 | python main.py tracking --exp_id coco_tracking --tracking --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --batch_size 128 --lr 5e-4 --num_workers 16 --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1
--------------------------------------------------------------------------------
/experiments/crowdhuman.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id crowdhuman --dataset crowdhuman --ltrb_amodal --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --num_epochs 140 --lr_step 90,120 --save_point 60,90 --gpus 0,1,2,3 --batch_size 64 --lr 2.5e-4 --num_workers 16
4 | cd ..
--------------------------------------------------------------------------------
/experiments/kitti_fulltrain.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version train --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 --load_model ../models/nuScenes_3Ddetection_e140.pth
4 | # test
5 | python test.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version test --pre_hm --track_thresh 0.4 --resume
6 |
--------------------------------------------------------------------------------
/experiments/kitti_half.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version train_half --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 --load_model ../models/nuScenes_3Ddetection_e140.pth
4 | # test
5 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --resume
--------------------------------------------------------------------------------
/experiments/kitti_half_sc.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id kitti_half_sc --dataset kitti_tracking --dataset_version train_half --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16
4 | # test
5 | python test.py tracking --exp_id kitti_half_sc --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --pre_thresh 0.5 --resume
--------------------------------------------------------------------------------
/experiments/mot17_fulltrain.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version 17trainval --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 --load_model ../models/crowdhuman.pth
4 | # test
5 | python test.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume
6 | cd ..
--------------------------------------------------------------------------------
/experiments/mot17_fulltrain_sc.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17trainval --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1
4 | # test
5 | python test.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume
6 | # test with public detection
7 | python test.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume --public_det --load_results ../data/mot17/results/test_det.json
8 | cd ..
--------------------------------------------------------------------------------
/experiments/mot17_half.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halftrain --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 --load_model ../models/crowdhuman.pth
4 | # test
5 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume
6 | # test with public detection
7 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume --public_det --load_results ../data/mot17/results/val_half_det.json
8 | cd ..
--------------------------------------------------------------------------------
/experiments/mot17_half_sc.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking --exp_id mot17_half_sc --dataset mot --dataset_version 17halftrain --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1
4 | # test
5 | python test.py tracking --exp_id mot17_half_sc --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume
6 | cd ..
--------------------------------------------------------------------------------
/experiments/nuScenes_3Ddetection_e140.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py ddd --exp_id nuScenes_3Ddetection_e140 --dataset nuscenes --batch_size 128 --gpus 0,1,2,3,4,5,6,7 --lr 5e-4 --num_epochs 140 --lr_step 90,120 --save_point 90,120
4 | # test
5 | python test.py ddd --exp_id nuScenes_3Ddetection_e140 --dataset nuscenes --resume
6 | cd ..
--------------------------------------------------------------------------------
/experiments/nuScenes_3Dtracking.sh:
--------------------------------------------------------------------------------
1 | cd src
2 | # train
3 | python main.py tracking,ddd --exp_id nuScenes_3Dtracking --dataset nuscenes --pre_hm --load_model ../models/nuScenes_3Ddetection_e140.pth --shift 0.01 --scale 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --hm_disturb 0.05 --batch_size 64 --gpus 0,1,2,3 --lr 2.5e-4 --save_point 60
4 | # test
5 | python test.py tracking,ddd --exp_id nuScenes_3Dtracking --dataset nuscenes --pre_hm --track_thresh 0.1 --resume
6 | cd ..
--------------------------------------------------------------------------------
/readme/DATA.md:
--------------------------------------------------------------------------------
1 | # Dataset preparation
2 |
3 | If you want to reproduce the results in the paper for benchmark evaluation or training, you will need to setup datasets.
4 |
5 | ### MOT 2017
6 |
7 | MOT is used to train and evaluate the system. We only use the training set (and create a validation set from it) for developing this project.
8 |
9 | We have packed the dataset preprocessing code as a script.
10 |
11 | ~~~
12 | cd $CenterTrack_ROOT/src/tools/
13 | bash get_mot_17.sh
14 | ~~~
15 |
16 | The script includes:
17 |
18 | - Download and unzip the dataset from [MOT17 website](https://motchallenge.net/data/MOT17/).
19 | - Convert it into COCO format using `tools/convert_mot_to_coco.py`.
20 | - Create the half-half train/ val set described in the paper.
21 | - Convert the public detection into a specific format.
22 | - The output data structure should be:
23 |
24 | ~~~
25 | ${CenterTrack_ROOT}
26 | |-- data
27 | `-- |-- mot17
28 | `-- |--- train
29 | | |--- MOT17-02-FRCNN
30 | | | |--- img1
31 | | | |--- gt
32 | | | | |--- gt.txt
33 | | | | |--- gt_train_half.txt
34 | | | | |--- gt_val_half.txt
35 | | | |--- det
36 | | | | |--- det.txt
37 | | | | |--- det_train_half.txt
38 | | | | |--- det_val_half.txt
39 | | |--- ...
40 | |--- test
41 | | |--- MOT17-01-FRCNN
42 | |---|--- ...
43 | `---| annotations
44 | |--- train_half.json
45 | |--- val_half.json
46 | |--- train.json
47 | `--- test.json
48 | ~~~
49 |
50 | ### KITTI Tracking
51 |
52 | We use KITTI Tracking to train and evaluate the system as well. Again, we only use the training set (and create a validation set from it) for developing this project. Note that KITTI Tracking is a 2D tracking benchmark and is different from KITTI detection (they use the same images, but different train/ val splits).
53 |
54 | - Download [images](http://www.cvlibs.net/download.php?file=data_tracking_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_tracking_label_2.zip), and [calibration information](http://www.cvlibs.net/download.php?file=data_tracking_calib.zip) (not used in 2D tracking, only if you want to demo 3D detection/ tracking) from [KITTI Tracking website](http://www.cvlibs.net/datasets/kitti/eval_tracking.php) and unzip. Place or symlink the data as below:
55 |
56 | ~~~
57 | ${CenterTrack_ROOT}
58 | |-- data
59 | `-- |-- kitti_tracking
60 | `-- |-- data_tracking_image_2
61 | | |-- training
62 | | |-- |-- image_02
63 | | |-- |-- |-- 0000
64 | | |-- |-- |-- ...
65 | |-- |-- testing
66 | |-- label_02
67 | | |-- 0000.txt
68 | | |-- ...
69 | `-- data_tracking_calib
70 | ~~~
71 |
72 | - Run `python convert_kittitrack_to_coco.py` in `tools` to convert the annotations into COCO format.
73 | - The resulting data structure should look like:
74 |
75 | ~~~
76 | ${CenterTrack_ROOT}
77 | |-- data
78 | `-- |-- kitti_tracking
79 | `-- |-- data_tracking_image_2
80 | | |-- training
81 | | | |-- image_02
82 | | | | |-- 0000
83 | | | | |-- ...
84 | |-- |-- testing
85 | |-- label_02
86 | | |-- 0000.txt
87 | | |-- ...
88 | |-- data_tracking_calib
89 | |-- label_02_val_half
90 | | |-- 0000.txt
91 | | |-- ...
92 | |-- label_02_train_half
93 | | |-- 0000.txt
94 | | |-- ...
95 | `-- annotations
96 | |-- tracking_train.json
97 | |-- tracking_test.json
98 | |-- tracking_train_half.json
99 | `-- tracking_val_half.json
100 | ~~~
101 |
102 | ### nuScenes
103 |
104 | nuScenes is used for training and evaluating 3D object tracking. We also used nuScenes for pretraining KITTI models.
105 |
106 |
107 | - Download the dataset from the [nuScenes website](https://www.nuscenes.org/download). You only need to download the "Keyframe blobs", and you only need the image data. You also need to download the maps and all metadata for the nuScenes API to work.
108 |
109 |
110 | - Unzip, rename, and place (or symlink) the data as below. You will need to merge folders from different zip files.
111 |
112 | ~~~
113 | ${CenterTrack_ROOT}
114 | |-- data
115 | `-- |-- nuscenes
116 | `-- |-- v1.0-trainval
117 | | |-- samples
118 | | | |-- CAM_BACK
119 | | | | | -- xxx.jpg
120 | | | |-- CAM_BACK_LEFT
121 | | | |-- CAM_BACK_RIGHT
122 | | | |-- CAM_FRONT
123 | | | |-- CAM_FRONT_LEFT
124 | | | |-- CAM_FRONT_RIGHT
125 | |-- |-- maps
126 | `-- |-- v1.0-trainval_meta
127 | ~~~
128 |
129 | - Run `python convert_nuScenes.py` in `tools` to convert the annotations into COCO format. It will create `train.json`, `val.json`, and `test.json` under `data/nuscenes/annotations`. The nuScenes API is required for running the data preprocessing.
130 |
131 | ### CrowdHuman
132 |
133 | CrowdHuman is used for pretraining the MOT model. Only the training set is used.
134 |
135 | - Download the dataset from [its website](https://www.crowdhuman.org/download.html).
136 |
137 | - Unzip and place (or symlink) the data as below. You will need to merge folders from different zip files.
138 |
139 | ~~~
140 | ${CenterTrack_ROOT}
141 | |-- data
142 | `-- |-- crowdhuman
143 | |-- |-- CrowdHuman_train
144 | | | |-- Images
145 | |-- |-- CrowdHuman_val
146 | | | |-- Images
147 | |-- |-- annotation_train.odgt
148 | |-- |-- annotation_val.odgt
149 | ~~~
150 |
151 | - Run `python convert_crowdhuman_to_coco.py` in `tools` to convert the annotations into COCO format. It will create `train.json` and `val.json` under `data/crowdhuman/annotations`.
152 |
153 | ### COCO
154 |
155 | COCO is used to train a demo system for 80-category tracking or pose tracking.
156 | The models are NOT evaluated on any benchmarks.
157 |
158 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download).
159 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download).
160 | - Place the data (or create symlinks) to make the data folder like:
161 |
162 | ~~~
163 | ${CenterTrack_ROOT}
164 | |-- data
165 | `-- |-- coco
166 | `-- |-- annotations
167 | | |-- instances_train2017.json
168 | | |-- instances_val2017.json
169 | | |-- person_keypoints_train2017.json
170 | | |-- person_keypoints_val2017.json
171 | | |-- image_info_test-dev2017.json
172 | |---|-- train2017
173 | |---|-- val2017
174 | `---|-- test2017
175 | ~~~
176 |
177 |
178 | ## References
179 | Please cite the corresponding references if you use these datasets.
180 |
181 | ~~~
182 | @article{MOT16,
183 | title = {{MOT}16: {A} Benchmark for Multi-Object Tracking},
184 | shorttitle = {MOT16},
185 | url = {http://arxiv.org/abs/1603.00831},
186 | journal = {arXiv:1603.00831 [cs]},
187 | author = {Milan, A. and Leal-Taix\'{e}, L. and Reid, I. and Roth, S. and Schindler, K.},
188 | month = mar,
189 | year = {2016},
190 | note = {arXiv: 1603.00831},
191 | keywords = {Computer Science - Computer Vision and Pattern Recognition}
192 | }
193 |
194 | @article{shao2018crowdhuman,
195 | title={Crowdhuman: A benchmark for detecting human in a crowd},
196 | author={Shao, Shuai and Zhao, Zijian and Li, Boxun and Xiao, Tete and Yu, Gang and Zhang, Xiangyu and Sun, Jian},
197 | journal={arXiv:1805.00123},
198 | year={2018}
199 | }
200 |
201 | @INPROCEEDINGS{Geiger2012CVPR,
202 | author = {Andreas Geiger and Philip Lenz and Raquel Urtasun},
203 | title = {Are we ready for Autonomous Driving? The KITTI Vision Benchmark Suite},
204 | booktitle = {CVPR},
205 | year = {2012}
206 | }
207 |
208 | @inproceedings{lin2014microsoft,
209 | title={Microsoft {COCO}: Common objects in context},
210 | author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
211 | booktitle={ECCV},
212 | year={2014},
213 | }
214 |
215 | @inproceedings{nuscenes2019,
216 | title={{nuScenes}: A multimodal dataset for autonomous driving},
217 | author={Holger Caesar and Varun Bankiti and Alex H. Lang and Sourabh Vora and Venice Erin Liong and Qiang Xu and Anush Krishnan and Yu Pan and Giancarlo Baldan and Oscar Beijbom},
218 | booktitle={CVPR},
219 | year={2020}
220 | }
221 | ~~~
--------------------------------------------------------------------------------
/readme/GETTING_STARTED.md:
--------------------------------------------------------------------------------
1 | # Getting Started
2 |
3 | This document provides tutorials to train and evaluate CenterTrack. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md).
4 |
5 | ## Benchmark evaluation
6 |
7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterTrack_ROOT/models/`.
8 |
9 | ### MOT17
10 |
11 | To test the tracking performance on MOT17 with our pretrained model, run
12 |
13 | ~~~
14 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_half.pth
15 | ~~~
16 |
17 | This will give a MOTA of `66.1` if set up correctly. `--pre_hm` enables the input heatmap. `--ltrb_amodal` uses the left-top-right-bottom bounding box representation to allow detecting out-of-image bounding boxes (we observed this is important for MOT datasets). `--track_thresh` and `--pre_thresh` are the score thresholds for predicting a bounding box ($\theta$ in the paper) and for feeding detections into the heatmap of the next frame ($\tau$ in the paper), respectively.
18 |
19 | To test with public detection, run
20 |
21 | ~~~
22 | python test.py tracking --exp_id mot17_half_public --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_half.pth --public_det --load_results ../data/mot17/results/val_half_det.json
23 | ~~~
24 |
25 | The expected MOTA is `63.1`.
26 |
27 | To test on the test set, run
28 |
29 | ~~~
30 | python test.py tracking --exp_id mot17_fulltrain_public --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_fulltrain_sc.pth --public_det --load_results ../data/mot17/results/test_det.json
31 | ~~~
32 |
33 | Test set evaluation requires submitting to the official test server.
34 | We discourage users from submitting our predictions to the test server, to prevent test set abuse.
35 | You can append `--debug 2` to the above commands to visualize the predictions.
36 |
37 | See the experiments folder for testing in other settings.
38 |
39 |
40 | ### KITTI Tracking
41 |
42 | Run:
43 |
44 | ~~~
45 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --load_model ../models/kitti_half.pth
46 | ~~~
47 |
48 | The expected MOTA is `88.7`.
49 |
50 | ### nuScenes
51 |
52 | Run:
53 |
54 | ~~~
55 | python test.py tracking,ddd --exp_id nuScenes_3Dtracking --load_model ../models/nuScenes_3Dtracking.pth --dataset nuscenes --track_thresh 0.1 --pre_hm
56 | ~~~
57 |
58 | The expected AMOTA is `6.8`.
59 |
60 | ## Training
61 | We have packed all the training scripts in the [experiments](../experiments) folder.
62 | The experiment names correspond to the model name in the [model zoo](MODEL_ZOO.md).
63 | The number of GPUs for each experiment can be found in the scripts and the model zoo.
64 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will find the latest checkpoint with the same `exp_id`.
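
For example, a sketch of resuming an interrupted `mot17_half` run. The flags mirror [mot17_half.sh](../experiments/mot17_half.sh), with `--resume` added and the initial `--load_model` dropped, assuming `--resume` picks up the latest checkpoint under the same `exp_id` when no `--load_model` is given:

~~~
python main.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halftrain --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 --resume
~~~
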
65 | Some experiments rely on pretraining on another model. In this case, download the pretrained model from our model zoo or train that model first.
--------------------------------------------------------------------------------
/readme/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installation
2 |
3 |
4 | The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6, CUDA 10.0, and [PyTorch](http://pytorch.org/) v1.0.
5 | It should be compatible with PyTorch versions >=0.4 and <=1.4 (you will need to switch the DCNv2 version for PyTorch <1.0).
6 | After installing Anaconda:
7 |
8 | 0. [Optional but highly recommended] create a new conda environment.
9 |
10 | ~~~
11 | conda create --name CenterTrack python=3.6
12 | ~~~
13 | And activate the environment.
14 |
15 | ~~~
16 | conda activate CenterTrack
17 | ~~~
18 |
19 | 1. Install PyTorch:
20 |
21 | ~~~
22 | conda install pytorch torchvision -c pytorch
23 | ~~~
24 |
25 |
26 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi):
27 |
28 | ~~~
29 | pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
30 | ~~~
31 |
32 | 3. Clone this repo:
33 |
34 | ~~~
35 | CenterTrack_ROOT=/path/to/clone/CenterTrack
36 | git clone --recursive https://github.com/xingyizhou/CenterTrack $CenterTrack_ROOT
37 | ~~~
38 |
39 | You can manually install the [submodules](../.gitmodules) if you forget `--recursive`.
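
For example, with standard git commands (run from the repo root):

~~~
cd $CenterTrack_ROOT
git submodule update --init --recursive
~~~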
40 |
41 | 4. Install the requirements
42 |
43 | ~~~
44 | pip install -r requirements.txt
45 | ~~~
46 |
47 |
48 | 5. Compile deformable convolutional (from [DCNv2](https://github.com/CharlesShang/DCNv2/)).
49 |
50 | ~~~
51 | cd $CenterTrack_ROOT/src/lib/model/networks/
52 | # git clone https://github.com/CharlesShang/DCNv2/ # clone if it is not automatically downloaded by `--recursive`.
53 | cd DCNv2
54 | ./make.sh
55 | ~~~
56 |
57 | 6. Download pretrained models for [monocular 3D tracking](https://drive.google.com/open?id=1e8zR1m1QMJne-Tjp-2iY_o81hn2CiQRt), [80-category tracking](https://drive.google.com/open?id=1tJCEJmdtYIh8VuN8CClGNws3YO7QGd40), or [pose tracking](https://drive.google.com/open?id=1H0YvFYCOIZ06EzAkC2NxECNQGXxK27hH) and move them to `$CenterTrack_ROOT/models/`. More models can be found in [Model zoo](MODEL_ZOO.md).
--------------------------------------------------------------------------------
/readme/MODEL_ZOO.md:
--------------------------------------------------------------------------------
1 | # MODEL ZOO
2 |
3 | ### Common settings and notes
4 |
5 | - The experiments are run with PyTorch 1.0, CUDA 10.0, and CUDNN 7.5.
6 | - Training times are measured on our servers with TITAN V GPUs (12 GB Memory).
7 | - Testing times are measured on our local machine with TITAN Xp GPU.
8 | - The models can be downloaded directly from [Google drive](https://drive.google.com/drive/folders/1y_CWlbboW_dfOx6zT9MU4ugLaLc6FEE8).
9 |
10 | ## 2D bounding box Tracking
11 |
12 | ### MOT17
13 |
14 | | Model | GPUs | Train time | Test time | Validation MOTA | Test MOTA | Download |
15 | |-----------------------|------|----------|-----------|------------------|------------|----------|
16 | | [mot17_fulltrain](../experiments/mot17_fulltrain.sh) | 4 | 4h | 45ms | - |67.3 (Private Detection)| [model](https://drive.google.com/file/d/1JYqO_IEoHpd7JEzZRXZSVesnEL4e-tnf) |
17 | | [mot17_fulltrain_sc](../experiments/mot17_fulltrain_sc.sh) | 4 | 4h | 45ms | - |61.4 (Public Detection) | [model](https://drive.google.com/file/d/17rtVMuFOnRzXj0_3egrFI5j-wc8XviDZ) |
18 | | [mot17_half](../experiments/mot17_half.sh) | 4 | 2h | 45ms | 66.1 | - | [model](https://drive.google.com/file/d/1rJ0fzRcpRQPjaN17lcqfKgsz-wJRifHh) |
19 | | [mot17_half_sc](../experiments/mot17_half_sc.sh) | 4 | 2h | 45ms | 60.7 | - | [model](https://drive.google.com/file/d/1o_cCo92WiVg8mgwyESd1Gg1AZYnq1iAJ) |
20 | | [crowdhuman](../experiments/crowdhuman.sh) | 4 | 21h | 45ms | 52.2 | - |[model](https://drive.google.com/file/d/1SD31FLwbXArcX3LXnRCqh6RF-q38nO7f) |
21 |
22 | #### Notes
23 |
24 | - `*_half` corresponds to the half-half video train/ val split mentioned in the paper.
25 | - `*_fulltrain` corresponds to training on the full training set and evaluating on the official test server. These models are provided for arXiv and demo purposes. We strongly discourage submitting our predictions to the test server, to avoid abusing the test set. Usually the validation results are all you need for development.
26 | - `mot17_half`/ `mot17_fulltrain` are finetuned on the `crowdhuman` model, and `mot17_half_sc`/ `mot17_fulltrain_sc` are trained from ImageNet initialization.
27 | - The validation results are both using private detection.
28 | - All the MOT models are trained for 70 epochs, with learning rate dropped at the 60th epoch.
29 | - The crowdhuman model is trained on the CrowdHuman dataset with the "training on static image data" technique in our paper, and evaluated directly on the MOT17 validation set. The crowdhuman pretraining uses 140 epochs, with the learning rate dropped at the 90th and 120th epochs.
30 | - The training schedules have not been studied in depth.
31 | - We observe about 1 MOTA of random noise for the MOT models.
32 | - If the resulting MOTA of your self-trained model is lower than expected, playing with `--track_thresh` and `--pre_thresh` sometimes gives a better number (see Appendix H of the paper and the example after these notes).
33 | - The MOT models, even when trained on the full training set, still do not look great on in-the-wild videos. The crowdhuman model is a better choice for real-world applications. However, be aware that both datasets are under non-commercial licenses.
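
For example (threshold values here are purely illustrative), re-running the `mot17_half` validation from [GETTING_STARTED.md](GETTING_STARTED.md) with different thresholds:

~~~
python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.3 --pre_thresh 0.4 --load_model ../models/mot17_half.pth
~~~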
34 |
35 |
36 | ### KITTI 2D Tracking
37 |
38 | | Model |GPUs| Train time| Test time | Validation MOTA | Test MOTA | Download |
39 | |-----------------------|----|-----------|-----------|------------------|------------|-----------|
40 | | [kitti_fulltrain](../experiments/kitti_fulltrain.sh) (flip)| 2 | 9h | 66ms | - | 89.44 | [model](https://drive.google.com/file/d/13oUEpeZ8bVQ6z7A6SH88de4SwLgh_kMB) |
41 | | [kitti_half](../experiments/kitti_half.sh) | 2 | 4.5h | 40ms | 88.7 | - | [model](https://drive.google.com/file/d/1AZiFG0p3VxB2pA_5XIkbue4ASfxaA3e1) |
42 | | [kitti_half_sc](../experiments/kitti_half_sc.sh) | 2 | 4.5h | 40ms | 84.5 | - | [model](https://drive.google.com/file/d/13rmdfi1rX3X7yFOndzyARTYO51uSNW0Z)|
43 |
44 | #### Notes
45 |
46 | - We use flip-test for the model we submitted to the test server (kitti_fulltrain_flip).
47 | - `kitti_fulltrain` are finetuned on the nuScenes_3Ddetection_e140 model (see below).
48 | - All the models are trained for 70 epochs.
49 | - We observe up to 1.5 MOTA jittering due to randomness. The results are reported for the best model.
50 |
51 | ## Monocular 3D Detection/ Tracking
52 |
53 | ### nuScenes
54 |
55 | | Model | GPUs |Train time| Test time | Val AMOTA@0.2 | Val AMOTA | Val mAP | Download |
56 | |--------------------------|------|----------|-----------|---------------|-----------|---------|-----------|
57 | | [nuScenes_3Ddetection_e140](../experiments/nuScenes_3Ddetection_e140.sh)| 8 | 72h | 28ms | - | - | 30.27 | [model](https://drive.google.com/file/d/1o989b1tANh49uHhNbsCCJ5J57FGiaFut) |
58 | | [nuScenes_3Dtracking](../experiments/nuScenes_3Dtracking.sh) | 8 | 40h | 28ms | 28.3 | 6.8 | - | [model](https://drive.google.com/file/d/1gPQFzqneDtT_PjJRRuyskRsNTRHXovw1) |
59 |
60 | #### Notes
61 |
62 | - Both models are trained on our DGX servers with 8x 32G V100 GPUs.
63 | - The 3D detection model is trained on all 6 camera images of the keyframes for 140 epochs. It does not include attributes and velocity prediction and is different from the model we used in the 3D detection leaderboard. See the CenterNet repo for details about the full 3D detection model we used for test set evaluation.
64 | - The 3D tracking model is finetuned on the 3D detection model for 70 epochs.
65 | - Training on 4 GPUs, or on 8x 12G GPUs with a smaller batch size, is OK if the [linear learning rate rule](https://arxiv.org/abs/1706.02677) is applied (see the example below).
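
For example, a sketch of scaling [nuScenes_3Ddetection_e140.sh](../experiments/nuScenes_3Ddetection_e140.sh) from 8 GPUs (batch size 128, lr 5e-4) down to 4 GPUs under the linear rule, i.e. halving both the batch size and the learning rate (treat these values as a starting point, not a verified schedule):

~~~
python main.py ddd --exp_id nuScenes_3Ddetection_e140 --dataset nuscenes --batch_size 64 --gpus 0,1,2,3 --lr 2.5e-4 --num_epochs 140 --lr_step 90,120 --save_point 90,120
~~~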
66 |
67 | ## COCO Tracking (for demo purpose only)
68 |
69 | | Model |GPUs| Train time| Test time | Download |
70 | |-----------------------|----|-----------|-----------|-----------|
71 | | [coco_tracking](../experiments/coco_tracking.sh) | 8 | 39h | 30ms | [model](https://drive.google.com/file/d/11DEfWa0TKYzNqY3CXR51WVvjMb4oRl08) |
72 | | [coco_pose_tracking](../experiments/coco_pose_tracking.sh) | 8 | 19h | 33ms | [model](https://drive.google.com/file/d/1yGFC_Q9wzSHL1d4eZW_44EBB2H42YKYt)|
73 |
74 | - Both models are trained with the "training on static image data" technique in our paper.
75 | - The models are not evaluated on any benchmarks since there are no suitable ones in this setting. We provide them for demo purposes only.
--------------------------------------------------------------------------------
/readme/coco_det.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/coco_det.gif
--------------------------------------------------------------------------------
/readme/coco_pose.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/coco_pose.gif
--------------------------------------------------------------------------------
/readme/fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/fig2.png
--------------------------------------------------------------------------------
/readme/nuscenes_3d.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/nuscenes_3d.gif
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | Cython
3 | numba
4 | progress
5 | matplotlib
6 | easydict
7 | scipy
8 | pyquaternion
9 | nuscenes-devkit
10 | pyyaml
11 | motmetrics
12 | scikit-learn==0.22.2
--------------------------------------------------------------------------------
/src/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 | def add_path(path):
5 | if path not in sys.path:
6 | sys.path.insert(0, path)
7 |
8 | this_dir = osp.dirname(__file__)
9 |
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 |
--------------------------------------------------------------------------------
/src/convert_onnx.py:
--------------------------------------------------------------------------------
1 | '''
2 | Script to convert a trained CenterNet model to ONNX, currently only
3 | supports non-DCN models.
4 | '''
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import _init_paths
10 | import os
11 | import json
12 | import cv2
13 | import numpy as np
14 | import time
15 | from progress.bar import Bar
16 | import torch
17 | import copy
18 |
19 | from model.model import create_model, load_model
20 | from opts import opts
21 | from dataset.dataset_factory import dataset_factory
22 | from detector import Detector
23 |
24 |
25 | def convert_onnx(opt):
26 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
27 | opt.model_output_list = True
28 | if opt.gpus[0] >= 0:
29 | opt.device = torch.device('cuda')
30 | else:
31 | opt.device = torch.device('cpu')
32 | Dataset = dataset_factory[opt.test_dataset]
33 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
34 | print(opt)
35 | model = create_model(
36 | opt.arch, opt.heads, opt.head_conv, opt=opt)
37 | if opt.load_model != '':
38 | model = load_model(model, opt.load_model, opt)
39 | model = model.to(opt.device)
40 | model.eval()
41 | dummy_input1 = torch.randn(1, 3, opt.input_h, opt.input_w).to(opt.device)
42 |
43 | if opt.tracking:
44 | dummy_input2 = torch.randn(1, 3, opt.input_h, opt.input_w).to(opt.device)
45 | if opt.pre_hm:
46 | dummy_input3 = torch.randn(1, 1, opt.input_h, opt.input_w).to(opt.device)
47 | torch.onnx.export(
48 | model, (dummy_input1, dummy_input2, dummy_input3),
49 | "../models/{}.onnx".format(opt.exp_id))
50 | else:
51 | torch.onnx.export(
52 | model, (dummy_input1, dummy_input2),
53 | "../models/{}.onnx".format(opt.exp_id))
54 | else:
55 | torch.onnx.export(
56 | model, (dummy_input1, ),
57 | "../models/{}.onnx".format(opt.exp_id))
58 | if __name__ == '__main__':
59 | opt = opts().parse()
60 | convert_onnx(opt)
61 |
62 |
--------------------------------------------------------------------------------
/src/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 |
7 | import os
8 | import sys
9 | import cv2
10 | import json
11 | import copy
12 | import numpy as np
13 | from opts import opts
14 | from detector import Detector
15 |
16 |
17 | image_ext = ['jpg', 'jpeg', 'png', 'webp']
18 | video_ext = ['mp4', 'mov', 'avi', 'mkv']
19 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'display']
20 |
21 | def demo(opt):
22 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
23 | opt.debug = max(opt.debug, 1)
24 | detector = Detector(opt)
25 |
26 | if opt.demo == 'webcam' or \
27 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext:
28 | is_video = True
29 | # demo on video stream
30 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo)
31 | else:
32 | is_video = False
33 | # Demo on image sequences
34 | if os.path.isdir(opt.demo):
35 | image_names = []
36 | ls = os.listdir(opt.demo)
37 | for file_name in sorted(ls):
38 | ext = file_name[file_name.rfind('.') + 1:].lower()
39 | if ext in image_ext:
40 | image_names.append(os.path.join(opt.demo, file_name))
41 | else:
42 | image_names = [opt.demo]
43 |
44 | # Initialize output video
45 | out = None
46 | out_name = opt.demo[opt.demo.rfind('/') + 1:]
47 | print('out_name', out_name)
48 | if opt.save_video:
49 | # fourcc = cv2.VideoWriter_fourcc(*'XVID')
50 | fourcc = cv2.VideoWriter_fourcc(*'H264')
51 | out = cv2.VideoWriter('../results/{}.mp4'.format(
52 | opt.exp_id + '_' + out_name),fourcc, opt.save_framerate, (
53 | opt.video_w, opt.video_h))
54 |
55 | if opt.debug < 5:
56 | detector.pause = False
57 | cnt = 0
58 | results = {}
59 |
60 | while True:
61 | if is_video:
62 | _, img = cam.read()
63 | if img is None:
64 | save_and_exit(opt, out, results, out_name)
65 | else:
66 | if cnt < len(image_names):
67 | img = cv2.imread(image_names[cnt])
68 | else:
69 | save_and_exit(opt, out, results, out_name)
70 | cnt += 1
71 |
72 | # resize the original video for saving video results
73 | if opt.resize_video:
74 | img = cv2.resize(img, (opt.video_w, opt.video_h))
75 |
76 | # skip the first X frames of the video
77 | if cnt < opt.skip_first:
78 | continue
79 |
80 | cv2.imshow('input', img)
81 |
82 | # track or detect the image.
83 | ret = detector.run(img)
84 |
85 | # log run time
86 | time_str = 'frame {} |'.format(cnt)
87 | for stat in time_stats:
88 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat])
89 | print(time_str)
90 |
91 | # results[cnt] is a list of dicts:
92 | # [{'bbox': [x1, y1, x2, y2], 'tracking_id': id, 'category_id': c, ...}]
93 | results[cnt] = ret['results']
94 |
95 | # save debug image to video
96 | if opt.save_video:
97 | out.write(ret['generic'])
98 | if not is_video:
99 | cv2.imwrite('../results/demo{}.jpg'.format(cnt), ret['generic'])
100 |
101 | # esc to quit and finish saving video
102 | if cv2.waitKey(1) == 27:
103 | save_and_exit(opt, out, results, out_name)
104 | return
105 | save_and_exit(opt, out, results)
106 |
107 |
108 | def save_and_exit(opt, out=None, results=None, out_name=''):
109 | if opt.save_results and (results is not None):
110 | save_dir = '../results/{}_results.json'.format(opt.exp_id + '_' + out_name)
111 | print('saving results to', save_dir)
112 | json.dump(_to_list(copy.deepcopy(results)),
113 | open(save_dir, 'w'))
114 | if opt.save_video and out is not None:
115 | out.release()
116 | sys.exit(0)
117 |
118 | def _to_list(results):
119 | for img_id in results:
120 | for t in range(len(results[img_id])):
121 | for k in results[img_id][t]:
122 | if isinstance(results[img_id][t][k], (np.ndarray, np.float32)):
123 | results[img_id][t][k] = results[img_id][t][k].tolist()
124 | return results
125 |
126 | if __name__ == '__main__':
127 | opt = opts().init()
128 | demo(opt)
129 |
--------------------------------------------------------------------------------
/src/lib/dataset/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 |
11 | from .datasets.coco import COCO
12 | from .datasets.kitti import KITTI
13 | from .datasets.coco_hp import COCOHP
14 | from .datasets.mot import MOT
15 | from .datasets.nuscenes import nuScenes
16 | from .datasets.crowdhuman import CrowdHuman
17 | from .datasets.kitti_tracking import KITTITracking
18 | from .datasets.custom_dataset import CustomDataset
19 |
20 | dataset_factory = {
21 | 'custom': CustomDataset,
22 | 'coco': COCO,
23 | 'kitti': KITTI,
24 | 'coco_hp': COCOHP,
25 | 'mot': MOT,
26 | 'nuscenes': nuScenes,
27 | 'crowdhuman': CrowdHuman,
28 | 'kitti_tracking': KITTITracking,
29 | }
30 |
31 |
32 | def get_dataset(dataset):
33 | return dataset_factory[dataset]
34 |
--------------------------------------------------------------------------------
/src/lib/dataset/datasets/coco.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 | import copy
11 |
12 | from ..generic_dataset import GenericDataset
13 |
14 | class COCO(GenericDataset):
15 | default_resolution = [512, 512]
16 | num_categories = 80
17 | class_name = [
18 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
19 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
20 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
21 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
22 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
23 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
24 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
25 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
26 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
27 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
28 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
29 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
30 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
31 | _valid_ids = [
32 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
33 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
34 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
35 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
36 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
37 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
38 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
39 | 82, 84, 85, 86, 87, 88, 89, 90]
40 | cat_ids = {v: i + 1 for i, v in enumerate(_valid_ids)}
41 | num_joints = 17
42 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
43 | [11, 12], [13, 14], [15, 16]]
44 | edges = [[0, 1], [0, 2], [1, 3], [2, 4],
45 | [4, 6], [3, 5], [5, 6],
46 | [5, 7], [7, 9], [6, 8], [8, 10],
47 | [6, 12], [5, 11], [11, 12],
48 | [12, 14], [14, 16], [11, 13], [13, 15]]
49 | max_objs = 128
50 | def __init__(self, opt, split):
51 | # load annotations
52 | data_dir = os.path.join(opt.data_dir, 'coco')
53 | img_dir = os.path.join(data_dir, '{}2017'.format(split))
54 | if opt.trainval:
55 | split = 'test'
56 | ann_path = os.path.join(
57 | data_dir, 'annotations',
58 | 'image_info_test-dev2017.json')
59 | else:
60 | ann_path = os.path.join(
61 | data_dir, 'annotations',
62 | 'instances_{}2017.json').format(split)
63 |
64 | self.images = None
65 | # load image list and coco
66 | super(COCO, self).__init__(opt, split, ann_path, img_dir)
67 |
68 | self.num_samples = len(self.images)
69 |
70 | print('Loaded {} {} samples'.format(split, self.num_samples))
71 |
72 | def _to_float(self, x):
73 | return float("{:.2f}".format(x))
74 |
75 | def convert_eval_format(self, all_bboxes):
76 | detections = []
77 | for image_id in all_bboxes:
78 | if type(all_bboxes[image_id]) != type({}):
79 | # newest format
80 | for j in range(len(all_bboxes[image_id])):
81 | item = all_bboxes[image_id][j]
82 | cat_id = item['class'] - 1
83 | category_id = self._valid_ids[cat_id]
84 | bbox = item['bbox']
85 | bbox[2] -= bbox[0]
86 | bbox[3] -= bbox[1]
87 | bbox_out = list(map(self._to_float, bbox[0:4]))
88 | detection = {
89 | "image_id": int(image_id),
90 | "category_id": int(category_id),
91 | "bbox": bbox_out,
92 | "score": float("{:.2f}".format(item['score']))
93 | }
94 | detections.append(detection)
95 | return detections
96 |
97 | def __len__(self):
98 | return self.num_samples
99 |
100 | def save_results(self, results, save_dir):
101 | json.dump(self.convert_eval_format(results),
102 | open('{}/results_coco.json'.format(save_dir), 'w'))
103 |
104 | def run_eval(self, results, save_dir):
105 | self.save_results(results, save_dir)
106 | coco_dets = self.coco.loadRes('{}/results_coco.json'.format(save_dir))
107 | coco_eval = COCOeval(self.coco, coco_dets, "bbox")
108 | coco_eval.evaluate()
109 | coco_eval.accumulate()
110 | coco_eval.summarize()
--------------------------------------------------------------------------------
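
convert_eval_format above turns the detector's [x1, y1, x2, y2] boxes into the [x, y, w, h] layout COCOeval expects and rounds to two decimals. A standalone sketch of that conversion (illustrative only):

# Standalone sketch of the box conversion performed in convert_eval_format.
def to_coco_box(bbox):
    x1, y1, x2, y2 = bbox
    return [round(float(v), 2) for v in (x1, y1, x2 - x1, y2 - y1)]

print(to_coco_box([10.0, 20.0, 110.5, 70.25]))  # [10.0, 20.0, 100.5, 50.25]

--------------------------------------------------------------------------------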
/src/lib/dataset/datasets/coco_hp.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 |
11 | from ..generic_dataset import GenericDataset
12 |
13 | class COCOHP(GenericDataset):
14 | num_categories = 1
15 | class_name = ['']
16 | num_joints = 17
17 | default_resolution = [512, 512]
18 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
19 | [11, 12], [13, 14], [15, 16]]
20 | edges = [[0, 1], [0, 2], [1, 3], [2, 4],
21 | [4, 6], [3, 5], [5, 6],
22 | [5, 7], [7, 9], [6, 8], [8, 10],
23 | [6, 12], [5, 11], [11, 12],
24 | [12, 14], [14, 16], [11, 13], [13, 15]]
25 | max_objs = 32
26 | cat_ids = {1: 1}
27 |
28 | def __init__(self, opt, split):
29 | data_dir = os.path.join(opt.data_dir, 'coco')
30 | img_dir = os.path.join(data_dir, '{}2017'.format(split))
31 | if split == 'test':
32 |       ann_path = os.path.join(data_dir, 'annotations',
33 |         'image_info_test-dev2017.json')
34 | else:
35 | ann_path = os.path.join(data_dir, 'annotations',
36 | 'person_keypoints_{}2017.json').format(split)
37 |
38 |
39 | self.images = None
40 | # load image list and coco
41 | super(COCOHP, self).__init__(opt, split, ann_path, img_dir)
42 |
43 | if split == 'train':
44 | image_ids = self.coco.getImgIds()
45 | self.images = []
46 | for img_id in image_ids:
47 | idxs = self.coco.getAnnIds(imgIds=[img_id])
48 | if len(idxs) > 0:
49 | self.images.append(img_id)
50 |
51 | self.num_samples = len(self.images)
52 | print('Loaded {} {} samples'.format(split, self.num_samples))
53 |
54 | def _to_float(self, x):
55 | return float("{:.2f}".format(x))
56 |
57 | def convert_eval_format(self, all_bboxes):
58 | # import pdb; pdb.set_trace()
59 | detections = []
60 | for image_id in all_bboxes:
61 | if type(all_bboxes[image_id]) != type({}):
62 | # newest format
63 | for j in range(len(all_bboxes[image_id])):
64 | item = all_bboxes[image_id][j]
65 | if item['class'] != 1:
66 | continue
67 | category_id = 1
68 | keypoints = np.concatenate([
69 | np.array(item['hps'], dtype=np.float32).reshape(-1, 2),
70 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist()
71 | detection = {
72 | "image_id": int(image_id),
73 | "category_id": int(category_id),
74 | "score": float("{:.2f}".format(item['score'])),
75 | "keypoints": keypoints
76 | }
77 | if 'bbox' in item:
78 | bbox = item['bbox']
79 | bbox[2] -= bbox[0]
80 | bbox[3] -= bbox[1]
81 | bbox_out = list(map(self._to_float, bbox[0:4]))
82 | detection['bbox'] = bbox_out
83 | detections.append(detection)
84 | return detections
85 |
86 | def __len__(self):
87 | return self.num_samples
88 |
89 | def save_results(self, results, save_dir):
90 | json.dump(self.convert_eval_format(results),
91 | open('{}/results_cocohp.json'.format(save_dir), 'w'))
92 |
93 |
94 | def run_eval(self, results, save_dir):
95 | # result_json = os.path.join(opt.save_dir, "results.json")
96 | # detections = convert_eval_format(all_boxes)
97 | # json.dump(detections, open(result_json, "w"))
98 | self.save_results(results, save_dir)
99 | coco_dets = self.coco.loadRes('{}/results_cocohp.json'.format(save_dir))
100 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
101 | coco_eval.evaluate()
102 | coco_eval.accumulate()
103 | coco_eval.summarize()
104 | coco_eval = COCOeval(self.coco, coco_dets, "bbox")
105 | coco_eval.evaluate()
106 | coco_eval.accumulate()
107 | coco_eval.summarize()
108 |
--------------------------------------------------------------------------------
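
convert_eval_format above packs the 17 predicted (x, y) joints into the 51-number keypoints field of the COCO keypoint results format, with every visibility flag set to 1. A standalone sketch with a fake prediction:

import numpy as np

# 'hps' is a flat list of 17 (x, y) joint coordinates; values here are fake.
hps = np.arange(34, dtype=np.float32)
keypoints = np.concatenate(
    [hps.reshape(-1, 2), np.ones((17, 1), dtype=np.float32)],
    axis=1).reshape(51).tolist()
print(len(keypoints), keypoints[:6])  # 51 [0.0, 1.0, 1.0, 2.0, 3.0, 1.0]

--------------------------------------------------------------------------------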
/src/lib/dataset/datasets/crowdhuman.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 |
11 | from ..generic_dataset import GenericDataset
12 |
13 | class CrowdHuman(GenericDataset):
14 | num_classes = 1
15 | num_joints = 17
16 | default_resolution = [512, 512]
17 | max_objs = 128
18 | class_name = ['person']
19 | cat_ids = {1: 1}
20 | def __init__(self, opt, split):
21 | super(CrowdHuman, self).__init__()
22 | data_dir = os.path.join(opt.data_dir, 'crowdhuman')
23 | img_dir = os.path.join(
24 | data_dir, 'CrowdHuman_{}'.format(split), 'Images')
25 | ann_path = os.path.join(data_dir, 'annotations',
26 | '{}.json').format(split)
27 |
28 |     print('==> initializing CrowdHuman {} data.'.format(split))
29 |
30 | self.images = None
31 | # load image list and coco
32 | super(CrowdHuman, self).__init__(opt, split, ann_path, img_dir)
33 |
34 | self.num_samples = len(self.images)
35 |
36 | print('Loaded {} {} samples'.format(split, self.num_samples))
37 |
38 | def _to_float(self, x):
39 | return float("{:.2f}".format(x))
40 |
41 | def _save_results(self, records, fpath):
42 | with open(fpath,'w') as fid:
43 | for record in records:
44 | line = json.dumps(record)+'\n'
45 | fid.write(line)
46 | return fpath
47 |
48 | def convert_eval_format(self, all_bboxes):
49 | detections = []
50 | person_id = 1
51 | for image_id in all_bboxes:
52 | if type(all_bboxes[image_id]) != type({}):
53 | # newest format
54 | dtboxes = []
55 | for j in range(len(all_bboxes[image_id])):
56 | item = all_bboxes[image_id][j]
57 | if item['class'] != person_id:
58 | continue
59 | bbox = item['bbox']
60 | bbox[2] -= bbox[0]
61 | bbox[3] -= bbox[1]
62 | bbox_out = list(map(self._to_float, bbox[0:4]))
63 | detection = {
64 | "tag": 1,
65 | "box": bbox_out,
66 | "score": float("{:.2f}".format(item['score']))
67 | }
68 | dtboxes.append(detection)
69 | img_info = self.coco.loadImgs(ids=[image_id])[0]
70 | file_name = img_info['file_name']
71 | detections.append({'ID': file_name[:-4], 'dtboxes': dtboxes})
72 | return detections
73 |
74 | def __len__(self):
75 | return self.num_samples
76 |
77 | def save_results(self, results, save_dir):
78 | self._save_results(self.convert_eval_format(results),
79 | '{}/results_crowdhuman.odgt'.format(save_dir))
80 | def run_eval(self, results, save_dir):
81 | self.save_results(results, save_dir)
82 | try:
83 | os.system('python tools/crowdhuman_eval/demo.py ' + \
84 | '../data/crowdhuman/annotation_val.odgt ' + \
85 | '{}/results_crowdhuman.odgt'.format(save_dir))
86 | except:
87 | print('Crowdhuman evaluation not setup!')
--------------------------------------------------------------------------------
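
_save_results above writes line-delimited JSON (.odgt): one record per image, carrying the image ID (file name without extension) and its detected boxes. A standalone sketch with made-up values:

import json

records = [
    {'ID': 'example_image', 'dtboxes': [
        {'tag': 1, 'box': [10.0, 20.0, 100.0, 50.0], 'score': 0.91},
        {'tag': 1, 'box': [250.0, 40.0, 80.0, 200.0], 'score': 0.67}]},
]
with open('/tmp/results_crowdhuman.odgt', 'w') as fid:
    for record in records:
        fid.write(json.dumps(record) + '\n')   # one JSON object per line

--------------------------------------------------------------------------------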
/src/lib/dataset/datasets/custom_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from ..generic_dataset import GenericDataset
6 |
7 | class CustomDataset(GenericDataset):
8 | num_categories = 1
9 | default_resolution = [-1, -1]
10 | class_name = ['']
11 | max_objs = 128
12 | cat_ids = {1: 1}
13 | def __init__(self, opt, split):
14 | assert (opt.custom_dataset_img_path != '') and \
15 | (opt.custom_dataset_ann_path != '') and \
16 | (opt.num_classes != -1) and \
17 | (opt.input_h != -1) and (opt.input_w != -1), \
18 | 'The following arguments must be specified for custom datasets: ' + \
19 | 'custom_dataset_img_path, custom_dataset_ann_path, num_classes, ' + \
20 | 'input_h, input_w.'
21 | img_dir = opt.custom_dataset_img_path
22 | ann_path = opt.custom_dataset_ann_path
23 | self.num_categories = opt.num_classes
24 | self.class_name = ['' for _ in range(self.num_categories)]
25 | self.default_resolution = [opt.input_h, opt.input_w]
26 | self.cat_ids = {i: i for i in range(1, self.num_categories + 1)}
27 |
28 | self.images = None
29 | # load image list and coco
30 | super().__init__(opt, split, ann_path, img_dir)
31 |
32 | self.num_samples = len(self.images)
33 | print('Loaded Custom dataset {} samples'.format(self.num_samples))
34 |
35 | def __len__(self):
36 | return self.num_samples
37 |
38 | def run_eval(self, results, save_dir):
39 | pass
40 |
--------------------------------------------------------------------------------
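
CustomDataset refuses to load unless the image path, annotation path, class count and input resolution are given on the command line; the assertion above checks exactly those fields. A sketch of an opt namespace that would pass the check (paths are placeholders; the real opt comes from src/lib/opts.py):

from argparse import Namespace

opt = Namespace(
    custom_dataset_img_path='/path/to/images',
    custom_dataset_ann_path='/path/to/annotations.json',  # COCO-style JSON
    num_classes=3, input_h=512, input_w=512)

assert (opt.custom_dataset_img_path != '') and \
       (opt.custom_dataset_ann_path != '') and \
       (opt.num_classes != -1) and \
       (opt.input_h != -1) and (opt.input_w != -1)

--------------------------------------------------------------------------------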
/src/lib/dataset/datasets/kitti.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | import numpy as np
7 | import torch
8 | import json
9 | import cv2
10 | import os
11 | import math
12 |
13 | from ..generic_dataset import GenericDataset
14 | from utils.ddd_utils import compute_box_3d, project_to_image
15 |
16 | class KITTI(GenericDataset):
17 | num_categories = 3
18 | default_resolution = [384, 1280]
19 | # ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting',
20 | # 'Tram', 'Misc', 'DontCare']
21 | class_name = ['Pedestrian', 'Car', 'Cyclist']
22 | # negative id is for "not as negative sample for abs(id)".
23 | # 0 for ignore losses for all categories in the bounding box region
24 | cat_ids = {1:1, 2:2, 3:3, 4:-2, 5:-2, 6:-1, 7:-9999, 8:-9999, 9:0}
25 | max_objs = 50
26 | def __init__(self, opt, split):
27 | data_dir = os.path.join(opt.data_dir, 'kitti')
28 | img_dir = os.path.join(data_dir, 'images', 'trainval')
29 | if opt.trainval:
30 | split = 'trainval' if split == 'train' else 'test'
31 | img_dir = os.path.join(data_dir, 'images', split)
32 | ann_path = os.path.join(
33 | data_dir, 'annotations', 'kitti_v2_{}.json').format(split)
34 | else:
35 | ann_path = os.path.join(data_dir,
36 | 'annotations', 'kitti_v2_{}_{}.json').format(opt.kitti_split, split)
37 |
38 | self.images = None
39 | # load image list and coco
40 | super(KITTI, self).__init__(opt, split, ann_path, img_dir)
41 | self.alpha_in_degree = False
42 | self.num_samples = len(self.images)
43 |
44 | print('Loaded {} {} samples'.format(split, self.num_samples))
45 |
46 |
47 | def __len__(self):
48 | return self.num_samples
49 |
50 | def _to_float(self, x):
51 | return float("{:.2f}".format(x))
52 |
53 | def convert_eval_format(self, all_bboxes):
54 | pass
55 |
56 | def save_results(self, results, save_dir):
57 | results_dir = os.path.join(save_dir, 'results_kitti')
58 | if not os.path.exists(results_dir):
59 | os.mkdir(results_dir)
60 | for img_id in results.keys():
61 | out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id))
62 | f = open(out_path, 'w')
63 | for i in range(len(results[img_id])):
64 | item = results[img_id][i]
65 | category_id = item['class']
66 | cls_name_ind = category_id
67 | class_name = self.class_name[cls_name_ind - 1]
68 | if not ('alpha' in item):
69 | item['alpha'] = -1
70 | if not ('rot_y' in item):
71 | item['rot_y'] = -1
72 | if 'dim' in item:
73 | item['dim'] = [max(item['dim'][0], 0.01),
74 | max(item['dim'][1], 0.01), max(item['dim'][2], 0.01)]
75 | if not ('dim' in item):
76 | item['dim'] = [-1000, -1000, -1000]
77 | if not ('loc' in item):
78 | item['loc'] = [-1000, -1000, -1000]
79 | f.write('{} 0.0 0'.format(class_name))
80 | f.write(' {:.2f}'.format(item['alpha']))
81 | f.write(' {:.2f} {:.2f} {:.2f} {:.2f}'.format(
82 | item['bbox'][0], item['bbox'][1], item['bbox'][2], item['bbox'][3]))
83 |
84 | f.write(' {:.2f} {:.2f} {:.2f}'.format(
85 | item['dim'][0], item['dim'][1], item['dim'][2]))
86 | f.write(' {:.2f} {:.2f} {:.2f}'.format(
87 | item['loc'][0], item['loc'][1], item['loc'][2]))
88 | f.write(' {:.2f} {:.2f}\n'.format(item['rot_y'], item['score']))
89 | f.close()
90 |
91 | def run_eval(self, results, save_dir):
92 | # import pdb; pdb.set_trace()
93 | self.save_results(results, save_dir)
94 | print('Results of IoU threshold 0.7')
95 | os.system('./tools/kitti_eval/evaluate_object_3d_offline_07 ' + \
96 | '../data/kitti/training/label_val ' + \
97 | '{}/results_kitti/'.format(save_dir))
98 | print('Results of IoU threshold 0.5')
99 | os.system('./tools/kitti_eval/evaluate_object_3d_offline ' + \
100 | '../data/kitti/training/label_val ' + \
101 | '{}/results_kitti/'.format(save_dir))
102 |
103 |
--------------------------------------------------------------------------------
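
save_results above emits one text file per image in the KITTI label format: class, truncation, occlusion, alpha, the 2D box, the 3D dimensions and location, rotation_y and score. A standalone sketch of a single line, mirroring the sequence of f.write calls (numbers are illustrative):

item = {'alpha': -0.52, 'bbox': [100.0, 120.0, 200.0, 260.0],
        'dim': [1.6, 1.7, 3.9], 'loc': [2.0, 1.5, 20.0],
        'rot_y': -0.42, 'score': 0.87}
line = ('Car 0.0 0'
        + ' {:.2f}'.format(item['alpha'])
        + ' {:.2f} {:.2f} {:.2f} {:.2f}'.format(*item['bbox'])
        + ' {:.2f} {:.2f} {:.2f}'.format(*item['dim'])
        + ' {:.2f} {:.2f} {:.2f}'.format(*item['loc'])
        + ' {:.2f} {:.2f}'.format(item['rot_y'], item['score']))
print(line)
# Car 0.0 0 -0.52 100.00 120.00 200.00 260.00 1.60 1.70 3.90 2.00 1.50 20.00 -0.42 0.87

--------------------------------------------------------------------------------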
/src/lib/dataset/datasets/kitti_tracking.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | import numpy as np
7 | import torch
8 | import json
9 | import cv2
10 | import os
11 | import math
12 |
13 | from ..generic_dataset import GenericDataset
14 | from utils.ddd_utils import compute_box_3d, project_to_image
15 |
16 | class KITTITracking(GenericDataset):
17 | num_categories = 3
18 | default_resolution = [384, 1280]
19 | class_name = ['Pedestrian', 'Car', 'Cyclist']
20 | # negative id is for "not as negative sample for abs(id)".
21 | # 0 for ignore losses for all categories in the bounding box region
22 | # ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting',
23 | # 'Tram', 'Misc', 'DontCare']
24 | cat_ids = {1:1, 2:2, 3:3, 4:-2, 5:-2, 6:-1, 7:-9999, 8:-9999, 9:0}
25 | max_objs = 50
26 | def __init__(self, opt, split):
27 | data_dir = os.path.join(opt.data_dir, 'kitti_tracking')
28 | split_ = 'train' if opt.dataset_version != 'test' else 'test' #'test'
29 | img_dir = os.path.join(
30 | data_dir, 'data_tracking_image_2', '{}ing'.format(split_), 'image_02')
31 | ann_file_ = split_ if opt.dataset_version == '' else opt.dataset_version
32 |     if opt.dataset_version == '': print('Warning! opt.dataset_version is not set')
33 | ann_path = os.path.join(
34 | data_dir, 'annotations', 'tracking_{}.json'.format(
35 | ann_file_))
36 | self.images = None
37 | super(KITTITracking, self).__init__(opt, split, ann_path, img_dir)
38 | self.alpha_in_degree = False
39 | self.num_samples = len(self.images)
40 |
41 | print('Loaded {} {} samples'.format(split, self.num_samples))
42 |
43 |
44 | def __len__(self):
45 | return self.num_samples
46 |
47 | def _to_float(self, x):
48 | return float("{:.2f}".format(x))
49 |
50 |
51 | def save_results(self, results, save_dir):
52 | results_dir = os.path.join(save_dir, 'results_kitti_tracking')
53 | if not os.path.exists(results_dir):
54 | os.mkdir(results_dir)
55 |
56 | for video in self.coco.dataset['videos']:
57 | video_id = video['id']
58 | file_name = video['file_name']
59 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name))
60 | f = open(out_path, 'w')
61 | images = self.video_to_images[video_id]
62 |
63 | for image_info in images:
64 | img_id = image_info['id']
65 | if not (img_id in results):
66 | continue
67 | frame_id = image_info['frame_id']
68 | for i in range(len(results[img_id])):
69 | item = results[img_id][i]
70 | category_id = item['class']
71 | cls_name_ind = category_id
72 | class_name = self.class_name[cls_name_ind - 1]
73 | if not ('alpha' in item):
74 | item['alpha'] = -1
75 | if not ('rot_y' in item):
76 | item['rot_y'] = -10
77 | if 'dim' in item:
78 | item['dim'] = [max(item['dim'][0], 0.01),
79 | max(item['dim'][1], 0.01), max(item['dim'][2], 0.01)]
80 | if not ('dim' in item):
81 | item['dim'] = [-1, -1, -1]
82 | if not ('loc' in item):
83 | item['loc'] = [-1000, -1000, -1000]
84 |
85 | track_id = item['tracking_id'] if 'tracking_id' in item else -1
86 | f.write('{} {} {} -1 -1'.format(frame_id - 1, track_id, class_name))
87 | f.write(' {:d}'.format(int(item['alpha'])))
88 | f.write(' {:.2f} {:.2f} {:.2f} {:.2f}'.format(
89 | item['bbox'][0], item['bbox'][1], item['bbox'][2], item['bbox'][3]))
90 |
91 | f.write(' {:d} {:d} {:d}'.format(
92 | int(item['dim'][0]), int(item['dim'][1]), int(item['dim'][2])))
93 | f.write(' {:d} {:d} {:d}'.format(
94 | int(item['loc'][0]), int(item['loc'][1]), int(item['loc'][2])))
95 | f.write(' {:d} {:.2f}\n'.format(int(item['rot_y']), item['score']))
96 | f.close()
97 |
98 | def run_eval(self, results, save_dir):
99 | self.save_results(results, save_dir)
100 | os.system('python tools/eval_kitti_track/evaluate_tracking.py ' + \
101 | '{}/results_kitti_tracking/ {}'.format(
102 | save_dir, self.opt.dataset_version))
103 |
104 |
--------------------------------------------------------------------------------
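
The tracking variant above additionally prepends the zero-based frame index and the track id, and writes alpha, the 3D dimensions and the location as integers (the 2D tracking evaluation ignores them). A standalone sketch of one line, using the same fallback values as the code:

frame_id, track_id, class_name = 1, 3, 'Car'
bbox = [100.0, 120.0, 200.0, 260.0]
line = ('{} {} {} -1 -1'.format(frame_id - 1, track_id, class_name)
        + ' {:d}'.format(-1)                          # alpha fallback
        + ' {:.2f} {:.2f} {:.2f} {:.2f}'.format(*bbox)
        + ' {:d} {:d} {:d} {:d} {:d} {:d}'.format(-1, -1, -1, -1000, -1000, -1000)
        + ' {:d} {:.2f}\n'.format(-10, 0.91))          # rot_y fallback, score
print(line.strip())
# 0 3 Car -1 -1 -1 100.00 120.00 200.00 260.00 -1 -1 -1 -1000 -1000 -1000 -10 0.91

--------------------------------------------------------------------------------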
/src/lib/dataset/datasets/mot.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 | from collections import defaultdict
11 | from ..generic_dataset import GenericDataset
12 |
13 | class MOT(GenericDataset):
14 | num_categories = 1
15 | default_resolution = [544, 960]
16 | class_name = ['']
17 | max_objs = 256
18 | cat_ids = {1: 1, -1: -1}
19 | def __init__(self, opt, split):
20 | self.dataset_version = opt.dataset_version
21 | self.year = int(self.dataset_version[:2])
22 | print('Using MOT {} {}'.format(self.year, self.dataset_version))
23 | data_dir = os.path.join(opt.data_dir, 'mot{}'.format(self.year))
24 |
25 | if opt.dataset_version in ['17trainval', '17test']:
26 | ann_file = '{}.json'.format('train' if split == 'train' else \
27 | 'test')
28 | elif opt.dataset_version == '17halftrain':
29 | ann_file = '{}.json'.format('train_half')
30 | elif opt.dataset_version == '17halfval':
31 | ann_file = '{}.json'.format('val_half')
32 | img_dir = os.path.join(data_dir, '{}'.format(
33 | 'test' if 'test' in self.dataset_version else 'train'))
34 |
35 | print('ann_file', ann_file)
36 | ann_path = os.path.join(data_dir, 'annotations', ann_file)
37 |
38 | self.images = None
39 | # load image list and coco
40 | super(MOT, self).__init__(opt, split, ann_path, img_dir)
41 |
42 | self.num_samples = len(self.images)
43 | print('Loaded MOT {} {} {} samples'.format(
44 | self.dataset_version, split, self.num_samples))
45 |
46 | def _to_float(self, x):
47 | return float("{:.2f}".format(x))
48 |
49 | def __len__(self):
50 | return self.num_samples
51 |
52 | def save_results(self, results, save_dir):
53 | results_dir = os.path.join(save_dir, 'results_mot{}'.format(self.dataset_version))
54 | if not os.path.exists(results_dir):
55 | os.mkdir(results_dir)
56 | for video in self.coco.dataset['videos']:
57 | video_id = video['id']
58 | file_name = video['file_name']
59 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name))
60 | f = open(out_path, 'w')
61 | images = self.video_to_images[video_id]
62 | tracks = defaultdict(list)
63 | for image_info in images:
64 | if not (image_info['id'] in results):
65 | continue
66 | result = results[image_info['id']]
67 | frame_id = image_info['frame_id']
68 | for item in result:
69 | if not ('tracking_id' in item):
70 | item['tracking_id'] = np.random.randint(100000)
71 | if item['active'] == 0:
72 | continue
73 | tracking_id = item['tracking_id']
74 | bbox = item['bbox']
75 | bbox = [bbox[0], bbox[1], bbox[2], bbox[3]]
76 | tracks[tracking_id].append([frame_id] + bbox)
77 | rename_track_id = 0
78 | for track_id in sorted(tracks):
79 | rename_track_id += 1
80 | for t in tracks[track_id]:
81 | f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n'.format(
82 | t[0], rename_track_id, t[1], t[2], t[3]-t[1], t[4]-t[2]))
83 | f.close()
84 |
85 | def run_eval(self, results, save_dir):
86 | self.save_results(results, save_dir)
87 | gt_type_str = '{}'.format(
88 | '_train_half' if '17halftrain' in self.opt.dataset_version \
89 | else '_val_half' if '17halfval' in self.opt.dataset_version \
90 | else '')
91 | gt_type_str = '_val_half' if self.year in [16, 19] else gt_type_str
92 | gt_type_str = '--gt_type {}'.format(gt_type_str) if gt_type_str != '' else \
93 | ''
94 | os.system('python tools/eval_motchallenge.py ' + \
95 | '../data/mot{}/{}/ '.format(self.year, 'train') + \
96 | '{}/results_mot{}/ '.format(save_dir, self.dataset_version) + \
97 | gt_type_str + ' --eval_official')
98 |
--------------------------------------------------------------------------------
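
save_results above writes one MOTChallenge-style text file per sequence; each line holds the frame index, the renumbered track id, the box as left/top/width/height, and four unused fields. A standalone sketch:

# The stored track entry is [frame_id, x1, y1, x2, y2]; the box is converted
# to x, y, w, h on write (values are illustrative).
t, rename_track_id = [1, 120.5, 80.0, 180.5, 220.0], 1
print('{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1'.format(
    t[0], rename_track_id, t[1], t[2], t[3] - t[1], t[4] - t[2]))
# 1,1,120.50,80.00,60.00,140.00,-1,-1,-1,-1

--------------------------------------------------------------------------------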
/src/lib/external/.gitignore:
--------------------------------------------------------------------------------
1 | bbox.c
2 | bbox.cpython-35m-x86_64-linux-gnu.so
3 | bbox.cpython-36m-x86_64-linux-gnu.so
4 |
5 | nms.c
6 | nms.cpython-35m-x86_64-linux-gnu.so
7 | nms.cpython-36m-x86_64-linux-gnu.so
8 |
--------------------------------------------------------------------------------
/src/lib/external/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 |
--------------------------------------------------------------------------------
/src/lib/external/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/src/lib/external/__init__.py
--------------------------------------------------------------------------------
/src/lib/external/setup.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from distutils.core import setup
3 | from distutils.extension import Extension
4 | from Cython.Build import cythonize
5 |
6 | extensions = [
7 | Extension(
8 | "nms",
9 | ["nms.pyx"],
10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
11 | )
12 | ]
13 |
14 | setup(
15 | name="coco",
16 | ext_modules=cythonize(extensions),
17 | include_dirs=[numpy.get_include()]
18 | )
19 |
--------------------------------------------------------------------------------
/src/lib/logger.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
6 | import os
7 | import time
8 | import sys
9 | import torch
10 | import subprocess
11 | USE_TENSORBOARD = True
12 | try:
13 | import tensorboardX
14 | print('Using tensorboardX')
15 | except:
16 | USE_TENSORBOARD = False
17 |
18 | class Logger(object):
19 | def __init__(self, opt):
20 | """Create a summary writer logging to log_dir."""
21 | if not os.path.exists(opt.save_dir):
22 | os.makedirs(opt.save_dir)
23 | if not os.path.exists(opt.debug_dir):
24 | os.makedirs(opt.debug_dir)
25 |
26 | time_str = time.strftime('%Y-%m-%d-%H-%M')
27 |
28 | args = dict((name, getattr(opt, name)) for name in dir(opt)
29 | if not name.startswith('_'))
30 | file_name = os.path.join(opt.save_dir, 'opt.txt')
31 | with open(file_name, 'wt') as opt_file:
32 | opt_file.write('==> commit hash: {}\n'.format(
33 | subprocess.check_output(["git", "describe"])))
34 | opt_file.write('==> torch version: {}\n'.format(torch.__version__))
35 | opt_file.write('==> cudnn version: {}\n'.format(
36 | torch.backends.cudnn.version()))
37 | opt_file.write('==> Cmd:\n')
38 | opt_file.write(str(sys.argv))
39 | opt_file.write('\n==> Opt:\n')
40 | for k, v in sorted(args.items()):
41 | opt_file.write(' %s: %s\n' % (str(k), str(v)))
42 |
43 | log_dir = opt.save_dir + '/logs_{}'.format(time_str)
44 | if USE_TENSORBOARD:
45 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
46 | else:
47 | if not os.path.exists(os.path.dirname(log_dir)):
48 | os.mkdir(os.path.dirname(log_dir))
49 | if not os.path.exists(log_dir):
50 | os.mkdir(log_dir)
51 |     self.log = open(log_dir + '/log.txt', 'w')
52 |     try:
53 |       os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir))
54 |     except:
55 |       pass
56 | self.start_line = True
57 |
58 | def write(self, txt):
59 | if self.start_line:
60 | time_str = time.strftime('%Y-%m-%d-%H-%M')
61 | self.log.write('{}: {}'.format(time_str, txt))
62 | else:
63 | self.log.write(txt)
64 | self.start_line = False
65 | if '\n' in txt:
66 | self.start_line = True
67 | self.log.flush()
68 |
69 | def close(self):
70 | self.log.close()
71 |
72 | def scalar_summary(self, tag, value, step):
73 | """Log a scalar variable."""
74 | if USE_TENSORBOARD:
75 | self.writer.add_scalar(tag, value, step)
76 |
--------------------------------------------------------------------------------
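
A usage sketch for Logger (not part of the repository). It assumes src/lib is importable and an opt namespace with the directory fields Logger reads; the calls are left commented because the constructor creates files and shells out to git.

from argparse import Namespace
# Logger needs opt.save_dir and opt.debug_dir (normally set by src/lib/opts.py);
# it also records `git describe`, the torch/cudnn versions and every opt field
# into <save_dir>/opt.txt.
opt = Namespace(save_dir='/tmp/centertrack_exp',
                debug_dir='/tmp/centertrack_exp/debug')
# from logger import Logger                    # src/lib must be on sys.path
# logger = Logger(opt)
# logger.write('epoch: 1 | tot 0.53\n')        # timestamped line in log.txt
# logger.scalar_summary('train_tot', 0.53, 1)  # TensorBoard scalar if tensorboardX is installed
# logger.close()

--------------------------------------------------------------------------------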
/src/lib/model/data_parallel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 |
7 |
8 | from .scatter_gather import scatter_kwargs
9 |
10 | class _DataParallel(Module):
11 | r"""Implements data parallelism at the module level.
12 |
13 | This container parallelizes the application of the given module by
14 | splitting the input across the specified devices by chunking in the batch
15 | dimension. In the forward pass, the module is replicated on each device,
16 | and each replica handles a portion of the input. During the backwards
17 | pass, gradients from each replica are summed into the original module.
18 |
19 | The batch size should be larger than the number of GPUs used. It should
20 | also be an integer multiple of the number of GPUs so that each chunk is the
21 | same size (so that each GPU processes the same number of samples).
22 |
23 | See also: :ref:`cuda-nn-dataparallel-instead`
24 |
25 | Arbitrary positional and keyword inputs are allowed to be passed into
26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim
27 | specified (default 0). Primitive types will be broadcasted, but all
28 | other types will be a shallow copy and can be corrupted if written to in
29 | the model's forward pass.
30 |
31 | Args:
32 | module: module to be parallelized
33 | device_ids: CUDA devices (default: all devices)
34 | output_device: device location of output (default: device_ids[0])
35 |
36 | Example::
37 |
38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
39 | >>> output = net(input_var)
40 | """
41 |
42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well
43 |
44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
45 | super(_DataParallel, self).__init__()
46 |
47 | if not torch.cuda.is_available():
48 | self.module = module
49 | self.device_ids = []
50 | return
51 |
52 | if device_ids is None:
53 | device_ids = list(range(torch.cuda.device_count()))
54 | if output_device is None:
55 | output_device = device_ids[0]
56 | self.dim = dim
57 | self.module = module
58 | self.device_ids = device_ids
59 | self.chunk_sizes = chunk_sizes
60 | self.output_device = output_device
61 | if len(self.device_ids) == 1:
62 | self.module.cuda(device_ids[0])
63 |
64 | def forward(self, *inputs, **kwargs):
65 | if not self.device_ids:
66 | return self.module(*inputs, **kwargs)
67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
68 | if len(self.device_ids) == 1:
69 | return self.module(*inputs[0], **kwargs[0])
70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
71 | outputs = self.parallel_apply(replicas, inputs, kwargs)
72 | return self.gather(outputs, self.output_device)
73 |
74 | def replicate(self, module, device_ids):
75 | return replicate(module, device_ids)
76 |
77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes):
78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes)
79 |
80 | def parallel_apply(self, replicas, inputs, kwargs):
81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
82 |
83 | def gather(self, outputs, output_device):
84 | return gather(outputs, output_device, dim=self.dim)
85 |
86 |
87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids.
89 |
90 | This is the functional version of the DataParallel module.
91 |
92 | Args:
93 | module: the module to evaluate in parallel
94 | inputs: inputs to the module
95 | device_ids: GPU ids on which to replicate module
96 |         output_device: GPU location of the output. Use -1 to indicate the CPU.
97 | (default: device_ids[0])
98 | Returns:
99 | a Variable containing the result of module(input) located on
100 | output_device
101 | """
102 | if not isinstance(inputs, tuple):
103 | inputs = (inputs,)
104 |
105 | if device_ids is None:
106 | device_ids = list(range(torch.cuda.device_count()))
107 |
108 | if output_device is None:
109 | output_device = device_ids[0]
110 |
111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
112 | if len(device_ids) == 1:
113 | return module(*inputs[0], **module_kwargs[0])
114 | used_device_ids = device_ids[:len(inputs)]
115 | replicas = replicate(module, used_device_ids)
116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
117 | return gather(outputs, output_device, dim)
118 |
119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
120 | if chunk_sizes is None:
121 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
122 | standard_size = True
123 | for i in range(1, len(chunk_sizes)):
124 | if chunk_sizes[i] != chunk_sizes[0]:
125 | standard_size = False
126 | if standard_size:
127 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes)
--------------------------------------------------------------------------------
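
The wrapper above only departs from torch.nn.DataParallel when chunk_sizes is uneven; scatter_kwargs then splits the batch according to those sizes. A usage sketch (assumes src/lib is importable and two visible GPUs, so the parallel calls are left commented):

import torch
import torch.nn as nn
# from model.data_parallel import DataParallel   # src/lib must be on sys.path

model = nn.Linear(8, 2)
# Even chunks fall back to torch.nn.DataParallel; uneven chunks use _DataParallel,
# here placing 6 samples on GPU 0 and 10 on GPU 1 for a batch of 16.
# net = DataParallel(model.cuda(), device_ids=[0, 1], chunk_sizes=[6, 10])
# out = net(torch.randn(16, 8).cuda())
# print(out.shape)   # torch.Size([16, 2])

--------------------------------------------------------------------------------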
/src/lib/model/decode.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | from .utils import _gather_feat, _tranpose_and_gather_feat
8 | from .utils import _nms, _topk, _topk_channel
9 |
10 |
11 | def _update_kps_with_hm(
12 | kps, output, batch, num_joints, K, bboxes=None, scores=None):
13 | if 'hm_hp' in output:
14 | hm_hp = output['hm_hp']
15 | hm_hp = _nms(hm_hp)
16 | thresh = 0.2
17 | kps = kps.view(batch, K, num_joints, 2).permute(
18 | 0, 2, 1, 3).contiguous() # b x J x K x 2
19 | reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2)
20 | hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K) # b x J x K
21 | if 'hp_offset' in output or 'reg' in output:
22 | hp_offset = output['hp_offset'] if 'hp_offset' in output \
23 | else output['reg']
24 | hp_offset = _tranpose_and_gather_feat(
25 | hp_offset, hm_inds.view(batch, -1))
26 | hp_offset = hp_offset.view(batch, num_joints, K, 2)
27 | hm_xs = hm_xs + hp_offset[:, :, :, 0]
28 | hm_ys = hm_ys + hp_offset[:, :, :, 1]
29 | else:
30 | hm_xs = hm_xs + 0.5
31 | hm_ys = hm_ys + 0.5
32 |
33 | mask = (hm_score > thresh).float()
34 | hm_score = (1 - mask) * -1 + mask * hm_score
35 | hm_ys = (1 - mask) * (-10000) + mask * hm_ys
36 | hm_xs = (1 - mask) * (-10000) + mask * hm_xs
37 | hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze(
38 | 2).expand(batch, num_joints, K, K, 2)
39 | dist = (((reg_kps - hm_kps) ** 2).sum(dim=4) ** 0.5)
40 | min_dist, min_ind = dist.min(dim=3) # b x J x K
41 | hm_score = hm_score.gather(2, min_ind).unsqueeze(-1) # b x J x K x 1
42 | min_dist = min_dist.unsqueeze(-1)
43 | min_ind = min_ind.view(batch, num_joints, K, 1, 1).expand(
44 | batch, num_joints, K, 1, 2)
45 | hm_kps = hm_kps.gather(3, min_ind)
46 | hm_kps = hm_kps.view(batch, num_joints, K, 2)
47 | mask = (hm_score < thresh)
48 |
49 | if bboxes is not None:
50 | l = bboxes[:, :, 0].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
51 | t = bboxes[:, :, 1].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
52 | r = bboxes[:, :, 2].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
53 | b = bboxes[:, :, 3].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
54 | mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \
55 | (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + mask
56 | else:
57 | l = kps[:, :, :, 0:1].min(dim=1, keepdim=True)[0]
58 | t = kps[:, :, :, 1:2].min(dim=1, keepdim=True)[0]
59 | r = kps[:, :, :, 0:1].max(dim=1, keepdim=True)[0]
60 | b = kps[:, :, :, 1:2].max(dim=1, keepdim=True)[0]
61 | margin = 0.25
62 | l = l - (r - l) * margin
63 | r = r + (r - l) * margin
64 | t = t - (b - t) * margin
65 | b = b + (b - t) * margin
66 | mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \
67 | (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + mask
68 | # sc = (kps[:, :, :, :].max(dim=1, keepdim=True) - kps[:, :, :, :].min(dim=1))
69 | # mask = mask + (min_dist > 10)
70 | mask = (mask > 0).float()
71 | kps_score = (1 - mask) * hm_score + mask * \
72 | scores.unsqueeze(-1).expand(batch, num_joints, K, 1) # bJK1
73 | kps_score = scores * kps_score.mean(dim=1).view(batch, K)
74 | # kps_score[scores < 0.1] = 0
75 | mask = mask.expand(batch, num_joints, K, 2)
76 | kps = (1 - mask) * hm_kps + mask * kps
77 | kps = kps.permute(0, 2, 1, 3).contiguous().view(
78 | batch, K, num_joints * 2)
79 | return kps, kps_score
80 | else:
81 | return kps, kps
82 |
83 | def generic_decode(output, K=100, opt=None):
84 | if not ('hm' in output):
85 | return {}
86 |
87 | if opt.zero_tracking:
88 | output['tracking'] *= 0
89 |
90 | heat = output['hm']
91 | batch, cat, height, width = heat.size()
92 |
93 | heat = _nms(heat)
94 | scores, inds, clses, ys0, xs0 = _topk(heat, K=K)
95 |
96 | clses = clses.view(batch, K)
97 | scores = scores.view(batch, K)
98 | bboxes = None
99 | cts = torch.cat([xs0.unsqueeze(2), ys0.unsqueeze(2)], dim=2)
100 | ret = {'scores': scores, 'clses': clses.float(),
101 | 'xs': xs0, 'ys': ys0, 'cts': cts}
102 | if 'reg' in output:
103 | reg = output['reg']
104 | reg = _tranpose_and_gather_feat(reg, inds)
105 | reg = reg.view(batch, K, 2)
106 | xs = xs0.view(batch, K, 1) + reg[:, :, 0:1]
107 | ys = ys0.view(batch, K, 1) + reg[:, :, 1:2]
108 | else:
109 | xs = xs0.view(batch, K, 1) + 0.5
110 | ys = ys0.view(batch, K, 1) + 0.5
111 |
112 | if 'wh' in output:
113 | wh = output['wh']
114 | wh = _tranpose_and_gather_feat(wh, inds) # B x K x (F)
115 | # wh = wh.view(batch, K, -1)
116 | wh = wh.view(batch, K, 2)
117 | wh[wh < 0] = 0
118 | if wh.size(2) == 2 * cat: # cat spec
119 | wh = wh.view(batch, K, -1, 2)
120 | cats = clses.view(batch, K, 1, 1).expand(batch, K, 1, 2)
121 | wh = wh.gather(2, cats.long()).squeeze(2) # B x K x 2
122 | else:
123 | pass
124 | bboxes = torch.cat([xs - wh[..., 0:1] / 2,
125 | ys - wh[..., 1:2] / 2,
126 | xs + wh[..., 0:1] / 2,
127 | ys + wh[..., 1:2] / 2], dim=2)
128 | ret['bboxes'] = bboxes
129 | # print('ret bbox', ret['bboxes'])
130 |
131 | if 'ltrb' in output:
132 | ltrb = output['ltrb']
133 | ltrb = _tranpose_and_gather_feat(ltrb, inds) # B x K x 4
134 | ltrb = ltrb.view(batch, K, 4)
135 | bboxes = torch.cat([xs0.view(batch, K, 1) + ltrb[..., 0:1],
136 | ys0.view(batch, K, 1) + ltrb[..., 1:2],
137 | xs0.view(batch, K, 1) + ltrb[..., 2:3],
138 | ys0.view(batch, K, 1) + ltrb[..., 3:4]], dim=2)
139 | ret['bboxes'] = bboxes
140 |
141 |
142 | regression_heads = ['tracking', 'dep', 'rot', 'dim', 'amodel_offset',
143 | 'nuscenes_att', 'velocity']
144 |
145 | for head in regression_heads:
146 | if head in output:
147 | ret[head] = _tranpose_and_gather_feat(
148 | output[head], inds).view(batch, K, -1)
149 |
150 | if 'ltrb_amodal' in output:
151 | ltrb_amodal = output['ltrb_amodal']
152 | ltrb_amodal = _tranpose_and_gather_feat(ltrb_amodal, inds) # B x K x 4
153 | ltrb_amodal = ltrb_amodal.view(batch, K, 4)
154 | bboxes_amodal = torch.cat([xs0.view(batch, K, 1) + ltrb_amodal[..., 0:1],
155 | ys0.view(batch, K, 1) + ltrb_amodal[..., 1:2],
156 | xs0.view(batch, K, 1) + ltrb_amodal[..., 2:3],
157 | ys0.view(batch, K, 1) + ltrb_amodal[..., 3:4]], dim=2)
158 | ret['bboxes_amodal'] = bboxes_amodal
159 | ret['bboxes'] = bboxes_amodal
160 |
161 | if 'hps' in output:
162 | kps = output['hps']
163 | num_joints = kps.shape[1] // 2
164 | kps = _tranpose_and_gather_feat(kps, inds)
165 | kps = kps.view(batch, K, num_joints * 2)
166 | kps[..., ::2] += xs0.view(batch, K, 1).expand(batch, K, num_joints)
167 | kps[..., 1::2] += ys0.view(batch, K, 1).expand(batch, K, num_joints)
168 | kps, kps_score = _update_kps_with_hm(
169 | kps, output, batch, num_joints, K, bboxes, scores)
170 | ret['hps'] = kps
171 | ret['kps_score'] = kps_score
172 |
173 | if 'pre_inds' in output and output['pre_inds'] is not None:
174 | pre_inds = output['pre_inds'] # B x pre_K
175 | pre_K = pre_inds.shape[1]
176 | pre_ys = (pre_inds / width).int().float()
177 | pre_xs = (pre_inds % width).int().float()
178 |
179 | ret['pre_cts'] = torch.cat(
180 | [pre_xs.unsqueeze(2), pre_ys.unsqueeze(2)], dim=2)
181 |
182 | return ret
183 |
--------------------------------------------------------------------------------
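
generic_decode above turns each of the top-K heatmap peaks into a box: the peak gives the center, 'reg' refines it below the output stride, and 'wh' gives width and height. A standalone sketch of that box arithmetic for a single peak:

import torch

# One peak at (40, 30) in output-stride coordinates with predicted size 16 x 24.
xs = torch.tensor([[[40.0]]])          # B x K x 1
ys = torch.tensor([[[30.0]]])
wh = torch.tensor([[[16.0, 24.0]]])    # B x K x 2
bboxes = torch.cat([xs - wh[..., 0:1] / 2, ys - wh[..., 1:2] / 2,
                    xs + wh[..., 0:1] / 2, ys + wh[..., 1:2] / 2], dim=2)
print(bboxes)   # tensor([[[32., 18., 48., 42.]]])

--------------------------------------------------------------------------------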
/src/lib/model/losses.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Portions of this code are from
3 | # CornerNet (https://github.com/princeton-vl/CornerNet)
4 | # Copyright (c) 2018, University of Michigan
5 | # Licensed under the BSD 3-Clause License
6 | # ------------------------------------------------------------------------------
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import torch
12 | import torch.nn as nn
13 | from .utils import _tranpose_and_gather_feat, _nms, _topk
14 | import torch.nn.functional as F
15 | from utils.image import draw_umich_gaussian
16 |
17 | def _slow_neg_loss(pred, gt):
18 | '''focal loss from CornerNet'''
19 |   pos_inds = gt.eq(1)
20 |   neg_inds = gt.lt(1)
21 |
22 | neg_weights = torch.pow(1 - gt[neg_inds], 4)
23 |
24 | loss = 0
25 | pos_pred = pred[pos_inds]
26 | neg_pred = pred[neg_inds]
27 |
28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
30 |
31 | num_pos = pos_inds.float().sum()
32 | pos_loss = pos_loss.sum()
33 | neg_loss = neg_loss.sum()
34 |
35 | if pos_pred.nelement() == 0:
36 | loss = loss - neg_loss
37 | else:
38 | loss = loss - (pos_loss + neg_loss) / num_pos
39 | return loss
40 |
41 | def _neg_loss(pred, gt):
42 | ''' Reimplemented focal loss. Exactly the same as CornerNet.
43 | Runs faster and costs a little bit more memory
44 | Arguments:
45 | pred (batch x c x h x w)
46 | gt_regr (batch x c x h x w)
47 | '''
48 | pos_inds = gt.eq(1).float()
49 | neg_inds = gt.lt(1).float()
50 |
51 | neg_weights = torch.pow(1 - gt, 4)
52 |
53 | loss = 0
54 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
55 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
56 |
57 | num_pos = pos_inds.float().sum()
58 | pos_loss = pos_loss.sum()
59 | neg_loss = neg_loss.sum()
60 | if num_pos == 0:
61 | loss = loss - neg_loss
62 | else:
63 | loss = loss - (pos_loss + neg_loss) / num_pos
64 | return loss
65 |
66 |
67 | def _only_neg_loss(pred, gt):
68 | gt = torch.pow(1 - gt, 4)
69 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * gt
70 | return neg_loss.sum()
71 |
72 | class FastFocalLoss(nn.Module):
73 | '''
74 | Reimplemented focal loss, exactly the same as the CornerNet version.
75 | Faster and costs much less memory.
76 | '''
77 | def __init__(self, opt=None):
78 | super(FastFocalLoss, self).__init__()
79 | self.only_neg_loss = _only_neg_loss
80 |
81 | def forward(self, out, target, ind, mask, cat):
82 | '''
83 | Arguments:
84 | out, target: B x C x H x W
85 | ind, mask: B x M
86 | cat (category id for peaks): B x M
87 | '''
88 | neg_loss = self.only_neg_loss(out, target)
89 | pos_pred_pix = _tranpose_and_gather_feat(out, ind) # B x M x C
90 | pos_pred = pos_pred_pix.gather(2, cat.unsqueeze(2)) # B x M
91 | num_pos = mask.sum()
92 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * \
93 | mask.unsqueeze(2)
94 | pos_loss = pos_loss.sum()
95 | if num_pos == 0:
96 | return - neg_loss
97 | return - (pos_loss + neg_loss) / num_pos
98 |
99 | def _reg_loss(regr, gt_regr, mask):
100 | ''' L1 regression loss
101 | Arguments:
102 | regr (batch x max_objects x dim)
103 | gt_regr (batch x max_objects x dim)
104 | mask (batch x max_objects)
105 | '''
106 | num = mask.float().sum()
107 | mask = mask.unsqueeze(2).expand_as(gt_regr).float()
108 |
109 | regr = regr * mask
110 | gt_regr = gt_regr * mask
111 |
112 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum')
113 | regr_loss = regr_loss / (num + 1e-4)
114 | return regr_loss
115 |
116 |
117 | class RegWeightedL1Loss(nn.Module):
118 | def __init__(self):
119 | super(RegWeightedL1Loss, self).__init__()
120 |
121 | def forward(self, output, mask, ind, target):
122 | pred = _tranpose_and_gather_feat(output, ind)
123 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
124 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
125 | loss = loss / (mask.sum() + 1e-4)
126 | return loss
127 |
128 |
129 | class WeightedBCELoss(nn.Module):
130 | def __init__(self):
131 | super(WeightedBCELoss, self).__init__()
132 | self.bceloss = torch.nn.BCEWithLogitsLoss(reduction='none')
133 |
134 | def forward(self, output, mask, ind, target):
135 | # output: B x F x H x W
136 | # ind: B x M
137 | # mask: B x M x F
138 | # target: B x M x F
139 | pred = _tranpose_and_gather_feat(output, ind) # B x M x F
140 | loss = mask * self.bceloss(pred, target)
141 | loss = loss.sum() / (mask.sum() + 1e-4)
142 | return loss
143 |
144 | class BinRotLoss(nn.Module):
145 | def __init__(self):
146 | super(BinRotLoss, self).__init__()
147 |
148 | def forward(self, output, mask, ind, rotbin, rotres):
149 | pred = _tranpose_and_gather_feat(output, ind)
150 | loss = compute_rot_loss(pred, rotbin, rotres, mask)
151 | return loss
152 |
153 | def compute_res_loss(output, target):
154 |   return F.smooth_l1_loss(output, target, reduction='mean')
155 |
156 | def compute_bin_loss(output, target, mask):
157 | mask = mask.expand_as(output)
158 | output = output * mask.float()
159 |   return F.cross_entropy(output, target, reduction='mean')
160 |
161 | def compute_rot_loss(output, target_bin, target_res, mask):
162 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
163 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
164 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls]
165 | # target_res: (B, 128, 2) [bin1_res, bin2_res]
166 | # mask: (B, 128, 1)
167 | output = output.view(-1, 8)
168 | target_bin = target_bin.view(-1, 2)
169 | target_res = target_res.view(-1, 2)
170 | mask = mask.view(-1, 1)
171 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)
172 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)
173 | loss_res = torch.zeros_like(loss_bin1)
174 | if target_bin[:, 0].nonzero().shape[0] > 0:
175 | idx1 = target_bin[:, 0].nonzero()[:, 0]
176 | valid_output1 = torch.index_select(output, 0, idx1.long())
177 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long())
178 | loss_sin1 = compute_res_loss(
179 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0]))
180 | loss_cos1 = compute_res_loss(
181 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0]))
182 | loss_res += loss_sin1 + loss_cos1
183 | if target_bin[:, 1].nonzero().shape[0] > 0:
184 | idx2 = target_bin[:, 1].nonzero()[:, 0]
185 | valid_output2 = torch.index_select(output, 0, idx2.long())
186 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long())
187 | loss_sin2 = compute_res_loss(
188 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1]))
189 | loss_cos2 = compute_res_loss(
190 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1]))
191 | loss_res += loss_sin2 + loss_cos2
192 | return loss_bin1 + loss_bin2 + loss_res
--------------------------------------------------------------------------------
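
The variants above all implement the same penalty-reduced focal loss: positives (gt == 1) are weighted by (1 - p)^2, negatives by p^2 and (1 - gt)^4, and the sum is normalized by the number of positives. A standalone sketch on a toy 1x1x2x2 heatmap:

import torch

pred = torch.tensor([[[[0.90, 0.20], [0.10, 0.05]]]])   # predicted heatmap
gt   = torch.tensor([[[[1.00, 0.60], [0.30, 0.00]]]])   # Gaussian-splatted target
pos = gt.eq(1).float()
neg = gt.lt(1).float()
pos_loss = (torch.log(pred) * (1 - pred) ** 2 * pos).sum()
neg_loss = (torch.log(1 - pred) * pred ** 2 * (1 - gt) ** 4 * neg).sum()
loss = -(pos_loss + neg_loss) / pos.sum()
print(float(loss))

--------------------------------------------------------------------------------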
/src/lib/model/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torchvision.models as models
6 | import torch
7 | import torch.nn as nn
8 | import os
9 |
10 | from .networks.dla import DLASeg
11 | from .networks.resdcn import PoseResDCN
12 | from .networks.resnet import PoseResNet
13 | from .networks.dlav0 import DLASegv0
14 | from .networks.generic_network import GenericNetwork
15 |
16 | _network_factory = {
17 | 'resdcn': PoseResDCN,
18 | 'dla': DLASeg,
19 | 'res': PoseResNet,
20 | 'dlav0': DLASegv0,
21 | 'generic': GenericNetwork
22 | }
23 |
24 | def create_model(arch, head, head_conv, opt=None):
25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
26 | arch = arch[:arch.find('_')] if '_' in arch else arch
27 | model_class = _network_factory[arch]
28 | model = model_class(num_layers, heads=head, head_convs=head_conv, opt=opt)
29 | return model
30 |
31 | def load_model(model, model_path, opt, optimizer=None):
32 | start_epoch = 0
33 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
34 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
35 | state_dict_ = checkpoint['state_dict']
36 | state_dict = {}
37 |
38 | # convert data_parallal to model
39 | for k in state_dict_:
40 | if k.startswith('module') and not k.startswith('module_list'):
41 | state_dict[k[7:]] = state_dict_[k]
42 | else:
43 | state_dict[k] = state_dict_[k]
44 | model_state_dict = model.state_dict()
45 |
46 | # check loaded parameters and created model parameters
47 | for k in state_dict:
48 | if k in model_state_dict:
49 | if (state_dict[k].shape != model_state_dict[k].shape) or \
50 | (opt.reset_hm and k.startswith('hm') and (state_dict[k].shape[0] in [80, 1])):
51 | if opt.reuse_hm:
52 | print('Reusing parameter {}, required shape{}, '\
53 | 'loaded shape{}.'.format(
54 | k, model_state_dict[k].shape, state_dict[k].shape))
55 |           if state_dict[k].shape[0] < model_state_dict[k].shape[0]:
56 | model_state_dict[k][:state_dict[k].shape[0]] = state_dict[k]
57 | else:
58 | model_state_dict[k] = state_dict[k][:model_state_dict[k].shape[0]]
59 | state_dict[k] = model_state_dict[k]
60 | else:
61 | print('Skip loading parameter {}, required shape{}, '\
62 | 'loaded shape{}.'.format(
63 | k, model_state_dict[k].shape, state_dict[k].shape))
64 | state_dict[k] = model_state_dict[k]
65 | else:
66 | print('Drop parameter {}.'.format(k))
67 | for k in model_state_dict:
68 | if not (k in state_dict):
69 | print('No param {}.'.format(k))
70 | state_dict[k] = model_state_dict[k]
71 | model.load_state_dict(state_dict, strict=False)
72 |
73 | # resume optimizer parameters
74 | if optimizer is not None and opt.resume:
75 | if 'optimizer' in checkpoint:
76 | # optimizer.load_state_dict(checkpoint['optimizer'])
77 | start_epoch = checkpoint['epoch']
78 | start_lr = opt.lr
79 | for step in opt.lr_step:
80 | if start_epoch >= step:
81 | start_lr *= 0.1
82 | for param_group in optimizer.param_groups:
83 | param_group['lr'] = start_lr
84 | print('Resumed optimizer with start lr', start_lr)
85 | else:
86 | print('No optimizer parameters in checkpoint.')
87 | if optimizer is not None:
88 | return model, optimizer, start_epoch
89 | else:
90 | return model
91 |
92 | def save_model(path, epoch, model, optimizer=None):
93 | if isinstance(model, torch.nn.DataParallel):
94 | state_dict = model.module.state_dict()
95 | else:
96 | state_dict = model.state_dict()
97 | data = {'epoch': epoch,
98 | 'state_dict': state_dict}
99 | if not (optimizer is None):
100 | data['optimizer'] = optimizer.state_dict()
101 | torch.save(data, path)
102 |
103 |
--------------------------------------------------------------------------------
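
save_model above writes a plain dict checkpoint with 'epoch', 'state_dict' and, when given, 'optimizer'; load_model reads the same layout and strips a leading 'module.' when the weights come from DataParallel. A standalone sketch of the round trip:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1.25e-4)
torch.save({'epoch': 7,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()}, '/tmp/model_last.pth')

checkpoint = torch.load('/tmp/model_last.pth', map_location='cpu')
print(checkpoint['epoch'], sorted(checkpoint['state_dict'].keys()))
# 7 ['bias', 'weight']

--------------------------------------------------------------------------------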
/src/lib/model/networks/backbones/mobilenet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 | import math
7 | import logging
8 | import numpy as np
9 |
10 | import torch
11 | from torch import nn
12 | import torch.nn.functional as F
13 | import torch.utils.model_zoo as model_zoo
14 | from torchvision.models.utils import load_state_dict_from_url
15 |
16 | BN_MOMENTUM = 0.1
17 |
18 | model_urls = {
19 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
20 | }
21 |
22 | def _make_divisible(v, divisor, min_value=None):
23 | """
24 | This function is taken from the original tf repo.
25 | It ensures that all layers have a channel number that is divisible by 8
26 | It can be seen here:
27 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
28 | :param v:
29 | :param divisor:
30 | :param min_value:
31 | :return:
32 | """
33 | if min_value is None:
34 | min_value = divisor
35 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
36 | # Make sure that round down does not go down by more than 10%.
37 | if new_v < 0.9 * v:
38 | new_v += divisor
39 | return new_v
40 |
41 |
42 | class ConvBNReLU(nn.Sequential):
43 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
44 | padding = (kernel_size - 1) // 2
45 | super(ConvBNReLU, self).__init__(
46 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
47 | nn.BatchNorm2d(out_planes),
48 | nn.ReLU6(inplace=True)
49 | )
50 |
51 |
52 | class InvertedResidual(nn.Module):
53 | def __init__(self, inp, oup, stride, expand_ratio):
54 | super(InvertedResidual, self).__init__()
55 | self.stride = stride
56 | assert stride in [1, 2]
57 |
58 | hidden_dim = int(round(inp * expand_ratio))
59 | self.use_res_connect = self.stride == 1 and inp == oup
60 |
61 | layers = []
62 | if expand_ratio != 1:
63 | # pw
64 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
65 | layers.extend([
66 | # dw
67 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
68 | # pw-linear
69 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
70 | nn.BatchNorm2d(oup),
71 | ])
72 | self.conv = nn.Sequential(*layers)
73 |
74 | def forward(self, x):
75 | if self.use_res_connect:
76 | return x + self.conv(x)
77 | else:
78 | return self.conv(x)
79 |
80 |
81 | class MobileNetV2(nn.Module):
82 | def __init__(self, opt,
83 | width_mult=1.0,
84 | round_nearest=8,
85 | block=None):
86 | """
87 | MobileNet V2 main class
88 | Args:
89 | num_classes (int): Number of classes
90 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
91 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number
92 | Set to 1 to turn off rounding
93 | block: Module specifying inverted residual building block for mobilenet
94 | """
95 | super().__init__()
96 | if block is None:
97 | block = InvertedResidual
98 | input_channel = 32
99 | last_channel = 1280
100 |
101 | inverted_residual_setting = [
102 | # t, c, n, s
103 | [1, 16, 1, 1], # 1
104 | [6, 24, 2, 2], # 2
105 | [6, 32, 3, 2], # 3
106 | [6, 64, 4, 2], # 4
107 | [6, 96, 3, 1], # 5
108 | [6, 160, 3, 2],# 6
109 | [6, 320, 1, 1],# 7
110 | ]
111 |
112 | # only check the first element, assuming user knows t,c,n,s are required
113 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
114 |             raise ValueError("inverted_residual_setting should be non-empty "
115 |                              "and made of 4-element lists, got {}".format(inverted_residual_setting))
116 |
117 | # building first layer
118 | input_channel = _make_divisible(input_channel * width_mult, round_nearest)
119 | # self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
120 | if opt.pre_img:
121 | print('adding pre_img layer...')
122 | self.pre_img_layer = nn.Sequential(
123 | nn.Conv2d(3, input_channel, kernel_size=3, padding=1, stride=2, bias=False),
124 | nn.BatchNorm2d(input_channel))
125 | if opt.pre_hm:
126 | print('adding pre_hm layer...')
127 | self.pre_hm_layer = nn.Sequential(
128 | nn.Conv2d(1, input_channel, kernel_size=3, padding=1, stride=2, bias=False),
129 | nn.BatchNorm2d(input_channel))
130 | features = [ConvBNReLU(3, input_channel, stride=2)]
131 | self.key_block = [True]
132 | all_channels = [input_channel]
133 | self.channels = [input_channel]
134 | # building inverted residual blocks
135 | for t, c, n, s in inverted_residual_setting:
136 | output_channel = _make_divisible(c * width_mult, round_nearest)
137 | for i in range(n):
138 | stride = s if i == 0 else 1
139 | features.append(block(input_channel, output_channel, stride, expand_ratio=t))
140 | input_channel = output_channel
141 | if stride == 2:
142 | self.key_block.append(True)
143 | else:
144 | self.key_block.append(False)
145 | all_channels.append(output_channel)
146 | # building last several layers
147 | # features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
148 | # self.key_block.append(False)
149 | # all_channels.append(self.last_channel)
150 | for i in range(len(self.key_block) - 1):
151 | if self.key_block[i + 1]:
152 | self.key_block[i] = True
153 | self.key_block[i + 1] = False
154 | self.channels.append(all_channels[i])
155 | self.key_block[-1] = True
156 | self.channels.append(all_channels[-1])
157 | print('channels', self.channels)
158 | # make it nn.Sequential
159 | self.features = nn.ModuleList(features)
160 | print('len(self.features)', len(self.features))
161 | # self.channels = [, ]
162 |
163 | # weight initialization
164 | for m in self.modules():
165 | if isinstance(m, nn.Conv2d):
166 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
167 | if m.bias is not None:
168 | nn.init.zeros_(m.bias)
169 | elif isinstance(m, nn.BatchNorm2d):
170 | nn.init.ones_(m.weight)
171 | nn.init.zeros_(m.bias)
172 | elif isinstance(m, nn.Linear):
173 | nn.init.normal_(m.weight, 0, 0.01)
174 | nn.init.zeros_(m.bias)
175 | state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'])
176 | self.load_state_dict(state_dict, strict=False)
177 |
178 | def forward(self, inputs, pre_img=None, pre_hm=None):
179 | x = self.features[0](inputs)
180 | if pre_img is not None:
181 | x = x + self.pre_img_layer(pre_img)
182 | if pre_hm is not None:
183 | x = x + self.pre_hm_layer(pre_hm)
184 | y = [x]
185 | for i in range(1, len(self.features)):
186 | x = self.features[i](x)
187 | # print('i, shape, is_key', i, x.shape, self.key_block[i])
188 | if self.key_block[i]:
189 | y.append(x)
190 | return y
191 |
192 |
--------------------------------------------------------------------------------
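
_make_divisible above rounds each channel count to a multiple of round_nearest (8 by default) without dropping more than 10% below the requested value, which keeps width-scaled channel counts hardware-friendly. A standalone sketch of the same arithmetic:

def make_divisible(v, divisor=8, min_value=None):
    # Same arithmetic as _make_divisible above.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:       # never go more than 10% below the request
        new_v += divisor
    return new_v

print(make_divisible(91))            # 88
print(make_divisible(32 * 0.75))     # 24

--------------------------------------------------------------------------------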
/src/lib/model/networks/backbones/resnet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 | import math
7 | import logging
8 | import numpy as np
9 |
10 | import torch
11 | from torch import nn
12 | import torch.nn.functional as F
13 | import torch.utils.model_zoo as model_zoo
14 |
15 | BN_MOMENTUM = 0.1
16 |
17 | model_urls = {
18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
23 | }
24 |
25 | def conv3x3(in_planes, out_planes, stride=1):
26 | """3x3 convolution with padding"""
27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
28 | padding=1, bias=False)
29 |
30 |
31 | class BasicBlock(nn.Module):
32 | expansion = 1
33 |
34 | def __init__(self, inplanes, planes, stride=1, downsample=None):
35 | super(BasicBlock, self).__init__()
36 | self.conv1 = conv3x3(inplanes, planes, stride)
37 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
38 | self.relu = nn.ReLU(inplace=True)
39 | self.conv2 = conv3x3(planes, planes)
40 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
41 | self.downsample = downsample
42 | self.stride = stride
43 |
44 | def forward(self, x):
45 | residual = x
46 |
47 | out = self.conv1(x)
48 | out = self.bn1(out)
49 | out = self.relu(out)
50 |
51 | out = self.conv2(out)
52 | out = self.bn2(out)
53 |
54 | if self.downsample is not None:
55 | residual = self.downsample(x)
56 |
57 | out += residual
58 | out = self.relu(out)
59 |
60 | return out
61 |
62 |
63 | class Bottleneck(nn.Module):
64 | expansion = 4
65 |
66 | def __init__(self, inplanes, planes, stride=1, downsample=None):
67 | super(Bottleneck, self).__init__()
68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
69 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
71 | padding=1, bias=False)
72 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
73 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
74 | bias=False)
75 | self.bn3 = nn.BatchNorm2d(planes * self.expansion,
76 | momentum=BN_MOMENTUM)
77 | self.relu = nn.ReLU(inplace=True)
78 | self.downsample = downsample
79 | self.stride = stride
80 |
81 | def forward(self, x):
82 | residual = x
83 |
84 | out = self.conv1(x)
85 | out = self.bn1(out)
86 | out = self.relu(out)
87 |
88 | out = self.conv2(out)
89 | out = self.bn2(out)
90 | out = self.relu(out)
91 |
92 | out = self.conv3(out)
93 | out = self.bn3(out)
94 |
95 | if self.downsample is not None:
96 | residual = self.downsample(x)
97 |
98 | out += residual
99 | out = self.relu(out)
100 |
101 | return out
102 |
103 |
104 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
105 | 34: (BasicBlock, [3, 4, 6, 3]),
106 | 50: (Bottleneck, [3, 4, 6, 3]),
107 | 101: (Bottleneck, [3, 4, 23, 3]),
108 | 152: (Bottleneck, [3, 8, 36, 3])}
109 |
110 | class Resnet(nn.Module):
111 | def __init__(self, opt):
112 | super().__init__()
113 | assert (not opt.pre_hm) and (not opt.pre_img)
114 | self.inplanes = 64
115 | block, layers = resnet_spec[opt.num_layers]
116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
117 | bias=False)
118 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
119 | self.relu = nn.ReLU(inplace=True)
120 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
121 | self.layer1 = self._make_layer(block, 64, layers[0])
122 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
123 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
124 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
125 |
126 | self.channels = [64, 64,
127 | 64 * block.expansion,
128 | 128 * block.expansion,
129 | 256 * block.expansion,
130 | 512 * block.expansion]
131 |
132 | self._init_weights(opt.num_layers)
133 |
134 |
135 | def _make_layer(self, block, planes, blocks, stride=1):
136 | downsample = None
137 | if stride != 1 or self.inplanes != planes * block.expansion:
138 | downsample = nn.Sequential(
139 | nn.Conv2d(self.inplanes, planes * block.expansion,
140 | kernel_size=1, stride=stride, bias=False),
141 | nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
142 | )
143 |
144 | layers = []
145 | layers.append(block(self.inplanes, planes, stride, downsample))
146 | self.inplanes = planes * block.expansion
147 | for i in range(1, blocks):
148 | layers.append(block(self.inplanes, planes))
149 |
150 | return nn.Sequential(*layers)
151 |
152 | def forward(self, x):
153 | x = self.conv1(x)
154 | x = self.bn1(x)
155 | x = self.relu(x)
156 | y = [x]
157 | x = self.maxpool(x)
158 | y.append(x)
159 |
160 | x = self.layer1(x)
161 | y.append(x)
162 | x = self.layer2(x)
163 | y.append(x)
164 | x = self.layer3(x)
165 | y.append(x)
166 | x = self.layer4(x)
167 | y.append(x)
168 |
169 | return y
170 |
171 | def _init_weights(self, num_layers):
172 | url = model_urls['resnet{}'.format(num_layers)]
173 | pretrained_state_dict = model_zoo.load_url(url)
174 | print('=> loading pretrained model {}'.format(url))
175 | self.load_state_dict(pretrained_state_dict, strict=False)
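forward returns six feature maps (stem, pooled stem, and the four residual stages) at strides 2, 4, 4, 8, 16 and 32, with their widths recorded in self.channels for the neck. A small shape check, with a SimpleNamespace standing in for the real options object:

# Hypothetical shape check (not part of the repo).
from types import SimpleNamespace
import torch

opt = SimpleNamespace(num_layers=18, pre_img=False, pre_hm=False)
backbone = Resnet(opt)
y = backbone(torch.randn(1, 3, 512, 512))
print(backbone.channels)                 # [64, 64, 64, 128, 256, 512] for resnet18
print([tuple(t.shape[-2:]) for t in y])  # (256,256), (128,128), (128,128), (64,64), (32,32), (16,16)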
--------------------------------------------------------------------------------
/src/lib/model/networks/base_model.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | from torch import nn
7 |
8 | def fill_fc_weights(layers):
9 | for m in layers.modules():
10 | if isinstance(m, nn.Conv2d):
11 | if m.bias is not None:
12 | nn.init.constant_(m.bias, 0)
13 |
14 | class BaseModel(nn.Module):
15 | def __init__(self, heads, head_convs, num_stacks, last_channel, opt=None):
16 | super(BaseModel, self).__init__()
17 | if opt is not None and opt.head_kernel != 3:
18 | print('Using head kernel:', opt.head_kernel)
19 | head_kernel = opt.head_kernel
20 | else:
21 | head_kernel = 3
22 | self.num_stacks = num_stacks
23 | self.heads = heads
24 | for head in self.heads:
25 | classes = self.heads[head]
26 | head_conv = head_convs[head]
27 | if len(head_conv) > 0:
28 | out = nn.Conv2d(head_conv[-1], classes,
29 | kernel_size=1, stride=1, padding=0, bias=True)
30 | conv = nn.Conv2d(last_channel, head_conv[0],
31 | kernel_size=head_kernel,
32 | padding=head_kernel // 2, bias=True)
33 | convs = [conv]
34 | for k in range(1, len(head_conv)):
35 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k],
36 | kernel_size=1, bias=True))
37 | if len(convs) == 1:
38 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out)
39 | elif len(convs) == 2:
40 | fc = nn.Sequential(
41 | convs[0], nn.ReLU(inplace=True),
42 | convs[1], nn.ReLU(inplace=True), out)
43 | elif len(convs) == 3:
44 | fc = nn.Sequential(
45 | convs[0], nn.ReLU(inplace=True),
46 | convs[1], nn.ReLU(inplace=True),
47 | convs[2], nn.ReLU(inplace=True), out)
48 | elif len(convs) == 4:
49 | fc = nn.Sequential(
50 | convs[0], nn.ReLU(inplace=True),
51 | convs[1], nn.ReLU(inplace=True),
52 | convs[2], nn.ReLU(inplace=True),
53 | convs[3], nn.ReLU(inplace=True), out)
54 | if 'hm' in head:
55 | fc[-1].bias.data.fill_(opt.prior_bias)
56 | else:
57 | fill_fc_weights(fc)
58 | else:
59 | fc = nn.Conv2d(last_channel, classes,
60 | kernel_size=1, stride=1, padding=0, bias=True)
61 | if 'hm' in head:
62 | fc.bias.data.fill_(opt.prior_bias)
63 | else:
64 | fill_fc_weights(fc)
65 | self.__setattr__(head, fc)
66 |
67 | def img2feats(self, x):
68 | raise NotImplementedError
69 |
70 | def imgpre2feats(self, x, pre_img=None, pre_hm=None):
71 | raise NotImplementedError
72 |
73 | def forward(self, x, pre_img=None, pre_hm=None):
74 | if (pre_hm is not None) or (pre_img is not None):
75 | feats = self.imgpre2feats(x, pre_img, pre_hm)
76 | else:
77 | feats = self.img2feats(x)
78 | out = []
79 | if self.opt.model_output_list:
80 | for s in range(self.num_stacks):
81 | z = []
82 | for head in sorted(self.heads):
83 | z.append(self.__getattr__(head)(feats[s]))
84 | out.append(z)
85 | else:
86 | for s in range(self.num_stacks):
87 | z = {}
88 | for head in self.heads:
89 | z[head] = self.__getattr__(head)(feats[s])
90 | out.append(z)
91 | return out
92 |
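Each entry of heads becomes its own small convolutional head: an optional stack of intermediate convolutions followed by a 1x1 output convolution with classes channels, and heatmap heads ('hm' in the name) get their output bias filled with opt.prior_bias so the focal loss starts from a low foreground prior. The sketch below rebuilds the single-intermediate-layer case by hand; the channel counts and the -4.6 bias are illustrative values, not read from this file.

# Sketch of the fc built above for one head with head_conv == [256] (illustrative).
import torch
from torch import nn

last_channel, head_kernel, classes, head_conv = 64, 3, 80, [256]
fc = nn.Sequential(
    nn.Conv2d(last_channel, head_conv[0], kernel_size=head_kernel,
              padding=head_kernel // 2, bias=True),
    nn.ReLU(inplace=True),
    nn.Conv2d(head_conv[-1], classes, kernel_size=1, bias=True))
fc[-1].bias.data.fill_(-4.6)        # what opt.prior_bias does for 'hm' heads
print(fc(torch.randn(1, last_channel, 128, 128)).shape)   # torch.Size([1, 80, 128, 128])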
--------------------------------------------------------------------------------
/src/lib/model/networks/generic_network.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | from torch import nn
7 | from .backbones.dla import dla34
8 | from .backbones.resnet import Resnet
9 | from .backbones.mobilenet import MobileNetV2
10 | from .necks.dlaup import DLASeg
11 | from .necks.msraup import MSRAUp
12 |
13 | backbone_factory = {
14 | 'dla34': dla34,
15 | 'resnet': Resnet,
16 | 'mobilenet': MobileNetV2
17 | }
18 |
19 | neck_factory = {
20 | 'dlaup': DLASeg,
21 | 'msraup': MSRAUp
22 | }
23 |
24 | def fill_fc_weights(layers):
25 | for m in layers.modules():
26 | if isinstance(m, nn.Conv2d):
27 | if m.bias is not None:
28 | nn.init.constant_(m.bias, 0)
29 |
30 | class GenericNetwork(nn.Module):
31 | def __init__(self, num_layers, heads, head_convs, num_stacks=1, opt=None):
32 | super(GenericNetwork, self).__init__()
33 | print('Using generic model with backbone {} and neck {}'.format(
34 | opt.backbone, opt.neck))
35 | # assert (not opt.pre_hm) and (not opt.pre_img)
36 | if opt is not None and opt.head_kernel != 3:
37 | print('Using head kernel:', opt.head_kernel)
38 | head_kernel = opt.head_kernel
39 | else:
40 | head_kernel = 3
41 | self.opt = opt
42 | self.backbone = backbone_factory[opt.backbone](opt=opt)
43 | channels = self.backbone.channels
44 | self.neck = neck_factory[opt.neck](opt=opt, channels=channels)
45 | last_channel = self.neck.out_channel
46 | self.num_stacks = num_stacks
47 | self.heads = heads
48 | for head in self.heads:
49 | classes = self.heads[head]
50 | head_conv = head_convs[head]
51 | if len(head_conv) > 0:
52 | out = nn.Conv2d(head_conv[-1], classes,
53 | kernel_size=1, stride=1, padding=0, bias=True)
54 | conv = nn.Conv2d(last_channel, head_conv[0],
55 | kernel_size=head_kernel,
56 | padding=head_kernel // 2, bias=True)
57 | convs = [conv]
58 | for k in range(1, len(head_conv)):
59 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k],
60 | kernel_size=1, bias=True))
61 | if len(convs) == 1:
62 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out)
63 | elif len(convs) == 2:
64 | fc = nn.Sequential(
65 | convs[0], nn.ReLU(inplace=True),
66 | convs[1], nn.ReLU(inplace=True), out)
67 | elif len(convs) == 3:
68 | fc = nn.Sequential(
69 | convs[0], nn.ReLU(inplace=True),
70 | convs[1], nn.ReLU(inplace=True),
71 | convs[2], nn.ReLU(inplace=True), out)
72 | elif len(convs) == 4:
73 | fc = nn.Sequential(
74 | convs[0], nn.ReLU(inplace=True),
75 | convs[1], nn.ReLU(inplace=True),
76 | convs[2], nn.ReLU(inplace=True),
77 | convs[3], nn.ReLU(inplace=True), out)
78 | if 'hm' in head:
79 | fc[-1].bias.data.fill_(opt.prior_bias)
80 | else:
81 | fill_fc_weights(fc)
82 | else:
83 | fc = nn.Conv2d(last_channel, classes,
84 | kernel_size=1, stride=1, padding=0, bias=True)
85 | if 'hm' in head:
86 | fc.bias.data.fill_(opt.prior_bias)
87 | else:
88 | fill_fc_weights(fc)
89 | self.__setattr__(head, fc)
90 |
91 | def forward(self, x, pre_img=None, pre_hm=None):
92 | y = self.backbone(x, pre_img, pre_hm)
93 | feats = self.neck(y)
94 | out = []
95 | if self.opt.model_output_list:
96 | for s in range(self.num_stacks):
97 | z = []
98 | for head in sorted(self.heads):
99 | z.append(self.__getattr__(head)(feats[s]))
100 | out.append(z)
101 | else:
102 | for s in range(self.num_stacks):
103 | z = {}
104 | for head in self.heads:
105 | z[head] = self.__getattr__(head)(feats[s])
106 | out.append(z)
107 | return out
108 |
--------------------------------------------------------------------------------
/src/lib/model/networks/necks/dlaup.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 | import math
7 | import logging
8 | import numpy as np
9 | from os.path import join
10 |
11 | import torch
12 | from torch import nn
13 | import torch.nn.functional as F
14 | import torch.utils.model_zoo as model_zoo
15 |
16 | try:
17 | from ..DCNv2.dcn_v2 import DCN
18 | except Exception:  # DCNv2 is an optional compiled extension; fall back if it is missing
19 | print('import DCN failed')
20 | DCN = None
21 |
22 | BN_MOMENTUM = 0.1
23 |
24 | class Identity(nn.Module):
25 |
26 | def __init__(self):
27 | super(Identity, self).__init__()
28 |
29 | def forward(self, x):
30 | return x
31 |
32 |
33 | def fill_fc_weights(layers):
34 | for m in layers.modules():
35 | if isinstance(m, nn.Conv2d):
36 | if m.bias is not None:
37 | nn.init.constant_(m.bias, 0)
38 |
39 |
40 | def fill_up_weights(up):
41 | w = up.weight.data
42 | f = math.ceil(w.size(2) / 2)
43 | c = (2 * f - 1 - f % 2) / (2. * f)
44 | for i in range(w.size(2)):
45 | for j in range(w.size(3)):
46 | w[0, 0, i, j] = \
47 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
48 | for c in range(1, w.size(0)):
49 | w[c, 0, :, :] = w[0, 0, :, :]
50 |
51 |
52 | class Conv(nn.Module):
53 | def __init__(self, chi, cho):
54 | super(Conv, self).__init__()
55 | self.conv = nn.Sequential(
56 | nn.Conv2d(chi, cho, kernel_size=1, stride=1, bias=False),
57 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
58 | nn.ReLU(inplace=True))
59 |
60 | def forward(self, x):
61 | return self.conv(x)
62 |
63 |
64 | class GlobalConv(nn.Module):
65 | def __init__(self, chi, cho, k=7, d=1):
66 | super(GlobalConv, self).__init__()
67 | gcl = nn.Sequential(
68 | nn.Conv2d(chi, cho, kernel_size=(k, 1), stride=1, bias=False,
69 | dilation=d, padding=(d * (k // 2), 0)),
70 | nn.Conv2d(cho, cho, kernel_size=(1, k), stride=1, bias=False,
71 | dilation=d, padding=(0, d * (k // 2))))
72 | gcr = nn.Sequential(
73 | nn.Conv2d(chi, cho, kernel_size=(1, k), stride=1, bias=False,
74 | dilation=d, padding=(0, d * (k // 2))),
75 | nn.Conv2d(cho, cho, kernel_size=(k, 1), stride=1, bias=False,
76 | dilation=d, padding=(d * (k // 2), 0)))
77 | fill_fc_weights(gcl)
78 | fill_fc_weights(gcr)
79 | self.gcl = gcl
80 | self.gcr = gcr
81 | self.act = nn.Sequential(
82 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
83 | nn.ReLU(inplace=True)
84 | )
85 |
86 | def forward(self, x):
87 | x = self.gcl(x) + self.gcr(x)
88 | x = self.act(x)
89 | return x
90 |
91 |
92 | class DeformConv(nn.Module):
93 | def __init__(self, chi, cho):
94 | super(DeformConv, self).__init__()
95 | self.actf = nn.Sequential(
96 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
97 | nn.ReLU(inplace=True)
98 | )
99 | self.conv = DCN(chi, cho, kernel_size=(3,3), stride=1, padding=1, dilation=1, deformable_groups=1)
100 |
101 | def forward(self, x):
102 | x = self.conv(x)
103 | x = self.actf(x)
104 | return x
105 |
106 |
107 | class IDAUp(nn.Module):
108 | def __init__(self, o, channels, up_f, node_type=(DeformConv, DeformConv)):
109 | super(IDAUp, self).__init__()
110 | for i in range(1, len(channels)):
111 | c = channels[i]
112 | f = int(up_f[i])
113 | proj = node_type[0](c, o)
114 | node = node_type[1](o, o)
115 |
116 | up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
117 | padding=f // 2, output_padding=0,
118 | groups=o, bias=False)
119 | fill_up_weights(up)
120 |
121 | setattr(self, 'proj_' + str(i), proj)
122 | setattr(self, 'up_' + str(i), up)
123 | setattr(self, 'node_' + str(i), node)
124 |
125 |
126 | def forward(self, layers, startp, endp):
127 | for i in range(startp + 1, endp):
128 | upsample = getattr(self, 'up_' + str(i - startp))
129 | project = getattr(self, 'proj_' + str(i - startp))
130 | layers[i] = upsample(project(layers[i]))
131 | node = getattr(self, 'node_' + str(i - startp))
132 | layers[i] = node(layers[i] + layers[i - 1])
133 |
134 |
135 |
136 | class DLAUp(nn.Module):
137 | def __init__(self, startp, channels, scales, in_channels=None,
138 | node_type=DeformConv):
139 | super(DLAUp, self).__init__()
140 | self.startp = startp
141 | if in_channels is None:
142 | in_channels = channels
143 | self.channels = channels
144 | channels = list(channels)
145 | scales = np.array(scales, dtype=int)
146 | for i in range(len(channels) - 1):
147 | j = -i - 2
148 | setattr(self, 'ida_{}'.format(i),
149 | IDAUp(channels[j], in_channels[j:],
150 | scales[j:] // scales[j],
151 | node_type=node_type))
152 | scales[j + 1:] = scales[j]
153 | in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
154 |
155 | def forward(self, layers):
156 | out = [layers[-1]] # start with 32
157 | for i in range(len(layers) - self.startp - 1):
158 | ida = getattr(self, 'ida_{}'.format(i))
159 | ida(layers, len(layers) - i - 2, len(layers))
160 | out.insert(0, layers[-1])
161 | return out
162 |
163 | DLA_NODE = {
164 | 'dcn': (DeformConv, DeformConv),
165 | 'gcn': (Conv, GlobalConv),
166 | 'conv': (Conv, Conv),
167 | }
168 |
169 | class DLASeg(nn.Module):
170 | def __init__(self, opt, channels):
171 | super().__init__()
172 | self.opt = opt
173 | self.channels = channels
174 | self.node_type = DLA_NODE[opt.dla_node]
175 | print('Using node type:', self.node_type)
176 | down_ratio = 4
177 | self.first_level = int(np.log2(down_ratio))
178 | self.last_level = 5
179 |
180 | scales = [2 ** i for i in range(len(channels[self.first_level:]))]
181 | self.dla_up = DLAUp(
182 | self.first_level, channels[self.first_level:], scales,
183 | node_type=self.node_type)
184 | self.out_channel = channels[self.first_level]
185 |
186 | self.ida_up = IDAUp(
187 | self.out_channel, channels[self.first_level:self.last_level],
188 | [2 ** i for i in range(self.last_level - self.first_level)],
189 | node_type=self.node_type)
190 |
191 |
192 | def forward(self, x):
193 | x = self.dla_up(x)
194 | y = []
195 | for i in range(self.last_level - self.first_level):
196 | y.append(x[i].clone())
197 | self.ida_up(y, 0, len(y))
198 |
199 | return [y[-1]]
200 |
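fill_up_weights initializes each grouped ConvTranspose2d in IDAUp as a fixed bilinear kernel, so the learnable 'up' layers start out as plain bilinear upsampling. A quick check of that equivalence follows; it is a hedged sketch, the sizes are arbitrary, and the comparison skips the image border, where zero padding and interpolation differ.

# Sketch: the deconv initialized by fill_up_weights matches bilinear upsampling
# away from the border (hypothetical check, not part of the repo).
import torch
from torch import nn
import torch.nn.functional as F

c, f = 4, 2                                        # channels, upsampling factor
up = nn.ConvTranspose2d(c, c, f * 2, stride=f, padding=f // 2,
                        output_padding=0, groups=c, bias=False)
fill_up_weights(up)
x = torch.randn(1, c, 8, 8)
ref = F.interpolate(x, scale_factor=f, mode='bilinear', align_corners=False)
print((up(x) - ref)[..., 1:-1, 1:-1].abs().max())  # ~0 in the interior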
--------------------------------------------------------------------------------
/src/lib/model/networks/necks/msraup.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # Modified by Dequan Wang and Xingyi Zhou
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import os
13 | import math
14 | import logging
15 |
16 | import torch
17 | import torch.nn as nn
18 |
19 | try:
20 | from ..DCNv2.dcn_v2 import DCN
21 | except Exception:  # DCNv2 is an optional compiled extension; fall back if it is missing
22 | print('import DCN failed')
23 | DCN = None
24 |
25 |
26 | BN_MOMENTUM = 0.1
27 |
28 | def fill_up_weights(up):
29 | w = up.weight.data
30 | f = math.ceil(w.size(2) / 2)
31 | c = (2 * f - 1 - f % 2) / (2. * f)
32 | for i in range(w.size(2)):
33 | for j in range(w.size(3)):
34 | w[0, 0, i, j] = \
35 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
36 | for c in range(1, w.size(0)):
37 | w[c, 0, :, :] = w[0, 0, :, :]
38 |
39 | def fill_fc_weights(layers):
40 | for m in layers.modules():
41 | if isinstance(m, nn.Conv2d):
42 | nn.init.normal_(m.weight, std=0.001)
43 | # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
44 | # torch.nn.init.xavier_normal_(m.weight.data)
45 | if m.bias is not None:
46 | nn.init.constant_(m.bias, 0)
47 |
48 | class MSRAUp(nn.Module):
49 | # def __init__(self, block, layers, heads, head_conv):
50 | def __init__(self, opt, channels):
51 | super().__init__()
52 | self.opt = opt
53 | assert self.opt.msra_outchannel in [64, 256]
54 | self.deconv_with_bias = False
55 | self.inplanes = channels[-1]
56 | self.out_channel = self.opt.msra_outchannel
57 | # used for deconv layers
58 | if self.opt.msra_outchannel == 64:
59 | print('Using slimmed resnet: 256 128 64 up channels.')
60 | self.deconv_layers = self._make_deconv_layer(
61 | 3,
62 | [256, 128, 64],
63 | [4, 4, 4],
64 | )
65 | else:
66 | print('Using original resnet: 256 256 256 up channels.')
67 | print('Using 256 deconvs')
68 | self.deconv_layers = self._make_deconv_layer(
69 | 3,
70 | [256, 256, 256],
71 | [4, 4, 4],
72 | )
73 | self.init_weights()
74 |
75 |
76 | def forward(self, x):
77 | x = self.deconv_layers(x[-1])
78 | return [x]
79 |
80 | def _get_deconv_cfg(self, deconv_kernel, index):
81 | if deconv_kernel == 4:
82 | padding = 1
83 | output_padding = 0
84 | elif deconv_kernel == 3:
85 | padding = 1
86 | output_padding = 1
87 | elif deconv_kernel == 2:
88 | padding = 0
89 | output_padding = 0
90 |
91 | return deconv_kernel, padding, output_padding
92 |
93 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
94 | assert num_layers == len(num_filters), \
95 | 'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
96 | assert num_layers == len(num_kernels), \
97 | 'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'
98 |
99 | layers = []
100 | for i in range(num_layers):
101 | kernel, padding, output_padding = \
102 | self._get_deconv_cfg(num_kernels[i], i)
103 |
104 | planes = num_filters[i]
105 | fc = DCN(self.inplanes, planes,
106 | kernel_size=(3,3), stride=1,
107 | padding=1, dilation=1, deformable_groups=1)
108 | # fc = nn.Conv2d(self.inplanes, planes,
109 | # kernel_size=3, stride=1,
110 | # padding=1, dilation=1, bias=False)
111 | # fill_fc_weights(fc)
112 | up = nn.ConvTranspose2d(
113 | in_channels=planes,
114 | out_channels=planes,
115 | kernel_size=kernel,
116 | stride=2,
117 | padding=padding,
118 | output_padding=output_padding,
119 | bias=self.deconv_with_bias)
120 | fill_up_weights(up)
121 |
122 | layers.append(fc)
123 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
124 | layers.append(nn.ReLU(inplace=True))
125 | layers.append(up)
126 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
127 | layers.append(nn.ReLU(inplace=True))
128 | self.inplanes = planes
129 |
130 | return nn.Sequential(*layers)
131 |
132 | def init_weights(self):
133 | for name, m in self.deconv_layers.named_modules():
134 | if isinstance(m, nn.BatchNorm2d):
135 | nn.init.constant_(m.weight, 1)
136 | nn.init.constant_(m.bias, 0)
137 |
--------------------------------------------------------------------------------
/src/lib/model/scatter_gather.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | from torch.nn.parallel._functions import Scatter, Gather
4 |
5 |
6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None):
7 | r"""
8 | Slices variables into approximately equal chunks and
9 | distributes them across given GPUs. Duplicates
10 | references to objects that are not variables. Does not
11 | support Tensors.
12 | """
13 | def scatter_map(obj):
14 | if isinstance(obj, Variable):
15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj)
16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter."
17 | if isinstance(obj, tuple):
18 | return list(zip(*map(scatter_map, obj)))
19 | if isinstance(obj, list):
20 | return list(map(list, zip(*map(scatter_map, obj))))
21 | if isinstance(obj, dict):
22 | return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
23 | return [obj for _ in target_gpus]
24 |
25 | return scatter_map(inputs)
26 |
27 |
28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None):
29 | r"""Scatter with support for kwargs dictionary"""
30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else []
31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else []
32 | if len(inputs) < len(kwargs):
33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
34 | elif len(kwargs) < len(inputs):
35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
36 | inputs = tuple(inputs)
37 | kwargs = tuple(kwargs)
38 | return inputs, kwargs
39 |
--------------------------------------------------------------------------------
/src/lib/model/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | def _sigmoid(x):
9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
10 | return y
11 |
12 | def _sigmoid12(x):
13 | y = torch.clamp(x.sigmoid_(), 1e-12)
14 | return y
15 |
16 | def _gather_feat(feat, ind):
17 | dim = feat.size(2)
18 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
19 | feat = feat.gather(1, ind)
20 | return feat
21 |
22 | def _tranpose_and_gather_feat(feat, ind):
23 | feat = feat.permute(0, 2, 3, 1).contiguous()
24 | feat = feat.view(feat.size(0), -1, feat.size(3))
25 | feat = _gather_feat(feat, ind)
26 | return feat
27 |
28 | def flip_tensor(x):
29 | return torch.flip(x, [3])
30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy()
31 | # return torch.from_numpy(tmp).to(x.device)
32 |
33 | def flip_lr(x, flip_idx):
34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
35 | shape = tmp.shape
36 | for e in flip_idx:
37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
40 |
41 | def flip_lr_off(x, flip_idx):
42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
43 | shape = tmp.shape
44 | tmp = tmp.reshape(tmp.shape[0], 17, 2,
45 | tmp.shape[2], tmp.shape[3])
46 | tmp[:, :, 0, :, :] *= -1
47 | for e in flip_idx:
48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
51 |
52 | def _nms(heat, kernel=3):
53 | pad = (kernel - 1) // 2
54 |
55 | hmax = nn.functional.max_pool2d(
56 | heat, (kernel, kernel), stride=1, padding=pad)
57 | keep = (hmax == heat).float()
58 | return heat * keep
59 |
60 | def _topk_channel(scores, K=100):
61 | batch, cat, height, width = scores.size()
62 |
63 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
64 |
65 | topk_inds = topk_inds % (height * width)
66 | topk_ys = (topk_inds / width).int().float()
67 | topk_xs = (topk_inds % width).int().float()
68 |
69 | return topk_scores, topk_inds, topk_ys, topk_xs
70 |
71 | def _topk(scores, K=100):
72 | batch, cat, height, width = scores.size()
73 |
74 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
75 |
76 | topk_inds = topk_inds % (height * width)
77 | topk_ys = (topk_inds / width).int().float()
78 | topk_xs = (topk_inds % width).int().float()
79 |
80 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
81 | topk_clses = (topk_ind / K).int()
82 | topk_inds = _gather_feat(
83 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
84 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
85 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
86 |
87 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
88 |
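_nms keeps only local maxima of the heatmap (a point survives iff it equals its 3x3 max-pooled value), and _topk then returns the K best peaks together with their class, row and column indices. A toy example using only the functions above:

# Toy peak extraction (hypothetical example, not part of the repo).
import torch

heat = torch.zeros(1, 2, 8, 8)              # batch 1, 2 classes, 8x8 heatmap
heat[0, 0, 2, 3] = 0.9                       # strong peak for class 0 at (y=2, x=3)
heat[0, 1, 5, 6] = 0.7                       # weaker peak for class 1 at (y=5, x=6)
scores, inds, clses, ys, xs = _topk(_nms(heat), K=2)
print(scores)                                # tensor([[0.9000, 0.7000]])
print(clses, ys, xs)                         # classes [0, 1], ys [2., 5.], xs [3., 6.]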
--------------------------------------------------------------------------------
/src/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/src/lib/utils/__init__.py
--------------------------------------------------------------------------------
/src/lib/utils/ddd_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import cv2
7 |
8 | def comput_corners_3d(dim, rotation_y):
9 | # dim: 3
10 | # location: 3
11 | # rotation_y: 1
12 | # return: 8 x 3
13 | c, s = np.cos(rotation_y), np.sin(rotation_y)
14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
15 | l, w, h = dim[2], dim[1], dim[0]
16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2]
17 | y_corners = [0,0,0,0,-h,-h,-h,-h]
18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2]
19 |
20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32)
21 | corners_3d = np.dot(R, corners).transpose(1, 0)
22 | return corners_3d
23 |
24 | def compute_box_3d(dim, location, rotation_y):
25 | # dim: 3
26 | # location: 3
27 | # rotation_y: 1
28 | # return: 8 x 3
29 | corners_3d = comput_corners_3d(dim, rotation_y)
30 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(1, 3)
31 | return corners_3d
32 |
33 | def project_to_image(pts_3d, P):
34 | # pts_3d: n x 3
35 | # P: 3 x 4
36 | # return: n x 2
37 | pts_3d_homo = np.concatenate(
38 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1)
39 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
40 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]
41 | # import pdb; pdb.set_trace()
42 | return pts_2d
43 |
44 | def compute_orientation_3d(dim, location, rotation_y):
45 | # dim: 3
46 | # location: 3
47 | # rotation_y: 1
48 | # return: 2 x 3
49 | c, s = np.cos(rotation_y), np.sin(rotation_y)
50 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
51 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32)
52 | orientation_3d = np.dot(R, orientation_3d)
53 | orientation_3d = orientation_3d + \
54 | np.array(location, dtype=np.float32).reshape(3, 1)
55 | return orientation_3d.transpose(1, 0)
56 |
57 | def draw_box_3d(image, corners, c=(255, 0, 255), same_color=False):
58 | face_idx = [[0,1,5,4],
59 | [1,2,6, 5],
60 | [3,0,4,7],
61 | [2,3,7,6]]
62 | right_corners = [1, 2, 6, 5] if not same_color else []
63 | left_corners = [0, 3, 7, 4] if not same_color else []
64 | thickness = 4 if same_color else 2
65 | corners = corners.astype(np.int32)
66 | for ind_f in range(3, -1, -1):
67 | f = face_idx[ind_f]
68 | for j in range(4):
69 | # print('corners', corners)
70 | cc = c
71 | if (f[j] in left_corners) and (f[(j+1)%4] in left_corners):
72 | cc = (255, 0, 0)
73 | if (f[j] in right_corners) and (f[(j+1)%4] in right_corners):
74 | cc = (0, 0, 255)
75 | try:
76 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]),
77 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), cc, thickness, lineType=cv2.LINE_AA)
78 | except:
79 | pass
80 | if ind_f == 0:
81 | try:
82 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]),
83 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA)
84 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]),
85 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA)
86 | except:
87 | pass
88 | # top_idx = [0, 1, 2, 3]
89 | return image
90 |
91 | def unproject_2d_to_3d(pt_2d, depth, P):
92 | # pts_2d: 2
93 | # depth: 1
94 | # P: 3 x 4
95 | # return: 3
96 | z = depth - P[2, 3]
97 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0]
98 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1]
99 | pt_3d = np.array([x, y, z], dtype=np.float32).reshape(3)
100 | return pt_3d
101 |
102 | def alpha2rot_y(alpha, x, cx, fx):
103 | """
104 | Get rotation_y from alpha + theta, where theta = arctan2(x - cx, fx)
105 | alpha : Observation angle of object, ranging [-pi..pi]
106 | x : Object center x to the camera center (x-W/2), in pixels
107 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi]
108 | """
109 | rot_y = alpha + np.arctan2(x - cx, fx)
110 | if rot_y > np.pi:
111 | rot_y -= 2 * np.pi
112 | if rot_y < -np.pi:
113 | rot_y += 2 * np.pi
114 | return rot_y
115 |
116 | def rot_y2alpha(rot_y, x, cx, fx):
117 | """
118 | Get alpha from rotation_y - theta, where theta = arctan2(x - cx, fx)
119 | alpha : Observation angle of object, ranging [-pi..pi]
120 | x : Object center x to the camera center (x-W/2), in pixels
121 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi]
122 | """
123 | alpha = rot_y - np.arctan2(x - cx, fx)
124 | if alpha > np.pi:
125 | alpha -= 2 * np.pi
126 | if alpha < -np.pi:
127 | alpha += 2 * np.pi
128 | return alpha
129 |
130 |
131 | def ddd2locrot(center, alpha, dim, depth, calib):
132 | # single image
133 | locations = unproject_2d_to_3d(center, depth, calib)
134 | locations[1] += dim[0] / 2
135 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0])
136 | return locations, rotation_y
137 |
138 | def project_3d_bbox(location, dim, rotation_y, calib):
139 | box_3d = compute_box_3d(dim, location, rotation_y)
140 | box_2d = project_to_image(box_3d, calib)
141 | return box_2d
142 |
143 |
144 | if __name__ == '__main__':
145 | calib = np.array(
146 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01],
147 | [0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01],
148 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]],
149 | dtype=np.float32)
150 | alpha = -0.20
151 | tl = np.array([712.40, 143.00], dtype=np.float32)
152 | br = np.array([810.73, 307.92], dtype=np.float32)
153 | ct = (tl + br) / 2
154 | rotation_y = 0.01
155 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0]))
156 | print('rotation_y', rotation_y)
157 |
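unproject_2d_to_3d and project_to_image are inverses for a fixed depth, which is exactly how ddd2locrot recovers a 3D location from an image-space center. A self-contained round-trip check; the calibration matrix and the point below are illustrative numbers, not taken from any dataset.

# Round-trip sketch: image point + depth -> 3D location -> reprojected point
# (hypothetical check, not part of the repo).
import numpy as np

calib = np.array([[707.0, 0.0, 604.0, 45.8],
                  [0.0, 707.0, 180.5, -0.35],
                  [0.0, 0.0, 1.0, 0.005]], dtype=np.float32)
pt_2d = np.array([700.0, 200.0], dtype=np.float32)
depth = 15.0
loc = unproject_2d_to_3d(pt_2d, depth, calib)
back = project_to_image(loc.reshape(1, 3), calib)
print(loc, back[0])                  # back[0] reproduces pt_2d up to float error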
--------------------------------------------------------------------------------
/src/lib/utils/image.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # Modified by Xingyi Zhou
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import numpy as np
13 | import cv2
14 | import random
15 |
16 | def flip(img):
17 | return img[:, :, ::-1].copy()
18 |
19 | # @numba.jit(nopython=True, nogil=True)
20 | def transform_preds_with_trans(coords, trans):
21 | # target_coords = np.concatenate(
22 | # [coords, np.ones((coords.shape[0], 1), np.float32)], axis=1)
23 | target_coords = np.ones((coords.shape[0], 3), np.float32)
24 | target_coords[:, :2] = coords
25 | target_coords = np.dot(trans, target_coords.transpose()).transpose()
26 | return target_coords[:, :2]
27 |
28 |
29 | def transform_preds(coords, center, scale, output_size):
30 | target_coords = np.zeros(coords.shape)
31 | trans = get_affine_transform(center, scale, 0, output_size, inv=1)
32 | for p in range(coords.shape[0]):
33 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
34 | return target_coords
35 |
36 |
37 | def get_affine_transform(center,
38 | scale,
39 | rot,
40 | output_size,
41 | shift=np.array([0, 0], dtype=np.float32),
42 | inv=0):
43 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
44 | scale = np.array([scale, scale], dtype=np.float32)
45 |
46 | scale_tmp = scale
47 | src_w = scale_tmp[0]
48 | dst_w = output_size[0]
49 | dst_h = output_size[1]
50 |
51 | rot_rad = np.pi * rot / 180
52 | src_dir = get_dir([0, src_w * -0.5], rot_rad)
53 | dst_dir = np.array([0, dst_w * -0.5], np.float32)
54 |
55 | src = np.zeros((3, 2), dtype=np.float32)
56 | dst = np.zeros((3, 2), dtype=np.float32)
57 | src[0, :] = center + scale_tmp * shift
58 | src[1, :] = center + src_dir + scale_tmp * shift
59 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
60 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir
61 |
62 | src[2:, :] = get_3rd_point(src[0, :], src[1, :])
63 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
64 |
65 | if inv:
66 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
67 | else:
68 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
69 |
70 | return trans
71 |
72 |
73 | def affine_transform(pt, t):
74 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
75 | new_pt = np.dot(t, new_pt)
76 | return new_pt[:2]
77 |
78 |
79 | def get_3rd_point(a, b):
80 | direct = a - b
81 | return b + np.array([-direct[1], direct[0]], dtype=np.float32)
82 |
83 |
84 | def get_dir(src_point, rot_rad):
85 | sn, cs = np.sin(rot_rad), np.cos(rot_rad)
86 |
87 | src_result = [0, 0]
88 | src_result[0] = src_point[0] * cs - src_point[1] * sn
89 | src_result[1] = src_point[0] * sn + src_point[1] * cs
90 |
91 | return src_result
92 |
93 |
94 | def crop(img, center, scale, output_size, rot=0):
95 | trans = get_affine_transform(center, scale, rot, output_size)
96 |
97 | dst_img = cv2.warpAffine(img,
98 | trans,
99 | (int(output_size[0]), int(output_size[1])),
100 | flags=cv2.INTER_LINEAR)
101 |
102 | return dst_img
103 |
104 | # @numba.jit(nopython=True, nogil=True)
105 | def gaussian_radius(det_size, min_overlap=0.7):
106 | height, width = det_size
107 |
108 | a1 = 1
109 | b1 = (height + width)
110 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
111 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
112 | r1 = (b1 + sq1) / 2
113 |
114 | a2 = 4
115 | b2 = 2 * (height + width)
116 | c2 = (1 - min_overlap) * width * height
117 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
118 | r2 = (b2 + sq2) / 2
119 |
120 | a3 = 4 * min_overlap
121 | b3 = -2 * min_overlap * (height + width)
122 | c3 = (min_overlap - 1) * width * height
123 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
124 | r3 = (b3 + sq3) / 2
125 | return min(r1, r2, r3)
126 |
127 |
128 | # @numba.jit(nopython=True, nogil=True)
129 | def gaussian2D(shape, sigma=1):
130 | m, n = [(ss - 1.) / 2. for ss in shape]
131 | y, x = np.ogrid[-m:m+1,-n:n+1]
132 | # y, x = np.arange(-m, m + 1).reshape(-1, 1), np.arange(-n, n + 1).reshape(1, -1)
133 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
134 | h[h < np.finfo(h.dtype).eps * h.max()] = 0
135 | return h
136 |
137 | # @numba.jit(nopython=True, nogil=True)
138 | def draw_umich_gaussian(heatmap, center, radius, k=1):
139 | # import pdb; pdb.set_trace()
140 | diameter = 2 * radius + 1
141 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
142 |
143 | x, y = int(center[0]), int(center[1])
144 |
145 | height, width = heatmap.shape[0:2]
146 |
147 | left, right = min(x, radius), min(width - x, radius + 1)
148 | top, bottom = min(y, radius), min(height - y, radius + 1)
149 | # import pdb; pdb.set_trace()
150 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
151 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
152 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
153 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
154 | return heatmap
155 |
156 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False):
157 | diameter = 2 * radius + 1
158 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
159 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1)
160 | dim = value.shape[0]
161 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value
162 | if is_offset and dim == 2:
163 | delta = np.arange(diameter*2+1) - radius
164 | reg[0] = reg[0] - delta.reshape(1, -1)
165 | reg[1] = reg[1] - delta.reshape(-1, 1)
166 |
167 | x, y = int(center[0]), int(center[1])
168 |
169 | height, width = heatmap.shape[0:2]
170 |
171 | left, right = min(x, radius), min(width - x, radius + 1)
172 | top, bottom = min(y, radius), min(height - y, radius + 1)
173 |
174 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
175 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right]
176 | masked_gaussian = gaussian[radius - top:radius + bottom,
177 | radius - left:radius + right]
178 | masked_reg = reg[:, radius - top:radius + bottom,
179 | radius - left:radius + right]
180 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
181 | idx = (masked_gaussian >= masked_heatmap).reshape(
182 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1])
183 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg
184 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap
185 | return regmap
186 |
187 |
188 | def draw_msra_gaussian(heatmap, center, sigma):
189 | tmp_size = sigma * 3
190 | mu_x = int(center[0] + 0.5)
191 | mu_y = int(center[1] + 0.5)
192 | w, h = heatmap.shape[0], heatmap.shape[1]
193 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
194 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
195 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0:
196 | return heatmap
197 | size = 2 * tmp_size + 1
198 | x = np.arange(0, size, 1, np.float32)
199 | y = x[:, np.newaxis]
200 | x0 = y0 = size // 2
201 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
202 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
203 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
204 | img_x = max(0, ul[0]), min(br[0], h)
205 | img_y = max(0, ul[1]), min(br[1], w)
206 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(
207 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]],
208 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]])
209 | return heatmap
210 |
211 | def grayscale(image):
212 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
213 |
214 | def lighting_(data_rng, image, alphastd, eigval, eigvec):
215 | alpha = data_rng.normal(scale=alphastd, size=(3, ))
216 | image += np.dot(eigvec, eigval * alpha)
217 |
218 | def blend_(alpha, image1, image2):
219 | image1 *= alpha
220 | image2 *= (1 - alpha)
221 | image1 += image2
222 |
223 | def saturation_(data_rng, image, gs, gs_mean, var):
224 | alpha = 1. + data_rng.uniform(low=-var, high=var)
225 | blend_(alpha, image, gs[:, :, None])
226 |
227 | def brightness_(data_rng, image, gs, gs_mean, var):
228 | alpha = 1. + data_rng.uniform(low=-var, high=var)
229 | image *= alpha
230 |
231 | def contrast_(data_rng, image, gs, gs_mean, var):
232 | alpha = 1. + data_rng.uniform(low=-var, high=var)
233 | blend_(alpha, image, gs_mean)
234 |
235 | def color_aug(data_rng, image, eig_val, eig_vec):
236 | functions = [brightness_, contrast_, saturation_]
237 | random.shuffle(functions)
238 |
239 | gs = grayscale(image)
240 | gs_mean = gs.mean()
241 | for f in functions:
242 | f(data_rng, image, gs, gs_mean, 0.4)
243 | lighting_(data_rng, image, 0.1, eig_val, eig_vec)
244 |
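gaussian_radius picks a splat radius such that a box jittered by up to that radius still overlaps the ground truth by min_overlap, and draw_umich_gaussian writes a clipped Gaussian of that radius into the heatmap in place, taking the element-wise maximum with whatever is already there. A minimal target-rendering sketch with illustrative sizes:

# Sketch: render one ground-truth box onto a single-class heatmap
# (hypothetical example, not part of the repo).
import numpy as np

heatmap = np.zeros((128, 128), dtype=np.float32)            # one class channel
box_h, box_w = 24.0, 40.0                                   # box size in output pixels
radius = max(0, int(gaussian_radius((box_h, box_w), min_overlap=0.7)))
draw_umich_gaussian(heatmap, center=(64, 40), radius=radius)
print(radius, heatmap.max(), heatmap[40, 64])               # peak of 1.0 at (y=40, x=64)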
--------------------------------------------------------------------------------
/src/lib/utils/post_process.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import cv2
7 | from .image import transform_preds_with_trans, get_affine_transform
8 | from .ddd_utils import ddd2locrot, comput_corners_3d
9 | from .ddd_utils import project_to_image, rot_y2alpha
10 | import numba
11 |
12 | def get_alpha(rot):
13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
15 | # return rot[:, 0]
16 | idx = rot[:, 1] > rot[:, 5]
17 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi)
18 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi)
19 | return alpha1 * idx + alpha2 * (1 - idx)
20 |
21 | def generic_post_process(
22 | opt, dets, c, s, h, w, num_classes, calibs=None, height=-1, width=-1):
23 | if not ('scores' in dets):
24 | return [{}], [{}]
25 | ret = []
26 |
27 | for i in range(len(dets['scores'])):
28 | preds = []
29 | trans = get_affine_transform(
30 | c[i], s[i], 0, (w, h), inv=1).astype(np.float32)
31 | for j in range(len(dets['scores'][i])):
32 | if dets['scores'][i][j] < opt.out_thresh:
33 | break
34 | item = {}
35 | item['score'] = dets['scores'][i][j]
36 | item['class'] = int(dets['clses'][i][j]) + 1
37 | item['ct'] = transform_preds_with_trans(
38 | (dets['cts'][i][j]).reshape(1, 2), trans).reshape(2)
39 |
40 | if 'tracking' in dets:
41 | tracking = transform_preds_with_trans(
42 | (dets['tracking'][i][j] + dets['cts'][i][j]).reshape(1, 2),
43 | trans).reshape(2)
44 | item['tracking'] = tracking - item['ct']
45 |
46 | if 'bboxes' in dets:
47 | bbox = transform_preds_with_trans(
48 | dets['bboxes'][i][j].reshape(2, 2), trans).reshape(4)
49 | item['bbox'] = bbox
50 |
51 | if 'hps' in dets:
52 | pts = transform_preds_with_trans(
53 | dets['hps'][i][j].reshape(-1, 2), trans).reshape(-1)
54 | item['hps'] = pts
55 |
56 | if 'dep' in dets and len(dets['dep'][i]) > j:
57 | item['dep'] = dets['dep'][i][j]
58 |
59 | if 'dim' in dets and len(dets['dim'][i]) > j:
60 | item['dim'] = dets['dim'][i][j]
61 |
62 | if 'rot' in dets and len(dets['rot'][i]) > j:
63 | item['alpha'] = get_alpha(dets['rot'][i][j:j+1])[0]
64 |
65 | if 'rot' in dets and 'dep' in dets and 'dim' in dets \
66 | and len(dets['dep'][i]) > j:
67 | if 'amodel_offset' in dets and len(dets['amodel_offset'][i]) > j:
68 | ct_output = dets['bboxes'][i][j].reshape(2, 2).mean(axis=0)
69 | amodel_ct_output = ct_output + dets['amodel_offset'][i][j]
70 | ct = transform_preds_with_trans(
71 | amodel_ct_output.reshape(1, 2), trans).reshape(2).tolist()
72 | else:
73 | bbox = item['bbox']
74 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
75 | item['ct'] = ct
76 | item['loc'], item['rot_y'] = ddd2locrot(
77 | ct, item['alpha'], item['dim'], item['dep'], calibs[i])
78 |
79 | preds.append(item)
80 |
81 | if 'nuscenes_att' in dets:
82 | for j in range(len(preds)):
83 | preds[j]['nuscenes_att'] = dets['nuscenes_att'][i][j]
84 |
85 | if 'velocity' in dets:
86 | for j in range(len(preds)):
87 | preds[j]['velocity'] = dets['velocity'][i][j]
88 |
89 | ret.append(preds)
90 |
91 | return ret
--------------------------------------------------------------------------------
/src/lib/utils/tracker.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.utils.linear_assignment_ import linear_assignment  # requires scikit-learn < 0.23, which still ships linear_assignment_
3 | from numba import jit
4 | import copy
5 |
6 | class Tracker(object):
7 | def __init__(self, opt):
8 | self.opt = opt
9 | self.reset()
10 |
11 | def init_track(self, results):
12 | for item in results:
13 | if item['score'] > self.opt.new_thresh:
14 | self.id_count += 1
15 | # active and age are never used in the paper
16 | item['active'] = 1
17 | item['age'] = 1
18 | item['tracking_id'] = self.id_count
19 | if not ('ct' in item):
20 | bbox = item['bbox']
21 | item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
22 | self.tracks.append(item)
23 |
24 | def reset(self):
25 | self.id_count = 0
26 | self.tracks = []
27 |
28 | def step(self, results, public_det=None):
29 | N = len(results)
30 | M = len(self.tracks)
31 |
32 | dets = np.array(
33 | [det['ct'] + det['tracking'] for det in results], np.float32) # N x 2
34 | track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \
35 | (track['bbox'][3] - track['bbox'][1])) \
36 | for track in self.tracks], np.float32) # M
37 | track_cat = np.array([track['class'] for track in self.tracks], np.int32) # M
38 | item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \
39 | (item['bbox'][3] - item['bbox'][1])) \
40 | for item in results], np.float32) # N
41 | item_cat = np.array([item['class'] for item in results], np.int32) # N
42 | tracks = np.array(
43 | [pre_det['ct'] for pre_det in self.tracks], np.float32) # M x 2
44 | dist = (((tracks.reshape(1, -1, 2) - \
45 | dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M
46 |
47 | invalid = ((dist > track_size.reshape(1, M)) + \
48 | (dist > item_size.reshape(N, 1)) + \
49 | (item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0
50 | dist = dist + invalid * 1e18
51 |
52 | if self.opt.hungarian:
53 | item_score = np.array([item['score'] for item in results], np.float32) # N
54 | dist[dist > 1e18] = 1e18
55 | matched_indices = linear_assignment(dist)
56 | else:
57 | matched_indices = greedy_assignment(copy.deepcopy(dist))
58 | unmatched_dets = [d for d in range(dets.shape[0]) \
59 | if not (d in matched_indices[:, 0])]
60 | unmatched_tracks = [d for d in range(tracks.shape[0]) \
61 | if not (d in matched_indices[:, 1])]
62 |
63 | if self.opt.hungarian:
64 | matches = []
65 | for m in matched_indices:
66 | if dist[m[0], m[1]] > 1e16:
67 | unmatched_dets.append(m[0])
68 | unmatched_tracks.append(m[1])
69 | else:
70 | matches.append(m)
71 | matches = np.array(matches).reshape(-1, 2)
72 | else:
73 | matches = matched_indices
74 |
75 | ret = []
76 | for m in matches:
77 | track = results[m[0]]
78 | track['tracking_id'] = self.tracks[m[1]]['tracking_id']
79 | track['age'] = 1
80 | track['active'] = self.tracks[m[1]]['active'] + 1
81 | ret.append(track)
82 |
83 | if self.opt.public_det and len(unmatched_dets) > 0:
84 | # Public detection: only create tracks from provided detections
85 | pub_dets = np.array([d['ct'] for d in public_det], np.float32)
86 | dist3 = ((dets.reshape(-1, 1, 2) - pub_dets.reshape(1, -1, 2)) ** 2).sum(
87 | axis=2)
88 | matched_dets = [d for d in range(dets.shape[0]) \
89 | if not (d in unmatched_dets)]
90 | dist3[matched_dets] = 1e18
91 | for j in range(len(pub_dets)):
92 | i = dist3[:, j].argmin()
93 | if dist3[i, j] < item_size[i]:
94 | dist3[i, :] = 1e18
95 | track = results[i]
96 | if track['score'] > self.opt.new_thresh:
97 | self.id_count += 1
98 | track['tracking_id'] = self.id_count
99 | track['age'] = 1
100 | track['active'] = 1
101 | ret.append(track)
102 | else:
103 | # Private detection: create tracks for all un-matched detections
104 | for i in unmatched_dets:
105 | track = results[i]
106 | if track['score'] > self.opt.new_thresh:
107 | self.id_count += 1
108 | track['tracking_id'] = self.id_count
109 | track['age'] = 1
110 | track['active'] = 1
111 | ret.append(track)
112 |
113 | for i in unmatched_tracks:
114 | track = self.tracks[i]
115 | if track['age'] < self.opt.max_age:
116 | track['age'] += 1
117 | track['active'] = 0
118 | bbox = track['bbox']
119 | ct = track['ct']
120 | v = [0, 0]
121 | track['bbox'] = [
122 | bbox[0] + v[0], bbox[1] + v[1],
123 | bbox[2] + v[0], bbox[3] + v[1]]
124 | track['ct'] = [ct[0] + v[0], ct[1] + v[1]]
125 | ret.append(track)
126 | self.tracks = ret
127 | return ret
128 |
129 | def greedy_assignment(dist):
130 | matched_indices = []
131 | if dist.shape[1] == 0:
132 | return np.array(matched_indices, np.int32).reshape(-1, 2)
133 | for i in range(dist.shape[0]):
134 | j = dist[i].argmin()
135 | if dist[i][j] < 1e16:
136 | dist[:, j] = 1e18
137 | matched_indices.append([i, j])
138 | return np.array(matched_indices, np.int32).reshape(-1, 2)
139 |
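greedy_assignment walks the detections in order, matches each one to its nearest still-unclaimed track, and skips pairs whose gated distance is at or above 1e16 (the value used above to mark invalid pairs). A tiny standalone example; the cost matrix is made up.

# Greedy matching on a toy 3 x 2 cost matrix (hypothetical example).
import numpy as np

dist = np.array([[1.0, 9.0],
                 [4.0, 2.0],
                 [1e18, 1e18]], np.float32)    # detection 2 is gated out
print(greedy_assignment(dist.copy()))          # [[0 0]
                                               #  [1 1]]
# .copy() because greedy_assignment mutates its argument, just as step()
# passes copy.deepcopy(dist) above.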
--------------------------------------------------------------------------------
/src/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 |
7 | class AverageMeter(object):
8 | """Computes and stores the average and current value"""
9 | def __init__(self):
10 | self.reset()
11 |
12 | def reset(self):
13 | self.val = 0
14 | self.avg = 0
15 | self.sum = 0
16 | self.count = 0
17 |
18 | def update(self, val, n=1):
19 | self.val = val
20 | self.sum += val * n
21 | self.count += n
22 | if self.count > 0:
23 | self.avg = self.sum / self.count
--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 | import os
7 |
8 | import torch
9 | import torch.utils.data
10 | from opts import opts
11 | from model.model import create_model, load_model, save_model
12 | from model.data_parallel import DataParallel
13 | from logger import Logger
14 | from dataset.dataset_factory import get_dataset
15 | from trainer import Trainer
16 |
17 | def get_optimizer(opt, model):
18 | if opt.optim == 'adam':
19 | optimizer = torch.optim.Adam(model.parameters(), opt.lr)
20 | elif opt.optim == 'sgd':
21 | print('Using SGD')
22 | optimizer = torch.optim.SGD(
23 | model.parameters(), opt.lr, momentum=0.9, weight_decay=0.0001)
24 | else:
25 | assert 0, opt.optim
26 | return optimizer
27 |
28 | def main(opt):
29 | torch.manual_seed(opt.seed)
30 | torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test
31 | Dataset = get_dataset(opt.dataset)
32 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
33 | print(opt)
34 | if not opt.not_set_cuda_env:
35 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
36 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
37 | logger = Logger(opt)
38 |
39 | print('Creating model...')
40 | model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt)
41 | optimizer = get_optimizer(opt, model)
42 | start_epoch = 0
43 | if opt.load_model != '':
44 | model, optimizer, start_epoch = load_model(
45 | model, opt.load_model, opt, optimizer)
46 |
47 | trainer = Trainer(opt, model, optimizer)
48 | trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device)
49 |
50 | if opt.val_intervals < opt.num_epochs or opt.test:
51 | print('Setting up validation data...')
52 | val_loader = torch.utils.data.DataLoader(
53 | Dataset(opt, 'val'), batch_size=1, shuffle=False, num_workers=1,
54 | pin_memory=True)
55 |
56 | if opt.test:
57 | _, preds = trainer.val(0, val_loader)
58 | val_loader.dataset.run_eval(preds, opt.save_dir)
59 | return
60 |
61 | print('Setting up train data...')
62 | train_loader = torch.utils.data.DataLoader(
63 | Dataset(opt, 'train'), batch_size=opt.batch_size, shuffle=True,
64 | num_workers=opt.num_workers, pin_memory=True, drop_last=True
65 | )
66 |
67 | print('Starting training...')
68 | for epoch in range(start_epoch + 1, opt.num_epochs + 1):
69 | mark = epoch if opt.save_all else 'last'
70 | log_dict_train, _ = trainer.train(epoch, train_loader)
71 | logger.write('epoch: {} |'.format(epoch))
72 | for k, v in log_dict_train.items():
73 | logger.scalar_summary('train_{}'.format(k), v, epoch)
74 | logger.write('{} {:8f} | '.format(k, v))
75 | if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
76 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)),
77 | epoch, model, optimizer)
78 | with torch.no_grad():
79 | log_dict_val, preds = trainer.val(epoch, val_loader)
80 | if opt.eval_val:
81 | val_loader.dataset.run_eval(preds, opt.save_dir)
82 | for k, v in log_dict_val.items():
83 | logger.scalar_summary('val_{}'.format(k), v, epoch)
84 | logger.write('{} {:8f} | '.format(k, v))
85 | else:
86 | save_model(os.path.join(opt.save_dir, 'model_last.pth'),
87 | epoch, model, optimizer)
88 | logger.write('\n')
89 | if epoch in opt.save_point:
90 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
91 | epoch, model, optimizer)
92 | if epoch in opt.lr_step:
93 | lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
94 | print('Drop LR to', lr)
95 | for param_group in optimizer.param_groups:
96 | param_group['lr'] = lr
97 | logger.close()
98 |
99 | if __name__ == '__main__':
100 | opt = opts().parse()
101 | main(opt)
102 |
--------------------------------------------------------------------------------
/src/test.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 | import os
7 | import json
8 | import cv2
9 | import numpy as np
10 | import time
11 | from progress.bar import Bar
12 | import torch
13 | import copy
14 |
15 | from opts import opts
16 | from logger import Logger
17 | from utils.utils import AverageMeter
18 | from dataset.dataset_factory import dataset_factory
19 | from detector import Detector
20 |
21 |
22 | class PrefetchDataset(torch.utils.data.Dataset):
23 | def __init__(self, opt, dataset, pre_process_func):
24 | self.images = dataset.images
25 | self.load_image_func = dataset.coco.loadImgs
26 | self.img_dir = dataset.img_dir
27 | self.pre_process_func = pre_process_func
28 | self.get_default_calib = dataset.get_default_calib
29 | self.opt = opt
30 |
31 | def __getitem__(self, index):
32 | img_id = self.images[index]
33 | img_info = self.load_image_func(ids=[img_id])[0]
34 | img_path = os.path.join(self.img_dir, img_info['file_name'])
35 | image = cv2.imread(img_path)
36 | images, meta = {}, {}
37 | for scale in self.opt.test_scales:
38 | input_meta = {}
39 | calib = img_info['calib'] if 'calib' in img_info \
40 | else self.get_default_calib(image.shape[1], image.shape[0])
41 | input_meta['calib'] = calib
42 | images[scale], meta[scale] = self.pre_process_func(
43 | image, scale, input_meta)
44 | ret = {'images': images, 'image': image, 'meta': meta}
45 | if 'frame_id' in img_info and img_info['frame_id'] == 1:
46 | ret['is_first_frame'] = 1
47 | ret['video_id'] = img_info['video_id']
48 | return img_id, ret
49 |
50 | def __len__(self):
51 | return len(self.images)
52 |
53 | def prefetch_test(opt):
54 | if not opt.not_set_cuda_env:
55 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
56 | Dataset = dataset_factory[opt.test_dataset]
57 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
58 | print(opt)
59 | Logger(opt)
60 |
61 | split = 'val' if not opt.trainval else 'test'
62 | dataset = Dataset(opt, split)
63 | detector = Detector(opt)
64 |
65 | if opt.load_results != '':
66 | load_results = json.load(open(opt.load_results, 'r'))
67 | for img_id in load_results:
68 | for k in range(len(load_results[img_id])):
69 | if load_results[img_id][k]['class'] - 1 in opt.ignore_loaded_cats:
70 | load_results[img_id][k]['score'] = -1
71 | else:
72 | load_results = {}
73 |
74 | data_loader = torch.utils.data.DataLoader(
75 | PrefetchDataset(opt, dataset, detector.pre_process),
76 | batch_size=1, shuffle=False, num_workers=1, pin_memory=True)
77 |
78 | results = {}
79 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
80 | bar = Bar('{}'.format(opt.exp_id), max=num_iters)
81 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'track']
82 | avg_time_stats = {t: AverageMeter() for t in time_stats}
83 | if opt.use_loaded_results:
84 | for img_id in data_loader.dataset.images:
85 | results[img_id] = load_results['{}'.format(img_id)]
86 | num_iters = 0  # results already filled from the loaded json; skip running the detector
87 | for ind, (img_id, pre_processed_images) in enumerate(data_loader):
88 | if ind >= num_iters:
89 | break
90 | if opt.tracking and ('is_first_frame' in pre_processed_images):
91 | if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results:
92 | pre_processed_images['meta']['pre_dets'] = \
93 | load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))]
94 | else:
95 | print()
96 | print('No pre_dets for', int(img_id.numpy().astype(np.int32)[0]),
97 | '. Use empty initialization.')
98 | pre_processed_images['meta']['pre_dets'] = []
99 | detector.reset_tracking()
100 | print('Start tracking video', int(pre_processed_images['video_id']))
101 | if opt.public_det:
102 | if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results:
103 | pre_processed_images['meta']['cur_dets'] = \
104 | load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))]
105 | else:
106 | print('No cur_dets for', int(img_id.numpy().astype(np.int32)[0]))
107 | pre_processed_images['meta']['cur_dets'] = []
108 |
109 | ret = detector.run(pre_processed_images)
110 | results[int(img_id.numpy().astype(np.int32)[0])] = ret['results']
111 |
112 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
113 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
114 | for t in avg_time_stats:
115 | avg_time_stats[t].update(ret[t])
116 | Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format(
117 | t, tm = avg_time_stats[t])
118 | if opt.print_iter > 0:
119 | if ind % opt.print_iter == 0:
120 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
121 | else:
122 | bar.next()
123 | bar.finish()
124 | if opt.save_results:
125 | print('saving results to', opt.save_dir + '/save_results_{}{}.json'.format(
126 | opt.test_dataset, opt.dataset_version))
127 | json.dump(_to_list(copy.deepcopy(results)),
128 | open(opt.save_dir + '/save_results_{}{}.json'.format(
129 | opt.test_dataset, opt.dataset_version), 'w'))
130 | dataset.run_eval(results, opt.save_dir)
131 |
132 | def test(opt):
133 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
134 |
135 | Dataset = dataset_factory[opt.test_dataset]
136 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset)
137 | print(opt)
138 | Logger(opt)
139 |
140 | split = 'val' if not opt.trainval else 'test'
141 | dataset = Dataset(opt, split)
142 | detector = Detector(opt)
143 |
144 | if opt.load_results != '': # load results in json
145 | load_results = json.load(open(opt.load_results, 'r'))
146 |
147 | results = {}
148 | num_iters = len(dataset) if opt.num_iters < 0 else opt.num_iters
149 | bar = Bar('{}'.format(opt.exp_id), max=num_iters)
150 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge']
151 | avg_time_stats = {t: AverageMeter() for t in time_stats}
152 | for ind in range(num_iters):
153 | img_id = dataset.images[ind]
154 | img_info = dataset.coco.loadImgs(ids=[img_id])[0]
155 | img_path = os.path.join(dataset.img_dir, img_info['file_name'])
156 | input_meta = {}
157 | if 'calib' in img_info:
158 | input_meta['calib'] = img_info['calib']
159 | if (opt.tracking and ('frame_id' in img_info) and img_info['frame_id'] == 1):
160 | detector.reset_tracking()
161 | input_meta['pre_dets'] = load_results[img_id]
162 |
163 | ret = detector.run(img_path, input_meta)
164 | results[img_id] = ret['results']
165 |
166 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
167 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td)
168 | for t in avg_time_stats:
169 | avg_time_stats[t].update(ret[t])
170 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg)
171 | bar.next()
172 | bar.finish()
173 | if opt.save_results:
174 | print('saving results to', opt.save_dir + '/save_results_{}{}.json'.format(
175 | opt.test_dataset, opt.dataset_version))
176 | json.dump(_to_list(copy.deepcopy(results)),
177 | open(opt.save_dir + '/save_results_{}{}.json'.format(
178 | opt.test_dataset, opt.dataset_version), 'w'))
179 | dataset.run_eval(results, opt.save_dir)
180 |
181 |
182 | def _to_list(results):
183 | for img_id in results:
184 | for t in range(len(results[img_id])):
185 | for k in results[img_id][t]:
186 | if isinstance(results[img_id][t][k], (np.ndarray, np.float32)):
187 | results[img_id][t][k] = results[img_id][t][k].tolist()
188 | return results
189 |
190 | if __name__ == '__main__':
191 | opt = opts().parse()
192 | if opt.not_prefetch_test:
193 | test(opt)
194 | else:
195 | prefetch_test(opt)
196 |
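Note: both entry points key `results` by integer image id and store per-detection dicts whose values may still be numpy arrays; `_to_list` converts them to plain Python types so `json.dump` can serialize the file. A minimal sketch of the same conversion on a single made-up detection (field names mirror the ones used in this repo, values are illustrative only):

    import json
    import numpy as np

    # One fake detection entry, shaped like the dicts stored in results[img_id].
    results = {1: [{'bbox': np.array([10., 20., 50., 80.], dtype=np.float32),
                    'score': np.float32(0.9),
                    'class': 1}]}

    def to_jsonable(results):
        # Same idea as _to_list(): numpy arrays / scalars -> lists / floats.
        for img_id in results:
            for det in results[img_id]:
                for k, v in det.items():
                    if isinstance(v, (np.ndarray, np.float32)):
                        det[k] = v.tolist()
        return results

    print(json.dumps(to_jsonable(results)))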
--------------------------------------------------------------------------------
/src/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 | def add_path(path):
5 | if path not in sys.path:
6 | sys.path.insert(0, path)
7 |
8 | this_dir = osp.dirname(__file__)
9 |
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, '../lib')
12 | add_path(lib_path)
13 |
--------------------------------------------------------------------------------
/src/tools/annot_bbox.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import json
4 | import cv2
5 | import argparse
6 | import numpy as np
7 | image_ext = ['jpg', 'jpeg', 'png', 'webp']
8 |
9 | parser = argparse.ArgumentParser()
10 | parser.add_argument('--image_path', default='')
11 | parser.add_argument('--save_path', default='')
12 | MAX_CACHE = 20
13 | CAT_NAMES = ['cat']
14 |
15 | def _sort_expt(pts):
16 | t, l, b, r = 0, 0, 0, 0
17 | for i in range(4):
18 | if pts[i][0] < pts[l][0]:
19 | l = i
20 | if pts[i][1] < pts[t][1]:
21 | t = i
22 | if pts[i][0] > pts[r][0]:
23 | r = i
24 | if pts[i][1] > pts[b][1]:
25 | b = i
26 | ret = [pts[t], pts[l], pts[b], pts[r]]
27 | return ret
28 |
29 | def _expt2bbox(expt):
30 | expt = np.array(expt, dtype=np.int32)
31 | bbox = [int(expt[:, 0].min()), int(expt[:, 1].min()),
32 | int(expt[:, 0].max()), int(expt[:, 1].max())]
33 | return bbox
34 |
35 | def save_txt(txt_name, pts_cls):
36 | ret = []
37 | for i in range(len(pts_cls)):
38 | ret.append(np.array(pts_cls[i][:4], dtype=np.int32).reshape(8).tolist() \
39 | + [pts_cls[i][4]])
40 | np.savetxt(txt_name, np.array(ret, dtype=np.int32), fmt='%d')
41 |
42 | def click(event, x, y, flags, param):
43 | global expt_cls, bboxes, pts
44 | if event == cv2.EVENT_LBUTTONDOWN:
45 | pts.append([x, y])
46 | cv2.circle(img, (x, y), 5, (255, 0, 255), -1)
47 | if len(pts) == 4:
48 | expt = _sort_expt(pts)
49 | bbox = _expt2bbox(expt)
50 | expt_cls.append(expt + [cls])
51 | cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
52 | (255, 0, 255), 2, cv2.LINE_AA)
53 | pts = []
54 |
55 | if __name__ == '__main__':
56 | cat_info = []
57 | for i, cat in enumerate(CAT_NAMES):
58 | cat_info.append({'name': cat, 'id': i + 1})
59 |
60 | args = parser.parse_args()
61 | if args.save_path == '':
62 | args.save_path = os.path.join(args.image_path, '..', 'click_annotation')
63 | if not os.path.exists(args.save_path):
64 | os.mkdir(args.save_path)
65 |
66 | ann_path = os.path.join(args.save_path, 'annotations.json')
67 | if os.path.exists(ann_path):
68 | anns = json.load(open(ann_path, 'r'))
69 | else:
70 | anns = {'annotations': [], 'images': [], 'categories': cat_info}
71 |
72 | assert os.path.exists(args.image_path)
73 | ls = os.listdir(args.image_path)
74 | image_names = []
75 | for file_name in sorted(ls):
76 | ext = file_name[file_name.rfind('.') + 1:].lower()
77 | if (ext in image_ext):
78 | image_names.append(file_name)
79 |
80 | i = 0
81 | cls = 1
82 | cached = 0
83 | while i < len(image_names):
84 | image_name = image_names[i]
85 | txt_name = os.path.join(
86 | args.save_path, image_name[:image_name.rfind('.')] + '.txt')
87 | if os.path.exists(txt_name) or image_name in anns:
88 | i = i + 1
89 | continue
90 | image_path = os.path.join(args.image_path, image_name)
91 | img = cv2.imread(image_path)
92 | cv2.namedWindow(image_name)
93 | cv2.setMouseCallback(image_name, click)
94 | expt_cls, pts = [], []
95 | while True:
96 | finished = False
97 | cv2.imshow(image_name, img)
98 | key = cv2.waitKey(1)
99 | if key == 100:  # 'd': save annotations for this image and move to the next one
100 | i = i + 1
101 | save_txt(txt_name, expt_cls)
102 | image_id = len(anns['images'])
103 | image_info = {'file_name': image_name, 'id': image_id}
104 | anns['images'].append(image_info)
105 | for ann in expt_cls:
106 | ann_id = len(anns['annotations'])
107 | ann_dict = {'image_id': image_id, 'id': ann_id, 'category_id': ann[4],
108 | 'bbox': _expt2bbox(ann[:4]), 'extreme_points': ann[:4]}
109 | anns['annotations'].append(ann_dict)
110 | cached = cached + 1
111 | print('saved to ', txt_name)
112 | if cached > MAX_CACHE:
113 | print('Saving json', ann_path)
114 | json.dump(anns, open(ann_path, 'w'))
115 | cached = 0
116 | break
117 | elif key == 97:  # 'a': go back to the previous image
118 | i = i - 1
119 | break
120 | elif key == 27:  # Esc: write the json and quit
121 | json.dump(anns, open(ann_path, 'w'))
122 | sys.exit(0)
123 | cv2.destroyAllWindows()
124 |
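Note: each annotation is four clicked extreme points; `_sort_expt` orders them top/left/bottom/right and `_expt2bbox` takes the per-axis min/max to form the box. A small worked example with made-up click coordinates:

    import numpy as np

    # Four clicks on an object, in arbitrary order: (x, y) pixel coordinates.
    clicks = [[40, 10], [5, 55], [80, 60], [30, 120]]

    pts = np.array(clicks, dtype=np.int32)
    bbox = [int(pts[:, 0].min()), int(pts[:, 1].min()),
            int(pts[:, 0].max()), int(pts[:, 1].max())]
    print(bbox)  # [5, 10, 80, 120] -> x_min, y_min, x_max, y_max, as in _expt2bbox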
--------------------------------------------------------------------------------
/src/tools/convert_crowdhuman_to_coco.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import json
4 | import cv2
5 |
6 | DATA_PATH = '../../data/crowdhuman/'
7 | OUT_PATH = DATA_PATH + 'annotations/'
8 | SPLITS = ['val', 'train']
9 | DEBUG = False
10 |
11 | def load_func(fpath):
12 | print('fpath', fpath)
13 | assert os.path.exists(fpath)
14 | with open(fpath, 'r') as fid:
15 | lines = fid.readlines()
16 | records = [json.loads(line.strip('\n')) for line in lines]
17 | return records
18 |
19 | if __name__ == '__main__':
20 | if not os.path.exists(OUT_PATH):
21 | os.mkdir(OUT_PATH)
22 | for split in SPLITS:
23 | data_path = DATA_PATH + split
24 | out_path = OUT_PATH + '{}.json'.format(split)
25 | out = {'images': [], 'annotations': [],
26 | 'categories': [{'id': 1, 'name': 'person'}]}
27 | ann_path = DATA_PATH + '/annotation_{}.odgt'.format(split)
28 | anns_data = load_func(ann_path)
29 | image_cnt = 0
30 | ann_cnt = 0
31 | video_cnt = 0
32 | for ann_data in anns_data:
33 | image_cnt += 1
34 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']),
35 | 'id': image_cnt}
36 | out['images'].append(image_info)
37 | if split != 'test':
38 | anns = ann_data['gtboxes']
39 | for i in range(len(anns)):
40 | ann_cnt += 1
41 | ann = {'id': ann_cnt,
42 | 'category_id': 1,
43 | 'image_id': image_cnt,
44 | 'bbox_vis': anns[i]['vbox'],
45 | 'bbox': anns[i]['fbox'],
46 | 'iscrowd': 1 if 'extra' in anns[i] and \
47 | 'ignore' in anns[i]['extra'] and \
48 | anns[i]['extra']['ignore'] == 1 else 0}
49 | out['annotations'].append(ann)
50 | print('loaded {} for {} images and {} samples'.format(
51 | split, len(out['images']), len(out['annotations'])))
52 | json.dump(out, open(out_path, 'w'))
53 |
54 |
55 |
56 |
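Note: `load_func` reads one JSON object per line from the .odgt file, and the converter only uses the 'ID' field plus, for non-test splits, each gtbox's 'fbox' (full box), 'vbox' (visible box) and the optional 'extra'/'ignore' flag. A hedged sketch of a single record in that shape; the concrete values are invented for illustration:

    import json

    # One line of an annotation_*.odgt file, as this converter consumes it.
    record = {
        'ID': 'example_image_id',
        'gtboxes': [
            {'fbox': [72, 202, 163, 503],   # full-body box: x, y, w, h
             'vbox': [72, 202, 163, 398],   # visible part of the box
             'extra': {'ignore': 0}},
        ],
    }
    line = json.dumps(record)              # what one .odgt line looks like
    print(json.loads(line.strip('\n'))['gtboxes'][0]['fbox'])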
--------------------------------------------------------------------------------
/src/tools/convert_kittitrack_to_coco.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pickle
6 | import json
7 | import numpy as np
8 | import os
9 | import cv2
10 | DATA_PATH = '../../data/kitti_tracking/'
11 | SPLITS = ['train_half', 'val_half', 'train', 'test']
12 | VIDEO_SETS = {'train': range(21), 'test': range(29),
13 | 'train_half': range(21), 'val_half': range(21)}
14 | CREATE_HALF_LABEL = True
15 | DEBUG = False
16 |
17 | '''
18 | #Values Name Description
19 | ----------------------------------------------------------------------------
20 | 1 frame Frame within the sequence where the object appearers
21 | 1 track id Unique tracking id of this object within this sequence
22 | 1 type Describes the type of object: 'Car', 'Van', 'Truck',
23 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
24 | 'Misc' or 'DontCare'
25 | 1 truncated Integer (0,1,2) indicating the level of truncation.
26 | Note that this is in contrast to the object detection
27 | benchmark where truncation is a float in [0,1].
28 | 1 occluded Integer (0,1,2,3) indicating occlusion state:
29 | 0 = fully visible, 1 = partly occluded
30 | 2 = largely occluded, 3 = unknown
31 | 1 alpha Observation angle of object, ranging [-pi..pi]
32 | 4 bbox 2D bounding box of object in the image (0-based index):
33 | contains left, top, right, bottom pixel coordinates
34 | 3 dimensions 3D object dimensions: height, width, length (in meters)
35 | 3 location 3D object location x,y,z in camera coordinates (in meters)
36 | 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
37 | 1 score Only for results: Float, indicating confidence in
38 | detection, needed for p/r curves, higher is better.
39 | '''
40 |
41 | def project_to_image(pts_3d, P):
42 | # pts_3d: n x 3
43 | # P: 3 x 4
44 | # return: n x 2
45 | pts_3d_homo = np.concatenate(
46 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1)
47 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
48 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]
49 | return pts_2d
50 |
51 | def read_clib(calib_path):
52 | f = open(calib_path, 'r')
53 | for i, line in enumerate(f):
54 | if i == 2:
55 | calib = np.array(line.strip().split(' ')[1:], dtype=np.float32)
56 | calib = calib.reshape(3, 4)
57 | return calib
58 |
59 | def _bbox_to_coco_bbox(bbox):
60 | return [(bbox[0]), (bbox[1]),
61 | (bbox[2] - bbox[0]), (bbox[3] - bbox[1])]
62 |
63 | cats = ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting',
64 | 'Tram', 'Misc', 'DontCare']
65 |
66 |
67 | cat_ids = {cat: i + 1 for i, cat in enumerate(cats)}
68 | cat_ids['Person'] = cat_ids['Person_sitting']
69 |
70 | cat_info = []
71 | for i, cat in enumerate(cats):
72 | cat_info.append({'name': cat, 'id': i + 1})
73 |
74 | if __name__ == '__main__':
75 | for split in SPLITS:
76 | ann_dir = DATA_PATH + '/label_02/'
77 | ret = {'images': [], 'annotations': [], "categories": cat_info,
78 | 'videos': []}
79 | num_images = 0
80 | for i in VIDEO_SETS[split]:
81 | image_id_base = num_images
82 | video_name = '{:04d}'.format(i)
83 | ret['videos'].append({'id': i + 1, 'file_name': video_name})
84 | ann_dir = 'train' if not ('test' in split) else split  # 'train' -> .../training/..., 'test' -> .../testing/...
85 | video_path = DATA_PATH + \
86 | '/data_tracking_image_2/{}ing/image_02/{}'.format(ann_dir, video_name)
87 | calib_path = DATA_PATH + 'data_tracking_calib/{}ing/calib/'.format(ann_dir) \
88 | + '{}.txt'.format(video_name)
89 | calib = read_clib(calib_path)
90 | image_files = sorted(os.listdir(video_path))
91 | num_images_video = len(image_files)
92 | if CREATE_HALF_LABEL and 'half' in split:
93 | image_range = [0, num_images_video // 2 - 1] if split == 'train_half' else \
94 | [num_images_video // 2, num_images_video - 1]
95 | else:
96 | image_range = [0, num_images_video - 1]
97 | print('num_frames', video_name, image_range[1] - image_range[0] + 1)
98 | for j, image_name in enumerate(image_files):
99 | if (j < image_range[0] or j > image_range[1]):
100 | continue
101 | num_images += 1
102 | image_info = {'file_name': '{}/{:06d}.png'.format(video_name, j),
103 | 'id': num_images,
104 | 'calib': calib.tolist(),
105 | 'video_id': i + 1,
106 | 'frame_id': j + 1 - image_range[0]}
107 | ret['images'].append(image_info)
108 |
109 | if split == 'test':
110 | continue
111 | # 0 -1 DontCare -1 -1 -10.000000 219.310000 188.490000 245.500000 218.560000 -1000.000000 -1000.000000 -1000.000000 -10.000000 -1.000000 -1.000000 -1.000000
112 | ann_path = DATA_PATH + 'label_02/{}.txt'.format(video_name)
113 | anns = open(ann_path, 'r')
114 |
115 | if CREATE_HALF_LABEL and 'half' in split:
116 | label_out_folder = DATA_PATH + 'label_02_{}/'.format(split)
117 | label_out_path = label_out_folder + '{}.txt'.format(video_name)
118 | if not os.path.exists(label_out_folder):
119 | os.mkdir(label_out_folder)
120 | label_out_file = open(label_out_path, 'w')
121 |
122 | for ann_ind, txt in enumerate(anns):
123 | tmp = txt[:-1].split(' ')
124 | frame_id = int(tmp[0])
125 | track_id = int(tmp[1])
126 | cat_id = cat_ids[tmp[2]]
127 | truncated = int(float(tmp[3]))
128 | occluded = int(tmp[4])
129 | alpha = float(tmp[5])
130 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])]
131 | dim = [float(tmp[10]), float(tmp[11]), float(tmp[12])]
132 | location = [float(tmp[13]), float(tmp[14]), float(tmp[15])]
133 | rotation_y = float(tmp[16])
134 | amodel_center = project_to_image(
135 | np.array([location[0], location[1] - dim[0] / 2, location[2]],
136 | np.float32).reshape(1, 3), calib)[0].tolist()
137 | ann = {'image_id': frame_id + 1 - image_range[0] + image_id_base,
138 | 'id': int(len(ret['annotations']) + 1),
139 | 'category_id': cat_id,
140 | 'dim': dim,
141 | 'bbox': _bbox_to_coco_bbox(bbox),
142 | 'depth': location[2],
143 | 'alpha': alpha,
144 | 'truncated': truncated,
145 | 'occluded': occluded,
146 | 'location': location,
147 | 'rotation_y': rotation_y,
148 | 'amodel_center': amodel_center,
149 | 'track_id': track_id + 1}
150 | if CREATE_HALF_LABEL and 'half' in split:
151 | if (frame_id < image_range[0] or frame_id > image_range[1]):
152 | continue
153 | out_frame_id = frame_id - image_range[0]
154 | label_out_file.write('{} {}'.format(
155 | out_frame_id, txt[txt.find(' ') + 1:]))
156 |
157 | ret['annotations'].append(ann)
158 |
159 | print("# images: ", len(ret['images']))
160 | print("# annotations: ", len(ret['annotations']))
161 | out_dir = '{}/annotations/'.format(DATA_PATH)
162 | if not os.path.exists(out_dir):
163 | os.mkdir(out_dir)
164 | out_path = '{}/annotations/tracking_{}.json'.format(
165 | DATA_PATH, split)
166 | json.dump(ret, open(out_path, 'w'))
167 |
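Note: `project_to_image` is used above to compute the amodal center: the object location is lifted by half the box height and projected with the 3x4 camera matrix read from the calib file. A minimal, self-contained sketch with a made-up calibration matrix (focal length and principal point chosen only for illustration):

    import numpy as np

    def project_to_image(pts_3d, P):
        # n x 3 camera-frame points -> n x 2 pixel coordinates, as in the converter.
        pts_3d_homo = np.concatenate(
            [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1)
        pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
        return pts_2d[:, :2] / pts_2d[:, 2:]

    # Hypothetical 3 x 4 projection matrix (fx = fy = 721, cx = 609, cy = 172).
    P = np.array([[721., 0., 609., 0.],
                  [0., 721., 172., 0.],
                  [0., 0., 1., 0.]], dtype=np.float32)

    location, dim = [1.84, 1.47, 8.41], [1.89, 0.48, 1.20]   # x, y, z and h, w, l
    center_3d = np.array([[location[0], location[1] - dim[0] / 2, location[2]]],
                         dtype=np.float32)
    print(project_to_image(center_3d, P)[0])   # roughly [766.7, 217.0]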
--------------------------------------------------------------------------------
/src/tools/convert_mot_det_to_results.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 | import os
4 | from collections import defaultdict
5 | split = 'val_half'
6 |
7 | DET_PATH = '../../data/mot17/'
8 | ANN_PATH = '../../data/mot17/annotations/{}.json'.format(split)
9 | OUT_DIR = '../../data/mot17/results/'
10 | OUT_PATH = OUT_DIR + '{}_det.json'.format(split)
11 | IS_THIRD_PARTY = False  # guards the det-file loading below; leave False to read det/*.txt
12 | if __name__ == '__main__':
13 | if not os.path.exists(OUT_DIR):
14 | os.mkdir(OUT_DIR)
15 | seqs = [s for s in os.listdir(DET_PATH) if '_det' in s]
16 | data = json.load(open(ANN_PATH, 'r'))
17 | images = data['images']
18 | image_to_anns = defaultdict(list)
19 | for seq in sorted(seqs):
20 | print('seq', seq)
21 | seq_path = '{}/{}/'.format(DET_PATH, seq)
22 | if split == 'val_half':
23 | ann_path = seq_path + 'det/det_val_half.txt'
24 | train_ann_path = seq_path + 'det/det_train_half.txt'
25 | train_anns = np.loadtxt(train_ann_path, dtype=np.float32, delimiter=',')
26 | frame_base = int(train_anns[:, 0].max())
27 | else:
28 | ann_path = seq_path + 'det/det.txt'
29 | frame_base = 0
30 | if not IS_THIRD_PARTY:
31 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
32 | for i in range(len(anns)):
33 | frame_id = int(anns[i][0])
34 | file_name = '{}/img1/{:06d}.jpg'.format(seq, frame_id + frame_base)
35 | bbox = (anns[i][2:6]).tolist()
36 | score = 1 # float(anns[i][8])
37 | image_to_anns[file_name].append(bbox + [score])
38 |
39 | results = {}
40 | for image_info in images:
41 | image_id = image_info['id']
42 | file_name = image_info['file_name']
43 | dets = image_to_anns[file_name]
44 | results[image_id] = []
45 | for det in dets:
46 | bbox = [float(det[0]), float(det[1]), \
47 | float(det[0] + det[2]), float(det[1] + det[3])]
48 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
49 | results[image_id].append(
50 | {'bbox': bbox, 'score': float(det[4]), 'class': 1, 'ct': ct})
51 | out_path = OUT_PATH
52 | json.dump(results, open(out_path, 'w'))
53 |
--------------------------------------------------------------------------------
/src/tools/convert_mot_to_coco.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import json
4 | import cv2
5 |
6 | # Use the same script for MOT16
7 | # DATA_PATH = '../../data/mot16/'
8 | DATA_PATH = '../../data/mot17/'
9 | OUT_PATH = DATA_PATH + 'annotations/'
10 | SPLITS = ['train_half', 'val_half', 'train', 'test']
11 | HALF_VIDEO = True
12 | CREATE_SPLITTED_ANN = True
13 | CREATE_SPLITTED_DET = True
14 |
15 | if __name__ == '__main__':
16 | for split in SPLITS:
17 | data_path = DATA_PATH + (split if not HALF_VIDEO else 'train')
18 | out_path = OUT_PATH + '{}.json'.format(split)
19 | out = {'images': [], 'annotations': [],
20 | 'categories': [{'id': 1, 'name': 'pedestrian'}],
21 | 'videos': []}
22 | seqs = os.listdir(data_path)
23 | image_cnt = 0
24 | ann_cnt = 0
25 | video_cnt = 0
26 | for seq in sorted(seqs):
27 | if '.DS_Store' in seq:
28 | continue
29 | if 'mot17' in DATA_PATH and (split != 'test' and not ('FRCNN' in seq)):
30 | continue
31 | video_cnt += 1
32 | out['videos'].append({
33 | 'id': video_cnt,
34 | 'file_name': seq})
35 | seq_path = '{}/{}/'.format(data_path, seq)
36 | img_path = seq_path + 'img1/'
37 | ann_path = seq_path + 'gt/gt.txt'
38 | images = os.listdir(img_path)
39 | num_images = len([image for image in images if 'jpg' in image])
40 | if HALF_VIDEO and ('half' in split):
41 | image_range = [0, num_images // 2] if 'train' in split else \
42 | [num_images // 2 + 1, num_images - 1]
43 | else:
44 | image_range = [0, num_images - 1]
45 | for i in range(num_images):
46 | if (i < image_range[0] or i > image_range[1]):
47 | continue
48 | image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1),
49 | 'id': image_cnt + i + 1,
50 | 'frame_id': i + 1 - image_range[0],
51 | 'prev_image_id': image_cnt + i if i > 0 else -1,
52 | 'next_image_id': \
53 | image_cnt + i + 2 if i < num_images - 1 else -1,
54 | 'video_id': video_cnt}
55 | out['images'].append(image_info)
56 | print('{}: {} images'.format(seq, num_images))
57 | if split != 'test':
58 | det_path = seq_path + 'det/det.txt'
59 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
60 | dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',')
61 | if CREATE_SPLITTED_ANN and ('half' in split):
62 | anns_out = np.array([anns[i] for i in range(anns.shape[0]) if \
63 | int(anns[i][0]) - 1 >= image_range[0] and \
64 | int(anns[i][0]) - 1 <= image_range[1]], np.float32)
65 | anns_out[:, 0] -= image_range[0]
66 | gt_out = seq_path + '/gt/gt_{}.txt'.format(split)
67 | fout = open(gt_out, 'w')
68 | for o in anns_out:
69 | fout.write(
70 | '{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format(
71 | int(o[0]),int(o[1]),int(o[2]),int(o[3]),int(o[4]),int(o[5]),
72 | int(o[6]),int(o[7]),o[8]))
73 | fout.close()
74 | if CREATE_SPLITTED_DET and ('half' in split):
75 | dets_out = np.array([dets[i] for i in range(dets.shape[0]) if \
76 | int(dets[i][0]) - 1 >= image_range[0] and \
77 | int(dets[i][0]) - 1 <= image_range[1]], np.float32)
78 | dets_out[:, 0] -= image_range[0]
79 | det_out = seq_path + '/det/det_{}.txt'.format(split)
80 | dout = open(det_out, 'w')
81 | for o in dets_out:
82 | dout.write(
83 | '{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format(
84 | int(o[0]),int(o[1]),float(o[2]),float(o[3]),float(o[4]),float(o[5]),
85 | float(o[6])))
86 | dout.close()
87 |
88 | print(' {} ann images'.format(int(anns[:, 0].max())))
89 | for i in range(anns.shape[0]):
90 | frame_id = int(anns[i][0])
91 | if (frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]):
92 | continue
93 | track_id = int(anns[i][1])
94 | cat_id = int(anns[i][7])
95 | ann_cnt += 1
96 | if not ('15' in DATA_PATH):
97 | if not (float(anns[i][8]) >= 0.25):
98 | continue
99 | if not (int(anns[i][6]) == 1):
100 | continue
101 | if (int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]): # Non-person
102 | continue
103 | if (int(anns[i][7]) in [2, 7, 8, 12]): # Ignored person
104 | category_id = -1
105 | else:
106 | category_id = 1
107 | else:
108 | category_id = 1
109 | ann = {'id': ann_cnt,
110 | 'category_id': category_id,
111 | 'image_id': image_cnt + frame_id,
112 | 'track_id': track_id,
113 | 'bbox': anns[i][2:6].tolist(),
114 | 'conf': float(anns[i][6])}
115 | out['annotations'].append(ann)
116 | image_cnt += num_images
117 | print('loaded {} for {} images and {} samples'.format(
118 | split, len(out['images']), len(out['annotations'])))
119 | json.dump(out, open(out_path, 'w'))
120 |
121 |
122 |
123 |
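Note: for the *_half splits, each training video is cut in two: train_half keeps frames [0, num_images // 2] and val_half keeps [num_images // 2 + 1, num_images - 1], both ranges inclusive. A tiny sketch of that split with a made-up frame count:

    # Inclusive frame ranges used for the half splits, as in the converter above.
    def half_ranges(num_images):
        train_half = [0, num_images // 2]
        val_half = [num_images // 2 + 1, num_images - 1]
        return train_half, val_half

    print(half_ranges(600))   # ([0, 300], [301, 599])
    print(half_ranges(601))   # ([0, 300], [301, 600])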
--------------------------------------------------------------------------------
/src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap:
--------------------------------------------------------------------------------
1 | 0000 empty 000000 000154
2 | 0001 empty 000000 000447
3 | 0002 empty 000000 000233
4 | 0003 empty 000000 000144
5 | 0004 empty 000000 000314
6 | 0005 empty 000000 000297
7 | 0006 empty 000000 000270
8 | 0007 empty 000000 000800
9 | 0008 empty 000000 000390
10 | 0009 empty 000000 000803
11 | 0010 empty 000000 000294
12 | 0011 empty 000000 000373
13 | 0012 empty 000000 000078
14 | 0013 empty 000000 000340
15 | 0014 empty 000000 000106
16 | 0015 empty 000000 000376
17 | 0016 empty 000000 000209
18 | 0017 empty 000000 000145
19 | 0018 empty 000000 000339
20 | 0019 empty 000000 001059
21 | 0020 empty 000000 000837
22 |
--------------------------------------------------------------------------------
/src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.test:
--------------------------------------------------------------------------------
1 | 0000 empty 000000 000465
2 | 0001 empty 000000 000147
3 | 0002 empty 000000 000243
4 | 0003 empty 000000 000257
5 | 0004 empty 000000 000421
6 | 0005 empty 000000 000809
7 | 0006 empty 000000 000114
8 | 0007 empty 000000 000215
9 | 0008 empty 000000 000165
10 | 0009 empty 000000 000349
11 | 0010 empty 000000 001176
12 | 0011 empty 000000 000774
13 | 0012 empty 000000 000694
14 | 0013 empty 000000 000152
15 | 0014 empty 000000 000850
16 | 0015 empty 000000 000701
17 | 0016 empty 000000 000510
18 | 0017 empty 000000 000305
19 | 0018 empty 000000 000180
20 | 0019 empty 000000 000404
21 | 0020 empty 000000 000173
22 | 0021 empty 000000 000203
23 | 0022 empty 000000 000436
24 | 0023 empty 000000 000430
25 | 0024 empty 000000 000316
26 | 0025 empty 000000 000176
27 | 0026 empty 000000 000170
28 | 0027 empty 000000 000085
29 | 0028 empty 000000 000175
30 |
--------------------------------------------------------------------------------
/src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.training:
--------------------------------------------------------------------------------
1 | 0000 empty 000000 000154
2 | 0001 empty 000000 000447
3 | 0002 empty 000000 000233
4 | 0003 empty 000000 000144
5 | 0004 empty 000000 000314
6 | 0005 empty 000000 000297
7 | 0006 empty 000000 000270
8 | 0007 empty 000000 000800
9 | 0008 empty 000000 000390
10 | 0009 empty 000000 000803
11 | 0010 empty 000000 000294
12 | 0011 empty 000000 000373
13 | 0012 empty 000000 000078
14 | 0013 empty 000000 000340
15 | 0014 empty 000000 000106
16 | 0015 empty 000000 000376
17 | 0016 empty 000000 000209
18 | 0017 empty 000000 000145
19 | 0018 empty 000000 000339
20 | 0019 empty 000000 001059
21 | 0020 empty 000000 000837
22 |
--------------------------------------------------------------------------------
/src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_1-2.seqmap:
--------------------------------------------------------------------------------
1 | 0000 empty 000000 000154
2 | 0001 empty 000000 000447
3 | 0002 empty 000000 000233
4 | 0003 empty 000000 000144
5 | 0004 empty 000000 000314
6 | 0005 empty 000000 000297
7 | 0006 empty 000000 000270
8 | 0007 empty 000000 000800
9 | 0008 empty 000000 000390
10 | 0009 empty 000000 000803
11 |
--------------------------------------------------------------------------------
/src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_2-2.seqmap:
--------------------------------------------------------------------------------
1 | 0010 empty 000000 000294
2 | 0011 empty 000000 000373
3 | 0012 empty 000000 000078
4 | 0013 empty 000000 000340
5 | 0014 empty 000000 000106
6 | 0015 empty 000000 000376
7 | 0016 empty 000000 000209
8 | 0017 empty 000000 000145
9 | 0018 empty 000000 000339
10 | 0019 empty 000000 001059
11 | 0020 empty 000000 000837
12 |
--------------------------------------------------------------------------------
/src/tools/eval_kitti_track/data/tracking/evaluate_trackingval_half.seqmap:
--------------------------------------------------------------------------------
1 | 0000 empty 000000 000077
2 | 0001 empty 000000 000224
3 | 0002 empty 000000 000117
4 | 0003 empty 000000 000072
5 | 0004 empty 000000 000157
6 | 0005 empty 000000 000149
7 | 0006 empty 000000 000135
8 | 0007 empty 000000 000400
9 | 0008 empty 000000 000195
10 | 0009 empty 000000 000402
11 | 0010 empty 000000 000147
12 | 0011 empty 000000 000187
13 | 0012 empty 000000 000039
14 | 0013 empty 000000 000170
15 | 0014 empty 000000 000053
16 | 0015 empty 000000 000188
17 | 0016 empty 000000 000105
18 | 0017 empty 000000 000073
19 | 0018 empty 000000 000170
20 | 0019 empty 000000 000530
21 | 0020 empty 000000 000419
22 |
--------------------------------------------------------------------------------
/src/tools/eval_kitti_track/mailpy.py:
--------------------------------------------------------------------------------
1 | class Mail:
2 | """ Dummy class to print messages without sending e-mails"""
3 | def __init__(self,mailaddress):
4 | pass
5 | def msg(self,msg):
6 | print(msg)
7 | def finalize(self,success,benchmark,sha_key,mailaddress=None):
8 | if success:
9 | print("Results for %s (benchmark: %s) sucessfully created" % (benchmark,sha_key))
10 | else:
11 | print("Creating results for %s (benchmark: %s) failed" % (benchmark,sha_key))
12 |
13 |
--------------------------------------------------------------------------------
/src/tools/eval_motchallenge.py:
--------------------------------------------------------------------------------
1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking.
2 | Christoph Heindl, 2017
3 | https://github.com/cheind/py-motmetrics
4 | Modified by Xingyi Zhou
5 | """
6 |
7 | import argparse
8 | import glob
9 | import os
10 | import logging
11 | import motmetrics as mm
12 | import pandas as pd
13 | from collections import OrderedDict
14 | from pathlib import Path
15 |
16 | def parse_args():
17 | parser = argparse.ArgumentParser(description="""
18 | Compute metrics for trackers using MOTChallenge ground-truth data.
19 | Files
20 | -----
21 | All file content, ground truth and test files, have to comply with the
22 | format described in
23 | Milan, Anton, et al.
24 | "Mot16: A benchmark for multi-object tracking."
25 | arXiv preprint arXiv:1603.00831 (2016).
26 | https://motchallenge.net/
27 | Structure
28 | ---------
29 | Layout for ground truth data
30 | <GT_ROOT>/<SEQUENCE_1>/gt/gt.txt
31 | <GT_ROOT>/<SEQUENCE_2>/gt/gt.txt
32 | ...
33 | Layout for test data
34 | <TEST_ROOT>/<SEQUENCE_1>.txt
35 | <TEST_ROOT>/<SEQUENCE_2>.txt
36 | ...
37 | Sequences of ground truth and test will be matched according to the `<SEQUENCE_X>`
38 | string.""", formatter_class=argparse.RawTextHelpFormatter)
39 |
40 | parser.add_argument('groundtruths', type=str, help='Directory containing ground truth files.')
41 | parser.add_argument('tests', type=str, help='Directory containing tracker result files')
42 | parser.add_argument('--gt_type', type=str, default='')
43 | parser.add_argument('--eval_official', action='store_true')
44 | parser.add_argument('--loglevel', type=str, help='Log level', default='info')
45 | parser.add_argument('--fmt', type=str, help='Data format', default='mot15-2D')
46 | parser.add_argument('--solver', type=str, help='LAP solver to use')
47 | return parser.parse_args()
48 |
49 | def compare_dataframes(gts, ts):
50 | accs = []
51 | names = []
52 | for k, tsacc in ts.items():
53 | if k in gts:
54 | logging.info('Comparing {}...'.format(k))
55 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5))
56 | names.append(k)
57 | else:
58 | logging.warning('No ground truth for {}, skipping.'.format(k))
59 |
60 | return accs, names
61 |
62 | if __name__ == '__main__':
63 |
64 | args = parse_args()
65 |
66 | loglevel = getattr(logging, args.loglevel.upper(), None)
67 | if not isinstance(loglevel, int):
68 | raise ValueError('Invalid log level: {} '.format(args.loglevel))
69 | logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s - %(message)s', datefmt='%I:%M:%S')
70 |
71 | if args.solver:
72 | mm.lap.default_solver = args.solver
73 |
74 | gt_type = args.gt_type
75 | print('gt_type', gt_type)
76 | gtfiles = glob.glob(
77 | os.path.join(args.groundtruths, '*/gt/gt{}.txt'.format(gt_type)))
78 | print('gt_files', gtfiles)
79 | tsfiles = [f for f in glob.glob(os.path.join(args.tests, '*.txt')) if not os.path.basename(f).startswith('eval')]
80 |
81 | logging.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles)))
82 | logging.info('Available LAP solvers {}'.format(mm.lap.available_solvers))
83 | logging.info('Default LAP solver \'{}\''.format(mm.lap.default_solver))
84 | logging.info('Loading files.')
85 |
86 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) for f in gtfiles])
87 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=args.fmt)) for f in tsfiles])
88 |
89 | mh = mm.metrics.create()
90 | accs, names = compare_dataframes(gt, ts)
91 |
92 | logging.info('Running metrics')
93 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', \
94 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', \
95 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects']
96 | summary = mh.compute_many(
97 | accs, names=names,
98 | metrics=metrics, generate_overall=True)
99 | # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True)
100 | # print(mm.io.render_summary(
101 | # summary, formatters=mh.formatters,
102 | # namemap=mm.io.motchallenge_metric_names))
103 | div_dict = {
104 | 'num_objects': ['num_false_positives', 'num_misses',
105 | 'num_switches', 'num_fragmentations'],
106 | 'num_unique_objects': ['mostly_tracked', 'partially_tracked',
107 | 'mostly_lost']}
108 | for divisor in div_dict:
109 | for divided in div_dict[divisor]:
110 | summary[divided] = (summary[divided] / summary[divisor])
111 | fmt = mh.formatters
112 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches',
113 | 'num_fragmentations', 'mostly_tracked', 'partially_tracked',
114 | 'mostly_lost']
115 | for k in change_fmt_list:
116 | fmt[k] = fmt['mota']
117 | print(mm.io.render_summary(
118 | summary, formatters=fmt,
119 | namemap=mm.io.motchallenge_metric_names))
120 | if args.eval_official:
121 | metrics = mm.metrics.motchallenge_metrics + ['num_objects']
122 | summary = mh.compute_many(
123 | accs, names=names,
124 | metrics=metrics, generate_overall=True)
125 | print(mm.io.render_summary(
126 | summary, formatters=mh.formatters,
127 | namemap=mm.io.motchallenge_metric_names))
128 | logging.info('Completed')
129 |
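Note: after computing the raw motmetrics summary, the script turns count metrics into rates: error counts are divided by num_objects and the mostly/partially-tracked counts by num_unique_objects (the div_dict block). A small sketch of that normalization on a hand-made one-row summary table; the numbers are invented for illustration:

    import pandas as pd

    # Fake summary with absolute counts, mimicking a subset of the columns used above.
    summary = pd.DataFrame({'num_objects': [1000], 'num_misses': [150],
                            'num_false_positives': [50], 'num_unique_objects': [40],
                            'mostly_tracked': [25], 'mostly_lost': [5]})

    div_dict = {'num_objects': ['num_false_positives', 'num_misses'],
                'num_unique_objects': ['mostly_tracked', 'mostly_lost']}
    for divisor, divided_cols in div_dict.items():
        for col in divided_cols:
            summary[col] = summary[col] / summary[divisor]

    print(summary)  # counts are now fractions of num_objects / num_unique_objects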
--------------------------------------------------------------------------------
/src/tools/get_mot_17.sh:
--------------------------------------------------------------------------------
1 | mkdir ../../data/mot17
2 | cd ../../data/mot17
3 | wget https://motchallenge.net/data/MOT17.zip
4 | unzip MOT17.zip
5 | rm MOT17.zip
6 | mkdir annotations
7 | cd ../../src/tools/
8 | python convert_mot_to_coco.py
9 | python convert_mot_det_to_results.py
--------------------------------------------------------------------------------
/src/tools/remove_optimizers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | IN_PATH = '../../centertrack_models/'
4 | OUT_PATH = '../../models/'
5 | REMOVE_KEYS = ['base.fc']
6 |
7 | if __name__ == '__main__':
8 | models = sorted(os.listdir(IN_PATH))
9 | for model in models:
10 | model_path = IN_PATH + model
11 | print(model)
12 | data = torch.load(model_path)
13 | state_dict = data['state_dict']
14 | keys = state_dict.keys()
15 | delete_keys = []
16 | for k in keys:
17 | should_delete = False
18 | for remove_key in REMOVE_KEYS:
19 | if remove_key in k:
20 | should_delete = True
21 | if should_delete:
22 | delete_keys.append(k)
23 | for k in delete_keys:
24 | print('delete ', k)
25 | del state_dict[k]
26 | out_data = {'epoch': data['epoch'], 'state_dict': state_dict}
27 | torch.save(out_data, OUT_PATH + model)
28 |
--------------------------------------------------------------------------------
/src/tools/vis_tracking_kitti.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import glob
5 | import sys
6 | from collections import defaultdict
7 | from pathlib import Path
8 |
9 | DATA_PATH = '../../data/kitti_tracking/'
10 | IMG_PATH = DATA_PATH + 'data_tracking_image_2/testing/image_02/'
11 | SAVE_VIDEO = False
12 | IS_GT = False
13 |
14 | cats = ['Pedestrian', 'Car', 'Cyclist']
15 | cat_ids = {cat: i for i, cat in enumerate(cats)}
16 | COLORS = [(255, 0, 255), (122, 122, 255), (255, 0, 0)]
17 |
18 | def draw_bbox(img, bboxes, c=(255, 0, 255)):
19 | for bbox in bboxes:
20 | color = COLORS[int(bbox[5])]
21 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
22 | (int(bbox[2]), int(bbox[3])),
23 | color, 2, lineType=cv2.LINE_AA)
24 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
25 | txt = '{}'.format(int(bbox[4]))
26 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])),
27 | cv2.FONT_HERSHEY_SIMPLEX, 0.5,
28 | color, thickness=1, lineType=cv2.LINE_AA)
29 |
30 | if __name__ == '__main__':
31 | seqs = os.listdir(IMG_PATH)
32 | if SAVE_VIDEO:
33 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video'
34 | if not os.path.exists(save_path):
35 | os.mkdir(save_path)
36 | print('save_video_path', save_path)
37 | for seq in sorted(seqs):
38 | print('seq', seq)
39 | if '.DS_Store' in seq:
40 | continue
41 | # if SAVE_VIDEO:
42 | # fourcc = cv2.VideoWriter_fourcc(*'XVID')
43 | # video = cv2.VideoWriter(
44 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750))
45 |
46 |
47 | preds = {}
48 | for K in range(1, len(sys.argv)):
49 | pred_path = sys.argv[K] + '/{}.txt'.format(seq)
50 | pred_file = open(pred_path, 'r')
51 | preds[K] = defaultdict(list)
52 | for line in pred_file:
53 | tmp = line[:-1].split(' ')
54 | frame_id = int(tmp[0])
55 | track_id = int(tmp[1])
56 | cat_id = cat_ids[tmp[2]]
57 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])]
58 | score = float(tmp[17])
59 | preds[K][frame_id].append(bbox + [track_id, cat_id, score])
60 |
61 | images_path = '{}/{}/'.format(IMG_PATH, seq)
62 | images = os.listdir(images_path)
63 | num_images = len([image for image in images if 'png' in image])
64 |
65 | for i in range(num_images):
66 | frame_id = i
67 | file_path = '{}/{:06d}.png'.format(images_path, i)
68 | img = cv2.imread(file_path)
69 | for K in range(1, len(sys.argv)):
70 | img_pred = img.copy()
71 | draw_bbox(img_pred, preds[K][frame_id])
72 | cv2.imshow('pred{}'.format(K), img_pred)
73 | cv2.waitKey()
74 | # if SAVE_VIDEO:
75 | # video.write(img_pred)
76 | # if SAVE_VIDEO:
77 | # video.release()
78 |
--------------------------------------------------------------------------------
/src/tools/vis_tracking_mot.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import os
4 | import glob
5 | import sys
6 | from collections import defaultdict
7 | from pathlib import Path
8 |
9 | GT_PATH = '../../data/mot17/test/'
10 | IMG_PATH = GT_PATH
11 | SAVE_VIDEO = True
12 | RESIZE = 2
13 | IS_GT = False
14 |
15 | def draw_bbox(img, bboxes, c=(255, 0, 255)):
16 | for bbox in bboxes:
17 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
18 | (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])),
19 | c, 2, lineType=cv2.LINE_AA)
20 | ct = [bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2]
21 | txt = '{}'.format(bbox[4])
22 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])),
23 | cv2.FONT_HERSHEY_SIMPLEX, 0.5,
24 | (255, 122, 255), thickness=1, lineType=cv2.LINE_AA)
25 |
26 | if __name__ == '__main__':
27 | seqs = os.listdir(GT_PATH)
28 | if SAVE_VIDEO:
29 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video'
30 | if not os.path.exists(save_path):
31 | os.mkdir(save_path)
32 | print('save_video_path', save_path)
33 | for seq in sorted(seqs):
34 | print('seq', seq)
35 | # if len(sys.argv) > 2 and not sys.argv[2] in seq:
36 | # continue
37 | if '.DS_Store' in seq:
38 | continue
39 | # if SAVE_VIDEO:
40 | # fourcc = cv2.VideoWriter_fourcc(*'XVID')
41 | # video = cv2.VideoWriter(
42 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750))
43 | seq_path = '{}/{}/'.format(GT_PATH, seq)
44 | if IS_GT:
45 | ann_path = seq_path + 'gt/gt.txt'
46 | else:
47 | ann_path = seq_path + 'det/det.txt'
48 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',')
49 | print('anns shape', anns.shape)
50 | image_to_anns = defaultdict(list)
51 | for i in range(anns.shape[0]):
52 | if (not IS_GT) or (int(anns[i][6]) == 1 and float(anns[i][8]) >= 0.25):
53 | frame_id = int(anns[i][0])
54 | track_id = int(anns[i][1])
55 | bbox = (anns[i][2:6] / RESIZE).tolist()
56 | image_to_anns[frame_id].append(bbox + [track_id])
57 |
58 | image_to_preds = {}
59 | for K in range(1, len(sys.argv)):
60 | image_to_preds[K] = defaultdict(list)
61 | pred_path = sys.argv[K] + '/{}.txt'.format(seq)
62 | try:
63 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=',')
64 | except:
65 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=' ')
66 | for i in range(preds.shape[0]):
67 | frame_id = int(preds[i][0])
68 | track_id = int(preds[i][1])
69 | bbox = (preds[i][2:6] / RESIZE).tolist()
70 | image_to_preds[K][frame_id].append(bbox + [track_id])
71 |
72 | img_path = seq_path + 'img1/'
73 | images = os.listdir(img_path)
74 | num_images = len([image for image in images if 'jpg' in image])
75 |
76 | for i in range(num_images):
77 | frame_id = i + 1
78 | file_name = '{}/img1/{:06d}.jpg'.format(seq, i + 1)
79 | file_path = IMG_PATH + file_name
80 | img = cv2.imread(file_path)
81 | if RESIZE != 1:
82 | img = cv2.resize(img, (img.shape[1] // RESIZE, img.shape[0] // RESIZE))
83 | for K in range(1, len(sys.argv)):
84 | img_pred = img.copy()
85 | draw_bbox(img_pred, image_to_preds[K][frame_id])
86 | cv2.imshow('pred{}'.format(K), img_pred)
87 | draw_bbox(img, image_to_anns[frame_id])
88 | cv2.imshow('gt', img)
89 | cv2.waitKey()
90 | # if SAVE_VIDEO:
91 | # video.write(img_pred)
92 | # if SAVE_VIDEO:
93 | # video.release()
94 |
--------------------------------------------------------------------------------
/videos/nuscenes_mini.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/videos/nuscenes_mini.mp4
--------------------------------------------------------------------------------