├── .gitignore ├── .gitmodules ├── LICENSE ├── NOTICE ├── README.md ├── experiments ├── coco_pose_tracking.sh ├── coco_tracking.sh ├── crowdhuman.sh ├── kitti_fulltrain.sh ├── kitti_half.sh ├── kitti_half_sc.sh ├── mot17_fulltrain.sh ├── mot17_fulltrain_sc.sh ├── mot17_half.sh ├── mot17_half_sc.sh ├── nuScenes_3Ddetection_e140.sh └── nuScenes_3Dtracking.sh ├── readme ├── DATA.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── coco_det.gif ├── coco_pose.gif ├── fig2.png └── nuscenes_3d.gif ├── requirements.txt ├── src ├── _init_paths.py ├── convert_onnx.py ├── demo.py ├── lib │ ├── dataset │ │ ├── dataset_factory.py │ │ ├── datasets │ │ │ ├── coco.py │ │ │ ├── coco_hp.py │ │ │ ├── crowdhuman.py │ │ │ ├── custom_dataset.py │ │ │ ├── kitti.py │ │ │ ├── kitti_tracking.py │ │ │ ├── mot.py │ │ │ └── nuscenes.py │ │ └── generic_dataset.py │ ├── detector.py │ ├── external │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── nms.pyx │ │ └── setup.py │ ├── logger.py │ ├── model │ │ ├── data_parallel.py │ │ ├── decode.py │ │ ├── losses.py │ │ ├── model.py │ │ ├── networks │ │ │ ├── backbones │ │ │ │ ├── dla.py │ │ │ │ ├── mobilenet.py │ │ │ │ └── resnet.py │ │ │ ├── base_model.py │ │ │ ├── dla.py │ │ │ ├── dlav0.py │ │ │ ├── generic_network.py │ │ │ ├── necks │ │ │ │ ├── dlaup.py │ │ │ │ └── msraup.py │ │ │ ├── resdcn.py │ │ │ └── resnet.py │ │ ├── scatter_gather.py │ │ └── utils.py │ ├── opts.py │ ├── trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── ddd_utils.py │ │ ├── debugger.py │ │ ├── image.py │ │ ├── post_process.py │ │ ├── tracker.py │ │ └── utils.py ├── main.py ├── test.py └── tools │ ├── _init_paths.py │ ├── annot_bbox.py │ ├── convert_crowdhuman_to_coco.py │ ├── convert_kittitrack_to_coco.py │ ├── convert_mot_det_to_results.py │ ├── convert_mot_to_coco.py │ ├── convert_nuScenes.py │ ├── eval_kitti_track │ ├── data │ │ └── tracking │ │ │ ├── evaluate_tracking.seqmap │ │ │ ├── evaluate_tracking.seqmap.test │ │ │ ├── evaluate_tracking.seqmap.training │ │ │ ├── evaluate_trackingtrain_1-2.seqmap │ │ │ ├── evaluate_trackingtrain_2-2.seqmap │ │ │ ├── evaluate_trackingval_half.seqmap │ │ │ ├── label_02 │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ ├── label_02_train_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ └── label_02_val_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ ├── evaluate_tracking.py │ ├── mailpy.py │ └── munkres.py │ ├── eval_motchallenge.py │ ├── get_mot_17.sh │ ├── nuScenes_lib │ ├── 
export_kitti.py │ └── utils_kitti.py │ ├── remove_optimizers.py │ ├── vis_tracking_kitti.py │ └── vis_tracking_mot.py └── videos └── nuscenes_mini.mp4 /.gitignore: -------------------------------------------------------------------------------- 1 | videos/ 2 | *.zip 3 | centernet_models/* 4 | centertrack_models/* 5 | */slurm-*.out 6 | src/slurm/ 7 | results/* 8 | src/lib/models/networks/DCNv2 9 | src/lib/models/networks/DCNv2_04 10 | src/lib/models/networks/DCNv2_10 11 | src/lib/model/networks/DCNv2 12 | src/lib/model/networks/DCNv2_04 13 | src/lib/model/networks/DCNv2_10 14 | .idea/ 15 | legacy/* 16 | models/* 17 | .DS_Store 18 | debug/* 19 | *.DS_Store 20 | data 21 | !src/tools/eval_kitti_track/data 22 | exp 23 | exp/* 24 | *.json 25 | *.mat 26 | models/* 27 | model/* 28 | src/.vscode/* 29 | src/paths.py 30 | preds/* 31 | *.h5 32 | *.pth 33 | *.checkpoint 34 | # Byte-compiled / optimized / DLL files 35 | __pycache__/ 36 | *.py[cod] 37 | *$py.class 38 | 39 | # C extensions 40 | *.so 41 | 42 | # Distribution / packaging 43 | .Python 44 | env/ 45 | build/ 46 | develop-eggs/ 47 | dist/ 48 | downloads/ 49 | eggs/ 50 | .eggs/ 51 | lib64/ 52 | parts/ 53 | sdist/ 54 | var/ 55 | wheels/ 56 | *.egg-info/ 57 | .installed.cfg 58 | *.egg 59 | 60 | # PyInstaller 61 | # Usually these files are written by a python script from a template 62 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 63 | *.manifest 64 | *.spec 65 | 66 | # Installer logs 67 | pip-log.txt 68 | pip-delete-this-directory.txt 69 | 70 | # Unit test / coverage reports 71 | htmlcov/ 72 | .tox/ 73 | .coverage 74 | .coverage.* 75 | .cache 76 | nosetests.xml 77 | coverage.xml 78 | *.cover 79 | .hypothesis/ 80 | 81 | # Translations 82 | *.mo 83 | *.pot 84 | 85 | # Django stuff: 86 | *.log 87 | local_settings.py 88 | 89 | # Flask stuff: 90 | instance/ 91 | .webassets-cache 92 | 93 | # Scrapy stuff: 94 | .scrapy 95 | 96 | # Sphinx documentation 97 | docs/_build/ 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Jupyter Notebook 103 | .ipynb_checkpoints 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # celery beat schedule file 109 | celerybeat-schedule 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # dotenv 115 | .env 116 | 117 | # virtualenv 118 | .venv 119 | venv/ 120 | ENV/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "nuscenes-devkit"] 2 | path = src/tools/nuscenes-devkit 3 | url = https://github.com/nutonomy/nuscenes-devkit 4 | branch = master 5 | 6 | [submodule "nuscenes-devkit-alpha02"] 7 | path = src/tools/nuscenes-devkit-alpha02 8 | url = https://github.com/nutonomy/nuscenes-devkit 9 | branch = e2d8c4b331567dc0bc36271dc21cdef65970eb7e 10 | 11 | [submodule "DCN-v2"] 12 | path = src/lib/model/networks/DCNv2 13 | url = https://github.com/CharlesShang/DCNv2/ 14 | branch = master -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Xingyi Zhou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated 
documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tracking Objects as Points 2 | Simultaneous object detection and tracking using center points: 3 | ![](readme/fig2.png) 4 | > [**Tracking Objects as Points**](http://arxiv.org/abs/2004.01177), 5 | > Xingyi Zhou, Vladlen Koltun, Philipp Krähenbühl, 6 | > *arXiv technical report ([arXiv 2004.01177](http://arxiv.org/abs/2004.01177))* 7 | 8 | 9 | @article{zhou2020tracking, 10 | title={Tracking Objects as Points}, 11 | author={Zhou, Xingyi and Koltun, Vladlen and Kr{\"a}henb{\"u}hl, Philipp}, 12 | journal={ECCV}, 13 | year={2020} 14 | } 15 | 16 | Contact: [zhouxy2017@gmail.com](mailto:zhouxy2017@gmail.com). Any questions or discussion are welcome! 17 | 18 | ## Abstract 19 | Tracking has traditionally been the art of following interest points through space and time. This changed with the rise of powerful deep networks. Nowadays, tracking is dominated by pipelines that perform object detection followed by temporal association, also known as tracking-by-detection. In this paper, we present a simultaneous detection and tracking algorithm that is simpler, faster, and more accurate than the state of the art. Our tracker, CenterTrack, applies a detection model to a pair of images and detections from the prior frame. Given this minimal input, CenterTrack localizes objects and predicts their associations with the previous frame. That's it. CenterTrack is simple, online (no peeking into the future), and real-time. It achieves 67.3% MOTA on the MOT17 challenge at 22 FPS and 89.4% MOTA on the KITTI tracking benchmark at 15 FPS, setting a new state of the art on both datasets. CenterTrack is easily extended to monocular 3D tracking by regressing additional 3D attributes. Using monocular video input, it achieves 28.3% AMOTA@0.2 on the newly released nuScenes 3D tracking benchmark, substantially outperforming the monocular baseline on this benchmark while running at 28 FPS. 20 | 21 | 22 | ## Features at a glance 23 | 24 | - One-sentence method summary: Our model takes the current frame, the previous frame, and a heatmap rendered from previous tracking results as input, and predicts the current detection heatmap as well as their offsets to centers in the previous frame. 25 | 26 | - The model can be trained on still **image datasets** if videos are not available. 27 | 28 | - Easily extends to monocular 3d object tracking, multi-category tracking, and pose tracking. 
29 | 30 | - State-of-the-art performance on MOT17, KITTI, and nuScenes monocular tracking benchmarks. 31 | 32 | ## Main results 33 | 34 | ### Pedestrian tracking on MOT17 test set 35 | 36 | | Detection | MOTA | FPS | 37 | |--------------|-----------|--------| 38 | |Public | 61.5 | 22 | 39 | |Private | 67.8 | 22 | 40 | 41 | ### 2D vehicle tracking on KITTI test set (with flip test) 42 | 43 | | MOTA | FPS | 44 | |-------------|--------| 45 | | 89.44 | 15 | 46 | 47 | ### 3D tracking on nuScenes test set 48 | 49 | | AMOTA @ 0.2 | AMOTA | FPS | 50 | |---------------|---------|--------| 51 | | 27.8 | 4.6 | 28 | 52 | 53 | Besides benchmark evaluation, we also provide models for 80-category tracking and pose tracking trained on COCO. See the sample visual results below (Video files from [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) and [YOLO](https://pjreddie.com/darknet/yolov2/)). 54 | 55 |
![](readme/coco_det.gif)
56 | 57 |
![](readme/coco_pose.gif)
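To make the one-sentence method summary in *Features at a glance* concrete, below is a self-contained toy sketch of the association step: each current-frame detection predicts an offset to its center in the previous frame and is greedily matched to the closest previous track. All names here are illustrative, not this repo's API; the real tracker (see `src/lib/utils/tracker.py`) additionally processes detections in confidence order and limits the matching distance.

~~~
import numpy as np

def greedy_associate(curr_centers, offsets, prev_centers, prev_ids):
    # Displace each current center by its predicted offset to estimate where
    # that object was in the previous frame, then take the closest unused track.
    ids, used, next_id = [], set(), max(prev_ids) + 1
    for center, offset in zip(curr_centers, offsets):
        dists = np.linalg.norm(prev_centers - (center + offset), axis=1)
        j = int(np.argmin(dists))
        if j not in used:
            used.add(j)
            ids.append(prev_ids[j])
        else:                      # no free previous track: start a new one
            ids.append(next_id)
            next_id += 1
    return ids

curr = np.array([[100., 120.], [300., 80.]])   # current-frame centers
off  = np.array([[ -5.,  -2.], [ 10.,  0.]])   # predicted offsets to the previous frame
prev = np.array([[ 96., 118.], [310., 80.]])   # previous track centers
print(greedy_associate(curr, off, prev, prev_ids=[1, 2]))  # -> [1, 2]
~~~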
58 | 59 | All models and details are available in our [Model zoo](readme/MODEL_ZOO.md). 60 | 61 | ## Installation 62 | 63 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions. 64 | 65 | ## Use CenterTrack 66 | 67 | We support demo for videos, webcam, and image folders. 68 | 69 | First, download the models (By default, [nuscenes\_3d\_tracking](https://drive.google.com/file/d/1gPQFzqneDtT_PjJRRuyskRsNTRHXovw1) for monocular 3D tracking, [coco_tracking](https://drive.google.com/file/d/11DEfWa0TKYzNqY3CXR51WVvjMb4oRl08) for 80-category detection and 70 | [coco_pose_tracking](https://drive.google.com/file/d/1yGFC_Q9wzSHL1d4eZW_44EBB2H42YKYt) for pose tracking) 71 | from the [Model zoo](readme/MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 72 | 73 | We provide a video clip from the [nuScenes dataset](https://www.nuscenes.org/?externalData=all&mapData=all&modalities=Any) in `videos/nuscenes_mini.mp4`. 74 | To test monocular 3D tracking on this video, run 75 | 76 | ~~~ 77 | python demo.py tracking,ddd --load_model ../models/nuScenes_3Dtracking.pth --dataset nuscenes --pre_hm --track_thresh 0.1 --demo ../videos/nuscenes_mini.mp4 --test_focal_length 633 78 | ~~~ 79 | 80 | You will need to specify `test_focal_length` for monocular 3D tracking demo to convert the image coordinate system back to 3D. 81 | The value `633` is half of a typical focal length (`~1266`) in nuScenes dataset in input resolution `1600x900`. 82 | The mini demo video is in an input resolution of `800x448`, so we need to use a half focal length. 83 | You don't need to set the `test_focal_length` when testing on the original nuScenes data. 84 | 85 | If setup correctly, you will see an output video like: 86 | 87 |
![](readme/nuscenes_3d.gif)
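The focal-length scaling described above is just the image resize ratio applied to the camera intrinsics; a quick sanity check (plain arithmetic, not code from this repo):

~~~
# nuScenes images are 1600x900 with a typical focal length of ~1266 pixels.
# The demo clip is resized to 800x448, i.e. half the original width, so the
# focal length shrinks by the same factor.
orig_focal, orig_w, demo_w = 1266, 1600, 800
print(orig_focal * demo_w / orig_w)  # 633.0 -> the value passed to --test_focal_length
~~~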
88 | 89 | 90 | Similarly, for 80-category tracking on images/ video, run: 91 | 92 | ~~~ 93 | python demo.py tracking --load_model ../models/coco_tracking.pth --demo /path/to/image/or/folder/or/video 94 | ~~~ 95 | 96 | If you want to test with person tracking models, you need to add `--num_class 1`: 97 | 98 | ~~~ 99 | python demo.py tracking --load_model ../models/mot17_half.pth --num_class 1 --demo /path/to/image/or/folder/or/video 100 | ~~~ 101 | 102 | For webcam demo, run 103 | 104 | ~~~ 105 | python demo.py tracking --load_model ../models/coco_tracking.pth --demo webcam 106 | ~~~ 107 | 108 | For monocular 3D tracking, run 109 | 110 | ~~~ 111 | python demo.py tracking,ddd --demo webcam --load_model ../models/coco_tracking.pth --demo /path/to/image/or/folder/or/video/or/webcam 112 | ~~~ 113 | 114 | Similarly, for pose tracking, run: 115 | 116 | ~~~ 117 | python demo.py tracking,multi_pose --load_model ../models/coco_pose.pth --demo /path/to/image/or/folder/or/video/or/webcam 118 | ~~~ 119 | The result for the example images should look like: 120 | 121 | You can add `--debug 2` to visualize the heatmap and offset predictions. 122 | 123 | To use this CenterTrack in your own project, you can 124 | 125 | ~~~ 126 | import sys 127 | CENTERTRACK_PATH = /path/to/CenterTrack/src/lib/ 128 | sys.path.insert(0, CENTERTRACK_PATH) 129 | 130 | from detector import Detector 131 | from opts import opts 132 | 133 | MODEL_PATH = /path/to/model 134 | TASK = 'tracking' # or 'tracking,multi_pose' for pose tracking and 'tracking,ddd' for monocular 3d tracking 135 | opt = opts().init('{} --load_model {}'.format(TASK, MODEL_PATH).split(' ')) 136 | detector = Detector(opt) 137 | 138 | images = ['''image read from open cv or from a video'''] 139 | for img in images: 140 | ret = detector.run(img)['results'] 141 | ~~~ 142 | Each `ret` will be a list dict: `[{'bbox': [x1, y1, x2, y2], 'tracking_id': id, ...}]` 143 | 144 | ## Training on custom dataset 145 | 146 | If you want to train CenterTrack on your own dataset, you can use `--dataset custom` and manually specify the annotation file, image path, input resolutions, and number of categories. You still need to create the annotation files in COCO format (referring to the many `convert_X_to_coco.py` examples in `tools`). For example, you can use the following command to train on our [mot17 experiment](experiments/mot17_half_sc.sh) without using the pre-defined mot dataset file: 147 | 148 | ~~~ 149 | python main.py tracking --exp_id mot17_half_sc --dataset custom --custom_dataset_ann_path ../data/mot17/annotations/train_half.json --custom_dataset_img_path ../data/mot17/train/ --input_h 544 --input_w 960 --num_classes 1 --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 150 | 151 | ~~~ 152 | 153 | ## Benchmark Evaluation and Training 154 | 155 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to setup the datasets. Then check [GETTING_STARTED.md](readme/GETTING_STARTED.md) to reproduce the results in the paper. 156 | We provide scripts for all the experiments in the [experiments](experiments) folder. 157 | 158 | ## License 159 | 160 | CenterTrack is developed upon [CenterNet](https://github.com/xingyizhou/CenterNet). Both codebases are released under MIT License themselves. Some code of CenterNet are from third-parties with different licenses, please check the CenterNet repo for details. 
In addition, this repo uses [py-motmetrics](https://github.com/cheind/py-motmetrics) for MOT evaluation and [nuscenes-devkit](https://github.com/nutonomy/nuscenes-devkit) for nuScenes evaluation and preprocessing. See [NOTICE](NOTICE) for detail. Please note the licenses of each dataset. Most of the datasets we used in this project are under non-commercial licenses. 161 | 162 | -------------------------------------------------------------------------------- /experiments/coco_pose_tracking.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train, the model is finetuned from a CenterNet detection model from the CenterNet model zoo. 3 | python main.py tracking,multi_pose --exp_id coco_pose_tracking --dataset coco_hp --load_model ../models/multi_pose_dla_3x.pth --gpus 0,1,2,3,4,5,6,7 --batch_size 128 --lr 5e-4 --num_workers 16 --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 -------------------------------------------------------------------------------- /experiments/coco_tracking.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train, the model is finetuned from a CenterNet detection model from the CenterNet model zoo. 3 | python main.py tracking --exp_id coco_tracking --tracking --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --batch_size 128 --lr 5e-4 --num_workers 16 --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 -------------------------------------------------------------------------------- /experiments/crowdhuman.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id crowdhuman --dataset crowdhuman --ltrb_amodal --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --num_epochs 140 --lr_step 90,120 --save_point 60,90 --gpus 0,1,2,3 --batch_size 64 --lr 2.5e-4 --num_workers 16 4 | cd .. 
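# Note on the flags above: CrowdHuman is a still-image dataset, so the prior-frame
# input is simulated during training ("training on static image data" in the paper).
# As described in the paper: --shift/--scale offset the two simulated frames to mimic
# camera motion, --hm_disturb jitters the rendered prior centers, --lost_disturb
# randomly drops prior boxes (simulated missed detections), and --fp_disturb injects
# spurious boxes near the ground truth (simulated false positives).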
-------------------------------------------------------------------------------- /experiments/kitti_fulltrain.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version train --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 --load_model ../models/nuScenes_3Ddetection_e140.pth 4 | # test 5 | python test.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version test --pre_hm --track_thresh 0.4 --resume 6 | -------------------------------------------------------------------------------- /experiments/kitti_half.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version train_half --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 --load_model ../models/nuScenes_3Ddetection_e140.pth 4 | # test 5 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --resume -------------------------------------------------------------------------------- /experiments/kitti_half_sc.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id kitti_half_sc --dataset kitti_tracking --dataset_version train_half --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 4 | # test 5 | python test.py tracking --exp_id kitti_half_sc --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --pre_thresh 0.5 --resume -------------------------------------------------------------------------------- /experiments/mot17_fulltrain.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version 17trainval --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 --load_model ../models/crowdhuman.pth 4 | # test 5 | python test.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | cd .. -------------------------------------------------------------------------------- /experiments/mot17_fulltrain_sc.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17trainval --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 4 | # test 5 | python test.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | # test with public detection 7 | python test.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume --public_det --load_results ../data/mot17/results/test_det.json 8 | cd .. 
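# Note: the training command here matches mot17_fulltrain.sh except that it omits
# --load_model ../models/crowdhuman.pth; per readme/MODEL_ZOO.md, the *_sc variants
# are trained from ImageNet initialization rather than finetuned from the CrowdHuman model.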
-------------------------------------------------------------------------------- /experiments/mot17_half.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halftrain --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 --load_model ../models/crowdhuman.pth 4 | # test 5 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | # test with public detection 7 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume --public_det --load_results ../data/mot17/results/val_half_det.json 8 | cd .. -------------------------------------------------------------------------------- /experiments/mot17_half_sc.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_half_sc --dataset mot --dataset_version 17halftrain --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 4 | # test 5 | python test.py tracking --exp_id mot17_half_sc --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | cd .. -------------------------------------------------------------------------------- /experiments/nuScenes_3Ddetection_e140.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ddd --exp_id nuScenes_3Ddetection_e140 --dataset nuscenes --batch_size 128 --gpus 0,1,2,3,4,5,6,7 --lr 5e-4 --num_epochs 140 --lr_step 90,120 --save_point 90,120 4 | # test 5 | python test.py ddd --exp_id nuScenes_3Ddetection_e140 --dataset nuscenes --resume 6 | cd .. -------------------------------------------------------------------------------- /experiments/nuScenes_3Dtracking.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking,ddd --exp_id nuScenes_3Dtracking --dataset nuscenes --pre_hm --load_model ../models/nuScenes_3Ddetection_e140.pth --shift 0.01 --scale 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --hm_disturb 0.05 --batch_size 64 --gpus 0,1,2,3 --lr 2.5e-4 --save_point 60 4 | # test 5 | python test.py tracking,ddd --exp_id nuScenes_3Dtracking --dataset nuscenes --pre_hm --track_thresh 0.1 --resume 6 | cd .. -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation or training, you will need to setup datasets. 4 | 5 | ### MOT 2017 6 | 7 | MOT is is used to train and evaluate the system. We will only use the training set (and create a validation set from it) for developing this project. 8 | 9 | We have packed the dataset preprocessing code as a script. 10 | 11 | ~~~ 12 | cd $CenterTrack_ROOT/tools/ 13 | bash get_mot_17.sh 14 | ~~~ 15 | 16 | The script includes: 17 | 18 | - Download and unzip the dataset from [MOT17 website](https://motchallenge.net/data/MOT17/). 19 | - Convert it into COCO format using `tools/convert_mot_to_coco.py`. 
20 | - Create the half-half train/ val set described in the paper. 21 | - Convert the public detection into a specific format. 22 | - The output data structure should be: 23 | 24 | ~~~ 25 | ${CenterTrack_ROOT} 26 | |-- data 27 | `-- |-- mot17 28 | `-- |--- train 29 | | |--- MOT17-02-FRCNN 30 | | | |--- img1 31 | | | |--- gt 32 | | | | |--- gt.txt 33 | | | | |--- gt_train_half.txt 34 | | | | |--- gt_val_half.txt 35 | | | |--- det 36 | | | | |--- det.txt 37 | | | | |--- det_train_half.txt 38 | | | | |--- det_val_half.txt 39 | | |--- ... 40 | |--- test 41 | | |--- MOT17-01-FRCNN 42 | |---|--- ... 43 | `---| annotations 44 | |--- train_half.json 45 | |--- val_half.json 46 | |--- train.json 47 | `--- test.json 48 | ~~~ 49 | 50 | ### KITTI Tracking 51 | 52 | We use KITTI Tracking to train and evaluate the system as well. Again, we will only use the training set (and create a validation set from it) for developing this project. Note that KITTI Tracking is 2D tracking and is different from KITTI detection (they use the same image, but different train/ val set). 53 | 54 | - Download [images](http://www.cvlibs.net/download.php?file=data_tracking_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_tracking_label_2.zip), and [calibration information](http://www.cvlibs.net/download.php?file=data_tracking_calib.zip) (not used in 2D tracking, only if you want to demo 3D detection/ tracking) from [KITTI Tracking website](http://www.cvlibs.net/datasets/kitti/eval_tracking.php) and unzip. Place or symlink the data as below: 55 | 56 | ~~~ 57 | ${CenterTrack_ROOT} 58 | |-- data 59 | `-- |-- kitti_tracking 60 | `-- |-- data_tracking_image_2 61 | | |-- training 62 | | |-- |-- image_02 63 | | |-- |-- |-- 0000 64 | | |-- |-- |-- ... 65 | |-- |-- testing 66 | |-- label_02 67 | | |-- 0000.txt 68 | | |-- ... 69 | `-- data_tracking_calib 70 | ~~~ 71 | 72 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. 73 | - The resulting data structure should look like: 74 | 75 | ~~~ 76 | ${CenterTrack_ROOT} 77 | |-- data 78 | `-- |-- kitti_tracking 79 | `-- |-- data_tracking_image_2 80 | | |-- training 81 | | | |-- image_02 82 | | | | |-- 0000 83 | | | | |-- ... 84 | |-- |-- testing 85 | |-- label_02 86 | | |-- 0000.txt 87 | | |-- ... 88 | |-- data_tracking_calib 89 | |-- label_02_val_half 90 | | |-- 0000.txt 91 | | |-- ... 92 | |-- label_02_train_half 93 | | |-- 0000.txt 94 | | |-- ... 95 | `-- annotations 96 | |-- tracking_train.json 97 | |-- tracking_test.json 98 | |-- tracking_train_half.json 99 | `-- tracking_val_half.json 100 | ~~~ 101 | 102 | ### nuScenes 103 | 104 | nuScenes is used for training and evaluating 3D object tracking. We also used nuScenes for pretraining KITTI models. 105 | 106 | 107 | - Download the dataset from [nuScenes website](https://www.nuscenes.org/download). You only need to download the "Keyframe blobs", and only need the images data. You also need to download the maps and all metadata to make the nuScenes API happy. 108 | 109 | 110 | - Unzip, rename, and place (or symlink) the data as below. You will need to merge folders from different zip files. 
111 | 112 | ~~~ 113 | ${CenterTrack_ROOT} 114 | |-- data 115 | `-- |-- nuscenes 116 | `-- |-- v1.0-trainval 117 | | |-- samples 118 | | | |-- CAM_BACK 119 | | | | | -- xxx.jpg 120 | | | |-- CAM_BACK_LEFT 121 | | | |-- CAM_BACK_RIGHT 122 | | | |-- CAM_FRONT 123 | | | |-- CAM_FRONT_LEFT 124 | | | |-- CAM_FRONT_RIGHT 125 | |-- |-- maps 126 | `-- |-- v1.0-trainval_meta 127 | ~~~ 128 | 129 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. It will create `train.json`, `val.json`, `test.json` under `data/nuscenes/annotations`. nuScenes API is required for running the data preprocessing. 130 | 131 | ### CrowdHuman 132 | 133 | CrowdHuman is used for pretraining the MOT model. Only the training set is used. 134 | 135 | - Download the dataset from [its website](https://www.crowdhuman.org/download.html). 136 | 137 | - Unzip and place (or symlink) the data as below. You will need to merge folders from different zip files. 138 | 139 | ~~~ 140 | ${CenterTrack_ROOT} 141 | |-- data 142 | `-- |-- crowdhuman 143 | |-- |-- CrowdHuman_train 144 | | | |-- Images 145 | |-- |-- CrowdHuman_val 146 | | | |-- Images 147 | |-- |-- annotation_train.odgt 148 | |-- |-- annotation_val.odgt 149 | ~~~ 150 | 151 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. It will create `train.json`, `val.json` under `data/crowdhuman/annotations`. 152 | 153 | ### COCO 154 | 155 | COCO is used to train a demo system for 80-category tracking or pose tracking. 156 | The models are NOT evaluated in any benchmarks. 157 | 158 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 159 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 160 | - Place the data (or create symlinks) to make the data folder like: 161 | 162 | ~~~ 163 | ${CenterTrack_ROOT} 164 | |-- data 165 | `-- |-- coco 166 | `-- |-- annotations 167 | | |-- instances_train2017.json 168 | | |-- instances_val2017.json 169 | | |-- person_keypoints_train2017.json 170 | | |-- person_keypoints_val2017.json 171 | | |-- image_info_test-dev2017.json 172 | |---|-- train2017 173 | |---|-- val2017 174 | `---|-- test2017 175 | ~~~ 176 | 177 | 178 | ## References 179 | Please cite the corresponding References if you use the datasets. 180 | 181 | ~~~ 182 | @article{MOT16, 183 | title = {{MOT}16: {A} Benchmark for Multi-Object Tracking}, 184 | shorttitle = {MOT16}, 185 | url = {http://arxiv.org/abs/1603.00831}, 186 | journal = {arXiv:1603.00831 [cs]}, 187 | author = {Milan, A. and Leal-Taix\'{e}, L. and Reid, I. and Roth, S. and Schindler, K.}, 188 | month = mar, 189 | year = {2016}, 190 | note = {arXiv: 1603.00831}, 191 | keywords = {Computer Science - Computer Vision and Pattern Recognition} 192 | } 193 | 194 | @article{shao2018crowdhuman, 195 | title={Crowdhuman: A benchmark for detecting human in a crowd}, 196 | author={Shao, Shuai and Zhao, Zijian and Li, Boxun and Xiao, Tete and Yu, Gang and Zhang, Xiangyu and Sun, Jian}, 197 | journal={arXiv:1805.00123}, 198 | year={2018} 199 | } 200 | 201 | @INPROCEEDINGS{Geiger2012CVPR, 202 | author = {Andreas Geiger and Philip Lenz and Raquel Urtasun}, 203 | title = {Are we ready for Autonomous Driving? 
The KITTI Vision Benchmark Suite}, 204 | booktitle = {CVPR}, 205 | year = {2012} 206 | } 207 | 208 | @inproceedings{lin2014microsoft, 209 | title={Microsoft {COCO}: Common objects in context}, 210 | author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, 211 | booktitle={ECCV}, 212 | year={2014}, 213 | } 214 | 215 | @inproceedings{nuscenes2019, 216 | title={{nuScenes}: A multimodal dataset for autonomous driving}, 217 | author={Holger Caesar and Varun Bankiti and Alex H. Lang and Sourabh Vora and Venice Erin Liong and Qiang Xu and Anush Krishnan and Yu Pan and Giancarlo Baldan and Oscar Beijbom}, 218 | booktitle={CVPR}, 219 | year={2020} 220 | } 221 | ~~~ -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterTrack. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterTrack_ROOT/models/`. 8 | 9 | ### MOT17 10 | 11 | To test the tracking performance on MOT17 with our pretrained model, run 12 | 13 | ~~~ 14 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_half.pth 15 | ~~~ 16 | 17 | This will give a MOTA of `66.1` if set up correctly. `--pre_hm` is to enable the input heatmap. `--ltrb_amodal` is to use the left, top, right, bottom bounding box representation to enable detecting out-of-image bounding box (We observed this is important for MOT datasets). And `--track_thresh` and `--pre_thresh` are the score threshold for predicting a bounding box ($\theta$ in the paper) and feeding the heatmap to the next frame ($\tau$ in the paper), respectively. 18 | 19 | To test with public detection, run 20 | 21 | ~~~ 22 | python test.py tracking --exp_id mot17_half_public --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_half.pth --public_det --load_results ../data/mot17/results/val_half_det.json 23 | ~~~ 24 | 25 | The expected MOTA is `63.1`. 26 | 27 | To test on the test set, run 28 | 29 | ~~~ 30 | python test.py tracking --exp_id mot17_fulltrain_public --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_fulltrain_sc.pth --public_det --load_results ../data/mot17/results/test_det.json 31 | ~~~ 32 | 33 | The Test set evaluation requires submitting to the official test server. 34 | We discourage the users to submit our predictions to the test set to prevent test set abuse. 35 | You can append `--debug 2` to above commends to visualize the predictions. 36 | 37 | See the experiments folder for testing in other settings. 38 | 39 | 40 | ### KITTI Tracking 41 | 42 | Run: 43 | 44 | ~~~ 45 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --load_model ../models/kitti_half.pth 46 | ~~~ 47 | 48 | The expected MOTA is `88.7`. 
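The two thresholds used throughout these commands behave the same on every benchmark: `--track_thresh` ($\theta$ in the paper) decides which predicted boxes are reported, while `--pre_thresh` ($\tau$) decides which boxes are rendered into the heatmap passed to the next frame. A self-contained toy sketch of that filtering (illustrative names, not this repo's code):

~~~
def split_by_thresholds(detections, track_thresh=0.4, pre_thresh=0.5):
    # Boxes reported as tracking output for the current frame (theta).
    output = [d for d in detections if d['score'] > track_thresh]
    # Boxes fed back as the prior heatmap for the next frame (tau).
    fed_forward = [d for d in detections if d['score'] > pre_thresh]
    return output, fed_forward

dets = [{'score': 0.9}, {'score': 0.45}, {'score': 0.2}]
out, nxt = split_by_thresholds(dets)
print(len(out), len(nxt))  # 2 1
~~~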
49 | 50 | ### nuScenes 51 | 52 | Run: 53 | 54 | ~~~ 55 | python test.py tracking,ddd --exp_id nuScenes_3Dtracking --load_model ../models/nuScenes_3Dtracking.pth --dataset nuscenes --track_thresh 0.1 --pre_hm 56 | ~~~ 57 | 58 | The expected AMOTA is `6.8`. 59 | 60 | ## Training 61 | We have packed all the training scripts in the [experiments](../experiments) folder. 62 | The experiment names correspond to the model name in the [model zoo](MODEL_ZOO.md). 63 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 64 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will found the latest model with the same `exp_id`. 65 | Some experiments rely on pretraining on another model. In this case, download the pretrained model from our model zoo or train that model first. -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6, CUDA 10.0, and [PyTorch]((http://pytorch.org/)) v1.0. 5 | It should be compatible with PyTorch <=1.4 and python >=0.4 (you will need to switch DCNv2 version for PyTorch <1.0). 6 | After installing Anaconda: 7 | 8 | 0. [Optional but highly recommended] create a new conda environment. 9 | 10 | ~~~ 11 | conda create --name CenterTrack python=3.6 12 | ~~~ 13 | And activate the environment. 14 | 15 | ~~~ 16 | conda activate CenterTrack 17 | ~~~ 18 | 19 | 1. Install PyTorch: 20 | 21 | ~~~ 22 | conda install pytorch torchvision -c pytorch 23 | ~~~ 24 | 25 | 26 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 27 | 28 | ~~~ 29 | pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 30 | ~~~ 31 | 32 | 3. Clone this repo: 33 | 34 | ~~~ 35 | CenterTrack_ROOT=/path/to/clone/CenterTrack 36 | git clone --recursive https://github.com/xingyizhou/CenterTrack $CenterTrack_ROOT 37 | ~~~ 38 | 39 | You can manually install the [submodules](../.gitmodules) if you forget `--recursive`. 40 | 41 | 4. Install the requirements 42 | 43 | ~~~ 44 | pip install -r requirements.txt 45 | ~~~ 46 | 47 | 48 | 5. Compile deformable convolutional (from [DCNv2](https://github.com/CharlesShang/DCNv2/)). 49 | 50 | ~~~ 51 | cd $CenterTrack_ROOT/src/lib/model/networks/ 52 | # git clone https://github.com/CharlesShang/DCNv2/ # clone if it is not automatically downloaded by `--recursive`. 53 | cd DCNv2 54 | ./make.sh 55 | ~~~ 56 | 57 | 6. Download pertained models for [monocular 3D tracking](https://drive.google.com/open?id=1e8zR1m1QMJne-Tjp-2iY_o81hn2CiQRt), [80-category tracking](https://drive.google.com/open?id=1tJCEJmdtYIh8VuN8CClGNws3YO7QGd40), or [pose tracking](https://drive.google.com/open?id=1H0YvFYCOIZ06EzAkC2NxECNQGXxK27hH) and move them to `$CenterTrack_ROOT/models/`. More models can be found in [Model zoo](MODEL_ZOO.md). -------------------------------------------------------------------------------- /readme/MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # MODEL ZOO 2 | 3 | ### Common settings and notes 4 | 5 | - The experiments are run with PyTorch 1.0, CUDA 10.0, and CUDNN 7.5. 6 | - Training times are measured on our servers with TITAN V GPUs (12 GB Memory). 7 | - Testing times are measured on our local machine with TITAN Xp GPU. 
8 | - The models can be downloaded directly from [Google drive](https://drive.google.com/drive/folders/1y_CWlbboW_dfOx6zT9MU4ugLaLc6FEE8). 9 | 10 | ## 2D bounding box Tracking 11 | 12 | ### MOT17 13 | 14 | | Model | GPUs |Train time| Test time | Valication MOTA | Test MOTA | Download | 15 | |-----------------------|------|----------|-----------|------------------|------------|----------| 16 | | [mot17_fulltrain](../experiments/mot17_fulltrain.sh) | 4 | 4h | 45ms | - |67.3 (Private Detection)| [model](https://drive.google.com/file/d/1JYqO_IEoHpd7JEzZRXZSVesnEL4e-tnf) | 17 | | [mot17_fulltrain_sc](../experiments/mot17_fulltrain_sc.sh) | 4 | 4h | 45ms | - |61.4 (Public Detection) | [model](https://drive.google.com/file/d/17rtVMuFOnRzXj0_3egrFI5j-wc8XviDZ) | 18 | | [mot17_half](../experiments/mot17_half.sh) | 4 | 2h | 45ms | 66.1 | - | [model](https://drive.google.com/file/d/1rJ0fzRcpRQPjaN17lcqfKgsz-wJRifHh) | 19 | | [mot17_half_sc](../experiments/mot17_half_sc.sh) | 4 | 2h | 45ms | 60.7 | - | [model](https://drive.google.com/file/d/1o_cCo92WiVg8mgwyESd1Gg1AZYnq1iAJ) | 20 | | [crowdhuman](../experiments/crowdhuman.sh) | 4 | 21h | 45ms | 52.2 | - |[model](https://drive.google.com/file/d/1SD31FLwbXArcX3LXnRCqh6RF-q38nO7f) | 21 | 22 | #### Notes 23 | 24 | - `*_half` corresponds to the half-half video train/ val split mentioned in the paper. 25 | - `*_fulltrain` corresponds to train on the full training set, and evaluate on the official test server. These models are provided for arXiv and demo purposes. It is highly NOT recommended to submit our predictions to the test server, for not abusing the test set. Usually the validation results are all you need for developing. 26 | - `mot17_half`/ `mot17_fulltrain` are finetuned on the `crowdhuman` model, and `mot17_half_sc`/ `mot17_fulltrain_sc` are trained from ImageNet initialization. 27 | - The validation results are both using private detection. 28 | - All the MOT models are trained for 70 epochs, with learning rate dropped at the 60th epoch. 29 | - The crowdhuman model is trained on CrowdHuman dataset with the "training on static image data" technic in our paper, and evaluate directly in MOT17 validation set. The crowdhuman pretraining uses 140 epochs, with learning rate dropped at 90 and 140 epochs. 30 | - The training schedules are not well studies. 31 | - We observe about 1 MOTA random noise for MOT models. 32 | - If the resulting MOTA of your self-trained model is not desired, playing around with the `--track_thresh` and `--pre_thresh` sometimes gives a better number (See Appendix H of the paper). 33 | - The MOT models, even trained on the full training set, still does not look great for in-the-wild videos. The crowdhuman model is a better choice for real world application. However, be aware that both datasets are in non-commercial licenses. 
34 | 35 | 36 | ### KITTI 2D Tracking 37 | 38 | | Model |GPUs| Train time| Test time | Validation MOTA | Test MOTA | Download | 39 | |-----------------------|----|-----------|-----------|------------------|------------|-----------| 40 | | [kitti_fulltrain](../experiments/kitti_fulltrain.sh) (flip)| 2 | 9h | 66 | - | 89.44 | [model](https://drive.google.com/file/d/13oUEpeZ8bVQ6z7A6SH88de4SwLgh_kMB) | 41 | | [kitti_half](../experiments/kitti_half.sh) | 2 | 4.5h | 40 | 88.7 | - | [model](https://drive.google.com/file/d/1AZiFG0p3VxB2pA_5XIkbue4ASfxaA3e1) | 42 | | [kitti_half_sc](../experiments/kitti_half_sc.sh) | 2 | 4.5h | 40 | 84.5 | - | [model](https://drive.google.com/file/d/13rmdfi1rX3X7yFOndzyARTYO51uSNW0Z)| 43 | 44 | #### Notes 45 | 46 | - We use flip-test for the model we submitted to the test server (kitti_fulltrain_flip). 47 | - `kitti_fulltrain` are finetuned on the nuScenes_3Ddetection_e140 model (see below). 48 | - All the models are trained for 70 epochs. 49 | - We observe up to 1.5 MOTA jittering due to randomness. The results are reported for the best model. 50 | 51 | ## Monocular 3D Detection/ Tracking 52 | 53 | ### nuScenes 54 | 55 | | Model | GPUs |Train time| Test time | Val AMOTA@0.2 | Val AMOTA | Val mAP | Download | 56 | |--------------------------|------|----------|-----------|---------------|-----------|---------|-----------| 57 | | [nuScenes_3Ddetection_e140](../experiments/nuScenes_3Ddetection_e140.sh)| 8 | 72h | 28ms | - | - | 30.27 | [model](https://drive.google.com/file/d/1o989b1tANh49uHhNbsCCJ5J57FGiaFut) | 58 | | [nuScenes_3Dtracking](../experiments/nuScenes_3Dtracking.sh) | 8 | 40h | 28ms | 28.3 | 6.8 | - | [model](https://drive.google.com/file/d/1gPQFzqneDtT_PjJRRuyskRsNTRHXovw1) | 59 | 60 | #### Notes 61 | 62 | - Both models are trained on our DGX servers with 8x 32G V100 GPUs. 63 | - The 3D detection model is trained on all 6 camera images of the keyframes for 140 epochs. It does not include attributes and velocity prediction and is different from the model we used in the 3D detection leaderboard. See the CenterNet repo for details about the full 3D detection model we used for test set evaluation. 64 | - The 3D tracking model is finetuned on the 3D detection model for 70 epochs. 65 | - Training on 4 GPUs or 8x 12G GPUs with smaller batchsize is OK, if the [linear learning rate rule](https://arxiv.org/abs/1706.02677) is applied. 66 | 67 | ## COCO Tracking (for demo purpose only) 68 | 69 | | Model |GPUs| Train time| Test time | Download | 70 | |-----------------------|----|-----------|-----------|-----------| 71 | | [coco_tracking](../experiments/coco_tracking.sh) | 8 | 39h | 30ms | [model](https://drive.google.com/file/d/11DEfWa0TKYzNqY3CXR51WVvjMb4oRl08) | 72 | | [coco_pose_tracking](../experiments/coco_pose_tracking.sh) | 8 | 19h | 33ms | [model](https://drive.google.com/file/d/1yGFC_Q9wzSHL1d4eZW_44EBB2H42YKYt)| 73 | 74 | - Both models are trained with the "training on static image data" technic in our paper. 75 | - The models are not evaluated on any benchmarks since there are no suitable ones in this setting. We provide them for demo purpose only. 
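Regarding the linear learning rate rule mentioned in the nuScenes notes above, a quick worked example using numbers from the provided experiment scripts:

~~~
# Linear scaling: keep lr / batch_size constant when the total batch size changes.
base_batch, base_lr = 128, 5e-4     # e.g. coco_tracking.sh (8 GPUs)
new_batch = 64                      # e.g. the 4-GPU experiments
print(base_lr * new_batch / base_batch)  # 2.5e-4, the lr used in nuScenes_3Dtracking.sh
~~~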
-------------------------------------------------------------------------------- /readme/coco_det.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/coco_det.gif -------------------------------------------------------------------------------- /readme/coco_pose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/coco_pose.gif -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/fig2.png -------------------------------------------------------------------------------- /readme/nuscenes_3d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/nuscenes_3d.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | easydict 7 | scipy 8 | pyquaternion 9 | nuscenes-devkit 10 | pyyaml 11 | motmetrics 12 | scikit-learn==0.22.2 -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/convert_onnx.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Script to convert a trained CenterNet model to ONNX, currently only 3 | support non-DCN models. 
4 | ''' 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import _init_paths 10 | import os 11 | import json 12 | import cv2 13 | import numpy as np 14 | import time 15 | from progress.bar import Bar 16 | import torch 17 | import copy 18 | 19 | from model.model import create_model, load_model 20 | from opts import opts 21 | from dataset.dataset_factory import dataset_factory 22 | from detector import Detector 23 | 24 | 25 | def convert_onnx(opt): 26 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 27 | opt.model_output_list = True 28 | if opt.gpus[0] >= 0: 29 | opt.device = torch.device('cuda') 30 | else: 31 | opt.device = torch.device('cpu') 32 | Dataset = dataset_factory[opt.test_dataset] 33 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 34 | print(opt) 35 | model = create_model( 36 | opt.arch, opt.heads, opt.head_conv, opt=opt) 37 | if opt.load_model != '': 38 | model = load_model(model, opt.load_model, opt) 39 | model = model.to(opt.device) 40 | model.eval() 41 | dummy_input1 = torch.randn(1, 3, opt.input_h, opt.input_w).to(opt.device) 42 | 43 | if opt.tracking: 44 | dummy_input2 = torch.randn(1, 3, opt.input_h, opt.input_w).to(opt.device) 45 | if opt.pre_hm: 46 | dummy_input3 = torch.randn(1, 1, opt.input_h, opt.input_w).to(opt.device) 47 | torch.onnx.export( 48 | model, (dummy_input1, dummy_input2, dummy_input3), 49 | "../models/{}.onnx".format(opt.exp_id)) 50 | else: 51 | torch.onnx.export( 52 | model, (dummy_input1, dummy_input2), 53 | "../models/{}.onnx".format(opt.exp_id)) 54 | else: 55 | torch.onnx.export( 56 | model, (dummy_input1, ), 57 | "../models/{}.onnx".format(opt.exp_id)) 58 | if __name__ == '__main__': 59 | opt = opts().parse() 60 | convert_onnx(opt) 61 | 62 | -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import sys 9 | import cv2 10 | import json 11 | import copy 12 | import numpy as np 13 | from opts import opts 14 | from detector import Detector 15 | 16 | 17 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 18 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 19 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'display'] 20 | 21 | def demo(opt): 22 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 23 | opt.debug = max(opt.debug, 1) 24 | detector = Detector(opt) 25 | 26 | if opt.demo == 'webcam' or \ 27 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext: 28 | is_video = True 29 | # demo on video stream 30 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo) 31 | else: 32 | is_video = False 33 | # Demo on images sequences 34 | if os.path.isdir(opt.demo): 35 | image_names = [] 36 | ls = os.listdir(opt.demo) 37 | for file_name in sorted(ls): 38 | ext = file_name[file_name.rfind('.') + 1:].lower() 39 | if ext in image_ext: 40 | image_names.append(os.path.join(opt.demo, file_name)) 41 | else: 42 | image_names = [opt.demo] 43 | 44 | # Initialize output video 45 | out = None 46 | out_name = opt.demo[opt.demo.rfind('/') + 1:] 47 | print('out_name', out_name) 48 | if opt.save_video: 49 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 50 | fourcc = cv2.VideoWriter_fourcc(*'H264') 51 | out = cv2.VideoWriter('../results/{}.mp4'.format( 52 | opt.exp_id + '_' + 
out_name),fourcc, opt.save_framerate, ( 53 | opt.video_w, opt.video_h)) 54 | 55 | if opt.debug < 5: 56 | detector.pause = False 57 | cnt = 0 58 | results = {} 59 | 60 | while True: 61 | if is_video: 62 | _, img = cam.read() 63 | if img is None: 64 | save_and_exit(opt, out, results, out_name) 65 | else: 66 | if cnt < len(image_names): 67 | img = cv2.imread(image_names[cnt]) 68 | else: 69 | save_and_exit(opt, out, results, out_name) 70 | cnt += 1 71 | 72 | # resize the original video for saving video results 73 | if opt.resize_video: 74 | img = cv2.resize(img, (opt.video_w, opt.video_h)) 75 | 76 | # skip the first X frames of the video 77 | if cnt < opt.skip_first: 78 | continue 79 | 80 | cv2.imshow('input', img) 81 | 82 | # track or detect the image. 83 | ret = detector.run(img) 84 | 85 | # log run time 86 | time_str = 'frame {} |'.format(cnt) 87 | for stat in time_stats: 88 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 89 | print(time_str) 90 | 91 | # results[cnt] is a list of dicts: 92 | # [{'bbox': [x1, y1, x2, y2], 'tracking_id': id, 'category_id': c, ...}] 93 | results[cnt] = ret['results'] 94 | 95 | # save debug image to video 96 | if opt.save_video: 97 | out.write(ret['generic']) 98 | if not is_video: 99 | cv2.imwrite('../results/demo{}.jpg'.format(cnt), ret['generic']) 100 | 101 | # esc to quit and finish saving video 102 | if cv2.waitKey(1) == 27: 103 | save_and_exit(opt, out, results, out_name) 104 | return 105 | save_and_exit(opt, out, results) 106 | 107 | 108 | def save_and_exit(opt, out=None, results=None, out_name=''): 109 | if opt.save_results and (results is not None): 110 | save_dir = '../results/{}_results.json'.format(opt.exp_id + '_' + out_name) 111 | print('saving results to', save_dir) 112 | json.dump(_to_list(copy.deepcopy(results)), 113 | open(save_dir, 'w')) 114 | if opt.save_video and out is not None: 115 | out.release() 116 | sys.exit(0) 117 | 118 | def _to_list(results): 119 | for img_id in results: 120 | for t in range(len(results[img_id])): 121 | for k in results[img_id][t]: 122 | if isinstance(results[img_id][t][k], (np.ndarray, np.float32)): 123 | results[img_id][t][k] = results[img_id][t][k].tolist() 124 | return results 125 | 126 | if __name__ == '__main__': 127 | opt = opts().init() 128 | demo(opt) 129 | -------------------------------------------------------------------------------- /src/lib/dataset/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from .datasets.coco import COCO 12 | from .datasets.kitti import KITTI 13 | from .datasets.coco_hp import COCOHP 14 | from .datasets.mot import MOT 15 | from .datasets.nuscenes import nuScenes 16 | from .datasets.crowdhuman import CrowdHuman 17 | from .datasets.kitti_tracking import KITTITracking 18 | from .datasets.custom_dataset import CustomDataset 19 | 20 | dataset_factory = { 21 | 'custom': CustomDataset, 22 | 'coco': COCO, 23 | 'kitti': KITTI, 24 | 'coco_hp': COCOHP, 25 | 'mot': MOT, 26 | 'nuscenes': nuScenes, 27 | 'crowdhuman': CrowdHuman, 28 | 'kitti_tracking': KITTITracking, 29 | } 30 | 31 | 32 | def get_dataset(dataset): 33 | return dataset_factory[dataset] 34 | -------------------------------------------------------------------------------- 
/src/lib/dataset/datasets/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | import copy 11 | 12 | from ..generic_dataset import GenericDataset 13 | 14 | class COCO(GenericDataset): 15 | default_resolution = [512, 512] 16 | num_categories = 80 17 | class_name = [ 18 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 19 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 20 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 21 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 22 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 23 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 24 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 25 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 26 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 27 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 28 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 29 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 30 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 31 | _valid_ids = [ 32 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 33 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 34 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 35 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 36 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 37 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 38 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 39 | 82, 84, 85, 86, 87, 88, 89, 90] 40 | cat_ids = {v: i + 1 for i, v in enumerate(_valid_ids)} 41 | num_joints = 17 42 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 43 | [11, 12], [13, 14], [15, 16]] 44 | edges = [[0, 1], [0, 2], [1, 3], [2, 4], 45 | [4, 6], [3, 5], [5, 6], 46 | [5, 7], [7, 9], [6, 8], [8, 10], 47 | [6, 12], [5, 11], [11, 12], 48 | [12, 14], [14, 16], [11, 13], [13, 15]] 49 | max_objs = 128 50 | def __init__(self, opt, split): 51 | # load annotations 52 | data_dir = os.path.join(opt.data_dir, 'coco') 53 | img_dir = os.path.join(data_dir, '{}2017'.format(split)) 54 | if opt.trainval: 55 | split = 'test' 56 | ann_path = os.path.join( 57 | data_dir, 'annotations', 58 | 'image_info_test-dev2017.json') 59 | else: 60 | ann_path = os.path.join( 61 | data_dir, 'annotations', 62 | 'instances_{}2017.json').format(split) 63 | 64 | self.images = None 65 | # load image list and coco 66 | super(COCO, self).__init__(opt, split, ann_path, img_dir) 67 | 68 | self.num_samples = len(self.images) 69 | 70 | print('Loaded {} {} samples'.format(split, self.num_samples)) 71 | 72 | def _to_float(self, x): 73 | return float("{:.2f}".format(x)) 74 | 75 | def convert_eval_format(self, all_bboxes): 76 | detections = [] 77 | for image_id in all_bboxes: 78 | if type(all_bboxes[image_id]) != type({}): 79 | # newest format 80 | for j in range(len(all_bboxes[image_id])): 81 | item = all_bboxes[image_id][j] 82 | cat_id = item['class'] - 1 83 | category_id = self._valid_ids[cat_id] 84 | bbox = item['bbox'] 85 | bbox[2] -= bbox[0] 86 | bbox[3] -= bbox[1] 87 | bbox_out = list(map(self._to_float, bbox[0:4])) 88 | detection = { 89 | "image_id": int(image_id), 90 | "category_id": int(category_id), 91 | "bbox": 
bbox_out, 92 | "score": float("{:.2f}".format(item['score'])) 93 | } 94 | detections.append(detection) 95 | return detections 96 | 97 | def __len__(self): 98 | return self.num_samples 99 | 100 | def save_results(self, results, save_dir): 101 | json.dump(self.convert_eval_format(results), 102 | open('{}/results_coco.json'.format(save_dir), 'w')) 103 | 104 | def run_eval(self, results, save_dir): 105 | self.save_results(results, save_dir) 106 | coco_dets = self.coco.loadRes('{}/results_coco.json'.format(save_dir)) 107 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 108 | coco_eval.evaluate() 109 | coco_eval.accumulate() 110 | coco_eval.summarize() -------------------------------------------------------------------------------- /src/lib/dataset/datasets/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from ..generic_dataset import GenericDataset 12 | 13 | class COCOHP(GenericDataset): 14 | num_categories = 1 15 | class_name = [''] 16 | num_joints = 17 17 | default_resolution = [512, 512] 18 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 19 | [11, 12], [13, 14], [15, 16]] 20 | edges = [[0, 1], [0, 2], [1, 3], [2, 4], 21 | [4, 6], [3, 5], [5, 6], 22 | [5, 7], [7, 9], [6, 8], [8, 10], 23 | [6, 12], [5, 11], [11, 12], 24 | [12, 14], [14, 16], [11, 13], [13, 15]] 25 | max_objs = 32 26 | cat_ids = {1: 1} 27 | 28 | def __init__(self, opt, split): 29 | data_dir = os.path.join(opt.data_dir, 'coco') 30 | img_dir = os.path.join(data_dir, '{}2017'.format(split)) 31 | if split == 'test': 32 | ann_path = os.path.join(data_dir, 'annotations', 33 | 'image_info_test-dev2017.json').format(split) 34 | else: 35 | ann_path = os.path.join(data_dir, 'annotations', 36 | 'person_keypoints_{}2017.json').format(split) 37 | 38 | 39 | self.images = None 40 | # load image list and coco 41 | super(COCOHP, self).__init__(opt, split, ann_path, img_dir) 42 | 43 | if split == 'train': 44 | image_ids = self.coco.getImgIds() 45 | self.images = [] 46 | for img_id in image_ids: 47 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 48 | if len(idxs) > 0: 49 | self.images.append(img_id) 50 | 51 | self.num_samples = len(self.images) 52 | print('Loaded {} {} samples'.format(split, self.num_samples)) 53 | 54 | def _to_float(self, x): 55 | return float("{:.2f}".format(x)) 56 | 57 | def convert_eval_format(self, all_bboxes): 58 | # import pdb; pdb.set_trace() 59 | detections = [] 60 | for image_id in all_bboxes: 61 | if type(all_bboxes[image_id]) != type({}): 62 | # newest format 63 | for j in range(len(all_bboxes[image_id])): 64 | item = all_bboxes[image_id][j] 65 | if item['class'] != 1: 66 | continue 67 | category_id = 1 68 | keypoints = np.concatenate([ 69 | np.array(item['hps'], dtype=np.float32).reshape(-1, 2), 70 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 71 | detection = { 72 | "image_id": int(image_id), 73 | "category_id": int(category_id), 74 | "score": float("{:.2f}".format(item['score'])), 75 | "keypoints": keypoints 76 | } 77 | if 'bbox' in item: 78 | bbox = item['bbox'] 79 | bbox[2] -= bbox[0] 80 | bbox[3] -= bbox[1] 81 | bbox_out = list(map(self._to_float, bbox[0:4])) 82 | detection['bbox'] = bbox_out 83 | detections.append(detection) 84 | return detections 85 | 86 | def 
__len__(self): 87 | return self.num_samples 88 | 89 | def save_results(self, results, save_dir): 90 | json.dump(self.convert_eval_format(results), 91 | open('{}/results_cocohp.json'.format(save_dir), 'w')) 92 | 93 | 94 | def run_eval(self, results, save_dir): 95 | # result_json = os.path.join(opt.save_dir, "results.json") 96 | # detections = convert_eval_format(all_boxes) 97 | # json.dump(detections, open(result_json, "w")) 98 | self.save_results(results, save_dir) 99 | coco_dets = self.coco.loadRes('{}/results_cocohp.json'.format(save_dir)) 100 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 101 | coco_eval.evaluate() 102 | coco_eval.accumulate() 103 | coco_eval.summarize() 104 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 105 | coco_eval.evaluate() 106 | coco_eval.accumulate() 107 | coco_eval.summarize() 108 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/crowdhuman.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from ..generic_dataset import GenericDataset 12 | 13 | class CrowdHuman(GenericDataset): 14 | num_classes = 1 15 | num_joints = 17 16 | default_resolution = [512, 512] 17 | max_objs = 128 18 | class_name = ['person'] 19 | cat_ids = {1: 1} 20 | def __init__(self, opt, split): 21 | super(CrowdHuman, self).__init__() 22 | data_dir = os.path.join(opt.data_dir, 'crowdhuman') 23 | img_dir = os.path.join( 24 | data_dir, 'CrowdHuman_{}'.format(split), 'Images') 25 | ann_path = os.path.join(data_dir, 'annotations', 26 | '{}.json').format(split) 27 | 28 | print('==> initializing CityPersons {} data.'.format(split)) 29 | 30 | self.images = None 31 | # load image list and coco 32 | super(CrowdHuman, self).__init__(opt, split, ann_path, img_dir) 33 | 34 | self.num_samples = len(self.images) 35 | 36 | print('Loaded {} {} samples'.format(split, self.num_samples)) 37 | 38 | def _to_float(self, x): 39 | return float("{:.2f}".format(x)) 40 | 41 | def _save_results(self, records, fpath): 42 | with open(fpath,'w') as fid: 43 | for record in records: 44 | line = json.dumps(record)+'\n' 45 | fid.write(line) 46 | return fpath 47 | 48 | def convert_eval_format(self, all_bboxes): 49 | detections = [] 50 | person_id = 1 51 | for image_id in all_bboxes: 52 | if type(all_bboxes[image_id]) != type({}): 53 | # newest format 54 | dtboxes = [] 55 | for j in range(len(all_bboxes[image_id])): 56 | item = all_bboxes[image_id][j] 57 | if item['class'] != person_id: 58 | continue 59 | bbox = item['bbox'] 60 | bbox[2] -= bbox[0] 61 | bbox[3] -= bbox[1] 62 | bbox_out = list(map(self._to_float, bbox[0:4])) 63 | detection = { 64 | "tag": 1, 65 | "box": bbox_out, 66 | "score": float("{:.2f}".format(item['score'])) 67 | } 68 | dtboxes.append(detection) 69 | img_info = self.coco.loadImgs(ids=[image_id])[0] 70 | file_name = img_info['file_name'] 71 | detections.append({'ID': file_name[:-4], 'dtboxes': dtboxes}) 72 | return detections 73 | 74 | def __len__(self): 75 | return self.num_samples 76 | 77 | def save_results(self, results, save_dir): 78 | self._save_results(self.convert_eval_format(results), 79 | '{}/results_crowdhuman.odgt'.format(save_dir)) 80 | def run_eval(self, results, save_dir): 81 | self.save_results(results, save_dir) 82 | try: 
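      # Evaluation shells out to the external CrowdHuman evaluation script on the
      # saved .odgt detections; if that script is not installed, the except branch
      # below only prints a warning instead of failing.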
83 | os.system('python tools/crowdhuman_eval/demo.py ' + \ 84 | '../data/crowdhuman/annotation_val.odgt ' + \ 85 | '{}/results_crowdhuman.odgt'.format(save_dir)) 86 | except: 87 | print('Crowdhuman evaluation not setup!') -------------------------------------------------------------------------------- /src/lib/dataset/datasets/custom_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from ..generic_dataset import GenericDataset 6 | 7 | class CustomDataset(GenericDataset): 8 | num_categories = 1 9 | default_resolution = [-1, -1] 10 | class_name = [''] 11 | max_objs = 128 12 | cat_ids = {1: 1} 13 | def __init__(self, opt, split): 14 | assert (opt.custom_dataset_img_path != '') and \ 15 | (opt.custom_dataset_ann_path != '') and \ 16 | (opt.num_classes != -1) and \ 17 | (opt.input_h != -1) and (opt.input_w != -1), \ 18 | 'The following arguments must be specified for custom datasets: ' + \ 19 | 'custom_dataset_img_path, custom_dataset_ann_path, num_classes, ' + \ 20 | 'input_h, input_w.' 21 | img_dir = opt.custom_dataset_img_path 22 | ann_path = opt.custom_dataset_ann_path 23 | self.num_categories = opt.num_classes 24 | self.class_name = ['' for _ in range(self.num_categories)] 25 | self.default_resolution = [opt.input_h, opt.input_w] 26 | self.cat_ids = {i: i for i in range(1, self.num_categories + 1)} 27 | 28 | self.images = None 29 | # load image list and coco 30 | super().__init__(opt, split, ann_path, img_dir) 31 | 32 | self.num_samples = len(self.images) 33 | print('Loaded Custom dataset {} samples'.format(self.num_samples)) 34 | 35 | def __len__(self): 36 | return self.num_samples 37 | 38 | def run_eval(self, results, save_dir): 39 | pass 40 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/kitti.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | import math 12 | 13 | from ..generic_dataset import GenericDataset 14 | from utils.ddd_utils import compute_box_3d, project_to_image 15 | 16 | class KITTI(GenericDataset): 17 | num_categories = 3 18 | default_resolution = [384, 1280] 19 | # ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting', 20 | # 'Tram', 'Misc', 'DontCare'] 21 | class_name = ['Pedestrian', 'Car', 'Cyclist'] 22 | # negative id is for "not as negative sample for abs(id)". 
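  # (e.g. in cat_ids below, Van and Truck -> -2 so they are not penalized as
  #  negatives for Car, and Person_sitting -> -1 for Pedestrian; Tram and Misc
  #  -> -9999, i.e. no trained category)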
23 | # 0 for ignore losses for all categories in the bounding box region 24 | cat_ids = {1:1, 2:2, 3:3, 4:-2, 5:-2, 6:-1, 7:-9999, 8:-9999, 9:0} 25 | max_objs = 50 26 | def __init__(self, opt, split): 27 | data_dir = os.path.join(opt.data_dir, 'kitti') 28 | img_dir = os.path.join(data_dir, 'images', 'trainval') 29 | if opt.trainval: 30 | split = 'trainval' if split == 'train' else 'test' 31 | img_dir = os.path.join(data_dir, 'images', split) 32 | ann_path = os.path.join( 33 | data_dir, 'annotations', 'kitti_v2_{}.json').format(split) 34 | else: 35 | ann_path = os.path.join(data_dir, 36 | 'annotations', 'kitti_v2_{}_{}.json').format(opt.kitti_split, split) 37 | 38 | self.images = None 39 | # load image list and coco 40 | super(KITTI, self).__init__(opt, split, ann_path, img_dir) 41 | self.alpha_in_degree = False 42 | self.num_samples = len(self.images) 43 | 44 | print('Loaded {} {} samples'.format(split, self.num_samples)) 45 | 46 | 47 | def __len__(self): 48 | return self.num_samples 49 | 50 | def _to_float(self, x): 51 | return float("{:.2f}".format(x)) 52 | 53 | def convert_eval_format(self, all_bboxes): 54 | pass 55 | 56 | def save_results(self, results, save_dir): 57 | results_dir = os.path.join(save_dir, 'results_kitti') 58 | if not os.path.exists(results_dir): 59 | os.mkdir(results_dir) 60 | for img_id in results.keys(): 61 | out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id)) 62 | f = open(out_path, 'w') 63 | for i in range(len(results[img_id])): 64 | item = results[img_id][i] 65 | category_id = item['class'] 66 | cls_name_ind = category_id 67 | class_name = self.class_name[cls_name_ind - 1] 68 | if not ('alpha' in item): 69 | item['alpha'] = -1 70 | if not ('rot_y' in item): 71 | item['rot_y'] = -1 72 | if 'dim' in item: 73 | item['dim'] = [max(item['dim'][0], 0.01), 74 | max(item['dim'][1], 0.01), max(item['dim'][2], 0.01)] 75 | if not ('dim' in item): 76 | item['dim'] = [-1000, -1000, -1000] 77 | if not ('loc' in item): 78 | item['loc'] = [-1000, -1000, -1000] 79 | f.write('{} 0.0 0'.format(class_name)) 80 | f.write(' {:.2f}'.format(item['alpha'])) 81 | f.write(' {:.2f} {:.2f} {:.2f} {:.2f}'.format( 82 | item['bbox'][0], item['bbox'][1], item['bbox'][2], item['bbox'][3])) 83 | 84 | f.write(' {:.2f} {:.2f} {:.2f}'.format( 85 | item['dim'][0], item['dim'][1], item['dim'][2])) 86 | f.write(' {:.2f} {:.2f} {:.2f}'.format( 87 | item['loc'][0], item['loc'][1], item['loc'][2])) 88 | f.write(' {:.2f} {:.2f}\n'.format(item['rot_y'], item['score'])) 89 | f.close() 90 | 91 | def run_eval(self, results, save_dir): 92 | # import pdb; pdb.set_trace() 93 | self.save_results(results, save_dir) 94 | print('Results of IoU threshold 0.7') 95 | os.system('./tools/kitti_eval/evaluate_object_3d_offline_07 ' + \ 96 | '../data/kitti/training/label_val ' + \ 97 | '{}/results_kitti/'.format(save_dir)) 98 | print('Results of IoU threshold 0.5') 99 | os.system('./tools/kitti_eval/evaluate_object_3d_offline ' + \ 100 | '../data/kitti/training/label_val ' + \ 101 | '{}/results_kitti/'.format(save_dir)) 102 | 103 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/kitti_tracking.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | import math 12 | 13 | from 
..generic_dataset import GenericDataset 14 | from utils.ddd_utils import compute_box_3d, project_to_image 15 | 16 | class KITTITracking(GenericDataset): 17 | num_categories = 3 18 | default_resolution = [384, 1280] 19 | class_name = ['Pedestrian', 'Car', 'Cyclist'] 20 | # negative id is for "not as negative sample for abs(id)". 21 | # 0 for ignore losses for all categories in the bounding box region 22 | # ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting', 23 | # 'Tram', 'Misc', 'DontCare'] 24 | cat_ids = {1:1, 2:2, 3:3, 4:-2, 5:-2, 6:-1, 7:-9999, 8:-9999, 9:0} 25 | max_objs = 50 26 | def __init__(self, opt, split): 27 | data_dir = os.path.join(opt.data_dir, 'kitti_tracking') 28 | split_ = 'train' if opt.dataset_version != 'test' else 'test' #'test' 29 | img_dir = os.path.join( 30 | data_dir, 'data_tracking_image_2', '{}ing'.format(split_), 'image_02') 31 | ann_file_ = split_ if opt.dataset_version == '' else opt.dataset_version 32 | print('Warning! opt.dataset_version is not set') 33 | ann_path = os.path.join( 34 | data_dir, 'annotations', 'tracking_{}.json'.format( 35 | ann_file_)) 36 | self.images = None 37 | super(KITTITracking, self).__init__(opt, split, ann_path, img_dir) 38 | self.alpha_in_degree = False 39 | self.num_samples = len(self.images) 40 | 41 | print('Loaded {} {} samples'.format(split, self.num_samples)) 42 | 43 | 44 | def __len__(self): 45 | return self.num_samples 46 | 47 | def _to_float(self, x): 48 | return float("{:.2f}".format(x)) 49 | 50 | 51 | def save_results(self, results, save_dir): 52 | results_dir = os.path.join(save_dir, 'results_kitti_tracking') 53 | if not os.path.exists(results_dir): 54 | os.mkdir(results_dir) 55 | 56 | for video in self.coco.dataset['videos']: 57 | video_id = video['id'] 58 | file_name = video['file_name'] 59 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name)) 60 | f = open(out_path, 'w') 61 | images = self.video_to_images[video_id] 62 | 63 | for image_info in images: 64 | img_id = image_info['id'] 65 | if not (img_id in results): 66 | continue 67 | frame_id = image_info['frame_id'] 68 | for i in range(len(results[img_id])): 69 | item = results[img_id][i] 70 | category_id = item['class'] 71 | cls_name_ind = category_id 72 | class_name = self.class_name[cls_name_ind - 1] 73 | if not ('alpha' in item): 74 | item['alpha'] = -1 75 | if not ('rot_y' in item): 76 | item['rot_y'] = -10 77 | if 'dim' in item: 78 | item['dim'] = [max(item['dim'][0], 0.01), 79 | max(item['dim'][1], 0.01), max(item['dim'][2], 0.01)] 80 | if not ('dim' in item): 81 | item['dim'] = [-1, -1, -1] 82 | if not ('loc' in item): 83 | item['loc'] = [-1000, -1000, -1000] 84 | 85 | track_id = item['tracking_id'] if 'tracking_id' in item else -1 86 | f.write('{} {} {} -1 -1'.format(frame_id - 1, track_id, class_name)) 87 | f.write(' {:d}'.format(int(item['alpha']))) 88 | f.write(' {:.2f} {:.2f} {:.2f} {:.2f}'.format( 89 | item['bbox'][0], item['bbox'][1], item['bbox'][2], item['bbox'][3])) 90 | 91 | f.write(' {:d} {:d} {:d}'.format( 92 | int(item['dim'][0]), int(item['dim'][1]), int(item['dim'][2]))) 93 | f.write(' {:d} {:d} {:d}'.format( 94 | int(item['loc'][0]), int(item['loc'][1]), int(item['loc'][2]))) 95 | f.write(' {:d} {:.2f}\n'.format(int(item['rot_y']), item['score'])) 96 | f.close() 97 | 98 | def run_eval(self, results, save_dir): 99 | self.save_results(results, save_dir) 100 | os.system('python tools/eval_kitti_track/evaluate_tracking.py ' + \ 101 | '{}/results_kitti_tracking/ {}'.format( 102 | save_dir, self.opt.dataset_version)) 103 
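# Each results line written by save_results above follows the KITTI tracking
# layout: frame track_id type -1 -1 alpha x1 y1 x2 y2 dim(3) loc(3) rot_y score,
# with frame ids shifted to start at 0 and missing 3D fields replaced by the
# sentinel defaults (-1 / -10 / -1000) set earlier in that method.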
| 104 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/mot.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | from collections import defaultdict 11 | from ..generic_dataset import GenericDataset 12 | 13 | class MOT(GenericDataset): 14 | num_categories = 1 15 | default_resolution = [544, 960] 16 | class_name = [''] 17 | max_objs = 256 18 | cat_ids = {1: 1, -1: -1} 19 | def __init__(self, opt, split): 20 | self.dataset_version = opt.dataset_version 21 | self.year = int(self.dataset_version[:2]) 22 | print('Using MOT {} {}'.format(self.year, self.dataset_version)) 23 | data_dir = os.path.join(opt.data_dir, 'mot{}'.format(self.year)) 24 | 25 | if opt.dataset_version in ['17trainval', '17test']: 26 | ann_file = '{}.json'.format('train' if split == 'train' else \ 27 | 'test') 28 | elif opt.dataset_version == '17halftrain': 29 | ann_file = '{}.json'.format('train_half') 30 | elif opt.dataset_version == '17halfval': 31 | ann_file = '{}.json'.format('val_half') 32 | img_dir = os.path.join(data_dir, '{}'.format( 33 | 'test' if 'test' in self.dataset_version else 'train')) 34 | 35 | print('ann_file', ann_file) 36 | ann_path = os.path.join(data_dir, 'annotations', ann_file) 37 | 38 | self.images = None 39 | # load image list and coco 40 | super(MOT, self).__init__(opt, split, ann_path, img_dir) 41 | 42 | self.num_samples = len(self.images) 43 | print('Loaded MOT {} {} {} samples'.format( 44 | self.dataset_version, split, self.num_samples)) 45 | 46 | def _to_float(self, x): 47 | return float("{:.2f}".format(x)) 48 | 49 | def __len__(self): 50 | return self.num_samples 51 | 52 | def save_results(self, results, save_dir): 53 | results_dir = os.path.join(save_dir, 'results_mot{}'.format(self.dataset_version)) 54 | if not os.path.exists(results_dir): 55 | os.mkdir(results_dir) 56 | for video in self.coco.dataset['videos']: 57 | video_id = video['id'] 58 | file_name = video['file_name'] 59 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name)) 60 | f = open(out_path, 'w') 61 | images = self.video_to_images[video_id] 62 | tracks = defaultdict(list) 63 | for image_info in images: 64 | if not (image_info['id'] in results): 65 | continue 66 | result = results[image_info['id']] 67 | frame_id = image_info['frame_id'] 68 | for item in result: 69 | if not ('tracking_id' in item): 70 | item['tracking_id'] = np.random.randint(100000) 71 | if item['active'] == 0: 72 | continue 73 | tracking_id = item['tracking_id'] 74 | bbox = item['bbox'] 75 | bbox = [bbox[0], bbox[1], bbox[2], bbox[3]] 76 | tracks[tracking_id].append([frame_id] + bbox) 77 | rename_track_id = 0 78 | for track_id in sorted(tracks): 79 | rename_track_id += 1 80 | for t in tracks[track_id]: 81 | f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n'.format( 82 | t[0], rename_track_id, t[1], t[2], t[3]-t[1], t[4]-t[2])) 83 | f.close() 84 | 85 | def run_eval(self, results, save_dir): 86 | self.save_results(results, save_dir) 87 | gt_type_str = '{}'.format( 88 | '_train_half' if '17halftrain' in self.opt.dataset_version \ 89 | else '_val_half' if '17halfval' in self.opt.dataset_version \ 90 | else '') 91 | gt_type_str = '_val_half' if self.year in [16, 19] else gt_type_str 92 | 
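    # gt_type tells tools/eval_motchallenge.py which ground-truth split to score
    # against (the default gt, or the train_half / val_half split); it is
    # forwarded below through the optional --gt_type flag when it is non-empty.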
gt_type_str = '--gt_type {}'.format(gt_type_str) if gt_type_str != '' else \ 93 | '' 94 | os.system('python tools/eval_motchallenge.py ' + \ 95 | '../data/mot{}/{}/ '.format(self.year, 'train') + \ 96 | '{}/results_mot{}/ '.format(save_dir, self.dataset_version) + \ 97 | gt_type_str + ' --eval_official') 98 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | import subprocess 11 | USE_TENSORBOARD = True 12 | try: 13 | import tensorboardX 14 | print('Using tensorboardX') 15 | except: 16 | USE_TENSORBOARD = False 17 | 18 | class Logger(object): 19 | def __init__(self, opt): 20 | """Create a summary writer logging to log_dir.""" 21 | if not os.path.exists(opt.save_dir): 22 | os.makedirs(opt.save_dir) 23 | if not os.path.exists(opt.debug_dir): 24 | os.makedirs(opt.debug_dir) 25 | 26 | time_str = time.strftime('%Y-%m-%d-%H-%M') 27 | 28 | args = dict((name, getattr(opt, name)) for name in dir(opt) 29 | if not name.startswith('_')) 30 | file_name = os.path.join(opt.save_dir, 'opt.txt') 31 | with open(file_name, 'wt') as opt_file: 32 | opt_file.write('==> commit hash: {}\n'.format( 33 | subprocess.check_output(["git", "describe"]))) 34 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 35 | opt_file.write('==> cudnn version: {}\n'.format( 36 | torch.backends.cudnn.version())) 37 | opt_file.write('==> Cmd:\n') 38 | opt_file.write(str(sys.argv)) 39 | opt_file.write('\n==> Opt:\n') 40 | for k, v in sorted(args.items()): 41 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 42 | 43 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 44 | if 
USE_TENSORBOARD: 45 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 46 | else: 47 | if not os.path.exists(os.path.dirname(log_dir)): 48 | os.mkdir(os.path.dirname(log_dir)) 49 | if not os.path.exists(log_dir): 50 | os.mkdir(log_dir) 51 | self.log = open(log_dir + '/log.txt', 'w') 52 | try: 53 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 54 | except: 55 | pass 56 | self.start_line = True 57 | 58 | def write(self, txt): 59 | if self.start_line: 60 | time_str = time.strftime('%Y-%m-%d-%H-%M') 61 | self.log.write('{}: {}'.format(time_str, txt)) 62 | else: 63 | self.log.write(txt) 64 | self.start_line = False 65 | if '\n' in txt: 66 | self.start_line = True 67 | self.log.flush() 68 | 69 | def close(self): 70 | self.log.close() 71 | 72 | def scalar_summary(self, tag, value, step): 73 | """Log a scalar variable.""" 74 | if USE_TENSORBOARD: 75 | self.writer.add_scalar(tag, value, step) 76 | -------------------------------------------------------------------------------- /src/lib/model/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 
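    Unlike the standard torch.nn.DataParallel, this variant also accepts a
    chunk_sizes argument so the batch can be split unevenly across GPUs; see the
    DataParallel factory function at the bottom of this file, which falls back to
    torch.nn.DataParallel whenever all chunk sizes are equal.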
30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/model/decode.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .utils import _gather_feat, _tranpose_and_gather_feat 8 | from .utils import _nms, _topk, _topk_channel 9 | 10 | 11 | def _update_kps_with_hm( 12 | kps, output, batch, num_joints, K, bboxes=None, scores=None): 13 | if 'hm_hp' in output: 14 | hm_hp = output['hm_hp'] 15 | hm_hp = _nms(hm_hp) 16 | thresh = 0.2 17 | kps = kps.view(batch, K, num_joints, 2).permute( 18 | 0, 2, 1, 3).contiguous() # b x J x K x 2 19 | reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2) 20 | hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K) # b x J x K 21 | if 'hp_offset' in output or 'reg' in output: 22 | hp_offset = output['hp_offset'] if 'hp_offset' in output \ 23 | else output['reg'] 24 | hp_offset = _tranpose_and_gather_feat( 25 | hp_offset, hm_inds.view(batch, -1)) 26 | hp_offset = hp_offset.view(batch, num_joints, K, 2) 27 | hm_xs = hm_xs + hp_offset[:, :, :, 0] 28 | hm_ys = hm_ys + hp_offset[:, :, :, 1] 29 | else: 30 | hm_xs = hm_xs + 0.5 31 | hm_ys = hm_ys + 0.5 32 | 33 | mask = (hm_score > thresh).float() 34 | hm_score = (1 - mask) * -1 + mask * hm_score 35 | hm_ys = (1 - mask) * (-10000) + mask * hm_ys 36 | hm_xs = (1 - mask) * (-10000) + mask * hm_xs 37 | hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze( 38 | 2).expand(batch, num_joints, K, K, 2) 39 | dist = (((reg_kps - hm_kps) ** 2).sum(dim=4) ** 0.5) 40 | min_dist, min_ind = dist.min(dim=3) # b x J x K 41 | hm_score = hm_score.gather(2, min_ind).unsqueeze(-1) # b x J x K x 1 42 | min_dist = min_dist.unsqueeze(-1) 43 | min_ind = min_ind.view(batch, num_joints, K, 1, 1).expand( 44 | batch, num_joints, K, 1, 2) 45 | hm_kps = hm_kps.gather(3, min_ind) 46 | hm_kps = hm_kps.view(batch, num_joints, K, 2) 47 | mask = (hm_score < thresh) 48 | 49 | if bboxes is not None: 50 | l = bboxes[:, :, 0].view(batch, 1, K, 1).expand(batch, num_joints, K, 1) 51 | t = bboxes[:, :, 1].view(batch, 1, K, 
1).expand(batch, num_joints, K, 1) 52 | r = bboxes[:, :, 2].view(batch, 1, K, 1).expand(batch, num_joints, K, 1) 53 | b = bboxes[:, :, 3].view(batch, 1, K, 1).expand(batch, num_joints, K, 1) 54 | mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \ 55 | (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + mask 56 | else: 57 | l = kps[:, :, :, 0:1].min(dim=1, keepdim=True)[0] 58 | t = kps[:, :, :, 1:2].min(dim=1, keepdim=True)[0] 59 | r = kps[:, :, :, 0:1].max(dim=1, keepdim=True)[0] 60 | b = kps[:, :, :, 1:2].max(dim=1, keepdim=True)[0] 61 | margin = 0.25 62 | l = l - (r - l) * margin 63 | r = r + (r - l) * margin 64 | t = t - (b - t) * margin 65 | b = b + (b - t) * margin 66 | mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \ 67 | (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + mask 68 | # sc = (kps[:, :, :, :].max(dim=1, keepdim=True) - kps[:, :, :, :].min(dim=1)) 69 | # mask = mask + (min_dist > 10) 70 | mask = (mask > 0).float() 71 | kps_score = (1 - mask) * hm_score + mask * \ 72 | scores.unsqueeze(-1).expand(batch, num_joints, K, 1) # bJK1 73 | kps_score = scores * kps_score.mean(dim=1).view(batch, K) 74 | # kps_score[scores < 0.1] = 0 75 | mask = mask.expand(batch, num_joints, K, 2) 76 | kps = (1 - mask) * hm_kps + mask * kps 77 | kps = kps.permute(0, 2, 1, 3).contiguous().view( 78 | batch, K, num_joints * 2) 79 | return kps, kps_score 80 | else: 81 | return kps, kps 82 | 83 | def generic_decode(output, K=100, opt=None): 84 | if not ('hm' in output): 85 | return {} 86 | 87 | if opt.zero_tracking: 88 | output['tracking'] *= 0 89 | 90 | heat = output['hm'] 91 | batch, cat, height, width = heat.size() 92 | 93 | heat = _nms(heat) 94 | scores, inds, clses, ys0, xs0 = _topk(heat, K=K) 95 | 96 | clses = clses.view(batch, K) 97 | scores = scores.view(batch, K) 98 | bboxes = None 99 | cts = torch.cat([xs0.unsqueeze(2), ys0.unsqueeze(2)], dim=2) 100 | ret = {'scores': scores, 'clses': clses.float(), 101 | 'xs': xs0, 'ys': ys0, 'cts': cts} 102 | if 'reg' in output: 103 | reg = output['reg'] 104 | reg = _tranpose_and_gather_feat(reg, inds) 105 | reg = reg.view(batch, K, 2) 106 | xs = xs0.view(batch, K, 1) + reg[:, :, 0:1] 107 | ys = ys0.view(batch, K, 1) + reg[:, :, 1:2] 108 | else: 109 | xs = xs0.view(batch, K, 1) + 0.5 110 | ys = ys0.view(batch, K, 1) + 0.5 111 | 112 | if 'wh' in output: 113 | wh = output['wh'] 114 | wh = _tranpose_and_gather_feat(wh, inds) # B x K x (F) 115 | # wh = wh.view(batch, K, -1) 116 | wh = wh.view(batch, K, 2) 117 | wh[wh < 0] = 0 118 | if wh.size(2) == 2 * cat: # cat spec 119 | wh = wh.view(batch, K, -1, 2) 120 | cats = clses.view(batch, K, 1, 1).expand(batch, K, 1, 2) 121 | wh = wh.gather(2, cats.long()).squeeze(2) # B x K x 2 122 | else: 123 | pass 124 | bboxes = torch.cat([xs - wh[..., 0:1] / 2, 125 | ys - wh[..., 1:2] / 2, 126 | xs + wh[..., 0:1] / 2, 127 | ys + wh[..., 1:2] / 2], dim=2) 128 | ret['bboxes'] = bboxes 129 | # print('ret bbox', ret['bboxes']) 130 | 131 | if 'ltrb' in output: 132 | ltrb = output['ltrb'] 133 | ltrb = _tranpose_and_gather_feat(ltrb, inds) # B x K x 4 134 | ltrb = ltrb.view(batch, K, 4) 135 | bboxes = torch.cat([xs0.view(batch, K, 1) + ltrb[..., 0:1], 136 | ys0.view(batch, K, 1) + ltrb[..., 1:2], 137 | xs0.view(batch, K, 1) + ltrb[..., 2:3], 138 | ys0.view(batch, K, 1) + ltrb[..., 3:4]], dim=2) 139 | ret['bboxes'] = bboxes 140 | 141 | 142 | regression_heads = ['tracking', 'dep', 'rot', 'dim', 'amodel_offset', 143 | 'nuscenes_att', 'velocity'] 144 | 145 | for head in regression_heads: 146 | if head in output: 147 | 
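      # Each dense regression map is gathered at the K selected center indices
      # (inds) and reshaped to B x K x C, so every kept detection carries its
      # per-head regression output alongside its score and box.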
ret[head] = _tranpose_and_gather_feat( 148 | output[head], inds).view(batch, K, -1) 149 | 150 | if 'ltrb_amodal' in output: 151 | ltrb_amodal = output['ltrb_amodal'] 152 | ltrb_amodal = _tranpose_and_gather_feat(ltrb_amodal, inds) # B x K x 4 153 | ltrb_amodal = ltrb_amodal.view(batch, K, 4) 154 | bboxes_amodal = torch.cat([xs0.view(batch, K, 1) + ltrb_amodal[..., 0:1], 155 | ys0.view(batch, K, 1) + ltrb_amodal[..., 1:2], 156 | xs0.view(batch, K, 1) + ltrb_amodal[..., 2:3], 157 | ys0.view(batch, K, 1) + ltrb_amodal[..., 3:4]], dim=2) 158 | ret['bboxes_amodal'] = bboxes_amodal 159 | ret['bboxes'] = bboxes_amodal 160 | 161 | if 'hps' in output: 162 | kps = output['hps'] 163 | num_joints = kps.shape[1] // 2 164 | kps = _tranpose_and_gather_feat(kps, inds) 165 | kps = kps.view(batch, K, num_joints * 2) 166 | kps[..., ::2] += xs0.view(batch, K, 1).expand(batch, K, num_joints) 167 | kps[..., 1::2] += ys0.view(batch, K, 1).expand(batch, K, num_joints) 168 | kps, kps_score = _update_kps_with_hm( 169 | kps, output, batch, num_joints, K, bboxes, scores) 170 | ret['hps'] = kps 171 | ret['kps_score'] = kps_score 172 | 173 | if 'pre_inds' in output and output['pre_inds'] is not None: 174 | pre_inds = output['pre_inds'] # B x pre_K 175 | pre_K = pre_inds.shape[1] 176 | pre_ys = (pre_inds / width).int().float() 177 | pre_xs = (pre_inds % width).int().float() 178 | 179 | ret['pre_cts'] = torch.cat( 180 | [pre_xs.unsqueeze(2), pre_ys.unsqueeze(2)], dim=2) 181 | 182 | return ret 183 | -------------------------------------------------------------------------------- /src/lib/model/losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | from .utils import _tranpose_and_gather_feat, _nms, _topk 14 | import torch.nn.functional as F 15 | from utils.image import draw_umich_gaussian 16 | 17 | def _slow_neg_loss(pred, gt): 18 | '''focal loss from CornerNet''' 19 | pos_inds = gt.eq(1).float() 20 | neg_inds = gt.lt(1).float() 21 | 22 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 23 | 24 | loss = 0 25 | pos_pred = pred[pos_inds] 26 | neg_pred = pred[neg_inds] 27 | 28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 30 | 31 | num_pos = pos_inds.float().sum() 32 | pos_loss = pos_loss.sum() 33 | neg_loss = neg_loss.sum() 34 | 35 | if pos_pred.nelement() == 0: 36 | loss = loss - neg_loss 37 | else: 38 | loss = loss - (pos_loss + neg_loss) / num_pos 39 | return loss 40 | 41 | def _neg_loss(pred, gt): 42 | ''' Reimplemented focal loss. Exactly the same as CornerNet. 
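      In short, with p = pred:
        loss = -1/N_pos * [ sum_{gt==1} (1-p)^2 * log(p)
                            + sum_{gt<1} (1-gt)^4 * p^2 * log(1-p) ]
      where N_pos is the number of ground-truth peaks (gt == 1).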
43 | Runs faster and costs a little bit more memory 44 | Arguments: 45 | pred (batch x c x h x w) 46 | gt_regr (batch x c x h x w) 47 | ''' 48 | pos_inds = gt.eq(1).float() 49 | neg_inds = gt.lt(1).float() 50 | 51 | neg_weights = torch.pow(1 - gt, 4) 52 | 53 | loss = 0 54 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds 55 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds 56 | 57 | num_pos = pos_inds.float().sum() 58 | pos_loss = pos_loss.sum() 59 | neg_loss = neg_loss.sum() 60 | if num_pos == 0: 61 | loss = loss - neg_loss 62 | else: 63 | loss = loss - (pos_loss + neg_loss) / num_pos 64 | return loss 65 | 66 | 67 | def _only_neg_loss(pred, gt): 68 | gt = torch.pow(1 - gt, 4) 69 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * gt 70 | return neg_loss.sum() 71 | 72 | class FastFocalLoss(nn.Module): 73 | ''' 74 | Reimplemented focal loss, exactly the same as the CornerNet version. 75 | Faster and costs much less memory. 76 | ''' 77 | def __init__(self, opt=None): 78 | super(FastFocalLoss, self).__init__() 79 | self.only_neg_loss = _only_neg_loss 80 | 81 | def forward(self, out, target, ind, mask, cat): 82 | ''' 83 | Arguments: 84 | out, target: B x C x H x W 85 | ind, mask: B x M 86 | cat (category id for peaks): B x M 87 | ''' 88 | neg_loss = self.only_neg_loss(out, target) 89 | pos_pred_pix = _tranpose_and_gather_feat(out, ind) # B x M x C 90 | pos_pred = pos_pred_pix.gather(2, cat.unsqueeze(2)) # B x M 91 | num_pos = mask.sum() 92 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * \ 93 | mask.unsqueeze(2) 94 | pos_loss = pos_loss.sum() 95 | if num_pos == 0: 96 | return - neg_loss 97 | return - (pos_loss + neg_loss) / num_pos 98 | 99 | def _reg_loss(regr, gt_regr, mask): 100 | ''' L1 regression loss 101 | Arguments: 102 | regr (batch x max_objects x dim) 103 | gt_regr (batch x max_objects x dim) 104 | mask (batch x max_objects) 105 | ''' 106 | num = mask.float().sum() 107 | mask = mask.unsqueeze(2).expand_as(gt_regr).float() 108 | 109 | regr = regr * mask 110 | gt_regr = gt_regr * mask 111 | 112 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum') 113 | regr_loss = regr_loss / (num + 1e-4) 114 | return regr_loss 115 | 116 | 117 | class RegWeightedL1Loss(nn.Module): 118 | def __init__(self): 119 | super(RegWeightedL1Loss, self).__init__() 120 | 121 | def forward(self, output, mask, ind, target): 122 | pred = _tranpose_and_gather_feat(output, ind) 123 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 124 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 125 | loss = loss / (mask.sum() + 1e-4) 126 | return loss 127 | 128 | 129 | class WeightedBCELoss(nn.Module): 130 | def __init__(self): 131 | super(WeightedBCELoss, self).__init__() 132 | self.bceloss = torch.nn.BCEWithLogitsLoss(reduction='none') 133 | 134 | def forward(self, output, mask, ind, target): 135 | # output: B x F x H x W 136 | # ind: B x M 137 | # mask: B x M x F 138 | # target: B x M x F 139 | pred = _tranpose_and_gather_feat(output, ind) # B x M x F 140 | loss = mask * self.bceloss(pred, target) 141 | loss = loss.sum() / (mask.sum() + 1e-4) 142 | return loss 143 | 144 | class BinRotLoss(nn.Module): 145 | def __init__(self): 146 | super(BinRotLoss, self).__init__() 147 | 148 | def forward(self, output, mask, ind, rotbin, rotres): 149 | pred = _tranpose_and_gather_feat(output, ind) 150 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 151 | return loss 152 | 153 | def compute_res_loss(output, 
target): 154 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 155 | 156 | def compute_bin_loss(output, target, mask): 157 | mask = mask.expand_as(output) 158 | output = output * mask.float() 159 | return F.cross_entropy(output, target, reduction='elementwise_mean') 160 | 161 | def compute_rot_loss(output, target_bin, target_res, mask): 162 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 163 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 164 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 165 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 166 | # mask: (B, 128, 1) 167 | output = output.view(-1, 8) 168 | target_bin = target_bin.view(-1, 2) 169 | target_res = target_res.view(-1, 2) 170 | mask = mask.view(-1, 1) 171 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 172 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 173 | loss_res = torch.zeros_like(loss_bin1) 174 | if target_bin[:, 0].nonzero().shape[0] > 0: 175 | idx1 = target_bin[:, 0].nonzero()[:, 0] 176 | valid_output1 = torch.index_select(output, 0, idx1.long()) 177 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 178 | loss_sin1 = compute_res_loss( 179 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 180 | loss_cos1 = compute_res_loss( 181 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 182 | loss_res += loss_sin1 + loss_cos1 183 | if target_bin[:, 1].nonzero().shape[0] > 0: 184 | idx2 = target_bin[:, 1].nonzero()[:, 0] 185 | valid_output2 = torch.index_select(output, 0, idx2.long()) 186 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 187 | loss_sin2 = compute_res_loss( 188 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 189 | loss_cos2 = compute_res_loss( 190 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 191 | loss_res += loss_sin2 + loss_cos2 192 | return loss_bin1 + loss_bin2 + loss_res -------------------------------------------------------------------------------- /src/lib/model/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.dla import DLASeg 11 | from .networks.resdcn import PoseResDCN 12 | from .networks.resnet import PoseResNet 13 | from .networks.dlav0 import DLASegv0 14 | from .networks.generic_network import GenericNetwork 15 | 16 | _network_factory = { 17 | 'resdcn': PoseResDCN, 18 | 'dla': DLASeg, 19 | 'res': PoseResNet, 20 | 'dlav0': DLASegv0, 21 | 'generic': GenericNetwork 22 | } 23 | 24 | def create_model(arch, head, head_conv, opt=None): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | model_class = _network_factory[arch] 28 | model = model_class(num_layers, heads=head, head_convs=head_conv, opt=opt) 29 | return model 30 | 31 | def load_model(model, model_path, opt, optimizer=None): 32 | start_epoch = 0 33 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 34 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 35 | state_dict_ = checkpoint['state_dict'] 36 | state_dict = {} 37 | 38 | # convert data_parallal to model 39 | for k in state_dict_: 40 | if k.startswith('module') and not k.startswith('module_list'): 41 | 
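      # checkpoints saved from nn.DataParallel prefix every key with 'module.'
      # (7 characters); strip it so the keys match the bare model's state dict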
state_dict[k[7:]] = state_dict_[k] 42 | else: 43 | state_dict[k] = state_dict_[k] 44 | model_state_dict = model.state_dict() 45 | 46 | # check loaded parameters and created model parameters 47 | for k in state_dict: 48 | if k in model_state_dict: 49 | if (state_dict[k].shape != model_state_dict[k].shape) or \ 50 | (opt.reset_hm and k.startswith('hm') and (state_dict[k].shape[0] in [80, 1])): 51 | if opt.reuse_hm: 52 | print('Reusing parameter {}, required shape{}, '\ 53 | 'loaded shape{}.'.format( 54 | k, model_state_dict[k].shape, state_dict[k].shape)) 55 | if state_dict[k].shape[0] < state_dict[k].shape[0]: 56 | model_state_dict[k][:state_dict[k].shape[0]] = state_dict[k] 57 | else: 58 | model_state_dict[k] = state_dict[k][:model_state_dict[k].shape[0]] 59 | state_dict[k] = model_state_dict[k] 60 | else: 61 | print('Skip loading parameter {}, required shape{}, '\ 62 | 'loaded shape{}.'.format( 63 | k, model_state_dict[k].shape, state_dict[k].shape)) 64 | state_dict[k] = model_state_dict[k] 65 | else: 66 | print('Drop parameter {}.'.format(k)) 67 | for k in model_state_dict: 68 | if not (k in state_dict): 69 | print('No param {}.'.format(k)) 70 | state_dict[k] = model_state_dict[k] 71 | model.load_state_dict(state_dict, strict=False) 72 | 73 | # resume optimizer parameters 74 | if optimizer is not None and opt.resume: 75 | if 'optimizer' in checkpoint: 76 | # optimizer.load_state_dict(checkpoint['optimizer']) 77 | start_epoch = checkpoint['epoch'] 78 | start_lr = opt.lr 79 | for step in opt.lr_step: 80 | if start_epoch >= step: 81 | start_lr *= 0.1 82 | for param_group in optimizer.param_groups: 83 | param_group['lr'] = start_lr 84 | print('Resumed optimizer with start lr', start_lr) 85 | else: 86 | print('No optimizer parameters in checkpoint.') 87 | if optimizer is not None: 88 | return model, optimizer, start_epoch 89 | else: 90 | return model 91 | 92 | def save_model(path, epoch, model, optimizer=None): 93 | if isinstance(model, torch.nn.DataParallel): 94 | state_dict = model.module.state_dict() 95 | else: 96 | state_dict = model.state_dict() 97 | data = {'epoch': epoch, 98 | 'state_dict': state_dict} 99 | if not (optimizer is None): 100 | data['optimizer'] = optimizer.state_dict() 101 | torch.save(data, path) 102 | 103 | -------------------------------------------------------------------------------- /src/lib/model/networks/backbones/mobilenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import math 7 | import logging 8 | import numpy as np 9 | 10 | import torch 11 | from torch import nn 12 | import torch.nn.functional as F 13 | import torch.utils.model_zoo as model_zoo 14 | from torchvision.models.utils import load_state_dict_from_url 15 | 16 | BN_MOMENTUM = 0.1 17 | 18 | model_urls = { 19 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 20 | } 21 | 22 | def _make_divisible(v, divisor, min_value=None): 23 | """ 24 | This function is taken from the original tf repo. 
25 | It ensures that all layers have a channel number that is divisible by 8 26 | It can be seen here: 27 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 28 | :param v: 29 | :param divisor: 30 | :param min_value: 31 | :return: 32 | """ 33 | if min_value is None: 34 | min_value = divisor 35 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 36 | # Make sure that round down does not go down by more than 10%. 37 | if new_v < 0.9 * v: 38 | new_v += divisor 39 | return new_v 40 | 41 | 42 | class ConvBNReLU(nn.Sequential): 43 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): 44 | padding = (kernel_size - 1) // 2 45 | super(ConvBNReLU, self).__init__( 46 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 47 | nn.BatchNorm2d(out_planes), 48 | nn.ReLU6(inplace=True) 49 | ) 50 | 51 | 52 | class InvertedResidual(nn.Module): 53 | def __init__(self, inp, oup, stride, expand_ratio): 54 | super(InvertedResidual, self).__init__() 55 | self.stride = stride 56 | assert stride in [1, 2] 57 | 58 | hidden_dim = int(round(inp * expand_ratio)) 59 | self.use_res_connect = self.stride == 1 and inp == oup 60 | 61 | layers = [] 62 | if expand_ratio != 1: 63 | # pw 64 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) 65 | layers.extend([ 66 | # dw 67 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), 68 | # pw-linear 69 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 70 | nn.BatchNorm2d(oup), 71 | ]) 72 | self.conv = nn.Sequential(*layers) 73 | 74 | def forward(self, x): 75 | if self.use_res_connect: 76 | return x + self.conv(x) 77 | else: 78 | return self.conv(x) 79 | 80 | 81 | class MobileNetV2(nn.Module): 82 | def __init__(self, opt, 83 | width_mult=1.0, 84 | round_nearest=8, 85 | block=None): 86 | """ 87 | MobileNet V2 main class 88 | Args: 89 | num_classes (int): Number of classes 90 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 91 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 92 | Set to 1 to turn off rounding 93 | block: Module specifying inverted residual building block for mobilenet 94 | """ 95 | super().__init__() 96 | if block is None: 97 | block = InvertedResidual 98 | input_channel = 32 99 | last_channel = 1280 100 | 101 | inverted_residual_setting = [ 102 | # t, c, n, s 103 | [1, 16, 1, 1], # 1 104 | [6, 24, 2, 2], # 2 105 | [6, 32, 3, 2], # 3 106 | [6, 64, 4, 2], # 4 107 | [6, 96, 3, 1], # 5 108 | [6, 160, 3, 2],# 6 109 | [6, 320, 1, 1],# 7 110 | ] 111 | 112 | # only check the first element, assuming user knows t,c,n,s are required 113 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 114 | raise ValueError("inverted_residual_setting should be non-empty " 115 | "or a 4-element list, got {}".format(inverted_residual_setting)) 116 | 117 | # building first layer 118 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 119 | # self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 120 | if opt.pre_img: 121 | print('adding pre_img layer...') 122 | self.pre_img_layer = nn.Sequential( 123 | nn.Conv2d(3, input_channel, kernel_size=3, padding=1, stride=2, bias=False), 124 | nn.BatchNorm2d(input_channel)) 125 | if opt.pre_hm: 126 | print('adding pre_hm layer...') 127 | self.pre_hm_layer = nn.Sequential( 128 | nn.Conv2d(1, input_channel, kernel_size=3, 
padding=1, stride=2, bias=False), 129 | nn.BatchNorm2d(input_channel)) 130 | features = [ConvBNReLU(3, input_channel, stride=2)] 131 | self.key_block = [True] 132 | all_channels = [input_channel] 133 | self.channels = [input_channel] 134 | # building inverted residual blocks 135 | for t, c, n, s in inverted_residual_setting: 136 | output_channel = _make_divisible(c * width_mult, round_nearest) 137 | for i in range(n): 138 | stride = s if i == 0 else 1 139 | features.append(block(input_channel, output_channel, stride, expand_ratio=t)) 140 | input_channel = output_channel 141 | if stride == 2: 142 | self.key_block.append(True) 143 | else: 144 | self.key_block.append(False) 145 | all_channels.append(output_channel) 146 | # building last several layers 147 | # features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) 148 | # self.key_block.append(False) 149 | # all_channels.append(self.last_channel) 150 | for i in range(len(self.key_block) - 1): 151 | if self.key_block[i + 1]: 152 | self.key_block[i] = True 153 | self.key_block[i + 1] = False 154 | self.channels.append(all_channels[i]) 155 | self.key_block[-1] = True 156 | self.channels.append(all_channels[-1]) 157 | print('channels', self.channels) 158 | # make it nn.Sequential 159 | self.features = nn.ModuleList(features) 160 | print('len(self.features)', len(self.features)) 161 | # self.channels = [, ] 162 | 163 | # weight initialization 164 | for m in self.modules(): 165 | if isinstance(m, nn.Conv2d): 166 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 167 | if m.bias is not None: 168 | nn.init.zeros_(m.bias) 169 | elif isinstance(m, nn.BatchNorm2d): 170 | nn.init.ones_(m.weight) 171 | nn.init.zeros_(m.bias) 172 | elif isinstance(m, nn.Linear): 173 | nn.init.normal_(m.weight, 0, 0.01) 174 | nn.init.zeros_(m.bias) 175 | state_dict = load_state_dict_from_url(model_urls['mobilenet_v2']) 176 | self.load_state_dict(state_dict, strict=False) 177 | 178 | def forward(self, inputs, pre_img=None, pre_hm=None): 179 | x = self.features[0](inputs) 180 | if pre_img is not None: 181 | x = x + self.pre_img_layer(pre_img) 182 | if pre_hm is not None: 183 | x = x + self.pre_hm_layer(pre_hm) 184 | y = [x] 185 | for i in range(1, len(self.features)): 186 | x = self.features[i](x) 187 | # print('i, shape, is_key', i, x.shape, self.key_block[i]) 188 | if self.key_block[i]: 189 | y.append(x) 190 | return y 191 | 192 | -------------------------------------------------------------------------------- /src/lib/model/networks/backbones/resnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import math 7 | import logging 8 | import numpy as np 9 | 10 | import torch 11 | from torch import nn 12 | import torch.nn.functional as F 13 | import torch.utils.model_zoo as model_zoo 14 | 15 | BN_MOMENTUM = 0.1 16 | 17 | model_urls = { 18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 23 | } 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | """3x3 convolution with padding""" 27 | return nn.Conv2d(in_planes, out_planes, 
kernel_size=3, stride=stride, 28 | padding=1, bias=False) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.conv2 = conv3x3(planes, planes) 40 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 69 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=False) 72 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 73 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 74 | bias=False) 75 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 76 | momentum=BN_MOMENTUM) 77 | self.relu = nn.ReLU(inplace=True) 78 | self.downsample = downsample 79 | self.stride = stride 80 | 81 | def forward(self, x): 82 | residual = x 83 | 84 | out = self.conv1(x) 85 | out = self.bn1(out) 86 | out = self.relu(out) 87 | 88 | out = self.conv2(out) 89 | out = self.bn2(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv3(out) 93 | out = self.bn3(out) 94 | 95 | if self.downsample is not None: 96 | residual = self.downsample(x) 97 | 98 | out += residual 99 | out = self.relu(out) 100 | 101 | return out 102 | 103 | 104 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 105 | 34: (BasicBlock, [3, 4, 6, 3]), 106 | 50: (Bottleneck, [3, 4, 6, 3]), 107 | 101: (Bottleneck, [3, 4, 23, 3]), 108 | 152: (Bottleneck, [3, 8, 36, 3])} 109 | 110 | class Resnet(nn.Module): 111 | def __init__(self, opt): 112 | super().__init__() 113 | assert (not opt.pre_hm) and (not opt.pre_img) 114 | self.inplanes = 64 115 | block, layers = resnet_spec[opt.num_layers] 116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 117 | bias=False) 118 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 119 | self.relu = nn.ReLU(inplace=True) 120 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 121 | self.layer1 = self._make_layer(block, 64, layers[0]) 122 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 123 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 124 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 125 | 126 | self.channels = [64, 64, 127 | 64 * block.expansion, 128 | 128 * block.expansion, 129 | 256 * block.expansion, 130 | 512 * block.expansion] 131 | 132 | self._init_weights(opt.num_layers) 133 | 134 | 135 | def _make_layer(self, block, planes, blocks, stride=1): 136 | downsample = None 137 | if stride != 1 or self.inplanes != planes * block.expansion: 138 | downsample = nn.Sequential( 139 | nn.Conv2d(self.inplanes, planes * block.expansion, 140 | kernel_size=1, stride=stride, bias=False), 141 | 
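          # this 1x1 conv (above) and the BatchNorm below project the identity
          # shortcut to the block's output channels and stride so the residual
          # addition in BasicBlock/Bottleneck has matching shapes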
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 142 | ) 143 | 144 | layers = [] 145 | layers.append(block(self.inplanes, planes, stride, downsample)) 146 | self.inplanes = planes * block.expansion 147 | for i in range(1, blocks): 148 | layers.append(block(self.inplanes, planes)) 149 | 150 | return nn.Sequential(*layers) 151 | 152 | def forward(self, x): 153 | x = self.conv1(x) 154 | x = self.bn1(x) 155 | x = self.relu(x) 156 | y = [x] 157 | x = self.maxpool(x) 158 | y.append(x) 159 | 160 | x = self.layer1(x) 161 | y.append(x) 162 | x = self.layer2(x) 163 | y.append(x) 164 | x = self.layer3(x) 165 | y.append(x) 166 | x = self.layer4(x) 167 | y.append(x) 168 | 169 | return y 170 | 171 | def _init_weights(self, num_layers): 172 | url = model_urls['resnet{}'.format(num_layers)] 173 | pretrained_state_dict = model_zoo.load_url(url) 174 | print('=> loading pretrained model {}'.format(url)) 175 | self.load_state_dict(pretrained_state_dict, strict=False) -------------------------------------------------------------------------------- /src/lib/model/networks/base_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | 8 | def fill_fc_weights(layers): 9 | for m in layers.modules(): 10 | if isinstance(m, nn.Conv2d): 11 | if m.bias is not None: 12 | nn.init.constant_(m.bias, 0) 13 | 14 | class BaseModel(nn.Module): 15 | def __init__(self, heads, head_convs, num_stacks, last_channel, opt=None): 16 | super(BaseModel, self).__init__() 17 | if opt is not None and opt.head_kernel != 3: 18 | print('Using head kernel:', opt.head_kernel) 19 | head_kernel = opt.head_kernel 20 | else: 21 | head_kernel = 3 22 | self.num_stacks = num_stacks 23 | self.heads = heads 24 | for head in self.heads: 25 | classes = self.heads[head] 26 | head_conv = head_convs[head] 27 | if len(head_conv) > 0: 28 | out = nn.Conv2d(head_conv[-1], classes, 29 | kernel_size=1, stride=1, padding=0, bias=True) 30 | conv = nn.Conv2d(last_channel, head_conv[0], 31 | kernel_size=head_kernel, 32 | padding=head_kernel // 2, bias=True) 33 | convs = [conv] 34 | for k in range(1, len(head_conv)): 35 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k], 36 | kernel_size=1, bias=True)) 37 | if len(convs) == 1: 38 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out) 39 | elif len(convs) == 2: 40 | fc = nn.Sequential( 41 | convs[0], nn.ReLU(inplace=True), 42 | convs[1], nn.ReLU(inplace=True), out) 43 | elif len(convs) == 3: 44 | fc = nn.Sequential( 45 | convs[0], nn.ReLU(inplace=True), 46 | convs[1], nn.ReLU(inplace=True), 47 | convs[2], nn.ReLU(inplace=True), out) 48 | elif len(convs) == 4: 49 | fc = nn.Sequential( 50 | convs[0], nn.ReLU(inplace=True), 51 | convs[1], nn.ReLU(inplace=True), 52 | convs[2], nn.ReLU(inplace=True), 53 | convs[3], nn.ReLU(inplace=True), out) 54 | if 'hm' in head: 55 | fc[-1].bias.data.fill_(opt.prior_bias) 56 | else: 57 | fill_fc_weights(fc) 58 | else: 59 | fc = nn.Conv2d(last_channel, classes, 60 | kernel_size=1, stride=1, padding=0, bias=True) 61 | if 'hm' in head: 62 | fc.bias.data.fill_(opt.prior_bias) 63 | else: 64 | fill_fc_weights(fc) 65 | self.__setattr__(head, fc) 66 | 67 | def img2feats(self, x): 68 | raise NotImplementedError 69 | 70 | def imgpre2feats(self, x, pre_img=None, pre_hm=None): 71 | raise NotImplementedError 72 | 73 | def forward(self, x, pre_img=None, pre_hm=None): 74 | if (pre_hm 
is not None) or (pre_img is not None): 75 | feats = self.imgpre2feats(x, pre_img, pre_hm) 76 | else: 77 | feats = self.img2feats(x) 78 | out = [] 79 | if self.opt.model_output_list: 80 | for s in range(self.num_stacks): 81 | z = [] 82 | for head in sorted(self.heads): 83 | z.append(self.__getattr__(head)(feats[s])) 84 | out.append(z) 85 | else: 86 | for s in range(self.num_stacks): 87 | z = {} 88 | for head in self.heads: 89 | z[head] = self.__getattr__(head)(feats[s]) 90 | out.append(z) 91 | return out 92 | -------------------------------------------------------------------------------- /src/lib/model/networks/generic_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | from .backbones.dla import dla34 8 | from .backbones.resnet import Resnet 9 | from .backbones.mobilenet import MobileNetV2 10 | from .necks.dlaup import DLASeg 11 | from .necks.msraup import MSRAUp 12 | 13 | backbone_factory = { 14 | 'dla34': dla34, 15 | 'resnet': Resnet, 16 | 'mobilenet': MobileNetV2 17 | } 18 | 19 | neck_factory = { 20 | 'dlaup': DLASeg, 21 | 'msraup': MSRAUp 22 | } 23 | 24 | def fill_fc_weights(layers): 25 | for m in layers.modules(): 26 | if isinstance(m, nn.Conv2d): 27 | if m.bias is not None: 28 | nn.init.constant_(m.bias, 0) 29 | 30 | class GenericNetwork(nn.Module): 31 | def __init__(self, num_layers, heads, head_convs, num_stacks=1, opt=None): 32 | super(GenericNetwork, self).__init__() 33 | print('Using generic model with backbone {} and neck {}'.format( 34 | opt.backbone, opt.neck)) 35 | # assert (not opt.pre_hm) and (not opt.pre_img) 36 | if opt is not None and opt.head_kernel != 3: 37 | print('Using head kernel:', opt.head_kernel) 38 | head_kernel = opt.head_kernel 39 | else: 40 | head_kernel = 3 41 | self.opt = opt 42 | self.backbone = backbone_factory[opt.backbone](opt=opt) 43 | channels = self.backbone.channels 44 | self.neck = neck_factory[opt.neck](opt=opt, channels=channels) 45 | last_channel = self.neck.out_channel 46 | self.num_stacks = num_stacks 47 | self.heads = heads 48 | for head in self.heads: 49 | classes = self.heads[head] 50 | head_conv = head_convs[head] 51 | if len(head_conv) > 0: 52 | out = nn.Conv2d(head_conv[-1], classes, 53 | kernel_size=1, stride=1, padding=0, bias=True) 54 | conv = nn.Conv2d(last_channel, head_conv[0], 55 | kernel_size=head_kernel, 56 | padding=head_kernel // 2, bias=True) 57 | convs = [conv] 58 | for k in range(1, len(head_conv)): 59 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k], 60 | kernel_size=1, bias=True)) 61 | if len(convs) == 1: 62 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out) 63 | elif len(convs) == 2: 64 | fc = nn.Sequential( 65 | convs[0], nn.ReLU(inplace=True), 66 | convs[1], nn.ReLU(inplace=True), out) 67 | elif len(convs) == 3: 68 | fc = nn.Sequential( 69 | convs[0], nn.ReLU(inplace=True), 70 | convs[1], nn.ReLU(inplace=True), 71 | convs[2], nn.ReLU(inplace=True), out) 72 | elif len(convs) == 4: 73 | fc = nn.Sequential( 74 | convs[0], nn.ReLU(inplace=True), 75 | convs[1], nn.ReLU(inplace=True), 76 | convs[2], nn.ReLU(inplace=True), 77 | convs[3], nn.ReLU(inplace=True), out) 78 | if 'hm' in head: 79 | fc[-1].bias.data.fill_(opt.prior_bias) 80 | else: 81 | fill_fc_weights(fc) 82 | else: 83 | fc = nn.Conv2d(last_channel, classes, 84 | kernel_size=1, stride=1, padding=0, bias=True) 85 | if 'hm' in head: 86 | 
fc.bias.data.fill_(opt.prior_bias) 87 | else: 88 | fill_fc_weights(fc) 89 | self.__setattr__(head, fc) 90 | 91 | def forward(self, x, pre_img=None, pre_hm=None): 92 | y = self.backbone(x, pre_img, pre_hm) 93 | feats = self.neck(y) 94 | out = [] 95 | if self.opt.model_output_list: 96 | for s in range(self.num_stacks): 97 | z = [] 98 | for head in sorted(self.heads): 99 | z.append(self.__getattr__(head)(feats[s])) 100 | out.append(z) 101 | else: 102 | for s in range(self.num_stacks): 103 | z = {} 104 | for head in self.heads: 105 | z[head] = self.__getattr__(head)(feats[s]) 106 | out.append(z) 107 | return out 108 | -------------------------------------------------------------------------------- /src/lib/model/networks/necks/dlaup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import math 7 | import logging 8 | import numpy as np 9 | from os.path import join 10 | 11 | import torch 12 | from torch import nn 13 | import torch.nn.functional as F 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | try: 17 | from ..DCNv2.dcn_v2 import DCN 18 | except: 19 | print('import DCN failed') 20 | DCN = None 21 | 22 | BN_MOMENTUM = 0.1 23 | 24 | class Identity(nn.Module): 25 | 26 | def __init__(self): 27 | super(Identity, self).__init__() 28 | 29 | def forward(self, x): 30 | return x 31 | 32 | 33 | def fill_fc_weights(layers): 34 | for m in layers.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | if m.bias is not None: 37 | nn.init.constant_(m.bias, 0) 38 | 39 | 40 | def fill_up_weights(up): 41 | w = up.weight.data 42 | f = math.ceil(w.size(2) / 2) 43 | c = (2 * f - 1 - f % 2) / (2. 
* f) 44 | for i in range(w.size(2)): 45 | for j in range(w.size(3)): 46 | w[0, 0, i, j] = \ 47 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 48 | for c in range(1, w.size(0)): 49 | w[c, 0, :, :] = w[0, 0, :, :] 50 | 51 | 52 | class Conv(nn.Module): 53 | def __init__(self, chi, cho): 54 | super(Conv, self).__init__() 55 | self.conv = nn.Sequential( 56 | nn.Conv2d(chi, cho, kernel_size=1, stride=1, bias=False), 57 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM), 58 | nn.ReLU(inplace=True)) 59 | 60 | def forward(self, x): 61 | return self.conv(x) 62 | 63 | 64 | class GlobalConv(nn.Module): 65 | def __init__(self, chi, cho, k=7, d=1): 66 | super(GlobalConv, self).__init__() 67 | gcl = nn.Sequential( 68 | nn.Conv2d(chi, cho, kernel_size=(k, 1), stride=1, bias=False, 69 | dilation=d, padding=(d * (k // 2), 0)), 70 | nn.Conv2d(cho, cho, kernel_size=(1, k), stride=1, bias=False, 71 | dilation=d, padding=(0, d * (k // 2)))) 72 | gcr = nn.Sequential( 73 | nn.Conv2d(chi, cho, kernel_size=(1, k), stride=1, bias=False, 74 | dilation=d, padding=(0, d * (k // 2))), 75 | nn.Conv2d(cho, cho, kernel_size=(k, 1), stride=1, bias=False, 76 | dilation=d, padding=(d * (k // 2), 0))) 77 | fill_fc_weights(gcl) 78 | fill_fc_weights(gcr) 79 | self.gcl = gcl 80 | self.gcr = gcr 81 | self.act = nn.Sequential( 82 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM), 83 | nn.ReLU(inplace=True) 84 | ) 85 | 86 | def forward(self, x): 87 | x = self.gcl(x) + self.gcr(x) 88 | x = self.act(x) 89 | return x 90 | 91 | 92 | class DeformConv(nn.Module): 93 | def __init__(self, chi, cho): 94 | super(DeformConv, self).__init__() 95 | self.actf = nn.Sequential( 96 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM), 97 | nn.ReLU(inplace=True) 98 | ) 99 | self.conv = DCN(chi, cho, kernel_size=(3,3), stride=1, padding=1, dilation=1, deformable_groups=1) 100 | 101 | def forward(self, x): 102 | x = self.conv(x) 103 | x = self.actf(x) 104 | return x 105 | 106 | 107 | class IDAUp(nn.Module): 108 | def __init__(self, o, channels, up_f, node_type=(DeformConv, DeformConv)): 109 | super(IDAUp, self).__init__() 110 | for i in range(1, len(channels)): 111 | c = channels[i] 112 | f = int(up_f[i]) 113 | proj = node_type[0](c, o) 114 | node = node_type[1](o, o) 115 | 116 | up = nn.ConvTranspose2d(o, o, f * 2, stride=f, 117 | padding=f // 2, output_padding=0, 118 | groups=o, bias=False) 119 | fill_up_weights(up) 120 | 121 | setattr(self, 'proj_' + str(i), proj) 122 | setattr(self, 'up_' + str(i), up) 123 | setattr(self, 'node_' + str(i), node) 124 | 125 | 126 | def forward(self, layers, startp, endp): 127 | for i in range(startp + 1, endp): 128 | upsample = getattr(self, 'up_' + str(i - startp)) 129 | project = getattr(self, 'proj_' + str(i - startp)) 130 | layers[i] = upsample(project(layers[i])) 131 | node = getattr(self, 'node_' + str(i - startp)) 132 | layers[i] = node(layers[i] + layers[i - 1]) 133 | 134 | 135 | 136 | class DLAUp(nn.Module): 137 | def __init__(self, startp, channels, scales, in_channels=None, 138 | node_type=DeformConv): 139 | super(DLAUp, self).__init__() 140 | self.startp = startp 141 | if in_channels is None: 142 | in_channels = channels 143 | self.channels = channels 144 | channels = list(channels) 145 | scales = np.array(scales, dtype=int) 146 | for i in range(len(channels) - 1): 147 | j = -i - 2 148 | setattr(self, 'ida_{}'.format(i), 149 | IDAUp(channels[j], in_channels[j:], 150 | scales[j:] // scales[j], 151 | node_type=node_type)) 152 | scales[j + 1:] = scales[j] 153 | in_channels[j + 1:] = [channels[j] for _ in channels[j + 
1:]] 154 | 155 | def forward(self, layers): 156 | out = [layers[-1]] # start with 32 157 | for i in range(len(layers) - self.startp - 1): 158 | ida = getattr(self, 'ida_{}'.format(i)) 159 | ida(layers, len(layers) -i - 2, len(layers)) 160 | out.insert(0, layers[-1]) 161 | return out 162 | 163 | DLA_NODE = { 164 | 'dcn': (DeformConv, DeformConv), 165 | 'gcn': (Conv, GlobalConv), 166 | 'conv': (Conv, Conv), 167 | } 168 | 169 | class DLASeg(nn.Module): 170 | def __init__(self, opt, channels): 171 | super().__init__() 172 | self.opt = opt 173 | self.channels = channels 174 | self.node_type = DLA_NODE[opt.dla_node] 175 | print('Using node type:', self.node_type) 176 | down_ratio = 4 177 | self.first_level = int(np.log2(down_ratio)) 178 | self.last_level = 5 179 | 180 | scales = [2 ** i for i in range(len(channels[self.first_level:]))] 181 | self.dla_up = DLAUp( 182 | self.first_level, channels[self.first_level:], scales, 183 | node_type=self.node_type) 184 | self.out_channel = channels[self.first_level] 185 | 186 | self.ida_up = IDAUp( 187 | self.out_channel, channels[self.first_level:self.last_level], 188 | [2 ** i for i in range(self.last_level - self.first_level)], 189 | node_type=self.node_type) 190 | 191 | 192 | def forward(self, x): 193 | x = self.dla_up(x) 194 | y = [] 195 | for i in range(self.last_level - self.first_level): 196 | y.append(x[i].clone()) 197 | self.ida_up(y, 0, len(y)) 198 | 199 | return [y[-1]] 200 | -------------------------------------------------------------------------------- /src/lib/model/networks/necks/msraup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import math 14 | import logging 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | try: 20 | from ..DCNv2.dcn_v2 import DCN 21 | except: 22 | print('import DCN failed') 23 | DCN = None 24 | 25 | 26 | BN_MOMENTUM = 0.1 27 | 28 | def fill_up_weights(up): 29 | w = up.weight.data 30 | f = math.ceil(w.size(2) / 2) 31 | c = (2 * f - 1 - f % 2) / (2. 
* f) 32 | for i in range(w.size(2)): 33 | for j in range(w.size(3)): 34 | w[0, 0, i, j] = \ 35 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 36 | for c in range(1, w.size(0)): 37 | w[c, 0, :, :] = w[0, 0, :, :] 38 | 39 | def fill_fc_weights(layers): 40 | for m in layers.modules(): 41 | if isinstance(m, nn.Conv2d): 42 | nn.init.normal_(m.weight, std=0.001) 43 | # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') 44 | # torch.nn.init.xavier_normal_(m.weight.data) 45 | if m.bias is not None: 46 | nn.init.constant_(m.bias, 0) 47 | 48 | class MSRAUp(nn.Module): 49 | # def __init__(self, block, layers, heads, head_conv): 50 | def __init__(self, opt, channels): 51 | super().__init__() 52 | self.opt = opt 53 | assert self.opt.msra_outchannel in [64, 256] 54 | self.deconv_with_bias = False 55 | self.inplanes = channels[-1] 56 | self.out_channel = self.opt.msra_outchannel 57 | # used for deconv layers 58 | if self.opt.msra_outchannel == 64: 59 | print('Using slimed resnet: 256 128 64 up channels.') 60 | self.deconv_layers = self._make_deconv_layer( 61 | 3, 62 | [256, 128, 64], 63 | [4, 4, 4], 64 | ) 65 | else: 66 | print('Using original resnet: 256 256 256 up channels.') 67 | print('Using 256 deconvs') 68 | self.deconv_layers = self._make_deconv_layer( 69 | 3, 70 | [256, 256, 256], 71 | [4, 4, 4], 72 | ) 73 | self.init_weights() 74 | 75 | 76 | def forward(self, x): 77 | x = self.deconv_layers(x[-1]) 78 | return [x] 79 | 80 | def _get_deconv_cfg(self, deconv_kernel, index): 81 | if deconv_kernel == 4: 82 | padding = 1 83 | output_padding = 0 84 | elif deconv_kernel == 3: 85 | padding = 1 86 | output_padding = 1 87 | elif deconv_kernel == 2: 88 | padding = 0 89 | output_padding = 0 90 | 91 | return deconv_kernel, padding, output_padding 92 | 93 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 94 | assert num_layers == len(num_filters), \ 95 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 96 | assert num_layers == len(num_kernels), \ 97 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 98 | 99 | layers = [] 100 | for i in range(num_layers): 101 | kernel, padding, output_padding = \ 102 | self._get_deconv_cfg(num_kernels[i], i) 103 | 104 | planes = num_filters[i] 105 | fc = DCN(self.inplanes, planes, 106 | kernel_size=(3,3), stride=1, 107 | padding=1, dilation=1, deformable_groups=1) 108 | # fc = nn.Conv2d(self.inplanes, planes, 109 | # kernel_size=3, stride=1, 110 | # padding=1, dilation=1, bias=False) 111 | # fill_fc_weights(fc) 112 | up = nn.ConvTranspose2d( 113 | in_channels=planes, 114 | out_channels=planes, 115 | kernel_size=kernel, 116 | stride=2, 117 | padding=padding, 118 | output_padding=output_padding, 119 | bias=self.deconv_with_bias) 120 | fill_up_weights(up) 121 | 122 | layers.append(fc) 123 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 124 | layers.append(nn.ReLU(inplace=True)) 125 | layers.append(up) 126 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 127 | layers.append(nn.ReLU(inplace=True)) 128 | self.inplanes = planes 129 | 130 | return nn.Sequential(*layers) 131 | 132 | def init_weights(self): 133 | for name, m in self.deconv_layers.named_modules(): 134 | if isinstance(m, nn.BatchNorm2d): 135 | nn.init.constant_(m.weight, 1) 136 | nn.init.constant_(m.bias, 0) 137 | -------------------------------------------------------------------------------- /src/lib/model/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/model/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _sigmoid12(x): 13 | y = torch.clamp(x.sigmoid_(), 1e-12) 14 | return y 15 | 16 | def _gather_feat(feat, ind): 17 | dim = feat.size(2) 18 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 19 | feat = feat.gather(1, ind) 20 | return feat 21 | 22 | def _tranpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 51 | 52 | def _nms(heat, kernel=3): 53 | pad = (kernel - 1) // 2 54 | 55 | hmax = nn.functional.max_pool2d( 56 | heat, (kernel, kernel), stride=1, padding=pad) 57 | keep = (hmax == heat).float() 58 | return heat * keep 59 | 60 | def _topk_channel(scores, K=100): 61 | batch, cat, height, width = scores.size() 62 | 63 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 64 | 65 | topk_inds = topk_inds % (height * width) 66 | topk_ys = (topk_inds / width).int().float() 67 | topk_xs = (topk_inds % width).int().float() 68 | 69 | return topk_scores, topk_inds, topk_ys, topk_xs 70 | 71 | def _topk(scores, K=100): 72 | batch, cat, height, width = scores.size() 73 | 74 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 75 | 76 | topk_inds = topk_inds % (height * width) 77 | topk_ys = (topk_inds / width).int().float() 78 | topk_xs = (topk_inds % width).int().float() 79 | 80 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 81 | topk_clses = (topk_ind / K).int() 82 | topk_inds = _gather_feat( 83 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 84 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 85 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 86 | 87 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 88 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def comput_corners_3d(dim, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners).transpose(1, 0) 22 | return corners_3d 23 | 24 | def compute_box_3d(dim, location, rotation_y): 25 | # dim: 3 26 | # location: 3 27 | # rotation_y: 1 28 | # return: 8 x 3 29 | corners_3d = comput_corners_3d(dim, rotation_y) 30 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(1, 3) 31 | return corners_3d 32 | 33 | def project_to_image(pts_3d, P): 34 | # pts_3d: n x 3 35 | # P: 3 x 4 36 | # return: n x 2 37 | pts_3d_homo = np.concatenate( 38 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 39 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 40 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 41 | # import pdb; pdb.set_trace() 42 | return pts_2d 43 | 44 | def compute_orientation_3d(dim, location, rotation_y): 45 | # dim: 3 46 | # location: 3 47 | # rotation_y: 1 48 | # return: 2 x 3 49 | c, 
s = np.cos(rotation_y), np.sin(rotation_y) 50 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 51 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 52 | orientation_3d = np.dot(R, orientation_3d) 53 | orientation_3d = orientation_3d + \ 54 | np.array(location, dtype=np.float32).reshape(3, 1) 55 | return orientation_3d.transpose(1, 0) 56 | 57 | def draw_box_3d(image, corners, c=(255, 0, 255), same_color=False): 58 | face_idx = [[0,1,5,4], 59 | [1,2,6, 5], 60 | [3,0,4,7], 61 | [2,3,7,6]] 62 | right_corners = [1, 2, 6, 5] if not same_color else [] 63 | left_corners = [0, 3, 7, 4] if not same_color else [] 64 | thickness = 4 if same_color else 2 65 | corners = corners.astype(np.int32) 66 | for ind_f in range(3, -1, -1): 67 | f = face_idx[ind_f] 68 | for j in range(4): 69 | # print('corners', corners) 70 | cc = c 71 | if (f[j] in left_corners) and (f[(j+1)%4] in left_corners): 72 | cc = (255, 0, 0) 73 | if (f[j] in right_corners) and (f[(j+1)%4] in right_corners): 74 | cc = (0, 0, 255) 75 | try: 76 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 77 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), cc, thickness, lineType=cv2.LINE_AA) 78 | except: 79 | pass 80 | if ind_f == 0: 81 | try: 82 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 83 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 84 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 85 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 86 | except: 87 | pass 88 | # top_idx = [0, 1, 2, 3] 89 | return image 90 | 91 | def unproject_2d_to_3d(pt_2d, depth, P): 92 | # pts_2d: 2 93 | # depth: 1 94 | # P: 3 x 4 95 | # return: 3 96 | z = depth - P[2, 3] 97 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 98 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 99 | pt_3d = np.array([x, y, z], dtype=np.float32).reshape(3) 100 | return pt_3d 101 | 102 | def alpha2rot_y(alpha, x, cx, fx): 103 | """ 104 | Get rotation_y by alpha + theta - 180 105 | alpha : Observation angle of object, ranging [-pi..pi] 106 | x : Object center x to the camera center (x-W/2), in pixels 107 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 108 | """ 109 | rot_y = alpha + np.arctan2(x - cx, fx) 110 | if rot_y > np.pi: 111 | rot_y -= 2 * np.pi 112 | if rot_y < -np.pi: 113 | rot_y += 2 * np.pi 114 | return rot_y 115 | 116 | def rot_y2alpha(rot_y, x, cx, fx): 117 | """ 118 | Get rotation_y by alpha + theta - 180 119 | alpha : Observation angle of object, ranging [-pi..pi] 120 | x : Object center x to the camera center (x-W/2), in pixels 121 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 122 | """ 123 | alpha = rot_y - np.arctan2(x - cx, fx) 124 | if alpha > np.pi: 125 | alpha -= 2 * np.pi 126 | if alpha < -np.pi: 127 | alpha += 2 * np.pi 128 | return alpha 129 | 130 | 131 | def ddd2locrot(center, alpha, dim, depth, calib): 132 | # single image 133 | locations = unproject_2d_to_3d(center, depth, calib) 134 | locations[1] += dim[0] / 2 135 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 136 | return locations, rotation_y 137 | 138 | def project_3d_bbox(location, dim, rotation_y, calib): 139 | box_3d = compute_box_3d(dim, location, rotation_y) 140 | box_2d = project_to_image(box_3d, calib) 141 | return box_2d 142 | 143 | 144 | if __name__ == '__main__': 145 | calib = np.array( 146 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 147 | 
[0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 148 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 149 | dtype=np.float32) 150 | alpha = -0.20 151 | tl = np.array([712.40, 143.00], dtype=np.float32) 152 | br = np.array([810.73, 307.92], dtype=np.float32) 153 | ct = (tl + br) / 2 154 | rotation_y = 0.01 155 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 156 | print('rotation_y', rotation_y) 157 | -------------------------------------------------------------------------------- /src/lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | def flip(img): 17 | return img[:, :, ::-1].copy() 18 | 19 | # @numba.jit(nopython=True, nogil=True) 20 | def transform_preds_with_trans(coords, trans): 21 | # target_coords = np.concatenate( 22 | # [coords, np.ones((coords.shape[0], 1), np.float32)], axis=1) 23 | target_coords = np.ones((coords.shape[0], 3), np.float32) 24 | target_coords[:, :2] = coords 25 | target_coords = np.dot(trans, target_coords.transpose()).transpose() 26 | return target_coords[:, :2] 27 | 28 | 29 | def transform_preds(coords, center, scale, output_size): 30 | target_coords = np.zeros(coords.shape) 31 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 32 | for p in range(coords.shape[0]): 33 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 34 | return target_coords 35 | 36 | 37 | def get_affine_transform(center, 38 | scale, 39 | rot, 40 | output_size, 41 | shift=np.array([0, 0], dtype=np.float32), 42 | inv=0): 43 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 44 | scale = np.array([scale, scale], dtype=np.float32) 45 | 46 | scale_tmp = scale 47 | src_w = scale_tmp[0] 48 | dst_w = output_size[0] 49 | dst_h = output_size[1] 50 | 51 | rot_rad = np.pi * rot / 180 52 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 53 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 54 | 55 | src = np.zeros((3, 2), dtype=np.float32) 56 | dst = np.zeros((3, 2), dtype=np.float32) 57 | src[0, :] = center + scale_tmp * shift 58 | src[1, :] = center + src_dir + scale_tmp * shift 59 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 60 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 61 | 62 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 63 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 64 | 65 | if inv: 66 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 67 | else: 68 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 69 | 70 | return trans 71 | 72 | 73 | def affine_transform(pt, t): 74 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 75 | new_pt = np.dot(t, new_pt) 76 | return new_pt[:2] 77 | 78 | 79 | def get_3rd_point(a, b): 80 | direct = a - b 81 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 82 | 83 | 84 | def get_dir(src_point, rot_rad): 85 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 86 | 87 | src_result = 
[0, 0] 88 | src_result[0] = src_point[0] * cs - src_point[1] * sn 89 | src_result[1] = src_point[0] * sn + src_point[1] * cs 90 | 91 | return src_result 92 | 93 | 94 | def crop(img, center, scale, output_size, rot=0): 95 | trans = get_affine_transform(center, scale, rot, output_size) 96 | 97 | dst_img = cv2.warpAffine(img, 98 | trans, 99 | (int(output_size[0]), int(output_size[1])), 100 | flags=cv2.INTER_LINEAR) 101 | 102 | return dst_img 103 | 104 | # @numba.jit(nopython=True, nogil=True) 105 | def gaussian_radius(det_size, min_overlap=0.7): 106 | height, width = det_size 107 | 108 | a1 = 1 109 | b1 = (height + width) 110 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 111 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 112 | r1 = (b1 + sq1) / 2 113 | 114 | a2 = 4 115 | b2 = 2 * (height + width) 116 | c2 = (1 - min_overlap) * width * height 117 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 118 | r2 = (b2 + sq2) / 2 119 | 120 | a3 = 4 * min_overlap 121 | b3 = -2 * min_overlap * (height + width) 122 | c3 = (min_overlap - 1) * width * height 123 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 124 | r3 = (b3 + sq3) / 2 125 | return min(r1, r2, r3) 126 | 127 | 128 | # @numba.jit(nopython=True, nogil=True) 129 | def gaussian2D(shape, sigma=1): 130 | m, n = [(ss - 1.) / 2. for ss in shape] 131 | y, x = np.ogrid[-m:m+1,-n:n+1] 132 | # y, x = np.arange(-m, m + 1).reshape(-1, 1), np.arange(-n, n + 1).reshape(1, -1) 133 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 134 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 135 | return h 136 | 137 | # @numba.jit(nopython=True, nogil=True) 138 | def draw_umich_gaussian(heatmap, center, radius, k=1): 139 | # import pdb; pdb.set_trace() 140 | diameter = 2 * radius + 1 141 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 142 | 143 | x, y = int(center[0]), int(center[1]) 144 | 145 | height, width = heatmap.shape[0:2] 146 | 147 | left, right = min(x, radius), min(width - x, radius + 1) 148 | top, bottom = min(y, radius), min(height - y, radius + 1) 149 | # import pdb; pdb.set_trace() 150 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 151 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 152 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 153 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 154 | return heatmap 155 | 156 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 157 | diameter = 2 * radius + 1 158 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 159 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 160 | dim = value.shape[0] 161 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 162 | if is_offset and dim == 2: 163 | delta = np.arange(diameter*2+1) - radius 164 | reg[0] = reg[0] - delta.reshape(1, -1) 165 | reg[1] = reg[1] - delta.reshape(-1, 1) 166 | 167 | x, y = int(center[0]), int(center[1]) 168 | 169 | height, width = heatmap.shape[0:2] 170 | 171 | left, right = min(x, radius), min(width - x, radius + 1) 172 | top, bottom = min(y, radius), min(height - y, radius + 1) 173 | 174 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 175 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 176 | masked_gaussian = gaussian[radius - top:radius + bottom, 177 | radius - left:radius + right] 178 | masked_reg = reg[:, radius - top:radius + bottom, 179 | radius - left:radius + right] 180 | if min(masked_gaussian.shape) > 0 and 
min(masked_heatmap.shape) > 0: # TODO debug 181 | idx = (masked_gaussian >= masked_heatmap).reshape( 182 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 183 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 184 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 185 | return regmap 186 | 187 | 188 | def draw_msra_gaussian(heatmap, center, sigma): 189 | tmp_size = sigma * 3 190 | mu_x = int(center[0] + 0.5) 191 | mu_y = int(center[1] + 0.5) 192 | w, h = heatmap.shape[0], heatmap.shape[1] 193 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 194 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 195 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 196 | return heatmap 197 | size = 2 * tmp_size + 1 198 | x = np.arange(0, size, 1, np.float32) 199 | y = x[:, np.newaxis] 200 | x0 = y0 = size // 2 201 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 202 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 203 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 204 | img_x = max(0, ul[0]), min(br[0], h) 205 | img_y = max(0, ul[1]), min(br[1], w) 206 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 207 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 208 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 209 | return heatmap 210 | 211 | def grayscale(image): 212 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 213 | 214 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 215 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 216 | image += np.dot(eigvec, eigval * alpha) 217 | 218 | def blend_(alpha, image1, image2): 219 | image1 *= alpha 220 | image2 *= (1 - alpha) 221 | image1 += image2 222 | 223 | def saturation_(data_rng, image, gs, gs_mean, var): 224 | alpha = 1. + data_rng.uniform(low=-var, high=var) 225 | blend_(alpha, image, gs[:, :, None]) 226 | 227 | def brightness_(data_rng, image, gs, gs_mean, var): 228 | alpha = 1. + data_rng.uniform(low=-var, high=var) 229 | image *= alpha 230 | 231 | def contrast_(data_rng, image, gs, gs_mean, var): 232 | alpha = 1. 
+ data_rng.uniform(low=-var, high=var) 233 | blend_(alpha, image, gs_mean) 234 | 235 | def color_aug(data_rng, image, eig_val, eig_vec): 236 | functions = [brightness_, contrast_, saturation_] 237 | random.shuffle(functions) 238 | 239 | gs = grayscale(image) 240 | gs_mean = gs.mean() 241 | for f in functions: 242 | f(data_rng, image, gs, gs_mean, 0.4) 243 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 244 | -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | from .image import transform_preds_with_trans, get_affine_transform 8 | from .ddd_utils import ddd2locrot, comput_corners_3d 9 | from .ddd_utils import project_to_image, rot_y2alpha 10 | import numba 11 | 12 | def get_alpha(rot): 13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 15 | # return rot[:, 0] 16 | idx = rot[:, 1] > rot[:, 5] 17 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 18 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 19 | return alpha1 * idx + alpha2 * (1 - idx) 20 | 21 | def generic_post_process( 22 | opt, dets, c, s, h, w, num_classes, calibs=None, height=-1, width=-1): 23 | if not ('scores' in dets): 24 | return [{}], [{}] 25 | ret = [] 26 | 27 | for i in range(len(dets['scores'])): 28 | preds = [] 29 | trans = get_affine_transform( 30 | c[i], s[i], 0, (w, h), inv=1).astype(np.float32) 31 | for j in range(len(dets['scores'][i])): 32 | if dets['scores'][i][j] < opt.out_thresh: 33 | break 34 | item = {} 35 | item['score'] = dets['scores'][i][j] 36 | item['class'] = int(dets['clses'][i][j]) + 1 37 | item['ct'] = transform_preds_with_trans( 38 | (dets['cts'][i][j]).reshape(1, 2), trans).reshape(2) 39 | 40 | if 'tracking' in dets: 41 | tracking = transform_preds_with_trans( 42 | (dets['tracking'][i][j] + dets['cts'][i][j]).reshape(1, 2), 43 | trans).reshape(2) 44 | item['tracking'] = tracking - item['ct'] 45 | 46 | if 'bboxes' in dets: 47 | bbox = transform_preds_with_trans( 48 | dets['bboxes'][i][j].reshape(2, 2), trans).reshape(4) 49 | item['bbox'] = bbox 50 | 51 | if 'hps' in dets: 52 | pts = transform_preds_with_trans( 53 | dets['hps'][i][j].reshape(-1, 2), trans).reshape(-1) 54 | item['hps'] = pts 55 | 56 | if 'dep' in dets and len(dets['dep'][i]) > j: 57 | item['dep'] = dets['dep'][i][j] 58 | 59 | if 'dim' in dets and len(dets['dim'][i]) > j: 60 | item['dim'] = dets['dim'][i][j] 61 | 62 | if 'rot' in dets and len(dets['rot'][i]) > j: 63 | item['alpha'] = get_alpha(dets['rot'][i][j:j+1])[0] 64 | 65 | if 'rot' in dets and 'dep' in dets and 'dim' in dets \ 66 | and len(dets['dep'][i]) > j: 67 | if 'amodel_offset' in dets and len(dets['amodel_offset'][i]) > j: 68 | ct_output = dets['bboxes'][i][j].reshape(2, 2).mean(axis=0) 69 | amodel_ct_output = ct_output + dets['amodel_offset'][i][j] 70 | ct = transform_preds_with_trans( 71 | amodel_ct_output.reshape(1, 2), trans).reshape(2).tolist() 72 | else: 73 | bbox = item['bbox'] 74 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 75 | item['ct'] = ct 76 | item['loc'], item['rot_y'] = ddd2locrot( 77 | ct, item['alpha'], item['dim'], item['dep'], calibs[i]) 78 | 79 | preds.append(item) 80 | 81 | if 'nuscenes_att' in dets: 82 | for j in range(len(preds)): 83 | 
preds[j]['nuscenes_att'] = dets['nuscenes_att'][i][j] 84 | 85 | if 'velocity' in dets: 86 | for j in range(len(preds)): 87 | preds[j]['velocity'] = dets['velocity'][i][j] 88 | 89 | ret.append(preds) 90 | 91 | return ret -------------------------------------------------------------------------------- /src/lib/utils/tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.utils.linear_assignment_ import linear_assignment 3 | from numba import jit 4 | import copy 5 | 6 | class Tracker(object): 7 | def __init__(self, opt): 8 | self.opt = opt 9 | self.reset() 10 | 11 | def init_track(self, results): 12 | for item in results: 13 | if item['score'] > self.opt.new_thresh: 14 | self.id_count += 1 15 | # active and age are never used in the paper 16 | item['active'] = 1 17 | item['age'] = 1 18 | item['tracking_id'] = self.id_count 19 | if not ('ct' in item): 20 | bbox = item['bbox'] 21 | item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 22 | self.tracks.append(item) 23 | 24 | def reset(self): 25 | self.id_count = 0 26 | self.tracks = [] 27 | 28 | def step(self, results, public_det=None): 29 | N = len(results) 30 | M = len(self.tracks) 31 | 32 | dets = np.array( 33 | [det['ct'] + det['tracking'] for det in results], np.float32) # N x 2 34 | track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ 35 | (track['bbox'][3] - track['bbox'][1])) \ 36 | for track in self.tracks], np.float32) # M 37 | track_cat = np.array([track['class'] for track in self.tracks], np.int32) # M 38 | item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \ 39 | (item['bbox'][3] - item['bbox'][1])) \ 40 | for item in results], np.float32) # N 41 | item_cat = np.array([item['class'] for item in results], np.int32) # N 42 | tracks = np.array( 43 | [pre_det['ct'] for pre_det in self.tracks], np.float32) # M x 2 44 | dist = (((tracks.reshape(1, -1, 2) - \ 45 | dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M 46 | 47 | invalid = ((dist > track_size.reshape(1, M)) + \ 48 | (dist > item_size.reshape(N, 1)) + \ 49 | (item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0 50 | dist = dist + invalid * 1e18 51 | 52 | if self.opt.hungarian: 53 | item_score = np.array([item['score'] for item in results], np.float32) # N 54 | dist[dist > 1e18] = 1e18 55 | matched_indices = linear_assignment(dist) 56 | else: 57 | matched_indices = greedy_assignment(copy.deepcopy(dist)) 58 | unmatched_dets = [d for d in range(dets.shape[0]) \ 59 | if not (d in matched_indices[:, 0])] 60 | unmatched_tracks = [d for d in range(tracks.shape[0]) \ 61 | if not (d in matched_indices[:, 1])] 62 | 63 | if self.opt.hungarian: 64 | matches = [] 65 | for m in matched_indices: 66 | if dist[m[0], m[1]] > 1e16: 67 | unmatched_dets.append(m[0]) 68 | unmatched_tracks.append(m[1]) 69 | else: 70 | matches.append(m) 71 | matches = np.array(matches).reshape(-1, 2) 72 | else: 73 | matches = matched_indices 74 | 75 | ret = [] 76 | for m in matches: 77 | track = results[m[0]] 78 | track['tracking_id'] = self.tracks[m[1]]['tracking_id'] 79 | track['age'] = 1 80 | track['active'] = self.tracks[m[1]]['active'] + 1 81 | ret.append(track) 82 | 83 | if self.opt.public_det and len(unmatched_dets) > 0: 84 | # Public detection: only create tracks from provided detections 85 | pub_dets = np.array([d['ct'] for d in public_det], np.float32) 86 | dist3 = ((dets.reshape(-1, 1, 2) - pub_dets.reshape(1, -1, 2)) ** 2).sum( 87 | axis=2) 88 | matched_dets = [d for d in range(dets.shape[0]) \ 89 
| if not (d in unmatched_dets)] 90 | dist3[matched_dets] = 1e18 91 | for j in range(len(pub_dets)): 92 | i = dist3[:, j].argmin() 93 | if dist3[i, j] < item_size[i]: 94 | dist3[i, :] = 1e18 95 | track = results[i] 96 | if track['score'] > self.opt.new_thresh: 97 | self.id_count += 1 98 | track['tracking_id'] = self.id_count 99 | track['age'] = 1 100 | track['active'] = 1 101 | ret.append(track) 102 | else: 103 | # Private detection: create tracks for all un-matched detections 104 | for i in unmatched_dets: 105 | track = results[i] 106 | if track['score'] > self.opt.new_thresh: 107 | self.id_count += 1 108 | track['tracking_id'] = self.id_count 109 | track['age'] = 1 110 | track['active'] = 1 111 | ret.append(track) 112 | 113 | for i in unmatched_tracks: 114 | track = self.tracks[i] 115 | if track['age'] < self.opt.max_age: 116 | track['age'] += 1 117 | track['active'] = 0 118 | bbox = track['bbox'] 119 | ct = track['ct'] 120 | v = [0, 0] 121 | track['bbox'] = [ 122 | bbox[0] + v[0], bbox[1] + v[1], 123 | bbox[2] + v[0], bbox[3] + v[1]] 124 | track['ct'] = [ct[0] + v[0], ct[1] + v[1]] 125 | ret.append(track) 126 | self.tracks = ret 127 | return ret 128 | 129 | def greedy_assignment(dist): 130 | matched_indices = [] 131 | if dist.shape[1] == 0: 132 | return np.array(matched_indices, np.int32).reshape(-1, 2) 133 | for i in range(dist.shape[0]): 134 | j = dist[i].argmin() 135 | if dist[i][j] < 1e16: 136 | dist[:, j] = 1e18 137 | matched_indices.append([i, j]) 138 | return np.array(matched_indices, np.int32).reshape(-1, 2) 139 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | import os 7 | 8 | import torch 9 | import torch.utils.data 10 | from opts import opts 11 | from model.model import create_model, load_model, save_model 12 | from model.data_parallel import DataParallel 13 | from logger import Logger 14 | from dataset.dataset_factory import get_dataset 15 | from trainer import Trainer 16 | 17 | def get_optimizer(opt, model): 18 | if opt.optim == 'adam': 19 | optimizer = torch.optim.Adam(model.parameters(), opt.lr) 20 | elif opt.optim == 'sgd': 21 | print('Using SGD') 22 | optimizer = torch.optim.SGD( 23 | model.parameters(), opt.lr, momentum=0.9, weight_decay=0.0001) 24 | else: 25 | assert 0, opt.optim 26 | return optimizer 27 | 28 | def main(opt): 29 | torch.manual_seed(opt.seed) 30 | torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test 31 | Dataset = get_dataset(opt.dataset) 32 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 33 | print(opt) 34 | if not 
opt.not_set_cuda_env: 35 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 36 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') 37 | logger = Logger(opt) 38 | 39 | print('Creating model...') 40 | model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt) 41 | optimizer = get_optimizer(opt, model) 42 | start_epoch = 0 43 | if opt.load_model != '': 44 | model, optimizer, start_epoch = load_model( 45 | model, opt.load_model, opt, optimizer) 46 | 47 | trainer = Trainer(opt, model, optimizer) 48 | trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) 49 | 50 | if opt.val_intervals < opt.num_epochs or opt.test: 51 | print('Setting up validation data...') 52 | val_loader = torch.utils.data.DataLoader( 53 | Dataset(opt, 'val'), batch_size=1, shuffle=False, num_workers=1, 54 | pin_memory=True) 55 | 56 | if opt.test: 57 | _, preds = trainer.val(0, val_loader) 58 | val_loader.dataset.run_eval(preds, opt.save_dir) 59 | return 60 | 61 | print('Setting up train data...') 62 | train_loader = torch.utils.data.DataLoader( 63 | Dataset(opt, 'train'), batch_size=opt.batch_size, shuffle=True, 64 | num_workers=opt.num_workers, pin_memory=True, drop_last=True 65 | ) 66 | 67 | print('Starting training...') 68 | for epoch in range(start_epoch + 1, opt.num_epochs + 1): 69 | mark = epoch if opt.save_all else 'last' 70 | log_dict_train, _ = trainer.train(epoch, train_loader) 71 | logger.write('epoch: {} |'.format(epoch)) 72 | for k, v in log_dict_train.items(): 73 | logger.scalar_summary('train_{}'.format(k), v, epoch) 74 | logger.write('{} {:8f} | '.format(k, v)) 75 | if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: 76 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), 77 | epoch, model, optimizer) 78 | with torch.no_grad(): 79 | log_dict_val, preds = trainer.val(epoch, val_loader) 80 | if opt.eval_val: 81 | val_loader.dataset.run_eval(preds, opt.save_dir) 82 | for k, v in log_dict_val.items(): 83 | logger.scalar_summary('val_{}'.format(k), v, epoch) 84 | logger.write('{} {:8f} | '.format(k, v)) 85 | else: 86 | save_model(os.path.join(opt.save_dir, 'model_last.pth'), 87 | epoch, model, optimizer) 88 | logger.write('\n') 89 | if epoch in opt.save_point: 90 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), 91 | epoch, model, optimizer) 92 | if epoch in opt.lr_step: 93 | lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) 94 | print('Drop LR to', lr) 95 | for param_group in optimizer.param_groups: 96 | param_group['lr'] = lr 97 | logger.close() 98 | 99 | if __name__ == '__main__': 100 | opt = opts().parse() 101 | main(opt) 102 | -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | import os 7 | import json 8 | import cv2 9 | import numpy as np 10 | import time 11 | from progress.bar import Bar 12 | import torch 13 | import copy 14 | 15 | from opts import opts 16 | from logger import Logger 17 | from utils.utils import AverageMeter 18 | from dataset.dataset_factory import dataset_factory 19 | from detector import Detector 20 | 21 | 22 | class PrefetchDataset(torch.utils.data.Dataset): 23 | def __init__(self, opt, dataset, pre_process_func): 24 | self.images = dataset.images 25 | self.load_image_func = dataset.coco.loadImgs 26 | self.img_dir = dataset.img_dir 27 | 
self.pre_process_func = pre_process_func 28 | self.get_default_calib = dataset.get_default_calib 29 | self.opt = opt 30 | 31 | def __getitem__(self, index): 32 | img_id = self.images[index] 33 | img_info = self.load_image_func(ids=[img_id])[0] 34 | img_path = os.path.join(self.img_dir, img_info['file_name']) 35 | image = cv2.imread(img_path) 36 | images, meta = {}, {} 37 | for scale in opt.test_scales: 38 | input_meta = {} 39 | calib = img_info['calib'] if 'calib' in img_info \ 40 | else self.get_default_calib(image.shape[1], image.shape[0]) 41 | input_meta['calib'] = calib 42 | images[scale], meta[scale] = self.pre_process_func( 43 | image, scale, input_meta) 44 | ret = {'images': images, 'image': image, 'meta': meta} 45 | if 'frame_id' in img_info and img_info['frame_id'] == 1: 46 | ret['is_first_frame'] = 1 47 | ret['video_id'] = img_info['video_id'] 48 | return img_id, ret 49 | 50 | def __len__(self): 51 | return len(self.images) 52 | 53 | def prefetch_test(opt): 54 | if not opt.not_set_cuda_env: 55 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 56 | Dataset = dataset_factory[opt.test_dataset] 57 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 58 | print(opt) 59 | Logger(opt) 60 | 61 | split = 'val' if not opt.trainval else 'test' 62 | dataset = Dataset(opt, split) 63 | detector = Detector(opt) 64 | 65 | if opt.load_results != '': 66 | load_results = json.load(open(opt.load_results, 'r')) 67 | for img_id in load_results: 68 | for k in range(len(load_results[img_id])): 69 | if load_results[img_id][k]['class'] - 1 in opt.ignore_loaded_cats: 70 | load_results[img_id][k]['score'] = -1 71 | else: 72 | load_results = {} 73 | 74 | data_loader = torch.utils.data.DataLoader( 75 | PrefetchDataset(opt, dataset, detector.pre_process), 76 | batch_size=1, shuffle=False, num_workers=1, pin_memory=True) 77 | 78 | results = {} 79 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 80 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 81 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'track'] 82 | avg_time_stats = {t: AverageMeter() for t in time_stats} 83 | if opt.use_loaded_results: 84 | for img_id in data_loader.dataset.images: 85 | results[img_id] = load_results['{}'.format(img_id)] 86 | num_iters = 0 87 | for ind, (img_id, pre_processed_images) in enumerate(data_loader): 88 | if ind >= num_iters: 89 | break 90 | if opt.tracking and ('is_first_frame' in pre_processed_images): 91 | if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results: 92 | pre_processed_images['meta']['pre_dets'] = \ 93 | load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))] 94 | else: 95 | print() 96 | print('No pre_dets for', int(img_id.numpy().astype(np.int32)[0]), 97 | '. 
Use empty initialization.') 98 | pre_processed_images['meta']['pre_dets'] = [] 99 | detector.reset_tracking() 100 | print('Start tracking video', int(pre_processed_images['video_id'])) 101 | if opt.public_det: 102 | if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results: 103 | pre_processed_images['meta']['cur_dets'] = \ 104 | load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))] 105 | else: 106 | print('No cur_dets for', int(img_id.numpy().astype(np.int32)[0])) 107 | pre_processed_images['meta']['cur_dets'] = [] 108 | 109 | ret = detector.run(pre_processed_images) 110 | results[int(img_id.numpy().astype(np.int32)[0])] = ret['results'] 111 | 112 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 113 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 114 | for t in avg_time_stats: 115 | avg_time_stats[t].update(ret[t]) 116 | Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format( 117 | t, tm = avg_time_stats[t]) 118 | if opt.print_iter > 0: 119 | if ind % opt.print_iter == 0: 120 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 121 | else: 122 | bar.next() 123 | bar.finish() 124 | if opt.save_results: 125 | print('saving results to', opt.save_dir + '/save_results_{}{}.json'.format( 126 | opt.test_dataset, opt.dataset_version)) 127 | json.dump(_to_list(copy.deepcopy(results)), 128 | open(opt.save_dir + '/save_results_{}{}.json'.format( 129 | opt.test_dataset, opt.dataset_version), 'w')) 130 | dataset.run_eval(results, opt.save_dir) 131 | 132 | def test(opt): 133 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 134 | 135 | Dataset = dataset_factory[opt.test_dataset] 136 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 137 | print(opt) 138 | Logger(opt) 139 | 140 | split = 'val' if not opt.trainval else 'test' 141 | dataset = Dataset(opt, split) 142 | detector = Detector(opt) 143 | 144 | if opt.load_results != '': # load results in json 145 | load_results = json.load(open(opt.load_results, 'r')) 146 | 147 | results = {} 148 | num_iters = len(dataset) if opt.num_iters < 0 else opt.num_iters 149 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 150 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 151 | avg_time_stats = {t: AverageMeter() for t in time_stats} 152 | for ind in range(num_iters): 153 | img_id = dataset.images[ind] 154 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 155 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 156 | input_meta = {} 157 | if 'calib' in img_info: 158 | input_meta['calib'] = img_info['calib'] 159 | if (opt.tracking and ('frame_id' in img_info) and img_info['frame_id'] == 1): 160 | detector.reset_tracking() 161 | input_meta['pre_dets'] = load_results[img_id] 162 | 163 | ret = detector.run(img_path, input_meta) 164 | results[img_id] = ret['results'] 165 | 166 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 167 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 168 | for t in avg_time_stats: 169 | avg_time_stats[t].update(ret[t]) 170 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 171 | bar.next() 172 | bar.finish() 173 | if opt.save_results: 174 | print('saving results to', opt.save_dir + '/save_results_{}{}.json'.format( 175 | opt.test_dataset, opt.dataset_version)) 176 | json.dump(_to_list(copy.deepcopy(results)), 177 | open(opt.save_dir + '/save_results_{}{}.json'.format( 178 | opt.test_dataset, opt.dataset_version), 'w')) 179 | dataset.run_eval(results, opt.save_dir) 180 | 181 | 
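# _to_list (below) converts numpy arrays and numpy scalars stored in the
# per-image result dicts into plain Python lists/floats, so that the
# json.dump calls in prefetch_test() and test() above can serialize them.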
182 | def _to_list(results): 183 | for img_id in results: 184 | for t in range(len(results[img_id])): 185 | for k in results[img_id][t]: 186 | if isinstance(results[img_id][t][k], (np.ndarray, np.float32)): 187 | results[img_id][t][k] = results[img_id][t][k].tolist() 188 | return results 189 | 190 | if __name__ == '__main__': 191 | opt = opts().parse() 192 | if opt.not_prefetch_test: 193 | test(opt) 194 | else: 195 | prefetch_test(opt) 196 | -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/annot_bbox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import cv2 5 | import argparse 6 | import numpy as np 7 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--image_path', default='') 11 | parser.add_argument('--save_path', default='') 12 | MAX_CACHE = 20 13 | CAT_NAMES = ['cat'] 14 | 15 | def _sort_expt(pts): 16 | t, l, b, r = 0, 0, 0, 0 17 | for i in range(4): 18 | if pts[i][0] < pts[l][0]: 19 | l = i 20 | if pts[i][1] < pts[t][1]: 21 | t = i 22 | if pts[i][0] > pts[r][0]: 23 | r = i 24 | if pts[i][1] > pts[b][1]: 25 | b = i 26 | ret = [pts[t], pts[l], pts[b], pts[r]] 27 | return ret 28 | 29 | def _expt2bbox(expt): 30 | expt = np.array(expt, dtype=np.int32) 31 | bbox = [int(expt[:, 0].min()), int(expt[:, 1].min()), 32 | int(expt[:, 0].max()), int(expt[:, 1].max())] 33 | return bbox 34 | 35 | def save_txt(txt_name, pts_cls): 36 | ret = [] 37 | for i in range(len(pts_cls)): 38 | ret.append(np.array(pts_cls[i][:4], dtype=np.int32).reshape(8).tolist() \ 39 | + [pts_cls[i][4]]) 40 | np.savetxt(txt_name, np.array(ret, dtype=np.int32), fmt='%d') 41 | 42 | def click(event, x, y, flags, param): 43 | global expt_cls, bboxes, pts 44 | if event == cv2.EVENT_LBUTTONDOWN: 45 | pts.append([x, y]) 46 | cv2.circle(img, (x, y), 5, (255, 0, 255), -1) 47 | if len(pts) == 4: 48 | expt = _sort_expt(pts) 49 | bbox = _expt2bbox(expt) 50 | expt_cls.append(expt + [cls]) 51 | cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), 52 | (255, 0, 255), 2, cv2.LINE_AA) 53 | pts = [] 54 | 55 | if __name__ == '__main__': 56 | cat_info = [] 57 | for i, cat in enumerate(CAT_NAMES): 58 | cat_info.append({'name': cat, 'id': i + 1}) 59 | 60 | args = parser.parse_args() 61 | if args.save_path == '': 62 | args.save_path = os.path.join(args.image_path, '..', 'click_annotation') 63 | if not os.path.exists(args.save_path): 64 | os.mkdir(args.save_path) 65 | 66 | ann_path = os.path.join(args.save_path, 'annotations.json') 67 | if os.path.exists(ann_path): 68 | anns = json.load(open(ann_path, 'r')) 69 | else: 70 | anns = {'annotations': [], 'images': [], 'categories': cat_info} 71 | 72 | assert os.path.exists(args.image_path) 73 | ls = os.listdir(args.image_path) 74 | image_names = [] 75 | for file_name in sorted(ls): 76 | ext = file_name[file_name.rfind('.') + 1:].lower() 77 | if (ext in image_ext): 78 | image_names.append(file_name) 79 | 80 | i = 0 81 | cls = 1 82 | cached = 0 83 | while 
i < len(image_names): 84 | image_name = image_names[i] 85 | txt_name = os.path.join( 86 | args.save_path, image_name[:image_name.rfind('.')] + '.txt') 87 | if os.path.exists(txt_name) or image_name in anns: 88 | i = i + 1 89 | continue 90 | image_path = os.path.join(args.image_path, image_name) 91 | img = cv2.imread(image_path) 92 | cv2.namedWindow(image_name) 93 | cv2.setMouseCallback(image_name, click) 94 | expt_cls, pts = [], [] 95 | while True: 96 | finished = False 97 | cv2.imshow(image_name, img) 98 | key = cv2.waitKey(1) 99 | if key == 100: # 'd': save annotations for this image and move to the next 100 | i = i + 1 101 | save_txt(txt_name, expt_cls) 102 | image_id = len(anns['images']) 103 | image_info = {'file_name': image_name, 'id': image_id} 104 | anns['images'].append(image_info) 105 | for ann in expt_cls: 106 | ann_id = len(anns['annotations']) 107 | ann_dict = {'image_id': image_id, 'id': ann_id, 'category_id': ann[4], 108 | 'bbox': _expt2bbox(ann[:4]), 'extreme_points': ann[:4]} 109 | anns['annotations'].append(ann_dict) 110 | cached = cached + 1 111 | print('saved to ', txt_name) 112 | if cached > MAX_CACHE: 113 | print('Saving json', ann_path) 114 | json.dump(anns, open(ann_path, 'w')) 115 | cached = 0 116 | break 117 | elif key == 97: # 'a': go back to the previous image 118 | i = i - 1 119 | break 120 | elif key == 27: # ESC: save the json and quit 121 | json.dump(anns, open(ann_path, 'w')) 122 | sys.exit(0) 123 | cv2.destroyAllWindows() 124 | -------------------------------------------------------------------------------- /src/tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | DATA_PATH = '../../data/crowdhuman/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['val', 'train'] 9 | DEBUG = False 10 | 11 | def load_func(fpath): 12 | print('fpath', fpath) 13 | assert os.path.exists(fpath) 14 | with open(fpath,'r') as fid: 15 | lines = fid.readlines() 16 | records =[json.loads(line.strip('\n')) for line in lines] 17 | return records 18 | 19 | if __name__ == '__main__': 20 | if not os.path.exists(OUT_PATH): 21 | os.mkdir(OUT_PATH) 22 | for split in SPLITS: 23 | data_path = DATA_PATH + split 24 | out_path = OUT_PATH + '{}.json'.format(split) 25 | out = {'images': [], 'annotations': [], 26 | 'categories': [{'id': 1, 'name': 'person'}]} 27 | ann_path = DATA_PATH + '/annotation_{}.odgt'.format(split) 28 | anns_data = load_func(ann_path) 29 | image_cnt = 0 30 | ann_cnt = 0 31 | video_cnt = 0 32 | for ann_data in anns_data: 33 | image_cnt += 1 34 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 35 | 'id': image_cnt} 36 | out['images'].append(image_info) 37 | if split != 'test': 38 | anns = ann_data['gtboxes'] 39 | for i in range(len(anns)): 40 | ann_cnt += 1 41 | ann = {'id': ann_cnt, 42 | 'category_id': 1, 43 | 'image_id': image_cnt, 44 | 'bbox_vis': anns[i]['vbox'], 45 | 'bbox': anns[i]['fbox'], 46 | 'iscrowd': 1 if 'extra' in anns[i] and \ 47 | 'ignore' in anns[i]['extra'] and \ 48 | anns[i]['extra']['ignore'] == 1 else 0} 49 | out['annotations'].append(ann) 50 | print('loaded {} for {} images and {} samples'.format( 51 | split, len(out['images']), len(out['annotations']))) 52 | json.dump(out, open(out_path, 'w')) 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/tools/convert_kittitrack_to_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import 
print_function 4 | 5 | import pickle 6 | import json 7 | import numpy as np 8 | import os 9 | import cv2 10 | DATA_PATH = '../../data/kitti_tracking/' 11 | SPLITS = ['train_half', 'val_half', 'train', 'test'] 12 | VIDEO_SETS = {'train': range(21), 'test': range(29), 13 | 'train_half': range(21), 'val_half': range(21)} 14 | CREATE_HALF_LABEL = True 15 | DEBUG = False 16 | 17 | ''' 18 | #Values Name Description 19 | ---------------------------------------------------------------------------- 20 | 1 frame Frame within the sequence where the object appearers 21 | 1 track id Unique tracking id of this object within this sequence 22 | 1 type Describes the type of object: 'Car', 'Van', 'Truck', 23 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 24 | 'Misc' or 'DontCare' 25 | 1 truncated Integer (0,1,2) indicating the level of truncation. 26 | Note that this is in contrast to the object detection 27 | benchmark where truncation is a float in [0,1]. 28 | 1 occluded Integer (0,1,2,3) indicating occlusion state: 29 | 0 = fully visible, 1 = partly occluded 30 | 2 = largely occluded, 3 = unknown 31 | 1 alpha Observation angle of object, ranging [-pi..pi] 32 | 4 bbox 2D bounding box of object in the image (0-based index): 33 | contains left, top, right, bottom pixel coordinates 34 | 3 dimensions 3D object dimensions: height, width, length (in meters) 35 | 3 location 3D object location x,y,z in camera coordinates (in meters) 36 | 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi] 37 | 1 score Only for results: Float, indicating confidence in 38 | detection, needed for p/r curves, higher is better. 39 | ''' 40 | 41 | def project_to_image(pts_3d, P): 42 | # pts_3d: n x 3 43 | # P: 3 x 4 44 | # return: n x 2 45 | pts_3d_homo = np.concatenate( 46 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 47 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 48 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 49 | return pts_2d 50 | 51 | def read_clib(calib_path): 52 | f = open(calib_path, 'r') 53 | for i, line in enumerate(f): 54 | if i == 2: 55 | calib = np.array(line.strip().split(' ')[1:], dtype=np.float32) 56 | calib = calib.reshape(3, 4) 57 | return calib 58 | 59 | def _bbox_to_coco_bbox(bbox): 60 | return [(bbox[0]), (bbox[1]), 61 | (bbox[2] - bbox[0]), (bbox[3] - bbox[1])] 62 | 63 | cats = ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting', 64 | 'Tram', 'Misc', 'DontCare'] 65 | 66 | 67 | cat_ids = {cat: i + 1 for i, cat in enumerate(cats)} 68 | cat_ids['Person'] = cat_ids['Person_sitting'] 69 | 70 | cat_info = [] 71 | for i, cat in enumerate(cats): 72 | cat_info.append({'name': cat, 'id': i + 1}) 73 | 74 | if __name__ == '__main__': 75 | for split in SPLITS: 76 | ann_dir = DATA_PATH + '/label_02/' 77 | ret = {'images': [], 'annotations': [], "categories": cat_info, 78 | 'videos': []} 79 | num_images = 0 80 | for i in VIDEO_SETS[split]: 81 | image_id_base = num_images 82 | video_name = '{:04d}'.format(i) 83 | ret['videos'].append({'id': i + 1, 'file_name': video_name}) 84 | ann_dir = 'train' if not ('test' in split) else split 85 | video_path = DATA_PATH + \ 86 | '/data_tracking_image_2/{}ing/image_02/{}'.format(ann_dir, video_name) 87 | calib_path = DATA_PATH + 'data_tracking_calib/{}ing/calib/'.format(ann_dir) \ 88 | + '{}.txt'.format(video_name) 89 | calib = read_clib(calib_path) 90 | image_files = sorted(os.listdir(video_path)) 91 | num_images_video = len(image_files) 92 | if CREATE_HALF_LABEL and 'half' in split: 93 | image_range = [0, 
num_images_video // 2 - 1] if split == 'train_half' else \ 94 | [num_images_video // 2, num_images_video - 1] 95 | else: 96 | image_range = [0, num_images_video - 1] 97 | print('num_frames', video_name, image_range[1] - image_range[0] + 1) 98 | for j, image_name in enumerate(image_files): 99 | if (j < image_range[0] or j > image_range[1]): 100 | continue 101 | num_images += 1 102 | image_info = {'file_name': '{}/{:06d}.png'.format(video_name, j), 103 | 'id': num_images, 104 | 'calib': calib.tolist(), 105 | 'video_id': i + 1, 106 | 'frame_id': j + 1 - image_range[0]} 107 | ret['images'].append(image_info) 108 | 109 | if split == 'test': 110 | continue 111 | # 0 -1 DontCare -1 -1 -10.000000 219.310000 188.490000 245.500000 218.560000 -1000.000000 -1000.000000 -1000.000000 -10.000000 -1.000000 -1.000000 -1.000000 112 | ann_path = DATA_PATH + 'label_02/{}.txt'.format(video_name) 113 | anns = open(ann_path, 'r') 114 | 115 | if CREATE_HALF_LABEL and 'half' in split: 116 | label_out_folder = DATA_PATH + 'label_02_{}/'.format(split) 117 | label_out_path = label_out_folder + '{}.txt'.format(video_name) 118 | if not os.path.exists(label_out_folder): 119 | os.mkdir(label_out_folder) 120 | label_out_file = open(label_out_path, 'w') 121 | 122 | for ann_ind, txt in enumerate(anns): 123 | tmp = txt[:-1].split(' ') 124 | frame_id = int(tmp[0]) 125 | track_id = int(tmp[1]) 126 | cat_id = cat_ids[tmp[2]] 127 | truncated = int(float(tmp[3])) 128 | occluded = int(tmp[4]) 129 | alpha = float(tmp[5]) 130 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])] 131 | dim = [float(tmp[10]), float(tmp[11]), float(tmp[12])] 132 | location = [float(tmp[13]), float(tmp[14]), float(tmp[15])] 133 | rotation_y = float(tmp[16]) 134 | amodel_center = project_to_image( 135 | np.array([location[0], location[1] - dim[0] / 2, location[2]], 136 | np.float32).reshape(1, 3), calib)[0].tolist() 137 | ann = {'image_id': frame_id + 1 - image_range[0] + image_id_base, 138 | 'id': int(len(ret['annotations']) + 1), 139 | 'category_id': cat_id, 140 | 'dim': dim, 141 | 'bbox': _bbox_to_coco_bbox(bbox), 142 | 'depth': location[2], 143 | 'alpha': alpha, 144 | 'truncated': truncated, 145 | 'occluded': occluded, 146 | 'location': location, 147 | 'rotation_y': rotation_y, 148 | 'amodel_center': amodel_center, 149 | 'track_id': track_id + 1} 150 | if CREATE_HALF_LABEL and 'half' in split: 151 | if (frame_id < image_range[0] or frame_id > image_range[1]): 152 | continue 153 | out_frame_id = frame_id - image_range[0] 154 | label_out_file.write('{} {}'.format( 155 | out_frame_id, txt[txt.find(' ') + 1:])) 156 | 157 | ret['annotations'].append(ann) 158 | 159 | print("# images: ", len(ret['images'])) 160 | print("# annotations: ", len(ret['annotations'])) 161 | out_dir = '{}/annotations/'.format(DATA_PATH) 162 | if not os.path.exists(out_dir): 163 | os.mkdir(out_dir) 164 | out_path = '{}/annotations/tracking_{}.json'.format( 165 | DATA_PATH, split) 166 | json.dump(ret, open(out_path, 'w')) 167 | -------------------------------------------------------------------------------- /src/tools/convert_mot_det_to_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | from collections import defaultdict 5 | split = 'val_half' 6 | 7 | DET_PATH = '../../data/mot17/' 8 | ANN_PATH = '../../data/mot17/annotations/{}.json'.format(split) 9 | OUT_DIR = '../../data/mot17/results/' 10 | OUT_PATH = OUT_DIR + '{}_det.json'.format(split) 11 | 12 | if __name__ == 
'__main__': 13 | if not os.path.exists(OUT_DIR): 14 | os.mkdir(OUT_DIR) 15 | seqs = [s for s in os.listdir(DET_PATH) if '_det' in s] 16 | data = json.load(open(ANN_PATH, 'r')) 17 | images = data['images'] 18 | image_to_anns = defaultdict(list) 19 | for seq in sorted(seqs): 20 | print('seq', seq) 21 | seq_path = '{}/{}/'.format(DET_PATH, seq) 22 | if split == 'val_half': 23 | ann_path = seq_path + 'det/det_val_half.txt' 24 | train_ann_path = seq_path + 'det/det_train_half.txt' 25 | train_anns = np.loadtxt(train_ann_path, dtype=np.float32, delimiter=',') 26 | frame_base = int(train_anns[:, 0].max()) 27 | else: 28 | ann_path = seq_path + 'det/det.txt' 29 | frame_base = 0 30 | if os.path.exists(ann_path): # parse the public detections when the file is present (the IS_THIRD_PARTY flag referenced here was never defined) 31 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 32 | for i in range(len(anns)): 33 | frame_id = int(anns[i][0]) 34 | file_name = '{}/img1/{:06d}.jpg'.format(seq, frame_id + frame_base) 35 | bbox = (anns[i][2:6]).tolist() 36 | score = 1 # float(anns[i][8]) 37 | image_to_anns[file_name].append(bbox + [score]) 38 | 39 | results = {} 40 | for image_info in images: 41 | image_id = image_info['id'] 42 | file_name = image_info['file_name'] 43 | dets = image_to_anns[file_name] 44 | results[image_id] = [] 45 | for det in dets: 46 | bbox = [float(det[0]), float(det[1]), \ 47 | float(det[0] + det[2]), float(det[1] + det[3])] 48 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 49 | results[image_id].append( 50 | {'bbox': bbox, 'score': float(det[4]), 'class': 1, 'ct': ct}) 51 | out_path = OUT_PATH 52 | json.dump(results, open(out_path, 'w')) 53 | -------------------------------------------------------------------------------- /src/tools/convert_mot_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | # Use the same script for MOT16 7 | # DATA_PATH = '../../data/mot16/' 8 | DATA_PATH = '../../data/mot17/' 9 | OUT_PATH = DATA_PATH + 'annotations/' 10 | SPLITS = ['train_half', 'val_half', 'train', 'test'] 11 | HALF_VIDEO = True 12 | CREATE_SPLITTED_ANN = True 13 | CREATE_SPLITTED_DET = True 14 | 15 | if __name__ == '__main__': 16 | for split in SPLITS: 17 | data_path = DATA_PATH + (split if not HALF_VIDEO else 'train') 18 | out_path = OUT_PATH + '{}.json'.format(split) 19 | out = {'images': [], 'annotations': [], 20 | 'categories': [{'id': 1, 'name': 'pedestrian'}], 21 | 'videos': []} 22 | seqs = os.listdir(data_path) 23 | image_cnt = 0 24 | ann_cnt = 0 25 | video_cnt = 0 26 | for seq in sorted(seqs): 27 | if '.DS_Store' in seq: 28 | continue 29 | if 'mot17' in DATA_PATH and (split != 'test' and not ('FRCNN' in seq)): 30 | continue 31 | video_cnt += 1 32 | out['videos'].append({ 33 | 'id': video_cnt, 34 | 'file_name': seq}) 35 | seq_path = '{}/{}/'.format(data_path, seq) 36 | img_path = seq_path + 'img1/' 37 | ann_path = seq_path + 'gt/gt.txt' 38 | images = os.listdir(img_path) 39 | num_images = len([image for image in images if 'jpg' in image]) 40 | if HALF_VIDEO and ('half' in split): 41 | image_range = [0, num_images // 2] if 'train' in split else \ 42 | [num_images // 2 + 1, num_images - 1] 43 | else: 44 | image_range = [0, num_images - 1] 45 | for i in range(num_images): 46 | if (i < image_range[0] or i > image_range[1]): 47 | continue 48 | image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1), 49 | 'id': image_cnt + i + 1, 50 | 'frame_id': i + 1 - image_range[0], 51 | 'prev_image_id': image_cnt + i if i > 0 else -1, 52 | 'next_image_id': \ 53 | image_cnt 
+ i + 2 if i < num_images - 1 else -1, 54 | 'video_id': video_cnt} 55 | out['images'].append(image_info) 56 | print('{}: {} images'.format(seq, num_images)) 57 | if split != 'test': 58 | det_path = seq_path + 'det/det.txt' 59 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 60 | dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',') 61 | if CREATE_SPLITTED_ANN and ('half' in split): 62 | anns_out = np.array([anns[i] for i in range(anns.shape[0]) if \ 63 | int(anns[i][0]) - 1 >= image_range[0] and \ 64 | int(anns[i][0]) - 1 <= image_range[1]], np.float32) 65 | anns_out[:, 0] -= image_range[0] 66 | gt_out = seq_path + '/gt/gt_{}.txt'.format(split) 67 | fout = open(gt_out, 'w') 68 | for o in anns_out: 69 | fout.write( 70 | '{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format( 71 | int(o[0]),int(o[1]),int(o[2]),int(o[3]),int(o[4]),int(o[5]), 72 | int(o[6]),int(o[7]),o[8])) 73 | fout.close() 74 | if CREATE_SPLITTED_DET and ('half' in split): 75 | dets_out = np.array([dets[i] for i in range(dets.shape[0]) if \ 76 | int(dets[i][0]) - 1 >= image_range[0] and \ 77 | int(dets[i][0]) - 1 <= image_range[1]], np.float32) 78 | dets_out[:, 0] -= image_range[0] 79 | det_out = seq_path + '/det/det_{}.txt'.format(split) 80 | dout = open(det_out, 'w') 81 | for o in dets_out: 82 | dout.write( 83 | '{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format( 84 | int(o[0]),int(o[1]),float(o[2]),float(o[3]),float(o[4]),float(o[5]), 85 | float(o[6]))) 86 | dout.close() 87 | 88 | print(' {} ann images'.format(int(anns[:, 0].max()))) 89 | for i in range(anns.shape[0]): 90 | frame_id = int(anns[i][0]) 91 | if (frame_id - 1 < image_range[0] or frame_id - 1> image_range[1]): 92 | continue 93 | track_id = int(anns[i][1]) 94 | cat_id = int(anns[i][7]) 95 | ann_cnt += 1 96 | if not ('15' in DATA_PATH): 97 | if not (float(anns[i][8]) >= 0.25): 98 | continue 99 | if not (int(anns[i][6]) == 1): 100 | continue 101 | if (int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]): # Non-person 102 | continue 103 | if (int(anns[i][7]) in [2, 7, 8, 12]): # Ignored person 104 | category_id = -1 105 | else: 106 | category_id = 1 107 | else: 108 | category_id = 1 109 | ann = {'id': ann_cnt, 110 | 'category_id': category_id, 111 | 'image_id': image_cnt + frame_id, 112 | 'track_id': track_id, 113 | 'bbox': anns[i][2:6].tolist(), 114 | 'conf': float(anns[i][6])} 115 | out['annotations'].append(ann) 116 | image_cnt += num_images 117 | print('loaded {} for {} images and {} samples'.format( 118 | split, len(out['images']), len(out['annotations']))) 119 | json.dump(out, open(out_path, 'w')) 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 0020 empty 000000 000837 22 | 
-------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.test: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000465 2 | 0001 empty 000000 000147 3 | 0002 empty 000000 000243 4 | 0003 empty 000000 000257 5 | 0004 empty 000000 000421 6 | 0005 empty 000000 000809 7 | 0006 empty 000000 000114 8 | 0007 empty 000000 000215 9 | 0008 empty 000000 000165 10 | 0009 empty 000000 000349 11 | 0010 empty 000000 001176 12 | 0011 empty 000000 000774 13 | 0012 empty 000000 000694 14 | 0013 empty 000000 000152 15 | 0014 empty 000000 000850 16 | 0015 empty 000000 000701 17 | 0016 empty 000000 000510 18 | 0017 empty 000000 000305 19 | 0018 empty 000000 000180 20 | 0019 empty 000000 000404 21 | 0020 empty 000000 000173 22 | 0021 empty 000000 000203 23 | 0022 empty 000000 000436 24 | 0023 empty 000000 000430 25 | 0024 empty 000000 000316 26 | 0025 empty 000000 000176 27 | 0026 empty 000000 000170 28 | 0027 empty 000000 000085 29 | 0028 empty 000000 000175 30 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.training: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 0020 empty 000000 000837 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_1-2.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_2-2.seqmap: -------------------------------------------------------------------------------- 1 | 0010 empty 000000 000294 2 | 0011 empty 000000 000373 3 | 0012 empty 000000 000078 4 | 0013 empty 000000 000340 5 | 0014 empty 000000 000106 6 | 0015 empty 000000 000376 7 | 0016 empty 000000 000209 8 | 0017 empty 000000 000145 9 | 0018 empty 000000 000339 10 | 0019 empty 000000 001059 11 | 0020 empty 000000 000837 12 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingval_half.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000077 2 | 0001 empty 000000 000224 3 | 0002 empty 000000 000117 4 | 0003 empty 000000 000072 5 | 0004 empty 000000 000157 6 | 0005 empty 000000 000149 7 | 0006 empty 000000 000135 8 | 0007 empty 000000 000400 9 | 0008 empty 
000000 000195 10 | 0009 empty 000000 000402 11 | 0010 empty 000000 000147 12 | 0011 empty 000000 000187 13 | 0012 empty 000000 000039 14 | 0013 empty 000000 000170 15 | 0014 empty 000000 000053 16 | 0015 empty 000000 000188 17 | 0016 empty 000000 000105 18 | 0017 empty 000000 000073 19 | 0018 empty 000000 000170 20 | 0019 empty 000000 000530 21 | 0020 empty 000000 000419 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/mailpy.py: -------------------------------------------------------------------------------- 1 | class Mail: 2 | """ Dummy class to print messages without sending e-mails""" 3 | def __init__(self,mailaddress): 4 | pass 5 | def msg(self,msg): 6 | print(msg) 7 | def finalize(self,success,benchmark,sha_key,mailaddress=None): 8 | if success: 9 | print("Results for %s (benchmark: %s) sucessfully created" % (benchmark,sha_key)) 10 | else: 11 | print("Creating results for %s (benchmark: %s) failed" % (benchmark,sha_key)) 12 | 13 | -------------------------------------------------------------------------------- /src/tools/eval_motchallenge.py: -------------------------------------------------------------------------------- 1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 2 | Christoph Heindl, 2017 3 | https://github.com/cheind/py-motmetrics 4 | Modified by Xingyi Zhou 5 | """ 6 | 7 | import argparse 8 | import glob 9 | import os 10 | import logging 11 | import motmetrics as mm 12 | import pandas as pd 13 | from collections import OrderedDict 14 | from pathlib import Path 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description=""" 18 | Compute metrics for trackers using MOTChallenge ground-truth data. 19 | Files 20 | ----- 21 | All file content, ground truth and test files, have to comply with the 22 | format described in 23 | Milan, Anton, et al. 24 | "Mot16: A benchmark for multi-object tracking." 25 | arXiv preprint arXiv:1603.00831 (2016). 26 | https://motchallenge.net/ 27 | Structure 28 | --------- 29 | Layout for ground truth data 30 | //gt/gt.txt 31 | //gt/gt.txt 32 | ... 33 | Layout for test data 34 | /.txt 35 | /.txt 36 | ... 
37 | Sequences of ground truth and test will be matched according to the `` 38 | string.""", formatter_class=argparse.RawTextHelpFormatter) 39 | 40 | parser.add_argument('groundtruths', type=str, help='Directory containing ground truth files.') 41 | parser.add_argument('tests', type=str, help='Directory containing tracker result files') 42 | parser.add_argument('--gt_type', type=str, default='') 43 | parser.add_argument('--eval_official', action='store_true') 44 | parser.add_argument('--loglevel', type=str, help='Log level', default='info') 45 | parser.add_argument('--fmt', type=str, help='Data format', default='mot15-2D') 46 | parser.add_argument('--solver', type=str, help='LAP solver to use') 47 | return parser.parse_args() 48 | 49 | def compare_dataframes(gts, ts): 50 | accs = [] 51 | names = [] 52 | for k, tsacc in ts.items(): 53 | if k in gts: 54 | logging.info('Comparing {}...'.format(k)) 55 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) 56 | names.append(k) 57 | else: 58 | logging.warning('No ground truth for {}, skipping.'.format(k)) 59 | 60 | return accs, names 61 | 62 | if __name__ == '__main__': 63 | 64 | args = parse_args() 65 | 66 | loglevel = getattr(logging, args.loglevel.upper(), None) 67 | if not isinstance(loglevel, int): 68 | raise ValueError('Invalid log level: {} '.format(args.loglevel)) 69 | logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s - %(message)s', datefmt='%I:%M:%S') 70 | 71 | if args.solver: 72 | mm.lap.default_solver = args.solver 73 | 74 | gt_type = args.gt_type 75 | print('gt_type', gt_type) 76 | gtfiles = glob.glob( 77 | os.path.join(args.groundtruths, '*/gt/gt{}.txt'.format(gt_type))) 78 | print('gt_files', gtfiles) 79 | tsfiles = [f for f in glob.glob(os.path.join(args.tests, '*.txt')) if not os.path.basename(f).startswith('eval')] 80 | 81 | logging.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 82 | logging.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) 83 | logging.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) 84 | logging.info('Loading files.') 85 | 86 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) for f in gtfiles]) 87 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=args.fmt)) for f in tsfiles]) 88 | 89 | mh = mm.metrics.create() 90 | accs, names = compare_dataframes(gt, ts) 91 | 92 | logging.info('Running metrics') 93 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', \ 94 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', \ 95 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] 96 | summary = mh.compute_many( 97 | accs, names=names, 98 | metrics=metrics, generate_overall=True) 99 | # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) 100 | # print(mm.io.render_summary( 101 | # summary, formatters=mh.formatters, 102 | # namemap=mm.io.motchallenge_metric_names)) 103 | div_dict = { 104 | 'num_objects': ['num_false_positives', 'num_misses', 105 | 'num_switches', 'num_fragmentations'], 106 | 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 107 | 'mostly_lost']} 108 | for divisor in div_dict: 109 | for divided in div_dict[divisor]: 110 | summary[divided] = (summary[divided] / summary[divisor]) 111 | fmt = mh.formatters 112 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 113 | 
'num_fragmentations', 'mostly_tracked', 'partially_tracked', 114 | 'mostly_lost'] 115 | for k in change_fmt_list: 116 | fmt[k] = fmt['mota'] 117 | print(mm.io.render_summary( 118 | summary, formatters=fmt, 119 | namemap=mm.io.motchallenge_metric_names)) 120 | if args.eval_official: 121 | metrics = mm.metrics.motchallenge_metrics + ['num_objects'] 122 | summary = mh.compute_many( 123 | accs, names=names, 124 | metrics=metrics, generate_overall=True) 125 | print(mm.io.render_summary( 126 | summary, formatters=mh.formatters, 127 | namemap=mm.io.motchallenge_metric_names)) 128 | logging.info('Completed') 129 | -------------------------------------------------------------------------------- /src/tools/get_mot_17.sh: -------------------------------------------------------------------------------- 1 | mkdir ../../data/mot17 2 | cd ../../data/mot17 3 | wget https://motchallenge.net/data/MOT17.zip 4 | unzip MOT17.zip 5 | rm MOT17.zip 6 | mkdir annotations 7 | cd ../../src/tools/ 8 | python convert_mot_to_coco.py 9 | python convert_mot_det_to_results.py -------------------------------------------------------------------------------- /src/tools/remove_optimizers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | IN_PATH = '../../centertrack_models/' 4 | OUT_PATH = '../../models/' 5 | REMOVE_KEYS = ['base.fc'] 6 | 7 | if __name__ == '__main__': 8 | models = sorted(os.listdir(IN_PATH)) 9 | for model in models: 10 | model_path = IN_PATH + model 11 | print(model) 12 | data = torch.load(model_path) 13 | state_dict = data['state_dict'] 14 | keys = state_dict.keys() 15 | delete_keys = [] 16 | for k in keys: 17 | should_delete = False 18 | for remove_key in REMOVE_KEYS: 19 | if remove_key in k: 20 | should_delete = True 21 | if should_delete: 22 | delete_keys.append(k) 23 | for k in delete_keys: 24 | print('delete ', k) 25 | del state_dict[k] 26 | out_data = {'epoch': data['epoch'], 'state_dict': state_dict} 27 | torch.save(out_data, OUT_PATH + model) 28 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_kitti.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | DATA_PATH = '../../data/kitti_tracking/' 10 | IMG_PATH = DATA_PATH + 'data_tracking_image_2/testing/image_02/' 11 | SAVE_VIDEO = False 12 | IS_GT = False 13 | 14 | cats = ['Pedestrian', 'Car', 'Cyclist'] 15 | cat_ids = {cat: i for i, cat in enumerate(cats)} 16 | COLORS = [(255, 0, 255), (122, 122, 255), (255, 0, 0)] 17 | 18 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 19 | for bbox in bboxes: 20 | color = COLORS[int(bbox[5])] 21 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 22 | (int(bbox[2]), int(bbox[3])), 23 | color, 2, lineType=cv2.LINE_AA) 24 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 25 | txt = '{}'.format(int(bbox[4])) 26 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 27 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 28 | color, thickness=1, lineType=cv2.LINE_AA) 29 | 30 | if __name__ == '__main__': 31 | seqs = os.listdir(IMG_PATH) 32 | if SAVE_VIDEO: 33 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 34 | if not os.path.exists(save_path): 35 | os.mkdir(save_path) 36 | print('save_video_path', save_path) 37 | for seq in sorted(seqs): 38 | print('seq', seq) 39 | if '.DS_Store' in seq: 40 | continue 41 | # if 
SAVE_VIDEO: 42 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 43 | # video = cv2.VideoWriter( 44 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 45 | 46 | 47 | preds = {} 48 | for K in range(1, len(sys.argv)): 49 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 50 | pred_file = open(pred_path, 'r') 51 | preds[K] = defaultdict(list) 52 | for line in pred_file: 53 | tmp = line[:-1].split(' ') 54 | frame_id = int(tmp[0]) 55 | track_id = int(tmp[1]) 56 | cat_id = cat_ids[tmp[2]] 57 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])] 58 | score = float(tmp[17]) 59 | preds[K][frame_id].append(bbox + [track_id, cat_id, score]) 60 | 61 | images_path = '{}/{}/'.format(IMG_PATH, seq) 62 | images = os.listdir(images_path) 63 | num_images = len([image for image in images if 'png' in image]) 64 | 65 | for i in range(num_images): 66 | frame_id = i 67 | file_path = '{}/{:06d}.png'.format(images_path, i) 68 | img = cv2.imread(file_path) 69 | for K in range(1, len(sys.argv)): 70 | img_pred = img.copy() 71 | draw_bbox(img_pred, preds[K][frame_id]) 72 | cv2.imshow('pred{}'.format(K), img_pred) 73 | cv2.waitKey() 74 | # if SAVE_VIDEO: 75 | # video.write(img_pred) 76 | # if SAVE_VIDEO: 77 | # video.release() 78 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_mot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | GT_PATH = '../../data/mot17/test/' 10 | IMG_PATH = GT_PATH 11 | SAVE_VIDEO = True 12 | RESIZE = 2 13 | IS_GT = False 14 | 15 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 16 | for bbox in bboxes: 17 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 18 | (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])), 19 | c, 2, lineType=cv2.LINE_AA) 20 | ct = [bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2] 21 | txt = '{}'.format(bbox[4]) 22 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 23 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 24 | (255, 122, 255), thickness=1, lineType=cv2.LINE_AA) 25 | 26 | if __name__ == '__main__': 27 | seqs = os.listdir(GT_PATH) 28 | if SAVE_VIDEO: 29 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 30 | if not os.path.exists(save_path): 31 | os.mkdir(save_path) 32 | print('save_video_path', save_path) 33 | for seq in sorted(seqs): 34 | print('seq', seq) 35 | # if len(sys.argv) > 2 and not sys.argv[2] in seq: 36 | # continue 37 | if '.DS_Store' in seq: 38 | continue 39 | # if SAVE_VIDEO: 40 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 41 | # video = cv2.VideoWriter( 42 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 43 | seq_path = '{}/{}/'.format(GT_PATH, seq) 44 | if IS_GT: 45 | ann_path = seq_path + 'gt/gt.txt' 46 | else: 47 | ann_path = seq_path + 'det/det.txt' 48 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 49 | print('anns shape', anns.shape) 50 | image_to_anns = defaultdict(list) 51 | for i in range(anns.shape[0]): 52 | if (not IS_GT) or (int(anns[i][6]) == 1 and float(anns[i][8]) >= 0.25): 53 | frame_id = int(anns[i][0]) 54 | track_id = int(anns[i][1]) 55 | bbox = (anns[i][2:6] / RESIZE).tolist() 56 | image_to_anns[frame_id].append(bbox + [track_id]) 57 | 58 | image_to_preds = {} 59 | for K in range(1, len(sys.argv)): 60 | image_to_preds[K] = defaultdict(list) 61 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 62 | try: 63 | preds = 
np.loadtxt(pred_path, dtype=np.float32, delimiter=',') 64 | except: 65 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=' ') 66 | for i in range(preds.shape[0]): 67 | frame_id = int(preds[i][0]) 68 | track_id = int(preds[i][1]) 69 | bbox = (preds[i][2:6] / RESIZE).tolist() 70 | image_to_preds[K][frame_id].append(bbox + [track_id]) 71 | 72 | img_path = seq_path + 'img1/' 73 | images = os.listdir(img_path) 74 | num_images = len([image for image in images if 'jpg' in image]) 75 | 76 | for i in range(num_images): 77 | frame_id = i + 1 78 | file_name = '{}/img1/{:06d}.jpg'.format(seq, i + 1) 79 | file_path = IMG_PATH + file_name 80 | img = cv2.imread(file_path) 81 | if RESIZE != 1: 82 | img = cv2.resize(img, (img.shape[1] // RESIZE, img.shape[0] // RESIZE)) 83 | for K in range(1, len(sys.argv)): 84 | img_pred = img.copy() 85 | draw_bbox(img_pred, image_to_preds[K][frame_id]) 86 | cv2.imshow('pred{}'.format(K), img_pred) 87 | draw_bbox(img, image_to_anns[frame_id]) 88 | cv2.imshow('gt', img) 89 | cv2.waitKey() 90 | # if SAVE_VIDEO: 91 | # video.write(img_pred) 92 | # if SAVE_VIDEO: 93 | # video.release() 94 | -------------------------------------------------------------------------------- /videos/nuscenes_mini.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/videos/nuscenes_mini.mp4 --------------------------------------------------------------------------------