├── .gitignore ├── .gitmodules ├── LICENSE ├── NOTICE ├── README.md ├── experiments ├── coco_pose_tracking.sh ├── coco_tracking.sh ├── crowdhuman.sh ├── kitti_fulltrain.sh ├── kitti_half.sh ├── kitti_half_sc.sh ├── mot17_fulltrain.sh ├── mot17_fulltrain_sc.sh ├── mot17_half.sh ├── mot17_half_sc.sh ├── nuScenes_3Ddetection_e140.sh └── nuScenes_3Dtracking.sh ├── readme ├── DATA.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── coco_det.gif ├── coco_pose.gif ├── fig2.png └── nuscenes_3d.gif ├── requirements.txt ├── src ├── _init_paths.py ├── convert_onnx.py ├── demo.py ├── lib │ ├── dataset │ │ ├── dataset_factory.py │ │ ├── datasets │ │ │ ├── coco.py │ │ │ ├── coco_hp.py │ │ │ ├── crowdhuman.py │ │ │ ├── custom_dataset.py │ │ │ ├── kitti.py │ │ │ ├── kitti_tracking.py │ │ │ ├── mot.py │ │ │ └── nuscenes.py │ │ └── generic_dataset.py │ ├── detector.py │ ├── external │ │ ├── .gitignore │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── nms.pyx │ │ └── setup.py │ ├── logger.py │ ├── model │ │ ├── data_parallel.py │ │ ├── decode.py │ │ ├── losses.py │ │ ├── model.py │ │ ├── networks │ │ │ ├── backbones │ │ │ │ ├── dla.py │ │ │ │ ├── mobilenet.py │ │ │ │ └── resnet.py │ │ │ ├── base_model.py │ │ │ ├── dla.py │ │ │ ├── dlav0.py │ │ │ ├── generic_network.py │ │ │ ├── necks │ │ │ │ ├── dlaup.py │ │ │ │ └── msraup.py │ │ │ ├── resdcn.py │ │ │ └── resnet.py │ │ ├── scatter_gather.py │ │ └── utils.py │ ├── opts.py │ ├── trainer.py │ └── utils │ │ ├── __init__.py │ │ ├── ddd_utils.py │ │ ├── debugger.py │ │ ├── image.py │ │ ├── post_process.py │ │ ├── tracker.py │ │ └── utils.py ├── main.py ├── test.py └── tools │ ├── _init_paths.py │ ├── annot_bbox.py │ ├── convert_crowdhuman_to_coco.py │ ├── convert_kittitrack_to_coco.py │ ├── convert_mot_det_to_results.py │ ├── convert_mot_to_coco.py │ ├── convert_nuScenes.py │ ├── eval_kitti_track │ ├── data │ │ └── tracking │ │ │ ├── evaluate_tracking.seqmap │ │ │ ├── evaluate_tracking.seqmap.test │ │ │ ├── evaluate_tracking.seqmap.training │ │ │ ├── evaluate_trackingtrain_1-2.seqmap │ │ │ ├── evaluate_trackingtrain_2-2.seqmap │ │ │ ├── evaluate_trackingval_half.seqmap │ │ │ ├── label_02 │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ ├── label_02_train_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ │ │ └── label_02_val_half │ │ │ ├── 0000.txt │ │ │ ├── 0001.txt │ │ │ ├── 0002.txt │ │ │ ├── 0003.txt │ │ │ ├── 0004.txt │ │ │ ├── 0005.txt │ │ │ ├── 0006.txt │ │ │ ├── 0007.txt │ │ │ ├── 0008.txt │ │ │ ├── 0009.txt │ │ │ ├── 0010.txt │ │ │ ├── 0011.txt │ │ │ ├── 0012.txt │ │ │ ├── 0013.txt │ │ │ ├── 0014.txt │ │ │ ├── 0015.txt │ │ │ ├── 0016.txt │ │ │ ├── 0017.txt │ │ │ ├── 0018.txt │ │ │ ├── 0019.txt │ │ │ └── 0020.txt │ ├── evaluate_tracking.py │ ├── mailpy.py │ └── munkres.py │ ├── eval_motchallenge.py │ ├── get_mot_17.sh │ ├── nuScenes_lib │ ├── 
export_kitti.py │ └── utils_kitti.py │ ├── remove_optimizers.py │ ├── vis_tracking_kitti.py │ └── vis_tracking_mot.py └── videos └── nuscenes_mini.mp4 /.gitignore: -------------------------------------------------------------------------------- 1 | videos/ 2 | *.zip 3 | centernet_models/* 4 | centertrack_models/* 5 | */slurm-*.out 6 | src/slurm/ 7 | results/* 8 | src/lib/models/networks/DCNv2 9 | src/lib/models/networks/DCNv2_04 10 | src/lib/models/networks/DCNv2_10 11 | src/lib/model/networks/DCNv2 12 | src/lib/model/networks/DCNv2_04 13 | src/lib/model/networks/DCNv2_10 14 | .idea/ 15 | legacy/* 16 | models/* 17 | .DS_Store 18 | debug/* 19 | *.DS_Store 20 | data 21 | !src/tools/eval_kitti_track/data 22 | exp 23 | exp/* 24 | *.json 25 | *.mat 26 | models/* 27 | model/* 28 | src/.vscode/* 29 | src/paths.py 30 | preds/* 31 | *.h5 32 | *.pth 33 | *.checkpoint 34 | # Byte-compiled / optimized / DLL files 35 | __pycache__/ 36 | *.py[cod] 37 | *$py.class 38 | 39 | # C extensions 40 | *.so 41 | 42 | # Distribution / packaging 43 | .Python 44 | env/ 45 | build/ 46 | develop-eggs/ 47 | dist/ 48 | downloads/ 49 | eggs/ 50 | .eggs/ 51 | lib64/ 52 | parts/ 53 | sdist/ 54 | var/ 55 | wheels/ 56 | *.egg-info/ 57 | .installed.cfg 58 | *.egg 59 | 60 | # PyInstaller 61 | # Usually these files are written by a python script from a template 62 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 63 | *.manifest 64 | *.spec 65 | 66 | # Installer logs 67 | pip-log.txt 68 | pip-delete-this-directory.txt 69 | 70 | # Unit test / coverage reports 71 | htmlcov/ 72 | .tox/ 73 | .coverage 74 | .coverage.* 75 | .cache 76 | nosetests.xml 77 | coverage.xml 78 | *.cover 79 | .hypothesis/ 80 | 81 | # Translations 82 | *.mo 83 | *.pot 84 | 85 | # Django stuff: 86 | *.log 87 | local_settings.py 88 | 89 | # Flask stuff: 90 | instance/ 91 | .webassets-cache 92 | 93 | # Scrapy stuff: 94 | .scrapy 95 | 96 | # Sphinx documentation 97 | docs/_build/ 98 | 99 | # PyBuilder 100 | target/ 101 | 102 | # Jupyter Notebook 103 | .ipynb_checkpoints 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # celery beat schedule file 109 | celerybeat-schedule 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # dotenv 115 | .env 116 | 117 | # virtualenv 118 | .venv 119 | venv/ 120 | ENV/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "nuscenes-devkit"] 2 | path = src/tools/nuscenes-devkit 3 | url = https://github.com/nutonomy/nuscenes-devkit 4 | branch = master 5 | 6 | [submodule "nuscenes-devkit-alpha02"] 7 | path = src/tools/nuscenes-devkit-alpha02 8 | url = https://github.com/nutonomy/nuscenes-devkit 9 | branch = e2d8c4b331567dc0bc36271dc21cdef65970eb7e 10 | 11 | [submodule "DCN-v2"] 12 | path = src/lib/model/networks/DCNv2 13 | url = https://github.com/CharlesShang/DCNv2/ 14 | branch = master -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Xingyi Zhou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated 
documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tracking Objects as Points 2 | Simultaneous object detection and tracking using center points: 3 | ![](readme/fig2.png) 4 | > [**Tracking Objects as Points**](http://arxiv.org/abs/2004.01177), 5 | > Xingyi Zhou, Vladlen Koltun, Philipp Krähenbühl, 6 | > *arXiv technical report ([arXiv 2004.01177](http://arxiv.org/abs/2004.01177))* 7 | 8 | 9 | @article{zhou2020tracking, 10 | title={Tracking Objects as Points}, 11 | author={Zhou, Xingyi and Koltun, Vladlen and Kr{\"a}henb{\"u}hl, Philipp}, 12 | journal={ECCV}, 13 | year={2020} 14 | } 15 | 16 | Contact: [zhouxy2017@gmail.com](mailto:zhouxy2017@gmail.com). Any questions or discussion are welcome! 17 | 18 | ## Abstract 19 | Tracking has traditionally been the art of following interest points through space and time. This changed with the rise of powerful deep networks. Nowadays, tracking is dominated by pipelines that perform object detection followed by temporal association, also known as tracking-by-detection. In this paper, we present a simultaneous detection and tracking algorithm that is simpler, faster, and more accurate than the state of the art. Our tracker, CenterTrack, applies a detection model to a pair of images and detections from the prior frame. Given this minimal input, CenterTrack localizes objects and predicts their associations with the previous frame. That's it. CenterTrack is simple, online (no peeking into the future), and real-time. It achieves 67.3% MOTA on the MOT17 challenge at 22 FPS and 89.4% MOTA on the KITTI tracking benchmark at 15 FPS, setting a new state of the art on both datasets. CenterTrack is easily extended to monocular 3D tracking by regressing additional 3D attributes. Using monocular video input, it achieves 28.3% AMOTA@0.2 on the newly released nuScenes 3D tracking benchmark, substantially outperforming the monocular baseline on this benchmark while running at 28 FPS. 20 | 21 | 22 | ## Features at a glance 23 | 24 | - One-sentence method summary: Our model takes the current frame, the previous frame, and a heatmap rendered from previous tracking results as input, and predicts the current detection heatmap as well as their offsets to centers in the previous frame. 25 | 26 | - The model can be trained on still **image datasets** if videos are not available. 27 | 28 | - Easily extends to monocular 3d object tracking, multi-category tracking, and pose tracking. 
29 | 30 | - State-of-the-art performance on MOT17, KITTI, and nuScenes monocular tracking benchmarks. 31 | 32 | ## Main results 33 | 34 | ### Pedestrian tracking on MOT17 test set 35 | 36 | | Detection | MOTA | FPS | 37 | |--------------|-----------|--------| 38 | |Public | 61.5 | 22 | 39 | |Private | 67.8 | 22 | 40 | 41 | ### 2D vehicle tracking on KITTI test set (with flip test) 42 | 43 | | MOTA | FPS | 44 | |-------------|--------| 45 | | 89.44 | 15 | 46 | 47 | ### 3D tracking on nuScenes test set 48 | 49 | | AMOTA @ 0.2 | AMOTA | FPS | 50 | |---------------|---------|--------| 51 | | 27.8 | 4.6 | 28 | 52 | 53 | Besides benchmark evaluation, we also provide models for 80-category tracking and pose tracking trained on COCO. See the sample visual results below (Video files from [openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) and [YOLO](https://pjreddie.com/darknet/yolov2/)). 54 | 55 |
![](readme/coco_det.gif)
56 | 57 |
![](readme/coco_pose.gif)
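To make the one-sentence method summary in *Features at a glance* concrete, below is a self-contained toy sketch of the association step: each current-frame detection predicts an offset to its center in the previous frame and is greedily matched to the closest previous track. All names here are illustrative, not this repo's API; the real tracker (see `src/lib/utils/tracker.py`) additionally processes detections in confidence order and limits the matching distance.

~~~
import numpy as np

def greedy_associate(curr_centers, offsets, prev_centers, prev_ids):
    # Displace each current center by its predicted offset to estimate where
    # that object was in the previous frame, then take the closest unused track.
    ids, used, next_id = [], set(), max(prev_ids) + 1
    for center, offset in zip(curr_centers, offsets):
        dists = np.linalg.norm(prev_centers - (center + offset), axis=1)
        j = int(np.argmin(dists))
        if j not in used:
            used.add(j)
            ids.append(prev_ids[j])
        else:                      # no free previous track: start a new one
            ids.append(next_id)
            next_id += 1
    return ids

curr = np.array([[100., 120.], [300., 80.]])   # current-frame centers
off  = np.array([[ -5.,  -2.], [ 10.,  0.]])   # predicted offsets to the previous frame
prev = np.array([[ 96., 118.], [310., 80.]])   # previous track centers
print(greedy_associate(curr, off, prev, prev_ids=[1, 2]))  # -> [1, 2]
~~~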
58 | 59 | All models and details are available in our [Model zoo](readme/MODEL_ZOO.md). 60 | 61 | ## Installation 62 | 63 | Please refer to [INSTALL.md](readme/INSTALL.md) for installation instructions. 64 | 65 | ## Use CenterTrack 66 | 67 | We support demo for videos, webcam, and image folders. 68 | 69 | First, download the models (By default, [nuscenes\_3d\_tracking](https://drive.google.com/file/d/1gPQFzqneDtT_PjJRRuyskRsNTRHXovw1) for monocular 3D tracking, [coco_tracking](https://drive.google.com/file/d/11DEfWa0TKYzNqY3CXR51WVvjMb4oRl08) for 80-category detection and 70 | [coco_pose_tracking](https://drive.google.com/file/d/1yGFC_Q9wzSHL1d4eZW_44EBB2H42YKYt) for pose tracking) 71 | from the [Model zoo](readme/MODEL_ZOO.md) and put them in `CenterNet_ROOT/models/`. 72 | 73 | We provide a video clip from the [nuScenes dataset](https://www.nuscenes.org/?externalData=all&mapData=all&modalities=Any) in `videos/nuscenes_mini.mp4`. 74 | To test monocular 3D tracking on this video, run 75 | 76 | ~~~ 77 | python demo.py tracking,ddd --load_model ../models/nuScenes_3Dtracking.pth --dataset nuscenes --pre_hm --track_thresh 0.1 --demo ../videos/nuscenes_mini.mp4 --test_focal_length 633 78 | ~~~ 79 | 80 | You will need to specify `test_focal_length` for monocular 3D tracking demo to convert the image coordinate system back to 3D. 81 | The value `633` is half of a typical focal length (`~1266`) in nuScenes dataset in input resolution `1600x900`. 82 | The mini demo video is in an input resolution of `800x448`, so we need to use a half focal length. 83 | You don't need to set the `test_focal_length` when testing on the original nuScenes data. 84 | 85 | If setup correctly, you will see an output video like: 86 | 87 |
![](readme/nuscenes_3d.gif)
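The focal-length scaling described above is just the image resize ratio applied to the camera intrinsics; a quick sanity check (plain arithmetic, not code from this repo):

~~~
# nuScenes images are 1600x900 with a typical focal length of ~1266 pixels.
# The demo clip is resized to 800x448, i.e. half the original width, so the
# focal length shrinks by the same factor.
orig_focal, orig_w, demo_w = 1266, 1600, 800
print(orig_focal * demo_w / orig_w)  # 633.0 -> the value passed to --test_focal_length
~~~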
88 | 89 | 90 | Similarly, for 80-category tracking on images/ video, run: 91 | 92 | ~~~ 93 | python demo.py tracking --load_model ../models/coco_tracking.pth --demo /path/to/image/or/folder/or/video 94 | ~~~ 95 | 96 | If you want to test with person tracking models, you need to add `--num_class 1`: 97 | 98 | ~~~ 99 | python demo.py tracking --load_model ../models/mot17_half.pth --num_class 1 --demo /path/to/image/or/folder/or/video 100 | ~~~ 101 | 102 | For webcam demo, run 103 | 104 | ~~~ 105 | python demo.py tracking --load_model ../models/coco_tracking.pth --demo webcam 106 | ~~~ 107 | 108 | For monocular 3D tracking, run 109 | 110 | ~~~ 111 | python demo.py tracking,ddd --demo webcam --load_model ../models/coco_tracking.pth --demo /path/to/image/or/folder/or/video/or/webcam 112 | ~~~ 113 | 114 | Similarly, for pose tracking, run: 115 | 116 | ~~~ 117 | python demo.py tracking,multi_pose --load_model ../models/coco_pose.pth --demo /path/to/image/or/folder/or/video/or/webcam 118 | ~~~ 119 | The result for the example images should look like: 120 | 121 | You can add `--debug 2` to visualize the heatmap and offset predictions. 122 | 123 | To use this CenterTrack in your own project, you can 124 | 125 | ~~~ 126 | import sys 127 | CENTERTRACK_PATH = /path/to/CenterTrack/src/lib/ 128 | sys.path.insert(0, CENTERTRACK_PATH) 129 | 130 | from detector import Detector 131 | from opts import opts 132 | 133 | MODEL_PATH = /path/to/model 134 | TASK = 'tracking' # or 'tracking,multi_pose' for pose tracking and 'tracking,ddd' for monocular 3d tracking 135 | opt = opts().init('{} --load_model {}'.format(TASK, MODEL_PATH).split(' ')) 136 | detector = Detector(opt) 137 | 138 | images = ['''image read from open cv or from a video'''] 139 | for img in images: 140 | ret = detector.run(img)['results'] 141 | ~~~ 142 | Each `ret` will be a list dict: `[{'bbox': [x1, y1, x2, y2], 'tracking_id': id, ...}]` 143 | 144 | ## Training on custom dataset 145 | 146 | If you want to train CenterTrack on your own dataset, you can use `--dataset custom` and manually specify the annotation file, image path, input resolutions, and number of categories. You still need to create the annotation files in COCO format (referring to the many `convert_X_to_coco.py` examples in `tools`). For example, you can use the following command to train on our [mot17 experiment](experiments/mot17_half_sc.sh) without using the pre-defined mot dataset file: 147 | 148 | ~~~ 149 | python main.py tracking --exp_id mot17_half_sc --dataset custom --custom_dataset_ann_path ../data/mot17/annotations/train_half.json --custom_dataset_img_path ../data/mot17/train/ --input_h 544 --input_w 960 --num_classes 1 --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 150 | 151 | ~~~ 152 | 153 | ## Benchmark Evaluation and Training 154 | 155 | After [installation](readme/INSTALL.md), follow the instructions in [DATA.md](readme/DATA.md) to setup the datasets. Then check [GETTING_STARTED.md](readme/GETTING_STARTED.md) to reproduce the results in the paper. 156 | We provide scripts for all the experiments in the [experiments](experiments) folder. 157 | 158 | ## License 159 | 160 | CenterTrack is developed upon [CenterNet](https://github.com/xingyizhou/CenterNet). Both codebases are released under MIT License themselves. Some code of CenterNet are from third-parties with different licenses, please check the CenterNet repo for details. 
In addition, this repo uses [py-motmetrics](https://github.com/cheind/py-motmetrics) for MOT evaluation and [nuscenes-devkit](https://github.com/nutonomy/nuscenes-devkit) for nuScenes evaluation and preprocessing. See [NOTICE](NOTICE) for detail. Please note the licenses of each dataset. Most of the datasets we used in this project are under non-commercial licenses. 161 | 162 | -------------------------------------------------------------------------------- /experiments/coco_pose_tracking.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train, the model is finetuned from a CenterNet detection model from the CenterNet model zoo. 3 | python main.py tracking,multi_pose --exp_id coco_pose_tracking --dataset coco_hp --load_model ../models/multi_pose_dla_3x.pth --gpus 0,1,2,3,4,5,6,7 --batch_size 128 --lr 5e-4 --num_workers 16 --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 -------------------------------------------------------------------------------- /experiments/coco_tracking.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train, the model is finetuned from a CenterNet detection model from the CenterNet model zoo. 3 | python main.py tracking --exp_id coco_tracking --tracking --load_model ../models/ctdet_coco_dla_2x.pth --gpus 0,1,2,3,4,5,6,7 --batch_size 128 --lr 5e-4 --num_workers 16 --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 -------------------------------------------------------------------------------- /experiments/crowdhuman.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id crowdhuman --dataset crowdhuman --ltrb_amodal --pre_hm --shift 0.05 --scale 0.05 --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --num_epochs 140 --lr_step 90,120 --save_point 60,90 --gpus 0,1,2,3 --batch_size 64 --lr 2.5e-4 --num_workers 16 4 | cd .. 
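# Note on the flags above: CrowdHuman is a still-image dataset, so the prior-frame
# input is simulated during training ("training on static image data" in the paper).
# As described in the paper: --shift/--scale offset the two simulated frames to mimic
# camera motion, --hm_disturb jitters the rendered prior centers, --lost_disturb
# randomly drops prior boxes (simulated missed detections), and --fp_disturb injects
# spurious boxes near the ground truth (simulated false positives).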
-------------------------------------------------------------------------------- /experiments/kitti_fulltrain.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version train --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 --load_model ../models/nuScenes_3Ddetection_e140.pth 4 | # test 5 | python test.py tracking --exp_id kitti_fulltrain --dataset kitti_tracking --dataset_version test --pre_hm --track_thresh 0.4 --resume 6 | -------------------------------------------------------------------------------- /experiments/kitti_half.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version train_half --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 --load_model ../models/nuScenes_3Ddetection_e140.pth 4 | # test 5 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --resume -------------------------------------------------------------------------------- /experiments/kitti_half_sc.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id kitti_half_sc --dataset kitti_tracking --dataset_version train_half --pre_hm --same_aug --hm_disturb 0.05 --lost_disturb 0.2 --fp_disturb 0.1 --gpus 0,1 --batch_size 16 4 | # test 5 | python test.py tracking --exp_id kitti_half_sc --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --pre_thresh 0.5 --resume -------------------------------------------------------------------------------- /experiments/mot17_fulltrain.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version 17trainval --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 --load_model ../models/crowdhuman.pth 4 | # test 5 | python test.py tracking --exp_id mot17_fulltrain --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | cd .. -------------------------------------------------------------------------------- /experiments/mot17_fulltrain_sc.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17trainval --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 4 | # test 5 | python test.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | # test with public detection 7 | python test.py tracking --exp_id mot17_fulltrain_sc --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume --public_det --load_results ../data/mot17/results/test_det.json 8 | cd .. 
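# Note: the training command here matches mot17_fulltrain.sh except that it omits
# --load_model ../models/crowdhuman.pth; per readme/MODEL_ZOO.md, the *_sc variants
# are trained from ImageNet initialization rather than finetuned from the CrowdHuman model.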
-------------------------------------------------------------------------------- /experiments/mot17_half.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halftrain --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 --load_model ../models/crowdhuman.pth 4 | # test 5 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | # test with public detection 7 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume --public_det --load_results ../data/mot17/results/val_half_det.json 8 | cd .. -------------------------------------------------------------------------------- /experiments/mot17_half_sc.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking --exp_id mot17_half_sc --dataset mot --dataset_version 17halftrain --pre_hm --ltrb_amodal --same_aug --hm_disturb 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --gpus 0,1 4 | # test 5 | python test.py tracking --exp_id mot17_half_sc --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --resume 6 | cd .. -------------------------------------------------------------------------------- /experiments/nuScenes_3Ddetection_e140.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py ddd --exp_id nuScenes_3Ddetection_e140 --dataset nuscenes --batch_size 128 --gpus 0,1,2,3,4,5,6,7 --lr 5e-4 --num_epochs 140 --lr_step 90,120 --save_point 90,120 4 | # test 5 | python test.py ddd --exp_id nuScenes_3Ddetection_e140 --dataset nuscenes --resume 6 | cd .. -------------------------------------------------------------------------------- /experiments/nuScenes_3Dtracking.sh: -------------------------------------------------------------------------------- 1 | cd src 2 | # train 3 | python main.py tracking,ddd --exp_id nuScenes_3Dtracking --dataset nuscenes --pre_hm --load_model ../models/nuScenes_3Ddetection_e140.pth --shift 0.01 --scale 0.05 --lost_disturb 0.4 --fp_disturb 0.1 --hm_disturb 0.05 --batch_size 64 --gpus 0,1,2,3 --lr 2.5e-4 --save_point 60 4 | # test 5 | python test.py tracking,ddd --exp_id nuScenes_3Dtracking --dataset nuscenes --pre_hm --track_thresh 0.1 --resume 6 | cd .. -------------------------------------------------------------------------------- /readme/DATA.md: -------------------------------------------------------------------------------- 1 | # Dataset preparation 2 | 3 | If you want to reproduce the results in the paper for benchmark evaluation or training, you will need to setup datasets. 4 | 5 | ### MOT 2017 6 | 7 | MOT is is used to train and evaluate the system. We will only use the training set (and create a validation set from it) for developing this project. 8 | 9 | We have packed the dataset preprocessing code as a script. 10 | 11 | ~~~ 12 | cd $CenterTrack_ROOT/tools/ 13 | bash get_mot_17.sh 14 | ~~~ 15 | 16 | The script includes: 17 | 18 | - Download and unzip the dataset from [MOT17 website](https://motchallenge.net/data/MOT17/). 19 | - Convert it into COCO format using `tools/convert_mot_to_coco.py`. 
20 | - Create the half-half train/ val set described in the paper. 21 | - Convert the public detection into a specific format. 22 | - The output data structure should be: 23 | 24 | ~~~ 25 | ${CenterTrack_ROOT} 26 | |-- data 27 | `-- |-- mot17 28 | `-- |--- train 29 | | |--- MOT17-02-FRCNN 30 | | | |--- img1 31 | | | |--- gt 32 | | | | |--- gt.txt 33 | | | | |--- gt_train_half.txt 34 | | | | |--- gt_val_half.txt 35 | | | |--- det 36 | | | | |--- det.txt 37 | | | | |--- det_train_half.txt 38 | | | | |--- det_val_half.txt 39 | | |--- ... 40 | |--- test 41 | | |--- MOT17-01-FRCNN 42 | |---|--- ... 43 | `---| annotations 44 | |--- train_half.json 45 | |--- val_half.json 46 | |--- train.json 47 | `--- test.json 48 | ~~~ 49 | 50 | ### KITTI Tracking 51 | 52 | We use KITTI Tracking to train and evaluate the system as well. Again, we will only use the training set (and create a validation set from it) for developing this project. Note that KITTI Tracking is 2D tracking and is different from KITTI detection (they use the same image, but different train/ val set). 53 | 54 | - Download [images](http://www.cvlibs.net/download.php?file=data_tracking_image_2.zip), [annotations](http://www.cvlibs.net/download.php?file=data_tracking_label_2.zip), and [calibration information](http://www.cvlibs.net/download.php?file=data_tracking_calib.zip) (not used in 2D tracking, only if you want to demo 3D detection/ tracking) from [KITTI Tracking website](http://www.cvlibs.net/datasets/kitti/eval_tracking.php) and unzip. Place or symlink the data as below: 55 | 56 | ~~~ 57 | ${CenterTrack_ROOT} 58 | |-- data 59 | `-- |-- kitti_tracking 60 | `-- |-- data_tracking_image_2 61 | | |-- training 62 | | |-- |-- image_02 63 | | |-- |-- |-- 0000 64 | | |-- |-- |-- ... 65 | |-- |-- testing 66 | |-- label_02 67 | | |-- 0000.txt 68 | | |-- ... 69 | `-- data_tracking_calib 70 | ~~~ 71 | 72 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. 73 | - The resulting data structure should look like: 74 | 75 | ~~~ 76 | ${CenterTrack_ROOT} 77 | |-- data 78 | `-- |-- kitti_tracking 79 | `-- |-- data_tracking_image_2 80 | | |-- training 81 | | | |-- image_02 82 | | | | |-- 0000 83 | | | | |-- ... 84 | |-- |-- testing 85 | |-- label_02 86 | | |-- 0000.txt 87 | | |-- ... 88 | |-- data_tracking_calib 89 | |-- label_02_val_half 90 | | |-- 0000.txt 91 | | |-- ... 92 | |-- label_02_train_half 93 | | |-- 0000.txt 94 | | |-- ... 95 | `-- annotations 96 | |-- tracking_train.json 97 | |-- tracking_test.json 98 | |-- tracking_train_half.json 99 | `-- tracking_val_half.json 100 | ~~~ 101 | 102 | ### nuScenes 103 | 104 | nuScenes is used for training and evaluating 3D object tracking. We also used nuScenes for pretraining KITTI models. 105 | 106 | 107 | - Download the dataset from [nuScenes website](https://www.nuscenes.org/download). You only need to download the "Keyframe blobs", and only need the images data. You also need to download the maps and all metadata to make the nuScenes API happy. 108 | 109 | 110 | - Unzip, rename, and place (or symlink) the data as below. You will need to merge folders from different zip files. 
111 | 112 | ~~~ 113 | ${CenterTrack_ROOT} 114 | |-- data 115 | `-- |-- nuscenes 116 | `-- |-- v1.0-trainval 117 | | |-- samples 118 | | | |-- CAM_BACK 119 | | | | | -- xxx.jpg 120 | | | |-- CAM_BACK_LEFT 121 | | | |-- CAM_BACK_RIGHT 122 | | | |-- CAM_FRONT 123 | | | |-- CAM_FRONT_LEFT 124 | | | |-- CAM_FRONT_RIGHT 125 | |-- |-- maps 126 | `-- |-- v1.0-trainval_meta 127 | ~~~ 128 | 129 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. It will create `train.json`, `val.json`, `test.json` under `data/nuscenes/annotations`. nuScenes API is required for running the data preprocessing. 130 | 131 | ### CrowdHuman 132 | 133 | CrowdHuman is used for pretraining the MOT model. Only the training set is used. 134 | 135 | - Download the dataset from [its website](https://www.crowdhuman.org/download.html). 136 | 137 | - Unzip and place (or symlink) the data as below. You will need to merge folders from different zip files. 138 | 139 | ~~~ 140 | ${CenterTrack_ROOT} 141 | |-- data 142 | `-- |-- crowdhuman 143 | |-- |-- CrowdHuman_train 144 | | | |-- Images 145 | |-- |-- CrowdHuman_val 146 | | | |-- Images 147 | |-- |-- annotation_train.odgt 148 | |-- |-- annotation_val.odgt 149 | ~~~ 150 | 151 | - Run `python convert_kitti_to_coco.py` in `tools` to convert the annotation into COCO format. It will create `train.json`, `val.json` under `data/crowdhuman/annotations`. 152 | 153 | ### COCO 154 | 155 | COCO is used to train a demo system for 80-category tracking or pose tracking. 156 | The models are NOT evaluated in any benchmarks. 157 | 158 | - Download the images (2017 Train, 2017 Val, 2017 Test) from [coco website](http://cocodataset.org/#download). 159 | - Download annotation files (2017 train/val and test image info) from [coco website](http://cocodataset.org/#download). 160 | - Place the data (or create symlinks) to make the data folder like: 161 | 162 | ~~~ 163 | ${CenterTrack_ROOT} 164 | |-- data 165 | `-- |-- coco 166 | `-- |-- annotations 167 | | |-- instances_train2017.json 168 | | |-- instances_val2017.json 169 | | |-- person_keypoints_train2017.json 170 | | |-- person_keypoints_val2017.json 171 | | |-- image_info_test-dev2017.json 172 | |---|-- train2017 173 | |---|-- val2017 174 | `---|-- test2017 175 | ~~~ 176 | 177 | 178 | ## References 179 | Please cite the corresponding References if you use the datasets. 180 | 181 | ~~~ 182 | @article{MOT16, 183 | title = {{MOT}16: {A} Benchmark for Multi-Object Tracking}, 184 | shorttitle = {MOT16}, 185 | url = {http://arxiv.org/abs/1603.00831}, 186 | journal = {arXiv:1603.00831 [cs]}, 187 | author = {Milan, A. and Leal-Taix\'{e}, L. and Reid, I. and Roth, S. and Schindler, K.}, 188 | month = mar, 189 | year = {2016}, 190 | note = {arXiv: 1603.00831}, 191 | keywords = {Computer Science - Computer Vision and Pattern Recognition} 192 | } 193 | 194 | @article{shao2018crowdhuman, 195 | title={Crowdhuman: A benchmark for detecting human in a crowd}, 196 | author={Shao, Shuai and Zhao, Zijian and Li, Boxun and Xiao, Tete and Yu, Gang and Zhang, Xiangyu and Sun, Jian}, 197 | journal={arXiv:1805.00123}, 198 | year={2018} 199 | } 200 | 201 | @INPROCEEDINGS{Geiger2012CVPR, 202 | author = {Andreas Geiger and Philip Lenz and Raquel Urtasun}, 203 | title = {Are we ready for Autonomous Driving? 
The KITTI Vision Benchmark Suite}, 204 | booktitle = {CVPR}, 205 | year = {2012} 206 | } 207 | 208 | @inproceedings{lin2014microsoft, 209 | title={Microsoft {COCO}: Common objects in context}, 210 | author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence}, 211 | booktitle={ECCV}, 212 | year={2014}, 213 | } 214 | 215 | @inproceedings{nuscenes2019, 216 | title={{nuScenes}: A multimodal dataset for autonomous driving}, 217 | author={Holger Caesar and Varun Bankiti and Alex H. Lang and Sourabh Vora and Venice Erin Liong and Qiang Xu and Anush Krishnan and Yu Pan and Giancarlo Baldan and Oscar Beijbom}, 218 | booktitle={CVPR}, 219 | year={2020} 220 | } 221 | ~~~ -------------------------------------------------------------------------------- /readme/GETTING_STARTED.md: -------------------------------------------------------------------------------- 1 | # Getting Started 2 | 3 | This document provides tutorials to train and evaluate CenterTrack. Before getting started, make sure you have finished [installation](INSTALL.md) and [dataset setup](DATA.md). 4 | 5 | ## Benchmark evaluation 6 | 7 | First, download the models you want to evaluate from our [model zoo](MODEL_ZOO.md) and put them in `CenterTrack_ROOT/models/`. 8 | 9 | ### MOT17 10 | 11 | To test the tracking performance on MOT17 with our pretrained model, run 12 | 13 | ~~~ 14 | python test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_half.pth 15 | ~~~ 16 | 17 | This will give a MOTA of `66.1` if set up correctly. `--pre_hm` is to enable the input heatmap. `--ltrb_amodal` is to use the left, top, right, bottom bounding box representation to enable detecting out-of-image bounding box (We observed this is important for MOT datasets). And `--track_thresh` and `--pre_thresh` are the score threshold for predicting a bounding box ($\theta$ in the paper) and feeding the heatmap to the next frame ($\tau$ in the paper), respectively. 18 | 19 | To test with public detection, run 20 | 21 | ~~~ 22 | python test.py tracking --exp_id mot17_half_public --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_half.pth --public_det --load_results ../data/mot17/results/val_half_det.json 23 | ~~~ 24 | 25 | The expected MOTA is `63.1`. 26 | 27 | To test on the test set, run 28 | 29 | ~~~ 30 | python test.py tracking --exp_id mot17_fulltrain_public --dataset mot --dataset_version 17test --pre_hm --ltrb_amodal --track_thresh 0.4 --pre_thresh 0.5 --load_model ../models/mot17_fulltrain_sc.pth --public_det --load_results ../data/mot17/results/test_det.json 31 | ~~~ 32 | 33 | The Test set evaluation requires submitting to the official test server. 34 | We discourage the users to submit our predictions to the test set to prevent test set abuse. 35 | You can append `--debug 2` to above commends to visualize the predictions. 36 | 37 | See the experiments folder for testing in other settings. 38 | 39 | 40 | ### KITTI Tracking 41 | 42 | Run: 43 | 44 | ~~~ 45 | python test.py tracking --exp_id kitti_half --dataset kitti_tracking --dataset_version val_half --pre_hm --track_thresh 0.4 --load_model ../models/kitti_half.pth 46 | ~~~ 47 | 48 | The expected MOTA is `88.7`. 
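The two thresholds used throughout these commands behave the same on every benchmark: `--track_thresh` ($\theta$ in the paper) decides which predicted boxes are reported, while `--pre_thresh` ($\tau$) decides which boxes are rendered into the heatmap passed to the next frame. A self-contained toy sketch of that filtering (illustrative names, not this repo's code):

~~~
def split_by_thresholds(detections, track_thresh=0.4, pre_thresh=0.5):
    # Boxes reported as tracking output for the current frame (theta).
    output = [d for d in detections if d['score'] > track_thresh]
    # Boxes fed back as the prior heatmap for the next frame (tau).
    fed_forward = [d for d in detections if d['score'] > pre_thresh]
    return output, fed_forward

dets = [{'score': 0.9}, {'score': 0.45}, {'score': 0.2}]
out, nxt = split_by_thresholds(dets)
print(len(out), len(nxt))  # 2 1
~~~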
49 | 50 | ### nuScenes 51 | 52 | Run: 53 | 54 | ~~~ 55 | python test.py tracking,ddd --exp_id nuScenes_3Dtracking --load_model ../models/nuScenes_3Dtracking.pth --dataset nuscenes --track_thresh 0.1 --pre_hm 56 | ~~~ 57 | 58 | The expected AMOTA is `6.8`. 59 | 60 | ## Training 61 | We have packed all the training scripts in the [experiments](../experiments) folder. 62 | The experiment names correspond to the model name in the [model zoo](MODEL_ZOO.md). 63 | The number of GPUs for each experiment can be found in the scripts and the model zoo. 64 | If the training is terminated before finishing, you can use the same command with `--resume` to resume training. It will found the latest model with the same `exp_id`. 65 | Some experiments rely on pretraining on another model. In this case, download the pretrained model from our model zoo or train that model first. -------------------------------------------------------------------------------- /readme/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 4 | The code was tested on Ubuntu 16.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6, CUDA 10.0, and [PyTorch]((http://pytorch.org/)) v1.0. 5 | It should be compatible with PyTorch <=1.4 and python >=0.4 (you will need to switch DCNv2 version for PyTorch <1.0). 6 | After installing Anaconda: 7 | 8 | 0. [Optional but highly recommended] create a new conda environment. 9 | 10 | ~~~ 11 | conda create --name CenterTrack python=3.6 12 | ~~~ 13 | And activate the environment. 14 | 15 | ~~~ 16 | conda activate CenterTrack 17 | ~~~ 18 | 19 | 1. Install PyTorch: 20 | 21 | ~~~ 22 | conda install pytorch torchvision -c pytorch 23 | ~~~ 24 | 25 | 26 | 2. Install [COCOAPI](https://github.com/cocodataset/cocoapi): 27 | 28 | ~~~ 29 | pip install cython; pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 30 | ~~~ 31 | 32 | 3. Clone this repo: 33 | 34 | ~~~ 35 | CenterTrack_ROOT=/path/to/clone/CenterTrack 36 | git clone --recursive https://github.com/xingyizhou/CenterTrack $CenterTrack_ROOT 37 | ~~~ 38 | 39 | You can manually install the [submodules](../.gitmodules) if you forget `--recursive`. 40 | 41 | 4. Install the requirements 42 | 43 | ~~~ 44 | pip install -r requirements.txt 45 | ~~~ 46 | 47 | 48 | 5. Compile deformable convolutional (from [DCNv2](https://github.com/CharlesShang/DCNv2/)). 49 | 50 | ~~~ 51 | cd $CenterTrack_ROOT/src/lib/model/networks/ 52 | # git clone https://github.com/CharlesShang/DCNv2/ # clone if it is not automatically downloaded by `--recursive`. 53 | cd DCNv2 54 | ./make.sh 55 | ~~~ 56 | 57 | 6. Download pertained models for [monocular 3D tracking](https://drive.google.com/open?id=1e8zR1m1QMJne-Tjp-2iY_o81hn2CiQRt), [80-category tracking](https://drive.google.com/open?id=1tJCEJmdtYIh8VuN8CClGNws3YO7QGd40), or [pose tracking](https://drive.google.com/open?id=1H0YvFYCOIZ06EzAkC2NxECNQGXxK27hH) and move them to `$CenterTrack_ROOT/models/`. More models can be found in [Model zoo](MODEL_ZOO.md). -------------------------------------------------------------------------------- /readme/MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # MODEL ZOO 2 | 3 | ### Common settings and notes 4 | 5 | - The experiments are run with PyTorch 1.0, CUDA 10.0, and CUDNN 7.5. 6 | - Training times are measured on our servers with TITAN V GPUs (12 GB Memory). 7 | - Testing times are measured on our local machine with TITAN Xp GPU. 
8 | - The models can be downloaded directly from [Google drive](https://drive.google.com/drive/folders/1y_CWlbboW_dfOx6zT9MU4ugLaLc6FEE8). 9 | 10 | ## 2D bounding box Tracking 11 | 12 | ### MOT17 13 | 14 | | Model | GPUs |Train time| Test time | Valication MOTA | Test MOTA | Download | 15 | |-----------------------|------|----------|-----------|------------------|------------|----------| 16 | | [mot17_fulltrain](../experiments/mot17_fulltrain.sh) | 4 | 4h | 45ms | - |67.3 (Private Detection)| [model](https://drive.google.com/file/d/1JYqO_IEoHpd7JEzZRXZSVesnEL4e-tnf) | 17 | | [mot17_fulltrain_sc](../experiments/mot17_fulltrain_sc.sh) | 4 | 4h | 45ms | - |61.4 (Public Detection) | [model](https://drive.google.com/file/d/17rtVMuFOnRzXj0_3egrFI5j-wc8XviDZ) | 18 | | [mot17_half](../experiments/mot17_half.sh) | 4 | 2h | 45ms | 66.1 | - | [model](https://drive.google.com/file/d/1rJ0fzRcpRQPjaN17lcqfKgsz-wJRifHh) | 19 | | [mot17_half_sc](../experiments/mot17_half_sc.sh) | 4 | 2h | 45ms | 60.7 | - | [model](https://drive.google.com/file/d/1o_cCo92WiVg8mgwyESd1Gg1AZYnq1iAJ) | 20 | | [crowdhuman](../experiments/crowdhuman.sh) | 4 | 21h | 45ms | 52.2 | - |[model](https://drive.google.com/file/d/1SD31FLwbXArcX3LXnRCqh6RF-q38nO7f) | 21 | 22 | #### Notes 23 | 24 | - `*_half` corresponds to the half-half video train/ val split mentioned in the paper. 25 | - `*_fulltrain` corresponds to train on the full training set, and evaluate on the official test server. These models are provided for arXiv and demo purposes. It is highly NOT recommended to submit our predictions to the test server, for not abusing the test set. Usually the validation results are all you need for developing. 26 | - `mot17_half`/ `mot17_fulltrain` are finetuned on the `crowdhuman` model, and `mot17_half_sc`/ `mot17_fulltrain_sc` are trained from ImageNet initialization. 27 | - The validation results are both using private detection. 28 | - All the MOT models are trained for 70 epochs, with learning rate dropped at the 60th epoch. 29 | - The crowdhuman model is trained on CrowdHuman dataset with the "training on static image data" technic in our paper, and evaluate directly in MOT17 validation set. The crowdhuman pretraining uses 140 epochs, with learning rate dropped at 90 and 140 epochs. 30 | - The training schedules are not well studies. 31 | - We observe about 1 MOTA random noise for MOT models. 32 | - If the resulting MOTA of your self-trained model is not desired, playing around with the `--track_thresh` and `--pre_thresh` sometimes gives a better number (See Appendix H of the paper). 33 | - The MOT models, even trained on the full training set, still does not look great for in-the-wild videos. The crowdhuman model is a better choice for real world application. However, be aware that both datasets are in non-commercial licenses. 
34 | 35 | 36 | ### KITTI 2D Tracking 37 | 38 | | Model |GPUs| Train time| Test time | Validation MOTA | Test MOTA | Download | 39 | |-----------------------|----|-----------|-----------|------------------|------------|-----------| 40 | | [kitti_fulltrain](../experiments/kitti_fulltrain.sh) (flip)| 2 | 9h | 66 | - | 89.44 | [model](https://drive.google.com/file/d/13oUEpeZ8bVQ6z7A6SH88de4SwLgh_kMB) | 41 | | [kitti_half](../experiments/kitti_half.sh) | 2 | 4.5h | 40 | 88.7 | - | [model](https://drive.google.com/file/d/1AZiFG0p3VxB2pA_5XIkbue4ASfxaA3e1) | 42 | | [kitti_half_sc](../experiments/kitti_half_sc.sh) | 2 | 4.5h | 40 | 84.5 | - | [model](https://drive.google.com/file/d/13rmdfi1rX3X7yFOndzyARTYO51uSNW0Z)| 43 | 44 | #### Notes 45 | 46 | - We use flip-test for the model we submitted to the test server (kitti_fulltrain_flip). 47 | - `kitti_fulltrain` are finetuned on the nuScenes_3Ddetection_e140 model (see below). 48 | - All the models are trained for 70 epochs. 49 | - We observe up to 1.5 MOTA jittering due to randomness. The results are reported for the best model. 50 | 51 | ## Monocular 3D Detection/ Tracking 52 | 53 | ### nuScenes 54 | 55 | | Model | GPUs |Train time| Test time | Val AMOTA@0.2 | Val AMOTA | Val mAP | Download | 56 | |--------------------------|------|----------|-----------|---------------|-----------|---------|-----------| 57 | | [nuScenes_3Ddetection_e140](../experiments/nuScenes_3Ddetection_e140.sh)| 8 | 72h | 28ms | - | - | 30.27 | [model](https://drive.google.com/file/d/1o989b1tANh49uHhNbsCCJ5J57FGiaFut) | 58 | | [nuScenes_3Dtracking](../experiments/nuScenes_3Dtracking.sh) | 8 | 40h | 28ms | 28.3 | 6.8 | - | [model](https://drive.google.com/file/d/1gPQFzqneDtT_PjJRRuyskRsNTRHXovw1) | 59 | 60 | #### Notes 61 | 62 | - Both models are trained on our DGX servers with 8x 32G V100 GPUs. 63 | - The 3D detection model is trained on all 6 camera images of the keyframes for 140 epochs. It does not include attributes and velocity prediction and is different from the model we used in the 3D detection leaderboard. See the CenterNet repo for details about the full 3D detection model we used for test set evaluation. 64 | - The 3D tracking model is finetuned on the 3D detection model for 70 epochs. 65 | - Training on 4 GPUs or 8x 12G GPUs with smaller batchsize is OK, if the [linear learning rate rule](https://arxiv.org/abs/1706.02677) is applied. 66 | 67 | ## COCO Tracking (for demo purpose only) 68 | 69 | | Model |GPUs| Train time| Test time | Download | 70 | |-----------------------|----|-----------|-----------|-----------| 71 | | [coco_tracking](../experiments/coco_tracking.sh) | 8 | 39h | 30ms | [model](https://drive.google.com/file/d/11DEfWa0TKYzNqY3CXR51WVvjMb4oRl08) | 72 | | [coco_pose_tracking](../experiments/coco_pose_tracking.sh) | 8 | 19h | 33ms | [model](https://drive.google.com/file/d/1yGFC_Q9wzSHL1d4eZW_44EBB2H42YKYt)| 73 | 74 | - Both models are trained with the "training on static image data" technic in our paper. 75 | - The models are not evaluated on any benchmarks since there are no suitable ones in this setting. We provide them for demo purpose only. 
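Regarding the linear learning rate rule mentioned in the nuScenes notes above, a quick worked example using numbers from the provided experiment scripts:

~~~
# Linear scaling: keep lr / batch_size constant when the total batch size changes.
base_batch, base_lr = 128, 5e-4     # e.g. coco_tracking.sh (8 GPUs)
new_batch = 64                      # e.g. the 4-GPU experiments
print(base_lr * new_batch / base_batch)  # 2.5e-4, the lr used in nuScenes_3Dtracking.sh
~~~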
-------------------------------------------------------------------------------- /readme/coco_det.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/coco_det.gif -------------------------------------------------------------------------------- /readme/coco_pose.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/coco_pose.gif -------------------------------------------------------------------------------- /readme/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/fig2.png -------------------------------------------------------------------------------- /readme/nuscenes_3d.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/readme/nuscenes_3d.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | easydict 7 | scipy 8 | pyquaternion 9 | nuscenes-devkit 10 | pyyaml 11 | motmetrics 12 | scikit-learn==0.22.2 -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/convert_onnx.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Script to convert a trained CenterNet model to ONNX, currently only 3 | support non-DCN models. 
4 | ''' 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import _init_paths 10 | import os 11 | import json 12 | import cv2 13 | import numpy as np 14 | import time 15 | from progress.bar import Bar 16 | import torch 17 | import copy 18 | 19 | from model.model import create_model, load_model 20 | from opts import opts 21 | from dataset.dataset_factory import dataset_factory 22 | from detector import Detector 23 | 24 | 25 | def convert_onnx(opt): 26 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 27 | opt.model_output_list = True 28 | if opt.gpus[0] >= 0: 29 | opt.device = torch.device('cuda') 30 | else: 31 | opt.device = torch.device('cpu') 32 | Dataset = dataset_factory[opt.test_dataset] 33 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 34 | print(opt) 35 | model = create_model( 36 | opt.arch, opt.heads, opt.head_conv, opt=opt) 37 | if opt.load_model != '': 38 | model = load_model(model, opt.load_model, opt) 39 | model = model.to(opt.device) 40 | model.eval() 41 | dummy_input1 = torch.randn(1, 3, opt.input_h, opt.input_w).to(opt.device) 42 | 43 | if opt.tracking: 44 | dummy_input2 = torch.randn(1, 3, opt.input_h, opt.input_w).to(opt.device) 45 | if opt.pre_hm: 46 | dummy_input3 = torch.randn(1, 1, opt.input_h, opt.input_w).to(opt.device) 47 | torch.onnx.export( 48 | model, (dummy_input1, dummy_input2, dummy_input3), 49 | "../models/{}.onnx".format(opt.exp_id)) 50 | else: 51 | torch.onnx.export( 52 | model, (dummy_input1, dummy_input2), 53 | "../models/{}.onnx".format(opt.exp_id)) 54 | else: 55 | torch.onnx.export( 56 | model, (dummy_input1, ), 57 | "../models/{}.onnx".format(opt.exp_id)) 58 | if __name__ == '__main__': 59 | opt = opts().parse() 60 | convert_onnx(opt) 61 | 62 | -------------------------------------------------------------------------------- /src/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import os 8 | import sys 9 | import cv2 10 | import json 11 | import copy 12 | import numpy as np 13 | from opts import opts 14 | from detector import Detector 15 | 16 | 17 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 18 | video_ext = ['mp4', 'mov', 'avi', 'mkv'] 19 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'display'] 20 | 21 | def demo(opt): 22 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 23 | opt.debug = max(opt.debug, 1) 24 | detector = Detector(opt) 25 | 26 | if opt.demo == 'webcam' or \ 27 | opt.demo[opt.demo.rfind('.') + 1:].lower() in video_ext: 28 | is_video = True 29 | # demo on video stream 30 | cam = cv2.VideoCapture(0 if opt.demo == 'webcam' else opt.demo) 31 | else: 32 | is_video = False 33 | # Demo on images sequences 34 | if os.path.isdir(opt.demo): 35 | image_names = [] 36 | ls = os.listdir(opt.demo) 37 | for file_name in sorted(ls): 38 | ext = file_name[file_name.rfind('.') + 1:].lower() 39 | if ext in image_ext: 40 | image_names.append(os.path.join(opt.demo, file_name)) 41 | else: 42 | image_names = [opt.demo] 43 | 44 | # Initialize output video 45 | out = None 46 | out_name = opt.demo[opt.demo.rfind('/') + 1:] 47 | print('out_name', out_name) 48 | if opt.save_video: 49 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 50 | fourcc = cv2.VideoWriter_fourcc(*'H264') 51 | out = cv2.VideoWriter('../results/{}.mp4'.format( 52 | opt.exp_id + '_' + 
out_name),fourcc, opt.save_framerate, ( 53 | opt.video_w, opt.video_h)) 54 | 55 | if opt.debug < 5: 56 | detector.pause = False 57 | cnt = 0 58 | results = {} 59 | 60 | while True: 61 | if is_video: 62 | _, img = cam.read() 63 | if img is None: 64 | save_and_exit(opt, out, results, out_name) 65 | else: 66 | if cnt < len(image_names): 67 | img = cv2.imread(image_names[cnt]) 68 | else: 69 | save_and_exit(opt, out, results, out_name) 70 | cnt += 1 71 | 72 | # resize the original video for saving video results 73 | if opt.resize_video: 74 | img = cv2.resize(img, (opt.video_w, opt.video_h)) 75 | 76 | # skip the first X frames of the video 77 | if cnt < opt.skip_first: 78 | continue 79 | 80 | cv2.imshow('input', img) 81 | 82 | # track or detect the image. 83 | ret = detector.run(img) 84 | 85 | # log run time 86 | time_str = 'frame {} |'.format(cnt) 87 | for stat in time_stats: 88 | time_str = time_str + '{} {:.3f}s |'.format(stat, ret[stat]) 89 | print(time_str) 90 | 91 | # results[cnt] is a list of dicts: 92 | # [{'bbox': [x1, y1, x2, y2], 'tracking_id': id, 'category_id': c, ...}] 93 | results[cnt] = ret['results'] 94 | 95 | # save debug image to video 96 | if opt.save_video: 97 | out.write(ret['generic']) 98 | if not is_video: 99 | cv2.imwrite('../results/demo{}.jpg'.format(cnt), ret['generic']) 100 | 101 | # esc to quit and finish saving video 102 | if cv2.waitKey(1) == 27: 103 | save_and_exit(opt, out, results, out_name) 104 | return 105 | save_and_exit(opt, out, results) 106 | 107 | 108 | def save_and_exit(opt, out=None, results=None, out_name=''): 109 | if opt.save_results and (results is not None): 110 | save_dir = '../results/{}_results.json'.format(opt.exp_id + '_' + out_name) 111 | print('saving results to', save_dir) 112 | json.dump(_to_list(copy.deepcopy(results)), 113 | open(save_dir, 'w')) 114 | if opt.save_video and out is not None: 115 | out.release() 116 | sys.exit(0) 117 | 118 | def _to_list(results): 119 | for img_id in results: 120 | for t in range(len(results[img_id])): 121 | for k in results[img_id][t]: 122 | if isinstance(results[img_id][t][k], (np.ndarray, np.float32)): 123 | results[img_id][t][k] = results[img_id][t][k].tolist() 124 | return results 125 | 126 | if __name__ == '__main__': 127 | opt = opts().init() 128 | demo(opt) 129 | -------------------------------------------------------------------------------- /src/lib/dataset/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from .datasets.coco import COCO 12 | from .datasets.kitti import KITTI 13 | from .datasets.coco_hp import COCOHP 14 | from .datasets.mot import MOT 15 | from .datasets.nuscenes import nuScenes 16 | from .datasets.crowdhuman import CrowdHuman 17 | from .datasets.kitti_tracking import KITTITracking 18 | from .datasets.custom_dataset import CustomDataset 19 | 20 | dataset_factory = { 21 | 'custom': CustomDataset, 22 | 'coco': COCO, 23 | 'kitti': KITTI, 24 | 'coco_hp': COCOHP, 25 | 'mot': MOT, 26 | 'nuscenes': nuScenes, 27 | 'crowdhuman': CrowdHuman, 28 | 'kitti_tracking': KITTITracking, 29 | } 30 | 31 | 32 | def get_dataset(dataset): 33 | return dataset_factory[dataset] 34 | -------------------------------------------------------------------------------- 
/src/lib/dataset/datasets/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | import copy 11 | 12 | from ..generic_dataset import GenericDataset 13 | 14 | class COCO(GenericDataset): 15 | default_resolution = [512, 512] 16 | num_categories = 80 17 | class_name = [ 18 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 19 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 20 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 21 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 22 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 23 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 24 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 25 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 26 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 27 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 28 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 29 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 30 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 31 | _valid_ids = [ 32 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 33 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 34 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 35 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 36 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 37 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 38 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 39 | 82, 84, 85, 86, 87, 88, 89, 90] 40 | cat_ids = {v: i + 1 for i, v in enumerate(_valid_ids)} 41 | num_joints = 17 42 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 43 | [11, 12], [13, 14], [15, 16]] 44 | edges = [[0, 1], [0, 2], [1, 3], [2, 4], 45 | [4, 6], [3, 5], [5, 6], 46 | [5, 7], [7, 9], [6, 8], [8, 10], 47 | [6, 12], [5, 11], [11, 12], 48 | [12, 14], [14, 16], [11, 13], [13, 15]] 49 | max_objs = 128 50 | def __init__(self, opt, split): 51 | # load annotations 52 | data_dir = os.path.join(opt.data_dir, 'coco') 53 | img_dir = os.path.join(data_dir, '{}2017'.format(split)) 54 | if opt.trainval: 55 | split = 'test' 56 | ann_path = os.path.join( 57 | data_dir, 'annotations', 58 | 'image_info_test-dev2017.json') 59 | else: 60 | ann_path = os.path.join( 61 | data_dir, 'annotations', 62 | 'instances_{}2017.json').format(split) 63 | 64 | self.images = None 65 | # load image list and coco 66 | super(COCO, self).__init__(opt, split, ann_path, img_dir) 67 | 68 | self.num_samples = len(self.images) 69 | 70 | print('Loaded {} {} samples'.format(split, self.num_samples)) 71 | 72 | def _to_float(self, x): 73 | return float("{:.2f}".format(x)) 74 | 75 | def convert_eval_format(self, all_bboxes): 76 | detections = [] 77 | for image_id in all_bboxes: 78 | if type(all_bboxes[image_id]) != type({}): 79 | # newest format 80 | for j in range(len(all_bboxes[image_id])): 81 | item = all_bboxes[image_id][j] 82 | cat_id = item['class'] - 1 83 | category_id = self._valid_ids[cat_id] 84 | bbox = item['bbox'] 85 | bbox[2] -= bbox[0] 86 | bbox[3] -= bbox[1] 87 | bbox_out = list(map(self._to_float, bbox[0:4])) 88 | detection = { 89 | "image_id": int(image_id), 90 | "category_id": int(category_id), 91 | "bbox": 
bbox_out, 92 | "score": float("{:.2f}".format(item['score'])) 93 | } 94 | detections.append(detection) 95 | return detections 96 | 97 | def __len__(self): 98 | return self.num_samples 99 | 100 | def save_results(self, results, save_dir): 101 | json.dump(self.convert_eval_format(results), 102 | open('{}/results_coco.json'.format(save_dir), 'w')) 103 | 104 | def run_eval(self, results, save_dir): 105 | self.save_results(results, save_dir) 106 | coco_dets = self.coco.loadRes('{}/results_coco.json'.format(save_dir)) 107 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 108 | coco_eval.evaluate() 109 | coco_eval.accumulate() 110 | coco_eval.summarize() -------------------------------------------------------------------------------- /src/lib/dataset/datasets/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from ..generic_dataset import GenericDataset 12 | 13 | class COCOHP(GenericDataset): 14 | num_categories = 1 15 | class_name = [''] 16 | num_joints = 17 17 | default_resolution = [512, 512] 18 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 19 | [11, 12], [13, 14], [15, 16]] 20 | edges = [[0, 1], [0, 2], [1, 3], [2, 4], 21 | [4, 6], [3, 5], [5, 6], 22 | [5, 7], [7, 9], [6, 8], [8, 10], 23 | [6, 12], [5, 11], [11, 12], 24 | [12, 14], [14, 16], [11, 13], [13, 15]] 25 | max_objs = 32 26 | cat_ids = {1: 1} 27 | 28 | def __init__(self, opt, split): 29 | data_dir = os.path.join(opt.data_dir, 'coco') 30 | img_dir = os.path.join(data_dir, '{}2017'.format(split)) 31 | if split == 'test': 32 | ann_path = os.path.join(data_dir, 'annotations', 33 | 'image_info_test-dev2017.json').format(split) 34 | else: 35 | ann_path = os.path.join(data_dir, 'annotations', 36 | 'person_keypoints_{}2017.json').format(split) 37 | 38 | 39 | self.images = None 40 | # load image list and coco 41 | super(COCOHP, self).__init__(opt, split, ann_path, img_dir) 42 | 43 | if split == 'train': 44 | image_ids = self.coco.getImgIds() 45 | self.images = [] 46 | for img_id in image_ids: 47 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 48 | if len(idxs) > 0: 49 | self.images.append(img_id) 50 | 51 | self.num_samples = len(self.images) 52 | print('Loaded {} {} samples'.format(split, self.num_samples)) 53 | 54 | def _to_float(self, x): 55 | return float("{:.2f}".format(x)) 56 | 57 | def convert_eval_format(self, all_bboxes): 58 | # import pdb; pdb.set_trace() 59 | detections = [] 60 | for image_id in all_bboxes: 61 | if type(all_bboxes[image_id]) != type({}): 62 | # newest format 63 | for j in range(len(all_bboxes[image_id])): 64 | item = all_bboxes[image_id][j] 65 | if item['class'] != 1: 66 | continue 67 | category_id = 1 68 | keypoints = np.concatenate([ 69 | np.array(item['hps'], dtype=np.float32).reshape(-1, 2), 70 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 71 | detection = { 72 | "image_id": int(image_id), 73 | "category_id": int(category_id), 74 | "score": float("{:.2f}".format(item['score'])), 75 | "keypoints": keypoints 76 | } 77 | if 'bbox' in item: 78 | bbox = item['bbox'] 79 | bbox[2] -= bbox[0] 80 | bbox[3] -= bbox[1] 81 | bbox_out = list(map(self._to_float, bbox[0:4])) 82 | detection['bbox'] = bbox_out 83 | detections.append(detection) 84 | return detections 85 | 86 | def 
__len__(self): 87 | return self.num_samples 88 | 89 | def save_results(self, results, save_dir): 90 | json.dump(self.convert_eval_format(results), 91 | open('{}/results_cocohp.json'.format(save_dir), 'w')) 92 | 93 | 94 | def run_eval(self, results, save_dir): 95 | # result_json = os.path.join(opt.save_dir, "results.json") 96 | # detections = convert_eval_format(all_boxes) 97 | # json.dump(detections, open(result_json, "w")) 98 | self.save_results(results, save_dir) 99 | coco_dets = self.coco.loadRes('{}/results_cocohp.json'.format(save_dir)) 100 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 101 | coco_eval.evaluate() 102 | coco_eval.accumulate() 103 | coco_eval.summarize() 104 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 105 | coco_eval.evaluate() 106 | coco_eval.accumulate() 107 | coco_eval.summarize() 108 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/crowdhuman.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | from ..generic_dataset import GenericDataset 12 | 13 | class CrowdHuman(GenericDataset): 14 | num_classes = 1 15 | num_joints = 17 16 | default_resolution = [512, 512] 17 | max_objs = 128 18 | class_name = ['person'] 19 | cat_ids = {1: 1} 20 | def __init__(self, opt, split): 21 | super(CrowdHuman, self).__init__() 22 | data_dir = os.path.join(opt.data_dir, 'crowdhuman') 23 | img_dir = os.path.join( 24 | data_dir, 'CrowdHuman_{}'.format(split), 'Images') 25 | ann_path = os.path.join(data_dir, 'annotations', 26 | '{}.json').format(split) 27 | 28 | print('==> initializing CityPersons {} data.'.format(split)) 29 | 30 | self.images = None 31 | # load image list and coco 32 | super(CrowdHuman, self).__init__(opt, split, ann_path, img_dir) 33 | 34 | self.num_samples = len(self.images) 35 | 36 | print('Loaded {} {} samples'.format(split, self.num_samples)) 37 | 38 | def _to_float(self, x): 39 | return float("{:.2f}".format(x)) 40 | 41 | def _save_results(self, records, fpath): 42 | with open(fpath,'w') as fid: 43 | for record in records: 44 | line = json.dumps(record)+'\n' 45 | fid.write(line) 46 | return fpath 47 | 48 | def convert_eval_format(self, all_bboxes): 49 | detections = [] 50 | person_id = 1 51 | for image_id in all_bboxes: 52 | if type(all_bboxes[image_id]) != type({}): 53 | # newest format 54 | dtboxes = [] 55 | for j in range(len(all_bboxes[image_id])): 56 | item = all_bboxes[image_id][j] 57 | if item['class'] != person_id: 58 | continue 59 | bbox = item['bbox'] 60 | bbox[2] -= bbox[0] 61 | bbox[3] -= bbox[1] 62 | bbox_out = list(map(self._to_float, bbox[0:4])) 63 | detection = { 64 | "tag": 1, 65 | "box": bbox_out, 66 | "score": float("{:.2f}".format(item['score'])) 67 | } 68 | dtboxes.append(detection) 69 | img_info = self.coco.loadImgs(ids=[image_id])[0] 70 | file_name = img_info['file_name'] 71 | detections.append({'ID': file_name[:-4], 'dtboxes': dtboxes}) 72 | return detections 73 | 74 | def __len__(self): 75 | return self.num_samples 76 | 77 | def save_results(self, results, save_dir): 78 | self._save_results(self.convert_eval_format(results), 79 | '{}/results_crowdhuman.odgt'.format(save_dir)) 80 | def run_eval(self, results, save_dir): 81 | self.save_results(results, save_dir) 82 | try: 
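      # Evaluation shells out to the external CrowdHuman evaluation script on the
      # saved .odgt detections; if that script is not installed, the except branch
      # below only prints a warning instead of failing.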
83 | os.system('python tools/crowdhuman_eval/demo.py ' + \ 84 | '../data/crowdhuman/annotation_val.odgt ' + \ 85 | '{}/results_crowdhuman.odgt'.format(save_dir)) 86 | except: 87 | print('Crowdhuman evaluation not setup!') -------------------------------------------------------------------------------- /src/lib/dataset/datasets/custom_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from ..generic_dataset import GenericDataset 6 | 7 | class CustomDataset(GenericDataset): 8 | num_categories = 1 9 | default_resolution = [-1, -1] 10 | class_name = [''] 11 | max_objs = 128 12 | cat_ids = {1: 1} 13 | def __init__(self, opt, split): 14 | assert (opt.custom_dataset_img_path != '') and \ 15 | (opt.custom_dataset_ann_path != '') and \ 16 | (opt.num_classes != -1) and \ 17 | (opt.input_h != -1) and (opt.input_w != -1), \ 18 | 'The following arguments must be specified for custom datasets: ' + \ 19 | 'custom_dataset_img_path, custom_dataset_ann_path, num_classes, ' + \ 20 | 'input_h, input_w.' 21 | img_dir = opt.custom_dataset_img_path 22 | ann_path = opt.custom_dataset_ann_path 23 | self.num_categories = opt.num_classes 24 | self.class_name = ['' for _ in range(self.num_categories)] 25 | self.default_resolution = [opt.input_h, opt.input_w] 26 | self.cat_ids = {i: i for i in range(1, self.num_categories + 1)} 27 | 28 | self.images = None 29 | # load image list and coco 30 | super().__init__(opt, split, ann_path, img_dir) 31 | 32 | self.num_samples = len(self.images) 33 | print('Loaded Custom dataset {} samples'.format(self.num_samples)) 34 | 35 | def __len__(self): 36 | return self.num_samples 37 | 38 | def run_eval(self, results, save_dir): 39 | pass 40 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/kitti.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | import math 12 | 13 | from ..generic_dataset import GenericDataset 14 | from utils.ddd_utils import compute_box_3d, project_to_image 15 | 16 | class KITTI(GenericDataset): 17 | num_categories = 3 18 | default_resolution = [384, 1280] 19 | # ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting', 20 | # 'Tram', 'Misc', 'DontCare'] 21 | class_name = ['Pedestrian', 'Car', 'Cyclist'] 22 | # negative id is for "not as negative sample for abs(id)". 
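  # (e.g. in cat_ids below, Van and Truck -> -2 so they are not penalized as
  #  negatives for Car, and Person_sitting -> -1 for Pedestrian; Tram and Misc
  #  -> -9999, i.e. no trained category)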
23 | # 0 for ignore losses for all categories in the bounding box region 24 | cat_ids = {1:1, 2:2, 3:3, 4:-2, 5:-2, 6:-1, 7:-9999, 8:-9999, 9:0} 25 | max_objs = 50 26 | def __init__(self, opt, split): 27 | data_dir = os.path.join(opt.data_dir, 'kitti') 28 | img_dir = os.path.join(data_dir, 'images', 'trainval') 29 | if opt.trainval: 30 | split = 'trainval' if split == 'train' else 'test' 31 | img_dir = os.path.join(data_dir, 'images', split) 32 | ann_path = os.path.join( 33 | data_dir, 'annotations', 'kitti_v2_{}.json').format(split) 34 | else: 35 | ann_path = os.path.join(data_dir, 36 | 'annotations', 'kitti_v2_{}_{}.json').format(opt.kitti_split, split) 37 | 38 | self.images = None 39 | # load image list and coco 40 | super(KITTI, self).__init__(opt, split, ann_path, img_dir) 41 | self.alpha_in_degree = False 42 | self.num_samples = len(self.images) 43 | 44 | print('Loaded {} {} samples'.format(split, self.num_samples)) 45 | 46 | 47 | def __len__(self): 48 | return self.num_samples 49 | 50 | def _to_float(self, x): 51 | return float("{:.2f}".format(x)) 52 | 53 | def convert_eval_format(self, all_bboxes): 54 | pass 55 | 56 | def save_results(self, results, save_dir): 57 | results_dir = os.path.join(save_dir, 'results_kitti') 58 | if not os.path.exists(results_dir): 59 | os.mkdir(results_dir) 60 | for img_id in results.keys(): 61 | out_path = os.path.join(results_dir, '{:06d}.txt'.format(img_id)) 62 | f = open(out_path, 'w') 63 | for i in range(len(results[img_id])): 64 | item = results[img_id][i] 65 | category_id = item['class'] 66 | cls_name_ind = category_id 67 | class_name = self.class_name[cls_name_ind - 1] 68 | if not ('alpha' in item): 69 | item['alpha'] = -1 70 | if not ('rot_y' in item): 71 | item['rot_y'] = -1 72 | if 'dim' in item: 73 | item['dim'] = [max(item['dim'][0], 0.01), 74 | max(item['dim'][1], 0.01), max(item['dim'][2], 0.01)] 75 | if not ('dim' in item): 76 | item['dim'] = [-1000, -1000, -1000] 77 | if not ('loc' in item): 78 | item['loc'] = [-1000, -1000, -1000] 79 | f.write('{} 0.0 0'.format(class_name)) 80 | f.write(' {:.2f}'.format(item['alpha'])) 81 | f.write(' {:.2f} {:.2f} {:.2f} {:.2f}'.format( 82 | item['bbox'][0], item['bbox'][1], item['bbox'][2], item['bbox'][3])) 83 | 84 | f.write(' {:.2f} {:.2f} {:.2f}'.format( 85 | item['dim'][0], item['dim'][1], item['dim'][2])) 86 | f.write(' {:.2f} {:.2f} {:.2f}'.format( 87 | item['loc'][0], item['loc'][1], item['loc'][2])) 88 | f.write(' {:.2f} {:.2f}\n'.format(item['rot_y'], item['score'])) 89 | f.close() 90 | 91 | def run_eval(self, results, save_dir): 92 | # import pdb; pdb.set_trace() 93 | self.save_results(results, save_dir) 94 | print('Results of IoU threshold 0.7') 95 | os.system('./tools/kitti_eval/evaluate_object_3d_offline_07 ' + \ 96 | '../data/kitti/training/label_val ' + \ 97 | '{}/results_kitti/'.format(save_dir)) 98 | print('Results of IoU threshold 0.5') 99 | os.system('./tools/kitti_eval/evaluate_object_3d_offline ' + \ 100 | '../data/kitti/training/label_val ' + \ 101 | '{}/results_kitti/'.format(save_dir)) 102 | 103 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/kitti_tracking.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | import math 12 | 13 | from 
..generic_dataset import GenericDataset 14 | from utils.ddd_utils import compute_box_3d, project_to_image 15 | 16 | class KITTITracking(GenericDataset): 17 | num_categories = 3 18 | default_resolution = [384, 1280] 19 | class_name = ['Pedestrian', 'Car', 'Cyclist'] 20 | # negative id is for "not as negative sample for abs(id)". 21 | # 0 for ignore losses for all categories in the bounding box region 22 | # ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting', 23 | # 'Tram', 'Misc', 'DontCare'] 24 | cat_ids = {1:1, 2:2, 3:3, 4:-2, 5:-2, 6:-1, 7:-9999, 8:-9999, 9:0} 25 | max_objs = 50 26 | def __init__(self, opt, split): 27 | data_dir = os.path.join(opt.data_dir, 'kitti_tracking') 28 | split_ = 'train' if opt.dataset_version != 'test' else 'test' #'test' 29 | img_dir = os.path.join( 30 | data_dir, 'data_tracking_image_2', '{}ing'.format(split_), 'image_02') 31 | ann_file_ = split_ if opt.dataset_version == '' else opt.dataset_version 32 | print('Warning! opt.dataset_version is not set') 33 | ann_path = os.path.join( 34 | data_dir, 'annotations', 'tracking_{}.json'.format( 35 | ann_file_)) 36 | self.images = None 37 | super(KITTITracking, self).__init__(opt, split, ann_path, img_dir) 38 | self.alpha_in_degree = False 39 | self.num_samples = len(self.images) 40 | 41 | print('Loaded {} {} samples'.format(split, self.num_samples)) 42 | 43 | 44 | def __len__(self): 45 | return self.num_samples 46 | 47 | def _to_float(self, x): 48 | return float("{:.2f}".format(x)) 49 | 50 | 51 | def save_results(self, results, save_dir): 52 | results_dir = os.path.join(save_dir, 'results_kitti_tracking') 53 | if not os.path.exists(results_dir): 54 | os.mkdir(results_dir) 55 | 56 | for video in self.coco.dataset['videos']: 57 | video_id = video['id'] 58 | file_name = video['file_name'] 59 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name)) 60 | f = open(out_path, 'w') 61 | images = self.video_to_images[video_id] 62 | 63 | for image_info in images: 64 | img_id = image_info['id'] 65 | if not (img_id in results): 66 | continue 67 | frame_id = image_info['frame_id'] 68 | for i in range(len(results[img_id])): 69 | item = results[img_id][i] 70 | category_id = item['class'] 71 | cls_name_ind = category_id 72 | class_name = self.class_name[cls_name_ind - 1] 73 | if not ('alpha' in item): 74 | item['alpha'] = -1 75 | if not ('rot_y' in item): 76 | item['rot_y'] = -10 77 | if 'dim' in item: 78 | item['dim'] = [max(item['dim'][0], 0.01), 79 | max(item['dim'][1], 0.01), max(item['dim'][2], 0.01)] 80 | if not ('dim' in item): 81 | item['dim'] = [-1, -1, -1] 82 | if not ('loc' in item): 83 | item['loc'] = [-1000, -1000, -1000] 84 | 85 | track_id = item['tracking_id'] if 'tracking_id' in item else -1 86 | f.write('{} {} {} -1 -1'.format(frame_id - 1, track_id, class_name)) 87 | f.write(' {:d}'.format(int(item['alpha']))) 88 | f.write(' {:.2f} {:.2f} {:.2f} {:.2f}'.format( 89 | item['bbox'][0], item['bbox'][1], item['bbox'][2], item['bbox'][3])) 90 | 91 | f.write(' {:d} {:d} {:d}'.format( 92 | int(item['dim'][0]), int(item['dim'][1]), int(item['dim'][2]))) 93 | f.write(' {:d} {:d} {:d}'.format( 94 | int(item['loc'][0]), int(item['loc'][1]), int(item['loc'][2]))) 95 | f.write(' {:d} {:.2f}\n'.format(int(item['rot_y']), item['score'])) 96 | f.close() 97 | 98 | def run_eval(self, results, save_dir): 99 | self.save_results(results, save_dir) 100 | os.system('python tools/eval_kitti_track/evaluate_tracking.py ' + \ 101 | '{}/results_kitti_tracking/ {}'.format( 102 | save_dir, self.opt.dataset_version)) 103 
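# Each results line written by save_results above follows the KITTI tracking
# layout: frame track_id type -1 -1 alpha x1 y1 x2 y2 dim(3) loc(3) rot_y score,
# with frame ids shifted to start at 0 and missing 3D fields replaced by the
# sentinel defaults (-1 / -10 / -1000) set earlier in that method.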
| 104 | -------------------------------------------------------------------------------- /src/lib/dataset/datasets/mot.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | from collections import defaultdict 11 | from ..generic_dataset import GenericDataset 12 | 13 | class MOT(GenericDataset): 14 | num_categories = 1 15 | default_resolution = [544, 960] 16 | class_name = [''] 17 | max_objs = 256 18 | cat_ids = {1: 1, -1: -1} 19 | def __init__(self, opt, split): 20 | self.dataset_version = opt.dataset_version 21 | self.year = int(self.dataset_version[:2]) 22 | print('Using MOT {} {}'.format(self.year, self.dataset_version)) 23 | data_dir = os.path.join(opt.data_dir, 'mot{}'.format(self.year)) 24 | 25 | if opt.dataset_version in ['17trainval', '17test']: 26 | ann_file = '{}.json'.format('train' if split == 'train' else \ 27 | 'test') 28 | elif opt.dataset_version == '17halftrain': 29 | ann_file = '{}.json'.format('train_half') 30 | elif opt.dataset_version == '17halfval': 31 | ann_file = '{}.json'.format('val_half') 32 | img_dir = os.path.join(data_dir, '{}'.format( 33 | 'test' if 'test' in self.dataset_version else 'train')) 34 | 35 | print('ann_file', ann_file) 36 | ann_path = os.path.join(data_dir, 'annotations', ann_file) 37 | 38 | self.images = None 39 | # load image list and coco 40 | super(MOT, self).__init__(opt, split, ann_path, img_dir) 41 | 42 | self.num_samples = len(self.images) 43 | print('Loaded MOT {} {} {} samples'.format( 44 | self.dataset_version, split, self.num_samples)) 45 | 46 | def _to_float(self, x): 47 | return float("{:.2f}".format(x)) 48 | 49 | def __len__(self): 50 | return self.num_samples 51 | 52 | def save_results(self, results, save_dir): 53 | results_dir = os.path.join(save_dir, 'results_mot{}'.format(self.dataset_version)) 54 | if not os.path.exists(results_dir): 55 | os.mkdir(results_dir) 56 | for video in self.coco.dataset['videos']: 57 | video_id = video['id'] 58 | file_name = video['file_name'] 59 | out_path = os.path.join(results_dir, '{}.txt'.format(file_name)) 60 | f = open(out_path, 'w') 61 | images = self.video_to_images[video_id] 62 | tracks = defaultdict(list) 63 | for image_info in images: 64 | if not (image_info['id'] in results): 65 | continue 66 | result = results[image_info['id']] 67 | frame_id = image_info['frame_id'] 68 | for item in result: 69 | if not ('tracking_id' in item): 70 | item['tracking_id'] = np.random.randint(100000) 71 | if item['active'] == 0: 72 | continue 73 | tracking_id = item['tracking_id'] 74 | bbox = item['bbox'] 75 | bbox = [bbox[0], bbox[1], bbox[2], bbox[3]] 76 | tracks[tracking_id].append([frame_id] + bbox) 77 | rename_track_id = 0 78 | for track_id in sorted(tracks): 79 | rename_track_id += 1 80 | for t in tracks[track_id]: 81 | f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n'.format( 82 | t[0], rename_track_id, t[1], t[2], t[3]-t[1], t[4]-t[2])) 83 | f.close() 84 | 85 | def run_eval(self, results, save_dir): 86 | self.save_results(results, save_dir) 87 | gt_type_str = '{}'.format( 88 | '_train_half' if '17halftrain' in self.opt.dataset_version \ 89 | else '_val_half' if '17halfval' in self.opt.dataset_version \ 90 | else '') 91 | gt_type_str = '_val_half' if self.year in [16, 19] else gt_type_str 92 | 
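    # gt_type tells tools/eval_motchallenge.py which ground-truth split to score
    # against (the default gt, or the train_half / val_half split); it is
    # forwarded below through the optional --gt_type flag when it is non-empty.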
gt_type_str = '--gt_type {}'.format(gt_type_str) if gt_type_str != '' else \ 93 | '' 94 | os.system('python tools/eval_motchallenge.py ' + \ 95 | '../data/mot{}/{}/ '.format(self.year, 'train') + \ 96 | '{}/results_mot{}/ '.format(save_dir, self.dataset_version) + \ 97 | gt_type_str + ' --eval_official') 98 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | import subprocess 11 | USE_TENSORBOARD = True 12 | try: 13 | import tensorboardX 14 | print('Using tensorboardX') 15 | except: 16 | USE_TENSORBOARD = False 17 | 18 | class Logger(object): 19 | def __init__(self, opt): 20 | """Create a summary writer logging to log_dir.""" 21 | if not os.path.exists(opt.save_dir): 22 | os.makedirs(opt.save_dir) 23 | if not os.path.exists(opt.debug_dir): 24 | os.makedirs(opt.debug_dir) 25 | 26 | time_str = time.strftime('%Y-%m-%d-%H-%M') 27 | 28 | args = dict((name, getattr(opt, name)) for name in dir(opt) 29 | if not name.startswith('_')) 30 | file_name = os.path.join(opt.save_dir, 'opt.txt') 31 | with open(file_name, 'wt') as opt_file: 32 | opt_file.write('==> commit hash: {}\n'.format( 33 | subprocess.check_output(["git", "describe"]))) 34 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 35 | opt_file.write('==> cudnn version: {}\n'.format( 36 | torch.backends.cudnn.version())) 37 | opt_file.write('==> Cmd:\n') 38 | opt_file.write(str(sys.argv)) 39 | opt_file.write('\n==> Opt:\n') 40 | for k, v in sorted(args.items()): 41 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 42 | 43 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 44 | if 
USE_TENSORBOARD: 45 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 46 | else: 47 | if not os.path.exists(os.path.dirname(log_dir)): 48 | os.mkdir(os.path.dirname(log_dir)) 49 | if not os.path.exists(log_dir): 50 | os.mkdir(log_dir) 51 | self.log = open(log_dir + '/log.txt', 'w') 52 | try: 53 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 54 | except: 55 | pass 56 | self.start_line = True 57 | 58 | def write(self, txt): 59 | if self.start_line: 60 | time_str = time.strftime('%Y-%m-%d-%H-%M') 61 | self.log.write('{}: {}'.format(time_str, txt)) 62 | else: 63 | self.log.write(txt) 64 | self.start_line = False 65 | if '\n' in txt: 66 | self.start_line = True 67 | self.log.flush() 68 | 69 | def close(self): 70 | self.log.close() 71 | 72 | def scalar_summary(self, tag, value, step): 73 | """Log a scalar variable.""" 74 | if USE_TENSORBOARD: 75 | self.writer.add_scalar(tag, value, step) 76 | -------------------------------------------------------------------------------- /src/lib/model/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 
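    Unlike the standard torch.nn.DataParallel, this variant also accepts a
    chunk_sizes argument so the batch can be split unevenly across GPUs; see the
    DataParallel factory function at the bottom of this file, which falls back to
    torch.nn.DataParallel whenever all chunk sizes are equal.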
30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/model/decode.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .utils import _gather_feat, _tranpose_and_gather_feat 8 | from .utils import _nms, _topk, _topk_channel 9 | 10 | 11 | def _update_kps_with_hm( 12 | kps, output, batch, num_joints, K, bboxes=None, scores=None): 13 | if 'hm_hp' in output: 14 | hm_hp = output['hm_hp'] 15 | hm_hp = _nms(hm_hp) 16 | thresh = 0.2 17 | kps = kps.view(batch, K, num_joints, 2).permute( 18 | 0, 2, 1, 3).contiguous() # b x J x K x 2 19 | reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2) 20 | hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K) # b x J x K 21 | if 'hp_offset' in output or 'reg' in output: 22 | hp_offset = output['hp_offset'] if 'hp_offset' in output \ 23 | else output['reg'] 24 | hp_offset = _tranpose_and_gather_feat( 25 | hp_offset, hm_inds.view(batch, -1)) 26 | hp_offset = hp_offset.view(batch, num_joints, K, 2) 27 | hm_xs = hm_xs + hp_offset[:, :, :, 0] 28 | hm_ys = hm_ys + hp_offset[:, :, :, 1] 29 | else: 30 | hm_xs = hm_xs + 0.5 31 | hm_ys = hm_ys + 0.5 32 | 33 | mask = (hm_score > thresh).float() 34 | hm_score = (1 - mask) * -1 + mask * hm_score 35 | hm_ys = (1 - mask) * (-10000) + mask * hm_ys 36 | hm_xs = (1 - mask) * (-10000) + mask * hm_xs 37 | hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze( 38 | 2).expand(batch, num_joints, K, K, 2) 39 | dist = (((reg_kps - hm_kps) ** 2).sum(dim=4) ** 0.5) 40 | min_dist, min_ind = dist.min(dim=3) # b x J x K 41 | hm_score = hm_score.gather(2, min_ind).unsqueeze(-1) # b x J x K x 1 42 | min_dist = min_dist.unsqueeze(-1) 43 | min_ind = min_ind.view(batch, num_joints, K, 1, 1).expand( 44 | batch, num_joints, K, 1, 2) 45 | hm_kps = hm_kps.gather(3, min_ind) 46 | hm_kps = hm_kps.view(batch, num_joints, K, 2) 47 | mask = (hm_score < thresh) 48 | 49 | if bboxes is not None: 50 | l = bboxes[:, :, 0].view(batch, 1, K, 1).expand(batch, num_joints, K, 1) 51 | t = bboxes[:, :, 1].view(batch, 1, K, 
1).expand(batch, num_joints, K, 1) 52 | r = bboxes[:, :, 2].view(batch, 1, K, 1).expand(batch, num_joints, K, 1) 53 | b = bboxes[:, :, 3].view(batch, 1, K, 1).expand(batch, num_joints, K, 1) 54 | mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \ 55 | (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + mask 56 | else: 57 | l = kps[:, :, :, 0:1].min(dim=1, keepdim=True)[0] 58 | t = kps[:, :, :, 1:2].min(dim=1, keepdim=True)[0] 59 | r = kps[:, :, :, 0:1].max(dim=1, keepdim=True)[0] 60 | b = kps[:, :, :, 1:2].max(dim=1, keepdim=True)[0] 61 | margin = 0.25 62 | l = l - (r - l) * margin 63 | r = r + (r - l) * margin 64 | t = t - (b - t) * margin 65 | b = b + (b - t) * margin 66 | mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \ 67 | (hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + mask 68 | # sc = (kps[:, :, :, :].max(dim=1, keepdim=True) - kps[:, :, :, :].min(dim=1)) 69 | # mask = mask + (min_dist > 10) 70 | mask = (mask > 0).float() 71 | kps_score = (1 - mask) * hm_score + mask * \ 72 | scores.unsqueeze(-1).expand(batch, num_joints, K, 1) # bJK1 73 | kps_score = scores * kps_score.mean(dim=1).view(batch, K) 74 | # kps_score[scores < 0.1] = 0 75 | mask = mask.expand(batch, num_joints, K, 2) 76 | kps = (1 - mask) * hm_kps + mask * kps 77 | kps = kps.permute(0, 2, 1, 3).contiguous().view( 78 | batch, K, num_joints * 2) 79 | return kps, kps_score 80 | else: 81 | return kps, kps 82 | 83 | def generic_decode(output, K=100, opt=None): 84 | if not ('hm' in output): 85 | return {} 86 | 87 | if opt.zero_tracking: 88 | output['tracking'] *= 0 89 | 90 | heat = output['hm'] 91 | batch, cat, height, width = heat.size() 92 | 93 | heat = _nms(heat) 94 | scores, inds, clses, ys0, xs0 = _topk(heat, K=K) 95 | 96 | clses = clses.view(batch, K) 97 | scores = scores.view(batch, K) 98 | bboxes = None 99 | cts = torch.cat([xs0.unsqueeze(2), ys0.unsqueeze(2)], dim=2) 100 | ret = {'scores': scores, 'clses': clses.float(), 101 | 'xs': xs0, 'ys': ys0, 'cts': cts} 102 | if 'reg' in output: 103 | reg = output['reg'] 104 | reg = _tranpose_and_gather_feat(reg, inds) 105 | reg = reg.view(batch, K, 2) 106 | xs = xs0.view(batch, K, 1) + reg[:, :, 0:1] 107 | ys = ys0.view(batch, K, 1) + reg[:, :, 1:2] 108 | else: 109 | xs = xs0.view(batch, K, 1) + 0.5 110 | ys = ys0.view(batch, K, 1) + 0.5 111 | 112 | if 'wh' in output: 113 | wh = output['wh'] 114 | wh = _tranpose_and_gather_feat(wh, inds) # B x K x (F) 115 | # wh = wh.view(batch, K, -1) 116 | wh = wh.view(batch, K, 2) 117 | wh[wh < 0] = 0 118 | if wh.size(2) == 2 * cat: # cat spec 119 | wh = wh.view(batch, K, -1, 2) 120 | cats = clses.view(batch, K, 1, 1).expand(batch, K, 1, 2) 121 | wh = wh.gather(2, cats.long()).squeeze(2) # B x K x 2 122 | else: 123 | pass 124 | bboxes = torch.cat([xs - wh[..., 0:1] / 2, 125 | ys - wh[..., 1:2] / 2, 126 | xs + wh[..., 0:1] / 2, 127 | ys + wh[..., 1:2] / 2], dim=2) 128 | ret['bboxes'] = bboxes 129 | # print('ret bbox', ret['bboxes']) 130 | 131 | if 'ltrb' in output: 132 | ltrb = output['ltrb'] 133 | ltrb = _tranpose_and_gather_feat(ltrb, inds) # B x K x 4 134 | ltrb = ltrb.view(batch, K, 4) 135 | bboxes = torch.cat([xs0.view(batch, K, 1) + ltrb[..., 0:1], 136 | ys0.view(batch, K, 1) + ltrb[..., 1:2], 137 | xs0.view(batch, K, 1) + ltrb[..., 2:3], 138 | ys0.view(batch, K, 1) + ltrb[..., 3:4]], dim=2) 139 | ret['bboxes'] = bboxes 140 | 141 | 142 | regression_heads = ['tracking', 'dep', 'rot', 'dim', 'amodel_offset', 143 | 'nuscenes_att', 'velocity'] 144 | 145 | for head in regression_heads: 146 | if head in output: 147 | 
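      # Each dense regression map is gathered at the K selected center indices
      # (inds) and reshaped to B x K x C, so every kept detection carries its
      # per-head regression output alongside its score and box.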
ret[head] = _tranpose_and_gather_feat( 148 | output[head], inds).view(batch, K, -1) 149 | 150 | if 'ltrb_amodal' in output: 151 | ltrb_amodal = output['ltrb_amodal'] 152 | ltrb_amodal = _tranpose_and_gather_feat(ltrb_amodal, inds) # B x K x 4 153 | ltrb_amodal = ltrb_amodal.view(batch, K, 4) 154 | bboxes_amodal = torch.cat([xs0.view(batch, K, 1) + ltrb_amodal[..., 0:1], 155 | ys0.view(batch, K, 1) + ltrb_amodal[..., 1:2], 156 | xs0.view(batch, K, 1) + ltrb_amodal[..., 2:3], 157 | ys0.view(batch, K, 1) + ltrb_amodal[..., 3:4]], dim=2) 158 | ret['bboxes_amodal'] = bboxes_amodal 159 | ret['bboxes'] = bboxes_amodal 160 | 161 | if 'hps' in output: 162 | kps = output['hps'] 163 | num_joints = kps.shape[1] // 2 164 | kps = _tranpose_and_gather_feat(kps, inds) 165 | kps = kps.view(batch, K, num_joints * 2) 166 | kps[..., ::2] += xs0.view(batch, K, 1).expand(batch, K, num_joints) 167 | kps[..., 1::2] += ys0.view(batch, K, 1).expand(batch, K, num_joints) 168 | kps, kps_score = _update_kps_with_hm( 169 | kps, output, batch, num_joints, K, bboxes, scores) 170 | ret['hps'] = kps 171 | ret['kps_score'] = kps_score 172 | 173 | if 'pre_inds' in output and output['pre_inds'] is not None: 174 | pre_inds = output['pre_inds'] # B x pre_K 175 | pre_K = pre_inds.shape[1] 176 | pre_ys = (pre_inds / width).int().float() 177 | pre_xs = (pre_inds % width).int().float() 178 | 179 | ret['pre_cts'] = torch.cat( 180 | [pre_xs.unsqueeze(2), pre_ys.unsqueeze(2)], dim=2) 181 | 182 | return ret 183 | -------------------------------------------------------------------------------- /src/lib/model/losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | from .utils import _tranpose_and_gather_feat, _nms, _topk 14 | import torch.nn.functional as F 15 | from utils.image import draw_umich_gaussian 16 | 17 | def _slow_neg_loss(pred, gt): 18 | '''focal loss from CornerNet''' 19 | pos_inds = gt.eq(1).float() 20 | neg_inds = gt.lt(1).float() 21 | 22 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 23 | 24 | loss = 0 25 | pos_pred = pred[pos_inds] 26 | neg_pred = pred[neg_inds] 27 | 28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 30 | 31 | num_pos = pos_inds.float().sum() 32 | pos_loss = pos_loss.sum() 33 | neg_loss = neg_loss.sum() 34 | 35 | if pos_pred.nelement() == 0: 36 | loss = loss - neg_loss 37 | else: 38 | loss = loss - (pos_loss + neg_loss) / num_pos 39 | return loss 40 | 41 | def _neg_loss(pred, gt): 42 | ''' Reimplemented focal loss. Exactly the same as CornerNet. 
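      In short, with p = pred:
        loss = -1/N_pos * [ sum_{gt==1} (1-p)^2 * log(p)
                            + sum_{gt<1} (1-gt)^4 * p^2 * log(1-p) ]
      where N_pos is the number of ground-truth peaks (gt == 1).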
43 | Runs faster and costs a little bit more memory 44 | Arguments: 45 | pred (batch x c x h x w) 46 | gt_regr (batch x c x h x w) 47 | ''' 48 | pos_inds = gt.eq(1).float() 49 | neg_inds = gt.lt(1).float() 50 | 51 | neg_weights = torch.pow(1 - gt, 4) 52 | 53 | loss = 0 54 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds 55 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds 56 | 57 | num_pos = pos_inds.float().sum() 58 | pos_loss = pos_loss.sum() 59 | neg_loss = neg_loss.sum() 60 | if num_pos == 0: 61 | loss = loss - neg_loss 62 | else: 63 | loss = loss - (pos_loss + neg_loss) / num_pos 64 | return loss 65 | 66 | 67 | def _only_neg_loss(pred, gt): 68 | gt = torch.pow(1 - gt, 4) 69 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * gt 70 | return neg_loss.sum() 71 | 72 | class FastFocalLoss(nn.Module): 73 | ''' 74 | Reimplemented focal loss, exactly the same as the CornerNet version. 75 | Faster and costs much less memory. 76 | ''' 77 | def __init__(self, opt=None): 78 | super(FastFocalLoss, self).__init__() 79 | self.only_neg_loss = _only_neg_loss 80 | 81 | def forward(self, out, target, ind, mask, cat): 82 | ''' 83 | Arguments: 84 | out, target: B x C x H x W 85 | ind, mask: B x M 86 | cat (category id for peaks): B x M 87 | ''' 88 | neg_loss = self.only_neg_loss(out, target) 89 | pos_pred_pix = _tranpose_and_gather_feat(out, ind) # B x M x C 90 | pos_pred = pos_pred_pix.gather(2, cat.unsqueeze(2)) # B x M 91 | num_pos = mask.sum() 92 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * \ 93 | mask.unsqueeze(2) 94 | pos_loss = pos_loss.sum() 95 | if num_pos == 0: 96 | return - neg_loss 97 | return - (pos_loss + neg_loss) / num_pos 98 | 99 | def _reg_loss(regr, gt_regr, mask): 100 | ''' L1 regression loss 101 | Arguments: 102 | regr (batch x max_objects x dim) 103 | gt_regr (batch x max_objects x dim) 104 | mask (batch x max_objects) 105 | ''' 106 | num = mask.float().sum() 107 | mask = mask.unsqueeze(2).expand_as(gt_regr).float() 108 | 109 | regr = regr * mask 110 | gt_regr = gt_regr * mask 111 | 112 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum') 113 | regr_loss = regr_loss / (num + 1e-4) 114 | return regr_loss 115 | 116 | 117 | class RegWeightedL1Loss(nn.Module): 118 | def __init__(self): 119 | super(RegWeightedL1Loss, self).__init__() 120 | 121 | def forward(self, output, mask, ind, target): 122 | pred = _tranpose_and_gather_feat(output, ind) 123 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 124 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 125 | loss = loss / (mask.sum() + 1e-4) 126 | return loss 127 | 128 | 129 | class WeightedBCELoss(nn.Module): 130 | def __init__(self): 131 | super(WeightedBCELoss, self).__init__() 132 | self.bceloss = torch.nn.BCEWithLogitsLoss(reduction='none') 133 | 134 | def forward(self, output, mask, ind, target): 135 | # output: B x F x H x W 136 | # ind: B x M 137 | # mask: B x M x F 138 | # target: B x M x F 139 | pred = _tranpose_and_gather_feat(output, ind) # B x M x F 140 | loss = mask * self.bceloss(pred, target) 141 | loss = loss.sum() / (mask.sum() + 1e-4) 142 | return loss 143 | 144 | class BinRotLoss(nn.Module): 145 | def __init__(self): 146 | super(BinRotLoss, self).__init__() 147 | 148 | def forward(self, output, mask, ind, rotbin, rotres): 149 | pred = _tranpose_and_gather_feat(output, ind) 150 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 151 | return loss 152 | 153 | def compute_res_loss(output, 
target): 154 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 155 | 156 | def compute_bin_loss(output, target, mask): 157 | mask = mask.expand_as(output) 158 | output = output * mask.float() 159 | return F.cross_entropy(output, target, reduction='elementwise_mean') 160 | 161 | def compute_rot_loss(output, target_bin, target_res, mask): 162 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 163 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 164 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 165 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 166 | # mask: (B, 128, 1) 167 | output = output.view(-1, 8) 168 | target_bin = target_bin.view(-1, 2) 169 | target_res = target_res.view(-1, 2) 170 | mask = mask.view(-1, 1) 171 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 172 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 173 | loss_res = torch.zeros_like(loss_bin1) 174 | if target_bin[:, 0].nonzero().shape[0] > 0: 175 | idx1 = target_bin[:, 0].nonzero()[:, 0] 176 | valid_output1 = torch.index_select(output, 0, idx1.long()) 177 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 178 | loss_sin1 = compute_res_loss( 179 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 180 | loss_cos1 = compute_res_loss( 181 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 182 | loss_res += loss_sin1 + loss_cos1 183 | if target_bin[:, 1].nonzero().shape[0] > 0: 184 | idx2 = target_bin[:, 1].nonzero()[:, 0] 185 | valid_output2 = torch.index_select(output, 0, idx2.long()) 186 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 187 | loss_sin2 = compute_res_loss( 188 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 189 | loss_cos2 = compute_res_loss( 190 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 191 | loss_res += loss_sin2 + loss_cos2 192 | return loss_bin1 + loss_bin2 + loss_res -------------------------------------------------------------------------------- /src/lib/model/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.dla import DLASeg 11 | from .networks.resdcn import PoseResDCN 12 | from .networks.resnet import PoseResNet 13 | from .networks.dlav0 import DLASegv0 14 | from .networks.generic_network import GenericNetwork 15 | 16 | _network_factory = { 17 | 'resdcn': PoseResDCN, 18 | 'dla': DLASeg, 19 | 'res': PoseResNet, 20 | 'dlav0': DLASegv0, 21 | 'generic': GenericNetwork 22 | } 23 | 24 | def create_model(arch, head, head_conv, opt=None): 25 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 26 | arch = arch[:arch.find('_')] if '_' in arch else arch 27 | model_class = _network_factory[arch] 28 | model = model_class(num_layers, heads=head, head_convs=head_conv, opt=opt) 29 | return model 30 | 31 | def load_model(model, model_path, opt, optimizer=None): 32 | start_epoch = 0 33 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 34 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 35 | state_dict_ = checkpoint['state_dict'] 36 | state_dict = {} 37 | 38 | # convert data_parallal to model 39 | for k in state_dict_: 40 | if k.startswith('module') and not k.startswith('module_list'): 41 | 
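      # checkpoints saved from nn.DataParallel prefix every key with 'module.'
      # (7 characters); strip it so the keys match the bare model's state dict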
state_dict[k[7:]] = state_dict_[k] 42 | else: 43 | state_dict[k] = state_dict_[k] 44 | model_state_dict = model.state_dict() 45 | 46 | # check loaded parameters and created model parameters 47 | for k in state_dict: 48 | if k in model_state_dict: 49 | if (state_dict[k].shape != model_state_dict[k].shape) or \ 50 | (opt.reset_hm and k.startswith('hm') and (state_dict[k].shape[0] in [80, 1])): 51 | if opt.reuse_hm: 52 | print('Reusing parameter {}, required shape{}, '\ 53 | 'loaded shape{}.'.format( 54 | k, model_state_dict[k].shape, state_dict[k].shape)) 55 | if state_dict[k].shape[0] < state_dict[k].shape[0]: 56 | model_state_dict[k][:state_dict[k].shape[0]] = state_dict[k] 57 | else: 58 | model_state_dict[k] = state_dict[k][:model_state_dict[k].shape[0]] 59 | state_dict[k] = model_state_dict[k] 60 | else: 61 | print('Skip loading parameter {}, required shape{}, '\ 62 | 'loaded shape{}.'.format( 63 | k, model_state_dict[k].shape, state_dict[k].shape)) 64 | state_dict[k] = model_state_dict[k] 65 | else: 66 | print('Drop parameter {}.'.format(k)) 67 | for k in model_state_dict: 68 | if not (k in state_dict): 69 | print('No param {}.'.format(k)) 70 | state_dict[k] = model_state_dict[k] 71 | model.load_state_dict(state_dict, strict=False) 72 | 73 | # resume optimizer parameters 74 | if optimizer is not None and opt.resume: 75 | if 'optimizer' in checkpoint: 76 | # optimizer.load_state_dict(checkpoint['optimizer']) 77 | start_epoch = checkpoint['epoch'] 78 | start_lr = opt.lr 79 | for step in opt.lr_step: 80 | if start_epoch >= step: 81 | start_lr *= 0.1 82 | for param_group in optimizer.param_groups: 83 | param_group['lr'] = start_lr 84 | print('Resumed optimizer with start lr', start_lr) 85 | else: 86 | print('No optimizer parameters in checkpoint.') 87 | if optimizer is not None: 88 | return model, optimizer, start_epoch 89 | else: 90 | return model 91 | 92 | def save_model(path, epoch, model, optimizer=None): 93 | if isinstance(model, torch.nn.DataParallel): 94 | state_dict = model.module.state_dict() 95 | else: 96 | state_dict = model.state_dict() 97 | data = {'epoch': epoch, 98 | 'state_dict': state_dict} 99 | if not (optimizer is None): 100 | data['optimizer'] = optimizer.state_dict() 101 | torch.save(data, path) 102 | 103 | -------------------------------------------------------------------------------- /src/lib/model/networks/backbones/mobilenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import math 7 | import logging 8 | import numpy as np 9 | 10 | import torch 11 | from torch import nn 12 | import torch.nn.functional as F 13 | import torch.utils.model_zoo as model_zoo 14 | from torchvision.models.utils import load_state_dict_from_url 15 | 16 | BN_MOMENTUM = 0.1 17 | 18 | model_urls = { 19 | 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', 20 | } 21 | 22 | def _make_divisible(v, divisor, min_value=None): 23 | """ 24 | This function is taken from the original tf repo. 
25 | It ensures that all layers have a channel number that is divisible by 8 26 | It can be seen here: 27 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 28 | :param v: 29 | :param divisor: 30 | :param min_value: 31 | :return: 32 | """ 33 | if min_value is None: 34 | min_value = divisor 35 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 36 | # Make sure that round down does not go down by more than 10%. 37 | if new_v < 0.9 * v: 38 | new_v += divisor 39 | return new_v 40 | 41 | 42 | class ConvBNReLU(nn.Sequential): 43 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): 44 | padding = (kernel_size - 1) // 2 45 | super(ConvBNReLU, self).__init__( 46 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 47 | nn.BatchNorm2d(out_planes), 48 | nn.ReLU6(inplace=True) 49 | ) 50 | 51 | 52 | class InvertedResidual(nn.Module): 53 | def __init__(self, inp, oup, stride, expand_ratio): 54 | super(InvertedResidual, self).__init__() 55 | self.stride = stride 56 | assert stride in [1, 2] 57 | 58 | hidden_dim = int(round(inp * expand_ratio)) 59 | self.use_res_connect = self.stride == 1 and inp == oup 60 | 61 | layers = [] 62 | if expand_ratio != 1: 63 | # pw 64 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) 65 | layers.extend([ 66 | # dw 67 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), 68 | # pw-linear 69 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 70 | nn.BatchNorm2d(oup), 71 | ]) 72 | self.conv = nn.Sequential(*layers) 73 | 74 | def forward(self, x): 75 | if self.use_res_connect: 76 | return x + self.conv(x) 77 | else: 78 | return self.conv(x) 79 | 80 | 81 | class MobileNetV2(nn.Module): 82 | def __init__(self, opt, 83 | width_mult=1.0, 84 | round_nearest=8, 85 | block=None): 86 | """ 87 | MobileNet V2 main class 88 | Args: 89 | num_classes (int): Number of classes 90 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 91 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 92 | Set to 1 to turn off rounding 93 | block: Module specifying inverted residual building block for mobilenet 94 | """ 95 | super().__init__() 96 | if block is None: 97 | block = InvertedResidual 98 | input_channel = 32 99 | last_channel = 1280 100 | 101 | inverted_residual_setting = [ 102 | # t, c, n, s 103 | [1, 16, 1, 1], # 1 104 | [6, 24, 2, 2], # 2 105 | [6, 32, 3, 2], # 3 106 | [6, 64, 4, 2], # 4 107 | [6, 96, 3, 1], # 5 108 | [6, 160, 3, 2],# 6 109 | [6, 320, 1, 1],# 7 110 | ] 111 | 112 | # only check the first element, assuming user knows t,c,n,s are required 113 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 114 | raise ValueError("inverted_residual_setting should be non-empty " 115 | "or a 4-element list, got {}".format(inverted_residual_setting)) 116 | 117 | # building first layer 118 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 119 | # self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 120 | if opt.pre_img: 121 | print('adding pre_img layer...') 122 | self.pre_img_layer = nn.Sequential( 123 | nn.Conv2d(3, input_channel, kernel_size=3, padding=1, stride=2, bias=False), 124 | nn.BatchNorm2d(input_channel)) 125 | if opt.pre_hm: 126 | print('adding pre_hm layer...') 127 | self.pre_hm_layer = nn.Sequential( 128 | nn.Conv2d(1, input_channel, kernel_size=3, 
padding=1, stride=2, bias=False), 129 | nn.BatchNorm2d(input_channel)) 130 | features = [ConvBNReLU(3, input_channel, stride=2)] 131 | self.key_block = [True] 132 | all_channels = [input_channel] 133 | self.channels = [input_channel] 134 | # building inverted residual blocks 135 | for t, c, n, s in inverted_residual_setting: 136 | output_channel = _make_divisible(c * width_mult, round_nearest) 137 | for i in range(n): 138 | stride = s if i == 0 else 1 139 | features.append(block(input_channel, output_channel, stride, expand_ratio=t)) 140 | input_channel = output_channel 141 | if stride == 2: 142 | self.key_block.append(True) 143 | else: 144 | self.key_block.append(False) 145 | all_channels.append(output_channel) 146 | # building last several layers 147 | # features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) 148 | # self.key_block.append(False) 149 | # all_channels.append(self.last_channel) 150 | for i in range(len(self.key_block) - 1): 151 | if self.key_block[i + 1]: 152 | self.key_block[i] = True 153 | self.key_block[i + 1] = False 154 | self.channels.append(all_channels[i]) 155 | self.key_block[-1] = True 156 | self.channels.append(all_channels[-1]) 157 | print('channels', self.channels) 158 | # make it nn.Sequential 159 | self.features = nn.ModuleList(features) 160 | print('len(self.features)', len(self.features)) 161 | # self.channels = [, ] 162 | 163 | # weight initialization 164 | for m in self.modules(): 165 | if isinstance(m, nn.Conv2d): 166 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 167 | if m.bias is not None: 168 | nn.init.zeros_(m.bias) 169 | elif isinstance(m, nn.BatchNorm2d): 170 | nn.init.ones_(m.weight) 171 | nn.init.zeros_(m.bias) 172 | elif isinstance(m, nn.Linear): 173 | nn.init.normal_(m.weight, 0, 0.01) 174 | nn.init.zeros_(m.bias) 175 | state_dict = load_state_dict_from_url(model_urls['mobilenet_v2']) 176 | self.load_state_dict(state_dict, strict=False) 177 | 178 | def forward(self, inputs, pre_img=None, pre_hm=None): 179 | x = self.features[0](inputs) 180 | if pre_img is not None: 181 | x = x + self.pre_img_layer(pre_img) 182 | if pre_hm is not None: 183 | x = x + self.pre_hm_layer(pre_hm) 184 | y = [x] 185 | for i in range(1, len(self.features)): 186 | x = self.features[i](x) 187 | # print('i, shape, is_key', i, x.shape, self.key_block[i]) 188 | if self.key_block[i]: 189 | y.append(x) 190 | return y 191 | 192 | -------------------------------------------------------------------------------- /src/lib/model/networks/backbones/resnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import math 7 | import logging 8 | import numpy as np 9 | 10 | import torch 11 | from torch import nn 12 | import torch.nn.functional as F 13 | import torch.utils.model_zoo as model_zoo 14 | 15 | BN_MOMENTUM = 0.1 16 | 17 | model_urls = { 18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 23 | } 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | """3x3 convolution with padding""" 27 | return nn.Conv2d(in_planes, out_planes, 
kernel_size=3, stride=stride, 28 | padding=1, bias=False) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 38 | self.relu = nn.ReLU(inplace=True) 39 | self.conv2 = conv3x3(planes, planes) 40 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 41 | self.downsample = downsample 42 | self.stride = stride 43 | 44 | def forward(self, x): 45 | residual = x 46 | 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 69 | self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 70 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=False) 72 | self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) 73 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, 74 | bias=False) 75 | self.bn3 = nn.BatchNorm2d(planes * self.expansion, 76 | momentum=BN_MOMENTUM) 77 | self.relu = nn.ReLU(inplace=True) 78 | self.downsample = downsample 79 | self.stride = stride 80 | 81 | def forward(self, x): 82 | residual = x 83 | 84 | out = self.conv1(x) 85 | out = self.bn1(out) 86 | out = self.relu(out) 87 | 88 | out = self.conv2(out) 89 | out = self.bn2(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv3(out) 93 | out = self.bn3(out) 94 | 95 | if self.downsample is not None: 96 | residual = self.downsample(x) 97 | 98 | out += residual 99 | out = self.relu(out) 100 | 101 | return out 102 | 103 | 104 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]), 105 | 34: (BasicBlock, [3, 4, 6, 3]), 106 | 50: (Bottleneck, [3, 4, 6, 3]), 107 | 101: (Bottleneck, [3, 4, 23, 3]), 108 | 152: (Bottleneck, [3, 8, 36, 3])} 109 | 110 | class Resnet(nn.Module): 111 | def __init__(self, opt): 112 | super().__init__() 113 | assert (not opt.pre_hm) and (not opt.pre_img) 114 | self.inplanes = 64 115 | block, layers = resnet_spec[opt.num_layers] 116 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 117 | bias=False) 118 | self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) 119 | self.relu = nn.ReLU(inplace=True) 120 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 121 | self.layer1 = self._make_layer(block, 64, layers[0]) 122 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 123 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 124 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 125 | 126 | self.channels = [64, 64, 127 | 64 * block.expansion, 128 | 128 * block.expansion, 129 | 256 * block.expansion, 130 | 512 * block.expansion] 131 | 132 | self._init_weights(opt.num_layers) 133 | 134 | 135 | def _make_layer(self, block, planes, blocks, stride=1): 136 | downsample = None 137 | if stride != 1 or self.inplanes != planes * block.expansion: 138 | downsample = nn.Sequential( 139 | nn.Conv2d(self.inplanes, planes * block.expansion, 140 | kernel_size=1, stride=stride, bias=False), 141 | 
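          # this 1x1 conv (above) and the BatchNorm below project the identity
          # shortcut to the block's output channels and stride so the residual
          # addition in BasicBlock/Bottleneck has matching shapes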
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), 142 | ) 143 | 144 | layers = [] 145 | layers.append(block(self.inplanes, planes, stride, downsample)) 146 | self.inplanes = planes * block.expansion 147 | for i in range(1, blocks): 148 | layers.append(block(self.inplanes, planes)) 149 | 150 | return nn.Sequential(*layers) 151 | 152 | def forward(self, x): 153 | x = self.conv1(x) 154 | x = self.bn1(x) 155 | x = self.relu(x) 156 | y = [x] 157 | x = self.maxpool(x) 158 | y.append(x) 159 | 160 | x = self.layer1(x) 161 | y.append(x) 162 | x = self.layer2(x) 163 | y.append(x) 164 | x = self.layer3(x) 165 | y.append(x) 166 | x = self.layer4(x) 167 | y.append(x) 168 | 169 | return y 170 | 171 | def _init_weights(self, num_layers): 172 | url = model_urls['resnet{}'.format(num_layers)] 173 | pretrained_state_dict = model_zoo.load_url(url) 174 | print('=> loading pretrained model {}'.format(url)) 175 | self.load_state_dict(pretrained_state_dict, strict=False) -------------------------------------------------------------------------------- /src/lib/model/networks/base_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | 8 | def fill_fc_weights(layers): 9 | for m in layers.modules(): 10 | if isinstance(m, nn.Conv2d): 11 | if m.bias is not None: 12 | nn.init.constant_(m.bias, 0) 13 | 14 | class BaseModel(nn.Module): 15 | def __init__(self, heads, head_convs, num_stacks, last_channel, opt=None): 16 | super(BaseModel, self).__init__() 17 | if opt is not None and opt.head_kernel != 3: 18 | print('Using head kernel:', opt.head_kernel) 19 | head_kernel = opt.head_kernel 20 | else: 21 | head_kernel = 3 22 | self.num_stacks = num_stacks 23 | self.heads = heads 24 | for head in self.heads: 25 | classes = self.heads[head] 26 | head_conv = head_convs[head] 27 | if len(head_conv) > 0: 28 | out = nn.Conv2d(head_conv[-1], classes, 29 | kernel_size=1, stride=1, padding=0, bias=True) 30 | conv = nn.Conv2d(last_channel, head_conv[0], 31 | kernel_size=head_kernel, 32 | padding=head_kernel // 2, bias=True) 33 | convs = [conv] 34 | for k in range(1, len(head_conv)): 35 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k], 36 | kernel_size=1, bias=True)) 37 | if len(convs) == 1: 38 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out) 39 | elif len(convs) == 2: 40 | fc = nn.Sequential( 41 | convs[0], nn.ReLU(inplace=True), 42 | convs[1], nn.ReLU(inplace=True), out) 43 | elif len(convs) == 3: 44 | fc = nn.Sequential( 45 | convs[0], nn.ReLU(inplace=True), 46 | convs[1], nn.ReLU(inplace=True), 47 | convs[2], nn.ReLU(inplace=True), out) 48 | elif len(convs) == 4: 49 | fc = nn.Sequential( 50 | convs[0], nn.ReLU(inplace=True), 51 | convs[1], nn.ReLU(inplace=True), 52 | convs[2], nn.ReLU(inplace=True), 53 | convs[3], nn.ReLU(inplace=True), out) 54 | if 'hm' in head: 55 | fc[-1].bias.data.fill_(opt.prior_bias) 56 | else: 57 | fill_fc_weights(fc) 58 | else: 59 | fc = nn.Conv2d(last_channel, classes, 60 | kernel_size=1, stride=1, padding=0, bias=True) 61 | if 'hm' in head: 62 | fc.bias.data.fill_(opt.prior_bias) 63 | else: 64 | fill_fc_weights(fc) 65 | self.__setattr__(head, fc) 66 | 67 | def img2feats(self, x): 68 | raise NotImplementedError 69 | 70 | def imgpre2feats(self, x, pre_img=None, pre_hm=None): 71 | raise NotImplementedError 72 | 73 | def forward(self, x, pre_img=None, pre_hm=None): 74 | if (pre_hm 
is not None) or (pre_img is not None): 75 | feats = self.imgpre2feats(x, pre_img, pre_hm) 76 | else: 77 | feats = self.img2feats(x) 78 | out = [] 79 | if self.opt.model_output_list: 80 | for s in range(self.num_stacks): 81 | z = [] 82 | for head in sorted(self.heads): 83 | z.append(self.__getattr__(head)(feats[s])) 84 | out.append(z) 85 | else: 86 | for s in range(self.num_stacks): 87 | z = {} 88 | for head in self.heads: 89 | z[head] = self.__getattr__(head)(feats[s]) 90 | out.append(z) 91 | return out 92 | -------------------------------------------------------------------------------- /src/lib/model/networks/generic_network.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from torch import nn 7 | from .backbones.dla import dla34 8 | from .backbones.resnet import Resnet 9 | from .backbones.mobilenet import MobileNetV2 10 | from .necks.dlaup import DLASeg 11 | from .necks.msraup import MSRAUp 12 | 13 | backbone_factory = { 14 | 'dla34': dla34, 15 | 'resnet': Resnet, 16 | 'mobilenet': MobileNetV2 17 | } 18 | 19 | neck_factory = { 20 | 'dlaup': DLASeg, 21 | 'msraup': MSRAUp 22 | } 23 | 24 | def fill_fc_weights(layers): 25 | for m in layers.modules(): 26 | if isinstance(m, nn.Conv2d): 27 | if m.bias is not None: 28 | nn.init.constant_(m.bias, 0) 29 | 30 | class GenericNetwork(nn.Module): 31 | def __init__(self, num_layers, heads, head_convs, num_stacks=1, opt=None): 32 | super(GenericNetwork, self).__init__() 33 | print('Using generic model with backbone {} and neck {}'.format( 34 | opt.backbone, opt.neck)) 35 | # assert (not opt.pre_hm) and (not opt.pre_img) 36 | if opt is not None and opt.head_kernel != 3: 37 | print('Using head kernel:', opt.head_kernel) 38 | head_kernel = opt.head_kernel 39 | else: 40 | head_kernel = 3 41 | self.opt = opt 42 | self.backbone = backbone_factory[opt.backbone](opt=opt) 43 | channels = self.backbone.channels 44 | self.neck = neck_factory[opt.neck](opt=opt, channels=channels) 45 | last_channel = self.neck.out_channel 46 | self.num_stacks = num_stacks 47 | self.heads = heads 48 | for head in self.heads: 49 | classes = self.heads[head] 50 | head_conv = head_convs[head] 51 | if len(head_conv) > 0: 52 | out = nn.Conv2d(head_conv[-1], classes, 53 | kernel_size=1, stride=1, padding=0, bias=True) 54 | conv = nn.Conv2d(last_channel, head_conv[0], 55 | kernel_size=head_kernel, 56 | padding=head_kernel // 2, bias=True) 57 | convs = [conv] 58 | for k in range(1, len(head_conv)): 59 | convs.append(nn.Conv2d(head_conv[k - 1], head_conv[k], 60 | kernel_size=1, bias=True)) 61 | if len(convs) == 1: 62 | fc = nn.Sequential(conv, nn.ReLU(inplace=True), out) 63 | elif len(convs) == 2: 64 | fc = nn.Sequential( 65 | convs[0], nn.ReLU(inplace=True), 66 | convs[1], nn.ReLU(inplace=True), out) 67 | elif len(convs) == 3: 68 | fc = nn.Sequential( 69 | convs[0], nn.ReLU(inplace=True), 70 | convs[1], nn.ReLU(inplace=True), 71 | convs[2], nn.ReLU(inplace=True), out) 72 | elif len(convs) == 4: 73 | fc = nn.Sequential( 74 | convs[0], nn.ReLU(inplace=True), 75 | convs[1], nn.ReLU(inplace=True), 76 | convs[2], nn.ReLU(inplace=True), 77 | convs[3], nn.ReLU(inplace=True), out) 78 | if 'hm' in head: 79 | fc[-1].bias.data.fill_(opt.prior_bias) 80 | else: 81 | fill_fc_weights(fc) 82 | else: 83 | fc = nn.Conv2d(last_channel, classes, 84 | kernel_size=1, stride=1, padding=0, bias=True) 85 | if 'hm' in head: 86 | 
fc.bias.data.fill_(opt.prior_bias) 87 | else: 88 | fill_fc_weights(fc) 89 | self.__setattr__(head, fc) 90 | 91 | def forward(self, x, pre_img=None, pre_hm=None): 92 | y = self.backbone(x, pre_img, pre_hm) 93 | feats = self.neck(y) 94 | out = [] 95 | if self.opt.model_output_list: 96 | for s in range(self.num_stacks): 97 | z = [] 98 | for head in sorted(self.heads): 99 | z.append(self.__getattr__(head)(feats[s])) 100 | out.append(z) 101 | else: 102 | for s in range(self.num_stacks): 103 | z = {} 104 | for head in self.heads: 105 | z[head] = self.__getattr__(head)(feats[s]) 106 | out.append(z) 107 | return out 108 | -------------------------------------------------------------------------------- /src/lib/model/networks/necks/dlaup.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import math 7 | import logging 8 | import numpy as np 9 | from os.path import join 10 | 11 | import torch 12 | from torch import nn 13 | import torch.nn.functional as F 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | try: 17 | from ..DCNv2.dcn_v2 import DCN 18 | except: 19 | print('import DCN failed') 20 | DCN = None 21 | 22 | BN_MOMENTUM = 0.1 23 | 24 | class Identity(nn.Module): 25 | 26 | def __init__(self): 27 | super(Identity, self).__init__() 28 | 29 | def forward(self, x): 30 | return x 31 | 32 | 33 | def fill_fc_weights(layers): 34 | for m in layers.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | if m.bias is not None: 37 | nn.init.constant_(m.bias, 0) 38 | 39 | 40 | def fill_up_weights(up): 41 | w = up.weight.data 42 | f = math.ceil(w.size(2) / 2) 43 | c = (2 * f - 1 - f % 2) / (2. 
* f) 44 | for i in range(w.size(2)): 45 | for j in range(w.size(3)): 46 | w[0, 0, i, j] = \ 47 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 48 | for c in range(1, w.size(0)): 49 | w[c, 0, :, :] = w[0, 0, :, :] 50 | 51 | 52 | class Conv(nn.Module): 53 | def __init__(self, chi, cho): 54 | super(Conv, self).__init__() 55 | self.conv = nn.Sequential( 56 | nn.Conv2d(chi, cho, kernel_size=1, stride=1, bias=False), 57 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM), 58 | nn.ReLU(inplace=True)) 59 | 60 | def forward(self, x): 61 | return self.conv(x) 62 | 63 | 64 | class GlobalConv(nn.Module): 65 | def __init__(self, chi, cho, k=7, d=1): 66 | super(GlobalConv, self).__init__() 67 | gcl = nn.Sequential( 68 | nn.Conv2d(chi, cho, kernel_size=(k, 1), stride=1, bias=False, 69 | dilation=d, padding=(d * (k // 2), 0)), 70 | nn.Conv2d(cho, cho, kernel_size=(1, k), stride=1, bias=False, 71 | dilation=d, padding=(0, d * (k // 2)))) 72 | gcr = nn.Sequential( 73 | nn.Conv2d(chi, cho, kernel_size=(1, k), stride=1, bias=False, 74 | dilation=d, padding=(0, d * (k // 2))), 75 | nn.Conv2d(cho, cho, kernel_size=(k, 1), stride=1, bias=False, 76 | dilation=d, padding=(d * (k // 2), 0))) 77 | fill_fc_weights(gcl) 78 | fill_fc_weights(gcr) 79 | self.gcl = gcl 80 | self.gcr = gcr 81 | self.act = nn.Sequential( 82 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM), 83 | nn.ReLU(inplace=True) 84 | ) 85 | 86 | def forward(self, x): 87 | x = self.gcl(x) + self.gcr(x) 88 | x = self.act(x) 89 | return x 90 | 91 | 92 | class DeformConv(nn.Module): 93 | def __init__(self, chi, cho): 94 | super(DeformConv, self).__init__() 95 | self.actf = nn.Sequential( 96 | nn.BatchNorm2d(cho, momentum=BN_MOMENTUM), 97 | nn.ReLU(inplace=True) 98 | ) 99 | self.conv = DCN(chi, cho, kernel_size=(3,3), stride=1, padding=1, dilation=1, deformable_groups=1) 100 | 101 | def forward(self, x): 102 | x = self.conv(x) 103 | x = self.actf(x) 104 | return x 105 | 106 | 107 | class IDAUp(nn.Module): 108 | def __init__(self, o, channels, up_f, node_type=(DeformConv, DeformConv)): 109 | super(IDAUp, self).__init__() 110 | for i in range(1, len(channels)): 111 | c = channels[i] 112 | f = int(up_f[i]) 113 | proj = node_type[0](c, o) 114 | node = node_type[1](o, o) 115 | 116 | up = nn.ConvTranspose2d(o, o, f * 2, stride=f, 117 | padding=f // 2, output_padding=0, 118 | groups=o, bias=False) 119 | fill_up_weights(up) 120 | 121 | setattr(self, 'proj_' + str(i), proj) 122 | setattr(self, 'up_' + str(i), up) 123 | setattr(self, 'node_' + str(i), node) 124 | 125 | 126 | def forward(self, layers, startp, endp): 127 | for i in range(startp + 1, endp): 128 | upsample = getattr(self, 'up_' + str(i - startp)) 129 | project = getattr(self, 'proj_' + str(i - startp)) 130 | layers[i] = upsample(project(layers[i])) 131 | node = getattr(self, 'node_' + str(i - startp)) 132 | layers[i] = node(layers[i] + layers[i - 1]) 133 | 134 | 135 | 136 | class DLAUp(nn.Module): 137 | def __init__(self, startp, channels, scales, in_channels=None, 138 | node_type=DeformConv): 139 | super(DLAUp, self).__init__() 140 | self.startp = startp 141 | if in_channels is None: 142 | in_channels = channels 143 | self.channels = channels 144 | channels = list(channels) 145 | scales = np.array(scales, dtype=int) 146 | for i in range(len(channels) - 1): 147 | j = -i - 2 148 | setattr(self, 'ida_{}'.format(i), 149 | IDAUp(channels[j], in_channels[j:], 150 | scales[j:] // scales[j], 151 | node_type=node_type)) 152 | scales[j + 1:] = scales[j] 153 | in_channels[j + 1:] = [channels[j] for _ in channels[j + 
1:]] 154 | 155 | def forward(self, layers): 156 | out = [layers[-1]] # start with 32 157 | for i in range(len(layers) - self.startp - 1): 158 | ida = getattr(self, 'ida_{}'.format(i)) 159 | ida(layers, len(layers) -i - 2, len(layers)) 160 | out.insert(0, layers[-1]) 161 | return out 162 | 163 | DLA_NODE = { 164 | 'dcn': (DeformConv, DeformConv), 165 | 'gcn': (Conv, GlobalConv), 166 | 'conv': (Conv, Conv), 167 | } 168 | 169 | class DLASeg(nn.Module): 170 | def __init__(self, opt, channels): 171 | super().__init__() 172 | self.opt = opt 173 | self.channels = channels 174 | self.node_type = DLA_NODE[opt.dla_node] 175 | print('Using node type:', self.node_type) 176 | down_ratio = 4 177 | self.first_level = int(np.log2(down_ratio)) 178 | self.last_level = 5 179 | 180 | scales = [2 ** i for i in range(len(channels[self.first_level:]))] 181 | self.dla_up = DLAUp( 182 | self.first_level, channels[self.first_level:], scales, 183 | node_type=self.node_type) 184 | self.out_channel = channels[self.first_level] 185 | 186 | self.ida_up = IDAUp( 187 | self.out_channel, channels[self.first_level:self.last_level], 188 | [2 ** i for i in range(self.last_level - self.first_level)], 189 | node_type=self.node_type) 190 | 191 | 192 | def forward(self, x): 193 | x = self.dla_up(x) 194 | y = [] 195 | for i in range(self.last_level - self.first_level): 196 | y.append(x[i].clone()) 197 | self.ida_up(y, 0, len(y)) 198 | 199 | return [y[-1]] 200 | -------------------------------------------------------------------------------- /src/lib/model/networks/necks/msraup.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Dequan Wang and Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import os 13 | import math 14 | import logging 15 | 16 | import torch 17 | import torch.nn as nn 18 | 19 | try: 20 | from ..DCNv2.dcn_v2 import DCN 21 | except: 22 | print('import DCN failed') 23 | DCN = None 24 | 25 | 26 | BN_MOMENTUM = 0.1 27 | 28 | def fill_up_weights(up): 29 | w = up.weight.data 30 | f = math.ceil(w.size(2) / 2) 31 | c = (2 * f - 1 - f % 2) / (2. 
* f) 32 | for i in range(w.size(2)): 33 | for j in range(w.size(3)): 34 | w[0, 0, i, j] = \ 35 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 36 | for c in range(1, w.size(0)): 37 | w[c, 0, :, :] = w[0, 0, :, :] 38 | 39 | def fill_fc_weights(layers): 40 | for m in layers.modules(): 41 | if isinstance(m, nn.Conv2d): 42 | nn.init.normal_(m.weight, std=0.001) 43 | # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') 44 | # torch.nn.init.xavier_normal_(m.weight.data) 45 | if m.bias is not None: 46 | nn.init.constant_(m.bias, 0) 47 | 48 | class MSRAUp(nn.Module): 49 | # def __init__(self, block, layers, heads, head_conv): 50 | def __init__(self, opt, channels): 51 | super().__init__() 52 | self.opt = opt 53 | assert self.opt.msra_outchannel in [64, 256] 54 | self.deconv_with_bias = False 55 | self.inplanes = channels[-1] 56 | self.out_channel = self.opt.msra_outchannel 57 | # used for deconv layers 58 | if self.opt.msra_outchannel == 64: 59 | print('Using slimed resnet: 256 128 64 up channels.') 60 | self.deconv_layers = self._make_deconv_layer( 61 | 3, 62 | [256, 128, 64], 63 | [4, 4, 4], 64 | ) 65 | else: 66 | print('Using original resnet: 256 256 256 up channels.') 67 | print('Using 256 deconvs') 68 | self.deconv_layers = self._make_deconv_layer( 69 | 3, 70 | [256, 256, 256], 71 | [4, 4, 4], 72 | ) 73 | self.init_weights() 74 | 75 | 76 | def forward(self, x): 77 | x = self.deconv_layers(x[-1]) 78 | return [x] 79 | 80 | def _get_deconv_cfg(self, deconv_kernel, index): 81 | if deconv_kernel == 4: 82 | padding = 1 83 | output_padding = 0 84 | elif deconv_kernel == 3: 85 | padding = 1 86 | output_padding = 1 87 | elif deconv_kernel == 2: 88 | padding = 0 89 | output_padding = 0 90 | 91 | return deconv_kernel, padding, output_padding 92 | 93 | def _make_deconv_layer(self, num_layers, num_filters, num_kernels): 94 | assert num_layers == len(num_filters), \ 95 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 96 | assert num_layers == len(num_kernels), \ 97 | 'ERROR: num_deconv_layers is different len(num_deconv_filters)' 98 | 99 | layers = [] 100 | for i in range(num_layers): 101 | kernel, padding, output_padding = \ 102 | self._get_deconv_cfg(num_kernels[i], i) 103 | 104 | planes = num_filters[i] 105 | fc = DCN(self.inplanes, planes, 106 | kernel_size=(3,3), stride=1, 107 | padding=1, dilation=1, deformable_groups=1) 108 | # fc = nn.Conv2d(self.inplanes, planes, 109 | # kernel_size=3, stride=1, 110 | # padding=1, dilation=1, bias=False) 111 | # fill_fc_weights(fc) 112 | up = nn.ConvTranspose2d( 113 | in_channels=planes, 114 | out_channels=planes, 115 | kernel_size=kernel, 116 | stride=2, 117 | padding=padding, 118 | output_padding=output_padding, 119 | bias=self.deconv_with_bias) 120 | fill_up_weights(up) 121 | 122 | layers.append(fc) 123 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 124 | layers.append(nn.ReLU(inplace=True)) 125 | layers.append(up) 126 | layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) 127 | layers.append(nn.ReLU(inplace=True)) 128 | self.inplanes = planes 129 | 130 | return nn.Sequential(*layers) 131 | 132 | def init_weights(self): 133 | for name, m in self.deconv_layers.named_modules(): 134 | if isinstance(m, nn.BatchNorm2d): 135 | nn.init.constant_(m.weight, 1) 136 | nn.init.constant_(m.bias, 0) 137 | -------------------------------------------------------------------------------- /src/lib/model/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import 
torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/model/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _sigmoid12(x): 13 | y = torch.clamp(x.sigmoid_(), 1e-12) 14 | return y 15 | 16 | def _gather_feat(feat, ind): 17 | dim = feat.size(2) 18 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 19 | feat = feat.gather(1, ind) 20 | return feat 21 | 22 | def _tranpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 51 | 52 | def _nms(heat, kernel=3): 53 | pad = (kernel - 1) // 2 54 | 55 | hmax = nn.functional.max_pool2d( 56 | heat, (kernel, kernel), stride=1, padding=pad) 57 | keep = (hmax == heat).float() 58 | return heat * keep 59 | 60 | def _topk_channel(scores, K=100): 61 | batch, cat, height, width = scores.size() 62 | 63 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 64 | 65 | topk_inds = topk_inds % (height * width) 66 | topk_ys = (topk_inds / width).int().float() 67 | topk_xs = (topk_inds % width).int().float() 68 | 69 | return topk_scores, topk_inds, topk_ys, topk_xs 70 | 71 | def _topk(scores, K=100): 72 | batch, cat, height, width = scores.size() 73 | 74 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 75 | 76 | topk_inds = topk_inds % (height * width) 77 | topk_ys = (topk_inds / width).int().float() 78 | topk_xs = (topk_inds % width).int().float() 79 | 80 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 81 | topk_clses = (topk_ind / K).int() 82 | topk_inds = _gather_feat( 83 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 84 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 85 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 86 | 87 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 88 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def comput_corners_3d(dim, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners).transpose(1, 0) 22 | return corners_3d 23 | 24 | def compute_box_3d(dim, location, rotation_y): 25 | # dim: 3 26 | # location: 3 27 | # rotation_y: 1 28 | # return: 8 x 3 29 | corners_3d = comput_corners_3d(dim, rotation_y) 30 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(1, 3) 31 | return corners_3d 32 | 33 | def project_to_image(pts_3d, P): 34 | # pts_3d: n x 3 35 | # P: 3 x 4 36 | # return: n x 2 37 | pts_3d_homo = np.concatenate( 38 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 39 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 40 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 41 | # import pdb; pdb.set_trace() 42 | return pts_2d 43 | 44 | def compute_orientation_3d(dim, location, rotation_y): 45 | # dim: 3 46 | # location: 3 47 | # rotation_y: 1 48 | # return: 2 x 3 49 | c, 
s = np.cos(rotation_y), np.sin(rotation_y) 50 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 51 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 52 | orientation_3d = np.dot(R, orientation_3d) 53 | orientation_3d = orientation_3d + \ 54 | np.array(location, dtype=np.float32).reshape(3, 1) 55 | return orientation_3d.transpose(1, 0) 56 | 57 | def draw_box_3d(image, corners, c=(255, 0, 255), same_color=False): 58 | face_idx = [[0,1,5,4], 59 | [1,2,6, 5], 60 | [3,0,4,7], 61 | [2,3,7,6]] 62 | right_corners = [1, 2, 6, 5] if not same_color else [] 63 | left_corners = [0, 3, 7, 4] if not same_color else [] 64 | thickness = 4 if same_color else 2 65 | corners = corners.astype(np.int32) 66 | for ind_f in range(3, -1, -1): 67 | f = face_idx[ind_f] 68 | for j in range(4): 69 | # print('corners', corners) 70 | cc = c 71 | if (f[j] in left_corners) and (f[(j+1)%4] in left_corners): 72 | cc = (255, 0, 0) 73 | if (f[j] in right_corners) and (f[(j+1)%4] in right_corners): 74 | cc = (0, 0, 255) 75 | try: 76 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 77 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), cc, thickness, lineType=cv2.LINE_AA) 78 | except: 79 | pass 80 | if ind_f == 0: 81 | try: 82 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 83 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 84 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 85 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 86 | except: 87 | pass 88 | # top_idx = [0, 1, 2, 3] 89 | return image 90 | 91 | def unproject_2d_to_3d(pt_2d, depth, P): 92 | # pts_2d: 2 93 | # depth: 1 94 | # P: 3 x 4 95 | # return: 3 96 | z = depth - P[2, 3] 97 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 98 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 99 | pt_3d = np.array([x, y, z], dtype=np.float32).reshape(3) 100 | return pt_3d 101 | 102 | def alpha2rot_y(alpha, x, cx, fx): 103 | """ 104 | Get rotation_y by alpha + theta - 180 105 | alpha : Observation angle of object, ranging [-pi..pi] 106 | x : Object center x to the camera center (x-W/2), in pixels 107 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 108 | """ 109 | rot_y = alpha + np.arctan2(x - cx, fx) 110 | if rot_y > np.pi: 111 | rot_y -= 2 * np.pi 112 | if rot_y < -np.pi: 113 | rot_y += 2 * np.pi 114 | return rot_y 115 | 116 | def rot_y2alpha(rot_y, x, cx, fx): 117 | """ 118 | Get rotation_y by alpha + theta - 180 119 | alpha : Observation angle of object, ranging [-pi..pi] 120 | x : Object center x to the camera center (x-W/2), in pixels 121 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 122 | """ 123 | alpha = rot_y - np.arctan2(x - cx, fx) 124 | if alpha > np.pi: 125 | alpha -= 2 * np.pi 126 | if alpha < -np.pi: 127 | alpha += 2 * np.pi 128 | return alpha 129 | 130 | 131 | def ddd2locrot(center, alpha, dim, depth, calib): 132 | # single image 133 | locations = unproject_2d_to_3d(center, depth, calib) 134 | locations[1] += dim[0] / 2 135 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 136 | return locations, rotation_y 137 | 138 | def project_3d_bbox(location, dim, rotation_y, calib): 139 | box_3d = compute_box_3d(dim, location, rotation_y) 140 | box_2d = project_to_image(box_3d, calib) 141 | return box_2d 142 | 143 | 144 | if __name__ == '__main__': 145 | calib = np.array( 146 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 147 | 
[0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 148 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 149 | dtype=np.float32) 150 | alpha = -0.20 151 | tl = np.array([712.40, 143.00], dtype=np.float32) 152 | br = np.array([810.73, 307.92], dtype=np.float32) 153 | ct = (tl + br) / 2 154 | rotation_y = 0.01 155 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 156 | print('rotation_y', rotation_y) 157 | -------------------------------------------------------------------------------- /src/lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | def flip(img): 17 | return img[:, :, ::-1].copy() 18 | 19 | # @numba.jit(nopython=True, nogil=True) 20 | def transform_preds_with_trans(coords, trans): 21 | # target_coords = np.concatenate( 22 | # [coords, np.ones((coords.shape[0], 1), np.float32)], axis=1) 23 | target_coords = np.ones((coords.shape[0], 3), np.float32) 24 | target_coords[:, :2] = coords 25 | target_coords = np.dot(trans, target_coords.transpose()).transpose() 26 | return target_coords[:, :2] 27 | 28 | 29 | def transform_preds(coords, center, scale, output_size): 30 | target_coords = np.zeros(coords.shape) 31 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 32 | for p in range(coords.shape[0]): 33 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 34 | return target_coords 35 | 36 | 37 | def get_affine_transform(center, 38 | scale, 39 | rot, 40 | output_size, 41 | shift=np.array([0, 0], dtype=np.float32), 42 | inv=0): 43 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 44 | scale = np.array([scale, scale], dtype=np.float32) 45 | 46 | scale_tmp = scale 47 | src_w = scale_tmp[0] 48 | dst_w = output_size[0] 49 | dst_h = output_size[1] 50 | 51 | rot_rad = np.pi * rot / 180 52 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 53 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 54 | 55 | src = np.zeros((3, 2), dtype=np.float32) 56 | dst = np.zeros((3, 2), dtype=np.float32) 57 | src[0, :] = center + scale_tmp * shift 58 | src[1, :] = center + src_dir + scale_tmp * shift 59 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 60 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 61 | 62 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 63 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 64 | 65 | if inv: 66 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 67 | else: 68 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 69 | 70 | return trans 71 | 72 | 73 | def affine_transform(pt, t): 74 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 75 | new_pt = np.dot(t, new_pt) 76 | return new_pt[:2] 77 | 78 | 79 | def get_3rd_point(a, b): 80 | direct = a - b 81 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 82 | 83 | 84 | def get_dir(src_point, rot_rad): 85 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 86 | 87 | src_result = 
[0, 0] 88 | src_result[0] = src_point[0] * cs - src_point[1] * sn 89 | src_result[1] = src_point[0] * sn + src_point[1] * cs 90 | 91 | return src_result 92 | 93 | 94 | def crop(img, center, scale, output_size, rot=0): 95 | trans = get_affine_transform(center, scale, rot, output_size) 96 | 97 | dst_img = cv2.warpAffine(img, 98 | trans, 99 | (int(output_size[0]), int(output_size[1])), 100 | flags=cv2.INTER_LINEAR) 101 | 102 | return dst_img 103 | 104 | # @numba.jit(nopython=True, nogil=True) 105 | def gaussian_radius(det_size, min_overlap=0.7): 106 | height, width = det_size 107 | 108 | a1 = 1 109 | b1 = (height + width) 110 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 111 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 112 | r1 = (b1 + sq1) / 2 113 | 114 | a2 = 4 115 | b2 = 2 * (height + width) 116 | c2 = (1 - min_overlap) * width * height 117 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 118 | r2 = (b2 + sq2) / 2 119 | 120 | a3 = 4 * min_overlap 121 | b3 = -2 * min_overlap * (height + width) 122 | c3 = (min_overlap - 1) * width * height 123 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 124 | r3 = (b3 + sq3) / 2 125 | return min(r1, r2, r3) 126 | 127 | 128 | # @numba.jit(nopython=True, nogil=True) 129 | def gaussian2D(shape, sigma=1): 130 | m, n = [(ss - 1.) / 2. for ss in shape] 131 | y, x = np.ogrid[-m:m+1,-n:n+1] 132 | # y, x = np.arange(-m, m + 1).reshape(-1, 1), np.arange(-n, n + 1).reshape(1, -1) 133 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 134 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 135 | return h 136 | 137 | # @numba.jit(nopython=True, nogil=True) 138 | def draw_umich_gaussian(heatmap, center, radius, k=1): 139 | # import pdb; pdb.set_trace() 140 | diameter = 2 * radius + 1 141 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 142 | 143 | x, y = int(center[0]), int(center[1]) 144 | 145 | height, width = heatmap.shape[0:2] 146 | 147 | left, right = min(x, radius), min(width - x, radius + 1) 148 | top, bottom = min(y, radius), min(height - y, radius + 1) 149 | # import pdb; pdb.set_trace() 150 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 151 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 152 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 153 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 154 | return heatmap 155 | 156 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 157 | diameter = 2 * radius + 1 158 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 159 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 160 | dim = value.shape[0] 161 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 162 | if is_offset and dim == 2: 163 | delta = np.arange(diameter*2+1) - radius 164 | reg[0] = reg[0] - delta.reshape(1, -1) 165 | reg[1] = reg[1] - delta.reshape(-1, 1) 166 | 167 | x, y = int(center[0]), int(center[1]) 168 | 169 | height, width = heatmap.shape[0:2] 170 | 171 | left, right = min(x, radius), min(width - x, radius + 1) 172 | top, bottom = min(y, radius), min(height - y, radius + 1) 173 | 174 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 175 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 176 | masked_gaussian = gaussian[radius - top:radius + bottom, 177 | radius - left:radius + right] 178 | masked_reg = reg[:, radius - top:radius + bottom, 179 | radius - left:radius + right] 180 | if min(masked_gaussian.shape) > 0 and 
min(masked_heatmap.shape) > 0: # TODO debug 181 | idx = (masked_gaussian >= masked_heatmap).reshape( 182 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 183 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 184 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 185 | return regmap 186 | 187 | 188 | def draw_msra_gaussian(heatmap, center, sigma): 189 | tmp_size = sigma * 3 190 | mu_x = int(center[0] + 0.5) 191 | mu_y = int(center[1] + 0.5) 192 | w, h = heatmap.shape[0], heatmap.shape[1] 193 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 194 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 195 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 196 | return heatmap 197 | size = 2 * tmp_size + 1 198 | x = np.arange(0, size, 1, np.float32) 199 | y = x[:, np.newaxis] 200 | x0 = y0 = size // 2 201 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 202 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 203 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 204 | img_x = max(0, ul[0]), min(br[0], h) 205 | img_y = max(0, ul[1]), min(br[1], w) 206 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 207 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 208 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 209 | return heatmap 210 | 211 | def grayscale(image): 212 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 213 | 214 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 215 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 216 | image += np.dot(eigvec, eigval * alpha) 217 | 218 | def blend_(alpha, image1, image2): 219 | image1 *= alpha 220 | image2 *= (1 - alpha) 221 | image1 += image2 222 | 223 | def saturation_(data_rng, image, gs, gs_mean, var): 224 | alpha = 1. + data_rng.uniform(low=-var, high=var) 225 | blend_(alpha, image, gs[:, :, None]) 226 | 227 | def brightness_(data_rng, image, gs, gs_mean, var): 228 | alpha = 1. + data_rng.uniform(low=-var, high=var) 229 | image *= alpha 230 | 231 | def contrast_(data_rng, image, gs, gs_mean, var): 232 | alpha = 1. 
+ data_rng.uniform(low=-var, high=var) 233 | blend_(alpha, image, gs_mean) 234 | 235 | def color_aug(data_rng, image, eig_val, eig_vec): 236 | functions = [brightness_, contrast_, saturation_] 237 | random.shuffle(functions) 238 | 239 | gs = grayscale(image) 240 | gs_mean = gs.mean() 241 | for f in functions: 242 | f(data_rng, image, gs, gs_mean, 0.4) 243 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 244 | -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | from .image import transform_preds_with_trans, get_affine_transform 8 | from .ddd_utils import ddd2locrot, comput_corners_3d 9 | from .ddd_utils import project_to_image, rot_y2alpha 10 | import numba 11 | 12 | def get_alpha(rot): 13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 15 | # return rot[:, 0] 16 | idx = rot[:, 1] > rot[:, 5] 17 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 18 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 19 | return alpha1 * idx + alpha2 * (1 - idx) 20 | 21 | def generic_post_process( 22 | opt, dets, c, s, h, w, num_classes, calibs=None, height=-1, width=-1): 23 | if not ('scores' in dets): 24 | return [{}], [{}] 25 | ret = [] 26 | 27 | for i in range(len(dets['scores'])): 28 | preds = [] 29 | trans = get_affine_transform( 30 | c[i], s[i], 0, (w, h), inv=1).astype(np.float32) 31 | for j in range(len(dets['scores'][i])): 32 | if dets['scores'][i][j] < opt.out_thresh: 33 | break 34 | item = {} 35 | item['score'] = dets['scores'][i][j] 36 | item['class'] = int(dets['clses'][i][j]) + 1 37 | item['ct'] = transform_preds_with_trans( 38 | (dets['cts'][i][j]).reshape(1, 2), trans).reshape(2) 39 | 40 | if 'tracking' in dets: 41 | tracking = transform_preds_with_trans( 42 | (dets['tracking'][i][j] + dets['cts'][i][j]).reshape(1, 2), 43 | trans).reshape(2) 44 | item['tracking'] = tracking - item['ct'] 45 | 46 | if 'bboxes' in dets: 47 | bbox = transform_preds_with_trans( 48 | dets['bboxes'][i][j].reshape(2, 2), trans).reshape(4) 49 | item['bbox'] = bbox 50 | 51 | if 'hps' in dets: 52 | pts = transform_preds_with_trans( 53 | dets['hps'][i][j].reshape(-1, 2), trans).reshape(-1) 54 | item['hps'] = pts 55 | 56 | if 'dep' in dets and len(dets['dep'][i]) > j: 57 | item['dep'] = dets['dep'][i][j] 58 | 59 | if 'dim' in dets and len(dets['dim'][i]) > j: 60 | item['dim'] = dets['dim'][i][j] 61 | 62 | if 'rot' in dets and len(dets['rot'][i]) > j: 63 | item['alpha'] = get_alpha(dets['rot'][i][j:j+1])[0] 64 | 65 | if 'rot' in dets and 'dep' in dets and 'dim' in dets \ 66 | and len(dets['dep'][i]) > j: 67 | if 'amodel_offset' in dets and len(dets['amodel_offset'][i]) > j: 68 | ct_output = dets['bboxes'][i][j].reshape(2, 2).mean(axis=0) 69 | amodel_ct_output = ct_output + dets['amodel_offset'][i][j] 70 | ct = transform_preds_with_trans( 71 | amodel_ct_output.reshape(1, 2), trans).reshape(2).tolist() 72 | else: 73 | bbox = item['bbox'] 74 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 75 | item['ct'] = ct 76 | item['loc'], item['rot_y'] = ddd2locrot( 77 | ct, item['alpha'], item['dim'], item['dep'], calibs[i]) 78 | 79 | preds.append(item) 80 | 81 | if 'nuscenes_att' in dets: 82 | for j in range(len(preds)): 83 | 
preds[j]['nuscenes_att'] = dets['nuscenes_att'][i][j] 84 | 85 | if 'velocity' in dets: 86 | for j in range(len(preds)): 87 | preds[j]['velocity'] = dets['velocity'][i][j] 88 | 89 | ret.append(preds) 90 | 91 | return ret -------------------------------------------------------------------------------- /src/lib/utils/tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.utils.linear_assignment_ import linear_assignment 3 | from numba import jit 4 | import copy 5 | 6 | class Tracker(object): 7 | def __init__(self, opt): 8 | self.opt = opt 9 | self.reset() 10 | 11 | def init_track(self, results): 12 | for item in results: 13 | if item['score'] > self.opt.new_thresh: 14 | self.id_count += 1 15 | # active and age are never used in the paper 16 | item['active'] = 1 17 | item['age'] = 1 18 | item['tracking_id'] = self.id_count 19 | if not ('ct' in item): 20 | bbox = item['bbox'] 21 | item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 22 | self.tracks.append(item) 23 | 24 | def reset(self): 25 | self.id_count = 0 26 | self.tracks = [] 27 | 28 | def step(self, results, public_det=None): 29 | N = len(results) 30 | M = len(self.tracks) 31 | 32 | dets = np.array( 33 | [det['ct'] + det['tracking'] for det in results], np.float32) # N x 2 34 | track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ 35 | (track['bbox'][3] - track['bbox'][1])) \ 36 | for track in self.tracks], np.float32) # M 37 | track_cat = np.array([track['class'] for track in self.tracks], np.int32) # M 38 | item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \ 39 | (item['bbox'][3] - item['bbox'][1])) \ 40 | for item in results], np.float32) # N 41 | item_cat = np.array([item['class'] for item in results], np.int32) # N 42 | tracks = np.array( 43 | [pre_det['ct'] for pre_det in self.tracks], np.float32) # M x 2 44 | dist = (((tracks.reshape(1, -1, 2) - \ 45 | dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M 46 | 47 | invalid = ((dist > track_size.reshape(1, M)) + \ 48 | (dist > item_size.reshape(N, 1)) + \ 49 | (item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0 50 | dist = dist + invalid * 1e18 51 | 52 | if self.opt.hungarian: 53 | item_score = np.array([item['score'] for item in results], np.float32) # N 54 | dist[dist > 1e18] = 1e18 55 | matched_indices = linear_assignment(dist) 56 | else: 57 | matched_indices = greedy_assignment(copy.deepcopy(dist)) 58 | unmatched_dets = [d for d in range(dets.shape[0]) \ 59 | if not (d in matched_indices[:, 0])] 60 | unmatched_tracks = [d for d in range(tracks.shape[0]) \ 61 | if not (d in matched_indices[:, 1])] 62 | 63 | if self.opt.hungarian: 64 | matches = [] 65 | for m in matched_indices: 66 | if dist[m[0], m[1]] > 1e16: 67 | unmatched_dets.append(m[0]) 68 | unmatched_tracks.append(m[1]) 69 | else: 70 | matches.append(m) 71 | matches = np.array(matches).reshape(-1, 2) 72 | else: 73 | matches = matched_indices 74 | 75 | ret = [] 76 | for m in matches: 77 | track = results[m[0]] 78 | track['tracking_id'] = self.tracks[m[1]]['tracking_id'] 79 | track['age'] = 1 80 | track['active'] = self.tracks[m[1]]['active'] + 1 81 | ret.append(track) 82 | 83 | if self.opt.public_det and len(unmatched_dets) > 0: 84 | # Public detection: only create tracks from provided detections 85 | pub_dets = np.array([d['ct'] for d in public_det], np.float32) 86 | dist3 = ((dets.reshape(-1, 1, 2) - pub_dets.reshape(1, -1, 2)) ** 2).sum( 87 | axis=2) 88 | matched_dets = [d for d in range(dets.shape[0]) \ 89 
| if not (d in unmatched_dets)] 90 | dist3[matched_dets] = 1e18 91 | for j in range(len(pub_dets)): 92 | i = dist3[:, j].argmin() 93 | if dist3[i, j] < item_size[i]: 94 | dist3[i, :] = 1e18 95 | track = results[i] 96 | if track['score'] > self.opt.new_thresh: 97 | self.id_count += 1 98 | track['tracking_id'] = self.id_count 99 | track['age'] = 1 100 | track['active'] = 1 101 | ret.append(track) 102 | else: 103 | # Private detection: create tracks for all un-matched detections 104 | for i in unmatched_dets: 105 | track = results[i] 106 | if track['score'] > self.opt.new_thresh: 107 | self.id_count += 1 108 | track['tracking_id'] = self.id_count 109 | track['age'] = 1 110 | track['active'] = 1 111 | ret.append(track) 112 | 113 | for i in unmatched_tracks: 114 | track = self.tracks[i] 115 | if track['age'] < self.opt.max_age: 116 | track['age'] += 1 117 | track['active'] = 0 118 | bbox = track['bbox'] 119 | ct = track['ct'] 120 | v = [0, 0] 121 | track['bbox'] = [ 122 | bbox[0] + v[0], bbox[1] + v[1], 123 | bbox[2] + v[0], bbox[3] + v[1]] 124 | track['ct'] = [ct[0] + v[0], ct[1] + v[1]] 125 | ret.append(track) 126 | self.tracks = ret 127 | return ret 128 | 129 | def greedy_assignment(dist): 130 | matched_indices = [] 131 | if dist.shape[1] == 0: 132 | return np.array(matched_indices, np.int32).reshape(-1, 2) 133 | for i in range(dist.shape[0]): 134 | j = dist[i].argmin() 135 | if dist[i][j] < 1e16: 136 | dist[:, j] = 1e18 137 | matched_indices.append([i, j]) 138 | return np.array(matched_indices, np.int32).reshape(-1, 2) 139 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | import os 7 | 8 | import torch 9 | import torch.utils.data 10 | from opts import opts 11 | from model.model import create_model, load_model, save_model 12 | from model.data_parallel import DataParallel 13 | from logger import Logger 14 | from dataset.dataset_factory import get_dataset 15 | from trainer import Trainer 16 | 17 | def get_optimizer(opt, model): 18 | if opt.optim == 'adam': 19 | optimizer = torch.optim.Adam(model.parameters(), opt.lr) 20 | elif opt.optim == 'sgd': 21 | print('Using SGD') 22 | optimizer = torch.optim.SGD( 23 | model.parameters(), opt.lr, momentum=0.9, weight_decay=0.0001) 24 | else: 25 | assert 0, opt.optim 26 | return optimizer 27 | 28 | def main(opt): 29 | torch.manual_seed(opt.seed) 30 | torch.backends.cudnn.benchmark = not opt.not_cuda_benchmark and not opt.test 31 | Dataset = get_dataset(opt.dataset) 32 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 33 | print(opt) 34 | if not 
opt.not_set_cuda_env: 35 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 36 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu') 37 | logger = Logger(opt) 38 | 39 | print('Creating model...') 40 | model = create_model(opt.arch, opt.heads, opt.head_conv, opt=opt) 41 | optimizer = get_optimizer(opt, model) 42 | start_epoch = 0 43 | if opt.load_model != '': 44 | model, optimizer, start_epoch = load_model( 45 | model, opt.load_model, opt, optimizer) 46 | 47 | trainer = Trainer(opt, model, optimizer) 48 | trainer.set_device(opt.gpus, opt.chunk_sizes, opt.device) 49 | 50 | if opt.val_intervals < opt.num_epochs or opt.test: 51 | print('Setting up validation data...') 52 | val_loader = torch.utils.data.DataLoader( 53 | Dataset(opt, 'val'), batch_size=1, shuffle=False, num_workers=1, 54 | pin_memory=True) 55 | 56 | if opt.test: 57 | _, preds = trainer.val(0, val_loader) 58 | val_loader.dataset.run_eval(preds, opt.save_dir) 59 | return 60 | 61 | print('Setting up train data...') 62 | train_loader = torch.utils.data.DataLoader( 63 | Dataset(opt, 'train'), batch_size=opt.batch_size, shuffle=True, 64 | num_workers=opt.num_workers, pin_memory=True, drop_last=True 65 | ) 66 | 67 | print('Starting training...') 68 | for epoch in range(start_epoch + 1, opt.num_epochs + 1): 69 | mark = epoch if opt.save_all else 'last' 70 | log_dict_train, _ = trainer.train(epoch, train_loader) 71 | logger.write('epoch: {} |'.format(epoch)) 72 | for k, v in log_dict_train.items(): 73 | logger.scalar_summary('train_{}'.format(k), v, epoch) 74 | logger.write('{} {:8f} | '.format(k, v)) 75 | if opt.val_intervals > 0 and epoch % opt.val_intervals == 0: 76 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(mark)), 77 | epoch, model, optimizer) 78 | with torch.no_grad(): 79 | log_dict_val, preds = trainer.val(epoch, val_loader) 80 | if opt.eval_val: 81 | val_loader.dataset.run_eval(preds, opt.save_dir) 82 | for k, v in log_dict_val.items(): 83 | logger.scalar_summary('val_{}'.format(k), v, epoch) 84 | logger.write('{} {:8f} | '.format(k, v)) 85 | else: 86 | save_model(os.path.join(opt.save_dir, 'model_last.pth'), 87 | epoch, model, optimizer) 88 | logger.write('\n') 89 | if epoch in opt.save_point: 90 | save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)), 91 | epoch, model, optimizer) 92 | if epoch in opt.lr_step: 93 | lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1)) 94 | print('Drop LR to', lr) 95 | for param_group in optimizer.param_groups: 96 | param_group['lr'] = lr 97 | logger.close() 98 | 99 | if __name__ == '__main__': 100 | opt = opts().parse() 101 | main(opt) 102 | -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | import os 7 | import json 8 | import cv2 9 | import numpy as np 10 | import time 11 | from progress.bar import Bar 12 | import torch 13 | import copy 14 | 15 | from opts import opts 16 | from logger import Logger 17 | from utils.utils import AverageMeter 18 | from dataset.dataset_factory import dataset_factory 19 | from detector import Detector 20 | 21 | 22 | class PrefetchDataset(torch.utils.data.Dataset): 23 | def __init__(self, opt, dataset, pre_process_func): 24 | self.images = dataset.images 25 | self.load_image_func = dataset.coco.loadImgs 26 | self.img_dir = dataset.img_dir 27 | 
self.pre_process_func = pre_process_func 28 | self.get_default_calib = dataset.get_default_calib 29 | self.opt = opt 30 | 31 | def __getitem__(self, index): 32 | img_id = self.images[index] 33 | img_info = self.load_image_func(ids=[img_id])[0] 34 | img_path = os.path.join(self.img_dir, img_info['file_name']) 35 | image = cv2.imread(img_path) 36 | images, meta = {}, {} 37 | for scale in opt.test_scales: 38 | input_meta = {} 39 | calib = img_info['calib'] if 'calib' in img_info \ 40 | else self.get_default_calib(image.shape[1], image.shape[0]) 41 | input_meta['calib'] = calib 42 | images[scale], meta[scale] = self.pre_process_func( 43 | image, scale, input_meta) 44 | ret = {'images': images, 'image': image, 'meta': meta} 45 | if 'frame_id' in img_info and img_info['frame_id'] == 1: 46 | ret['is_first_frame'] = 1 47 | ret['video_id'] = img_info['video_id'] 48 | return img_id, ret 49 | 50 | def __len__(self): 51 | return len(self.images) 52 | 53 | def prefetch_test(opt): 54 | if not opt.not_set_cuda_env: 55 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 56 | Dataset = dataset_factory[opt.test_dataset] 57 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 58 | print(opt) 59 | Logger(opt) 60 | 61 | split = 'val' if not opt.trainval else 'test' 62 | dataset = Dataset(opt, split) 63 | detector = Detector(opt) 64 | 65 | if opt.load_results != '': 66 | load_results = json.load(open(opt.load_results, 'r')) 67 | for img_id in load_results: 68 | for k in range(len(load_results[img_id])): 69 | if load_results[img_id][k]['class'] - 1 in opt.ignore_loaded_cats: 70 | load_results[img_id][k]['score'] = -1 71 | else: 72 | load_results = {} 73 | 74 | data_loader = torch.utils.data.DataLoader( 75 | PrefetchDataset(opt, dataset, detector.pre_process), 76 | batch_size=1, shuffle=False, num_workers=1, pin_memory=True) 77 | 78 | results = {} 79 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 80 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 81 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge', 'track'] 82 | avg_time_stats = {t: AverageMeter() for t in time_stats} 83 | if opt.use_loaded_results: 84 | for img_id in data_loader.dataset.images: 85 | results[img_id] = load_results['{}'.format(img_id)] 86 | num_iters = 0 87 | for ind, (img_id, pre_processed_images) in enumerate(data_loader): 88 | if ind >= num_iters: 89 | break 90 | if opt.tracking and ('is_first_frame' in pre_processed_images): 91 | if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results: 92 | pre_processed_images['meta']['pre_dets'] = \ 93 | load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))] 94 | else: 95 | print() 96 | print('No pre_dets for', int(img_id.numpy().astype(np.int32)[0]), 97 | '. 
Use empty initialization.') 98 | pre_processed_images['meta']['pre_dets'] = [] 99 | detector.reset_tracking() 100 | print('Start tracking video', int(pre_processed_images['video_id'])) 101 | if opt.public_det: 102 | if '{}'.format(int(img_id.numpy().astype(np.int32)[0])) in load_results: 103 | pre_processed_images['meta']['cur_dets'] = \ 104 | load_results['{}'.format(int(img_id.numpy().astype(np.int32)[0]))] 105 | else: 106 | print('No cur_dets for', int(img_id.numpy().astype(np.int32)[0])) 107 | pre_processed_images['meta']['cur_dets'] = [] 108 | 109 | ret = detector.run(pre_processed_images) 110 | results[int(img_id.numpy().astype(np.int32)[0])] = ret['results'] 111 | 112 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 113 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 114 | for t in avg_time_stats: 115 | avg_time_stats[t].update(ret[t]) 116 | Bar.suffix = Bar.suffix + '|{} {tm.val:.3f}s ({tm.avg:.3f}s) '.format( 117 | t, tm = avg_time_stats[t]) 118 | if opt.print_iter > 0: 119 | if ind % opt.print_iter == 0: 120 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 121 | else: 122 | bar.next() 123 | bar.finish() 124 | if opt.save_results: 125 | print('saving results to', opt.save_dir + '/save_results_{}{}.json'.format( 126 | opt.test_dataset, opt.dataset_version)) 127 | json.dump(_to_list(copy.deepcopy(results)), 128 | open(opt.save_dir + '/save_results_{}{}.json'.format( 129 | opt.test_dataset, opt.dataset_version), 'w')) 130 | dataset.run_eval(results, opt.save_dir) 131 | 132 | def test(opt): 133 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 134 | 135 | Dataset = dataset_factory[opt.test_dataset] 136 | opt = opts().update_dataset_info_and_set_heads(opt, Dataset) 137 | print(opt) 138 | Logger(opt) 139 | 140 | split = 'val' if not opt.trainval else 'test' 141 | dataset = Dataset(opt, split) 142 | detector = Detector(opt) 143 | 144 | if opt.load_results != '': # load results in json 145 | load_results = json.load(open(opt.load_results, 'r')) 146 | 147 | results = {} 148 | num_iters = len(dataset) if opt.num_iters < 0 else opt.num_iters 149 | bar = Bar('{}'.format(opt.exp_id), max=num_iters) 150 | time_stats = ['tot', 'load', 'pre', 'net', 'dec', 'post', 'merge'] 151 | avg_time_stats = {t: AverageMeter() for t in time_stats} 152 | for ind in range(num_iters): 153 | img_id = dataset.images[ind] 154 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 155 | img_path = os.path.join(dataset.img_dir, img_info['file_name']) 156 | input_meta = {} 157 | if 'calib' in img_info: 158 | input_meta['calib'] = img_info['calib'] 159 | if (opt.tracking and ('frame_id' in img_info) and img_info['frame_id'] == 1): 160 | detector.reset_tracking() 161 | input_meta['pre_dets'] = load_results[img_id] 162 | 163 | ret = detector.run(img_path, input_meta) 164 | results[img_id] = ret['results'] 165 | 166 | Bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format( 167 | ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td) 168 | for t in avg_time_stats: 169 | avg_time_stats[t].update(ret[t]) 170 | Bar.suffix = Bar.suffix + '|{} {:.3f} '.format(t, avg_time_stats[t].avg) 171 | bar.next() 172 | bar.finish() 173 | if opt.save_results: 174 | print('saving results to', opt.save_dir + '/save_results_{}{}.json'.format( 175 | opt.test_dataset, opt.dataset_version)) 176 | json.dump(_to_list(copy.deepcopy(results)), 177 | open(opt.save_dir + '/save_results_{}{}.json'.format( 178 | opt.test_dataset, opt.dataset_version), 'w')) 179 | dataset.run_eval(results, opt.save_dir) 180 | 181 | 
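# _to_list (below) converts numpy arrays and numpy scalars stored in the
# per-image result dicts into plain Python lists/floats, so that the
# json.dump calls in prefetch_test() and test() above can serialize them.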
182 | def _to_list(results): 183 | for img_id in results: 184 | for t in range(len(results[img_id])): 185 | for k in results[img_id][t]: 186 | if isinstance(results[img_id][t][k], (np.ndarray, np.float32)): 187 | results[img_id][t][k] = results[img_id][t][k].tolist() 188 | return results 189 | 190 | if __name__ == '__main__': 191 | opt = opts().parse() 192 | if opt.not_prefetch_test: 193 | test(opt) 194 | else: 195 | prefetch_test(opt) 196 | -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/annot_bbox.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import cv2 5 | import argparse 6 | import numpy as np 7 | image_ext = ['jpg', 'jpeg', 'png', 'webp'] 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--image_path', default='') 11 | parser.add_argument('--save_path', default='') 12 | MAX_CACHE = 20 13 | CAT_NAMES = ['cat'] 14 | 15 | def _sort_expt(pts): 16 | t, l, b, r = 0, 0, 0, 0 17 | for i in range(4): 18 | if pts[i][0] < pts[l][0]: 19 | l = i 20 | if pts[i][1] < pts[t][1]: 21 | t = i 22 | if pts[i][0] > pts[r][0]: 23 | r = i 24 | if pts[i][1] > pts[b][1]: 25 | b = i 26 | ret = [pts[t], pts[l], pts[b], pts[r]] 27 | return ret 28 | 29 | def _expt2bbox(expt): 30 | expt = np.array(expt, dtype=np.int32) 31 | bbox = [int(expt[:, 0].min()), int(expt[:, 1].min()), 32 | int(expt[:, 0].max()), int(expt[:, 1].max())] 33 | return bbox 34 | 35 | def save_txt(txt_name, pts_cls): 36 | ret = [] 37 | for i in range(len(pts_cls)): 38 | ret.append(np.array(pts_cls[i][:4], dtype=np.int32).reshape(8).tolist() \ 39 | + [pts_cls[i][4]]) 40 | np.savetxt(txt_name, np.array(ret, dtype=np.int32), fmt='%d') 41 | 42 | def click(event, x, y, flags, param): 43 | global expt_cls, bboxes, pts 44 | if event == cv2.EVENT_LBUTTONDOWN: 45 | pts.append([x, y]) 46 | cv2.circle(img, (x, y), 5, (255, 0, 255), -1) 47 | if len(pts) == 4: 48 | expt = _sort_expt(pts) 49 | bbox = _expt2bbox(expt) 50 | expt_cls.append(expt + [cls]) 51 | cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), 52 | (255, 0, 255), 2, cv2.LINE_AA) 53 | pts = [] 54 | 55 | if __name__ == '__main__': 56 | cat_info = [] 57 | for i, cat in enumerate(CAT_NAMES): 58 | cat_info.append({'name': cat, 'id': i + 1}) 59 | 60 | args = parser.parse_args() 61 | if args.save_path == '': 62 | args.save_path = os.path.join(args.image_path, '..', 'click_annotation') 63 | if not os.path.exists(args.save_path): 64 | os.mkdir(args.save_path) 65 | 66 | ann_path = os.path.join(args.save_path, 'annotations.json') 67 | if os.path.exists(ann_path): 68 | anns = json.load(open(ann_path, 'r')) 69 | else: 70 | anns = {'annotations': [], 'images': [], 'categories': cat_info} 71 | 72 | assert os.path.exists(args.image_path) 73 | ls = os.listdir(args.image_path) 74 | image_names = [] 75 | for file_name in sorted(ls): 76 | ext = file_name[file_name.rfind('.') + 1:].lower() 77 | if (ext in image_ext): 78 | image_names.append(file_name) 79 | 80 | i = 0 81 | cls = 1 82 | cached = 0 83 | while 
i < len(image_names): 84 | image_name = image_names[i] 85 | txt_name = os.path.join( 86 | args.save_path, image_name[:image_name.rfind('.')] + '.txt') 87 | if os.path.exists(txt_name) or image_name in anns: 88 | i = i + 1 89 | continue 90 | image_path = os.path.join(args.image_path, image_name) 91 | img = cv2.imread(image_path) 92 | cv2.namedWindow(image_name) 93 | cv2.setMouseCallback(image_name, click) 94 | expt_cls, pts = [], [] 95 | while True: 96 | finished = False 97 | cv2.imshow(image_name, img) 98 | key = cv2.waitKey(1) 99 | if key == 100: # 'd': save annotations for this image and move to the next 100 | i = i + 1 101 | save_txt(txt_name, expt_cls) 102 | image_id = len(anns['images']) 103 | image_info = {'file_name': image_name, 'id': image_id} 104 | anns['images'].append(image_info) 105 | for ann in expt_cls: 106 | ann_id = len(anns['annotations']) 107 | ann_dict = {'image_id': image_id, 'id': ann_id, 'category_id': ann[4], 108 | 'bbox': _expt2bbox(ann[:4]), 'extreme_points': ann[:4]} 109 | anns['annotations'].append(ann_dict) 110 | cached = cached + 1 111 | print('saved to ', txt_name) 112 | if cached > MAX_CACHE: 113 | print('Saving json', ann_path) 114 | json.dump(anns, open(ann_path, 'w')) 115 | cached = 0 116 | break 117 | elif key == 97: # 'a': go back to the previous image 118 | i = i - 1 119 | break 120 | elif key == 27: # ESC: save the json and quit 121 | json.dump(anns, open(ann_path, 'w')) 122 | sys.exit(0) 123 | cv2.destroyAllWindows() 124 | -------------------------------------------------------------------------------- /src/tools/convert_crowdhuman_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | DATA_PATH = '../../data/crowdhuman/' 7 | OUT_PATH = DATA_PATH + 'annotations/' 8 | SPLITS = ['val', 'train'] 9 | DEBUG = False 10 | 11 | def load_func(fpath): 12 | print('fpath', fpath) 13 | assert os.path.exists(fpath) 14 | with open(fpath,'r') as fid: 15 | lines = fid.readlines() 16 | records =[json.loads(line.strip('\n')) for line in lines] 17 | return records 18 | 19 | if __name__ == '__main__': 20 | if not os.path.exists(OUT_PATH): 21 | os.mkdir(OUT_PATH) 22 | for split in SPLITS: 23 | data_path = DATA_PATH + split 24 | out_path = OUT_PATH + '{}.json'.format(split) 25 | out = {'images': [], 'annotations': [], 26 | 'categories': [{'id': 1, 'name': 'person'}]} 27 | ann_path = DATA_PATH + '/annotation_{}.odgt'.format(split) 28 | anns_data = load_func(ann_path) 29 | image_cnt = 0 30 | ann_cnt = 0 31 | video_cnt = 0 32 | for ann_data in anns_data: 33 | image_cnt += 1 34 | image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 35 | 'id': image_cnt} 36 | out['images'].append(image_info) 37 | if split != 'test': 38 | anns = ann_data['gtboxes'] 39 | for i in range(len(anns)): 40 | ann_cnt += 1 41 | ann = {'id': ann_cnt, 42 | 'category_id': 1, 43 | 'image_id': image_cnt, 44 | 'bbox_vis': anns[i]['vbox'], 45 | 'bbox': anns[i]['fbox'], 46 | 'iscrowd': 1 if 'extra' in anns[i] and \ 47 | 'ignore' in anns[i]['extra'] and \ 48 | anns[i]['extra']['ignore'] == 1 else 0} 49 | out['annotations'].append(ann) 50 | print('loaded {} for {} images and {} samples'.format( 51 | split, len(out['images']), len(out['annotations']))) 52 | json.dump(out, open(out_path, 'w')) 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /src/tools/convert_kittitrack_to_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import 
print_function 4 | 5 | import pickle 6 | import json 7 | import numpy as np 8 | import os 9 | import cv2 10 | DATA_PATH = '../../data/kitti_tracking/' 11 | SPLITS = ['train_half', 'val_half', 'train', 'test'] 12 | VIDEO_SETS = {'train': range(21), 'test': range(29), 13 | 'train_half': range(21), 'val_half': range(21)} 14 | CREATE_HALF_LABEL = True 15 | DEBUG = False 16 | 17 | ''' 18 | #Values Name Description 19 | ---------------------------------------------------------------------------- 20 | 1 frame Frame within the sequence where the object appearers 21 | 1 track id Unique tracking id of this object within this sequence 22 | 1 type Describes the type of object: 'Car', 'Van', 'Truck', 23 | 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', 24 | 'Misc' or 'DontCare' 25 | 1 truncated Integer (0,1,2) indicating the level of truncation. 26 | Note that this is in contrast to the object detection 27 | benchmark where truncation is a float in [0,1]. 28 | 1 occluded Integer (0,1,2,3) indicating occlusion state: 29 | 0 = fully visible, 1 = partly occluded 30 | 2 = largely occluded, 3 = unknown 31 | 1 alpha Observation angle of object, ranging [-pi..pi] 32 | 4 bbox 2D bounding box of object in the image (0-based index): 33 | contains left, top, right, bottom pixel coordinates 34 | 3 dimensions 3D object dimensions: height, width, length (in meters) 35 | 3 location 3D object location x,y,z in camera coordinates (in meters) 36 | 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi] 37 | 1 score Only for results: Float, indicating confidence in 38 | detection, needed for p/r curves, higher is better. 39 | ''' 40 | 41 | def project_to_image(pts_3d, P): 42 | # pts_3d: n x 3 43 | # P: 3 x 4 44 | # return: n x 2 45 | pts_3d_homo = np.concatenate( 46 | [pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 47 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 48 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 49 | return pts_2d 50 | 51 | def read_clib(calib_path): 52 | f = open(calib_path, 'r') 53 | for i, line in enumerate(f): 54 | if i == 2: 55 | calib = np.array(line.strip().split(' ')[1:], dtype=np.float32) 56 | calib = calib.reshape(3, 4) 57 | return calib 58 | 59 | def _bbox_to_coco_bbox(bbox): 60 | return [(bbox[0]), (bbox[1]), 61 | (bbox[2] - bbox[0]), (bbox[3] - bbox[1])] 62 | 63 | cats = ['Pedestrian', 'Car', 'Cyclist', 'Van', 'Truck', 'Person_sitting', 64 | 'Tram', 'Misc', 'DontCare'] 65 | 66 | 67 | cat_ids = {cat: i + 1 for i, cat in enumerate(cats)} 68 | cat_ids['Person'] = cat_ids['Person_sitting'] 69 | 70 | cat_info = [] 71 | for i, cat in enumerate(cats): 72 | cat_info.append({'name': cat, 'id': i + 1}) 73 | 74 | if __name__ == '__main__': 75 | for split in SPLITS: 76 | ann_dir = DATA_PATH + '/label_02/' 77 | ret = {'images': [], 'annotations': [], "categories": cat_info, 78 | 'videos': []} 79 | num_images = 0 80 | for i in VIDEO_SETS[split]: 81 | image_id_base = num_images 82 | video_name = '{:04d}'.format(i) 83 | ret['videos'].append({'id': i + 1, 'file_name': video_name}) 84 | ann_dir = 'train' if not ('test' in split) else split 85 | video_path = DATA_PATH + \ 86 | '/data_tracking_image_2/{}ing/image_02/{}'.format(ann_dir, video_name) 87 | calib_path = DATA_PATH + 'data_tracking_calib/{}ing/calib/'.format(ann_dir) \ 88 | + '{}.txt'.format(video_name) 89 | calib = read_clib(calib_path) 90 | image_files = sorted(os.listdir(video_path)) 91 | num_images_video = len(image_files) 92 | if CREATE_HALF_LABEL and 'half' in split: 93 | image_range = [0, 
num_images_video // 2 - 1] if split == 'train_half' else \ 94 | [num_images_video // 2, num_images_video - 1] 95 | else: 96 | image_range = [0, num_images_video - 1] 97 | print('num_frames', video_name, image_range[1] - image_range[0] + 1) 98 | for j, image_name in enumerate(image_files): 99 | if (j < image_range[0] or j > image_range[1]): 100 | continue 101 | num_images += 1 102 | image_info = {'file_name': '{}/{:06d}.png'.format(video_name, j), 103 | 'id': num_images, 104 | 'calib': calib.tolist(), 105 | 'video_id': i + 1, 106 | 'frame_id': j + 1 - image_range[0]} 107 | ret['images'].append(image_info) 108 | 109 | if split == 'test': 110 | continue 111 | # 0 -1 DontCare -1 -1 -10.000000 219.310000 188.490000 245.500000 218.560000 -1000.000000 -1000.000000 -1000.000000 -10.000000 -1.000000 -1.000000 -1.000000 112 | ann_path = DATA_PATH + 'label_02/{}.txt'.format(video_name) 113 | anns = open(ann_path, 'r') 114 | 115 | if CREATE_HALF_LABEL and 'half' in split: 116 | label_out_folder = DATA_PATH + 'label_02_{}/'.format(split) 117 | label_out_path = label_out_folder + '{}.txt'.format(video_name) 118 | if not os.path.exists(label_out_folder): 119 | os.mkdir(label_out_folder) 120 | label_out_file = open(label_out_path, 'w') 121 | 122 | for ann_ind, txt in enumerate(anns): 123 | tmp = txt[:-1].split(' ') 124 | frame_id = int(tmp[0]) 125 | track_id = int(tmp[1]) 126 | cat_id = cat_ids[tmp[2]] 127 | truncated = int(float(tmp[3])) 128 | occluded = int(tmp[4]) 129 | alpha = float(tmp[5]) 130 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])] 131 | dim = [float(tmp[10]), float(tmp[11]), float(tmp[12])] 132 | location = [float(tmp[13]), float(tmp[14]), float(tmp[15])] 133 | rotation_y = float(tmp[16]) 134 | amodel_center = project_to_image( 135 | np.array([location[0], location[1] - dim[0] / 2, location[2]], 136 | np.float32).reshape(1, 3), calib)[0].tolist() 137 | ann = {'image_id': frame_id + 1 - image_range[0] + image_id_base, 138 | 'id': int(len(ret['annotations']) + 1), 139 | 'category_id': cat_id, 140 | 'dim': dim, 141 | 'bbox': _bbox_to_coco_bbox(bbox), 142 | 'depth': location[2], 143 | 'alpha': alpha, 144 | 'truncated': truncated, 145 | 'occluded': occluded, 146 | 'location': location, 147 | 'rotation_y': rotation_y, 148 | 'amodel_center': amodel_center, 149 | 'track_id': track_id + 1} 150 | if CREATE_HALF_LABEL and 'half' in split: 151 | if (frame_id < image_range[0] or frame_id > image_range[1]): 152 | continue 153 | out_frame_id = frame_id - image_range[0] 154 | label_out_file.write('{} {}'.format( 155 | out_frame_id, txt[txt.find(' ') + 1:])) 156 | 157 | ret['annotations'].append(ann) 158 | 159 | print("# images: ", len(ret['images'])) 160 | print("# annotations: ", len(ret['annotations'])) 161 | out_dir = '{}/annotations/'.format(DATA_PATH) 162 | if not os.path.exists(out_dir): 163 | os.mkdir(out_dir) 164 | out_path = '{}/annotations/tracking_{}.json'.format( 165 | DATA_PATH, split) 166 | json.dump(ret, open(out_path, 'w')) 167 | -------------------------------------------------------------------------------- /src/tools/convert_mot_det_to_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import os 4 | from collections import defaultdict 5 | split = 'val_half' 6 | 7 | DET_PATH = '../../data/mot17/' 8 | ANN_PATH = '../../data/mot17/annotations/{}.json'.format(split) 9 | OUT_DIR = '../../data/mot17/results/' 10 | OUT_PATH = OUT_DIR + '{}_det.json'.format(split) 11 | 12 | if __name__ == 
'__main__': 13 | if not os.path.exists(OUT_DIR): 14 | os.mkdir(OUT_DIR) 15 | seqs = [s for s in os.listdir(DET_PATH) if '_det' in s] 16 | data = json.load(open(ANN_PATH, 'r')) 17 | images = data['images'] 18 | image_to_anns = defaultdict(list) 19 | for seq in sorted(seqs): 20 | print('seq', seq) 21 | seq_path = '{}/{}/'.format(DET_PATH, seq) 22 | if split == 'val_half': 23 | ann_path = seq_path + 'det/det_val_half.txt' 24 | train_ann_path = seq_path + 'det/det_train_half.txt' 25 | train_anns = np.loadtxt(train_ann_path, dtype=np.float32, delimiter=',') 26 | frame_base = int(train_anns[:, 0].max()) 27 | else: 28 | ann_path = seq_path + 'det/det.txt' 29 | frame_base = 0 30 | if os.path.exists(ann_path): # parse the public detections when the file is present (the IS_THIRD_PARTY flag referenced here was never defined) 31 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 32 | for i in range(len(anns)): 33 | frame_id = int(anns[i][0]) 34 | file_name = '{}/img1/{:06d}.jpg'.format(seq, frame_id + frame_base) 35 | bbox = (anns[i][2:6]).tolist() 36 | score = 1 # float(anns[i][8]) 37 | image_to_anns[file_name].append(bbox + [score]) 38 | 39 | results = {} 40 | for image_info in images: 41 | image_id = image_info['id'] 42 | file_name = image_info['file_name'] 43 | dets = image_to_anns[file_name] 44 | results[image_id] = [] 45 | for det in dets: 46 | bbox = [float(det[0]), float(det[1]), \ 47 | float(det[0] + det[2]), float(det[1] + det[3])] 48 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 49 | results[image_id].append( 50 | {'bbox': bbox, 'score': float(det[4]), 'class': 1, 'ct': ct}) 51 | out_path = OUT_PATH 52 | json.dump(results, open(out_path, 'w')) 53 | -------------------------------------------------------------------------------- /src/tools/convert_mot_to_coco.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | import cv2 5 | 6 | # Use the same script for MOT16 7 | # DATA_PATH = '../../data/mot16/' 8 | DATA_PATH = '../../data/mot17/' 9 | OUT_PATH = DATA_PATH + 'annotations/' 10 | SPLITS = ['train_half', 'val_half', 'train', 'test'] 11 | HALF_VIDEO = True 12 | CREATE_SPLITTED_ANN = True 13 | CREATE_SPLITTED_DET = True 14 | 15 | if __name__ == '__main__': 16 | for split in SPLITS: 17 | data_path = DATA_PATH + (split if not HALF_VIDEO else 'train') 18 | out_path = OUT_PATH + '{}.json'.format(split) 19 | out = {'images': [], 'annotations': [], 20 | 'categories': [{'id': 1, 'name': 'pedestrian'}], 21 | 'videos': []} 22 | seqs = os.listdir(data_path) 23 | image_cnt = 0 24 | ann_cnt = 0 25 | video_cnt = 0 26 | for seq in sorted(seqs): 27 | if '.DS_Store' in seq: 28 | continue 29 | if 'mot17' in DATA_PATH and (split != 'test' and not ('FRCNN' in seq)): 30 | continue 31 | video_cnt += 1 32 | out['videos'].append({ 33 | 'id': video_cnt, 34 | 'file_name': seq}) 35 | seq_path = '{}/{}/'.format(data_path, seq) 36 | img_path = seq_path + 'img1/' 37 | ann_path = seq_path + 'gt/gt.txt' 38 | images = os.listdir(img_path) 39 | num_images = len([image for image in images if 'jpg' in image]) 40 | if HALF_VIDEO and ('half' in split): 41 | image_range = [0, num_images // 2] if 'train' in split else \ 42 | [num_images // 2 + 1, num_images - 1] 43 | else: 44 | image_range = [0, num_images - 1] 45 | for i in range(num_images): 46 | if (i < image_range[0] or i > image_range[1]): 47 | continue 48 | image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1), 49 | 'id': image_cnt + i + 1, 50 | 'frame_id': i + 1 - image_range[0], 51 | 'prev_image_id': image_cnt + i if i > 0 else -1, 52 | 'next_image_id': \ 53 | image_cnt 
+ i + 2 if i < num_images - 1 else -1, 54 | 'video_id': video_cnt} 55 | out['images'].append(image_info) 56 | print('{}: {} images'.format(seq, num_images)) 57 | if split != 'test': 58 | det_path = seq_path + 'det/det.txt' 59 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 60 | dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',') 61 | if CREATE_SPLITTED_ANN and ('half' in split): 62 | anns_out = np.array([anns[i] for i in range(anns.shape[0]) if \ 63 | int(anns[i][0]) - 1 >= image_range[0] and \ 64 | int(anns[i][0]) - 1 <= image_range[1]], np.float32) 65 | anns_out[:, 0] -= image_range[0] 66 | gt_out = seq_path + '/gt/gt_{}.txt'.format(split) 67 | fout = open(gt_out, 'w') 68 | for o in anns_out: 69 | fout.write( 70 | '{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format( 71 | int(o[0]),int(o[1]),int(o[2]),int(o[3]),int(o[4]),int(o[5]), 72 | int(o[6]),int(o[7]),o[8])) 73 | fout.close() 74 | if CREATE_SPLITTED_DET and ('half' in split): 75 | dets_out = np.array([dets[i] for i in range(dets.shape[0]) if \ 76 | int(dets[i][0]) - 1 >= image_range[0] and \ 77 | int(dets[i][0]) - 1 <= image_range[1]], np.float32) 78 | dets_out[:, 0] -= image_range[0] 79 | det_out = seq_path + '/det/det_{}.txt'.format(split) 80 | dout = open(det_out, 'w') 81 | for o in dets_out: 82 | dout.write( 83 | '{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format( 84 | int(o[0]),int(o[1]),float(o[2]),float(o[3]),float(o[4]),float(o[5]), 85 | float(o[6]))) 86 | dout.close() 87 | 88 | print(' {} ann images'.format(int(anns[:, 0].max()))) 89 | for i in range(anns.shape[0]): 90 | frame_id = int(anns[i][0]) 91 | if (frame_id - 1 < image_range[0] or frame_id - 1> image_range[1]): 92 | continue 93 | track_id = int(anns[i][1]) 94 | cat_id = int(anns[i][7]) 95 | ann_cnt += 1 96 | if not ('15' in DATA_PATH): 97 | if not (float(anns[i][8]) >= 0.25): 98 | continue 99 | if not (int(anns[i][6]) == 1): 100 | continue 101 | if (int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]): # Non-person 102 | continue 103 | if (int(anns[i][7]) in [2, 7, 8, 12]): # Ignored person 104 | category_id = -1 105 | else: 106 | category_id = 1 107 | else: 108 | category_id = 1 109 | ann = {'id': ann_cnt, 110 | 'category_id': category_id, 111 | 'image_id': image_cnt + frame_id, 112 | 'track_id': track_id, 113 | 'bbox': anns[i][2:6].tolist(), 114 | 'conf': float(anns[i][6])} 115 | out['annotations'].append(ann) 116 | image_cnt += num_images 117 | print('loaded {} for {} images and {} samples'.format( 118 | split, len(out['images']), len(out['annotations']))) 119 | json.dump(out, open(out_path, 'w')) 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 0020 empty 000000 000837 22 | 
-------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.test: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000465 2 | 0001 empty 000000 000147 3 | 0002 empty 000000 000243 4 | 0003 empty 000000 000257 5 | 0004 empty 000000 000421 6 | 0005 empty 000000 000809 7 | 0006 empty 000000 000114 8 | 0007 empty 000000 000215 9 | 0008 empty 000000 000165 10 | 0009 empty 000000 000349 11 | 0010 empty 000000 001176 12 | 0011 empty 000000 000774 13 | 0012 empty 000000 000694 14 | 0013 empty 000000 000152 15 | 0014 empty 000000 000850 16 | 0015 empty 000000 000701 17 | 0016 empty 000000 000510 18 | 0017 empty 000000 000305 19 | 0018 empty 000000 000180 20 | 0019 empty 000000 000404 21 | 0020 empty 000000 000173 22 | 0021 empty 000000 000203 23 | 0022 empty 000000 000436 24 | 0023 empty 000000 000430 25 | 0024 empty 000000 000316 26 | 0025 empty 000000 000176 27 | 0026 empty 000000 000170 28 | 0027 empty 000000 000085 29 | 0028 empty 000000 000175 30 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_tracking.seqmap.training: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | 0010 empty 000000 000294 12 | 0011 empty 000000 000373 13 | 0012 empty 000000 000078 14 | 0013 empty 000000 000340 15 | 0014 empty 000000 000106 16 | 0015 empty 000000 000376 17 | 0016 empty 000000 000209 18 | 0017 empty 000000 000145 19 | 0018 empty 000000 000339 20 | 0019 empty 000000 001059 21 | 0020 empty 000000 000837 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_1-2.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000154 2 | 0001 empty 000000 000447 3 | 0002 empty 000000 000233 4 | 0003 empty 000000 000144 5 | 0004 empty 000000 000314 6 | 0005 empty 000000 000297 7 | 0006 empty 000000 000270 8 | 0007 empty 000000 000800 9 | 0008 empty 000000 000390 10 | 0009 empty 000000 000803 11 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingtrain_2-2.seqmap: -------------------------------------------------------------------------------- 1 | 0010 empty 000000 000294 2 | 0011 empty 000000 000373 3 | 0012 empty 000000 000078 4 | 0013 empty 000000 000340 5 | 0014 empty 000000 000106 6 | 0015 empty 000000 000376 7 | 0016 empty 000000 000209 8 | 0017 empty 000000 000145 9 | 0018 empty 000000 000339 10 | 0019 empty 000000 001059 11 | 0020 empty 000000 000837 12 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/data/tracking/evaluate_trackingval_half.seqmap: -------------------------------------------------------------------------------- 1 | 0000 empty 000000 000077 2 | 0001 empty 000000 000224 3 | 0002 empty 000000 000117 4 | 0003 empty 000000 000072 5 | 0004 empty 000000 000157 6 | 0005 empty 000000 000149 7 | 0006 empty 000000 000135 8 | 0007 empty 000000 000400 9 | 0008 empty 
000000 000195 10 | 0009 empty 000000 000402 11 | 0010 empty 000000 000147 12 | 0011 empty 000000 000187 13 | 0012 empty 000000 000039 14 | 0013 empty 000000 000170 15 | 0014 empty 000000 000053 16 | 0015 empty 000000 000188 17 | 0016 empty 000000 000105 18 | 0017 empty 000000 000073 19 | 0018 empty 000000 000170 20 | 0019 empty 000000 000530 21 | 0020 empty 000000 000419 22 | -------------------------------------------------------------------------------- /src/tools/eval_kitti_track/mailpy.py: -------------------------------------------------------------------------------- 1 | class Mail: 2 | """ Dummy class to print messages without sending e-mails""" 3 | def __init__(self,mailaddress): 4 | pass 5 | def msg(self,msg): 6 | print(msg) 7 | def finalize(self,success,benchmark,sha_key,mailaddress=None): 8 | if success: 9 | print("Results for %s (benchmark: %s) sucessfully created" % (benchmark,sha_key)) 10 | else: 11 | print("Creating results for %s (benchmark: %s) failed" % (benchmark,sha_key)) 12 | 13 | -------------------------------------------------------------------------------- /src/tools/eval_motchallenge.py: -------------------------------------------------------------------------------- 1 | """py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. 2 | Christoph Heindl, 2017 3 | https://github.com/cheind/py-motmetrics 4 | Modified by Xingyi Zhou 5 | """ 6 | 7 | import argparse 8 | import glob 9 | import os 10 | import logging 11 | import motmetrics as mm 12 | import pandas as pd 13 | from collections import OrderedDict 14 | from pathlib import Path 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description=""" 18 | Compute metrics for trackers using MOTChallenge ground-truth data. 19 | Files 20 | ----- 21 | All file content, ground truth and test files, have to comply with the 22 | format described in 23 | Milan, Anton, et al. 24 | "Mot16: A benchmark for multi-object tracking." 25 | arXiv preprint arXiv:1603.00831 (2016). 26 | https://motchallenge.net/ 27 | Structure 28 | --------- 29 | Layout for ground truth data 30 | //gt/gt.txt 31 | //gt/gt.txt 32 | ... 33 | Layout for test data 34 | /.txt 35 | /.txt 36 | ... 
37 | Sequences of ground truth and test will be matched according to the `` 38 | string.""", formatter_class=argparse.RawTextHelpFormatter) 39 | 40 | parser.add_argument('groundtruths', type=str, help='Directory containing ground truth files.') 41 | parser.add_argument('tests', type=str, help='Directory containing tracker result files') 42 | parser.add_argument('--gt_type', type=str, default='') 43 | parser.add_argument('--eval_official', action='store_true') 44 | parser.add_argument('--loglevel', type=str, help='Log level', default='info') 45 | parser.add_argument('--fmt', type=str, help='Data format', default='mot15-2D') 46 | parser.add_argument('--solver', type=str, help='LAP solver to use') 47 | return parser.parse_args() 48 | 49 | def compare_dataframes(gts, ts): 50 | accs = [] 51 | names = [] 52 | for k, tsacc in ts.items(): 53 | if k in gts: 54 | logging.info('Comparing {}...'.format(k)) 55 | accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) 56 | names.append(k) 57 | else: 58 | logging.warning('No ground truth for {}, skipping.'.format(k)) 59 | 60 | return accs, names 61 | 62 | if __name__ == '__main__': 63 | 64 | args = parse_args() 65 | 66 | loglevel = getattr(logging, args.loglevel.upper(), None) 67 | if not isinstance(loglevel, int): 68 | raise ValueError('Invalid log level: {} '.format(args.loglevel)) 69 | logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s - %(message)s', datefmt='%I:%M:%S') 70 | 71 | if args.solver: 72 | mm.lap.default_solver = args.solver 73 | 74 | gt_type = args.gt_type 75 | print('gt_type', gt_type) 76 | gtfiles = glob.glob( 77 | os.path.join(args.groundtruths, '*/gt/gt{}.txt'.format(gt_type))) 78 | print('gt_files', gtfiles) 79 | tsfiles = [f for f in glob.glob(os.path.join(args.tests, '*.txt')) if not os.path.basename(f).startswith('eval')] 80 | 81 | logging.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) 82 | logging.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) 83 | logging.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) 84 | logging.info('Loading files.') 85 | 86 | gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) for f in gtfiles]) 87 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=args.fmt)) for f in tsfiles]) 88 | 89 | mh = mm.metrics.create() 90 | accs, names = compare_dataframes(gt, ts) 91 | 92 | logging.info('Running metrics') 93 | metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', \ 94 | 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', \ 95 | 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] 96 | summary = mh.compute_many( 97 | accs, names=names, 98 | metrics=metrics, generate_overall=True) 99 | # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) 100 | # print(mm.io.render_summary( 101 | # summary, formatters=mh.formatters, 102 | # namemap=mm.io.motchallenge_metric_names)) 103 | div_dict = { 104 | 'num_objects': ['num_false_positives', 'num_misses', 105 | 'num_switches', 'num_fragmentations'], 106 | 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 107 | 'mostly_lost']} 108 | for divisor in div_dict: 109 | for divided in div_dict[divisor]: 110 | summary[divided] = (summary[divided] / summary[divisor]) 111 | fmt = mh.formatters 112 | change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 113 | 
'num_fragmentations', 'mostly_tracked', 'partially_tracked', 114 | 'mostly_lost'] 115 | for k in change_fmt_list: 116 | fmt[k] = fmt['mota'] 117 | print(mm.io.render_summary( 118 | summary, formatters=fmt, 119 | namemap=mm.io.motchallenge_metric_names)) 120 | if args.eval_official: 121 | metrics = mm.metrics.motchallenge_metrics + ['num_objects'] 122 | summary = mh.compute_many( 123 | accs, names=names, 124 | metrics=metrics, generate_overall=True) 125 | print(mm.io.render_summary( 126 | summary, formatters=mh.formatters, 127 | namemap=mm.io.motchallenge_metric_names)) 128 | logging.info('Completed') 129 | -------------------------------------------------------------------------------- /src/tools/get_mot_17.sh: -------------------------------------------------------------------------------- 1 | mkdir ../../data/mot17 2 | cd ../../data/mot17 3 | wget https://motchallenge.net/data/MOT17.zip 4 | unzip MOT17.zip 5 | rm MOT17.zip 6 | mkdir annotations 7 | cd ../../src/tools/ 8 | python convert_mot_to_coco.py 9 | python convert_mot_det_to_results.py -------------------------------------------------------------------------------- /src/tools/remove_optimizers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | IN_PATH = '../../centertrack_models/' 4 | OUT_PATH = '../../models/' 5 | REMOVE_KEYS = ['base.fc'] 6 | 7 | if __name__ == '__main__': 8 | models = sorted(os.listdir(IN_PATH)) 9 | for model in models: 10 | model_path = IN_PATH + model 11 | print(model) 12 | data = torch.load(model_path) 13 | state_dict = data['state_dict'] 14 | keys = state_dict.keys() 15 | delete_keys = [] 16 | for k in keys: 17 | should_delete = False 18 | for remove_key in REMOVE_KEYS: 19 | if remove_key in k: 20 | should_delete = True 21 | if should_delete: 22 | delete_keys.append(k) 23 | for k in delete_keys: 24 | print('delete ', k) 25 | del state_dict[k] 26 | out_data = {'epoch': data['epoch'], 'state_dict': state_dict} 27 | torch.save(out_data, OUT_PATH + model) 28 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_kitti.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | DATA_PATH = '../../data/kitti_tracking/' 10 | IMG_PATH = DATA_PATH + 'data_tracking_image_2/testing/image_02/' 11 | SAVE_VIDEO = False 12 | IS_GT = False 13 | 14 | cats = ['Pedestrian', 'Car', 'Cyclist'] 15 | cat_ids = {cat: i for i, cat in enumerate(cats)} 16 | COLORS = [(255, 0, 255), (122, 122, 255), (255, 0, 0)] 17 | 18 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 19 | for bbox in bboxes: 20 | color = COLORS[int(bbox[5])] 21 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 22 | (int(bbox[2]), int(bbox[3])), 23 | color, 2, lineType=cv2.LINE_AA) 24 | ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] 25 | txt = '{}'.format(int(bbox[4])) 26 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 27 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 28 | color, thickness=1, lineType=cv2.LINE_AA) 29 | 30 | if __name__ == '__main__': 31 | seqs = os.listdir(IMG_PATH) 32 | if SAVE_VIDEO: 33 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 34 | if not os.path.exists(save_path): 35 | os.mkdir(save_path) 36 | print('save_video_path', save_path) 37 | for seq in sorted(seqs): 38 | print('seq', seq) 39 | if '.DS_Store' in seq: 40 | continue 41 | # if 
SAVE_VIDEO: 42 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 43 | # video = cv2.VideoWriter( 44 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 45 | 46 | 47 | preds = {} 48 | for K in range(1, len(sys.argv)): 49 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 50 | pred_file = open(pred_path, 'r') 51 | preds[K] = defaultdict(list) 52 | for line in pred_file: 53 | tmp = line[:-1].split(' ') 54 | frame_id = int(tmp[0]) 55 | track_id = int(tmp[1]) 56 | cat_id = cat_ids[tmp[2]] 57 | bbox = [float(tmp[6]), float(tmp[7]), float(tmp[8]), float(tmp[9])] 58 | score = float(tmp[17]) 59 | preds[K][frame_id].append(bbox + [track_id, cat_id, score]) 60 | 61 | images_path = '{}/{}/'.format(IMG_PATH, seq) 62 | images = os.listdir(images_path) 63 | num_images = len([image for image in images if 'png' in image]) 64 | 65 | for i in range(num_images): 66 | frame_id = i 67 | file_path = '{}/{:06d}.png'.format(images_path, i) 68 | img = cv2.imread(file_path) 69 | for K in range(1, len(sys.argv)): 70 | img_pred = img.copy() 71 | draw_bbox(img_pred, preds[K][frame_id]) 72 | cv2.imshow('pred{}'.format(K), img_pred) 73 | cv2.waitKey() 74 | # if SAVE_VIDEO: 75 | # video.write(img_pred) 76 | # if SAVE_VIDEO: 77 | # video.release() 78 | -------------------------------------------------------------------------------- /src/tools/vis_tracking_mot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import glob 5 | import sys 6 | from collections import defaultdict 7 | from pathlib import Path 8 | 9 | GT_PATH = '../../data/mot17/test/' 10 | IMG_PATH = GT_PATH 11 | SAVE_VIDEO = True 12 | RESIZE = 2 13 | IS_GT = False 14 | 15 | def draw_bbox(img, bboxes, c=(255, 0, 255)): 16 | for bbox in bboxes: 17 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 18 | (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3])), 19 | c, 2, lineType=cv2.LINE_AA) 20 | ct = [bbox[0] + bbox[2] / 2, bbox[1] + bbox[3] / 2] 21 | txt = '{}'.format(bbox[4]) 22 | cv2.putText(img, txt, (int(ct[0]), int(ct[1])), 23 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, 24 | (255, 122, 255), thickness=1, lineType=cv2.LINE_AA) 25 | 26 | if __name__ == '__main__': 27 | seqs = os.listdir(GT_PATH) 28 | if SAVE_VIDEO: 29 | save_path = sys.argv[1][:sys.argv[1].rfind('/res')] + '/video' 30 | if not os.path.exists(save_path): 31 | os.mkdir(save_path) 32 | print('save_video_path', save_path) 33 | for seq in sorted(seqs): 34 | print('seq', seq) 35 | # if len(sys.argv) > 2 and not sys.argv[2] in seq: 36 | # continue 37 | if '.DS_Store' in seq: 38 | continue 39 | # if SAVE_VIDEO: 40 | # fourcc = cv2.VideoWriter_fourcc(*'XVID') 41 | # video = cv2.VideoWriter( 42 | # '{}/{}.avi'.format(save_path, seq),fourcc, 10.0, (1024, 750)) 43 | seq_path = '{}/{}/'.format(GT_PATH, seq) 44 | if IS_GT: 45 | ann_path = seq_path + 'gt/gt.txt' 46 | else: 47 | ann_path = seq_path + 'det/det.txt' 48 | anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') 49 | print('anns shape', anns.shape) 50 | image_to_anns = defaultdict(list) 51 | for i in range(anns.shape[0]): 52 | if (not IS_GT) or (int(anns[i][6]) == 1 and float(anns[i][8]) >= 0.25): 53 | frame_id = int(anns[i][0]) 54 | track_id = int(anns[i][1]) 55 | bbox = (anns[i][2:6] / RESIZE).tolist() 56 | image_to_anns[frame_id].append(bbox + [track_id]) 57 | 58 | image_to_preds = {} 59 | for K in range(1, len(sys.argv)): 60 | image_to_preds[K] = defaultdict(list) 61 | pred_path = sys.argv[K] + '/{}.txt'.format(seq) 62 | try: 63 | preds = 
np.loadtxt(pred_path, dtype=np.float32, delimiter=',') 64 | except: 65 | preds = np.loadtxt(pred_path, dtype=np.float32, delimiter=' ') 66 | for i in range(preds.shape[0]): 67 | frame_id = int(preds[i][0]) 68 | track_id = int(preds[i][1]) 69 | bbox = (preds[i][2:6] / RESIZE).tolist() 70 | image_to_preds[K][frame_id].append(bbox + [track_id]) 71 | 72 | img_path = seq_path + 'img1/' 73 | images = os.listdir(img_path) 74 | num_images = len([image for image in images if 'jpg' in image]) 75 | 76 | for i in range(num_images): 77 | frame_id = i + 1 78 | file_name = '{}/img1/{:06d}.jpg'.format(seq, i + 1) 79 | file_path = IMG_PATH + file_name 80 | img = cv2.imread(file_path) 81 | if RESIZE != 1: 82 | img = cv2.resize(img, (img.shape[1] // RESIZE, img.shape[0] // RESIZE)) 83 | for K in range(1, len(sys.argv)): 84 | img_pred = img.copy() 85 | draw_bbox(img_pred, image_to_preds[K][frame_id]) 86 | cv2.imshow('pred{}'.format(K), img_pred) 87 | draw_bbox(img, image_to_anns[frame_id]) 88 | cv2.imshow('gt', img) 89 | cv2.waitKey() 90 | # if SAVE_VIDEO: 91 | # video.write(img_pred) 92 | # if SAVE_VIDEO: 93 | # video.release() 94 | -------------------------------------------------------------------------------- /videos/nuscenes_mini.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xingyizhou/CenterTrack/e4e7534cc2ebfbd31e0cde680988f286c65fe34f/videos/nuscenes_mini.mp4 --------------------------------------------------------------------------------